From: Luke Kenneth Casson Leighton
Date: Mon, 6 May 2019 07:52:01 +0000 (+0100)
Subject: add global pending collator
X-Git-Tag: ls180-24jan2020~1016
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=93613b3c43964a39ab7cf1caaa205e40f9975e6a;p=ieee754fpu.git

add global pending collator
---
Review notes (not part of the original commit):
 * fn_unit.py: Const is now imported; it is used when shadow_wid == 0 and
   was previously an undefined name.
 * global_pending.py: g_vec_sim drove FnUnit ports that GlobalPending does
   not have; it now drives the function unit vectors and checks g_pend_o.
 * docstrings/comments added; hunk sizes of the two new files updated to
   match.  blob ids on the "index" lines have NOT been recomputed.
 * line structure and indentation were rebuilt from a whitespace-flattened
   copy: spacing inside lines (e.g. comment alignment) may differ from the
   repository, which matters for the deletion hunk.

diff --git a/src/scoreboard/fn_unit.py b/src/scoreboard/fn_unit.py
new file mode 100644
index 00000000..304ce3f9
--- /dev/null
+++ b/src/scoreboard/fn_unit.py
@@ -0,0 +1,201 @@
+from nmigen.compat.sim import run_simulation
+from nmigen.cli import verilog, rtlil
+from nmigen import Module, Signal, Cat, Const, Elaboratable
+from nmutil.latch import SRLatch
+from nmigen.lib.coding import Decoder
+
+from shadow_fn import ShadowFn
+
+
+class FnUnit(Elaboratable):
+    """ implements 11.4.8 function unit, p31
+        also implements optional shadowing 11.5.1, p55
+
+        shadowing can be used for branches as well as exceptions (interrupts),
+        load/store hold (exceptions again), and vector-element predication
+        (once the predicate is known, which it may not be at instruction issue)
+
+        Inputs:
+
+        * :wid: register file width: sizes the register vectors and the
+          dest/src1/src2 Decoders
+        * :shadow_wid: number of ShadowFn latches to create.  when 0 (the
+          default) the shadow_i/s_fail_i/s_good_i/go_die_o ports do not exist
+
+        notes:
+
+        * req_rel_i (request release) is the direct equivalent of pipeline
+          "output valid"
+        * recover is a local python variable (actually go_die_o)
+        * when shadow_wid = 0, recover and shadown are Consts (i.e. do nothing)
+    """
+    def __init__(self, wid, shadow_wid=0):
+        self.reg_width = wid
+        self.shadow_wid = shadow_wid
+
+        # inputs
+        self.dest_i = Signal(wid, reset_less=True)  # Dest in (top)
+        self.src1_i = Signal(wid, reset_less=True)  # oper1 in (top)
+        self.src2_i = Signal(wid, reset_less=True)  # oper2 in (top)
+        self.issue_i = Signal(reset_less=True)  # Issue in (top)
+
+        self.go_write_i = Signal(reset_less=True)  # Go Write in (left)
+        self.go_read_i = Signal(reset_less=True)  # Go Read in (left)
+        # NOTE(review): wid bits wide, but writable_o is 1 bit so only bit 0
+        # of the AND in elaborate() survives - confirm the intended width
+        self.req_rel_i = Signal(wid, reset_less=True)  # request release (left)
+
+        self.g_rd_pend_i = Signal(wid, reset_less=True)  # global rd (right)
+        self.g_wr_pend_i = Signal(wid, reset_less=True)  # global wr (right)
+
+        if shadow_wid:
+            self.shadow_i = Signal(shadow_wid, reset_less=True)
+            self.s_fail_i = Signal(shadow_wid, reset_less=True)
+            self.s_good_i = Signal(shadow_wid, reset_less=True)
+            self.go_die_o = Signal(reset_less=True)
+
+        # outputs
+        self.readable_o = Signal(reset_less=True)  # Readable out (right)
+        self.writable_o = Signal(reset_less=True)  # Writable out (right)
+        self.busy_o = Signal(reset_less=True)  # busy out (left)
+
+        self.rd_pend_o = Signal(wid, reset_less=True)  # rd pending (right)
+        self.wr_pend_o = Signal(wid, reset_less=True)  # wr pending (right)
+
+    def elaborate(self, platform):
+        """ builds the read/write latches, the dest/src decoders, the
+            optional shadow latches and the readable/writable outputs
+        """
+        m = Module()
+        m.submodules.rd_l = rd_l = SRLatch(sync=False)
+        m.submodules.wr_l = wr_l = SRLatch(sync=False)
+        m.submodules.dest_d = dest_d = Decoder(self.reg_width)
+        m.submodules.src1_d = src1_d = Decoder(self.reg_width)
+        m.submodules.src2_d = src2_d = Decoder(self.reg_width)
+        s_latches = []
+        for i in range(self.shadow_wid):
+            sh = ShadowFn()
+            setattr(m.submodules, "shadow%d" % i, sh)
+            s_latches.append(sh)
+
+        # shadow / recover (optional: shadow_wid > 0)
+        if self.shadow_wid:
+            recover = self.go_die_o
+            shadown = Signal(reset_less=True)
+            i_l = []
+            fail_l = []
+            good_l = []
+            shi_l = []
+            sho_l = []
+            rec_l = []
+            # get list of latch signals. really must be a better way to do this
+            for l in s_latches:
+                i_l.append(l.issue_i)
+                shi_l.append(l.shadow_i)
+                fail_l.append(l.s_fail_i)
+                good_l.append(l.s_good_i)
+                sho_l.append(l.shadow_o)
+                rec_l.append(l.recover_o)
+            # NOTE(review): issue_i is 1 bit; if each ShadowFn.issue_i is also
+            # 1 bit, only the first latch sees issue - confirm (Repl needed?)
+            m.d.comb += Cat(*i_l).eq(self.issue_i)
+            m.d.comb += Cat(*fail_l).eq(self.s_fail_i)
+            m.d.comb += Cat(*good_l).eq(self.s_good_i)
+            m.d.comb += Cat(*shi_l).eq(self.shadow_i)
+            m.d.comb += shadown.eq(~(Cat(*sho_l).bool()))
+            m.d.comb += recover.eq(Cat(*rec_l).bool())
+        else:
+            shadown = Const(1)
+            recover = Const(0)
+
+        # go_write latch: reset on go_write HI, set on issue
+        m.d.comb += wr_l.s.eq(self.issue_i)
+        m.d.comb += wr_l.r.eq(self.go_write_i | recover)
+
+        # src1 latch: reset on go_read HI, set on issue
+        m.d.comb += rd_l.s.eq(self.issue_i)
+        m.d.comb += rd_l.r.eq(self.go_read_i | recover)
+
+        # dest decoder: write-pending out
+        m.d.comb += dest_d.i.eq(self.dest_i)
+        m.d.comb += dest_d.n.eq(wr_l.qn)  # decode is inverted
+        m.d.comb += self.busy_o.eq(wr_l.q)  # busy if set
+        m.d.comb += self.wr_pend_o.eq(dest_d.o)
+
+        # src1/src2 decoder: read-pending out
+        m.d.comb += src1_d.i.eq(self.src1_i)
+        m.d.comb += src1_d.n.eq(rd_l.qn)  # decode is inverted
+        m.d.comb += src2_d.i.eq(self.src2_i)
+        m.d.comb += src2_d.n.eq(rd_l.qn)  # decode is inverted
+        m.d.comb += self.rd_pend_o.eq(src1_d.o | src2_d.o)
+
+        # readable output signal
+        int_g_wr = Signal(self.reg_width, reset_less=True)
+        m.d.comb += int_g_wr.eq(self.g_wr_pend_i & self.rd_pend_o)
+        m.d.comb += self.readable_o.eq(int_g_wr.bool())
+
+        # writable output signal
+        int_g_rw = Signal(self.reg_width, reset_less=True)
+        g_rw = Signal(reset_less=True)
+        m.d.comb += int_g_rw.eq(self.g_rd_pend_i & self.wr_pend_o)
+        m.d.comb += g_rw.eq(~int_g_rw.bool())
+        m.d.comb += self.writable_o.eq(g_rw & rd_l.q & self.req_rel_i & shadown)
+
+        return m
+
+    def __iter__(self):
+        """ yields the non-shadow ports (busy_o is not included) """
+        yield self.dest_i
+        yield self.src1_i
+        yield self.src2_i
+        yield self.issue_i
+        yield self.go_write_i
+        yield self.go_read_i
+        yield self.req_rel_i
+        yield self.g_rd_pend_i
+        yield self.g_wr_pend_i
+        yield self.readable_o
+        yield self.writable_o
+        yield self.rd_pend_o
+        yield self.wr_pend_o
+
+    def ports(self):
+        """ returns the ports as a list, e.g. for rtlil.convert """
+        return list(self)
+
+
+def int_fn_unit_sim(dut):
+    """ stimulus only, nothing is checked: issue with dest=1, issue again
+        with src1=1, then pulse go_read followed by go_write
+    """
+    yield dut.dest_i.eq(1)
+    yield dut.issue_i.eq(1)
+    yield
+    yield dut.issue_i.eq(0)
+    yield
+    yield dut.src1_i.eq(1)
+    yield dut.issue_i.eq(1)
+    yield
+    yield
+    yield
+    yield dut.issue_i.eq(0)
+    yield
+    yield dut.go_read_i.eq(1)
+    yield
+    yield dut.go_read_i.eq(0)
+    yield
+    yield dut.go_write_i.eq(1)
+    yield
+    yield dut.go_write_i.eq(0)
+    yield
+
+def test_int_fn_unit():
+    """ writes test_int_fn_unit.il, then simulates to test_int_fn_unit.vcd """
+    dut = FnUnit(32, 2)
+    vl = rtlil.convert(dut, ports=dut.ports())
+    with open("test_int_fn_unit.il", "w") as f:
+        f.write(vl)
+
+    run_simulation(dut, int_fn_unit_sim(dut), vcd_name='test_int_fn_unit.vcd')
+
+if __name__ == '__main__':
+    test_int_fn_unit()
diff --git a/src/scoreboard/global_pending.py b/src/scoreboard/global_pending.py
new file mode 100644
index 00000000..50e43378
--- /dev/null
+++ b/src/scoreboard/global_pending.py
@@ -0,0 +1,98 @@
+from nmigen.compat.sim import run_simulation
+from nmigen.cli import verilog, rtlil
+from nmigen import Module, Signal, Cat, Elaboratable
+from nmutil.latch import SRLatch
+from nmigen.lib.coding import Decoder
+
+
+class GlobalPending(Elaboratable):
+    """ implements Global Pending Vector, basically ORs all incoming Function
+        Unit vectors together.  Can be used for creating Read or Write Global
+        Pending.  Can be used for INT or FP Global Pending.
+
+        Inputs:
+        * :wid: register file width
+        * :fu_vecs: a python list of function unit "pending" vectors, each
+          vector being a Signal of width equal to the reg file.
+
+        Notes:
+
+        * the regfile may be Int or FP, this code doesn't care which.
+          obviously do not try to put in a mixture of regfiles into fu_vecs.
+        * this code also doesn't care if it's used for Read Pending or Write
+          pending, it can be used for both: again, obviously, do not try to
+          put in a mixture of read *and* write pending vectors in.
+        * if some Function Units happen not to be uniform (don't operate
+          on a particular register - extremely unusual), they must set a Const
+          zero bit in the vector.
+    """
+    def __init__(self, wid, fu_vecs):
+        self.reg_width = wid
+        # inputs
+        self.fu_vecs = fu_vecs
+        for v in fu_vecs:
+            assert len(v) == wid, "FU Vector must be same width as regfile"
+
+        self.g_pend_o = Signal(wid, reset_less=True)  # global pending vector
+
+    def elaborate(self, platform):
+        """ ORs together, per register, that register's bit of every vector """
+        m = Module()
+
+        pend_l = []
+        for i in range(self.reg_width):  # per-register
+            vec_bit_l = []
+            for v in self.fu_vecs:
+                vec_bit_l.append(v[i])  # fu bit for same register
+            pend_l.append(Cat(*vec_bit_l).bool())  # OR all bits for same reg
+        m.d.comb += self.g_pend_o.eq(Cat(*pend_l))  # merge all OR'd bits
+
+        return m
+
+    def __iter__(self):
+        """ yields every input vector, then the global pending output """
+        yield from self.fu_vecs
+        yield self.g_pend_o
+
+    def ports(self):
+        """ returns the ports as a list, e.g. for rtlil.convert """
+        return list(self)
+
+
+def g_vec_sim(dut):
+    """ sets one bit in each function unit vector in turn, checking that
+        g_pend_o accumulates the OR of all of them, then clears them all
+    """
+    expected = 0
+    for i, vec in enumerate(dut.fu_vecs):
+        bit = 1 << (i % dut.reg_width)
+        expected |= bit
+        yield vec.eq(bit)
+        yield
+        yield  # second clock: be sure the combinatorial OR has settled
+        result = yield dut.g_pend_o
+        assert result == expected, \
+            "g_pend_o %x, expected %x" % (result, expected)
+
+    # clear every vector again: nothing may remain pending
+    for vec in dut.fu_vecs:
+        yield vec.eq(0)
+    yield
+    yield
+    result = yield dut.g_pend_o
+    assert result == 0, "g_pend_o %x, expected 0" % result
+
+def test_g_vec():
+    """ writes test_global_pending.il, then simulates to the .vcd file """
+    vecs = []
+    for i in range(3):
+        vecs.append(Signal(32, name="fu%d" % i))
+    dut = GlobalPending(32, vecs)
+    vl = rtlil.convert(dut, ports=dut.ports())
+    with open("test_global_pending.il", "w") as f:
+        f.write(vl)
+
+    run_simulation(dut, g_vec_sim(dut), vcd_name='test_global_pending.vcd')
+
+if __name__ == '__main__':
+    test_g_vec()
diff --git a/src/scoreboard/int_fn_unit.py b/src/scoreboard/int_fn_unit.py
deleted file mode 100644
index 70393fc9..00000000
--- a/src/scoreboard/int_fn_unit.py
+++ /dev/null
@@ -1,182 +0,0 @@
-from nmigen.compat.sim import run_simulation
-from nmigen.cli import verilog, rtlil
-from nmigen import Module, Signal, Cat, Elaboratable
-from nmutil.latch import SRLatch
-from nmigen.lib.coding import Decoder
-
-from shadow_fn import ShadowFn
-
-
-class IntFnUnit(Elaboratable):
-    """ implements 11.4.8 integer function unit, p31
-        also implements optional shadowing 11.5.1, p55
-
-        shadowing can be used for branches as well as exceptions (interrupts),
-        load/store hold (exceptions again), and vector-element predication
-        (once the predicate is known, which it may not be at instruction issue)
-
-        notes:
-
-        * req_rel_i (request release) is the direct equivalent of pipeline
-          "output valid"
-        * recover is a local python variable (actually go_die_o)
-        * when shadow_wid = 0, recover and shadown are Consts (i.e. do nothing)
-    """
-    def __init__(self, wid, shadow_wid=0):
-        self.reg_width = wid
-        self.shadow_wid = shadow_wid
-
-        # inputs
-        self.dest_i = Signal(wid, reset_less=True)  # Dest in (top)
-        self.src1_i = Signal(wid, reset_less=True)  # oper1 in (top)
-        self.src2_i = Signal(wid, reset_less=True)  # oper2 in (top)
-        self.issue_i = Signal(reset_less=True)  # Issue in (top)
-
-        self.go_write_i = Signal(reset_less=True)  # Go Write in (left)
-        self.go_read_i = Signal(reset_less=True)  # Go Read in (left)
-        self.req_rel_i = Signal(wid, reset_less=True)  # request release (left)
-
-        self.g_rd_pend_i = Signal(wid, reset_less=True)  # global rd (right)
-        self.g_wr_pend_i = Signal(wid, reset_less=True)  # global wr (right)
-
-        if shadow_wid:
-            self.shadow_i = Signal(shadow_wid, reset_less=True)
-            self.s_fail_i = Signal(shadow_wid, reset_less=True)
-            self.s_good_i = Signal(shadow_wid, reset_less=True)
-            self.go_die_o = Signal(reset_less=True)
-
-        # outputs
-        self.readable_o = Signal(reset_less=True)  # Readable out (right)
-        self.writable_o = Signal(reset_less=True)  # Writable out (right)
-        self.busy_o = Signal(reset_less=True)  # busy out (left)
-
-        self.rd_pend_o = Signal(wid, reset_less=True)  # rd pending (right)
-        self.wr_pend_o = Signal(wid, reset_less=True)  # wr pending (right)
-
-    def elaborate(self, platform):
-        m = Module()
-        m.submodules.rd_l = rd_l = SRLatch(sync=False)
-        m.submodules.wr_l = wr_l = SRLatch(sync=False)
-        m.submodules.dest_d = dest_d = Decoder(self.reg_width)
-        m.submodules.src1_d = src1_d = Decoder(self.reg_width)
-        m.submodules.src2_d = src2_d = Decoder(self.reg_width)
-        s_latches = []
-        for i in range(self.shadow_wid):
-            sh = ShadowFn()
-            setattr(m.submodules, "shadow%d" % i, sh)
-            s_latches.append(sh)
-
-        # shadow / recover (optional: shadow_wid > 0)
-        if self.shadow_wid:
-            recover = self.go_die_o
-            shadown = Signal(reset_less=True)
-            i_l = []
-            fail_l = []
-            good_l = []
-            shi_l = []
-            sho_l = []
-            rec_l = []
-            # get list of latch signals. really must be a better way to do this
-            for l in s_latches:
-                i_l.append(l.issue_i)
-                shi_l.append(l.shadow_i)
-                fail_l.append(l.s_fail_i)
-                good_l.append(l.s_good_i)
-                sho_l.append(l.shadow_o)
-                rec_l.append(l.recover_o)
-            m.d.comb += Cat(*i_l).eq(self.issue_i)
-            m.d.comb += Cat(*fail_l).eq(self.s_fail_i)
-            m.d.comb += Cat(*good_l).eq(self.s_good_i)
-            m.d.comb += Cat(*shi_l).eq(self.shadow_i)
-            m.d.comb += shadown.eq(~(Cat(*sho_l).bool()))
-            m.d.comb += recover.eq(Cat(*rec_l).bool())
-        else:
-            shadown = Const(1)
-            recover = Const(0)
-
-        # go_write latch: reset on go_write HI, set on issue
-        m.d.comb += wr_l.s.eq(self.issue_i)
-        m.d.comb += wr_l.r.eq(self.go_write_i | recover)
-
-        # src1 latch: reset on go_read HI, set on issue
-        m.d.comb += rd_l.s.eq(self.issue_i)
-        m.d.comb += rd_l.r.eq(self.go_read_i | recover)
-
-        # dest decoder: write-pending out
-        m.d.comb += dest_d.i.eq(self.dest_i)
-        m.d.comb += dest_d.n.eq(wr_l.qn)  # decode is inverted
-        m.d.comb += self.busy_o.eq(wr_l.q)  # busy if set
-        m.d.comb += self.wr_pend_o.eq(dest_d.o)
-
-        # src1/src2 decoder: read-pending out
-        m.d.comb += src1_d.i.eq(self.src1_i)
-        m.d.comb += src1_d.n.eq(rd_l.qn)  # decode is inverted
-        m.d.comb += src2_d.i.eq(self.src2_i)
-        m.d.comb += src2_d.n.eq(rd_l.qn)  # decode is inverted
-        m.d.comb += self.rd_pend_o.eq(src1_d.o | src2_d.o)
-
-        # readable output signal
-        int_g_wr = Signal(self.reg_width, reset_less=True)
-        m.d.comb += int_g_wr.eq(self.g_wr_pend_i & self.rd_pend_o)
-        m.d.comb += self.readable_o.eq(int_g_wr.bool())
-
-        # writable output signal
-        int_g_rw = Signal(self.reg_width, reset_less=True)
-        g_rw = Signal(reset_less=True)
-        m.d.comb += int_g_rw.eq(self.g_rd_pend_i & self.wr_pend_o)
-        m.d.comb += g_rw.eq(~int_g_rw.bool())
-        m.d.comb += self.writable_o.eq(g_rw & rd_l.q & self.req_rel_i & shadown)
-
-        return m
-
-    def __iter__(self):
-        yield self.dest_i
-        yield self.src1_i
-        yield self.src2_i
-        yield self.issue_i
-        yield self.go_write_i
-        yield self.go_read_i
-        yield self.req_rel_i
-        yield self.g_rd_pend_i
-        yield self.g_wr_pend_i
-        yield self.readable_o
-        yield self.writable_o
-        yield self.rd_pend_o
-        yield self.wr_pend_o
-
-    def ports(self):
-        return list(self)
-
-
-def int_fn_unit_sim(dut):
-    yield dut.dest_i.eq(1)
-    yield dut.issue_i.eq(1)
-    yield
-    yield dut.issue_i.eq(0)
-    yield
-    yield dut.src1_i.eq(1)
-    yield dut.issue_i.eq(1)
-    yield
-    yield
-    yield
-    yield dut.issue_i.eq(0)
-    yield
-    yield dut.go_read_i.eq(1)
-    yield
-    yield dut.go_read_i.eq(0)
-    yield
-    yield dut.go_write_i.eq(1)
-    yield
-    yield dut.go_write_i.eq(0)
-    yield
-
-def test_int_fn_unit():
-    dut = IntFnUnit(32, 2)
-    vl = rtlil.convert(dut, ports=dut.ports())
-    with open("test_int_fn_unit.il", "w") as f:
-        f.write(vl)
-
-    run_simulation(dut, int_fn_unit_sim(dut), vcd_name='test_int_fn_unit.vcd')
-
-if __name__ == '__main__':
-    test_int_fn_unit()