--- /dev/null
+from nmigen.compat.sim import run_simulation
+from nmigen.cli import verilog, rtlil
+from nmigen import Module, Signal, Cat, Const, Elaboratable
+from nmutil.latch import SRLatch
+from nmigen.lib.coding import Decoder
+
+from shadow_fn import ShadowFn
+
+
+class FnUnit(Elaboratable):
+ """ implements 11.4.8 function unit, p31
+ also implements optional shadowing 11.5.1, p55
+
+ shadowing can be used for branches as well as exceptions (interrupts),
+ load/store hold (exceptions again), and vector-element predication
+ (once the predicate is known, which it may not be at instruction issue)
+
+ notes:
+
+ * req_rel_i (request release) is the direct equivalent of pipeline
+ "output valid"
+ * recover is a local python variable (actually go_die_o)
+ * when shadow_wid = 0, recover and shadown are Consts (i.e. do nothing)
+ """
+ def __init__(self, wid, shadow_wid=0):
+ self.reg_width = wid
+ self.shadow_wid = shadow_wid
+
+ # inputs
+ self.dest_i = Signal(wid, reset_less=True) # Dest in (top)
+ self.src1_i = Signal(wid, reset_less=True) # oper1 in (top)
+ self.src2_i = Signal(wid, reset_less=True) # oper2 in (top)
+ self.issue_i = Signal(reset_less=True) # Issue in (top)
+
+ self.go_write_i = Signal(reset_less=True) # Go Write in (left)
+ self.go_read_i = Signal(reset_less=True) # Go Read in (left)
+ self.req_rel_i = Signal(wid, reset_less=True) # request release (left)
+
+ self.g_rd_pend_i = Signal(wid, reset_less=True) # global rd (right)
+ self.g_wr_pend_i = Signal(wid, reset_less=True) # global wr (right)
+
+ if shadow_wid:
+ self.shadow_i = Signal(shadow_wid, reset_less=True)
+ self.s_fail_i = Signal(shadow_wid, reset_less=True)
+ self.s_good_i = Signal(shadow_wid, reset_less=True)
+ self.go_die_o = Signal(reset_less=True)
+
+ # outputs
+ self.readable_o = Signal(reset_less=True) # Readable out (right)
+ self.writable_o = Signal(reset_less=True) # Writable out (right)
+ self.busy_o = Signal(reset_less=True) # busy out (left)
+
+ self.rd_pend_o = Signal(wid, reset_less=True) # rd pending (right)
+ self.wr_pend_o = Signal(wid, reset_less=True) # wr pending (right)
+
+ def elaborate(self, platform):
+ m = Module()
+ m.submodules.rd_l = rd_l = SRLatch(sync=False)
+ m.submodules.wr_l = wr_l = SRLatch(sync=False)
+ m.submodules.dest_d = dest_d = Decoder(self.reg_width)
+ m.submodules.src1_d = src1_d = Decoder(self.reg_width)
+ m.submodules.src2_d = src2_d = Decoder(self.reg_width)
+ s_latches = []
+ for i in range(self.shadow_wid):
+ sh = ShadowFn()
+ setattr(m.submodules, "shadow%d" % i, sh)
+ s_latches.append(sh)
+
+ # shadow / recover (optional: shadow_wid > 0)
+ if self.shadow_wid:
+ recover = self.go_die_o
+ shadown = Signal(reset_less=True)
+ i_l = []
+ fail_l = []
+ good_l = []
+ shi_l = []
+ sho_l = []
+ rec_l = []
+ # get list of latch signals. really must be a better way to do this
+ for l in s_latches:
+ i_l.append(l.issue_i)
+ shi_l.append(l.shadow_i)
+ fail_l.append(l.s_fail_i)
+ good_l.append(l.s_good_i)
+ sho_l.append(l.shadow_o)
+ rec_l.append(l.recover_o)
+ m.d.comb += Cat(*i_l).eq(self.issue_i)
+ m.d.comb += Cat(*fail_l).eq(self.s_fail_i)
+ m.d.comb += Cat(*good_l).eq(self.s_good_i)
+ m.d.comb += Cat(*shi_l).eq(self.shadow_i)
+ m.d.comb += shadown.eq(~(Cat(*sho_l).bool()))
+ m.d.comb += recover.eq(Cat(*rec_l).bool())
+ else:
+ shadown = Const(1)
+ recover = Const(0)
+
+ # go_write latch: reset on go_write HI, set on issue
+ m.d.comb += wr_l.s.eq(self.issue_i)
+ m.d.comb += wr_l.r.eq(self.go_write_i | recover)
+
+ # src1 latch: reset on go_read HI, set on issue
+ m.d.comb += rd_l.s.eq(self.issue_i)
+ m.d.comb += rd_l.r.eq(self.go_read_i | recover)
+
+ # dest decoder: write-pending out
+ m.d.comb += dest_d.i.eq(self.dest_i)
+ m.d.comb += dest_d.n.eq(wr_l.qn) # decode is inverted
+ m.d.comb += self.busy_o.eq(wr_l.q) # busy if set
+ m.d.comb += self.wr_pend_o.eq(dest_d.o)
+
+ # src1/src2 decoder: read-pending out
+ m.d.comb += src1_d.i.eq(self.src1_i)
+ m.d.comb += src1_d.n.eq(rd_l.qn) # decode is inverted
+ m.d.comb += src2_d.i.eq(self.src2_i)
+ m.d.comb += src2_d.n.eq(rd_l.qn) # decode is inverted
+ m.d.comb += self.rd_pend_o.eq(src1_d.o | src2_d.o)
+
+ # readable output signal
+ int_g_wr = Signal(self.reg_width, reset_less=True)
+ m.d.comb += int_g_wr.eq(self.g_wr_pend_i & self.rd_pend_o)
+ m.d.comb += self.readable_o.eq(int_g_wr.bool())
+
+ # writable output signal
+ int_g_rw = Signal(self.reg_width, reset_less=True)
+ g_rw = Signal(reset_less=True)
+ m.d.comb += int_g_rw.eq(self.g_rd_pend_i & self.wr_pend_o)
+ m.d.comb += g_rw.eq(~int_g_rw.bool())
+ m.d.comb += self.writable_o.eq(g_rw & rd_l.q & self.req_rel_i & shadown)
+
+ return m
+
+ def __iter__(self):
+ yield self.dest_i
+ yield self.src1_i
+ yield self.src2_i
+ yield self.issue_i
+ yield self.go_write_i
+ yield self.go_read_i
+ yield self.req_rel_i
+ yield self.g_rd_pend_i
+ yield self.g_wr_pend_i
+ yield self.readable_o
+ yield self.writable_o
+ yield self.rd_pend_o
+ yield self.wr_pend_o
+
+ def ports(self):
+ return list(self)
+
+
+def int_fn_unit_sim(dut):
+ yield dut.dest_i.eq(1)
+ yield dut.issue_i.eq(1)
+ yield
+ yield dut.issue_i.eq(0)
+ yield
+ yield dut.src1_i.eq(1)
+ yield dut.issue_i.eq(1)
+ yield
+ yield
+ yield
+ yield dut.issue_i.eq(0)
+ yield
+ yield dut.go_read_i.eq(1)
+ yield
+ yield dut.go_read_i.eq(0)
+ yield
+ yield dut.go_write_i.eq(1)
+ yield
+ yield dut.go_write_i.eq(0)
+ yield
+
+def test_int_fn_unit():
+ dut = FnUnit(32, 2)
+ vl = rtlil.convert(dut, ports=dut.ports())
+ with open("test_int_fn_unit.il", "w") as f:
+ f.write(vl)
+
+ run_simulation(dut, int_fn_unit_sim(dut), vcd_name='test_int_fn_unit.vcd')
+
+# run the FnUnit test when this file is executed as a script
+if __name__ == '__main__':
+    test_int_fn_unit()
--- /dev/null
+from nmigen.compat.sim import run_simulation
+from nmigen.cli import verilog, rtlil
+from nmigen import Module, Signal, Cat, Elaboratable
+from nmutil.latch import SRLatch
+from nmigen.lib.coding import Decoder
+
+
+class GlobalPending(Elaboratable):
+ """ implements Global Pending Vector, basically ORs all incoming Function
+ Unit vectors together. Can be used for creating Read or Write Global
+ Pending. Can be used for INT or FP Global Pending.
+
+ Inputs:
+ * :wid: register file width
+ * :fu_vecs: a python list of function unit "pending" vectors, each
+ vector being a Signal of width equal to the reg file.
+
+ Notes:
+
+ * the regfile may be Int or FP, this code doesn't care which.
+ obviously do not try to put in a mixture of regfiles into fu_vecs.
+ * this code also doesn't care if it's used for Read Pending or Write
+ pending, it can be used for both: again, obviously, do not try to
+ put in a mixture of read *and* write pending vectors in.
+ * if some Function Units happen not to be uniform (don't operate
+ on a particular register (extremely unusual), they must set a Const
+ zero bit in the vector.
+ """
+ def __init__(self, wid, fu_vecs):
+ self.reg_width = wid
+ # inputs
+ self.fu_vecs = fu_vecs
+ for v in fu_vecs:
+ assert len(v) == wid, "FU Vector must be same width as regfile"
+
+ self.g_pend_o = Signal(wid, reset_less=True) # global pending vector
+
+ def elaborate(self, platform):
+ m = Module()
+
+ pend_l = []
+ for i in range(self.reg_width): # per-register
+ vec_bit_l = []
+ for v in self.fu_vecs:
+ vec_bit_l.append(v[i]) # fu bit for same register
+ pend_l.append(Cat(*vec_bit_l).bool()) # OR all bits for same reg
+ m.d.comb += self.g_pend_o.eq(Cat(*pend_l)) # merge all OR'd bits
+
+ return m
+
+ def __iter__(self):
+ yield from self.fu_vecs
+ yield self.g_pend_o
+
+ def ports(self):
+ return list(self)
+
+
+def g_vec_sim(dut):
+ yield dut.dest_i.eq(1)
+ yield dut.issue_i.eq(1)
+ yield
+ yield dut.issue_i.eq(0)
+ yield
+ yield dut.src1_i.eq(1)
+ yield dut.issue_i.eq(1)
+ yield
+ yield
+ yield
+ yield dut.issue_i.eq(0)
+ yield
+ yield dut.go_read_i.eq(1)
+ yield
+ yield dut.go_read_i.eq(0)
+ yield
+ yield dut.go_write_i.eq(1)
+ yield
+ yield dut.go_write_i.eq(0)
+ yield
+
+def test_g_vec():
+ vecs = []
+ for i in range(3):
+ vecs.append(Signal(32, name="fu%d" % i))
+ dut = GlobalPending(32, vecs)
+ vl = rtlil.convert(dut, ports=dut.ports())
+ with open("test_global_pending.il", "w") as f:
+ f.write(vl)
+
+ run_simulation(dut, g_vec_sim(dut), vcd_name='test_global_pending.vcd')
+
+# run the GlobalPending test when this file is executed as a script
+if __name__ == '__main__':
+    test_g_vec()
+++ /dev/null
-from nmigen.compat.sim import run_simulation
-from nmigen.cli import verilog, rtlil
-from nmigen import Module, Signal, Cat, Elaboratable
-from nmutil.latch import SRLatch
-from nmigen.lib.coding import Decoder
-
-from shadow_fn import ShadowFn
-
-
-class IntFnUnit(Elaboratable):
- """ implements 11.4.8 integer function unit, p31
- also implements optional shadowing 11.5.1, p55
-
- shadowing can be used for branches as well as exceptions (interrupts),
- load/store hold (exceptions again), and vector-element predication
- (once the predicate is known, which it may not be at instruction issue)
-
- notes:
-
- * req_rel_i (request release) is the direct equivalent of pipeline
- "output valid"
- * recover is a local python variable (actually go_die_o)
- * when shadow_wid = 0, recover and shadown are Consts (i.e. do nothing)
- """
- def __init__(self, wid, shadow_wid=0):
- self.reg_width = wid
- self.shadow_wid = shadow_wid
-
- # inputs
- self.dest_i = Signal(wid, reset_less=True) # Dest in (top)
- self.src1_i = Signal(wid, reset_less=True) # oper1 in (top)
- self.src2_i = Signal(wid, reset_less=True) # oper2 in (top)
- self.issue_i = Signal(reset_less=True) # Issue in (top)
-
- self.go_write_i = Signal(reset_less=True) # Go Write in (left)
- self.go_read_i = Signal(reset_less=True) # Go Read in (left)
- self.req_rel_i = Signal(wid, reset_less=True) # request release (left)
-
- self.g_rd_pend_i = Signal(wid, reset_less=True) # global rd (right)
- self.g_wr_pend_i = Signal(wid, reset_less=True) # global wr (right)
-
- if shadow_wid:
- self.shadow_i = Signal(shadow_wid, reset_less=True)
- self.s_fail_i = Signal(shadow_wid, reset_less=True)
- self.s_good_i = Signal(shadow_wid, reset_less=True)
- self.go_die_o = Signal(reset_less=True)
-
- # outputs
- self.readable_o = Signal(reset_less=True) # Readable out (right)
- self.writable_o = Signal(reset_less=True) # Writable out (right)
- self.busy_o = Signal(reset_less=True) # busy out (left)
-
- self.rd_pend_o = Signal(wid, reset_less=True) # rd pending (right)
- self.wr_pend_o = Signal(wid, reset_less=True) # wr pending (right)
-
- def elaborate(self, platform):
- m = Module()
- m.submodules.rd_l = rd_l = SRLatch(sync=False)
- m.submodules.wr_l = wr_l = SRLatch(sync=False)
- m.submodules.dest_d = dest_d = Decoder(self.reg_width)
- m.submodules.src1_d = src1_d = Decoder(self.reg_width)
- m.submodules.src2_d = src2_d = Decoder(self.reg_width)
- s_latches = []
- for i in range(self.shadow_wid):
- sh = ShadowFn()
- setattr(m.submodules, "shadow%d" % i, sh)
- s_latches.append(sh)
-
- # shadow / recover (optional: shadow_wid > 0)
- if self.shadow_wid:
- recover = self.go_die_o
- shadown = Signal(reset_less=True)
- i_l = []
- fail_l = []
- good_l = []
- shi_l = []
- sho_l = []
- rec_l = []
- # get list of latch signals. really must be a better way to do this
- for l in s_latches:
- i_l.append(l.issue_i)
- shi_l.append(l.shadow_i)
- fail_l.append(l.s_fail_i)
- good_l.append(l.s_good_i)
- sho_l.append(l.shadow_o)
- rec_l.append(l.recover_o)
- m.d.comb += Cat(*i_l).eq(self.issue_i)
- m.d.comb += Cat(*fail_l).eq(self.s_fail_i)
- m.d.comb += Cat(*good_l).eq(self.s_good_i)
- m.d.comb += Cat(*shi_l).eq(self.shadow_i)
- m.d.comb += shadown.eq(~(Cat(*sho_l).bool()))
- m.d.comb += recover.eq(Cat(*rec_l).bool())
- else:
- shadown = Const(1)
- recover = Const(0)
-
- # go_write latch: reset on go_write HI, set on issue
- m.d.comb += wr_l.s.eq(self.issue_i)
- m.d.comb += wr_l.r.eq(self.go_write_i | recover)
-
- # src1 latch: reset on go_read HI, set on issue
- m.d.comb += rd_l.s.eq(self.issue_i)
- m.d.comb += rd_l.r.eq(self.go_read_i | recover)
-
- # dest decoder: write-pending out
- m.d.comb += dest_d.i.eq(self.dest_i)
- m.d.comb += dest_d.n.eq(wr_l.qn) # decode is inverted
- m.d.comb += self.busy_o.eq(wr_l.q) # busy if set
- m.d.comb += self.wr_pend_o.eq(dest_d.o)
-
- # src1/src2 decoder: read-pending out
- m.d.comb += src1_d.i.eq(self.src1_i)
- m.d.comb += src1_d.n.eq(rd_l.qn) # decode is inverted
- m.d.comb += src2_d.i.eq(self.src2_i)
- m.d.comb += src2_d.n.eq(rd_l.qn) # decode is inverted
- m.d.comb += self.rd_pend_o.eq(src1_d.o | src2_d.o)
-
- # readable output signal
- int_g_wr = Signal(self.reg_width, reset_less=True)
- m.d.comb += int_g_wr.eq(self.g_wr_pend_i & self.rd_pend_o)
- m.d.comb += self.readable_o.eq(int_g_wr.bool())
-
- # writable output signal
- int_g_rw = Signal(self.reg_width, reset_less=True)
- g_rw = Signal(reset_less=True)
- m.d.comb += int_g_rw.eq(self.g_rd_pend_i & self.wr_pend_o)
- m.d.comb += g_rw.eq(~int_g_rw.bool())
- m.d.comb += self.writable_o.eq(g_rw & rd_l.q & self.req_rel_i & shadown)
-
- return m
-
- def __iter__(self):
- yield self.dest_i
- yield self.src1_i
- yield self.src2_i
- yield self.issue_i
- yield self.go_write_i
- yield self.go_read_i
- yield self.req_rel_i
- yield self.g_rd_pend_i
- yield self.g_wr_pend_i
- yield self.readable_o
- yield self.writable_o
- yield self.rd_pend_o
- yield self.wr_pend_o
-
- def ports(self):
- return list(self)
-
-
-def int_fn_unit_sim(dut):
- yield dut.dest_i.eq(1)
- yield dut.issue_i.eq(1)
- yield
- yield dut.issue_i.eq(0)
- yield
- yield dut.src1_i.eq(1)
- yield dut.issue_i.eq(1)
- yield
- yield
- yield
- yield dut.issue_i.eq(0)
- yield
- yield dut.go_read_i.eq(1)
- yield
- yield dut.go_read_i.eq(0)
- yield
- yield dut.go_write_i.eq(1)
- yield
- yield dut.go_write_i.eq(0)
- yield
-
-def test_int_fn_unit():
- dut = IntFnUnit(32, 2)
- vl = rtlil.convert(dut, ports=dut.ports())
- with open("test_int_fn_unit.il", "w") as f:
- f.write(vl)
-
- run_simulation(dut, int_fn_unit_sim(dut), vcd_name='test_int_fn_unit.vcd')
-
-if __name__ == '__main__':
- test_int_fn_unit()