massive reduction in gate count by using alternative read/write port mux
[soc.git] / src / soc / simple / core.py
index 740cc9908b752cf9f45970db62abc8401b1ae8e0..71a9324387dd3f41a36af14462eeb001a67925d6 100644 (file)
@@ -19,7 +19,7 @@ and consequently it is safer to wait for the Function Unit to complete
 before allowing a new instruction to proceed.
 """
 
-from nmigen import Elaboratable, Module, Signal
+from nmigen import Elaboratable, Module, Signal, ResetSignal, Cat, Mux
 from nmigen.cli import rtlil
 
 from nmutil.picker import PriorityPicker
@@ -30,17 +30,20 @@ from soc.regfile.regfiles import RegFiles
 from soc.decoder.power_decoder import create_pdecode
 from soc.decoder.power_decoder2 import PowerDecode2
 from soc.decoder.decode2execute1 import Data
-from soc.experiment.l0_cache import TstL0CacheBuffer # test only
+from soc.experiment.l0_cache import TstL0CacheBuffer  # test only
 from soc.config.test.test_loadstore import TestMemPspec
 from soc.decoder.power_enums import MicrOp
 import operator
 
+from nmutil.util import rising_edge
+
 
 # helper function for reducing a list of signals down to a parallel
 # ORed single signal.
 def ortreereduce(tree, attr="data_o"):
     return treereduce(tree, operator.or_, lambda x: getattr(x, attr))
 
+
 def ortreereduce_sig(tree):
     return treereduce(tree, operator.or_, lambda x: x)
 
@@ -54,7 +57,7 @@ def sort_fuspecs(fuspecs):
     for (regname, fspec) in fuspecs.items():
         if not regname.startswith("full"):
             res.append((regname, fspec))
-    return res # enumerate(res)
+    return res  # enumerate(res)
 
 
 class NonProductionCore(Elaboratable):
@@ -83,9 +86,9 @@ class NonProductionCore(Elaboratable):
         self.raw_opcode_i = self.pdecode2.dec.raw_opcode_in
 
         # start/stop and terminated signalling
-        self.core_start_i = Signal(reset_less=True)
-        self.core_stop_i = Signal(reset_less=True)
-        self.core_terminated_o = Signal(reset=1) # indicates stopped
+        self.core_stopped_i = Signal(reset_less=True)
+        self.core_reset_i = Signal()
+        self.core_terminate_o = Signal(reset=0)  # indicates stopped
 
     def elaborate(self, platform):
         m = Module()
@@ -97,24 +100,17 @@ class NonProductionCore(Elaboratable):
         regs = self.regs
         fus = self.fus.fus
 
-        # core start/stopped state
-        core_stopped = Signal(reset=1) # begins in stopped state
-
-        # start/stop signalling
-        with m.If(self.core_start_i):
-            m.d.sync += core_stopped.eq(0)
-        with m.If(self.core_stop_i):
-            m.d.sync += core_stopped.eq(1)
-        m.d.comb += self.core_terminated_o.eq(core_stopped)
-
         # connect up Function Units, then read/write ports
-        fu_bitdict = self.connect_instruction(m, core_stopped)
+        fu_bitdict = self.connect_instruction(m)
         self.connect_rdports(m, fu_bitdict)
         self.connect_wrports(m, fu_bitdict)
 
+        # connect up reset
+        m.d.comb += ResetSignal().eq(self.core_reset_i)
+
         return m
 
-    def connect_instruction(self, m, core_stopped):
+    def connect_instruction(self, m):
         """connect_instruction
 
         uses decoded (from PowerOp) function unit information from CSV files
@@ -135,9 +131,6 @@ class NonProductionCore(Elaboratable):
         fu_bitdict = {}
         for i, funame in enumerate(fus.keys()):
             fu_bitdict[funame] = fu_enable[i]
-        # only run when allowed and when instruction is valid
-        can_run = Signal(reset_less=True)
-        comb += can_run.eq(self.ivalid_i & ~core_stopped)
 
         # enable the required Function Unit based on the opcode decode
         # note: this *only* works correctly for simple core when one and
@@ -154,11 +147,11 @@ class NonProductionCore(Elaboratable):
             sync += counter.eq(counter - 1)
             comb += self.busy_o.eq(1)
 
-        with m.If(can_run):
+        with m.If(self.ivalid_i): # run only when valid
             with m.Switch(dec2.e.do.insn_type):
-            # check for ATTN: halt if true
+                # check for ATTN: halt if true
                 with m.Case(MicrOp.OP_ATTN):
-                    m.d.sync += core_stopped.eq(1)
+                    m.d.sync += self.core_terminate_o.eq(1)
 
                 with m.Case(MicrOp.OP_NOP):
                     sync += counter.eq(2)
@@ -180,6 +173,77 @@ class NonProductionCore(Elaboratable):
 
         return fu_bitdict
 
+    def connect_rdport(self, m, fu_bitdict, rdpickers, regfile, regname, fspec):
+        comb, sync = m.d.comb, m.d.sync
+        fus = self.fus.fus
+        regs = self.regs
+
+        rpidx = regname
+
+        # select the required read port.  these are pre-defined sizes
+        print(rpidx, regfile, regs.rf.keys())
+        rport = regs.rf[regfile.lower()].r_ports[rpidx]
+
+        fspecs = fspec
+        if not isinstance(fspecs, list):
+            fspecs = [fspecs]
+
+        rdflags = []
+        pplen = 0
+        reads = []
+        ppoffs = []
+        for i, fspec in enumerate(fspecs):
+            # get the regfile specs for this regfile port
+            (rf, read, write, wid, fuspec) = fspec
+            print ("fpsec", i, fspec, len(fuspec))
+            ppoffs.append(pplen) # record offset for picker
+            pplen += len(fuspec)
+            name = "rdflag_%s_%s_%d" % (regfile, regname, i)
+            rdflag = Signal(name=name, reset_less=True)
+            comb += rdflag.eq(rf)
+            rdflags.append(rdflag)
+            reads.append(read)
+
+        print ("pplen", pplen)
+
+        # create a priority picker to manage this port
+        rdpickers[regfile][rpidx] = rdpick = PriorityPicker(pplen)
+        setattr(m.submodules, "rdpick_%s_%s" % (regfile, rpidx), rdpick)
+
+        rens = []
+        for i, fspec in enumerate(fspecs):
+            (rf, read, write, wid, fuspec) = fspec
+            # connect up the FU req/go signals, and the reg-read to the FU
+            # and create a Read Broadcast Bus
+            for pi, (funame, fu, idx) in enumerate(fuspec):
+                pi += ppoffs[i]
+                src = fu.src_i[idx]
+
+                # connect request-read to picker input, and output to go-rd
+                fu_active = fu_bitdict[funame]
+                pick = Signal()
+                comb += pick.eq(fu.rd_rel_o[idx] & fu_active & rdflags[i])
+                print (pick, len(pick))
+                print (rdpick.i, len(rdpick.i), pi)
+                comb += rdpick.i[pi].eq(pick)
+                comb += fu.go_rd_i[idx].eq(rdpick.o[pi])
+
+                # if picked, select read-port "reg select" number to port
+                read_en = Signal.like(reads[i])
+                comb += read_en.eq(Mux(rdpick.o[pi] & rdpick.en_o, reads[i], 0))
+                rens.append(read_en)
+
+                with m.If(rdpick.o[pi] & rdpick.en_o):
+                    # connect regfile port to input, creating a Broadcast Bus
+                    print("reg connect widths",
+                          regfile, regname, pi, funame,
+                          src.shape(), rport.data_o.shape())
+                    # all FUs connect to same port
+                    comb += src.eq(rport.data_o)
+
+        # or-reduce the muxed read signals
+        comb += rport.ren.eq(ortreereduce_sig(rens))
+
     def connect_rdports(self, m, fu_bitdict):
         """connect read ports
 
@@ -201,44 +265,92 @@ class NonProductionCore(Elaboratable):
             fuspecs = byregfiles_rdspec[regfile]
             rdpickers[regfile] = {}
 
+            # argh.  an experiment to merge RA and RB in the INT regfile
+            # (we have too many read/write ports)
+            if regfile == 'INT':
+                fuspecs['rbc'] = [fuspecs.pop('rb')]
+                fuspecs['rbc'].append(fuspecs.pop('rc'))
+            if regfile == 'FAST':
+                fuspecs['fast1'] = [fuspecs.pop('fast1')]
+                fuspecs['fast1'].append(fuspecs.pop('fast2'))
+
             # for each named regfile port, connect up all FUs to that port
             for (regname, fspec) in sort_fuspecs(fuspecs):
-                print ("connect rd", regname, fspec)
-                rpidx = regname
-                # get the regfile specs for this regfile port
-                (rf, read, write, wid, fuspec) = fspec
-                name = "rdflag_%s_%s" % (regfile, regname)
-                rdflag = Signal(name=name, reset_less=True)
-                comb += rdflag.eq(rf)
-
-                # select the required read port.  these are pre-defined sizes
-                print (rpidx, regfile, regs.rf.keys())
-                rport = regs.rf[regfile.lower()].r_ports[rpidx]
-
-                # create a priority picker to manage this port
-                rdpickers[regfile][rpidx] = rdpick = PriorityPicker(len(fuspec))
-                setattr(m.submodules, "rdpick_%s_%s" % (regfile, rpidx), rdpick)
-
-                # connect the regspec "reg select" number to this port
-                with m.If(rdpick.en_o):
-                    comb += rport.ren.eq(read)
-
-                # connect up the FU req/go signals, and the reg-read to the FU
-                # and create a Read Broadcast Bus
-                for pi, (funame, fu, idx) in enumerate(fuspec):
-                    src = fu.src_i[idx]
-
-                    # connect request-read to picker input, and output to go-rd
-                    fu_active = fu_bitdict[funame]
-                    pick = fu.rd_rel_o[idx] & fu_active & rdflag
-                    comb += rdpick.i[pi].eq(pick)
-                    comb += fu.go_rd_i[idx].eq(rdpick.o[pi])
+                print("connect rd", regname, fspec)
+                self.connect_rdport(m, fu_bitdict, rdpickers, regfile,
+                                       regname, fspec)
 
-                    # connect regfile port to input, creating a Broadcast Bus
-                    print ("reg connect widths",
-                           regfile, regname, pi, funame,
-                           src.shape(), rport.data_o.shape())
-                    comb += src.eq(rport.data_o) # all FUs connect to same port
+    def connect_wrport(self, m, fu_bitdict, wrpickers, regfile, regname, fspec):
+        comb, sync = m.d.comb, m.d.sync
+        fus = self.fus.fus
+        regs = self.regs
+
+        print("connect wr", regname, fspec)
+        rpidx = regname
+
+        # select the required write port.  these are pre-defined sizes
+        print(regfile, regs.rf.keys())
+        wport = regs.rf[regfile.lower()].w_ports[rpidx]
+
+        fspecs = fspec
+        if not isinstance(fspecs, list):
+            fspecs = [fspecs]
+
+        pplen = 0
+        writes = []
+        ppoffs = []
+        for i, fspec in enumerate(fspecs):
+            # get the regfile specs for this regfile port
+            (rf, read, write, wid, fuspec) = fspec
+            print ("fpsec", i, fspec, len(fuspec))
+            ppoffs.append(pplen) # record offset for picker
+            pplen += len(fuspec)
+
+        # create a priority picker to manage this port
+        wrpickers[regfile][rpidx] = wrpick = PriorityPicker(pplen)
+        setattr(m.submodules, "wrpick_%s_%s" % (regfile, rpidx), wrpick)
+
+        wsigs = []
+        wens = []
+        for i, fspec in enumerate(fspecs):
+            # connect up the FU req/go signals and the reg-read to the FU
+            # these are arbitrated by Data.ok signals
+            (rf, read, write, wid, fuspec) = fspec
+            for pi, (funame, fu, idx) in enumerate(fuspec):
+                pi += ppoffs[i]
+
+                # write-request comes from dest.ok
+                dest = fu.get_out(idx)
+                fu_dest_latch = fu.get_fu_out(idx)  # latched output
+                name = "wrflag_%s_%s_%d" % (funame, regname, idx)
+                wrflag = Signal(name=name, reset_less=True)
+                comb += wrflag.eq(dest.ok & fu.busy_o)
+
+                # connect request-write to picker input, and output to go-wr
+                fu_active = fu_bitdict[funame]
+                pick = fu.wr.rel_o[idx] & fu_active  # & wrflag
+                comb += wrpick.i[pi].eq(pick)
+                # create a single-pulse go write from the picker output
+                wr_pick = Signal()
+                comb += wr_pick.eq(wrpick.o[pi] & wrpick.en_o)
+                comb += fu.go_wr_i[idx].eq(rising_edge(m, wr_pick))
+
+                # connect the regspec write "reg select" number to this port
+                # only if one FU actually requests (and is granted) the port
+                # will the write-enable be activated
+                write_en = Signal.like(write)
+                comb += write_en.eq(Mux(wr_pick & wrpick.en_o, write, 0))
+                wens.append(write_en)
+
+                # connect regfile port to input
+                print("reg connect widths",
+                      regfile, regname, pi, funame,
+                      dest.shape(), wport.data_i.shape())
+                wsigs.append(fu_dest_latch)
+
+        # here is where we create the Write Broadcast Bus. simple, eh?
+        comb += wport.data_i.eq(ortreereduce_sig(wsigs))
+        comb += wport.wen.eq(ortreereduce_sig(wens))
 
     def connect_wrports(self, m, fu_bitdict):
         """connect write ports
@@ -263,52 +375,18 @@ class NonProductionCore(Elaboratable):
         for regfile, spec in byregfiles_wr.items():
             fuspecs = byregfiles_wrspec[regfile]
             wrpickers[regfile] = {}
-            for (regname, fspec) in sort_fuspecs(fuspecs):
-                print ("connect wr", regname, fspec)
-                rpidx = regname
-                # get the regfile specs for this regfile port
-                (rf, read, write, wid, fuspec) = fspec
 
-                # select the required write port.  these are pre-defined sizes
-                print (regfile, regs.rf.keys())
-                wport = regs.rf[regfile.lower()].w_ports[rpidx]
+            # argh, more port-merging
+            if regfile == 'INT':
+                fuspecs['o'] = [fuspecs.pop('o')]
+                fuspecs['o'].append(fuspecs.pop('o1'))
+            if regfile == 'FAST':
+                fuspecs['fast1'] = [fuspecs.pop('fast1')]
+                fuspecs['fast1'].append(fuspecs.pop('fast2'))
 
-                # create a priority picker to manage this port
-                wrpickers[regfile][rpidx] = wrpick = PriorityPicker(len(fuspec))
-                setattr(m.submodules, "wrpick_%s_%s" % (regfile, rpidx), wrpick)
-
-                # connect the regspec write "reg select" number to this port
-                # only if one FU actually requests (and is granted) the port
-                # will the write-enable be activated
-                with m.If(wrpick.en_o):
-                    comb += wport.wen.eq(write)
-                with m.Else():
-                    comb += wport.wen.eq(0)
-
-                # connect up the FU req/go signals and the reg-read to the FU
-                # these are arbitrated by Data.ok signals
-                wsigs = []
-                for pi, (funame, fu, idx) in enumerate(fuspec):
-                    # write-request comes from dest.ok
-                    dest = fu.get_out(idx)
-                    fu_dest_latch = fu.get_fu_out(idx) # latched output
-                    name = "wrflag_%s_%s_%d" % (funame, regname, idx)
-                    wrflag = Signal(name=name, reset_less=True)
-                    comb += wrflag.eq(dest.ok & fu.busy_o)
-
-                    # connect request-read to picker input, and output to go-wr
-                    fu_active = fu_bitdict[funame]
-                    pick = fu.wr.rel[idx] & fu_active #& wrflag
-                    comb += wrpick.i[pi].eq(pick)
-                    comb += fu.go_wr_i[idx].eq(wrpick.o[pi] & wrpick.en_o)
-                    # connect regfile port to input
-                    print ("reg connect widths",
-                           regfile, regname, pi, funame,
-                           dest.shape(), wport.data_i.shape())
-                    wsigs.append(fu_dest_latch)
-
-                # here is where we create the Write Broadcast Bus. simple, eh?
-                comb += wport.data_i.eq(ortreereduce_sig(wsigs))
+            for (regname, fspec) in sort_fuspecs(fuspecs):
+                self.connect_wrport(m, fu_bitdict, wrpickers,
+                                        regfile, regname, fspec)
 
     def get_byregfiles(self, readmode):
 
@@ -321,13 +399,13 @@ class NonProductionCore(Elaboratable):
         byregfiles = {}
         byregfiles_spec = {}
         for (funame, fu) in fus.items():
-            print ("%s ports for %s" % (mode, funame))
+            print("%s ports for %s" % (mode, funame))
             for idx in range(fu.n_src if readmode else fu.n_dst):
                 if readmode:
                     (regfile, regname, wid) = fu.get_in_spec(idx)
                 else:
                     (regfile, regname, wid) = fu.get_out_spec(idx)
-                print ("    %d %s %s %s" % (idx, regfile, regname, str(wid)))
+                print("    %d %s %s %s" % (idx, regfile, regname, str(wid)))
                 if readmode:
                     rdflag, read = dec2.regspecmap_read(regfile, regname)
                     write = None
@@ -339,7 +417,7 @@ class NonProductionCore(Elaboratable):
                     byregfiles_spec[regfile] = {}
                 if regname not in byregfiles_spec[regfile]:
                     byregfiles_spec[regfile][regname] = \
-                                [rdflag, read, write, wid, []]
+                        (rdflag, read, write, wid, [])
                 # here we start to create "lanes"
                 if idx not in byregfiles[regfile]:
                     byregfiles[regfile][idx] = []
@@ -349,16 +427,16 @@ class NonProductionCore(Elaboratable):
 
         # ok just print that out, for convenience
         for regfile, spec in byregfiles.items():
-            print ("regfile %s ports:" % mode, regfile)
+            print("regfile %s ports:" % mode, regfile)
             fuspecs = byregfiles_spec[regfile]
             for regname, fspec in fuspecs.items():
                 [rdflag, read, write, wid, fuspec] = fspec
-                print ("  rf %s port %s lane: %s" % (mode, regfile, regname))
-                print ("  %s" % regname, wid, read, write, rdflag)
+                print("  rf %s port %s lane: %s" % (mode, regfile, regname))
+                print("  %s" % regname, wid, read, write, rdflag)
                 for (funame, fu, idx) in fuspec:
                     fusig = fu.src_i[idx] if readmode else fu.dest[idx]
-                    print ("    ", funame, fu, idx, fusig)
-                    print ()
+                    print("    ", funame, fu, idx, fusig)
+                    print()
 
         return byregfiles, byregfiles_spec