massive reduction in gate count by using alternative read/write port mux
[soc.git] / src / soc / simple / core.py
index 91663c7aaad56f67b7728c855c3f01a0c57343a5..71a9324387dd3f41a36af14462eeb001a67925d6 100644 (file)
@@ -19,7 +19,7 @@ and consequently it is safer to wait for the Function Unit to complete
 before allowing a new instruction to proceed.
 """
 
-from nmigen import Elaboratable, Module, Signal, ResetSignal
+from nmigen import Elaboratable, Module, Signal, ResetSignal, Cat, Mux
 from nmigen.cli import rtlil
 
 from nmutil.picker import PriorityPicker
@@ -173,6 +173,77 @@ class NonProductionCore(Elaboratable):
 
         return fu_bitdict
 
+    def connect_rdport(self, m, fu_bitdict, rdpickers, regfile, regname, fspec):
+        comb, sync = m.d.comb, m.d.sync
+        fus = self.fus.fus
+        regs = self.regs
+
+        rpidx = regname
+
+        # select the required read port.  these are pre-defined sizes
+        print(rpidx, regfile, regs.rf.keys())
+        rport = regs.rf[regfile.lower()].r_ports[rpidx]
+
+        fspecs = fspec
+        if not isinstance(fspecs, list):
+            fspecs = [fspecs]
+
+        rdflags = []
+        pplen = 0
+        reads = []
+        ppoffs = []
+        for i, fspec in enumerate(fspecs):
+            # get the regfile specs for this regfile port
+            (rf, read, write, wid, fuspec) = fspec
+            print ("fpsec", i, fspec, len(fuspec))
+            ppoffs.append(pplen) # record offset for picker
+            pplen += len(fuspec)
+            name = "rdflag_%s_%s_%d" % (regfile, regname, i)
+            rdflag = Signal(name=name, reset_less=True)
+            comb += rdflag.eq(rf)
+            rdflags.append(rdflag)
+            reads.append(read)
+
+        print ("pplen", pplen)
+
+        # create a priority picker to manage this port
+        rdpickers[regfile][rpidx] = rdpick = PriorityPicker(pplen)
+        setattr(m.submodules, "rdpick_%s_%s" % (regfile, rpidx), rdpick)
+
+        rens = []
+        for i, fspec in enumerate(fspecs):
+            (rf, read, write, wid, fuspec) = fspec
+            # connect up the FU req/go signals, and the reg-read to the FU
+            # and create a Read Broadcast Bus
+            for pi, (funame, fu, idx) in enumerate(fuspec):
+                pi += ppoffs[i]
+                src = fu.src_i[idx]
+
+                # connect request-read to picker input, and output to go-rd
+                fu_active = fu_bitdict[funame]
+                pick = Signal()
+                comb += pick.eq(fu.rd_rel_o[idx] & fu_active & rdflags[i])
+                print (pick, len(pick))
+                print (rdpick.i, len(rdpick.i), pi)
+                comb += rdpick.i[pi].eq(pick)
+                comb += fu.go_rd_i[idx].eq(rdpick.o[pi])
+
+                # if picked, select read-port "reg select" number to port
+                read_en = Signal.like(reads[i])
+                comb += read_en.eq(Mux(rdpick.o[pi] & rdpick.en_o, reads[i], 0))
+                rens.append(read_en)
+
+                with m.If(rdpick.o[pi] & rdpick.en_o):
+                    # connect regfile port to input, creating a Broadcast Bus
+                    print("reg connect widths",
+                          regfile, regname, pi, funame,
+                          src.shape(), rport.data_o.shape())
+                    # all FUs connect to same port
+                    comb += src.eq(rport.data_o)
+
+        # or-reduce the muxed read signals
+        comb += rport.ren.eq(ortreereduce_sig(rens))
+
     def connect_rdports(self, m, fu_bitdict):
         """connect read ports
 
@@ -194,47 +265,92 @@ class NonProductionCore(Elaboratable):
             fuspecs = byregfiles_rdspec[regfile]
             rdpickers[regfile] = {}
 
+            # argh.  an experiment to merge RB and RC in the INT regfile
+            # (we have too many read/write ports)
+            if regfile == 'INT':
+                fuspecs['rbc'] = [fuspecs.pop('rb')]
+                fuspecs['rbc'].append(fuspecs.pop('rc'))
+            if regfile == 'FAST':
+                fuspecs['fast1'] = [fuspecs.pop('fast1')]
+                fuspecs['fast1'].append(fuspecs.pop('fast2'))
+
             # for each named regfile port, connect up all FUs to that port
             for (regname, fspec) in sort_fuspecs(fuspecs):
                 print("connect rd", regname, fspec)
-                rpidx = regname
-                # get the regfile specs for this regfile port
-                (rf, read, write, wid, fuspec) = fspec
-                name = "rdflag_%s_%s" % (regfile, regname)
-                rdflag = Signal(name=name, reset_less=True)
-                comb += rdflag.eq(rf)
-
-                # select the required read port.  these are pre-defined sizes
-                print(rpidx, regfile, regs.rf.keys())
-                rport = regs.rf[regfile.lower()].r_ports[rpidx]
-
-                # create a priority picker to manage this port
-                rdpickers[regfile][rpidx] = rdpick = PriorityPicker(
-                    len(fuspec))
-                setattr(m.submodules, "rdpick_%s_%s" %
-                        (regfile, rpidx), rdpick)
-
-                # connect the regspec "reg select" number to this port
-                with m.If(rdpick.en_o):
-                    comb += rport.ren.eq(read)
-
-                # connect up the FU req/go signals, and the reg-read to the FU
-                # and create a Read Broadcast Bus
-                for pi, (funame, fu, idx) in enumerate(fuspec):
-                    src = fu.src_i[idx]
-
-                    # connect request-read to picker input, and output to go-rd
-                    fu_active = fu_bitdict[funame]
-                    pick = fu.rd_rel_o[idx] & fu_active & rdflag
-                    comb += rdpick.i[pi].eq(pick)
-                    comb += fu.go_rd_i[idx].eq(rdpick.o[pi])
+                self.connect_rdport(m, fu_bitdict, rdpickers, regfile,
+                                       regname, fspec)
 
-                    # connect regfile port to input, creating a Broadcast Bus
-                    print("reg connect widths",
-                          regfile, regname, pi, funame,
-                          src.shape(), rport.data_o.shape())
-                    # all FUs connect to same port
-                    comb += src.eq(rport.data_o)
+    def connect_wrport(self, m, fu_bitdict, wrpickers, regfile, regname, fspec):
+        comb, sync = m.d.comb, m.d.sync
+        fus = self.fus.fus
+        regs = self.regs
+
+        print("connect wr", regname, fspec)
+        rpidx = regname
+
+        # select the required write port.  these are pre-defined sizes
+        print(regfile, regs.rf.keys())
+        wport = regs.rf[regfile.lower()].w_ports[rpidx]
+
+        fspecs = fspec
+        if not isinstance(fspecs, list):
+            fspecs = [fspecs]
+
+        pplen = 0
+        writes = []
+        ppoffs = []
+        for i, fspec in enumerate(fspecs):
+            # get the regfile specs for this regfile port
+            (rf, read, write, wid, fuspec) = fspec
+            print ("fpsec", i, fspec, len(fuspec))
+            ppoffs.append(pplen) # record offset for picker
+            pplen += len(fuspec)
+
+        # create a priority picker to manage this port
+        wrpickers[regfile][rpidx] = wrpick = PriorityPicker(pplen)
+        setattr(m.submodules, "wrpick_%s_%s" % (regfile, rpidx), wrpick)
+
+        wsigs = []
+        wens = []
+        for i, fspec in enumerate(fspecs):
+            # connect up the FU req/go signals and the reg-write from the FU
+            # these are arbitrated by Data.ok signals
+            (rf, read, write, wid, fuspec) = fspec
+            for pi, (funame, fu, idx) in enumerate(fuspec):
+                pi += ppoffs[i]
+
+                # write-request comes from dest.ok
+                dest = fu.get_out(idx)
+                fu_dest_latch = fu.get_fu_out(idx)  # latched output
+                name = "wrflag_%s_%s_%d" % (funame, regname, idx)
+                wrflag = Signal(name=name, reset_less=True)
+                comb += wrflag.eq(dest.ok & fu.busy_o)
+
+                # connect request-write to picker input, and output to go-wr
+                fu_active = fu_bitdict[funame]
+                pick = fu.wr.rel_o[idx] & fu_active  # & wrflag
+                comb += wrpick.i[pi].eq(pick)
+                # create a single-pulse go write from the picker output
+                wr_pick = Signal()
+                comb += wr_pick.eq(wrpick.o[pi] & wrpick.en_o)
+                comb += fu.go_wr_i[idx].eq(rising_edge(m, wr_pick))
+
+                # connect the regspec write "reg select" number to this port
+                # only if one FU actually requests (and is granted) the port
+                # will the write-enable be activated
+                write_en = Signal.like(write)
+                comb += write_en.eq(Mux(wr_pick & wrpick.en_o, write, 0))
+                wens.append(write_en)
+
+                # connect regfile port to input
+                print("reg connect widths",
+                      regfile, regname, pi, funame,
+                      dest.shape(), wport.data_i.shape())
+                wsigs.append(fu_dest_latch)
+
+        # here is where we create the Write Broadcast Bus. simple, eh?
+        comb += wport.data_i.eq(ortreereduce_sig(wsigs))
+        comb += wport.wen.eq(ortreereduce_sig(wens))
 
     def connect_wrports(self, m, fu_bitdict):
         """connect write ports
@@ -259,57 +375,18 @@ class NonProductionCore(Elaboratable):
         for regfile, spec in byregfiles_wr.items():
             fuspecs = byregfiles_wrspec[regfile]
             wrpickers[regfile] = {}
-            for (regname, fspec) in sort_fuspecs(fuspecs):
-                print("connect wr", regname, fspec)
-                rpidx = regname
-                # get the regfile specs for this regfile port
-                (rf, read, write, wid, fuspec) = fspec
-
-                # select the required write port.  these are pre-defined sizes
-                print(regfile, regs.rf.keys())
-                wport = regs.rf[regfile.lower()].w_ports[rpidx]
 
-                # create a priority picker to manage this port
-                wrpickers[regfile][rpidx] = wrpick = PriorityPicker(
-                    len(fuspec))
-                setattr(m.submodules, "wrpick_%s_%s" %
-                        (regfile, rpidx), wrpick)
+            # argh, more port-merging
+            if regfile == 'INT':
+                fuspecs['o'] = [fuspecs.pop('o')]
+                fuspecs['o'].append(fuspecs.pop('o1'))
+            if regfile == 'FAST':
+                fuspecs['fast1'] = [fuspecs.pop('fast1')]
+                fuspecs['fast1'].append(fuspecs.pop('fast2'))
 
-                # connect the regspec write "reg select" number to this port
-                # only if one FU actually requests (and is granted) the port
-                # will the write-enable be activated
-                with m.If(wrpick.en_o):
-                    comb += wport.wen.eq(write)
-                with m.Else():
-                    comb += wport.wen.eq(0)
-
-                # connect up the FU req/go signals and the reg-read to the FU
-                # these are arbitrated by Data.ok signals
-                wsigs = []
-                for pi, (funame, fu, idx) in enumerate(fuspec):
-                    # write-request comes from dest.ok
-                    dest = fu.get_out(idx)
-                    fu_dest_latch = fu.get_fu_out(idx)  # latched output
-                    name = "wrflag_%s_%s_%d" % (funame, regname, idx)
-                    wrflag = Signal(name=name, reset_less=True)
-                    comb += wrflag.eq(dest.ok & fu.busy_o)
-
-                    # connect request-write to picker input, and output to go-wr
-                    fu_active = fu_bitdict[funame]
-                    pick = fu.wr.rel_o[idx] & fu_active  # & wrflag
-                    comb += wrpick.i[pi].eq(pick)
-                    # create a single-pulse go write from the picker output
-                    wr_pick = Signal()
-                    comb += wr_pick.eq(wrpick.o[pi] & wrpick.en_o)
-                    comb += fu.go_wr_i[idx].eq(rising_edge(m, wr_pick))
-                    # connect regfile port to input
-                    print("reg connect widths",
-                          regfile, regname, pi, funame,
-                          dest.shape(), wport.data_i.shape())
-                    wsigs.append(fu_dest_latch)
-
-                # here is where we create the Write Broadcast Bus. simple, eh?
-                comb += wport.data_i.eq(ortreereduce_sig(wsigs))
+            for (regname, fspec) in sort_fuspecs(fuspecs):
+                self.connect_wrport(m, fu_bitdict, wrpickers,
+                                        regfile, regname, fspec)
 
     def get_byregfiles(self, readmode):
 
@@ -340,7 +417,7 @@ class NonProductionCore(Elaboratable):
                     byregfiles_spec[regfile] = {}
                 if regname not in byregfiles_spec[regfile]:
                     byregfiles_spec[regfile][regname] = \
-                        [rdflag, read, write, wid, []]
+                        (rdflag, read, write, wid, [])
                 # here we start to create "lanes"
                 if idx not in byregfiles[regfile]:
                     byregfiles[regfile][idx] = []