reducing regfile port usage by sharing read ports
author Luke Kenneth Casson Leighton <lkcl@lkcl.net>
Tue, 11 Aug 2020 12:07:22 +0000 (13:07 +0100)
committer Luke Kenneth Casson Leighton <lkcl@lkcl.net>
Tue, 11 Aug 2020 12:07:25 +0000 (13:07 +0100)
gets gate count down considerably

src/soc/regfile/regfiles.py
src/soc/simple/core.py
src/soc/simple/issuer.py

index 4fe98d3a3b14e8519f5e8a70add459ed9fc3a882..124fb03205f2c0c32819a9577a51cf53c642ddd5 100644 (file)
@@ -25,6 +25,7 @@ Links:
 from soc.regfile.regfile import RegFile, RegFileArray
 from soc.regfile.virtual_port import VirtualRegPort
 from soc.decoder.power_enums import SPR
+from nmigen import Memory, Elaboratable
 
 
 # Integer Regfile
@@ -41,8 +42,7 @@ class IntRegs(RegFileArray):
         self.w_ports = {'o': self.write_port("dest1"),
                         'o1': self.write_port("dest2")} # for now (LD/ST update)
         self.r_ports = {'ra': self.read_port("src1"),
-                        'rb': self.read_port("src2"),
-                        'rc': self.read_port("src3"),
+                        'rbc': self.read_port("src3"),
                         'dmi': self.read_port("dmi")} # needed for Debug (DMI)
 
 
@@ -78,7 +78,6 @@ class FastRegs(RegFileArray):
         self.r_ports = {'cia': self.read_port("cia"), # reading PC (issuer)
                         'msr': self.read_port("msr"), # reading MSR (issuer)
                         'fast1': self.read_port("src1"),
-                        'fast2': self.read_port("src2"),
                         }
 
 
@@ -127,7 +126,7 @@ class XERRegs(VirtualRegPort):
 
 
 # SPR Regfile
-class SPRRegs(RegFile):
+class SPRRegs(Memory, Elaboratable):
     """SPRRegs
 
     * QTY len(SPRs) 64-bit registers
@@ -137,9 +136,15 @@ class SPRRegs(RegFile):
     """
     def __init__(self):
         n_sprs = len(SPR)
-        super().__init__(64, n_sprs)
-        self.w_ports = {'spr1': self.write_port(name="dest")}
-        self.r_ports = {'spr1': self.read_port("src")}
+        super().__init__(width=64, depth=n_sprs)
+        self.w_ports = {'spr1': self.write_port()}
+        self.r_ports = {'spr1': self.read_port()}
+
+        self.w_ports['spr1'].wen = self.w_ports['spr1'].en
+        self.w_ports['spr1'].data_i = self.w_ports['spr1'].data
+
+        self.r_ports['spr1'].ren = self.w_ports['spr1'].en
+        self.r_ports['spr1'].data_o = self.w_ports['spr1'].data
 
 
 # class containing all regfiles: int, cr, xer, fast, spr
index 91663c7aaad56f67b7728c855c3f01a0c57343a5..70d5bb4adc244a60f4849c956b5f1da91f91a361 100644 (file)
@@ -19,7 +19,7 @@ and consequently it is safer to wait for the Function Unit to complete
 before allowing a new instruction to proceed.
 """
 
-from nmigen import Elaboratable, Module, Signal, ResetSignal
+from nmigen import Elaboratable, Module, Signal, ResetSignal, Cat
 from nmigen.cli import rtlil
 
 from nmutil.picker import PriorityPicker
@@ -173,6 +173,71 @@ class NonProductionCore(Elaboratable):
 
         return fu_bitdict
 
+    def connect_rdport(self, m, fu_bitdict, rdpickers, regfile, regname, fspec):
+        comb, sync = m.d.comb, m.d.sync
+        fus = self.fus.fus
+        regs = self.regs
+
+        rpidx = regname
+
+        # select the required read port.  these are pre-defined sizes
+        print(rpidx, regfile, regs.rf.keys())
+        rport = regs.rf[regfile.lower()].r_ports[rpidx]
+
+        fspecs = fspec
+        if not isinstance(fspecs, list):
+            fspecs = [fspecs]
+
+        rdflags = []
+        pplen = 0
+        reads = []
+        ppoffs = []
+        for i, fspec in enumerate(fspecs):
+            # get the regfile specs for this regfile port
+            (rf, read, write, wid, fuspec) = fspec
+            print ("fpsec", i, fspec, len(fuspec))
+            ppoffs.append(pplen) # record offset for picker
+            pplen += len(fuspec)
+            name = "rdflag_%s_%s_%d" % (regfile, regname, i)
+            rdflag = Signal(name=name, reset_less=True)
+            comb += rdflag.eq(rf)
+            rdflags.append(rdflag)
+            reads.append(read)
+
+        print ("pplen", pplen)
+
+        # create a priority picker to manage this port
+        rdpickers[regfile][rpidx] = rdpick = PriorityPicker(pplen)
+        setattr(m.submodules, "rdpick_%s_%s" % (regfile, rpidx), rdpick)
+
+        for i, fspec in enumerate(fspecs):
+            (rf, read, write, wid, fuspec) = fspec
+            # connect up the FU req/go signals, and the reg-read to the FU
+            # and create a Read Broadcast Bus
+            for pi, (funame, fu, idx) in enumerate(fuspec):
+                pi += ppoffs[i]
+                src = fu.src_i[idx]
+
+                # connect request-read to picker input, and output to go-rd
+                fu_active = fu_bitdict[funame]
+                pick = Signal()
+                comb += pick.eq(fu.rd_rel_o[idx] & fu_active & rdflags[i])
+                print (pick, len(pick))
+                print (rdpick.i, len(rdpick.i), pi)
+                comb += rdpick.i[pi].eq(pick)
+                comb += fu.go_rd_i[idx].eq(rdpick.o[pi])
+
+                # if picked, select read-port "reg select" number to port
+                with m.If(rdpick.o[pi] & rdpick.en_o):
+                    comb += rport.ren.eq(reads[i])
+
+                    # connect regfile port to input, creating a Broadcast Bus
+                    print("reg connect widths",
+                          regfile, regname, pi, funame,
+                          src.shape(), rport.data_o.shape())
+                    # all FUs connect to same port
+                    comb += src.eq(rport.data_o)
+
     def connect_rdports(self, m, fu_bitdict):
         """connect read ports
 
@@ -194,47 +259,20 @@ class NonProductionCore(Elaboratable):
             fuspecs = byregfiles_rdspec[regfile]
             rdpickers[regfile] = {}
 
+            # argh.  an experiment to merge RB and RC in the INT regfile
+            # (we have too many read/write ports)
+            if regfile == 'INT':
+                fuspecs['rbc'] = [fuspecs.pop('rb')]
+                fuspecs['rbc'].append(fuspecs.pop('rc'))
+            if regfile == 'FAST':
+                fuspecs['fast1'] = [fuspecs.pop('fast1')]
+                fuspecs['fast1'].append(fuspecs.pop('fast2'))
+
             # for each named regfile port, connect up all FUs to that port
             for (regname, fspec) in sort_fuspecs(fuspecs):
                 print("connect rd", regname, fspec)
-                rpidx = regname
-                # get the regfile specs for this regfile port
-                (rf, read, write, wid, fuspec) = fspec
-                name = "rdflag_%s_%s" % (regfile, regname)
-                rdflag = Signal(name=name, reset_less=True)
-                comb += rdflag.eq(rf)
-
-                # select the required read port.  these are pre-defined sizes
-                print(rpidx, regfile, regs.rf.keys())
-                rport = regs.rf[regfile.lower()].r_ports[rpidx]
-
-                # create a priority picker to manage this port
-                rdpickers[regfile][rpidx] = rdpick = PriorityPicker(
-                    len(fuspec))
-                setattr(m.submodules, "rdpick_%s_%s" %
-                        (regfile, rpidx), rdpick)
-
-                # connect the regspec "reg select" number to this port
-                with m.If(rdpick.en_o):
-                    comb += rport.ren.eq(read)
-
-                # connect up the FU req/go signals, and the reg-read to the FU
-                # and create a Read Broadcast Bus
-                for pi, (funame, fu, idx) in enumerate(fuspec):
-                    src = fu.src_i[idx]
-
-                    # connect request-read to picker input, and output to go-rd
-                    fu_active = fu_bitdict[funame]
-                    pick = fu.rd_rel_o[idx] & fu_active & rdflag
-                    comb += rdpick.i[pi].eq(pick)
-                    comb += fu.go_rd_i[idx].eq(rdpick.o[pi])
-
-                    # connect regfile port to input, creating a Broadcast Bus
-                    print("reg connect widths",
-                          regfile, regname, pi, funame,
-                          src.shape(), rport.data_o.shape())
-                    # all FUs connect to same port
-                    comb += src.eq(rport.data_o)
+                self.connect_rdport(m, fu_bitdict, rdpickers, regfile,
+                                       regname, fspec)
 
     def connect_wrports(self, m, fu_bitdict):
         """connect write ports
@@ -340,7 +378,7 @@ class NonProductionCore(Elaboratable):
                     byregfiles_spec[regfile] = {}
                 if regname not in byregfiles_spec[regfile]:
                     byregfiles_spec[regfile][regname] = \
-                        [rdflag, read, write, wid, []]
+                        (rdflag, read, write, wid, [])
                 # here we start to create "lanes"
                 if idx not in byregfiles[regfile]:
                     byregfiles[regfile][idx] = []
index d93e3782b599dc4b17c98bbdaee0b3198e016f88..558ee51ed92c1727fcc2fdac1fefd419cc09e5e6 100644 (file)
@@ -264,7 +264,8 @@ if __name__ == '__main__':
              'spr': 1,
              'div': 1,
              'mul': 1,
-             'shiftrot': 1}
+             'shiftrot': 1
+            }
     pspec = TestMemPspec(ldst_ifacetype='bare_wb',
                          imem_ifacetype='bare_wb',
                          addr_wid=48,