pass SPR MicroOp to MMU function unit
[soc.git] / src / soc / simple / core.py
index 4c92fa7f8480ebfcb6b7a0dcc9fdb4c4cf43f04b..d3a66bb1e9916c50277b1d5b1dc2a859707fc90b 100644 (file)
@@ -19,22 +19,31 @@ and consequently it is safer to wait for the Function Unit to complete
 before allowing a new instruction to proceed.
 """
 
 before allowing a new instruction to proceed.
 """
 
-from nmigen import Elaboratable, Module, Signal
+from nmigen import Elaboratable, Module, Signal, ResetSignal, Cat, Mux
 from nmigen.cli import rtlil
 
 from nmigen.cli import rtlil
 
+from soc.decoder.power_decoder2 import PowerDecodeSubset
+from soc.decoder.power_regspec_map import regspec_decode_read
+from soc.decoder.power_regspec_map import regspec_decode_write
+
 from nmutil.picker import PriorityPicker
 from nmutil.util import treereduce
 
 from soc.fu.compunits.compunits import AllFunctionUnits
 from soc.regfile.regfiles import RegFiles
 from nmutil.picker import PriorityPicker
 from nmutil.util import treereduce
 
 from soc.fu.compunits.compunits import AllFunctionUnits
 from soc.regfile.regfiles import RegFiles
-from soc.decoder.power_decoder import create_pdecode
-from soc.decoder.power_decoder2 import PowerDecode2
+from soc.decoder.decode2execute1 import Decode2ToExecute1Type
+from soc.decoder.decode2execute1 import IssuerDecode2ToOperand
+from soc.decoder.power_decoder2 import get_rdflags
 from soc.decoder.decode2execute1 import Data
 from soc.experiment.l0_cache import TstL0CacheBuffer  # test only
 from soc.config.test.test_loadstore import TestMemPspec
 from soc.decoder.power_enums import MicrOp
 from soc.decoder.decode2execute1 import Data
 from soc.experiment.l0_cache import TstL0CacheBuffer  # test only
 from soc.config.test.test_loadstore import TestMemPspec
 from soc.decoder.power_enums import MicrOp
+from soc.config.state import CoreState
+
 import operator
 
 import operator
 
+from nmutil.util import rising_edge
+
 
 # helper function for reducing a list of signals down to a parallel
 # ORed single signal.
 
 # helper function for reducing a list of signals down to a parallel
 # ORed single signal.
@@ -60,62 +69,86 @@ def sort_fuspecs(fuspecs):
 
 class NonProductionCore(Elaboratable):
     def __init__(self, pspec):
 
 class NonProductionCore(Elaboratable):
     def __init__(self, pspec):
+        self.pspec = pspec
+
         # single LD/ST funnel for memory access
         self.l0 = TstL0CacheBuffer(pspec, n_units=1)
         pi = self.l0.l0.dports[0]
 
         # function units (only one each)
         # single LD/ST funnel for memory access
         self.l0 = TstL0CacheBuffer(pspec, n_units=1)
         pi = self.l0.l0.dports[0]
 
         # function units (only one each)
+        # only include mmu if enabled in pspec
         self.fus = AllFunctionUnits(pspec, pilist=[pi])
 
         # register files (yes plural)
         self.regs = RegFiles()
 
         self.fus = AllFunctionUnits(pspec, pilist=[pi])
 
         # register files (yes plural)
         self.regs = RegFiles()
 
-        # instruction decoder
-        pdecode = create_pdecode()
-        self.pdecode2 = PowerDecode2(pdecode)   # instruction decoder
+        # instruction decoder - needs a Trap-capable Record (captures EINT etc.)
+        self.e = Decode2ToExecute1Type("core", opkls=IssuerDecode2ToOperand)
+
+        self.state = CoreState("core")
+        self.raw_insn_i = Signal(32) # raw instruction
+        self.bigendian_i = Signal() # bigendian
 
         # issue/valid/busy signalling
 
         # issue/valid/busy signalling
-        self.ivalid_i = self.pdecode2.valid   # instruction is valid
+        self.ivalid_i = Signal(reset_less=True) # instruction is valid
         self.issue_i = Signal(reset_less=True)
         self.busy_o = Signal(name="corebusy_o", reset_less=True)
 
         self.issue_i = Signal(reset_less=True)
         self.busy_o = Signal(name="corebusy_o", reset_less=True)
 
-        # instruction input
-        self.bigendian_i = self.pdecode2.dec.bigendian
-        self.raw_opcode_i = self.pdecode2.dec.raw_opcode_in
-
         # start/stop and terminated signalling
         # start/stop and terminated signalling
-        self.core_start_i = Signal(reset_less=True)
-        self.core_stop_i = Signal(reset_less=True)
-        self.core_terminated_o = Signal(reset=0)  # indicates stopped
+        self.core_stopped_i = Signal(reset_less=True)
+        self.core_terminate_o = Signal(reset=0)  # indicates stopped
+
+        # create per-FU instruction decoders (subsetted)
+        self.decoders = {}
+        self.des = {}
+
+        for funame, fu in self.fus.fus.items():
+            f_name = fu.fnunit.name
+            fnunit = fu.fnunit.value
+            opkls = fu.opsubsetkls
+            if f_name == 'TRAP':
+                self.trapunit = funame
+                continue
+            self.decoders[funame] = PowerDecodeSubset(None, opkls, f_name,
+                                                      final=True,
+                                                      state=self.state)
+            self.des[funame] = self.decoders[funame].do
+
+        if "mmu0" in self.decoders:
+            self.decoders["mmu0"].mmu0_spr_dec = self.decoders["spr0"]
 
     def elaborate(self, platform):
         m = Module()
 
     def elaborate(self, platform):
         m = Module()
+        # for testing purposes, to cut down on build time in coriolis2
+        if hasattr(self.pspec, "nocore") and self.pspec.nocore == True:
+            x = Signal() # dummy signal
+            m.d.sync += x.eq(~x)
+            return m
+        comb = m.d.comb
 
 
-        m.submodules.pdecode2 = dec2 = self.pdecode2
         m.submodules.fus = self.fus
         m.submodules.l0 = l0 = self.l0
         self.regs.elaborate_into(m, platform)
         regs = self.regs
         fus = self.fus.fus
 
         m.submodules.fus = self.fus
         m.submodules.l0 = l0 = self.l0
         self.regs.elaborate_into(m, platform)
         regs = self.regs
         fus = self.fus.fus
 
-        # core start/stopped state
-        core_stopped = Signal(reset=0) # begins in running state
+        # connect decoders
+        for k, v in self.decoders.items():
+            setattr(m.submodules, "dec_%s" % v.fn_name, v)
+            comb += v.dec.raw_opcode_in.eq(self.raw_insn_i)
+            comb += v.dec.bigendian.eq(self.bigendian_i)
 
 
-        # start/stop signalling
-        with m.If(self.core_start_i):
-            m.d.sync += core_stopped.eq(0)
-        with m.If(self.core_stop_i):
-            m.d.sync += core_stopped.eq(1)
-        m.d.comb += self.core_terminated_o.eq(core_stopped)
+        # ssh, cheat: trap uses the main decoder because of the rewriting
+        self.des[self.trapunit] = self.e.do
 
         # connect up Function Units, then read/write ports
 
         # connect up Function Units, then read/write ports
-        fu_bitdict = self.connect_instruction(m, core_stopped)
+        fu_bitdict = self.connect_instruction(m)
         self.connect_rdports(m, fu_bitdict)
         self.connect_wrports(m, fu_bitdict)
 
         return m
 
         self.connect_rdports(m, fu_bitdict)
         self.connect_wrports(m, fu_bitdict)
 
         return m
 
-    def connect_instruction(self, m, core_stopped):
+    def connect_instruction(self, m):
         """connect_instruction
 
         uses decoded (from PowerOp) function unit information from CSV files
         """connect_instruction
 
         uses decoded (from PowerOp) function unit information from CSV files
@@ -129,16 +162,12 @@ class NonProductionCore(Elaboratable):
         """
         comb, sync = m.d.comb, m.d.sync
         fus = self.fus.fus
         """
         comb, sync = m.d.comb, m.d.sync
         fus = self.fus.fus
-        dec2 = self.pdecode2
 
         # enable-signals for each FU, get one bit for each FU (by name)
         fu_enable = Signal(len(fus), reset_less=True)
         fu_bitdict = {}
         for i, funame in enumerate(fus.keys()):
             fu_bitdict[funame] = fu_enable[i]
 
         # enable-signals for each FU, get one bit for each FU (by name)
         fu_enable = Signal(len(fus), reset_less=True)
         fu_bitdict = {}
         for i, funame in enumerate(fus.keys()):
             fu_bitdict[funame] = fu_enable[i]
-        # only run when allowed and when instruction is valid
-        can_run = Signal(reset_less=True)
-        comb += can_run.eq(self.ivalid_i & ~core_stopped)
 
         # enable the required Function Unit based on the opcode decode
         # note: this *only* works correctly for simple core when one and
 
         # enable the required Function Unit based on the opcode decode
         # note: this *only* works correctly for simple core when one and
@@ -146,7 +175,7 @@ class NonProductionCore(Elaboratable):
         for funame, fu in fus.items():
             fnunit = fu.fnunit.value
             enable = Signal(name="en_%s" % funame, reset_less=True)
         for funame, fu in fus.items():
             fnunit = fu.fnunit.value
             enable = Signal(name="en_%s" % funame, reset_less=True)
-            comb += enable.eq((dec2.e.do.fn_unit & fnunit).bool())
+            comb += enable.eq((self.e.do.fn_unit & fnunit).bool())
             comb += fu_bitdict[funame].eq(enable)
 
         # sigh - need a NOP counter
             comb += fu_bitdict[funame].eq(enable)
 
         # sigh - need a NOP counter
@@ -155,11 +184,11 @@ class NonProductionCore(Elaboratable):
             sync += counter.eq(counter - 1)
             comb += self.busy_o.eq(1)
 
             sync += counter.eq(counter - 1)
             comb += self.busy_o.eq(1)
 
-        with m.If(can_run):
-            with m.Switch(dec2.e.do.insn_type):
+        with m.If(self.ivalid_i): # run only when valid
+            with m.Switch(self.e.do.insn_type):
                 # check for ATTN: halt if true
                 with m.Case(MicrOp.OP_ATTN):
                 # check for ATTN: halt if true
                 with m.Case(MicrOp.OP_ATTN):
-                    m.d.sync += core_stopped.eq(1)
+                    m.d.sync += self.core_terminate_o.eq(1)
 
                 with m.Case(MicrOp.OP_NOP):
                     sync += counter.eq(2)
 
                 with m.Case(MicrOp.OP_NOP):
                     sync += counter.eq(2)
@@ -168,19 +197,119 @@ class NonProductionCore(Elaboratable):
                 with m.Default():
                     # connect up instructions.  only one enabled at a time
                     for funame, fu in fus.items():
                 with m.Default():
                     # connect up instructions.  only one enabled at a time
                     for funame, fu in fus.items():
+                        do = self.des[funame]
                         enable = fu_bitdict[funame]
 
                         # run this FunctionUnit if enabled
                         enable = fu_bitdict[funame]
 
                         # run this FunctionUnit if enabled
+                        # route op, issue, busy, read flags and mask to FU
                         with m.If(enable):
                         with m.If(enable):
-                            # route op, issue, busy, read flags and mask to FU
-                            comb += fu.oper_i.eq_from_execute1(dec2.e)
+                            # operand comes from the *local*  decoder
+                            comb += fu.oper_i.eq_from(do)
+                            #comb += fu.oper_i.eq_from_execute1(e)
                             comb += fu.issue_i.eq(self.issue_i)
                             comb += self.busy_o.eq(fu.busy_o)
                             comb += fu.issue_i.eq(self.issue_i)
                             comb += self.busy_o.eq(fu.busy_o)
-                            rdmask = dec2.rdflags(fu)
+                            # rdmask, which is for registers, needs to come
+                            # from the *main* decoder
+                            rdmask = get_rdflags(self.e, fu)
                             comb += fu.rdmaskn.eq(~rdmask)
 
         return fu_bitdict
 
                             comb += fu.rdmaskn.eq(~rdmask)
 
         return fu_bitdict
 
+    def connect_rdport(self, m, fu_bitdict, rdpickers, regfile, regname, fspec):
+        comb, sync = m.d.comb, m.d.sync
+        fus = self.fus.fus
+        regs = self.regs
+
+        rpidx = regname
+
+        # select the required read port.  these are pre-defined sizes
+        rfile = regs.rf[regfile.lower()]
+        rport = rfile.r_ports[rpidx]
+        print("read regfile", rpidx, regfile, regs.rf.keys(),
+                              rfile, rfile.unary)
+
+        fspecs = fspec
+        if not isinstance(fspecs, list):
+            fspecs = [fspecs]
+
+        rdflags = []
+        pplen = 0
+        reads = []
+        ppoffs = []
+        for i, fspec in enumerate(fspecs):
+            # get the regfile specs for this regfile port
+            (rf, read, write, wid, fuspec) = fspec
+            print ("fpsec", i, fspec, len(fuspec))
+            ppoffs.append(pplen) # record offset for picker
+            pplen += len(fuspec)
+            name = "rdflag_%s_%s_%d" % (regfile, regname, i)
+            rdflag = Signal(name=name, reset_less=True)
+            comb += rdflag.eq(rf)
+            rdflags.append(rdflag)
+            reads.append(read)
+
+        print ("pplen", pplen)
+
+        # create a priority picker to manage this port
+        rdpickers[regfile][rpidx] = rdpick = PriorityPicker(pplen)
+        setattr(m.submodules, "rdpick_%s_%s" % (regfile, rpidx), rdpick)
+
+        rens = []
+        addrs = []
+        for i, fspec in enumerate(fspecs):
+            (rf, read, write, wid, fuspec) = fspec
+            # connect up the FU req/go signals, and the reg-read to the FU
+            # and create a Read Broadcast Bus
+            for pi, (funame, fu, idx) in enumerate(fuspec):
+                pi += ppoffs[i]
+
+                # connect request-read to picker input, and output to go-rd
+                fu_active = fu_bitdict[funame]
+                name = "%s_%s_%s_%i" % (regfile, rpidx, funame, pi)
+                addr_en = Signal.like(reads[i], name="addr_en_"+name)
+                pick = Signal(name="pick_"+name)     # picker input
+                rp = Signal(name="rp_"+name)         # picker output
+                delay_pick = Signal(name="dp_"+name) # read-enable "underway"
+
+                # exclude any currently-enabled read-request (mask out active)
+                comb += pick.eq(fu.rd_rel_o[idx] & fu_active & rdflags[i] &
+                                ~delay_pick)
+                comb += rdpick.i[pi].eq(pick)
+                comb += fu.go_rd_i[idx].eq(delay_pick) # pass in *delayed* pick
+
+                # if picked, select read-port "reg select" number to port
+                comb += rp.eq(rdpick.o[pi] & rdpick.en_o)
+                sync += delay_pick.eq(rp) # delayed "pick"
+                comb += addr_en.eq(Mux(rp, reads[i], 0))
+
+                # the read-enable happens combinatorially (see mux-bus below)
+                # but it results in the data coming out on a one-cycle delay.
+                if rfile.unary:
+                    rens.append(addr_en)
+                else:
+                    addrs.append(addr_en)
+                    rens.append(rp)
+
+                # use the *delayed* pick signal to put requested data onto bus
+                with m.If(delay_pick):
+                    # connect regfile port to input, creating fan-out Bus
+                    src = fu.src_i[idx]
+                    print("reg connect widths",
+                          regfile, regname, pi, funame,
+                          src.shape(), rport.data_o.shape())
+                    # all FUs connect to same port
+                    comb += src.eq(rport.data_o)
+
+        # or-reduce the muxed read signals
+        if rfile.unary:
+            # for unary-addressed
+            comb += rport.ren.eq(ortreereduce_sig(rens))
+        else:
+            # for binary-addressed
+            comb += rport.addr.eq(ortreereduce_sig(addrs))
+            comb += rport.ren.eq(Cat(*rens).bool())
+            print ("binary", regfile, rpidx, rport, rport.ren, rens, addrs)
+
     def connect_rdports(self, m, fu_bitdict):
         """connect read ports
 
     def connect_rdports(self, m, fu_bitdict):
         """connect read ports
 
@@ -202,47 +331,108 @@ class NonProductionCore(Elaboratable):
             fuspecs = byregfiles_rdspec[regfile]
             rdpickers[regfile] = {}
 
             fuspecs = byregfiles_rdspec[regfile]
             rdpickers[regfile] = {}
 
+            # argh.  an experiment to merge RA and RB in the INT regfile
+            # (we have too many read/write ports)
+            #if regfile == 'INT':
+                #fuspecs['rabc'] = [fuspecs.pop('rb')]
+                #fuspecs['rabc'].append(fuspecs.pop('rc'))
+                #fuspecs['rabc'].append(fuspecs.pop('ra'))
+            #if regfile == 'FAST':
+            #    fuspecs['fast1'] = [fuspecs.pop('fast1')]
+            #    if 'fast2' in fuspecs:
+            #        fuspecs['fast1'].append(fuspecs.pop('fast2'))
+
             # for each named regfile port, connect up all FUs to that port
             for (regname, fspec) in sort_fuspecs(fuspecs):
                 print("connect rd", regname, fspec)
             # for each named regfile port, connect up all FUs to that port
             for (regname, fspec) in sort_fuspecs(fuspecs):
                 print("connect rd", regname, fspec)
-                rpidx = regname
-                # get the regfile specs for this regfile port
-                (rf, read, write, wid, fuspec) = fspec
-                name = "rdflag_%s_%s" % (regfile, regname)
-                rdflag = Signal(name=name, reset_less=True)
-                comb += rdflag.eq(rf)
-
-                # select the required read port.  these are pre-defined sizes
-                print(rpidx, regfile, regs.rf.keys())
-                rport = regs.rf[regfile.lower()].r_ports[rpidx]
-
-                # create a priority picker to manage this port
-                rdpickers[regfile][rpidx] = rdpick = PriorityPicker(
-                    len(fuspec))
-                setattr(m.submodules, "rdpick_%s_%s" %
-                        (regfile, rpidx), rdpick)
-
-                # connect the regspec "reg select" number to this port
-                with m.If(rdpick.en_o):
-                    comb += rport.ren.eq(read)
-
-                # connect up the FU req/go signals, and the reg-read to the FU
-                # and create a Read Broadcast Bus
-                for pi, (funame, fu, idx) in enumerate(fuspec):
-                    src = fu.src_i[idx]
+                self.connect_rdport(m, fu_bitdict, rdpickers, regfile,
+                                       regname, fspec)
 
 
-                    # connect request-read to picker input, and output to go-rd
-                    fu_active = fu_bitdict[funame]
-                    pick = fu.rd_rel_o[idx] & fu_active & rdflag
-                    comb += rdpick.i[pi].eq(pick)
-                    comb += fu.go_rd_i[idx].eq(rdpick.o[pi])
+    def connect_wrport(self, m, fu_bitdict, wrpickers, regfile, regname, fspec):
+        comb, sync = m.d.comb, m.d.sync
+        fus = self.fus.fus
+        regs = self.regs
 
 
-                    # connect regfile port to input, creating a Broadcast Bus
-                    print("reg connect widths",
-                          regfile, regname, pi, funame,
-                          src.shape(), rport.data_o.shape())
-                    # all FUs connect to same port
-                    comb += src.eq(rport.data_o)
+        print("connect wr", regname, fspec)
+        rpidx = regname
+
+        # select the required write port.  these are pre-defined sizes
+        print(regfile, regs.rf.keys())
+        rfile = regs.rf[regfile.lower()]
+        wport = rfile.w_ports[rpidx]
+
+        fspecs = fspec
+        if not isinstance(fspecs, list):
+            fspecs = [fspecs]
+
+        pplen = 0
+        writes = []
+        ppoffs = []
+        for i, fspec in enumerate(fspecs):
+            # get the regfile specs for this regfile port
+            (rf, read, write, wid, fuspec) = fspec
+            print ("fpsec", i, fspec, len(fuspec))
+            ppoffs.append(pplen) # record offset for picker
+            pplen += len(fuspec)
+
+        # create a priority picker to manage this port
+        wrpickers[regfile][rpidx] = wrpick = PriorityPicker(pplen)
+        setattr(m.submodules, "wrpick_%s_%s" % (regfile, rpidx), wrpick)
+
+        wsigs = []
+        wens = []
+        addrs = []
+        for i, fspec in enumerate(fspecs):
+            # connect up the FU req/go signals and the reg-read to the FU
+            # these are arbitrated by Data.ok signals
+            (rf, read, write, wid, fuspec) = fspec
+            for pi, (funame, fu, idx) in enumerate(fuspec):
+                pi += ppoffs[i]
+
+                # write-request comes from dest.ok
+                dest = fu.get_out(idx)
+                fu_dest_latch = fu.get_fu_out(idx)  # latched output
+                name = "wrflag_%s_%s_%d" % (funame, regname, idx)
+                wrflag = Signal(name=name, reset_less=True)
+                comb += wrflag.eq(dest.ok & fu.busy_o)
+
+                # connect request-write to picker input, and output to go-wr
+                fu_active = fu_bitdict[funame]
+                pick = fu.wr.rel_o[idx] & fu_active  # & wrflag
+                comb += wrpick.i[pi].eq(pick)
+                # create a single-pulse go write from the picker output
+                wr_pick = Signal()
+                comb += wr_pick.eq(wrpick.o[pi] & wrpick.en_o)
+                comb += fu.go_wr_i[idx].eq(rising_edge(m, wr_pick))
+
+                # connect the regspec write "reg select" number to this port
+                # only if one FU actually requests (and is granted) the port
+                # will the write-enable be activated
+                addr_en = Signal.like(write)
+                wp = Signal()
+                comb += wp.eq(wr_pick & wrpick.en_o)
+                comb += addr_en.eq(Mux(wp, write, 0))
+                if rfile.unary:
+                    wens.append(addr_en)
+                else:
+                    addrs.append(addr_en)
+                    wens.append(wp)
+
+                # connect regfile port to input
+                print("reg connect widths",
+                      regfile, regname, pi, funame,
+                      dest.shape(), wport.data_i.shape())
+                wsigs.append(fu_dest_latch)
+
+        # here is where we create the Write Broadcast Bus. simple, eh?
+        comb += wport.data_i.eq(ortreereduce_sig(wsigs))
+        if rfile.unary:
+            # for unary-addressed
+            comb += wport.wen.eq(ortreereduce_sig(wens))
+        else:
+            # for binary-addressed
+            comb += wport.addr.eq(ortreereduce_sig(addrs))
+            comb += wport.wen.eq(ortreereduce_sig(wens))
 
     def connect_wrports(self, m, fu_bitdict):
         """connect write ports
 
     def connect_wrports(self, m, fu_bitdict):
         """connect write ports
@@ -267,61 +457,26 @@ class NonProductionCore(Elaboratable):
         for regfile, spec in byregfiles_wr.items():
             fuspecs = byregfiles_wrspec[regfile]
             wrpickers[regfile] = {}
         for regfile, spec in byregfiles_wr.items():
             fuspecs = byregfiles_wrspec[regfile]
             wrpickers[regfile] = {}
-            for (regname, fspec) in sort_fuspecs(fuspecs):
-                print("connect wr", regname, fspec)
-                rpidx = regname
-                # get the regfile specs for this regfile port
-                (rf, read, write, wid, fuspec) = fspec
 
 
-                # select the required write port.  these are pre-defined sizes
-                print(regfile, regs.rf.keys())
-                wport = regs.rf[regfile.lower()].w_ports[rpidx]
+            # argh, more port-merging
+            if regfile == 'INT':
+                fuspecs['o'] = [fuspecs.pop('o')]
+                fuspecs['o'].append(fuspecs.pop('o1'))
+            if regfile == 'FAST':
+                fuspecs['fast1'] = [fuspecs.pop('fast1')]
+                if 'fast2' in fuspecs:
+                    fuspecs['fast1'].append(fuspecs.pop('fast2'))
 
 
-                # create a priority picker to manage this port
-                wrpickers[regfile][rpidx] = wrpick = PriorityPicker(
-                    len(fuspec))
-                setattr(m.submodules, "wrpick_%s_%s" %
-                        (regfile, rpidx), wrpick)
-
-                # connect the regspec write "reg select" number to this port
-                # only if one FU actually requests (and is granted) the port
-                # will the write-enable be activated
-                with m.If(wrpick.en_o):
-                    comb += wport.wen.eq(write)
-                with m.Else():
-                    comb += wport.wen.eq(0)
-
-                # connect up the FU req/go signals and the reg-read to the FU
-                # these are arbitrated by Data.ok signals
-                wsigs = []
-                for pi, (funame, fu, idx) in enumerate(fuspec):
-                    # write-request comes from dest.ok
-                    dest = fu.get_out(idx)
-                    fu_dest_latch = fu.get_fu_out(idx)  # latched output
-                    name = "wrflag_%s_%s_%d" % (funame, regname, idx)
-                    wrflag = Signal(name=name, reset_less=True)
-                    comb += wrflag.eq(dest.ok & fu.busy_o)
-
-                    # connect request-read to picker input, and output to go-wr
-                    fu_active = fu_bitdict[funame]
-                    pick = fu.wr.rel[idx] & fu_active  # & wrflag
-                    comb += wrpick.i[pi].eq(pick)
-                    comb += fu.go_wr_i[idx].eq(wrpick.o[pi] & wrpick.en_o)
-                    # connect regfile port to input
-                    print("reg connect widths",
-                          regfile, regname, pi, funame,
-                          dest.shape(), wport.data_i.shape())
-                    wsigs.append(fu_dest_latch)
-
-                # here is where we create the Write Broadcast Bus. simple, eh?
-                comb += wport.data_i.eq(ortreereduce_sig(wsigs))
+            for (regname, fspec) in sort_fuspecs(fuspecs):
+                self.connect_wrport(m, fu_bitdict, wrpickers,
+                                        regfile, regname, fspec)
 
     def get_byregfiles(self, readmode):
 
         mode = "read" if readmode else "write"
 
     def get_byregfiles(self, readmode):
 
         mode = "read" if readmode else "write"
-        dec2 = self.pdecode2
         regs = self.regs
         fus = self.fus.fus
         regs = self.regs
         fus = self.fus.fus
+        e = self.e # decoded instruction to execute
 
         # dictionary of lists of regfile ports
         byregfiles = {}
 
         # dictionary of lists of regfile ports
         byregfiles = {}
@@ -335,17 +490,17 @@ class NonProductionCore(Elaboratable):
                     (regfile, regname, wid) = fu.get_out_spec(idx)
                 print("    %d %s %s %s" % (idx, regfile, regname, str(wid)))
                 if readmode:
                     (regfile, regname, wid) = fu.get_out_spec(idx)
                 print("    %d %s %s %s" % (idx, regfile, regname, str(wid)))
                 if readmode:
-                    rdflag, read = dec2.regspecmap_read(regfile, regname)
+                    rdflag, read = regspec_decode_read(e, regfile, regname)
                     write = None
                 else:
                     rdflag, read = None, None
                     write = None
                 else:
                     rdflag, read = None, None
-                    wrport, write = dec2.regspecmap_write(regfile, regname)
+                    wrport, write = regspec_decode_write(e, regfile, regname)
                 if regfile not in byregfiles:
                     byregfiles[regfile] = {}
                     byregfiles_spec[regfile] = {}
                 if regname not in byregfiles_spec[regfile]:
                     byregfiles_spec[regfile][regname] = \
                 if regfile not in byregfiles:
                     byregfiles[regfile] = {}
                     byregfiles_spec[regfile] = {}
                 if regname not in byregfiles_spec[regfile]:
                     byregfiles_spec[regfile][regname] = \
-                        [rdflag, read, write, wid, []]
+                        (rdflag, read, write, wid, [])
                 # here we start to create "lanes"
                 if idx not in byregfiles[regfile]:
                     byregfiles[regfile][idx] = []
                 # here we start to create "lanes"
                 if idx not in byregfiles[regfile]:
                     byregfiles[regfile][idx] = []
@@ -370,7 +525,7 @@ class NonProductionCore(Elaboratable):
 
     def __iter__(self):
         yield from self.fus.ports()
 
     def __iter__(self):
         yield from self.fus.ports()
-        yield from self.pdecode2.ports()
+        yield from self.e.ports()
         yield from self.l0.ports()
         # TODO: regs
 
         yield from self.l0.ports()
         # TODO: regs