pass in CoreState to PowerDecoder rather than eq a copy of it
[soc.git] / src / soc / simple / core.py
index ff7f9277b683ec28edd35a0f98ed84edbf65b803..8ec18ce968b792fa9645bda671b35e69e0ec40f8 100644 (file)
 not in any way intended for production use.  connects up FunctionUnits to
 Register Files in a brain-dead fashion that only permits one and only one
 Function Unit to be operational.
 not in any way intended for production use.  connects up FunctionUnits to
 Register Files in a brain-dead fashion that only permits one and only one
 Function Unit to be operational.
+
+the principle here is to take the Function Units, analyse their regspecs,
+and turn their requirements for access to register file read/write ports
+into groupings by Register File and Register File Port name.
+
+under each grouping - by regfile/port - a list of Function Units that
+need to connect to that port is created.  as these are a contended
+resource a "Broadcast Bus" per read/write port is then also created,
+with access to it managed by a PriorityPicker.
+
+the brain-dead part of this module is that even though there is no
+conflict of access, regfile read/write hazards are *not* analysed,
+and consequently it is safer to wait for the Function Unit to complete
+before allowing a new instruction to proceed.
 """
 """
-from nmigen import Elaboratable, Module, Signal
+
+from nmigen import Elaboratable, Module, Signal, ResetSignal, Cat, Mux
 from nmigen.cli import rtlil
 
 from nmigen.cli import rtlil
 
+from soc.decoder.power_decoder2 import PowerDecodeSubset
+from soc.decoder.power_regspec_map import regspec_decode_read
+from soc.decoder.power_regspec_map import regspec_decode_write
+
 from nmutil.picker import PriorityPicker
 from nmutil.util import treereduce
 
 from soc.fu.compunits.compunits import AllFunctionUnits
 from soc.regfile.regfiles import RegFiles
 from nmutil.picker import PriorityPicker
 from nmutil.util import treereduce
 
 from soc.fu.compunits.compunits import AllFunctionUnits
 from soc.regfile.regfiles import RegFiles
-from soc.decoder.power_decoder import create_pdecode
-from soc.decoder.power_decoder2 import PowerDecode2
+from soc.decoder.decode2execute1 import Decode2ToExecute1Type
+from soc.decoder.power_decoder2 import get_rdflags
+from soc.decoder.decode2execute1 import Data
+from soc.experiment.l0_cache import TstL0CacheBuffer  # test only
+from soc.config.test.test_loadstore import TestMemPspec
+from soc.decoder.power_enums import MicrOp
+from soc.config.state import CoreState
+
+import operator
 
 
+from nmutil.util import rising_edge
 
 
 
 
+# helper function for reducing a list of signals down to a parallel
+# ORed single signal.
 def ortreereduce(tree, attr="data_o"):
     return treereduce(tree, operator.or_, lambda x: getattr(x, attr))
 
 
 def ortreereduce(tree, attr="data_o"):
     return treereduce(tree, operator.or_, lambda x: getattr(x, attr))
 
 
+def ortreereduce_sig(tree):
+    return treereduce(tree, operator.or_, lambda x: x)
+
+
+# helper function to place full regs declarations first
+def sort_fuspecs(fuspecs):
+    res = []
+    for (regname, fspec) in fuspecs.items():
+        if regname.startswith("full"):
+            res.append((regname, fspec))
+    for (regname, fspec) in fuspecs.items():
+        if not regname.startswith("full"):
+            res.append((regname, fspec))
+    return res  # enumerate(res)
+
+
 class NonProductionCore(Elaboratable):
 class NonProductionCore(Elaboratable):
-    def __init__(self):
-        self.fus = AllFunctionUnits()
+    def __init__(self, pspec):
+
+        # single LD/ST funnel for memory access
+        self.l0 = TstL0CacheBuffer(pspec, n_units=1)
+        pi = self.l0.l0.dports[0]
+
+        # function units (only one each)
+        self.fus = AllFunctionUnits(pspec, pilist=[pi])
+
+        # register files (yes plural)
         self.regs = RegFiles()
         self.regs = RegFiles()
-        self.pdecode = pdecode = create_pdecode()
-        self.pdecode2 = PowerDecode2(pdecode)   # instruction decoder
-        self.ivalid_i = self.pdecode2.e.valid   # instruction is valid
+
+        # instruction decoder
+        self.e = Decode2ToExecute1Type() # decoded instruction
+        self.state = CoreState("core")
+        self.raw_insn_i = Signal(32) # raw instruction
+        self.bigendian_i = Signal() # bigendian
+
+        # issue/valid/busy signalling
+        self.ivalid_i = Signal(reset_less=True) # instruction is valid
+        self.issue_i = Signal(reset_less=True)
+        self.busy_o = Signal(name="corebusy_o", reset_less=True)
+
+        # start/stop and terminated signalling
+        self.core_stopped_i = Signal(reset_less=True)
+        self.core_reset_i = Signal()
+        self.core_terminate_o = Signal(reset=0)  # indicates stopped
+
+        # create per-FU instruction decoders (subsetted)
+        self.decoders = {}
+        self.ees = {}
+
+        for funame, fu in self.fus.fus.items():
+            f_name = fu.fnunit.name
+            fnunit = fu.fnunit.value
+            opkls = fu.opsubsetkls
+            if f_name == 'TRAP':
+                self.trapunit = funame
+                continue
+            self.decoders[funame] = PowerDecodeSubset(None, opkls, f_name,
+                                                      final=True,
+                                                      state=self.state)
+            self.ees[funame] = self.decoders[funame].e
 
     def elaborate(self, platform):
         m = Module()
         comb = m.d.comb
 
 
     def elaborate(self, platform):
         m = Module()
         comb = m.d.comb
 
-        m.submodules.pdecode2 = dec2 = self.pdecode2
         m.submodules.fus = self.fus
         m.submodules.fus = self.fus
+        m.submodules.l0 = l0 = self.l0
         self.regs.elaborate_into(m, platform)
         regs = self.regs
         fus = self.fus.fus
 
         self.regs.elaborate_into(m, platform)
         regs = self.regs
         fus = self.fus.fus
 
+        # connect decoders
+        for k, v in self.decoders.items():
+            setattr(m.submodules, "dec_%s" % v.fn_name, v)
+            comb += v.dec.raw_opcode_in.eq(self.raw_insn_i)
+            comb += v.dec.bigendian.eq(self.bigendian_i)
+
+        # ssh, cheat: trap uses the main decoder because of the rewriting
+        self.ees[self.trapunit] = self.e
+
+        # connect up Function Units, then read/write ports
+        fu_bitdict = self.connect_instruction(m)
+        self.connect_rdports(m, fu_bitdict)
+        self.connect_wrports(m, fu_bitdict)
+
+        # connect up reset
+        m.d.comb += ResetSignal().eq(self.core_reset_i)
+
+        return m
+
+    def connect_instruction(self, m):
+        """connect_instruction
+
+        uses decoded (from PowerOp) function unit information from CSV files
+        to ascertain which Function Unit should deal with the current
+        instruction.
+
+        some (such as OP_ATTN, OP_NOP) are dealt with here, including
+        ignoring it and halting the processor.  OP_NOP is a bit annoying
+        because the issuer expects busy flag still to be raised then lowered.
+        (this requires a fake counter to be set).
+        """
+        comb, sync = m.d.comb, m.d.sync
+        fus = self.fus.fus
+
         # enable-signals for each FU, get one bit for each FU (by name)
         fu_enable = Signal(len(fus), reset_less=True)
         fu_bitdict = {}
         for i, funame in enumerate(fus.keys()):
             fu_bitdict[funame] = fu_enable[i]
 
         # enable-signals for each FU, get one bit for each FU (by name)
         fu_enable = Signal(len(fus), reset_less=True)
         fu_bitdict = {}
         for i, funame in enumerate(fus.keys()):
             fu_bitdict[funame] = fu_enable[i]
 
-        # dictionary of lists of regfile read ports
-        byregfiles_rd = {}
-        byregfiles_rdspec = {}
-        for (funame, fu) in fus.items():
-            print ("read ports for %s" % funame)
-            for idx in range(fu.n_src):
-                (regfile, regname, wid) = fu.get_in_spec(idx)
-                print ("    %s %s %s" % (regfile, regname, str(wid)))
-                rdflag, read, _ = dec2.regspecmap(regfile, regname)
-                if regfile not in byregfiles_rd:
-                    byregfiles_rd[regfile] = {}
-                    byregfiles_rdspec[regfile] = (regname, rdflag, read, wid)
-                # here we start to create "lanes"
-                if idx not in byregfiles_rd[regfile]:
-                    byregfiles_rd[regfile][idx] = []
-                fuspec = (funame, fu)
-                byregfiles_rd[regfile][idx].append(fuspec)
+        # enable the required Function Unit based on the opcode decode
+        # note: this *only* works correctly for simple core when one and
+        # *only* one FU is allocated per instruction
+        for funame, fu in fus.items():
+            fnunit = fu.fnunit.value
+            enable = Signal(name="en_%s" % funame, reset_less=True)
+            comb += enable.eq((self.e.do.fn_unit & fnunit).bool())
+            comb += fu_bitdict[funame].eq(enable)
 
 
-        # ok just print that out, for convenience
-        for regfile, spec in byregfiles_rd.items():
-            print ("regfile read ports:", regfile)
-            for idx, fuspec in spec.items():
-                print ("  regfile read port %s lane: %d" % (regfile, idx))
-                (regname, rdflag, read, wid) = byregfiles_rdspec[regfile]
-                print ("  %s" % regname, wid, read, rdflag)
-                for (funame, fu) in fuspec:
-                    print ("    ", funame, fu, fu.src_i[idx])
-                    print ()
+        # sigh - need a NOP counter
+        counter = Signal(2)
+        with m.If(counter != 0):
+            sync += counter.eq(counter - 1)
+            comb += self.busy_o.eq(1)
+
+        with m.If(self.ivalid_i): # run only when valid
+            with m.Switch(self.e.do.insn_type):
+                # check for ATTN: halt if true
+                with m.Case(MicrOp.OP_ATTN):
+                    m.d.sync += self.core_terminate_o.eq(1)
+
+                with m.Case(MicrOp.OP_NOP):
+                    sync += counter.eq(2)
+                    comb += self.busy_o.eq(1)
+
+                with m.Default():
+                    # connect up instructions.  only one enabled at a time
+                    for funame, fu in fus.items():
+                        e = self.ees[funame]
+                        enable = fu_bitdict[funame]
+
+                        # run this FunctionUnit if enabled
+                        # route op, issue, busy, read flags and mask to FU
+                        with m.If(enable):
+                            # operand comes from the *local*  decoder
+                            comb += fu.oper_i.eq_from(e.do)
+                            #comb += fu.oper_i.eq_from_execute1(e)
+                            comb += fu.issue_i.eq(self.issue_i)
+                            comb += self.busy_o.eq(fu.busy_o)
+                            # rdmask, which is for registers, needs to come
+                            # from the *main* decoder
+                            rdmask = get_rdflags(self.e, fu)
+                            comb += fu.rdmaskn.eq(~rdmask)
+
+        return fu_bitdict
+
+    def connect_rdport(self, m, fu_bitdict, rdpickers, regfile, regname, fspec):
+        comb, sync = m.d.comb, m.d.sync
+        fus = self.fus.fus
+        regs = self.regs
+
+        rpidx = regname
+
+        # select the required read port.  these are pre-defined sizes
+        rfile = regs.rf[regfile.lower()]
+        rport = rfile.r_ports[rpidx]
+        print("read regfile", rpidx, regfile, regs.rf.keys(),
+                              rfile, rfile.unary)
+
+        fspecs = fspec
+        if not isinstance(fspecs, list):
+            fspecs = [fspecs]
+
+        rdflags = []
+        pplen = 0
+        reads = []
+        ppoffs = []
+        for i, fspec in enumerate(fspecs):
+            # get the regfile specs for this regfile port
+            (rf, read, write, wid, fuspec) = fspec
+            print ("fpsec", i, fspec, len(fuspec))
+            ppoffs.append(pplen) # record offset for picker
+            pplen += len(fuspec)
+            name = "rdflag_%s_%s_%d" % (regfile, regname, i)
+            rdflag = Signal(name=name, reset_less=True)
+            comb += rdflag.eq(rf)
+            rdflags.append(rdflag)
+            reads.append(read)
+
+        print ("pplen", pplen)
+
+        # create a priority picker to manage this port
+        rdpickers[regfile][rpidx] = rdpick = PriorityPicker(pplen)
+        setattr(m.submodules, "rdpick_%s_%s" % (regfile, rpidx), rdpick)
+
+        rens = []
+        addrs = []
+        for i, fspec in enumerate(fspecs):
+            (rf, read, write, wid, fuspec) = fspec
+            # connect up the FU req/go signals, and the reg-read to the FU
+            # and create a Read Broadcast Bus
+            for pi, (funame, fu, idx) in enumerate(fuspec):
+                pi += ppoffs[i]
+
+                # connect request-read to picker input, and output to go-rd
+                fu_active = fu_bitdict[funame]
+                name = "%s_%s_%s_%i" % (regfile, rpidx, funame, pi)
+                addr_en = Signal.like(reads[i], name="addr_en_"+name)
+                pick = Signal(name="pick_"+name)     # picker input
+                rp = Signal(name="rp_"+name)         # picker output
+                delay_pick = Signal(name="dp_"+name) # read-enable "underway"
+
+                # exclude any currently-enabled read-request (mask out active)
+                comb += pick.eq(fu.rd_rel_o[idx] & fu_active & rdflags[i] &
+                                ~delay_pick)
+                comb += rdpick.i[pi].eq(pick)
+                comb += fu.go_rd_i[idx].eq(delay_pick) # pass in *delayed* pick
+
+                # if picked, select read-port "reg select" number to port
+                comb += rp.eq(rdpick.o[pi] & rdpick.en_o)
+                sync += delay_pick.eq(rp) # delayed "pick"
+                comb += addr_en.eq(Mux(rp, reads[i], 0))
+
+                # the read-enable happens combinatorially (see mux-bus below)
+                # but it results in the data coming out on a one-cycle delay.
+                if rfile.unary:
+                    rens.append(addr_en)
+                else:
+                    addrs.append(addr_en)
+                    rens.append(rp)
+
+                # use the *delayed* pick signal to put requested data onto bus
+                with m.If(delay_pick):
+                    # connect regfile port to input, creating fan-out Bus
+                    src = fu.src_i[idx]
+                    print("reg connect widths",
+                          regfile, regname, pi, funame,
+                          src.shape(), rport.data_o.shape())
+                    # all FUs connect to same port
+                    comb += src.eq(rport.data_o)
+
+        # or-reduce the muxed read signals
+        if rfile.unary:
+            # for unary-addressed
+            comb += rport.ren.eq(ortreereduce_sig(rens))
+        else:
+            # for binary-addressed
+            comb += rport.addr.eq(ortreereduce_sig(addrs))
+            comb += rport.ren.eq(Cat(*rens).bool())
+            print ("binary", regfile, rpidx, rport, rport.ren, rens, addrs)
+
+    def connect_rdports(self, m, fu_bitdict):
+        """connect read ports
+
+        orders the read regspecs into a dict-of-dicts, by regfile, by
+        regport name, then connects all FUs that want that regport by
+        way of a PriorityPicker.
+        """
+        comb, sync = m.d.comb, m.d.sync
+        fus = self.fus.fus
+        regs = self.regs
+
+        # dictionary of lists of regfile read ports
+        byregfiles_rd, byregfiles_rdspec = self.get_byregfiles(True)
 
         # okaay, now we need a PriorityPicker per regfile per regfile port
         # loootta pickers... peter piper picked a pack of pickled peppers...
         rdpickers = {}
         for regfile, spec in byregfiles_rd.items():
 
         # okaay, now we need a PriorityPicker per regfile per regfile port
         # loootta pickers... peter piper picked a pack of pickled peppers...
         rdpickers = {}
         for regfile, spec in byregfiles_rd.items():
+            fuspecs = byregfiles_rdspec[regfile]
             rdpickers[regfile] = {}
             rdpickers[regfile] = {}
-            for rpidx, (idx, fuspec) in enumerate(spec.items()):
-                # select the required read port.  these are pre-defined sizes
-                print (regfile, regs.rf.keys())
-                rport = regs.rf[regfile.lower()].r_ports[rpidx]
-
-                # create a priority picker to manage this port
-                rdpickers[regfile][idx] = rdpick = PriorityPicker(len(fuspec))
-                setattr(m.submodules, "rdpick_%s_%d" % (regfile, idx), rdpick)
-
-                # connect the regspec "reg select" number to this port
-                (regname, rdflag, read, wid) = byregfiles_rdspec[regfile]
-                comb += rport.ren.eq(read)
-
-                # connect up the FU req/go signals and the reg-read to the FU
-                for pi, (funame, fu) in enumerate(fuspec):
-                    # connect request-read to picker input, and output to go-rd
-                    fu_active = fu_bitdict[funame]
-                    comb += rdpick.i[pi].eq(fu.rd_rel_o[idx] & fu_active)
-                    comb += fu.go_rd_i[idx].eq(rdpick.o[pi])
-                    # connect regfile port to input
-                    comb += fu.src_i[idx].eq(rport.data_o)
 
 
-        return m
+            # argh.  an experiment to merge RA and RB in the INT regfile
+            # (we have too many read/write ports)
+            #if regfile == 'INT':
+                #fuspecs['rabc'] = [fuspecs.pop('rb')]
+                #fuspecs['rabc'].append(fuspecs.pop('rc'))
+                #fuspecs['rabc'].append(fuspecs.pop('ra'))
+            #if regfile == 'FAST':
+            #    fuspecs['fast1'] = [fuspecs.pop('fast1')]
+            #    if 'fast2' in fuspecs:
+            #        fuspecs['fast1'].append(fuspecs.pop('fast2'))
+
+            # for each named regfile port, connect up all FUs to that port
+            for (regname, fspec) in sort_fuspecs(fuspecs):
+                print("connect rd", regname, fspec)
+                self.connect_rdport(m, fu_bitdict, rdpickers, regfile,
+                                       regname, fspec)
+
+    def connect_wrport(self, m, fu_bitdict, wrpickers, regfile, regname, fspec):
+        comb, sync = m.d.comb, m.d.sync
+        fus = self.fus.fus
+        regs = self.regs
+
+        print("connect wr", regname, fspec)
+        rpidx = regname
+
+        # select the required write port.  these are pre-defined sizes
+        print(regfile, regs.rf.keys())
+        rfile = regs.rf[regfile.lower()]
+        wport = rfile.w_ports[rpidx]
+
+        fspecs = fspec
+        if not isinstance(fspecs, list):
+            fspecs = [fspecs]
+
+        pplen = 0
+        writes = []
+        ppoffs = []
+        for i, fspec in enumerate(fspecs):
+            # get the regfile specs for this regfile port
+            (rf, read, write, wid, fuspec) = fspec
+            print ("fpsec", i, fspec, len(fuspec))
+            ppoffs.append(pplen) # record offset for picker
+            pplen += len(fuspec)
+
+        # create a priority picker to manage this port
+        wrpickers[regfile][rpidx] = wrpick = PriorityPicker(pplen)
+        setattr(m.submodules, "wrpick_%s_%s" % (regfile, rpidx), wrpick)
+
+        wsigs = []
+        wens = []
+        addrs = []
+        for i, fspec in enumerate(fspecs):
+            # connect up the FU req/go signals and the reg-read to the FU
+            # these are arbitrated by Data.ok signals
+            (rf, read, write, wid, fuspec) = fspec
+            for pi, (funame, fu, idx) in enumerate(fuspec):
+                pi += ppoffs[i]
+
+                # write-request comes from dest.ok
+                dest = fu.get_out(idx)
+                fu_dest_latch = fu.get_fu_out(idx)  # latched output
+                name = "wrflag_%s_%s_%d" % (funame, regname, idx)
+                wrflag = Signal(name=name, reset_less=True)
+                comb += wrflag.eq(dest.ok & fu.busy_o)
+
+                # connect request-write to picker input, and output to go-wr
+                fu_active = fu_bitdict[funame]
+                pick = fu.wr.rel_o[idx] & fu_active  # & wrflag
+                comb += wrpick.i[pi].eq(pick)
+                # create a single-pulse go write from the picker output
+                wr_pick = Signal()
+                comb += wr_pick.eq(wrpick.o[pi] & wrpick.en_o)
+                comb += fu.go_wr_i[idx].eq(rising_edge(m, wr_pick))
+
+                # connect the regspec write "reg select" number to this port
+                # only if one FU actually requests (and is granted) the port
+                # will the write-enable be activated
+                addr_en = Signal.like(write)
+                wp = Signal()
+                comb += wp.eq(wr_pick & wrpick.en_o)
+                comb += addr_en.eq(Mux(wp, write, 0))
+                if rfile.unary:
+                    wens.append(addr_en)
+                else:
+                    addrs.append(addr_en)
+                    wens.append(wp)
+
+                # connect regfile port to input
+                print("reg connect widths",
+                      regfile, regname, pi, funame,
+                      dest.shape(), wport.data_i.shape())
+                wsigs.append(fu_dest_latch)
+
+        # here is where we create the Write Broadcast Bus. simple, eh?
+        comb += wport.data_i.eq(ortreereduce_sig(wsigs))
+        if rfile.unary:
+            # for unary-addressed
+            comb += wport.wen.eq(ortreereduce_sig(wens))
+        else:
+            # for binary-addressed
+            comb += wport.addr.eq(ortreereduce_sig(addrs))
+            comb += wport.wen.eq(ortreereduce_sig(wens))
+
+    def connect_wrports(self, m, fu_bitdict):
+        """connect write ports
+
+        orders the write regspecs into a dict-of-dicts, by regfile,
+        by regport name, then connects all FUs that want that regport
+        by way of a PriorityPicker.
+
+        note that the write-port wen, write-port data, and go_wr_i all need to
+        be on the exact same clock cycle.  as there is a combinatorial loop bug
+        at the moment, these all use sync.
+        """
+        comb, sync = m.d.comb, m.d.sync
+        fus = self.fus.fus
+        regs = self.regs
+        # dictionary of lists of regfile write ports
+        byregfiles_wr, byregfiles_wrspec = self.get_byregfiles(False)
+
+        # same for write ports.
+        # BLECH!  complex code-duplication! BLECH!
+        wrpickers = {}
+        for regfile, spec in byregfiles_wr.items():
+            fuspecs = byregfiles_wrspec[regfile]
+            wrpickers[regfile] = {}
+
+            # argh, more port-merging
+            if regfile == 'INT':
+                fuspecs['o'] = [fuspecs.pop('o')]
+                fuspecs['o'].append(fuspecs.pop('o1'))
+            if regfile == 'FAST':
+                fuspecs['fast1'] = [fuspecs.pop('fast1')]
+                if 'fast2' in fuspecs:
+                    fuspecs['fast1'].append(fuspecs.pop('fast2'))
+
+            for (regname, fspec) in sort_fuspecs(fuspecs):
+                self.connect_wrport(m, fu_bitdict, wrpickers,
+                                        regfile, regname, fspec)
+
+    def get_byregfiles(self, readmode):
+
+        mode = "read" if readmode else "write"
+        regs = self.regs
+        fus = self.fus.fus
+        e = self.e # decoded instruction to execute
+
+        # dictionary of lists of regfile ports
+        byregfiles = {}
+        byregfiles_spec = {}
+        for (funame, fu) in fus.items():
+            print("%s ports for %s" % (mode, funame))
+            for idx in range(fu.n_src if readmode else fu.n_dst):
+                if readmode:
+                    (regfile, regname, wid) = fu.get_in_spec(idx)
+                else:
+                    (regfile, regname, wid) = fu.get_out_spec(idx)
+                print("    %d %s %s %s" % (idx, regfile, regname, str(wid)))
+                if readmode:
+                    rdflag, read = regspec_decode_read(e, regfile, regname)
+                    write = None
+                else:
+                    rdflag, read = None, None
+                    wrport, write = regspec_decode_write(e, regfile, regname)
+                if regfile not in byregfiles:
+                    byregfiles[regfile] = {}
+                    byregfiles_spec[regfile] = {}
+                if regname not in byregfiles_spec[regfile]:
+                    byregfiles_spec[regfile][regname] = \
+                        (rdflag, read, write, wid, [])
+                # here we start to create "lanes"
+                if idx not in byregfiles[regfile]:
+                    byregfiles[regfile][idx] = []
+                fuspec = (funame, fu, idx)
+                byregfiles[regfile][idx].append(fuspec)
+                byregfiles_spec[regfile][regname][4].append(fuspec)
+
+        # ok just print that out, for convenience
+        for regfile, spec in byregfiles.items():
+            print("regfile %s ports:" % mode, regfile)
+            fuspecs = byregfiles_spec[regfile]
+            for regname, fspec in fuspecs.items():
+                [rdflag, read, write, wid, fuspec] = fspec
+                print("  rf %s port %s lane: %s" % (mode, regfile, regname))
+                print("  %s" % regname, wid, read, write, rdflag)
+                for (funame, fu, idx) in fuspec:
+                    fusig = fu.src_i[idx] if readmode else fu.dest[idx]
+                    print("    ", funame, fu, idx, fusig)
+                    print()
+
+        return byregfiles, byregfiles_spec
 
     def __iter__(self):
         yield from self.fus.ports()
 
     def __iter__(self):
         yield from self.fus.ports()
-        yield from self.pdecode2.ports()
+        yield from self.e.ports()
+        yield from self.l0.ports()
         # TODO: regs
 
     def ports(self):
         # TODO: regs
 
     def ports(self):
@@ -113,7 +526,12 @@ class NonProductionCore(Elaboratable):
 
 
 if __name__ == '__main__':
 
 
 if __name__ == '__main__':
-    dut = NonProductionCore()
+    pspec = TestMemPspec(ldst_ifacetype='testpi',
+                         imem_ifacetype='',
+                         addr_wid=48,
+                         mask_wid=8,
+                         reg_wid=64)
+    dut = NonProductionCore(pspec)
     vl = rtlil.convert(dut, ports=dut.ports())
     vl = rtlil.convert(dut, ports=dut.ports())
-    with open("non_production_core.il", "w") as f:
+    with open("test_core.il", "w") as f:
         f.write(vl)
         f.write(vl)