pass SPR MicroOp to MMU function unit
[soc.git] / src / soc / simple / core.py
index a44f4d797a4c4beb499996ca1c4a5761beb244d4..d3a66bb1e9916c50277b1d5b1dc2a859707fc90b 100644 (file)
@@ -22,17 +22,24 @@ before allowing a new instruction to proceed.
 from nmigen import Elaboratable, Module, Signal, ResetSignal, Cat, Mux
 from nmigen.cli import rtlil
 
+from soc.decoder.power_decoder2 import PowerDecodeSubset
+from soc.decoder.power_regspec_map import regspec_decode_read
+from soc.decoder.power_regspec_map import regspec_decode_write
+
 from nmutil.picker import PriorityPicker
 from nmutil.util import treereduce
 
 from soc.fu.compunits.compunits import AllFunctionUnits
 from soc.regfile.regfiles import RegFiles
-from soc.decoder.power_decoder import create_pdecode
-from soc.decoder.power_decoder2 import PowerDecode2
+from soc.decoder.decode2execute1 import Decode2ToExecute1Type
+from soc.decoder.decode2execute1 import IssuerDecode2ToOperand
+from soc.decoder.power_decoder2 import get_rdflags
 from soc.decoder.decode2execute1 import Data
 from soc.experiment.l0_cache import TstL0CacheBuffer  # test only
 from soc.config.test.test_loadstore import TestMemPspec
 from soc.decoder.power_enums import MicrOp
+from soc.config.state import CoreState
+
 import operator
 
 from nmutil.util import rising_edge
@@ -62,52 +69,83 @@ def sort_fuspecs(fuspecs):
 
 class NonProductionCore(Elaboratable):
     def __init__(self, pspec):
+        self.pspec = pspec
+
         # single LD/ST funnel for memory access
         self.l0 = TstL0CacheBuffer(pspec, n_units=1)
         pi = self.l0.l0.dports[0]
 
         # function units (only one each)
+        # only include mmu if enabled in pspec
         self.fus = AllFunctionUnits(pspec, pilist=[pi])
 
         # register files (yes plural)
         self.regs = RegFiles()
 
-        # instruction decoder
-        pdecode = create_pdecode()
-        self.pdecode2 = PowerDecode2(pdecode)   # instruction decoder
+        # instruction decoder - needs a Trap-capable Record (captures EINT etc.)
+        self.e = Decode2ToExecute1Type("core", opkls=IssuerDecode2ToOperand)
+
+        self.state = CoreState("core")
+        self.raw_insn_i = Signal(32) # raw instruction
+        self.bigendian_i = Signal() # bigendian
 
         # issue/valid/busy signalling
-        self.ivalid_i = self.pdecode2.valid   # instruction is valid
+        self.ivalid_i = Signal(reset_less=True) # instruction is valid
         self.issue_i = Signal(reset_less=True)
         self.busy_o = Signal(name="corebusy_o", reset_less=True)
 
-        # instruction input
-        self.bigendian_i = self.pdecode2.dec.bigendian
-        self.raw_opcode_i = self.pdecode2.dec.raw_opcode_in
-
         # start/stop and terminated signalling
         self.core_stopped_i = Signal(reset_less=True)
-        self.core_reset_i = Signal()
         self.core_terminate_o = Signal(reset=0)  # indicates stopped
 
+        # create per-FU instruction decoders (subsetted)
+        self.decoders = {}
+        self.des = {}
+
+        for funame, fu in self.fus.fus.items():
+            f_name = fu.fnunit.name
+            fnunit = fu.fnunit.value
+            opkls = fu.opsubsetkls
+            if f_name == 'TRAP':
+                self.trapunit = funame
+                continue
+            self.decoders[funame] = PowerDecodeSubset(None, opkls, f_name,
+                                                      final=True,
+                                                      state=self.state)
+            self.des[funame] = self.decoders[funame].do
+
+        if "mmu0" in self.decoders:
+            self.decoders["mmu0"].mmu0_spr_dec = self.decoders["spr0"]
+
     def elaborate(self, platform):
         m = Module()
+        # for testing purposes, to cut down on build time in coriolis2
+        if hasattr(self.pspec, "nocore") and self.pspec.nocore == True:
+            x = Signal() # dummy signal
+            m.d.sync += x.eq(~x)
+            return m
+        comb = m.d.comb
 
-        m.submodules.pdecode2 = dec2 = self.pdecode2
         m.submodules.fus = self.fus
         m.submodules.l0 = l0 = self.l0
         self.regs.elaborate_into(m, platform)
         regs = self.regs
         fus = self.fus.fus
 
+        # connect decoders
+        for k, v in self.decoders.items():
+            setattr(m.submodules, "dec_%s" % v.fn_name, v)
+            comb += v.dec.raw_opcode_in.eq(self.raw_insn_i)
+            comb += v.dec.bigendian.eq(self.bigendian_i)
+
+        # shh, cheat: trap uses the main decoder because of the rewriting
+        self.des[self.trapunit] = self.e.do
+
         # connect up Function Units, then read/write ports
         fu_bitdict = self.connect_instruction(m)
         self.connect_rdports(m, fu_bitdict)
         self.connect_wrports(m, fu_bitdict)
 
-        # connect up reset
-        m.d.comb += ResetSignal().eq(self.core_reset_i)
-
         return m
 
     def connect_instruction(self, m):
@@ -124,7 +162,6 @@ class NonProductionCore(Elaboratable):
         """
         comb, sync = m.d.comb, m.d.sync
         fus = self.fus.fus
-        dec2 = self.pdecode2
 
         # enable-signals for each FU, get one bit for each FU (by name)
         fu_enable = Signal(len(fus), reset_less=True)
@@ -138,7 +175,7 @@ class NonProductionCore(Elaboratable):
         for funame, fu in fus.items():
             fnunit = fu.fnunit.value
             enable = Signal(name="en_%s" % funame, reset_less=True)
-            comb += enable.eq((dec2.e.do.fn_unit & fnunit).bool())
+            comb += enable.eq((self.e.do.fn_unit & fnunit).bool())
             comb += fu_bitdict[funame].eq(enable)
 
         # sigh - need a NOP counter
@@ -148,7 +185,7 @@ class NonProductionCore(Elaboratable):
             comb += self.busy_o.eq(1)
 
         with m.If(self.ivalid_i): # run only when valid
-            with m.Switch(dec2.e.do.insn_type):
+            with m.Switch(self.e.do.insn_type):
                 # check for ATTN: halt if true
                 with m.Case(MicrOp.OP_ATTN):
                     m.d.sync += self.core_terminate_o.eq(1)
@@ -160,15 +197,20 @@ class NonProductionCore(Elaboratable):
                 with m.Default():
                     # connect up instructions.  only one enabled at a time
                     for funame, fu in fus.items():
+                        do = self.des[funame]
                         enable = fu_bitdict[funame]
 
                         # run this FunctionUnit if enabled
+                        # route op, issue, busy, read flags and mask to FU
                         with m.If(enable):
-                            # route op, issue, busy, read flags and mask to FU
-                            comb += fu.oper_i.eq_from_execute1(dec2.e)
+                            # operand comes from the *local* (per-FU) decoder
+                            comb += fu.oper_i.eq_from(do)
+                            #comb += fu.oper_i.eq_from_execute1(e)
                             comb += fu.issue_i.eq(self.issue_i)
                             comb += self.busy_o.eq(fu.busy_o)
-                            rdmask = dec2.rdflags(fu)
+                            # rdmask, which is for registers, needs to come
+                            # from the *main* decoder
+                            rdmask = get_rdflags(self.e, fu)
                             comb += fu.rdmaskn.eq(~rdmask)
 
         return fu_bitdict
@@ -220,32 +262,38 @@ class NonProductionCore(Elaboratable):
             # and create a Read Broadcast Bus
             for pi, (funame, fu, idx) in enumerate(fuspec):
                 pi += ppoffs[i]
-                src = fu.src_i[idx]
 
                 # connect request-read to picker input, and output to go-rd
                 fu_active = fu_bitdict[funame]
-                pick = Signal()
-                comb += pick.eq(fu.rd_rel_o[idx] & fu_active & rdflags[i])
-                print (pick, len(pick))
-                print (rdpick.i, len(rdpick.i), pi)
+                name = "%s_%s_%s_%i" % (regfile, rpidx, funame, pi)
+                addr_en = Signal.like(reads[i], name="addr_en_"+name)
+                pick = Signal(name="pick_"+name)     # picker input
+                rp = Signal(name="rp_"+name)         # picker output
+                delay_pick = Signal(name="dp_"+name) # read-enable "underway"
+
+                # exclude any currently-enabled read-request (mask out active)
+                comb += pick.eq(fu.rd_rel_o[idx] & fu_active & rdflags[i] &
+                                ~delay_pick)
                 comb += rdpick.i[pi].eq(pick)
-                comb += fu.go_rd_i[idx].eq(rdpick.o[pi])
+                comb += fu.go_rd_i[idx].eq(delay_pick) # pass in *delayed* pick
 
                 # if picked, select read-port "reg select" number to port
-                name = "%s_%s_%s_%i" % (regfile, rpidx, funame, pi)
-                addr_en = Signal.like(reads[i])
-                rp = Signal(name="rp_"+name)
-                addr_en.name = "addr_en_"+name
                 comb += rp.eq(rdpick.o[pi] & rdpick.en_o)
+                sync += delay_pick.eq(rp) # delayed "pick"
                 comb += addr_en.eq(Mux(rp, reads[i], 0))
+
+                # the read-enable happens combinatorially (see mux-bus below)
+                # but it results in the data coming out on a one-cycle delay.
                 if rfile.unary:
                     rens.append(addr_en)
                 else:
                     addrs.append(addr_en)
                     rens.append(rp)
 
-                with m.If(rp):
+                # use the *delayed* pick signal to put requested data onto bus
+                with m.If(delay_pick):
                     # connect regfile port to input, creating fan-out Bus
+                    src = fu.src_i[idx]
                     print("reg connect widths",
                           regfile, regname, pi, funame,
                           src.shape(), rport.data_o.shape())
@@ -285,13 +333,14 @@ class NonProductionCore(Elaboratable):
 
             # argh.  an experiment to merge RA and RB in the INT regfile
             # (we have too many read/write ports)
-            if regfile == 'INT':
-                fuspecs['rabc'] = [fuspecs.pop('rb')]
-                fuspecs['rabc'].append(fuspecs.pop('rc'))
-                fuspecs['rabc'].append(fuspecs.pop('ra'))
-            if regfile == 'FAST':
-                fuspecs['fast1'] = [fuspecs.pop('fast1')]
-                fuspecs['fast1'].append(fuspecs.pop('fast2'))
+            #if regfile == 'INT':
+                #fuspecs['rabc'] = [fuspecs.pop('rb')]
+                #fuspecs['rabc'].append(fuspecs.pop('rc'))
+                #fuspecs['rabc'].append(fuspecs.pop('ra'))
+            #if regfile == 'FAST':
+            #    fuspecs['fast1'] = [fuspecs.pop('fast1')]
+            #    if 'fast2' in fuspecs:
+            #        fuspecs['fast1'].append(fuspecs.pop('fast2'))
 
             # for each named regfile port, connect up all FUs to that port
             for (regname, fspec) in sort_fuspecs(fuspecs):
@@ -415,7 +464,8 @@ class NonProductionCore(Elaboratable):
                 fuspecs['o'].append(fuspecs.pop('o1'))
             if regfile == 'FAST':
                 fuspecs['fast1'] = [fuspecs.pop('fast1')]
-                fuspecs['fast1'].append(fuspecs.pop('fast2'))
+                if 'fast2' in fuspecs:
+                    fuspecs['fast1'].append(fuspecs.pop('fast2'))
 
             for (regname, fspec) in sort_fuspecs(fuspecs):
                 self.connect_wrport(m, fu_bitdict, wrpickers,
@@ -424,9 +474,9 @@ class NonProductionCore(Elaboratable):
     def get_byregfiles(self, readmode):
 
         mode = "read" if readmode else "write"
-        dec2 = self.pdecode2
         regs = self.regs
         fus = self.fus.fus
+        e = self.e # decoded instruction to execute
 
         # dictionary of lists of regfile ports
         byregfiles = {}
@@ -440,11 +490,11 @@ class NonProductionCore(Elaboratable):
                     (regfile, regname, wid) = fu.get_out_spec(idx)
                 print("    %d %s %s %s" % (idx, regfile, regname, str(wid)))
                 if readmode:
-                    rdflag, read = dec2.regspecmap_read(regfile, regname)
+                    rdflag, read = regspec_decode_read(e, regfile, regname)
                     write = None
                 else:
                     rdflag, read = None, None
-                    wrport, write = dec2.regspecmap_write(regfile, regname)
+                    wrport, write = regspec_decode_write(e, regfile, regname)
                 if regfile not in byregfiles:
                     byregfiles[regfile] = {}
                     byregfiles_spec[regfile] = {}
@@ -475,7 +525,7 @@ class NonProductionCore(Elaboratable):
 
     def __iter__(self):
         yield from self.fus.ports()
-        yield from self.pdecode2.ports()
+        yield from self.e.ports()
         yield from self.l0.ports()
         # TODO: regs