Merge branch 'master' of git.libre-soc.org:soc
[soc.git] / src / soc / simple / core.py
index 0b6ea0cb6ec86e188be9c7a5a45257ae66399d17..91663c7aaad56f67b7728c855c3f01a0c57343a5 100644 (file)
@@ -19,7 +19,7 @@ and consequently it is safer to wait for the Function Unit to complete
 before allowing a new instruction to proceed.
 """
 
-from nmigen import Elaboratable, Module, Signal
+from nmigen import Elaboratable, Module, Signal, ResetSignal
 from nmigen.cli import rtlil
 
 from nmutil.picker import PriorityPicker
@@ -30,17 +30,24 @@ from soc.regfile.regfiles import RegFiles
 from soc.decoder.power_decoder import create_pdecode
 from soc.decoder.power_decoder2 import PowerDecode2
 from soc.decoder.decode2execute1 import Data
-from soc.experiment.l0_cache import TstL0CacheBuffer # test only
-from soc.experiment.testmem import TestMemory # test only for instructions
-from soc.regfile.regfiles import FastRegs
+from soc.experiment.l0_cache import TstL0CacheBuffer  # test only
+from soc.config.test.test_loadstore import TestMemPspec
+from soc.decoder.power_enums import MicrOp
 import operator
 
+from nmutil.util import rising_edge
+
 
 # helper function for reducing a list of signals down to a parallel
 # ORed single signal.
 def ortreereduce(tree, attr="data_o"):
     return treereduce(tree, operator.or_, lambda x: getattr(x, attr))
 
+
+def ortreereduce_sig(tree):
+    return treereduce(tree, operator.or_, lambda x: x)
+
+
 # helper function to place full regs declarations first
 def sort_fuspecs(fuspecs):
     res = []
@@ -50,17 +57,17 @@ def sort_fuspecs(fuspecs):
     for (regname, fspec) in fuspecs.items():
         if not regname.startswith("full"):
             res.append((regname, fspec))
-    return res # enumerate(res)
+    return res  # enumerate(res)
 
 
 class NonProductionCore(Elaboratable):
-    def __init__(self, addrwid=6, idepth=16):
+    def __init__(self, pspec):
         # single LD/ST funnel for memory access
-        self.l0 = TstL0CacheBuffer(n_units=1, regwid=64, addrwid=addrwid)
-        pi = self.l0.l0.dports[0].pi
+        self.l0 = TstL0CacheBuffer(pspec, n_units=1)
+        pi = self.l0.l0.dports[0]
 
         # function units (only one each)
-        self.fus = AllFunctionUnits(pilist=[pi], addrwid=addrwid)
+        self.fus = AllFunctionUnits(pspec, pilist=[pi])
 
         # register files (yes plural)
         self.regs = RegFiles()
@@ -70,14 +77,19 @@ class NonProductionCore(Elaboratable):
         self.pdecode2 = PowerDecode2(pdecode)   # instruction decoder
 
         # issue/valid/busy signalling
-        self.ivalid_i = self.pdecode2.e.valid   # instruction is valid
+        self.ivalid_i = self.pdecode2.valid   # instruction is valid
         self.issue_i = Signal(reset_less=True)
-        self.busy_o = Signal(reset_less=True)
+        self.busy_o = Signal(name="corebusy_o", reset_less=True)
 
         # instruction input
         self.bigendian_i = self.pdecode2.dec.bigendian
         self.raw_opcode_i = self.pdecode2.dec.raw_opcode_in
 
+        # start/stop and terminated signalling
+        self.core_stopped_i = Signal(reset_less=True)
+        self.core_reset_i = Signal()
+        self.core_terminate_o = Signal(reset=0)  # indicates stopped
+
     def elaborate(self, platform):
         m = Module()
 
@@ -88,13 +100,28 @@ class NonProductionCore(Elaboratable):
         regs = self.regs
         fus = self.fus.fus
 
+        # connect up Function Units, then read/write ports
         fu_bitdict = self.connect_instruction(m)
         self.connect_rdports(m, fu_bitdict)
         self.connect_wrports(m, fu_bitdict)
 
+        # connect up reset
+        m.d.comb += ResetSignal().eq(self.core_reset_i)
+
         return m
 
     def connect_instruction(self, m):
+        """connect_instruction
+
+        uses decoded (from PowerOp) function unit information from CSV files
+        to ascertain which Function Unit should deal with the current
+        instruction.
+
+        some ops (such as OP_ATTN, OP_NOP) are dealt with directly here:
+        OP_ATTN halts the processor and OP_NOP is ignored.  OP_NOP is a bit
+        annoying because the issuer still expects the busy flag to be raised
+        then lowered (this requires a fake counter to be set).
+        """
         comb, sync = m.d.comb, m.d.sync
         fus = self.fus.fus
         dec2 = self.pdecode2
@@ -105,19 +132,45 @@ class NonProductionCore(Elaboratable):
         for i, funame in enumerate(fus.keys()):
             fu_bitdict[funame] = fu_enable[i]
 
-        # connect up instructions.  only one is enabled at any given time
+        # enable the required Function Unit based on the opcode decode
+        # note: this *only* works correctly for simple core when one and
+        # *only* one FU is allocated per instruction
         for funame, fu in fus.items():
             fnunit = fu.fnunit.value
             enable = Signal(name="en_%s" % funame, reset_less=True)
-            comb += enable.eq(self.ivalid_i & (dec2.e.fn_unit & fnunit).bool())
-            with m.If(enable):
-                comb += fu.oper_i.eq_from_execute1(dec2.e)
-                comb += fu.issue_i.eq(self.issue_i)
-                comb += self.busy_o.eq(fu.busy_o)
-                rdmask = dec2.rdflags(fu)
-                comb += fu.rdmaskn.eq(~rdmask)
+            comb += enable.eq((dec2.e.do.fn_unit & fnunit).bool())
             comb += fu_bitdict[funame].eq(enable)
 
+        # sigh - need a NOP counter: busy_o is held high while it counts down
+        counter = Signal(2)
+        with m.If(counter != 0):
+            sync += counter.eq(counter - 1)
+            comb += self.busy_o.eq(1)
+
+        with m.If(self.ivalid_i): # run only when valid
+            with m.Switch(dec2.e.do.insn_type):
+                # check for ATTN: halt if true
+                with m.Case(MicrOp.OP_ATTN):
+                    m.d.sync += self.core_terminate_o.eq(1)
+
+                with m.Case(MicrOp.OP_NOP):
+                    sync += counter.eq(2)
+                    comb += self.busy_o.eq(1)
+
+                with m.Default():
+                    # connect up instructions.  only one enabled at a time
+                    for funame, fu in fus.items():
+                        enable = fu_bitdict[funame]
+
+                        # run this FunctionUnit if enabled
+                        with m.If(enable):
+                            # route op, issue, busy, read flags and mask to FU
+                            comb += fu.oper_i.eq_from_execute1(dec2.e)
+                            comb += fu.issue_i.eq(self.issue_i)
+                            comb += self.busy_o.eq(fu.busy_o)
+                            rdmask = dec2.rdflags(fu)
+                            comb += fu.rdmaskn.eq(~rdmask)
+
         return fu_bitdict
 
     def connect_rdports(self, m, fu_bitdict):
@@ -143,7 +196,7 @@ class NonProductionCore(Elaboratable):
 
             # for each named regfile port, connect up all FUs to that port
             for (regname, fspec) in sort_fuspecs(fuspecs):
-                print ("connect rd", regname, fspec)
+                print("connect rd", regname, fspec)
                 rpidx = regname
                 # get the regfile specs for this regfile port
                 (rf, read, write, wid, fuspec) = fspec
@@ -152,12 +205,14 @@ class NonProductionCore(Elaboratable):
                 comb += rdflag.eq(rf)
 
                 # select the required read port.  these are pre-defined sizes
-                print (rpidx, regfile, regs.rf.keys())
+                print(rpidx, regfile, regs.rf.keys())
                 rport = regs.rf[regfile.lower()].r_ports[rpidx]
 
                 # create a priority picker to manage this port
-                rdpickers[regfile][rpidx] = rdpick = PriorityPicker(len(fuspec))
-                setattr(m.submodules, "rdpick_%s_%s" % (regfile, rpidx), rdpick)
+                rdpickers[regfile][rpidx] = rdpick = PriorityPicker(
+                    len(fuspec))
+                setattr(m.submodules, "rdpick_%s_%s" %
+                        (regfile, rpidx), rdpick)
 
                 # connect the regspec "reg select" number to this port
                 with m.If(rdpick.en_o):
@@ -175,10 +230,11 @@ class NonProductionCore(Elaboratable):
                     comb += fu.go_rd_i[idx].eq(rdpick.o[pi])
 
                     # connect regfile port to input, creating a Broadcast Bus
-                    print ("reg connect widths",
-                           regfile, regname, pi, funame,
-                           src.shape(), rport.data_o.shape())
-                    comb += src.eq(rport.data_o) # all FUs connect to same port
+                    print("reg connect widths",
+                          regfile, regname, pi, funame,
+                          src.shape(), rport.data_o.shape())
+                    # all FUs connect to same port
+                    comb += src.eq(rport.data_o)
 
     def connect_wrports(self, m, fu_bitdict):
         """connect write ports
@@ -204,26 +260,28 @@ class NonProductionCore(Elaboratable):
             fuspecs = byregfiles_wrspec[regfile]
             wrpickers[regfile] = {}
             for (regname, fspec) in sort_fuspecs(fuspecs):
-                print ("connect wr", regname, fspec)
+                print("connect wr", regname, fspec)
                 rpidx = regname
                 # get the regfile specs for this regfile port
                 (rf, read, write, wid, fuspec) = fspec
 
                 # select the required write port.  these are pre-defined sizes
-                print (regfile, regs.rf.keys())
+                print(regfile, regs.rf.keys())
                 wport = regs.rf[regfile.lower()].w_ports[rpidx]
 
                 # create a priority picker to manage this port
-                wrpickers[regfile][rpidx] = wrpick = PriorityPicker(len(fuspec))
-                setattr(m.submodules, "wrpick_%s_%s" % (regfile, rpidx), wrpick)
+                wrpickers[regfile][rpidx] = wrpick = PriorityPicker(
+                    len(fuspec))
+                setattr(m.submodules, "wrpick_%s_%s" %
+                        (regfile, rpidx), wrpick)
 
                 # connect the regspec write "reg select" number to this port
                 # only if one FU actually requests (and is granted) the port
                 # will the write-enable be activated
                 with m.If(wrpick.en_o):
-                    sync += wport.wen.eq(write)
+                    comb += wport.wen.eq(write)
                 with m.Else():
-                    sync += wport.wen.eq(0)
+                    comb += wport.wen.eq(0)
 
                 # connect up the FU req/go signals and the reg-read to the FU
                 # these are arbitrated by Data.ok signals
@@ -231,23 +289,27 @@ class NonProductionCore(Elaboratable):
                 for pi, (funame, fu, idx) in enumerate(fuspec):
                     # write-request comes from dest.ok
                     dest = fu.get_out(idx)
+                    fu_dest_latch = fu.get_fu_out(idx)  # latched output
                     name = "wrflag_%s_%s_%d" % (funame, regname, idx)
                     wrflag = Signal(name=name, reset_less=True)
-                    comb += wrflag.eq(dest.ok)
+                    comb += wrflag.eq(dest.ok & fu.busy_o)
 
-                    # connect request-read to picker input, and output to go-wr
+                    # connect request-write to picker input, and output to go-wr
                     fu_active = fu_bitdict[funame]
-                    pick = fu.wr.rel[idx] & fu_active #& wrflag
+                    pick = fu.wr.rel_o[idx] & fu_active  # & wrflag
                     comb += wrpick.i[pi].eq(pick)
-                    sync += fu.go_wr_i[idx].eq(wrpick.o[pi] & wrpick.en_o)
+                    # create a single-pulse go write from the picker output
+                    wr_pick = Signal()
+                    comb += wr_pick.eq(wrpick.o[pi] & wrpick.en_o)
+                    comb += fu.go_wr_i[idx].eq(rising_edge(m, wr_pick))
                     # connect regfile port to input
-                    print ("reg connect widths",
-                           regfile, regname, pi, funame,
-                           dest.shape(), wport.data_i.shape())
-                    wsigs.append(dest)
+                    print("reg connect widths",
+                          regfile, regname, pi, funame,
+                          dest.shape(), wport.data_i.shape())
+                    wsigs.append(fu_dest_latch)
 
                 # here is where we create the Write Broadcast Bus. simple, eh?
-                sync += wport.data_i.eq(ortreereduce(wsigs, "data"))
+                comb += wport.data_i.eq(ortreereduce_sig(wsigs))
 
     def get_byregfiles(self, readmode):
 
@@ -260,13 +322,13 @@ class NonProductionCore(Elaboratable):
         byregfiles = {}
         byregfiles_spec = {}
         for (funame, fu) in fus.items():
-            print ("%s ports for %s" % (mode, funame))
+            print("%s ports for %s" % (mode, funame))
             for idx in range(fu.n_src if readmode else fu.n_dst):
                 if readmode:
                     (regfile, regname, wid) = fu.get_in_spec(idx)
                 else:
                     (regfile, regname, wid) = fu.get_out_spec(idx)
-                print ("    %d %s %s %s" % (idx, regfile, regname, str(wid)))
+                print("    %d %s %s %s" % (idx, regfile, regname, str(wid)))
                 if readmode:
                     rdflag, read = dec2.regspecmap_read(regfile, regname)
                     write = None
@@ -278,7 +340,7 @@ class NonProductionCore(Elaboratable):
                     byregfiles_spec[regfile] = {}
                 if regname not in byregfiles_spec[regfile]:
                     byregfiles_spec[regfile][regname] = \
-                                [rdflag, read, write, wid, []]
+                        [rdflag, read, write, wid, []]
                 # here we start to create "lanes"
                 if idx not in byregfiles[regfile]:
                     byregfiles[regfile][idx] = []
@@ -288,149 +350,36 @@ class NonProductionCore(Elaboratable):
 
         # ok just print that out, for convenience
         for regfile, spec in byregfiles.items():
-            print ("regfile %s ports:" % mode, regfile)
+            print("regfile %s ports:" % mode, regfile)
             fuspecs = byregfiles_spec[regfile]
             for regname, fspec in fuspecs.items():
                 [rdflag, read, write, wid, fuspec] = fspec
-                print ("  rf %s port %s lane: %s" % (mode, regfile, regname))
-                print ("  %s" % regname, wid, read, write, rdflag)
+                print("  rf %s port %s lane: %s" % (mode, regfile, regname))
+                print("  %s" % regname, wid, read, write, rdflag)
                 for (funame, fu, idx) in fuspec:
                     fusig = fu.src_i[idx] if readmode else fu.dest[idx]
-                    print ("    ", funame, fu, idx, fusig)
-                    print ()
+                    print("    ", funame, fu, idx, fusig)
+                    print()
 
         return byregfiles, byregfiles_spec
 
     def __iter__(self):
         yield from self.fus.ports()
         yield from self.pdecode2.ports()
+        yield from self.l0.ports()
         # TODO: regs
 
     def ports(self):
         return list(self)
 
 
-class TestIssuer(Elaboratable):
-    """TestIssuer - reads instructions from TestMemory and issues them
-
-    efficiency and speed is not the main goal here: functional correctness is.
-    """
-    def __init__(self, addrwid=6, idepth=6):
-        # main instruction core
-        self.core = core = NonProductionCore(addrwid)
-
-        # Test Instruction memory
-        self.imem = TestMemory(32, idepth)
-        self.i_rd = self.imem.rdport
-        #self.i_wr = self.imem.write_port() errr...
-
-        # instruction go/monitor
-        self.go_insn_i = Signal(reset_less=True)
-        self.pc_o = Signal(64, reset_less=True)
-        self.pc_i = Data(64, "pc") # set "ok" to indicate "please change me"
-        self.busy_o = core.busy_o
-        self.memerr_o = Signal(reset_less=True)
-
-        # FAST regfile read /write ports
-        self.fast_rd1 = self.core.regs.rf['fast'].r_ports['d_rd1']
-        self.fast_wr1 = self.core.regs.rf['fast'].w_ports['d_wr1']
-
-    def elaborate(self, platform):
-        m = Module()
-        comb, sync = m.d.comb, m.d.sync
-
-        m.submodules.core = core = self.core
-        m.submodules.imem = imem = self.imem
-
-        # temporary hack: says "go" immediately for both address gen and ST
-        l0 = core.l0
-        ldst = core.fus.fus['ldst0']
-        m.d.comb += ldst.ad.go.eq(ldst.ad.rel) # link addr-go direct to rel
-        m.d.comb += ldst.st.go.eq(ldst.st.rel) # link store-go direct to rel
-
-        # PC and instruction from I-Memory
-        current_insn = Signal(32) # current fetched instruction (note sync)
-        current_pc = Signal(64) # current PC (note it is reset/sync)
-        comb += self.pc_o.eq(current_pc)
-
-        # next instruction (+4 on current)
-        nia = Signal(64, reset_less=True)
-        comb += nia.eq(current_insn + 4)
-
-        # temporaries
-        core_busy_o = core.busy_o         # core is busy
-        core_ivalid_i = core.ivalid_i     # instruction is valid
-        core_issue_i = core.issue_i       # instruction is issued
-        core_be_i = core.bigendian_i      # bigendian mode
-        core_opcode_i = core.raw_opcode_i # raw opcode
-
-        # actually use a nmigen FSM for the first time (w00t)
-        with m.FSM() as fsm:
-
-            # waiting (zzz)
-            with m.State("IDLE"):
-                with m.If(self.go_insn_i):
-                    # instruction allowed to go: start by reading the PC
-                    pc = Signal(64, reset_less=True)
-                    with m.If(self.pc_i.ok):
-                        # incoming override (start from pc_i)
-                        comb += pc.eq(self.pc_i.data)
-                    with m.Else():
-                        # otherwise read FastRegs regfile for PC
-                        comb += self.fast_rd1.ren.eq(1<<FastRegs.PC)
-                        comb += pc.eq(self.fast_rd1.data_o)
-                    # capture the PC and also drop it into Insn Memory
-                    # we have joined a pair of combinatorial memory
-                    # lookups together.  this is Generally Bad.
-                    sync += current_pc.eq(pc)
-                    comb += self.i_rd.addr.eq(pc)
-                    #comb += self.i_rd.en.eq(1) # comb-read (no need to set)
-                    sync += current_insn.eq(self.i_rd.data)
-                    m.next = "INSN_READ" # move to "issue" phase
-
-            # got the instruction: start issue
-            with m.State("INSN_READ"):
-                sync += core_ivalid_i.eq(1) # say instruction is valid
-                sync += core_issue_i.eq(1)  # and issued (ivalid_i redundant)
-                sync += core_be_i.eq(0)     # little-endian mode
-                sync += core_opcode_i.eq(current_insn) # actual opcode
-                m.next = "INSN_ACTIVE" # move to "wait for completion" phase
-
-            # instruction started: must wait till it finishes
-            with m.State("INSN_ACTIVE"):
-                sync += core_issue_i.eq(0) # issue raises for only one cycle
-                with m.If(~core_busy_o): # instruction done!
-                    sync += core_ivalid_i.eq(0) # say instruction is invalid
-                    sync += core_opcode_i.eq(0) # clear out (no good reason)
-                    # ok here we are not reading the branch unit.  TODO
-                    # this just blithely overwrites whatever pipeline updated
-                    # the PC
-                    comb += self.fast_wr1.wen.eq(1<<FastRegs.PC)
-                    comb += self.fast_wr1.data_i.eq(nia)
-                    m.next = "IDLE" # back to idle
-
-        return m
-
-    def __iter__(self):
-        yield from self.pc_i.ports()
-        yield self.pc_o
-        yield self.go_insn_i
-        yield self.memerr_o
-        yield from self.core.ports()
-        yield from self.imem.ports()
-
-    def ports(self):
-        return list(self)
-
-
 if __name__ == '__main__':
-    dut = TestIssuer()
+    pspec = TestMemPspec(ldst_ifacetype='testpi',
+                         imem_ifacetype='',
+                         addr_wid=48,
+                         mask_wid=8,
+                         reg_wid=64)
+    dut = NonProductionCore(pspec)
     vl = rtlil.convert(dut, ports=dut.ports())
-    with open("test_issuer.il", "w") as f:
+    with open("test_core.il", "w") as f:
         f.write(vl)
-
-    dut = NonProductionCore()
-    vl = rtlil.convert(dut, ports=dut.ports())
-    with open("non_production_core.il", "w") as f:
-        f.write(vl)
-