local variable rename in FetchFSM
[soc.git] / src / soc / simple / core.py
index 9e398c0365dd8e3811be3d575f3786fb500ac95b..9f3a114136b74284c503e1858b1d34e06800ea93 100644 (file)
@@ -19,7 +19,8 @@ and consequently it is safer to wait for the Function Unit to complete
 before allowing a new instruction to proceed.
 """
 
-from nmigen import Elaboratable, Module, Signal, ResetSignal, Cat, Mux
+from nmigen import (Elaboratable, Module, Signal, ResetSignal, Cat, Mux,
+                    Const)
 from nmigen.cli import rtlil
 
 from openpower.decoder.power_decoder2 import PowerDecodeSubset
@@ -184,11 +185,12 @@ class NonProductionCore(ControlBase):
         self.des[self.trapunit] = self.ireg.e.do
 
         # connect up Function Units, then read/write ports, and hazard conflict
-        issue_conflict = Signal()
-        fu_bitdict, fu_selected = self.connect_instruction(m, issue_conflict)
-        raw_hazard = self.connect_rdports(m, fu_selected)
-        self.connect_wrports(m, fu_selected)
-        comb += issue_conflict.eq(raw_hazard)
+        self.issue_conflict = Signal()
+        fu_bitdict, fu_selected = self.connect_instruction(m)
+        raw_hazard = self.connect_rdports(m, fu_bitdict, fu_selected)
+        self.connect_wrports(m, fu_bitdict, fu_selected)
+        if self.allow_overlap:
+            comb += self.issue_conflict.eq(raw_hazard)
 
         # note if an exception happened.  in a pipelined or OoO design
         # this needs to be accompanied by "shadowing" (or stalling)
@@ -224,7 +226,7 @@ class NonProductionCore(ControlBase):
                         comb += v.use_svp64_ldst_dec.eq(
                                         self.ireg.use_svp64_ldst_dec)
 
-    def connect_instruction(self, m, issue_conflict):
+    def connect_instruction(self, m):
         """connect_instruction
 
         uses decoded (from PowerOp) function unit information from CSV files
@@ -241,13 +243,14 @@ class NonProductionCore(ControlBase):
 
         # indicate if core is busy
         busy_o = self.o.busy_o
+        any_busy_o = self.o.any_busy_o
 
         # connect up temporary copy of incoming instruction. the FSM will
         # either blat the incoming instruction (if valid) into self.ireg
         # or if the instruction could not be delivered, keep dropping the
         # latched copy into ireg
         ilatch = self.ispec()
-        self.instruction_active = Signal()
+        self.instr_active = Signal()
 
         # enable/busy-signals for each FU, get one bit for each FU (by name)
         fu_enable = Signal(len(fus), reset_less=True)
@@ -290,7 +293,8 @@ class NonProductionCore(ControlBase):
                 fnunit = fu.fnunit.value
                 en_req = Signal(name="issue_en_%s" % funame, reset_less=True)
                 fnmatch = (self.ireg.e.do.fn_unit & fnunit).bool()
-                comb += en_req.eq(fnmatch & ~fu.busy_o & self.instruction_active)
+                comb += en_req.eq(fnmatch & ~fu.busy_o &
+                                    self.instr_active)
                 i_l.append(en_req) # store in list for doing the Cat-trick
                 # picker output, gated by enable: store in fu_bitdict
                 po = Signal(name="o_issue_pick_"+funame) # picker output
@@ -300,11 +304,16 @@ class NonProductionCore(ControlBase):
                 # if we don't do this, then when there are no FUs available,
                 # the "p.o_ready" signal will go back "ok we accepted this
                 # instruction" which of course isn't true.
-                with m.If(~issue_conflict & i_pp.en_o):
+                with m.If(i_pp.en_o):
                     comb += fu_found.eq(1)
             # for each input, Cat them together and drop them into the picker
             comb += i_pp.i.eq(Cat(*i_l))
 
+        # rdmask, which is for registers, needs to come from the *main* decoder
+        for funame, fu in fus.items():
+            rdmask = get_rdflags(self.ireg.e, fu)
+            comb += fu.rdmaskn.eq(~rdmask)
+
         # sigh - need a NOP counter
         counter = Signal(2)
         with m.If(counter != 0):
@@ -317,12 +326,9 @@ class NonProductionCore(ControlBase):
         # always say "ready" except if overridden
         comb += self.p.o_ready.eq(1)
 
-        l_issue_conflict = Signal()
-
         with m.FSM():
             with m.State("READY"):
                 with m.If(self.p.i_valid): # run only when valid
-                    comb += self.instruction_active.eq(1)
                     with m.Switch(self.ireg.e.do.insn_type):
                         # check for ATTN: halt if true
                         with m.Case(MicrOp.OP_ATTN):
@@ -334,6 +340,7 @@ class NonProductionCore(ControlBase):
                             comb += busy_o.eq(1)
 
                         with m.Default():
+                            comb += self.instr_active.eq(1)
                             comb += self.p.o_ready.eq(0)
                             # connect instructions. only one enabled at a time
                             for funame, fu in fus.items():
@@ -342,35 +349,28 @@ class NonProductionCore(ControlBase):
 
                                 # run this FunctionUnit if enabled route op,
                                 # issue, busy, read flags and mask to FU
-                                with m.If(enable & fu_found):
+                                with m.If(enable):
                                     # operand comes from the *local*  decoder
                                     comb += fu.oper_i.eq_from(do)
                                     comb += fu.issue_i.eq(1) # issue when valid
-                                    # rdmask, which is for registers,
-                                    # needs to come
-                                    # from the *main* decoder
-                                    rdmask = get_rdflags(self.ireg.e, fu)
-                                    comb += fu.rdmaskn.eq(~rdmask)
                                     # instruction ok, indicate ready
                                     comb += self.p.o_ready.eq(1)
 
-                            with m.If(~fu_found):
-                                # latch copy of instruction
-                                sync += ilatch.eq(self.i)
-                                sync += l_issue_conflict.eq(issue_conflict)
-                                comb += self.p.o_ready.eq(1) # accept
-                                comb += busy_o.eq(1)
-                                m.next = "WAITING"
+                            if self.allow_overlap:
+                                with m.If(~fu_found):
+                                    # latch copy of instruction
+                                    sync += ilatch.eq(self.i)
+                                    comb += self.p.o_ready.eq(1) # accept
+                                    comb += busy_o.eq(1)
+                                    m.next = "WAITING"
 
             with m.State("WAITING"):
-                comb += self.instruction_active.eq(1)
-                with m.If(fu_found):
-                    sync += l_issue_conflict.eq(0)
+                comb += self.instr_active.eq(1)
                 comb += self.p.o_ready.eq(0)
                 comb += busy_o.eq(1)
                 # using copy of instruction, keep waiting until an FU is free
                 comb += self.ireg.eq(ilatch)
-                with m.If(~l_issue_conflict): # wait for conflict to clear
+                with m.If(fu_found): # wait for conflict to clear
                     # connect instructions. only one enabled at a time
                     for funame, fu in fus.items():
                         do = self.des[funame]
@@ -382,28 +382,30 @@ class NonProductionCore(ControlBase):
                             # operand comes from the *local*  decoder
                             comb += fu.oper_i.eq_from(do)
                             comb += fu.issue_i.eq(1) # issue when valid
-                            # rdmask, which is for registers,
-                            # needs to come
-                            # from the *main* decoder
-                            rdmask = get_rdflags(self.ireg.e, fu)
-                            comb += fu.rdmaskn.eq(~rdmask)
                             comb += self.p.o_ready.eq(1)
                             comb += busy_o.eq(0)
                             m.next = "READY"
 
         print ("core: overlap allowed", self.allow_overlap)
+        busys = map(lambda fu: fu.busy_o, fus.values())
+        comb += any_busy_o.eq(Cat(*busys).bool())
         if not self.allow_overlap:
             # for simple non-overlap, if any instruction is busy, set
             # busy output for core.
-            busys = map(lambda fu: fu.busy_o, fus.values())
-            comb += busy_o.eq(Cat(*busys).bool())
+            comb += busy_o.eq(any_busy_o)
+        else:
+            # workaround: force busy while an issue conflict is flagged.
+            # the underlying situation still needs investigating and resolving
+            with m.If(self.issue_conflict):
+                comb += busy_o.eq(1)
 
         # return both the function unit "enable" dict as well as the "busy".
         # the "busy-or-issued" can be passed in to the Read/Write port
         # connecters to give them permission to request access to regfiles
         return fu_bitdict, fu_selected
 
-    def connect_rdport(self, m, fu_bitdict, rdpickers, regfile, regname, fspec):
+    def connect_rdport(self, m, fu_bitdict, fu_selected,
+                                rdpickers, regfile, regname, fspec):
         comb, sync = m.d.comb, m.d.sync
         fus = self.fus.fus
         regs = self.regs
@@ -421,6 +423,11 @@ class NonProductionCore(ControlBase):
             wv = regs.wv[regfile.lower()]
             wvchk = wv.r_ports["issue"] # write-vec bit-level hazard check
 
+        # if a hazard is detected on this read port, simply blithely block
+        # every FU from reading on it.  this is complete overkill but very
+        # simple for now.
+        hazard_detected = Signal(name="raw_%s_%s" % (regfile, rpidx))
+
         fspecs = fspec
         if not isinstance(fspecs, list):
             fspecs = [fspecs]
@@ -450,23 +457,43 @@ class NonProductionCore(ControlBase):
         wvens = []
 
         for i, fspec in enumerate(fspecs):
-            (rf, wf, read, write, wid, fuspec) = fspec
+            (rf, wf, _read, _write, wid, fuspec) = fspec
             # connect up the FU req/go signals, and the reg-read to the FU
             # and create a Read Broadcast Bus
             for pi, (funame, fu, idx) in enumerate(fuspec):
                 pi += ppoffs[i]
+                name = "%s_%s_%s_%i" % (regfile, rpidx, funame, pi)
+                fu_active = fu_selected[funame]
+                fu_issued = fu_bitdict[funame]
+
+                # get (or set up) a latched copy of read register number
+                rname = "%s_%s_%s_%d" % (funame, regfile, regname, pi)
+                read = Signal.like(_read, name="read_"+name)
+                if rname not in fu.rd_latches:
+                    rdl = Signal.like(_read, name="rdlatch_"+rname)
+                    fu.rd_latches[rname] = rdl
+                    with m.If(fu.issue_i):
+                        sync += rdl.eq(_read)
+                else:
+                    rdl = fu.rd_latches[rname]
+                # on the issue cycle pass the read register number straight
+                # through; on every other cycle use the latched copy
+                with m.If(fu.issue_i):
+                    comb += read.eq(_read)
+                with m.Else():
+                    comb += read.eq(rdl)
 
                 # connect request-read to picker input, and output to go-rd
-                fu_active = fu_bitdict[funame]
-                name = "%s_%s_%s_%i" % (regfile, rpidx, funame, pi)
                 addr_en = Signal.like(read, name="addr_en_"+name)
                 pick = Signal(name="pick_"+name)     # picker input
                 rp = Signal(name="rp_"+name)         # picker output
                 delay_pick = Signal(name="dp_"+name) # read-enable "underway"
+                rhazard = Signal(name="rhaz_"+name)
 
                 # exclude any currently-enabled read-request (mask out active)
                 comb += pick.eq(fu.rd_rel_o[idx] & fu_active & rdflags[i] &
-                                ~delay_pick)
+                                ~delay_pick & ~rhazard)
+                # entirely block anything hazarded from being picked
                 comb += rdpick.i[pi].eq(pick)
                 comb += fu.go_rd_i[idx].eq(delay_pick) # pass in *delayed* pick
 
@@ -499,12 +526,21 @@ class NonProductionCore(ControlBase):
                 # read the write-hazard bitvector (wv) for any bit that is
                 wvchk_en = Signal(len(wvchk.ren), name="wv_chk_addr_en_"+name)
                 issue_active = Signal(name="rd_iactive_"+name)
-                comb += issue_active.eq(self.instruction_active & rdflags[i])
+                # XXX combinatorial loop here
+                comb += issue_active.eq(fu_active & rf)
                 with m.If(issue_active):
                     if rfile.unary:
                         comb += wvchk_en.eq(read)
                     else:
                         comb += wvchk_en.eq(1<<read)
+                # if FU is busy (which doesn't get set at the same time as
+                # issue) and no hazard was detected, clear wvchk_en (i.e.
+                # stop checking for hazards)
+                with m.If(fu.busy_o & ~rhazard):
+                        comb += wvchk_en.eq(0)
+
+                comb += rhazard.eq((wvchk.o_data & wvchk_en).bool())
+
                 wvens.append(wvchk_en)
 
         # or-reduce the muxed read signals
@@ -523,11 +559,10 @@ class NonProductionCore(ControlBase):
         # enable the read bitvectors for this issued instruction
         # and return whether any write-hazard bit is set
         comb += wvchk.ren.eq(ortreereduce_sig(wvens))
-        hazard_detected = Signal(name="raw_%s_%s" % (regfile, rpidx))
         comb += hazard_detected.eq(wvchk.o_data.bool())
         return hazard_detected
 
-    def connect_rdports(self, m, fu_bitdict):
+    def connect_rdports(self, m, fu_bitdict, fu_selected):
         """connect read ports
 
         orders the read regspecs into a dict-of-dicts, by regfile, by
@@ -567,7 +602,8 @@ class NonProductionCore(ControlBase):
             # also return (and collate) hazard detection)
             for (regname, fspec) in sort_fuspecs(fuspecs):
                 print("connect rd", regname, fspec)
-                rh = self.connect_rdport(m, fu_bitdict, rdpickers, regfile,
+                rh = self.connect_rdport(m, fu_bitdict, fu_selected,
+                                       rdpickers, regfile,
                                        regname, fspec)
                 rd_hazard.append(rh)
 
@@ -640,7 +676,8 @@ class NonProductionCore(ControlBase):
 
         return wvaddr_en, wviaddr_en
 
-    def connect_wrport(self, m, fu_bitdict, wrpickers, regfile, regname, fspec):
+    def connect_wrport(self, m, fu_bitdict, fu_selected,
+                                wrpickers, regfile, regname, fspec):
         comb, sync = m.d.comb, m.d.sync
         fus = self.fus.fus
         regs = self.regs
@@ -707,18 +744,22 @@ class NonProductionCore(ControlBase):
             # these are arbitrated by Data.ok signals
             (rf, wf, read, _write, wid, fuspec) = fspec
             for pi, (funame, fu, idx) in enumerate(fuspec):
+                pi += ppoffs[i]
+                name = "%s_%s_%s_%d" % (funame, regfile, regname, idx)
                 # get (or set up) a write-latched copy of write register number
+                write = Signal.like(_write, name="write_"+name)
                 rname = "%s_%s_%s" % (funame, regfile, regname)
                 if rname not in fu.wr_latches:
-                    write = Signal.like(_write, name="wrlatch_"+rname)
+                    wrl = Signal.like(_write, name="wrlatch_"+rname)
                     fu.wr_latches[rname] = write
                     with m.If(fu.issue_i):
-                        sync += write.eq(_write)
+                        sync += wrl.eq(_write)
+                        comb += write.eq(_write)
+                    with m.Else():
+                        comb += write.eq(wrl)
                 else:
                     write = fu.wr_latches[rname]
 
-                pi += ppoffs[i]
-
                 # write-request comes from dest.ok
                 dest = fu.get_out(idx)
                 fu_dest_latch = fu.get_fu_out(idx)  # latched output
@@ -727,7 +768,7 @@ class NonProductionCore(ControlBase):
                 comb += fu_wrok.eq(dest.ok & fu.busy_o)
 
                 # connect request-write to picker input, and output to go-wr
-                fu_active = fu_bitdict[funame]
+                fu_active = fu_selected[funame]
                 pick = fu.wr.rel_o[idx] & fu_active
                 comb += wrpick.i[pi].eq(pick)
                 # create a single-pulse go write from the picker output
@@ -785,7 +826,7 @@ class NonProductionCore(ControlBase):
         comb += wvset.wen.eq(ortreereduce_sig(wvseten)) # set (issue time)
         comb += wvset.i_data.eq(ortreereduce_sig(wvsets))
 
-    def connect_wrports(self, m, fu_bitdict):
+    def connect_wrports(self, m, fu_bitdict, fu_selected):
         """connect write ports
 
         orders the write regspecs into a dict-of-dicts, by regfile,
@@ -822,7 +863,7 @@ class NonProductionCore(ControlBase):
                         fuspecs['fast1'].append(fuspecs.pop('fast3'))
 
             for (regname, fspec) in sort_fuspecs(fuspecs):
-                self.connect_wrport(m, fu_bitdict, wrpickers,
+                self.connect_wrport(m, fu_bitdict, fu_selected, wrpickers,
                                         regfile, regname, fspec)
 
     def get_byregfiles(self, readmode):