fixed issue with hazard dependencies, read will not
author Luke Kenneth Casson Leighton <lkcl@lkcl.net>
Sun, 21 Nov 2021 21:09:09 +0000 (21:09 +0000)
committer Luke Kenneth Casson Leighton <lkcl@lkcl.net>
Sun, 21 Nov 2021 21:10:53 +0000 (21:10 +0000)
take place until write dependencies are clear.  currently there is an
extremely draconian "ban" on the entire FU progressing (actually probably
on all FUs progressing) until write-hazards are clear.  this can be sorted
out later

src/soc/regfile/regfiles.py
src/soc/simple/core.py
src/soc/simple/core_data.py

index 234c06e7c3817d4627a28f8add1fa5d6aa88eb10..fc87bee924fff5cb75af431e9033a988ccef2259 100644 (file)
@@ -285,9 +285,9 @@ class RegFiles:
 
     def make_hazard_vec(self, rf, name):
         if isinstance(rf, VirtualRegPort):
-            vec = VirtualRegPort(rf.nregs, rf.nregs, wr2=True, synced=False)
+            vec = VirtualRegPort(rf.nregs, rf.nregs, wr2=True)
         else:
-            vec = VirtualRegPort(rf.depth, rf.depth, wr2=True, synced=False)
+            vec = VirtualRegPort(rf.depth, rf.depth, wr2=True)
         # get read/write port specs and create bitvector ports with same names
         wr_spec, rd_spec = rf.get_port_specs()
         # ok, this is complicated/fun.
index 88803582a7871209075d8f9d07aec13d65328e80..fc851d40e3a9303c288c8af820efed45ce606723 100644 (file)
@@ -19,7 +19,8 @@ and consequently it is safer to wait for the Function Unit to complete
 before allowing a new instruction to proceed.
 """
 
-from nmigen import Elaboratable, Module, Signal, ResetSignal, Cat, Mux
+from nmigen import (Elaboratable, Module, Signal, ResetSignal, Cat, Mux,
+                    Const)
 from nmigen.cli import rtlil
 
 from openpower.decoder.power_decoder2 import PowerDecodeSubset
@@ -242,6 +243,7 @@ class NonProductionCore(ControlBase):
 
         # indicate if core is busy
         busy_o = self.o.busy_o
+        any_busy_o = self.o.any_busy_o
 
         # connect up temporary copy of incoming instruction. the FSM will
         # either blat the incoming instruction (if valid) into self.ireg
@@ -302,7 +304,7 @@ class NonProductionCore(ControlBase):
                 # if we don't do this, then when there are no FUs available,
                 # the "p.o_ready" signal will go back "ok we accepted this
                 # instruction" which of course isn't true.
-                with m.If(~self.issue_conflict & i_pp.en_o):
+                with m.If(i_pp.en_o):
                     comb += fu_found.eq(1)
             # for each input, Cat them together and drop them into the picker
             comb += i_pp.i.eq(Cat(*i_l))
@@ -324,8 +326,6 @@ class NonProductionCore(ControlBase):
         # always say "ready" except if overridden
         comb += self.p.o_ready.eq(1)
 
-        l_issue_conflict = Signal()
-
         with m.FSM():
             with m.State("READY"):
                 with m.If(self.p.i_valid): # run only when valid
@@ -349,34 +349,28 @@ class NonProductionCore(ControlBase):
 
                                 # run this FunctionUnit if enabled route op,
                                 # issue, busy, read flags and mask to FU
-                                with m.If(enable & ~self.issue_conflict):
+                                with m.If(enable):
                                     # operand comes from the *local*  decoder
                                     comb += fu.oper_i.eq_from(do)
                                     comb += fu.issue_i.eq(1) # issue when valid
                                     # instruction ok, indicate ready
                                     comb += self.p.o_ready.eq(1)
-                                    comb += busy_o.eq(1)
 
                             if self.allow_overlap:
                                 with m.If(~fu_found):
-                                    comb += self.instr_active.eq(1)
                                     # latch copy of instruction
                                     sync += ilatch.eq(self.i)
-                                    sync += l_issue_conflict.eq(
-                                                      self.issue_conflict)
                                     comb += self.p.o_ready.eq(1) # accept
                                     comb += busy_o.eq(1)
                                     m.next = "WAITING"
 
             with m.State("WAITING"):
                 comb += self.instr_active.eq(1)
-                with m.If(fu_found):
-                    sync += l_issue_conflict.eq(0)
                 comb += self.p.o_ready.eq(0)
                 comb += busy_o.eq(1)
                 # using copy of instruction, keep waiting until an FU is free
                 comb += self.ireg.eq(ilatch)
-                with m.If(~l_issue_conflict): # wait for conflict to clear
+                with m.If(fu_found): # wait for conflict to clear
                     # connect instructions. only one enabled at a time
                     for funame, fu in fus.items():
                         do = self.des[funame]
@@ -393,11 +387,17 @@ class NonProductionCore(ControlBase):
                             m.next = "READY"
 
         print ("core: overlap allowed", self.allow_overlap)
+        busys = map(lambda fu: fu.busy_o, fus.values())
+        comb += any_busy_o.eq(Cat(*busys).bool())
         if not self.allow_overlap:
             # for simple non-overlap, if any instruction is busy, set
             # busy output for core.
-            busys = map(lambda fu: fu.busy_o, fus.values())
-            comb += busy_o.eq(Cat(*busys).bool())
+            comb += busy_o.eq(any_busy_o)
+        else:
+            # sigh deal with a fun situation that needs to be investigated
+            # and resolved
+            with m.If(self.issue_conflict):
+                comb += busy_o.eq(1)
 
         # return both the function unit "enable" dict as well as the "busy".
         # the "busy-or-issued" can be passed in to the Read/Write port
@@ -423,6 +423,11 @@ class NonProductionCore(ControlBase):
             wv = regs.wv[regfile.lower()]
             wvchk = wv.r_ports["issue"] # write-vec bit-level hazard check
 
+        # if a hazard is detected on this read port, simply blithely block
+        # every FU from reading on it.  this is complete overkill but very
+        # simple for now.
+        hazard_detected = Signal(name="raw_%s_%s" % (regfile, rpidx))
+
         fspecs = fspec
         if not isinstance(fspecs, list):
             fspecs = [fspecs]
@@ -459,6 +464,7 @@ class NonProductionCore(ControlBase):
                 pi += ppoffs[i]
                 name = "%s_%s_%s_%i" % (regfile, rpidx, funame, pi)
                 fu_active = fu_selected[funame]
+                fu_issued = fu_bitdict[funame]
 
                 # get (or set up) a latched copy of read register number
                 rname = "%s_%s_%s_%d" % (funame, regfile, regname, pi)
@@ -486,7 +492,8 @@ class NonProductionCore(ControlBase):
                 # exclude any currently-enabled read-request (mask out active)
                 comb += pick.eq(fu.rd_rel_o[idx] & fu_active & rdflags[i] &
                                 ~delay_pick)
-                comb += rdpick.i[pi].eq(pick)
+                # entirely block anything hazarded from being picked
+                comb += rdpick.i[pi].eq(pick & ~hazard_detected)
                 comb += fu.go_rd_i[idx].eq(delay_pick) # pass in *delayed* pick
 
                 # if picked, select read-port "reg select" number to port
@@ -519,12 +526,18 @@ class NonProductionCore(ControlBase):
                 wvchk_en = Signal(len(wvchk.ren), name="wv_chk_addr_en_"+name)
                 issue_active = Signal(name="rd_iactive_"+name)
                 # XXX combinatorial loop here
-                comb += issue_active.eq(self.instr_active & rf)
+                comb += issue_active.eq(fu_active & rf)
                 with m.If(issue_active):
                     if rfile.unary:
                         comb += wvchk_en.eq(read)
                     else:
                         comb += wvchk_en.eq(1<<read)
+                # if FU is busy (which doesn't get set at the same time as
+                # issue) and no hazard was detected, clear wvchk_en (i.e.
+                # stop checking for hazards)
+                with m.If(fu.busy_o & ~hazard_detected):
+                        comb += wvchk_en.eq(0)
+
                 wvens.append(wvchk_en)
 
         # or-reduce the muxed read signals
@@ -543,7 +556,6 @@ class NonProductionCore(ControlBase):
         # enable the read bitvectors for this issued instruction
         # and return whether any write-hazard bit is set
         comb += wvchk.ren.eq(ortreereduce_sig(wvens))
-        hazard_detected = Signal(name="raw_%s_%s" % (regfile, rpidx))
         comb += hazard_detected.eq(wvchk.o_data.bool())
         return hazard_detected
 
index 4146d189ce3eb18a3d9dc0a6adfaff92750ecd39..109fa8520c4c7e59b3c6c80b7442d0f4f6a78b01 100644 (file)
@@ -99,12 +99,14 @@ class CoreOutput:
     def __init__(self):
         # start/stop and terminated signalling
         self.core_terminate_o = Signal()  # indicates stopped
-        self.busy_o = Signal(name="corebusy_o")  # at least one ALU busy
+        self.busy_o = Signal(name="corebusy_o")  # ALU is busy, no input
+        self.any_busy_o = Signal(name="any_busy_o")  # at least one ALU busy
         self.exc_happened = Signal()             # exception happened
 
     def eq(self, i):
         return [self.core_terminate_o.eq(i.core_terminate_o),
                 self.busy_o.eq(i.busy_o),
+                self.any_busy_o.eq(i.any_busy_o),
                 self.exc_happened.eq(i.exc_happened),
                ]