add write-after-write hazard detection
authorLuke Kenneth Casson Leighton <lkcl@lkcl.net>
Wed, 24 Nov 2021 16:06:26 +0000 (16:06 +0000)
committerLuke Kenneth Casson Leighton <lkcl@lkcl.net>
Wed, 24 Nov 2021 16:06:26 +0000 (16:06 +0000)
stall if a WaW hazard is detected, continuing to assert the main decoder
so that the relevant bits of the write-vector can be read.  these
bits are read *one clock behind* which is perfectly fine

src/soc/simple/core.py

index 7d98bb9b097332a0bb71364784deba1968651a91..9f9bed7e8d6e04ce22f32344b434079482e42b45 100644 (file)
@@ -189,9 +189,12 @@ class NonProductionCore(ControlBase):
         regs = self.regs
         fus = self.fus.fus
 
-        # set FU hazards default to 1 (as a test)
+        # amalgamate write-hazards into a single top-level Signal
+        self.waw_hazard = Signal()
+        whaz = []
         for funame, fu in self.fus.fus.items():
-            comb += fu._waw_hazard.eq(1)
+            whaz.append(fu._waw_hazard)
+        comb += self.waw_hazard.eq(Cat(*whaz).bool())
 
         # connect decoders
         self.connect_satellite_decoders(m)
@@ -366,13 +369,17 @@ class NonProductionCore(ControlBase):
                                 # issue, busy, read flags and mask to FU
                                 with m.If(enable):
                                     # operand comes from the *local*  decoder
+                                    # do not actually issue, though, if there
+                                    # is a waw hazard. decoder has to still
+                                    # be asserted in order to detect that, tho
                                     comb += fu.oper_i.eq_from(do)
-                                    comb += fu.issue_i.eq(1) # issue when valid
+                                    # issue when valid (and no write-hazard)
+                                    comb += fu.issue_i.eq(~self.waw_hazard)
                                     # instruction ok, indicate ready
                                     comb += self.p.o_ready.eq(1)
 
                             if self.allow_overlap:
-                                with m.If(~fu_found):
+                                with m.If(~fu_found | self.waw_hazard):
                                     # latch copy of instruction
                                     sync += ilatch.eq(self.i)
                                     comb += self.p.o_ready.eq(1) # accept
@@ -394,12 +401,18 @@ class NonProductionCore(ControlBase):
                         # run this FunctionUnit if enabled route op,
                         # issue, busy, read flags and mask to FU
                         with m.If(enable):
-                            # operand comes from the *local*  decoder
+                            # operand comes from the *local* decoder,
+                            # which is asserted even if not issued,
+                            # so that WaW-detection can check for hazards.
+                            # only if the waw hazard is clear does the
+                            # instruction actually get issued
                             comb += fu.oper_i.eq_from(do)
-                            comb += fu.issue_i.eq(1) # issue when valid
-                            comb += self.p.o_ready.eq(1)
-                            comb += busy_o.eq(0)
-                            m.next = "READY"
+                            # issue when valid
+                            comb += fu.issue_i.eq(~self.waw_hazard)
+                            with m.If(~self.waw_hazard):
+                                comb += self.p.o_ready.eq(1)
+                                comb += busy_o.eq(0)
+                                m.next = "READY"
 
         print ("core: overlap allowed", self.allow_overlap)
         busys = map(lambda fu: fu.busy_o, fus.values())
@@ -411,7 +424,7 @@ class NonProductionCore(ControlBase):
         else:
             # sigh deal with a fun situation that needs to be investigated
             # and resolved
-            with m.If(self.issue_conflict):
+            with m.If(self.issue_conflict | self.waw_hazard):
                 comb += busy_o.eq(1)
 
         # return both the function unit "enable" dict as well as the "busy".
@@ -721,6 +734,7 @@ class NonProductionCore(ControlBase):
             wv = regs.wv[regfile.lower()]
             wvset = wv.w_ports["set"] # write-vec bit-level hazard ctrl
             wvclr = wv.w_ports["clr"] # write-vec bit-level hazard ctrl
+            wvchk = wv.r_ports["whazard"] # write-after-write hazard check
 
         fspecs = fspec
         if not isinstance(fspecs, list):
@@ -763,6 +777,7 @@ class NonProductionCore(ControlBase):
         wvsets = []
         wvseten = []
         wvclren = []
+        wvens = []
         addrs = []
         for i, fspec in enumerate(fspecs):
             # connect up the FU req/go signals and the reg-read to the FU
@@ -791,8 +806,8 @@ class NonProductionCore(ControlBase):
                 # write-request comes from dest.ok
                 dest = fu.get_out(idx)
                 fu_dest_latch = fu.get_fu_out(idx)  # latched output
-                name = "fu_wrok_%s_%s_%d" % (funame, regname, idx)
-                fu_wrok = Signal(name=name, reset_less=True)
+                name = "%s_%s_%d" % (funame, regname, idx)
+                fu_wrok = Signal(name="fu_wrok_"+name, reset_less=True)
                 comb += fu_wrok.eq(dest.ok & fu.busy_o)
 
                 # connect request-write to picker input, and output to go-wr
@@ -836,6 +851,38 @@ class NonProductionCore(ControlBase):
                 wvseten.append(wv_issue_en) # set data same as enable
                 wvsets.append(wv_issue_en)  # because enable needs a 1
 
+                # read the write-hazard bitvector (wv) for any bit that is
+                fu_issue = fu_bitdict[funame]
+                wvchk_en = Signal(len(wvchk.ren), name="waw_chk_addr_en_"+name)
+                issue_active = Signal(name="waw_iactive_"+name)
+                whazard = Signal(name="whaz_"+name)
+                if wf is None:
+                    # XXX EEK! STATE regfile (branch) does not have an
+                    # write-active indicator in regspec_decode_write()
+                    print ("XXX FIXME waw_iactive", issue_active, fu_issue, wf)
+                else:
+                    #comb += issue_active.eq(fu_issue & wf)
+                    comb += issue_active.eq(wf)
+                with m.If(issue_active):
+                    if rfile.unary:
+                        comb += wvchk_en.eq(write)
+                    else:
+                        comb += wvchk_en.eq(1<<write)
+                # if FU is busy (which doesn't get set at the same time as
+                # issue) and no hazard was detected, clear wvchk_en (i.e.
+                # stop checking for hazards).  there is a loop here, but it's
+                # via a DFF, so is ok. some linters may complain, but hey.
+                with m.If(fu.busy_o & ~whazard):
+                        comb += wvchk_en.eq(0)
+
+                # write-hazard is ANDed with (filtered by) what is actually
+                # being requested.
+                comb += whazard.eq((wvchk.o_data & wvchk_en).bool())
+                with m.If(whazard):
+                    comb += fu._waw_hazard.eq(1)
+
+                #wvens.append(wvchk_en)
+
         # here is where we create the Write Broadcast Bus. simple, eh?
         comb += wport.i_data.eq(ortreereduce_sig(wsigs))
         if rfile.unary:
@@ -854,6 +901,10 @@ class NonProductionCore(ControlBase):
         comb += wvset.wen.eq(ortreereduce_sig(wvseten)) # set (issue time)
         comb += wvset.i_data.eq(ortreereduce_sig(wvsets))
 
+        # for write-after-write.  this gets the write vector one cycle
+        # late but that's ok
+        comb += wvchk.ren.eq(-1) # always enable #ortreereduce_sig(wvens))
+
     def connect_wrports(self, m, fu_bitdict, fu_selected):
         """connect write ports