From 821ddb9dcf75c713735adcbe1ff2d23324110cca Mon Sep 17 00:00:00 2001 From: Luke Kenneth Casson Leighton Date: Sun, 21 Nov 2021 21:09:09 +0000 Subject: [PATCH] fixed issue with hazard dependencies, read will not take place until write dependencies are clear. currently extremely draconian "ban" on entire FU progressing, actually probably all FUs progressing, until write-hazards are clear. can sort out later --- src/soc/regfile/regfiles.py | 4 ++-- src/soc/simple/core.py | 46 +++++++++++++++++++++++-------------- src/soc/simple/core_data.py | 4 +++- 3 files changed, 34 insertions(+), 20 deletions(-) diff --git a/src/soc/regfile/regfiles.py b/src/soc/regfile/regfiles.py index 234c06e7..fc87bee9 100644 --- a/src/soc/regfile/regfiles.py +++ b/src/soc/regfile/regfiles.py @@ -285,9 +285,9 @@ class RegFiles: def make_hazard_vec(self, rf, name): if isinstance(rf, VirtualRegPort): - vec = VirtualRegPort(rf.nregs, rf.nregs, wr2=True, synced=False) + vec = VirtualRegPort(rf.nregs, rf.nregs, wr2=True) else: - vec = VirtualRegPort(rf.depth, rf.depth, wr2=True, synced=False) + vec = VirtualRegPort(rf.depth, rf.depth, wr2=True) # get read/write port specs and create bitvector ports with same names wr_spec, rd_spec = rf.get_port_specs() # ok, this is complicated/fun. diff --git a/src/soc/simple/core.py b/src/soc/simple/core.py index 88803582..fc851d40 100644 --- a/src/soc/simple/core.py +++ b/src/soc/simple/core.py @@ -19,7 +19,8 @@ and consequently it is safer to wait for the Function Unit to complete before allowing a new instruction to proceed. 
""" -from nmigen import Elaboratable, Module, Signal, ResetSignal, Cat, Mux +from nmigen import (Elaboratable, Module, Signal, ResetSignal, Cat, Mux, + Const) from nmigen.cli import rtlil from openpower.decoder.power_decoder2 import PowerDecodeSubset @@ -242,6 +243,7 @@ class NonProductionCore(ControlBase): # indicate if core is busy busy_o = self.o.busy_o + any_busy_o = self.o.any_busy_o # connect up temporary copy of incoming instruction. the FSM will # either blat the incoming instruction (if valid) into self.ireg @@ -302,7 +304,7 @@ class NonProductionCore(ControlBase): # if we don't do this, then when there are no FUs available, # the "p.o_ready" signal will go back "ok we accepted this # instruction" which of course isn't true. - with m.If(~self.issue_conflict & i_pp.en_o): + with m.If(i_pp.en_o): comb += fu_found.eq(1) # for each input, Cat them together and drop them into the picker comb += i_pp.i.eq(Cat(*i_l)) @@ -324,8 +326,6 @@ class NonProductionCore(ControlBase): # always say "ready" except if overridden comb += self.p.o_ready.eq(1) - l_issue_conflict = Signal() - with m.FSM(): with m.State("READY"): with m.If(self.p.i_valid): # run only when valid @@ -349,34 +349,28 @@ class NonProductionCore(ControlBase): # run this FunctionUnit if enabled route op, # issue, busy, read flags and mask to FU - with m.If(enable & ~self.issue_conflict): + with m.If(enable): # operand comes from the *local* decoder comb += fu.oper_i.eq_from(do) comb += fu.issue_i.eq(1) # issue when valid # instruction ok, indicate ready comb += self.p.o_ready.eq(1) - comb += busy_o.eq(1) if self.allow_overlap: with m.If(~fu_found): - comb += self.instr_active.eq(1) # latch copy of instruction sync += ilatch.eq(self.i) - sync += l_issue_conflict.eq( - self.issue_conflict) comb += self.p.o_ready.eq(1) # accept comb += busy_o.eq(1) m.next = "WAITING" with m.State("WAITING"): comb += self.instr_active.eq(1) - with m.If(fu_found): - sync += l_issue_conflict.eq(0) comb += self.p.o_ready.eq(0) 
comb += busy_o.eq(1) # using copy of instruction, keep waiting until an FU is free comb += self.ireg.eq(ilatch) - with m.If(~l_issue_conflict): # wait for conflict to clear + with m.If(fu_found): # wait for conflict to clear # connect instructions. only one enabled at a time for funame, fu in fus.items(): do = self.des[funame] @@ -393,11 +387,17 @@ class NonProductionCore(ControlBase): m.next = "READY" print ("core: overlap allowed", self.allow_overlap) + busys = map(lambda fu: fu.busy_o, fus.values()) + comb += any_busy_o.eq(Cat(*busys).bool()) if not self.allow_overlap: # for simple non-overlap, if any instruction is busy, set # busy output for core. - busys = map(lambda fu: fu.busy_o, fus.values()) - comb += busy_o.eq(Cat(*busys).bool()) + comb += busy_o.eq(any_busy_o) + else: + # sigh deal with a fun situation that needs to be investigated + # and resolved + with m.If(self.issue_conflict): + comb += busy_o.eq(1) # return both the function unit "enable" dict as well as the "busy". # the "busy-or-issued" can be passed in to the Read/Write port @@ -423,6 +423,11 @@ class NonProductionCore(ControlBase): wv = regs.wv[regfile.lower()] wvchk = wv.r_ports["issue"] # write-vec bit-level hazard check + # if a hazard is detected on this read port, simply blithely block + # every FU from reading on it. this is complete overkill but very + # simple for now. 
+ hazard_detected = Signal(name="raw_%s_%s" % (regfile, rpidx)) + fspecs = fspec if not isinstance(fspecs, list): fspecs = [fspecs] @@ -459,6 +464,7 @@ class NonProductionCore(ControlBase): pi += ppoffs[i] name = "%s_%s_%s_%i" % (regfile, rpidx, funame, pi) fu_active = fu_selected[funame] + fu_issued = fu_bitdict[funame] # get (or set up) a latched copy of read register number rname = "%s_%s_%s_%d" % (funame, regfile, regname, pi) @@ -486,7 +492,8 @@ class NonProductionCore(ControlBase): # exclude any currently-enabled read-request (mask out active) comb += pick.eq(fu.rd_rel_o[idx] & fu_active & rdflags[i] & ~delay_pick) - comb += rdpick.i[pi].eq(pick) + # entirely block anything hazarded from being picked + comb += rdpick.i[pi].eq(pick & ~hazard_detected) comb += fu.go_rd_i[idx].eq(delay_pick) # pass in *delayed* pick # if picked, select read-port "reg select" number to port @@ -519,12 +526,18 @@ class NonProductionCore(ControlBase): wvchk_en = Signal(len(wvchk.ren), name="wv_chk_addr_en_"+name) issue_active = Signal(name="rd_iactive_"+name) # XXX combinatorial loop here - comb += issue_active.eq(self.instr_active & rf) + comb += issue_active.eq(fu_active & rf) with m.If(issue_active): if rfile.unary: comb += wvchk_en.eq(read) else: comb += wvchk_en.eq(1<