sorting out issue hazard conflicts in core.
[soc.git] / src / soc / simple / core.py
index bea7060a87243503faa07fb56598c342e79d8866..021e4fa3bc2483b98c44e518d60616b90af1c81f 100644 (file)
@@ -188,7 +188,8 @@ class NonProductionCore(ControlBase):
         fu_bitdict, fu_selected = self.connect_instruction(m, issue_conflict)
         raw_hazard = self.connect_rdports(m, fu_selected)
         self.connect_wrports(m, fu_selected)
-        comb += issue_conflict.eq(raw_hazard)
+        if self.allow_overlap:
+            comb += issue_conflict.eq(raw_hazard)
 
         # note if an exception happened.  in a pipelined or OoO design
         # this needs to be accompanied by "shadowing" (or stalling)
@@ -247,7 +248,7 @@ class NonProductionCore(ControlBase):
         # or if the instruction could not be delivered, keep dropping the
         # latched copy into ireg
         ilatch = self.ispec()
-        instruction_active = Signal()
+        self.instr_active = Signal()
 
         # enable/busy-signals for each FU, get one bit for each FU (by name)
         fu_enable = Signal(len(fus), reset_less=True)
@@ -290,7 +291,7 @@ class NonProductionCore(ControlBase):
                 fnunit = fu.fnunit.value
                 en_req = Signal(name="issue_en_%s" % funame, reset_less=True)
                 fnmatch = (self.ireg.e.do.fn_unit & fnunit).bool()
-                comb += en_req.eq(fnmatch & ~fu.busy_o & instruction_active)
+                comb += en_req.eq(fnmatch & ~fu.busy_o & self.instr_active)
                 i_l.append(en_req) # store in list for doing the Cat-trick
                 # picker output, gated by enable: store in fu_bitdict
                 po = Signal(name="o_issue_pick_"+funame) # picker output
@@ -305,6 +306,11 @@ class NonProductionCore(ControlBase):
             # for each input, Cat them together and drop them into the picker
             comb += i_pp.i.eq(Cat(*i_l))
 
+        # rdmask, which is for registers needs to come from the *main* decoder
+        for funame, fu in fus.items():
+            rdmask = get_rdflags(self.ireg.e, fu)
+            comb += fu.rdmaskn.eq(~rdmask)
+
         # sigh - need a NOP counter
         counter = Signal(2)
         with m.If(counter != 0):
@@ -316,7 +322,6 @@ class NonProductionCore(ControlBase):
         comb += self.ireg.eq(self.i)
         # always say "ready" except if overridden
         comb += self.p.o_ready.eq(1)
-        comb += instruction_active.eq(1)
 
         l_issue_conflict = Signal()
 
@@ -334,6 +339,7 @@ class NonProductionCore(ControlBase):
                             comb += busy_o.eq(1)
 
                         with m.Default():
+                            comb += self.instr_active.eq(1)
                             comb += self.p.o_ready.eq(0)
                             # connect instructions. only one enabled at a time
                             for funame, fu in fus.items():
@@ -342,28 +348,24 @@ class NonProductionCore(ControlBase):
 
                                 # run this FunctionUnit if enabled route op,
                                 # issue, busy, read flags and mask to FU
-                                with m.If(enable):
+                                with m.If(enable & ~issue_conflict):
                                     # operand comes from the *local*  decoder
                                     comb += fu.oper_i.eq_from(do)
                                     comb += fu.issue_i.eq(1) # issue when valid
-                                    # rdmask, which is for registers,
-                                    # needs to come
-                                    # from the *main* decoder
-                                    rdmask = get_rdflags(self.ireg.e, fu)
-                                    comb += fu.rdmaskn.eq(~rdmask)
                                     # instruction ok, indicate ready
                                     comb += self.p.o_ready.eq(1)
 
-                            with m.If(~fu_found):
-                                # latch copy of instruction
-                                sync += ilatch.eq(self.i)
-                                sync += l_issue_conflict.eq(issue_conflict)
-                                comb += self.p.o_ready.eq(1) # accept
-                                comb += busy_o.eq(1)
-                                m.next = "WAITING"
+                            if self.allow_overlap:
+                                with m.If(~fu_found):
+                                    # latch copy of instruction
+                                    sync += ilatch.eq(self.i)
+                                    sync += l_issue_conflict.eq(issue_conflict)
+                                    comb += self.p.o_ready.eq(1) # accept
+                                    comb += busy_o.eq(1)
+                                    m.next = "WAITING"
 
             with m.State("WAITING"):
-                comb += instruction_active.eq(1)
+                comb += self.instr_active.eq(1)
                 with m.If(fu_found):
                     sync += l_issue_conflict.eq(0)
                 comb += self.p.o_ready.eq(0)
@@ -382,11 +384,6 @@ class NonProductionCore(ControlBase):
                             # operand comes from the *local*  decoder
                             comb += fu.oper_i.eq_from(do)
                             comb += fu.issue_i.eq(1) # issue when valid
-                            # rdmask, which is for registers,
-                            # needs to come
-                            # from the *main* decoder
-                            rdmask = get_rdflags(self.ireg.e, fu)
-                            comb += fu.rdmaskn.eq(~rdmask)
                             comb += self.p.o_ready.eq(1)
                             comb += busy_o.eq(0)
                             m.next = "READY"
@@ -427,7 +424,6 @@ class NonProductionCore(ControlBase):
 
         rdflags = []
         pplen = 0
-        reads = []
         ppoffs = []
         for i, fspec in enumerate(fspecs):
             # get the regfile specs for this regfile port
@@ -439,7 +435,6 @@ class NonProductionCore(ControlBase):
             rdflag = Signal(name=name, reset_less=True)
             comb += rdflag.eq(rf)
             rdflags.append(rdflag)
-            reads.append(read)
 
         print ("pplen", pplen)
 
@@ -452,16 +447,33 @@ class NonProductionCore(ControlBase):
         wvens = []
 
         for i, fspec in enumerate(fspecs):
-            (rf, wf, read, write, wid, fuspec) = fspec
+            (rf, wf, _read, _write, wid, fuspec) = fspec
             # connect up the FU req/go signals, and the reg-read to the FU
             # and create a Read Broadcast Bus
             for pi, (funame, fu, idx) in enumerate(fuspec):
                 pi += ppoffs[i]
+                name = "%s_%s_%s_%i" % (regfile, rpidx, funame, pi)
+                fu_active = fu_bitdict[funame]
+
+                # get (or set up) a latched copy of read register number
+                rname = "%s_%s_%s_%d" % (funame, regfile, regname, pi)
+                read = Signal.like(_read, name="read_"+name)
+                if rname not in fu.rd_latches:
+                    rdl = Signal.like(_read, name="rdlatch_"+rname)
+                    fu.rd_latches[rname] = rdl
+                    with m.If(fu.issue_i):
+                        sync += rdl.eq(_read)
+                else:
+                    rdl = fu.rd_latches[rname]
+                # latch to make the read immediately available on issue cycle
+                # after the read cycle, use the latched copy
+                with m.If(fu.issue_i):
+                    comb += read.eq(_read)
+                with m.Else():
+                    comb += read.eq(rdl)
 
                 # connect request-read to picker input, and output to go-rd
-                fu_active = fu_bitdict[funame]
-                name = "%s_%s_%s_%i" % (regfile, rpidx, funame, pi)
-                addr_en = Signal.like(reads[i], name="addr_en_"+name)
+                addr_en = Signal.like(read, name="addr_en_"+name)
                 pick = Signal(name="pick_"+name)     # picker input
                 rp = Signal(name="rp_"+name)         # picker output
                 delay_pick = Signal(name="dp_"+name) # read-enable "underway"
@@ -475,7 +487,7 @@ class NonProductionCore(ControlBase):
                 # if picked, select read-port "reg select" number to port
                 comb += rp.eq(rdpick.o[pi] & rdpick.en_o)
                 sync += delay_pick.eq(rp) # delayed "pick"
-                comb += addr_en.eq(Mux(rp, reads[i], 0))
+                comb += addr_en.eq(Mux(rp, read, 0))
 
                 # the read-enable happens combinatorially (see mux-bus below)
                 # but it results in the data coming out on a one-cycle delay.
@@ -501,12 +513,13 @@ class NonProductionCore(ControlBase):
                 # read the write-hazard bitvector (wv) for any bit that is
                 wvchk_en = Signal(len(wvchk.ren), name="wv_chk_addr_en_"+name)
                 issue_active = Signal(name="rd_iactive_"+name)
-                comb += issue_active.eq(fu.issue_i & rdflags[i])
+                # XXX combinatorial loop here
+                comb += issue_active.eq(self.instr_active & rf)
                 with m.If(issue_active):
                     if rfile.unary:
-                        comb += wvchk_en.eq(reads[i])
+                        comb += wvchk_en.eq(read)
                     else:
-                        comb += wvchk_en.eq(1<<reads[i])
+                        comb += wvchk_en.eq(1<<read)
                 wvens.append(wvchk_en)
 
         # or-reduce the muxed read signals
@@ -708,11 +721,22 @@ class NonProductionCore(ControlBase):
             # connect up the FU req/go signals and the reg-read to the FU
             # these are arbitrated by Data.ok signals
             (rf, wf, read, _write, wid, fuspec) = fspec
-            wrname = "write_%s_%s_%d" % (regfile, regname, i)
-            write = Signal.like(_write, name=wrname)
-            comb += write.eq(_write)
             for pi, (funame, fu, idx) in enumerate(fuspec):
                 pi += ppoffs[i]
+                name = "%s_%s_%s_%d" % (funame, regfile, regname, idx)
+                # get (or set up) a write-latched copy of write register number
+                write = Signal.like(_write, name="write_"+name)
+                rname = "%s_%s_%s" % (funame, regfile, regname)
+                if rname not in fu.wr_latches:
+                    wrl = Signal.like(_write, name="wrlatch_"+rname)
+                    fu.wr_latches[rname] = write
+                    with m.If(fu.issue_i):
+                        sync += wrl.eq(_write)
+                        comb += write.eq(_write)
+                    with m.Else():
+                        comb += write.eq(wrl)
+                else:
+                    write = fu.wr_latches[rname]
 
                 # write-request comes from dest.ok
                 dest = fu.get_out(idx)
@@ -827,26 +851,41 @@ class NonProductionCore(ControlBase):
         fus = self.fus.fus
         e = self.ireg.e # decoded instruction to execute
 
-        # dictionary of lists of regfile ports
-        byregfiles = {}
-        byregfiles_spec = {}
+        # dictionary of dictionaries of lists of regfile ports.
+        # first key: regfile.  second key: regfile port name
+        byregfiles = defaultdict(dict)
+        byregfiles_spec = defaultdict(dict)
+
         for (funame, fu) in fus.items():
+            # create in each FU a receptacle for the read/write register
+            # hazard numbers.  to be latched in connect_rd/write_ports
+            # XXX better that this is moved into the actual FUs, but
+            # the issue there is that this function is actually better
+            # suited at the moment
+            if readmode:
+                fu.rd_latches = {}
+            else:
+                fu.wr_latches = {}
+
             print("%s ports for %s" % (mode, funame))
             for idx in range(fu.n_src if readmode else fu.n_dst):
+                # construct regfile specs: read uses inspec, write outspec
                 if readmode:
                     (regfile, regname, wid) = fu.get_in_spec(idx)
                 else:
                     (regfile, regname, wid) = fu.get_out_spec(idx)
                 print("    %d %s %s %s" % (idx, regfile, regname, str(wid)))
+
+                # the PowerDecoder2 (main one, not the satellites) contains
+                # the decoded regfile numbers. obtain these now
                 if readmode:
                     rdflag, read = regspec_decode_read(e, regfile, regname)
                     wrport, write = None, None
                 else:
                     rdflag, read = None, None
                     wrport, write = regspec_decode_write(e, regfile, regname)
-                if regfile not in byregfiles:
-                    byregfiles[regfile] = {}
-                    byregfiles_spec[regfile] = {}
+
+                # construct the dictionary of regspec information by regfile
                 if regname not in byregfiles_spec[regfile]:
                     byregfiles_spec[regfile][regname] = \
                         (rdflag, wrport, read, write, wid, [])
@@ -857,7 +896,19 @@ class NonProductionCore(ControlBase):
                 byregfiles[regfile][idx].append(fuspec)
                 byregfiles_spec[regfile][regname][5].append(fuspec)
 
-        # ok just print that out, for convenience
+                continue
+                # append a latch Signal to the FU's list of latches
+                rname = "%s_%s" % (regfile, regname)
+                if readmode:
+                    if rname not in fu.rd_latches:
+                        rdl = Signal.like(read, name="rdlatch_"+rname)
+                        fu.rd_latches[rname] = rdl
+                else:
+                    if rname not in fu.wr_latches:
+                        wrl = Signal.like(write, name="wrlatch_"+rname)
+                        fu.wr_latches[rname] = wrl
+
+        # ok just print that all out, for convenience
         for regfile, spec in byregfiles.items():
             print("regfile %s ports:" % mode, regfile)
             fuspecs = byregfiles_spec[regfile]