block picker hazard on input to PriorityPicker rather than output
[soc.git] / src / soc / simple / issuer.py
index b13b5ebeaa4a9a09934c6e02a0017c7cc0bfbdd5..671f113f4b00dc4c4dba89462ec0eac8f4e6bb99 100644 (file)
@@ -166,6 +166,10 @@ class TestIssuerInternal(Elaboratable):
         self.regreduce_en = (hasattr(pspec, "regreduce") and
                                             (pspec.regreduce == True))
 
+        # and if overlap requested
+        self.allow_overlap = (hasattr(pspec, "allow_overlap") and
+                                            (pspec.allow_overlap == True))
+
         # JTAG interface.  add this right at the start because if it's
         # added it *modifies* the pspec, by adding enable/disable signals
         # for parts of the rest of the core
@@ -275,12 +279,15 @@ class TestIssuerInternal(Elaboratable):
         # pulse to synchronize the simulator at instruction end
         self.insn_done = Signal()
 
+        # indicate any instruction still outstanding, in execution
+        self.any_busy = Signal()
+
         if self.svp64_en:
             # store copies of predicate masks
             self.srcmask = Signal(64)
             self.dstmask = Signal(64)
 
-    def fetch_fsm(self, m, core, pc, svstate, nia, is_svp64_mode,
+    def fetch_fsm(self, m, dbg, core, pc, svstate, nia, is_svp64_mode,
                         fetch_pc_o_ready, fetch_pc_i_valid,
                         fetch_insn_o_valid, fetch_insn_i_ready):
         """fetch FSM
@@ -301,7 +308,8 @@ class TestIssuerInternal(Elaboratable):
 
             # waiting (zzz)
             with m.State("IDLE"):
-                comb += fetch_pc_o_ready.eq(1)
+                with m.If(~dbg.stopping_o):
+                    comb += fetch_pc_o_ready.eq(1)
                 with m.If(fetch_pc_i_valid):
                     # instruction allowed to go: start by reading the PC
                     # capture the PC and also drop it into Insn Memory
@@ -321,47 +329,55 @@ class TestIssuerInternal(Elaboratable):
 
             # dummy pause to find out why simulation is not keeping up
             with m.State("INSN_READ"):
-                # one cycle later, msr/sv read arrives.  valid only once.
-                with m.If(~msr_read):
-                    sync += msr_read.eq(1) # yeah don't read it again
-                    sync += cur_state.msr.eq(self.state_r_msr.o_data)
-                with m.If(self.imem.f_busy_o): # zzz...
-                    # busy: stay in wait-read
-                    comb += self.imem.a_i_valid.eq(1)
-                    comb += self.imem.f_i_valid.eq(1)
+                if self.allow_overlap:
+                    stopping = dbg.stopping_o
+                else:
+                    stopping = Const(0)
+                with m.If(stopping):
+                    # stopping: jump back to idle
+                    m.next = "IDLE"
                 with m.Else():
-                    # not busy: instruction fetched
-                    insn = get_insn(self.imem.f_instr_o, cur_state.pc)
-                    if self.svp64_en:
-                        svp64 = self.svp64
-                        # decode the SVP64 prefix, if any
-                        comb += svp64.raw_opcode_in.eq(insn)
-                        comb += svp64.bigendian.eq(self.core_bigendian_i)
-                        # pass the decoded prefix (if any) to PowerDecoder2
-                        sync += pdecode2.sv_rm.eq(svp64.svp64_rm)
-                        sync += pdecode2.is_svp64_mode.eq(is_svp64_mode)
-                        # remember whether this is a prefixed instruction, so
-                        # the FSM can readily loop when VL==0
-                        sync += is_svp64_mode.eq(svp64.is_svp64_mode)
-                        # calculate the address of the following instruction
-                        insn_size = Mux(svp64.is_svp64_mode, 8, 4)
-                        sync += nia.eq(cur_state.pc + insn_size)
-                        with m.If(~svp64.is_svp64_mode):
-                            # with no prefix, store the instruction
-                            # and hand it directly to the next FSM
+                    # one cycle later, msr/sv read arrives.  valid only once.
+                    with m.If(~msr_read):
+                        sync += msr_read.eq(1) # yeah don't read it again
+                        sync += cur_state.msr.eq(self.state_r_msr.o_data)
+                    with m.If(self.imem.f_busy_o): # zzz...
+                        # busy: stay in wait-read
+                        comb += self.imem.a_i_valid.eq(1)
+                        comb += self.imem.f_i_valid.eq(1)
+                    with m.Else():
+                        # not busy: instruction fetched
+                        insn = get_insn(self.imem.f_instr_o, cur_state.pc)
+                        if self.svp64_en:
+                            svp64 = self.svp64
+                            # decode the SVP64 prefix, if any
+                            comb += svp64.raw_opcode_in.eq(insn)
+                            comb += svp64.bigendian.eq(self.core_bigendian_i)
+                            # pass the decoded prefix (if any) to PowerDecoder2
+                            sync += pdecode2.sv_rm.eq(svp64.svp64_rm)
+                            sync += pdecode2.is_svp64_mode.eq(is_svp64_mode)
+                            # remember whether this is a prefixed instruction, 
+                            # so the FSM can readily loop when VL==0
+                            sync += is_svp64_mode.eq(svp64.is_svp64_mode)
+                            # calculate the address of the following instruction
+                            insn_size = Mux(svp64.is_svp64_mode, 8, 4)
+                            sync += nia.eq(cur_state.pc + insn_size)
+                            with m.If(~svp64.is_svp64_mode):
+                                # with no prefix, store the instruction
+                                # and hand it directly to the next FSM
+                                sync += dec_opcode_i.eq(insn)
+                                m.next = "INSN_READY"
+                            with m.Else():
+                                # fetch the rest of the instruction from memory
+                                comb += self.imem.a_pc_i.eq(cur_state.pc + 4)
+                                comb += self.imem.a_i_valid.eq(1)
+                                comb += self.imem.f_i_valid.eq(1)
+                                m.next = "INSN_READ2"
+                        else:
+                            # not SVP64 - 32-bit only
+                            sync += nia.eq(cur_state.pc + 4)
                             sync += dec_opcode_i.eq(insn)
                             m.next = "INSN_READY"
-                        with m.Else():
-                            # fetch the rest of the instruction from memory
-                            comb += self.imem.a_pc_i.eq(cur_state.pc + 4)
-                            comb += self.imem.a_i_valid.eq(1)
-                            comb += self.imem.f_i_valid.eq(1)
-                            m.next = "INSN_READ2"
-                    else:
-                        # not SVP64 - 32-bit only
-                        sync += nia.eq(cur_state.pc + 4)
-                        sync += dec_opcode_i.eq(insn)
-                        m.next = "INSN_READY"
 
             with m.State("INSN_READ2"):
                 with m.If(self.imem.f_busy_o):  # zzz...
@@ -604,12 +620,7 @@ class TestIssuerInternal(Elaboratable):
 
         # note if an exception happened.  in a pipelined or OoO design
         # this needs to be accompanied by "shadowing" (or stalling)
-        el = []
-        for exc in core.fus.excs.values():
-            el.append(exc.happened)
-        exc_happened = Signal()
-        if len(el) > 0: # at least one exception
-            comb += exc_happened.eq(Cat(*el).bool())
+        exc_happened = self.core.o.exc_happened
 
         with m.FSM(name="issue_fsm"):
 
@@ -733,26 +744,26 @@ class TestIssuerInternal(Elaboratable):
 
                         # pass predicate mask bits through to satellite decoders
                         # TODO: for SIMD this will be *multiple* bits
-                        sync += core.sv_pred_sm.eq(self.srcmask[0])
-                        sync += core.sv_pred_dm.eq(self.dstmask[0])
+                        sync += core.i.sv_pred_sm.eq(self.srcmask[0])
+                        sync += core.i.sv_pred_dm.eq(self.dstmask[0])
 
             # after src/dst step have been updated, we are ready
             # to decode the instruction
             with m.State("DECODE_SV"):
                 # decode the instruction
-                sync += core.e.eq(pdecode2.e)
-                sync += core.state.eq(cur_state)
-                sync += core.raw_insn_i.eq(dec_opcode_i)
-                sync += core.bigendian_i.eq(self.core_bigendian_i)
+                sync += core.i.e.eq(pdecode2.e)
+                sync += core.i.state.eq(cur_state)
+                sync += core.i.raw_insn_i.eq(dec_opcode_i)
+                sync += core.i.bigendian_i.eq(self.core_bigendian_i)
                 if self.svp64_en:
-                    sync += core.sv_rm.eq(pdecode2.sv_rm)
+                    sync += core.i.sv_rm.eq(pdecode2.sv_rm)
                     # set RA_OR_ZERO detection in satellite decoders
-                    sync += core.sv_a_nz.eq(pdecode2.sv_a_nz)
+                    sync += core.i.sv_a_nz.eq(pdecode2.sv_a_nz)
                     # and svp64 detection
-                    sync += core.is_svp64_mode.eq(is_svp64_mode)
+                    sync += core.i.is_svp64_mode.eq(is_svp64_mode)
                     # and svp64 bit-rev'd ldst mode
                     ldst_dec = pdecode2.use_svp64_ldst_dec
-                    sync += core.use_svp64_ldst_dec.eq(ldst_dec)
+                    sync += core.i.use_svp64_ldst_dec.eq(ldst_dec)
                 # after decoding, reset any previous exception condition,
                 # allowing it to be set again during the next execution
                 sync += pdecode2.ldst_exc.eq(0)
@@ -866,10 +877,8 @@ class TestIssuerInternal(Elaboratable):
         pdecode2 = self.pdecode2
 
         # temporaries
-        core_busy_o = core.busy_o                 # core is busy
-        core_ivalid_i = core.ivalid_i             # instruction is valid
-        core_issue_i = core.issue_i               # instruction is issued
-        insn_type = core.e.do.insn_type           # instruction MicroOp type
+        core_busy_o = core.n.o_data.busy_o # core is busy
+        core_ivalid_i = core.p.i_valid              # instruction is valid
 
         with m.FSM(name="exec_fsm"):
 
@@ -877,16 +886,14 @@ class TestIssuerInternal(Elaboratable):
             with m.State("INSN_START"):
                 comb += exec_insn_o_ready.eq(1)
                 with m.If(exec_insn_i_valid):
-                    comb += core_ivalid_i.eq(1)  # instruction is valid
-                    comb += core_issue_i.eq(1)  # and issued
+                    comb += core_ivalid_i.eq(1)  # instruction is valid/issued
                     sync += sv_changed.eq(0)
                     sync += pc_changed.eq(0)
-                    m.next = "INSN_ACTIVE"  # move to "wait completion"
+                    with m.If(core.p.o_ready): # only move if accepted
+                        m.next = "INSN_ACTIVE"  # move to "wait completion"
 
             # instruction started: must wait till it finishes
             with m.State("INSN_ACTIVE"):
-                with m.If(insn_type != MicrOp.OP_NOP):
-                    comb += core_ivalid_i.eq(1) # instruction is valid
                 # note changes to PC and SVSTATE
                 with m.If(self.state_nia.wen & (1<<StateRegs.SVSTATE)):
                     sync += sv_changed.eq(1)
@@ -989,7 +996,8 @@ class TestIssuerInternal(Elaboratable):
             comb += dbg_rst.eq(ResetSignal())
 
         # busy/halted signals from core
-        comb += self.busy_o.eq(core.busy_o)
+        core_busy_o = ~core.p.o_ready | core.n.o_data.busy_o # core is busy
+        comb += self.busy_o.eq(core_busy_o)
         comb += pdecode2.dec.bigendian.eq(self.core_bigendian_i)
 
         # temporary hack: says "go" immediately for both address gen and ST
@@ -1021,6 +1029,9 @@ class TestIssuerInternal(Elaboratable):
         pc_changed = Signal() # note write to PC
         sv_changed = Signal() # note write to SVSTATE
 
+        # indicate to outside world if any FU is still executing
+        comb += self.any_busy.eq(core.n.o_data.any_busy_o) # any FU executing
+
         # read state either from incoming override or from regfile
         # TODO: really should be doing MSR in the same way
         pc = state_get(m, core_rst, self.pc_i,
@@ -1043,7 +1054,7 @@ class TestIssuerInternal(Elaboratable):
 
         # connect up debug signals
         # TODO comb += core.icache_rst_i.eq(dbg.icache_rst_o)
-        comb += dbg.terminate_i.eq(core.core_terminate_o)
+        comb += dbg.terminate_i.eq(core.o.core_terminate_o)
         comb += dbg.state.pc.eq(pc)
         comb += dbg.state.svstate.eq(svstate)
         comb += dbg.state.msr.eq(cur_state.msr)
@@ -1089,7 +1100,7 @@ class TestIssuerInternal(Elaboratable):
         # Issue is where the VL for-loop # lives.  the ready/valid
         # signalling is used to communicate between the four.
 
-        self.fetch_fsm(m, core, pc, svstate, nia, is_svp64_mode,
+        self.fetch_fsm(m, dbg, core, pc, svstate, nia, is_svp64_mode,
                        fetch_pc_o_ready, fetch_pc_i_valid,
                        fetch_insn_o_valid, fetch_insn_i_ready)