Extract the fetch FSM out from the main FSM
author Cesar Strauss <cestrauss@gmail.com>
Sat, 6 Feb 2021 20:53:22 +0000 (17:53 -0300)
committer Cesar Strauss <cestrauss@gmail.com>
Sat, 6 Feb 2021 21:31:10 +0000 (18:31 -0300)
This allows future extension to prefixed instructions, and is also an
opportunity for pipeline optimization.

At start, the fetch FSM waits for the PC to be stable, which is the
case once no instruction is being executed any more (so nothing can
still update the PC). When the fetch is done, the fetch FSM hands the
instruction over to the decoder.

src/soc/simple/issuer.py
src/soc/simple/test/test_issuer.py

index d662dff4c20e7904e6ca7cb3ae2ceaa6f03b6e06..1021fa7abbc1fc90b258d2a396f23860a63f8c14 100644 (file)
@@ -237,34 +237,41 @@ class TestIssuerInternal(Elaboratable):
 
         insn_type = core.e.do.insn_type
 
+        # handshake signals between fetch and decode/execute
+        # fetch FSM can run as soon as the PC is valid
+        fetch_pc_valid_i = Signal()
+        fetch_pc_ready_o = Signal()
+        # when done, deliver the instruction to the next FSM
+        fetch_insn_o = Signal(32, reset_less=True)
+        fetch_insn_valid_o = Signal()
+        fetch_insn_ready_i = Signal()
+
         # actually use a nmigen FSM for the first time (w00t)
         # this FSM is perhaps unusual in that it detects conditions
         # then "holds" information, combinatorially, for the core
         # (as opposed to using sync - which would be on a clock's delay)
         # this includes the actual opcode, valid flags and so on.
-        with m.FSM() as fsm:
+        with m.FSM(name='fetch_fsm'):
 
             # waiting (zzz)
             with m.State("IDLE"):
-                sync += pc_changed.eq(0)
-                sync += core.e.eq(0)
-                sync += core.raw_insn_i.eq(0)
-                sync += core.bigendian_i.eq(0)
                 with m.If(~dbg.core_stop_o & ~core_rst):
-                    # instruction allowed to go: start by reading the PC
-                    # capture the PC and also drop it into Insn Memory
-                    # we have joined a pair of combinatorial memory
-                    # lookups together.  this is Generally Bad.
-                    comb += self.imem.a_pc_i.eq(pc)
-                    comb += self.imem.a_valid_i.eq(1)
-                    comb += self.imem.f_valid_i.eq(1)
-                    sync += cur_state.pc.eq(pc)
-
-                    # initiate read of MSR.  arrives one clock later
-                    comb += self.state_r_msr.ren.eq(1<<StateRegs.MSR)
-                    sync += msr_read.eq(0)
-
-                    m.next = "INSN_READ" # move to "wait for bus" phase
+                    comb += fetch_pc_ready_o.eq(1)
+                    with m.If(fetch_pc_valid_i):
+                        # instruction allowed to go: start by reading the PC
+                        # capture the PC and also drop it into Insn Memory
+                        # we have joined a pair of combinatorial memory
+                        # lookups together.  this is Generally Bad.
+                        comb += self.imem.a_pc_i.eq(pc)
+                        comb += self.imem.a_valid_i.eq(1)
+                        comb += self.imem.f_valid_i.eq(1)
+                        sync += cur_state.pc.eq(pc)
+
+                        # initiate read of MSR.  arrives one clock later
+                        comb += self.state_r_msr.ren.eq(1 << StateRegs.MSR)
+                        sync += msr_read.eq(0)
+
+                        m.next = "INSN_READ"  # move to "wait for bus" phase
                 with m.Else():
                     comb += core.core_stopped_i.eq(1)
                     comb += dbg.core_stopped_i.eq(1)
@@ -286,7 +293,33 @@ class TestIssuerInternal(Elaboratable):
                         insn = f_instr_o
                     else:
                         insn = f_instr_o.word_select(cur_state.pc[2], 32)
-                    comb += dec_opcode_i.eq(insn) # actual opcode
+                    # capture and hold the instruction from memory
+                    sync += fetch_insn_o.eq(insn)
+                    m.next = "INSN_READY"
+
+            with m.State("INSN_READY"):
+                # hand over the instruction, to be decoded
+                comb += fetch_insn_valid_o.eq(1)
+                with m.If(fetch_insn_ready_i):
+                    m.next = "IDLE"
+
+        # decode / issue / execute FSM
+        with m.FSM():
+
+            # go fetch the instruction at the current PC
+            # at this point, there is no instruction running, that
+            # could inadvertently update the PC.
+            with m.State("INSN_FETCH"):
+                comb += fetch_pc_valid_i.eq(1)
+                with m.If(fetch_pc_ready_o):
+                    m.next = "INSN_WAIT"
+
+            # decode the instruction when it arrives
+            with m.State("INSN_WAIT"):
+                comb += fetch_insn_ready_i.eq(1)
+                with m.If(fetch_insn_valid_o):
+                    # decode the instruction
+                    comb += dec_opcode_i.eq(fetch_insn_o)  # actual opcode
                     sync += core.e.eq(pdecode2.e)
                     sync += core.state.eq(cur_state)
                     sync += core.raw_insn_i.eq(dec_opcode_i)
@@ -299,6 +332,7 @@ class TestIssuerInternal(Elaboratable):
             with m.State("INSN_START"):
                 comb += core_ivalid_i.eq(1) # instruction is valid
                 comb += core_issue_i.eq(1)  # and issued
+                sync += pc_changed.eq(0)
 
                 m.next = "INSN_ACTIVE" # move to "wait completion"
 
@@ -318,7 +352,7 @@ class TestIssuerInternal(Elaboratable):
                     sync += core.e.eq(0)
                     sync += core.raw_insn_i.eq(0)
                     sync += core.bigendian_i.eq(0)
-                    m.next = "IDLE" # back to idle
+                    m.next = "INSN_FETCH"  # back to fetch
 
         # this bit doesn't have to be in the FSM: connect up to read
         # regfiles on demand from DMI
index 2a45ec2d898721b18f7d16c0632b45d6899bfc8b..97fa16d472e22ece87344c19138ec5a5607aefac 100644 (file)
@@ -309,7 +309,10 @@ class TestRunner(FHDLTestCase):
                                 (test.name, int_reg, value))
 
         traces = [
-            'clk',  'fsm_state',
+            'clk',
+            {'comment': 'state machines'},
+            'fetch_pc_valid_i', 'fetch_pc_ready_o', 'fetch_fsm_state',
+            'fetch_insn_valid_o', 'fetch_insn_ready_i', 'fsm_state',
             {'comment': 'fetch and decode'},
             'cia[63:0]', 'nia[63:0]', 'pc[63:0]', 'raw_insn_i[31:0]',
             'raw_opcode_in[31:0]', 'insn_type',