Move DECODE_SV to its place between MASK_WAIT and INSN_EXECUTE
[soc.git] / src / soc / simple / issuer.py
index cc70996d5b10865b482c19387fc75ffae6a3f86c..9b7ed9c59d4f23a4ecfe37ef9e48137c5d1e98bb 100644 (file)
@@ -77,18 +77,23 @@ def get_predint(m, mask, name):
     this is identical to the equivalent function in ISACaller except that
     it doesn't read the INT directly, it just decodes "what needs to be done"
     i.e. which INT reg, whether it is shifted and whether it is bit-inverted.
+
+    * all1s is set to indicate that no mask is to be applied.
+    * regread indicates the GPR register number to be read
+    * invert is set to indicate that the register value is to be inverted
+    * unary indicates that the register value is a shift amount: mask = 1<<r3
     """
     comb = m.d.comb
     regread = Signal(5, name=name+"regread")
     invert = Signal(name=name+"invert")
     unary = Signal(name=name+"unary")
+    all1s = Signal(name=name+"all1s")
     with m.Switch(mask):
         with m.Case(SVP64PredInt.ALWAYS.value):
-            comb += regread.eq(0)
-            comb += invert.eq(1)
+            comb += all1s.eq(1)      # use 0b1111 (all ones)
         with m.Case(SVP64PredInt.R3_UNARY.value):
             comb += regread.eq(3)
-            comb += unary.eq(1)
+            comb += unary.eq(1)        # 1<<r3 - shift r3 (single bit)
         with m.Case(SVP64PredInt.R3.value):
             comb += regread.eq(3)
         with m.Case(SVP64PredInt.R3_N.value):
@@ -104,7 +109,7 @@ def get_predint(m, mask, name):
         with m.Case(SVP64PredInt.R30_N.value):
             comb += regread.eq(30)
             comb += invert.eq(1)
-    return regread, invert, unary
+    return regread, invert, unary, all1s
 
 def get_predcr(m, mask, name):
     """decode SVP64 predicate CR to reg number field and invert status
@@ -153,6 +158,10 @@ class TestIssuerInternal(Elaboratable):
         # test is SVP64 is to be enabled
         self.svp64_en = hasattr(pspec, "svp64") and (pspec.svp64 == True)
 
+        # and if regfiles are reduced
+        self.regreduce_en = (hasattr(pspec, "regreduce") and
+                                            (pspec.regreduce == True))
+
         # JTAG interface.  add this right at the start because if it's
         # added it *modifies* the pspec, by adding enable/disable signals
         # for parts of the rest of the core
@@ -202,7 +211,8 @@ class TestIssuerInternal(Elaboratable):
         self.cur_state = CoreState("cur") # current state (MSR/PC/SVSTATE)
         self.pdecode2 = PowerDecode2(pdecode, state=self.cur_state,
                                      opkls=IssuerDecode2ToOperand,
-                                     svp64_en=self.svp64_en)
+                                     svp64_en=self.svp64_en,
+                                     regreduce_en=self.regreduce_en)
         if self.svp64_en:
             self.svp64 = SVP64PrefixDecoder() # for decoding SVP64 prefix
 
@@ -236,9 +246,10 @@ class TestIssuerInternal(Elaboratable):
         self.cr_r = crrf.r_ports['full_cr_dbg'] # CR read
         self.xer_r = xerrf.r_ports['full_xer'] # XER read
 
-        # for predication
-        self.int_pred = intrf.r_ports['pred'] # INT predicate read
-        self.cr_pred = crrf.r_ports['cr_pred'] # CR predicate read
+        if self.svp64_en:
+            # for predication
+            self.int_pred = intrf.r_ports['pred'] # INT predicate read
+            self.cr_pred = crrf.r_ports['cr_pred'] # CR predicate read
 
         # hack method of keeping an eye on whether branch/trap set the PC
         self.state_nia = self.core.regs.rf['state'].w_ports['nia']
@@ -348,7 +359,8 @@ class TestIssuerInternal(Elaboratable):
                     # here or maybe even in INSN_READ state, if svp64_mode
                     # detected, in order to trigger - and wait for - the
                     # predicate reading.
-                    pmode = pdecode2.rm_dec.predmode
+                    if self.svp64_en:
+                        pmode = pdecode2.rm_dec.predmode
                     """
                     if pmode != SVP64PredMode.ALWAYS.value:
                         fire predicate loading FSM and wait before
@@ -378,6 +390,8 @@ class TestIssuerInternal(Elaboratable):
         later, a faster way would be to use the 32-bit-wide CR port but
         this is more complex decoding, here.  equivalent code used in
         ISACaller is "from soc.decoder.isa.caller import get_predcr"
+
+        note: this ENTIRE FSM is not to be called when svp64 is disabled
         """
         comb = m.d.comb
         sync = m.d.sync
@@ -406,8 +420,8 @@ class TestIssuerInternal(Elaboratable):
         #               comd += self.srcmask[cr_idx].eq(inv ^ cr_bit)
 
         # decode predicates
-        sregread, sinvert, sunary = get_predint(m, srcpred, 's')
-        dregread, dinvert, dunary = get_predint(m, dstpred, 'd')
+        sregread, sinvert, sunary, sall1s = get_predint(m, srcpred, 's')
+        dregread, dinvert, dunary, dall1s = get_predint(m, dstpred, 'd')
         sidx, scrinvert = get_predcr(m, srcpred, 's')
         didx, dcrinvert = get_predcr(m, dstpred, 'd')
 
@@ -418,7 +432,7 @@ class TestIssuerInternal(Elaboratable):
                 with m.If(pred_insn_valid_i):
                     with m.If(predmode == SVP64PredMode.INT):
                         # skip fetching destination mask register, when zero
-                        with m.If(dregread == 0):
+                        with m.If(dall1s):
                             sync += self.dstmask.eq(-1)
                             # directly go to fetch source mask register
                             # guaranteed not to be zero (otherwise predmode
@@ -441,7 +455,7 @@ class TestIssuerInternal(Elaboratable):
                 inv = Repl(dinvert, 64)
                 sync += self.dstmask.eq(self.int_pred.data_o ^ inv)
                 # skip fetching source mask register, when zero
-                with m.If(sregread == 0):
+                with m.If(sall1s):
                     sync += self.srcmask.eq(-1)
                     m.next = "FETCH_PRED_DONE"
                 # fetch source predicate register
@@ -527,17 +541,10 @@ class TestIssuerInternal(Elaboratable):
                         comb += update_svstate.eq(1)
                         sync += sv_changed.eq(1)
 
-            # decode the instruction when it arrives
+            # wait for an instruction to arrive from Fetch
             with m.State("INSN_WAIT"):
                 comb += fetch_insn_ready_i.eq(1)
                 with m.If(fetch_insn_valid_o):
-                    # decode the instruction
-                    sync += core.e.eq(pdecode2.e)
-                    sync += core.state.eq(cur_state)
-                    sync += core.raw_insn_i.eq(dec_opcode_i)
-                    sync += core.bigendian_i.eq(self.core_bigendian_i)
-                    # set RA_OR_ZERO detection in satellite decoders
-                    sync += core.sv_a_nz.eq(pdecode2.sv_a_nz)
                     # loop into ISSUE_START if it's a SVP64 instruction
                     # and VL == 0.  this because VL==0 is a for-loop
                     # from 0 to 0 i.e. always, always a NOP.
@@ -551,7 +558,10 @@ class TestIssuerInternal(Elaboratable):
                         comb += self.insn_done.eq(1)
                         m.next = "ISSUE_START"
                     with m.Else():
-                        m.next = "PRED_START"  # start fetching the predicate
+                        if self.svp64_en:
+                            m.next = "PRED_START"  # start fetching predicate
+                        else:
+                            m.next = "DECODE_SV"  # skip predication
 
             with m.State("PRED_START"):
                 comb += pred_insn_valid_i.eq(1)  # tell fetch_pred to start
@@ -561,43 +571,56 @@ class TestIssuerInternal(Elaboratable):
             with m.State("MASK_WAIT"):
                 comb += pred_mask_ready_i.eq(1) # ready to receive the masks
                 with m.If(pred_mask_valid_o): # predication masks are ready
-                    m.next = "INSN_EXECUTE"
-
-            # handshake with execution FSM, move to "wait" once acknowledged
-            with m.State("INSN_EXECUTE"):
-                # with m.If(is_svp64_mode):
-                #    TODO advance src/dst step to "skip" over predicated-out
-                #    from self.srcmask and self.dstmask
-                #    https://bugs.libre-soc.org/show_bug.cgi?id=617#c3
-                #    but still without exceeding VL in either case
-                # IMPORTANT: when changing src/dest step, have to
-                # jump to m.next = "DECODE_SV" to deal with the change in
-                # SVSTATE
-
-                with m.If(is_svp64_mode):
+                    # with m.If(is_svp64_mode):
+                    #    TODO advance src/dst step to "skip" over predicated-out
+                    #    from self.srcmask and self.dstmask
+                    #    https://bugs.libre-soc.org/show_bug.cgi?id=617#c3
+                    #    but still without exceeding VL in either case
+                    # IMPORTANT: when changing src/dest step, have to
+                    # jump to m.next = "DECODE_SV" to deal with the change in
+                    # SVSTATE
+
+                    with m.If(is_svp64_mode):
+                        if self.svp64_en:
+                            pred_src_zero = pdecode2.rm_dec.pred_sz
+                            pred_dst_zero = pdecode2.rm_dec.pred_dz
+
+                        """
+                        TODO: actually, can use
+                        PriorityEncoder(self.srcmask | (1<<cur_srcstep))
+
+                        if not pred_src_zero:
+                            if (((1<<cur_srcstep) & self.srcmask) == 0) and
+                                  (cur_srcstep != vl):
+                                comb += update_svstate.eq(1)
+                                comb += new_svstate.srcstep.eq(next_srcstep)
 
-                    pred_src_zero = pdecode2.rm_dec.pred_sz
-                    pred_dst_zero = pdecode2.rm_dec.pred_dz
+                        if not pred_dst_zero:
+                            if (((1<<cur_dststep) & self.dstmask) == 0) and
+                                  (cur_dststep != vl):
+                                comb += new_svstate.dststep.eq(next_dststep)
+                                comb += update_svstate.eq(1)
 
-                    """
-                    if not pred_src_zero:
-                        if (((1<<cur_srcstep) & self.srcmask) == 0) and
-                              (cur_srcstep != vl):
-                            comb += update_svstate.eq(1)
-                            comb += new_svstate.srcstep.eq(next_srcstep)
-                            sync += sv_changed.eq(1)
+                        if update_svstate:
+                            m.next = "DECODE_SV"
+                        """
 
-                    if not pred_dst_zero:
-                        if (((1<<cur_dststep) & self.dstmask) == 0) and
-                              (cur_dststep != vl):
-                            comb += new_svstate.dststep.eq(next_dststep)
-                            comb += update_svstate.eq(1)
-                            sync += sv_changed.eq(1)
+                    m.next = "DECODE_SV"
 
-                    if update_svstate:
-                        m.next = "DECODE_SV"
-                    """
+            # after src/dst step have been updated, we are ready
+            # to decode the instruction
+            with m.State("DECODE_SV"):
+                # decode the instruction
+                sync += core.e.eq(pdecode2.e)
+                sync += core.state.eq(cur_state)
+                sync += core.raw_insn_i.eq(dec_opcode_i)
+                sync += core.bigendian_i.eq(self.core_bigendian_i)
+                # set RA_OR_ZERO detection in satellite decoders
+                sync += core.sv_a_nz.eq(pdecode2.sv_a_nz)
+                m.next = "INSN_EXECUTE"  # move to "execute"
 
+            # handshake with execution FSM, move to "wait" once acknowledged
+            with m.State("INSN_EXECUTE"):
                 comb += exec_insn_valid_i.eq(1) # trigger execute
                 with m.If(exec_insn_ready_o):   # execute acknowledged us
                     m.next = "EXECUTE_WAIT"
@@ -658,17 +681,6 @@ class TestIssuerInternal(Elaboratable):
                         comb += update_svstate.eq(1)
                         sync += sv_changed.eq(1)
 
-            # need to decode the instruction again, after updating SRCSTEP
-            # in the previous state.
-            # mostly a copy of INSN_WAIT, but without the actual wait
-            with m.State("DECODE_SV"):
-                # decode the instruction
-                sync += core.e.eq(pdecode2.e)
-                sync += core.state.eq(cur_state)
-                sync += core.bigendian_i.eq(self.core_bigendian_i)
-                sync += core.sv_a_nz.eq(pdecode2.sv_a_nz)
-                m.next = "INSN_EXECUTE"  # move to "execute"
-
         # check if svstate needs updating: if so, write it to State Regfile
         with m.If(update_svstate):
             comb += self.state_w_sv.wen.eq(1<<StateRegs.SVSTATE)
@@ -895,9 +907,10 @@ class TestIssuerInternal(Elaboratable):
                        exec_insn_valid_i, exec_insn_ready_o,
                        exec_pc_valid_o, exec_pc_ready_i)
 
-        self.fetch_predicate_fsm(m,
-                                 pred_insn_valid_i, pred_insn_ready_o,
-                                 pred_mask_valid_o, pred_mask_ready_i)
+        if self.svp64_en:
+            self.fetch_predicate_fsm(m,
+                                     pred_insn_valid_i, pred_insn_ready_o,
+                                     pred_mask_valid_o, pred_mask_ready_i)
 
         self.execute_fsm(m, core, pc_changed, sv_changed,
                          exec_insn_valid_i, exec_insn_ready_o,