add inner sub-loop testing from svstep Rc=1
author Luke Kenneth Casson Leighton <lkcl@lkcl.net>
Tue, 20 Jul 2021 13:06:04 +0000 (14:06 +0100)
committer Luke Kenneth Casson Leighton <lkcl@lkcl.net>
Tue, 20 Jul 2021 13:06:04 +0000 (14:06 +0100)
src/openpower/decoder/isa/caller.py
src/openpower/decoder/isa/remap_fft_yield.py
src/openpower/decoder/isa/test_caller_setvl.py

index 7206e108c69e781b76ed0b73eed557f0e4b76918..ab0f8ce99dec27453e080e6f21a1d0ef695ed6d7 100644 (file)
@@ -1019,6 +1019,41 @@ class ISACaller:
                 asmop = 'mtcrf'
         return asmop
 
+    def get_remap_indices(self):
+        # go through all iterators in lock-step, advance to next remap_idx
+        srcstep, dststep = self.new_srcstep, self.new_dststep
+        # get four SVSHAPEs. here we are hard-coding
+        SVSHAPE0 = self.spr['SVSHAPE0']
+        SVSHAPE1 = self.spr['SVSHAPE1']
+        SVSHAPE2 = self.spr['SVSHAPE2']
+        SVSHAPE3 = self.spr['SVSHAPE3']
+        # set up the iterators
+        remaps = [(SVSHAPE0, SVSHAPE0.get_iterator()),
+                  (SVSHAPE1, SVSHAPE1.get_iterator()),
+                  (SVSHAPE2, SVSHAPE2.get_iterator()),
+                  (SVSHAPE3, SVSHAPE3.get_iterator()),
+                 ]
+
+        remap_idxs = []
+        self.remap_loopends = [0] * 4
+        dbg = []
+        for i, (shape, remap) in enumerate(remaps):
+            # zero is "disabled". FIXME: falls through, so a 2nd index is appended
+            if shape.value == 0x0:
+                remap_idxs.append(0)
+            # pick src or dststep depending on reg num (0-2=in, 3-4=out)
+            step = dststep if (i in [3, 4]) else srcstep
+            # this is terrible.  O(N^2) looking for the match. but hey.
+            for idx, (remap_idx, loopends) in enumerate(remap):
+                if idx == step:
+                    break
+            remap_idxs.append(remap_idx)
+            self.remap_loopends[i] = loopends
+            dbg.append((i, step, remap_idx, loopends))
+        for (i, step, remap_idx, loopends) in dbg:
+            log ("SVSHAPE %d idx, end" % i, step, remap_idx, bin(loopends))
+        return remap_idxs, remaps
+
     def get_spr_msb(self):
         dec_insn = yield self.dec2.e.do.insn
         return dec_insn & (1 << 20) != 0  # sigh - XFF.spr[-1]?
@@ -1169,11 +1204,6 @@ class ISACaller:
         yield self.dec2.remap_active.eq(remap_en if active else 0)
         yield Settle()
         if self.is_svp64_mode and (persist or self.last_op_svshape):
-            # get four SVSHAPEs. here we are hard-coding
-            SVSHAPE0 = self.spr['SVSHAPE0']
-            SVSHAPE1 = self.spr['SVSHAPE1']
-            SVSHAPE2 = self.spr['SVSHAPE2']
-            SVSHAPE3 = self.spr['SVSHAPE3']
             # just some convenient debug info
             for i in range(4):
                 sname = 'SVSHAPE%d' % i
@@ -1195,28 +1225,7 @@ class ISACaller:
                      (self.dec2.o_step, mo0),   # RT
                      (self.dec2.o2_step, mo1),   # EA
                     ]
-            # set up the iterators
-            remaps = [(SVSHAPE0, SVSHAPE0.get_iterator()),
-                      (SVSHAPE1, SVSHAPE1.get_iterator()),
-                      (SVSHAPE2, SVSHAPE2.get_iterator()),
-                      (SVSHAPE3, SVSHAPE3.get_iterator()),
-                     ]
-            # go through all iterators in lock-step, advance to next remap_idx
-            remap_idxs = []
-            self.remap_loopends = []
-            for i, (shape, remap) in enumerate(remaps):
-                # zero is "disabled"
-                if shape.value == 0x0:
-                    remap_idxs.append(0)
-                # pick src or dststep depending on reg num (0-2=in, 3-4=out)
-                step = dststep if (i in [3, 4]) else srcstep
-                # this is terrible.  O(N^2) looking for the match. but hey.
-                for idx, (remap_idx, loopends) in enumerate(remap):
-                    if idx == step:
-                        break
-                remap_idxs.append(remap_idx)
-                self.remap_loopends.append(loopends)
-
+            remap_idxs, remaps = self.get_remap_indices()
             rremaps = []
             # now cross-index the required SHAPE for each of 3-in 2-out regs
             rnames = ['RA', 'RB', 'RC', 'RT', 'EA']
@@ -1230,7 +1239,7 @@ class ISACaller:
                 yield dstep.eq(remap_idx)
 
                 # debug printout info
-                rremaps.append((shape.mode, i, rnames[i], step, shape_idx,
+                rremaps.append((shape.mode, i, rnames[i], shape_idx,
                                 remap_idx))
             for x in rremaps:
                 log ("shape remap", x)
@@ -1471,6 +1480,7 @@ class ISACaller:
             else:
                 log ("SVSTATE_NEXT: post-inc")
                 srcstep, dststep = self.new_srcstep, self.new_dststep
+                remap_idxs, remaps = self.get_remap_indices()
                 vl = self.svstate.vl
                 end_src = srcstep == vl-1
                 end_dst = dststep == vl-1
@@ -1490,7 +1500,8 @@ class ISACaller:
                     # see if svstep was requested, if so, which SVSTATE
                     endings = 0b111
                     if self.svstate_next_mode > 0:
-                        endings = self.remap_loopends[self.svstate_nextmode-1]
+                        shape_idx = self.svstate_next_mode.value-1
+                        endings = self.remap_loopends[shape_idx]
                     cr_field = SelectableInt((~endings)<<1 | endtest, 4)
                     print ("svstep Rc=1, CR0", cr_field)
                     self.crl[0].eq(cr_field) # CR0
index ea9421b5e92df16e49f2d035ed80bfa361474fc2..422c2187867bba75c5a33d395e74d2d1081199d1 100644 (file)
@@ -82,7 +82,7 @@ def iterate_butterfly_indices(SVSHAPE):
 
 def demo():
     # set the dimension sizes here
-    xdim = 16
+    xdim = 8
     ydim = 0 # not needed
     zdim = 0 # again, not needed
 
index 2706332c1ad4a9b5915c5fc90a3099cdd4ae3c5c..d1adae8c26878286fd30232263cb9c4d7747492f 100644 (file)
@@ -23,7 +23,7 @@ class DecoderTestCase(FHDLTestCase):
         for i in range(32):
             self.assertEqual(sim.gpr(i), SelectableInt(expected[i], 64))
 
-    def test__svstep_1(self):
+    def test_svstep_1(self):
         lst = SVP64Asm(["setvl 0, 0, 10, 1, 1, 1", # actual setvl (VF mode)
                         "setvl 0, 0, 1, 1, 0, 0", # svstep
                         "setvl 0, 0, 1, 1, 0, 0" # svstep
@@ -52,7 +52,7 @@ class DecoderTestCase(FHDLTestCase):
             print("      gpr1", sim.gpr(0))
             self.assertEqual(sim.gpr(0), SelectableInt(0, 64))
 
-    def test__svstep_2(self):
+    def test_svstep_2(self):
         """tests svstep when it reaches VL
         """
         lst = SVP64Asm(["setvl 0, 0, 2, 1, 1, 1",  # actual setvl (VF mode)
@@ -90,7 +90,7 @@ class DecoderTestCase(FHDLTestCase):
             self.assertEqual(CR0[CRFields.GT], 0)
             self.assertEqual(CR0[CRFields.SO], 1)
 
-    def test__svstep_3(self):
+    def test_svstep_3(self):
         """tests svstep when it *doesn't* reach VL
         """
         lst = SVP64Asm(["setvl 0, 0, 3, 1, 1, 1",  # actual setvl (VF mode)
@@ -129,7 +129,7 @@ class DecoderTestCase(FHDLTestCase):
             self.assertEqual(CR0[CRFields.SO], 0)
 
 
-    def test__setvl_1(self):
+    def test_setvl_1(self):
         """straight setvl, testing if VL and MVL are over-ridden
         """
         lst = SVP64Asm(["setvl 1, 0, 10, 0, 1, 1",
@@ -153,7 +153,134 @@ class DecoderTestCase(FHDLTestCase):
             print("      gpr1", sim.gpr(1))
             self.assertEqual(sim.gpr(1), SelectableInt(10, 64))
 
-    def test__sv_add(self):
+    def test_svstep_inner_loop_6(self):
+        """tests svstep inner loop, running 6 times, looking for "k"
+        """
+        lst = SVP64Asm([
+                        # set triple butterfly mode with persistent "REMAP"
+                        "svshape 8, 1, 1, 1, 1",
+                        "svremap 31, 1, 0, 2, 0, 1, 1",
+                        "setvl. 0, 0, 2, 1, 0, 0",# svstep (Rc=1)
+                        "setvl. 0, 0, 2, 1, 0, 0",# svstep (Rc=1)
+                        "setvl. 0, 0, 2, 1, 0, 0",# svstep (Rc=1)
+                        "setvl. 0, 0, 2, 1, 0, 0",# svstep (Rc=1)
+                        "setvl. 0, 0, 2, 1, 0, 0",# svstep (Rc=1)
+                        "setvl. 0, 0, 2, 1, 0, 0",# svstep (Rc=1)
+                        ])
+        lst = list(lst)
+
+        # SVSTATE
+        svstate = SVP64State()
+        #svstate.vl = 2 # VL
+        #svstate.maxvl = 2 # MAXVL
+        print ("SVSTATE", bin(svstate.asint()))
+
+        with Program(lst, bigendian=False) as program:
+            sim = self.run_tst_program(program, svstate=svstate)
+            print ("SVSTATE after", bin(sim.svstate.asint()))
+            print ("        vl", bin(sim.svstate.vl))
+            print ("        mvl", bin(sim.svstate.maxvl))
+            print ("    srcstep", bin(sim.svstate.srcstep))
+            print ("    dststep", bin(sim.svstate.dststep))
+            print ("     vfirst", bin(sim.svstate. vfirst))
+            self.assertEqual(sim.svstate.vl, 12)
+            self.assertEqual(sim.svstate.maxvl, 12)
+            # svstep called six times, didn't reach VL, so srcstep/dststep both 6
+            self.assertEqual(sim.svstate.srcstep, 6)
+            self.assertEqual(sim.svstate.dststep, 6)
+            self.assertEqual(sim.gpr(0), SelectableInt(0, 64))
+            self.assertEqual(sim.svstate.vfirst, 1)
+            CR0 = sim.crl[0]
+            print("      CR0", bin(CR0.get_range().value))
+            self.assertEqual(CR0[CRFields.EQ], 0)
+            self.assertEqual(CR0[CRFields.LT], 1)
+            self.assertEqual(CR0[CRFields.GT], 1)
+            self.assertEqual(CR0[CRFields.SO], 0)
+
+    def test_svstep_inner_loop_3(self):
+        """tests svstep inner loop, running 3 times
+        """
+        lst = SVP64Asm([
+                        # set triple butterfly mode with persistent "REMAP"
+                        "svshape 8, 1, 1, 1, 1",
+                        "svremap 31, 1, 0, 2, 0, 1, 1",
+                        "setvl. 0, 0, 2, 1, 0, 0",# svstep (Rc=1)
+                        "setvl. 0, 0, 2, 1, 0, 0",# svstep (Rc=1)
+                        "setvl. 0, 0, 2, 1, 0, 0", # svstep (Rc=1)
+                        ])
+        lst = list(lst)
+
+        # SVSTATE
+        svstate = SVP64State()
+        #svstate.vl = 2 # VL
+        #svstate.maxvl = 2 # MAXVL
+        print ("SVSTATE", bin(svstate.asint()))
+
+        with Program(lst, bigendian=False) as program:
+            sim = self.run_tst_program(program, svstate=svstate)
+            print ("SVSTATE after", bin(sim.svstate.asint()))
+            print ("        vl", bin(sim.svstate.vl))
+            print ("        mvl", bin(sim.svstate.maxvl))
+            print ("    srcstep", bin(sim.svstate.srcstep))
+            print ("    dststep", bin(sim.svstate.dststep))
+            print ("     vfirst", bin(sim.svstate. vfirst))
+            self.assertEqual(sim.svstate.vl, 12)
+            self.assertEqual(sim.svstate.maxvl, 12)
+            # svstep called three times, didn't reach VL, so srcstep/dststep both 3
+            self.assertEqual(sim.svstate.srcstep, 3)
+            self.assertEqual(sim.svstate.dststep, 3)
+            self.assertEqual(sim.gpr(0), SelectableInt(0, 64))
+            self.assertEqual(sim.svstate.vfirst, 1)
+            CR0 = sim.crl[0]
+            print("      CR0", bin(CR0.get_range().value))
+            self.assertEqual(CR0[CRFields.EQ], 0)
+            self.assertEqual(CR0[CRFields.LT], 1)
+            self.assertEqual(CR0[CRFields.GT], 1)
+            self.assertEqual(CR0[CRFields.SO], 0)
+
+    def test_svstep_inner_loop_4(self):
+        """tests svstep inner loop, running 4 times
+        """
+        lst = SVP64Asm([
+                        # set triple butterfly mode with persistent "REMAP"
+                        "svshape 8, 1, 1, 1, 1",
+                        "svremap 31, 1, 0, 2, 0, 1, 1",
+                        "setvl. 0, 0, 2, 1, 0, 0",# svstep (Rc=1)
+                        "setvl. 0, 0, 2, 1, 0, 0",# svstep (Rc=1)
+                        "setvl. 0, 0, 2, 1, 0, 0", # svstep (Rc=1)
+                        "setvl. 0, 0, 2, 1, 0, 0", # svstep (Rc=1)
+                        ])
+        lst = list(lst)
+
+        # SVSTATE
+        svstate = SVP64State()
+        #svstate.vl = 2 # VL
+        #svstate.maxvl = 2 # MAXVL
+        print ("SVSTATE", bin(svstate.asint()))
+
+        with Program(lst, bigendian=False) as program:
+            sim = self.run_tst_program(program, svstate=svstate)
+            print ("SVSTATE after", bin(sim.svstate.asint()))
+            print ("        vl", bin(sim.svstate.vl))
+            print ("        mvl", bin(sim.svstate.maxvl))
+            print ("    srcstep", bin(sim.svstate.srcstep))
+            print ("    dststep", bin(sim.svstate.dststep))
+            print ("     vfirst", bin(sim.svstate. vfirst))
+            self.assertEqual(sim.svstate.vl, 12)
+            self.assertEqual(sim.svstate.maxvl, 12)
+            # svstep called four times, didn't reach VL, so srcstep/dststep both 4
+            self.assertEqual(sim.svstate.srcstep, 4)
+            self.assertEqual(sim.svstate.dststep, 4)
+            self.assertEqual(sim.gpr(0), SelectableInt(0, 64))
+            self.assertEqual(sim.svstate.vfirst, 1)
+            CR0 = sim.crl[0]
+            print("      CR0", bin(CR0.get_range().value))
+            self.assertEqual(CR0[CRFields.EQ], 0)
+            self.assertEqual(CR0[CRFields.LT], 1)
+            self.assertEqual(CR0[CRFields.GT], 0)
+            self.assertEqual(CR0[CRFields.SO], 0)
+
+    def test_sv_add(self):
         """sets VL=2 then adds:
            * 1 = 5 + 9   => 0x5555 = 0x4321+0x1234
            * 2 = 6 + 10  => 0x3334 = 0x2223+0x1111
@@ -181,7 +308,7 @@ class DecoderTestCase(FHDLTestCase):
             sim = self.run_tst_program(program, initial_regs)
             self._check_regs(sim, expected_regs)
 
-    def test__svstep_add_1(self):
+    def test_svstep_add_1(self):
         """tests svstep with an add, when it reaches VL
         lst = SVP64Asm(["setvl 3, 0, 2, 1, 1, 1",
                         'sv.add 1.v, 5.v, 9.v',