bit of a reorg, adding option to test end of inner loops of SVSTATE(s)
author Luke Kenneth Casson Leighton <lkcl@lkcl.net>
Mon, 19 Jul 2021 19:45:25 +0000 (20:45 +0100)
committer Luke Kenneth Casson Leighton <lkcl@lkcl.net>
Mon, 19 Jul 2021 19:45:25 +0000 (20:45 +0100)
needed to pass the immediate to svstep as an option selecting which of
SVSTATE0-3 to test

openpower/isa/simplev.mdwn
src/openpower/decoder/isa/caller.py
src/openpower/decoder/isa/remap_fft_yield.py
src/openpower/decoder/isa/remapyield.py
src/openpower/decoder/isa/svstate.py
src/openpower/decoder/isa/test_caller_setvl.py
src/openpower/decoder/isa/test_caller_svp64_fft.py
src/openpower/decoder/power_pseudo.py
src/openpower/decoder/pseudo/parser.py

index b825e4904884b98467ec8b1517a0327733d73a9e..d76d823a719aa9d171559b1eb2d4cbda1301b880 100644 (file)
@@ -12,7 +12,7 @@ SVL-Form
 Pseudo-code:
 
     if (vf & (¬vs) & ¬(ms)) = 1 then
-        SVSTATE_NEXT
+        SVSTATE_NEXT(SVi)
     else
         VLimm <- SVi + 1
         if vs = 1 then
index c57434cb6a15e7367ca801f81de852ea7bf40534..7206e108c69e781b76ed0b73eed557f0e4b76918 100644 (file)
@@ -1113,8 +1113,10 @@ class ISACaller:
             return
 
         # this is for setvl "Vertical" mode: if set true,
-        # srcstep/dststep is explicitly advanced
+        # srcstep/dststep is explicitly advanced. mode says which SVSTATE to
+        # test for Rc=1 end condition.  3 bits of all 3 loops are put into CR0
         self.allow_next_step_inc = False
+        self.svstate_next_mode = 0
 
         # nop has to be supported, we could let the actual op calculate
         # but PowerDecoder has a pattern for nop
@@ -1201,6 +1203,7 @@ class ISACaller:
                      ]
             # go through all iterators in lock-step, advance to next remap_idx
             remap_idxs = []
+            self.remap_loopends = []
             for i, (shape, remap) in enumerate(remaps):
                 # zero is "disabled"
                 if shape.value == 0x0:
@@ -1208,10 +1211,11 @@ class ISACaller:
                 # pick src or dststep depending on reg num (0-2=in, 3-4=out)
                 step = dststep if (i in [3, 4]) else srcstep
                 # this is terrible.  O(N^2) looking for the match. but hey.
-                for idx, remap_idx in enumerate(remap):
+                for idx, (remap_idx, loopends) in enumerate(remap):
                     if idx == step:
                         break
                 remap_idxs.append(remap_idx)
+                self.remap_loopends.append(loopends)
 
             rremaps = []
             # now cross-index the required SHAPE for each of 3-in 2-out regs
@@ -1452,7 +1456,7 @@ class ISACaller:
         pre = False
         post = False
         if self.allow_next_step_inc:
-            log("SVSTATE_NEXT: inc requested")
+            log("SVSTATE_NEXT: inc requested, mode", self.svstate_next_mode)
             yield from self.svstate_pre_inc()
             pre = yield from self.update_new_svstate_steps()
             if pre:
@@ -1479,9 +1483,17 @@ class ISACaller:
                 if rc_en:
                     srcstep = self.svstate.srcstep
                     dststep = self.svstate.srcstep
-                    endtest = 0 if (end_src or end_dst) else 1
-                    results = [SelectableInt(endtest, 64)]
-                    self.handle_comparison(results) # CR0
+                    endtest = 1 if (end_src or end_dst) else 0
+                    #results = [SelectableInt(endtest, 64)]
+                    #self.handle_comparison(results) # CR0
+
+                    # see if svstep was requested, if so, which SVSTATE
+                    endings = 0b111
+                    if self.svstate_next_mode > 0:
+                        endings = self.remap_loopends[self.svstate_nextmode-1]
+                    cr_field = SelectableInt((~endings)<<1 | endtest, 4)
+                    print ("svstep Rc=1, CR0", cr_field)
+                    self.crl[0].eq(cr_field) # CR0
                 if end_src or end_dst:
                     # reset at end of loop including exit Vertical Mode
                     log ("SVSTATE_NEXT: after increments, reset")
@@ -1500,13 +1512,14 @@ class ISACaller:
 
         self.update_pc_next()
 
-    def SVSTATE_NEXT(self):
+    def SVSTATE_NEXT(self, mode):
         """explicitly moves srcstep/dststep on to next element, for
         "Vertical-First" mode.  this function is called from
         setvl pseudo-code, as a pseudo-op "svstep"
         """
-        log("SVSTATE_NEXT")
+        log("SVSTATE_NEXT mode", mode)
         self.allow_next_step_inc = True
+        self.svstate_next_mode = mode
 
     def svstate_pre_inc(self):
         """check if srcstep/dststep need to skip over masked-out predicate bits
index 824bc2ab6d2e38d8b61b630fc33f5d455060b0ff..ea9421b5e92df16e49f2d035ed80bfa361474fc2 100644 (file)
@@ -43,6 +43,7 @@ def iterate_butterfly_indices(SVSHAPE):
     skip = 0
     while True:
         for size in x_r:           # loop over 3rd order dimension (size)
+            x_end = size == x_r[-1]
             # y_r schedule depends on size
             halfsize = size // 2
             tablestep = n // size
@@ -52,6 +53,7 @@ def iterate_butterfly_indices(SVSHAPE):
             # invert if requested
             if SVSHAPE.invxyz[1]: y_r.reverse()
             for i in y_r:       # loop over 2nd order dimension
+                y_end = i == y_r[-1]
                 k_r = []
                 j_r = []
                 k = 0
@@ -63,10 +65,7 @@ def iterate_butterfly_indices(SVSHAPE):
                 if SVSHAPE.invxyz[2]: k_r.reverse()
                 if SVSHAPE.invxyz[2]: j_r.reverse()
                 for j, k in zip(j_r, k_r):   # loop over 1st order dimension
-                    # skip the first entries up to offset
-                    if skip < SVSHAPE.offset:
-                        skip += 1
-                        continue
+                    z_end = j == j_r[-1]
                     # now depending on MODE return the index
                     if SVSHAPE.skip == 0b00:
                         result = j              # for vec[j]
@@ -75,11 +74,15 @@ def iterate_butterfly_indices(SVSHAPE):
                     elif SVSHAPE.skip == 0b10:
                         result = k              # for exptable[k]
 
-                    yield result
+                    loopends = (z_end |
+                               ((y_end and z_end)<<1) |
+                                ((y_end and x_end and z_end)<<2))
+
+                    yield result + SVSHAPE.offset, loopends
 
 def demo():
     # set the dimension sizes here
-    xdim = 8
+    xdim = 16
     ydim = 0 # not needed
     zdim = 0 # again, not needed
 
@@ -145,11 +148,13 @@ def demo():
             prefix = "i %d\t" % i
             k = 0
             for j in range(i, i + halfsize):
-                jl, jh, ks = schedule[idx]
+                (jl, je), (jh, he), (ks, ke) = schedule[idx]
                 print ("  %-3d\t%s j=%-2d jh=%-2d k=%-2d -> "
-                        "j[jl=%-2d] j[jh=%-2d] exptable[k=%d]" % \
+                        "j[jl=%-2d] j[jh=%-2d] ex[k=%d]" % \
                                 (idx, prefix, j, j+halfsize, k,
-                                      jl, jh, ks))
+                                      jl, jh, ks,
+                                ),
+                                "end", bin(je)[2:], bin(je)[2:], bin(ke)[2:])
                 k += tablestep
                 idx += 1
         size *= 2
index a18cfd7ef6587e859acf6313de28c47dd05c2993..d0cfd295de4bc1823e8c319b916e5a44f4038cbb 100644 (file)
@@ -19,8 +19,11 @@ def iterate_indices(SVSHAPE):
     # start an infinite (wrapping) loop
     while True:
         for z in z_r:   # loop over 1st order dimension
+            z_end = z == z_r[-1]
             for y in y_r:       # loop over 2nd order dimension
+                y_end = y == y_r[-1]
                 for x in x_r:           # loop over 3rd order dimension
+                    x_end = x == x_r[-1]
                     # ok work out which order to construct things in.
                     # start by creating a list of tuples of the dimension
                     # and its limit
@@ -61,13 +64,17 @@ def iterate_indices(SVSHAPE):
                             result += idx # adds on this dimension
                             mult *= lim   # for the next dimension
 
-                    yield result + SVSHAPE.offset
+                    loopends = (x_end |
+                               ((y_end and x_end)<<1) |
+                                ((y_end and x_end and z_end)<<2))
+
+                    yield result + SVSHAPE.offset, loopends
 
 def demo():
     # set the dimension sizes here
     xdim = 3
     ydim = 2
-    zdim = 1
+    zdim = 4
 
     # set total (can repeat, e.g. VL=x*y*z*4)
     VL = xdim * ydim * zdim
@@ -84,10 +91,10 @@ def demo():
     SVSHAPE0.invxyz = [0,0,0] # inversion if desired
 
     # enumerate over the iterator function, getting new indices
-    for idx, new_idx in enumerate(iterate_indices(SVSHAPE0)):
+    for idx, (new_idx, end) in enumerate(iterate_indices(SVSHAPE0)):
         if idx >= VL:
             break
-        print ("%d->%d" % (idx, new_idx))
+        print ("%d->%d" % (idx, new_idx), "end", bin(end)[2:])
 
 # run the demo
 if __name__ == '__main__':
index 3ad12a777fea1c8ec0e6fa8d576505c22a8ed7cc..68c2a8b2c9fea8ddd51fa6cd7e99afb573c568e4 100644 (file)
@@ -1,7 +1,6 @@
-from openpower.decoder.selectable_int import (FieldSelectableInt, SelectableInt,
-                                        selectconcat)
-from openpower.decoder.isa.remapyield import iterate_indices
-from openpower.decoder.isa.remap_fft_yield import iterate_butterfly_indices
+from openpower.decoder.selectable_int import (FieldSelectableInt,
+                                              SelectableInt,
+                                                )
 from openpower.sv.svstate import SVSTATERec
 import os
 from copy import deepcopy
index a3f239b2b7cae4d9a50090c8a7a2f1c24f9847f6..2706332c1ad4a9b5915c5fc90a3099cdd4ae3c5c 100644 (file)
@@ -85,10 +85,10 @@ class DecoderTestCase(FHDLTestCase):
             self.assertEqual(sim.gpr(0), SelectableInt(0, 64))
             CR0 = sim.crl[0]
             print("      CR0", bin(CR0.get_range().value))
-            self.assertEqual(CR0[CRFields.EQ], 1)
+            self.assertEqual(CR0[CRFields.EQ], 0)
             self.assertEqual(CR0[CRFields.LT], 0)
             self.assertEqual(CR0[CRFields.GT], 0)
-            self.assertEqual(CR0[CRFields.SO], 0)
+            self.assertEqual(CR0[CRFields.SO], 1)
 
     def test__svstep_3(self):
         """tests svstep when it *doesn't* reach VL
@@ -125,7 +125,7 @@ class DecoderTestCase(FHDLTestCase):
             print("      CR0", bin(CR0.get_range().value))
             self.assertEqual(CR0[CRFields.EQ], 0)
             self.assertEqual(CR0[CRFields.LT], 0)
-            self.assertEqual(CR0[CRFields.GT], 1)
+            self.assertEqual(CR0[CRFields.GT], 0)
             self.assertEqual(CR0[CRFields.SO], 0)
 
 
@@ -242,20 +242,20 @@ class DecoderTestCase(FHDLTestCase):
             self.assertEqual(sim.svstate.vfirst, 0)
             CR0 = sim.crl[0]
             print("      CR0", bin(CR0.get_range().value))
-            self.assertEqual(CR0[CRFields.EQ], 1)
+            self.assertEqual(CR0[CRFields.EQ], 0)
             self.assertEqual(CR0[CRFields.LT], 0)
             self.assertEqual(CR0[CRFields.GT], 0)
-            self.assertEqual(CR0[CRFields.SO], 0)
+            self.assertEqual(CR0[CRFields.SO], 1)
 
             # check registers as expected
             self._check_regs(sim, expected_regs)
 
-    def test__svstep_add_2(self):
+    def test_svstep_add_2(self):
         """tests svstep with a branch.
         lst = SVP64Asm(["setvl 3, 0, 2, 1, 1, 1",
                         'sv.add 1.v, 5.v, 9.v',
                         "setvl. 0, 0, 1, 1, 0, 0",
-                        "bc 4, 2, -0xc"
+                        "bc 6, 3, -0xc"
                         ])
         sequence is as follows:
         * setvl sets VL=2 but also "Vertical First" mode.
@@ -281,7 +281,7 @@ class DecoderTestCase(FHDLTestCase):
         lst = SVP64Asm(["setvl 3, 0, 2, 1, 1, 1",
                         'sv.add 1.v, 5.v, 9.v',
                         "setvl. 0, 0, 1, 1, 0, 0", # svstep - this is 64-bit!
-                        "bc 4, 2, -0xc" # branch to add (64-bit op so -0xc!)
+                        "bc 6, 3, -0xc" # branch to add (64-bit op so -0xc!)
                         ])
         lst = list(lst)
 
@@ -320,10 +320,10 @@ class DecoderTestCase(FHDLTestCase):
             self.assertEqual(sim.svstate.vfirst, 0)
             CR0 = sim.crl[0]
             print("      CR0", bin(CR0.get_range().value))
-            self.assertEqual(CR0[CRFields.EQ], 1)
+            self.assertEqual(CR0[CRFields.EQ], 0)
             self.assertEqual(CR0[CRFields.LT], 0)
             self.assertEqual(CR0[CRFields.GT], 0)
-            self.assertEqual(CR0[CRFields.SO], 0)
+            self.assertEqual(CR0[CRFields.SO], 1)
 
             # check registers as expected
             self._check_regs(sim, expected_regs)
index b6a6efa65d78c4c61dbb6cea2178c1b5cd8890c0..28aca452946edbbca95e372e26be3b4d8626ff03 100644 (file)
@@ -200,7 +200,7 @@ class FFTTestCase(FHDLTestCase):
                              "svremap 31, 1, 0, 2, 0, 1, 0",
                             "sv.ffmadds 0.v, 0.v, 0.v, 8.v",
                             "setvl. 0, 0, 1, 1, 0, 0",
-                            "bc 4, 2, -16"
+                            "bc 6, 3, -16"
                             ])
             runs a full in-place O(N log2 N) butterfly schedule for
             Discrete Fourier Transform.  this version however uses
@@ -215,7 +215,7 @@ class FFTTestCase(FHDLTestCase):
                          "svremap 31, 1, 0, 2, 0, 1, 0",
                         "sv.ffmadds 0.v, 0.v, 0.v, 8.v",
                         "setvl. 0, 0, 1, 1, 0, 0",
-                        "bc 4, 2, -16"
+                        "bc 6, 3, -16"
                         ])
         lst = list(lst)
 
@@ -287,7 +287,7 @@ class FFTTestCase(FHDLTestCase):
                          "svremap 26, 0, 0, 0, 0, 1, 1",
                         "sv.ffadds 0.v, 24, 0.v",
                         "setvl. 0, 0, 1, 1, 0, 0",
-                        "bc 4, 2, -28"
+                        "bc 6, 3, -28"
                             ])
 
             runs a full in-place O(N log2 N) butterfly schedule for
@@ -326,7 +326,7 @@ class FFTTestCase(FHDLTestCase):
                          "svremap 26, 0, 0, 0, 0, 1, 0",
                         "sv.ffadds 0.v, 24, 0.v",
                         "setvl. 0, 0, 1, 1, 0, 0",
-                        "bc 4, 2, -28"
+                        "bc 6, 3, -28"
                         ])
         lst = list(lst)
 
@@ -554,7 +554,7 @@ class FFTTestCase(FHDLTestCase):
 
                         # svstep loop
                         "setvl. 0, 0, 1, 1, 0, 0",
-                        "bc 4, 2, -56"
+                        "bc 6, 3, -56"
                         ])
         lst = list(lst)
 
index 9e4bc81ddb0bd780ac79d7ad338d702cf6ad6bfc..fde36b250f8b5ec5bdda8ebc443095555dd07ef7 100644 (file)
@@ -181,7 +181,12 @@ hextest = """
 RT <- 0x0001_a000_0000_0000
 """
 
-code = hextest
+SVSTATE_next = """
+SVSTATE_NEXT(5)
+"""
+
+code = SVSTATE_next
+#code = hextest
 #code = lswx
 #code = testcond
 #code = testdo
@@ -243,7 +248,7 @@ def test():
     gsc.regfile = {}
     for i in range(32):
         gsc.regfile[i] = i
-    gsc.gpr = GPR(gsc.parser.sd, gsc.regfile)
+    gsc.gpr = GPR(None, None, None, gsc.regfile)
     gsc.mem = Mem()
 
     _compile = gsc.compile
index 2d6d295e4f8c78d25a57925371b4d956ca2a1772..4f11f6f14c996caf50ddb5a83e99c2a9e1e707c7 100644 (file)
@@ -201,6 +201,10 @@ def apply_trailer(atom, trailer, read_regs):
                 name = arg.id
                 if name in regs + fregs:
                     read_regs.add(name)
+        # special-case, function named "SVSTATE_NEXT" must be made "self.xxxx"
+        if atom.id == 'SVSTATE_NEXT':
+            name = ast.Name("self", ast.Load())
+            atom = ast.Attribute(name, atom, ast.Load())
         return ast.Call(atom, trailer[1], [])
         # if p[1].id == 'print':
         #    p[0] = ast.Printnl(ast.Tuple(p[2][1]), None, None)