add first functional confirmed unit test for parallel reduce REMAP
author: Luke Kenneth Casson Leighton <lkcl@lkcl.net>
Tue, 6 Sep 2022 14:50:35 +0000 (15:50 +0100)
committer: Luke Kenneth Casson Leighton <lkcl@lkcl.net>
Tue, 6 Sep 2022 14:50:35 +0000 (15:50 +0100)
https://bugs.libre-soc.org/show_bug.cgi?id=864
Uses remap_preduce_yield directly; confirmed operational through ISACaller.
The iterator is hooked in through decoder/isa/svshape.py, whose
SVSHAPE.get_iterator() is responsible for returning the appropriate
yield-iterator.

openpower/isa/simplev.mdwn
src/openpower/decoder/isa/remap_preduce_yield.py
src/openpower/decoder/isa/svshape.py
src/openpower/decoder/isa/test_caller_svp64_parallel_reduce.py

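diff --git a/openpower/isa/simplev.mdwn b/openpower/isa/simplev.mdwn
index 61dabd984ccb98f1ac63a633bdcf95efa4a9556d..32ad13147ee850f386f564575c845db8014d3bac 100644
--- a/openpower/isa/simplev.mdwn
+++ b/openpower/isa/simplev.mdwn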
@@ -279,25 +279,15 @@ Pseudo-code:
                 j <- j + newstep
                 i <- i + 1
             step <- newstep
+        # VL in Parallel-Reduce is the number of operations
         vlen[0:6] <- i
-        # VL in Matrix Multiply is xd*yd*zd
-        n <- (0b00 || SVxd) * (0b00 || SVyd) * (0b00 || SVzd)
-        vlen[0:6] <- n[14:20]
-        # set up template in SVSHAPE0, then copy to 1-3
+        # set up template in SVSHAPE0, then copy to 1. only 2 needed
         SVSHAPE0[0:5] <- (0b0 || SVxd)   # xdim
-        SVSHAPE0[6:11] <- (0b0 || SVyd)   # ydim
-        SVSHAPE0[12:17] <- (0b0 || SVzd)   # zdim
-        SVSHAPE0[28:29] <- 0b11           # skip z
+        SVSHAPE0[30:31] <- 0b10          # parallel reduce submode
         # copy
         SVSHAPE1[0:31] <- SVSHAPE0[0:31]
-        SVSHAPE2[0:31] <- SVSHAPE0[0:31]
-        SVSHAPE3[0:31] <- SVSHAPE0[0:31]
-        # set up FRA
-        SVSHAPE1[18:20] <- 0b001          # permute x,z,y
-        SVSHAPE1[28:29] <- 0b01           # skip z
-        # FRC
-        SVSHAPE2[18:20] <- 0b001          # permute x,z,y
-        SVSHAPE2[28:29] <- 0b11           # skip y
+        # set up right operand (left operand 28:29 is zero)
+        SVSHAPE1[28:29] <- 0b01           # right operand
     # set VL, MVL and Vertical-First
     SVSTATE[0:6] <- vlen
     SVSTATE[7:13] <- vlen
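diff --git a/src/openpower/decoder/isa/remap_preduce_yield.py b/src/openpower/decoder/isa/remap_preduce_yield.py
index 8c9e506afe98928dc688422bf34d8f89316390e0..b9433fb89593de0194d51fc76558ccea3bc77584 100644
--- a/src/openpower/decoder/isa/remap_preduce_yield.py
+++ b/src/openpower/decoder/isa/remap_preduce_yield.py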
@@ -1,6 +1,7 @@
 # a "yield" version of the Parallel Reduction REMAP algorithm. 
 # the algorithm is in-place. it does not perform "MV" operations.
 # instead, where a masked-out value *should* be read from is tracked
+from copy import deepcopy
 
 # python "yield" can be iterated. use this to make it clear how
 # the indices are generated by using natural-looking nested loops
@@ -75,12 +76,43 @@ def demo():
                "end", bin(lend)[2:], bin(rend)[2:])
 
 
-def preduce_y(vl, vec, pred):
-   for i, other in preduce_yield(vl, vec, pred):
-       vec[i] += vec[other]
+def preduce_y(vec, pred=None):
+    res = deepcopy(vec)
+    xdim = len(vec)
+    # set up an SVSHAPE
+    class SVSHAPE:
+        pass
+    SVSHAPE0 = SVSHAPE()
+    SVSHAPE0.lims = [xdim, 0, 0]
+    SVSHAPE0.order = [0,1,2]
+    SVSHAPE0.mode = 0b10
+    SVSHAPE0.skip = 0b00
+    SVSHAPE0.offset = 0       # experiment with different offset, here
+    SVSHAPE0.invxyz = [0,0,0] # inversion if desired
 
+    SVSHAPE1 = SVSHAPE()
+    SVSHAPE1.lims = [xdim, 0, 0]
+    SVSHAPE1.order = [0,1,2]
+    SVSHAPE1.mode = 0b10
+    SVSHAPE1.skip = 0b01
+    SVSHAPE1.offset = 0       # experiment with different offset, here
+    SVSHAPE1.invxyz = [0,0,0] # inversion if desired
+
+    # enumerate over the iterator function, getting new indices
+    shapes = list(iterate_indices(SVSHAPE0)), \
+              list(iterate_indices(SVSHAPE1))
+    for idx in range(len(shapes[0])):
+        l = shapes[0][idx]
+        r = shapes[1][idx]
+        (l_idx, lend) = l
+        (r_idx, rend) = r
+        res[l_idx] += res[r_idx]
+    return res
 
 # run the demo
 if __name__ == '__main__':
     demo()
-
+    vec = [1, 2, 3, 4, 9, 5, 6]
+    res = preduce_y(vec)
+    print (vec)
+    print (res)
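diff --git a/src/openpower/decoder/isa/svshape.py b/src/openpower/decoder/isa/svshape.py
index 5530b28de0ed0a5c8f139bc35156a04173b10d31..2bdf530b010ef2e87f44bf004ffdc68c9fde3c1c 100644
--- a/src/openpower/decoder/isa/svshape.py
+++ b/src/openpower/decoder/isa/svshape.py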
@@ -9,6 +9,8 @@ https://libre-soc.org/openpower/sv/remap
 from openpower.decoder.selectable_int import (FieldSelectableInt, SelectableInt,
                                         selectconcat)
 from openpower.decoder.isa.remapyield import iterate_indices
+from openpower.decoder.isa.remap_preduce_yield import (iterate_indices as
+                                                iterate_preduce_indices)
 from openpower.decoder.isa.remap_fft_yield import iterate_butterfly_indices
 from openpower.decoder.isa.remap_dct_yield import (
                                 iterate_dct_inner_butterfly_indices,
@@ -163,6 +165,8 @@ class SVSHAPE(SelectableInt):
         log ("SVSHAPE get_iterator", self.mode, self.ydimsz, self.is_indexed())
         if self.mode == 0b00:
             iterate_fn = iterate_indices
+        elif self.mode == 0b10:
+            iterate_fn = iterate_preduce_indices
         elif self.mode in [0b01, 0b11]:
             # further sub-selection
             if self.ydimsz == 1:
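diff --git a/src/openpower/decoder/isa/test_caller_svp64_parallel_reduce.py b/src/openpower/decoder/isa/test_caller_svp64_parallel_reduce.py
index 5e9b7c6b8919f90288acd5273fd81b6626106a8b..7b607ab8f43d65451006e98a9467aa646e18c624 100644
--- a/src/openpower/decoder/isa/test_caller_svp64_parallel_reduce.py
+++ b/src/openpower/decoder/isa/test_caller_svp64_parallel_reduce.py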
@@ -15,6 +15,7 @@ from openpower.sv.trans.svp64 import SVP64Asm
 from openpower.consts import SVP64CROffs
 from copy import deepcopy
 from openpower.decoder.helpers import fp64toselectable
+from openpower.decoder.isa.remap_preduce_yield import preduce_y
 from functools import reduce
 import operator
 
@@ -26,57 +27,32 @@ class DecoderTestCase(FHDLTestCase):
             self.assertEqual(sim.gpr(i), SelectableInt(expected[i], 64))
 
     def test_sv_remap1(self):
-        """>>> lst = ["svshape 2, 2, 0, 7, 0",
-                        "svremap 31, 1, 2, 3, 0, 0, 0",
-                       #"sv.fadd *0, *8, *16"
+        """>>> lst = ["svshape 7, 0, 0, 7, 0",
+                        "svremap 31, 0, 1, 0, 0, 0, 0",
+                       "sv.add *0, *8, *16"
                         ]
                 REMAP fmadds FRT, FRA, FRC, FRB
         """
-        lst = SVP64Asm(["svshape 2, 2, 0, 7, 0",
-                        #"svremap 31, 1, 2, 3, 0, 0, 0",
-                       #"sv.fmadds *0, *16, *32, *0"
+        lst = SVP64Asm(["svshape 7, 0, 0, 7, 0",
+                        "svremap 31, 0, 1, 0, 0, 0, 0",
+                       "sv.add *0, *0, *0"
                         ])
         lst = list(lst)
 
-        fprs = [0] * 64
-        # 3x2 matrix
-        X1 = [[1, 2, 3],
-              [3, 4, 5],
-             ]
-        # 2x3 matrix
-        Y1 = [[6, 7],
-              [8, 9],
-              [10, 11],
-             ]
-
-        X = X1
-        Y = Y1
-
-        xf = reduce(operator.add, X)
-        yf = reduce(operator.add, Y)
-        print ("flattened X,Y")
-        print ("\t", xf)
-        print ("\t", yf)
+        gprs = [0] * 64
+        vec = [1, 2, 3, 4, 9, 5, 6]
 
         # and create a linear result2, same scheme
         #result1 = [0] * (ydim1*xdim2)
 
 
         res = []
-        # store FPs
-        for i, x in enumerate(xf):
-            fprs[i+16] = fp64toselectable(float(x))  # X matrix
-        for i, y in enumerate(yf):
-            fprs[i+32] = fp64toselectable(float(y)) # Y matrix
-            continue
-            #t = DOUBLE2SINGLE(fp64toselectable(t)) # convert to Power single
-            #u = DOUBLE2SINGLE(fp64toselectable(u)) # from double
-            #res.append((t, u))
-            #print ("FFT", i, "in", a, b, "coeff", c, "mul",
-            #       mul, "res", t, u)
+        # store GPRs
+        for i, x in enumerate(vec):
+            gprs[i] = x
 
         with Program(lst, bigendian=False) as program:
-            sim = self.run_tst_program(program, initial_fprs=fprs)
+            sim = self.run_tst_program(program, initial_regs=gprs)
             print ("spr svshape0", sim.spr['SVSHAPE0'])
             print ("    xdimsz", sim.spr['SVSHAPE0'].xdimsz)
             print ("    ydimsz", sim.spr['SVSHAPE0'].ydimsz)
@@ -84,12 +60,14 @@ class DecoderTestCase(FHDLTestCase):
             print ("spr svshape1", sim.spr['SVSHAPE1'])
             print ("spr svshape2", sim.spr['SVSHAPE2'])
             print ("spr svshape3", sim.spr['SVSHAPE3'])
-            for i in range(4):
-                print ("i", i, float(sim.fpr(i)))
+            for i in range(7):
+                val = sim.gpr(i).value
+                res.append(val)
+                print ("i", i, val)
             # confirm that the results are as expected
-            #for i, (t, u) in enumerate(res):
-            #    self.assertEqual(sim.fpr(i+2), t)
-            #    self.assertEqual(sim.fpr(i+6), u)
+            expected = preduce_y(vec)
+            for i, v in enumerate(res):
+                self.assertEqual(v, expected[i])
 
 
     def run_tst_program(self, prog, initial_regs=None,