matrix multiply remap slight redesign
author Luke Kenneth Casson Leighton <lkcl@lkcl.net>
Sun, 4 Jul 2021 15:00:36 +0000 (16:00 +0100)
committer Luke Kenneth Casson Leighton <lkcl@lkcl.net>
Sun, 4 Jul 2021 15:00:40 +0000 (16:00 +0100)
openpower/sv/remapmatrixprint.py
openpower/sv/remapyield.py

index c724a8b5227b796951cb7d829b9e77fc1d7962d4..94a4c4b5200bf2d7785356bc372a74ef219e1f80 100644 (file)
@@ -53,22 +53,22 @@ def matrixscheduledemo():
         pass
     # result uses SVSHAPE0
     SVSHAPE0 = SVSHAPE()
-    SVSHAPE0.lims = [ydim2, xdim2, 1]
-    SVSHAPE0.order = [0,2,1]  # result iterates through i and j (modulo)
+    SVSHAPE0.lims = [xdim2, ydim2, 1]
+    SVSHAPE0.order = [0,1,2]  # result iterates through i and j (modulo)
     SVSHAPE0.mode = 0b00
     SVSHAPE0.offset = 0       # no offset
     SVSHAPE0.invxyz = [0,0,0] # no inversion
     # X uses SVSHAPE1
     SVSHAPE1 = SVSHAPE()
-    SVSHAPE1.lims = [ydim2, xdim2, ydim1]
-    SVSHAPE1.order = [1,2,0]  # X iterates through i and k
+    SVSHAPE1.lims = [xdim2, ydim2, ydim1]
+    SVSHAPE1.order = [0,2,1]  # X iterates through i and k
     SVSHAPE1.mode = 0b10
     SVSHAPE1.offset = 0       # no offset
     SVSHAPE1.invxyz = [0,0,0] # no inversion
     # y-selector uses SHAPE2
     SVSHAPE2 = SVSHAPE()
-    SVSHAPE2.lims = [ydim2, xdim2, ydim1]
-    SVSHAPE2.order = [1,2,0]  # Y iterates through k and j
+    SVSHAPE2.lims = [xdim2, ydim2, ydim1]
+    SVSHAPE2.order = [0,2,1]  # Y iterates through k and j
     SVSHAPE2.mode = 0b01
     SVSHAPE2.offset = 0       # no offset
     SVSHAPE2.invxyz = [0,0,0] # no inversion
@@ -83,7 +83,7 @@ def matrixscheduledemo():
 
         if i == VL:
             break
-        print ("(", i, ",", idxs, "),")
+        print ("(", i, ",", idxs, "),", "expected", expected[i])
         if expected[i] != (i, idxs):
             print ("row incorrect")
 
index ac3217b84a4c4161e5b2808c0600fdbe07cfdce3..6c53f65b6919745700781baa9a7e6b37e5b3c144 100644 (file)
@@ -5,44 +5,55 @@
 # the indices are generated by using natural-looking nested loops
 def iterate_indices(SVSHAPE):
     # get indices to iterate over, in the required order
-    xd = SVSHAPE.lims[SVSHAPE.order[2]]
-    yd = SVSHAPE.lims[SVSHAPE.order[1]]
-    zd = SVSHAPE.lims[SVSHAPE.order[0]]
+    xd = SVSHAPE.lims[0]
+    yd = SVSHAPE.lims[1]
+    zd = SVSHAPE.lims[2]
     # create lists of indices to iterate over in each dimension
     x_r = list(range(xd))
     y_r = list(range(yd))
     z_r = list(range(zd))
     # invert the indices if needed
-    if SVSHAPE.invxyz[SVSHAPE.order[2]]: x_r.reverse()
-    if SVSHAPE.invxyz[SVSHAPE.order[1]]: y_r.reverse()
-    if SVSHAPE.invxyz[SVSHAPE.order[0]]: z_r.reverse()
+    if SVSHAPE.invxyz[0]: x_r.reverse()
+    if SVSHAPE.invxyz[1]: y_r.reverse()
+    if SVSHAPE.invxyz[2]: z_r.reverse()
     # start an infinite (wrapping) loop
-    skip = 0
     while True:
-        for x in x_r:           # loop over 3rd order dimension
+        for z in z_r:   # loop over 1st order dimension
             for y in y_r:       # loop over 2nd order dimension
-                for z in z_r:   # loop over 1st order dimension
-                    # skip the first entries up to offset
-                    if skip < SVSHAPE.offset:
-                        skip += 1
-                        continue
-                    # construct the (up to) 3D remap schedule
+                for x in x_r:           # loop over 3rd order dimension
+                    # ok work out which order to construct things in.
+                    # start by creating a list of tuples of the dimension
+                    # and its limit
+                    vals = [(SVSHAPE.lims[0], x, "x"),
+                            (SVSHAPE.lims[1], y, "y"),
+                            (SVSHAPE.lims[2], z, "z")
+                           ]
+                    # now select those by order:
+                    vals = [vals[SVSHAPE.order[0]],
+                            vals[SVSHAPE.order[1]],
+                            vals[SVSHAPE.order[2]]
+                           ]
+                    # ok now we can construct the result, using bits of
+                    # "order" to say which ones get stacked on
+                    result = 0
+                    mult = 1
                     if SVSHAPE.mode == 0b00:
-                        result = z
-                        result += y * zd
-                        result += x * zd * yd
+                        permute = 0b111
                     elif SVSHAPE.mode == 0b01:
-                        result = z
-                        result += x * zd
-                        #result = z
-                        #result = result * xd + x
-                        #result = result * yd + y
+                        permute = 0b011
                     elif SVSHAPE.mode == 0b10:
-                        result = x
-                        result += y * xd
-                        #result += z * xd * yd
+                        permute = 0b110
+                    else:
+                        permute = 0b111
+                    for i in range(3):
+                        lim, idx, dbg = vals[i]
+                        if permute & (1<<i):
+                            #print ("select %d %s" % (i, dbg))
+                            idx *= mult   # shifts up by previous dimension(s)
+                            result += idx # adds on this dimension
+                            mult *= lim   # for the next dimension
 
-                    yield result
+                    yield result + SVSHAPE.offset
 
 def demo():
     # set the dimension sizes here