add first version of parallel reduction yield
authorLuke Kenneth Casson Leighton <lkcl@lkcl.net>
Tue, 6 Sep 2022 00:39:00 +0000 (01:39 +0100)
committerLuke Kenneth Casson Leighton <lkcl@lkcl.net>
Tue, 6 Sep 2022 00:39:49 +0000 (01:39 +0100)
src/openpower/decoder/isa/remap_preduce_yield.py [new file with mode: 0644]

diff --git a/src/openpower/decoder/isa/remap_preduce_yield.py b/src/openpower/decoder/isa/remap_preduce_yield.py
new file mode 100644 (file)
index 0000000..231b51f
--- /dev/null
@@ -0,0 +1,90 @@
+# a "yield" version of the Parallel Reduction REMAP algorithm. 
+# the algorithm is in-place. it does not perform "MV" operations.
+# instead, where a masked-out value *should* be read from is tracked
+
+# python "yield" can be iterated. use this to make it clear how
+# the indices are generated by using natural-looking nested loops
+def iterate_indices(SVSHAPE, pred=None):
+    # get indices to iterate over, in the required order
+    xd = SVSHAPE.lims[0]
+    # create lists of indices to iterate over in each dimension
+    ix = list(range(xd))
+    # invert the indices if needed
+    if SVSHAPE.invxyz[0]: ix.reverse()
+    # start a loop from the lowest step
+    step = 1 
+    steps = []
+    while step < xd:
+        step *= 2
+        steps.append(step)
+    # invert the indices if needed
+    if SVSHAPE.invxyz[1]: steps.reverse()
+    for step in steps:
+        stepend = (step == steps[-1]) # note end of steps
+        idxs = list(range(0, xd, step)):
+        for i in idxs:
+            idxend = (i == idxs[-1]) # note end of inner loop
+            loopends = (stepend<<1) | idxend # notify end of loops
+            other = i + step // 2
+            ci = ix[i]
+            oi = ix[other] if other < xd else None
+            other_pred = other < xd and (pred is None or pred[oi])
+            if (pred is None or pred[ci]) and other_pred:
+                if SVSHAPE.skip == 0b00: # submode 00
+                    result = ci
+                elif SVSHAPE.skip == 0b01: # submode 01
+                    result = oi
+                yield result + SVSHAPE.offset, loopends
+            elif other_pred:
+                ix[i] = oi
+
+def demo():
+    # set the dimension sizes here
+    xdim = 3
+    ydim = 2
+    zdim = 4
+
+    # set total (can repeat, e.g. VL=x*y*z*4)
+    VL = xdim * ydim * zdim
+
+    # set up an SVSHAPE
+    class SVSHAPE:
+        pass
+    SVSHAPE0 = SVSHAPE()
+    SVSHAPE0.lims = [xdim, ydim, zdim]
+    SVSHAPE0.order = [1,0,2]  # experiment with different permutations, here
+    SVSHAPE0.mode = 0b00
+    SVSHAPE0.skip = 0b00
+    SVSHAPE0.offset = 0       # experiment with different offset, here
+    SVSHAPE0.invxyz = [0,0,0] # inversion if desired
+
+    # enumerate over the iterator function, getting new indices
+    for idx, (new_idx, end) in enumerate(iterate_indices(SVSHAPE0)):
+        if idx >= VL:
+            break
+        print ("%d->%d" % (idx, new_idx), "end", bin(end)[2:])
+
+# run the demo
+if __name__ == '__main__':
+    demo()
+
+
+
+def preduce_yield(vl, vec, pred):
+    step = 1
+    ix = list(range(vl))
+    while step < vl:
+        step *= 2
+        for i in range(0, vl, step):
+            other = i + step // 2
+            ci = ix[i]
+            oi = ix[other] if other < vl else None
+            other_pred = other < vl and pred[oi]
+            if pred[ci] and other_pred:
+                yield ci, oi
+            elif other_pred:
+                ix[i] = oi
+
+def preduce_y(vl, vec, pred):
+   for i, other in preduce_yield(vl, vec, pred):
+       vec[i] += vec[other]