--- /dev/null
+# a "yield" version of the Parallel Reduction REMAP algorithm.
+# the algorithm is in-place. it does not perform "MV" operations.
+# instead, where a masked-out value *should* be read from is tracked
+
+# python "yield" can be iterated. use this to make it clear how
+# the indices are generated by using natural-looking nested loops
+def iterate_indices(SVSHAPE, pred=None):
+ # get indices to iterate over, in the required order
+ xd = SVSHAPE.lims[0]
+ # create lists of indices to iterate over in each dimension
+ ix = list(range(xd))
+ # invert the indices if needed
+ if SVSHAPE.invxyz[0]: ix.reverse()
+ # start a loop from the lowest step
+ step = 1
+ steps = []
+ while step < xd:
+ step *= 2
+ steps.append(step)
+ # invert the indices if needed
+ if SVSHAPE.invxyz[1]: steps.reverse()
+ for step in steps:
+ stepend = (step == steps[-1]) # note end of steps
+ idxs = list(range(0, xd, step)):
+ for i in idxs:
+ idxend = (i == idxs[-1]) # note end of inner loop
+ loopends = (stepend<<1) | idxend # notify end of loops
+ other = i + step // 2
+ ci = ix[i]
+ oi = ix[other] if other < xd else None
+ other_pred = other < xd and (pred is None or pred[oi])
+ if (pred is None or pred[ci]) and other_pred:
+ if SVSHAPE.skip == 0b00: # submode 00
+ result = ci
+ elif SVSHAPE.skip == 0b01: # submode 01
+ result = oi
+ yield result + SVSHAPE.offset, loopends
+ elif other_pred:
+ ix[i] = oi
+
+def demo():
+ # set the dimension sizes here
+ xdim = 3
+ ydim = 2
+ zdim = 4
+
+ # set total (can repeat, e.g. VL=x*y*z*4)
+ VL = xdim * ydim * zdim
+
+ # set up an SVSHAPE
+ class SVSHAPE:
+ pass
+ SVSHAPE0 = SVSHAPE()
+ SVSHAPE0.lims = [xdim, ydim, zdim]
+ SVSHAPE0.order = [1,0,2] # experiment with different permutations, here
+ SVSHAPE0.mode = 0b00
+ SVSHAPE0.skip = 0b00
+ SVSHAPE0.offset = 0 # experiment with different offset, here
+ SVSHAPE0.invxyz = [0,0,0] # inversion if desired
+
+ # enumerate over the iterator function, getting new indices
+ for idx, (new_idx, end) in enumerate(iterate_indices(SVSHAPE0)):
+ if idx >= VL:
+ break
+ print ("%d->%d" % (idx, new_idx), "end", bin(end)[2:])
+
+# run the demo
+if __name__ == '__main__':
+ demo()
+
+
+
+def preduce_yield(vl, vec, pred):
+ step = 1
+ ix = list(range(vl))
+ while step < vl:
+ step *= 2
+ for i in range(0, vl, step):
+ other = i + step // 2
+ ci = ix[i]
+ oi = ix[other] if other < vl else None
+ other_pred = other < vl and pred[oi]
+ if pred[ci] and other_pred:
+ yield ci, oi
+ elif other_pred:
+ ix[i] = oi
+
+def preduce_y(vl, vec, pred):
+ for i, other in preduce_yield(vl, vec, pred):
+ vec[i] += vec[other]