From: Luke Kenneth Casson Leighton Date: Tue, 6 Sep 2022 00:39:00 +0000 (+0100) Subject: add first version of parallel reduction yield X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=80786dbfb496dc6e03fca7a1dfbe021beb584948;p=openpower-isa.git add first version of parallel reduction yield --- diff --git a/src/openpower/decoder/isa/remap_preduce_yield.py b/src/openpower/decoder/isa/remap_preduce_yield.py new file mode 100644 index 00000000..231b51f0 --- /dev/null +++ b/src/openpower/decoder/isa/remap_preduce_yield.py @@ -0,0 +1,90 @@ +# a "yield" version of the Parallel Reduction REMAP algorithm. +# the algorithm is in-place. it does not perform "MV" operations. +# instead, where a masked-out value *should* be read from is tracked + +# python "yield" can be iterated. use this to make it clear how +# the indices are generated by using natural-looking nested loops +def iterate_indices(SVSHAPE, pred=None): + # get indices to iterate over, in the required order + xd = SVSHAPE.lims[0] + # create lists of indices to iterate over in each dimension + ix = list(range(xd)) + # invert the indices if needed + if SVSHAPE.invxyz[0]: ix.reverse() + # start a loop from the lowest step + step = 1 + steps = [] + while step < xd: + step *= 2 + steps.append(step) + # invert the indices if needed + if SVSHAPE.invxyz[1]: steps.reverse() + for step in steps: + stepend = (step == steps[-1]) # note end of steps + idxs = list(range(0, xd, step)): + for i in idxs: + idxend = (i == idxs[-1]) # note end of inner loop + loopends = (stepend<<1) | idxend # notify end of loops + other = i + step // 2 + ci = ix[i] + oi = ix[other] if other < xd else None + other_pred = other < xd and (pred is None or pred[oi]) + if (pred is None or pred[ci]) and other_pred: + if SVSHAPE.skip == 0b00: # submode 00 + result = ci + elif SVSHAPE.skip == 0b01: # submode 01 + result = oi + yield result + SVSHAPE.offset, loopends + elif other_pred: + ix[i] = oi + +def demo(): + # set the dimension sizes here + xdim = 3 + ydim = 2 + zdim = 4 + + # set total (can repeat, e.g. VL=x*y*z*4) + VL = xdim * ydim * zdim + + # set up an SVSHAPE + class SVSHAPE: + pass + SVSHAPE0 = SVSHAPE() + SVSHAPE0.lims = [xdim, ydim, zdim] + SVSHAPE0.order = [1,0,2] # experiment with different permutations, here + SVSHAPE0.mode = 0b00 + SVSHAPE0.skip = 0b00 + SVSHAPE0.offset = 0 # experiment with different offset, here + SVSHAPE0.invxyz = [0,0,0] # inversion if desired + + # enumerate over the iterator function, getting new indices + for idx, (new_idx, end) in enumerate(iterate_indices(SVSHAPE0)): + if idx >= VL: + break + print ("%d->%d" % (idx, new_idx), "end", bin(end)[2:]) + +# run the demo +if __name__ == '__main__': + demo() + + + +def preduce_yield(vl, vec, pred): + step = 1 + ix = list(range(vl)) + while step < vl: + step *= 2 + for i in range(0, vl, step): + other = i + step // 2 + ci = ix[i] + oi = ix[other] if other < vl else None + other_pred = other < vl and pred[oi] + if pred[ci] and other_pred: + yield ci, oi + elif other_pred: + ix[i] = oi + +def preduce_y(vl, vec, pred): + for i, other in preduce_yield(vl, vec, pred): + vec[i] += vec[other]