From 0c34c59920b58adfcc803c9dabaa34a8e81eb45d Mon Sep 17 00:00:00 2001 From: Luke Kenneth Casson Leighton Date: Thu, 22 Jul 2021 15:46:56 +0100 Subject: [PATCH] add inner and outer yield version of DCT inner and out butterfly --- src/openpower/decoder/isa/remap_dct_yield.py | 172 +++++++++++++------ 1 file changed, 121 insertions(+), 51 deletions(-) diff --git a/src/openpower/decoder/isa/remap_dct_yield.py b/src/openpower/decoder/isa/remap_dct_yield.py index 44a43753..63928d4d 100644 --- a/src/openpower/decoder/isa/remap_dct_yield.py +++ b/src/openpower/decoder/isa/remap_dct_yield.py @@ -43,7 +43,7 @@ def halfrev2(vec, pre_rev=True): # python "yield" can be iterated. use this to make it clear how # the indices are generated by using natural-looking nested loops -def iterate_dct_inner_butterfly_indices(SVSHAPE): +def iterate_dct_butterfly_indices(SVSHAPE): # get indices to iterate over, in the required order n = SVSHAPE.lims[0] # createing lists of indices to iterate over in each dimension @@ -55,20 +55,23 @@ def iterate_dct_inner_butterfly_indices(SVSHAPE): x_r.append(size) size *= 2 # invert order if requested - if SVSHAPE.invxyz[0]: x_r.reverse() + if SVSHAPE.invxyz[0]: + x_r.reverse() if len(x_r) == 0: return # reference (read/write) the in-place data in *reverse-bit-order* ri = list(range(n)) - ri = [ri[reverse_bits(i, levels)] for i in range(n)] + if SVSHAPE.mode == 0b01: + levels = n.bit_length() - 1 + ri = [ri[reverse_bits(i, levels)] for i in range(n)] # reference list for not needing to do data-swaps, just swap what # *indices* are referenced (two levels of indirection at the moment) # pre-reverse the data-swap list so that it *ends up* in the order 0123.. ji = list(range(n)) - inplace_mode = SVSHAPE.skip not in [0b10, 0b11] + inplace_mode = SVSHAPE.mode == 0b01 and SVSHAPE.skip not in [0b10, 0b11] if inplace_mode: ji = halfrev2(ji, True) @@ -98,7 +101,7 @@ def iterate_dct_inner_butterfly_indices(SVSHAPE): hz2 = halfsize // 2 # zero stops reversing 1-item lists # if you *really* want to do the in-place swapping manually, # this allows you to do it. good luck... - if not inplace_mode: + if SVSHAPE.mode == 0b01 and not inplace_mode: jr = j_r[:hz2] for j in j_r: # loop over 1st order dimension z_end = j == j_r[-1] @@ -114,60 +117,127 @@ def iterate_dct_inner_butterfly_indices(SVSHAPE): yield result + SVSHAPE.offset, loopends # now in-place swap - if inplace_mode: - for ci, (jl, jh) in enumerate(zip(j[:hz2], jr[:hz2])): + if SVSHAPE.mode == 0b01 and inplace_mode: + for ci, jl in enumerate(j_r[:hz2]): + jh = size-jl-1 # upper half, reverse order jlh = jl+halfsize tmp1, tmp2 = ji[jlh], ji[jh] ji[jlh], ji[jh] = tmp2, tmp1 -# totally cool *in-place* DCT algorithm -def transform2(vec): +def pprint_schedule(schedule, n): + size = 2 + idx = 0 + while size <= n: + halfsize = size // 2 + tablestep = n // size + print ("size %d halfsize %d tablestep %d" % \ + (size, halfsize, tablestep)) + for i in range(0, n, size): + prefix = "i %d\t" % i + for j in range(i, i + halfsize): + (jl, je), (jh, he) = schedule[idx] + print (" %-3d\t%s j=%-2d jh=%-2d " + "j[jl=%-2d] j[jh=%-2d]" % \ + (idx, prefix, j, j+halfsize, + jl, jh, + ), + "end", bin(je)[2:], bin(je)[2:]) + idx += 1 + size *= 2 - # Initialization - n = len(vec) - levels = n.bit_length() - 1 - # and pretend we LDed data in half-swapped *and* bit-reversed order as well - # TODO: merge these two - vec = halfrev2(vec, False) - vec = [vec[ri[i]] for i in range(n)] +def demo(): + # set the dimension sizes here + xdim = 8 + ydim = 0 # not needed + zdim = 0 # again, not needed - # start the inner butterfly - size = n - while size >= 2: + # set total. err don't know how to calculate how many there are... + # do it manually for now + VL = 0 + size = 2 + n = xdim + while size <= n: halfsize = size // 2 tablestep = n // size - ir = list(range(0, n, size)) - for i in ir: - # two lists of half-range indices, e.g. j 0123, jr 7654 - j = list(range(i, i + halfsize)) - jr = list(range(i+halfsize, i + size)) - jr.reverse() - for ci, (jl, jh) in enumerate(zip(j, jr)): - vec[ri[ji[jl]]] = t1 + t2 - vec[ri[ji[jh]]] = (t1 - t2) * (1/coeff) - hz2 = halfsize // 2 # can be zero which stops reversing 1-item lists - for ci, (jl, jh) in enumerate(zip(j[:hz2], jr[:hz2])): - jlh = jl+halfsize - tmp1, tmp2 = ji[jlh], ji[jh] - ji[jlh], ji[jh] = tmp2, tmp1 - size //= 2 - -def dct_outer_butterfly(SVSHAPE): - n = len(vec) - size = n // 2 - while size >= 2: - halfsize = size // 2 - ir = list(range(0, halfsize)) - print ("itersum", halfsize, size, ir) - for i in ir: - jr = list(range(i+halfsize, i+n-halfsize, size)) - print ("itersum jr", i+halfsize, i+size, jr) - for jh in jr: - vec[jh] += vec[jh+size] - print (" itersum", size, i, jh, jh+size) - size //= 2 - - return vec + for i in range(0, n, size): + for j in range(i, i + halfsize): + VL += 1 + size *= 2 + ################ + # INNER butterfly + ################ + + # set up an SVSHAPE + class SVSHAPE: + pass + # j schedule + SVSHAPE0 = SVSHAPE() + SVSHAPE0.lims = [xdim, ydim, zdim] + SVSHAPE0.order = [0,1,2] # experiment with different permutations, here + SVSHAPE0.mode = 0b01 + SVSHAPE0.skip = 0b00 + SVSHAPE0.offset = 0 # experiment with different offset, here + SVSHAPE0.invxyz = [0,0,0] # inversion if desired + # j+halfstep schedule + SVSHAPE1 = SVSHAPE() + SVSHAPE1.lims = [xdim, ydim, zdim] + SVSHAPE1.order = [0,1,2] # experiment with different permutations, here + SVSHAPE1.mode = 0b01 + SVSHAPE1.skip = 0b01 + SVSHAPE1.offset = 0 # experiment with different offset, here + SVSHAPE1.invxyz = [0,0,0] # inversion if desired + + # enumerate over the iterator function, getting new indices + schedule = [] + i0 = iterate_dct_butterfly_indices(SVSHAPE0) + i1 = iterate_dct_butterfly_indices(SVSHAPE1) + for idx, (jl, jh) in enumerate(zip(i0, i1)): + if idx >= VL: + break + schedule.append((jl, jh)) + + # ok now pretty-print the results, with some debug output + print ("inner butterfly") + pprint_schedule(schedule, n) + print ("") + + ################ + # outer butterfly + ################ + + # j schedule + SVSHAPE0 = SVSHAPE() + SVSHAPE0.lims = [xdim, ydim, zdim] + SVSHAPE0.order = [0,1,2] # experiment with different permutations, here + SVSHAPE0.mode = 0b10 + SVSHAPE0.skip = 0b00 + SVSHAPE0.offset = 0 # experiment with different offset, here + SVSHAPE0.invxyz = [1,0,0] # inversion if desired + # j+halfstep schedule + SVSHAPE1 = SVSHAPE() + SVSHAPE1.lims = [xdim, ydim, zdim] + SVSHAPE1.order = [0,1,2] # experiment with different permutations, here + SVSHAPE1.mode = 0b10 + SVSHAPE1.skip = 0b01 + SVSHAPE1.offset = 0 # experiment with different offset, here + SVSHAPE1.invxyz = [1,0,0] # inversion if desired + + # enumerate over the iterator function, getting new indices + schedule = [] + i0 = iterate_dct_butterfly_indices(SVSHAPE0) + i1 = iterate_dct_butterfly_indices(SVSHAPE1) + for idx, (jl, jh) in enumerate(zip(i0, i1)): + if idx >= VL: + break + schedule.append((jl, jh)) + + # ok now pretty-print the results, with some debug output + print ("outer butterfly") + pprint_schedule(schedule, n) + +# run the demo +if __name__ == '__main__': + demo() -- 2.30.2