add inner and outer yield version of DCT inner and out butterfly
authorLuke Kenneth Casson Leighton <lkcl@lkcl.net>
Thu, 22 Jul 2021 14:46:56 +0000 (15:46 +0100)
committerLuke Kenneth Casson Leighton <lkcl@lkcl.net>
Thu, 22 Jul 2021 14:46:56 +0000 (15:46 +0100)
src/openpower/decoder/isa/remap_dct_yield.py

index 44a4375346d217dad1f17326d40f8bd2d90c98e9..63928d4dc3e0f3b234ef1570c13f57cc338d4b7b 100644 (file)
@@ -43,7 +43,7 @@ def halfrev2(vec, pre_rev=True):
 
 # python "yield" can be iterated. use this to make it clear how
 # the indices are generated by using natural-looking nested loops
-def iterate_dct_inner_butterfly_indices(SVSHAPE):
+def iterate_dct_butterfly_indices(SVSHAPE):
     # get indices to iterate over, in the required order
     n = SVSHAPE.lims[0]
     # createing lists of indices to iterate over in each dimension
@@ -55,20 +55,23 @@ def iterate_dct_inner_butterfly_indices(SVSHAPE):
         x_r.append(size)
         size *= 2
     # invert order if requested
-    if SVSHAPE.invxyz[0]: x_r.reverse()
+    if SVSHAPE.invxyz[0]:
+        x_r.reverse()
 
     if len(x_r) == 0:
         return
 
     # reference (read/write) the in-place data in *reverse-bit-order*
     ri = list(range(n))
-    ri = [ri[reverse_bits(i, levels)] for i in range(n)]
+    if SVSHAPE.mode == 0b01:
+        levels = n.bit_length() - 1
+        ri = [ri[reverse_bits(i, levels)] for i in range(n)]
 
     # reference list for not needing to do data-swaps, just swap what
     # *indices* are referenced (two levels of indirection at the moment)
     # pre-reverse the data-swap list so that it *ends up* in the order 0123..
     ji = list(range(n))
-    inplace_mode = SVSHAPE.skip not in [0b10, 0b11]
+    inplace_mode = SVSHAPE.mode == 0b01 and SVSHAPE.skip not in [0b10, 0b11]
     if inplace_mode:
         ji = halfrev2(ji, True)
 
@@ -98,7 +101,7 @@ def iterate_dct_inner_butterfly_indices(SVSHAPE):
                 hz2 = halfsize // 2 # zero stops reversing 1-item lists
                 # if you *really* want to do the in-place swapping manually,
                 # this allows you to do it.  good luck...
-                if not inplace_mode:
+                if SVSHAPE.mode == 0b01 and not inplace_mode:
                     jr = j_r[:hz2]
                 for j in j_r:   # loop over 1st order dimension
                     z_end = j == j_r[-1]
@@ -114,60 +117,127 @@ def iterate_dct_inner_butterfly_indices(SVSHAPE):
                     yield result + SVSHAPE.offset, loopends
 
                 # now in-place swap
-                if inplace_mode:
-                    for ci, (jl, jh) in enumerate(zip(j[:hz2], jr[:hz2])):
+                if SVSHAPE.mode == 0b01 and inplace_mode:
+                    for ci, jl in enumerate(j_r[:hz2]):
+                        jh = size-jl-1 # upper half, reverse order
                         jlh = jl+halfsize
                         tmp1, tmp2 = ji[jlh], ji[jh]
                         ji[jlh], ji[jh] = tmp2, tmp1
 
 
-# totally cool *in-place* DCT algorithm
-def transform2(vec):
+def pprint_schedule(schedule, n):
+    size = 2
+    idx = 0
+    while size <= n:
+        halfsize = size // 2
+        tablestep = n // size
+        print ("size %d halfsize %d tablestep %d" % \
+                (size, halfsize, tablestep))
+        for i in range(0, n, size):
+            prefix = "i %d\t" % i
+            for j in range(i, i + halfsize):
+                (jl, je), (jh, he) = schedule[idx]
+                print ("  %-3d\t%s j=%-2d jh=%-2d "
+                        "j[jl=%-2d] j[jh=%-2d]" % \
+                                (idx, prefix, j, j+halfsize,
+                                      jl, jh,
+                                ),
+                                "end", bin(je)[2:], bin(je)[2:])
+                idx += 1
+        size *= 2
 
-    # Initialization
-    n = len(vec)
-    levels = n.bit_length() - 1
 
-    # and pretend we LDed data in half-swapped *and* bit-reversed order as well
-    # TODO: merge these two
-    vec = halfrev2(vec, False)
-    vec = [vec[ri[i]] for i in range(n)]
+def demo():
+    # set the dimension sizes here
+    xdim = 8
+    ydim = 0 # not needed
+    zdim = 0 # again, not needed
 
-    # start the inner butterfly
-    size = n
-    while size >= 2:
+    # set total. err don't know how to calculate how many there are...
+    # do it manually for now
+    VL = 0
+    size = 2
+    n = xdim
+    while size <= n:
         halfsize = size // 2
         tablestep = n // size
-        ir = list(range(0, n, size))
-        for i in ir:
-            # two lists of half-range indices, e.g. j 0123, jr 7654
-            j = list(range(i, i + halfsize))
-            jr = list(range(i+halfsize, i + size))
-            jr.reverse()
-            for ci, (jl, jh) in enumerate(zip(j, jr)):
-                vec[ri[ji[jl]]] = t1 + t2
-                vec[ri[ji[jh]]] = (t1 - t2) * (1/coeff)
-            hz2 = halfsize // 2 # can be zero which stops reversing 1-item lists
-            for ci, (jl, jh) in enumerate(zip(j[:hz2], jr[:hz2])):
-                jlh = jl+halfsize
-                tmp1, tmp2 = ji[jlh], ji[jh]
-                ji[jlh], ji[jh] = tmp2, tmp1
-        size //= 2
-
-def dct_outer_butterfly(SVSHAPE):
-    n = len(vec)
-    size = n // 2
-    while size >= 2:
-        halfsize = size // 2
-        ir = list(range(0, halfsize))
-        print ("itersum", halfsize, size, ir)
-        for i in ir:
-            jr = list(range(i+halfsize, i+n-halfsize, size))
-            print ("itersum    jr", i+halfsize, i+size, jr)
-            for jh in jr:
-                vec[jh] += vec[jh+size]
-                print ("    itersum", size, i, jh, jh+size)
-        size //= 2
-
-    return vec
+        for i in range(0, n, size):
+            for j in range(i, i + halfsize):
+                VL += 1
+        size *= 2
 
+    ################
+    # INNER butterfly
+    ################
+
+    # set up an SVSHAPE
+    class SVSHAPE:
+        pass
+    # j schedule
+    SVSHAPE0 = SVSHAPE()
+    SVSHAPE0.lims = [xdim, ydim, zdim]
+    SVSHAPE0.order = [0,1,2]  # experiment with different permutations, here
+    SVSHAPE0.mode = 0b01
+    SVSHAPE0.skip = 0b00
+    SVSHAPE0.offset = 0       # experiment with different offset, here
+    SVSHAPE0.invxyz = [0,0,0] # inversion if desired
+    # j+halfstep schedule
+    SVSHAPE1 = SVSHAPE()
+    SVSHAPE1.lims = [xdim, ydim, zdim]
+    SVSHAPE1.order = [0,1,2]  # experiment with different permutations, here
+    SVSHAPE1.mode = 0b01
+    SVSHAPE1.skip = 0b01
+    SVSHAPE1.offset = 0       # experiment with different offset, here
+    SVSHAPE1.invxyz = [0,0,0] # inversion if desired
+
+    # enumerate over the iterator function, getting new indices
+    schedule = []
+    i0 = iterate_dct_butterfly_indices(SVSHAPE0)
+    i1 = iterate_dct_butterfly_indices(SVSHAPE1)
+    for idx, (jl, jh) in enumerate(zip(i0, i1)):
+        if idx >= VL:
+            break
+        schedule.append((jl, jh))
+
+    # ok now pretty-print the results, with some debug output
+    print ("inner butterfly")
+    pprint_schedule(schedule, n)
+    print ("")
+
+    ################
+    # outer butterfly
+    ################
+
+    # j schedule
+    SVSHAPE0 = SVSHAPE()
+    SVSHAPE0.lims = [xdim, ydim, zdim]
+    SVSHAPE0.order = [0,1,2]  # experiment with different permutations, here
+    SVSHAPE0.mode = 0b10
+    SVSHAPE0.skip = 0b00
+    SVSHAPE0.offset = 0       # experiment with different offset, here
+    SVSHAPE0.invxyz = [1,0,0] # inversion if desired
+    # j+halfstep schedule
+    SVSHAPE1 = SVSHAPE()
+    SVSHAPE1.lims = [xdim, ydim, zdim]
+    SVSHAPE1.order = [0,1,2]  # experiment with different permutations, here
+    SVSHAPE1.mode = 0b10
+    SVSHAPE1.skip = 0b01
+    SVSHAPE1.offset = 0       # experiment with different offset, here
+    SVSHAPE1.invxyz = [1,0,0] # inversion if desired
+
+    # enumerate over the iterator function, getting new indices
+    schedule = []
+    i0 = iterate_dct_butterfly_indices(SVSHAPE0)
+    i1 = iterate_dct_butterfly_indices(SVSHAPE1)
+    for idx, (jl, jh) in enumerate(zip(i0, i1)):
+        if idx >= VL:
+            break
+        schedule.append((jl, jh))
+
+    # ok now pretty-print the results, with some debug output
+    print ("outer butterfly")
+    pprint_schedule(schedule, n)
+
+# run the demo
+if __name__ == '__main__':
+    demo()