add REMAP DCT yield schedule function, TODO
authorLuke Kenneth Casson Leighton <lkcl@lkcl.net>
Thu, 22 Jul 2021 14:12:43 +0000 (15:12 +0100)
committerLuke Kenneth Casson Leighton <lkcl@lkcl.net>
Thu, 22 Jul 2021 14:12:43 +0000 (15:12 +0100)
src/openpower/decoder/isa/remap_dct_yield.py [new file with mode: 0644]

diff --git a/src/openpower/decoder/isa/remap_dct_yield.py b/src/openpower/decoder/isa/remap_dct_yield.py
new file mode 100644 (file)
index 0000000..eaaeaf8
--- /dev/null
@@ -0,0 +1,174 @@
+# DCT "REMAP" scheduler
+#
+# Modifications made to create an in-place iterative DCT:
+# Copyright (c) 2021 Luke Kenneth Casson Leighton <lkcl@lkcl.net>
+#
+# SPDX: LGPLv3+
+#
+# Original fastdctlee.py by Nayuki:
+# Copyright (c) 2020 Project Nayuki. (MIT License)
+# https://www.nayuki.io/page/fast-discrete-cosine-transform-algorithms
+
+import math
+
+# bits of the integer 'val'.
+def reverse_bits(val, width):
+    result = 0
+    for _ in range(width):
+        result = (result << 1) | (val & 1)
+        val >>= 1
+    return result
+
+
+# iterative version of [recursively-applied] half-rev.
+# relies on the list lengths being power-of-two and the fact
+# that bit-inversion of a list of binary numbers is the same
+# as reversing the order of the list
+# this version is dead easy to implement in hardware.
+# a big surprise is that the half-reversal can be done with
+# such a simple XOR. the inverse operation is slightly trickier
+def halfrev2(vec, pre_rev=True):
+    res = []
+    for i in range(len(vec)):
+        if pre_rev:
+            res.append(i ^ (i>>1))
+        else:
+            ri = i
+            bl = i.bit_length()
+            for ji in range(1, bl):
+                if (1<<ji) & i:
+                    ri ^= ((1<<ji)-1)
+            res.append(vec[ri])
+    return res
+
+
+# python "yield" can be iterated. use this to make it clear how
+# the indices are generated by using natural-looking nested loops
+def iterate_dct_inner_butterfly_indices(SVSHAPE):
+    # get indices to iterate over, in the required order
+    n = SVSHAPE.lims[0]
+    # createing lists of indices to iterate over in each dimension
+    # has to be done dynamically, because it depends on the size
+    # first, the size-based loop (which can be done statically)
+    x_r = []
+    size = 2
+    while size <= n:
+        x_r.append(size)
+        size *= 2
+    # invert order if requested
+    if SVSHAPE.invxyz[0]: x_r.reverse()
+
+    if len(x_r) == 0:
+        return
+
+    # reference (read/write) the in-place data in *reverse-bit-order*
+    ri = list(range(n))
+    ri = [ri[reverse_bits(i, levels)] for i in range(n)]
+
+    # reference list for not needing to do data-swaps, just swap what
+    # *indices* are referenced (two levels of indirection at the moment)
+    # pre-reverse the data-swap list so that it *ends up* in the order 0123..
+    ji = list(range(n))
+    inplace_mode = SVSHAPE.skip not in [0b10, 0b11]
+    if inplace_mode:
+        ji = halfrev2(ji, True)
+
+    # start an infinite (wrapping) loop
+    skip = 0
+    while True:
+        for size in x_r:           # loop over 3rd order dimension (size)
+            x_end = size == x_r[-1]
+            # y_r schedule depends on size
+            halfsize = size // 2
+            y_r = []
+            for i in range(0, n, size):
+                y_r.append(i)
+            # invert if requested
+            if SVSHAPE.invxyz[1]: y_r.reverse()
+            for i in y_r:       # loop over 2nd order dimension
+                y_end = i == y_r[-1]
+                k_r = []
+                j_r = []
+                k = 0
+                for j in range(i, i+halfsize):
+                    k_r.append(k)
+                    j_r.append(j)
+                # invert if requested
+                if SVSHAPE.invxyz[2]: k_r.reverse()
+                if SVSHAPE.invxyz[2]: j_r.reverse()
+                hz2 = halfsize // 2 # zero stops reversing 1-item lists
+                # if you *really* want to do the in-place swapping manually,
+                # this allows you to do it.  good luck...
+                if not inplace_mode:
+                    jr = j_r[:hz2]
+                for j in j_r:   # loop over 1st order dimension
+                    z_end = j == j_r[-1]
+                    # now depending on MODE return the index
+                    if SVSHAPE.skip in [0b00, 0b10]:
+                        result = ri[ji[j]]        # lower half
+                    elif SVSHAPE.skip in [0b01, 0b11]:
+                        result = ri[ji[size-j-1]] # upper half, reverse order
+                    loopends = (z_end |
+                               ((y_end and z_end)<<1) |
+                                ((y_end and x_end and z_end)<<2))
+
+                    yield result + SVSHAPE.offset, loopends
+
+                # now in-place swap
+                if inplace_mode:
+                    for ci, (jl, jh) in enumerate(zip(j[:hz2], jr[:hz2])):
+                        jlh = jl+halfsize
+                        tmp1, tmp2 = ji[jlh], ji[jh]
+                        ji[jlh], ji[jh] = tmp2, tmp1
+
+
+# totally cool *in-place* DCT algorithm
+def transform2(vec):
+
+    # Initialization
+    n = len(vec)
+    levels = n.bit_length() - 1
+
+    # and pretend we LDed data in half-swapped *and* bit-reversed order as well
+    # TODO: merge these two
+    vec = halfrev2(vec, False)
+    vec = [vec[ri[i]] for i in range(n)]
+
+    # start the inner butterfly
+    size = n
+    while size >= 2:
+        halfsize = size // 2
+        tablestep = n // size
+        ir = list(range(0, n, size))
+        for i in ir:
+            # two lists of half-range indices, e.g. j 0123, jr 7654
+            j = list(range(i, i + halfsize))
+            jr = list(range(i+halfsize, i + size))
+            jr.reverse()
+            for ci, (jl, jh) in enumerate(zip(j, jr)):
+                vec[ri[ji[jl]]] = t1 + t2
+                vec[ri[ji[jh]]] = (t1 - t2) * (1/coeff)
+            hz2 = halfsize // 2 # can be zero which stops reversing 1-item lists
+            for ci, (jl, jh) in enumerate(zip(j[:hz2], jr[:hz2])):
+                jlh = jl+halfsize
+                tmp1, tmp2 = ji[jlh], ji[jh]
+                ji[jlh], ji[jh] = tmp2, tmp1
+        size //= 2
+
+def dct_outer_butterfly(SVSHAPE):
+    n = len(vec)
+    size = n // 2
+    while size >= 2:
+        halfsize = size // 2
+        ir = list(range(0, halfsize))
+        print ("itersum", halfsize, size, ir)
+        for i in ir:
+            jr = list(range(i+halfsize, i+n-halfsize, size))
+            print ("itersum    jr", i+halfsize, i+size, jr)
+            for jh in jr:
+                vec[jh] += vec[jh+size]
+                print ("    itersum", size, i, jh, jh+size)
+        size //= 2
+
+    return vec
+