--- /dev/null
+# DCT "REMAP" scheduler
+#
+# Modifications made to create an in-place iterative DCT:
+# Copyright (c) 2021 Luke Kenneth Casson Leighton <lkcl@lkcl.net>
+#
+# SPDX: LGPLv3+
+#
+# Original fastdctlee.py by Nayuki:
+# Copyright (c) 2020 Project Nayuki. (MIT License)
+# https://www.nayuki.io/page/fast-discrete-cosine-transform-algorithms
+
+import math
+
+# bits of the integer 'val'.
+def reverse_bits(val, width):
+ result = 0
+ for _ in range(width):
+ result = (result << 1) | (val & 1)
+ val >>= 1
+ return result
+
+
+# iterative version of [recursively-applied] half-rev.
+# relies on the list lengths being power-of-two and the fact
+# that bit-inversion of a list of binary numbers is the same
+# as reversing the order of the list
+# this version is dead easy to implement in hardware.
+# a big surprise is that the half-reversal can be done with
+# such a simple XOR. the inverse operation is slightly trickier
+def halfrev2(vec, pre_rev=True):
+ res = []
+ for i in range(len(vec)):
+ if pre_rev:
+ res.append(i ^ (i>>1))
+ else:
+ ri = i
+ bl = i.bit_length()
+ for ji in range(1, bl):
+ if (1<<ji) & i:
+ ri ^= ((1<<ji)-1)
+ res.append(vec[ri])
+ return res
+
+
+# python "yield" can be iterated. use this to make it clear how
+# the indices are generated by using natural-looking nested loops
+def iterate_dct_inner_butterfly_indices(SVSHAPE):
+ # get indices to iterate over, in the required order
+ n = SVSHAPE.lims[0]
+ # createing lists of indices to iterate over in each dimension
+ # has to be done dynamically, because it depends on the size
+ # first, the size-based loop (which can be done statically)
+ x_r = []
+ size = 2
+ while size <= n:
+ x_r.append(size)
+ size *= 2
+ # invert order if requested
+ if SVSHAPE.invxyz[0]: x_r.reverse()
+
+ if len(x_r) == 0:
+ return
+
+ # reference (read/write) the in-place data in *reverse-bit-order*
+ ri = list(range(n))
+ ri = [ri[reverse_bits(i, levels)] for i in range(n)]
+
+ # reference list for not needing to do data-swaps, just swap what
+ # *indices* are referenced (two levels of indirection at the moment)
+ # pre-reverse the data-swap list so that it *ends up* in the order 0123..
+ ji = list(range(n))
+ inplace_mode = SVSHAPE.skip not in [0b10, 0b11]
+ if inplace_mode:
+ ji = halfrev2(ji, True)
+
+ # start an infinite (wrapping) loop
+ skip = 0
+ while True:
+ for size in x_r: # loop over 3rd order dimension (size)
+ x_end = size == x_r[-1]
+ # y_r schedule depends on size
+ halfsize = size // 2
+ y_r = []
+ for i in range(0, n, size):
+ y_r.append(i)
+ # invert if requested
+ if SVSHAPE.invxyz[1]: y_r.reverse()
+ for i in y_r: # loop over 2nd order dimension
+ y_end = i == y_r[-1]
+ k_r = []
+ j_r = []
+ k = 0
+ for j in range(i, i+halfsize):
+ k_r.append(k)
+ j_r.append(j)
+ # invert if requested
+ if SVSHAPE.invxyz[2]: k_r.reverse()
+ if SVSHAPE.invxyz[2]: j_r.reverse()
+ hz2 = halfsize // 2 # zero stops reversing 1-item lists
+ # if you *really* want to do the in-place swapping manually,
+ # this allows you to do it. good luck...
+ if not inplace_mode:
+ jr = j_r[:hz2]
+ for j in j_r: # loop over 1st order dimension
+ z_end = j == j_r[-1]
+ # now depending on MODE return the index
+ if SVSHAPE.skip in [0b00, 0b10]:
+ result = ri[ji[j]] # lower half
+ elif SVSHAPE.skip in [0b01, 0b11]:
+ result = ri[ji[size-j-1]] # upper half, reverse order
+ loopends = (z_end |
+ ((y_end and z_end)<<1) |
+ ((y_end and x_end and z_end)<<2))
+
+ yield result + SVSHAPE.offset, loopends
+
+ # now in-place swap
+ if inplace_mode:
+ for ci, (jl, jh) in enumerate(zip(j[:hz2], jr[:hz2])):
+ jlh = jl+halfsize
+ tmp1, tmp2 = ji[jlh], ji[jh]
+ ji[jlh], ji[jh] = tmp2, tmp1
+
+
+# totally cool *in-place* DCT algorithm
+def transform2(vec):
+
+ # Initialization
+ n = len(vec)
+ levels = n.bit_length() - 1
+
+ # and pretend we LDed data in half-swapped *and* bit-reversed order as well
+ # TODO: merge these two
+ vec = halfrev2(vec, False)
+ vec = [vec[ri[i]] for i in range(n)]
+
+ # start the inner butterfly
+ size = n
+ while size >= 2:
+ halfsize = size // 2
+ tablestep = n // size
+ ir = list(range(0, n, size))
+ for i in ir:
+ # two lists of half-range indices, e.g. j 0123, jr 7654
+ j = list(range(i, i + halfsize))
+ jr = list(range(i+halfsize, i + size))
+ jr.reverse()
+ for ci, (jl, jh) in enumerate(zip(j, jr)):
+ vec[ri[ji[jl]]] = t1 + t2
+ vec[ri[ji[jh]]] = (t1 - t2) * (1/coeff)
+ hz2 = halfsize // 2 # can be zero which stops reversing 1-item lists
+ for ci, (jl, jh) in enumerate(zip(j[:hz2], jr[:hz2])):
+ jlh = jl+halfsize
+ tmp1, tmp2 = ji[jlh], ji[jh]
+ ji[jlh], ji[jh] = tmp2, tmp1
+ size //= 2
+
+def dct_outer_butterfly(SVSHAPE):
+ n = len(vec)
+ size = n // 2
+ while size >= 2:
+ halfsize = size // 2
+ ir = list(range(0, halfsize))
+ print ("itersum", halfsize, size, ir)
+ for i in ir:
+ jr = list(range(i+halfsize, i+n-halfsize, size))
+ print ("itersum jr", i+halfsize, i+size, jr)
+ for jh in jr:
+ vec[jh] += vec[jh+size]
+ print (" itersum", size, i, jh, jh+size)
+ size //= 2
+
+ return vec
+