#
-# Fast discrete cosine transform algorithms (Python)
+# Fast discrete cosine transform algorithm (Python)
#
+# Modifications made to create an in-place iterative DCT:
+# Copyright (c) 2021 Luke Kenneth Casson Leighton <lkcl@lkcl.net>
+#
+# License for modifications - SPDX: LGPLv3+
+#
+# Original fastdctlee.py by Nayuki:
# Copyright (c) 2020 Project Nayuki. (MIT License)
# https://www.nayuki.io/page/fast-discrete-cosine-transform-algorithms
#
# or other dealings in the Software.
#
#
-# Modifications made to in-place iterative DCT - SPDX: LGPLv3+
-# Copyright (c) 2021 Luke Kenneth Casson Leighton <lkcl@lkcl.net>
-#
# The modifications made are firstly to create an iterative schedule,
# rather than the more normal recursive algorithm. Secondly, the
# two butterflys are also separated out: inner butterfly does COS +/-
# the top half is read in reverse order (7 6 5 4) and written out
# to the target 4 5 6 7. the plan was to do this in two stages:
# write in-place in order 4 5 6 7 then swap afterwards (7-4), (6-5).
-# the insight then was: to modify the *indirection* indices rather
-# than swap the actual data, and then try the same trick as was done
-# with the bit-reversed LOAD. by a bizarre twist of good fortune
-# *that was not needed*! simply swapping the indices was enough!
+# however by leaving the data *in-place* and having subsequent
+# loops refer to the data *where it now is*, the swap is avoided
+# - thirdly, arrange for the data to be *pre-swapped* (in an inverse
+# order of how it would have got there, if that makes sense), such
+# that *when* it gets swapped, it ends up in the right order.
+# given that that will be a LD operation it's no big deal.
+#
# End result is that once the first butterfly is done - bear in mind
# it's in-place - the data is in the right order so that a second
# dead-straightforward iterative sum can be done: again, in-place.
return result
+# reverse top half of a list, recursively. the recursion can be
+# applied *after* or *before* the reversal of the top half. these
+# are inverses of each other.
+# this function is unused except to test the iterative version (halfrev2)
+def halfrev(l, pre_rev=True):
+ n = len(l)
+ if n == 1:
+ return l
+ ll, lh = l[:n//2], l[n//2:]
+ if pre_rev:
+ ll, lh = halfrev(ll, pre_rev), halfrev(lh, pre_rev)
+ lh.reverse()
+ if not pre_rev:
+ ll, lh = halfrev(ll, pre_rev), halfrev(lh, pre_rev)
+ return ll + lh
+
+
+# iterative version of [recursively-applied] half-rev.
+# relies on the list lengths being power-of-two and the fact
+# that bit-inversion of a list of binary numbers is the same
+# as reversing the order of the list
+# this version is dead easy to implement in hardware.
+# a big surprise is that the half-reversal can be done with
+# such a simple XOR. the inverse operation is slightly trickier
+def halfrev2(vec, pre_rev=True):
+ res = []
+ for i in range(len(vec)):
+ if pre_rev:
+ res.append(vec[i ^ (i>>1)])
+ else:
+ ri = i
+ bl = i.bit_length()
+ for ji in range(1, bl):
+ ri ^= (i >> ji)
+ res.append(vec[ri])
+ return res
+
+
# DCT type II, unscaled. Algorithm by Byeong Gi Lee, 1984.
# See: http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.118.3056&rep=rep1&type=pdf#page=34
# original (recursive) algorithm by Nayuki
return result
-# modified (iterative) algorithm by lkcl, based on Nayuki original
+# modified recursive algorithm, based on Nayuki original, which simply
+# prints out an awful lot of debug data. used to work out the ordering
+# for the iterative version by analysing the indices printed out
def transform(vector, indent=0):
idt = " " * indent
n = len(vector)
return result
-def transform_itersum(vector, indent=0):
- idt = " " * indent
- n = len(vector)
- if n == 1:
- return list(vector)
- elif n == 0 or n % 2 != 0:
- raise ValueError()
- else:
- half = n // 2
- alpha = [0] * half
- beta = [0] * half
- for i in range(half):
- t1, t2 = vector[i], vector[i+half]
- alpha[i] = t1
- beta[i] = t2
- alpha = transform_itersum(alpha, indent+1)
- beta = transform_itersum(beta , indent+1)
- result = [0] * n
- for i in range(half):
- result[i*2] = alpha[i]
- result[i*2+1] = beta[i]
- print(idt, "iter-merge", result)
- for i in range(half - 1):
- result[i*2+1] += result[i*2+3]
- print(idt, "iter-result", result)
- return result
-
-
# totally cool *in-place* DCT algorithm
def transform2(vec):
- vec = deepcopy(vec)
# Initialization
n = len(vec)
print ()
ri = list(range(n))
ri = [ri[reverse_bits(i, levels)] for i in range(n)]
- # and pretend we LDed the data in bit-reversed order as well
- vec = [vec[reverse_bits(i, levels)] for i in range(n)]
+ # reference list for not needing to do data-swaps, just swap what
+ # *indices* are referenced (two levels of indirection at the moment)
+ # pre-reverse the data-swap list so that it *ends up* in the order 0123..
+ ji = list(range(n))
+ ji = halfrev2(ji, True)
+ # and pretend we LDed data in half-swapped *and* bit-reversed order as well
+ # TODO: merge these two
+ vec = halfrev2(vec, False)
+ vec = [vec[ri[i]] for i in range(n)]
+
+ print ("ri", ri)
+ print ("ji", ji)
+
+ # create a cos table: not strictly necessary but here for illustrative
+ # purposes, to demonstrate the point that it really *is* iterative.
+ # this table could be cached and used multiple times rather than
+ # computed every time.
+ ctable = []
+ size = n
+ while size >= 2:
+ halfsize = size // 2
+ for i in range(n//size):
+ for ci in range(halfsize):
+ ctable.append((math.cos((ci + 0.5) * math.pi / size) * 2.0))
+ size //= 2
+
+ # start the inner butterfly
size = n
+ k = 0
while size >= 2:
halfsize = size // 2
tablestep = n // size
ir = list(range(0, n, size))
print (" xform", size, ir)
for i in ir:
- k = 0
+ # two lists of half-range indices, e.g. j 0123, jr 7654
j = list(range(i, i + halfsize))
jr = list(range(i+halfsize, i + size))
jr.reverse()
print (" xform jr", j, jr)
for ci, (jl, jh) in enumerate(zip(j, jr)):
- t1, t2 = vec[ri[jl]], vec[ri[jh]]
- coeff = (math.cos((ci + 0.5) * math.pi / size) * 2.0)
+ t1, t2 = vec[ri[ji[jl]]], vec[ri[ji[jh]]]
+ #coeff = (math.cos((ci + 0.5) * math.pi / size) * 2.0)
+ coeff = ctable[k]
+ k += 1
# normally DCT would use jl+halfsize not jh, here.
# to be able to work in-place, the idea is to perform a
- # swap afterwards. however actually we swap the *indices*
- vec[ri[jl]] = t1 + t2
- vec[ri[jh]] = (t1 - t2) * (1/coeff)
- print ("coeff", size, i, k, "jl", jl, "jh", jh,
- "i/n", (k+0.5)/size, coeff, vec[ri[jl]], vec[ri[jh]])
- k += tablestep
+ # swap afterwards.
+ vec[ri[ji[jl]]] = t1 + t2
+ vec[ri[ji[jh]]] = (t1 - t2) * (1/coeff)
+ print ("coeff", size, i, "ci", ci,
+ "jl", ri[ji[jl]], "jh", ri[ji[jh]],
+ "i/n", (ci+0.5)/size, coeff, vec[ri[ji[jl]]],
+ vec[ri[ji[jh]]])
# instead of using jl+halfsize, perform a swap here.
# use half of j/jr because actually jl+halfsize = reverse(j)
- # actually: swap the *indices*... not the actual data.
- # incredibly... bizarrely... this works *without* having
- # to do anything else.
hz2 = halfsize // 2 # can be zero which stops reversing 1-item lists
for ci, (jl, jh) in enumerate(zip(j[:hz2], jr[:hz2])):
- tmp = ri[jl+halfsize]
- ri[jl+halfsize] = ri[jh]
- ri[jh] = tmp
- print (" swap", size, i, ri[jl+halfsize], ri[jh])
+ jlh = jl+halfsize
+ # swap indices, NOT the data
+ tmp1, tmp2 = ji[jlh], ji[jh]
+ ji[jlh], ji[jh] = tmp2, tmp1
+ print (" swap", size, i, ji[jlh], ji[jh])
size //= 2
print("post-swapped", ri)
+ print("ji-swapped", ji)
print("transform2 pre-itersum", vec)
+ # now things are in the right order for the outer butterfly.
n = len(vec)
size = n // 2
while size >= 2:
inverse_transform(alpha, False, indent+1)
inverse_transform(beta , False, indent+1)
for i in range(half):
- x = alpha[i]
- y = beta[i] / (math.cos((i + 0.5) * math.pi / n) * 2)
+ x, y = alpha[i], beta[i]
+ coeff = (math.cos((i + 0.5) * math.pi / n) * 2)
+ y /= coeff
vector[i] = x + y
- vector[-(i + 1)] = x - y
+ vector[n-(i+1)] = x - y
print (idt, " v[%d] = alpha[%d]+beta[%d]" % (i, i, i))
print (idt, " v[%d] = alpha[%d]-beta[%d]" % (n-i-1, i, i))
return vector
-def inverse_transform2(vector, root=True):
+# totally cool *in-place* DCT algorithm
+def inverse_transform_iter(vec):
+
+ # in-place, but actually have to protect the input list!
+ vec = deepcopy(vec)
+
+ # Initialization
+ n = len(vec)
+ print ()
+ print ("transform2 inv", n, vec)
+ levels = n.bit_length() - 1
+
+ # reference (read/write) the in-place data in *reverse-bit-order*
+ ri = list(range(n))
+ ri = [ri[reverse_bits(i, levels)] for i in range(n)]
+
+ # reference list for not needing to do data-swaps, just swap what
+ # *indices* are referenced (two levels of indirection at the moment)
+ # pre-reverse the data-swap list so that it *ends up* in the order 0123..
+ ji = list(range(n))
+ ji = halfrev2(ji, False)
+
+ print ("ri", ri)
+ print ("ji", ji)
+
+ # create a cos table: not strictly necessary but here for illustrative
+ # purposes, to demonstrate the point that it really *is* iterative.
+ # this table could be cached and used multiple times rather than
+ # computed every time.
+ ctable = []
+ size = 2
+ while size <= n:
+ halfsize = size // 2
+ for i in range(n//size):
+ for ci in range(halfsize):
+ ctable.append((math.cos((ci + 0.5) * math.pi / size) * 2.0))
+ size *= 2
+
+ # first divide element 0 by 2
+ vec[0] /= 2.0
+
+ print("transform2-inv pre-itersum", vec)
+ vec = [vec[ri[i]] for i in range(n)]
+ vec = halfrev2(vec, True)
+ #print("transform2-inv post-itersum-reorder", vec)
+
+ # first the outer butterfly (iterative sum thing)
+ n = len(vec)
+ size = 2
+ while size <= n:
+ halfsize = size // 2
+ ir = list(range(0, halfsize))
+ print ("itersum", halfsize, size, ir)
+ for i in ir:
+ jr = list(range(i+halfsize, i+n-halfsize, size))
+ jr.reverse()
+ print ("itersum jr", i+halfsize, i+size, jr)
+ for jh in jr:
+ x = vec[ji[ri[jh]]]
+ y = vec[ji[ri[jh+size]]]
+ vec[ji[ri[jh+size]]] = x + y
+ print (" itersum", size, i, jh, jh+size,
+ x, y, "jh+sz", vec[ji[jh+size]])
+ size *= 2
+
+ print("transform2-inv post-itersum", vec)
+
+ # start the inner butterfly (coefficients)
+ size = 2
+ k = 0
+ while size <= n:
+ halfsize = size // 2
+ tablestep = n // size
+ ir = list(range(0, n, size))
+ print (" xform", size, ir)
+ for i in ir:
+ # two lists of half-range indices, e.g. j 0123, jr 7654
+ j = list(range(i, i + halfsize))
+ jr = list(range(i+halfsize, i + size))
+ jr.reverse()
+ print (" xform jr", j, jr)
+ for ci, (jl, jh) in enumerate(zip(j, jr)):
+ t1, t2 = vec[ji[jl]], vec[ji[jl+halfsize]]
+ #coeff = (math.cos((ci + 0.5) * math.pi / size) * 2.0)
+ coeff = ctable[k]
+ k += 1
+ # normally DCT would use jl+halfsize not jh, here.
+ # to be able to work in-place, the idea is to perform a
+ # swap afterwards.
+ vec[ji[jl]] = t1 + t2/coeff
+ vec[ji[jl+halfsize]] = t1 - t2/coeff
+ print ("coeff", size, i, "ci", ci,
+ "jl", ri[ji[jl]], "jh", ri[ji[jh]],
+ "i/n", (ci+0.5)/size, coeff,
+ "t1,t2", t1, t2,
+ "+/i", vec[ji[jl]], vec[ji[jh]])
+ #"+/i", vec2[ri[ji[jl]]], vec2[ri[ji[jh]]])
+ # instead of using jl+halfsize, perform a swap here.
+ # use half of j/jr because actually jl+halfsize = reverse(j)
+ hz2 = halfsize // 2 # can be zero which stops reversing 1-item lists
+ for ci, (jl, jh) in enumerate(zip(j[:hz2], jr[:hz2])):
+ jlh = jl+halfsize
+ # swap indices, NOT the data
+ tmp1, tmp2 = ji[jlh], ji[jh]
+ ji[jlh], ji[jh] = tmp2, tmp1
+ print (" swap", size, i, ji[jlh], ji[jh])
+ size *= 2
+
+ print("post-swapped", ri)
+ print("ji-swapped", ji)
+ ji = list(range(n))
+ ji = halfrev2(ji, True)
+ print("ji-calc ", ji)
+
+ print("transform2-inv result", vec)
+
+ return vec
+
+
+def inverse_transform2(vector, root=True, indent=0):
+ idt = " " * indent
+ n = len(vector)
+ if root:
+ vector = list(vector)
+ vector[0] /= 2
+ if n == 1:
+ return vector
+ elif n == 0 or n % 2 != 0:
+ raise ValueError()
+ else:
+ print (idt, "inverse_xform2", vector)
+ half = n // 2
+ alpha = [vector[0]]
+ beta = [vector[1]]
+ for i in range(2, n, 2):
+ alpha.append(vector[i])
+ beta.append(vector[i - 1] + vector[i + 1])
+ print (idt, " alpha", i, vector[i])
+ print (idt, " beta", i-1, i+1, vector[i-1], vector[i+1], "->",
+ beta[-1])
+ inverse_transform2(alpha, False, indent+1)
+ inverse_transform2(beta , False, indent+1)
+ for i in range(half):
+ x, y = alpha[i], beta[i]
+ coeff = (math.cos((i + 0.5) * math.pi / n) * 2)
+ vector[i] = x + y / coeff
+ vector[n-(i+1)] = x - y / coeff
+ print (idt, " v[%d] = %f+%f/%f=%f" % (i, x, y, coeff, vector[i]))
+ print (idt, " v[%d] = %f-%f/%f=%f" % (n-i-1, x, y,
+ coeff, vector[n-i-1]))
+ return vector
+
+
+def inverse_transform2_explore(vector, root=True, indent=0):
n = len(vector)
if root:
vector = list(vector)
raise ValueError()
else:
half = n // 2
- alpha = [0]
- beta = [1]
+ alpha = [vector[0]]
+ beta = [vector[1]]
for i in range(2, n, 2):
- alpha.append(i)
- beta.append(("add", i - 1, i + 1))
- inverse_transform2(alpha, False)
- inverse_transform2(beta , False)
+ alpha.append(vector[i])
+ beta.append(("add%d" % indent, vector[i - 1], vector[i + 1]))
+ inverse_transform2_explore(alpha, False, indent+1)
+ inverse_transform2_explore(beta , False, indent+1)
for i in range(half):
x = alpha[i]
- y = ("cos", beta[i], i)
- vector[i] = ("add", x, y)
- vector[-(i + 1)] = ("sub", x, y)
+ y = ("cos%d" % indent, beta[i], i, n)
+ vector[i] = ("add%d" % indent, x, y)
+ vector[n-(i + 1)] = ("sub%d" % indent, x, y)
return vector
+
+# does the outer butterfly in a recursive fashion, used in an
+# intermediary development of the in-place DCT.
+def transform_itersum(vector, indent=0):
+ idt = " " * indent
+ n = len(vector)
+ if n == 1:
+ return list(vector)
+ elif n == 0 or n % 2 != 0:
+ raise ValueError()
+ else:
+ half = n // 2
+ alpha = [0] * half
+ beta = [0] * half
+ for i in range(half):
+ t1, t2 = vector[i], vector[i+half]
+ alpha[i] = t1
+ beta[i] = t2
+ alpha = transform_itersum(alpha, indent+1)
+ beta = transform_itersum(beta , indent+1)
+ result = [0] * n
+ for i in range(half):
+ result[i*2] = alpha[i]
+ result[i*2+1] = beta[i]
+ print(idt, "iter-merge", result)
+ for i in range(half - 1):
+ result[i*2+1] += result[i*2+3]
+ print(idt, "iter-result", result)
+ return result
+
+
+# prints out an "add" schedule for the outer butterfly, recursively,
+# matching what transform_itersum does.
def itersum_explore(vector, indent=0):
idt = " " * indent
n = len(vector)
return result
+# prints out the exact same outer butterfly but does so iteratively.
+# by comparing the output from itersum_explore and itersum_explore2
+# and by drawing out the resultant ADDs as a graph it was possible
+# to deduce what the heck was going on.
def itersum_explore2(vec, indent=0):
n = len(vec)
size = n // 2
while size >= 2:
halfsize = size // 2
ir = list(range(0, halfsize))
- #ir.reverse()
print ("itersum", halfsize, size, ir)
for i in ir:
jr = list(range(i+halfsize, i+n-halfsize, size))
print (" itersum", size, i, jh, jh+size)
size //= 2
- #if reverse:
- # vec = [vec[reverse_bits(i, levels)] for i in range(n)]
-
return vec
if __name__ == '__main__':
levels = n.bit_length() - 1
vec = [vec[reverse_bits(i, levels)] for i in range(n)]
ops = itersum_explore(vec)
- #ops = [ops[reverse_bits(i, levels)] for i in range(n)]
for i, x in enumerate(ops):
print (i, x)
n = 16
vec = list(range(n))
levels = n.bit_length() - 1
- #vec = [vec[reverse_bits(i, levels)] for i in range(n)]
ops = itersum_explore2(vec)
for i, x in enumerate(ops):
print (i, x)
+
+ # halfrev test
+ vec = list(range(16))
+ print ("orig vec", vec)
+ vecr = halfrev(vec, True)
+ print ("reversed", vecr)
+ for i, v in enumerate(vecr):
+ print ("%2d %2d %04s %04s %04s" % (i, v,
+ bin(i)[2:], bin(v ^ i)[2:], bin(v)[2:]))
+ vecrr = halfrev(vecr, False)
+ assert vec == vecrr
+ vecrr = halfrev(vec, False)
+ print ("pre-reversed", vecrr)
+ for i, v in enumerate(vecrr):
+ print ("%2d %2d %04s %04s %04s" % (i, v,
+ bin(i)[2:], bin(v ^ i)[2:], bin(v)[2:]))
+ il = halfrev2(vec, False)
+ print ("iterative rev", il)
+ il = halfrev2(vec, True)
+ print ("iterative rev-true", il)
+
+ n = 4
+ vec = list(range(n))
+ levels = n.bit_length() - 1
+ ops = inverse_transform2_explore(vec)
+ for i, x in enumerate(ops):
+ print (i, x)
+