2 # Fast discrete cosine transform algorithms (Python)
4 # Copyright (c) 2020 Project Nayuki. (MIT License)
5 # https://www.nayuki.io/page/fast-discrete-cosine-transform-algorithms
7 # License for original fastdctlee.py by Nayuki:
9 # Permission is hereby granted, free of charge, to any person obtaining
10 # a copy of this software and associated documentation files (the
11 # "Software"), to deal in the Software without restriction, including
12 # without limitation the rights to use, copy, modify, merge, publish,
13 # distribute, sublicense, and/or sell copies of the Software, and to
14 # permit persons to whom the Software is furnished to do so, subject to
15 # the following conditions:
16 # - The above copyright notice and this permission notice shall be included in
17 # all copies or substantial portions of the Software.
18 # - The Software is provided "as is", without warranty of any kind, express or
19 # implied, including but not limited to the warranties of merchantability,
20 # fitness for a particular purpose and noninfringement. In no event shall the
21 # authors or copyright holders be liable for any claim, damages or other
22 # liability, whether in an action of contract, tort or otherwise,
23 # arising from, out of or in connection with the Software or the use
24 # or other dealings in the Software.
27 # Modifications made to create an in-place iterative DCT - SPDX: LGPLv3+
28 # Copyright (c) 2021 Luke Kenneth Casson Leighton <lkcl@lkcl.net>
30 # The modifications made are firstly to create an iterative schedule,
31 # rather than the more normal recursive algorithm. Secondly, the
32 # two butterflys are also separated out: inner butterfly does COS +/-
33 # whilst outer butterfly does the iterative summing.
35 # However, to avoid data copying some additional tricks are played:
36 # - firstly, the data is LOADed in bit-reversed order (which is normally
37 # covered by the recursive algorithm due to the odd-even reconstruction)
38 # but then to reference the data in the correct order an array of
39 # bit-reversed indices is created, as a level of indirection.
40 # the data is bit-reversed but so are the indices, making it all A-Ok.
41 # - secondly, normally in DCT a 2nd target (copy) array is used where
42 # the top half is read in reverse order (7 6 5 4) and written out
43 # to the target 4 5 6 7. the plan was to do this in two stages:
44 # write in-place in order 4 5 6 7 then swap afterwards (7-4), (6-5).
46 # End result is that once the first butterfly is done - bear in mind
47 # it's in-place - the data is in the right order so that a second
48 # dead-straightforward iterative sum can be done: again, in-place.
52 from copy
import deepcopy
54 # bits of the integer 'val'.
55 def reverse_bits(val
, width
):
57 for _
in range(width
):
58 result
= (result
<< 1) |
(val
& 1)
63 # DCT type II, unscaled. Algorithm by Byeong Gi Lee, 1984.
64 # See: http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.118.3056&rep=rep1&type=pdf#page=34
65 # original (recursive) algorithm by Nayuki
66 def transform(vector
, indent
=0):
71 elif n
== 0 or n
% 2 != 0:
75 alpha
= [(vector
[i
] + vector
[-(i
+ 1)]) for i
in range(half
)]
76 beta
= [(vector
[i
] - vector
[-(i
+ 1)]) /
77 (math
.cos((i
+ 0.5) * math
.pi
/ n
) * 2.0)
79 alpha
= transform(alpha
)
80 beta
= transform(beta
)
82 for i
in range(half
- 1):
83 result
.append(alpha
[i
])
84 result
.append(beta
[i
] + beta
[i
+ 1])
85 result
.append(alpha
[-1])
86 result
.append(beta
[-1])
90 # modified (iterative) algorithm by lkcl, based on Nayuki original
91 def transform(vector
, indent
=0):
96 elif n
== 0 or n
% 2 != 0:
102 print (idt
, "xf", vector
)
103 print (idt
, "coeff", n
, "->", end
=" ")
104 for i
in range(half
):
105 t1
, t2
= vector
[i
], vector
[n
-i
-1]
106 k
= (math
.cos((i
+ 0.5) * math
.pi
/ n
) * 2.0)
107 print (i
, n
-i
-1, "i/n", (i
+0.5)/n
, ":", k
, end
= " ")
109 beta
[i
] = (t1
- t2
) * (1/k
)
111 print (idt
, "n", n
, "alpha", end
=" ")
112 for i
in range(0, n
, 2):
113 print (i
, i
//2, alpha
[i
//2], end
=" ")
115 print (idt
, "n", n
, "beta", end
=" ")
116 for i
in range(0, n
, 2):
117 print (i
, beta
[i
//2], end
=" ")
119 alpha
= transform(alpha
, indent
+1)
120 beta
= transform(beta
, indent
+1)
122 for i
in range(half
):
123 result
[i
*2] = alpha
[i
]
124 result
[i
*2+1] = beta
[i
]
125 print(idt
, "merge", result
)
126 for i
in range(half
- 1):
127 result
[i
*2+1] += result
[i
*2+3]
128 print(idt
, "result", result
)
132 # totally cool *in-place* DCT algorithm
139 print ("transform2", n
)
140 levels
= n
.bit_length() - 1
142 # reference (read/write) the in-place data in *reverse-bit-order*
144 ri
= [ri
[reverse_bits(i
, levels
)] for i
in range(n
)]
146 # and pretend we LDed the data in bit-reversed order as well
147 vec
= [vec
[ri
[i
]] for i
in range(n
)]
149 # reference list for not needing to do data-swaps, just swap what
150 # *indices* are referenced (two levels of indirection at the moment)
153 # start the inner butterfly
157 tablestep
= n
// size
158 ir
= list(range(0, n
, size
))
159 print (" xform", size
, ir
)
161 # two lists of half-range indices, e.g. j 0123, jr 7654
162 j
= list(range(i
, i
+ halfsize
))
163 jr
= list(range(i
+halfsize
, i
+ size
))
165 print (" xform jr", j
, jr
)
166 for ci
, (jl
, jh
) in enumerate(zip(j
, jr
)):
167 t1
, t2
= vec
[ri
[ji
[jl
]]], vec
[ri
[ji
[jh
]]]
168 coeff
= (math
.cos((ci
+ 0.5) * math
.pi
/ size
) * 2.0)
169 # normally DCT would use jl+halfsize not jh, here.
170 # to be able to work in-place, the idea is to perform a
172 vec
[ri
[ji
[jl
]]] = t1
+ t2
173 vec
[ri
[ji
[jh
]]] = (t1
- t2
) * (1/coeff
)
174 print ("coeff", size
, i
, "ci", ci
,
175 "jl", ri
[jl
], "jh", ri
[jh
],
176 "i/n", (ci
+0.5)/size
, coeff
, vec
[ri
[ji
[jl
]]],
178 # instead of using jl+halfsize, perform a swap here.
179 # use half of j/jr because actually jl+halfsize = reverse(j)
180 hz2
= halfsize
// 2 # can be zero which stops reversing 1-item lists
181 for ci
, (jl
, jh
) in enumerate(zip(j
[:hz2
], jr
[:hz2
])):
186 vec
[ri
[jlh
]] = vec
[ri
[jh
]]
193 print (" swap", size
, i
, ji
[jlh
], ji
[jh
])
196 print("post-swapped", ri
)
197 print("ji-swapped", ji
)
199 print("transform2 pre-itersum", vec
)
201 # ok so the above in-place-ness caused the order to change.
202 # need to work out how to invert it back into the correct order
203 idx_search
= range(n
)
204 ir
= [idx_search
[reverse_bits(i
, levels
)] for i
in range(n
)]
205 print("transform2 expected bit-rev", ir
)
209 print("restored", vv
)
210 vv
= [vv
[reverse_bits(i
, levels
)] for i
in range(n
)]
211 print("restored-inverted", vv
)
213 # this is TEMPORARY! it should be possible to establish
214 # a *LOAD* schedule which has everything work out such that
215 # all data ends up *in* this order at the end of the inner butterfly
220 # now things are in the right order for the outer butterfly.
225 ir
= list(range(0, halfsize
))
226 print ("itersum", halfsize
, size
, ir
)
228 jr
= list(range(i
+halfsize
, i
+n
-halfsize
, size
))
229 print ("itersum jr", i
+halfsize
, i
+size
, jr
)
231 vec
[jh
] += vec
[jh
+size
]
232 print (" itersum", size
, i
, jh
, jh
+size
)
235 print("transform2 result", vec
)
240 # DCT type III, unscaled. Algorithm by Byeong Gi Lee, 1984.
241 # See: https://www.nayuki.io/res/fast-discrete-cosine-transform-algorithms/lee-new-algo-discrete-cosine-transform.pdf
242 def inverse_transform(vector
, root
=True, indent
=0):
245 vector
= list(vector
)
250 elif n
== 0 or n
% 2 != 0:
256 for i
in range(2, n
, 2):
257 alpha
.append(vector
[i
])
258 beta
.append(vector
[i
- 1] + vector
[i
+ 1])
259 print (idt
, "n", n
, "alpha 0", end
=" ")
260 for i
in range(2, n
, 2):
262 print ("beta 1", end
=" ")
263 for i
in range(2, n
, 2):
264 print ("%d+%d" % (i
-1, i
+1), end
=" ")
266 inverse_transform(alpha
, False, indent
+1)
267 inverse_transform(beta
, False, indent
+1)
268 for i
in range(half
):
270 y
= beta
[i
] / (math
.cos((i
+ 0.5) * math
.pi
/ n
) * 2)
272 vector
[-(i
+ 1)] = x
- y
273 print (idt
, " v[%d] = alpha[%d]+beta[%d]" % (i
, i
, i
))
274 print (idt
, " v[%d] = alpha[%d]-beta[%d]" % (n
-i
-1, i
, i
))
278 def inverse_transform2(vector
, root
=True):
281 vector
= list(vector
)
284 elif n
== 0 or n
% 2 != 0:
290 for i
in range(2, n
, 2):
292 beta
.append(("add", i
- 1, i
+ 1))
293 inverse_transform2(alpha
, False)
294 inverse_transform2(beta
, False)
295 for i
in range(half
):
297 y
= ("cos", beta
[i
], i
)
298 vector
[i
] = ("add", x
, y
)
299 vector
[-(i
+ 1)] = ("sub", x
, y
)
303 # does the outer butterfly in a recursive fashion, used in an
304 # intermediary development of the in-place DCT.
305 def transform_itersum(vector
, indent
=0):
310 elif n
== 0 or n
% 2 != 0:
316 for i
in range(half
):
317 t1
, t2
= vector
[i
], vector
[i
+half
]
320 alpha
= transform_itersum(alpha
, indent
+1)
321 beta
= transform_itersum(beta
, indent
+1)
323 for i
in range(half
):
324 result
[i
*2] = alpha
[i
]
325 result
[i
*2+1] = beta
[i
]
326 print(idt
, "iter-merge", result
)
327 for i
in range(half
- 1):
328 result
[i
*2+1] += result
[i
*2+3]
329 print(idt
, "iter-result", result
)
333 # prints out an "add" schedule for the outer butterfly, recursively,
334 # matching what transform_itersum does.
335 def itersum_explore(vector
, indent
=0):
340 elif n
== 0 or n
% 2 != 0:
346 for i
in range(half
):
347 t1
, t2
= vector
[i
], vector
[i
+half
]
350 alpha
= itersum_explore(alpha
, indent
+1)
351 beta
= itersum_explore(beta
, indent
+1)
353 for i
in range(half
):
354 result
[i
*2] = alpha
[i
]
355 result
[i
*2+1] = beta
[i
]
356 print(idt
, "iter-merge", result
)
357 for i
in range(half
- 1):
358 result
[i
*2+1] = ("add", result
[i
*2+1], result
[i
*2+3])
359 print(idt
, "iter-result", result
)
363 # prints out the exact same outer butterfly but does so iteratively.
364 # by comparing the output from itersum_explore and itersum_explore2
365 # and by drawing out the resultant ADDs as a graph it was possible
366 # to deduce what the heck was going on.
367 def itersum_explore2(vec
, indent
=0):
372 ir
= list(range(0, halfsize
))
373 print ("itersum", halfsize
, size
, ir
)
375 jr
= list(range(i
+halfsize
, i
+n
-halfsize
, size
))
376 print ("itersum jr", i
+halfsize
, i
+size
, jr
)
378 vec
[jh
] = ("add", vec
[jh
], vec
[jh
+size
])
379 print (" itersum", size
, i
, jh
, jh
+size
)
384 if __name__
== '__main__':
387 levels
= n
.bit_length() - 1
388 vec
= [vec
[reverse_bits(i
, levels
)] for i
in range(n
)]
389 ops
= itersum_explore(vec
)
390 for i
, x
in enumerate(ops
):
395 levels
= n
.bit_length() - 1
396 ops
= itersum_explore2(vec
)
397 for i
, x
in enumerate(ops
):