2 # Fast discrete cosine transform algorithm (Python)
4 # Modifications made to create an in-place iterative DCT:
5 # Copyright (c) 2021 Luke Kenneth Casson Leighton <lkcl@lkcl.net>
7 # License for modifications - SPDX: LGPLv3+
9 # Original fastdctlee.py by Nayuki:
10 # Copyright (c) 2020 Project Nayuki. (MIT License)
11 # https://www.nayuki.io/page/fast-discrete-cosine-transform-algorithms
13 # License for original fastdctlee.py by Nayuki:
15 # Permission is hereby granted, free of charge, to any person obtaining
16 # a copy of this software and associated documentation files (the
17 # "Software"), to deal in the Software without restriction, including
18 # without limitation the rights to use, copy, modify, merge, publish,
19 # distribute, sublicense, and/or sell copies of the Software, and to
20 # permit persons to whom the Software is furnished to do so, subject to
21 # the following conditions:
22 # - The above copyright notice and this permission notice shall be included in
23 # all copies or substantial portions of the Software.
24 # - The Software is provided "as is", without warranty of any kind, express or
25 # implied, including but not limited to the warranties of merchantability,
26 # fitness for a particular purpose and noninfringement. In no event shall the
27 # authors or copyright holders be liable for any claim, damages or other
28 # liability, whether in an action of contract, tort or otherwise,
29 # arising from, out of or in connection with the Software or the use
30 # or other dealings in the Software.
33 # The modifications made are firstly to create an iterative schedule,
34 # rather than the more normal recursive algorithm. Secondly, the
35 # two butterflys are also separated out: inner butterfly does COS +/-
36 # whilst outer butterfly does the iterative summing.
38 # However, to avoid data copying some additional tricks are played:
39 # - firstly, the data is LOADed in bit-reversed order (which is normally
40 # covered by the recursive algorithm due to the odd-even reconstruction)
41 # but then to reference the data in the correct order an array of
42 # bit-reversed indices is created, as a level of indirection.
43 # the data is bit-reversed but so are the indices, making it all A-Ok.
44 # - secondly, normally in DCT a 2nd target (copy) array is used where
45 # the top half is read in reverse order (7 6 5 4) and written out
46 # to the target 4 5 6 7. the plan was to do this in two stages:
47 # write in-place in order 4 5 6 7 then swap afterwards (7-4), (6-5).
48 # however by leaving the data *in-place* and having subsequent
49 # loops refer to the data *where it now is*, the swap is avoided
50 # - thirdly, arrange for the data to be *pre-swapped* (in an inverse
51 # order of how it would have got there, if that makes sense), such
52 # that *when* it gets swapped, it ends up in the right order.
53 # given that that will be a LD operation it's no big deal.
55 # End result is that once the first butterfly is done - bear in mind
56 # it's in-place - the data is in the right order so that a second
57 # dead-straightforward iterative sum can be done: again, in-place.
61 from copy
import deepcopy
63 # bits of the integer 'val'.
64 def reverse_bits(val
, width
):
66 for _
in range(width
):
67 result
= (result
<< 1) |
(val
& 1)
72 # reverse top half of a list, recursively. the recursion can be
73 # applied *after* or *before* the reversal of the top half. these
74 # are inverses of each other.
75 # this function is unused except to test the iterative version (halfrev2)
76 def halfrev(l
, pre_rev
=True):
80 ll
, lh
= l
[:n
//2], l
[n
//2:]
82 ll
, lh
= halfrev(ll
, pre_rev
), halfrev(lh
, pre_rev
)
85 ll
, lh
= halfrev(ll
, pre_rev
), halfrev(lh
, pre_rev
)
89 # iterative version of [recursively-applied] half-rev.
90 # relies on the list lengths being power-of-two and the fact
91 # that bit-inversion of a list of binary numbers is the same
92 # as reversing the order of the list
93 # this version is dead easy to implement in hardware.
94 # a big surprise is that the half-reversal can be done with
95 # such a simple XOR. the inverse operation is slightly trickier
96 def halfrev2(vec
, pre_rev
=True):
98 for i
in range(len(vec
)):
100 res
.append(vec
[i ^
(i
>>1)])
104 for ji
in range(1, bl
):
110 # DCT type II, unscaled. Algorithm by Byeong Gi Lee, 1984.
111 # See: http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.118.3056&rep=rep1&type=pdf#page=34
112 # original (recursive) algorithm by Nayuki
113 def transform(vector
, indent
=0):
118 elif n
== 0 or n
% 2 != 0:
122 alpha
= [(vector
[i
] + vector
[-(i
+ 1)]) for i
in range(half
)]
123 beta
= [(vector
[i
] - vector
[-(i
+ 1)]) /
124 (math
.cos((i
+ 0.5) * math
.pi
/ n
) * 2.0)
125 for i
in range(half
)]
126 alpha
= transform(alpha
)
127 beta
= transform(beta
)
129 for i
in range(half
- 1):
130 result
.append(alpha
[i
])
131 result
.append(beta
[i
] + beta
[i
+ 1])
132 result
.append(alpha
[-1])
133 result
.append(beta
[-1])
137 # modified recursive algorithm, based on Nayuki original, which simply
138 # prints out an awful lot of debug data. used to work out the ordering
139 # for the iterative version by analysing the indices printed out
140 def transform(vector
, indent
=0):
145 elif n
== 0 or n
% 2 != 0:
151 print (idt
, "xf", vector
)
152 print (idt
, "coeff", n
, "->", end
=" ")
153 for i
in range(half
):
154 t1
, t2
= vector
[i
], vector
[n
-i
-1]
155 k
= (math
.cos((i
+ 0.5) * math
.pi
/ n
) * 2.0)
156 print (i
, n
-i
-1, "i/n", (i
+0.5)/n
, ":", k
, end
= " ")
158 beta
[i
] = (t1
- t2
) * (1/k
)
160 print (idt
, "n", n
, "alpha", end
=" ")
161 for i
in range(0, n
, 2):
162 print (i
, i
//2, alpha
[i
//2], end
=" ")
164 print (idt
, "n", n
, "beta", end
=" ")
165 for i
in range(0, n
, 2):
166 print (i
, beta
[i
//2], end
=" ")
168 alpha
= transform(alpha
, indent
+1)
169 beta
= transform(beta
, indent
+1)
171 for i
in range(half
):
172 result
[i
*2] = alpha
[i
]
173 result
[i
*2+1] = beta
[i
]
174 print(idt
, "merge", result
)
175 for i
in range(half
- 1):
176 result
[i
*2+1] += result
[i
*2+3]
177 print(idt
, "result", result
)
181 # totally cool *in-place* DCT algorithm
187 print ("transform2", n
)
188 levels
= n
.bit_length() - 1
190 # reference (read/write) the in-place data in *reverse-bit-order*
192 ri
= [ri
[reverse_bits(i
, levels
)] for i
in range(n
)]
194 # reference list for not needing to do data-swaps, just swap what
195 # *indices* are referenced (two levels of indirection at the moment)
196 # pre-reverse the data-swap list so that it *ends up* in the order 0123..
198 ji
= halfrev2(ji
, True)
200 # and pretend we LDed data in half-swapped *and* bit-reversed order as well
201 # TODO: merge these two
202 vec
= halfrev2(vec
, False)
203 vec
= [vec
[ri
[i
]] for i
in range(n
)]
208 # create a cos table: not strictly necessary but here for illustrative
209 # purposes, to demonstrate the point that it really *is* iterative.
210 # this table could be cached and used multiple times rather than
211 # computed every time.
216 for i
in range(n
//size
):
217 for ci
in range(halfsize
):
218 ctable
.append((math
.cos((ci
+ 0.5) * math
.pi
/ size
) * 2.0))
221 # start the inner butterfly
226 tablestep
= n
// size
227 ir
= list(range(0, n
, size
))
228 print (" xform", size
, ir
)
230 # two lists of half-range indices, e.g. j 0123, jr 7654
231 j
= list(range(i
, i
+ halfsize
))
232 jr
= list(range(i
+halfsize
, i
+ size
))
234 print (" xform jr", j
, jr
)
235 for ci
, (jl
, jh
) in enumerate(zip(j
, jr
)):
236 t1
, t2
= vec
[ri
[ji
[jl
]]], vec
[ri
[ji
[jh
]]]
237 #coeff = (math.cos((ci + 0.5) * math.pi / size) * 2.0)
240 # normally DCT would use jl+halfsize not jh, here.
241 # to be able to work in-place, the idea is to perform a
243 vec
[ri
[ji
[jl
]]] = t1
+ t2
244 vec
[ri
[ji
[jh
]]] = (t1
- t2
) * (1/coeff
)
245 print ("coeff", size
, i
, "ci", ci
,
246 "jl", ri
[ji
[jl
]], "jh", ri
[ji
[jh
]],
247 "i/n", (ci
+0.5)/size
, coeff
, vec
[ri
[ji
[jl
]]],
249 # instead of using jl+halfsize, perform a swap here.
250 # use half of j/jr because actually jl+halfsize = reverse(j)
251 hz2
= halfsize
// 2 # can be zero which stops reversing 1-item lists
252 for ci
, (jl
, jh
) in enumerate(zip(j
[:hz2
], jr
[:hz2
])):
254 # swap indices, NOT the data
255 tmp1
, tmp2
= ji
[jlh
], ji
[jh
]
256 ji
[jlh
], ji
[jh
] = tmp2
, tmp1
257 print (" swap", size
, i
, ji
[jlh
], ji
[jh
])
260 print("post-swapped", ri
)
261 print("ji-swapped", ji
)
262 print("transform2 pre-itersum", vec
)
264 # now things are in the right order for the outer butterfly.
269 ir
= list(range(0, halfsize
))
270 print ("itersum", halfsize
, size
, ir
)
272 jr
= list(range(i
+halfsize
, i
+n
-halfsize
, size
))
273 print ("itersum jr", i
+halfsize
, i
+size
, jr
)
275 vec
[jh
] += vec
[jh
+size
]
276 print (" itersum", size
, i
, jh
, jh
+size
)
279 print("transform2 result", vec
)
284 # DCT type III, unscaled. Algorithm by Byeong Gi Lee, 1984.
285 # See: https://www.nayuki.io/res/fast-discrete-cosine-transform-algorithms/lee-new-algo-discrete-cosine-transform.pdf
286 def inverse_transform(vector
, root
=True, indent
=0):
289 vector
= list(vector
)
294 elif n
== 0 or n
% 2 != 0:
300 for i
in range(2, n
, 2):
301 alpha
.append(vector
[i
])
302 beta
.append(vector
[i
- 1] + vector
[i
+ 1])
303 print (idt
, "n", n
, "alpha 0", end
=" ")
304 for i
in range(2, n
, 2):
306 print ("beta 1", end
=" ")
307 for i
in range(2, n
, 2):
308 print ("%d+%d" % (i
-1, i
+1), end
=" ")
310 inverse_transform(alpha
, False, indent
+1)
311 inverse_transform(beta
, False, indent
+1)
312 for i
in range(half
):
313 x
, y
= alpha
[i
], beta
[i
]
314 coeff
= (math
.cos((i
+ 0.5) * math
.pi
/ n
) * 2)
317 vector
[n
-(i
+1)] = x
- y
318 print (idt
, " v[%d] = alpha[%d]+beta[%d]" % (i
, i
, i
))
319 print (idt
, " v[%d] = alpha[%d]-beta[%d]" % (n
-i
-1, i
, i
))
323 # totally cool *in-place* DCT algorithm
324 def inverse_transform_iter(vec
):
326 # in-place, but actually have to protect the input list!
332 print ("transform2 inv", n
, vec
)
333 levels
= n
.bit_length() - 1
335 # reference (read/write) the in-place data in *reverse-bit-order*
337 ri
= [ri
[reverse_bits(i
, levels
)] for i
in range(n
)]
339 # reference list for not needing to do data-swaps, just swap what
340 # *indices* are referenced (two levels of indirection at the moment)
341 # pre-reverse the data-swap list so that it *ends up* in the order 0123..
343 ji
= halfrev2(ji
, False)
348 # create a cos table: not strictly necessary but here for illustrative
349 # purposes, to demonstrate the point that it really *is* iterative.
350 # this table could be cached and used multiple times rather than
351 # computed every time.
356 for i
in range(n
//size
):
357 for ci
in range(halfsize
):
358 ctable
.append((math
.cos((ci
+ 0.5) * math
.pi
/ size
) * 2.0))
361 # first divide element 0 by 2
364 print("transform2-inv pre-itersum", vec
)
365 vec
= [vec
[ri
[i
]] for i
in range(n
)]
366 vec
= halfrev2(vec
, True)
367 #print("transform2-inv post-itersum-reorder", vec)
369 # first the outer butterfly (iterative sum thing)
374 ir
= list(range(0, halfsize
))
375 print ("itersum", halfsize
, size
, ir
)
377 jr
= list(range(i
+halfsize
, i
+n
-halfsize
, size
))
379 print ("itersum jr", i
+halfsize
, i
+size
, jr
)
382 y
= vec
[ji
[ri
[jh
+size
]]]
383 vec
[ji
[ri
[jh
+size
]]] = x
+ y
384 print (" itersum", size
, i
, jh
, jh
+size
,
385 x
, y
, "jh+sz", vec
[ji
[jh
+size
]])
388 print("transform2-inv post-itersum", vec
)
390 # start the inner butterfly (coefficients)
395 tablestep
= n
// size
396 ir
= list(range(0, n
, size
))
397 print (" xform", size
, ir
)
399 # two lists of half-range indices, e.g. j 0123, jr 7654
400 j
= list(range(i
, i
+ halfsize
))
401 jr
= list(range(i
+halfsize
, i
+ size
))
403 print (" xform jr", j
, jr
)
404 for ci
, (jl
, jh
) in enumerate(zip(j
, jr
)):
405 t1
, t2
= vec
[ji
[jl
]], vec
[ji
[jl
+halfsize
]]
406 #coeff = (math.cos((ci + 0.5) * math.pi / size) * 2.0)
409 # normally DCT would use jl+halfsize not jh, here.
410 # to be able to work in-place, the idea is to perform a
412 vec
[ji
[jl
]] = t1
+ t2
/coeff
413 vec
[ji
[jl
+halfsize
]] = t1
- t2
/coeff
414 print ("coeff", size
, i
, "ci", ci
,
415 "jl", ri
[ji
[jl
]], "jh", ri
[ji
[jh
]],
416 "i/n", (ci
+0.5)/size
, coeff
,
418 "+/i", vec
[ji
[jl
]], vec
[ji
[jh
]])
419 #"+/i", vec2[ri[ji[jl]]], vec2[ri[ji[jh]]])
420 # instead of using jl+halfsize, perform a swap here.
421 # use half of j/jr because actually jl+halfsize = reverse(j)
422 hz2
= halfsize
// 2 # can be zero which stops reversing 1-item lists
423 for ci
, (jl
, jh
) in enumerate(zip(j
[:hz2
], jr
[:hz2
])):
425 # swap indices, NOT the data
426 tmp1
, tmp2
= ji
[jlh
], ji
[jh
]
427 ji
[jlh
], ji
[jh
] = tmp2
, tmp1
428 print (" swap", size
, i
, ji
[jlh
], ji
[jh
])
431 print("post-swapped", ri
)
432 print("ji-swapped", ji
)
434 ji
= halfrev2(ji
, True)
435 print("ji-calc ", ji
)
437 print("transform2-inv result", vec
)
442 def inverse_transform2(vector
, root
=True, indent
=0):
446 vector
= list(vector
)
450 elif n
== 0 or n
% 2 != 0:
453 print (idt
, "inverse_xform2", vector
)
457 for i
in range(2, n
, 2):
458 alpha
.append(vector
[i
])
459 beta
.append(vector
[i
- 1] + vector
[i
+ 1])
460 print (idt
, " alpha", i
, vector
[i
])
461 print (idt
, " beta", i
-1, i
+1, vector
[i
-1], vector
[i
+1], "->",
463 inverse_transform2(alpha
, False, indent
+1)
464 inverse_transform2(beta
, False, indent
+1)
465 for i
in range(half
):
466 x
, y
= alpha
[i
], beta
[i
]
467 coeff
= (math
.cos((i
+ 0.5) * math
.pi
/ n
) * 2)
468 vector
[i
] = x
+ y
/ coeff
469 vector
[n
-(i
+1)] = x
- y
/ coeff
470 print (idt
, " v[%d] = %f+%f/%f=%f" % (i
, x
, y
, coeff
, vector
[i
]))
471 print (idt
, " v[%d] = %f-%f/%f=%f" % (n
-i
-1, x
, y
,
472 coeff
, vector
[n
-i
-1]))
476 def inverse_transform2_explore(vector
, root
=True, indent
=0):
479 vector
= list(vector
)
482 elif n
== 0 or n
% 2 != 0:
488 for i
in range(2, n
, 2):
489 alpha
.append(vector
[i
])
490 beta
.append(("add%d" % indent
, vector
[i
- 1], vector
[i
+ 1]))
491 inverse_transform2_explore(alpha
, False, indent
+1)
492 inverse_transform2_explore(beta
, False, indent
+1)
493 for i
in range(half
):
495 y
= ("cos%d" % indent
, beta
[i
], i
, n
)
496 vector
[i
] = ("add%d" % indent
, x
, y
)
497 vector
[n
-(i
+ 1)] = ("sub%d" % indent
, x
, y
)
502 # does the outer butterfly in a recursive fashion, used in an
503 # intermediary development of the in-place DCT.
504 def transform_itersum(vector
, indent
=0):
509 elif n
== 0 or n
% 2 != 0:
515 for i
in range(half
):
516 t1
, t2
= vector
[i
], vector
[i
+half
]
519 alpha
= transform_itersum(alpha
, indent
+1)
520 beta
= transform_itersum(beta
, indent
+1)
522 for i
in range(half
):
523 result
[i
*2] = alpha
[i
]
524 result
[i
*2+1] = beta
[i
]
525 print(idt
, "iter-merge", result
)
526 for i
in range(half
- 1):
527 result
[i
*2+1] += result
[i
*2+3]
528 print(idt
, "iter-result", result
)
532 # prints out an "add" schedule for the outer butterfly, recursively,
533 # matching what transform_itersum does.
534 def itersum_explore(vector
, indent
=0):
539 elif n
== 0 or n
% 2 != 0:
545 for i
in range(half
):
546 t1
, t2
= vector
[i
], vector
[i
+half
]
549 alpha
= itersum_explore(alpha
, indent
+1)
550 beta
= itersum_explore(beta
, indent
+1)
552 for i
in range(half
):
553 result
[i
*2] = alpha
[i
]
554 result
[i
*2+1] = beta
[i
]
555 print(idt
, "iter-merge", result
)
556 for i
in range(half
- 1):
557 result
[i
*2+1] = ("add", result
[i
*2+1], result
[i
*2+3])
558 print(idt
, "iter-result", result
)
562 # prints out the exact same outer butterfly but does so iteratively.
563 # by comparing the output from itersum_explore and itersum_explore2
564 # and by drawing out the resultant ADDs as a graph it was possible
565 # to deduce what the heck was going on.
566 def itersum_explore2(vec
, indent
=0):
571 ir
= list(range(0, halfsize
))
572 print ("itersum", halfsize
, size
, ir
)
574 jr
= list(range(i
+halfsize
, i
+n
-halfsize
, size
))
575 print ("itersum jr", i
+halfsize
, i
+size
, jr
)
577 vec
[jh
] = ("add", vec
[jh
], vec
[jh
+size
])
578 print (" itersum", size
, i
, jh
, jh
+size
)
583 if __name__
== '__main__':
586 levels
= n
.bit_length() - 1
587 vec
= [vec
[reverse_bits(i
, levels
)] for i
in range(n
)]
588 ops
= itersum_explore(vec
)
589 for i
, x
in enumerate(ops
):
594 levels
= n
.bit_length() - 1
595 ops
= itersum_explore2(vec
)
596 for i
, x
in enumerate(ops
):
600 vec
= list(range(16))
601 print ("orig vec", vec
)
602 vecr
= halfrev(vec
, True)
603 print ("reversed", vecr
)
604 for i
, v
in enumerate(vecr
):
605 print ("%2d %2d %04s %04s %04s" % (i
, v
,
606 bin(i
)[2:], bin(v ^ i
)[2:], bin(v
)[2:]))
607 vecrr
= halfrev(vecr
, False)
609 vecrr
= halfrev(vec
, False)
610 print ("pre-reversed", vecrr
)
611 for i
, v
in enumerate(vecrr
):
612 print ("%2d %2d %04s %04s %04s" % (i
, v
,
613 bin(i
)[2:], bin(v ^ i
)[2:], bin(v
)[2:]))
614 il
= halfrev2(vec
, False)
615 print ("iterative rev", il
)
616 il
= halfrev2(vec
, True)
617 print ("iterative rev-true", il
)
621 levels
= n
.bit_length() - 1
622 ops
= inverse_transform2_explore(vec
)
623 for i
, x
in enumerate(ops
):