cleanup
[openpower-isa.git] / src / openpower / decoder / isa / fastdctlee.py
1 #
2 # Fast discrete cosine transform algorithm (Python)
3 #
4 # Modifications made to create an in-place iterative DCT:
5 # Copyright (c) 2021 Luke Kenneth Casson Leighton <lkcl@lkcl.net>
6 #
7 # License for modifications - SPDX: LGPLv3+
8 #
9 # Original fastdctlee.py by Nayuki:
10 # Copyright (c) 2020 Project Nayuki. (MIT License)
11 # https://www.nayuki.io/page/fast-discrete-cosine-transform-algorithms
12 #
13 # License for original fastdctlee.py by Nayuki:
14 #
15 # Permission is hereby granted, free of charge, to any person obtaining
16 # a copy of this software and associated documentation files (the
17 # "Software"), to deal in the Software without restriction, including
18 # without limitation the rights to use, copy, modify, merge, publish,
19 # distribute, sublicense, and/or sell copies of the Software, and to
20 # permit persons to whom the Software is furnished to do so, subject to
21 # the following conditions:
22 # - The above copyright notice and this permission notice shall be included in
23 # all copies or substantial portions of the Software.
24 # - The Software is provided "as is", without warranty of any kind, express or
25 # implied, including but not limited to the warranties of merchantability,
26 # fitness for a particular purpose and noninfringement. In no event shall the
27 # authors or copyright holders be liable for any claim, damages or other
28 # liability, whether in an action of contract, tort or otherwise,
29 # arising from, out of or in connection with the Software or the use
30 # or other dealings in the Software.
31 #
32 #
33 # The modifications made are firstly to create an iterative schedule,
34 # rather than the more normal recursive algorithm. Secondly, the
35 # two butterflys are also separated out: inner butterfly does COS +/-
36 # whilst outer butterfly does the iterative summing.
37 #
38 # However, to avoid data copying some additional tricks are played:
39 # - firstly, the data is LOADed in bit-reversed order (which is normally
40 # covered by the recursive algorithm due to the odd-even reconstruction)
41 # but then to reference the data in the correct order an array of
42 # bit-reversed indices is created, as a level of indirection.
43 # the data is bit-reversed but so are the indices, making it all A-Ok.
44 # - secondly, normally in DCT a 2nd target (copy) array is used where
45 # the top half is read in reverse order (7 6 5 4) and written out
46 # to the target 4 5 6 7. the plan was to do this in two stages:
47 # write in-place in order 4 5 6 7 then swap afterwards (7-4), (6-5).
48 # however by leaving the data *in-place* and having subsequent
49 # loops refer to the data *where it now is*, the swap is avoided
50 # - thirdly, arrange for the data to be *pre-swapped* (in an inverse
51 # order of how it would have got there, if that makes sense), such
52 # that *when* it gets swapped, it ends up in the right order.
53 # given that that will be a LD operation it's no big deal.
54 #
55 # End result is that once the first butterfly is done - bear in mind
56 # it's in-place - the data is in the right order so that a second
57 # dead-straightforward iterative sum can be done: again, in-place.
58 # Really surprising.
59
60 import math
61 from copy import deepcopy
62
63 # bits of the integer 'val'.
64 def reverse_bits(val, width):
65 result = 0
66 for _ in range(width):
67 result = (result << 1) | (val & 1)
68 val >>= 1
69 return result
70
71
72 # reverse top half of a list, recursively. the recursion can be
73 # applied *after* or *before* the reversal of the top half. these
74 # are inverses of each other.
75 # this function is unused except to test the iterative version (halfrev2)
76 def halfrev(l, pre_rev=True):
77 n = len(l)
78 if n == 1:
79 return l
80 ll, lh = l[:n//2], l[n//2:]
81 if pre_rev:
82 ll, lh = halfrev(ll, pre_rev), halfrev(lh, pre_rev)
83 lh.reverse()
84 if not pre_rev:
85 ll, lh = halfrev(ll, pre_rev), halfrev(lh, pre_rev)
86 return ll + lh
87
88
89 # iterative version of [recursively-applied] half-rev.
90 # relies on the list lengths being power-of-two and the fact
91 # that bit-inversion of a list of binary numbers is the same
92 # as reversing the order of the list
93 # this version is dead easy to implement in hardware.
94 # a big surprise is that the half-reversal can be done with
95 # such a simple XOR. the inverse operation is slightly trickier
96 def halfrev2(vec, pre_rev=True):
97 res = []
98 for i in range(len(vec)):
99 if pre_rev:
100 res.append(i ^ (i>>1))
101 else:
102 ri = i
103 bl = i.bit_length()
104 for ji in range(1, bl):
105 if (1<<ji) & i:
106 ri ^= ((1<<ji)-1)
107 res.append(vec[ri])
108 return res
109
110
111 # DCT type II, unscaled. Algorithm by Byeong Gi Lee, 1984.
112 # See: http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.118.3056&rep=rep1&type=pdf#page=34
113 # original (recursive) algorithm by Nayuki
114 def transform(vector, indent=0):
115 idt = " " * indent
116 n = len(vector)
117 if n == 1:
118 return list(vector)
119 elif n == 0 or n % 2 != 0:
120 raise ValueError()
121 else:
122 half = n // 2
123 alpha = [(vector[i] + vector[-(i + 1)]) for i in range(half)]
124 beta = [(vector[i] - vector[-(i + 1)]) /
125 (math.cos((i + 0.5) * math.pi / n) * 2.0)
126 for i in range(half)]
127 alpha = transform(alpha)
128 beta = transform(beta )
129 result = []
130 for i in range(half - 1):
131 result.append(alpha[i])
132 result.append(beta[i] + beta[i + 1])
133 result.append(alpha[-1])
134 result.append(beta [-1])
135 return result
136
137
138 # modified recursive algorithm, based on Nayuki original, which simply
139 # prints out an awful lot of debug data. used to work out the ordering
140 # for the iterative version by analysing the indices printed out
141 def transform(vector, indent=0):
142 idt = " " * indent
143 n = len(vector)
144 if n == 1:
145 return list(vector)
146 elif n == 0 or n % 2 != 0:
147 raise ValueError()
148 else:
149 half = n // 2
150 alpha = [0] * half
151 beta = [0] * half
152 print (idt, "xf", vector)
153 print (idt, "coeff", n, "->", end=" ")
154 for i in range(half):
155 t1, t2 = vector[i], vector[n-i-1]
156 k = (math.cos((i + 0.5) * math.pi / n) * 2.0)
157 print (i, n-i-1, "i/n", (i+0.5)/n, ":", k, end= " ")
158 alpha[i] = t1 + t2
159 beta[i] = (t1 - t2) * (1/k)
160 print ()
161 print (idt, "n", n, "alpha", end=" ")
162 for i in range(0, n, 2):
163 print (i, i//2, alpha[i//2], end=" ")
164 print()
165 print (idt, "n", n, "beta", end=" ")
166 for i in range(0, n, 2):
167 print (i, beta[i//2], end=" ")
168 print()
169 alpha = transform(alpha, indent+1)
170 beta = transform(beta , indent+1)
171 result = [0] * n
172 for i in range(half):
173 result[i*2] = alpha[i]
174 result[i*2+1] = beta[i]
175 print(idt, "merge", result)
176 for i in range(half - 1):
177 result[i*2+1] += result[i*2+3]
178 print(idt, "result", result)
179 return result
180
181
182 # totally cool *in-place* DCT algorithm
183 def transform2(vec):
184
185 # Initialization
186 n = len(vec)
187 print ()
188 print ("transform2", n)
189 levels = n.bit_length() - 1
190
191 # reference (read/write) the in-place data in *reverse-bit-order*
192 ri = list(range(n))
193 ri = [ri[reverse_bits(i, levels)] for i in range(n)]
194
195 # reference list for not needing to do data-swaps, just swap what
196 # *indices* are referenced (two levels of indirection at the moment)
197 # pre-reverse the data-swap list so that it *ends up* in the order 0123..
198 ji = list(range(n))
199 ji = halfrev2(ji, True)
200
201 # and pretend we LDed data in half-swapped *and* bit-reversed order as well
202 # TODO: merge these two
203 vec = halfrev2(vec, False)
204 vec = [vec[ri[i]] for i in range(n)]
205
206 # start the inner butterfly
207 size = n
208 while size >= 2:
209 halfsize = size // 2
210 tablestep = n // size
211 ir = list(range(0, n, size))
212 print (" xform", size, ir)
213 for i in ir:
214 # two lists of half-range indices, e.g. j 0123, jr 7654
215 j = list(range(i, i + halfsize))
216 jr = list(range(i+halfsize, i + size))
217 jr.reverse()
218 print (" xform jr", j, jr)
219 for ci, (jl, jh) in enumerate(zip(j, jr)):
220 t1, t2 = vec[ri[ji[jl]]], vec[ri[ji[jh]]]
221 coeff = (math.cos((ci + 0.5) * math.pi / size) * 2.0)
222 # normally DCT would use jl+halfsize not jh, here.
223 # to be able to work in-place, the idea is to perform a
224 # swap afterwards.
225 vec[ri[ji[jl]]] = t1 + t2
226 vec[ri[ji[jh]]] = (t1 - t2) * (1/coeff)
227 print ("coeff", size, i, "ci", ci,
228 "jl", ri[jl], "jh", ri[jh],
229 "i/n", (ci+0.5)/size, coeff, vec[ri[ji[jl]]],
230 vec[ri[ji[jh]]])
231 # instead of using jl+halfsize, perform a swap here.
232 # use half of j/jr because actually jl+halfsize = reverse(j)
233 hz2 = halfsize // 2 # can be zero which stops reversing 1-item lists
234 for ci, (jl, jh) in enumerate(zip(j[:hz2], jr[:hz2])):
235 jlh = jl+halfsize
236 # swap indices, NOT the data
237 tmp1, tmp2 = ji[jlh], ji[jh]
238 ji[jlh], ji[jh] = tmp2, tmp1
239 print (" swap", size, i, ji[jlh], ji[jh])
240 size //= 2
241
242 print("post-swapped", ri)
243 print("ji-swapped", ji)
244 print("transform2 pre-itersum", vec)
245
246 # now things are in the right order for the outer butterfly.
247 n = len(vec)
248 size = n // 2
249 while size >= 2:
250 halfsize = size // 2
251 ir = list(range(0, halfsize))
252 print ("itersum", halfsize, size, ir)
253 for i in ir:
254 jr = list(range(i+halfsize, i+n-halfsize, size))
255 print ("itersum jr", i+halfsize, i+size, jr)
256 for jh in jr:
257 vec[jh] += vec[jh+size]
258 print (" itersum", size, i, jh, jh+size)
259 size //= 2
260
261 print("transform2 result", vec)
262
263 return vec
264
265
266 # DCT type III, unscaled. Algorithm by Byeong Gi Lee, 1984.
267 # See: https://www.nayuki.io/res/fast-discrete-cosine-transform-algorithms/lee-new-algo-discrete-cosine-transform.pdf
268 def inverse_transform(vector, root=True, indent=0):
269 idt = " " * indent
270 if root:
271 vector = list(vector)
272 vector[0] /= 2
273 n = len(vector)
274 if n == 1:
275 return vector, [0]
276 elif n == 0 or n % 2 != 0:
277 raise ValueError()
278 else:
279 half = n // 2
280 alpha = [vector[0]]
281 beta = [vector[1]]
282 for i in range(2, n, 2):
283 alpha.append(vector[i])
284 beta.append(vector[i - 1] + vector[i + 1])
285 print (idt, "n", n, "alpha 0", end=" ")
286 for i in range(2, n, 2):
287 print (i, end=" ")
288 print ("beta 1", end=" ")
289 for i in range(2, n, 2):
290 print ("%d+%d" % (i-1, i+1), end=" ")
291 print()
292 inverse_transform(alpha, False, indent+1)
293 inverse_transform(beta , False, indent+1)
294 for i in range(half):
295 x = alpha[i]
296 y = beta[i] / (math.cos((i + 0.5) * math.pi / n) * 2)
297 vector[i] = x + y
298 vector[-(i + 1)] = x - y
299 print (idt, " v[%d] = alpha[%d]+beta[%d]" % (i, i, i))
300 print (idt, " v[%d] = alpha[%d]-beta[%d]" % (n-i-1, i, i))
301 return vector
302
303
304 def inverse_transform2(vector, root=True):
305 n = len(vector)
306 if root:
307 vector = list(vector)
308 if n == 1:
309 return vector
310 elif n == 0 or n % 2 != 0:
311 raise ValueError()
312 else:
313 half = n // 2
314 alpha = [0]
315 beta = [1]
316 for i in range(2, n, 2):
317 alpha.append(i)
318 beta.append(("add", i - 1, i + 1))
319 inverse_transform2(alpha, False)
320 inverse_transform2(beta , False)
321 for i in range(half):
322 x = alpha[i]
323 y = ("cos", beta[i], i)
324 vector[i] = ("add", x, y)
325 vector[-(i + 1)] = ("sub", x, y)
326 return vector
327
328
329 # does the outer butterfly in a recursive fashion, used in an
330 # intermediary development of the in-place DCT.
331 def transform_itersum(vector, indent=0):
332 idt = " " * indent
333 n = len(vector)
334 if n == 1:
335 return list(vector)
336 elif n == 0 or n % 2 != 0:
337 raise ValueError()
338 else:
339 half = n // 2
340 alpha = [0] * half
341 beta = [0] * half
342 for i in range(half):
343 t1, t2 = vector[i], vector[i+half]
344 alpha[i] = t1
345 beta[i] = t2
346 alpha = transform_itersum(alpha, indent+1)
347 beta = transform_itersum(beta , indent+1)
348 result = [0] * n
349 for i in range(half):
350 result[i*2] = alpha[i]
351 result[i*2+1] = beta[i]
352 print(idt, "iter-merge", result)
353 for i in range(half - 1):
354 result[i*2+1] += result[i*2+3]
355 print(idt, "iter-result", result)
356 return result
357
358
359 # prints out an "add" schedule for the outer butterfly, recursively,
360 # matching what transform_itersum does.
361 def itersum_explore(vector, indent=0):
362 idt = " " * indent
363 n = len(vector)
364 if n == 1:
365 return list(vector)
366 elif n == 0 or n % 2 != 0:
367 raise ValueError()
368 else:
369 half = n // 2
370 alpha = [0] * half
371 beta = [0] * half
372 for i in range(half):
373 t1, t2 = vector[i], vector[i+half]
374 alpha[i] = t1
375 beta[i] = t2
376 alpha = itersum_explore(alpha, indent+1)
377 beta = itersum_explore(beta , indent+1)
378 result = [0] * n
379 for i in range(half):
380 result[i*2] = alpha[i]
381 result[i*2+1] = beta[i]
382 print(idt, "iter-merge", result)
383 for i in range(half - 1):
384 result[i*2+1] = ("add", result[i*2+1], result[i*2+3])
385 print(idt, "iter-result", result)
386 return result
387
388
389 # prints out the exact same outer butterfly but does so iteratively.
390 # by comparing the output from itersum_explore and itersum_explore2
391 # and by drawing out the resultant ADDs as a graph it was possible
392 # to deduce what the heck was going on.
393 def itersum_explore2(vec, indent=0):
394 n = len(vec)
395 size = n // 2
396 while size >= 2:
397 halfsize = size // 2
398 ir = list(range(0, halfsize))
399 print ("itersum", halfsize, size, ir)
400 for i in ir:
401 jr = list(range(i+halfsize, i+n-halfsize, size))
402 print ("itersum jr", i+halfsize, i+size, jr)
403 for jh in jr:
404 vec[jh] = ("add", vec[jh], vec[jh+size])
405 print (" itersum", size, i, jh, jh+size)
406 size //= 2
407
408 return vec
409
410 if __name__ == '__main__':
411 n = 16
412 vec = list(range(n))
413 levels = n.bit_length() - 1
414 vec = [vec[reverse_bits(i, levels)] for i in range(n)]
415 ops = itersum_explore(vec)
416 for i, x in enumerate(ops):
417 print (i, x)
418
419 n = 16
420 vec = list(range(n))
421 levels = n.bit_length() - 1
422 ops = itersum_explore2(vec)
423 for i, x in enumerate(ops):
424 print (i, x)
425
426 # halfrev test
427 vec = list(range(16))
428 print ("orig vec", vec)
429 vecr = halfrev(vec, True)
430 print ("reversed", vecr)
431 for i, v in enumerate(vecr):
432 print ("%2d %2d %04s %04s %04s" % (i, v,
433 bin(i)[2:], bin(v ^ i)[2:], bin(v)[2:]))
434 vecrr = halfrev(vecr, False)
435 assert vec == vecrr
436 vecrr = halfrev(vec, False)
437 print ("pre-reversed", vecrr)
438 for i, v in enumerate(vecrr):
439 print ("%2d %2d %04s %04s %04s" % (i, v,
440 bin(i)[2:], bin(v ^ i)[2:], bin(v)[2:]))
441 il = halfrev2(vec, False)
442 print ("iterative rev", il)
443 il = halfrev2(vec, True)
444 print ("iterative rev-true", il)