pre-reverse order of data indices in DCT so that *after* the inner
[openpower-isa.git] / src / openpower / decoder / isa / fastdctlee.py
1 #
2 # Fast discrete cosine transform algorithms (Python)
3 #
4 # Copyright (c) 2020 Project Nayuki. (MIT License)
5 # https://www.nayuki.io/page/fast-discrete-cosine-transform-algorithms
6 #
7 # License for original fastdctlee.py by Nayuki:
8 #
9 # Permission is hereby granted, free of charge, to any person obtaining
10 # a copy of this software and associated documentation files (the
11 # "Software"), to deal in the Software without restriction, including
12 # without limitation the rights to use, copy, modify, merge, publish,
13 # distribute, sublicense, and/or sell copies of the Software, and to
14 # permit persons to whom the Software is furnished to do so, subject to
15 # the following conditions:
16 # - The above copyright notice and this permission notice shall be included in
17 # all copies or substantial portions of the Software.
18 # - The Software is provided "as is", without warranty of any kind, express or
19 # implied, including but not limited to the warranties of merchantability,
20 # fitness for a particular purpose and noninfringement. In no event shall the
21 # authors or copyright holders be liable for any claim, damages or other
22 # liability, whether in an action of contract, tort or otherwise,
23 # arising from, out of or in connection with the Software or the use
24 # or other dealings in the Software.
25 #
26 #
27 # Modifications made to create an in-place iterative DCT - SPDX: LGPLv3+
28 # Copyright (c) 2021 Luke Kenneth Casson Leighton <lkcl@lkcl.net>
29 #
30 # The modifications made are firstly to create an iterative schedule,
31 # rather than the more normal recursive algorithm. Secondly, the
32 # two butterflys are also separated out: inner butterfly does COS +/-
33 # whilst outer butterfly does the iterative summing.
34 #
35 # However, to avoid data copying some additional tricks are played:
36 # - firstly, the data is LOADed in bit-reversed order (which is normally
37 # covered by the recursive algorithm due to the odd-even reconstruction)
38 # but then to reference the data in the correct order an array of
39 # bit-reversed indices is created, as a level of indirection.
40 # the data is bit-reversed but so are the indices, making it all A-Ok.
41 # - secondly, normally in DCT a 2nd target (copy) array is used where
42 # the top half is read in reverse order (7 6 5 4) and written out
43 # to the target 4 5 6 7. the plan was to do this in two stages:
44 # write in-place in order 4 5 6 7 then swap afterwards (7-4), (6-5).
45 #
46 # End result is that once the first butterfly is done - bear in mind
47 # it's in-place - the data is in the right order so that a second
48 # dead-straightforward iterative sum can be done: again, in-place.
49 # Really surprising.
50
51 import math
52 from copy import deepcopy
53
54 # bits of the integer 'val'.
55 def reverse_bits(val, width):
56 result = 0
57 for _ in range(width):
58 result = (result << 1) | (val & 1)
59 val >>= 1
60 return result
61
62
63 # DCT type II, unscaled. Algorithm by Byeong Gi Lee, 1984.
64 # See: http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.118.3056&rep=rep1&type=pdf#page=34
65 # original (recursive) algorithm by Nayuki
66 def transform(vector, indent=0):
67 idt = " " * indent
68 n = len(vector)
69 if n == 1:
70 return list(vector)
71 elif n == 0 or n % 2 != 0:
72 raise ValueError()
73 else:
74 half = n // 2
75 alpha = [(vector[i] + vector[-(i + 1)]) for i in range(half)]
76 beta = [(vector[i] - vector[-(i + 1)]) /
77 (math.cos((i + 0.5) * math.pi / n) * 2.0)
78 for i in range(half)]
79 alpha = transform(alpha)
80 beta = transform(beta )
81 result = []
82 for i in range(half - 1):
83 result.append(alpha[i])
84 result.append(beta[i] + beta[i + 1])
85 result.append(alpha[-1])
86 result.append(beta [-1])
87 return result
88
89
90 # modified (iterative) algorithm by lkcl, based on Nayuki original
91 def transform(vector, indent=0):
92 idt = " " * indent
93 n = len(vector)
94 if n == 1:
95 return list(vector)
96 elif n == 0 or n % 2 != 0:
97 raise ValueError()
98 else:
99 half = n // 2
100 alpha = [0] * half
101 beta = [0] * half
102 print (idt, "xf", vector)
103 print (idt, "coeff", n, "->", end=" ")
104 for i in range(half):
105 t1, t2 = vector[i], vector[n-i-1]
106 k = (math.cos((i + 0.5) * math.pi / n) * 2.0)
107 print (i, n-i-1, "i/n", (i+0.5)/n, ":", k, end= " ")
108 alpha[i] = t1 + t2
109 beta[i] = (t1 - t2) * (1/k)
110 print ()
111 print (idt, "n", n, "alpha", end=" ")
112 for i in range(0, n, 2):
113 print (i, i//2, alpha[i//2], end=" ")
114 print()
115 print (idt, "n", n, "beta", end=" ")
116 for i in range(0, n, 2):
117 print (i, beta[i//2], end=" ")
118 print()
119 alpha = transform(alpha, indent+1)
120 beta = transform(beta , indent+1)
121 result = [0] * n
122 for i in range(half):
123 result[i*2] = alpha[i]
124 result[i*2+1] = beta[i]
125 print(idt, "merge", result)
126 for i in range(half - 1):
127 result[i*2+1] += result[i*2+3]
128 print(idt, "result", result)
129 return result
130
131 # reverse top half of a list, recursively. the recursion can be
132 # applied *after* or *before* the reversal of the top half. these
133 # are inverses of each other
134 def halfrev(l, pre_rev=True):
135 n = len(l)
136 if n == 1:
137 return l
138 ll, lh = l[:n//2], l[n//2:]
139 if pre_rev:
140 ll, lh = halfrev(ll, pre_rev), halfrev(lh, pre_rev)
141 lh.reverse()
142 if not pre_rev:
143 ll, lh = halfrev(ll, pre_rev), halfrev(lh, pre_rev)
144 return ll + lh
145
146
147 # totally cool *in-place* DCT algorithm
148 def transform2(vec):
149
150 vec = deepcopy(vec)
151 # Initialization
152 n = len(vec)
153 print ()
154 print ("transform2", n)
155 levels = n.bit_length() - 1
156
157 # reference (read/write) the in-place data in *reverse-bit-order*
158 ri = list(range(n))
159 ri = [ri[reverse_bits(i, levels)] for i in range(n)]
160
161 # reference list for not needing to do data-swaps, just swap what
162 # *indices* are referenced (two levels of indirection at the moment)
163 # pre-reverse the data-swap list so that it *ends up* in the order 0123..
164 ji = list(range(n))
165 ji = halfrev(ji, True)
166
167 # and pretend we LDed data in half-swapped *and* bit-reversed order as well
168 # TODO: merge these two
169 vec = halfrev(vec, False)
170 vec = [vec[ri[i]] for i in range(n)]
171
172 # start the inner butterfly
173 size = n
174 while size >= 2:
175 halfsize = size // 2
176 tablestep = n // size
177 ir = list(range(0, n, size))
178 print (" xform", size, ir)
179 for i in ir:
180 # two lists of half-range indices, e.g. j 0123, jr 7654
181 j = list(range(i, i + halfsize))
182 jr = list(range(i+halfsize, i + size))
183 jr.reverse()
184 print (" xform jr", j, jr)
185 for ci, (jl, jh) in enumerate(zip(j, jr)):
186 t1, t2 = vec[ri[ji[jl]]], vec[ri[ji[jh]]]
187 coeff = (math.cos((ci + 0.5) * math.pi / size) * 2.0)
188 # normally DCT would use jl+halfsize not jh, here.
189 # to be able to work in-place, the idea is to perform a
190 # swap afterwards.
191 vec[ri[ji[jl]]] = t1 + t2
192 vec[ri[ji[jh]]] = (t1 - t2) * (1/coeff)
193 print ("coeff", size, i, "ci", ci,
194 "jl", ri[jl], "jh", ri[jh],
195 "i/n", (ci+0.5)/size, coeff, vec[ri[ji[jl]]],
196 vec[ri[ji[jh]]])
197 # instead of using jl+halfsize, perform a swap here.
198 # use half of j/jr because actually jl+halfsize = reverse(j)
199 hz2 = halfsize // 2 # can be zero which stops reversing 1-item lists
200 for ci, (jl, jh) in enumerate(zip(j[:hz2], jr[:hz2])):
201 jlh = jl+halfsize
202 # swap indices
203 tmp1 = ji[jlh]
204 tmp2 = ji[jh]
205 ji[jlh] = tmp2
206 ji[jh] = tmp1
207 print (" swap", size, i, ji[jlh], ji[jh])
208 size //= 2
209
210 print("post-swapped", ri)
211 print("ji-swapped", ji)
212
213 print("transform2 pre-itersum", vec)
214
215 # now things are in the right order for the outer butterfly.
216 n = len(vec)
217 size = n // 2
218 while size >= 2:
219 halfsize = size // 2
220 ir = list(range(0, halfsize))
221 print ("itersum", halfsize, size, ir)
222 for i in ir:
223 jr = list(range(i+halfsize, i+n-halfsize, size))
224 print ("itersum jr", i+halfsize, i+size, jr)
225 for jh in jr:
226 vec[jh] += vec[jh+size]
227 print (" itersum", size, i, jh, jh+size)
228 size //= 2
229
230 print("transform2 result", vec)
231
232 return vec
233
234
235 # DCT type III, unscaled. Algorithm by Byeong Gi Lee, 1984.
236 # See: https://www.nayuki.io/res/fast-discrete-cosine-transform-algorithms/lee-new-algo-discrete-cosine-transform.pdf
237 def inverse_transform(vector, root=True, indent=0):
238 idt = " " * indent
239 if root:
240 vector = list(vector)
241 vector[0] /= 2
242 n = len(vector)
243 if n == 1:
244 return vector, [0]
245 elif n == 0 or n % 2 != 0:
246 raise ValueError()
247 else:
248 half = n // 2
249 alpha = [vector[0]]
250 beta = [vector[1]]
251 for i in range(2, n, 2):
252 alpha.append(vector[i])
253 beta.append(vector[i - 1] + vector[i + 1])
254 print (idt, "n", n, "alpha 0", end=" ")
255 for i in range(2, n, 2):
256 print (i, end=" ")
257 print ("beta 1", end=" ")
258 for i in range(2, n, 2):
259 print ("%d+%d" % (i-1, i+1), end=" ")
260 print()
261 inverse_transform(alpha, False, indent+1)
262 inverse_transform(beta , False, indent+1)
263 for i in range(half):
264 x = alpha[i]
265 y = beta[i] / (math.cos((i + 0.5) * math.pi / n) * 2)
266 vector[i] = x + y
267 vector[-(i + 1)] = x - y
268 print (idt, " v[%d] = alpha[%d]+beta[%d]" % (i, i, i))
269 print (idt, " v[%d] = alpha[%d]-beta[%d]" % (n-i-1, i, i))
270 return vector
271
272
273 def inverse_transform2(vector, root=True):
274 n = len(vector)
275 if root:
276 vector = list(vector)
277 if n == 1:
278 return vector
279 elif n == 0 or n % 2 != 0:
280 raise ValueError()
281 else:
282 half = n // 2
283 alpha = [0]
284 beta = [1]
285 for i in range(2, n, 2):
286 alpha.append(i)
287 beta.append(("add", i - 1, i + 1))
288 inverse_transform2(alpha, False)
289 inverse_transform2(beta , False)
290 for i in range(half):
291 x = alpha[i]
292 y = ("cos", beta[i], i)
293 vector[i] = ("add", x, y)
294 vector[-(i + 1)] = ("sub", x, y)
295 return vector
296
297
298 # does the outer butterfly in a recursive fashion, used in an
299 # intermediary development of the in-place DCT.
300 def transform_itersum(vector, indent=0):
301 idt = " " * indent
302 n = len(vector)
303 if n == 1:
304 return list(vector)
305 elif n == 0 or n % 2 != 0:
306 raise ValueError()
307 else:
308 half = n // 2
309 alpha = [0] * half
310 beta = [0] * half
311 for i in range(half):
312 t1, t2 = vector[i], vector[i+half]
313 alpha[i] = t1
314 beta[i] = t2
315 alpha = transform_itersum(alpha, indent+1)
316 beta = transform_itersum(beta , indent+1)
317 result = [0] * n
318 for i in range(half):
319 result[i*2] = alpha[i]
320 result[i*2+1] = beta[i]
321 print(idt, "iter-merge", result)
322 for i in range(half - 1):
323 result[i*2+1] += result[i*2+3]
324 print(idt, "iter-result", result)
325 return result
326
327
328 # prints out an "add" schedule for the outer butterfly, recursively,
329 # matching what transform_itersum does.
330 def itersum_explore(vector, indent=0):
331 idt = " " * indent
332 n = len(vector)
333 if n == 1:
334 return list(vector)
335 elif n == 0 or n % 2 != 0:
336 raise ValueError()
337 else:
338 half = n // 2
339 alpha = [0] * half
340 beta = [0] * half
341 for i in range(half):
342 t1, t2 = vector[i], vector[i+half]
343 alpha[i] = t1
344 beta[i] = t2
345 alpha = itersum_explore(alpha, indent+1)
346 beta = itersum_explore(beta , indent+1)
347 result = [0] * n
348 for i in range(half):
349 result[i*2] = alpha[i]
350 result[i*2+1] = beta[i]
351 print(idt, "iter-merge", result)
352 for i in range(half - 1):
353 result[i*2+1] = ("add", result[i*2+1], result[i*2+3])
354 print(idt, "iter-result", result)
355 return result
356
357
358 # prints out the exact same outer butterfly but does so iteratively.
359 # by comparing the output from itersum_explore and itersum_explore2
360 # and by drawing out the resultant ADDs as a graph it was possible
361 # to deduce what the heck was going on.
362 def itersum_explore2(vec, indent=0):
363 n = len(vec)
364 size = n // 2
365 while size >= 2:
366 halfsize = size // 2
367 ir = list(range(0, halfsize))
368 print ("itersum", halfsize, size, ir)
369 for i in ir:
370 jr = list(range(i+halfsize, i+n-halfsize, size))
371 print ("itersum jr", i+halfsize, i+size, jr)
372 for jh in jr:
373 vec[jh] = ("add", vec[jh], vec[jh+size])
374 print (" itersum", size, i, jh, jh+size)
375 size //= 2
376
377 return vec
378
379 if __name__ == '__main__':
380 n = 16
381 vec = list(range(n))
382 levels = n.bit_length() - 1
383 vec = [vec[reverse_bits(i, levels)] for i in range(n)]
384 ops = itersum_explore(vec)
385 for i, x in enumerate(ops):
386 print (i, x)
387
388 n = 16
389 vec = list(range(n))
390 levels = n.bit_length() - 1
391 ops = itersum_explore2(vec)
392 for i, x in enumerate(ops):
393 print (i, x)
394
395 # halfrev test
396 vec = list(range(8))
397 print ("orig vec", vec)
398 vecr = halfrev(vec, True)
399 print ("reversed", vecr)
400 vecrr = halfrev(vecr, False)
401 assert vec == vecrr
402 vecrr = halfrev(vec, False)
403 print ("pre-reversed", vecrr)