swap the indices rather than the data in DCT top half: bizarrely this works!
[openpower-isa.git] / src / openpower / decoder / isa / fastdctlee.py
1 #
2 # Fast discrete cosine transform algorithms (Python)
3 #
4 # Copyright (c) 2020 Project Nayuki. (MIT License)
5 # https://www.nayuki.io/page/fast-discrete-cosine-transform-algorithms
6 #
7 # Permission is hereby granted, free of charge, to any person obtaining a copy of
8 # this software and associated documentation files (the "Software"), to deal in
9 # the Software without restriction, including without limitation the rights to
10 # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
11 # the Software, and to permit persons to whom the Software is furnished to do so,
12 # subject to the following conditions:
13 # - The above copyright notice and this permission notice shall be included in
14 # all copies or substantial portions of the Software.
15 # - The Software is provided "as is", without warranty of any kind, express or
16 # implied, including but not limited to the warranties of merchantability,
17 # fitness for a particular purpose and noninfringement. In no event shall the
18 # authors or copyright holders be liable for any claim, damages or other
19 # liability, whether in an action of contract, tort or otherwise, arising from,
20 # out of or in connection with the Software or the use or other dealings in the
21 # Software.
22 #
23
24 import math
25 from copy import deepcopy
26
27 # bits of the integer 'val'.
28 def reverse_bits(val, width):
29 result = 0
30 for _ in range(width):
31 result = (result << 1) | (val & 1)
32 val >>= 1
33 return result
34
35
36 # DCT type II, unscaled. Algorithm by Byeong Gi Lee, 1984.
37 # See: http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.118.3056&rep=rep1&type=pdf#page=34
38 def transform(vector, indent=0):
39 idt = " " * indent
40 n = len(vector)
41 if n == 1:
42 return list(vector)
43 elif n == 0 or n % 2 != 0:
44 raise ValueError()
45 else:
46 half = n // 2
47 alpha = [(vector[i] + vector[-(i + 1)]) for i in range(half)]
48 beta = [(vector[i] - vector[-(i + 1)]) /
49 (math.cos((i + 0.5) * math.pi / n) * 2.0)
50 for i in range(half)]
51 alpha = transform(alpha)
52 beta = transform(beta )
53 result = []
54 for i in range(half - 1):
55 result.append(alpha[i])
56 result.append(beta[i] + beta[i + 1])
57 result.append(alpha[-1])
58 result.append(beta [-1])
59 return result
60
61
62 def transform(vector, indent=0):
63 idt = " " * indent
64 n = len(vector)
65 if n == 1:
66 return list(vector)
67 elif n == 0 or n % 2 != 0:
68 raise ValueError()
69 else:
70 half = n // 2
71 alpha = [0] * half
72 beta = [0] * half
73 print (idt, "xf", vector)
74 print (idt, "coeff", n, "->", end=" ")
75 for i in range(half):
76 t1, t2 = vector[i], vector[n-i-1]
77 k = (math.cos((i + 0.5) * math.pi / n) * 2.0)
78 print (i, n-i-1, "i/n", (i+0.5)/n, ":", k, end= " ")
79 alpha[i] = t1 + t2
80 beta[i] = (t1 - t2) * (1/k)
81 print ()
82 print (idt, "n", n, "alpha", end=" ")
83 for i in range(0, n, 2):
84 print (i, i//2, alpha[i//2], end=" ")
85 print()
86 print (idt, "n", n, "beta", end=" ")
87 for i in range(0, n, 2):
88 print (i, beta[i//2], end=" ")
89 print()
90 alpha = transform(alpha, indent+1)
91 beta = transform(beta , indent+1)
92 result = [0] * n
93 for i in range(half):
94 result[i*2] = alpha[i]
95 result[i*2+1] = beta[i]
96 print(idt, "merge", result)
97 for i in range(half - 1):
98 result[i*2+1] += result[i*2+3]
99 print(idt, "result", result)
100 return result
101
102
103 def transform_itersum(vector, indent=0):
104 idt = " " * indent
105 n = len(vector)
106 if n == 1:
107 return list(vector)
108 elif n == 0 or n % 2 != 0:
109 raise ValueError()
110 else:
111 half = n // 2
112 alpha = [0] * half
113 beta = [0] * half
114 for i in range(half):
115 t1, t2 = vector[i], vector[i+half]
116 alpha[i] = t1
117 beta[i] = t2
118 alpha = transform_itersum(alpha, indent+1)
119 beta = transform_itersum(beta , indent+1)
120 result = [0] * n
121 for i in range(half):
122 result[i*2] = alpha[i]
123 result[i*2+1] = beta[i]
124 print(idt, "iter-merge", result)
125 for i in range(half - 1):
126 result[i*2+1] += result[i*2+3]
127 print(idt, "iter-result", result)
128 return result
129
130
131
132 def transform2(vec, reverse=True):
133
134 vec = deepcopy(vec)
135 # Initialization
136 n = len(vec)
137 print ()
138 print ("transform2", n)
139 levels = n.bit_length() - 1
140
141 # reference (read/write) the in-place data in *reverse-bit-order*
142 if reverse:
143 ri = list(range(n))
144 ri = [ri[reverse_bits(i, levels)] for i in range(n)]
145
146 if reverse:
147 vec = [vec[reverse_bits(i, levels)] for i in range(n)]
148
149 if False:
150 size = n
151 #v = list(range(n))
152 v = deepcopy(ri)
153 while size >= 2:
154 halfsize = size // 2
155 tablestep = n // size
156 ir = list(range(0, n, size))
157 for i in ir:
158 k = 0
159 j = list(range(i, i + halfsize))
160 jr = list(range(i+halfsize, i + size))
161 jr.reverse()
162 print (" xform jr", j, jr)
163 vec2 = deepcopy(v)
164 for ci, (jl, jh) in enumerate(zip(j, jr)):
165 t1, t2 = v[ri[jl]], v[ri[jh]]
166 vec2[ri[jl]] = t1
167 vec2[ri[jl+halfsize]] = t2
168 v = vec2
169 size //= 2
170
171 print ("ri rev", ri)
172 print ("rh rev", v)
173
174 #vec2 = deepcopy(vec)
175 #for i in range(n):
176 # vec[i] = vec2[v[i]]
177
178 ri = v
179
180 size = n
181 while size >= 2:
182 halfsize = size // 2
183 tablestep = n // size
184 ir = list(range(0, n, size))
185 print (" xform", size, ir)
186 for i in ir:
187 k = 0
188 j = list(range(i, i + halfsize))
189 jr = list(range(i+halfsize, i + size))
190 jr.reverse()
191 print (" xform jr", j, jr)
192 for ci, (jl, jh) in enumerate(zip(j, jr)):
193 t1, t2 = vec[ri[jl]], vec[ri[jh]]
194 coeff = (math.cos((ci + 0.5) * math.pi / size) * 2.0)
195 vec[ri[jl]] = t1 + t2
196 vec[ri[jh]] = (t1 - t2) * (1/coeff) # not jl+halfsize!
197 print ("coeff", size, i, k, "jl", jl, "jh", jh,
198 "i/n", (k+0.5)/size, coeff, vec[ri[jl]], vec[ri[jh]])
199 k += tablestep
200 # instead of using jl+halfsize, perform a swap here.
201 # use half of j/jr because actually jl+halfsize = reverse(j)
202 # actually: swap the *indices*... not the actual data.
203 # incredibly... bizarrely... this works *without* having
204 # to do anything else.
205 if len(j) > 1:
206 hz2 = halfsize // 2
207 for ci, (jl, jh) in enumerate(zip(j[:hz2], jr[:hz2])):
208 tmp = ri[jl+halfsize]
209 ri[jl+halfsize] = ri[jh]
210 ri[jh] = tmp
211 print (" swap", size, i, ri[jl+halfsize], ri[jh])
212 size //= 2
213
214 print("post-swapped", ri)
215 print("transform2 pre-itersum", vec)
216
217 n = len(vec)
218 size = n // 2
219 while size >= 2:
220 halfsize = size // 2
221 ir = list(range(0, halfsize))
222 print ("itersum", halfsize, size, ir)
223 for i in ir:
224 jr = list(range(i+halfsize, i+n-halfsize, size))
225 print ("itersum jr", i+halfsize, i+size, jr)
226 for jh in jr:
227 vec[jh] += vec[jh+size]
228 print (" itersum", size, i, jh, jh+size)
229 size //= 2
230
231 print("transform2 result", vec)
232
233 return vec
234
235
236 # DCT type III, unscaled. Algorithm by Byeong Gi Lee, 1984.
237 # See: https://www.nayuki.io/res/fast-discrete-cosine-transform-algorithms/lee-new-algo-discrete-cosine-transform.pdf
238 def inverse_transform(vector, root=True, indent=0):
239 idt = " " * indent
240 if root:
241 vector = list(vector)
242 vector[0] /= 2
243 n = len(vector)
244 if n == 1:
245 return vector, [0]
246 elif n == 0 or n % 2 != 0:
247 raise ValueError()
248 else:
249 half = n // 2
250 alpha = [vector[0]]
251 beta = [vector[1]]
252 for i in range(2, n, 2):
253 alpha.append(vector[i])
254 beta.append(vector[i - 1] + vector[i + 1])
255 print (idt, "n", n, "alpha 0", end=" ")
256 for i in range(2, n, 2):
257 print (i, end=" ")
258 print ("beta 1", end=" ")
259 for i in range(2, n, 2):
260 print ("%d+%d" % (i-1, i+1), end=" ")
261 print()
262 inverse_transform(alpha, False, indent+1)
263 inverse_transform(beta , False, indent+1)
264 for i in range(half):
265 x = alpha[i]
266 y = beta[i] / (math.cos((i + 0.5) * math.pi / n) * 2)
267 vector[i] = x + y
268 vector[-(i + 1)] = x - y
269 print (idt, " v[%d] = alpha[%d]+beta[%d]" % (i, i, i))
270 print (idt, " v[%d] = alpha[%d]-beta[%d]" % (n-i-1, i, i))
271 return vector
272
273
274 def inverse_transform2(vector, root=True):
275 n = len(vector)
276 if root:
277 vector = list(vector)
278 if n == 1:
279 return vector
280 elif n == 0 or n % 2 != 0:
281 raise ValueError()
282 else:
283 half = n // 2
284 alpha = [0]
285 beta = [1]
286 for i in range(2, n, 2):
287 alpha.append(i)
288 beta.append(("add", i - 1, i + 1))
289 inverse_transform2(alpha, False)
290 inverse_transform2(beta , False)
291 for i in range(half):
292 x = alpha[i]
293 y = ("cos", beta[i], i)
294 vector[i] = ("add", x, y)
295 vector[-(i + 1)] = ("sub", x, y)
296 return vector
297
298
299 def failllll_transform2(block):
300 N = len(block)
301 cos = [0.0] * (N>>1)
302
303 front = deepcopy(block)
304 back = deepcopy(block)
305
306 step = 1
307 j = N *2
308 half_N = N
309 prev_half_N = N
310
311 while j > 1: #// Cycle of iterations Input Butterfly
312 half_N = half_N >> 1
313 current_PI_half_By_N = (math.pi / 2) / prev_half_N
314 current_PI_By_N = 0.0
315 step_Phase = current_PI_half_By_N * 2.0
316 print ("n", N, "cos", end=" ")
317 for i in range(half_N):
318 #Precompute Cosine's coefficients
319 a = current_PI_By_N + current_PI_half_By_N
320 print (i, a / (math.pi), math.cos(a) * 2, end=" ")
321 cos[i] = 0.5 / math.cos(a)
322 current_PI_By_N += step_Phase
323 print()
324 k = 0
325 for x in range(step):
326 for i in range(half_N):
327 shift = k + prev_half_N - i - 1
328 back[k + i] = front[k + i] + front[shift]
329 back[k + half_N + i] = (front[k + i] - front[shift]) * cos[i]
330 print ("xf coeff", N, j, i, x, "k/kh", k+i, k+half_N+i)
331 k += prev_half_N
332 temp = front
333 front = back
334 back = temp
335 j = j >> 1
336 step = step << 1
337 prev_half_N = half_N
338
339 half_N = 2
340 prev_half_N = 2
341 j = 2
342
343 print("xform intermediate", front)
344
345 while j < N: # Cycle of Out ButterFly
346 k = 0
347 print ("out", j, N, step, half_N)
348 for x in range(step):
349 for i in range(half_N - 1):
350 back[k + (i << 1)] = front[k + i]
351 back[k + (i << 1) + 1] = (front[k + half_N + i] +
352 front[k + half_N + i + 1])
353 print (" out", j, x, i, "k", k,
354 "k+i<<1", k+(i<<1), "hh1", k+half_N+i)
355 back[k + ((half_N - 1) << 1)] = front[k + half_N - 1]
356 back[k + (half_N << 1) - 1] = front[k + (half_N << 1) - 1]
357 k += prev_half_N
358
359 temp = front
360 front = back
361 back = temp
362 j = j << 1
363 step = step >> 1
364 half_N = prev_half_N
365 prev_half_N = prev_half_N << 1
366
367 for i in range(N):
368 block[i] = front[i] #// Unload DCT coefficients
369 dN = 2.0
370 #block[0] = block[0] / dN #// Compute DC.
371
372 print("transform2 result", block)
373 return block
374
375
376 def itersum_explore(vector, indent=0):
377 idt = " " * indent
378 n = len(vector)
379 if n == 1:
380 return list(vector)
381 elif n == 0 or n % 2 != 0:
382 raise ValueError()
383 else:
384 half = n // 2
385 alpha = [0] * half
386 beta = [0] * half
387 for i in range(half):
388 t1, t2 = vector[i], vector[i+half]
389 alpha[i] = t1
390 beta[i] = t2
391 alpha = itersum_explore(alpha, indent+1)
392 beta = itersum_explore(beta , indent+1)
393 result = [0] * n
394 for i in range(half):
395 result[i*2] = alpha[i]
396 result[i*2+1] = beta[i]
397 print(idt, "iter-merge", result)
398 for i in range(half - 1):
399 result[i*2+1] = ("add", result[i*2+1], result[i*2+3])
400 print(idt, "iter-result", result)
401 return result
402
403
404 def itersum_explore2(vec, indent=0):
405 n = len(vec)
406 size = n // 2
407 while size >= 2:
408 halfsize = size // 2
409 ir = list(range(0, halfsize))
410 #ir.reverse()
411 print ("itersum", halfsize, size, ir)
412 for i in ir:
413 jr = list(range(i+halfsize, i+n-halfsize, size))
414 print ("itersum jr", i+halfsize, i+size, jr)
415 for jh in jr:
416 vec[jh] = ("add", vec[jh], vec[jh+size])
417 print (" itersum", size, i, jh, jh+size)
418 size //= 2
419
420 #if reverse:
421 # vec = [vec[reverse_bits(i, levels)] for i in range(n)]
422
423 return vec
424
425 if __name__ == '__main__':
426 n = 16
427 vec = list(range(n))
428 levels = n.bit_length() - 1
429 vec = [vec[reverse_bits(i, levels)] for i in range(n)]
430 ops = itersum_explore(vec)
431 #ops = [ops[reverse_bits(i, levels)] for i in range(n)]
432 for i, x in enumerate(ops):
433 print (i, x)
434
435 n = 16
436 vec = list(range(n))
437 levels = n.bit_length() - 1
438 #vec = [vec[reverse_bits(i, levels)] for i in range(n)]
439 ops = itersum_explore2(vec)
440 for i, x in enumerate(ops):
441 print (i, x)