sv_analysis: decouple common disclaimer
[openpower-isa.git] / src / openpower / decoder / isa / remap_dct_yield.py
1 # DCT "REMAP" scheduler
2 #
3 # Modifications made to create an in-place iterative DCT:
4 # Copyright (c) 2021 Luke Kenneth Casson Leighton <lkcl@lkcl.net>
5 #
6 # SPDX: LGPLv3+
7 #
8 # Original fastdctlee.py by Nayuki:
9 # Copyright (c) 2020 Project Nayuki. (MIT License)
10 # https://www.nayuki.io/page/fast-discrete-cosine-transform-algorithms
11
12 from copy import deepcopy
13 import math
14
15 # bits of the integer 'val'.
16 def reverse_bits(val, width):
17 result = 0
18 for _ in range(width):
19 result = (result << 1) | (val & 1)
20 val >>= 1
21 return result
22
23
24 # iterative version of [recursively-applied] half-rev.
25 # relies on the list lengths being power-of-two and the fact
26 # that bit-inversion of a list of binary numbers is the same
27 # as reversing the order of the list
28 # this version is dead easy to implement in hardware.
29 # a big surprise is that the half-reversal can be done with
30 # such a simple XOR. the inverse operation is slightly trickier
31 def halfrev2(vec, pre_rev=True):
32 res = []
33 for i in range(len(vec)):
34 if pre_rev:
35 res.append(vec[i ^ (i>>1)])
36 else:
37 ri = i
38 bl = i.bit_length()
39 for ji in range(1, bl):
40 ri ^= (i >> ji)
41 res.append(vec[ri])
42 return res
43
44
45 # python "yield" can be iterated. use this to make it clear how
46 # the indices are generated by using natural-looking nested loops
47 def iterate_dct_inner_halfswap_loadstore(SVSHAPE):
48 # get indices to iterate over, in the required order
49 n = SVSHAPE.lims[0]
50 mode = SVSHAPE.lims[1]
51 print ("inner halfswap loadstore", n, mode, SVSHAPE.skip,
52 "submode", SVSHAPE.submode2)
53
54 # reference list for not needing to do data-swaps, just swap what
55 # *indices* are referenced (two levels of indirection at the moment)
56 # pre-reverse the data-swap list so that it *ends up* in the order 0123..
57 ji = list(range(n))
58
59 levels = n.bit_length() - 1
60 ri = [reverse_bits(i, levels) for i in range(n)]
61
62 if SVSHAPE.mode == 0b01: # FFT, bitrev only
63 ji = [ji[ri[i]] for i in range(n)]
64 elif SVSHAPE.submode2 == 0b001:
65 ji = [ji[ri[i]] for i in range(n)]
66 ji = halfrev2(ji, True)
67 else:
68 ji = halfrev2(ji, False)
69 ji = [ji[ri[i]] for i in range(n)]
70
71 # invert order if requested
72 if SVSHAPE.invxyz[0]:
73 ji.reverse()
74
75 for i, jl in enumerate(ji):
76 y_end = jl == ji[-1]
77 yield jl, (0b111 if y_end else 0b000)
78
79
80 # python "yield" can be iterated. use this to make it clear how
81 # the indices are generated by using natural-looking nested loops
82 def iterate_dct_inner_costable_indices(SVSHAPE):
83 # get indices to iterate over, in the required order
84 n = SVSHAPE.lims[0]
85 mode = SVSHAPE.lims[1]
86 print ("inner costable", mode)
87 # creating lists of indices to iterate over in each dimension
88 # has to be done dynamically, because it depends on the size
89 # first, the size-based loop (which can be done statically)
90 x_r = []
91 size = 2
92 while size <= n:
93 x_r.append(size)
94 size *= 2
95 # invert order if requested
96 if SVSHAPE.invxyz[0]:
97 x_r.reverse()
98
99 if len(x_r) == 0:
100 return
101
102 #print ("ri", ri)
103 #print ("ji", ji)
104
105 # start an infinite (wrapping) loop
106 skip = 0
107 z_end = 1 # doesn't exist in this, only 2 loops
108 k = 0
109 while True:
110 for size in x_r: # loop over 3rd order dimension (size)
111 x_end = size == x_r[-1]
112 # y_r schedule depends on size
113 halfsize = size // 2
114 y_r = []
115 for i in range(0, n, size):
116 y_r.append(i)
117 # invert if requested
118 if SVSHAPE.invxyz[1]: y_r.reverse()
119 # two lists of half-range indices, e.g. j 0123, jr 7654
120 j = list(range(0, halfsize))
121 # invert if requested
122 if SVSHAPE.invxyz[2]: j_r.reverse()
123 #print ("xform jr", jr)
124 # loop over 1st order dimension
125 for ci, jl in enumerate(j):
126 y_end = jl == j[-1]
127 # now depending on MODE return the index. inner butterfly
128 if SVSHAPE.skip == 0b00: # in [0b00, 0b10]:
129 result = k # offset into COS table
130 elif SVSHAPE.skip == 0b10: #
131 result = ci # coefficient helper
132 elif SVSHAPE.skip == 0b11: #
133 result = size # coefficient helper
134 loopends = (z_end |
135 ((y_end and z_end)<<1) |
136 ((y_end and x_end and z_end)<<2))
137
138 yield result + SVSHAPE.offset, loopends
139 k += 1
140
141 # python "yield" can be iterated. use this to make it clear how
142 # the indices are generated by using natural-looking nested loops
143 def iterate_dct_inner_butterfly_indices(SVSHAPE):
144 # get indices to iterate over, in the required order
145 n = SVSHAPE.lims[0]
146 mode = SVSHAPE.lims[1]
147 print ("inner butterfly", mode, SVSHAPE.skip, "submode", SVSHAPE.submode2)
148 # creating lists of indices to iterate over in each dimension
149 # has to be done dynamically, because it depends on the size
150 # first, the size-based loop (which can be done statically)
151 x_r = []
152 size = 2
153 while size <= n:
154 x_r.append(size)
155 size *= 2
156 # invert order if requested
157 if SVSHAPE.invxyz[0]:
158 x_r.reverse()
159
160 if len(x_r) == 0:
161 return
162
163 # reference (read/write) the in-place data in *reverse-bit-order*
164 ri = list(range(n))
165 if SVSHAPE.submode2 == 0b01:
166 levels = n.bit_length() - 1
167 ri = [ri[reverse_bits(i, levels)] for i in range(n)]
168
169 # reference list for not needing to do data-swaps, just swap what
170 # *indices* are referenced (two levels of indirection at the moment)
171 # pre-reverse the data-swap list so that it *ends up* in the order 0123..
172 ji = list(range(n))
173 inplace_mode = True
174 if inplace_mode and SVSHAPE.submode2 == 0b01:
175 #print ("inplace mode")
176 ji = halfrev2(ji, True)
177 if inplace_mode and SVSHAPE.submode2 == 0b11:
178 ji = halfrev2(ji, False)
179
180 print ("ri", ri)
181 print ("ji", ji)
182
183 # start an infinite (wrapping) loop
184 while True:
185 k = 0
186 k_start = 0
187 for size in x_r: # loop over 3rd order dimension (size)
188 x_end = size == x_r[-1]
189 # y_r schedule depends on size
190 halfsize = size // 2
191 y_r = []
192 for i in range(0, n, size):
193 y_r.append(i)
194 # invert if requested
195 if SVSHAPE.invxyz[1]: y_r.reverse()
196 for i in y_r: # loop over 2nd order dimension
197 y_end = i == y_r[-1]
198 # two lists of half-range indices, e.g. j 0123, jr 7654
199 j = list(range(i, i + halfsize))
200 jr = list(range(i+halfsize, i + size))
201 jr.reverse()
202 # invert if requested
203 if SVSHAPE.invxyz[2]:
204 j.reverse()
205 jr.reverse()
206 hz2 = halfsize // 2 # zero stops reversing 1-item lists
207 #print ("xform jr", jr)
208 # loop over 1st order dimension
209 k = k_start
210 for ci, (jl, jh) in enumerate(zip(j, jr)):
211 z_end = jl == j[-1]
212 # now depending on MODE return the index. inner butterfly
213 if SVSHAPE.skip == 0b00: # in [0b00, 0b10]:
214 if SVSHAPE.submode2 == 0b11: # iDCT
215 result = ji[ri[jl]] # lower half
216 else:
217 result = ri[ji[jl]] # lower half
218 elif SVSHAPE.skip == 0b01: # in [0b01, 0b11]:
219 if SVSHAPE.submode2 == 0b11: # iDCT
220 result = ji[ri[jl+halfsize]] # upper half
221 else:
222 result = ri[ji[jh]] # upper half
223 elif mode == 4:
224 # COS table pre-generated mode
225 if SVSHAPE.skip == 0b10: #
226 result = k # cos table offset
227 else: # mode 2
228 # COS table generated on-demand ("Vertical-First") mode
229 if SVSHAPE.skip == 0b10: #
230 result = ci # coefficient helper
231 elif SVSHAPE.skip == 0b11: #
232 result = size # coefficient helper
233 loopends = (z_end |
234 ((y_end and z_end)<<1) |
235 ((y_end and x_end and z_end)<<2))
236
237 yield result + SVSHAPE.offset, loopends
238 k += 1
239
240 # now in-place swap
241 if inplace_mode:
242 for ci, (jl, jh) in enumerate(zip(j[:hz2], jr[:hz2])):
243 jlh = jl+halfsize
244 print ("inplace swap", jh, jlh)
245 tmp1, tmp2 = ji[jlh], ji[jh]
246 ji[jlh], ji[jh] = tmp2, tmp1
247
248 # new k_start point for cos tables( runs inside x_r loop NOT i loop)
249 k_start += halfsize
250
251
252 # python "yield" can be iterated. use this to make it clear how
253 # the indices are generated by using natural-looking nested loops
254 def iterate_dct_outer_butterfly_indices(SVSHAPE):
255 # get indices to iterate over, in the required order
256 n = SVSHAPE.lims[0]
257 mode = SVSHAPE.lims[1]
258 # creating lists of indices to iterate over in each dimension
259 # has to be done dynamically, because it depends on the size
260 # first, the size-based loop (which can be done statically)
261 x_r = []
262 size = n // 2
263 while size >= 2:
264 x_r.append(size)
265 size //= 2
266 # invert order if requested
267 if SVSHAPE.invxyz[0]:
268 x_r.reverse()
269
270 if len(x_r) == 0:
271 return
272
273 print ("outer butterfly", mode, SVSHAPE.skip, "submode", SVSHAPE.submode2)
274
275 # I-DCT, reference (read/write) the in-place data in *reverse-bit-order*
276 ri = list(range(n))
277 if SVSHAPE.submode2 in [0b11, 0b01]:
278 levels = n.bit_length() - 1
279 ri = [ri[reverse_bits(i, levels)] for i in range(n)]
280
281 # reference list for not needing to do data-swaps, just swap what
282 # *indices* are referenced (two levels of indirection at the moment)
283 # pre-reverse the data-swap list so that it *ends up* in the order 0123..
284 ji = list(range(n))
285 inplace_mode = False # need the space... SVSHAPE.skip in [0b10, 0b11]
286 if SVSHAPE.submode2 == 0b11:
287 ji = halfrev2(ji, False)
288
289 print ("ri", ri)
290 print ("ji", ji)
291
292 # start an infinite (wrapping) loop
293 while True:
294 k = 0
295 k_start = 0
296 for size in x_r: # loop over 3rd order dimension (size)
297 halfsize = size//2
298 x_end = size == x_r[-1]
299 y_r = list(range(0, halfsize))
300 print ("itersum", halfsize, size, y_r, "invert", SVSHAPE.invxyz[1])
301 # invert if requested
302 if SVSHAPE.invxyz[1]: y_r.reverse()
303 for i in y_r: # loop over 2nd order dimension
304 y_end = i == y_r[-1]
305 # one list to create iterative-sum schedule
306 jr = list(range(i+halfsize, i+n-halfsize, size))
307 # invert if requested
308 if SVSHAPE.invxyz[2]: jr.reverse()
309 print ("itersum jr", i+halfsize, i+size, jr,
310 "invert", SVSHAPE.invxyz[2])
311 hz2 = halfsize // 2 # zero stops reversing 1-item lists
312 k = k_start
313 for ci, jh in enumerate(jr): # loop over 1st order dimension
314 z_end = jh == jr[-1]
315 #print (" itersum", size, i, jh, jh+size)
316 if mode == 4:
317 # COS table pre-generated mode
318 if SVSHAPE.skip == 0b00: # in [0b00, 0b10]:
319 if SVSHAPE.submode2 == 0b11: # iDCT
320 result = ji[ri[jh]] # upper half
321 else:
322 result = ri[ji[jh]] # lower half
323 elif SVSHAPE.skip == 0b01: # in [0b01, 0b11]:
324 if SVSHAPE.submode2 == 0b11: # iDCT
325 result = ji[ri[jh+size]] # upper half
326 else:
327 result = ri[ji[jh+size]] # upper half
328 elif SVSHAPE.skip == 0b10: #
329 result = k # cos table offset
330 else:
331 # COS table generated on-demand ("Vertical-First") mode
332 if SVSHAPE.skip == 0b00: # in [0b00, 0b10]:
333 if SVSHAPE.submode2 == 0b11: # iDCT
334 result = ji[ri[jh]] # lower half
335 else:
336 result = ri[ji[jh]] # lower half
337 elif SVSHAPE.skip == 0b01: # in [0b01, 0b11]:
338 if SVSHAPE.submode2 == 0b11: # iDCT
339 result = ji[ri[jh+size]] # upper half
340 else:
341 result = ri[ji[jh+size]] # upper half
342 elif SVSHAPE.skip == 0b10: #
343 result = ci # coefficient helper
344 elif SVSHAPE.skip == 0b11: #
345 result = size # coefficient helper
346 loopends = (z_end |
347 ((y_end and z_end)<<1) |
348 ((y_end and x_end and z_end)<<2))
349
350 yield result + SVSHAPE.offset, loopends
351 k += 1
352
353 # now in-place swap (disabled)
354 if False and SVSHAPE.submode2 == 0b11:
355 j = list(range(i, i + halfsize))
356 jr = list(range(i+halfsize, i + size))
357 jr.reverse()
358 for ci, (jl, jh) in enumerate(zip(j[:hz2], jr[:hz2])):
359 jlh = jl+halfsize
360 tmp1, tmp2 = ji[jlh], ji[jh]
361 print ("inplace swap", jh, jlh, "actual", tmp1, tmp2)
362 ji[jlh], ji[jh] = tmp2, tmp1
363
364 # new k_start point for cos tables( runs inside x_r loop NOT i loop)
365 k_start += halfsize
366
367
368 def pprint_schedule(schedule, n):
369 size = 2
370 idx = 0
371 while size <= n:
372 halfsize = size // 2
373 tablestep = n // size
374 print ("size %d halfsize %d tablestep %d" % \
375 (size, halfsize, tablestep))
376 for i in range(0, n, size):
377 prefix = "i %d\t" % i
378 for j in range(i, i + halfsize):
379 (jl, je), (jh, he) = schedule[idx]
380 print (" %-3d\t%s j=%-2d jh=%-2d "
381 "j[jl=%-2d] j[jh=%-2d]" % \
382 (idx, prefix, j, j+halfsize,
383 jl, jh,
384 ),
385 "end", bin(je)[2:], bin(je)[2:])
386 idx += 1
387 size *= 2
388
389 def pprint_schedule_outer(schedule, n):
390 size = 2
391 idx = 0
392 while size <= n//2:
393 halfsize = size // 2
394 tablestep = n // size
395 print ("size %d halfsize %d tablestep %d" % \
396 (size, halfsize, tablestep))
397 y_r = list(range(0, halfsize))
398 for i in y_r:
399 prefix = "i %d\t" % i
400 jr = list(range(i+halfsize, i+n-halfsize, size))
401 for j in jr:
402 (jl, je), (jh, he) = schedule[idx]
403 print (" %-3d\t%s j=%-2d jh=%-2d "
404 "j[jl=%-2d] j[jh=%-2d]" % \
405 (idx, prefix, j, j+halfsize,
406 jl, jh,
407 ),
408 "end", bin(je)[2:], bin(je)[2:])
409 idx += 1
410 size *= 2
411
412
413 # totally cool *in-place* inverse DCT algorithm using yield REMAPs
414 def inverse_transform2(vec):
415
416 vec = deepcopy(vec)
417
418 # Initialization
419 n = len(vec)
420 print ()
421 print ("inverse_transform2", n)
422 levels = n.bit_length() - 1
423
424 # set up dims
425 xdim = n
426
427 # divide element 0 by 2
428 vec[0] /= 2.0
429
430 # reference (read/write) the in-place data in *reverse-bit-order*
431 ri = list(range(n))
432 ri = [ri[reverse_bits(i, levels)] for i in range(n)]
433
434 # pretend we LDed data in half-swapped *and* bit-reversed order as well
435 # TODO: merge these two
436 vec = [vec[ri[i]] for i in range(n)]
437 vec = halfrev2(vec, True)
438
439 print("inverse_transform2 post-alter", vec)
440
441 # create a cos table: not strictly necessary but here for illustrative
442 # purposes, to demonstrate the point that it really *is* iterative.
443 # this table could be cached and used multiple times rather than
444 # computed every time.
445 ctable = []
446 size = 2
447 while size <= n:
448 halfsize = size // 2
449 for ci in range(halfsize):
450 coeff = (math.cos((ci + 0.5) * math.pi / size) * 2.0)
451 ctable.append(coeff)
452 print ("coeff", size, "ci", ci, "k", len(ctable)-1,
453 "i/n", (ci+0.5)/size, coeff)
454 size *= 2
455
456 # set up an SVSHAPE
457 class SVSHAPE:
458 pass
459
460 # XXX TODO
461 if False:
462 # ci schedule
463 SVSHAPE0 = SVSHAPE()
464 SVSHAPE0.lims = [xdim, 4, 0]
465 SVSHAPE0.mode = 0b01
466 SVSHAPE0.submode2 = 0b01
467 SVSHAPE0.skip = 0b10
468 SVSHAPE0.offset = 0 # experiment with different offset, here
469 SVSHAPE0.invxyz = [1,0,1] # inversion if desired
470 # size schedule
471 SVSHAPE1 = SVSHAPE()
472 SVSHAPE1.lims = [xdim, 4, 0]
473 SVSHAPE1.mode = 0b01
474 SVSHAPE1.submode2 = 0b01
475 SVSHAPE1.skip = 0b11
476 SVSHAPE1.offset = 0 # experiment with different offset, here
477 SVSHAPE1.invxyz = [1,0,1] # inversion if desired
478 # k schedule
479 SVSHAPE2 = SVSHAPE()
480 SVSHAPE2.lims = [xdim, 4, 0]
481 SVSHAPE2.mode = 0b01
482 SVSHAPE2.submode2 = 0b01
483 SVSHAPE2.skip = 0b00
484 SVSHAPE2.offset = 0 # experiment with different offset, here
485 SVSHAPE2.invxyz = [1,0,1] # inversion if desired
486
487 # enumerate over the iterator function, getting new indices
488 i0 = iterate_dct_inner_costable_indices(SVSHAPE0)
489 i1 = iterate_dct_inner_costable_indices(SVSHAPE1)
490 i2 = iterate_dct_inner_costable_indices(SVSHAPE2)
491 for ((ci, cie), (size, sze), (k, ke)) in \
492 zip(i0, i1, i2):
493 print ("xform2 cos", ci, size, k)
494 coeff = (math.cos((ci + 0.5) * math.pi / size) * 2.0)
495 assert coeff == ctable[k]
496 print ("coeff", size, "ci", ci, "k", k,
497 "i/n", (ci+0.5)/size, coeff,
498 "end", bin(cie), bin(sze), bin(ke))
499 if cie == 0b111: # all loops end
500 break
501
502 # now things are in the right order for the outer butterfly.
503
504 # j schedule
505 SVSHAPE0 = SVSHAPE()
506 SVSHAPE0.lims = [xdim, 0b0000010, 0]
507 SVSHAPE0.submode2 = 0b11
508 SVSHAPE0.mode = 0b11
509 SVSHAPE0.skip = 0b00
510 SVSHAPE0.offset = 0 # experiment with different offset, here
511 SVSHAPE0.invxyz = [1,0,1] # inversion if desired
512 # j+halfstep schedule
513 SVSHAPE1 = SVSHAPE()
514 SVSHAPE1.lims = [xdim, 0b0000010, 0]
515 SVSHAPE1.mode = 0b11
516 SVSHAPE1.submode2 = 0b11
517 SVSHAPE1.skip = 0b01
518 SVSHAPE1.offset = 0 # experiment with different offset, here
519 SVSHAPE1.invxyz = [1,0,1] # inversion if desired
520
521 # enumerate over the iterator function, getting new indices
522 i0 = iterate_dct_outer_butterfly_indices(SVSHAPE0)
523 i1 = iterate_dct_outer_butterfly_indices(SVSHAPE1)
524 for k, ((jl, jle), (jh, jhe)) in enumerate(zip(i0, i1)):
525 print ("itersum jr", jl, jh,
526 "end", bin(jle), bin(jhe),
527 vec[jl], vec[jh], vec[jh]+vec[jl])
528 vec[jh] += vec[jl]
529 size //= 2
530 if jle == 0b111: # all loops end
531 break
532
533 print("transform2 pre-itersum", vec)
534
535 ################
536 # INNER butterfly
537 ################
538
539 # j schedule
540 SVSHAPE0 = SVSHAPE()
541 SVSHAPE0.lims = [xdim, 0b000001, 0]
542 SVSHAPE0.mode = 0b11
543 SVSHAPE0.submode2 = 0b11
544 SVSHAPE0.skip = 0b00
545 SVSHAPE0.offset = 0 # experiment with different offset, here
546 SVSHAPE0.invxyz = [0,0,0] # inversion if desired
547 # j+halfstep schedule
548 SVSHAPE1 = SVSHAPE()
549 SVSHAPE1.lims = [xdim, 0b000001, 0]
550 SVSHAPE1.mode = 0b11
551 SVSHAPE1.submode2 = 0b11
552 SVSHAPE1.skip = 0b01
553 SVSHAPE1.offset = 0 # experiment with different offset, here
554 SVSHAPE1.invxyz = [0,0,0] # inversion if desired
555 # ci schedule
556 SVSHAPE2 = SVSHAPE()
557 SVSHAPE2.lims = [xdim, 0b000001, 0]
558 SVSHAPE2.mode = 0b11
559 SVSHAPE2.submode2 = 0b11
560 SVSHAPE2.skip = 0b10
561 SVSHAPE2.offset = 0 # experiment with different offset, here
562 SVSHAPE2.invxyz = [0,0,0] # inversion if desired
563 # size schedule
564 SVSHAPE3 = SVSHAPE()
565 SVSHAPE3.lims = [xdim, 0b000001, 0]
566 SVSHAPE3.mode = 0b11
567 SVSHAPE3.submode2 = 0b11
568 SVSHAPE3.skip = 0b11
569 SVSHAPE3.offset = 0 # experiment with different offset, here
570 SVSHAPE3.invxyz = [0,0,0] # inversion if desired
571
572 # enumerate over the iterator function, getting new indices
573 i0 = iterate_dct_inner_butterfly_indices(SVSHAPE0)
574 i1 = iterate_dct_inner_butterfly_indices(SVSHAPE1)
575 i2 = iterate_dct_inner_butterfly_indices(SVSHAPE2)
576 i3 = iterate_dct_inner_butterfly_indices(SVSHAPE3)
577 for k, ((jl, jle), (jh, jhe), (ci, cie), (size, sze)) in \
578 enumerate(zip(i0, i1, i2, i3)):
579 t1, t2 = vec[jl], vec[jh]
580 print ("xform2", jl, jh, ci, size)
581 coeff = (math.cos((ci + 0.5) * math.pi / size) * 2.0)
582 #assert coeff == ctable[k]
583 vec[jl] = t1 + t2/coeff
584 vec[jh] = t1 - t2/coeff
585 print ("coeff", size, "ci", ci,
586 "jl", jl, "jh", jh,
587 "i/n", (ci+0.5)/size, coeff, "t1/2", t1, t2,
588 "+/-", vec[jl], vec[jh],
589 "end", bin(jle), bin(jhe))
590 if jle == 0b111: # all loops end
591 break
592
593 print("inverse_transform2 result", vec)
594
595 return vec
596
597
598 # totally cool *in-place* DCT algorithm using yield REMAPs
599 def transform2(vec):
600
601 # Initialization
602 n = len(vec)
603 print ()
604 print ("transform2", n)
605 levels = n.bit_length() - 1
606
607 # set up dims
608 xdim = n
609
610 # reference (read/write) the in-place data in *reverse-bit-order*
611 ri = list(range(n))
612 ri = [ri[reverse_bits(i, levels)] for i in range(n)]
613
614 # and pretend we LDed data in half-swapped *and* bit-reversed order as well
615 # TODO: merge these two
616 vec = halfrev2(vec, False)
617 vec = [vec[ri[i]] for i in range(n)]
618
619 # create a cos table: not strictly necessary but here for illustrative
620 # purposes, to demonstrate the point that it really *is* iterative.
621 # this table could be cached and used multiple times rather than
622 # computed every time.
623 ctable = []
624 size = n
625 while size >= 2:
626 halfsize = size // 2
627 for ci in range(halfsize):
628 coeff = (math.cos((ci + 0.5) * math.pi / size) * 2.0)
629 ctable.append(coeff)
630 print ("coeff", size, "ci", ci, "k", len(ctable)-1,
631 "i/n", (ci+0.5)/size, coeff)
632 size //= 2
633
634 # set up an SVSHAPE
635 class SVSHAPE:
636 pass
637 # ci schedule
638 SVSHAPE0 = SVSHAPE()
639 SVSHAPE0.lims = [xdim, 4, 0]
640 SVSHAPE0.mode = 0b01
641 SVSHAPE0.submode2 = 0b01
642 SVSHAPE0.skip = 0b10
643 SVSHAPE0.offset = 0 # experiment with different offset, here
644 SVSHAPE0.invxyz = [1,0,0] # inversion if desired
645 # size schedule
646 SVSHAPE1 = SVSHAPE()
647 SVSHAPE1.lims = [xdim, 4, 0]
648 SVSHAPE1.mode = 0b01
649 SVSHAPE1.submode2 = 0b01
650 SVSHAPE1.skip = 0b11
651 SVSHAPE1.offset = 0 # experiment with different offset, here
652 SVSHAPE1.invxyz = [1,0,0] # inversion if desired
653 # k schedule
654 SVSHAPE2 = SVSHAPE()
655 SVSHAPE2.lims = [xdim, 4, 0]
656 SVSHAPE2.mode = 0b01
657 SVSHAPE2.submode2 = 0b01
658 SVSHAPE2.skip = 0b00
659 SVSHAPE2.offset = 0 # experiment with different offset, here
660 SVSHAPE2.invxyz = [1,0,0] # inversion if desired
661
662 # enumerate over the iterator function, getting new indices
663 i0 = iterate_dct_inner_costable_indices(SVSHAPE0)
664 i1 = iterate_dct_inner_costable_indices(SVSHAPE1)
665 i2 = iterate_dct_inner_costable_indices(SVSHAPE2)
666 for ((ci, cie), (size, sze), (k, ke)) in \
667 zip(i0, i1, i2):
668 print ("xform2 cos", ci, size, k)
669 coeff = (math.cos((ci + 0.5) * math.pi / size) * 2.0)
670 assert coeff == ctable[k]
671 print ("coeff", size, "ci", ci, "k", k,
672 "i/n", (ci+0.5)/size, coeff,
673 "end", bin(cie), bin(sze), bin(ke))
674 if cie == 0b111: # all loops end
675 break
676
677 ################
678 # INNER butterfly
679 ################
680
681 # j schedule
682 SVSHAPE0 = SVSHAPE()
683 SVSHAPE0.lims = [xdim, 0b000001, 0]
684 SVSHAPE0.mode = 0b01
685 SVSHAPE0.submode2 = 0b01
686 SVSHAPE0.skip = 0b00
687 SVSHAPE0.offset = 0 # experiment with different offset, here
688 SVSHAPE0.invxyz = [1,0,0] # inversion if desired
689 # j+halfstep schedule
690 SVSHAPE1 = SVSHAPE()
691 SVSHAPE1.lims = [xdim, 0b000001, 0]
692 SVSHAPE1.mode = 0b01
693 SVSHAPE1.submode2 = 0b01
694 SVSHAPE1.skip = 0b01
695 SVSHAPE1.offset = 0 # experiment with different offset, here
696 SVSHAPE1.invxyz = [1,0,0] # inversion if desired
697 # ci schedule
698 SVSHAPE2 = SVSHAPE()
699 SVSHAPE2.lims = [xdim, 0b000001, 0]
700 SVSHAPE2.mode = 0b01
701 SVSHAPE2.submode2 = 0b01
702 SVSHAPE2.skip = 0b10
703 SVSHAPE2.offset = 0 # experiment with different offset, here
704 SVSHAPE2.invxyz = [1,0,0] # inversion if desired
705 # size schedule
706 SVSHAPE3 = SVSHAPE()
707 SVSHAPE3.lims = [xdim, 0b000001, 0]
708 SVSHAPE3.mode = 0b01
709 SVSHAPE3.submode2 = 0b01
710 SVSHAPE3.skip = 0b11
711 SVSHAPE3.offset = 0 # experiment with different offset, here
712 SVSHAPE3.invxyz = [1,0,0] # inversion if desired
713
714 # enumerate over the iterator function, getting new indices
715 i0 = iterate_dct_inner_butterfly_indices(SVSHAPE0)
716 i1 = iterate_dct_inner_butterfly_indices(SVSHAPE1)
717 i2 = iterate_dct_inner_butterfly_indices(SVSHAPE2)
718 i3 = iterate_dct_inner_butterfly_indices(SVSHAPE3)
719 for k, ((jl, jle), (jh, jhe), (ci, cie), (size, sze)) in \
720 enumerate(zip(i0, i1, i2, i3)):
721 t1, t2 = vec[jl], vec[jh]
722 print ("xform2", jl, jh, ci, size)
723 coeff = (math.cos((ci + 0.5) * math.pi / size) * 2.0)
724 #assert coeff == ctable[k]
725 vec[jl] = t1 + t2
726 vec[jh] = (t1 - t2) * (1/coeff)
727 print ("coeff", size, "ci", ci,
728 "jl", jl, "jh", jh,
729 "i/n", (ci+0.5)/size, coeff, vec[jl],
730 vec[jh],
731 "end", bin(jle), bin(jhe))
732 if jle == 0b111: # all loops end
733 break
734
735 print("transform2 pre-itersum", vec)
736
737 # now things are in the right order for the outer butterfly.
738
739 # j schedule
740 SVSHAPE0 = SVSHAPE()
741 SVSHAPE0.lims = [xdim, 0b0000010, 0]
742 SVSHAPE0.submode2 = 0b100
743 SVSHAPE0.mode = 0b01
744 SVSHAPE0.skip = 0b00
745 SVSHAPE0.offset = 0 # experiment with different offset, here
746 SVSHAPE0.invxyz = [0,0,0] # inversion if desired
747 # j+halfstep schedule
748 SVSHAPE1 = SVSHAPE()
749 SVSHAPE1.lims = [xdim, 0b0000010, 0]
750 SVSHAPE1.mode = 0b01
751 SVSHAPE1.submode2 = 0b100
752 SVSHAPE1.skip = 0b01
753 SVSHAPE1.offset = 0 # experiment with different offset, here
754 SVSHAPE1.invxyz = [0,0,0] # inversion if desired
755
756 # enumerate over the iterator function, getting new indices
757 i0 = iterate_dct_outer_butterfly_indices(SVSHAPE0)
758 i1 = iterate_dct_outer_butterfly_indices(SVSHAPE1)
759 for k, ((jl, jle), (jh, jhe)) in enumerate(zip(i0, i1)):
760 print ("itersum jr", jl, jh,
761 "end", bin(jle), bin(jhe))
762 vec[jl] += vec[jh]
763 size //= 2
764 if jle == 0b111: # all loops end
765 break
766
767 print("transform2 result", vec)
768
769 return vec
770
771
772 def demo_idct():
773 # set the dimension sizes here
774 n = 8
775 xdim = n
776 ydim = 0 # not needed
777 zdim = 0 # again, not needed
778
779 # set up an SVSHAPE
780 class SVSHAPE:
781 pass
782
783 ################
784 # outer butterfly
785 ################
786
787 # j schedule
788 SVSHAPE0 = SVSHAPE()
789 SVSHAPE0.lims = [xdim, 0b0000010, 0]
790 SVSHAPE0.submode2 = 0b11
791 SVSHAPE0.mode = 0b11
792 SVSHAPE0.skip = 0b00
793 SVSHAPE0.offset = 0 # experiment with different offset, here
794 SVSHAPE0.invxyz = [1,0,1] # inversion if desired
795 # j+halfstep schedule
796 SVSHAPE1 = SVSHAPE()
797 SVSHAPE1.lims = [xdim, 0b0000010, 0]
798 SVSHAPE1.mode = 0b11
799 SVSHAPE1.submode2 = 0b11
800 SVSHAPE1.skip = 0b01
801 SVSHAPE1.offset = 0 # experiment with different offset, here
802 SVSHAPE1.invxyz = [1,0,1] # inversion if desired
803
804 # enumerate over the iterator function, getting new indices
805 schedule = []
806 i0 = iterate_dct_outer_butterfly_indices(SVSHAPE0)
807 i1 = iterate_dct_outer_butterfly_indices(SVSHAPE1)
808 for idx, (jl, jh) in enumerate(zip(i0, i1)):
809 schedule.append((jl, jh))
810 if jl[1] == 0b111: # end
811 break
812
813 # ok now pretty-print the results, with some debug output
814 print ("outer i-dct butterfly")
815 pprint_schedule_outer(schedule, n)
816
817 ################
818 # INNER butterfly
819 ################
820
821 # j schedule
822 SVSHAPE0 = SVSHAPE()
823 SVSHAPE0.lims = [xdim, 0b000001, 0]
824 SVSHAPE0.mode = 0b11
825 SVSHAPE0.submode2 = 0b11
826 SVSHAPE0.skip = 0b00
827 SVSHAPE0.offset = 0 # experiment with different offset, here
828 SVSHAPE0.invxyz = [0,0,0] # inversion if desired
829 # j+halfstep schedule
830 SVSHAPE1 = SVSHAPE()
831 SVSHAPE1.lims = [xdim, 0b000001, 0]
832 SVSHAPE1.mode = 0b11
833 SVSHAPE1.submode2 = 0b11
834 SVSHAPE1.skip = 0b01
835 SVSHAPE1.offset = 0 # experiment with different offset, here
836 SVSHAPE1.invxyz = [0,0,0] # inversion if desired
837
838 # enumerate over the iterator function, getting new indices
839 schedule = []
840 i0 = iterate_dct_inner_butterfly_indices(SVSHAPE0)
841 i1 = iterate_dct_inner_butterfly_indices(SVSHAPE1)
842 for idx, (jl, jh) in enumerate(zip(i0, i1)):
843 schedule.append((jl, jh))
844 if jl[1] == 0b111: # end
845 break
846
847 # ok now pretty-print the results, with some debug output
848 print ("inner butterfly")
849 pprint_schedule(schedule, n)
850 print ("")
851
852 return
853
854 # for DCT half-swap LDs
855 # j schedule
856 SVSHAPE0 = SVSHAPE()
857 SVSHAPE0.lims = [xdim, 0b000101, zdim]
858 SVSHAPE0.mode = 0b01
859 SVSHAPE0.submode2 = 0b01
860 SVSHAPE0.skip = 0
861 SVSHAPE0.offset = 0 # experiment with different offset, here
862 SVSHAPE0.invxyz = [0,0,0] # inversion if desired
863
864 # expected results
865 levels = n.bit_length() - 1
866 avi = list(range(n))
867 ri = [reverse_bits(i, levels) for i in range(n)]
868 av = halfrev2(avi, False)
869 av = [av[ri[i]] for i in range(n)]
870
871 i0 = iterate_dct_inner_halfswap_loadstore(SVSHAPE0)
872 for idx, (jl) in enumerate(i0):
873 print ("inverse half-swap ld", idx, jl, av[idx])
874 if jl[1] == 0b111: # end
875 break
876
877
878 def demo_dct():
879 # set the dimension sizes here
880 n = 8
881 xdim = n
882 ydim = 0 # not needed
883 zdim = 0 # again, not needed
884
885
886 ################
887 # INNER butterfly
888 ################
889
890 # set up an SVSHAPE
891 class SVSHAPE:
892 pass
893 # j schedule
894 SVSHAPE0 = SVSHAPE()
895 SVSHAPE0.lims = [xdim, 0b000001, zdim]
896 SVSHAPE0.submode2 = 0b010
897 SVSHAPE0.mode = 0b01
898 SVSHAPE0.skip = 0b00
899 SVSHAPE0.offset = 0 # experiment with different offset, here
900 SVSHAPE0.invxyz = [0,0,0] # inversion if desired
901 # j+halfstep schedule
902 SVSHAPE1 = SVSHAPE()
903 SVSHAPE1.lims = [xdim, 0b000001, zdim]
904 SVSHAPE1.submode2 = 0b010
905 SVSHAPE1.mode = 0b01
906 SVSHAPE1.skip = 0b01
907 SVSHAPE1.offset = 0 # experiment with different offset, here
908 SVSHAPE1.invxyz = [0,0,0] # inversion if desired
909
910 # enumerate over the iterator function, getting new indices
911 schedule = []
912 i0 = iterate_dct_inner_butterfly_indices(SVSHAPE0)
913 i1 = iterate_dct_inner_butterfly_indices(SVSHAPE1)
914 for idx, (jl, jh) in enumerate(zip(i0, i1)):
915 schedule.append((jl, jh))
916 if jl[1] == 0b111: # end
917 break
918
919 # ok now pretty-print the results, with some debug output
920 print ("inner butterfly")
921 pprint_schedule(schedule, n)
922 print ("")
923
924 ################
925 # outer butterfly
926 ################
927
928 # j schedule
929 SVSHAPE0 = SVSHAPE()
930 SVSHAPE0.lims = [xdim, 0b000010, zdim]
931 SVSHAPE0.mode = 0b01
932 SVSHAPE0.submode2 = 0b100
933 SVSHAPE0.skip = 0b10
934 SVSHAPE0.offset = 0 # experiment with different offset, here
935 SVSHAPE0.invxyz = [1,0,0] # inversion if desired
936 # j+halfstep schedule
937 SVSHAPE1 = SVSHAPE()
938 SVSHAPE1.lims = [xdim, 0b000010, zdim]
939 SVSHAPE1.mode = 0b01
940 SVSHAPE1.submode2 = 0b100
941 SVSHAPE1.skip = 0b11
942 SVSHAPE1.offset = 0 # experiment with different offset, here
943 SVSHAPE1.invxyz = [1,0,0] # inversion if desired
944
945 # enumerate over the iterator function, getting new indices
946 schedule = []
947 i0 = iterate_dct_outer_butterfly_indices(SVSHAPE0)
948 i1 = iterate_dct_outer_butterfly_indices(SVSHAPE1)
949 for idx, (jl, jh) in enumerate(zip(i0, i1)):
950 schedule.append((jl, jh))
951 if jl[1] == 0b111: # end
952 break
953
954 # ok now pretty-print the results, with some debug output
955 print ("outer butterfly")
956 pprint_schedule_outer(schedule, n)
957
958 # for DCT half-swap LDs
959 # j schedule
960 SVSHAPE0 = SVSHAPE()
961 SVSHAPE0.lims = [xdim, 0b000101, zdim]
962 SVSHAPE0.mode = 0b01
963 SVSHAPE0.submode2 = 0
964 SVSHAPE0.skip = 0
965 SVSHAPE0.offset = 0 # experiment with different offset, here
966 SVSHAPE0.invxyz = [0,0,0] # inversion if desired
967
968 # expected results
969 levels = n.bit_length() - 1
970 avi = list(range(n))
971 ri = [reverse_bits(i, levels) for i in range(n)]
972 av = halfrev2(avi, False)
973 av = [av[ri[i]] for i in range(n)]
974
975
976 i0 = iterate_dct_inner_halfswap_loadstore(SVSHAPE0)
977 for idx, (jl) in enumerate(i0):
978 print ("inverse half-swap ld", idx, jl, av[idx])
979 if jl[1] == 0b111: # end
980 break
981
982
983 # run the demo
984 if __name__ == '__main__':
985 demo_dct()
986 demo_idct()