whoops, no ability to add comments in between functions in pseudocode
[openpower-isa.git] / src / openpower / decoder / isa / remap_dct_yield.py
1 # DCT "REMAP" scheduler
2 #
3 # Modifications made to create an in-place iterative DCT:
4 # Copyright (c) 2021 Luke Kenneth Casson Leighton <lkcl@lkcl.net>
5 #
6 # SPDX: LGPLv3+
7 #
8 # Original fastdctlee.py by Nayuki:
9 # Copyright (c) 2020 Project Nayuki. (MIT License)
10 # https://www.nayuki.io/page/fast-discrete-cosine-transform-algorithms
11
12 from copy import deepcopy
13 import math
14
15 # bits of the integer 'val'.
16 def reverse_bits(val, width):
17 result = 0
18 for _ in range(width):
19 result = (result << 1) | (val & 1)
20 val >>= 1
21 return result
22
23
24 # iterative version of [recursively-applied] half-rev.
25 # relies on the list lengths being power-of-two and the fact
26 # that bit-inversion of a list of binary numbers is the same
27 # as reversing the order of the list
28 # this version is dead easy to implement in hardware.
29 # a big surprise is that the half-reversal can be done with
30 # such a simple XOR. the inverse operation is slightly trickier
31 def halfrev2(vec, pre_rev=True):
32 res = []
33 for i in range(len(vec)):
34 if pre_rev:
35 res.append(vec[i ^ (i>>1)])
36 else:
37 ri = i
38 bl = i.bit_length()
39 for ji in range(1, bl):
40 ri ^= (i >> ji)
41 res.append(vec[ri])
42 return res
43
44
45 # python "yield" can be iterated. use this to make it clear how
46 # the indices are generated by using natural-looking nested loops
47 def iterate_dct_inner_halfswap_loadstore(SVSHAPE):
48 # get indices to iterate over, in the required order
49 n = SVSHAPE.lims[0]
50 mode = SVSHAPE.lims[1]
51 print ("inner halfswap loadstore", n, mode, SVSHAPE.skip)
52
53 # reference list for not needing to do data-swaps, just swap what
54 # *indices* are referenced (two levels of indirection at the moment)
55 # pre-reverse the data-swap list so that it *ends up* in the order 0123..
56 ji = list(range(n))
57
58 levels = n.bit_length() - 1
59 ji = halfrev2(ji, False)
60 if False: # swap: TODO, add extra bit-reverse mode
61 ri = [reverse_bits(i, levels) for i in range(n)]
62 ji = [ji[ri[i]] for i in range(n)]
63
64
65 # invert order if requested
66 if SVSHAPE.invxyz[0]:
67 ji.reverse()
68
69 for i, jl in enumerate(ji):
70 y_end = jl == ji[-1]
71 yield jl, (0b111 if y_end else 0b000)
72
73
74 # python "yield" can be iterated. use this to make it clear how
75 # the indices are generated by using natural-looking nested loops
76 def iterate_dct_inner_costable_indices(SVSHAPE):
77 # get indices to iterate over, in the required order
78 n = SVSHAPE.lims[0]
79 mode = SVSHAPE.lims[1]
80 print ("inner costable", mode)
81 # creating lists of indices to iterate over in each dimension
82 # has to be done dynamically, because it depends on the size
83 # first, the size-based loop (which can be done statically)
84 x_r = []
85 size = 2
86 while size <= n:
87 x_r.append(size)
88 size *= 2
89 # invert order if requested
90 if SVSHAPE.invxyz[0]:
91 x_r.reverse()
92
93 if len(x_r) == 0:
94 return
95
96 #print ("ri", ri)
97 #print ("ji", ji)
98
99 # start an infinite (wrapping) loop
100 skip = 0
101 z_end = 1 # doesn't exist in this, only 2 loops
102 k = 0
103 while True:
104 for size in x_r: # loop over 3rd order dimension (size)
105 x_end = size == x_r[-1]
106 # y_r schedule depends on size
107 halfsize = size // 2
108 y_r = []
109 for i in range(0, n, size):
110 y_r.append(i)
111 # invert if requested
112 if SVSHAPE.invxyz[1]: y_r.reverse()
113 # two lists of half-range indices, e.g. j 0123, jr 7654
114 j = list(range(0, halfsize))
115 # invert if requested
116 if SVSHAPE.invxyz[2]: j_r.reverse()
117 #print ("xform jr", jr)
118 # loop over 1st order dimension
119 for ci, jl in enumerate(j):
120 y_end = jl == j[-1]
121 # now depending on MODE return the index. inner butterfly
122 if SVSHAPE.skip == 0b00: # in [0b00, 0b10]:
123 result = k # offset into COS table
124 elif SVSHAPE.skip == 0b10: #
125 result = ci # coefficient helper
126 elif SVSHAPE.skip == 0b11: #
127 result = size # coefficient helper
128 loopends = (z_end |
129 ((y_end and z_end)<<1) |
130 ((y_end and x_end and z_end)<<2))
131
132 yield result + SVSHAPE.offset, loopends
133 k += 1
134
135 # python "yield" can be iterated. use this to make it clear how
136 # the indices are generated by using natural-looking nested loops
137 def iterate_dct_inner_butterfly_indices(SVSHAPE):
138 # get indices to iterate over, in the required order
139 n = SVSHAPE.lims[0]
140 mode = SVSHAPE.lims[1]
141 print ("inner butterfly", mode, SVSHAPE.skip, "submode", SVSHAPE.submode2)
142 # creating lists of indices to iterate over in each dimension
143 # has to be done dynamically, because it depends on the size
144 # first, the size-based loop (which can be done statically)
145 x_r = []
146 size = 2
147 while size <= n:
148 x_r.append(size)
149 size *= 2
150 # invert order if requested
151 if SVSHAPE.invxyz[0]:
152 x_r.reverse()
153
154 if len(x_r) == 0:
155 return
156
157 # reference (read/write) the in-place data in *reverse-bit-order*
158 ri = list(range(n))
159 if SVSHAPE.submode2 == 0b01:
160 levels = n.bit_length() - 1
161 ri = [ri[reverse_bits(i, levels)] for i in range(n)]
162
163 # reference list for not needing to do data-swaps, just swap what
164 # *indices* are referenced (two levels of indirection at the moment)
165 # pre-reverse the data-swap list so that it *ends up* in the order 0123..
166 ji = list(range(n))
167 inplace_mode = True
168 if inplace_mode and SVSHAPE.submode2 == 0b01:
169 #print ("inplace mode")
170 ji = halfrev2(ji, True)
171 if inplace_mode and SVSHAPE.submode2 == 0b11:
172 ji = halfrev2(ji, False)
173
174 print ("ri", ri)
175 print ("ji", ji)
176
177 # start an infinite (wrapping) loop
178 while True:
179 k = 0
180 k_start = 0
181 for size in x_r: # loop over 3rd order dimension (size)
182 x_end = size == x_r[-1]
183 # y_r schedule depends on size
184 halfsize = size // 2
185 y_r = []
186 for i in range(0, n, size):
187 y_r.append(i)
188 # invert if requested
189 if SVSHAPE.invxyz[1]: y_r.reverse()
190 for i in y_r: # loop over 2nd order dimension
191 y_end = i == y_r[-1]
192 # two lists of half-range indices, e.g. j 0123, jr 7654
193 j = list(range(i, i + halfsize))
194 jr = list(range(i+halfsize, i + size))
195 jr.reverse()
196 # invert if requested
197 if SVSHAPE.invxyz[2]:
198 j.reverse()
199 jr.reverse()
200 hz2 = halfsize // 2 # zero stops reversing 1-item lists
201 #print ("xform jr", jr)
202 # loop over 1st order dimension
203 k = k_start
204 for ci, (jl, jh) in enumerate(zip(j, jr)):
205 z_end = jl == j[-1]
206 # now depending on MODE return the index. inner butterfly
207 if SVSHAPE.skip == 0b00: # in [0b00, 0b10]:
208 if SVSHAPE.submode2 == 0b11: # iDCT
209 result = ji[ri[jl]] # lower half
210 else:
211 result = ri[ji[jl]] # lower half
212 elif SVSHAPE.skip == 0b01: # in [0b01, 0b11]:
213 if SVSHAPE.submode2 == 0b11: # iDCT
214 result = ji[ri[jl+halfsize]] # upper half
215 else:
216 result = ri[ji[jh]] # upper half
217 elif mode == 4:
218 # COS table pre-generated mode
219 if SVSHAPE.skip == 0b10: #
220 result = k # cos table offset
221 else: # mode 2
222 # COS table generated on-demand ("Vertical-First") mode
223 if SVSHAPE.skip == 0b10: #
224 result = ci # coefficient helper
225 elif SVSHAPE.skip == 0b11: #
226 result = size # coefficient helper
227 loopends = (z_end |
228 ((y_end and z_end)<<1) |
229 ((y_end and x_end and z_end)<<2))
230
231 yield result + SVSHAPE.offset, loopends
232 k += 1
233
234 # now in-place swap
235 if inplace_mode:
236 for ci, (jl, jh) in enumerate(zip(j[:hz2], jr[:hz2])):
237 jlh = jl+halfsize
238 print ("inplace swap", jh, jlh)
239 tmp1, tmp2 = ji[jlh], ji[jh]
240 ji[jlh], ji[jh] = tmp2, tmp1
241
242 # new k_start point for cos tables( runs inside x_r loop NOT i loop)
243 k_start += halfsize
244
245
246 # python "yield" can be iterated. use this to make it clear how
247 # the indices are generated by using natural-looking nested loops
248 def iterate_dct_outer_butterfly_indices(SVSHAPE):
249 # get indices to iterate over, in the required order
250 n = SVSHAPE.lims[0]
251 mode = SVSHAPE.lims[1]
252 # creating lists of indices to iterate over in each dimension
253 # has to be done dynamically, because it depends on the size
254 # first, the size-based loop (which can be done statically)
255 x_r = []
256 size = n // 2
257 while size >= 2:
258 x_r.append(size)
259 size //= 2
260 # invert order if requested
261 if SVSHAPE.invxyz[0]:
262 x_r.reverse()
263
264 if len(x_r) == 0:
265 return
266
267 print ("outer butterfly", mode, SVSHAPE.skip, "submode", SVSHAPE.submode2)
268
269 # I-DCT, reference (read/write) the in-place data in *reverse-bit-order*
270 ri = list(range(n))
271 if SVSHAPE.submode2 in [0b11, 0b01]:
272 levels = n.bit_length() - 1
273 ri = [ri[reverse_bits(i, levels)] for i in range(n)]
274
275 # reference list for not needing to do data-swaps, just swap what
276 # *indices* are referenced (two levels of indirection at the moment)
277 # pre-reverse the data-swap list so that it *ends up* in the order 0123..
278 ji = list(range(n))
279 inplace_mode = False # need the space... SVSHAPE.skip in [0b10, 0b11]
280 if SVSHAPE.submode2 == 0b11:
281 ji = halfrev2(ji, False)
282
283 print ("ri", ri)
284 print ("ji", ji)
285
286 # start an infinite (wrapping) loop
287 while True:
288 k = 0
289 k_start = 0
290 for size in x_r: # loop over 3rd order dimension (size)
291 halfsize = size//2
292 x_end = size == x_r[-1]
293 y_r = list(range(0, halfsize))
294 print ("itersum", halfsize, size, y_r, "invert", SVSHAPE.invxyz[1])
295 # invert if requested
296 if SVSHAPE.invxyz[1]: y_r.reverse()
297 for i in y_r: # loop over 2nd order dimension
298 y_end = i == y_r[-1]
299 # one list to create iterative-sum schedule
300 jr = list(range(i+halfsize, i+n-halfsize, size))
301 # invert if requested
302 if SVSHAPE.invxyz[2]: jr.reverse()
303 print ("itersum jr", i+halfsize, i+size, jr,
304 "invert", SVSHAPE.invxyz[2])
305 hz2 = halfsize // 2 # zero stops reversing 1-item lists
306 k = k_start
307 for ci, jh in enumerate(jr): # loop over 1st order dimension
308 z_end = jh == jr[-1]
309 #print (" itersum", size, i, jh, jh+size)
310 if mode == 4:
311 # COS table pre-generated mode
312 if SVSHAPE.skip == 0b00: # in [0b00, 0b10]:
313 if SVSHAPE.submode2 == 0b11: # iDCT
314 result = ji[ri[jh]] # upper half
315 else:
316 result = ri[ji[jh]] # lower half
317 elif SVSHAPE.skip == 0b01: # in [0b01, 0b11]:
318 if SVSHAPE.submode2 == 0b11: # iDCT
319 result = ji[ri[jh+size]] # upper half
320 else:
321 result = ri[ji[jh+size]] # upper half
322 elif SVSHAPE.skip == 0b10: #
323 result = k # cos table offset
324 else:
325 # COS table generated on-demand ("Vertical-First") mode
326 if SVSHAPE.skip == 0b00: # in [0b00, 0b10]:
327 if SVSHAPE.submode2 == 0b11: # iDCT
328 result = ji[ri[jh]] # lower half
329 else:
330 result = ri[ji[jh]] # lower half
331 elif SVSHAPE.skip == 0b01: # in [0b01, 0b11]:
332 if SVSHAPE.submode2 == 0b11: # iDCT
333 result = ji[ri[jh+size]] # upper half
334 else:
335 result = ri[ji[jh+size]] # upper half
336 elif SVSHAPE.skip == 0b10: #
337 result = ci # coefficient helper
338 elif SVSHAPE.skip == 0b11: #
339 result = size # coefficient helper
340 loopends = (z_end |
341 ((y_end and z_end)<<1) |
342 ((y_end and x_end and z_end)<<2))
343
344 yield result + SVSHAPE.offset, loopends
345 k += 1
346
347 # now in-place swap (disabled)
348 if False and SVSHAPE.submode2 == 0b11:
349 j = list(range(i, i + halfsize))
350 jr = list(range(i+halfsize, i + size))
351 jr.reverse()
352 for ci, (jl, jh) in enumerate(zip(j[:hz2], jr[:hz2])):
353 jlh = jl+halfsize
354 tmp1, tmp2 = ji[jlh], ji[jh]
355 print ("inplace swap", jh, jlh, "actual", tmp1, tmp2)
356 ji[jlh], ji[jh] = tmp2, tmp1
357
358 # new k_start point for cos tables( runs inside x_r loop NOT i loop)
359 k_start += halfsize
360
361
362 def pprint_schedule(schedule, n):
363 size = 2
364 idx = 0
365 while size <= n:
366 halfsize = size // 2
367 tablestep = n // size
368 print ("size %d halfsize %d tablestep %d" % \
369 (size, halfsize, tablestep))
370 for i in range(0, n, size):
371 prefix = "i %d\t" % i
372 for j in range(i, i + halfsize):
373 (jl, je), (jh, he) = schedule[idx]
374 print (" %-3d\t%s j=%-2d jh=%-2d "
375 "j[jl=%-2d] j[jh=%-2d]" % \
376 (idx, prefix, j, j+halfsize,
377 jl, jh,
378 ),
379 "end", bin(je)[2:], bin(je)[2:])
380 idx += 1
381 size *= 2
382
383 def pprint_schedule_outer(schedule, n):
384 size = 2
385 idx = 0
386 while size <= n//2:
387 halfsize = size // 2
388 tablestep = n // size
389 print ("size %d halfsize %d tablestep %d" % \
390 (size, halfsize, tablestep))
391 y_r = list(range(0, halfsize))
392 for i in y_r:
393 prefix = "i %d\t" % i
394 jr = list(range(i+halfsize, i+n-halfsize, size))
395 for j in jr:
396 (jl, je), (jh, he) = schedule[idx]
397 print (" %-3d\t%s j=%-2d jh=%-2d "
398 "j[jl=%-2d] j[jh=%-2d]" % \
399 (idx, prefix, j, j+halfsize,
400 jl, jh,
401 ),
402 "end", bin(je)[2:], bin(je)[2:])
403 idx += 1
404 size *= 2
405
406
407 # totally cool *in-place* inverse DCT algorithm using yield REMAPs
408 def inverse_transform2(vec):
409
410 vec = deepcopy(vec)
411
412 # Initialization
413 n = len(vec)
414 print ()
415 print ("inverse_transform2", n)
416 levels = n.bit_length() - 1
417
418 # set up dims
419 xdim = n
420
421 # divide element 0 by 2
422 vec[0] /= 2.0
423
424 # reference (read/write) the in-place data in *reverse-bit-order*
425 ri = list(range(n))
426 ri = [ri[reverse_bits(i, levels)] for i in range(n)]
427
428 # pretend we LDed data in half-swapped *and* bit-reversed order as well
429 # TODO: merge these two
430 vec = [vec[ri[i]] for i in range(n)]
431 vec = halfrev2(vec, True)
432
433 print("inverse_transform2 post-alter", vec)
434
435 # create a cos table: not strictly necessary but here for illustrative
436 # purposes, to demonstrate the point that it really *is* iterative.
437 # this table could be cached and used multiple times rather than
438 # computed every time.
439 ctable = []
440 size = 2
441 while size <= n:
442 halfsize = size // 2
443 for ci in range(halfsize):
444 coeff = (math.cos((ci + 0.5) * math.pi / size) * 2.0)
445 ctable.append(coeff)
446 print ("coeff", size, "ci", ci, "k", len(ctable)-1,
447 "i/n", (ci+0.5)/size, coeff)
448 size *= 2
449
450 # set up an SVSHAPE
451 class SVSHAPE:
452 pass
453
454 # XXX TODO
455 if False:
456 # ci schedule
457 SVSHAPE0 = SVSHAPE()
458 SVSHAPE0.lims = [xdim, 4, 0]
459 SVSHAPE0.mode = 0b01
460 SVSHAPE0.submode2 = 0b01
461 SVSHAPE0.skip = 0b10
462 SVSHAPE0.offset = 0 # experiment with different offset, here
463 SVSHAPE0.invxyz = [1,0,1] # inversion if desired
464 # size schedule
465 SVSHAPE1 = SVSHAPE()
466 SVSHAPE1.lims = [xdim, 4, 0]
467 SVSHAPE1.mode = 0b01
468 SVSHAPE1.submode2 = 0b01
469 SVSHAPE1.skip = 0b11
470 SVSHAPE1.offset = 0 # experiment with different offset, here
471 SVSHAPE1.invxyz = [1,0,1] # inversion if desired
472 # k schedule
473 SVSHAPE2 = SVSHAPE()
474 SVSHAPE2.lims = [xdim, 4, 0]
475 SVSHAPE2.mode = 0b01
476 SVSHAPE2.submode2 = 0b01
477 SVSHAPE2.skip = 0b00
478 SVSHAPE2.offset = 0 # experiment with different offset, here
479 SVSHAPE2.invxyz = [1,0,1] # inversion if desired
480
481 # enumerate over the iterator function, getting new indices
482 i0 = iterate_dct_inner_costable_indices(SVSHAPE0)
483 i1 = iterate_dct_inner_costable_indices(SVSHAPE1)
484 i2 = iterate_dct_inner_costable_indices(SVSHAPE2)
485 for ((ci, cie), (size, sze), (k, ke)) in \
486 zip(i0, i1, i2):
487 print ("xform2 cos", ci, size, k)
488 coeff = (math.cos((ci + 0.5) * math.pi / size) * 2.0)
489 assert coeff == ctable[k]
490 print ("coeff", size, "ci", ci, "k", k,
491 "i/n", (ci+0.5)/size, coeff,
492 "end", bin(cie), bin(sze), bin(ke))
493 if cie == 0b111: # all loops end
494 break
495
496 # now things are in the right order for the outer butterfly.
497
498 # j schedule
499 SVSHAPE0 = SVSHAPE()
500 SVSHAPE0.lims = [xdim, 0b0000010, 0]
501 SVSHAPE0.submode2 = 0b11
502 SVSHAPE0.mode = 0b11
503 SVSHAPE0.skip = 0b00
504 SVSHAPE0.offset = 0 # experiment with different offset, here
505 SVSHAPE0.invxyz = [1,0,1] # inversion if desired
506 # j+halfstep schedule
507 SVSHAPE1 = SVSHAPE()
508 SVSHAPE1.lims = [xdim, 0b0000010, 0]
509 SVSHAPE1.mode = 0b11
510 SVSHAPE1.submode2 = 0b11
511 SVSHAPE1.skip = 0b01
512 SVSHAPE1.offset = 0 # experiment with different offset, here
513 SVSHAPE1.invxyz = [1,0,1] # inversion if desired
514
515 # enumerate over the iterator function, getting new indices
516 i0 = iterate_dct_outer_butterfly_indices(SVSHAPE0)
517 i1 = iterate_dct_outer_butterfly_indices(SVSHAPE1)
518 for k, ((jl, jle), (jh, jhe)) in enumerate(zip(i0, i1)):
519 print ("itersum jr", jl, jh,
520 "end", bin(jle), bin(jhe),
521 vec[jl], vec[jh], vec[jh]+vec[jl])
522 vec[jh] += vec[jl]
523 size //= 2
524 if jle == 0b111: # all loops end
525 break
526
527 print("transform2 pre-itersum", vec)
528
529 ################
530 # INNER butterfly
531 ################
532
533 # j schedule
534 SVSHAPE0 = SVSHAPE()
535 SVSHAPE0.lims = [xdim, 0b000001, 0]
536 SVSHAPE0.mode = 0b11
537 SVSHAPE0.submode2 = 0b11
538 SVSHAPE0.skip = 0b00
539 SVSHAPE0.offset = 0 # experiment with different offset, here
540 SVSHAPE0.invxyz = [0,0,0] # inversion if desired
541 # j+halfstep schedule
542 SVSHAPE1 = SVSHAPE()
543 SVSHAPE1.lims = [xdim, 0b000001, 0]
544 SVSHAPE1.mode = 0b11
545 SVSHAPE1.submode2 = 0b11
546 SVSHAPE1.skip = 0b01
547 SVSHAPE1.offset = 0 # experiment with different offset, here
548 SVSHAPE1.invxyz = [0,0,0] # inversion if desired
549 # ci schedule
550 SVSHAPE2 = SVSHAPE()
551 SVSHAPE2.lims = [xdim, 0b000001, 0]
552 SVSHAPE2.mode = 0b11
553 SVSHAPE2.submode2 = 0b11
554 SVSHAPE2.skip = 0b10
555 SVSHAPE2.offset = 0 # experiment with different offset, here
556 SVSHAPE2.invxyz = [0,0,0] # inversion if desired
557 # size schedule
558 SVSHAPE3 = SVSHAPE()
559 SVSHAPE3.lims = [xdim, 0b000001, 0]
560 SVSHAPE3.mode = 0b11
561 SVSHAPE3.submode2 = 0b11
562 SVSHAPE3.skip = 0b11
563 SVSHAPE3.offset = 0 # experiment with different offset, here
564 SVSHAPE3.invxyz = [0,0,0] # inversion if desired
565
566 # enumerate over the iterator function, getting new indices
567 i0 = iterate_dct_inner_butterfly_indices(SVSHAPE0)
568 i1 = iterate_dct_inner_butterfly_indices(SVSHAPE1)
569 i2 = iterate_dct_inner_butterfly_indices(SVSHAPE2)
570 i3 = iterate_dct_inner_butterfly_indices(SVSHAPE3)
571 for k, ((jl, jle), (jh, jhe), (ci, cie), (size, sze)) in \
572 enumerate(zip(i0, i1, i2, i3)):
573 t1, t2 = vec[jl], vec[jh]
574 print ("xform2", jl, jh, ci, size)
575 coeff = (math.cos((ci + 0.5) * math.pi / size) * 2.0)
576 #assert coeff == ctable[k]
577 vec[jl] = t1 + t2/coeff
578 vec[jh] = t1 - t2/coeff
579 print ("coeff", size, "ci", ci,
580 "jl", jl, "jh", jh,
581 "i/n", (ci+0.5)/size, coeff, "t1/2", t1, t2,
582 "+/-", vec[jl], vec[jh],
583 "end", bin(jle), bin(jhe))
584 if jle == 0b111: # all loops end
585 break
586
587 print("inverse_transform2 result", vec)
588
589 return vec
590
591
592 # totally cool *in-place* DCT algorithm using yield REMAPs
593 def transform2(vec):
594
595 # Initialization
596 n = len(vec)
597 print ()
598 print ("transform2", n)
599 levels = n.bit_length() - 1
600
601 # set up dims
602 xdim = n
603
604 # reference (read/write) the in-place data in *reverse-bit-order*
605 ri = list(range(n))
606 ri = [ri[reverse_bits(i, levels)] for i in range(n)]
607
608 # and pretend we LDed data in half-swapped *and* bit-reversed order as well
609 # TODO: merge these two
610 vec = halfrev2(vec, False)
611 vec = [vec[ri[i]] for i in range(n)]
612
613 # create a cos table: not strictly necessary but here for illustrative
614 # purposes, to demonstrate the point that it really *is* iterative.
615 # this table could be cached and used multiple times rather than
616 # computed every time.
617 ctable = []
618 size = n
619 while size >= 2:
620 halfsize = size // 2
621 for ci in range(halfsize):
622 coeff = (math.cos((ci + 0.5) * math.pi / size) * 2.0)
623 ctable.append(coeff)
624 print ("coeff", size, "ci", ci, "k", len(ctable)-1,
625 "i/n", (ci+0.5)/size, coeff)
626 size //= 2
627
628 # set up an SVSHAPE
629 class SVSHAPE:
630 pass
631 # ci schedule
632 SVSHAPE0 = SVSHAPE()
633 SVSHAPE0.lims = [xdim, 4, 0]
634 SVSHAPE0.mode = 0b01
635 SVSHAPE0.submode2 = 0b01
636 SVSHAPE0.skip = 0b10
637 SVSHAPE0.offset = 0 # experiment with different offset, here
638 SVSHAPE0.invxyz = [1,0,0] # inversion if desired
639 # size schedule
640 SVSHAPE1 = SVSHAPE()
641 SVSHAPE1.lims = [xdim, 4, 0]
642 SVSHAPE1.mode = 0b01
643 SVSHAPE1.submode2 = 0b01
644 SVSHAPE1.skip = 0b11
645 SVSHAPE1.offset = 0 # experiment with different offset, here
646 SVSHAPE1.invxyz = [1,0,0] # inversion if desired
647 # k schedule
648 SVSHAPE2 = SVSHAPE()
649 SVSHAPE2.lims = [xdim, 4, 0]
650 SVSHAPE2.mode = 0b01
651 SVSHAPE2.submode2 = 0b01
652 SVSHAPE2.skip = 0b00
653 SVSHAPE2.offset = 0 # experiment with different offset, here
654 SVSHAPE2.invxyz = [1,0,0] # inversion if desired
655
656 # enumerate over the iterator function, getting new indices
657 i0 = iterate_dct_inner_costable_indices(SVSHAPE0)
658 i1 = iterate_dct_inner_costable_indices(SVSHAPE1)
659 i2 = iterate_dct_inner_costable_indices(SVSHAPE2)
660 for ((ci, cie), (size, sze), (k, ke)) in \
661 zip(i0, i1, i2):
662 print ("xform2 cos", ci, size, k)
663 coeff = (math.cos((ci + 0.5) * math.pi / size) * 2.0)
664 assert coeff == ctable[k]
665 print ("coeff", size, "ci", ci, "k", k,
666 "i/n", (ci+0.5)/size, coeff,
667 "end", bin(cie), bin(sze), bin(ke))
668 if cie == 0b111: # all loops end
669 break
670
671 ################
672 # INNER butterfly
673 ################
674
675 # j schedule
676 SVSHAPE0 = SVSHAPE()
677 SVSHAPE0.lims = [xdim, 0b000001, 0]
678 SVSHAPE0.mode = 0b01
679 SVSHAPE0.submode2 = 0b01
680 SVSHAPE0.skip = 0b00
681 SVSHAPE0.offset = 0 # experiment with different offset, here
682 SVSHAPE0.invxyz = [1,0,0] # inversion if desired
683 # j+halfstep schedule
684 SVSHAPE1 = SVSHAPE()
685 SVSHAPE1.lims = [xdim, 0b000001, 0]
686 SVSHAPE1.mode = 0b01
687 SVSHAPE1.submode2 = 0b01
688 SVSHAPE1.skip = 0b01
689 SVSHAPE1.offset = 0 # experiment with different offset, here
690 SVSHAPE1.invxyz = [1,0,0] # inversion if desired
691 # ci schedule
692 SVSHAPE2 = SVSHAPE()
693 SVSHAPE2.lims = [xdim, 0b000001, 0]
694 SVSHAPE2.mode = 0b01
695 SVSHAPE2.submode2 = 0b01
696 SVSHAPE2.skip = 0b10
697 SVSHAPE2.offset = 0 # experiment with different offset, here
698 SVSHAPE2.invxyz = [1,0,0] # inversion if desired
699 # size schedule
700 SVSHAPE3 = SVSHAPE()
701 SVSHAPE3.lims = [xdim, 0b000001, 0]
702 SVSHAPE3.mode = 0b01
703 SVSHAPE3.submode2 = 0b01
704 SVSHAPE3.skip = 0b11
705 SVSHAPE3.offset = 0 # experiment with different offset, here
706 SVSHAPE3.invxyz = [1,0,0] # inversion if desired
707
708 # enumerate over the iterator function, getting new indices
709 i0 = iterate_dct_inner_butterfly_indices(SVSHAPE0)
710 i1 = iterate_dct_inner_butterfly_indices(SVSHAPE1)
711 i2 = iterate_dct_inner_butterfly_indices(SVSHAPE2)
712 i3 = iterate_dct_inner_butterfly_indices(SVSHAPE3)
713 for k, ((jl, jle), (jh, jhe), (ci, cie), (size, sze)) in \
714 enumerate(zip(i0, i1, i2, i3)):
715 t1, t2 = vec[jl], vec[jh]
716 print ("xform2", jl, jh, ci, size)
717 coeff = (math.cos((ci + 0.5) * math.pi / size) * 2.0)
718 #assert coeff == ctable[k]
719 vec[jl] = t1 + t2
720 vec[jh] = (t1 - t2) * (1/coeff)
721 print ("coeff", size, "ci", ci,
722 "jl", jl, "jh", jh,
723 "i/n", (ci+0.5)/size, coeff, vec[jl],
724 vec[jh],
725 "end", bin(jle), bin(jhe))
726 if jle == 0b111: # all loops end
727 break
728
729 print("transform2 pre-itersum", vec)
730
731 # now things are in the right order for the outer butterfly.
732
733 # j schedule
734 SVSHAPE0 = SVSHAPE()
735 SVSHAPE0.lims = [xdim, 0b0000010, 0]
736 SVSHAPE0.submode2 = 0b100
737 SVSHAPE0.mode = 0b01
738 SVSHAPE0.skip = 0b00
739 SVSHAPE0.offset = 0 # experiment with different offset, here
740 SVSHAPE0.invxyz = [0,0,0] # inversion if desired
741 # j+halfstep schedule
742 SVSHAPE1 = SVSHAPE()
743 SVSHAPE1.lims = [xdim, 0b0000010, 0]
744 SVSHAPE1.mode = 0b01
745 SVSHAPE1.submode2 = 0b100
746 SVSHAPE1.skip = 0b01
747 SVSHAPE1.offset = 0 # experiment with different offset, here
748 SVSHAPE1.invxyz = [0,0,0] # inversion if desired
749
750 # enumerate over the iterator function, getting new indices
751 i0 = iterate_dct_outer_butterfly_indices(SVSHAPE0)
752 i1 = iterate_dct_outer_butterfly_indices(SVSHAPE1)
753 for k, ((jl, jle), (jh, jhe)) in enumerate(zip(i0, i1)):
754 print ("itersum jr", jl, jh,
755 "end", bin(jle), bin(jhe))
756 vec[jl] += vec[jh]
757 size //= 2
758 if jle == 0b111: # all loops end
759 break
760
761 print("transform2 result", vec)
762
763 return vec
764
765
766 def demo_idct():
767 # set the dimension sizes here
768 n = 8
769 xdim = n
770 ydim = 0 # not needed
771 zdim = 0 # again, not needed
772
773 # set up an SVSHAPE
774 class SVSHAPE:
775 pass
776
777 ################
778 # outer butterfly
779 ################
780
781 # j schedule
782 SVSHAPE0 = SVSHAPE()
783 SVSHAPE0.lims = [xdim, 0b0000010, 0]
784 SVSHAPE0.submode2 = 0b11
785 SVSHAPE0.mode = 0b11
786 SVSHAPE0.skip = 0b00
787 SVSHAPE0.offset = 0 # experiment with different offset, here
788 SVSHAPE0.invxyz = [1,0,1] # inversion if desired
789 # j+halfstep schedule
790 SVSHAPE1 = SVSHAPE()
791 SVSHAPE1.lims = [xdim, 0b0000010, 0]
792 SVSHAPE1.mode = 0b11
793 SVSHAPE1.submode2 = 0b11
794 SVSHAPE1.skip = 0b01
795 SVSHAPE1.offset = 0 # experiment with different offset, here
796 SVSHAPE1.invxyz = [1,0,1] # inversion if desired
797
798 # enumerate over the iterator function, getting new indices
799 schedule = []
800 i0 = iterate_dct_outer_butterfly_indices(SVSHAPE0)
801 i1 = iterate_dct_outer_butterfly_indices(SVSHAPE1)
802 for idx, (jl, jh) in enumerate(zip(i0, i1)):
803 schedule.append((jl, jh))
804 if jl[1] == 0b111: # end
805 break
806
807 # ok now pretty-print the results, with some debug output
808 print ("outer i-dct butterfly")
809 pprint_schedule_outer(schedule, n)
810
811 ################
812 # INNER butterfly
813 ################
814
815 # j schedule
816 SVSHAPE0 = SVSHAPE()
817 SVSHAPE0.lims = [xdim, 0b000001, 0]
818 SVSHAPE0.mode = 0b11
819 SVSHAPE0.submode2 = 0b11
820 SVSHAPE0.skip = 0b00
821 SVSHAPE0.offset = 0 # experiment with different offset, here
822 SVSHAPE0.invxyz = [0,0,0] # inversion if desired
823 # j+halfstep schedule
824 SVSHAPE1 = SVSHAPE()
825 SVSHAPE1.lims = [xdim, 0b000001, 0]
826 SVSHAPE1.mode = 0b11
827 SVSHAPE1.submode2 = 0b11
828 SVSHAPE1.skip = 0b01
829 SVSHAPE1.offset = 0 # experiment with different offset, here
830 SVSHAPE1.invxyz = [0,0,0] # inversion if desired
831
832 # enumerate over the iterator function, getting new indices
833 schedule = []
834 i0 = iterate_dct_inner_butterfly_indices(SVSHAPE0)
835 i1 = iterate_dct_inner_butterfly_indices(SVSHAPE1)
836 for idx, (jl, jh) in enumerate(zip(i0, i1)):
837 schedule.append((jl, jh))
838 if jl[1] == 0b111: # end
839 break
840
841 # ok now pretty-print the results, with some debug output
842 print ("inner butterfly")
843 pprint_schedule(schedule, n)
844 print ("")
845
846 return
847
848 # for DCT half-swap LDs
849 # j schedule
850 SVSHAPE0 = SVSHAPE()
851 SVSHAPE0.lims = [xdim, 0b000101, zdim]
852 SVSHAPE0.mode = 0b01
853 SVSHAPE0.submode2 = 0b01
854 SVSHAPE0.skip = 0
855 SVSHAPE0.offset = 0 # experiment with different offset, here
856 SVSHAPE0.invxyz = [0,0,0] # inversion if desired
857
858 # expected results
859 levels = n.bit_length() - 1
860 avi = list(range(n))
861 ri = [reverse_bits(i, levels) for i in range(n)]
862 av = halfrev2(avi, False)
863 av = [av[ri[i]] for i in range(n)]
864
865 i0 = iterate_dct_inner_halfswap_loadstore(SVSHAPE0)
866 for idx, (jl) in enumerate(i0):
867 print ("inverse half-swap ld", idx, jl, av[idx])
868 if jl[1] == 0b111: # end
869 break
870
871
872 def demo_dct():
873 # set the dimension sizes here
874 n = 8
875 xdim = n
876 ydim = 0 # not needed
877 zdim = 0 # again, not needed
878
879
880 ################
881 # INNER butterfly
882 ################
883
884 # set up an SVSHAPE
885 class SVSHAPE:
886 pass
887 # j schedule
888 SVSHAPE0 = SVSHAPE()
889 SVSHAPE0.lims = [xdim, 0b000001, zdim]
890 SVSHAPE0.submode2 = 0b010
891 SVSHAPE0.mode = 0b01
892 SVSHAPE0.skip = 0b00
893 SVSHAPE0.offset = 0 # experiment with different offset, here
894 SVSHAPE0.invxyz = [0,0,0] # inversion if desired
895 # j+halfstep schedule
896 SVSHAPE1 = SVSHAPE()
897 SVSHAPE1.lims = [xdim, 0b000001, zdim]
898 SVSHAPE1.submode2 = 0b010
899 SVSHAPE1.mode = 0b01
900 SVSHAPE1.skip = 0b01
901 SVSHAPE1.offset = 0 # experiment with different offset, here
902 SVSHAPE1.invxyz = [0,0,0] # inversion if desired
903
904 # enumerate over the iterator function, getting new indices
905 schedule = []
906 i0 = iterate_dct_inner_butterfly_indices(SVSHAPE0)
907 i1 = iterate_dct_inner_butterfly_indices(SVSHAPE1)
908 for idx, (jl, jh) in enumerate(zip(i0, i1)):
909 schedule.append((jl, jh))
910 if jl[1] == 0b111: # end
911 break
912
913 # ok now pretty-print the results, with some debug output
914 print ("inner butterfly")
915 pprint_schedule(schedule, n)
916 print ("")
917
918 ################
919 # outer butterfly
920 ################
921
922 # j schedule
923 SVSHAPE0 = SVSHAPE()
924 SVSHAPE0.lims = [xdim, 0b000010, zdim]
925 SVSHAPE0.mode = 0b01
926 SVSHAPE0.submode2 = 0b100
927 SVSHAPE0.skip = 0b10
928 SVSHAPE0.offset = 0 # experiment with different offset, here
929 SVSHAPE0.invxyz = [1,0,0] # inversion if desired
930 # j+halfstep schedule
931 SVSHAPE1 = SVSHAPE()
932 SVSHAPE1.lims = [xdim, 0b000010, zdim]
933 SVSHAPE1.mode = 0b01
934 SVSHAPE1.submode2 = 0b100
935 SVSHAPE1.skip = 0b11
936 SVSHAPE1.offset = 0 # experiment with different offset, here
937 SVSHAPE1.invxyz = [1,0,0] # inversion if desired
938
939 # enumerate over the iterator function, getting new indices
940 schedule = []
941 i0 = iterate_dct_outer_butterfly_indices(SVSHAPE0)
942 i1 = iterate_dct_outer_butterfly_indices(SVSHAPE1)
943 for idx, (jl, jh) in enumerate(zip(i0, i1)):
944 schedule.append((jl, jh))
945 if jl[1] == 0b111: # end
946 break
947
948 # ok now pretty-print the results, with some debug output
949 print ("outer butterfly")
950 pprint_schedule_outer(schedule, n)
951
952 # for DCT half-swap LDs
953 # j schedule
954 SVSHAPE0 = SVSHAPE()
955 SVSHAPE0.lims = [xdim, 0b000101, zdim]
956 SVSHAPE0.mode = 0b01
957 SVSHAPE0.submode2 = 0
958 SVSHAPE0.skip = 0
959 SVSHAPE0.offset = 0 # experiment with different offset, here
960 SVSHAPE0.invxyz = [0,0,0] # inversion if desired
961
962 # expected results
963 levels = n.bit_length() - 1
964 avi = list(range(n))
965 ri = [reverse_bits(i, levels) for i in range(n)]
966 av = halfrev2(avi, False)
967 av = [av[ri[i]] for i in range(n)]
968
969
970 i0 = iterate_dct_inner_halfswap_loadstore(SVSHAPE0)
971 for idx, (jl) in enumerate(i0):
972 print ("inverse half-swap ld", idx, jl, av[idx])
973 if jl[1] == 0b111: # end
974 break
975
976
977 # run the demo
978 if __name__ == '__main__':
979 demo_dct()
980 demo_idct()