start adding i-dct schedule with debug-print, hard to tell what is going on
[openpower-isa.git] / src / openpower / decoder / isa / remap_dct_yield.py
1 # DCT "REMAP" scheduler
2 #
3 # Modifications made to create an in-place iterative DCT:
4 # Copyright (c) 2021 Luke Kenneth Casson Leighton <lkcl@lkcl.net>
5 #
6 # SPDX: LGPLv3+
7 #
8 # Original fastdctlee.py by Nayuki:
9 # Copyright (c) 2020 Project Nayuki. (MIT License)
10 # https://www.nayuki.io/page/fast-discrete-cosine-transform-algorithms
11
12 import math
13
14 # bits of the integer 'val'.
15 def reverse_bits(val, width):
16 result = 0
17 for _ in range(width):
18 result = (result << 1) | (val & 1)
19 val >>= 1
20 return result
21
22
23 # iterative version of [recursively-applied] half-rev.
24 # relies on the list lengths being power-of-two and the fact
25 # that bit-inversion of a list of binary numbers is the same
26 # as reversing the order of the list
27 # this version is dead easy to implement in hardware.
28 # a big surprise is that the half-reversal can be done with
29 # such a simple XOR. the inverse operation is slightly trickier
30 def halfrev2(vec, pre_rev=True):
31 res = []
32 for i in range(len(vec)):
33 if pre_rev:
34 res.append(i ^ (i>>1))
35 else:
36 ri = i
37 bl = i.bit_length()
38 for ji in range(1, bl):
39 ri ^= (i >> ji)
40 res.append(vec[ri])
41 return res
42
43
44 # python "yield" can be iterated. use this to make it clear how
45 # the indices are generated by using natural-looking nested loops
46 def iterate_dct_inner_halfswap_loadstore(SVSHAPE):
47 # get indices to iterate over, in the required order
48 n = SVSHAPE.lims[0]
49 mode = SVSHAPE.lims[1]
50 print ("inner halfswap loadstore", n, mode, SVSHAPE.skip)
51
52 # reference list for not needing to do data-swaps, just swap what
53 # *indices* are referenced (two levels of indirection at the moment)
54 # pre-reverse the data-swap list so that it *ends up* in the order 0123..
55 ji = list(range(n))
56
57 levels = n.bit_length() - 1
58 ji = halfrev2(ji, False)
59 if False: # swap: TODO, add extra bit-reverse mode
60 ri = [reverse_bits(i, levels) for i in range(n)]
61 ji = [ji[ri[i]] for i in range(n)]
62
63
64 # invert order if requested
65 if SVSHAPE.invxyz[0]:
66 ji.reverse()
67
68 for i, jl in enumerate(ji):
69 y_end = jl == ji[-1]
70 yield jl, (0b111 if y_end else 0b000)
71
72
73 # python "yield" can be iterated. use this to make it clear how
74 # the indices are generated by using natural-looking nested loops
75 def iterate_dct_inner_costable_indices(SVSHAPE):
76 # get indices to iterate over, in the required order
77 n = SVSHAPE.lims[0]
78 mode = SVSHAPE.lims[1]
79 print ("inner costable", mode)
80 # creating lists of indices to iterate over in each dimension
81 # has to be done dynamically, because it depends on the size
82 # first, the size-based loop (which can be done statically)
83 x_r = []
84 size = 2
85 while size <= n:
86 x_r.append(size)
87 size *= 2
88 # invert order if requested
89 if SVSHAPE.invxyz[0]:
90 x_r.reverse()
91
92 if len(x_r) == 0:
93 return
94
95 #print ("ri", ri)
96 #print ("ji", ji)
97
98 # start an infinite (wrapping) loop
99 skip = 0
100 z_end = 1 # doesn't exist in this, only 2 loops
101 k = 0
102 while True:
103 for size in x_r: # loop over 3rd order dimension (size)
104 x_end = size == x_r[-1]
105 # y_r schedule depends on size
106 halfsize = size // 2
107 y_r = []
108 for i in range(0, n, size):
109 y_r.append(i)
110 # invert if requested
111 if SVSHAPE.invxyz[1]: y_r.reverse()
112 # two lists of half-range indices, e.g. j 0123, jr 7654
113 j = list(range(0, halfsize))
114 # invert if requested
115 if SVSHAPE.invxyz[2]: j_r.reverse()
116 #print ("xform jr", jr)
117 # loop over 1st order dimension
118 for ci, jl in enumerate(j):
119 y_end = jl == j[-1]
120 # now depending on MODE return the index. inner butterfly
121 if SVSHAPE.skip == 0b00: # in [0b00, 0b10]:
122 result = k # offset into COS table
123 elif SVSHAPE.skip == 0b10: #
124 result = ci # coefficient helper
125 elif SVSHAPE.skip == 0b11: #
126 result = size # coefficient helper
127 loopends = (z_end |
128 ((y_end and z_end)<<1) |
129 ((y_end and x_end and z_end)<<2))
130
131 yield result + SVSHAPE.offset, loopends
132 k += 1
133
134 # python "yield" can be iterated. use this to make it clear how
135 # the indices are generated by using natural-looking nested loops
136 def iterate_dct_inner_butterfly_indices(SVSHAPE):
137 # get indices to iterate over, in the required order
138 n = SVSHAPE.lims[0]
139 mode = SVSHAPE.lims[1]
140 #print ("inner butterfly", mode, SVSHAPE.skip)
141 # creating lists of indices to iterate over in each dimension
142 # has to be done dynamically, because it depends on the size
143 # first, the size-based loop (which can be done statically)
144 x_r = []
145 size = 2
146 while size <= n:
147 x_r.append(size)
148 size *= 2
149 # invert order if requested
150 if SVSHAPE.invxyz[0]:
151 x_r.reverse()
152
153 if len(x_r) == 0:
154 return
155
156 # reference (read/write) the in-place data in *reverse-bit-order*
157 ri = list(range(n))
158 if SVSHAPE.submode2 == 0b01:
159 levels = n.bit_length() - 1
160 ri = [ri[reverse_bits(i, levels)] for i in range(n)]
161
162 # reference list for not needing to do data-swaps, just swap what
163 # *indices* are referenced (two levels of indirection at the moment)
164 # pre-reverse the data-swap list so that it *ends up* in the order 0123..
165 ji = list(range(n))
166 inplace_mode = True
167 if inplace_mode and SVSHAPE.submode2 == 0b01:
168 #print ("inplace mode")
169 ji = halfrev2(ji, True)
170
171 print ("ri", ri)
172 print ("ji", ji)
173
174 # start an infinite (wrapping) loop
175 while True:
176 k = 0
177 k_start = 0
178 for size in x_r: # loop over 3rd order dimension (size)
179 x_end = size == x_r[-1]
180 # y_r schedule depends on size
181 halfsize = size // 2
182 y_r = []
183 for i in range(0, n, size):
184 y_r.append(i)
185 # invert if requested
186 if SVSHAPE.invxyz[1]: y_r.reverse()
187 for i in y_r: # loop over 2nd order dimension
188 y_end = i == y_r[-1]
189 # two lists of half-range indices, e.g. j 0123, jr 7654
190 j = list(range(i, i + halfsize))
191 jr = list(range(i+halfsize, i + size))
192 jr.reverse()
193 # invert if requested
194 if SVSHAPE.invxyz[2]: j_r.reverse()
195 hz2 = halfsize // 2 # zero stops reversing 1-item lists
196 #print ("xform jr", jr)
197 # loop over 1st order dimension
198 k = k_start
199 for ci, (jl, jh) in enumerate(zip(j, jr)):
200 z_end = jl == j[-1]
201 # now depending on MODE return the index. inner butterfly
202 if SVSHAPE.skip == 0b00: # in [0b00, 0b10]:
203 result = ri[ji[jl]] # lower half
204 elif SVSHAPE.skip == 0b01: # in [0b01, 0b11]:
205 result = ri[ji[jh]] # upper half
206 elif mode == 4:
207 # COS table pre-generated mode
208 if SVSHAPE.skip == 0b10: #
209 result = k # cos table offset
210 else: # mode 2
211 # COS table generated on-demand ("Vertical-First") mode
212 if SVSHAPE.skip == 0b10: #
213 result = ci # coefficient helper
214 elif SVSHAPE.skip == 0b11: #
215 result = size # coefficient helper
216 loopends = (z_end |
217 ((y_end and z_end)<<1) |
218 ((y_end and x_end and z_end)<<2))
219
220 yield result + SVSHAPE.offset, loopends
221 k += 1
222
223 # now in-place swap
224 if inplace_mode:
225 for ci, (jl, jh) in enumerate(zip(j[:hz2], jr[:hz2])):
226 jlh = jl+halfsize
227 #print ("inplace swap", jh, jlh)
228 tmp1, tmp2 = ji[jlh], ji[jh]
229 ji[jlh], ji[jh] = tmp2, tmp1
230
231 # new k_start point for cos tables( runs inside x_r loop NOT i loop)
232 k_start += halfsize
233
234
235 # python "yield" can be iterated. use this to make it clear how
236 # the indices are generated by using natural-looking nested loops
237 def iterate_dct_outer_butterfly_indices(SVSHAPE):
238 # get indices to iterate over, in the required order
239 n = SVSHAPE.lims[0]
240 mode = SVSHAPE.lims[1]
241 # creating lists of indices to iterate over in each dimension
242 # has to be done dynamically, because it depends on the size
243 # first, the size-based loop (which can be done statically)
244 x_r = []
245 size = n // 2
246 while size >= 2:
247 x_r.append(size)
248 size //= 2
249 # invert order if requested
250 if SVSHAPE.invxyz[0]:
251 x_r.reverse()
252
253 if len(x_r) == 0:
254 return
255
256 #print ("outer butterfly")
257
258 # I-DCT, reference (read/write) the in-place data in *reverse-bit-order*
259 ri = list(range(n))
260 if SVSHAPE.submode2 == 0b11:
261 levels = n.bit_length() - 1
262 ri = [ri[reverse_bits(i, levels)] for i in range(n)]
263
264 # reference list for not needing to do data-swaps, just swap what
265 # *indices* are referenced (two levels of indirection at the moment)
266 # pre-reverse the data-swap list so that it *ends up* in the order 0123..
267 ji = list(range(n))
268 inplace_mode = False # need the space... SVSHAPE.skip in [0b10, 0b11]
269 if SVSHAPE.submode2 == 0b11:
270 ji = halfrev2(ji, False)
271
272 print ("ri", ri)
273 print ("ji", ji)
274
275 # start an infinite (wrapping) loop
276 while True:
277 k = 0
278 k_start = 0
279 for size in x_r: # loop over 3rd order dimension (size)
280 halfsize = size//2
281 x_end = size == x_r[-1]
282 y_r = list(range(0, halfsize))
283 print ("itersum", halfsize, size, y_r, "invert", SVSHAPE.invxyz[1])
284 # invert if requested
285 if SVSHAPE.invxyz[1]: y_r.reverse()
286 for i in y_r: # loop over 2nd order dimension
287 y_end = i == y_r[-1]
288 # one list to create iterative-sum schedule
289 jr = list(range(i+halfsize, i+n-halfsize, size))
290 # invert if requested
291 if SVSHAPE.invxyz[2]: jr.reverse()
292 print ("itersum jr", i+halfsize, i+size, jr,
293 "invert", SVSHAPE.invxyz[2])
294 hz2 = halfsize // 2 # zero stops reversing 1-item lists
295 k = k_start
296 for ci, jh in enumerate(jr): # loop over 1st order dimension
297 z_end = jh == jr[-1]
298 #print (" itersum", size, i, jh, jh+size)
299 if mode == 4:
300 # COS table pre-generated mode
301 if SVSHAPE.skip == 0b00: # in [0b00, 0b10]:
302 result = ri[ji[jh]] # lower half
303 elif SVSHAPE.skip == 0b01: # in [0b01, 0b11]:
304 result = ri[ji[jh+size]] # upper half
305 elif SVSHAPE.skip == 0b10: #
306 result = k # cos table offset
307 else:
308 # COS table generated on-demand ("Vertical-First") mode
309 if SVSHAPE.skip == 0b00: # in [0b00, 0b10]:
310 result = ri[ji[jh]] # lower half
311 elif SVSHAPE.skip == 0b01: # in [0b01, 0b11]:
312 result = ri[ji[jh+size]] # upper half
313 elif SVSHAPE.skip == 0b10: #
314 result = ci # coefficient helper
315 elif SVSHAPE.skip == 0b11: #
316 result = size # coefficient helper
317 loopends = (z_end |
318 ((y_end and z_end)<<1) |
319 ((y_end and x_end and z_end)<<2))
320
321 yield result + SVSHAPE.offset, loopends
322 k += 1
323
324 # now in-place swap (disabled)
325 if False and SVSHAPE.submode2 == 0b11:
326 j = list(range(i, i + halfsize))
327 jr = list(range(i+halfsize, i + size))
328 jr.reverse()
329 for ci, (jl, jh) in enumerate(zip(j[:hz2], jr[:hz2])):
330 jlh = jl+halfsize
331 tmp1, tmp2 = ji[jlh], ji[jh]
332 print ("inplace swap", jh, jlh, "actual", tmp1, tmp2)
333 ji[jlh], ji[jh] = tmp2, tmp1
334
335 # new k_start point for cos tables( runs inside x_r loop NOT i loop)
336 k_start += halfsize
337
338
339 def pprint_schedule(schedule, n):
340 size = 2
341 idx = 0
342 while size <= n:
343 halfsize = size // 2
344 tablestep = n // size
345 print ("size %d halfsize %d tablestep %d" % \
346 (size, halfsize, tablestep))
347 for i in range(0, n, size):
348 prefix = "i %d\t" % i
349 for j in range(i, i + halfsize):
350 (jl, je), (jh, he) = schedule[idx]
351 print (" %-3d\t%s j=%-2d jh=%-2d "
352 "j[jl=%-2d] j[jh=%-2d]" % \
353 (idx, prefix, j, j+halfsize,
354 jl, jh,
355 ),
356 "end", bin(je)[2:], bin(je)[2:])
357 idx += 1
358 size *= 2
359
360 def pprint_schedule_outer(schedule, n):
361 size = 2
362 idx = 0
363 while size <= n//2:
364 halfsize = size // 2
365 tablestep = n // size
366 print ("size %d halfsize %d tablestep %d" % \
367 (size, halfsize, tablestep))
368 y_r = list(range(0, halfsize))
369 for i in y_r:
370 prefix = "i %d\t" % i
371 jr = list(range(i+halfsize, i+n-halfsize, size))
372 for j in jr:
373 (jl, je), (jh, he) = schedule[idx]
374 print (" %-3d\t%s j=%-2d jh=%-2d "
375 "j[jl=%-2d] j[jh=%-2d]" % \
376 (idx, prefix, j, j+halfsize,
377 jl, jh,
378 ),
379 "end", bin(je)[2:], bin(je)[2:])
380 idx += 1
381 size *= 2
382
383
384 # totally cool *in-place* DCT algorithm using yield REMAPs
385 def transform2(vec):
386
387 # Initialization
388 n = len(vec)
389 print ()
390 print ("transform2", n)
391 levels = n.bit_length() - 1
392
393 # set up dims
394 xdim = n
395
396 # reference (read/write) the in-place data in *reverse-bit-order*
397 ri = list(range(n))
398 ri = [ri[reverse_bits(i, levels)] for i in range(n)]
399
400 # and pretend we LDed data in half-swapped *and* bit-reversed order as well
401 # TODO: merge these two
402 vec = halfrev2(vec, False)
403 vec = [vec[ri[i]] for i in range(n)]
404
405 # create a cos table: not strictly necessary but here for illustrative
406 # purposes, to demonstrate the point that it really *is* iterative.
407 # this table could be cached and used multiple times rather than
408 # computed every time.
409 ctable = []
410 size = n
411 while size >= 2:
412 halfsize = size // 2
413 for ci in range(halfsize):
414 coeff = (math.cos((ci + 0.5) * math.pi / size) * 2.0)
415 ctable.append(coeff)
416 print ("coeff", size, "ci", ci, "k", len(ctable)-1,
417 "i/n", (ci+0.5)/size, coeff)
418 size //= 2
419
420 # set up an SVSHAPE
421 class SVSHAPE:
422 pass
423 # ci schedule
424 SVSHAPE0 = SVSHAPE()
425 SVSHAPE0.lims = [xdim, 4, 0]
426 SVSHAPE0.mode = 0b01
427 SVSHAPE0.submode2 = 0b01
428 SVSHAPE0.skip = 0b10
429 SVSHAPE0.offset = 0 # experiment with different offset, here
430 SVSHAPE0.invxyz = [1,0,0] # inversion if desired
431 # size schedule
432 SVSHAPE1 = SVSHAPE()
433 SVSHAPE1.lims = [xdim, 4, 0]
434 SVSHAPE1.mode = 0b01
435 SVSHAPE1.submode2 = 0b01
436 SVSHAPE1.skip = 0b11
437 SVSHAPE1.offset = 0 # experiment with different offset, here
438 SVSHAPE1.invxyz = [1,0,0] # inversion if desired
439 # k schedule
440 SVSHAPE2 = SVSHAPE()
441 SVSHAPE2.lims = [xdim, 4, 0]
442 SVSHAPE2.mode = 0b01
443 SVSHAPE2.submode2 = 0b01
444 SVSHAPE2.skip = 0b00
445 SVSHAPE2.offset = 0 # experiment with different offset, here
446 SVSHAPE2.invxyz = [1,0,0] # inversion if desired
447
448 # enumerate over the iterator function, getting new indices
449 i0 = iterate_dct_inner_costable_indices(SVSHAPE0)
450 i1 = iterate_dct_inner_costable_indices(SVSHAPE1)
451 i2 = iterate_dct_inner_costable_indices(SVSHAPE2)
452 for ((ci, cie), (size, sze), (k, ke)) in \
453 zip(i0, i1, i2):
454 print ("xform2 cos", ci, size, k)
455 coeff = (math.cos((ci + 0.5) * math.pi / size) * 2.0)
456 assert coeff == ctable[k]
457 print ("coeff", size, "ci", ci, "k", k,
458 "i/n", (ci+0.5)/size, coeff,
459 "end", bin(cie), bin(sze), bin(ke))
460 if cie == 0b111: # all loops end
461 break
462
463 ################
464 # INNER butterfly
465 ################
466
467 # j schedule
468 SVSHAPE0 = SVSHAPE()
469 SVSHAPE0.lims = [xdim, 0b000001, 0]
470 SVSHAPE0.mode = 0b01
471 SVSHAPE0.submode2 = 0b01
472 SVSHAPE0.skip = 0b00
473 SVSHAPE0.offset = 0 # experiment with different offset, here
474 SVSHAPE0.invxyz = [1,0,0] # inversion if desired
475 # j+halfstep schedule
476 SVSHAPE1 = SVSHAPE()
477 SVSHAPE1.lims = [xdim, 0b000001, 0]
478 SVSHAPE1.mode = 0b01
479 SVSHAPE1.submode2 = 0b01
480 SVSHAPE1.skip = 0b01
481 SVSHAPE1.offset = 0 # experiment with different offset, here
482 SVSHAPE1.invxyz = [1,0,0] # inversion if desired
483 # ci schedule
484 SVSHAPE2 = SVSHAPE()
485 SVSHAPE2.lims = [xdim, 0b000001, 0]
486 SVSHAPE2.mode = 0b01
487 SVSHAPE2.submode2 = 0b01
488 SVSHAPE2.skip = 0b10
489 SVSHAPE2.offset = 0 # experiment with different offset, here
490 SVSHAPE2.invxyz = [1,0,0] # inversion if desired
491 # size schedule
492 SVSHAPE3 = SVSHAPE()
493 SVSHAPE3.lims = [xdim, 0b000001, 0]
494 SVSHAPE3.mode = 0b01
495 SVSHAPE3.submode2 = 0b01
496 SVSHAPE3.skip = 0b11
497 SVSHAPE3.offset = 0 # experiment with different offset, here
498 SVSHAPE3.invxyz = [1,0,0] # inversion if desired
499
500 # enumerate over the iterator function, getting new indices
501 i0 = iterate_dct_inner_butterfly_indices(SVSHAPE0)
502 i1 = iterate_dct_inner_butterfly_indices(SVSHAPE1)
503 i2 = iterate_dct_inner_butterfly_indices(SVSHAPE2)
504 i3 = iterate_dct_inner_butterfly_indices(SVSHAPE3)
505 for k, ((jl, jle), (jh, jhe), (ci, cie), (size, sze)) in \
506 enumerate(zip(i0, i1, i2, i3)):
507 t1, t2 = vec[jl], vec[jh]
508 print ("xform2", jl, jh, ci, size)
509 coeff = (math.cos((ci + 0.5) * math.pi / size) * 2.0)
510 #assert coeff == ctable[k]
511 vec[jl] = t1 + t2
512 vec[jh] = (t1 - t2) * (1/coeff)
513 print ("coeff", size, "ci", ci,
514 "jl", jl, "jh", jh,
515 "i/n", (ci+0.5)/size, coeff, vec[jl],
516 vec[jh],
517 "end", bin(jle), bin(jhe))
518 if jle == 0b111: # all loops end
519 break
520
521 print("transform2 pre-itersum", vec)
522
523 # now things are in the right order for the outer butterfly.
524
525 # j schedule
526 SVSHAPE0 = SVSHAPE()
527 SVSHAPE0.lims = [xdim, 0b0000010, 0]
528 SVSHAPE0.submode2 = 0b100
529 SVSHAPE0.mode = 0b01
530 SVSHAPE0.skip = 0b00
531 SVSHAPE0.offset = 0 # experiment with different offset, here
532 SVSHAPE0.invxyz = [0,0,0] # inversion if desired
533 # j+halfstep schedule
534 SVSHAPE1 = SVSHAPE()
535 SVSHAPE1.lims = [xdim, 0b0000010, 0]
536 SVSHAPE1.mode = 0b01
537 SVSHAPE1.submode2 = 0b100
538 SVSHAPE1.skip = 0b01
539 SVSHAPE1.offset = 0 # experiment with different offset, here
540 SVSHAPE1.invxyz = [0,0,0] # inversion if desired
541
542 # enumerate over the iterator function, getting new indices
543 i0 = iterate_dct_outer_butterfly_indices(SVSHAPE0)
544 i1 = iterate_dct_outer_butterfly_indices(SVSHAPE1)
545 for k, ((jl, jle), (jh, jhe)) in enumerate(zip(i0, i1)):
546 print ("itersum jr", jl, jh,
547 "end", bin(jle), bin(jhe))
548 vec[jl] += vec[jh]
549 size //= 2
550 if jle == 0b111: # all loops end
551 break
552
553 print("transform2 result", vec)
554
555 return vec
556
557
558 def demo_idct():
559 # set the dimension sizes here
560 n = 8
561 xdim = n
562 ydim = 0 # not needed
563 zdim = 0 # again, not needed
564
565 # set up an SVSHAPE
566 class SVSHAPE:
567 pass
568
569 ################
570 # outer butterfly
571 ################
572
573 # j schedule
574 SVSHAPE0 = SVSHAPE()
575 SVSHAPE0.lims = [xdim, 0b0000010, 0]
576 SVSHAPE0.submode2 = 0b100
577 SVSHAPE0.mode = 0b11
578 SVSHAPE0.skip = 0b00
579 SVSHAPE0.offset = 0 # experiment with different offset, here
580 SVSHAPE0.invxyz = [1,0,1] # inversion if desired
581 # j+halfstep schedule
582 SVSHAPE1 = SVSHAPE()
583 SVSHAPE1.lims = [xdim, 0b0000010, 0]
584 SVSHAPE1.mode = 0b11
585 SVSHAPE1.submode2 = 0b100
586 SVSHAPE1.skip = 0b01
587 SVSHAPE1.offset = 0 # experiment with different offset, here
588 SVSHAPE1.invxyz = [1,0,1] # inversion if desired
589
590 # enumerate over the iterator function, getting new indices
591 schedule = []
592 i0 = iterate_dct_outer_butterfly_indices(SVSHAPE0)
593 i1 = iterate_dct_outer_butterfly_indices(SVSHAPE1)
594 for idx, (jl, jh) in enumerate(zip(i0, i1)):
595 schedule.append((jl, jh))
596 if jl[1] == 0b111: # end
597 break
598
599 # ok now pretty-print the results, with some debug output
600 print ("outer i-dct butterfly")
601 pprint_schedule_outer(schedule, n)
602
603 ################
604 # INNER butterfly
605 ################
606
607 # j schedule
608 SVSHAPE0 = SVSHAPE()
609 SVSHAPE0.lims = [xdim, 0b000001, 0]
610 SVSHAPE0.mode = 0b11
611 SVSHAPE0.submode2 = 0b11
612 SVSHAPE0.skip = 0b00
613 SVSHAPE0.offset = 0 # experiment with different offset, here
614 SVSHAPE0.invxyz = [0,0,0] # inversion if desired
615 # j+halfstep schedule
616 SVSHAPE1 = SVSHAPE()
617 SVSHAPE1.lims = [xdim, 0b000001, 0]
618 SVSHAPE1.mode = 0b11
619 SVSHAPE1.submode2 = 0b11
620 SVSHAPE1.skip = 0b01
621 SVSHAPE1.offset = 0 # experiment with different offset, here
622 SVSHAPE1.invxyz = [0,0,0] # inversion if desired
623
624 # enumerate over the iterator function, getting new indices
625 schedule = []
626 i0 = iterate_dct_inner_butterfly_indices(SVSHAPE0)
627 i1 = iterate_dct_inner_butterfly_indices(SVSHAPE1)
628 for idx, (jl, jh) in enumerate(zip(i0, i1)):
629 schedule.append((jl, jh))
630 if jl[1] == 0b111: # end
631 break
632
633 # ok now pretty-print the results, with some debug output
634 print ("inner butterfly")
635 pprint_schedule(schedule, n)
636 print ("")
637
638 return
639
640 # for DCT half-swap LDs
641 # j schedule
642 SVSHAPE0 = SVSHAPE()
643 SVSHAPE0.lims = [xdim, 0b000101, zdim]
644 SVSHAPE0.mode = 0b01
645 SVSHAPE0.submode2 = 0b01
646 SVSHAPE0.skip = 0
647 SVSHAPE0.offset = 0 # experiment with different offset, here
648 SVSHAPE0.invxyz = [0,0,0] # inversion if desired
649
650 # expected results
651 levels = n.bit_length() - 1
652 avi = list(range(n))
653 ri = [reverse_bits(i, levels) for i in range(n)]
654 av = halfrev2(avi, False)
655 av = [av[ri[i]] for i in range(n)]
656
657 i0 = iterate_dct_inner_halfswap_loadstore(SVSHAPE0)
658 for idx, (jl) in enumerate(i0):
659 print ("inverse half-swap ld", idx, jl, av[idx])
660 if jl[1] == 0b111: # end
661 break
662
663
664 def demo_dct():
665 # set the dimension sizes here
666 n = 8
667 xdim = n
668 ydim = 0 # not needed
669 zdim = 0 # again, not needed
670
671
672 ################
673 # INNER butterfly
674 ################
675
676 # set up an SVSHAPE
677 class SVSHAPE:
678 pass
679 # j schedule
680 SVSHAPE0 = SVSHAPE()
681 SVSHAPE0.lims = [xdim, 0b000001, zdim]
682 SVSHAPE0.submode2 = 0b010
683 SVSHAPE0.mode = 0b01
684 SVSHAPE0.skip = 0b00
685 SVSHAPE0.offset = 0 # experiment with different offset, here
686 SVSHAPE0.invxyz = [0,0,0] # inversion if desired
687 # j+halfstep schedule
688 SVSHAPE1 = SVSHAPE()
689 SVSHAPE1.lims = [xdim, 0b000001, zdim]
690 SVSHAPE1.submode2 = 0b010
691 SVSHAPE1.mode = 0b01
692 SVSHAPE1.skip = 0b01
693 SVSHAPE1.offset = 0 # experiment with different offset, here
694 SVSHAPE1.invxyz = [0,0,0] # inversion if desired
695
696 # enumerate over the iterator function, getting new indices
697 schedule = []
698 i0 = iterate_dct_inner_butterfly_indices(SVSHAPE0)
699 i1 = iterate_dct_inner_butterfly_indices(SVSHAPE1)
700 for idx, (jl, jh) in enumerate(zip(i0, i1)):
701 schedule.append((jl, jh))
702 if jl[1] == 0b111: # end
703 break
704
705 # ok now pretty-print the results, with some debug output
706 print ("inner butterfly")
707 pprint_schedule(schedule, n)
708 print ("")
709
710 ################
711 # outer butterfly
712 ################
713
714 # j schedule
715 SVSHAPE0 = SVSHAPE()
716 SVSHAPE0.lims = [xdim, 0b000010, zdim]
717 SVSHAPE0.mode = 0b01
718 SVSHAPE0.submode2 = 0b100
719 SVSHAPE0.skip = 0b10
720 SVSHAPE0.offset = 0 # experiment with different offset, here
721 SVSHAPE0.invxyz = [1,0,0] # inversion if desired
722 # j+halfstep schedule
723 SVSHAPE1 = SVSHAPE()
724 SVSHAPE1.lims = [xdim, 0b000010, zdim]
725 SVSHAPE1.mode = 0b01
726 SVSHAPE1.submode2 = 0b100
727 SVSHAPE1.skip = 0b11
728 SVSHAPE1.offset = 0 # experiment with different offset, here
729 SVSHAPE1.invxyz = [1,0,0] # inversion if desired
730
731 # enumerate over the iterator function, getting new indices
732 schedule = []
733 i0 = iterate_dct_outer_butterfly_indices(SVSHAPE0)
734 i1 = iterate_dct_outer_butterfly_indices(SVSHAPE1)
735 for idx, (jl, jh) in enumerate(zip(i0, i1)):
736 schedule.append((jl, jh))
737 if jl[1] == 0b111: # end
738 break
739
740 # ok now pretty-print the results, with some debug output
741 print ("outer butterfly")
742 pprint_schedule_outer(schedule, n)
743
744 # for DCT half-swap LDs
745 # j schedule
746 SVSHAPE0 = SVSHAPE()
747 SVSHAPE0.lims = [xdim, 0b000101, zdim]
748 SVSHAPE0.mode = 0b01
749 SVSHAPE0.submode2 = 0
750 SVSHAPE0.skip = 0
751 SVSHAPE0.offset = 0 # experiment with different offset, here
752 SVSHAPE0.invxyz = [0,0,0] # inversion if desired
753
754 # expected results
755 levels = n.bit_length() - 1
756 avi = list(range(n))
757 ri = [reverse_bits(i, levels) for i in range(n)]
758 av = halfrev2(avi, False)
759 av = [av[ri[i]] for i in range(n)]
760
761
762 i0 = iterate_dct_inner_halfswap_loadstore(SVSHAPE0)
763 for idx, (jl) in enumerate(i0):
764 print ("inverse half-swap ld", idx, jl, av[idx])
765 if jl[1] == 0b111: # end
766 break
767
768
769 # run the demo
770 if __name__ == '__main__':
771 demo_dct()
772 demo_idct()