1 # DCT "REMAP" scheduler
3 # Modifications made to create an in-place iterative DCT:
4 # Copyright (c) 2021 Luke Kenneth Casson Leighton <lkcl@lkcl.net>
8 # Original fastdctlee.py by Nayuki:
9 # Copyright (c) 2020 Project Nayuki. (MIT License)
10 # https://www.nayuki.io/page/fast-discrete-cosine-transform-algorithms
12 from copy
import deepcopy
15 # bits of the integer 'val'.
16 def reverse_bits(val
, width
):
18 for _
in range(width
):
19 result
= (result
<< 1) |
(val
& 1)
24 # iterative version of [recursively-applied] half-rev.
25 # relies on the list lengths being power-of-two and the fact
26 # that bit-inversion of a list of binary numbers is the same
27 # as reversing the order of the list
28 # this version is dead easy to implement in hardware.
29 # a big surprise is that the half-reversal can be done with
30 # such a simple XOR. the inverse operation is slightly trickier
31 def halfrev2(vec
, pre_rev
=True):
33 for i
in range(len(vec
)):
35 res
.append(vec
[i ^
(i
>>1)])
39 for ji
in range(1, bl
):
45 # python "yield" can be iterated. use this to make it clear how
46 # the indices are generated by using natural-looking nested loops
47 def iterate_dct_inner_halfswap_loadstore(SVSHAPE
):
48 # get indices to iterate over, in the required order
50 mode
= SVSHAPE
.lims
[1]
51 print ("inner halfswap loadstore", n
, mode
, SVSHAPE
.skip
)
53 # reference list for not needing to do data-swaps, just swap what
54 # *indices* are referenced (two levels of indirection at the moment)
55 # pre-reverse the data-swap list so that it *ends up* in the order 0123..
58 levels
= n
.bit_length() - 1
59 ji
= halfrev2(ji
, False)
60 if False: # swap: TODO, add extra bit-reverse mode
61 ri
= [reverse_bits(i
, levels
) for i
in range(n
)]
62 ji
= [ji
[ri
[i
]] for i
in range(n
)]
65 # invert order if requested
69 for i
, jl
in enumerate(ji
):
71 yield jl
, (0b111 if y_end
else 0b000)
74 # python "yield" can be iterated. use this to make it clear how
75 # the indices are generated by using natural-looking nested loops
76 def iterate_dct_inner_costable_indices(SVSHAPE
):
77 # get indices to iterate over, in the required order
79 mode
= SVSHAPE
.lims
[1]
80 print ("inner costable", mode
)
81 # creating lists of indices to iterate over in each dimension
82 # has to be done dynamically, because it depends on the size
83 # first, the size-based loop (which can be done statically)
89 # invert order if requested
99 # start an infinite (wrapping) loop
101 z_end
= 1 # doesn't exist in this, only 2 loops
104 for size
in x_r
: # loop over 3rd order dimension (size)
105 x_end
= size
== x_r
[-1]
106 # y_r schedule depends on size
109 for i
in range(0, n
, size
):
111 # invert if requested
112 if SVSHAPE
.invxyz
[1]: y_r
.reverse()
113 # two lists of half-range indices, e.g. j 0123, jr 7654
114 j
= list(range(0, halfsize
))
115 # invert if requested
116 if SVSHAPE
.invxyz
[2]: j_r
.reverse()
117 #print ("xform jr", jr)
118 # loop over 1st order dimension
119 for ci
, jl
in enumerate(j
):
121 # now depending on MODE return the index. inner butterfly
122 if SVSHAPE
.skip
== 0b00: # in [0b00, 0b10]:
123 result
= k
# offset into COS table
124 elif SVSHAPE
.skip
== 0b10: #
125 result
= ci
# coefficient helper
126 elif SVSHAPE
.skip
== 0b11: #
127 result
= size
# coefficient helper
129 ((y_end
and z_end
)<<1) |
130 ((y_end
and x_end
and z_end
)<<2))
132 yield result
+ SVSHAPE
.offset
, loopends
135 # python "yield" can be iterated. use this to make it clear how
136 # the indices are generated by using natural-looking nested loops
137 def iterate_dct_inner_butterfly_indices(SVSHAPE
):
138 # get indices to iterate over, in the required order
140 mode
= SVSHAPE
.lims
[1]
141 print ("inner butterfly", mode
, SVSHAPE
.skip
, "submode", SVSHAPE
.submode2
)
142 # creating lists of indices to iterate over in each dimension
143 # has to be done dynamically, because it depends on the size
144 # first, the size-based loop (which can be done statically)
150 # invert order if requested
151 if SVSHAPE
.invxyz
[0]:
157 # reference (read/write) the in-place data in *reverse-bit-order*
159 if SVSHAPE
.submode2
== 0b01:
160 levels
= n
.bit_length() - 1
161 ri
= [ri
[reverse_bits(i
, levels
)] for i
in range(n
)]
163 # reference list for not needing to do data-swaps, just swap what
164 # *indices* are referenced (two levels of indirection at the moment)
165 # pre-reverse the data-swap list so that it *ends up* in the order 0123..
168 if inplace_mode
and SVSHAPE
.submode2
== 0b01:
169 #print ("inplace mode")
170 ji
= halfrev2(ji
, True)
171 if inplace_mode
and SVSHAPE
.submode2
== 0b11:
172 ji
= halfrev2(ji
, False)
177 # start an infinite (wrapping) loop
181 for size
in x_r
: # loop over 3rd order dimension (size)
182 x_end
= size
== x_r
[-1]
183 # y_r schedule depends on size
186 for i
in range(0, n
, size
):
188 # invert if requested
189 if SVSHAPE
.invxyz
[1]: y_r
.reverse()
190 for i
in y_r
: # loop over 2nd order dimension
192 # two lists of half-range indices, e.g. j 0123, jr 7654
193 j
= list(range(i
, i
+ halfsize
))
194 jr
= list(range(i
+halfsize
, i
+ size
))
196 # invert if requested
197 if SVSHAPE
.invxyz
[2]:
200 hz2
= halfsize
// 2 # zero stops reversing 1-item lists
201 #print ("xform jr", jr)
202 # loop over 1st order dimension
204 for ci
, (jl
, jh
) in enumerate(zip(j
, jr
)):
206 # now depending on MODE return the index. inner butterfly
207 if SVSHAPE
.skip
== 0b00: # in [0b00, 0b10]:
208 if SVSHAPE
.submode2
== 0b11: # iDCT
209 result
= ji
[ri
[jl
]] # lower half
211 result
= ri
[ji
[jl
]] # lower half
212 elif SVSHAPE
.skip
== 0b01: # in [0b01, 0b11]:
213 if SVSHAPE
.submode2
== 0b11: # iDCT
214 result
= ji
[ri
[jl
+halfsize
]] # upper half
216 result
= ri
[ji
[jh
]] # upper half
218 # COS table pre-generated mode
219 if SVSHAPE
.skip
== 0b10: #
220 result
= k
# cos table offset
222 # COS table generated on-demand ("Vertical-First") mode
223 if SVSHAPE
.skip
== 0b10: #
224 result
= ci
# coefficient helper
225 elif SVSHAPE
.skip
== 0b11: #
226 result
= size
# coefficient helper
228 ((y_end
and z_end
)<<1) |
229 ((y_end
and x_end
and z_end
)<<2))
231 yield result
+ SVSHAPE
.offset
, loopends
236 for ci
, (jl
, jh
) in enumerate(zip(j
[:hz2
], jr
[:hz2
])):
238 print ("inplace swap", jh
, jlh
)
239 tmp1
, tmp2
= ji
[jlh
], ji
[jh
]
240 ji
[jlh
], ji
[jh
] = tmp2
, tmp1
242 # new k_start point for cos tables( runs inside x_r loop NOT i loop)
246 # python "yield" can be iterated. use this to make it clear how
247 # the indices are generated by using natural-looking nested loops
248 def iterate_dct_outer_butterfly_indices(SVSHAPE
):
249 # get indices to iterate over, in the required order
251 mode
= SVSHAPE
.lims
[1]
252 # creating lists of indices to iterate over in each dimension
253 # has to be done dynamically, because it depends on the size
254 # first, the size-based loop (which can be done statically)
260 # invert order if requested
261 if SVSHAPE
.invxyz
[0]:
267 #print ("outer butterfly")
269 # I-DCT, reference (read/write) the in-place data in *reverse-bit-order*
271 if SVSHAPE
.submode2
in [0b11, 0b01]:
272 levels
= n
.bit_length() - 1
273 ri
= [ri
[reverse_bits(i
, levels
)] for i
in range(n
)]
275 # reference list for not needing to do data-swaps, just swap what
276 # *indices* are referenced (two levels of indirection at the moment)
277 # pre-reverse the data-swap list so that it *ends up* in the order 0123..
279 inplace_mode
= False # need the space... SVSHAPE.skip in [0b10, 0b11]
280 if SVSHAPE
.submode2
== 0b11:
281 ji
= halfrev2(ji
, False)
286 # start an infinite (wrapping) loop
290 for size
in x_r
: # loop over 3rd order dimension (size)
292 x_end
= size
== x_r
[-1]
293 y_r
= list(range(0, halfsize
))
294 print ("itersum", halfsize
, size
, y_r
, "invert", SVSHAPE
.invxyz
[1])
295 # invert if requested
296 if SVSHAPE
.invxyz
[1]: y_r
.reverse()
297 for i
in y_r
: # loop over 2nd order dimension
299 # one list to create iterative-sum schedule
300 jr
= list(range(i
+halfsize
, i
+n
-halfsize
, size
))
301 # invert if requested
302 if SVSHAPE
.invxyz
[2]: jr
.reverse()
303 print ("itersum jr", i
+halfsize
, i
+size
, jr
,
304 "invert", SVSHAPE
.invxyz
[2])
305 hz2
= halfsize
// 2 # zero stops reversing 1-item lists
307 for ci
, jh
in enumerate(jr
): # loop over 1st order dimension
309 #print (" itersum", size, i, jh, jh+size)
311 # COS table pre-generated mode
312 if SVSHAPE
.skip
== 0b00: # in [0b00, 0b10]:
313 if SVSHAPE
.submode2
== 0b11: # iDCT
314 result
= ji
[ri
[jh
]] # upper half
316 result
= ri
[ji
[jh
]] # lower half
317 elif SVSHAPE
.skip
== 0b01: # in [0b01, 0b11]:
318 if SVSHAPE
.submode2
== 0b11: # iDCT
319 result
= ji
[ri
[jh
+size
]] # upper half
321 result
= ri
[ji
[jh
+size
]] # upper half
322 elif SVSHAPE
.skip
== 0b10: #
323 result
= k
# cos table offset
325 # COS table generated on-demand ("Vertical-First") mode
326 if SVSHAPE
.skip
== 0b00: # in [0b00, 0b10]:
327 if SVSHAPE
.submode2
== 0b11: # iDCT
328 result
= ji
[ri
[jh
]] # lower half
330 result
= ri
[ji
[jh
]] # lower half
331 elif SVSHAPE
.skip
== 0b01: # in [0b01, 0b11]:
332 if SVSHAPE
.submode2
== 0b11: # iDCT
333 result
= ji
[ri
[jh
+size
]] # upper half
335 result
= ri
[ji
[jh
+size
]] # upper half
336 elif SVSHAPE
.skip
== 0b10: #
337 result
= ci
# coefficient helper
338 elif SVSHAPE
.skip
== 0b11: #
339 result
= size
# coefficient helper
341 ((y_end
and z_end
)<<1) |
342 ((y_end
and x_end
and z_end
)<<2))
344 yield result
+ SVSHAPE
.offset
, loopends
347 # now in-place swap (disabled)
348 if False and SVSHAPE
.submode2
== 0b11:
349 j
= list(range(i
, i
+ halfsize
))
350 jr
= list(range(i
+halfsize
, i
+ size
))
352 for ci
, (jl
, jh
) in enumerate(zip(j
[:hz2
], jr
[:hz2
])):
354 tmp1
, tmp2
= ji
[jlh
], ji
[jh
]
355 print ("inplace swap", jh
, jlh
, "actual", tmp1
, tmp2
)
356 ji
[jlh
], ji
[jh
] = tmp2
, tmp1
358 # new k_start point for cos tables( runs inside x_r loop NOT i loop)
362 def pprint_schedule(schedule
, n
):
367 tablestep
= n
// size
368 print ("size %d halfsize %d tablestep %d" % \
369 (size
, halfsize
, tablestep
))
370 for i
in range(0, n
, size
):
371 prefix
= "i %d\t" % i
372 for j
in range(i
, i
+ halfsize
):
373 (jl
, je
), (jh
, he
) = schedule
[idx
]
374 print (" %-3d\t%s j=%-2d jh=%-2d "
375 "j[jl=%-2d] j[jh=%-2d]" % \
376 (idx
, prefix
, j
, j
+halfsize
,
379 "end", bin(je
)[2:], bin(je
)[2:])
383 def pprint_schedule_outer(schedule
, n
):
388 tablestep
= n
// size
389 print ("size %d halfsize %d tablestep %d" % \
390 (size
, halfsize
, tablestep
))
391 y_r
= list(range(0, halfsize
))
393 prefix
= "i %d\t" % i
394 jr
= list(range(i
+halfsize
, i
+n
-halfsize
, size
))
396 (jl
, je
), (jh
, he
) = schedule
[idx
]
397 print (" %-3d\t%s j=%-2d jh=%-2d "
398 "j[jl=%-2d] j[jh=%-2d]" % \
399 (idx
, prefix
, j
, j
+halfsize
,
402 "end", bin(je
)[2:], bin(je
)[2:])
407 # totally cool *in-place* inverse DCT algorithm using yield REMAPs
408 def inverse_transform2(vec
):
415 print ("inverse_transform2", n
)
416 levels
= n
.bit_length() - 1
421 # divide element 0 by 2
424 # reference (read/write) the in-place data in *reverse-bit-order*
426 ri
= [ri
[reverse_bits(i
, levels
)] for i
in range(n
)]
428 # pretend we LDed data in half-swapped *and* bit-reversed order as well
429 # TODO: merge these two
430 vec
= [vec
[ri
[i
]] for i
in range(n
)]
431 vec
= halfrev2(vec
, True)
433 print("inverse_transform2 post-alter", vec
)
435 # create a cos table: not strictly necessary but here for illustrative
436 # purposes, to demonstrate the point that it really *is* iterative.
437 # this table could be cached and used multiple times rather than
438 # computed every time.
443 for ci
in range(halfsize
):
444 coeff
= (math
.cos((ci
+ 0.5) * math
.pi
/ size
) * 2.0)
446 print ("coeff", size
, "ci", ci
, "k", len(ctable
)-1,
447 "i/n", (ci
+0.5)/size
, coeff
)
458 SVSHAPE0
.lims
= [xdim
, 4, 0]
460 SVSHAPE0
.submode2
= 0b01
462 SVSHAPE0
.offset
= 0 # experiment with different offset, here
463 SVSHAPE0
.invxyz
= [1,0,1] # inversion if desired
466 SVSHAPE1
.lims
= [xdim
, 4, 0]
468 SVSHAPE1
.submode2
= 0b01
470 SVSHAPE1
.offset
= 0 # experiment with different offset, here
471 SVSHAPE1
.invxyz
= [1,0,1] # inversion if desired
474 SVSHAPE2
.lims
= [xdim
, 4, 0]
476 SVSHAPE2
.submode2
= 0b01
478 SVSHAPE2
.offset
= 0 # experiment with different offset, here
479 SVSHAPE2
.invxyz
= [1,0,1] # inversion if desired
481 # enumerate over the iterator function, getting new indices
482 i0
= iterate_dct_inner_costable_indices(SVSHAPE0
)
483 i1
= iterate_dct_inner_costable_indices(SVSHAPE1
)
484 i2
= iterate_dct_inner_costable_indices(SVSHAPE2
)
485 for ((ci
, cie
), (size
, sze
), (k
, ke
)) in \
487 print ("xform2 cos", ci
, size
, k
)
488 coeff
= (math
.cos((ci
+ 0.5) * math
.pi
/ size
) * 2.0)
489 assert coeff
== ctable
[k
]
490 print ("coeff", size
, "ci", ci
, "k", k
,
491 "i/n", (ci
+0.5)/size
, coeff
,
492 "end", bin(cie
), bin(sze
), bin(ke
))
493 if cie
== 0b111: # all loops end
496 # now things are in the right order for the outer butterfly.
500 SVSHAPE0
.lims
= [xdim
, 0b0000010, 0]
501 SVSHAPE0
.submode2
= 0b11
504 SVSHAPE0
.offset
= 0 # experiment with different offset, here
505 SVSHAPE0
.invxyz
= [1,0,1] # inversion if desired
506 # j+halfstep schedule
508 SVSHAPE1
.lims
= [xdim
, 0b0000010, 0]
510 SVSHAPE1
.submode2
= 0b11
512 SVSHAPE1
.offset
= 0 # experiment with different offset, here
513 SVSHAPE1
.invxyz
= [1,0,1] # inversion if desired
515 # enumerate over the iterator function, getting new indices
516 i0
= iterate_dct_outer_butterfly_indices(SVSHAPE0
)
517 i1
= iterate_dct_outer_butterfly_indices(SVSHAPE1
)
518 for k
, ((jl
, jle
), (jh
, jhe
)) in enumerate(zip(i0
, i1
)):
519 print ("itersum jr", jl
, jh
,
520 "end", bin(jle
), bin(jhe
),
521 vec
[jl
], vec
[jh
], vec
[jh
]+vec
[jl
])
524 if jle
== 0b111: # all loops end
527 print("transform2 pre-itersum", vec
)
535 SVSHAPE0
.lims
= [xdim
, 0b000001, 0]
537 SVSHAPE0
.submode2
= 0b11
539 SVSHAPE0
.offset
= 0 # experiment with different offset, here
540 SVSHAPE0
.invxyz
= [0,0,0] # inversion if desired
541 # j+halfstep schedule
543 SVSHAPE1
.lims
= [xdim
, 0b000001, 0]
545 SVSHAPE1
.submode2
= 0b11
547 SVSHAPE1
.offset
= 0 # experiment with different offset, here
548 SVSHAPE1
.invxyz
= [0,0,0] # inversion if desired
551 SVSHAPE2
.lims
= [xdim
, 0b000001, 0]
553 SVSHAPE2
.submode2
= 0b11
555 SVSHAPE2
.offset
= 0 # experiment with different offset, here
556 SVSHAPE2
.invxyz
= [0,0,0] # inversion if desired
559 SVSHAPE3
.lims
= [xdim
, 0b000001, 0]
561 SVSHAPE3
.submode2
= 0b11
563 SVSHAPE3
.offset
= 0 # experiment with different offset, here
564 SVSHAPE3
.invxyz
= [0,0,0] # inversion if desired
566 # enumerate over the iterator function, getting new indices
567 i0
= iterate_dct_inner_butterfly_indices(SVSHAPE0
)
568 i1
= iterate_dct_inner_butterfly_indices(SVSHAPE1
)
569 i2
= iterate_dct_inner_butterfly_indices(SVSHAPE2
)
570 i3
= iterate_dct_inner_butterfly_indices(SVSHAPE3
)
571 for k
, ((jl
, jle
), (jh
, jhe
), (ci
, cie
), (size
, sze
)) in \
572 enumerate(zip(i0
, i1
, i2
, i3
)):
573 t1
, t2
= vec
[jl
], vec
[jh
]
574 print ("xform2", jl
, jh
, ci
, size
)
575 coeff
= (math
.cos((ci
+ 0.5) * math
.pi
/ size
) * 2.0)
576 #assert coeff == ctable[k]
577 vec
[jl
] = t1
+ t2
/coeff
578 vec
[jh
] = t1
- t2
/coeff
579 print ("coeff", size
, "ci", ci
,
581 "i/n", (ci
+0.5)/size
, coeff
, "t1/2", t1
, t2
,
582 "+/-", vec
[jl
], vec
[jh
],
583 "end", bin(jle
), bin(jhe
))
584 if jle
== 0b111: # all loops end
587 print("transform2 result", vec
)
592 # totally cool *in-place* DCT algorithm using yield REMAPs
598 print ("transform2", n
)
599 levels
= n
.bit_length() - 1
604 # reference (read/write) the in-place data in *reverse-bit-order*
606 ri
= [ri
[reverse_bits(i
, levels
)] for i
in range(n
)]
608 # and pretend we LDed data in half-swapped *and* bit-reversed order as well
609 # TODO: merge these two
610 vec
= halfrev2(vec
, False)
611 vec
= [vec
[ri
[i
]] for i
in range(n
)]
613 # create a cos table: not strictly necessary but here for illustrative
614 # purposes, to demonstrate the point that it really *is* iterative.
615 # this table could be cached and used multiple times rather than
616 # computed every time.
621 for ci
in range(halfsize
):
622 coeff
= (math
.cos((ci
+ 0.5) * math
.pi
/ size
) * 2.0)
624 print ("coeff", size
, "ci", ci
, "k", len(ctable
)-1,
625 "i/n", (ci
+0.5)/size
, coeff
)
633 SVSHAPE0
.lims
= [xdim
, 4, 0]
635 SVSHAPE0
.submode2
= 0b01
637 SVSHAPE0
.offset
= 0 # experiment with different offset, here
638 SVSHAPE0
.invxyz
= [1,0,0] # inversion if desired
641 SVSHAPE1
.lims
= [xdim
, 4, 0]
643 SVSHAPE1
.submode2
= 0b01
645 SVSHAPE1
.offset
= 0 # experiment with different offset, here
646 SVSHAPE1
.invxyz
= [1,0,0] # inversion if desired
649 SVSHAPE2
.lims
= [xdim
, 4, 0]
651 SVSHAPE2
.submode2
= 0b01
653 SVSHAPE2
.offset
= 0 # experiment with different offset, here
654 SVSHAPE2
.invxyz
= [1,0,0] # inversion if desired
656 # enumerate over the iterator function, getting new indices
657 i0
= iterate_dct_inner_costable_indices(SVSHAPE0
)
658 i1
= iterate_dct_inner_costable_indices(SVSHAPE1
)
659 i2
= iterate_dct_inner_costable_indices(SVSHAPE2
)
660 for ((ci
, cie
), (size
, sze
), (k
, ke
)) in \
662 print ("xform2 cos", ci
, size
, k
)
663 coeff
= (math
.cos((ci
+ 0.5) * math
.pi
/ size
) * 2.0)
664 assert coeff
== ctable
[k
]
665 print ("coeff", size
, "ci", ci
, "k", k
,
666 "i/n", (ci
+0.5)/size
, coeff
,
667 "end", bin(cie
), bin(sze
), bin(ke
))
668 if cie
== 0b111: # all loops end
677 SVSHAPE0
.lims
= [xdim
, 0b000001, 0]
679 SVSHAPE0
.submode2
= 0b01
681 SVSHAPE0
.offset
= 0 # experiment with different offset, here
682 SVSHAPE0
.invxyz
= [1,0,0] # inversion if desired
683 # j+halfstep schedule
685 SVSHAPE1
.lims
= [xdim
, 0b000001, 0]
687 SVSHAPE1
.submode2
= 0b01
689 SVSHAPE1
.offset
= 0 # experiment with different offset, here
690 SVSHAPE1
.invxyz
= [1,0,0] # inversion if desired
693 SVSHAPE2
.lims
= [xdim
, 0b000001, 0]
695 SVSHAPE2
.submode2
= 0b01
697 SVSHAPE2
.offset
= 0 # experiment with different offset, here
698 SVSHAPE2
.invxyz
= [1,0,0] # inversion if desired
701 SVSHAPE3
.lims
= [xdim
, 0b000001, 0]
703 SVSHAPE3
.submode2
= 0b01
705 SVSHAPE3
.offset
= 0 # experiment with different offset, here
706 SVSHAPE3
.invxyz
= [1,0,0] # inversion if desired
708 # enumerate over the iterator function, getting new indices
709 i0
= iterate_dct_inner_butterfly_indices(SVSHAPE0
)
710 i1
= iterate_dct_inner_butterfly_indices(SVSHAPE1
)
711 i2
= iterate_dct_inner_butterfly_indices(SVSHAPE2
)
712 i3
= iterate_dct_inner_butterfly_indices(SVSHAPE3
)
713 for k
, ((jl
, jle
), (jh
, jhe
), (ci
, cie
), (size
, sze
)) in \
714 enumerate(zip(i0
, i1
, i2
, i3
)):
715 t1
, t2
= vec
[jl
], vec
[jh
]
716 print ("xform2", jl
, jh
, ci
, size
)
717 coeff
= (math
.cos((ci
+ 0.5) * math
.pi
/ size
) * 2.0)
718 #assert coeff == ctable[k]
720 vec
[jh
] = (t1
- t2
) * (1/coeff
)
721 print ("coeff", size
, "ci", ci
,
723 "i/n", (ci
+0.5)/size
, coeff
, vec
[jl
],
725 "end", bin(jle
), bin(jhe
))
726 if jle
== 0b111: # all loops end
729 print("transform2 pre-itersum", vec
)
731 # now things are in the right order for the outer butterfly.
735 SVSHAPE0
.lims
= [xdim
, 0b0000010, 0]
736 SVSHAPE0
.submode2
= 0b100
739 SVSHAPE0
.offset
= 0 # experiment with different offset, here
740 SVSHAPE0
.invxyz
= [0,0,0] # inversion if desired
741 # j+halfstep schedule
743 SVSHAPE1
.lims
= [xdim
, 0b0000010, 0]
745 SVSHAPE1
.submode2
= 0b100
747 SVSHAPE1
.offset
= 0 # experiment with different offset, here
748 SVSHAPE1
.invxyz
= [0,0,0] # inversion if desired
750 # enumerate over the iterator function, getting new indices
751 i0
= iterate_dct_outer_butterfly_indices(SVSHAPE0
)
752 i1
= iterate_dct_outer_butterfly_indices(SVSHAPE1
)
753 for k
, ((jl
, jle
), (jh
, jhe
)) in enumerate(zip(i0
, i1
)):
754 print ("itersum jr", jl
, jh
,
755 "end", bin(jle
), bin(jhe
))
758 if jle
== 0b111: # all loops end
761 print("transform2 result", vec
)
767 # set the dimension sizes here
770 ydim
= 0 # not needed
771 zdim
= 0 # again, not needed
783 SVSHAPE0
.lims
= [xdim
, 0b0000010, 0]
784 SVSHAPE0
.submode2
= 0b100
787 SVSHAPE0
.offset
= 0 # experiment with different offset, here
788 SVSHAPE0
.invxyz
= [1,0,1] # inversion if desired
789 # j+halfstep schedule
791 SVSHAPE1
.lims
= [xdim
, 0b0000010, 0]
793 SVSHAPE1
.submode2
= 0b100
795 SVSHAPE1
.offset
= 0 # experiment with different offset, here
796 SVSHAPE1
.invxyz
= [1,0,1] # inversion if desired
798 # enumerate over the iterator function, getting new indices
800 i0
= iterate_dct_outer_butterfly_indices(SVSHAPE0
)
801 i1
= iterate_dct_outer_butterfly_indices(SVSHAPE1
)
802 for idx
, (jl
, jh
) in enumerate(zip(i0
, i1
)):
803 schedule
.append((jl
, jh
))
804 if jl
[1] == 0b111: # end
807 # ok now pretty-print the results, with some debug output
808 print ("outer i-dct butterfly")
809 pprint_schedule_outer(schedule
, n
)
817 SVSHAPE0
.lims
= [xdim
, 0b000001, 0]
819 SVSHAPE0
.submode2
= 0b11
821 SVSHAPE0
.offset
= 0 # experiment with different offset, here
822 SVSHAPE0
.invxyz
= [0,0,0] # inversion if desired
823 # j+halfstep schedule
825 SVSHAPE1
.lims
= [xdim
, 0b000001, 0]
827 SVSHAPE1
.submode2
= 0b11
829 SVSHAPE1
.offset
= 0 # experiment with different offset, here
830 SVSHAPE1
.invxyz
= [0,0,0] # inversion if desired
832 # enumerate over the iterator function, getting new indices
834 i0
= iterate_dct_inner_butterfly_indices(SVSHAPE0
)
835 i1
= iterate_dct_inner_butterfly_indices(SVSHAPE1
)
836 for idx
, (jl
, jh
) in enumerate(zip(i0
, i1
)):
837 schedule
.append((jl
, jh
))
838 if jl
[1] == 0b111: # end
841 # ok now pretty-print the results, with some debug output
842 print ("inner butterfly")
843 pprint_schedule(schedule
, n
)
848 # for DCT half-swap LDs
851 SVSHAPE0
.lims
= [xdim
, 0b000101, zdim
]
853 SVSHAPE0
.submode2
= 0b01
855 SVSHAPE0
.offset
= 0 # experiment with different offset, here
856 SVSHAPE0
.invxyz
= [0,0,0] # inversion if desired
859 levels
= n
.bit_length() - 1
861 ri
= [reverse_bits(i
, levels
) for i
in range(n
)]
862 av
= halfrev2(avi
, False)
863 av
= [av
[ri
[i
]] for i
in range(n
)]
865 i0
= iterate_dct_inner_halfswap_loadstore(SVSHAPE0
)
866 for idx
, (jl
) in enumerate(i0
):
867 print ("inverse half-swap ld", idx
, jl
, av
[idx
])
868 if jl
[1] == 0b111: # end
873 # set the dimension sizes here
876 ydim
= 0 # not needed
877 zdim
= 0 # again, not needed
889 SVSHAPE0
.lims
= [xdim
, 0b000001, zdim
]
890 SVSHAPE0
.submode2
= 0b010
893 SVSHAPE0
.offset
= 0 # experiment with different offset, here
894 SVSHAPE0
.invxyz
= [0,0,0] # inversion if desired
895 # j+halfstep schedule
897 SVSHAPE1
.lims
= [xdim
, 0b000001, zdim
]
898 SVSHAPE1
.submode2
= 0b010
901 SVSHAPE1
.offset
= 0 # experiment with different offset, here
902 SVSHAPE1
.invxyz
= [0,0,0] # inversion if desired
904 # enumerate over the iterator function, getting new indices
906 i0
= iterate_dct_inner_butterfly_indices(SVSHAPE0
)
907 i1
= iterate_dct_inner_butterfly_indices(SVSHAPE1
)
908 for idx
, (jl
, jh
) in enumerate(zip(i0
, i1
)):
909 schedule
.append((jl
, jh
))
910 if jl
[1] == 0b111: # end
913 # ok now pretty-print the results, with some debug output
914 print ("inner butterfly")
915 pprint_schedule(schedule
, n
)
924 SVSHAPE0
.lims
= [xdim
, 0b000010, zdim
]
926 SVSHAPE0
.submode2
= 0b100
928 SVSHAPE0
.offset
= 0 # experiment with different offset, here
929 SVSHAPE0
.invxyz
= [1,0,0] # inversion if desired
930 # j+halfstep schedule
932 SVSHAPE1
.lims
= [xdim
, 0b000010, zdim
]
934 SVSHAPE1
.submode2
= 0b100
936 SVSHAPE1
.offset
= 0 # experiment with different offset, here
937 SVSHAPE1
.invxyz
= [1,0,0] # inversion if desired
939 # enumerate over the iterator function, getting new indices
941 i0
= iterate_dct_outer_butterfly_indices(SVSHAPE0
)
942 i1
= iterate_dct_outer_butterfly_indices(SVSHAPE1
)
943 for idx
, (jl
, jh
) in enumerate(zip(i0
, i1
)):
944 schedule
.append((jl
, jh
))
945 if jl
[1] == 0b111: # end
948 # ok now pretty-print the results, with some debug output
949 print ("outer butterfly")
950 pprint_schedule_outer(schedule
, n
)
952 # for DCT half-swap LDs
955 SVSHAPE0
.lims
= [xdim
, 0b000101, zdim
]
957 SVSHAPE0
.submode2
= 0
959 SVSHAPE0
.offset
= 0 # experiment with different offset, here
960 SVSHAPE0
.invxyz
= [0,0,0] # inversion if desired
963 levels
= n
.bit_length() - 1
965 ri
= [reverse_bits(i
, levels
) for i
in range(n
)]
966 av
= halfrev2(avi
, False)
967 av
= [av
[ri
[i
]] for i
in range(n
)]
970 i0
= iterate_dct_inner_halfswap_loadstore(SVSHAPE0
)
971 for idx
, (jl
) in enumerate(i0
):
972 print ("inverse half-swap ld", idx
, jl
, av
[idx
])
973 if jl
[1] == 0b111: # end
978 if __name__
== '__main__':