1 # DCT "REMAP" scheduler
3 # Modifications made to create an in-place iterative DCT:
4 # Copyright (c) 2021 Luke Kenneth Casson Leighton <lkcl@lkcl.net>
8 # Original fastdctlee.py by Nayuki:
9 # Copyright (c) 2020 Project Nayuki. (MIT License)
10 # https://www.nayuki.io/page/fast-discrete-cosine-transform-algorithms
12 from copy
import deepcopy
15 # bits of the integer 'val'.
16 def reverse_bits(val
, width
):
18 for _
in range(width
):
19 result
= (result
<< 1) |
(val
& 1)
24 # iterative version of [recursively-applied] half-rev.
25 # relies on the list lengths being power-of-two and the fact
26 # that bit-inversion of a list of binary numbers is the same
27 # as reversing the order of the list
28 # this version is dead easy to implement in hardware.
29 # a big surprise is that the half-reversal can be done with
30 # such a simple XOR. the inverse operation is slightly trickier
31 def halfrev2(vec
, pre_rev
=True):
33 for i
in range(len(vec
)):
35 res
.append(vec
[i ^
(i
>>1)])
39 for ji
in range(1, bl
):
45 # python "yield" can be iterated. use this to make it clear how
46 # the indices are generated by using natural-looking nested loops
47 def iterate_dct_inner_halfswap_loadstore(SVSHAPE
):
48 # get indices to iterate over, in the required order
50 mode
= SVSHAPE
.lims
[1]
51 print ("inner halfswap loadstore", n
, mode
, SVSHAPE
.skip
,
52 "submode", SVSHAPE
.submode2
)
54 # reference list for not needing to do data-swaps, just swap what
55 # *indices* are referenced (two levels of indirection at the moment)
56 # pre-reverse the data-swap list so that it *ends up* in the order 0123..
59 levels
= n
.bit_length() - 1
60 if SVSHAPE
.submode2
== 0b001:
61 ji
= halfrev2(ji
, True)
63 ji
= halfrev2(ji
, False)
65 if False: # swap: TODO, add extra bit-reverse mode
66 ri
= [reverse_bits(i
, levels
) for i
in range(n
)]
67 ji
= [ji
[ri
[i
]] for i
in range(n
)]
69 # invert order if requested
73 for i
, jl
in enumerate(ji
):
75 yield jl
, (0b111 if y_end
else 0b000)
78 # python "yield" can be iterated. use this to make it clear how
79 # the indices are generated by using natural-looking nested loops
80 def iterate_dct_inner_costable_indices(SVSHAPE
):
81 # get indices to iterate over, in the required order
83 mode
= SVSHAPE
.lims
[1]
84 print ("inner costable", mode
)
85 # creating lists of indices to iterate over in each dimension
86 # has to be done dynamically, because it depends on the size
87 # first, the size-based loop (which can be done statically)
93 # invert order if requested
103 # start an infinite (wrapping) loop
105 z_end
= 1 # doesn't exist in this, only 2 loops
108 for size
in x_r
: # loop over 3rd order dimension (size)
109 x_end
= size
== x_r
[-1]
110 # y_r schedule depends on size
113 for i
in range(0, n
, size
):
115 # invert if requested
116 if SVSHAPE
.invxyz
[1]: y_r
.reverse()
117 # two lists of half-range indices, e.g. j 0123, jr 7654
118 j
= list(range(0, halfsize
))
119 # invert if requested
120 if SVSHAPE
.invxyz
[2]: j_r
.reverse()
121 #print ("xform jr", jr)
122 # loop over 1st order dimension
123 for ci
, jl
in enumerate(j
):
125 # now depending on MODE return the index. inner butterfly
126 if SVSHAPE
.skip
== 0b00: # in [0b00, 0b10]:
127 result
= k
# offset into COS table
128 elif SVSHAPE
.skip
== 0b10: #
129 result
= ci
# coefficient helper
130 elif SVSHAPE
.skip
== 0b11: #
131 result
= size
# coefficient helper
133 ((y_end
and z_end
)<<1) |
134 ((y_end
and x_end
and z_end
)<<2))
136 yield result
+ SVSHAPE
.offset
, loopends
139 # python "yield" can be iterated. use this to make it clear how
140 # the indices are generated by using natural-looking nested loops
141 def iterate_dct_inner_butterfly_indices(SVSHAPE
):
142 # get indices to iterate over, in the required order
144 mode
= SVSHAPE
.lims
[1]
145 print ("inner butterfly", mode
, SVSHAPE
.skip
, "submode", SVSHAPE
.submode2
)
146 # creating lists of indices to iterate over in each dimension
147 # has to be done dynamically, because it depends on the size
148 # first, the size-based loop (which can be done statically)
154 # invert order if requested
155 if SVSHAPE
.invxyz
[0]:
161 # reference (read/write) the in-place data in *reverse-bit-order*
163 if SVSHAPE
.submode2
== 0b01:
164 levels
= n
.bit_length() - 1
165 ri
= [ri
[reverse_bits(i
, levels
)] for i
in range(n
)]
167 # reference list for not needing to do data-swaps, just swap what
168 # *indices* are referenced (two levels of indirection at the moment)
169 # pre-reverse the data-swap list so that it *ends up* in the order 0123..
172 if inplace_mode
and SVSHAPE
.submode2
== 0b01:
173 #print ("inplace mode")
174 ji
= halfrev2(ji
, True)
175 if inplace_mode
and SVSHAPE
.submode2
== 0b11:
176 ji
= halfrev2(ji
, False)
181 # start an infinite (wrapping) loop
185 for size
in x_r
: # loop over 3rd order dimension (size)
186 x_end
= size
== x_r
[-1]
187 # y_r schedule depends on size
190 for i
in range(0, n
, size
):
192 # invert if requested
193 if SVSHAPE
.invxyz
[1]: y_r
.reverse()
194 for i
in y_r
: # loop over 2nd order dimension
196 # two lists of half-range indices, e.g. j 0123, jr 7654
197 j
= list(range(i
, i
+ halfsize
))
198 jr
= list(range(i
+halfsize
, i
+ size
))
200 # invert if requested
201 if SVSHAPE
.invxyz
[2]:
204 hz2
= halfsize
// 2 # zero stops reversing 1-item lists
205 #print ("xform jr", jr)
206 # loop over 1st order dimension
208 for ci
, (jl
, jh
) in enumerate(zip(j
, jr
)):
210 # now depending on MODE return the index. inner butterfly
211 if SVSHAPE
.skip
== 0b00: # in [0b00, 0b10]:
212 if SVSHAPE
.submode2
== 0b11: # iDCT
213 result
= ji
[ri
[jl
]] # lower half
215 result
= ri
[ji
[jl
]] # lower half
216 elif SVSHAPE
.skip
== 0b01: # in [0b01, 0b11]:
217 if SVSHAPE
.submode2
== 0b11: # iDCT
218 result
= ji
[ri
[jl
+halfsize
]] # upper half
220 result
= ri
[ji
[jh
]] # upper half
222 # COS table pre-generated mode
223 if SVSHAPE
.skip
== 0b10: #
224 result
= k
# cos table offset
226 # COS table generated on-demand ("Vertical-First") mode
227 if SVSHAPE
.skip
== 0b10: #
228 result
= ci
# coefficient helper
229 elif SVSHAPE
.skip
== 0b11: #
230 result
= size
# coefficient helper
232 ((y_end
and z_end
)<<1) |
233 ((y_end
and x_end
and z_end
)<<2))
235 yield result
+ SVSHAPE
.offset
, loopends
240 for ci
, (jl
, jh
) in enumerate(zip(j
[:hz2
], jr
[:hz2
])):
242 print ("inplace swap", jh
, jlh
)
243 tmp1
, tmp2
= ji
[jlh
], ji
[jh
]
244 ji
[jlh
], ji
[jh
] = tmp2
, tmp1
246 # new k_start point for cos tables( runs inside x_r loop NOT i loop)
250 # python "yield" can be iterated. use this to make it clear how
251 # the indices are generated by using natural-looking nested loops
252 def iterate_dct_outer_butterfly_indices(SVSHAPE
):
253 # get indices to iterate over, in the required order
255 mode
= SVSHAPE
.lims
[1]
256 # creating lists of indices to iterate over in each dimension
257 # has to be done dynamically, because it depends on the size
258 # first, the size-based loop (which can be done statically)
264 # invert order if requested
265 if SVSHAPE
.invxyz
[0]:
271 print ("outer butterfly", mode
, SVSHAPE
.skip
, "submode", SVSHAPE
.submode2
)
273 # I-DCT, reference (read/write) the in-place data in *reverse-bit-order*
275 if SVSHAPE
.submode2
in [0b11, 0b01]:
276 levels
= n
.bit_length() - 1
277 ri
= [ri
[reverse_bits(i
, levels
)] for i
in range(n
)]
279 # reference list for not needing to do data-swaps, just swap what
280 # *indices* are referenced (two levels of indirection at the moment)
281 # pre-reverse the data-swap list so that it *ends up* in the order 0123..
283 inplace_mode
= False # need the space... SVSHAPE.skip in [0b10, 0b11]
284 if SVSHAPE
.submode2
== 0b11:
285 ji
= halfrev2(ji
, False)
290 # start an infinite (wrapping) loop
294 for size
in x_r
: # loop over 3rd order dimension (size)
296 x_end
= size
== x_r
[-1]
297 y_r
= list(range(0, halfsize
))
298 print ("itersum", halfsize
, size
, y_r
, "invert", SVSHAPE
.invxyz
[1])
299 # invert if requested
300 if SVSHAPE
.invxyz
[1]: y_r
.reverse()
301 for i
in y_r
: # loop over 2nd order dimension
303 # one list to create iterative-sum schedule
304 jr
= list(range(i
+halfsize
, i
+n
-halfsize
, size
))
305 # invert if requested
306 if SVSHAPE
.invxyz
[2]: jr
.reverse()
307 print ("itersum jr", i
+halfsize
, i
+size
, jr
,
308 "invert", SVSHAPE
.invxyz
[2])
309 hz2
= halfsize
// 2 # zero stops reversing 1-item lists
311 for ci
, jh
in enumerate(jr
): # loop over 1st order dimension
313 #print (" itersum", size, i, jh, jh+size)
315 # COS table pre-generated mode
316 if SVSHAPE
.skip
== 0b00: # in [0b00, 0b10]:
317 if SVSHAPE
.submode2
== 0b11: # iDCT
318 result
= ji
[ri
[jh
]] # upper half
320 result
= ri
[ji
[jh
]] # lower half
321 elif SVSHAPE
.skip
== 0b01: # in [0b01, 0b11]:
322 if SVSHAPE
.submode2
== 0b11: # iDCT
323 result
= ji
[ri
[jh
+size
]] # upper half
325 result
= ri
[ji
[jh
+size
]] # upper half
326 elif SVSHAPE
.skip
== 0b10: #
327 result
= k
# cos table offset
329 # COS table generated on-demand ("Vertical-First") mode
330 if SVSHAPE
.skip
== 0b00: # in [0b00, 0b10]:
331 if SVSHAPE
.submode2
== 0b11: # iDCT
332 result
= ji
[ri
[jh
]] # lower half
334 result
= ri
[ji
[jh
]] # lower half
335 elif SVSHAPE
.skip
== 0b01: # in [0b01, 0b11]:
336 if SVSHAPE
.submode2
== 0b11: # iDCT
337 result
= ji
[ri
[jh
+size
]] # upper half
339 result
= ri
[ji
[jh
+size
]] # upper half
340 elif SVSHAPE
.skip
== 0b10: #
341 result
= ci
# coefficient helper
342 elif SVSHAPE
.skip
== 0b11: #
343 result
= size
# coefficient helper
345 ((y_end
and z_end
)<<1) |
346 ((y_end
and x_end
and z_end
)<<2))
348 yield result
+ SVSHAPE
.offset
, loopends
351 # now in-place swap (disabled)
352 if False and SVSHAPE
.submode2
== 0b11:
353 j
= list(range(i
, i
+ halfsize
))
354 jr
= list(range(i
+halfsize
, i
+ size
))
356 for ci
, (jl
, jh
) in enumerate(zip(j
[:hz2
], jr
[:hz2
])):
358 tmp1
, tmp2
= ji
[jlh
], ji
[jh
]
359 print ("inplace swap", jh
, jlh
, "actual", tmp1
, tmp2
)
360 ji
[jlh
], ji
[jh
] = tmp2
, tmp1
362 # new k_start point for cos tables( runs inside x_r loop NOT i loop)
366 def pprint_schedule(schedule
, n
):
371 tablestep
= n
// size
372 print ("size %d halfsize %d tablestep %d" % \
373 (size
, halfsize
, tablestep
))
374 for i
in range(0, n
, size
):
375 prefix
= "i %d\t" % i
376 for j
in range(i
, i
+ halfsize
):
377 (jl
, je
), (jh
, he
) = schedule
[idx
]
378 print (" %-3d\t%s j=%-2d jh=%-2d "
379 "j[jl=%-2d] j[jh=%-2d]" % \
380 (idx
, prefix
, j
, j
+halfsize
,
383 "end", bin(je
)[2:], bin(je
)[2:])
387 def pprint_schedule_outer(schedule
, n
):
392 tablestep
= n
// size
393 print ("size %d halfsize %d tablestep %d" % \
394 (size
, halfsize
, tablestep
))
395 y_r
= list(range(0, halfsize
))
397 prefix
= "i %d\t" % i
398 jr
= list(range(i
+halfsize
, i
+n
-halfsize
, size
))
400 (jl
, je
), (jh
, he
) = schedule
[idx
]
401 print (" %-3d\t%s j=%-2d jh=%-2d "
402 "j[jl=%-2d] j[jh=%-2d]" % \
403 (idx
, prefix
, j
, j
+halfsize
,
406 "end", bin(je
)[2:], bin(je
)[2:])
411 # totally cool *in-place* inverse DCT algorithm using yield REMAPs
412 def inverse_transform2(vec
):
419 print ("inverse_transform2", n
)
420 levels
= n
.bit_length() - 1
425 # divide element 0 by 2
428 # reference (read/write) the in-place data in *reverse-bit-order*
430 ri
= [ri
[reverse_bits(i
, levels
)] for i
in range(n
)]
432 # pretend we LDed data in half-swapped *and* bit-reversed order as well
433 # TODO: merge these two
434 vec
= [vec
[ri
[i
]] for i
in range(n
)]
435 vec
= halfrev2(vec
, True)
437 print("inverse_transform2 post-alter", vec
)
439 # create a cos table: not strictly necessary but here for illustrative
440 # purposes, to demonstrate the point that it really *is* iterative.
441 # this table could be cached and used multiple times rather than
442 # computed every time.
447 for ci
in range(halfsize
):
448 coeff
= (math
.cos((ci
+ 0.5) * math
.pi
/ size
) * 2.0)
450 print ("coeff", size
, "ci", ci
, "k", len(ctable
)-1,
451 "i/n", (ci
+0.5)/size
, coeff
)
462 SVSHAPE0
.lims
= [xdim
, 4, 0]
464 SVSHAPE0
.submode2
= 0b01
466 SVSHAPE0
.offset
= 0 # experiment with different offset, here
467 SVSHAPE0
.invxyz
= [1,0,1] # inversion if desired
470 SVSHAPE1
.lims
= [xdim
, 4, 0]
472 SVSHAPE1
.submode2
= 0b01
474 SVSHAPE1
.offset
= 0 # experiment with different offset, here
475 SVSHAPE1
.invxyz
= [1,0,1] # inversion if desired
478 SVSHAPE2
.lims
= [xdim
, 4, 0]
480 SVSHAPE2
.submode2
= 0b01
482 SVSHAPE2
.offset
= 0 # experiment with different offset, here
483 SVSHAPE2
.invxyz
= [1,0,1] # inversion if desired
485 # enumerate over the iterator function, getting new indices
486 i0
= iterate_dct_inner_costable_indices(SVSHAPE0
)
487 i1
= iterate_dct_inner_costable_indices(SVSHAPE1
)
488 i2
= iterate_dct_inner_costable_indices(SVSHAPE2
)
489 for ((ci
, cie
), (size
, sze
), (k
, ke
)) in \
491 print ("xform2 cos", ci
, size
, k
)
492 coeff
= (math
.cos((ci
+ 0.5) * math
.pi
/ size
) * 2.0)
493 assert coeff
== ctable
[k
]
494 print ("coeff", size
, "ci", ci
, "k", k
,
495 "i/n", (ci
+0.5)/size
, coeff
,
496 "end", bin(cie
), bin(sze
), bin(ke
))
497 if cie
== 0b111: # all loops end
500 # now things are in the right order for the outer butterfly.
504 SVSHAPE0
.lims
= [xdim
, 0b0000010, 0]
505 SVSHAPE0
.submode2
= 0b11
508 SVSHAPE0
.offset
= 0 # experiment with different offset, here
509 SVSHAPE0
.invxyz
= [1,0,1] # inversion if desired
510 # j+halfstep schedule
512 SVSHAPE1
.lims
= [xdim
, 0b0000010, 0]
514 SVSHAPE1
.submode2
= 0b11
516 SVSHAPE1
.offset
= 0 # experiment with different offset, here
517 SVSHAPE1
.invxyz
= [1,0,1] # inversion if desired
519 # enumerate over the iterator function, getting new indices
520 i0
= iterate_dct_outer_butterfly_indices(SVSHAPE0
)
521 i1
= iterate_dct_outer_butterfly_indices(SVSHAPE1
)
522 for k
, ((jl
, jle
), (jh
, jhe
)) in enumerate(zip(i0
, i1
)):
523 print ("itersum jr", jl
, jh
,
524 "end", bin(jle
), bin(jhe
),
525 vec
[jl
], vec
[jh
], vec
[jh
]+vec
[jl
])
528 if jle
== 0b111: # all loops end
531 print("transform2 pre-itersum", vec
)
539 SVSHAPE0
.lims
= [xdim
, 0b000001, 0]
541 SVSHAPE0
.submode2
= 0b11
543 SVSHAPE0
.offset
= 0 # experiment with different offset, here
544 SVSHAPE0
.invxyz
= [0,0,0] # inversion if desired
545 # j+halfstep schedule
547 SVSHAPE1
.lims
= [xdim
, 0b000001, 0]
549 SVSHAPE1
.submode2
= 0b11
551 SVSHAPE1
.offset
= 0 # experiment with different offset, here
552 SVSHAPE1
.invxyz
= [0,0,0] # inversion if desired
555 SVSHAPE2
.lims
= [xdim
, 0b000001, 0]
557 SVSHAPE2
.submode2
= 0b11
559 SVSHAPE2
.offset
= 0 # experiment with different offset, here
560 SVSHAPE2
.invxyz
= [0,0,0] # inversion if desired
563 SVSHAPE3
.lims
= [xdim
, 0b000001, 0]
565 SVSHAPE3
.submode2
= 0b11
567 SVSHAPE3
.offset
= 0 # experiment with different offset, here
568 SVSHAPE3
.invxyz
= [0,0,0] # inversion if desired
570 # enumerate over the iterator function, getting new indices
571 i0
= iterate_dct_inner_butterfly_indices(SVSHAPE0
)
572 i1
= iterate_dct_inner_butterfly_indices(SVSHAPE1
)
573 i2
= iterate_dct_inner_butterfly_indices(SVSHAPE2
)
574 i3
= iterate_dct_inner_butterfly_indices(SVSHAPE3
)
575 for k
, ((jl
, jle
), (jh
, jhe
), (ci
, cie
), (size
, sze
)) in \
576 enumerate(zip(i0
, i1
, i2
, i3
)):
577 t1
, t2
= vec
[jl
], vec
[jh
]
578 print ("xform2", jl
, jh
, ci
, size
)
579 coeff
= (math
.cos((ci
+ 0.5) * math
.pi
/ size
) * 2.0)
580 #assert coeff == ctable[k]
581 vec
[jl
] = t1
+ t2
/coeff
582 vec
[jh
] = t1
- t2
/coeff
583 print ("coeff", size
, "ci", ci
,
585 "i/n", (ci
+0.5)/size
, coeff
, "t1/2", t1
, t2
,
586 "+/-", vec
[jl
], vec
[jh
],
587 "end", bin(jle
), bin(jhe
))
588 if jle
== 0b111: # all loops end
591 print("inverse_transform2 result", vec
)
596 # totally cool *in-place* DCT algorithm using yield REMAPs
602 print ("transform2", n
)
603 levels
= n
.bit_length() - 1
608 # reference (read/write) the in-place data in *reverse-bit-order*
610 ri
= [ri
[reverse_bits(i
, levels
)] for i
in range(n
)]
612 # and pretend we LDed data in half-swapped *and* bit-reversed order as well
613 # TODO: merge these two
614 vec
= halfrev2(vec
, False)
615 vec
= [vec
[ri
[i
]] for i
in range(n
)]
617 # create a cos table: not strictly necessary but here for illustrative
618 # purposes, to demonstrate the point that it really *is* iterative.
619 # this table could be cached and used multiple times rather than
620 # computed every time.
625 for ci
in range(halfsize
):
626 coeff
= (math
.cos((ci
+ 0.5) * math
.pi
/ size
) * 2.0)
628 print ("coeff", size
, "ci", ci
, "k", len(ctable
)-1,
629 "i/n", (ci
+0.5)/size
, coeff
)
637 SVSHAPE0
.lims
= [xdim
, 4, 0]
639 SVSHAPE0
.submode2
= 0b01
641 SVSHAPE0
.offset
= 0 # experiment with different offset, here
642 SVSHAPE0
.invxyz
= [1,0,0] # inversion if desired
645 SVSHAPE1
.lims
= [xdim
, 4, 0]
647 SVSHAPE1
.submode2
= 0b01
649 SVSHAPE1
.offset
= 0 # experiment with different offset, here
650 SVSHAPE1
.invxyz
= [1,0,0] # inversion if desired
653 SVSHAPE2
.lims
= [xdim
, 4, 0]
655 SVSHAPE2
.submode2
= 0b01
657 SVSHAPE2
.offset
= 0 # experiment with different offset, here
658 SVSHAPE2
.invxyz
= [1,0,0] # inversion if desired
660 # enumerate over the iterator function, getting new indices
661 i0
= iterate_dct_inner_costable_indices(SVSHAPE0
)
662 i1
= iterate_dct_inner_costable_indices(SVSHAPE1
)
663 i2
= iterate_dct_inner_costable_indices(SVSHAPE2
)
664 for ((ci
, cie
), (size
, sze
), (k
, ke
)) in \
666 print ("xform2 cos", ci
, size
, k
)
667 coeff
= (math
.cos((ci
+ 0.5) * math
.pi
/ size
) * 2.0)
668 assert coeff
== ctable
[k
]
669 print ("coeff", size
, "ci", ci
, "k", k
,
670 "i/n", (ci
+0.5)/size
, coeff
,
671 "end", bin(cie
), bin(sze
), bin(ke
))
672 if cie
== 0b111: # all loops end
681 SVSHAPE0
.lims
= [xdim
, 0b000001, 0]
683 SVSHAPE0
.submode2
= 0b01
685 SVSHAPE0
.offset
= 0 # experiment with different offset, here
686 SVSHAPE0
.invxyz
= [1,0,0] # inversion if desired
687 # j+halfstep schedule
689 SVSHAPE1
.lims
= [xdim
, 0b000001, 0]
691 SVSHAPE1
.submode2
= 0b01
693 SVSHAPE1
.offset
= 0 # experiment with different offset, here
694 SVSHAPE1
.invxyz
= [1,0,0] # inversion if desired
697 SVSHAPE2
.lims
= [xdim
, 0b000001, 0]
699 SVSHAPE2
.submode2
= 0b01
701 SVSHAPE2
.offset
= 0 # experiment with different offset, here
702 SVSHAPE2
.invxyz
= [1,0,0] # inversion if desired
705 SVSHAPE3
.lims
= [xdim
, 0b000001, 0]
707 SVSHAPE3
.submode2
= 0b01
709 SVSHAPE3
.offset
= 0 # experiment with different offset, here
710 SVSHAPE3
.invxyz
= [1,0,0] # inversion if desired
712 # enumerate over the iterator function, getting new indices
713 i0
= iterate_dct_inner_butterfly_indices(SVSHAPE0
)
714 i1
= iterate_dct_inner_butterfly_indices(SVSHAPE1
)
715 i2
= iterate_dct_inner_butterfly_indices(SVSHAPE2
)
716 i3
= iterate_dct_inner_butterfly_indices(SVSHAPE3
)
717 for k
, ((jl
, jle
), (jh
, jhe
), (ci
, cie
), (size
, sze
)) in \
718 enumerate(zip(i0
, i1
, i2
, i3
)):
719 t1
, t2
= vec
[jl
], vec
[jh
]
720 print ("xform2", jl
, jh
, ci
, size
)
721 coeff
= (math
.cos((ci
+ 0.5) * math
.pi
/ size
) * 2.0)
722 #assert coeff == ctable[k]
724 vec
[jh
] = (t1
- t2
) * (1/coeff
)
725 print ("coeff", size
, "ci", ci
,
727 "i/n", (ci
+0.5)/size
, coeff
, vec
[jl
],
729 "end", bin(jle
), bin(jhe
))
730 if jle
== 0b111: # all loops end
733 print("transform2 pre-itersum", vec
)
735 # now things are in the right order for the outer butterfly.
739 SVSHAPE0
.lims
= [xdim
, 0b0000010, 0]
740 SVSHAPE0
.submode2
= 0b100
743 SVSHAPE0
.offset
= 0 # experiment with different offset, here
744 SVSHAPE0
.invxyz
= [0,0,0] # inversion if desired
745 # j+halfstep schedule
747 SVSHAPE1
.lims
= [xdim
, 0b0000010, 0]
749 SVSHAPE1
.submode2
= 0b100
751 SVSHAPE1
.offset
= 0 # experiment with different offset, here
752 SVSHAPE1
.invxyz
= [0,0,0] # inversion if desired
754 # enumerate over the iterator function, getting new indices
755 i0
= iterate_dct_outer_butterfly_indices(SVSHAPE0
)
756 i1
= iterate_dct_outer_butterfly_indices(SVSHAPE1
)
757 for k
, ((jl
, jle
), (jh
, jhe
)) in enumerate(zip(i0
, i1
)):
758 print ("itersum jr", jl
, jh
,
759 "end", bin(jle
), bin(jhe
))
762 if jle
== 0b111: # all loops end
765 print("transform2 result", vec
)
771 # set the dimension sizes here
774 ydim
= 0 # not needed
775 zdim
= 0 # again, not needed
787 SVSHAPE0
.lims
= [xdim
, 0b0000010, 0]
788 SVSHAPE0
.submode2
= 0b11
791 SVSHAPE0
.offset
= 0 # experiment with different offset, here
792 SVSHAPE0
.invxyz
= [1,0,1] # inversion if desired
793 # j+halfstep schedule
795 SVSHAPE1
.lims
= [xdim
, 0b0000010, 0]
797 SVSHAPE1
.submode2
= 0b11
799 SVSHAPE1
.offset
= 0 # experiment with different offset, here
800 SVSHAPE1
.invxyz
= [1,0,1] # inversion if desired
802 # enumerate over the iterator function, getting new indices
804 i0
= iterate_dct_outer_butterfly_indices(SVSHAPE0
)
805 i1
= iterate_dct_outer_butterfly_indices(SVSHAPE1
)
806 for idx
, (jl
, jh
) in enumerate(zip(i0
, i1
)):
807 schedule
.append((jl
, jh
))
808 if jl
[1] == 0b111: # end
811 # ok now pretty-print the results, with some debug output
812 print ("outer i-dct butterfly")
813 pprint_schedule_outer(schedule
, n
)
821 SVSHAPE0
.lims
= [xdim
, 0b000001, 0]
823 SVSHAPE0
.submode2
= 0b11
825 SVSHAPE0
.offset
= 0 # experiment with different offset, here
826 SVSHAPE0
.invxyz
= [0,0,0] # inversion if desired
827 # j+halfstep schedule
829 SVSHAPE1
.lims
= [xdim
, 0b000001, 0]
831 SVSHAPE1
.submode2
= 0b11
833 SVSHAPE1
.offset
= 0 # experiment with different offset, here
834 SVSHAPE1
.invxyz
= [0,0,0] # inversion if desired
836 # enumerate over the iterator function, getting new indices
838 i0
= iterate_dct_inner_butterfly_indices(SVSHAPE0
)
839 i1
= iterate_dct_inner_butterfly_indices(SVSHAPE1
)
840 for idx
, (jl
, jh
) in enumerate(zip(i0
, i1
)):
841 schedule
.append((jl
, jh
))
842 if jl
[1] == 0b111: # end
845 # ok now pretty-print the results, with some debug output
846 print ("inner butterfly")
847 pprint_schedule(schedule
, n
)
852 # for DCT half-swap LDs
855 SVSHAPE0
.lims
= [xdim
, 0b000101, zdim
]
857 SVSHAPE0
.submode2
= 0b01
859 SVSHAPE0
.offset
= 0 # experiment with different offset, here
860 SVSHAPE0
.invxyz
= [0,0,0] # inversion if desired
863 levels
= n
.bit_length() - 1
865 ri
= [reverse_bits(i
, levels
) for i
in range(n
)]
866 av
= halfrev2(avi
, False)
867 av
= [av
[ri
[i
]] for i
in range(n
)]
869 i0
= iterate_dct_inner_halfswap_loadstore(SVSHAPE0
)
870 for idx
, (jl
) in enumerate(i0
):
871 print ("inverse half-swap ld", idx
, jl
, av
[idx
])
872 if jl
[1] == 0b111: # end
877 # set the dimension sizes here
880 ydim
= 0 # not needed
881 zdim
= 0 # again, not needed
893 SVSHAPE0
.lims
= [xdim
, 0b000001, zdim
]
894 SVSHAPE0
.submode2
= 0b010
897 SVSHAPE0
.offset
= 0 # experiment with different offset, here
898 SVSHAPE0
.invxyz
= [0,0,0] # inversion if desired
899 # j+halfstep schedule
901 SVSHAPE1
.lims
= [xdim
, 0b000001, zdim
]
902 SVSHAPE1
.submode2
= 0b010
905 SVSHAPE1
.offset
= 0 # experiment with different offset, here
906 SVSHAPE1
.invxyz
= [0,0,0] # inversion if desired
908 # enumerate over the iterator function, getting new indices
910 i0
= iterate_dct_inner_butterfly_indices(SVSHAPE0
)
911 i1
= iterate_dct_inner_butterfly_indices(SVSHAPE1
)
912 for idx
, (jl
, jh
) in enumerate(zip(i0
, i1
)):
913 schedule
.append((jl
, jh
))
914 if jl
[1] == 0b111: # end
917 # ok now pretty-print the results, with some debug output
918 print ("inner butterfly")
919 pprint_schedule(schedule
, n
)
928 SVSHAPE0
.lims
= [xdim
, 0b000010, zdim
]
930 SVSHAPE0
.submode2
= 0b100
932 SVSHAPE0
.offset
= 0 # experiment with different offset, here
933 SVSHAPE0
.invxyz
= [1,0,0] # inversion if desired
934 # j+halfstep schedule
936 SVSHAPE1
.lims
= [xdim
, 0b000010, zdim
]
938 SVSHAPE1
.submode2
= 0b100
940 SVSHAPE1
.offset
= 0 # experiment with different offset, here
941 SVSHAPE1
.invxyz
= [1,0,0] # inversion if desired
943 # enumerate over the iterator function, getting new indices
945 i0
= iterate_dct_outer_butterfly_indices(SVSHAPE0
)
946 i1
= iterate_dct_outer_butterfly_indices(SVSHAPE1
)
947 for idx
, (jl
, jh
) in enumerate(zip(i0
, i1
)):
948 schedule
.append((jl
, jh
))
949 if jl
[1] == 0b111: # end
952 # ok now pretty-print the results, with some debug output
953 print ("outer butterfly")
954 pprint_schedule_outer(schedule
, n
)
956 # for DCT half-swap LDs
959 SVSHAPE0
.lims
= [xdim
, 0b000101, zdim
]
961 SVSHAPE0
.submode2
= 0
963 SVSHAPE0
.offset
= 0 # experiment with different offset, here
964 SVSHAPE0
.invxyz
= [0,0,0] # inversion if desired
967 levels
= n
.bit_length() - 1
969 ri
= [reverse_bits(i
, levels
) for i
in range(n
)]
970 av
= halfrev2(avi
, False)
971 av
= [av
[ri
[i
]] for i
in range(n
)]
974 i0
= iterate_dct_inner_halfswap_loadstore(SVSHAPE0
)
975 for idx
, (jl
) in enumerate(i0
):
976 print ("inverse half-swap ld", idx
, jl
, av
[idx
])
977 if jl
[1] == 0b111: # end
982 if __name__
== '__main__':