1 # DCT "REMAP" scheduler
3 # Modifications made to create an in-place iterative DCT:
4 # Copyright (c) 2021 Luke Kenneth Casson Leighton <lkcl@lkcl.net>
8 # Original fastdctlee.py by Nayuki:
9 # Copyright (c) 2020 Project Nayuki. (MIT License)
10 # https://www.nayuki.io/page/fast-discrete-cosine-transform-algorithms
12 from copy
import deepcopy
15 # bits of the integer 'val'.
16 def reverse_bits(val
, width
):
18 for _
in range(width
):
19 result
= (result
<< 1) |
(val
& 1)
24 # iterative version of [recursively-applied] half-rev.
25 # relies on the list lengths being power-of-two and the fact
26 # that bit-inversion of a list of binary numbers is the same
27 # as reversing the order of the list
28 # this version is dead easy to implement in hardware.
29 # a big surprise is that the half-reversal can be done with
30 # such a simple XOR. the inverse operation is slightly trickier
31 def halfrev2(vec
, pre_rev
=True):
33 for i
in range(len(vec
)):
35 res
.append(vec
[i ^
(i
>>1)])
39 for ji
in range(1, bl
):
45 # python "yield" can be iterated. use this to make it clear how
46 # the indices are generated by using natural-looking nested loops
47 def iterate_dct_inner_halfswap_loadstore(SVSHAPE
):
48 # get indices to iterate over, in the required order
50 mode
= SVSHAPE
.lims
[1]
51 print ("inner halfswap loadstore", n
, mode
, SVSHAPE
.skip
,
52 "submode", SVSHAPE
.submode2
)
54 # reference list for not needing to do data-swaps, just swap what
55 # *indices* are referenced (two levels of indirection at the moment)
56 # pre-reverse the data-swap list so that it *ends up* in the order 0123..
59 levels
= n
.bit_length() - 1
60 ri
= [reverse_bits(i
, levels
) for i
in range(n
)]
62 if SVSHAPE
.mode
== 0b01: # FFT, bitrev only
63 ji
= [ji
[ri
[i
]] for i
in range(n
)]
64 elif SVSHAPE
.submode2
== 0b001:
65 ji
= [ji
[ri
[i
]] for i
in range(n
)]
66 ji
= halfrev2(ji
, True)
68 ji
= halfrev2(ji
, False)
69 ji
= [ji
[ri
[i
]] for i
in range(n
)]
71 # invert order if requested
75 for i
, jl
in enumerate(ji
):
77 yield jl
, (0b111 if y_end
else 0b000)
80 # python "yield" can be iterated. use this to make it clear how
81 # the indices are generated by using natural-looking nested loops
82 def iterate_dct_inner_costable_indices(SVSHAPE
):
83 # get indices to iterate over, in the required order
85 mode
= SVSHAPE
.lims
[1]
86 print ("inner costable", mode
)
87 # creating lists of indices to iterate over in each dimension
88 # has to be done dynamically, because it depends on the size
89 # first, the size-based loop (which can be done statically)
95 # invert order if requested
105 # start an infinite (wrapping) loop
107 z_end
= 1 # doesn't exist in this, only 2 loops
110 for size
in x_r
: # loop over 3rd order dimension (size)
111 x_end
= size
== x_r
[-1]
112 # y_r schedule depends on size
115 for i
in range(0, n
, size
):
117 # invert if requested
118 if SVSHAPE
.invxyz
[1]: y_r
.reverse()
119 # two lists of half-range indices, e.g. j 0123, jr 7654
120 j
= list(range(0, halfsize
))
121 # invert if requested
122 if SVSHAPE
.invxyz
[2]: j_r
.reverse()
123 #print ("xform jr", jr)
124 # loop over 1st order dimension
125 for ci
, jl
in enumerate(j
):
127 # now depending on MODE return the index. inner butterfly
128 if SVSHAPE
.skip
== 0b00: # in [0b00, 0b10]:
129 result
= k
# offset into COS table
130 elif SVSHAPE
.skip
== 0b10: #
131 result
= ci
# coefficient helper
132 elif SVSHAPE
.skip
== 0b11: #
133 result
= size
# coefficient helper
135 ((y_end
and z_end
)<<1) |
136 ((y_end
and x_end
and z_end
)<<2))
138 yield result
+ SVSHAPE
.offset
, loopends
141 # python "yield" can be iterated. use this to make it clear how
142 # the indices are generated by using natural-looking nested loops
143 def iterate_dct_inner_butterfly_indices(SVSHAPE
):
144 # get indices to iterate over, in the required order
146 mode
= SVSHAPE
.lims
[1]
147 print ("inner butterfly", mode
, SVSHAPE
.skip
, "submode", SVSHAPE
.submode2
)
148 # creating lists of indices to iterate over in each dimension
149 # has to be done dynamically, because it depends on the size
150 # first, the size-based loop (which can be done statically)
156 # invert order if requested
157 if SVSHAPE
.invxyz
[0]:
163 # reference (read/write) the in-place data in *reverse-bit-order*
165 if SVSHAPE
.submode2
== 0b01:
166 levels
= n
.bit_length() - 1
167 ri
= [ri
[reverse_bits(i
, levels
)] for i
in range(n
)]
169 # reference list for not needing to do data-swaps, just swap what
170 # *indices* are referenced (two levels of indirection at the moment)
171 # pre-reverse the data-swap list so that it *ends up* in the order 0123..
174 if inplace_mode
and SVSHAPE
.submode2
== 0b01:
175 #print ("inplace mode")
176 ji
= halfrev2(ji
, True)
177 if inplace_mode
and SVSHAPE
.submode2
== 0b11:
178 ji
= halfrev2(ji
, False)
183 # start an infinite (wrapping) loop
187 for size
in x_r
: # loop over 3rd order dimension (size)
188 x_end
= size
== x_r
[-1]
189 # y_r schedule depends on size
192 for i
in range(0, n
, size
):
194 # invert if requested
195 if SVSHAPE
.invxyz
[1]: y_r
.reverse()
196 for i
in y_r
: # loop over 2nd order dimension
198 # two lists of half-range indices, e.g. j 0123, jr 7654
199 j
= list(range(i
, i
+ halfsize
))
200 jr
= list(range(i
+halfsize
, i
+ size
))
202 # invert if requested
203 if SVSHAPE
.invxyz
[2]:
206 hz2
= halfsize
// 2 # zero stops reversing 1-item lists
207 #print ("xform jr", jr)
208 # loop over 1st order dimension
210 for ci
, (jl
, jh
) in enumerate(zip(j
, jr
)):
212 # now depending on MODE return the index. inner butterfly
213 if SVSHAPE
.skip
== 0b00: # in [0b00, 0b10]:
214 if SVSHAPE
.submode2
== 0b11: # iDCT
215 result
= ji
[ri
[jl
]] # lower half
217 result
= ri
[ji
[jl
]] # lower half
218 elif SVSHAPE
.skip
== 0b01: # in [0b01, 0b11]:
219 if SVSHAPE
.submode2
== 0b11: # iDCT
220 result
= ji
[ri
[jl
+halfsize
]] # upper half
222 result
= ri
[ji
[jh
]] # upper half
224 # COS table pre-generated mode
225 if SVSHAPE
.skip
== 0b10: #
226 result
= k
# cos table offset
228 # COS table generated on-demand ("Vertical-First") mode
229 if SVSHAPE
.skip
== 0b10: #
230 result
= ci
# coefficient helper
231 elif SVSHAPE
.skip
== 0b11: #
232 result
= size
# coefficient helper
234 ((y_end
and z_end
)<<1) |
235 ((y_end
and x_end
and z_end
)<<2))
237 yield result
+ SVSHAPE
.offset
, loopends
242 for ci
, (jl
, jh
) in enumerate(zip(j
[:hz2
], jr
[:hz2
])):
244 print ("inplace swap", jh
, jlh
)
245 tmp1
, tmp2
= ji
[jlh
], ji
[jh
]
246 ji
[jlh
], ji
[jh
] = tmp2
, tmp1
248 # new k_start point for cos tables( runs inside x_r loop NOT i loop)
252 # python "yield" can be iterated. use this to make it clear how
253 # the indices are generated by using natural-looking nested loops
254 def iterate_dct_outer_butterfly_indices(SVSHAPE
):
255 # get indices to iterate over, in the required order
257 mode
= SVSHAPE
.lims
[1]
258 # creating lists of indices to iterate over in each dimension
259 # has to be done dynamically, because it depends on the size
260 # first, the size-based loop (which can be done statically)
266 # invert order if requested
267 if SVSHAPE
.invxyz
[0]:
273 print ("outer butterfly", mode
, SVSHAPE
.skip
, "submode", SVSHAPE
.submode2
)
275 # I-DCT, reference (read/write) the in-place data in *reverse-bit-order*
277 if SVSHAPE
.submode2
in [0b11, 0b01]:
278 levels
= n
.bit_length() - 1
279 ri
= [ri
[reverse_bits(i
, levels
)] for i
in range(n
)]
281 # reference list for not needing to do data-swaps, just swap what
282 # *indices* are referenced (two levels of indirection at the moment)
283 # pre-reverse the data-swap list so that it *ends up* in the order 0123..
285 inplace_mode
= False # need the space... SVSHAPE.skip in [0b10, 0b11]
286 if SVSHAPE
.submode2
== 0b11:
287 ji
= halfrev2(ji
, False)
292 # start an infinite (wrapping) loop
296 for size
in x_r
: # loop over 3rd order dimension (size)
298 x_end
= size
== x_r
[-1]
299 y_r
= list(range(0, halfsize
))
300 print ("itersum", halfsize
, size
, y_r
, "invert", SVSHAPE
.invxyz
[1])
301 # invert if requested
302 if SVSHAPE
.invxyz
[1]: y_r
.reverse()
303 for i
in y_r
: # loop over 2nd order dimension
305 # one list to create iterative-sum schedule
306 jr
= list(range(i
+halfsize
, i
+n
-halfsize
, size
))
307 # invert if requested
308 if SVSHAPE
.invxyz
[2]: jr
.reverse()
309 print ("itersum jr", i
+halfsize
, i
+size
, jr
,
310 "invert", SVSHAPE
.invxyz
[2])
311 hz2
= halfsize
// 2 # zero stops reversing 1-item lists
313 for ci
, jh
in enumerate(jr
): # loop over 1st order dimension
315 #print (" itersum", size, i, jh, jh+size)
317 # COS table pre-generated mode
318 if SVSHAPE
.skip
== 0b00: # in [0b00, 0b10]:
319 if SVSHAPE
.submode2
== 0b11: # iDCT
320 result
= ji
[ri
[jh
]] # upper half
322 result
= ri
[ji
[jh
]] # lower half
323 elif SVSHAPE
.skip
== 0b01: # in [0b01, 0b11]:
324 if SVSHAPE
.submode2
== 0b11: # iDCT
325 result
= ji
[ri
[jh
+size
]] # upper half
327 result
= ri
[ji
[jh
+size
]] # upper half
328 elif SVSHAPE
.skip
== 0b10: #
329 result
= k
# cos table offset
331 # COS table generated on-demand ("Vertical-First") mode
332 if SVSHAPE
.skip
== 0b00: # in [0b00, 0b10]:
333 if SVSHAPE
.submode2
== 0b11: # iDCT
334 result
= ji
[ri
[jh
]] # lower half
336 result
= ri
[ji
[jh
]] # lower half
337 elif SVSHAPE
.skip
== 0b01: # in [0b01, 0b11]:
338 if SVSHAPE
.submode2
== 0b11: # iDCT
339 result
= ji
[ri
[jh
+size
]] # upper half
341 result
= ri
[ji
[jh
+size
]] # upper half
342 elif SVSHAPE
.skip
== 0b10: #
343 result
= ci
# coefficient helper
344 elif SVSHAPE
.skip
== 0b11: #
345 result
= size
# coefficient helper
347 ((y_end
and z_end
)<<1) |
348 ((y_end
and x_end
and z_end
)<<2))
350 yield result
+ SVSHAPE
.offset
, loopends
353 # now in-place swap (disabled)
354 if False and SVSHAPE
.submode2
== 0b11:
355 j
= list(range(i
, i
+ halfsize
))
356 jr
= list(range(i
+halfsize
, i
+ size
))
358 for ci
, (jl
, jh
) in enumerate(zip(j
[:hz2
], jr
[:hz2
])):
360 tmp1
, tmp2
= ji
[jlh
], ji
[jh
]
361 print ("inplace swap", jh
, jlh
, "actual", tmp1
, tmp2
)
362 ji
[jlh
], ji
[jh
] = tmp2
, tmp1
364 # new k_start point for cos tables( runs inside x_r loop NOT i loop)
368 def pprint_schedule(schedule
, n
):
373 tablestep
= n
// size
374 print ("size %d halfsize %d tablestep %d" % \
375 (size
, halfsize
, tablestep
))
376 for i
in range(0, n
, size
):
377 prefix
= "i %d\t" % i
378 for j
in range(i
, i
+ halfsize
):
379 (jl
, je
), (jh
, he
) = schedule
[idx
]
380 print (" %-3d\t%s j=%-2d jh=%-2d "
381 "j[jl=%-2d] j[jh=%-2d]" % \
382 (idx
, prefix
, j
, j
+halfsize
,
385 "end", bin(je
)[2:], bin(je
)[2:])
389 def pprint_schedule_outer(schedule
, n
):
394 tablestep
= n
// size
395 print ("size %d halfsize %d tablestep %d" % \
396 (size
, halfsize
, tablestep
))
397 y_r
= list(range(0, halfsize
))
399 prefix
= "i %d\t" % i
400 jr
= list(range(i
+halfsize
, i
+n
-halfsize
, size
))
402 (jl
, je
), (jh
, he
) = schedule
[idx
]
403 print (" %-3d\t%s j=%-2d jh=%-2d "
404 "j[jl=%-2d] j[jh=%-2d]" % \
405 (idx
, prefix
, j
, j
+halfsize
,
408 "end", bin(je
)[2:], bin(je
)[2:])
413 # totally cool *in-place* inverse DCT algorithm using yield REMAPs
414 def inverse_transform2(vec
):
421 print ("inverse_transform2", n
)
422 levels
= n
.bit_length() - 1
427 # divide element 0 by 2
430 # reference (read/write) the in-place data in *reverse-bit-order*
432 ri
= [ri
[reverse_bits(i
, levels
)] for i
in range(n
)]
434 # pretend we LDed data in half-swapped *and* bit-reversed order as well
435 # TODO: merge these two
436 vec
= [vec
[ri
[i
]] for i
in range(n
)]
437 vec
= halfrev2(vec
, True)
439 print("inverse_transform2 post-alter", vec
)
441 # create a cos table: not strictly necessary but here for illustrative
442 # purposes, to demonstrate the point that it really *is* iterative.
443 # this table could be cached and used multiple times rather than
444 # computed every time.
449 for ci
in range(halfsize
):
450 coeff
= (math
.cos((ci
+ 0.5) * math
.pi
/ size
) * 2.0)
452 print ("coeff", size
, "ci", ci
, "k", len(ctable
)-1,
453 "i/n", (ci
+0.5)/size
, coeff
)
464 SVSHAPE0
.lims
= [xdim
, 4, 0]
466 SVSHAPE0
.submode2
= 0b01
468 SVSHAPE0
.offset
= 0 # experiment with different offset, here
469 SVSHAPE0
.invxyz
= [1,0,1] # inversion if desired
472 SVSHAPE1
.lims
= [xdim
, 4, 0]
474 SVSHAPE1
.submode2
= 0b01
476 SVSHAPE1
.offset
= 0 # experiment with different offset, here
477 SVSHAPE1
.invxyz
= [1,0,1] # inversion if desired
480 SVSHAPE2
.lims
= [xdim
, 4, 0]
482 SVSHAPE2
.submode2
= 0b01
484 SVSHAPE2
.offset
= 0 # experiment with different offset, here
485 SVSHAPE2
.invxyz
= [1,0,1] # inversion if desired
487 # enumerate over the iterator function, getting new indices
488 i0
= iterate_dct_inner_costable_indices(SVSHAPE0
)
489 i1
= iterate_dct_inner_costable_indices(SVSHAPE1
)
490 i2
= iterate_dct_inner_costable_indices(SVSHAPE2
)
491 for ((ci
, cie
), (size
, sze
), (k
, ke
)) in \
493 print ("xform2 cos", ci
, size
, k
)
494 coeff
= (math
.cos((ci
+ 0.5) * math
.pi
/ size
) * 2.0)
495 assert coeff
== ctable
[k
]
496 print ("coeff", size
, "ci", ci
, "k", k
,
497 "i/n", (ci
+0.5)/size
, coeff
,
498 "end", bin(cie
), bin(sze
), bin(ke
))
499 if cie
== 0b111: # all loops end
502 # now things are in the right order for the outer butterfly.
506 SVSHAPE0
.lims
= [xdim
, 0b0000010, 0]
507 SVSHAPE0
.submode2
= 0b11
510 SVSHAPE0
.offset
= 0 # experiment with different offset, here
511 SVSHAPE0
.invxyz
= [1,0,1] # inversion if desired
512 # j+halfstep schedule
514 SVSHAPE1
.lims
= [xdim
, 0b0000010, 0]
516 SVSHAPE1
.submode2
= 0b11
518 SVSHAPE1
.offset
= 0 # experiment with different offset, here
519 SVSHAPE1
.invxyz
= [1,0,1] # inversion if desired
521 # enumerate over the iterator function, getting new indices
522 i0
= iterate_dct_outer_butterfly_indices(SVSHAPE0
)
523 i1
= iterate_dct_outer_butterfly_indices(SVSHAPE1
)
524 for k
, ((jl
, jle
), (jh
, jhe
)) in enumerate(zip(i0
, i1
)):
525 print ("itersum jr", jl
, jh
,
526 "end", bin(jle
), bin(jhe
),
527 vec
[jl
], vec
[jh
], vec
[jh
]+vec
[jl
])
530 if jle
== 0b111: # all loops end
533 print("transform2 pre-itersum", vec
)
541 SVSHAPE0
.lims
= [xdim
, 0b000001, 0]
543 SVSHAPE0
.submode2
= 0b11
545 SVSHAPE0
.offset
= 0 # experiment with different offset, here
546 SVSHAPE0
.invxyz
= [0,0,0] # inversion if desired
547 # j+halfstep schedule
549 SVSHAPE1
.lims
= [xdim
, 0b000001, 0]
551 SVSHAPE1
.submode2
= 0b11
553 SVSHAPE1
.offset
= 0 # experiment with different offset, here
554 SVSHAPE1
.invxyz
= [0,0,0] # inversion if desired
557 SVSHAPE2
.lims
= [xdim
, 0b000001, 0]
559 SVSHAPE2
.submode2
= 0b11
561 SVSHAPE2
.offset
= 0 # experiment with different offset, here
562 SVSHAPE2
.invxyz
= [0,0,0] # inversion if desired
565 SVSHAPE3
.lims
= [xdim
, 0b000001, 0]
567 SVSHAPE3
.submode2
= 0b11
569 SVSHAPE3
.offset
= 0 # experiment with different offset, here
570 SVSHAPE3
.invxyz
= [0,0,0] # inversion if desired
572 # enumerate over the iterator function, getting new indices
573 i0
= iterate_dct_inner_butterfly_indices(SVSHAPE0
)
574 i1
= iterate_dct_inner_butterfly_indices(SVSHAPE1
)
575 i2
= iterate_dct_inner_butterfly_indices(SVSHAPE2
)
576 i3
= iterate_dct_inner_butterfly_indices(SVSHAPE3
)
577 for k
, ((jl
, jle
), (jh
, jhe
), (ci
, cie
), (size
, sze
)) in \
578 enumerate(zip(i0
, i1
, i2
, i3
)):
579 t1
, t2
= vec
[jl
], vec
[jh
]
580 print ("xform2", jl
, jh
, ci
, size
)
581 coeff
= (math
.cos((ci
+ 0.5) * math
.pi
/ size
) * 2.0)
582 #assert coeff == ctable[k]
583 vec
[jl
] = t1
+ t2
/coeff
584 vec
[jh
] = t1
- t2
/coeff
585 print ("coeff", size
, "ci", ci
,
587 "i/n", (ci
+0.5)/size
, coeff
, "t1/2", t1
, t2
,
588 "+/-", vec
[jl
], vec
[jh
],
589 "end", bin(jle
), bin(jhe
))
590 if jle
== 0b111: # all loops end
593 print("inverse_transform2 result", vec
)
598 # totally cool *in-place* DCT algorithm using yield REMAPs
604 print ("transform2", n
)
605 levels
= n
.bit_length() - 1
610 # reference (read/write) the in-place data in *reverse-bit-order*
612 ri
= [ri
[reverse_bits(i
, levels
)] for i
in range(n
)]
614 # and pretend we LDed data in half-swapped *and* bit-reversed order as well
615 # TODO: merge these two
616 vec
= halfrev2(vec
, False)
617 vec
= [vec
[ri
[i
]] for i
in range(n
)]
619 # create a cos table: not strictly necessary but here for illustrative
620 # purposes, to demonstrate the point that it really *is* iterative.
621 # this table could be cached and used multiple times rather than
622 # computed every time.
627 for ci
in range(halfsize
):
628 coeff
= (math
.cos((ci
+ 0.5) * math
.pi
/ size
) * 2.0)
630 print ("coeff", size
, "ci", ci
, "k", len(ctable
)-1,
631 "i/n", (ci
+0.5)/size
, coeff
)
639 SVSHAPE0
.lims
= [xdim
, 4, 0]
641 SVSHAPE0
.submode2
= 0b01
643 SVSHAPE0
.offset
= 0 # experiment with different offset, here
644 SVSHAPE0
.invxyz
= [1,0,0] # inversion if desired
647 SVSHAPE1
.lims
= [xdim
, 4, 0]
649 SVSHAPE1
.submode2
= 0b01
651 SVSHAPE1
.offset
= 0 # experiment with different offset, here
652 SVSHAPE1
.invxyz
= [1,0,0] # inversion if desired
655 SVSHAPE2
.lims
= [xdim
, 4, 0]
657 SVSHAPE2
.submode2
= 0b01
659 SVSHAPE2
.offset
= 0 # experiment with different offset, here
660 SVSHAPE2
.invxyz
= [1,0,0] # inversion if desired
662 # enumerate over the iterator function, getting new indices
663 i0
= iterate_dct_inner_costable_indices(SVSHAPE0
)
664 i1
= iterate_dct_inner_costable_indices(SVSHAPE1
)
665 i2
= iterate_dct_inner_costable_indices(SVSHAPE2
)
666 for ((ci
, cie
), (size
, sze
), (k
, ke
)) in \
668 print ("xform2 cos", ci
, size
, k
)
669 coeff
= (math
.cos((ci
+ 0.5) * math
.pi
/ size
) * 2.0)
670 assert coeff
== ctable
[k
]
671 print ("coeff", size
, "ci", ci
, "k", k
,
672 "i/n", (ci
+0.5)/size
, coeff
,
673 "end", bin(cie
), bin(sze
), bin(ke
))
674 if cie
== 0b111: # all loops end
683 SVSHAPE0
.lims
= [xdim
, 0b000001, 0]
685 SVSHAPE0
.submode2
= 0b01
687 SVSHAPE0
.offset
= 0 # experiment with different offset, here
688 SVSHAPE0
.invxyz
= [1,0,0] # inversion if desired
689 # j+halfstep schedule
691 SVSHAPE1
.lims
= [xdim
, 0b000001, 0]
693 SVSHAPE1
.submode2
= 0b01
695 SVSHAPE1
.offset
= 0 # experiment with different offset, here
696 SVSHAPE1
.invxyz
= [1,0,0] # inversion if desired
699 SVSHAPE2
.lims
= [xdim
, 0b000001, 0]
701 SVSHAPE2
.submode2
= 0b01
703 SVSHAPE2
.offset
= 0 # experiment with different offset, here
704 SVSHAPE2
.invxyz
= [1,0,0] # inversion if desired
707 SVSHAPE3
.lims
= [xdim
, 0b000001, 0]
709 SVSHAPE3
.submode2
= 0b01
711 SVSHAPE3
.offset
= 0 # experiment with different offset, here
712 SVSHAPE3
.invxyz
= [1,0,0] # inversion if desired
714 # enumerate over the iterator function, getting new indices
715 i0
= iterate_dct_inner_butterfly_indices(SVSHAPE0
)
716 i1
= iterate_dct_inner_butterfly_indices(SVSHAPE1
)
717 i2
= iterate_dct_inner_butterfly_indices(SVSHAPE2
)
718 i3
= iterate_dct_inner_butterfly_indices(SVSHAPE3
)
719 for k
, ((jl
, jle
), (jh
, jhe
), (ci
, cie
), (size
, sze
)) in \
720 enumerate(zip(i0
, i1
, i2
, i3
)):
721 t1
, t2
= vec
[jl
], vec
[jh
]
722 print ("xform2", jl
, jh
, ci
, size
)
723 coeff
= (math
.cos((ci
+ 0.5) * math
.pi
/ size
) * 2.0)
724 #assert coeff == ctable[k]
726 vec
[jh
] = (t1
- t2
) * (1/coeff
)
727 print ("coeff", size
, "ci", ci
,
729 "i/n", (ci
+0.5)/size
, coeff
, vec
[jl
],
731 "end", bin(jle
), bin(jhe
))
732 if jle
== 0b111: # all loops end
735 print("transform2 pre-itersum", vec
)
737 # now things are in the right order for the outer butterfly.
741 SVSHAPE0
.lims
= [xdim
, 0b0000010, 0]
742 SVSHAPE0
.submode2
= 0b100
745 SVSHAPE0
.offset
= 0 # experiment with different offset, here
746 SVSHAPE0
.invxyz
= [0,0,0] # inversion if desired
747 # j+halfstep schedule
749 SVSHAPE1
.lims
= [xdim
, 0b0000010, 0]
751 SVSHAPE1
.submode2
= 0b100
753 SVSHAPE1
.offset
= 0 # experiment with different offset, here
754 SVSHAPE1
.invxyz
= [0,0,0] # inversion if desired
756 # enumerate over the iterator function, getting new indices
757 i0
= iterate_dct_outer_butterfly_indices(SVSHAPE0
)
758 i1
= iterate_dct_outer_butterfly_indices(SVSHAPE1
)
759 for k
, ((jl
, jle
), (jh
, jhe
)) in enumerate(zip(i0
, i1
)):
760 print ("itersum jr", jl
, jh
,
761 "end", bin(jle
), bin(jhe
))
764 if jle
== 0b111: # all loops end
767 print("transform2 result", vec
)
773 # set the dimension sizes here
776 ydim
= 0 # not needed
777 zdim
= 0 # again, not needed
789 SVSHAPE0
.lims
= [xdim
, 0b0000010, 0]
790 SVSHAPE0
.submode2
= 0b11
793 SVSHAPE0
.offset
= 0 # experiment with different offset, here
794 SVSHAPE0
.invxyz
= [1,0,1] # inversion if desired
795 # j+halfstep schedule
797 SVSHAPE1
.lims
= [xdim
, 0b0000010, 0]
799 SVSHAPE1
.submode2
= 0b11
801 SVSHAPE1
.offset
= 0 # experiment with different offset, here
802 SVSHAPE1
.invxyz
= [1,0,1] # inversion if desired
804 # enumerate over the iterator function, getting new indices
806 i0
= iterate_dct_outer_butterfly_indices(SVSHAPE0
)
807 i1
= iterate_dct_outer_butterfly_indices(SVSHAPE1
)
808 for idx
, (jl
, jh
) in enumerate(zip(i0
, i1
)):
809 schedule
.append((jl
, jh
))
810 if jl
[1] == 0b111: # end
813 # ok now pretty-print the results, with some debug output
814 print ("outer i-dct butterfly")
815 pprint_schedule_outer(schedule
, n
)
823 SVSHAPE0
.lims
= [xdim
, 0b000001, 0]
825 SVSHAPE0
.submode2
= 0b11
827 SVSHAPE0
.offset
= 0 # experiment with different offset, here
828 SVSHAPE0
.invxyz
= [0,0,0] # inversion if desired
829 # j+halfstep schedule
831 SVSHAPE1
.lims
= [xdim
, 0b000001, 0]
833 SVSHAPE1
.submode2
= 0b11
835 SVSHAPE1
.offset
= 0 # experiment with different offset, here
836 SVSHAPE1
.invxyz
= [0,0,0] # inversion if desired
838 # enumerate over the iterator function, getting new indices
840 i0
= iterate_dct_inner_butterfly_indices(SVSHAPE0
)
841 i1
= iterate_dct_inner_butterfly_indices(SVSHAPE1
)
842 for idx
, (jl
, jh
) in enumerate(zip(i0
, i1
)):
843 schedule
.append((jl
, jh
))
844 if jl
[1] == 0b111: # end
847 # ok now pretty-print the results, with some debug output
848 print ("inner butterfly")
849 pprint_schedule(schedule
, n
)
854 # for DCT half-swap LDs
857 SVSHAPE0
.lims
= [xdim
, 0b000101, zdim
]
859 SVSHAPE0
.submode2
= 0b01
861 SVSHAPE0
.offset
= 0 # experiment with different offset, here
862 SVSHAPE0
.invxyz
= [0,0,0] # inversion if desired
865 levels
= n
.bit_length() - 1
867 ri
= [reverse_bits(i
, levels
) for i
in range(n
)]
868 av
= halfrev2(avi
, False)
869 av
= [av
[ri
[i
]] for i
in range(n
)]
871 i0
= iterate_dct_inner_halfswap_loadstore(SVSHAPE0
)
872 for idx
, (jl
) in enumerate(i0
):
873 print ("inverse half-swap ld", idx
, jl
, av
[idx
])
874 if jl
[1] == 0b111: # end
879 # set the dimension sizes here
882 ydim
= 0 # not needed
883 zdim
= 0 # again, not needed
895 SVSHAPE0
.lims
= [xdim
, 0b000001, zdim
]
896 SVSHAPE0
.submode2
= 0b010
899 SVSHAPE0
.offset
= 0 # experiment with different offset, here
900 SVSHAPE0
.invxyz
= [0,0,0] # inversion if desired
901 # j+halfstep schedule
903 SVSHAPE1
.lims
= [xdim
, 0b000001, zdim
]
904 SVSHAPE1
.submode2
= 0b010
907 SVSHAPE1
.offset
= 0 # experiment with different offset, here
908 SVSHAPE1
.invxyz
= [0,0,0] # inversion if desired
910 # enumerate over the iterator function, getting new indices
912 i0
= iterate_dct_inner_butterfly_indices(SVSHAPE0
)
913 i1
= iterate_dct_inner_butterfly_indices(SVSHAPE1
)
914 for idx
, (jl
, jh
) in enumerate(zip(i0
, i1
)):
915 schedule
.append((jl
, jh
))
916 if jl
[1] == 0b111: # end
919 # ok now pretty-print the results, with some debug output
920 print ("inner butterfly")
921 pprint_schedule(schedule
, n
)
930 SVSHAPE0
.lims
= [xdim
, 0b000010, zdim
]
932 SVSHAPE0
.submode2
= 0b100
934 SVSHAPE0
.offset
= 0 # experiment with different offset, here
935 SVSHAPE0
.invxyz
= [1,0,0] # inversion if desired
936 # j+halfstep schedule
938 SVSHAPE1
.lims
= [xdim
, 0b000010, zdim
]
940 SVSHAPE1
.submode2
= 0b100
942 SVSHAPE1
.offset
= 0 # experiment with different offset, here
943 SVSHAPE1
.invxyz
= [1,0,0] # inversion if desired
945 # enumerate over the iterator function, getting new indices
947 i0
= iterate_dct_outer_butterfly_indices(SVSHAPE0
)
948 i1
= iterate_dct_outer_butterfly_indices(SVSHAPE1
)
949 for idx
, (jl
, jh
) in enumerate(zip(i0
, i1
)):
950 schedule
.append((jl
, jh
))
951 if jl
[1] == 0b111: # end
954 # ok now pretty-print the results, with some debug output
955 print ("outer butterfly")
956 pprint_schedule_outer(schedule
, n
)
958 # for DCT half-swap LDs
961 SVSHAPE0
.lims
= [xdim
, 0b000101, zdim
]
963 SVSHAPE0
.submode2
= 0
965 SVSHAPE0
.offset
= 0 # experiment with different offset, here
966 SVSHAPE0
.invxyz
= [0,0,0] # inversion if desired
969 levels
= n
.bit_length() - 1
971 ri
= [reverse_bits(i
, levels
) for i
in range(n
)]
972 av
= halfrev2(avi
, False)
973 av
= [av
[ri
[i
]] for i
in range(n
)]
976 i0
= iterate_dct_inner_halfswap_loadstore(SVSHAPE0
)
977 for idx
, (jl
) in enumerate(i0
):
978 print ("inverse half-swap ld", idx
, jl
, av
[idx
])
979 if jl
[1] == 0b111: # end
984 if __name__
== '__main__':