1 # DCT "REMAP" scheduler
3 # Modifications made to create an in-place iterative DCT:
4 # Copyright (c) 2021 Luke Kenneth Casson Leighton <lkcl@lkcl.net>
8 # Original fastdctlee.py by Nayuki:
9 # Copyright (c) 2020 Project Nayuki. (MIT License)
10 # https://www.nayuki.io/page/fast-discrete-cosine-transform-algorithms
12 from copy
import deepcopy
15 # bits of the integer 'val'.
16 def reverse_bits(val
, width
):
18 for _
in range(width
):
19 result
= (result
<< 1) |
(val
& 1)
24 # iterative version of [recursively-applied] half-rev.
25 # relies on the list lengths being power-of-two and the fact
26 # that bit-inversion of a list of binary numbers is the same
27 # as reversing the order of the list
28 # this version is dead easy to implement in hardware.
29 # a big surprise is that the half-reversal can be done with
30 # such a simple XOR. the inverse operation is slightly trickier
31 def halfrev2(vec
, pre_rev
=True):
33 for i
in range(len(vec
)):
35 res
.append(vec
[i ^
(i
>>1)])
39 for ji
in range(1, bl
):
45 # python "yield" can be iterated. use this to make it clear how
46 # the indices are generated by using natural-looking nested loops
47 def iterate_dct_inner_halfswap_loadstore(SVSHAPE
):
48 # get indices to iterate over, in the required order
50 mode
= SVSHAPE
.lims
[1]
51 stride
= SVSHAPE
.lims
[2]
52 print ("inner halfswap loadstore", n
, mode
, SVSHAPE
.skip
,
53 "submode", SVSHAPE
.submode2
, "stride", stride
)
55 # reference list for not needing to do data-swaps, just swap what
56 # *indices* are referenced (two levels of indirection at the moment)
57 # pre-reverse the data-swap list so that it *ends up* in the order 0123..
60 levels
= n
.bit_length() - 1
61 ri
= [reverse_bits(i
, levels
) for i
in range(n
)]
63 if SVSHAPE
.mode
== 0b01: # FFT, bitrev only
64 ji
= [ji
[ri
[i
]] for i
in range(n
)]
65 elif SVSHAPE
.submode2
== 0b001:
66 ji
= [ji
[ri
[i
]] for i
in range(n
)]
67 ji
= halfrev2(ji
, True)
69 ji
= halfrev2(ji
, False)
70 ji
= [ji
[ri
[i
]] for i
in range(n
)]
72 # invert order if requested
76 for i
, jl
in enumerate(ji
):
78 yield jl
* stride
, (0b111 if y_end
else 0b000)
81 # python "yield" can be iterated. use this to make it clear how
82 # the indices are generated by using natural-looking nested loops
83 def iterate_dct_inner_costable_indices(SVSHAPE
):
84 # get indices to iterate over, in the required order
86 mode
= SVSHAPE
.lims
[1]
87 stride
= SVSHAPE
.lims
[2]
88 print ("inner costable", mode
, "stride", stride
)
89 # creating lists of indices to iterate over in each dimension
90 # has to be done dynamically, because it depends on the size
91 # first, the size-based loop (which can be done statically)
97 # invert order if requested
107 # start an infinite (wrapping) loop
109 z_end
= 1 # doesn't exist in this, only 2 loops
112 for size
in x_r
: # loop over 3rd order dimension (size)
113 x_end
= size
== x_r
[-1]
114 # y_r schedule depends on size
117 for i
in range(0, n
, size
):
119 # invert if requested
120 if SVSHAPE
.invxyz
[1]: y_r
.reverse()
121 # two lists of half-range indices, e.g. j 0123, jr 7654
122 j
= list(range(0, halfsize
))
123 # invert if requested
124 if SVSHAPE
.invxyz
[2]: j_r
.reverse()
125 #print ("xform jr", jr)
126 # loop over 1st order dimension
127 for ci
, jl
in enumerate(j
):
129 # now depending on MODE return the index. inner butterfly
130 if SVSHAPE
.skip
== 0b00: # in [0b00, 0b10]:
131 result
= k
# offset into COS table
132 elif SVSHAPE
.skip
== 0b10: #
133 result
= ci
# coefficient helper
134 elif SVSHAPE
.skip
== 0b11: #
135 result
= size
# coefficient helper
137 ((y_end
and z_end
)<<1) |
138 ((y_end
and x_end
and z_end
)<<2))
140 yield (result
* stride
) + SVSHAPE
.offset
, loopends
143 # python "yield" can be iterated. use this to make it clear how
144 # the indices are generated by using natural-looking nested loops
145 def iterate_dct_inner_butterfly_indices(SVSHAPE
):
146 # get indices to iterate over, in the required order
148 mode
= SVSHAPE
.lims
[1]
149 stride
= SVSHAPE
.lims
[2]
150 print ("inner butterfly", mode
, SVSHAPE
.skip
,
151 "submode", SVSHAPE
.submode2
, "stride", stride
)
152 # creating lists of indices to iterate over in each dimension
153 # has to be done dynamically, because it depends on the size
154 # first, the size-based loop (which can be done statically)
160 # invert order if requested
161 if SVSHAPE
.invxyz
[0]:
167 # reference (read/write) the in-place data in *reverse-bit-order*
169 if SVSHAPE
.submode2
== 0b01:
170 levels
= n
.bit_length() - 1
171 ri
= [ri
[reverse_bits(i
, levels
)] for i
in range(n
)]
173 # reference list for not needing to do data-swaps, just swap what
174 # *indices* are referenced (two levels of indirection at the moment)
175 # pre-reverse the data-swap list so that it *ends up* in the order 0123..
178 if inplace_mode
and SVSHAPE
.submode2
== 0b01:
179 #print ("inplace mode")
180 ji
= halfrev2(ji
, True)
181 if inplace_mode
and SVSHAPE
.submode2
== 0b11:
182 ji
= halfrev2(ji
, False)
187 # start an infinite (wrapping) loop
191 for size
in x_r
: # loop over 3rd order dimension (size)
192 x_end
= size
== x_r
[-1]
193 # y_r schedule depends on size
196 for i
in range(0, n
, size
):
198 # invert if requested
199 if SVSHAPE
.invxyz
[1]: y_r
.reverse()
200 for i
in y_r
: # loop over 2nd order dimension
202 # two lists of half-range indices, e.g. j 0123, jr 7654
203 j
= list(range(i
, i
+ halfsize
))
204 jr
= list(range(i
+halfsize
, i
+ size
))
206 # invert if requested
207 if SVSHAPE
.invxyz
[2]:
210 hz2
= halfsize
// 2 # zero stops reversing 1-item lists
211 #print ("xform jr", jr)
212 # loop over 1st order dimension
214 for ci
, (jl
, jh
) in enumerate(zip(j
, jr
)):
216 # now depending on MODE return the index. inner butterfly
217 if SVSHAPE
.skip
== 0b00: # in [0b00, 0b10]:
218 if SVSHAPE
.submode2
== 0b11: # iDCT
219 result
= ji
[ri
[jl
]] # lower half
221 result
= ri
[ji
[jl
]] # lower half
222 elif SVSHAPE
.skip
== 0b01: # in [0b01, 0b11]:
223 if SVSHAPE
.submode2
== 0b11: # iDCT
224 result
= ji
[ri
[jl
+halfsize
]] # upper half
226 result
= ri
[ji
[jh
]] # upper half
228 # COS table pre-generated mode
229 if SVSHAPE
.skip
== 0b10: #
230 result
= k
# cos table offset
232 # COS table generated on-demand ("Vertical-First") mode
233 if SVSHAPE
.skip
== 0b10: #
234 result
= ci
# coefficient helper
235 elif SVSHAPE
.skip
== 0b11: #
236 result
= size
# coefficient helper
238 ((y_end
and z_end
)<<1) |
239 ((y_end
and x_end
and z_end
)<<2))
241 yield (result
* stride
) + SVSHAPE
.offset
, loopends
246 for ci
, (jl
, jh
) in enumerate(zip(j
[:hz2
], jr
[:hz2
])):
248 print ("inplace swap", jh
, jlh
)
249 tmp1
, tmp2
= ji
[jlh
], ji
[jh
]
250 ji
[jlh
], ji
[jh
] = tmp2
, tmp1
252 # new k_start point for cos tables( runs inside x_r loop NOT i loop)
256 # python "yield" can be iterated. use this to make it clear how
257 # the indices are generated by using natural-looking nested loops
258 def iterate_dct_outer_butterfly_indices(SVSHAPE
):
259 # get indices to iterate over, in the required order
261 mode
= SVSHAPE
.lims
[1]
262 stride
= SVSHAPE
.lims
[2]
263 # creating lists of indices to iterate over in each dimension
264 # has to be done dynamically, because it depends on the size
265 # first, the size-based loop (which can be done statically)
271 # invert order if requested
272 if SVSHAPE
.invxyz
[0]:
278 print ("outer butterfly", mode
, SVSHAPE
.skip
,
279 "submode", SVSHAPE
.submode2
,
282 # I-DCT, reference (read/write) the in-place data in *reverse-bit-order*
284 if SVSHAPE
.submode2
in [0b11, 0b01]:
285 levels
= n
.bit_length() - 1
286 ri
= [ri
[reverse_bits(i
, levels
)] for i
in range(n
)]
288 # reference list for not needing to do data-swaps, just swap what
289 # *indices* are referenced (two levels of indirection at the moment)
290 # pre-reverse the data-swap list so that it *ends up* in the order 0123..
292 inplace_mode
= False # need the space... SVSHAPE.skip in [0b10, 0b11]
293 if SVSHAPE
.submode2
== 0b11:
294 ji
= halfrev2(ji
, False)
299 # start an infinite (wrapping) loop
303 for size
in x_r
: # loop over 3rd order dimension (size)
305 x_end
= size
== x_r
[-1]
306 y_r
= list(range(0, halfsize
))
307 print ("itersum", halfsize
, size
, y_r
, "invert", SVSHAPE
.invxyz
[1])
308 # invert if requested
309 if SVSHAPE
.invxyz
[1]: y_r
.reverse()
310 for i
in y_r
: # loop over 2nd order dimension
312 # one list to create iterative-sum schedule
313 jr
= list(range(i
+halfsize
, i
+n
-halfsize
, size
))
314 # invert if requested
315 if SVSHAPE
.invxyz
[2]: jr
.reverse()
316 print ("itersum jr", i
+halfsize
, i
+size
, jr
,
317 "invert", SVSHAPE
.invxyz
[2])
318 hz2
= halfsize
// 2 # zero stops reversing 1-item lists
320 for ci
, jh
in enumerate(jr
): # loop over 1st order dimension
322 #print (" itersum", size, i, jh, jh+size)
324 # COS table pre-generated mode
325 if SVSHAPE
.skip
== 0b00: # in [0b00, 0b10]:
326 if SVSHAPE
.submode2
== 0b11: # iDCT
327 result
= ji
[ri
[jh
]] # upper half
329 result
= ri
[ji
[jh
]] # lower half
330 elif SVSHAPE
.skip
== 0b01: # in [0b01, 0b11]:
331 if SVSHAPE
.submode2
== 0b11: # iDCT
332 result
= ji
[ri
[jh
+size
]] # upper half
334 result
= ri
[ji
[jh
+size
]] # upper half
335 elif SVSHAPE
.skip
== 0b10: #
336 result
= k
# cos table offset
338 # COS table generated on-demand ("Vertical-First") mode
339 if SVSHAPE
.skip
== 0b00: # in [0b00, 0b10]:
340 if SVSHAPE
.submode2
== 0b11: # iDCT
341 result
= ji
[ri
[jh
]] # lower half
343 result
= ri
[ji
[jh
]] # lower half
344 elif SVSHAPE
.skip
== 0b01: # in [0b01, 0b11]:
345 if SVSHAPE
.submode2
== 0b11: # iDCT
346 result
= ji
[ri
[jh
+size
]] # upper half
348 result
= ri
[ji
[jh
+size
]] # upper half
349 elif SVSHAPE
.skip
== 0b10: #
350 result
= ci
# coefficient helper
351 elif SVSHAPE
.skip
== 0b11: #
352 result
= size
# coefficient helper
354 ((y_end
and z_end
)<<1) |
355 ((y_end
and x_end
and z_end
)<<2))
357 yield (result
* stride
) + SVSHAPE
.offset
, loopends
360 # now in-place swap (disabled)
361 if False and SVSHAPE
.submode2
== 0b11:
362 j
= list(range(i
, i
+ halfsize
))
363 jr
= list(range(i
+halfsize
, i
+ size
))
365 for ci
, (jl
, jh
) in enumerate(zip(j
[:hz2
], jr
[:hz2
])):
367 tmp1
, tmp2
= ji
[jlh
], ji
[jh
]
368 print ("inplace swap", jh
, jlh
, "actual", tmp1
, tmp2
)
369 ji
[jlh
], ji
[jh
] = tmp2
, tmp1
371 # new k_start point for cos tables( runs inside x_r loop NOT i loop)
375 def pprint_schedule(schedule
, n
):
380 tablestep
= n
// size
381 print ("size %d halfsize %d tablestep %d" % \
382 (size
, halfsize
, tablestep
))
383 for i
in range(0, n
, size
):
384 prefix
= "i %d\t" % i
385 for j
in range(i
, i
+ halfsize
):
386 (jl
, je
), (jh
, he
) = schedule
[idx
]
387 print (" %-3d\t%s j=%-2d jh=%-2d "
388 "j[jl=%-2d] j[jh=%-2d]" % \
389 (idx
, prefix
, j
, j
+halfsize
,
392 "end", bin(je
)[2:], bin(je
)[2:])
396 def pprint_schedule_outer(schedule
, n
):
401 tablestep
= n
// size
402 print ("size %d halfsize %d tablestep %d" % \
403 (size
, halfsize
, tablestep
))
404 y_r
= list(range(0, halfsize
))
406 prefix
= "i %d\t" % i
407 jr
= list(range(i
+halfsize
, i
+n
-halfsize
, size
))
409 (jl
, je
), (jh
, he
) = schedule
[idx
]
410 print (" %-3d\t%s j=%-2d jh=%-2d "
411 "j[jl=%-2d] j[jh=%-2d]" % \
412 (idx
, prefix
, j
, j
+halfsize
,
415 "end", bin(je
)[2:], bin(je
)[2:])
420 # totally cool *in-place* inverse DCT algorithm using yield REMAPs
421 def inverse_transform2(vec
):
428 print ("inverse_transform2", n
)
429 levels
= n
.bit_length() - 1
434 # divide element 0 by 2
437 # reference (read/write) the in-place data in *reverse-bit-order*
439 ri
= [ri
[reverse_bits(i
, levels
)] for i
in range(n
)]
441 # pretend we LDed data in half-swapped *and* bit-reversed order as well
442 # TODO: merge these two
443 vec
= [vec
[ri
[i
]] for i
in range(n
)]
444 vec
= halfrev2(vec
, True)
446 print("inverse_transform2 post-alter", vec
)
448 # create a cos table: not strictly necessary but here for illustrative
449 # purposes, to demonstrate the point that it really *is* iterative.
450 # this table could be cached and used multiple times rather than
451 # computed every time.
456 for ci
in range(halfsize
):
457 coeff
= (math
.cos((ci
+ 0.5) * math
.pi
/ size
) * 2.0)
459 print ("coeff", size
, "ci", ci
, "k", len(ctable
)-1,
460 "i/n", (ci
+0.5)/size
, coeff
)
471 SVSHAPE0
.lims
= [xdim
, 4, 1]
473 SVSHAPE0
.submode2
= 0b01
475 SVSHAPE0
.offset
= 0 # experiment with different offset, here
476 SVSHAPE0
.invxyz
= [1,0,1] # inversion if desired
479 SVSHAPE1
.lims
= [xdim
, 4, 1]
481 SVSHAPE1
.submode2
= 0b01
483 SVSHAPE1
.offset
= 0 # experiment with different offset, here
484 SVSHAPE1
.invxyz
= [1,0,1] # inversion if desired
487 SVSHAPE2
.lims
= [xdim
, 4, 1]
489 SVSHAPE2
.submode2
= 0b01
491 SVSHAPE2
.offset
= 0 # experiment with different offset, here
492 SVSHAPE2
.invxyz
= [1,0,1] # inversion if desired
494 # enumerate over the iterator function, getting new indices
495 i0
= iterate_dct_inner_costable_indices(SVSHAPE0
)
496 i1
= iterate_dct_inner_costable_indices(SVSHAPE1
)
497 i2
= iterate_dct_inner_costable_indices(SVSHAPE2
)
498 for ((ci
, cie
), (size
, sze
), (k
, ke
)) in \
500 print ("xform2 cos", ci
, size
, k
)
501 coeff
= (math
.cos((ci
+ 0.5) * math
.pi
/ size
) * 2.0)
502 assert coeff
== ctable
[k
]
503 print ("coeff", size
, "ci", ci
, "k", k
,
504 "i/n", (ci
+0.5)/size
, coeff
,
505 "end", bin(cie
), bin(sze
), bin(ke
))
506 if cie
== 0b111: # all loops end
509 # now things are in the right order for the outer butterfly.
513 SVSHAPE0
.lims
= [xdim
, 0b0000010, 1]
514 SVSHAPE0
.submode2
= 0b11
517 SVSHAPE0
.offset
= 0 # experiment with different offset, here
518 SVSHAPE0
.invxyz
= [1,0,1] # inversion if desired
519 # j+halfstep schedule
521 SVSHAPE1
.lims
= [xdim
, 0b0000010, 1]
523 SVSHAPE1
.submode2
= 0b11
525 SVSHAPE1
.offset
= 0 # experiment with different offset, here
526 SVSHAPE1
.invxyz
= [1,0,1] # inversion if desired
528 # enumerate over the iterator function, getting new indices
529 i0
= iterate_dct_outer_butterfly_indices(SVSHAPE0
)
530 i1
= iterate_dct_outer_butterfly_indices(SVSHAPE1
)
531 for k
, ((jl
, jle
), (jh
, jhe
)) in enumerate(zip(i0
, i1
)):
532 print ("itersum jr", jl
, jh
,
533 "end", bin(jle
), bin(jhe
),
534 vec
[jl
], vec
[jh
], vec
[jh
]+vec
[jl
])
537 if jle
== 0b111: # all loops end
540 print("transform2 pre-itersum", vec
)
548 SVSHAPE0
.lims
= [xdim
, 0b000001, 1]
550 SVSHAPE0
.submode2
= 0b11
552 SVSHAPE0
.offset
= 0 # experiment with different offset, here
553 SVSHAPE0
.invxyz
= [0,0,0] # inversion if desired
554 # j+halfstep schedule
556 SVSHAPE1
.lims
= [xdim
, 0b000001, 1]
558 SVSHAPE1
.submode2
= 0b11
560 SVSHAPE1
.offset
= 0 # experiment with different offset, here
561 SVSHAPE1
.invxyz
= [0,0,0] # inversion if desired
564 SVSHAPE2
.lims
= [xdim
, 0b000001, 1]
566 SVSHAPE2
.submode2
= 0b11
568 SVSHAPE2
.offset
= 0 # experiment with different offset, here
569 SVSHAPE2
.invxyz
= [0,0,0] # inversion if desired
572 SVSHAPE3
.lims
= [xdim
, 0b000001, 1]
574 SVSHAPE3
.submode2
= 0b11
576 SVSHAPE3
.offset
= 0 # experiment with different offset, here
577 SVSHAPE3
.invxyz
= [0,0,0] # inversion if desired
579 # enumerate over the iterator function, getting new indices
580 i0
= iterate_dct_inner_butterfly_indices(SVSHAPE0
)
581 i1
= iterate_dct_inner_butterfly_indices(SVSHAPE1
)
582 i2
= iterate_dct_inner_butterfly_indices(SVSHAPE2
)
583 i3
= iterate_dct_inner_butterfly_indices(SVSHAPE3
)
584 for k
, ((jl
, jle
), (jh
, jhe
), (ci
, cie
), (size
, sze
)) in \
585 enumerate(zip(i0
, i1
, i2
, i3
)):
586 t1
, t2
= vec
[jl
], vec
[jh
]
587 print ("xform2", jl
, jh
, ci
, size
)
588 coeff
= (math
.cos((ci
+ 0.5) * math
.pi
/ size
) * 2.0)
589 #assert coeff == ctable[k]
590 vec
[jl
] = t1
+ t2
/coeff
591 vec
[jh
] = t1
- t2
/coeff
592 print ("coeff", size
, "ci", ci
,
594 "i/n", (ci
+0.5)/size
, coeff
, "t1/2", t1
, t2
,
595 "+/-", vec
[jl
], vec
[jh
],
596 "end", bin(jle
), bin(jhe
))
597 if jle
== 0b111: # all loops end
600 print("inverse_transform2 result", vec
)
605 # totally cool *in-place* DCT algorithm using yield REMAPs
611 print ("transform2", n
)
612 levels
= n
.bit_length() - 1
617 # reference (read/write) the in-place data in *reverse-bit-order*
619 ri
= [ri
[reverse_bits(i
, levels
)] for i
in range(n
)]
621 # and pretend we LDed data in half-swapped *and* bit-reversed order as well
622 # TODO: merge these two
623 vec
= halfrev2(vec
, False)
624 vec
= [vec
[ri
[i
]] for i
in range(n
)]
626 # create a cos table: not strictly necessary but here for illustrative
627 # purposes, to demonstrate the point that it really *is* iterative.
628 # this table could be cached and used multiple times rather than
629 # computed every time.
634 for ci
in range(halfsize
):
635 coeff
= (math
.cos((ci
+ 0.5) * math
.pi
/ size
) * 2.0)
637 print ("coeff", size
, "ci", ci
, "k", len(ctable
)-1,
638 "i/n", (ci
+0.5)/size
, coeff
)
646 SVSHAPE0
.lims
= [xdim
, 4, 1]
648 SVSHAPE0
.submode2
= 0b01
650 SVSHAPE0
.offset
= 0 # experiment with different offset, here
651 SVSHAPE0
.invxyz
= [1,0,0] # inversion if desired
654 SVSHAPE1
.lims
= [xdim
, 4, 1]
656 SVSHAPE1
.submode2
= 0b01
658 SVSHAPE1
.offset
= 0 # experiment with different offset, here
659 SVSHAPE1
.invxyz
= [1,0,0] # inversion if desired
662 SVSHAPE2
.lims
= [xdim
, 4, 1]
664 SVSHAPE2
.submode2
= 0b01
666 SVSHAPE2
.offset
= 0 # experiment with different offset, here
667 SVSHAPE2
.invxyz
= [1,0,0] # inversion if desired
669 # enumerate over the iterator function, getting new indices
670 i0
= iterate_dct_inner_costable_indices(SVSHAPE0
)
671 i1
= iterate_dct_inner_costable_indices(SVSHAPE1
)
672 i2
= iterate_dct_inner_costable_indices(SVSHAPE2
)
673 for ((ci
, cie
), (size
, sze
), (k
, ke
)) in \
675 print ("xform2 cos", ci
, size
, k
)
676 coeff
= (math
.cos((ci
+ 0.5) * math
.pi
/ size
) * 2.0)
677 assert coeff
== ctable
[k
]
678 print ("coeff", size
, "ci", ci
, "k", k
,
679 "i/n", (ci
+0.5)/size
, coeff
,
680 "end", bin(cie
), bin(sze
), bin(ke
))
681 if cie
== 0b111: # all loops end
690 SVSHAPE0
.lims
= [xdim
, 0b000001, 1]
692 SVSHAPE0
.submode2
= 0b01
694 SVSHAPE0
.offset
= 0 # experiment with different offset, here
695 SVSHAPE0
.invxyz
= [1,0,0] # inversion if desired
696 # j+halfstep schedule
698 SVSHAPE1
.lims
= [xdim
, 0b000001, 1]
700 SVSHAPE1
.submode2
= 0b01
702 SVSHAPE1
.offset
= 0 # experiment with different offset, here
703 SVSHAPE1
.invxyz
= [1,0,0] # inversion if desired
706 SVSHAPE2
.lims
= [xdim
, 0b000001, 1]
708 SVSHAPE2
.submode2
= 0b01
710 SVSHAPE2
.offset
= 0 # experiment with different offset, here
711 SVSHAPE2
.invxyz
= [1,0,0] # inversion if desired
714 SVSHAPE3
.lims
= [xdim
, 0b000001, 1]
716 SVSHAPE3
.submode2
= 0b01
718 SVSHAPE3
.offset
= 0 # experiment with different offset, here
719 SVSHAPE3
.invxyz
= [1,0,0] # inversion if desired
721 # enumerate over the iterator function, getting new indices
722 i0
= iterate_dct_inner_butterfly_indices(SVSHAPE0
)
723 i1
= iterate_dct_inner_butterfly_indices(SVSHAPE1
)
724 i2
= iterate_dct_inner_butterfly_indices(SVSHAPE2
)
725 i3
= iterate_dct_inner_butterfly_indices(SVSHAPE3
)
726 for k
, ((jl
, jle
), (jh
, jhe
), (ci
, cie
), (size
, sze
)) in \
727 enumerate(zip(i0
, i1
, i2
, i3
)):
728 t1
, t2
= vec
[jl
], vec
[jh
]
729 print ("xform2", jl
, jh
, ci
, size
)
730 coeff
= (math
.cos((ci
+ 0.5) * math
.pi
/ size
) * 2.0)
731 #assert coeff == ctable[k]
733 vec
[jh
] = (t1
- t2
) * (1/coeff
)
734 print ("coeff", size
, "ci", ci
,
736 "i/n", (ci
+0.5)/size
, coeff
, vec
[jl
],
738 "end", bin(jle
), bin(jhe
))
739 if jle
== 0b111: # all loops end
742 print("transform2 pre-itersum", vec
)
744 # now things are in the right order for the outer butterfly.
748 SVSHAPE0
.lims
= [xdim
, 0b0000010, 1]
749 SVSHAPE0
.submode2
= 0b100
752 SVSHAPE0
.offset
= 0 # experiment with different offset, here
753 SVSHAPE0
.invxyz
= [0,0,0] # inversion if desired
754 # j+halfstep schedule
756 SVSHAPE1
.lims
= [xdim
, 0b0000010, 1]
758 SVSHAPE1
.submode2
= 0b100
760 SVSHAPE1
.offset
= 0 # experiment with different offset, here
761 SVSHAPE1
.invxyz
= [0,0,0] # inversion if desired
763 # enumerate over the iterator function, getting new indices
764 i0
= iterate_dct_outer_butterfly_indices(SVSHAPE0
)
765 i1
= iterate_dct_outer_butterfly_indices(SVSHAPE1
)
766 for k
, ((jl
, jle
), (jh
, jhe
)) in enumerate(zip(i0
, i1
)):
767 print ("itersum jr", jl
, jh
,
768 "end", bin(jle
), bin(jhe
))
771 if jle
== 0b111: # all loops end
774 print("transform2 result", vec
)
780 # set the dimension sizes here
783 ydim
= 0 # not needed
784 zdim
= 1 # again, not needed
796 SVSHAPE0
.lims
= [xdim
, 0b0000010, 1]
797 SVSHAPE0
.submode2
= 0b11
800 SVSHAPE0
.offset
= 0 # experiment with different offset, here
801 SVSHAPE0
.invxyz
= [1,0,1] # inversion if desired
802 # j+halfstep schedule
804 SVSHAPE1
.lims
= [xdim
, 0b0000010, 1]
806 SVSHAPE1
.submode2
= 0b11
808 SVSHAPE1
.offset
= 0 # experiment with different offset, here
809 SVSHAPE1
.invxyz
= [1,0,1] # inversion if desired
811 # enumerate over the iterator function, getting new indices
813 i0
= iterate_dct_outer_butterfly_indices(SVSHAPE0
)
814 i1
= iterate_dct_outer_butterfly_indices(SVSHAPE1
)
815 for idx
, (jl
, jh
) in enumerate(zip(i0
, i1
)):
816 schedule
.append((jl
, jh
))
817 if jl
[1] == 0b111: # end
820 # ok now pretty-print the results, with some debug output
821 print ("outer i-dct butterfly")
822 pprint_schedule_outer(schedule
, n
)
830 SVSHAPE0
.lims
= [xdim
, 0b000001, 1]
832 SVSHAPE0
.submode2
= 0b11
834 SVSHAPE0
.offset
= 0 # experiment with different offset, here
835 SVSHAPE0
.invxyz
= [0,0,0] # inversion if desired
836 # j+halfstep schedule
838 SVSHAPE1
.lims
= [xdim
, 0b000001, 1]
840 SVSHAPE1
.submode2
= 0b11
842 SVSHAPE1
.offset
= 0 # experiment with different offset, here
843 SVSHAPE1
.invxyz
= [0,0,0] # inversion if desired
845 # enumerate over the iterator function, getting new indices
847 i0
= iterate_dct_inner_butterfly_indices(SVSHAPE0
)
848 i1
= iterate_dct_inner_butterfly_indices(SVSHAPE1
)
849 for idx
, (jl
, jh
) in enumerate(zip(i0
, i1
)):
850 schedule
.append((jl
, jh
))
851 if jl
[1] == 0b111: # end
854 # ok now pretty-print the results, with some debug output
855 print ("inner butterfly")
856 pprint_schedule(schedule
, n
)
861 # for DCT half-swap LDs
864 SVSHAPE0
.lims
= [xdim
, 0b000101, zdim
]
866 SVSHAPE0
.submode2
= 0b01
868 SVSHAPE0
.offset
= 0 # experiment with different offset, here
869 SVSHAPE0
.invxyz
= [0,0,0] # inversion if desired
872 levels
= n
.bit_length() - 1
874 ri
= [reverse_bits(i
, levels
) for i
in range(n
)]
875 av
= halfrev2(avi
, False)
876 av
= [av
[ri
[i
]] for i
in range(n
)]
878 i0
= iterate_dct_inner_halfswap_loadstore(SVSHAPE0
)
879 for idx
, (jl
) in enumerate(i0
):
880 print ("inverse half-swap ld", idx
, jl
, av
[idx
])
881 if jl
[1] == 0b111: # end
886 # set the dimension sizes here
889 ydim
= 0 # not needed
890 zdim
= 1 # must be set at least to 1
902 SVSHAPE0
.lims
= [xdim
, 0b000001, zdim
]
903 SVSHAPE0
.submode2
= 0b010
906 SVSHAPE0
.offset
= 0 # experiment with different offset, here
907 SVSHAPE0
.invxyz
= [0,0,0] # inversion if desired
908 # j+halfstep schedule
910 SVSHAPE1
.lims
= [xdim
, 0b000001, zdim
]
911 SVSHAPE1
.submode2
= 0b010
914 SVSHAPE1
.offset
= 0 # experiment with different offset, here
915 SVSHAPE1
.invxyz
= [0,0,0] # inversion if desired
917 # enumerate over the iterator function, getting new indices
919 i0
= iterate_dct_inner_butterfly_indices(SVSHAPE0
)
920 i1
= iterate_dct_inner_butterfly_indices(SVSHAPE1
)
921 for idx
, (jl
, jh
) in enumerate(zip(i0
, i1
)):
922 schedule
.append((jl
, jh
))
923 if jl
[1] == 0b111: # end
926 # ok now pretty-print the results, with some debug output
927 print ("inner butterfly")
928 pprint_schedule(schedule
, n
)
937 SVSHAPE0
.lims
= [xdim
, 0b000010, zdim
]
939 SVSHAPE0
.submode2
= 0b100
941 SVSHAPE0
.offset
= 0 # experiment with different offset, here
942 SVSHAPE0
.invxyz
= [1,0,0] # inversion if desired
943 # j+halfstep schedule
945 SVSHAPE1
.lims
= [xdim
, 0b000010, zdim
]
947 SVSHAPE1
.submode2
= 0b100
949 SVSHAPE1
.offset
= 0 # experiment with different offset, here
950 SVSHAPE1
.invxyz
= [1,0,0] # inversion if desired
952 # enumerate over the iterator function, getting new indices
954 i0
= iterate_dct_outer_butterfly_indices(SVSHAPE0
)
955 i1
= iterate_dct_outer_butterfly_indices(SVSHAPE1
)
956 for idx
, (jl
, jh
) in enumerate(zip(i0
, i1
)):
957 schedule
.append((jl
, jh
))
958 if jl
[1] == 0b111: # end
961 # ok now pretty-print the results, with some debug output
962 print ("outer butterfly")
963 pprint_schedule_outer(schedule
, n
)
965 # for DCT half-swap LDs
968 SVSHAPE0
.lims
= [xdim
, 0b000101, zdim
]
970 SVSHAPE0
.submode2
= 0
972 SVSHAPE0
.offset
= 0 # experiment with different offset, here
973 SVSHAPE0
.invxyz
= [0,0,0] # inversion if desired
976 levels
= n
.bit_length() - 1
978 ri
= [reverse_bits(i
, levels
) for i
in range(n
)]
979 av
= halfrev2(avi
, False)
980 av
= [av
[ri
[i
]] for i
in range(n
)]
983 i0
= iterate_dct_inner_halfswap_loadstore(SVSHAPE0
)
984 for idx
, (jl
) in enumerate(i0
):
985 print ("inverse half-swap ld", idx
, jl
, av
[idx
])
986 if jl
[1] == 0b111: # end
991 if __name__
== '__main__':