1 # DCT "REMAP" scheduler
3 # Modifications made to create an in-place iterative DCT:
4 # Copyright (c) 2021 Luke Kenneth Casson Leighton <lkcl@lkcl.net>
8 # Original fastdctlee.py by Nayuki:
9 # Copyright (c) 2020 Project Nayuki. (MIT License)
10 # https://www.nayuki.io/page/fast-discrete-cosine-transform-algorithms
14 # bits of the integer 'val'.
15 def reverse_bits(val
, width
):
17 for _
in range(width
):
18 result
= (result
<< 1) |
(val
& 1)
23 # iterative version of [recursively-applied] half-rev.
24 # relies on the list lengths being power-of-two and the fact
25 # that bit-inversion of a list of binary numbers is the same
26 # as reversing the order of the list
27 # this version is dead easy to implement in hardware.
28 # a big surprise is that the half-reversal can be done with
29 # such a simple XOR. the inverse operation is slightly trickier
30 def halfrev2(vec
, pre_rev
=True):
32 for i
in range(len(vec
)):
34 res
.append(i ^
(i
>>1))
38 for ji
in range(1, bl
):
44 # python "yield" can be iterated. use this to make it clear how
45 # the indices are generated by using natural-looking nested loops
46 def iterate_dct_inner_halfswap_loadstore(SVSHAPE
):
47 # get indices to iterate over, in the required order
49 mode
= SVSHAPE
.lims
[1]
50 print ("inner halfswap loadstore", n
, mode
, SVSHAPE
.skip
)
52 # reference list for not needing to do data-swaps, just swap what
53 # *indices* are referenced (two levels of indirection at the moment)
54 # pre-reverse the data-swap list so that it *ends up* in the order 0123..
57 levels
= n
.bit_length() - 1
58 ji
= halfrev2(ji
, False)
59 if False: # swap: TODO, add extra bit-reverse mode
60 ri
= [reverse_bits(i
, levels
) for i
in range(n
)]
61 ji
= [ji
[ri
[i
]] for i
in range(n
)]
64 # invert order if requested
68 for i
, jl
in enumerate(ji
):
70 yield jl
, (0b111 if y_end
else 0b000)
73 # python "yield" can be iterated. use this to make it clear how
74 # the indices are generated by using natural-looking nested loops
75 def iterate_dct_inner_costable_indices(SVSHAPE
):
76 # get indices to iterate over, in the required order
78 mode
= SVSHAPE
.lims
[1]
79 print ("inner costable", mode
)
80 # creating lists of indices to iterate over in each dimension
81 # has to be done dynamically, because it depends on the size
82 # first, the size-based loop (which can be done statically)
88 # invert order if requested
98 # start an infinite (wrapping) loop
100 z_end
= 1 # doesn't exist in this, only 2 loops
103 for size
in x_r
: # loop over 3rd order dimension (size)
104 x_end
= size
== x_r
[-1]
105 # y_r schedule depends on size
108 for i
in range(0, n
, size
):
110 # invert if requested
111 if SVSHAPE
.invxyz
[1]: y_r
.reverse()
112 # two lists of half-range indices, e.g. j 0123, jr 7654
113 j
= list(range(0, halfsize
))
114 # invert if requested
115 if SVSHAPE
.invxyz
[2]: j_r
.reverse()
116 #print ("xform jr", jr)
117 # loop over 1st order dimension
118 for ci
, jl
in enumerate(j
):
120 # now depending on MODE return the index. inner butterfly
121 if SVSHAPE
.skip
== 0b00: # in [0b00, 0b10]:
122 result
= k
# offset into COS table
123 elif SVSHAPE
.skip
== 0b10: #
124 result
= ci
# coefficient helper
125 elif SVSHAPE
.skip
== 0b11: #
126 result
= size
# coefficient helper
128 ((y_end
and z_end
)<<1) |
129 ((y_end
and x_end
and z_end
)<<2))
131 yield result
+ SVSHAPE
.offset
, loopends
134 # python "yield" can be iterated. use this to make it clear how
135 # the indices are generated by using natural-looking nested loops
136 def iterate_dct_inner_butterfly_indices(SVSHAPE
):
137 # get indices to iterate over, in the required order
139 mode
= SVSHAPE
.lims
[1]
140 #print ("inner butterfly", mode, SVSHAPE.skip)
141 # creating lists of indices to iterate over in each dimension
142 # has to be done dynamically, because it depends on the size
143 # first, the size-based loop (which can be done statically)
149 # invert order if requested
150 if SVSHAPE
.invxyz
[0]:
156 # reference (read/write) the in-place data in *reverse-bit-order*
158 if SVSHAPE
.submode2
== 0b01:
159 levels
= n
.bit_length() - 1
160 ri
= [ri
[reverse_bits(i
, levels
)] for i
in range(n
)]
162 # reference list for not needing to do data-swaps, just swap what
163 # *indices* are referenced (two levels of indirection at the moment)
164 # pre-reverse the data-swap list so that it *ends up* in the order 0123..
166 inplace_mode
= SVSHAPE
.submode2
== 0b01
167 # and SVSHAPE.skip not in [0b10, 0b11]
169 #print ("inplace mode")
170 ji
= halfrev2(ji
, True)
175 # start an infinite (wrapping) loop
180 for size
in x_r
: # loop over 3rd order dimension (size)
181 x_end
= size
== x_r
[-1]
182 # y_r schedule depends on size
185 for i
in range(0, n
, size
):
187 # invert if requested
188 if SVSHAPE
.invxyz
[1]: y_r
.reverse()
189 for i
in y_r
: # loop over 2nd order dimension
191 # two lists of half-range indices, e.g. j 0123, jr 7654
192 j
= list(range(i
, i
+ halfsize
))
193 jr
= list(range(i
+halfsize
, i
+ size
))
195 # invert if requested
196 if SVSHAPE
.invxyz
[2]: j_r
.reverse()
197 hz2
= halfsize
// 2 # zero stops reversing 1-item lists
198 # if you *really* want to do the in-place swapping manually,
199 # this allows you to do it. good luck...
200 if SVSHAPE
.submode2
== 0b01 and not inplace_mode
:
203 #print ("xform jr", jr)
204 # loop over 1st order dimension
206 for ci
, (jl
, jh
) in enumerate(zip(j
, jr
)):
208 # now depending on MODE return the index. inner butterfly
209 if SVSHAPE
.skip
== 0b00: # in [0b00, 0b10]:
210 result
= ri
[ji
[jl
]] # lower half
211 elif SVSHAPE
.skip
== 0b01: # in [0b01, 0b11]:
212 result
= ri
[ji
[jh
]] # upper half
214 # COS table pre-generated mode
215 if SVSHAPE
.skip
== 0b10: #
216 result
= k
# cos table offset
218 # COS table generated on-demand ("Vertical-First") mode
219 if SVSHAPE
.skip
== 0b10: #
220 result
= ci
# coefficient helper
221 elif SVSHAPE
.skip
== 0b11: #
222 result
= size
# coefficient helper
224 ((y_end
and z_end
)<<1) |
225 ((y_end
and x_end
and z_end
)<<2))
227 yield result
+ SVSHAPE
.offset
, loopends
232 for ci
, (jl
, jh
) in enumerate(zip(j
[:hz2
], jr
[:hz2
])):
234 #print ("inplace swap", jh, jlh)
235 tmp1
, tmp2
= ji
[jlh
], ji
[jh
]
236 ji
[jlh
], ji
[jh
] = tmp2
, tmp1
238 # new k_start point for cos tables( runs inside x_r loop NOT i loop)
242 # python "yield" can be iterated. use this to make it clear how
243 # the indices are generated by using natural-looking nested loops
244 def iterate_dct_outer_butterfly_indices(SVSHAPE
):
245 # get indices to iterate over, in the required order
247 mode
= SVSHAPE
.lims
[1]
248 # createing lists of indices to iterate over in each dimension
249 # has to be done dynamically, because it depends on the size
250 # first, the size-based loop (which can be done statically)
256 # invert order if requested
257 if SVSHAPE
.invxyz
[0]:
263 #print ("outer butterfly")
265 # reference (read/write) the in-place data in *reverse-bit-order*
267 if SVSHAPE
.submode2
== 0b11:
268 levels
= n
.bit_length() - 1
269 ri
= [ri
[reverse_bits(i
, levels
)] for i
in range(n
)]
271 # reference list for not needing to do data-swaps, just swap what
272 # *indices* are referenced (two levels of indirection at the moment)
273 # pre-reverse the data-swap list so that it *ends up* in the order 0123..
275 inplace_mode
= False # need the space... SVSHAPE.skip in [0b10, 0b11]
277 #print ("inplace mode", SVSHAPE.skip)
278 ji
= halfrev2(ji
, True)
283 # start an infinite (wrapping) loop
288 for size
in x_r
: # loop over 3rd order dimension (size)
290 x_end
= size
== x_r
[-1]
291 y_r
= list(range(0, halfsize
))
292 #print ("itersum", halfsize, size, y_r)
293 # invert if requested
294 if SVSHAPE
.invxyz
[1]: y_r
.reverse()
295 for i
in y_r
: # loop over 2nd order dimension
297 # one list to create iterative-sum schedule
298 jr
= list(range(i
+halfsize
, i
+n
-halfsize
, size
))
299 #print ("itersum jr", i+halfsize, i+size, jr)
300 # invert if requested
301 if SVSHAPE
.invxyz
[2]: j_r
.reverse()
302 hz2
= halfsize
// 2 # zero stops reversing 1-item lists
304 for ci
, jh
in enumerate(jr
): # loop over 1st order dimension
306 #print (" itersum", size, i, jh, jh+size)
308 # COS table pre-generated mode
309 if SVSHAPE
.skip
== 0b00: # in [0b00, 0b10]:
310 result
= ri
[ji
[jh
]] # lower half
311 elif SVSHAPE
.skip
== 0b01: # in [0b01, 0b11]:
312 result
= ri
[ji
[jh
+size
]] # upper half
313 elif SVSHAPE
.skip
== 0b10: #
314 result
= k
# cos table offset
316 # COS table generated on-demand ("Vertical-First") mode
317 if SVSHAPE
.skip
== 0b00: # in [0b00, 0b10]:
318 result
= ri
[ji
[jh
]] # lower half
319 elif SVSHAPE
.skip
== 0b01: # in [0b01, 0b11]:
320 result
= ri
[ji
[jh
+size
]] # upper half
321 elif SVSHAPE
.skip
== 0b10: #
322 result
= ci
# coefficient helper
323 elif SVSHAPE
.skip
== 0b11: #
324 result
= size
# coefficient helper
326 ((y_end
and z_end
)<<1) |
327 ((y_end
and x_end
and z_end
)<<2))
329 yield result
+ SVSHAPE
.offset
, loopends
333 if SVSHAPE
.submode2
== 0b11 and inplace_mode
:
334 j
= list(range(i
, i
+ halfsize
))
335 jr
= list(range(i
+halfsize
, i
+ size
))
337 for ci
, (jl
, jh
) in enumerate(zip(j
[:hz2
], jr
[:hz2
])):
339 #print ("inplace swap", jh, jlh)
340 tmp1
, tmp2
= ji
[jlh
], ji
[jh
]
341 ji
[jlh
], ji
[jh
] = tmp2
, tmp1
343 # new k_start point for cos tables( runs inside x_r loop NOT i loop)
347 def pprint_schedule(schedule
, n
):
352 tablestep
= n
// size
353 print ("size %d halfsize %d tablestep %d" % \
354 (size
, halfsize
, tablestep
))
355 for i
in range(0, n
, size
):
356 prefix
= "i %d\t" % i
357 for j
in range(i
, i
+ halfsize
):
358 (jl
, je
), (jh
, he
) = schedule
[idx
]
359 print (" %-3d\t%s j=%-2d jh=%-2d "
360 "j[jl=%-2d] j[jh=%-2d]" % \
361 (idx
, prefix
, j
, j
+halfsize
,
364 "end", bin(je
)[2:], bin(je
)[2:])
368 def pprint_schedule_outer(schedule
, n
):
373 tablestep
= n
// size
374 print ("size %d halfsize %d tablestep %d" % \
375 (size
, halfsize
, tablestep
))
376 y_r
= list(range(0, halfsize
))
378 prefix
= "i %d\t" % i
379 jr
= list(range(i
+halfsize
, i
+n
-halfsize
, size
))
381 (jl
, je
), (jh
, he
) = schedule
[idx
]
382 print (" %-3d\t%s j=%-2d jh=%-2d "
383 "j[jl=%-2d] j[jh=%-2d]" % \
384 (idx
, prefix
, j
, j
+halfsize
,
387 "end", bin(je
)[2:], bin(je
)[2:])
392 # totally cool *in-place* DCT algorithm using yield REMAPs
398 print ("transform2", n
)
399 levels
= n
.bit_length() - 1
404 # reference (read/write) the in-place data in *reverse-bit-order*
406 ri
= [ri
[reverse_bits(i
, levels
)] for i
in range(n
)]
408 # and pretend we LDed data in half-swapped *and* bit-reversed order as well
409 # TODO: merge these two
410 vec
= halfrev2(vec
, False)
411 vec
= [vec
[ri
[i
]] for i
in range(n
)]
413 # create a cos table: not strictly necessary but here for illustrative
414 # purposes, to demonstrate the point that it really *is* iterative.
415 # this table could be cached and used multiple times rather than
416 # computed every time.
421 for ci
in range(halfsize
):
422 coeff
= (math
.cos((ci
+ 0.5) * math
.pi
/ size
) * 2.0)
424 print ("coeff", size
, "ci", ci
, "k", len(ctable
)-1,
425 "i/n", (ci
+0.5)/size
, coeff
)
433 SVSHAPE0
.lims
= [xdim
, 4, 0]
435 SVSHAPE0
.submode2
= 0b01
437 SVSHAPE0
.offset
= 0 # experiment with different offset, here
438 SVSHAPE0
.invxyz
= [1,0,0] # inversion if desired
441 SVSHAPE1
.lims
= [xdim
, 4, 0]
443 SVSHAPE1
.submode2
= 0b01
445 SVSHAPE1
.offset
= 0 # experiment with different offset, here
446 SVSHAPE1
.invxyz
= [1,0,0] # inversion if desired
449 SVSHAPE2
.lims
= [xdim
, 4, 0]
451 SVSHAPE2
.submode2
= 0b01
453 SVSHAPE2
.offset
= 0 # experiment with different offset, here
454 SVSHAPE2
.invxyz
= [1,0,0] # inversion if desired
456 # enumerate over the iterator function, getting new indices
457 i0
= iterate_dct_inner_costable_indices(SVSHAPE0
)
458 i1
= iterate_dct_inner_costable_indices(SVSHAPE1
)
459 i2
= iterate_dct_inner_costable_indices(SVSHAPE2
)
460 for ((ci
, cie
), (size
, sze
), (k
, ke
)) in \
462 print ("xform2 cos", ci
, size
, k
)
463 coeff
= (math
.cos((ci
+ 0.5) * math
.pi
/ size
) * 2.0)
464 assert coeff
== ctable
[k
]
465 print ("coeff", size
, "ci", ci
, "k", k
,
466 "i/n", (ci
+0.5)/size
, coeff
,
467 "end", bin(cie
), bin(sze
), bin(ke
))
468 if cie
== 0b111: # all loops end
477 SVSHAPE0
.lims
= [xdim
, 0b000001, 0]
479 SVSHAPE0
.submode2
= 0b01
481 SVSHAPE0
.offset
= 0 # experiment with different offset, here
482 SVSHAPE0
.invxyz
= [1,0,0] # inversion if desired
483 # j+halfstep schedule
485 SVSHAPE1
.lims
= [xdim
, 0b000001, 0]
487 SVSHAPE1
.submode2
= 0b01
489 SVSHAPE1
.offset
= 0 # experiment with different offset, here
490 SVSHAPE1
.invxyz
= [1,0,0] # inversion if desired
493 SVSHAPE2
.lims
= [xdim
, 0b000001, 0]
495 SVSHAPE2
.submode2
= 0b01
497 SVSHAPE2
.offset
= 0 # experiment with different offset, here
498 SVSHAPE2
.invxyz
= [1,0,0] # inversion if desired
501 SVSHAPE3
.lims
= [xdim
, 0b000001, 0]
503 SVSHAPE3
.submode2
= 0b01
505 SVSHAPE3
.offset
= 0 # experiment with different offset, here
506 SVSHAPE3
.invxyz
= [1,0,0] # inversion if desired
508 # enumerate over the iterator function, getting new indices
509 i0
= iterate_dct_inner_butterfly_indices(SVSHAPE0
)
510 i1
= iterate_dct_inner_butterfly_indices(SVSHAPE1
)
511 i2
= iterate_dct_inner_butterfly_indices(SVSHAPE2
)
512 i3
= iterate_dct_inner_butterfly_indices(SVSHAPE3
)
513 for k
, ((jl
, jle
), (jh
, jhe
), (ci
, cie
), (size
, sze
)) in \
514 enumerate(zip(i0
, i1
, i2
, i3
)):
515 t1
, t2
= vec
[jl
], vec
[jh
]
516 print ("xform2", jl
, jh
, ci
, size
)
517 coeff
= (math
.cos((ci
+ 0.5) * math
.pi
/ size
) * 2.0)
518 #assert coeff == ctable[k]
520 vec
[jh
] = (t1
- t2
) * (1/coeff
)
521 print ("coeff", size
, "ci", ci
,
523 "i/n", (ci
+0.5)/size
, coeff
, vec
[jl
],
525 "end", bin(jle
), bin(jhe
))
526 if jle
== 0b111: # all loops end
529 print("transform2 pre-itersum", vec
)
531 # now things are in the right order for the outer butterfly.
535 SVSHAPE0
.lims
= [xdim
, 0b0000010, 0]
536 SVSHAPE0
.submode2
= 0b100
539 SVSHAPE0
.offset
= 0 # experiment with different offset, here
540 SVSHAPE0
.invxyz
= [0,0,0] # inversion if desired
541 # j+halfstep schedule
543 SVSHAPE1
.lims
= [xdim
, 0b0000010, 0]
545 SVSHAPE1
.submode2
= 0b100
547 SVSHAPE1
.offset
= 0 # experiment with different offset, here
548 SVSHAPE1
.invxyz
= [0,0,0] # inversion if desired
550 # enumerate over the iterator function, getting new indices
551 i0
= iterate_dct_outer_butterfly_indices(SVSHAPE0
)
552 i1
= iterate_dct_outer_butterfly_indices(SVSHAPE1
)
553 for k
, ((jl
, jle
), (jh
, jhe
)) in enumerate(zip(i0
, i1
)):
554 print ("itersum jr", jl
, jh
,
555 "end", bin(jle
), bin(jhe
))
558 if jle
== 0b111: # all loops end
561 print("transform2 result", vec
)
567 # set the dimension sizes here
570 ydim
= 0 # not needed
571 zdim
= 0 # again, not needed
583 SVSHAPE0
.lims
= [xdim
, 0b000001, zdim
]
584 SVSHAPE0
.submode2
= 0b010
587 SVSHAPE0
.offset
= 0 # experiment with different offset, here
588 SVSHAPE0
.invxyz
= [0,0,0] # inversion if desired
589 # j+halfstep schedule
591 SVSHAPE1
.lims
= [xdim
, 0b000001, zdim
]
592 SVSHAPE1
.submode2
= 0b010
595 SVSHAPE1
.offset
= 0 # experiment with different offset, here
596 SVSHAPE1
.invxyz
= [0,0,0] # inversion if desired
598 # enumerate over the iterator function, getting new indices
600 i0
= iterate_dct_inner_butterfly_indices(SVSHAPE0
)
601 i1
= iterate_dct_inner_butterfly_indices(SVSHAPE1
)
602 for idx
, (jl
, jh
) in enumerate(zip(i0
, i1
)):
603 schedule
.append((jl
, jh
))
604 if jl
[1] == 0b111: # end
607 # ok now pretty-print the results, with some debug output
608 print ("inner butterfly")
609 pprint_schedule(schedule
, n
)
618 SVSHAPE0
.lims
= [xdim
, 0b000010, zdim
]
620 SVSHAPE0
.submode2
= 0b100
622 SVSHAPE0
.offset
= 0 # experiment with different offset, here
623 SVSHAPE0
.invxyz
= [1,0,0] # inversion if desired
624 # j+halfstep schedule
626 SVSHAPE1
.lims
= [xdim
, 0b000010, zdim
]
628 SVSHAPE1
.submode2
= 0b100
630 SVSHAPE1
.offset
= 0 # experiment with different offset, here
631 SVSHAPE1
.invxyz
= [1,0,0] # inversion if desired
633 # enumerate over the iterator function, getting new indices
635 i0
= iterate_dct_outer_butterfly_indices(SVSHAPE0
)
636 i1
= iterate_dct_outer_butterfly_indices(SVSHAPE1
)
637 for idx
, (jl
, jh
) in enumerate(zip(i0
, i1
)):
638 schedule
.append((jl
, jh
))
639 if jl
[1] == 0b111: # end
642 # ok now pretty-print the results, with some debug output
643 print ("outer butterfly")
644 pprint_schedule_outer(schedule
, n
)
646 # for DCT half-swap LDs
649 SVSHAPE0
.lims
= [xdim
, 0b000101, zdim
]
651 SVSHAPE0
.submode2
= 0
653 SVSHAPE0
.offset
= 0 # experiment with different offset, here
654 SVSHAPE0
.invxyz
= [0,0,0] # inversion if desired
657 levels
= n
.bit_length() - 1
659 ri
= [reverse_bits(i
, levels
) for i
in range(n
)]
660 av
= halfrev2(avi
, False)
661 av
= [av
[ri
[i
]] for i
in range(n
)]
664 i0
= iterate_dct_inner_halfswap_loadstore(SVSHAPE0
)
665 for idx
, (jl
) in enumerate(i0
):
666 print ("inverse half-swap ld", idx
, jl
, av
[idx
])
667 if jl
[1] == 0b111: # end
672 if __name__
== '__main__':