1 # DCT "REMAP" scheduler
3 # Modifications made to create an in-place iterative DCT:
4 # Copyright (c) 2021 Luke Kenneth Casson Leighton <lkcl@lkcl.net>
8 # Original fastdctlee.py by Nayuki:
9 # Copyright (c) 2020 Project Nayuki. (MIT License)
10 # https://www.nayuki.io/page/fast-discrete-cosine-transform-algorithms
14 # bits of the integer 'val'.
15 def reverse_bits(val
, width
):
17 for _
in range(width
):
18 result
= (result
<< 1) |
(val
& 1)
23 # iterative version of [recursively-applied] half-rev.
24 # relies on the list lengths being power-of-two and the fact
25 # that bit-inversion of a list of binary numbers is the same
26 # as reversing the order of the list
27 # this version is dead easy to implement in hardware.
28 # a big surprise is that the half-reversal can be done with
29 # such a simple XOR. the inverse operation is slightly trickier
30 def halfrev2(vec
, pre_rev
=True):
32 for i
in range(len(vec
)):
34 res
.append(i ^
(i
>>1))
38 for ji
in range(1, bl
):
44 # python "yield" can be iterated. use this to make it clear how
45 # the indices are generated by using natural-looking nested loops
46 def iterate_dct_inner_halfswap_loadstore(SVSHAPE
):
47 # get indices to iterate over, in the required order
49 mode
= SVSHAPE
.lims
[1]
50 print ("inner halfswap loadstore", n
, mode
, SVSHAPE
.skip
)
52 # reference list for not needing to do data-swaps, just swap what
53 # *indices* are referenced (two levels of indirection at the moment)
54 # pre-reverse the data-swap list so that it *ends up* in the order 0123..
56 ji
= halfrev2(ji
, True)
58 # invert order if requested
65 # python "yield" can be iterated. use this to make it clear how
66 # the indices are generated by using natural-looking nested loops
67 def iterate_dct_inner_costable_indices(SVSHAPE
):
68 # get indices to iterate over, in the required order
70 mode
= SVSHAPE
.lims
[1]
71 print ("inner costable", mode
)
72 # creating lists of indices to iterate over in each dimension
73 # has to be done dynamically, because it depends on the size
74 # first, the size-based loop (which can be done statically)
80 # invert order if requested
90 # start an infinite (wrapping) loop
92 z_end
= 1 # doesn't exist in this, only 2 loops
95 for size
in x_r
: # loop over 3rd order dimension (size)
96 x_end
= size
== x_r
[-1]
97 # y_r schedule depends on size
100 for i
in range(0, n
, size
):
102 # invert if requested
103 if SVSHAPE
.invxyz
[1]: y_r
.reverse()
104 # two lists of half-range indices, e.g. j 0123, jr 7654
105 j
= list(range(0, halfsize
))
106 # invert if requested
107 if SVSHAPE
.invxyz
[2]: j_r
.reverse()
108 #print ("xform jr", jr)
109 # loop over 1st order dimension
110 for ci
, jl
in enumerate(j
):
112 # now depending on MODE return the index. inner butterfly
113 if SVSHAPE
.skip
== 0b00: # in [0b00, 0b10]:
114 result
= k
# offset into COS table
115 elif SVSHAPE
.skip
== 0b10: #
116 result
= ci
# coefficient helper
117 elif SVSHAPE
.skip
== 0b11: #
118 result
= size
# coefficient helper
120 ((y_end
and z_end
)<<1) |
121 ((y_end
and x_end
and z_end
)<<2))
123 yield result
+ SVSHAPE
.offset
, loopends
126 # python "yield" can be iterated. use this to make it clear how
127 # the indices are generated by using natural-looking nested loops
128 def iterate_dct_inner_butterfly_indices(SVSHAPE
):
129 # get indices to iterate over, in the required order
131 mode
= SVSHAPE
.lims
[1]
132 #print ("inner butterfly", mode, SVSHAPE.skip)
133 # creating lists of indices to iterate over in each dimension
134 # has to be done dynamically, because it depends on the size
135 # first, the size-based loop (which can be done statically)
141 # invert order if requested
142 if SVSHAPE
.invxyz
[0]:
148 # reference (read/write) the in-place data in *reverse-bit-order*
150 if SVSHAPE
.submode2
== 0b01:
151 levels
= n
.bit_length() - 1
152 ri
= [ri
[reverse_bits(i
, levels
)] for i
in range(n
)]
154 # reference list for not needing to do data-swaps, just swap what
155 # *indices* are referenced (two levels of indirection at the moment)
156 # pre-reverse the data-swap list so that it *ends up* in the order 0123..
158 inplace_mode
= SVSHAPE
.submode2
== 0b01
159 # and SVSHAPE.skip not in [0b10, 0b11]
161 #print ("inplace mode")
162 ji
= halfrev2(ji
, True)
167 # start an infinite (wrapping) loop
172 for size
in x_r
: # loop over 3rd order dimension (size)
173 x_end
= size
== x_r
[-1]
174 # y_r schedule depends on size
177 for i
in range(0, n
, size
):
179 # invert if requested
180 if SVSHAPE
.invxyz
[1]: y_r
.reverse()
181 for i
in y_r
: # loop over 2nd order dimension
183 # two lists of half-range indices, e.g. j 0123, jr 7654
184 j
= list(range(i
, i
+ halfsize
))
185 jr
= list(range(i
+halfsize
, i
+ size
))
187 # invert if requested
188 if SVSHAPE
.invxyz
[2]: j_r
.reverse()
189 hz2
= halfsize
// 2 # zero stops reversing 1-item lists
190 # if you *really* want to do the in-place swapping manually,
191 # this allows you to do it. good luck...
192 if SVSHAPE
.submode2
== 0b01 and not inplace_mode
:
195 #print ("xform jr", jr)
196 # loop over 1st order dimension
198 for ci
, (jl
, jh
) in enumerate(zip(j
, jr
)):
200 # now depending on MODE return the index. inner butterfly
201 if SVSHAPE
.skip
== 0b00: # in [0b00, 0b10]:
202 result
= ri
[ji
[jl
]] # lower half
203 elif SVSHAPE
.skip
== 0b01: # in [0b01, 0b11]:
204 result
= ri
[ji
[jh
]] # upper half
206 # COS table pre-generated mode
207 if SVSHAPE
.skip
== 0b10: #
208 result
= k
# cos table offset
210 # COS table generated on-demand ("Vertical-First") mode
211 if SVSHAPE
.skip
== 0b10: #
212 result
= ci
# coefficient helper
213 elif SVSHAPE
.skip
== 0b11: #
214 result
= size
# coefficient helper
216 ((y_end
and z_end
)<<1) |
217 ((y_end
and x_end
and z_end
)<<2))
219 yield result
+ SVSHAPE
.offset
, loopends
224 for ci
, (jl
, jh
) in enumerate(zip(j
[:hz2
], jr
[:hz2
])):
226 #print ("inplace swap", jh, jlh)
227 tmp1
, tmp2
= ji
[jlh
], ji
[jh
]
228 ji
[jlh
], ji
[jh
] = tmp2
, tmp1
230 # new k_start point for cos tables( runs inside x_r loop NOT i loop)
234 # python "yield" can be iterated. use this to make it clear how
235 # the indices are generated by using natural-looking nested loops
236 def iterate_dct_outer_butterfly_indices(SVSHAPE
):
237 # get indices to iterate over, in the required order
239 mode
= SVSHAPE
.lims
[1]
240 # createing lists of indices to iterate over in each dimension
241 # has to be done dynamically, because it depends on the size
242 # first, the size-based loop (which can be done statically)
248 # invert order if requested
249 if SVSHAPE
.invxyz
[0]:
255 #print ("outer butterfly")
257 # reference (read/write) the in-place data in *reverse-bit-order*
259 if SVSHAPE
.submode2
== 0b11:
260 levels
= n
.bit_length() - 1
261 ri
= [ri
[reverse_bits(i
, levels
)] for i
in range(n
)]
263 # reference list for not needing to do data-swaps, just swap what
264 # *indices* are referenced (two levels of indirection at the moment)
265 # pre-reverse the data-swap list so that it *ends up* in the order 0123..
267 inplace_mode
= False # need the space... SVSHAPE.skip in [0b10, 0b11]
269 #print ("inplace mode", SVSHAPE.skip)
270 ji
= halfrev2(ji
, True)
275 # start an infinite (wrapping) loop
280 for size
in x_r
: # loop over 3rd order dimension (size)
282 x_end
= size
== x_r
[-1]
283 y_r
= list(range(0, halfsize
))
284 #print ("itersum", halfsize, size, y_r)
285 # invert if requested
286 if SVSHAPE
.invxyz
[1]: y_r
.reverse()
287 for i
in y_r
: # loop over 2nd order dimension
289 # one list to create iterative-sum schedule
290 jr
= list(range(i
+halfsize
, i
+n
-halfsize
, size
))
291 #print ("itersum jr", i+halfsize, i+size, jr)
292 # invert if requested
293 if SVSHAPE
.invxyz
[2]: j_r
.reverse()
294 hz2
= halfsize
// 2 # zero stops reversing 1-item lists
296 for ci
, jh
in enumerate(jr
): # loop over 1st order dimension
298 #print (" itersum", size, i, jh, jh+size)
300 # COS table pre-generated mode
301 if SVSHAPE
.skip
== 0b00: # in [0b00, 0b10]:
302 result
= ri
[ji
[jh
]] # lower half
303 elif SVSHAPE
.skip
== 0b01: # in [0b01, 0b11]:
304 result
= ri
[ji
[jh
+size
]] # upper half
305 elif SVSHAPE
.skip
== 0b10: #
306 result
= k
# cos table offset
308 # COS table generated on-demand ("Vertical-First") mode
309 if SVSHAPE
.skip
== 0b00: # in [0b00, 0b10]:
310 result
= ri
[ji
[jh
]] # lower half
311 elif SVSHAPE
.skip
== 0b01: # in [0b01, 0b11]:
312 result
= ri
[ji
[jh
+size
]] # upper half
313 elif SVSHAPE
.skip
== 0b10: #
314 result
= ci
# coefficient helper
315 elif SVSHAPE
.skip
== 0b11: #
316 result
= size
# coefficient helper
318 ((y_end
and z_end
)<<1) |
319 ((y_end
and x_end
and z_end
)<<2))
321 yield result
+ SVSHAPE
.offset
, loopends
325 if SVSHAPE
.submode2
== 0b11 and inplace_mode
:
326 j
= list(range(i
, i
+ halfsize
))
327 jr
= list(range(i
+halfsize
, i
+ size
))
329 for ci
, (jl
, jh
) in enumerate(zip(j
[:hz2
], jr
[:hz2
])):
331 #print ("inplace swap", jh, jlh)
332 tmp1
, tmp2
= ji
[jlh
], ji
[jh
]
333 ji
[jlh
], ji
[jh
] = tmp2
, tmp1
335 # new k_start point for cos tables( runs inside x_r loop NOT i loop)
339 def pprint_schedule(schedule
, n
):
344 tablestep
= n
// size
345 print ("size %d halfsize %d tablestep %d" % \
346 (size
, halfsize
, tablestep
))
347 for i
in range(0, n
, size
):
348 prefix
= "i %d\t" % i
349 for j
in range(i
, i
+ halfsize
):
350 (jl
, je
), (jh
, he
) = schedule
[idx
]
351 print (" %-3d\t%s j=%-2d jh=%-2d "
352 "j[jl=%-2d] j[jh=%-2d]" % \
353 (idx
, prefix
, j
, j
+halfsize
,
356 "end", bin(je
)[2:], bin(je
)[2:])
360 def pprint_schedule_outer(schedule
, n
):
365 tablestep
= n
// size
366 print ("size %d halfsize %d tablestep %d" % \
367 (size
, halfsize
, tablestep
))
368 y_r
= list(range(0, halfsize
))
370 prefix
= "i %d\t" % i
371 jr
= list(range(i
+halfsize
, i
+n
-halfsize
, size
))
373 (jl
, je
), (jh
, he
) = schedule
[idx
]
374 print (" %-3d\t%s j=%-2d jh=%-2d "
375 "j[jl=%-2d] j[jh=%-2d]" % \
376 (idx
, prefix
, j
, j
+halfsize
,
379 "end", bin(je
)[2:], bin(je
)[2:])
384 # totally cool *in-place* DCT algorithm using yield REMAPs
390 print ("transform2", n
)
391 levels
= n
.bit_length() - 1
396 # reference (read/write) the in-place data in *reverse-bit-order*
398 ri
= [ri
[reverse_bits(i
, levels
)] for i
in range(n
)]
400 # and pretend we LDed data in half-swapped *and* bit-reversed order as well
401 # TODO: merge these two
402 vec
= halfrev2(vec
, False)
403 vec
= [vec
[ri
[i
]] for i
in range(n
)]
405 # create a cos table: not strictly necessary but here for illustrative
406 # purposes, to demonstrate the point that it really *is* iterative.
407 # this table could be cached and used multiple times rather than
408 # computed every time.
413 for ci
in range(halfsize
):
414 coeff
= (math
.cos((ci
+ 0.5) * math
.pi
/ size
) * 2.0)
416 print ("coeff", size
, "ci", ci
, "k", len(ctable
)-1,
417 "i/n", (ci
+0.5)/size
, coeff
)
425 SVSHAPE0
.lims
= [xdim
, 4, 0]
427 SVSHAPE0
.submode2
= 0b01
429 SVSHAPE0
.offset
= 0 # experiment with different offset, here
430 SVSHAPE0
.invxyz
= [1,0,0] # inversion if desired
433 SVSHAPE1
.lims
= [xdim
, 4, 0]
435 SVSHAPE1
.submode2
= 0b01
437 SVSHAPE1
.offset
= 0 # experiment with different offset, here
438 SVSHAPE1
.invxyz
= [1,0,0] # inversion if desired
441 SVSHAPE2
.lims
= [xdim
, 4, 0]
443 SVSHAPE2
.submode2
= 0b01
445 SVSHAPE2
.offset
= 0 # experiment with different offset, here
446 SVSHAPE2
.invxyz
= [1,0,0] # inversion if desired
448 # enumerate over the iterator function, getting new indices
449 i0
= iterate_dct_inner_costable_indices(SVSHAPE0
)
450 i1
= iterate_dct_inner_costable_indices(SVSHAPE1
)
451 i2
= iterate_dct_inner_costable_indices(SVSHAPE2
)
452 for ((ci
, cie
), (size
, sze
), (k
, ke
)) in \
454 print ("xform2 cos", ci
, size
, k
)
455 coeff
= (math
.cos((ci
+ 0.5) * math
.pi
/ size
) * 2.0)
456 assert coeff
== ctable
[k
]
457 print ("coeff", size
, "ci", ci
, "k", k
,
458 "i/n", (ci
+0.5)/size
, coeff
,
459 "end", bin(cie
), bin(sze
), bin(ke
))
460 if cie
== 0b111: # all loops end
469 SVSHAPE0
.lims
= [xdim
, 0b000001, 0]
471 SVSHAPE0
.submode2
= 0b01
473 SVSHAPE0
.offset
= 0 # experiment with different offset, here
474 SVSHAPE0
.invxyz
= [1,0,0] # inversion if desired
475 # j+halfstep schedule
477 SVSHAPE1
.lims
= [xdim
, 0b000001, 0]
479 SVSHAPE1
.submode2
= 0b01
481 SVSHAPE1
.offset
= 0 # experiment with different offset, here
482 SVSHAPE1
.invxyz
= [1,0,0] # inversion if desired
485 SVSHAPE2
.lims
= [xdim
, 0b000001, 0]
487 SVSHAPE2
.submode2
= 0b01
489 SVSHAPE2
.offset
= 0 # experiment with different offset, here
490 SVSHAPE2
.invxyz
= [1,0,0] # inversion if desired
493 SVSHAPE3
.lims
= [xdim
, 0b000001, 0]
495 SVSHAPE3
.submode2
= 0b01
497 SVSHAPE3
.offset
= 0 # experiment with different offset, here
498 SVSHAPE3
.invxyz
= [1,0,0] # inversion if desired
500 # enumerate over the iterator function, getting new indices
501 i0
= iterate_dct_inner_butterfly_indices(SVSHAPE0
)
502 i1
= iterate_dct_inner_butterfly_indices(SVSHAPE1
)
503 i2
= iterate_dct_inner_butterfly_indices(SVSHAPE2
)
504 i3
= iterate_dct_inner_butterfly_indices(SVSHAPE3
)
505 for k
, ((jl
, jle
), (jh
, jhe
), (ci
, cie
), (size
, sze
)) in \
506 enumerate(zip(i0
, i1
, i2
, i3
)):
507 t1
, t2
= vec
[jl
], vec
[jh
]
508 print ("xform2", jl
, jh
, ci
, size
)
509 coeff
= (math
.cos((ci
+ 0.5) * math
.pi
/ size
) * 2.0)
510 #assert coeff == ctable[k]
512 vec
[jh
] = (t1
- t2
) * (1/coeff
)
513 print ("coeff", size
, "ci", ci
,
515 "i/n", (ci
+0.5)/size
, coeff
, vec
[jl
],
517 "end", bin(jle
), bin(jhe
))
518 if jle
== 0b111: # all loops end
521 print("transform2 pre-itersum", vec
)
523 # now things are in the right order for the outer butterfly.
527 SVSHAPE0
.lims
= [xdim
, 0b0000010, 0]
528 SVSHAPE0
.submode2
= 0b100
531 SVSHAPE0
.offset
= 0 # experiment with different offset, here
532 SVSHAPE0
.invxyz
= [0,0,0] # inversion if desired
533 # j+halfstep schedule
535 SVSHAPE1
.lims
= [xdim
, 0b0000010, 0]
537 SVSHAPE1
.submode2
= 0b100
539 SVSHAPE1
.offset
= 0 # experiment with different offset, here
540 SVSHAPE1
.invxyz
= [0,0,0] # inversion if desired
542 # enumerate over the iterator function, getting new indices
543 i0
= iterate_dct_outer_butterfly_indices(SVSHAPE0
)
544 i1
= iterate_dct_outer_butterfly_indices(SVSHAPE1
)
545 for k
, ((jl
, jle
), (jh
, jhe
)) in enumerate(zip(i0
, i1
)):
546 print ("itersum jr", jl
, jh
,
547 "end", bin(jle
), bin(jhe
))
550 if jle
== 0b111: # all loops end
553 print("transform2 result", vec
)
559 # set the dimension sizes here
562 ydim
= 0 # not needed
563 zdim
= 0 # again, not needed
575 SVSHAPE0
.lims
= [xdim
, 0b000001, zdim
]
576 SVSHAPE0
.submode2
= 0b010
579 SVSHAPE0
.offset
= 0 # experiment with different offset, here
580 SVSHAPE0
.invxyz
= [0,0,0] # inversion if desired
581 # j+halfstep schedule
583 SVSHAPE1
.lims
= [xdim
, 0b000001, zdim
]
584 SVSHAPE1
.submode2
= 0b010
587 SVSHAPE1
.offset
= 0 # experiment with different offset, here
588 SVSHAPE1
.invxyz
= [0,0,0] # inversion if desired
590 # enumerate over the iterator function, getting new indices
592 i0
= iterate_dct_inner_butterfly_indices(SVSHAPE0
)
593 i1
= iterate_dct_inner_butterfly_indices(SVSHAPE1
)
594 for idx
, (jl
, jh
) in enumerate(zip(i0
, i1
)):
595 schedule
.append((jl
, jh
))
596 if jl
[1] == 0b111: # end
599 # ok now pretty-print the results, with some debug output
600 print ("inner butterfly")
601 pprint_schedule(schedule
, n
)
610 SVSHAPE0
.lims
= [xdim
, 0b000010, zdim
]
612 SVSHAPE0
.submode2
= 0b100
614 SVSHAPE0
.offset
= 0 # experiment with different offset, here
615 SVSHAPE0
.invxyz
= [1,0,0] # inversion if desired
616 # j+halfstep schedule
618 SVSHAPE1
.lims
= [xdim
, 0b000010, zdim
]
620 SVSHAPE1
.submode2
= 0b100
622 SVSHAPE1
.offset
= 0 # experiment with different offset, here
623 SVSHAPE1
.invxyz
= [1,0,0] # inversion if desired
625 # enumerate over the iterator function, getting new indices
627 i0
= iterate_dct_outer_butterfly_indices(SVSHAPE0
)
628 i1
= iterate_dct_outer_butterfly_indices(SVSHAPE1
)
629 for idx
, (jl
, jh
) in enumerate(zip(i0
, i1
)):
630 schedule
.append((jl
, jh
))
631 if jl
[1] == 0b111: # end
634 # ok now pretty-print the results, with some debug output
635 print ("outer butterfly")
636 pprint_schedule_outer(schedule
, n
)
639 if __name__
== '__main__':