1 # DCT "REMAP" scheduler
3 # Modifications made to create an in-place iterative DCT:
4 # Copyright (c) 2021 Luke Kenneth Casson Leighton <lkcl@lkcl.net>
8 # Original fastdctlee.py by Nayuki:
9 # Copyright (c) 2020 Project Nayuki. (MIT License)
10 # https://www.nayuki.io/page/fast-discrete-cosine-transform-algorithms
14 # bits of the integer 'val'.
15 def reverse_bits(val
, width
):
17 for _
in range(width
):
18 result
= (result
<< 1) |
(val
& 1)
23 # iterative version of [recursively-applied] half-rev.
24 # relies on the list lengths being power-of-two and the fact
25 # that bit-inversion of a list of binary numbers is the same
26 # as reversing the order of the list
27 # this version is dead easy to implement in hardware.
28 # a big surprise is that the half-reversal can be done with
29 # such a simple XOR. the inverse operation is slightly trickier
30 def halfrev2(vec
, pre_rev
=True):
32 for i
in range(len(vec
)):
34 res
.append(i ^
(i
>>1))
38 for ji
in range(1, bl
):
44 # python "yield" can be iterated. use this to make it clear how
45 # the indices are generated by using natural-looking nested loops
46 def iterate_dct_inner_costable_indices(SVSHAPE
):
47 # get indices to iterate over, in the required order
49 mode
= SVSHAPE
.lims
[1]
50 print ("inner costable", mode
)
51 # creating lists of indices to iterate over in each dimension
52 # has to be done dynamically, because it depends on the size
53 # first, the size-based loop (which can be done statically)
59 # invert order if requested
69 # start an infinite (wrapping) loop
71 z_end
= 1 # doesn't exist in this, only 2 loops
74 for size
in x_r
: # loop over 3rd order dimension (size)
75 x_end
= size
== x_r
[-1]
76 # y_r schedule depends on size
79 for i
in range(0, n
, size
):
82 if SVSHAPE
.invxyz
[1]: y_r
.reverse()
83 # two lists of half-range indices, e.g. j 0123, jr 7654
84 j
= list(range(0, halfsize
))
86 if SVSHAPE
.invxyz
[2]: j_r
.reverse()
87 #print ("xform jr", jr)
88 # loop over 1st order dimension
89 for ci
, jl
in enumerate(j
):
91 # now depending on MODE return the index. inner butterfly
92 if SVSHAPE
.skip
== 0b00: # in [0b00, 0b10]:
93 result
= k
# offset into COS table
94 elif SVSHAPE
.skip
== 0b10: #
95 result
= ci
# coefficient helper
96 elif SVSHAPE
.skip
== 0b11: #
97 result
= size
# coefficient helper
99 ((y_end
and z_end
)<<1) |
100 ((y_end
and x_end
and z_end
)<<2))
102 yield result
+ SVSHAPE
.offset
, loopends
105 # python "yield" can be iterated. use this to make it clear how
106 # the indices are generated by using natural-looking nested loops
107 def iterate_dct_inner_butterfly_indices(SVSHAPE
):
108 # get indices to iterate over, in the required order
110 mode
= SVSHAPE
.lims
[1]
111 #print ("inner butterfly", mode, SVSHAPE.skip)
112 # creating lists of indices to iterate over in each dimension
113 # has to be done dynamically, because it depends on the size
114 # first, the size-based loop (which can be done statically)
120 # invert order if requested
121 if SVSHAPE
.invxyz
[0]:
127 # reference (read/write) the in-place data in *reverse-bit-order*
129 if SVSHAPE
.submode2
== 0b01:
130 levels
= n
.bit_length() - 1
131 ri
= [ri
[reverse_bits(i
, levels
)] for i
in range(n
)]
133 # reference list for not needing to do data-swaps, just swap what
134 # *indices* are referenced (two levels of indirection at the moment)
135 # pre-reverse the data-swap list so that it *ends up* in the order 0123..
137 inplace_mode
= SVSHAPE
.submode2
== 0b01
138 # and SVSHAPE.skip not in [0b10, 0b11]
140 #print ("inplace mode")
141 ji
= halfrev2(ji
, True)
146 # start an infinite (wrapping) loop
151 for size
in x_r
: # loop over 3rd order dimension (size)
152 x_end
= size
== x_r
[-1]
153 # y_r schedule depends on size
156 for i
in range(0, n
, size
):
158 # invert if requested
159 if SVSHAPE
.invxyz
[1]: y_r
.reverse()
160 for i
in y_r
: # loop over 2nd order dimension
162 # two lists of half-range indices, e.g. j 0123, jr 7654
163 j
= list(range(i
, i
+ halfsize
))
164 jr
= list(range(i
+halfsize
, i
+ size
))
166 # invert if requested
167 if SVSHAPE
.invxyz
[2]: j_r
.reverse()
168 hz2
= halfsize
// 2 # zero stops reversing 1-item lists
169 # if you *really* want to do the in-place swapping manually,
170 # this allows you to do it. good luck...
171 if SVSHAPE
.submode2
== 0b01 and not inplace_mode
:
174 #print ("xform jr", jr)
175 # loop over 1st order dimension
177 for ci
, (jl
, jh
) in enumerate(zip(j
, jr
)):
179 # now depending on MODE return the index. inner butterfly
180 if SVSHAPE
.skip
== 0b00: # in [0b00, 0b10]:
181 result
= ri
[ji
[jl
]] # lower half
182 elif SVSHAPE
.skip
== 0b01: # in [0b01, 0b11]:
183 result
= ri
[ji
[jh
]] # upper half
185 # COS table pre-generated mode
186 if SVSHAPE
.skip
== 0b10: #
187 result
= k
# cos table offset
189 # COS table generated on-demand ("Vertical-First") mode
190 if SVSHAPE
.skip
== 0b10: #
191 result
= ci
# coefficient helper
192 elif SVSHAPE
.skip
== 0b11: #
193 result
= size
# coefficient helper
195 ((y_end
and z_end
)<<1) |
196 ((y_end
and x_end
and z_end
)<<2))
198 yield result
+ SVSHAPE
.offset
, loopends
203 for ci
, (jl
, jh
) in enumerate(zip(j
[:hz2
], jr
[:hz2
])):
205 #print ("inplace swap", jh, jlh)
206 tmp1
, tmp2
= ji
[jlh
], ji
[jh
]
207 ji
[jlh
], ji
[jh
] = tmp2
, tmp1
209 # new k_start point for cos tables( runs inside x_r loop NOT i loop)
213 # python "yield" can be iterated. use this to make it clear how
214 # the indices are generated by using natural-looking nested loops
215 def iterate_dct_outer_butterfly_indices(SVSHAPE
):
216 # get indices to iterate over, in the required order
218 mode
= SVSHAPE
.lims
[1]
219 # createing lists of indices to iterate over in each dimension
220 # has to be done dynamically, because it depends on the size
221 # first, the size-based loop (which can be done statically)
227 # invert order if requested
228 if SVSHAPE
.invxyz
[0]:
234 #print ("outer butterfly")
236 # reference (read/write) the in-place data in *reverse-bit-order*
238 if SVSHAPE
.submode2
== 0b11:
239 levels
= n
.bit_length() - 1
240 ri
= [ri
[reverse_bits(i
, levels
)] for i
in range(n
)]
242 # reference list for not needing to do data-swaps, just swap what
243 # *indices* are referenced (two levels of indirection at the moment)
244 # pre-reverse the data-swap list so that it *ends up* in the order 0123..
246 inplace_mode
= False # need the space... SVSHAPE.skip in [0b10, 0b11]
248 #print ("inplace mode", SVSHAPE.skip)
249 ji
= halfrev2(ji
, True)
254 # start an infinite (wrapping) loop
259 for size
in x_r
: # loop over 3rd order dimension (size)
261 x_end
= size
== x_r
[-1]
262 y_r
= list(range(0, halfsize
))
263 #print ("itersum", halfsize, size, y_r)
264 # invert if requested
265 if SVSHAPE
.invxyz
[1]: y_r
.reverse()
266 for i
in y_r
: # loop over 2nd order dimension
268 # one list to create iterative-sum schedule
269 jr
= list(range(i
+halfsize
, i
+n
-halfsize
, size
))
270 #print ("itersum jr", i+halfsize, i+size, jr)
271 # invert if requested
272 if SVSHAPE
.invxyz
[2]: j_r
.reverse()
273 hz2
= halfsize
// 2 # zero stops reversing 1-item lists
275 for ci
, jh
in enumerate(jr
): # loop over 1st order dimension
277 #print (" itersum", size, i, jh, jh+size)
279 # COS table pre-generated mode
280 if SVSHAPE
.skip
== 0b00: # in [0b00, 0b10]:
281 result
= ri
[ji
[jh
]] # lower half
282 elif SVSHAPE
.skip
== 0b01: # in [0b01, 0b11]:
283 result
= ri
[ji
[jh
+size
]] # upper half
284 elif SVSHAPE
.skip
== 0b10: #
285 result
= k
# cos table offset
287 # COS table generated on-demand ("Vertical-First") mode
288 if SVSHAPE
.skip
== 0b00: # in [0b00, 0b10]:
289 result
= ri
[ji
[jh
]] # lower half
290 elif SVSHAPE
.skip
== 0b01: # in [0b01, 0b11]:
291 result
= ri
[ji
[jh
+size
]] # upper half
292 elif SVSHAPE
.skip
== 0b10: #
293 result
= ci
# coefficient helper
294 elif SVSHAPE
.skip
== 0b11: #
295 result
= size
# coefficient helper
297 ((y_end
and z_end
)<<1) |
298 ((y_end
and x_end
and z_end
)<<2))
300 yield result
+ SVSHAPE
.offset
, loopends
304 if SVSHAPE
.submode2
== 0b11 and inplace_mode
:
305 j
= list(range(i
, i
+ halfsize
))
306 jr
= list(range(i
+halfsize
, i
+ size
))
308 for ci
, (jl
, jh
) in enumerate(zip(j
[:hz2
], jr
[:hz2
])):
310 #print ("inplace swap", jh, jlh)
311 tmp1
, tmp2
= ji
[jlh
], ji
[jh
]
312 ji
[jlh
], ji
[jh
] = tmp2
, tmp1
314 # new k_start point for cos tables( runs inside x_r loop NOT i loop)
318 def pprint_schedule(schedule
, n
):
323 tablestep
= n
// size
324 print ("size %d halfsize %d tablestep %d" % \
325 (size
, halfsize
, tablestep
))
326 for i
in range(0, n
, size
):
327 prefix
= "i %d\t" % i
328 for j
in range(i
, i
+ halfsize
):
329 (jl
, je
), (jh
, he
) = schedule
[idx
]
330 print (" %-3d\t%s j=%-2d jh=%-2d "
331 "j[jl=%-2d] j[jh=%-2d]" % \
332 (idx
, prefix
, j
, j
+halfsize
,
335 "end", bin(je
)[2:], bin(je
)[2:])
339 def pprint_schedule_outer(schedule
, n
):
344 tablestep
= n
// size
345 print ("size %d halfsize %d tablestep %d" % \
346 (size
, halfsize
, tablestep
))
347 y_r
= list(range(0, halfsize
))
349 prefix
= "i %d\t" % i
350 jr
= list(range(i
+halfsize
, i
+n
-halfsize
, size
))
352 (jl
, je
), (jh
, he
) = schedule
[idx
]
353 print (" %-3d\t%s j=%-2d jh=%-2d "
354 "j[jl=%-2d] j[jh=%-2d]" % \
355 (idx
, prefix
, j
, j
+halfsize
,
358 "end", bin(je
)[2:], bin(je
)[2:])
363 # totally cool *in-place* DCT algorithm using yield REMAPs
369 print ("transform2", n
)
370 levels
= n
.bit_length() - 1
375 # reference (read/write) the in-place data in *reverse-bit-order*
377 ri
= [ri
[reverse_bits(i
, levels
)] for i
in range(n
)]
379 # and pretend we LDed data in half-swapped *and* bit-reversed order as well
380 # TODO: merge these two
381 vec
= halfrev2(vec
, False)
382 vec
= [vec
[ri
[i
]] for i
in range(n
)]
384 # create a cos table: not strictly necessary but here for illustrative
385 # purposes, to demonstrate the point that it really *is* iterative.
386 # this table could be cached and used multiple times rather than
387 # computed every time.
392 for ci
in range(halfsize
):
393 coeff
= (math
.cos((ci
+ 0.5) * math
.pi
/ size
) * 2.0)
395 print ("coeff", size
, "ci", ci
, "k", len(ctable
)-1,
396 "i/n", (ci
+0.5)/size
, coeff
)
404 SVSHAPE0
.lims
= [xdim
, 4, 0]
406 SVSHAPE0
.submode2
= 0b01
408 SVSHAPE0
.offset
= 0 # experiment with different offset, here
409 SVSHAPE0
.invxyz
= [1,0,0] # inversion if desired
412 SVSHAPE1
.lims
= [xdim
, 4, 0]
414 SVSHAPE1
.submode2
= 0b01
416 SVSHAPE1
.offset
= 0 # experiment with different offset, here
417 SVSHAPE1
.invxyz
= [1,0,0] # inversion if desired
420 SVSHAPE2
.lims
= [xdim
, 4, 0]
422 SVSHAPE2
.submode2
= 0b01
424 SVSHAPE2
.offset
= 0 # experiment with different offset, here
425 SVSHAPE2
.invxyz
= [1,0,0] # inversion if desired
427 # enumerate over the iterator function, getting new indices
428 i0
= iterate_dct_inner_costable_indices(SVSHAPE0
)
429 i1
= iterate_dct_inner_costable_indices(SVSHAPE1
)
430 i2
= iterate_dct_inner_costable_indices(SVSHAPE2
)
431 for ((ci
, cie
), (size
, sze
), (k
, ke
)) in \
433 print ("xform2 cos", ci
, size
, k
)
434 coeff
= (math
.cos((ci
+ 0.5) * math
.pi
/ size
) * 2.0)
435 assert coeff
== ctable
[k
]
436 print ("coeff", size
, "ci", ci
, "k", k
,
437 "i/n", (ci
+0.5)/size
, coeff
,
438 "end", bin(cie
), bin(sze
), bin(ke
))
439 if cie
== 0b111: # all loops end
448 SVSHAPE0
.lims
= [xdim
, 0b000001, 0]
450 SVSHAPE0
.submode2
= 0b01
452 SVSHAPE0
.offset
= 0 # experiment with different offset, here
453 SVSHAPE0
.invxyz
= [1,0,0] # inversion if desired
454 # j+halfstep schedule
456 SVSHAPE1
.lims
= [xdim
, 0b000001, 0]
458 SVSHAPE1
.submode2
= 0b01
460 SVSHAPE1
.offset
= 0 # experiment with different offset, here
461 SVSHAPE1
.invxyz
= [1,0,0] # inversion if desired
464 SVSHAPE2
.lims
= [xdim
, 0b000001, 0]
466 SVSHAPE2
.submode2
= 0b01
468 SVSHAPE2
.offset
= 0 # experiment with different offset, here
469 SVSHAPE2
.invxyz
= [1,0,0] # inversion if desired
472 SVSHAPE3
.lims
= [xdim
, 0b000001, 0]
474 SVSHAPE3
.submode2
= 0b01
476 SVSHAPE3
.offset
= 0 # experiment with different offset, here
477 SVSHAPE3
.invxyz
= [1,0,0] # inversion if desired
479 # enumerate over the iterator function, getting new indices
480 i0
= iterate_dct_inner_butterfly_indices(SVSHAPE0
)
481 i1
= iterate_dct_inner_butterfly_indices(SVSHAPE1
)
482 i2
= iterate_dct_inner_butterfly_indices(SVSHAPE2
)
483 i3
= iterate_dct_inner_butterfly_indices(SVSHAPE3
)
484 for k
, ((jl
, jle
), (jh
, jhe
), (ci
, cie
), (size
, sze
)) in \
485 enumerate(zip(i0
, i1
, i2
, i3
)):
486 t1
, t2
= vec
[jl
], vec
[jh
]
487 print ("xform2", jl
, jh
, ci
, size
)
488 coeff
= (math
.cos((ci
+ 0.5) * math
.pi
/ size
) * 2.0)
489 #assert coeff == ctable[k]
491 vec
[jh
] = (t1
- t2
) * (1/coeff
)
492 print ("coeff", size
, "ci", ci
,
494 "i/n", (ci
+0.5)/size
, coeff
, vec
[jl
],
496 "end", bin(jle
), bin(jhe
))
497 if jle
== 0b111: # all loops end
500 print("transform2 pre-itersum", vec
)
502 # now things are in the right order for the outer butterfly.
506 SVSHAPE0
.lims
= [xdim
, 0b0000010, 0]
507 SVSHAPE0
.submode2
= 0b100
510 SVSHAPE0
.offset
= 0 # experiment with different offset, here
511 SVSHAPE0
.invxyz
= [0,0,0] # inversion if desired
512 # j+halfstep schedule
514 SVSHAPE1
.lims
= [xdim
, 0b0000010, 0]
516 SVSHAPE1
.submode2
= 0b100
518 SVSHAPE1
.offset
= 0 # experiment with different offset, here
519 SVSHAPE1
.invxyz
= [0,0,0] # inversion if desired
521 # enumerate over the iterator function, getting new indices
522 i0
= iterate_dct_outer_butterfly_indices(SVSHAPE0
)
523 i1
= iterate_dct_outer_butterfly_indices(SVSHAPE1
)
524 for k
, ((jl
, jle
), (jh
, jhe
)) in enumerate(zip(i0
, i1
)):
525 print ("itersum jr", jl
, jh
,
526 "end", bin(jle
), bin(jhe
))
529 if jle
== 0b111: # all loops end
532 print("transform2 result", vec
)
538 # set the dimension sizes here
541 ydim
= 0 # not needed
542 zdim
= 0 # again, not needed
554 SVSHAPE0
.lims
= [xdim
, 0b000001, zdim
]
555 SVSHAPE0
.submode2
= 0b010
558 SVSHAPE0
.offset
= 0 # experiment with different offset, here
559 SVSHAPE0
.invxyz
= [0,0,0] # inversion if desired
560 # j+halfstep schedule
562 SVSHAPE1
.lims
= [xdim
, 0b000001, zdim
]
563 SVSHAPE1
.submode2
= 0b010
566 SVSHAPE1
.offset
= 0 # experiment with different offset, here
567 SVSHAPE1
.invxyz
= [0,0,0] # inversion if desired
569 # enumerate over the iterator function, getting new indices
571 i0
= iterate_dct_inner_butterfly_indices(SVSHAPE0
)
572 i1
= iterate_dct_inner_butterfly_indices(SVSHAPE1
)
573 for idx
, (jl
, jh
) in enumerate(zip(i0
, i1
)):
574 schedule
.append((jl
, jh
))
575 if jl
[1] == 0b111: # end
578 # ok now pretty-print the results, with some debug output
579 print ("inner butterfly")
580 pprint_schedule(schedule
, n
)
589 SVSHAPE0
.lims
= [xdim
, 0b000010, zdim
]
591 SVSHAPE0
.submode2
= 0b100
593 SVSHAPE0
.offset
= 0 # experiment with different offset, here
594 SVSHAPE0
.invxyz
= [1,0,0] # inversion if desired
595 # j+halfstep schedule
597 SVSHAPE1
.lims
= [xdim
, 0b000010, zdim
]
599 SVSHAPE1
.submode2
= 0b100
601 SVSHAPE1
.offset
= 0 # experiment with different offset, here
602 SVSHAPE1
.invxyz
= [1,0,0] # inversion if desired
604 # enumerate over the iterator function, getting new indices
606 i0
= iterate_dct_outer_butterfly_indices(SVSHAPE0
)
607 i1
= iterate_dct_outer_butterfly_indices(SVSHAPE1
)
608 for idx
, (jl
, jh
) in enumerate(zip(i0
, i1
)):
609 schedule
.append((jl
, jh
))
610 if jl
[1] == 0b111: # end
613 # ok now pretty-print the results, with some debug output
614 print ("outer butterfly")
615 pprint_schedule_outer(schedule
, n
)
618 if __name__
== '__main__':