1 # DCT "REMAP" scheduler
3 # Modifications made to create an in-place iterative DCT:
4 # Copyright (c) 2021 Luke Kenneth Casson Leighton <lkcl@lkcl.net>
8 # Original fastdctlee.py by Nayuki:
9 # Copyright (c) 2020 Project Nayuki. (MIT License)
10 # https://www.nayuki.io/page/fast-discrete-cosine-transform-algorithms
14 # bits of the integer 'val'.
15 def reverse_bits(val
, width
):
17 for _
in range(width
):
18 result
= (result
<< 1) |
(val
& 1)
23 # iterative version of [recursively-applied] half-rev.
24 # relies on the list lengths being power-of-two and the fact
25 # that bit-inversion of a list of binary numbers is the same
26 # as reversing the order of the list
27 # this version is dead easy to implement in hardware.
28 # a big surprise is that the half-reversal can be done with
29 # such a simple XOR. the inverse operation is slightly trickier
30 def halfrev2(vec
, pre_rev
=True):
32 for i
in range(len(vec
)):
34 res
.append(i ^
(i
>>1))
38 for ji
in range(1, bl
):
44 # python "yield" can be iterated. use this to make it clear how
45 # the indices are generated by using natural-looking nested loops
46 def iterate_dct_inner_butterfly_indices(SVSHAPE
):
47 # get indices to iterate over, in the required order
49 mode
= SVSHAPE
.lims
[1]
50 #print ("inner butterfly", mode)
51 # creating lists of indices to iterate over in each dimension
52 # has to be done dynamically, because it depends on the size
53 # first, the size-based loop (which can be done statically)
59 # invert order if requested
66 # reference (read/write) the in-place data in *reverse-bit-order*
68 if SVSHAPE
.submode2
== 0b01:
69 levels
= n
.bit_length() - 1
70 ri
= [ri
[reverse_bits(i
, levels
)] for i
in range(n
)]
72 # reference list for not needing to do data-swaps, just swap what
73 # *indices* are referenced (two levels of indirection at the moment)
74 # pre-reverse the data-swap list so that it *ends up* in the order 0123..
76 inplace_mode
= SVSHAPE
.submode2
== 0b01
77 # and SVSHAPE.skip not in [0b10, 0b11]
79 #print ("inplace mode")
80 ji
= halfrev2(ji
, True)
85 # start an infinite (wrapping) loop
88 for size
in x_r
: # loop over 3rd order dimension (size)
89 x_end
= size
== x_r
[-1]
90 # y_r schedule depends on size
93 for i
in range(0, n
, size
):
96 if SVSHAPE
.invxyz
[1]: y_r
.reverse()
97 for i
in y_r
: # loop over 2nd order dimension
99 # two lists of half-range indices, e.g. j 0123, jr 7654
100 j
= list(range(i
, i
+ halfsize
))
101 jr
= list(range(i
+halfsize
, i
+ size
))
103 # invert if requested
104 if SVSHAPE
.invxyz
[2]: j_r
.reverse()
105 hz2
= halfsize
// 2 # zero stops reversing 1-item lists
106 # if you *really* want to do the in-place swapping manually,
107 # this allows you to do it. good luck...
108 if SVSHAPE
.submode2
== 0b01 and not inplace_mode
:
111 #print ("xform jr", jr)
112 # loop over 1st order dimension
113 for ci
, (jl
, jh
) in enumerate(zip(j
, jr
)):
115 # now depending on MODE return the index. inner butterfly
116 if SVSHAPE
.skip
== 0b00: # in [0b00, 0b10]:
117 result
= ri
[ji
[jl
]] # lower half
118 elif SVSHAPE
.skip
== 0b01: # in [0b01, 0b11]:
119 result
= ri
[ji
[jh
]] # upper half, reverse order
120 elif SVSHAPE
.skip
== 0b10: #
121 result
= ci
# coefficient helper
122 elif SVSHAPE
.skip
== 0b11: #
123 result
= size
# coefficient helper
125 ((y_end
and z_end
)<<1) |
126 ((y_end
and x_end
and z_end
)<<2))
128 yield result
+ SVSHAPE
.offset
, loopends
132 for ci
, (jl
, jh
) in enumerate(zip(j
[:hz2
], jr
[:hz2
])):
134 #print ("inplace swap", jh, jlh)
135 tmp1
, tmp2
= ji
[jlh
], ji
[jh
]
136 ji
[jlh
], ji
[jh
] = tmp2
, tmp1
139 # python "yield" can be iterated. use this to make it clear how
140 # the indices are generated by using natural-looking nested loops
141 def iterate_dct_outer_butterfly_indices(SVSHAPE
):
142 # get indices to iterate over, in the required order
144 mode
= SVSHAPE
.lims
[1]
145 # createing lists of indices to iterate over in each dimension
146 # has to be done dynamically, because it depends on the size
147 # first, the size-based loop (which can be done statically)
153 # invert order if requested
154 if SVSHAPE
.invxyz
[0]:
160 #print ("outer butterfly")
162 # reference (read/write) the in-place data in *reverse-bit-order*
164 if SVSHAPE
.submode2
== 0b11:
165 levels
= n
.bit_length() - 1
166 ri
= [ri
[reverse_bits(i
, levels
)] for i
in range(n
)]
168 # reference list for not needing to do data-swaps, just swap what
169 # *indices* are referenced (two levels of indirection at the moment)
170 # pre-reverse the data-swap list so that it *ends up* in the order 0123..
172 inplace_mode
= False # need the space... SVSHAPE.skip in [0b10, 0b11]
174 #print ("inplace mode", SVSHAPE.skip)
175 ji
= halfrev2(ji
, True)
180 # start an infinite (wrapping) loop
183 for size
in x_r
: # loop over 3rd order dimension (size)
185 x_end
= size
== x_r
[-1]
186 y_r
= list(range(0, halfsize
))
187 #print ("itersum", halfsize, size, y_r)
188 # invert if requested
189 if SVSHAPE
.invxyz
[1]: y_r
.reverse()
190 for i
in y_r
: # loop over 2nd order dimension
192 # one list to create iterative-sum schedule
193 jr
= list(range(i
+halfsize
, i
+n
-halfsize
, size
))
194 #print ("itersum jr", i+halfsize, i+size, jr)
195 # invert if requested
196 if SVSHAPE
.invxyz
[2]: j_r
.reverse()
197 hz2
= halfsize
// 2 # zero stops reversing 1-item lists
198 for ci
, jh
in enumerate(jr
): # loop over 1st order dimension
200 #print (" itersum", size, i, jh, jh+size)
201 if SVSHAPE
.skip
== 0b00: # in [0b00, 0b10]:
202 result
= ri
[ji
[jh
]] # lower half
203 elif SVSHAPE
.skip
== 0b01: # in [0b01, 0b11]:
204 result
= ri
[ji
[jh
+size
]] # upper half
205 elif SVSHAPE
.skip
== 0b10: #
206 result
= ci
# coefficient helper
207 elif SVSHAPE
.skip
== 0b11: #
208 result
= size
# coefficient helper
210 ((y_end
and z_end
)<<1) |
211 ((y_end
and x_end
and z_end
)<<2))
213 yield result
+ SVSHAPE
.offset
, loopends
216 if SVSHAPE
.submode2
== 0b11 and inplace_mode
:
217 j
= list(range(i
, i
+ halfsize
))
218 jr
= list(range(i
+halfsize
, i
+ size
))
220 for ci
, (jl
, jh
) in enumerate(zip(j
[:hz2
], jr
[:hz2
])):
222 #print ("inplace swap", jh, jlh)
223 tmp1
, tmp2
= ji
[jlh
], ji
[jh
]
224 ji
[jlh
], ji
[jh
] = tmp2
, tmp1
227 def pprint_schedule(schedule
, n
):
232 tablestep
= n
// size
233 print ("size %d halfsize %d tablestep %d" % \
234 (size
, halfsize
, tablestep
))
235 for i
in range(0, n
, size
):
236 prefix
= "i %d\t" % i
237 for j
in range(i
, i
+ halfsize
):
238 (jl
, je
), (jh
, he
) = schedule
[idx
]
239 print (" %-3d\t%s j=%-2d jh=%-2d "
240 "j[jl=%-2d] j[jh=%-2d]" % \
241 (idx
, prefix
, j
, j
+halfsize
,
244 "end", bin(je
)[2:], bin(je
)[2:])
248 def pprint_schedule_outer(schedule
, n
):
253 tablestep
= n
// size
254 print ("size %d halfsize %d tablestep %d" % \
255 (size
, halfsize
, tablestep
))
256 y_r
= list(range(0, halfsize
))
258 prefix
= "i %d\t" % i
259 jr
= list(range(i
+halfsize
, i
+n
-halfsize
, size
))
261 (jl
, je
), (jh
, he
) = schedule
[idx
]
262 print (" %-3d\t%s j=%-2d jh=%-2d "
263 "j[jl=%-2d] j[jh=%-2d]" % \
264 (idx
, prefix
, j
, j
+halfsize
,
267 "end", bin(je
)[2:], bin(je
)[2:])
272 # totally cool *in-place* DCT algorithm using yield REMAPs
278 print ("transform2", n
)
279 levels
= n
.bit_length() - 1
281 # reference (read/write) the in-place data in *reverse-bit-order*
283 ri
= [ri
[reverse_bits(i
, levels
)] for i
in range(n
)]
285 # and pretend we LDed data in half-swapped *and* bit-reversed order as well
286 # TODO: merge these two
287 vec
= halfrev2(vec
, False)
288 vec
= [vec
[ri
[i
]] for i
in range(n
)]
290 # create a cos table: not strictly necessary but here for illustrative
291 # purposes, to demonstrate the point that it really *is* iterative.
292 # this table could be cached and used multiple times rather than
293 # computed every time.
298 for i
in range(n
//size
):
299 for ci
in range(halfsize
):
300 ctable
.append((math
.cos((ci
+ 0.5) * math
.pi
/ size
) * 2.0))
315 SVSHAPE0
.lims
= [xdim
, 0b000001, zdim
]
317 SVSHAPE0
.submode2
= 0b01
319 SVSHAPE0
.offset
= 0 # experiment with different offset, here
320 SVSHAPE0
.invxyz
= [1,0,0] # inversion if desired
321 # j+halfstep schedule
323 SVSHAPE1
.lims
= [xdim
, 0b000001, zdim
]
325 SVSHAPE1
.submode2
= 0b01
327 SVSHAPE1
.offset
= 0 # experiment with different offset, here
328 SVSHAPE1
.invxyz
= [1,0,0] # inversion if desired
331 SVSHAPE2
.lims
= [xdim
, 0b000001, zdim
]
333 SVSHAPE2
.submode2
= 0b01
335 SVSHAPE2
.offset
= 0 # experiment with different offset, here
336 SVSHAPE2
.invxyz
= [1,0,0] # inversion if desired
339 SVSHAPE3
.lims
= [xdim
, 0b000001, zdim
]
341 SVSHAPE3
.submode2
= 0b01
343 SVSHAPE3
.offset
= 0 # experiment with different offset, here
344 SVSHAPE3
.invxyz
= [1,0,0] # inversion if desired
346 # enumerate over the iterator function, getting new indices
347 i0
= iterate_dct_inner_butterfly_indices(SVSHAPE0
)
348 i1
= iterate_dct_inner_butterfly_indices(SVSHAPE1
)
349 i2
= iterate_dct_inner_butterfly_indices(SVSHAPE2
)
350 i3
= iterate_dct_inner_butterfly_indices(SVSHAPE3
)
351 for k
, ((jl
, jle
), (jh
, jhe
), (ci
, cie
), (size
, sze
)) in \
352 enumerate(zip(i0
, i1
, i2
, i3
)):
353 t1
, t2
= vec
[jl
], vec
[jh
]
354 print ("xform2", jl
, jh
, ci
, size
)
355 coeff
= (math
.cos((ci
+ 0.5) * math
.pi
/ size
) * 2.0)
356 assert coeff
== ctable
[k
]
358 vec
[jh
] = (t1
- t2
) * (1/coeff
)
359 print ("coeff", size
, i
, "ci", ci
,
361 "i/n", (ci
+0.5)/size
, coeff
, vec
[jl
],
363 "end", bin(jle
), bin(jhe
))
364 if jle
== 0b111: # all loops end
367 print("transform2 pre-itersum", vec
)
369 # now things are in the right order for the outer butterfly.
373 SVSHAPE0
.lims
= [xdim
, 0b0000010, zdim
]
374 SVSHAPE0
.submode2
= 0b100
377 SVSHAPE0
.offset
= 0 # experiment with different offset, here
378 SVSHAPE0
.invxyz
= [0,0,0] # inversion if desired
379 # j+halfstep schedule
381 SVSHAPE1
.lims
= [xdim
, 0b0000010, zdim
]
383 SVSHAPE1
.submode2
= 0b100
385 SVSHAPE1
.offset
= 0 # experiment with different offset, here
386 SVSHAPE1
.invxyz
= [0,0,0] # inversion if desired
388 # enumerate over the iterator function, getting new indices
389 i0
= iterate_dct_outer_butterfly_indices(SVSHAPE0
)
390 i1
= iterate_dct_outer_butterfly_indices(SVSHAPE1
)
391 for k
, ((jl
, jle
), (jh
, jhe
)) in enumerate(zip(i0
, i1
)):
392 print ("itersum jr", jl
, jh
,
393 "end", bin(jle
), bin(jhe
))
396 if jle
== 0b111: # all loops end
399 print("transform2 result", vec
)
405 # set the dimension sizes here
408 ydim
= 0 # not needed
409 zdim
= 0 # again, not needed
421 SVSHAPE0
.lims
= [xdim
, 0b000001, zdim
]
422 SVSHAPE0
.submode2
= 0b010
425 SVSHAPE0
.offset
= 0 # experiment with different offset, here
426 SVSHAPE0
.invxyz
= [0,0,0] # inversion if desired
427 # j+halfstep schedule
429 SVSHAPE1
.lims
= [xdim
, 0b000001, zdim
]
430 SVSHAPE1
.submode2
= 0b010
433 SVSHAPE1
.offset
= 0 # experiment with different offset, here
434 SVSHAPE1
.invxyz
= [0,0,0] # inversion if desired
436 # enumerate over the iterator function, getting new indices
438 i0
= iterate_dct_inner_butterfly_indices(SVSHAPE0
)
439 i1
= iterate_dct_inner_butterfly_indices(SVSHAPE1
)
440 for idx
, (jl
, jh
) in enumerate(zip(i0
, i1
)):
441 schedule
.append((jl
, jh
))
442 if jl
[1] == 0b111: # end
445 # ok now pretty-print the results, with some debug output
446 print ("inner butterfly")
447 pprint_schedule(schedule
, n
)
456 SVSHAPE0
.lims
= [xdim
, 0b000010, zdim
]
458 SVSHAPE0
.submode2
= 0b100
460 SVSHAPE0
.offset
= 0 # experiment with different offset, here
461 SVSHAPE0
.invxyz
= [1,0,0] # inversion if desired
462 # j+halfstep schedule
464 SVSHAPE1
.lims
= [xdim
, 0b000010, zdim
]
466 SVSHAPE1
.submode2
= 0b100
468 SVSHAPE1
.offset
= 0 # experiment with different offset, here
469 SVSHAPE1
.invxyz
= [1,0,0] # inversion if desired
471 # enumerate over the iterator function, getting new indices
473 i0
= iterate_dct_outer_butterfly_indices(SVSHAPE0
)
474 i1
= iterate_dct_outer_butterfly_indices(SVSHAPE1
)
475 for idx
, (jl
, jh
) in enumerate(zip(i0
, i1
)):
476 schedule
.append((jl
, jh
))
477 if jl
[1] == 0b111: # end
480 # ok now pretty-print the results, with some debug output
481 print ("outer butterfly")
482 pprint_schedule_outer(schedule
, n
)
485 if __name__
== '__main__':