1 # DCT "REMAP" scheduler
3 # Modifications made to create an in-place iterative DCT:
4 # Copyright (c) 2021 Luke Kenneth Casson Leighton <lkcl@lkcl.net>
8 # Original fastdctlee.py by Nayuki:
9 # Copyright (c) 2020 Project Nayuki. (MIT License)
10 # https://www.nayuki.io/page/fast-discrete-cosine-transform-algorithms
14 # bits of the integer 'val'.
15 def reverse_bits(val
, width
):
17 for _
in range(width
):
18 result
= (result
<< 1) |
(val
& 1)
23 # iterative version of [recursively-applied] half-rev.
24 # relies on the list lengths being power-of-two and the fact
25 # that bit-inversion of a list of binary numbers is the same
26 # as reversing the order of the list
27 # this version is dead easy to implement in hardware.
28 # a big surprise is that the half-reversal can be done with
29 # such a simple XOR. the inverse operation is slightly trickier
30 def halfrev2(vec
, pre_rev
=True):
32 for i
in range(len(vec
)):
34 res
.append(i ^
(i
>>1))
38 for ji
in range(1, bl
):
44 # python "yield" can be iterated. use this to make it clear how
45 # the indices are generated by using natural-looking nested loops
46 def iterate_dct_inner_butterfly_indices(SVSHAPE
):
47 # get indices to iterate over, in the required order
49 # createing lists of indices to iterate over in each dimension
50 # has to be done dynamically, because it depends on the size
51 # first, the size-based loop (which can be done statically)
57 # invert order if requested
64 # reference (read/write) the in-place data in *reverse-bit-order*
66 if SVSHAPE
.mode
== 0b01:
67 levels
= n
.bit_length() - 1
68 ri
= [ri
[reverse_bits(i
, levels
)] for i
in range(n
)]
70 # reference list for not needing to do data-swaps, just swap what
71 # *indices* are referenced (two levels of indirection at the moment)
72 # pre-reverse the data-swap list so that it *ends up* in the order 0123..
74 inplace_mode
= SVSHAPE
.mode
== 0b01 and SVSHAPE
.skip
not in [0b10, 0b11]
76 #print ("inplace mode")
77 ji
= halfrev2(ji
, True)
82 # start an infinite (wrapping) loop
85 for size
in x_r
: # loop over 3rd order dimension (size)
86 x_end
= size
== x_r
[-1]
87 # y_r schedule depends on size
90 for i
in range(0, n
, size
):
93 if SVSHAPE
.invxyz
[1]: y_r
.reverse()
94 for i
in y_r
: # loop over 2nd order dimension
96 # two lists of half-range indices, e.g. j 0123, jr 7654
97 j
= list(range(i
, i
+ halfsize
))
98 jr
= list(range(i
+halfsize
, i
+ size
))
100 # invert if requested
101 if SVSHAPE
.invxyz
[2]: j_r
.reverse()
102 hz2
= halfsize
// 2 # zero stops reversing 1-item lists
103 # if you *really* want to do the in-place swapping manually,
104 # this allows you to do it. good luck...
105 if SVSHAPE
.mode
== 0b01 and not inplace_mode
:
108 #print ("xform jr", jr)
109 for jl
, jh
in zip(j
, jr
): # loop over 1st order dimension
111 # now depending on MODE return the index. inner butterfly
112 if SVSHAPE
.mode
== 0b01:
113 if SVSHAPE
.skip
in [0b00, 0b10]:
114 result
= ri
[ji
[jl
]] # lower half
115 elif SVSHAPE
.skip
in [0b01, 0b11]:
116 result
= ri
[ji
[jh
]] # upper half, reverse order
118 elif SVSHAPE
.mode
== 0b10:
119 if SVSHAPE
.skip
== 0b00:
120 result
= ri
[ji
[jl
]] # lower half
121 elif SVSHAPE
.skip
== 0b01:
122 result
= ri
[ji
[jl
+size
]] # upper half
124 ((y_end
and z_end
)<<1) |
125 ((y_end
and x_end
and z_end
)<<2))
127 yield result
+ SVSHAPE
.offset
, loopends
130 if SVSHAPE
.mode
== 0b01 and inplace_mode
:
131 for ci
, (jl
, jh
) in enumerate(zip(j
[:hz2
], jr
[:hz2
])):
133 #print ("inplace swap", jh, jlh)
134 tmp1
, tmp2
= ji
[jlh
], ji
[jh
]
135 ji
[jlh
], ji
[jh
] = tmp2
, tmp1
138 # python "yield" can be iterated. use this to make it clear how
139 # the indices are generated by using natural-looking nested loops
140 def iterate_dct_outer_butterfly_indices(SVSHAPE
):
141 # get indices to iterate over, in the required order
143 # createing lists of indices to iterate over in each dimension
144 # has to be done dynamically, because it depends on the size
145 # first, the size-based loop (which can be done statically)
151 # invert order if requested
152 if SVSHAPE
.invxyz
[0]:
158 #print ("outer butterfly")
160 # reference (read/write) the in-place data in *reverse-bit-order*
162 if SVSHAPE
.mode
== 0b11:
163 levels
= n
.bit_length() - 1
164 ri
= [ri
[reverse_bits(i
, levels
)] for i
in range(n
)]
166 # reference list for not needing to do data-swaps, just swap what
167 # *indices* are referenced (two levels of indirection at the moment)
168 # pre-reverse the data-swap list so that it *ends up* in the order 0123..
170 inplace_mode
= SVSHAPE
.skip
in [0b10, 0b11]
172 #print ("inplace mode", SVSHAPE.skip)
173 ji
= halfrev2(ji
, True)
178 # start an infinite (wrapping) loop
181 for size
in x_r
: # loop over 3rd order dimension (size)
183 x_end
= size
== x_r
[-1]
184 y_r
= list(range(0, halfsize
))
185 #print ("itersum", halfsize, size, y_r)
186 # invert if requested
187 if SVSHAPE
.invxyz
[1]: y_r
.reverse()
188 for i
in y_r
: # loop over 2nd order dimension
190 # one list to create iterative-sum schedule
191 jr
= list(range(i
+halfsize
, i
+n
-halfsize
, size
))
192 #print ("itersum jr", i+halfsize, i+size, jr)
193 # invert if requested
194 if SVSHAPE
.invxyz
[2]: j_r
.reverse()
195 hz2
= halfsize
// 2 # zero stops reversing 1-item lists
196 for jh
in jr
: # loop over 1st order dimension
198 #print (" itersum", size, i, jh, jh+size)
199 if SVSHAPE
.skip
in [0b00, 0b10]:
200 result
= ri
[ji
[jh
]] # lower half
201 elif SVSHAPE
.skip
in [0b01, 0b11]:
202 result
= ri
[ji
[jh
+size
]] # upper half
204 ((y_end
and z_end
)<<1) |
205 ((y_end
and x_end
and z_end
)<<2))
207 yield result
+ SVSHAPE
.offset
, loopends
210 if SVSHAPE
.mode
== 0b11 and inplace_mode
:
211 j
= list(range(i
, i
+ halfsize
))
212 jr
= list(range(i
+halfsize
, i
+ size
))
214 for ci
, (jl
, jh
) in enumerate(zip(j
[:hz2
], jr
[:hz2
])):
216 #print ("inplace swap", jh, jlh)
217 tmp1
, tmp2
= ji
[jlh
], ji
[jh
]
218 ji
[jlh
], ji
[jh
] = tmp2
, tmp1
221 def pprint_schedule(schedule
, n
):
226 tablestep
= n
// size
227 print ("size %d halfsize %d tablestep %d" % \
228 (size
, halfsize
, tablestep
))
229 for i
in range(0, n
, size
):
230 prefix
= "i %d\t" % i
231 for j
in range(i
, i
+ halfsize
):
232 (jl
, je
), (jh
, he
) = schedule
[idx
]
233 print (" %-3d\t%s j=%-2d jh=%-2d "
234 "j[jl=%-2d] j[jh=%-2d]" % \
235 (idx
, prefix
, j
, j
+halfsize
,
238 "end", bin(je
)[2:], bin(je
)[2:])
242 def pprint_schedule_outer(schedule
, n
):
247 tablestep
= n
// size
248 print ("size %d halfsize %d tablestep %d" % \
249 (size
, halfsize
, tablestep
))
250 y_r
= list(range(0, halfsize
))
252 prefix
= "i %d\t" % i
253 jr
= list(range(i
+halfsize
, i
+n
-halfsize
, size
))
255 (jl
, je
), (jh
, he
) = schedule
[idx
]
256 print (" %-3d\t%s j=%-2d jh=%-2d "
257 "j[jl=%-2d] j[jh=%-2d]" % \
258 (idx
, prefix
, j
, j
+halfsize
,
261 "end", bin(je
)[2:], bin(je
)[2:])
266 # totally cool *in-place* DCT algorithm using yield REMAPs
272 print ("transform2", n
)
273 levels
= n
.bit_length() - 1
275 # reference (read/write) the in-place data in *reverse-bit-order*
277 ri
= [ri
[reverse_bits(i
, levels
)] for i
in range(n
)]
279 # and pretend we LDed data in half-swapped *and* bit-reversed order as well
280 # TODO: merge these two
281 vec
= halfrev2(vec
, False)
282 vec
= [vec
[ri
[i
]] for i
in range(n
)]
284 # create a cos table: not strictly necessary but here for illustrative
285 # purposes, to demonstrate the point that it really *is* iterative.
286 # this table could be cached and used multiple times rather than
287 # computed every time.
292 for i
in range(n
//size
):
293 for ci
in range(halfsize
):
294 ctable
.append((math
.cos((ci
+ 0.5) * math
.pi
/ size
) * 2.0))
309 SVSHAPE0
.lims
= [xdim
, ydim
, zdim
]
310 SVSHAPE0
.order
= [0,1,2] # experiment with different permutations, here
313 SVSHAPE0
.offset
= 0 # experiment with different offset, here
314 SVSHAPE0
.invxyz
= [1,0,0] # inversion if desired
315 # j+halfstep schedule
317 SVSHAPE1
.lims
= [xdim
, ydim
, zdim
]
318 SVSHAPE1
.order
= [0,1,2] # experiment with different permutations, here
321 SVSHAPE1
.offset
= 0 # experiment with different offset, here
322 SVSHAPE1
.invxyz
= [1,0,0] # inversion if desired
324 # enumerate over the iterator function, getting new indices
325 i0
= iterate_dct_inner_butterfly_indices(SVSHAPE0
)
326 i1
= iterate_dct_inner_butterfly_indices(SVSHAPE1
)
327 for k
, ((jl
, jle
), (jh
, jhe
)) in enumerate(zip(i0
, i1
)):
328 t1
, t2
= vec
[jl
], vec
[jh
]
331 vec
[jh
] = (t1
- t2
) * (1/coeff
)
332 print ("coeff", size
, i
, "ci", ci
,
334 "i/n", (ci
+0.5)/size
, coeff
, vec
[jl
],
336 "end", bin(jle
), bin(jhe
))
337 if jle
== 0b111: # all loops end
340 print("transform2 pre-itersum", vec
)
342 # now things are in the right order for the outer butterfly.
346 SVSHAPE0
.lims
= [xdim
, ydim
, zdim
]
347 SVSHAPE0
.order
= [0,1,2] # experiment with different permutations, here
350 SVSHAPE0
.offset
= 0 # experiment with different offset, here
351 SVSHAPE0
.invxyz
= [0,0,0] # inversion if desired
352 # j+halfstep schedule
354 SVSHAPE1
.lims
= [xdim
, ydim
, zdim
]
355 SVSHAPE1
.order
= [0,1,2] # experiment with different permutations, here
358 SVSHAPE1
.offset
= 0 # experiment with different offset, here
359 SVSHAPE1
.invxyz
= [0,0,0] # inversion if desired
361 # enumerate over the iterator function, getting new indices
362 i0
= iterate_dct_outer_butterfly_indices(SVSHAPE0
)
363 i1
= iterate_dct_outer_butterfly_indices(SVSHAPE1
)
364 for k
, ((jl
, jle
), (jh
, jhe
)) in enumerate(zip(i0
, i1
)):
365 print ("itersum jr", jl
, jh
,
366 "end", bin(jle
), bin(jhe
))
369 if jle
== 0b111: # all loops end
372 print("transform2 result", vec
)
378 # set the dimension sizes here
381 ydim
= 0 # not needed
382 zdim
= 0 # again, not needed
394 SVSHAPE0
.lims
= [xdim
, ydim
, zdim
]
395 SVSHAPE0
.order
= [0,1,2] # experiment with different permutations, here
398 SVSHAPE0
.offset
= 0 # experiment with different offset, here
399 SVSHAPE0
.invxyz
= [0,0,0] # inversion if desired
400 # j+halfstep schedule
402 SVSHAPE1
.lims
= [xdim
, ydim
, zdim
]
403 SVSHAPE1
.order
= [0,1,2] # experiment with different permutations, here
406 SVSHAPE1
.offset
= 0 # experiment with different offset, here
407 SVSHAPE1
.invxyz
= [0,0,0] # inversion if desired
409 # enumerate over the iterator function, getting new indices
411 i0
= iterate_dct_inner_butterfly_indices(SVSHAPE0
)
412 i1
= iterate_dct_inner_butterfly_indices(SVSHAPE1
)
413 for idx
, (jl
, jh
) in enumerate(zip(i0
, i1
)):
414 schedule
.append((jl
, jh
))
415 if jl
[1] == 0b111: # end
418 # ok now pretty-print the results, with some debug output
419 print ("inner butterfly")
420 pprint_schedule(schedule
, n
)
429 SVSHAPE0
.lims
= [xdim
, ydim
, zdim
]
430 SVSHAPE0
.order
= [0,1,2] # experiment with different permutations, here
433 SVSHAPE0
.offset
= 0 # experiment with different offset, here
434 SVSHAPE0
.invxyz
= [1,0,0] # inversion if desired
435 # j+halfstep schedule
437 SVSHAPE1
.lims
= [xdim
, ydim
, zdim
]
438 SVSHAPE1
.order
= [0,1,2] # experiment with different permutations, here
441 SVSHAPE1
.offset
= 0 # experiment with different offset, here
442 SVSHAPE1
.invxyz
= [1,0,0] # inversion if desired
444 # enumerate over the iterator function, getting new indices
446 i0
= iterate_dct_outer_butterfly_indices(SVSHAPE0
)
447 i1
= iterate_dct_outer_butterfly_indices(SVSHAPE1
)
448 for idx
, (jl
, jh
) in enumerate(zip(i0
, i1
)):
449 schedule
.append((jl
, jh
))
450 if jl
[1] == 0b111: # end
453 # ok now pretty-print the results, with some debug output
454 print ("outer butterfly")
455 pprint_schedule_outer(schedule
, n
)
458 if __name__
== '__main__':