use ydimsz as sub-mode in DCT/FFT butterfly
[openpower-isa.git] / src / openpower / decoder / isa / remap_dct_yield.py
1 # DCT "REMAP" scheduler
2 #
3 # Modifications made to create an in-place iterative DCT:
4 # Copyright (c) 2021 Luke Kenneth Casson Leighton <lkcl@lkcl.net>
5 #
6 # SPDX: LGPLv3+
7 #
8 # Original fastdctlee.py by Nayuki:
9 # Copyright (c) 2020 Project Nayuki. (MIT License)
10 # https://www.nayuki.io/page/fast-discrete-cosine-transform-algorithms
11
12 import math
13
14 # bits of the integer 'val'.
15 def reverse_bits(val, width):
16 result = 0
17 for _ in range(width):
18 result = (result << 1) | (val & 1)
19 val >>= 1
20 return result
21
22
23 # iterative version of [recursively-applied] half-rev.
24 # relies on the list lengths being power-of-two and the fact
25 # that bit-inversion of a list of binary numbers is the same
26 # as reversing the order of the list
27 # this version is dead easy to implement in hardware.
28 # a big surprise is that the half-reversal can be done with
29 # such a simple XOR. the inverse operation is slightly trickier
30 def halfrev2(vec, pre_rev=True):
31 res = []
32 for i in range(len(vec)):
33 if pre_rev:
34 res.append(i ^ (i>>1))
35 else:
36 ri = i
37 bl = i.bit_length()
38 for ji in range(1, bl):
39 ri ^= (i >> ji)
40 res.append(vec[ri])
41 return res
42
43
44 # python "yield" can be iterated. use this to make it clear how
45 # the indices are generated by using natural-looking nested loops
46 def iterate_dct_inner_butterfly_indices(SVSHAPE):
47 # get indices to iterate over, in the required order
48 n = SVSHAPE.lims[0]
49 mode = SVSHAPE.lims[1]
50 #print ("inner butterfly", mode)
51 # creating lists of indices to iterate over in each dimension
52 # has to be done dynamically, because it depends on the size
53 # first, the size-based loop (which can be done statically)
54 x_r = []
55 size = 2
56 while size <= n:
57 x_r.append(size)
58 size *= 2
59 # invert order if requested
60 if SVSHAPE.invxyz[0]:
61 x_r.reverse()
62
63 if len(x_r) == 0:
64 return
65
66 # reference (read/write) the in-place data in *reverse-bit-order*
67 ri = list(range(n))
68 if SVSHAPE.submode2 == 0b01:
69 levels = n.bit_length() - 1
70 ri = [ri[reverse_bits(i, levels)] for i in range(n)]
71
72 # reference list for not needing to do data-swaps, just swap what
73 # *indices* are referenced (two levels of indirection at the moment)
74 # pre-reverse the data-swap list so that it *ends up* in the order 0123..
75 ji = list(range(n))
76 inplace_mode = SVSHAPE.submode2 == 0b01
77 # and SVSHAPE.skip not in [0b10, 0b11]
78 if inplace_mode:
79 #print ("inplace mode")
80 ji = halfrev2(ji, True)
81
82 #print ("ri", ri)
83 #print ("ji", ji)
84
85 # start an infinite (wrapping) loop
86 skip = 0
87 while True:
88 for size in x_r: # loop over 3rd order dimension (size)
89 x_end = size == x_r[-1]
90 # y_r schedule depends on size
91 halfsize = size // 2
92 y_r = []
93 for i in range(0, n, size):
94 y_r.append(i)
95 # invert if requested
96 if SVSHAPE.invxyz[1]: y_r.reverse()
97 for i in y_r: # loop over 2nd order dimension
98 y_end = i == y_r[-1]
99 # two lists of half-range indices, e.g. j 0123, jr 7654
100 j = list(range(i, i + halfsize))
101 jr = list(range(i+halfsize, i + size))
102 jr.reverse()
103 # invert if requested
104 if SVSHAPE.invxyz[2]: j_r.reverse()
105 hz2 = halfsize // 2 # zero stops reversing 1-item lists
106 # if you *really* want to do the in-place swapping manually,
107 # this allows you to do it. good luck...
108 if SVSHAPE.submode2 == 0b01 and not inplace_mode:
109 #print ("swap mode")
110 jr = j_r[:hz2]
111 #print ("xform jr", jr)
112 # loop over 1st order dimension
113 for ci, (jl, jh) in enumerate(zip(j, jr)):
114 z_end = jl == j[-1]
115 # now depending on MODE return the index. inner butterfly
116 if SVSHAPE.skip == 0b00: # in [0b00, 0b10]:
117 result = ri[ji[jl]] # lower half
118 elif SVSHAPE.skip == 0b01: # in [0b01, 0b11]:
119 result = ri[ji[jh]] # upper half, reverse order
120 elif SVSHAPE.skip == 0b10: #
121 result = ci # coefficient helper
122 elif SVSHAPE.skip == 0b11: #
123 result = size # coefficient helper
124 loopends = (z_end |
125 ((y_end and z_end)<<1) |
126 ((y_end and x_end and z_end)<<2))
127
128 yield result + SVSHAPE.offset, loopends
129
130 # now in-place swap
131 if inplace_mode:
132 for ci, (jl, jh) in enumerate(zip(j[:hz2], jr[:hz2])):
133 jlh = jl+halfsize
134 #print ("inplace swap", jh, jlh)
135 tmp1, tmp2 = ji[jlh], ji[jh]
136 ji[jlh], ji[jh] = tmp2, tmp1
137
138
139 # python "yield" can be iterated. use this to make it clear how
140 # the indices are generated by using natural-looking nested loops
141 def iterate_dct_outer_butterfly_indices(SVSHAPE):
142 # get indices to iterate over, in the required order
143 n = SVSHAPE.lims[0]
144 mode = SVSHAPE.lims[1]
145 # createing lists of indices to iterate over in each dimension
146 # has to be done dynamically, because it depends on the size
147 # first, the size-based loop (which can be done statically)
148 x_r = []
149 size = n // 2
150 while size >= 2:
151 x_r.append(size)
152 size //= 2
153 # invert order if requested
154 if SVSHAPE.invxyz[0]:
155 x_r.reverse()
156
157 if len(x_r) == 0:
158 return
159
160 #print ("outer butterfly")
161
162 # reference (read/write) the in-place data in *reverse-bit-order*
163 ri = list(range(n))
164 if SVSHAPE.submode2 == 0b11:
165 levels = n.bit_length() - 1
166 ri = [ri[reverse_bits(i, levels)] for i in range(n)]
167
168 # reference list for not needing to do data-swaps, just swap what
169 # *indices* are referenced (two levels of indirection at the moment)
170 # pre-reverse the data-swap list so that it *ends up* in the order 0123..
171 ji = list(range(n))
172 inplace_mode = False # need the space... SVSHAPE.skip in [0b10, 0b11]
173 if inplace_mode:
174 #print ("inplace mode", SVSHAPE.skip)
175 ji = halfrev2(ji, True)
176
177 #print ("ri", ri)
178 #print ("ji", ji)
179
180 # start an infinite (wrapping) loop
181 skip = 0
182 while True:
183 for size in x_r: # loop over 3rd order dimension (size)
184 halfsize = size//2
185 x_end = size == x_r[-1]
186 y_r = list(range(0, halfsize))
187 #print ("itersum", halfsize, size, y_r)
188 # invert if requested
189 if SVSHAPE.invxyz[1]: y_r.reverse()
190 for i in y_r: # loop over 2nd order dimension
191 y_end = i == y_r[-1]
192 # one list to create iterative-sum schedule
193 jr = list(range(i+halfsize, i+n-halfsize, size))
194 #print ("itersum jr", i+halfsize, i+size, jr)
195 # invert if requested
196 if SVSHAPE.invxyz[2]: j_r.reverse()
197 hz2 = halfsize // 2 # zero stops reversing 1-item lists
198 for ci, jh in enumerate(jr): # loop over 1st order dimension
199 z_end = jh == jr[-1]
200 #print (" itersum", size, i, jh, jh+size)
201 if SVSHAPE.skip == 0b00: # in [0b00, 0b10]:
202 result = ri[ji[jh]] # lower half
203 elif SVSHAPE.skip == 0b01: # in [0b01, 0b11]:
204 result = ri[ji[jh+size]] # upper half
205 elif SVSHAPE.skip == 0b10: #
206 result = ci # coefficient helper
207 elif SVSHAPE.skip == 0b11: #
208 result = size # coefficient helper
209 loopends = (z_end |
210 ((y_end and z_end)<<1) |
211 ((y_end and x_end and z_end)<<2))
212
213 yield result + SVSHAPE.offset, loopends
214
215 # now in-place swap
216 if SVSHAPE.submode2 == 0b11 and inplace_mode:
217 j = list(range(i, i + halfsize))
218 jr = list(range(i+halfsize, i + size))
219 jr.reverse()
220 for ci, (jl, jh) in enumerate(zip(j[:hz2], jr[:hz2])):
221 jlh = jl+halfsize
222 #print ("inplace swap", jh, jlh)
223 tmp1, tmp2 = ji[jlh], ji[jh]
224 ji[jlh], ji[jh] = tmp2, tmp1
225
226
227 def pprint_schedule(schedule, n):
228 size = 2
229 idx = 0
230 while size <= n:
231 halfsize = size // 2
232 tablestep = n // size
233 print ("size %d halfsize %d tablestep %d" % \
234 (size, halfsize, tablestep))
235 for i in range(0, n, size):
236 prefix = "i %d\t" % i
237 for j in range(i, i + halfsize):
238 (jl, je), (jh, he) = schedule[idx]
239 print (" %-3d\t%s j=%-2d jh=%-2d "
240 "j[jl=%-2d] j[jh=%-2d]" % \
241 (idx, prefix, j, j+halfsize,
242 jl, jh,
243 ),
244 "end", bin(je)[2:], bin(je)[2:])
245 idx += 1
246 size *= 2
247
248 def pprint_schedule_outer(schedule, n):
249 size = 2
250 idx = 0
251 while size <= n//2:
252 halfsize = size // 2
253 tablestep = n // size
254 print ("size %d halfsize %d tablestep %d" % \
255 (size, halfsize, tablestep))
256 y_r = list(range(0, halfsize))
257 for i in y_r:
258 prefix = "i %d\t" % i
259 jr = list(range(i+halfsize, i+n-halfsize, size))
260 for j in jr:
261 (jl, je), (jh, he) = schedule[idx]
262 print (" %-3d\t%s j=%-2d jh=%-2d "
263 "j[jl=%-2d] j[jh=%-2d]" % \
264 (idx, prefix, j, j+halfsize,
265 jl, jh,
266 ),
267 "end", bin(je)[2:], bin(je)[2:])
268 idx += 1
269 size *= 2
270
271
272 # totally cool *in-place* DCT algorithm using yield REMAPs
273 def transform2(vec):
274
275 # Initialization
276 n = len(vec)
277 print ()
278 print ("transform2", n)
279 levels = n.bit_length() - 1
280
281 # reference (read/write) the in-place data in *reverse-bit-order*
282 ri = list(range(n))
283 ri = [ri[reverse_bits(i, levels)] for i in range(n)]
284
285 # and pretend we LDed data in half-swapped *and* bit-reversed order as well
286 # TODO: merge these two
287 vec = halfrev2(vec, False)
288 vec = [vec[ri[i]] for i in range(n)]
289
290 # create a cos table: not strictly necessary but here for illustrative
291 # purposes, to demonstrate the point that it really *is* iterative.
292 # this table could be cached and used multiple times rather than
293 # computed every time.
294 ctable = []
295 size = n
296 while size >= 2:
297 halfsize = size // 2
298 for i in range(n//size):
299 for ci in range(halfsize):
300 ctable.append((math.cos((ci + 0.5) * math.pi / size) * 2.0))
301 size //= 2
302
303 ################
304 # INNER butterfly
305 ################
306 xdim = n
307 ydim = 0
308 zdim = 0
309
310 # set up an SVSHAPE
311 class SVSHAPE:
312 pass
313 # j schedule
314 SVSHAPE0 = SVSHAPE()
315 SVSHAPE0.lims = [xdim, 0b000001, zdim]
316 SVSHAPE0.mode = 0b01
317 SVSHAPE0.submode2 = 0b01
318 SVSHAPE0.skip = 0b00
319 SVSHAPE0.offset = 0 # experiment with different offset, here
320 SVSHAPE0.invxyz = [1,0,0] # inversion if desired
321 # j+halfstep schedule
322 SVSHAPE1 = SVSHAPE()
323 SVSHAPE1.lims = [xdim, 0b000001, zdim]
324 SVSHAPE1.mode = 0b01
325 SVSHAPE1.submode2 = 0b01
326 SVSHAPE1.skip = 0b01
327 SVSHAPE1.offset = 0 # experiment with different offset, here
328 SVSHAPE1.invxyz = [1,0,0] # inversion if desired
329 # ci schedule
330 SVSHAPE2 = SVSHAPE()
331 SVSHAPE2.lims = [xdim, 0b000001, zdim]
332 SVSHAPE2.mode = 0b01
333 SVSHAPE2.submode2 = 0b01
334 SVSHAPE2.skip = 0b10
335 SVSHAPE2.offset = 0 # experiment with different offset, here
336 SVSHAPE2.invxyz = [1,0,0] # inversion if desired
337 # size schedule
338 SVSHAPE3 = SVSHAPE()
339 SVSHAPE3.lims = [xdim, 0b000001, zdim]
340 SVSHAPE3.mode = 0b01
341 SVSHAPE3.submode2 = 0b01
342 SVSHAPE3.skip = 0b11
343 SVSHAPE3.offset = 0 # experiment with different offset, here
344 SVSHAPE3.invxyz = [1,0,0] # inversion if desired
345
346 # enumerate over the iterator function, getting new indices
347 i0 = iterate_dct_inner_butterfly_indices(SVSHAPE0)
348 i1 = iterate_dct_inner_butterfly_indices(SVSHAPE1)
349 i2 = iterate_dct_inner_butterfly_indices(SVSHAPE2)
350 i3 = iterate_dct_inner_butterfly_indices(SVSHAPE3)
351 for k, ((jl, jle), (jh, jhe), (ci, cie), (size, sze)) in \
352 enumerate(zip(i0, i1, i2, i3)):
353 t1, t2 = vec[jl], vec[jh]
354 print ("xform2", jl, jh, ci, size)
355 coeff = (math.cos((ci + 0.5) * math.pi / size) * 2.0)
356 assert coeff == ctable[k]
357 vec[jl] = t1 + t2
358 vec[jh] = (t1 - t2) * (1/coeff)
359 print ("coeff", size, i, "ci", ci,
360 "jl", jl, "jh", jh,
361 "i/n", (ci+0.5)/size, coeff, vec[jl],
362 vec[jh],
363 "end", bin(jle), bin(jhe))
364 if jle == 0b111: # all loops end
365 break
366
367 print("transform2 pre-itersum", vec)
368
369 # now things are in the right order for the outer butterfly.
370
371 # j schedule
372 SVSHAPE0 = SVSHAPE()
373 SVSHAPE0.lims = [xdim, 0b0000010, zdim]
374 SVSHAPE0.submode2 = 0b100
375 SVSHAPE0.mode = 0b01
376 SVSHAPE0.skip = 0b00
377 SVSHAPE0.offset = 0 # experiment with different offset, here
378 SVSHAPE0.invxyz = [0,0,0] # inversion if desired
379 # j+halfstep schedule
380 SVSHAPE1 = SVSHAPE()
381 SVSHAPE1.lims = [xdim, 0b0000010, zdim]
382 SVSHAPE1.mode = 0b01
383 SVSHAPE1.submode2 = 0b100
384 SVSHAPE1.skip = 0b01
385 SVSHAPE1.offset = 0 # experiment with different offset, here
386 SVSHAPE1.invxyz = [0,0,0] # inversion if desired
387
388 # enumerate over the iterator function, getting new indices
389 i0 = iterate_dct_outer_butterfly_indices(SVSHAPE0)
390 i1 = iterate_dct_outer_butterfly_indices(SVSHAPE1)
391 for k, ((jl, jle), (jh, jhe)) in enumerate(zip(i0, i1)):
392 print ("itersum jr", jl, jh,
393 "end", bin(jle), bin(jhe))
394 vec[jl] += vec[jh]
395 size //= 2
396 if jle == 0b111: # all loops end
397 break
398
399 print("transform2 result", vec)
400
401 return vec
402
403
404 def demo():
405 # set the dimension sizes here
406 n = 8
407 xdim = n
408 ydim = 0 # not needed
409 zdim = 0 # again, not needed
410
411
412 ################
413 # INNER butterfly
414 ################
415
416 # set up an SVSHAPE
417 class SVSHAPE:
418 pass
419 # j schedule
420 SVSHAPE0 = SVSHAPE()
421 SVSHAPE0.lims = [xdim, 0b000001, zdim]
422 SVSHAPE0.submode2 = 0b010
423 SVSHAPE0.mode = 0b01
424 SVSHAPE0.skip = 0b00
425 SVSHAPE0.offset = 0 # experiment with different offset, here
426 SVSHAPE0.invxyz = [0,0,0] # inversion if desired
427 # j+halfstep schedule
428 SVSHAPE1 = SVSHAPE()
429 SVSHAPE1.lims = [xdim, 0b000001, zdim]
430 SVSHAPE1.submode2 = 0b010
431 SVSHAPE1.mode = 0b01
432 SVSHAPE1.skip = 0b01
433 SVSHAPE1.offset = 0 # experiment with different offset, here
434 SVSHAPE1.invxyz = [0,0,0] # inversion if desired
435
436 # enumerate over the iterator function, getting new indices
437 schedule = []
438 i0 = iterate_dct_inner_butterfly_indices(SVSHAPE0)
439 i1 = iterate_dct_inner_butterfly_indices(SVSHAPE1)
440 for idx, (jl, jh) in enumerate(zip(i0, i1)):
441 schedule.append((jl, jh))
442 if jl[1] == 0b111: # end
443 break
444
445 # ok now pretty-print the results, with some debug output
446 print ("inner butterfly")
447 pprint_schedule(schedule, n)
448 print ("")
449
450 ################
451 # outer butterfly
452 ################
453
454 # j schedule
455 SVSHAPE0 = SVSHAPE()
456 SVSHAPE0.lims = [xdim, 0b000010, zdim]
457 SVSHAPE0.mode = 0b01
458 SVSHAPE0.submode2 = 0b100
459 SVSHAPE0.skip = 0b10
460 SVSHAPE0.offset = 0 # experiment with different offset, here
461 SVSHAPE0.invxyz = [1,0,0] # inversion if desired
462 # j+halfstep schedule
463 SVSHAPE1 = SVSHAPE()
464 SVSHAPE1.lims = [xdim, 0b000010, zdim]
465 SVSHAPE1.mode = 0b01
466 SVSHAPE1.submode2 = 0b100
467 SVSHAPE1.skip = 0b11
468 SVSHAPE1.offset = 0 # experiment with different offset, here
469 SVSHAPE1.invxyz = [1,0,0] # inversion if desired
470
471 # enumerate over the iterator function, getting new indices
472 schedule = []
473 i0 = iterate_dct_outer_butterfly_indices(SVSHAPE0)
474 i1 = iterate_dct_outer_butterfly_indices(SVSHAPE1)
475 for idx, (jl, jh) in enumerate(zip(i0, i1)):
476 schedule.append((jl, jh))
477 if jl[1] == 0b111: # end
478 break
479
480 # ok now pretty-print the results, with some debug output
481 print ("outer butterfly")
482 pprint_schedule_outer(schedule, n)
483
484 # run the demo
485 if __name__ == '__main__':
486 demo()