add DCT outer butterfly iterative overlapping ADD schedule
[openpower-isa.git] / src / openpower / decoder / isa / remap_dct_yield.py
1 # DCT "REMAP" scheduler
2 #
3 # Modifications made to create an in-place iterative DCT:
4 # Copyright (c) 2021 Luke Kenneth Casson Leighton <lkcl@lkcl.net>
5 #
6 # SPDX: LGPLv3+
7 #
8 # Original fastdctlee.py by Nayuki:
9 # Copyright (c) 2020 Project Nayuki. (MIT License)
10 # https://www.nayuki.io/page/fast-discrete-cosine-transform-algorithms
11
12 import math
13
14 # bits of the integer 'val'.
15 def reverse_bits(val, width):
16 result = 0
17 for _ in range(width):
18 result = (result << 1) | (val & 1)
19 val >>= 1
20 return result
21
22
23 # iterative version of [recursively-applied] half-rev.
24 # relies on the list lengths being power-of-two and the fact
25 # that bit-inversion of a list of binary numbers is the same
26 # as reversing the order of the list
27 # this version is dead easy to implement in hardware.
28 # a big surprise is that the half-reversal can be done with
29 # such a simple XOR. the inverse operation is slightly trickier
30 def halfrev2(vec, pre_rev=True):
31 res = []
32 for i in range(len(vec)):
33 if pre_rev:
34 res.append(i ^ (i>>1))
35 else:
36 ri = i
37 bl = i.bit_length()
38 for ji in range(1, bl):
39 ri ^= (i >> ji)
40 res.append(vec[ri])
41 return res
42
43
44 # python "yield" can be iterated. use this to make it clear how
45 # the indices are generated by using natural-looking nested loops
46 def iterate_dct_inner_butterfly_indices(SVSHAPE):
47 # get indices to iterate over, in the required order
48 n = SVSHAPE.lims[0]
49 # createing lists of indices to iterate over in each dimension
50 # has to be done dynamically, because it depends on the size
51 # first, the size-based loop (which can be done statically)
52 x_r = []
53 size = 2
54 while size <= n:
55 x_r.append(size)
56 size *= 2
57 # invert order if requested
58 if SVSHAPE.invxyz[0]:
59 x_r.reverse()
60
61 if len(x_r) == 0:
62 return
63
64 # reference (read/write) the in-place data in *reverse-bit-order*
65 ri = list(range(n))
66 if SVSHAPE.submode2 == 0b01:
67 levels = n.bit_length() - 1
68 ri = [ri[reverse_bits(i, levels)] for i in range(n)]
69
70 # reference list for not needing to do data-swaps, just swap what
71 # *indices* are referenced (two levels of indirection at the moment)
72 # pre-reverse the data-swap list so that it *ends up* in the order 0123..
73 ji = list(range(n))
74 inplace_mode = SVSHAPE.submode2 == 0b01 and SVSHAPE.skip not in [0b10, 0b11]
75 if inplace_mode:
76 #print ("inplace mode")
77 ji = halfrev2(ji, True)
78
79 #print ("ri", ri)
80 #print ("ji", ji)
81
82 # start an infinite (wrapping) loop
83 skip = 0
84 while True:
85 for size in x_r: # loop over 3rd order dimension (size)
86 x_end = size == x_r[-1]
87 # y_r schedule depends on size
88 halfsize = size // 2
89 y_r = []
90 for i in range(0, n, size):
91 y_r.append(i)
92 # invert if requested
93 if SVSHAPE.invxyz[1]: y_r.reverse()
94 for i in y_r: # loop over 2nd order dimension
95 y_end = i == y_r[-1]
96 # two lists of half-range indices, e.g. j 0123, jr 7654
97 j = list(range(i, i + halfsize))
98 jr = list(range(i+halfsize, i + size))
99 jr.reverse()
100 # invert if requested
101 if SVSHAPE.invxyz[2]: j_r.reverse()
102 hz2 = halfsize // 2 # zero stops reversing 1-item lists
103 # if you *really* want to do the in-place swapping manually,
104 # this allows you to do it. good luck...
105 if SVSHAPE.submode2 == 0b01 and not inplace_mode:
106 #print ("swap mode")
107 jr = j_r[:hz2]
108 #print ("xform jr", jr)
109 for jl, jh in zip(j, jr): # loop over 1st order dimension
110 z_end = jl == j[-1]
111 # now depending on MODE return the index. inner butterfly
112 if SVSHAPE.skip in [0b00, 0b10]:
113 result = ri[ji[jl]] # lower half
114 elif SVSHAPE.skip in [0b01, 0b11]:
115 result = ri[ji[jh]] # upper half, reverse order
116 loopends = (z_end |
117 ((y_end and z_end)<<1) |
118 ((y_end and x_end and z_end)<<2))
119
120 yield result + SVSHAPE.offset, loopends
121
122 # now in-place swap
123 if inplace_mode:
124 for ci, (jl, jh) in enumerate(zip(j[:hz2], jr[:hz2])):
125 jlh = jl+halfsize
126 #print ("inplace swap", jh, jlh)
127 tmp1, tmp2 = ji[jlh], ji[jh]
128 ji[jlh], ji[jh] = tmp2, tmp1
129
130
131 # python "yield" can be iterated. use this to make it clear how
132 # the indices are generated by using natural-looking nested loops
133 def iterate_dct_outer_butterfly_indices(SVSHAPE):
134 # get indices to iterate over, in the required order
135 n = SVSHAPE.lims[0]
136 # createing lists of indices to iterate over in each dimension
137 # has to be done dynamically, because it depends on the size
138 # first, the size-based loop (which can be done statically)
139 x_r = []
140 size = n // 2
141 while size >= 2:
142 x_r.append(size)
143 size //= 2
144 # invert order if requested
145 if SVSHAPE.invxyz[0]:
146 x_r.reverse()
147
148 if len(x_r) == 0:
149 return
150
151 #print ("outer butterfly")
152
153 # reference (read/write) the in-place data in *reverse-bit-order*
154 ri = list(range(n))
155 if SVSHAPE.submode2 == 0b11:
156 levels = n.bit_length() - 1
157 ri = [ri[reverse_bits(i, levels)] for i in range(n)]
158
159 # reference list for not needing to do data-swaps, just swap what
160 # *indices* are referenced (two levels of indirection at the moment)
161 # pre-reverse the data-swap list so that it *ends up* in the order 0123..
162 ji = list(range(n))
163 inplace_mode = SVSHAPE.skip in [0b10, 0b11]
164 if inplace_mode:
165 #print ("inplace mode", SVSHAPE.skip)
166 ji = halfrev2(ji, True)
167
168 #print ("ri", ri)
169 #print ("ji", ji)
170
171 # start an infinite (wrapping) loop
172 skip = 0
173 while True:
174 for size in x_r: # loop over 3rd order dimension (size)
175 halfsize = size//2
176 x_end = size == x_r[-1]
177 y_r = list(range(0, halfsize))
178 #print ("itersum", halfsize, size, y_r)
179 # invert if requested
180 if SVSHAPE.invxyz[1]: y_r.reverse()
181 for i in y_r: # loop over 2nd order dimension
182 y_end = i == y_r[-1]
183 # one list to create iterative-sum schedule
184 jr = list(range(i+halfsize, i+n-halfsize, size))
185 #print ("itersum jr", i+halfsize, i+size, jr)
186 # invert if requested
187 if SVSHAPE.invxyz[2]: j_r.reverse()
188 hz2 = halfsize // 2 # zero stops reversing 1-item lists
189 for jh in jr: # loop over 1st order dimension
190 z_end = jh == jr[-1]
191 #print (" itersum", size, i, jh, jh+size)
192 if SVSHAPE.skip in [0b00, 0b10]:
193 result = ri[ji[jh]] # lower half
194 elif SVSHAPE.skip in [0b01, 0b11]:
195 result = ri[ji[jh+size]] # upper half
196 loopends = (z_end |
197 ((y_end and z_end)<<1) |
198 ((y_end and x_end and z_end)<<2))
199
200 yield result + SVSHAPE.offset, loopends
201
202 # now in-place swap
203 if SVSHAPE.submode2 == 0b11 and inplace_mode:
204 j = list(range(i, i + halfsize))
205 jr = list(range(i+halfsize, i + size))
206 jr.reverse()
207 for ci, (jl, jh) in enumerate(zip(j[:hz2], jr[:hz2])):
208 jlh = jl+halfsize
209 #print ("inplace swap", jh, jlh)
210 tmp1, tmp2 = ji[jlh], ji[jh]
211 ji[jlh], ji[jh] = tmp2, tmp1
212
213
214 def pprint_schedule(schedule, n):
215 size = 2
216 idx = 0
217 while size <= n:
218 halfsize = size // 2
219 tablestep = n // size
220 print ("size %d halfsize %d tablestep %d" % \
221 (size, halfsize, tablestep))
222 for i in range(0, n, size):
223 prefix = "i %d\t" % i
224 for j in range(i, i + halfsize):
225 (jl, je), (jh, he) = schedule[idx]
226 print (" %-3d\t%s j=%-2d jh=%-2d "
227 "j[jl=%-2d] j[jh=%-2d]" % \
228 (idx, prefix, j, j+halfsize,
229 jl, jh,
230 ),
231 "end", bin(je)[2:], bin(je)[2:])
232 idx += 1
233 size *= 2
234
235 def pprint_schedule_outer(schedule, n):
236 size = 2
237 idx = 0
238 while size <= n//2:
239 halfsize = size // 2
240 tablestep = n // size
241 print ("size %d halfsize %d tablestep %d" % \
242 (size, halfsize, tablestep))
243 y_r = list(range(0, halfsize))
244 for i in y_r:
245 prefix = "i %d\t" % i
246 jr = list(range(i+halfsize, i+n-halfsize, size))
247 for j in jr:
248 (jl, je), (jh, he) = schedule[idx]
249 print (" %-3d\t%s j=%-2d jh=%-2d "
250 "j[jl=%-2d] j[jh=%-2d]" % \
251 (idx, prefix, j, j+halfsize,
252 jl, jh,
253 ),
254 "end", bin(je)[2:], bin(je)[2:])
255 idx += 1
256 size *= 2
257
258
259 # totally cool *in-place* DCT algorithm using yield REMAPs
260 def transform2(vec):
261
262 # Initialization
263 n = len(vec)
264 print ()
265 print ("transform2", n)
266 levels = n.bit_length() - 1
267
268 # reference (read/write) the in-place data in *reverse-bit-order*
269 ri = list(range(n))
270 ri = [ri[reverse_bits(i, levels)] for i in range(n)]
271
272 # and pretend we LDed data in half-swapped *and* bit-reversed order as well
273 # TODO: merge these two
274 vec = halfrev2(vec, False)
275 vec = [vec[ri[i]] for i in range(n)]
276
277 # create a cos table: not strictly necessary but here for illustrative
278 # purposes, to demonstrate the point that it really *is* iterative.
279 # this table could be cached and used multiple times rather than
280 # computed every time.
281 ctable = []
282 size = n
283 while size >= 2:
284 halfsize = size // 2
285 for i in range(n//size):
286 for ci in range(halfsize):
287 ctable.append((math.cos((ci + 0.5) * math.pi / size) * 2.0))
288 size //= 2
289
290 ################
291 # INNER butterfly
292 ################
293 xdim = n
294 ydim = 0
295 zdim = 0
296
297 # set up an SVSHAPE
298 class SVSHAPE:
299 pass
300 # j schedule
301 SVSHAPE0 = SVSHAPE()
302 SVSHAPE0.lims = [xdim, ydim, zdim]
303 SVSHAPE0.order = [0,1,2] # experiment with different permutations, here
304 SVSHAPE0.mode = 0b01
305 SVSHAPE0.submode2 = 0b01
306 SVSHAPE0.skip = 0b00
307 SVSHAPE0.offset = 0 # experiment with different offset, here
308 SVSHAPE0.invxyz = [1,0,0] # inversion if desired
309 # j+halfstep schedule
310 SVSHAPE1 = SVSHAPE()
311 SVSHAPE1.lims = [xdim, ydim, zdim]
312 SVSHAPE1.order = [0,1,2] # experiment with different permutations, here
313 SVSHAPE1.mode = 0b01
314 SVSHAPE1.submode2 = 0b01
315 SVSHAPE1.skip = 0b01
316 SVSHAPE1.offset = 0 # experiment with different offset, here
317 SVSHAPE1.invxyz = [1,0,0] # inversion if desired
318
319 # enumerate over the iterator function, getting new indices
320 i0 = iterate_dct_inner_butterfly_indices(SVSHAPE0)
321 i1 = iterate_dct_inner_butterfly_indices(SVSHAPE1)
322 for k, ((jl, jle), (jh, jhe)) in enumerate(zip(i0, i1)):
323 t1, t2 = vec[jl], vec[jh]
324 coeff = ctable[k]
325 vec[jl] = t1 + t2
326 vec[jh] = (t1 - t2) * (1/coeff)
327 print ("coeff", size, i, "ci", ci,
328 "jl", jl, "jh", jh,
329 "i/n", (ci+0.5)/size, coeff, vec[jl],
330 vec[jh],
331 "end", bin(jle), bin(jhe))
332 if jle == 0b111: # all loops end
333 break
334
335 print("transform2 pre-itersum", vec)
336
337 # now things are in the right order for the outer butterfly.
338
339 # j schedule
340 SVSHAPE0 = SVSHAPE()
341 SVSHAPE0.lims = [xdim, ydim, zdim]
342 SVSHAPE0.submode2 = 0b100
343 SVSHAPE0.mode = 0b01
344 SVSHAPE0.skip = 0b00
345 SVSHAPE0.offset = 0 # experiment with different offset, here
346 SVSHAPE0.invxyz = [0,0,0] # inversion if desired
347 # j+halfstep schedule
348 SVSHAPE1 = SVSHAPE()
349 SVSHAPE1.lims = [xdim, ydim, zdim]
350 SVSHAPE1.mode = 0b01
351 SVSHAPE1.submode2 = 0b100
352 SVSHAPE1.skip = 0b01
353 SVSHAPE1.offset = 0 # experiment with different offset, here
354 SVSHAPE1.invxyz = [0,0,0] # inversion if desired
355
356 # enumerate over the iterator function, getting new indices
357 i0 = iterate_dct_outer_butterfly_indices(SVSHAPE0)
358 i1 = iterate_dct_outer_butterfly_indices(SVSHAPE1)
359 for k, ((jl, jle), (jh, jhe)) in enumerate(zip(i0, i1)):
360 print ("itersum jr", jl, jh,
361 "end", bin(jle), bin(jhe))
362 vec[jl] += vec[jh]
363 size //= 2
364 if jle == 0b111: # all loops end
365 break
366
367 print("transform2 result", vec)
368
369 return vec
370
371
372 def demo():
373 # set the dimension sizes here
374 n = 8
375 xdim = n
376 ydim = 0 # not needed
377 zdim = 0 # again, not needed
378
379
380 ################
381 # INNER butterfly
382 ################
383
384 # set up an SVSHAPE
385 class SVSHAPE:
386 pass
387 # j schedule
388 SVSHAPE0 = SVSHAPE()
389 SVSHAPE0.lims = [xdim, ydim, zdim]
390 SVSHAPE0.submode2 = 0b010
391 SVSHAPE0.mode = 0b01
392 SVSHAPE0.skip = 0b00
393 SVSHAPE0.offset = 0 # experiment with different offset, here
394 SVSHAPE0.invxyz = [0,0,0] # inversion if desired
395 # j+halfstep schedule
396 SVSHAPE1 = SVSHAPE()
397 SVSHAPE1.lims = [xdim, ydim, zdim]
398 SVSHAPE1.submode2 = 0b010
399 SVSHAPE1.mode = 0b01
400 SVSHAPE1.skip = 0b01
401 SVSHAPE1.offset = 0 # experiment with different offset, here
402 SVSHAPE1.invxyz = [0,0,0] # inversion if desired
403
404 # enumerate over the iterator function, getting new indices
405 schedule = []
406 i0 = iterate_dct_inner_butterfly_indices(SVSHAPE0)
407 i1 = iterate_dct_inner_butterfly_indices(SVSHAPE1)
408 for idx, (jl, jh) in enumerate(zip(i0, i1)):
409 schedule.append((jl, jh))
410 if jl[1] == 0b111: # end
411 break
412
413 # ok now pretty-print the results, with some debug output
414 print ("inner butterfly")
415 pprint_schedule(schedule, n)
416 print ("")
417
418 ################
419 # outer butterfly
420 ################
421
422 # j schedule
423 SVSHAPE0 = SVSHAPE()
424 SVSHAPE0.lims = [xdim, ydim, zdim]
425 SVSHAPE0.mode = 0b10
426 SVSHAPE0.submode2 = 0b100
427 SVSHAPE0.skip = 0b10
428 SVSHAPE0.offset = 0 # experiment with different offset, here
429 SVSHAPE0.invxyz = [1,0,0] # inversion if desired
430 # j+halfstep schedule
431 SVSHAPE1 = SVSHAPE()
432 SVSHAPE1.lims = [xdim, ydim, zdim]
433 SVSHAPE1.mode = 0b10
434 SVSHAPE1.submode2 = 0b100
435 SVSHAPE1.skip = 0b11
436 SVSHAPE1.offset = 0 # experiment with different offset, here
437 SVSHAPE1.invxyz = [1,0,0] # inversion if desired
438
439 # enumerate over the iterator function, getting new indices
440 schedule = []
441 i0 = iterate_dct_outer_butterfly_indices(SVSHAPE0)
442 i1 = iterate_dct_outer_butterfly_indices(SVSHAPE1)
443 for idx, (jl, jh) in enumerate(zip(i0, i1)):
444 schedule.append((jl, jh))
445 if jl[1] == 0b111: # end
446 break
447
448 # ok now pretty-print the results, with some debug output
449 print ("outer butterfly")
450 pprint_schedule_outer(schedule, n)
451
452 # run the demo
453 if __name__ == '__main__':
454 demo()