create schedule for calculating COS coefficient in DCT
[openpower-isa.git] / src / openpower / decoder / isa / remap_dct_yield.py
1 # DCT "REMAP" scheduler
2 #
3 # Modifications made to create an in-place iterative DCT:
4 # Copyright (c) 2021 Luke Kenneth Casson Leighton <lkcl@lkcl.net>
5 #
6 # SPDX: LGPLv3+
7 #
8 # Original fastdctlee.py by Nayuki:
9 # Copyright (c) 2020 Project Nayuki. (MIT License)
10 # https://www.nayuki.io/page/fast-discrete-cosine-transform-algorithms
11
12 import math
13
14 # bits of the integer 'val'.
15 def reverse_bits(val, width):
16 result = 0
17 for _ in range(width):
18 result = (result << 1) | (val & 1)
19 val >>= 1
20 return result
21
22
23 # iterative version of [recursively-applied] half-rev.
24 # relies on the list lengths being power-of-two and the fact
25 # that bit-inversion of a list of binary numbers is the same
26 # as reversing the order of the list
27 # this version is dead easy to implement in hardware.
28 # a big surprise is that the half-reversal can be done with
29 # such a simple XOR. the inverse operation is slightly trickier
30 def halfrev2(vec, pre_rev=True):
31 res = []
32 for i in range(len(vec)):
33 if pre_rev:
34 res.append(i ^ (i>>1))
35 else:
36 ri = i
37 bl = i.bit_length()
38 for ji in range(1, bl):
39 ri ^= (i >> ji)
40 res.append(vec[ri])
41 return res
42
43
44 # python "yield" can be iterated. use this to make it clear how
45 # the indices are generated by using natural-looking nested loops
46 def iterate_dct_inner_butterfly_indices(SVSHAPE):
47 # get indices to iterate over, in the required order
48 n = SVSHAPE.lims[0]
49 # createing lists of indices to iterate over in each dimension
50 # has to be done dynamically, because it depends on the size
51 # first, the size-based loop (which can be done statically)
52 x_r = []
53 size = 2
54 while size <= n:
55 x_r.append(size)
56 size *= 2
57 # invert order if requested
58 if SVSHAPE.invxyz[0]:
59 x_r.reverse()
60
61 if len(x_r) == 0:
62 return
63
64 # reference (read/write) the in-place data in *reverse-bit-order*
65 ri = list(range(n))
66 if SVSHAPE.submode2 == 0b01:
67 levels = n.bit_length() - 1
68 ri = [ri[reverse_bits(i, levels)] for i in range(n)]
69
70 # reference list for not needing to do data-swaps, just swap what
71 # *indices* are referenced (two levels of indirection at the moment)
72 # pre-reverse the data-swap list so that it *ends up* in the order 0123..
73 ji = list(range(n))
74 inplace_mode = SVSHAPE.submode2 == 0b01
75 # and SVSHAPE.skip not in [0b10, 0b11]
76 if inplace_mode:
77 #print ("inplace mode")
78 ji = halfrev2(ji, True)
79
80 #print ("ri", ri)
81 #print ("ji", ji)
82
83 # start an infinite (wrapping) loop
84 skip = 0
85 while True:
86 for size in x_r: # loop over 3rd order dimension (size)
87 x_end = size == x_r[-1]
88 # y_r schedule depends on size
89 halfsize = size // 2
90 y_r = []
91 for i in range(0, n, size):
92 y_r.append(i)
93 # invert if requested
94 if SVSHAPE.invxyz[1]: y_r.reverse()
95 for i in y_r: # loop over 2nd order dimension
96 y_end = i == y_r[-1]
97 # two lists of half-range indices, e.g. j 0123, jr 7654
98 j = list(range(i, i + halfsize))
99 jr = list(range(i+halfsize, i + size))
100 jr.reverse()
101 # invert if requested
102 if SVSHAPE.invxyz[2]: j_r.reverse()
103 hz2 = halfsize // 2 # zero stops reversing 1-item lists
104 # if you *really* want to do the in-place swapping manually,
105 # this allows you to do it. good luck...
106 if SVSHAPE.submode2 == 0b01 and not inplace_mode:
107 #print ("swap mode")
108 jr = j_r[:hz2]
109 #print ("xform jr", jr)
110 # loop over 1st order dimension
111 for ci, (jl, jh) in enumerate(zip(j, jr)):
112 z_end = jl == j[-1]
113 # now depending on MODE return the index. inner butterfly
114 if SVSHAPE.skip == 0b00: # in [0b00, 0b10]:
115 result = ri[ji[jl]] # lower half
116 elif SVSHAPE.skip == 0b01: # in [0b01, 0b11]:
117 result = ri[ji[jh]] # upper half, reverse order
118 elif SVSHAPE.skip == 0b10: #
119 result = ci # coefficient helper
120 elif SVSHAPE.skip == 0b11: #
121 result = size # coefficient helper
122 loopends = (z_end |
123 ((y_end and z_end)<<1) |
124 ((y_end and x_end and z_end)<<2))
125
126 yield result + SVSHAPE.offset, loopends
127
128 # now in-place swap
129 if inplace_mode:
130 for ci, (jl, jh) in enumerate(zip(j[:hz2], jr[:hz2])):
131 jlh = jl+halfsize
132 #print ("inplace swap", jh, jlh)
133 tmp1, tmp2 = ji[jlh], ji[jh]
134 ji[jlh], ji[jh] = tmp2, tmp1
135
136
137 # python "yield" can be iterated. use this to make it clear how
138 # the indices are generated by using natural-looking nested loops
139 def iterate_dct_outer_butterfly_indices(SVSHAPE):
140 # get indices to iterate over, in the required order
141 n = SVSHAPE.lims[0]
142 # createing lists of indices to iterate over in each dimension
143 # has to be done dynamically, because it depends on the size
144 # first, the size-based loop (which can be done statically)
145 x_r = []
146 size = n // 2
147 while size >= 2:
148 x_r.append(size)
149 size //= 2
150 # invert order if requested
151 if SVSHAPE.invxyz[0]:
152 x_r.reverse()
153
154 if len(x_r) == 0:
155 return
156
157 #print ("outer butterfly")
158
159 # reference (read/write) the in-place data in *reverse-bit-order*
160 ri = list(range(n))
161 if SVSHAPE.submode2 == 0b11:
162 levels = n.bit_length() - 1
163 ri = [ri[reverse_bits(i, levels)] for i in range(n)]
164
165 # reference list for not needing to do data-swaps, just swap what
166 # *indices* are referenced (two levels of indirection at the moment)
167 # pre-reverse the data-swap list so that it *ends up* in the order 0123..
168 ji = list(range(n))
169 inplace_mode = False # need the space... SVSHAPE.skip in [0b10, 0b11]
170 if inplace_mode:
171 #print ("inplace mode", SVSHAPE.skip)
172 ji = halfrev2(ji, True)
173
174 #print ("ri", ri)
175 #print ("ji", ji)
176
177 # start an infinite (wrapping) loop
178 skip = 0
179 while True:
180 for size in x_r: # loop over 3rd order dimension (size)
181 halfsize = size//2
182 x_end = size == x_r[-1]
183 y_r = list(range(0, halfsize))
184 #print ("itersum", halfsize, size, y_r)
185 # invert if requested
186 if SVSHAPE.invxyz[1]: y_r.reverse()
187 for i in y_r: # loop over 2nd order dimension
188 y_end = i == y_r[-1]
189 # one list to create iterative-sum schedule
190 jr = list(range(i+halfsize, i+n-halfsize, size))
191 #print ("itersum jr", i+halfsize, i+size, jr)
192 # invert if requested
193 if SVSHAPE.invxyz[2]: j_r.reverse()
194 hz2 = halfsize // 2 # zero stops reversing 1-item lists
195 for ci, jh in enumerate(jr): # loop over 1st order dimension
196 z_end = jh == jr[-1]
197 #print (" itersum", size, i, jh, jh+size)
198 if SVSHAPE.skip == 0b00: # in [0b00, 0b10]:
199 result = ri[ji[jh]] # lower half
200 elif SVSHAPE.skip == 0b01: # in [0b01, 0b11]:
201 result = ri[ji[jh+size]] # upper half
202 elif SVSHAPE.skip == 0b10: #
203 result = ci # coefficient helper
204 elif SVSHAPE.skip == 0b11: #
205 result = size # coefficient helper
206 loopends = (z_end |
207 ((y_end and z_end)<<1) |
208 ((y_end and x_end and z_end)<<2))
209
210 yield result + SVSHAPE.offset, loopends
211
212 # now in-place swap
213 if SVSHAPE.submode2 == 0b11 and inplace_mode:
214 j = list(range(i, i + halfsize))
215 jr = list(range(i+halfsize, i + size))
216 jr.reverse()
217 for ci, (jl, jh) in enumerate(zip(j[:hz2], jr[:hz2])):
218 jlh = jl+halfsize
219 #print ("inplace swap", jh, jlh)
220 tmp1, tmp2 = ji[jlh], ji[jh]
221 ji[jlh], ji[jh] = tmp2, tmp1
222
223
224 def pprint_schedule(schedule, n):
225 size = 2
226 idx = 0
227 while size <= n:
228 halfsize = size // 2
229 tablestep = n // size
230 print ("size %d halfsize %d tablestep %d" % \
231 (size, halfsize, tablestep))
232 for i in range(0, n, size):
233 prefix = "i %d\t" % i
234 for j in range(i, i + halfsize):
235 (jl, je), (jh, he) = schedule[idx]
236 print (" %-3d\t%s j=%-2d jh=%-2d "
237 "j[jl=%-2d] j[jh=%-2d]" % \
238 (idx, prefix, j, j+halfsize,
239 jl, jh,
240 ),
241 "end", bin(je)[2:], bin(je)[2:])
242 idx += 1
243 size *= 2
244
245 def pprint_schedule_outer(schedule, n):
246 size = 2
247 idx = 0
248 while size <= n//2:
249 halfsize = size // 2
250 tablestep = n // size
251 print ("size %d halfsize %d tablestep %d" % \
252 (size, halfsize, tablestep))
253 y_r = list(range(0, halfsize))
254 for i in y_r:
255 prefix = "i %d\t" % i
256 jr = list(range(i+halfsize, i+n-halfsize, size))
257 for j in jr:
258 (jl, je), (jh, he) = schedule[idx]
259 print (" %-3d\t%s j=%-2d jh=%-2d "
260 "j[jl=%-2d] j[jh=%-2d]" % \
261 (idx, prefix, j, j+halfsize,
262 jl, jh,
263 ),
264 "end", bin(je)[2:], bin(je)[2:])
265 idx += 1
266 size *= 2
267
268
269 # totally cool *in-place* DCT algorithm using yield REMAPs
270 def transform2(vec):
271
272 # Initialization
273 n = len(vec)
274 print ()
275 print ("transform2", n)
276 levels = n.bit_length() - 1
277
278 # reference (read/write) the in-place data in *reverse-bit-order*
279 ri = list(range(n))
280 ri = [ri[reverse_bits(i, levels)] for i in range(n)]
281
282 # and pretend we LDed data in half-swapped *and* bit-reversed order as well
283 # TODO: merge these two
284 vec = halfrev2(vec, False)
285 vec = [vec[ri[i]] for i in range(n)]
286
287 # create a cos table: not strictly necessary but here for illustrative
288 # purposes, to demonstrate the point that it really *is* iterative.
289 # this table could be cached and used multiple times rather than
290 # computed every time.
291 ctable = []
292 size = n
293 while size >= 2:
294 halfsize = size // 2
295 for i in range(n//size):
296 for ci in range(halfsize):
297 ctable.append((math.cos((ci + 0.5) * math.pi / size) * 2.0))
298 size //= 2
299
300 ################
301 # INNER butterfly
302 ################
303 xdim = n
304 ydim = 0
305 zdim = 0
306
307 # set up an SVSHAPE
308 class SVSHAPE:
309 pass
310 # j schedule
311 SVSHAPE0 = SVSHAPE()
312 SVSHAPE0.lims = [xdim, ydim, zdim]
313 SVSHAPE0.mode = 0b01
314 SVSHAPE0.submode2 = 0b01
315 SVSHAPE0.skip = 0b00
316 SVSHAPE0.offset = 0 # experiment with different offset, here
317 SVSHAPE0.invxyz = [1,0,0] # inversion if desired
318 # j+halfstep schedule
319 SVSHAPE1 = SVSHAPE()
320 SVSHAPE1.lims = [xdim, ydim, zdim]
321 SVSHAPE1.mode = 0b01
322 SVSHAPE1.submode2 = 0b01
323 SVSHAPE1.skip = 0b01
324 SVSHAPE1.offset = 0 # experiment with different offset, here
325 SVSHAPE1.invxyz = [1,0,0] # inversion if desired
326 # ci schedule
327 SVSHAPE2 = SVSHAPE()
328 SVSHAPE2.lims = [xdim, ydim, zdim]
329 SVSHAPE2.mode = 0b01
330 SVSHAPE2.submode2 = 0b01
331 SVSHAPE2.skip = 0b10
332 SVSHAPE2.offset = 0 # experiment with different offset, here
333 SVSHAPE2.invxyz = [1,0,0] # inversion if desired
334 # size schedule
335 SVSHAPE3 = SVSHAPE()
336 SVSHAPE3.lims = [xdim, ydim, zdim]
337 SVSHAPE3.mode = 0b01
338 SVSHAPE3.submode2 = 0b01
339 SVSHAPE3.skip = 0b11
340 SVSHAPE3.offset = 0 # experiment with different offset, here
341 SVSHAPE3.invxyz = [1,0,0] # inversion if desired
342
343 # enumerate over the iterator function, getting new indices
344 i0 = iterate_dct_inner_butterfly_indices(SVSHAPE0)
345 i1 = iterate_dct_inner_butterfly_indices(SVSHAPE1)
346 i2 = iterate_dct_inner_butterfly_indices(SVSHAPE2)
347 i3 = iterate_dct_inner_butterfly_indices(SVSHAPE3)
348 for k, ((jl, jle), (jh, jhe), (ci, cie), (size, sze)) in \
349 enumerate(zip(i0, i1, i2, i3)):
350 t1, t2 = vec[jl], vec[jh]
351 print ("xform2", jl, jh, ci, size)
352 coeff = (math.cos((ci + 0.5) * math.pi / size) * 2.0)
353 assert coeff == ctable[k]
354 vec[jl] = t1 + t2
355 vec[jh] = (t1 - t2) * (1/coeff)
356 print ("coeff", size, i, "ci", ci,
357 "jl", jl, "jh", jh,
358 "i/n", (ci+0.5)/size, coeff, vec[jl],
359 vec[jh],
360 "end", bin(jle), bin(jhe))
361 if jle == 0b111: # all loops end
362 break
363
364 print("transform2 pre-itersum", vec)
365
366 # now things are in the right order for the outer butterfly.
367
368 # j schedule
369 SVSHAPE0 = SVSHAPE()
370 SVSHAPE0.lims = [xdim, ydim, zdim]
371 SVSHAPE0.submode2 = 0b100
372 SVSHAPE0.mode = 0b01
373 SVSHAPE0.skip = 0b00
374 SVSHAPE0.offset = 0 # experiment with different offset, here
375 SVSHAPE0.invxyz = [0,0,0] # inversion if desired
376 # j+halfstep schedule
377 SVSHAPE1 = SVSHAPE()
378 SVSHAPE1.lims = [xdim, ydim, zdim]
379 SVSHAPE1.mode = 0b01
380 SVSHAPE1.submode2 = 0b100
381 SVSHAPE1.skip = 0b01
382 SVSHAPE1.offset = 0 # experiment with different offset, here
383 SVSHAPE1.invxyz = [0,0,0] # inversion if desired
384
385 # enumerate over the iterator function, getting new indices
386 i0 = iterate_dct_outer_butterfly_indices(SVSHAPE0)
387 i1 = iterate_dct_outer_butterfly_indices(SVSHAPE1)
388 for k, ((jl, jle), (jh, jhe)) in enumerate(zip(i0, i1)):
389 print ("itersum jr", jl, jh,
390 "end", bin(jle), bin(jhe))
391 vec[jl] += vec[jh]
392 size //= 2
393 if jle == 0b111: # all loops end
394 break
395
396 print("transform2 result", vec)
397
398 return vec
399
400
401 def demo():
402 # set the dimension sizes here
403 n = 8
404 xdim = n
405 ydim = 0 # not needed
406 zdim = 0 # again, not needed
407
408
409 ################
410 # INNER butterfly
411 ################
412
413 # set up an SVSHAPE
414 class SVSHAPE:
415 pass
416 # j schedule
417 SVSHAPE0 = SVSHAPE()
418 SVSHAPE0.lims = [xdim, ydim, zdim]
419 SVSHAPE0.submode2 = 0b010
420 SVSHAPE0.mode = 0b01
421 SVSHAPE0.skip = 0b00
422 SVSHAPE0.offset = 0 # experiment with different offset, here
423 SVSHAPE0.invxyz = [0,0,0] # inversion if desired
424 # j+halfstep schedule
425 SVSHAPE1 = SVSHAPE()
426 SVSHAPE1.lims = [xdim, ydim, zdim]
427 SVSHAPE1.submode2 = 0b010
428 SVSHAPE1.mode = 0b01
429 SVSHAPE1.skip = 0b01
430 SVSHAPE1.offset = 0 # experiment with different offset, here
431 SVSHAPE1.invxyz = [0,0,0] # inversion if desired
432
433 # enumerate over the iterator function, getting new indices
434 schedule = []
435 i0 = iterate_dct_inner_butterfly_indices(SVSHAPE0)
436 i1 = iterate_dct_inner_butterfly_indices(SVSHAPE1)
437 for idx, (jl, jh) in enumerate(zip(i0, i1)):
438 schedule.append((jl, jh))
439 if jl[1] == 0b111: # end
440 break
441
442 # ok now pretty-print the results, with some debug output
443 print ("inner butterfly")
444 pprint_schedule(schedule, n)
445 print ("")
446
447 ################
448 # outer butterfly
449 ################
450
451 # j schedule
452 SVSHAPE0 = SVSHAPE()
453 SVSHAPE0.lims = [xdim, ydim, zdim]
454 SVSHAPE0.mode = 0b10
455 SVSHAPE0.submode2 = 0b100
456 SVSHAPE0.skip = 0b10
457 SVSHAPE0.offset = 0 # experiment with different offset, here
458 SVSHAPE0.invxyz = [1,0,0] # inversion if desired
459 # j+halfstep schedule
460 SVSHAPE1 = SVSHAPE()
461 SVSHAPE1.lims = [xdim, ydim, zdim]
462 SVSHAPE1.mode = 0b10
463 SVSHAPE1.submode2 = 0b100
464 SVSHAPE1.skip = 0b11
465 SVSHAPE1.offset = 0 # experiment with different offset, here
466 SVSHAPE1.invxyz = [1,0,0] # inversion if desired
467
468 # enumerate over the iterator function, getting new indices
469 schedule = []
470 i0 = iterate_dct_outer_butterfly_indices(SVSHAPE0)
471 i1 = iterate_dct_outer_butterfly_indices(SVSHAPE1)
472 for idx, (jl, jh) in enumerate(zip(i0, i1)):
473 schedule.append((jl, jh))
474 if jl[1] == 0b111: # end
475 break
476
477 # ok now pretty-print the results, with some debug output
478 print ("outer butterfly")
479 pprint_schedule_outer(schedule, n)
480
481 # run the demo
482 if __name__ == '__main__':
483 demo()