pysvp64db: fix traversal
[openpower-isa.git] / src / openpower / decoder / isa / remap_dct_yield.py
1 # DCT "REMAP" scheduler
2 #
3 # Modifications made to create an in-place iterative DCT:
4 # Copyright (c) 2021 Luke Kenneth Casson Leighton <lkcl@lkcl.net>
5 #
6 # SPDX: LGPLv3+
7 #
8 # Original fastdctlee.py by Nayuki:
9 # Copyright (c) 2020 Project Nayuki. (MIT License)
10 # https://www.nayuki.io/page/fast-discrete-cosine-transform-algorithms
11
12 from copy import deepcopy
13 import math
14
15 # bits of the integer 'val'.
16 def reverse_bits(val, width):
17 result = 0
18 for _ in range(width):
19 result = (result << 1) | (val & 1)
20 val >>= 1
21 return result
22
23
24 # iterative version of [recursively-applied] half-rev.
25 # relies on the list lengths being power-of-two and the fact
26 # that bit-inversion of a list of binary numbers is the same
27 # as reversing the order of the list
28 # this version is dead easy to implement in hardware.
29 # a big surprise is that the half-reversal can be done with
30 # such a simple XOR. the inverse operation is slightly trickier
31 def halfrev2(vec, pre_rev=True):
32 res = []
33 for i in range(len(vec)):
34 if pre_rev:
35 res.append(vec[i ^ (i>>1)])
36 else:
37 ri = i
38 bl = i.bit_length()
39 for ji in range(1, bl):
40 ri ^= (i >> ji)
41 res.append(vec[ri])
42 return res
43
44
45 # python "yield" can be iterated. use this to make it clear how
46 # the indices are generated by using natural-looking nested loops
47 def iterate_dct_inner_halfswap_loadstore(SVSHAPE):
48 # get indices to iterate over, in the required order
49 n = SVSHAPE.lims[0]
50 mode = SVSHAPE.lims[1]
51 stride = SVSHAPE.lims[2]
52 print ("inner halfswap loadstore", n, mode, SVSHAPE.skip,
53 "submode", SVSHAPE.submode2, "stride", stride)
54
55 # reference list for not needing to do data-swaps, just swap what
56 # *indices* are referenced (two levels of indirection at the moment)
57 # pre-reverse the data-swap list so that it *ends up* in the order 0123..
58 ji = list(range(n))
59
60 levels = n.bit_length() - 1
61 ri = [reverse_bits(i, levels) for i in range(n)]
62
63 if SVSHAPE.mode == 0b01: # FFT, bitrev only
64 ji = [ji[ri[i]] for i in range(n)]
65 elif SVSHAPE.submode2 == 0b001:
66 ji = [ji[ri[i]] for i in range(n)]
67 ji = halfrev2(ji, True)
68 else:
69 ji = halfrev2(ji, False)
70 ji = [ji[ri[i]] for i in range(n)]
71
72 # invert order if requested
73 if SVSHAPE.invxyz[0]:
74 ji.reverse()
75
76 for i, jl in enumerate(ji):
77 y_end = jl == ji[-1]
78 yield jl * stride, (0b111 if y_end else 0b000)
79
80
81 # python "yield" can be iterated. use this to make it clear how
82 # the indices are generated by using natural-looking nested loops
83 def iterate_dct_inner_costable_indices(SVSHAPE):
84 # get indices to iterate over, in the required order
85 n = SVSHAPE.lims[0]
86 mode = SVSHAPE.lims[1]
87 stride = SVSHAPE.lims[2]
88 print ("inner costable", mode, "stride", stride)
89 # creating lists of indices to iterate over in each dimension
90 # has to be done dynamically, because it depends on the size
91 # first, the size-based loop (which can be done statically)
92 x_r = []
93 size = 2
94 while size <= n:
95 x_r.append(size)
96 size *= 2
97 # invert order if requested
98 if SVSHAPE.invxyz[0]:
99 x_r.reverse()
100
101 if len(x_r) == 0:
102 return
103
104 #print ("ri", ri)
105 #print ("ji", ji)
106
107 # start an infinite (wrapping) loop
108 skip = 0
109 z_end = 1 # doesn't exist in this, only 2 loops
110 k = 0
111 while True:
112 for size in x_r: # loop over 3rd order dimension (size)
113 x_end = size == x_r[-1]
114 # y_r schedule depends on size
115 halfsize = size // 2
116 y_r = []
117 for i in range(0, n, size):
118 y_r.append(i)
119 # invert if requested
120 if SVSHAPE.invxyz[1]: y_r.reverse()
121 # two lists of half-range indices, e.g. j 0123, jr 7654
122 j = list(range(0, halfsize))
123 # invert if requested
124 if SVSHAPE.invxyz[2]: j_r.reverse()
125 #print ("xform jr", jr)
126 # loop over 1st order dimension
127 for ci, jl in enumerate(j):
128 y_end = jl == j[-1]
129 # now depending on MODE return the index. inner butterfly
130 if SVSHAPE.skip == 0b00: # in [0b00, 0b10]:
131 result = k # offset into COS table
132 elif SVSHAPE.skip == 0b10: #
133 result = ci # coefficient helper
134 elif SVSHAPE.skip == 0b11: #
135 result = size # coefficient helper
136 loopends = (z_end |
137 ((y_end and z_end)<<1) |
138 ((y_end and x_end and z_end)<<2))
139
140 yield (result * stride) + SVSHAPE.offset, loopends
141 k += 1
142
143 # python "yield" can be iterated. use this to make it clear how
144 # the indices are generated by using natural-looking nested loops
145 def iterate_dct_inner_butterfly_indices(SVSHAPE):
146 # get indices to iterate over, in the required order
147 n = SVSHAPE.lims[0]
148 mode = SVSHAPE.lims[1]
149 stride = SVSHAPE.lims[2]
150 print ("inner butterfly", mode, SVSHAPE.skip,
151 "submode", SVSHAPE.submode2, "stride", stride)
152 # creating lists of indices to iterate over in each dimension
153 # has to be done dynamically, because it depends on the size
154 # first, the size-based loop (which can be done statically)
155 x_r = []
156 size = 2
157 while size <= n:
158 x_r.append(size)
159 size *= 2
160 # invert order if requested
161 if SVSHAPE.invxyz[0]:
162 x_r.reverse()
163
164 if len(x_r) == 0:
165 return
166
167 # reference (read/write) the in-place data in *reverse-bit-order*
168 ri = list(range(n))
169 if SVSHAPE.submode2 == 0b01:
170 levels = n.bit_length() - 1
171 ri = [ri[reverse_bits(i, levels)] for i in range(n)]
172
173 # reference list for not needing to do data-swaps, just swap what
174 # *indices* are referenced (two levels of indirection at the moment)
175 # pre-reverse the data-swap list so that it *ends up* in the order 0123..
176 ji = list(range(n))
177 inplace_mode = True
178 if inplace_mode and SVSHAPE.submode2 == 0b01:
179 #print ("inplace mode")
180 ji = halfrev2(ji, True)
181 if inplace_mode and SVSHAPE.submode2 == 0b11:
182 ji = halfrev2(ji, False)
183
184 print ("ri", ri)
185 print ("ji", ji)
186
187 # start an infinite (wrapping) loop
188 while True:
189 k = 0
190 k_start = 0
191 for size in x_r: # loop over 3rd order dimension (size)
192 x_end = size == x_r[-1]
193 # y_r schedule depends on size
194 halfsize = size // 2
195 y_r = []
196 for i in range(0, n, size):
197 y_r.append(i)
198 # invert if requested
199 if SVSHAPE.invxyz[1]: y_r.reverse()
200 for i in y_r: # loop over 2nd order dimension
201 y_end = i == y_r[-1]
202 # two lists of half-range indices, e.g. j 0123, jr 7654
203 j = list(range(i, i + halfsize))
204 jr = list(range(i+halfsize, i + size))
205 jr.reverse()
206 # invert if requested
207 if SVSHAPE.invxyz[2]:
208 j.reverse()
209 jr.reverse()
210 hz2 = halfsize // 2 # zero stops reversing 1-item lists
211 #print ("xform jr", jr)
212 # loop over 1st order dimension
213 k = k_start
214 for ci, (jl, jh) in enumerate(zip(j, jr)):
215 z_end = jl == j[-1]
216 # now depending on MODE return the index. inner butterfly
217 if SVSHAPE.skip == 0b00: # in [0b00, 0b10]:
218 if SVSHAPE.submode2 == 0b11: # iDCT
219 result = ji[ri[jl]] # lower half
220 else:
221 result = ri[ji[jl]] # lower half
222 elif SVSHAPE.skip == 0b01: # in [0b01, 0b11]:
223 if SVSHAPE.submode2 == 0b11: # iDCT
224 result = ji[ri[jl+halfsize]] # upper half
225 else:
226 result = ri[ji[jh]] # upper half
227 elif mode == 4:
228 # COS table pre-generated mode
229 if SVSHAPE.skip == 0b10: #
230 result = k # cos table offset
231 else: # mode 2
232 # COS table generated on-demand ("Vertical-First") mode
233 if SVSHAPE.skip == 0b10: #
234 result = ci # coefficient helper
235 elif SVSHAPE.skip == 0b11: #
236 result = size # coefficient helper
237 loopends = (z_end |
238 ((y_end and z_end)<<1) |
239 ((y_end and x_end and z_end)<<2))
240
241 yield (result * stride) + SVSHAPE.offset, loopends
242 k += 1
243
244 # now in-place swap
245 if inplace_mode:
246 for ci, (jl, jh) in enumerate(zip(j[:hz2], jr[:hz2])):
247 jlh = jl+halfsize
248 print ("inplace swap", jh, jlh)
249 tmp1, tmp2 = ji[jlh], ji[jh]
250 ji[jlh], ji[jh] = tmp2, tmp1
251
252 # new k_start point for cos tables( runs inside x_r loop NOT i loop)
253 k_start += halfsize
254
255
256 # python "yield" can be iterated. use this to make it clear how
257 # the indices are generated by using natural-looking nested loops
258 def iterate_dct_outer_butterfly_indices(SVSHAPE):
259 # get indices to iterate over, in the required order
260 n = SVSHAPE.lims[0]
261 mode = SVSHAPE.lims[1]
262 stride = SVSHAPE.lims[2]
263 # creating lists of indices to iterate over in each dimension
264 # has to be done dynamically, because it depends on the size
265 # first, the size-based loop (which can be done statically)
266 x_r = []
267 size = n // 2
268 while size >= 2:
269 x_r.append(size)
270 size //= 2
271 # invert order if requested
272 if SVSHAPE.invxyz[0]:
273 x_r.reverse()
274
275 if len(x_r) == 0:
276 return
277
278 print ("outer butterfly", mode, SVSHAPE.skip,
279 "submode", SVSHAPE.submode2,
280 "stride", stride)
281
282 # I-DCT, reference (read/write) the in-place data in *reverse-bit-order*
283 ri = list(range(n))
284 if SVSHAPE.submode2 in [0b11, 0b01]:
285 levels = n.bit_length() - 1
286 ri = [ri[reverse_bits(i, levels)] for i in range(n)]
287
288 # reference list for not needing to do data-swaps, just swap what
289 # *indices* are referenced (two levels of indirection at the moment)
290 # pre-reverse the data-swap list so that it *ends up* in the order 0123..
291 ji = list(range(n))
292 inplace_mode = False # need the space... SVSHAPE.skip in [0b10, 0b11]
293 if SVSHAPE.submode2 == 0b11:
294 ji = halfrev2(ji, False)
295
296 print ("ri", ri)
297 print ("ji", ji)
298
299 # start an infinite (wrapping) loop
300 while True:
301 k = 0
302 k_start = 0
303 for size in x_r: # loop over 3rd order dimension (size)
304 halfsize = size//2
305 x_end = size == x_r[-1]
306 y_r = list(range(0, halfsize))
307 print ("itersum", halfsize, size, y_r, "invert", SVSHAPE.invxyz[1])
308 # invert if requested
309 if SVSHAPE.invxyz[1]: y_r.reverse()
310 for i in y_r: # loop over 2nd order dimension
311 y_end = i == y_r[-1]
312 # one list to create iterative-sum schedule
313 jr = list(range(i+halfsize, i+n-halfsize, size))
314 # invert if requested
315 if SVSHAPE.invxyz[2]: jr.reverse()
316 print ("itersum jr", i+halfsize, i+size, jr,
317 "invert", SVSHAPE.invxyz[2])
318 hz2 = halfsize // 2 # zero stops reversing 1-item lists
319 k = k_start
320 for ci, jh in enumerate(jr): # loop over 1st order dimension
321 z_end = jh == jr[-1]
322 #print (" itersum", size, i, jh, jh+size)
323 if mode == 4:
324 # COS table pre-generated mode
325 if SVSHAPE.skip == 0b00: # in [0b00, 0b10]:
326 if SVSHAPE.submode2 == 0b11: # iDCT
327 result = ji[ri[jh]] # upper half
328 else:
329 result = ri[ji[jh]] # lower half
330 elif SVSHAPE.skip == 0b01: # in [0b01, 0b11]:
331 if SVSHAPE.submode2 == 0b11: # iDCT
332 result = ji[ri[jh+size]] # upper half
333 else:
334 result = ri[ji[jh+size]] # upper half
335 elif SVSHAPE.skip == 0b10: #
336 result = k # cos table offset
337 else:
338 # COS table generated on-demand ("Vertical-First") mode
339 if SVSHAPE.skip == 0b00: # in [0b00, 0b10]:
340 if SVSHAPE.submode2 == 0b11: # iDCT
341 result = ji[ri[jh]] # lower half
342 else:
343 result = ri[ji[jh]] # lower half
344 elif SVSHAPE.skip == 0b01: # in [0b01, 0b11]:
345 if SVSHAPE.submode2 == 0b11: # iDCT
346 result = ji[ri[jh+size]] # upper half
347 else:
348 result = ri[ji[jh+size]] # upper half
349 elif SVSHAPE.skip == 0b10: #
350 result = ci # coefficient helper
351 elif SVSHAPE.skip == 0b11: #
352 result = size # coefficient helper
353 loopends = (z_end |
354 ((y_end and z_end)<<1) |
355 ((y_end and x_end and z_end)<<2))
356
357 yield (result * stride) + SVSHAPE.offset, loopends
358 k += 1
359
360 # now in-place swap (disabled)
361 if False and SVSHAPE.submode2 == 0b11:
362 j = list(range(i, i + halfsize))
363 jr = list(range(i+halfsize, i + size))
364 jr.reverse()
365 for ci, (jl, jh) in enumerate(zip(j[:hz2], jr[:hz2])):
366 jlh = jl+halfsize
367 tmp1, tmp2 = ji[jlh], ji[jh]
368 print ("inplace swap", jh, jlh, "actual", tmp1, tmp2)
369 ji[jlh], ji[jh] = tmp2, tmp1
370
371 # new k_start point for cos tables( runs inside x_r loop NOT i loop)
372 k_start += halfsize
373
374
375 def pprint_schedule(schedule, n):
376 size = 2
377 idx = 0
378 while size <= n:
379 halfsize = size // 2
380 tablestep = n // size
381 print ("size %d halfsize %d tablestep %d" % \
382 (size, halfsize, tablestep))
383 for i in range(0, n, size):
384 prefix = "i %d\t" % i
385 for j in range(i, i + halfsize):
386 (jl, je), (jh, he) = schedule[idx]
387 print (" %-3d\t%s j=%-2d jh=%-2d "
388 "j[jl=%-2d] j[jh=%-2d]" % \
389 (idx, prefix, j, j+halfsize,
390 jl, jh,
391 ),
392 "end", bin(je)[2:], bin(je)[2:])
393 idx += 1
394 size *= 2
395
396 def pprint_schedule_outer(schedule, n):
397 size = 2
398 idx = 0
399 while size <= n//2:
400 halfsize = size // 2
401 tablestep = n // size
402 print ("size %d halfsize %d tablestep %d" % \
403 (size, halfsize, tablestep))
404 y_r = list(range(0, halfsize))
405 for i in y_r:
406 prefix = "i %d\t" % i
407 jr = list(range(i+halfsize, i+n-halfsize, size))
408 for j in jr:
409 (jl, je), (jh, he) = schedule[idx]
410 print (" %-3d\t%s j=%-2d jh=%-2d "
411 "j[jl=%-2d] j[jh=%-2d]" % \
412 (idx, prefix, j, j+halfsize,
413 jl, jh,
414 ),
415 "end", bin(je)[2:], bin(je)[2:])
416 idx += 1
417 size *= 2
418
419
420 # totally cool *in-place* inverse DCT algorithm using yield REMAPs
421 def inverse_transform2(vec):
422
423 vec = deepcopy(vec)
424
425 # Initialization
426 n = len(vec)
427 print ()
428 print ("inverse_transform2", n)
429 levels = n.bit_length() - 1
430
431 # set up dims
432 xdim = n
433
434 # divide element 0 by 2
435 vec[0] /= 2.0
436
437 # reference (read/write) the in-place data in *reverse-bit-order*
438 ri = list(range(n))
439 ri = [ri[reverse_bits(i, levels)] for i in range(n)]
440
441 # pretend we LDed data in half-swapped *and* bit-reversed order as well
442 # TODO: merge these two
443 vec = [vec[ri[i]] for i in range(n)]
444 vec = halfrev2(vec, True)
445
446 print("inverse_transform2 post-alter", vec)
447
448 # create a cos table: not strictly necessary but here for illustrative
449 # purposes, to demonstrate the point that it really *is* iterative.
450 # this table could be cached and used multiple times rather than
451 # computed every time.
452 ctable = []
453 size = 2
454 while size <= n:
455 halfsize = size // 2
456 for ci in range(halfsize):
457 coeff = (math.cos((ci + 0.5) * math.pi / size) * 2.0)
458 ctable.append(coeff)
459 print ("coeff", size, "ci", ci, "k", len(ctable)-1,
460 "i/n", (ci+0.5)/size, coeff)
461 size *= 2
462
463 # set up an SVSHAPE
464 class SVSHAPE:
465 pass
466
467 # XXX TODO
468 if False:
469 # ci schedule
470 SVSHAPE0 = SVSHAPE()
471 SVSHAPE0.lims = [xdim, 4, 1]
472 SVSHAPE0.mode = 0b01
473 SVSHAPE0.submode2 = 0b01
474 SVSHAPE0.skip = 0b10
475 SVSHAPE0.offset = 0 # experiment with different offset, here
476 SVSHAPE0.invxyz = [1,0,1] # inversion if desired
477 # size schedule
478 SVSHAPE1 = SVSHAPE()
479 SVSHAPE1.lims = [xdim, 4, 1]
480 SVSHAPE1.mode = 0b01
481 SVSHAPE1.submode2 = 0b01
482 SVSHAPE1.skip = 0b11
483 SVSHAPE1.offset = 0 # experiment with different offset, here
484 SVSHAPE1.invxyz = [1,0,1] # inversion if desired
485 # k schedule
486 SVSHAPE2 = SVSHAPE()
487 SVSHAPE2.lims = [xdim, 4, 1]
488 SVSHAPE2.mode = 0b01
489 SVSHAPE2.submode2 = 0b01
490 SVSHAPE2.skip = 0b00
491 SVSHAPE2.offset = 0 # experiment with different offset, here
492 SVSHAPE2.invxyz = [1,0,1] # inversion if desired
493
494 # enumerate over the iterator function, getting new indices
495 i0 = iterate_dct_inner_costable_indices(SVSHAPE0)
496 i1 = iterate_dct_inner_costable_indices(SVSHAPE1)
497 i2 = iterate_dct_inner_costable_indices(SVSHAPE2)
498 for ((ci, cie), (size, sze), (k, ke)) in \
499 zip(i0, i1, i2):
500 print ("xform2 cos", ci, size, k)
501 coeff = (math.cos((ci + 0.5) * math.pi / size) * 2.0)
502 assert coeff == ctable[k]
503 print ("coeff", size, "ci", ci, "k", k,
504 "i/n", (ci+0.5)/size, coeff,
505 "end", bin(cie), bin(sze), bin(ke))
506 if cie == 0b111: # all loops end
507 break
508
509 # now things are in the right order for the outer butterfly.
510
511 # j schedule
512 SVSHAPE0 = SVSHAPE()
513 SVSHAPE0.lims = [xdim, 0b0000010, 1]
514 SVSHAPE0.submode2 = 0b11
515 SVSHAPE0.mode = 0b11
516 SVSHAPE0.skip = 0b00
517 SVSHAPE0.offset = 0 # experiment with different offset, here
518 SVSHAPE0.invxyz = [1,0,1] # inversion if desired
519 # j+halfstep schedule
520 SVSHAPE1 = SVSHAPE()
521 SVSHAPE1.lims = [xdim, 0b0000010, 1]
522 SVSHAPE1.mode = 0b11
523 SVSHAPE1.submode2 = 0b11
524 SVSHAPE1.skip = 0b01
525 SVSHAPE1.offset = 0 # experiment with different offset, here
526 SVSHAPE1.invxyz = [1,0,1] # inversion if desired
527
528 # enumerate over the iterator function, getting new indices
529 i0 = iterate_dct_outer_butterfly_indices(SVSHAPE0)
530 i1 = iterate_dct_outer_butterfly_indices(SVSHAPE1)
531 for k, ((jl, jle), (jh, jhe)) in enumerate(zip(i0, i1)):
532 print ("itersum jr", jl, jh,
533 "end", bin(jle), bin(jhe),
534 vec[jl], vec[jh], vec[jh]+vec[jl])
535 vec[jh] += vec[jl]
536 size //= 2
537 if jle == 0b111: # all loops end
538 break
539
540 print("transform2 pre-itersum", vec)
541
542 ################
543 # INNER butterfly
544 ################
545
546 # j schedule
547 SVSHAPE0 = SVSHAPE()
548 SVSHAPE0.lims = [xdim, 0b000001, 1]
549 SVSHAPE0.mode = 0b11
550 SVSHAPE0.submode2 = 0b11
551 SVSHAPE0.skip = 0b00
552 SVSHAPE0.offset = 0 # experiment with different offset, here
553 SVSHAPE0.invxyz = [0,0,0] # inversion if desired
554 # j+halfstep schedule
555 SVSHAPE1 = SVSHAPE()
556 SVSHAPE1.lims = [xdim, 0b000001, 1]
557 SVSHAPE1.mode = 0b11
558 SVSHAPE1.submode2 = 0b11
559 SVSHAPE1.skip = 0b01
560 SVSHAPE1.offset = 0 # experiment with different offset, here
561 SVSHAPE1.invxyz = [0,0,0] # inversion if desired
562 # ci schedule
563 SVSHAPE2 = SVSHAPE()
564 SVSHAPE2.lims = [xdim, 0b000001, 1]
565 SVSHAPE2.mode = 0b11
566 SVSHAPE2.submode2 = 0b11
567 SVSHAPE2.skip = 0b10
568 SVSHAPE2.offset = 0 # experiment with different offset, here
569 SVSHAPE2.invxyz = [0,0,0] # inversion if desired
570 # size schedule
571 SVSHAPE3 = SVSHAPE()
572 SVSHAPE3.lims = [xdim, 0b000001, 1]
573 SVSHAPE3.mode = 0b11
574 SVSHAPE3.submode2 = 0b11
575 SVSHAPE3.skip = 0b11
576 SVSHAPE3.offset = 0 # experiment with different offset, here
577 SVSHAPE3.invxyz = [0,0,0] # inversion if desired
578
579 # enumerate over the iterator function, getting new indices
580 i0 = iterate_dct_inner_butterfly_indices(SVSHAPE0)
581 i1 = iterate_dct_inner_butterfly_indices(SVSHAPE1)
582 i2 = iterate_dct_inner_butterfly_indices(SVSHAPE2)
583 i3 = iterate_dct_inner_butterfly_indices(SVSHAPE3)
584 for k, ((jl, jle), (jh, jhe), (ci, cie), (size, sze)) in \
585 enumerate(zip(i0, i1, i2, i3)):
586 t1, t2 = vec[jl], vec[jh]
587 print ("xform2", jl, jh, ci, size)
588 coeff = (math.cos((ci + 0.5) * math.pi / size) * 2.0)
589 #assert coeff == ctable[k]
590 vec[jl] = t1 + t2/coeff
591 vec[jh] = t1 - t2/coeff
592 print ("coeff", size, "ci", ci,
593 "jl", jl, "jh", jh,
594 "i/n", (ci+0.5)/size, coeff, "t1/2", t1, t2,
595 "+/-", vec[jl], vec[jh],
596 "end", bin(jle), bin(jhe))
597 if jle == 0b111: # all loops end
598 break
599
600 print("inverse_transform2 result", vec)
601
602 return vec
603
604
605 # totally cool *in-place* DCT algorithm using yield REMAPs
606 def transform2(vec):
607
608 # Initialization
609 n = len(vec)
610 print ()
611 print ("transform2", n)
612 levels = n.bit_length() - 1
613
614 # set up dims
615 xdim = n
616
617 # reference (read/write) the in-place data in *reverse-bit-order*
618 ri = list(range(n))
619 ri = [ri[reverse_bits(i, levels)] for i in range(n)]
620
621 # and pretend we LDed data in half-swapped *and* bit-reversed order as well
622 # TODO: merge these two
623 vec = halfrev2(vec, False)
624 vec = [vec[ri[i]] for i in range(n)]
625
626 # create a cos table: not strictly necessary but here for illustrative
627 # purposes, to demonstrate the point that it really *is* iterative.
628 # this table could be cached and used multiple times rather than
629 # computed every time.
630 ctable = []
631 size = n
632 while size >= 2:
633 halfsize = size // 2
634 for ci in range(halfsize):
635 coeff = (math.cos((ci + 0.5) * math.pi / size) * 2.0)
636 ctable.append(coeff)
637 print ("coeff", size, "ci", ci, "k", len(ctable)-1,
638 "i/n", (ci+0.5)/size, coeff)
639 size //= 2
640
641 # set up an SVSHAPE
642 class SVSHAPE:
643 pass
644 # ci schedule
645 SVSHAPE0 = SVSHAPE()
646 SVSHAPE0.lims = [xdim, 4, 1]
647 SVSHAPE0.mode = 0b01
648 SVSHAPE0.submode2 = 0b01
649 SVSHAPE0.skip = 0b10
650 SVSHAPE0.offset = 0 # experiment with different offset, here
651 SVSHAPE0.invxyz = [1,0,0] # inversion if desired
652 # size schedule
653 SVSHAPE1 = SVSHAPE()
654 SVSHAPE1.lims = [xdim, 4, 1]
655 SVSHAPE1.mode = 0b01
656 SVSHAPE1.submode2 = 0b01
657 SVSHAPE1.skip = 0b11
658 SVSHAPE1.offset = 0 # experiment with different offset, here
659 SVSHAPE1.invxyz = [1,0,0] # inversion if desired
660 # k schedule
661 SVSHAPE2 = SVSHAPE()
662 SVSHAPE2.lims = [xdim, 4, 1]
663 SVSHAPE2.mode = 0b01
664 SVSHAPE2.submode2 = 0b01
665 SVSHAPE2.skip = 0b00
666 SVSHAPE2.offset = 0 # experiment with different offset, here
667 SVSHAPE2.invxyz = [1,0,0] # inversion if desired
668
669 # enumerate over the iterator function, getting new indices
670 i0 = iterate_dct_inner_costable_indices(SVSHAPE0)
671 i1 = iterate_dct_inner_costable_indices(SVSHAPE1)
672 i2 = iterate_dct_inner_costable_indices(SVSHAPE2)
673 for ((ci, cie), (size, sze), (k, ke)) in \
674 zip(i0, i1, i2):
675 print ("xform2 cos", ci, size, k)
676 coeff = (math.cos((ci + 0.5) * math.pi / size) * 2.0)
677 assert coeff == ctable[k]
678 print ("coeff", size, "ci", ci, "k", k,
679 "i/n", (ci+0.5)/size, coeff,
680 "end", bin(cie), bin(sze), bin(ke))
681 if cie == 0b111: # all loops end
682 break
683
684 ################
685 # INNER butterfly
686 ################
687
688 # j schedule
689 SVSHAPE0 = SVSHAPE()
690 SVSHAPE0.lims = [xdim, 0b000001, 1]
691 SVSHAPE0.mode = 0b01
692 SVSHAPE0.submode2 = 0b01
693 SVSHAPE0.skip = 0b00
694 SVSHAPE0.offset = 0 # experiment with different offset, here
695 SVSHAPE0.invxyz = [1,0,0] # inversion if desired
696 # j+halfstep schedule
697 SVSHAPE1 = SVSHAPE()
698 SVSHAPE1.lims = [xdim, 0b000001, 1]
699 SVSHAPE1.mode = 0b01
700 SVSHAPE1.submode2 = 0b01
701 SVSHAPE1.skip = 0b01
702 SVSHAPE1.offset = 0 # experiment with different offset, here
703 SVSHAPE1.invxyz = [1,0,0] # inversion if desired
704 # ci schedule
705 SVSHAPE2 = SVSHAPE()
706 SVSHAPE2.lims = [xdim, 0b000001, 1]
707 SVSHAPE2.mode = 0b01
708 SVSHAPE2.submode2 = 0b01
709 SVSHAPE2.skip = 0b10
710 SVSHAPE2.offset = 0 # experiment with different offset, here
711 SVSHAPE2.invxyz = [1,0,0] # inversion if desired
712 # size schedule
713 SVSHAPE3 = SVSHAPE()
714 SVSHAPE3.lims = [xdim, 0b000001, 1]
715 SVSHAPE3.mode = 0b01
716 SVSHAPE3.submode2 = 0b01
717 SVSHAPE3.skip = 0b11
718 SVSHAPE3.offset = 0 # experiment with different offset, here
719 SVSHAPE3.invxyz = [1,0,0] # inversion if desired
720
721 # enumerate over the iterator function, getting new indices
722 i0 = iterate_dct_inner_butterfly_indices(SVSHAPE0)
723 i1 = iterate_dct_inner_butterfly_indices(SVSHAPE1)
724 i2 = iterate_dct_inner_butterfly_indices(SVSHAPE2)
725 i3 = iterate_dct_inner_butterfly_indices(SVSHAPE3)
726 for k, ((jl, jle), (jh, jhe), (ci, cie), (size, sze)) in \
727 enumerate(zip(i0, i1, i2, i3)):
728 t1, t2 = vec[jl], vec[jh]
729 print ("xform2", jl, jh, ci, size)
730 coeff = (math.cos((ci + 0.5) * math.pi / size) * 2.0)
731 #assert coeff == ctable[k]
732 vec[jl] = t1 + t2
733 vec[jh] = (t1 - t2) * (1/coeff)
734 print ("coeff", size, "ci", ci,
735 "jl", jl, "jh", jh,
736 "i/n", (ci+0.5)/size, coeff, vec[jl],
737 vec[jh],
738 "end", bin(jle), bin(jhe))
739 if jle == 0b111: # all loops end
740 break
741
742 print("transform2 pre-itersum", vec)
743
744 # now things are in the right order for the outer butterfly.
745
746 # j schedule
747 SVSHAPE0 = SVSHAPE()
748 SVSHAPE0.lims = [xdim, 0b0000010, 1]
749 SVSHAPE0.submode2 = 0b100
750 SVSHAPE0.mode = 0b01
751 SVSHAPE0.skip = 0b00
752 SVSHAPE0.offset = 0 # experiment with different offset, here
753 SVSHAPE0.invxyz = [0,0,0] # inversion if desired
754 # j+halfstep schedule
755 SVSHAPE1 = SVSHAPE()
756 SVSHAPE1.lims = [xdim, 0b0000010, 1]
757 SVSHAPE1.mode = 0b01
758 SVSHAPE1.submode2 = 0b100
759 SVSHAPE1.skip = 0b01
760 SVSHAPE1.offset = 0 # experiment with different offset, here
761 SVSHAPE1.invxyz = [0,0,0] # inversion if desired
762
763 # enumerate over the iterator function, getting new indices
764 i0 = iterate_dct_outer_butterfly_indices(SVSHAPE0)
765 i1 = iterate_dct_outer_butterfly_indices(SVSHAPE1)
766 for k, ((jl, jle), (jh, jhe)) in enumerate(zip(i0, i1)):
767 print ("itersum jr", jl, jh,
768 "end", bin(jle), bin(jhe))
769 vec[jl] += vec[jh]
770 size //= 2
771 if jle == 0b111: # all loops end
772 break
773
774 print("transform2 result", vec)
775
776 return vec
777
778
779 def demo_idct():
780 # set the dimension sizes here
781 n = 8
782 xdim = n
783 ydim = 0 # not needed
784 zdim = 1 # again, not needed
785
786 # set up an SVSHAPE
787 class SVSHAPE:
788 pass
789
790 ################
791 # outer butterfly
792 ################
793
794 # j schedule
795 SVSHAPE0 = SVSHAPE()
796 SVSHAPE0.lims = [xdim, 0b0000010, 1]
797 SVSHAPE0.submode2 = 0b11
798 SVSHAPE0.mode = 0b11
799 SVSHAPE0.skip = 0b00
800 SVSHAPE0.offset = 0 # experiment with different offset, here
801 SVSHAPE0.invxyz = [1,0,1] # inversion if desired
802 # j+halfstep schedule
803 SVSHAPE1 = SVSHAPE()
804 SVSHAPE1.lims = [xdim, 0b0000010, 1]
805 SVSHAPE1.mode = 0b11
806 SVSHAPE1.submode2 = 0b11
807 SVSHAPE1.skip = 0b01
808 SVSHAPE1.offset = 0 # experiment with different offset, here
809 SVSHAPE1.invxyz = [1,0,1] # inversion if desired
810
811 # enumerate over the iterator function, getting new indices
812 schedule = []
813 i0 = iterate_dct_outer_butterfly_indices(SVSHAPE0)
814 i1 = iterate_dct_outer_butterfly_indices(SVSHAPE1)
815 for idx, (jl, jh) in enumerate(zip(i0, i1)):
816 schedule.append((jl, jh))
817 if jl[1] == 0b111: # end
818 break
819
820 # ok now pretty-print the results, with some debug output
821 print ("outer i-dct butterfly")
822 pprint_schedule_outer(schedule, n)
823
824 ################
825 # INNER butterfly
826 ################
827
828 # j schedule
829 SVSHAPE0 = SVSHAPE()
830 SVSHAPE0.lims = [xdim, 0b000001, 1]
831 SVSHAPE0.mode = 0b11
832 SVSHAPE0.submode2 = 0b11
833 SVSHAPE0.skip = 0b00
834 SVSHAPE0.offset = 0 # experiment with different offset, here
835 SVSHAPE0.invxyz = [0,0,0] # inversion if desired
836 # j+halfstep schedule
837 SVSHAPE1 = SVSHAPE()
838 SVSHAPE1.lims = [xdim, 0b000001, 1]
839 SVSHAPE1.mode = 0b11
840 SVSHAPE1.submode2 = 0b11
841 SVSHAPE1.skip = 0b01
842 SVSHAPE1.offset = 0 # experiment with different offset, here
843 SVSHAPE1.invxyz = [0,0,0] # inversion if desired
844
845 # enumerate over the iterator function, getting new indices
846 schedule = []
847 i0 = iterate_dct_inner_butterfly_indices(SVSHAPE0)
848 i1 = iterate_dct_inner_butterfly_indices(SVSHAPE1)
849 for idx, (jl, jh) in enumerate(zip(i0, i1)):
850 schedule.append((jl, jh))
851 if jl[1] == 0b111: # end
852 break
853
854 # ok now pretty-print the results, with some debug output
855 print ("inner butterfly")
856 pprint_schedule(schedule, n)
857 print ("")
858
859 return
860
861 # for DCT half-swap LDs
862 # j schedule
863 SVSHAPE0 = SVSHAPE()
864 SVSHAPE0.lims = [xdim, 0b000101, zdim]
865 SVSHAPE0.mode = 0b01
866 SVSHAPE0.submode2 = 0b01
867 SVSHAPE0.skip = 0
868 SVSHAPE0.offset = 0 # experiment with different offset, here
869 SVSHAPE0.invxyz = [0,0,0] # inversion if desired
870
871 # expected results
872 levels = n.bit_length() - 1
873 avi = list(range(n))
874 ri = [reverse_bits(i, levels) for i in range(n)]
875 av = halfrev2(avi, False)
876 av = [av[ri[i]] for i in range(n)]
877
878 i0 = iterate_dct_inner_halfswap_loadstore(SVSHAPE0)
879 for idx, (jl) in enumerate(i0):
880 print ("inverse half-swap ld", idx, jl, av[idx])
881 if jl[1] == 0b111: # end
882 break
883
884
885 def demo_dct():
886 # set the dimension sizes here
887 n = 8
888 xdim = n
889 ydim = 0 # not needed
890 zdim = 1 # must be set at least to 1
891
892
893 ################
894 # INNER butterfly
895 ################
896
897 # set up an SVSHAPE
898 class SVSHAPE:
899 pass
900 # j schedule
901 SVSHAPE0 = SVSHAPE()
902 SVSHAPE0.lims = [xdim, 0b000001, zdim]
903 SVSHAPE0.submode2 = 0b010
904 SVSHAPE0.mode = 0b01
905 SVSHAPE0.skip = 0b00
906 SVSHAPE0.offset = 0 # experiment with different offset, here
907 SVSHAPE0.invxyz = [0,0,0] # inversion if desired
908 # j+halfstep schedule
909 SVSHAPE1 = SVSHAPE()
910 SVSHAPE1.lims = [xdim, 0b000001, zdim]
911 SVSHAPE1.submode2 = 0b010
912 SVSHAPE1.mode = 0b01
913 SVSHAPE1.skip = 0b01
914 SVSHAPE1.offset = 0 # experiment with different offset, here
915 SVSHAPE1.invxyz = [0,0,0] # inversion if desired
916
917 # enumerate over the iterator function, getting new indices
918 schedule = []
919 i0 = iterate_dct_inner_butterfly_indices(SVSHAPE0)
920 i1 = iterate_dct_inner_butterfly_indices(SVSHAPE1)
921 for idx, (jl, jh) in enumerate(zip(i0, i1)):
922 schedule.append((jl, jh))
923 if jl[1] == 0b111: # end
924 break
925
926 # ok now pretty-print the results, with some debug output
927 print ("inner butterfly")
928 pprint_schedule(schedule, n)
929 print ("")
930
931 ################
932 # outer butterfly
933 ################
934
935 # j schedule
936 SVSHAPE0 = SVSHAPE()
937 SVSHAPE0.lims = [xdim, 0b000010, zdim]
938 SVSHAPE0.mode = 0b01
939 SVSHAPE0.submode2 = 0b100
940 SVSHAPE0.skip = 0b10
941 SVSHAPE0.offset = 0 # experiment with different offset, here
942 SVSHAPE0.invxyz = [1,0,0] # inversion if desired
943 # j+halfstep schedule
944 SVSHAPE1 = SVSHAPE()
945 SVSHAPE1.lims = [xdim, 0b000010, zdim]
946 SVSHAPE1.mode = 0b01
947 SVSHAPE1.submode2 = 0b100
948 SVSHAPE1.skip = 0b11
949 SVSHAPE1.offset = 0 # experiment with different offset, here
950 SVSHAPE1.invxyz = [1,0,0] # inversion if desired
951
952 # enumerate over the iterator function, getting new indices
953 schedule = []
954 i0 = iterate_dct_outer_butterfly_indices(SVSHAPE0)
955 i1 = iterate_dct_outer_butterfly_indices(SVSHAPE1)
956 for idx, (jl, jh) in enumerate(zip(i0, i1)):
957 schedule.append((jl, jh))
958 if jl[1] == 0b111: # end
959 break
960
961 # ok now pretty-print the results, with some debug output
962 print ("outer butterfly")
963 pprint_schedule_outer(schedule, n)
964
965 # for DCT half-swap LDs
966 # j schedule
967 SVSHAPE0 = SVSHAPE()
968 SVSHAPE0.lims = [xdim, 0b000101, zdim]
969 SVSHAPE0.mode = 0b01
970 SVSHAPE0.submode2 = 0
971 SVSHAPE0.skip = 0
972 SVSHAPE0.offset = 0 # experiment with different offset, here
973 SVSHAPE0.invxyz = [0,0,0] # inversion if desired
974
975 # expected results
976 levels = n.bit_length() - 1
977 avi = list(range(n))
978 ri = [reverse_bits(i, levels) for i in range(n)]
979 av = halfrev2(avi, False)
980 av = [av[ri[i]] for i in range(n)]
981
982
983 i0 = iterate_dct_inner_halfswap_loadstore(SVSHAPE0)
984 for idx, (jl) in enumerate(i0):
985 print ("inverse half-swap ld", idx, jl, av[idx])
986 if jl[1] == 0b111: # end
987 break
988
989
990 # run the demo
991 if __name__ == '__main__':
992 demo_dct()
993 demo_idct()