argh, have LD-bitreverse select the offset from RA REMAP schedule
[openpower-isa.git] / src / openpower / decoder / isa / remap_dct_yield.py
1 # DCT "REMAP" scheduler
2 #
3 # Modifications made to create an in-place iterative DCT:
4 # Copyright (c) 2021 Luke Kenneth Casson Leighton <lkcl@lkcl.net>
5 #
6 # SPDX: LGPLv3+
7 #
8 # Original fastdctlee.py by Nayuki:
9 # Copyright (c) 2020 Project Nayuki. (MIT License)
10 # https://www.nayuki.io/page/fast-discrete-cosine-transform-algorithms
11
12 import math
13
14 # bits of the integer 'val'.
15 def reverse_bits(val, width):
16 result = 0
17 for _ in range(width):
18 result = (result << 1) | (val & 1)
19 val >>= 1
20 return result
21
22
23 # iterative version of [recursively-applied] half-rev.
24 # relies on the list lengths being power-of-two and the fact
25 # that bit-inversion of a list of binary numbers is the same
26 # as reversing the order of the list
27 # this version is dead easy to implement in hardware.
28 # a big surprise is that the half-reversal can be done with
29 # such a simple XOR. the inverse operation is slightly trickier
30 def halfrev2(vec, pre_rev=True):
31 res = []
32 for i in range(len(vec)):
33 if pre_rev:
34 res.append(i ^ (i>>1))
35 else:
36 ri = i
37 bl = i.bit_length()
38 for ji in range(1, bl):
39 ri ^= (i >> ji)
40 res.append(vec[ri])
41 return res
42
43
44 # python "yield" can be iterated. use this to make it clear how
45 # the indices are generated by using natural-looking nested loops
46 def iterate_dct_inner_halfswap_loadstore(SVSHAPE):
47 # get indices to iterate over, in the required order
48 n = SVSHAPE.lims[0]
49 mode = SVSHAPE.lims[1]
50 print ("inner halfswap loadstore", n, mode, SVSHAPE.skip)
51
52 # reference list for not needing to do data-swaps, just swap what
53 # *indices* are referenced (two levels of indirection at the moment)
54 # pre-reverse the data-swap list so that it *ends up* in the order 0123..
55 ji = list(range(n))
56
57 levels = n.bit_length() - 1
58 ji = halfrev2(ji, False)
59 if False: # swap: TODO, add extra bit-reverse mode
60 ri = [reverse_bits(i, levels) for i in range(n)]
61 ji = [ji[ri[i]] for i in range(n)]
62
63
64 # invert order if requested
65 if SVSHAPE.invxyz[0]:
66 ji.reverse()
67
68 for i, jl in enumerate(ji):
69 y_end = jl == ji[-1]
70 yield jl, (0b111 if y_end else 0b000)
71
72
73 # python "yield" can be iterated. use this to make it clear how
74 # the indices are generated by using natural-looking nested loops
75 def iterate_dct_inner_costable_indices(SVSHAPE):
76 # get indices to iterate over, in the required order
77 n = SVSHAPE.lims[0]
78 mode = SVSHAPE.lims[1]
79 print ("inner costable", mode)
80 # creating lists of indices to iterate over in each dimension
81 # has to be done dynamically, because it depends on the size
82 # first, the size-based loop (which can be done statically)
83 x_r = []
84 size = 2
85 while size <= n:
86 x_r.append(size)
87 size *= 2
88 # invert order if requested
89 if SVSHAPE.invxyz[0]:
90 x_r.reverse()
91
92 if len(x_r) == 0:
93 return
94
95 #print ("ri", ri)
96 #print ("ji", ji)
97
98 # start an infinite (wrapping) loop
99 skip = 0
100 z_end = 1 # doesn't exist in this, only 2 loops
101 k = 0
102 while True:
103 for size in x_r: # loop over 3rd order dimension (size)
104 x_end = size == x_r[-1]
105 # y_r schedule depends on size
106 halfsize = size // 2
107 y_r = []
108 for i in range(0, n, size):
109 y_r.append(i)
110 # invert if requested
111 if SVSHAPE.invxyz[1]: y_r.reverse()
112 # two lists of half-range indices, e.g. j 0123, jr 7654
113 j = list(range(0, halfsize))
114 # invert if requested
115 if SVSHAPE.invxyz[2]: j_r.reverse()
116 #print ("xform jr", jr)
117 # loop over 1st order dimension
118 for ci, jl in enumerate(j):
119 y_end = jl == j[-1]
120 # now depending on MODE return the index. inner butterfly
121 if SVSHAPE.skip == 0b00: # in [0b00, 0b10]:
122 result = k # offset into COS table
123 elif SVSHAPE.skip == 0b10: #
124 result = ci # coefficient helper
125 elif SVSHAPE.skip == 0b11: #
126 result = size # coefficient helper
127 loopends = (z_end |
128 ((y_end and z_end)<<1) |
129 ((y_end and x_end and z_end)<<2))
130
131 yield result + SVSHAPE.offset, loopends
132 k += 1
133
134 # python "yield" can be iterated. use this to make it clear how
135 # the indices are generated by using natural-looking nested loops
136 def iterate_dct_inner_butterfly_indices(SVSHAPE):
137 # get indices to iterate over, in the required order
138 n = SVSHAPE.lims[0]
139 mode = SVSHAPE.lims[1]
140 #print ("inner butterfly", mode, SVSHAPE.skip)
141 # creating lists of indices to iterate over in each dimension
142 # has to be done dynamically, because it depends on the size
143 # first, the size-based loop (which can be done statically)
144 x_r = []
145 size = 2
146 while size <= n:
147 x_r.append(size)
148 size *= 2
149 # invert order if requested
150 if SVSHAPE.invxyz[0]:
151 x_r.reverse()
152
153 if len(x_r) == 0:
154 return
155
156 # reference (read/write) the in-place data in *reverse-bit-order*
157 ri = list(range(n))
158 if SVSHAPE.submode2 == 0b01:
159 levels = n.bit_length() - 1
160 ri = [ri[reverse_bits(i, levels)] for i in range(n)]
161
162 # reference list for not needing to do data-swaps, just swap what
163 # *indices* are referenced (two levels of indirection at the moment)
164 # pre-reverse the data-swap list so that it *ends up* in the order 0123..
165 ji = list(range(n))
166 inplace_mode = SVSHAPE.submode2 == 0b01
167 # and SVSHAPE.skip not in [0b10, 0b11]
168 if inplace_mode:
169 #print ("inplace mode")
170 ji = halfrev2(ji, True)
171
172 #print ("ri", ri)
173 #print ("ji", ji)
174
175 # start an infinite (wrapping) loop
176 skip = 0
177 k = 0
178 k_start = 0
179 while True:
180 for size in x_r: # loop over 3rd order dimension (size)
181 x_end = size == x_r[-1]
182 # y_r schedule depends on size
183 halfsize = size // 2
184 y_r = []
185 for i in range(0, n, size):
186 y_r.append(i)
187 # invert if requested
188 if SVSHAPE.invxyz[1]: y_r.reverse()
189 for i in y_r: # loop over 2nd order dimension
190 y_end = i == y_r[-1]
191 # two lists of half-range indices, e.g. j 0123, jr 7654
192 j = list(range(i, i + halfsize))
193 jr = list(range(i+halfsize, i + size))
194 jr.reverse()
195 # invert if requested
196 if SVSHAPE.invxyz[2]: j_r.reverse()
197 hz2 = halfsize // 2 # zero stops reversing 1-item lists
198 # if you *really* want to do the in-place swapping manually,
199 # this allows you to do it. good luck...
200 if SVSHAPE.submode2 == 0b01 and not inplace_mode:
201 #print ("swap mode")
202 jr = j_r[:hz2]
203 #print ("xform jr", jr)
204 # loop over 1st order dimension
205 k = k_start
206 for ci, (jl, jh) in enumerate(zip(j, jr)):
207 z_end = jl == j[-1]
208 # now depending on MODE return the index. inner butterfly
209 if SVSHAPE.skip == 0b00: # in [0b00, 0b10]:
210 result = ri[ji[jl]] # lower half
211 elif SVSHAPE.skip == 0b01: # in [0b01, 0b11]:
212 result = ri[ji[jh]] # upper half
213 elif mode == 4:
214 # COS table pre-generated mode
215 if SVSHAPE.skip == 0b10: #
216 result = k # cos table offset
217 else: # mode 2
218 # COS table generated on-demand ("Vertical-First") mode
219 if SVSHAPE.skip == 0b10: #
220 result = ci # coefficient helper
221 elif SVSHAPE.skip == 0b11: #
222 result = size # coefficient helper
223 loopends = (z_end |
224 ((y_end and z_end)<<1) |
225 ((y_end and x_end and z_end)<<2))
226
227 yield result + SVSHAPE.offset, loopends
228 k += 1
229
230 # now in-place swap
231 if inplace_mode:
232 for ci, (jl, jh) in enumerate(zip(j[:hz2], jr[:hz2])):
233 jlh = jl+halfsize
234 #print ("inplace swap", jh, jlh)
235 tmp1, tmp2 = ji[jlh], ji[jh]
236 ji[jlh], ji[jh] = tmp2, tmp1
237
238 # new k_start point for cos tables( runs inside x_r loop NOT i loop)
239 k_start += halfsize
240
241
242 # python "yield" can be iterated. use this to make it clear how
243 # the indices are generated by using natural-looking nested loops
244 def iterate_dct_outer_butterfly_indices(SVSHAPE):
245 # get indices to iterate over, in the required order
246 n = SVSHAPE.lims[0]
247 mode = SVSHAPE.lims[1]
248 # createing lists of indices to iterate over in each dimension
249 # has to be done dynamically, because it depends on the size
250 # first, the size-based loop (which can be done statically)
251 x_r = []
252 size = n // 2
253 while size >= 2:
254 x_r.append(size)
255 size //= 2
256 # invert order if requested
257 if SVSHAPE.invxyz[0]:
258 x_r.reverse()
259
260 if len(x_r) == 0:
261 return
262
263 #print ("outer butterfly")
264
265 # reference (read/write) the in-place data in *reverse-bit-order*
266 ri = list(range(n))
267 if SVSHAPE.submode2 == 0b11:
268 levels = n.bit_length() - 1
269 ri = [ri[reverse_bits(i, levels)] for i in range(n)]
270
271 # reference list for not needing to do data-swaps, just swap what
272 # *indices* are referenced (two levels of indirection at the moment)
273 # pre-reverse the data-swap list so that it *ends up* in the order 0123..
274 ji = list(range(n))
275 inplace_mode = False # need the space... SVSHAPE.skip in [0b10, 0b11]
276 if inplace_mode:
277 #print ("inplace mode", SVSHAPE.skip)
278 ji = halfrev2(ji, True)
279
280 #print ("ri", ri)
281 #print ("ji", ji)
282
283 # start an infinite (wrapping) loop
284 skip = 0
285 k = 0
286 k_start = 0
287 while True:
288 for size in x_r: # loop over 3rd order dimension (size)
289 halfsize = size//2
290 x_end = size == x_r[-1]
291 y_r = list(range(0, halfsize))
292 #print ("itersum", halfsize, size, y_r)
293 # invert if requested
294 if SVSHAPE.invxyz[1]: y_r.reverse()
295 for i in y_r: # loop over 2nd order dimension
296 y_end = i == y_r[-1]
297 # one list to create iterative-sum schedule
298 jr = list(range(i+halfsize, i+n-halfsize, size))
299 #print ("itersum jr", i+halfsize, i+size, jr)
300 # invert if requested
301 if SVSHAPE.invxyz[2]: j_r.reverse()
302 hz2 = halfsize // 2 # zero stops reversing 1-item lists
303 k = k_start
304 for ci, jh in enumerate(jr): # loop over 1st order dimension
305 z_end = jh == jr[-1]
306 #print (" itersum", size, i, jh, jh+size)
307 if mode == 4:
308 # COS table pre-generated mode
309 if SVSHAPE.skip == 0b00: # in [0b00, 0b10]:
310 result = ri[ji[jh]] # lower half
311 elif SVSHAPE.skip == 0b01: # in [0b01, 0b11]:
312 result = ri[ji[jh+size]] # upper half
313 elif SVSHAPE.skip == 0b10: #
314 result = k # cos table offset
315 else:
316 # COS table generated on-demand ("Vertical-First") mode
317 if SVSHAPE.skip == 0b00: # in [0b00, 0b10]:
318 result = ri[ji[jh]] # lower half
319 elif SVSHAPE.skip == 0b01: # in [0b01, 0b11]:
320 result = ri[ji[jh+size]] # upper half
321 elif SVSHAPE.skip == 0b10: #
322 result = ci # coefficient helper
323 elif SVSHAPE.skip == 0b11: #
324 result = size # coefficient helper
325 loopends = (z_end |
326 ((y_end and z_end)<<1) |
327 ((y_end and x_end and z_end)<<2))
328
329 yield result + SVSHAPE.offset, loopends
330 k += 1
331
332 # now in-place swap
333 if SVSHAPE.submode2 == 0b11 and inplace_mode:
334 j = list(range(i, i + halfsize))
335 jr = list(range(i+halfsize, i + size))
336 jr.reverse()
337 for ci, (jl, jh) in enumerate(zip(j[:hz2], jr[:hz2])):
338 jlh = jl+halfsize
339 #print ("inplace swap", jh, jlh)
340 tmp1, tmp2 = ji[jlh], ji[jh]
341 ji[jlh], ji[jh] = tmp2, tmp1
342
343 # new k_start point for cos tables( runs inside x_r loop NOT i loop)
344 k_start += halfsize
345
346
347 def pprint_schedule(schedule, n):
348 size = 2
349 idx = 0
350 while size <= n:
351 halfsize = size // 2
352 tablestep = n // size
353 print ("size %d halfsize %d tablestep %d" % \
354 (size, halfsize, tablestep))
355 for i in range(0, n, size):
356 prefix = "i %d\t" % i
357 for j in range(i, i + halfsize):
358 (jl, je), (jh, he) = schedule[idx]
359 print (" %-3d\t%s j=%-2d jh=%-2d "
360 "j[jl=%-2d] j[jh=%-2d]" % \
361 (idx, prefix, j, j+halfsize,
362 jl, jh,
363 ),
364 "end", bin(je)[2:], bin(je)[2:])
365 idx += 1
366 size *= 2
367
368 def pprint_schedule_outer(schedule, n):
369 size = 2
370 idx = 0
371 while size <= n//2:
372 halfsize = size // 2
373 tablestep = n // size
374 print ("size %d halfsize %d tablestep %d" % \
375 (size, halfsize, tablestep))
376 y_r = list(range(0, halfsize))
377 for i in y_r:
378 prefix = "i %d\t" % i
379 jr = list(range(i+halfsize, i+n-halfsize, size))
380 for j in jr:
381 (jl, je), (jh, he) = schedule[idx]
382 print (" %-3d\t%s j=%-2d jh=%-2d "
383 "j[jl=%-2d] j[jh=%-2d]" % \
384 (idx, prefix, j, j+halfsize,
385 jl, jh,
386 ),
387 "end", bin(je)[2:], bin(je)[2:])
388 idx += 1
389 size *= 2
390
391
392 # totally cool *in-place* DCT algorithm using yield REMAPs
393 def transform2(vec):
394
395 # Initialization
396 n = len(vec)
397 print ()
398 print ("transform2", n)
399 levels = n.bit_length() - 1
400
401 # set up dims
402 xdim = n
403
404 # reference (read/write) the in-place data in *reverse-bit-order*
405 ri = list(range(n))
406 ri = [ri[reverse_bits(i, levels)] for i in range(n)]
407
408 # and pretend we LDed data in half-swapped *and* bit-reversed order as well
409 # TODO: merge these two
410 vec = halfrev2(vec, False)
411 vec = [vec[ri[i]] for i in range(n)]
412
413 # create a cos table: not strictly necessary but here for illustrative
414 # purposes, to demonstrate the point that it really *is* iterative.
415 # this table could be cached and used multiple times rather than
416 # computed every time.
417 ctable = []
418 size = n
419 while size >= 2:
420 halfsize = size // 2
421 for ci in range(halfsize):
422 coeff = (math.cos((ci + 0.5) * math.pi / size) * 2.0)
423 ctable.append(coeff)
424 print ("coeff", size, "ci", ci, "k", len(ctable)-1,
425 "i/n", (ci+0.5)/size, coeff)
426 size //= 2
427
428 # set up an SVSHAPE
429 class SVSHAPE:
430 pass
431 # ci schedule
432 SVSHAPE0 = SVSHAPE()
433 SVSHAPE0.lims = [xdim, 4, 0]
434 SVSHAPE0.mode = 0b01
435 SVSHAPE0.submode2 = 0b01
436 SVSHAPE0.skip = 0b10
437 SVSHAPE0.offset = 0 # experiment with different offset, here
438 SVSHAPE0.invxyz = [1,0,0] # inversion if desired
439 # size schedule
440 SVSHAPE1 = SVSHAPE()
441 SVSHAPE1.lims = [xdim, 4, 0]
442 SVSHAPE1.mode = 0b01
443 SVSHAPE1.submode2 = 0b01
444 SVSHAPE1.skip = 0b11
445 SVSHAPE1.offset = 0 # experiment with different offset, here
446 SVSHAPE1.invxyz = [1,0,0] # inversion if desired
447 # k schedule
448 SVSHAPE2 = SVSHAPE()
449 SVSHAPE2.lims = [xdim, 4, 0]
450 SVSHAPE2.mode = 0b01
451 SVSHAPE2.submode2 = 0b01
452 SVSHAPE2.skip = 0b00
453 SVSHAPE2.offset = 0 # experiment with different offset, here
454 SVSHAPE2.invxyz = [1,0,0] # inversion if desired
455
456 # enumerate over the iterator function, getting new indices
457 i0 = iterate_dct_inner_costable_indices(SVSHAPE0)
458 i1 = iterate_dct_inner_costable_indices(SVSHAPE1)
459 i2 = iterate_dct_inner_costable_indices(SVSHAPE2)
460 for ((ci, cie), (size, sze), (k, ke)) in \
461 zip(i0, i1, i2):
462 print ("xform2 cos", ci, size, k)
463 coeff = (math.cos((ci + 0.5) * math.pi / size) * 2.0)
464 assert coeff == ctable[k]
465 print ("coeff", size, "ci", ci, "k", k,
466 "i/n", (ci+0.5)/size, coeff,
467 "end", bin(cie), bin(sze), bin(ke))
468 if cie == 0b111: # all loops end
469 break
470
471 ################
472 # INNER butterfly
473 ################
474
475 # j schedule
476 SVSHAPE0 = SVSHAPE()
477 SVSHAPE0.lims = [xdim, 0b000001, 0]
478 SVSHAPE0.mode = 0b01
479 SVSHAPE0.submode2 = 0b01
480 SVSHAPE0.skip = 0b00
481 SVSHAPE0.offset = 0 # experiment with different offset, here
482 SVSHAPE0.invxyz = [1,0,0] # inversion if desired
483 # j+halfstep schedule
484 SVSHAPE1 = SVSHAPE()
485 SVSHAPE1.lims = [xdim, 0b000001, 0]
486 SVSHAPE1.mode = 0b01
487 SVSHAPE1.submode2 = 0b01
488 SVSHAPE1.skip = 0b01
489 SVSHAPE1.offset = 0 # experiment with different offset, here
490 SVSHAPE1.invxyz = [1,0,0] # inversion if desired
491 # ci schedule
492 SVSHAPE2 = SVSHAPE()
493 SVSHAPE2.lims = [xdim, 0b000001, 0]
494 SVSHAPE2.mode = 0b01
495 SVSHAPE2.submode2 = 0b01
496 SVSHAPE2.skip = 0b10
497 SVSHAPE2.offset = 0 # experiment with different offset, here
498 SVSHAPE2.invxyz = [1,0,0] # inversion if desired
499 # size schedule
500 SVSHAPE3 = SVSHAPE()
501 SVSHAPE3.lims = [xdim, 0b000001, 0]
502 SVSHAPE3.mode = 0b01
503 SVSHAPE3.submode2 = 0b01
504 SVSHAPE3.skip = 0b11
505 SVSHAPE3.offset = 0 # experiment with different offset, here
506 SVSHAPE3.invxyz = [1,0,0] # inversion if desired
507
508 # enumerate over the iterator function, getting new indices
509 i0 = iterate_dct_inner_butterfly_indices(SVSHAPE0)
510 i1 = iterate_dct_inner_butterfly_indices(SVSHAPE1)
511 i2 = iterate_dct_inner_butterfly_indices(SVSHAPE2)
512 i3 = iterate_dct_inner_butterfly_indices(SVSHAPE3)
513 for k, ((jl, jle), (jh, jhe), (ci, cie), (size, sze)) in \
514 enumerate(zip(i0, i1, i2, i3)):
515 t1, t2 = vec[jl], vec[jh]
516 print ("xform2", jl, jh, ci, size)
517 coeff = (math.cos((ci + 0.5) * math.pi / size) * 2.0)
518 #assert coeff == ctable[k]
519 vec[jl] = t1 + t2
520 vec[jh] = (t1 - t2) * (1/coeff)
521 print ("coeff", size, "ci", ci,
522 "jl", jl, "jh", jh,
523 "i/n", (ci+0.5)/size, coeff, vec[jl],
524 vec[jh],
525 "end", bin(jle), bin(jhe))
526 if jle == 0b111: # all loops end
527 break
528
529 print("transform2 pre-itersum", vec)
530
531 # now things are in the right order for the outer butterfly.
532
533 # j schedule
534 SVSHAPE0 = SVSHAPE()
535 SVSHAPE0.lims = [xdim, 0b0000010, 0]
536 SVSHAPE0.submode2 = 0b100
537 SVSHAPE0.mode = 0b01
538 SVSHAPE0.skip = 0b00
539 SVSHAPE0.offset = 0 # experiment with different offset, here
540 SVSHAPE0.invxyz = [0,0,0] # inversion if desired
541 # j+halfstep schedule
542 SVSHAPE1 = SVSHAPE()
543 SVSHAPE1.lims = [xdim, 0b0000010, 0]
544 SVSHAPE1.mode = 0b01
545 SVSHAPE1.submode2 = 0b100
546 SVSHAPE1.skip = 0b01
547 SVSHAPE1.offset = 0 # experiment with different offset, here
548 SVSHAPE1.invxyz = [0,0,0] # inversion if desired
549
550 # enumerate over the iterator function, getting new indices
551 i0 = iterate_dct_outer_butterfly_indices(SVSHAPE0)
552 i1 = iterate_dct_outer_butterfly_indices(SVSHAPE1)
553 for k, ((jl, jle), (jh, jhe)) in enumerate(zip(i0, i1)):
554 print ("itersum jr", jl, jh,
555 "end", bin(jle), bin(jhe))
556 vec[jl] += vec[jh]
557 size //= 2
558 if jle == 0b111: # all loops end
559 break
560
561 print("transform2 result", vec)
562
563 return vec
564
565
566 def demo():
567 # set the dimension sizes here
568 n = 8
569 xdim = n
570 ydim = 0 # not needed
571 zdim = 0 # again, not needed
572
573
574 ################
575 # INNER butterfly
576 ################
577
578 # set up an SVSHAPE
579 class SVSHAPE:
580 pass
581 # j schedule
582 SVSHAPE0 = SVSHAPE()
583 SVSHAPE0.lims = [xdim, 0b000001, zdim]
584 SVSHAPE0.submode2 = 0b010
585 SVSHAPE0.mode = 0b01
586 SVSHAPE0.skip = 0b00
587 SVSHAPE0.offset = 0 # experiment with different offset, here
588 SVSHAPE0.invxyz = [0,0,0] # inversion if desired
589 # j+halfstep schedule
590 SVSHAPE1 = SVSHAPE()
591 SVSHAPE1.lims = [xdim, 0b000001, zdim]
592 SVSHAPE1.submode2 = 0b010
593 SVSHAPE1.mode = 0b01
594 SVSHAPE1.skip = 0b01
595 SVSHAPE1.offset = 0 # experiment with different offset, here
596 SVSHAPE1.invxyz = [0,0,0] # inversion if desired
597
598 # enumerate over the iterator function, getting new indices
599 schedule = []
600 i0 = iterate_dct_inner_butterfly_indices(SVSHAPE0)
601 i1 = iterate_dct_inner_butterfly_indices(SVSHAPE1)
602 for idx, (jl, jh) in enumerate(zip(i0, i1)):
603 schedule.append((jl, jh))
604 if jl[1] == 0b111: # end
605 break
606
607 # ok now pretty-print the results, with some debug output
608 print ("inner butterfly")
609 pprint_schedule(schedule, n)
610 print ("")
611
612 ################
613 # outer butterfly
614 ################
615
616 # j schedule
617 SVSHAPE0 = SVSHAPE()
618 SVSHAPE0.lims = [xdim, 0b000010, zdim]
619 SVSHAPE0.mode = 0b01
620 SVSHAPE0.submode2 = 0b100
621 SVSHAPE0.skip = 0b10
622 SVSHAPE0.offset = 0 # experiment with different offset, here
623 SVSHAPE0.invxyz = [1,0,0] # inversion if desired
624 # j+halfstep schedule
625 SVSHAPE1 = SVSHAPE()
626 SVSHAPE1.lims = [xdim, 0b000010, zdim]
627 SVSHAPE1.mode = 0b01
628 SVSHAPE1.submode2 = 0b100
629 SVSHAPE1.skip = 0b11
630 SVSHAPE1.offset = 0 # experiment with different offset, here
631 SVSHAPE1.invxyz = [1,0,0] # inversion if desired
632
633 # enumerate over the iterator function, getting new indices
634 schedule = []
635 i0 = iterate_dct_outer_butterfly_indices(SVSHAPE0)
636 i1 = iterate_dct_outer_butterfly_indices(SVSHAPE1)
637 for idx, (jl, jh) in enumerate(zip(i0, i1)):
638 schedule.append((jl, jh))
639 if jl[1] == 0b111: # end
640 break
641
642 # ok now pretty-print the results, with some debug output
643 print ("outer butterfly")
644 pprint_schedule_outer(schedule, n)
645
646 # for DCT half-swap LDs
647 # j schedule
648 SVSHAPE0 = SVSHAPE()
649 SVSHAPE0.lims = [xdim, 0b000101, zdim]
650 SVSHAPE0.mode = 0b01
651 SVSHAPE0.submode2 = 0
652 SVSHAPE0.skip = 0
653 SVSHAPE0.offset = 0 # experiment with different offset, here
654 SVSHAPE0.invxyz = [0,0,0] # inversion if desired
655
656 # expected results
657 levels = n.bit_length() - 1
658 avi = list(range(n))
659 ri = [reverse_bits(i, levels) for i in range(n)]
660 av = halfrev2(avi, False)
661 av = [av[ri[i]] for i in range(n)]
662
663
664 i0 = iterate_dct_inner_halfswap_loadstore(SVSHAPE0)
665 for idx, (jl) in enumerate(i0):
666 print ("inverse half-swap ld", idx, jl, av[idx])
667 if jl[1] == 0b111: # end
668 break
669
670
671 # run the demo
672 if __name__ == '__main__':
673 demo()