src/openpower/decoder/isa/remap_dct_yield.py

   1 # DCT "REMAP" scheduler
   2 #
   3 # Modifications made to create an in-place iterative DCT:
   4 # Copyright (c) 2021 Luke Kenneth Casson Leighton <lkcl@lkcl.net>
   5 #
   6 # SPDX: LGPLv3+
   7 #
   8 # Original fastdctlee.py by Nayuki:
   9 # Copyright (c) 2020 Project Nayuki. (MIT License)
  10 # https://www.nayuki.io/page/fast-discrete-cosine-transform-algorithms
  11
  12 import math
  13
  14 # bits of the integer 'val'.
  15 def reverse_bits(val, width):
  16     result = 0
  17     for _ in range(width):
  18         result = (result << 1) | (val & 1)
  19         val >>= 1
  20     return result
  21
  22
  23 # iterative version of [recursively-applied] half-rev.
  24 # relies on the list lengths being power-of-two and the fact
  25 # that bit-inversion of a list of binary numbers is the same
  26 # as reversing the order of the list
  27 # this version is dead easy to implement in hardware.
  28 # a big surprise is that the half-reversal can be done with
  29 # such a simple XOR. the inverse operation is slightly trickier
  30 def halfrev2(vec, pre_rev=True):
  31     res = []
  32     for i in range(len(vec)):
  33         if pre_rev:
  34             res.append(i ^ (i>>1))
  35         else:
  36             ri = i
  37             bl = i.bit_length()
  38             for ji in range(1, bl):
  39                 ri ^= (i >> ji)
  40             res.append(vec[ri])
  41     return res
  42
  43
  44 # python "yield" can be iterated. use this to make it clear how
  45 # the indices are generated by using natural-looking nested loops
  46 def iterate_dct_inner_halfswap_loadstore(SVSHAPE):
  47     # get indices to iterate over, in the required order
  48     n = SVSHAPE.lims[0]
  49     mode = SVSHAPE.lims[1]
  50     print ("inner halfswap loadstore", n, mode, SVSHAPE.skip)
  51
  52     # reference list for not needing to do data-swaps, just swap what
  53     # *indices* are referenced (two levels of indirection at the moment)
  54     # pre-reverse the data-swap list so that it *ends up* in the order 0123..
  55     ji = list(range(n))
  56     ji = halfrev2(ji, True)
  57
  58     # invert order if requested
  59     if SVSHAPE.invxyz[0]:
  60         ji.reverse()
  61
  62     yield from ji
  63
  64
  65 # python "yield" can be iterated. use this to make it clear how
  66 # the indices are generated by using natural-looking nested loops
  67 def iterate_dct_inner_costable_indices(SVSHAPE):
  68     # get indices to iterate over, in the required order
  69     n = SVSHAPE.lims[0]
  70     mode = SVSHAPE.lims[1]
  71     print ("inner costable", mode)
  72     # creating lists of indices to iterate over in each dimension
  73     # has to be done dynamically, because it depends on the size
  74     # first, the size-based loop (which can be done statically)
  75     x_r = []
  76     size = 2
  77     while size <= n:
  78         x_r.append(size)
  79         size *= 2
  80     # invert order if requested
  81     if SVSHAPE.invxyz[0]:
  82         x_r.reverse()
  83
  84     if len(x_r) == 0:
  85         return
  86
  87     #print ("ri", ri)
  88     #print ("ji", ji)
  89
  90     # start an infinite (wrapping) loop
  91     skip = 0
  92     z_end = 1 # doesn't exist in this, only 2 loops
  93     k = 0
  94     while True:
  95         for size in x_r:           # loop over 3rd order dimension (size)
  96             x_end = size == x_r[-1]
  97             # y_r schedule depends on size
  98             halfsize = size // 2
  99             y_r = []
 100             for i in range(0, n, size):
 101                 y_r.append(i)
 102             # invert if requested
 103             if SVSHAPE.invxyz[1]: y_r.reverse()
 104             # two lists of half-range indices, e.g. j 0123, jr 7654
 105             j = list(range(0, halfsize))
 106             # invert if requested
 107             if SVSHAPE.invxyz[2]: j_r.reverse()
 108             #print ("xform jr", jr)
 109             # loop over 1st order dimension
 110             for ci, jl in enumerate(j):
 111                 y_end = jl == j[-1]
 112                 # now depending on MODE return the index.  inner butterfly
 113                 if SVSHAPE.skip == 0b00: # in [0b00, 0b10]:
 114                     result = k  # offset into COS table
 115                 elif SVSHAPE.skip == 0b10: #
 116                     result = ci # coefficient helper
 117                 elif SVSHAPE.skip == 0b11: #
 118                     result = size # coefficient helper
 119                 loopends = (z_end |
 120                            ((y_end and z_end)<<1) |
 121                             ((y_end and x_end and z_end)<<2))
 122
 123                 yield result + SVSHAPE.offset, loopends
 124                 k += 1
 125
 126 # python "yield" can be iterated. use this to make it clear how
 127 # the indices are generated by using natural-looking nested loops
 128 def iterate_dct_inner_butterfly_indices(SVSHAPE):
 129     # get indices to iterate over, in the required order
 130     n = SVSHAPE.lims[0]
 131     mode = SVSHAPE.lims[1]
 132     #print ("inner butterfly", mode, SVSHAPE.skip)
 133     # creating lists of indices to iterate over in each dimension
 134     # has to be done dynamically, because it depends on the size
 135     # first, the size-based loop (which can be done statically)
 136     x_r = []
 137     size = 2
 138     while size <= n:
 139         x_r.append(size)
 140         size *= 2
 141     # invert order if requested
 142     if SVSHAPE.invxyz[0]:
 143         x_r.reverse()
 144
 145     if len(x_r) == 0:
 146         return
 147
 148     # reference (read/write) the in-place data in *reverse-bit-order*
 149     ri = list(range(n))
 150     if SVSHAPE.submode2 == 0b01:
 151         levels = n.bit_length() - 1
 152         ri = [ri[reverse_bits(i, levels)] for i in range(n)]
 153
 154     # reference list for not needing to do data-swaps, just swap what
 155     # *indices* are referenced (two levels of indirection at the moment)
 156     # pre-reverse the data-swap list so that it *ends up* in the order 0123..
 157     ji = list(range(n))
 158     inplace_mode = SVSHAPE.submode2 == 0b01
 159     #                     and SVSHAPE.skip not in [0b10, 0b11]
 160     if inplace_mode:
 161         #print ("inplace mode")
 162         ji = halfrev2(ji, True)
 163
 164     #print ("ri", ri)
 165     #print ("ji", ji)
 166
 167     # start an infinite (wrapping) loop
 168     skip = 0
 169     k = 0
 170     k_start = 0
 171     while True:
 172         for size in x_r:           # loop over 3rd order dimension (size)
 173             x_end = size == x_r[-1]
 174             # y_r schedule depends on size
 175             halfsize = size // 2
 176             y_r = []
 177             for i in range(0, n, size):
 178                 y_r.append(i)
 179             # invert if requested
 180             if SVSHAPE.invxyz[1]: y_r.reverse()
 181             for i in y_r:       # loop over 2nd order dimension
 182                 y_end = i == y_r[-1]
 183                 # two lists of half-range indices, e.g. j 0123, jr 7654
 184                 j = list(range(i, i + halfsize))
 185                 jr = list(range(i+halfsize, i + size))
 186                 jr.reverse()
 187                 # invert if requested
 188                 if SVSHAPE.invxyz[2]: j_r.reverse()
 189                 hz2 = halfsize // 2 # zero stops reversing 1-item lists
 190                 # if you *really* want to do the in-place swapping manually,
 191                 # this allows you to do it.  good luck...
 192                 if SVSHAPE.submode2 == 0b01 and not inplace_mode:
 193                     #print ("swap mode")
 194                     jr = j_r[:hz2]
 195                 #print ("xform jr", jr)
 196                 # loop over 1st order dimension
 197                 k = k_start
 198                 for ci, (jl, jh) in enumerate(zip(j, jr)):
 199                     z_end = jl == j[-1]
 200                     # now depending on MODE return the index.  inner butterfly
 201                     if SVSHAPE.skip == 0b00: # in [0b00, 0b10]:
 202                         result = ri[ji[jl]]        # lower half
 203                     elif SVSHAPE.skip == 0b01: # in [0b01, 0b11]:
 204                         result = ri[ji[jh]] # upper half
 205                     elif mode == 4:
 206                         # COS table pre-generated mode
 207                         if SVSHAPE.skip == 0b10: #
 208                             result = k # cos table offset
 209                     else: # mode 2
 210                         # COS table generated on-demand ("Vertical-First") mode
 211                         if SVSHAPE.skip == 0b10: #
 212                             result = ci # coefficient helper
 213                         elif SVSHAPE.skip == 0b11: #
 214                             result = size # coefficient helper
 215                     loopends = (z_end |
 216                                ((y_end and z_end)<<1) |
 217                                 ((y_end and x_end and z_end)<<2))
 218
 219                     yield result + SVSHAPE.offset, loopends
 220                     k += 1
 221
 222                 # now in-place swap
 223                 if inplace_mode:
 224                     for ci, (jl, jh) in enumerate(zip(j[:hz2], jr[:hz2])):
 225                         jlh = jl+halfsize
 226                         #print ("inplace swap", jh, jlh)
 227                         tmp1, tmp2 = ji[jlh], ji[jh]
 228                         ji[jlh], ji[jh] = tmp2, tmp1
 229
 230             # new k_start point for cos tables( runs inside x_r loop NOT i loop)
 231             k_start += halfsize
 232
 233
 234 # python "yield" can be iterated. use this to make it clear how
 235 # the indices are generated by using natural-looking nested loops
 236 def iterate_dct_outer_butterfly_indices(SVSHAPE):
 237     # get indices to iterate over, in the required order
 238     n = SVSHAPE.lims[0]
 239     mode = SVSHAPE.lims[1]
 240     # createing lists of indices to iterate over in each dimension
 241     # has to be done dynamically, because it depends on the size
 242     # first, the size-based loop (which can be done statically)
 243     x_r = []
 244     size = n // 2
 245     while size >= 2:
 246         x_r.append(size)
 247         size //= 2
 248     # invert order if requested
 249     if SVSHAPE.invxyz[0]:
 250         x_r.reverse()
 251
 252     if len(x_r) == 0:
 253         return
 254
 255     #print ("outer butterfly")
 256
 257     # reference (read/write) the in-place data in *reverse-bit-order*
 258     ri = list(range(n))
 259     if SVSHAPE.submode2 == 0b11:
 260         levels = n.bit_length() - 1
 261         ri = [ri[reverse_bits(i, levels)] for i in range(n)]
 262
 263     # reference list for not needing to do data-swaps, just swap what
 264     # *indices* are referenced (two levels of indirection at the moment)
 265     # pre-reverse the data-swap list so that it *ends up* in the order 0123..
 266     ji = list(range(n))
 267     inplace_mode = False # need the space... SVSHAPE.skip in [0b10, 0b11]
 268     if inplace_mode:
 269         #print ("inplace mode", SVSHAPE.skip)
 270         ji = halfrev2(ji, True)
 271
 272     #print ("ri", ri)
 273     #print ("ji", ji)
 274
 275     # start an infinite (wrapping) loop
 276     skip = 0
 277     k = 0
 278     k_start = 0
 279     while True:
 280         for size in x_r:           # loop over 3rd order dimension (size)
 281             halfsize = size//2
 282             x_end = size == x_r[-1]
 283             y_r = list(range(0, halfsize))
 284             #print ("itersum", halfsize, size, y_r)
 285             # invert if requested
 286             if SVSHAPE.invxyz[1]: y_r.reverse()
 287             for i in y_r:       # loop over 2nd order dimension
 288                 y_end = i == y_r[-1]
 289                 # one list to create iterative-sum schedule
 290                 jr = list(range(i+halfsize, i+n-halfsize, size))
 291                 #print ("itersum     jr", i+halfsize, i+size, jr)
 292                 # invert if requested
 293                 if SVSHAPE.invxyz[2]: j_r.reverse()
 294                 hz2 = halfsize // 2 # zero stops reversing 1-item lists
 295                 k = k_start
 296                 for ci, jh in enumerate(jr):   # loop over 1st order dimension
 297                     z_end = jh == jr[-1]
 298                     #print ("     itersum", size, i, jh, jh+size)
 299                     if mode == 4:
 300                         # COS table pre-generated mode
 301                         if SVSHAPE.skip == 0b00: # in [0b00, 0b10]:
 302                             result = ri[ji[jh]]        # lower half
 303                         elif SVSHAPE.skip == 0b01: # in [0b01, 0b11]:
 304                             result = ri[ji[jh+size]] # upper half
 305                         elif SVSHAPE.skip == 0b10: #
 306                             result = k # cos table offset
 307                     else:
 308                         # COS table generated on-demand ("Vertical-First") mode
 309                         if SVSHAPE.skip == 0b00: # in [0b00, 0b10]:
 310                             result = ri[ji[jh]]        # lower half
 311                         elif SVSHAPE.skip == 0b01: # in [0b01, 0b11]:
 312                             result = ri[ji[jh+size]] # upper half
 313                         elif SVSHAPE.skip == 0b10: #
 314                             result = ci # coefficient helper
 315                         elif SVSHAPE.skip == 0b11: #
 316                             result = size # coefficient helper
 317                     loopends = (z_end |
 318                                ((y_end and z_end)<<1) |
 319                                 ((y_end and x_end and z_end)<<2))
 320
 321                     yield result + SVSHAPE.offset, loopends
 322                     k += 1
 323
 324                 # now in-place swap
 325                 if SVSHAPE.submode2 == 0b11 and inplace_mode:
 326                     j = list(range(i, i + halfsize))
 327                     jr = list(range(i+halfsize, i + size))
 328                     jr.reverse()
 329                     for ci, (jl, jh) in enumerate(zip(j[:hz2], jr[:hz2])):
 330                         jlh = jl+halfsize
 331                         #print ("inplace swap", jh, jlh)
 332                         tmp1, tmp2 = ji[jlh], ji[jh]
 333                         ji[jlh], ji[jh] = tmp2, tmp1
 334
 335             # new k_start point for cos tables( runs inside x_r loop NOT i loop)
 336             k_start += halfsize
 337
 338
 339 def pprint_schedule(schedule, n):
 340     size = 2
 341     idx = 0
 342     while size <= n:
 343         halfsize = size // 2
 344         tablestep = n // size
 345         print ("size %d halfsize %d tablestep %d" % \
 346                 (size, halfsize, tablestep))
 347         for i in range(0, n, size):
 348             prefix = "i %d\t" % i
 349             for j in range(i, i + halfsize):
 350                 (jl, je), (jh, he) = schedule[idx]
 351                 print ("  %-3d\t%s j=%-2d jh=%-2d "
 352                         "j[jl=%-2d] j[jh=%-2d]" % \
 353                                 (idx, prefix, j, j+halfsize,
 354                                       jl, jh,
 355                                 ),
 356                                 "end", bin(je)[2:], bin(je)[2:])
 357                 idx += 1
 358         size *= 2
 359
 360 def pprint_schedule_outer(schedule, n):
 361     size = 2
 362     idx = 0
 363     while size <= n//2:
 364         halfsize = size // 2
 365         tablestep = n // size
 366         print ("size %d halfsize %d tablestep %d" % \
 367                 (size, halfsize, tablestep))
 368         y_r = list(range(0, halfsize))
 369         for i in y_r:
 370             prefix = "i %d\t" % i
 371             jr = list(range(i+halfsize, i+n-halfsize, size))
 372             for j in jr:
 373                 (jl, je), (jh, he) = schedule[idx]
 374                 print ("  %-3d\t%s j=%-2d jh=%-2d "
 375                         "j[jl=%-2d] j[jh=%-2d]" % \
 376                                 (idx, prefix, j, j+halfsize,
 377                                       jl, jh,
 378                                 ),
 379                                 "end", bin(je)[2:], bin(je)[2:])
 380                 idx += 1
 381         size *= 2
 382
 383
 384 # totally cool *in-place* DCT algorithm using yield REMAPs
 385 def transform2(vec):
 386
 387     # Initialization
 388     n = len(vec)
 389     print ()
 390     print ("transform2", n)
 391     levels = n.bit_length() - 1
 392
 393     # set up dims
 394     xdim = n
 395
 396     # reference (read/write) the in-place data in *reverse-bit-order*
 397     ri = list(range(n))
 398     ri = [ri[reverse_bits(i, levels)] for i in range(n)]
 399
 400     # and pretend we LDed data in half-swapped *and* bit-reversed order as well
 401     # TODO: merge these two
 402     vec = halfrev2(vec, False)
 403     vec = [vec[ri[i]] for i in range(n)]
 404
 405     # create a cos table: not strictly necessary but here for illustrative
 406     # purposes, to demonstrate the point that it really *is* iterative.
 407     # this table could be cached and used multiple times rather than
 408     # computed every time.
 409     ctable = []
 410     size = n
 411     while size >= 2:
 412         halfsize = size // 2
 413         for ci in range(halfsize):
 414             coeff = (math.cos((ci + 0.5) * math.pi / size) * 2.0)
 415             ctable.append(coeff)
 416             print ("coeff", size,  "ci", ci, "k", len(ctable)-1,
 417                    "i/n", (ci+0.5)/size, coeff)
 418         size //= 2
 419
 420     # set up an SVSHAPE
 421     class SVSHAPE:
 422         pass
 423     # ci schedule
 424     SVSHAPE0 = SVSHAPE()
 425     SVSHAPE0.lims = [xdim, 4, 0]
 426     SVSHAPE0.mode = 0b01
 427     SVSHAPE0.submode2 = 0b01
 428     SVSHAPE0.skip = 0b10
 429     SVSHAPE0.offset = 0       # experiment with different offset, here
 430     SVSHAPE0.invxyz = [1,0,0] # inversion if desired
 431     # size schedule
 432     SVSHAPE1 = SVSHAPE()
 433     SVSHAPE1.lims = [xdim, 4, 0]
 434     SVSHAPE1.mode = 0b01
 435     SVSHAPE1.submode2 = 0b01
 436     SVSHAPE1.skip = 0b11
 437     SVSHAPE1.offset = 0       # experiment with different offset, here
 438     SVSHAPE1.invxyz = [1,0,0] # inversion if desired
 439     # k schedule
 440     SVSHAPE2 = SVSHAPE()
 441     SVSHAPE2.lims = [xdim, 4, 0]
 442     SVSHAPE2.mode = 0b01
 443     SVSHAPE2.submode2 = 0b01
 444     SVSHAPE2.skip = 0b00
 445     SVSHAPE2.offset = 0       # experiment with different offset, here
 446     SVSHAPE2.invxyz = [1,0,0] # inversion if desired
 447
 448     # enumerate over the iterator function, getting new indices
 449     i0 = iterate_dct_inner_costable_indices(SVSHAPE0)
 450     i1 = iterate_dct_inner_costable_indices(SVSHAPE1)
 451     i2 = iterate_dct_inner_costable_indices(SVSHAPE2)
 452     for ((ci, cie), (size, sze), (k, ke)) in \
 453                 zip(i0, i1, i2):
 454         print ("xform2 cos", ci, size, k)
 455         coeff = (math.cos((ci + 0.5) * math.pi / size) * 2.0)
 456         assert coeff == ctable[k]
 457         print ("coeff", size,  "ci", ci, "k", k,
 458                "i/n", (ci+0.5)/size, coeff,
 459                 "end", bin(cie), bin(sze), bin(ke))
 460         if cie == 0b111: # all loops end
 461             break
 462
 463     ################
 464     # INNER butterfly
 465     ################
 466
 467     # j schedule
 468     SVSHAPE0 = SVSHAPE()
 469     SVSHAPE0.lims = [xdim, 0b000001, 0]
 470     SVSHAPE0.mode = 0b01
 471     SVSHAPE0.submode2 = 0b01
 472     SVSHAPE0.skip = 0b00
 473     SVSHAPE0.offset = 0       # experiment with different offset, here
 474     SVSHAPE0.invxyz = [1,0,0] # inversion if desired
 475     # j+halfstep schedule
 476     SVSHAPE1 = SVSHAPE()
 477     SVSHAPE1.lims = [xdim, 0b000001, 0]
 478     SVSHAPE1.mode = 0b01
 479     SVSHAPE1.submode2 = 0b01
 480     SVSHAPE1.skip = 0b01
 481     SVSHAPE1.offset = 0       # experiment with different offset, here
 482     SVSHAPE1.invxyz = [1,0,0] # inversion if desired
 483     # ci schedule
 484     SVSHAPE2 = SVSHAPE()
 485     SVSHAPE2.lims = [xdim, 0b000001, 0]
 486     SVSHAPE2.mode = 0b01
 487     SVSHAPE2.submode2 = 0b01
 488     SVSHAPE2.skip = 0b10
 489     SVSHAPE2.offset = 0       # experiment with different offset, here
 490     SVSHAPE2.invxyz = [1,0,0] # inversion if desired
 491     # size schedule
 492     SVSHAPE3 = SVSHAPE()
 493     SVSHAPE3.lims = [xdim, 0b000001, 0]
 494     SVSHAPE3.mode = 0b01
 495     SVSHAPE3.submode2 = 0b01
 496     SVSHAPE3.skip = 0b11
 497     SVSHAPE3.offset = 0       # experiment with different offset, here
 498     SVSHAPE3.invxyz = [1,0,0] # inversion if desired
 499
 500     # enumerate over the iterator function, getting new indices
 501     i0 = iterate_dct_inner_butterfly_indices(SVSHAPE0)
 502     i1 = iterate_dct_inner_butterfly_indices(SVSHAPE1)
 503     i2 = iterate_dct_inner_butterfly_indices(SVSHAPE2)
 504     i3 = iterate_dct_inner_butterfly_indices(SVSHAPE3)
 505     for k, ((jl, jle), (jh, jhe), (ci, cie), (size, sze)) in \
 506                 enumerate(zip(i0, i1, i2, i3)):
 507         t1, t2 = vec[jl], vec[jh]
 508         print ("xform2", jl, jh, ci, size)
 509         coeff = (math.cos((ci + 0.5) * math.pi / size) * 2.0)
 510         #assert coeff == ctable[k]
 511         vec[jl] = t1 + t2
 512         vec[jh] = (t1 - t2) * (1/coeff)
 513         print ("coeff", size, "ci", ci,
 514                 "jl", jl, "jh", jh,
 515                "i/n", (ci+0.5)/size, coeff, vec[jl],
 516                                             vec[jh],
 517                 "end", bin(jle), bin(jhe))
 518         if jle == 0b111: # all loops end
 519             break
 520
 521     print("transform2 pre-itersum", vec)
 522
 523     # now things are in the right order for the outer butterfly.
 524
 525     # j schedule
 526     SVSHAPE0 = SVSHAPE()
 527     SVSHAPE0.lims = [xdim, 0b0000010, 0]
 528     SVSHAPE0.submode2 = 0b100
 529     SVSHAPE0.mode = 0b01
 530     SVSHAPE0.skip = 0b00
 531     SVSHAPE0.offset = 0       # experiment with different offset, here
 532     SVSHAPE0.invxyz = [0,0,0] # inversion if desired
 533     # j+halfstep schedule
 534     SVSHAPE1 = SVSHAPE()
 535     SVSHAPE1.lims = [xdim, 0b0000010, 0]
 536     SVSHAPE1.mode = 0b01
 537     SVSHAPE1.submode2 = 0b100
 538     SVSHAPE1.skip = 0b01
 539     SVSHAPE1.offset = 0       # experiment with different offset, here
 540     SVSHAPE1.invxyz = [0,0,0] # inversion if desired
 541
 542     # enumerate over the iterator function, getting new indices
 543     i0 = iterate_dct_outer_butterfly_indices(SVSHAPE0)
 544     i1 = iterate_dct_outer_butterfly_indices(SVSHAPE1)
 545     for k, ((jl, jle), (jh, jhe)) in enumerate(zip(i0, i1)):
 546         print ("itersum    jr", jl, jh,
 547                 "end", bin(jle), bin(jhe))
 548         vec[jl] += vec[jh]
 549         size //= 2
 550         if jle == 0b111: # all loops end
 551             break
 552
 553     print("transform2 result", vec)
 554
 555     return vec
 556
 557
 558 def demo():
 559     # set the dimension sizes here
 560     n = 8
 561     xdim = n
 562     ydim = 0 # not needed
 563     zdim = 0 # again, not needed
 564
 565
 566     ################
 567     # INNER butterfly
 568     ################
 569
 570     # set up an SVSHAPE
 571     class SVSHAPE:
 572         pass
 573     # j schedule
 574     SVSHAPE0 = SVSHAPE()
 575     SVSHAPE0.lims = [xdim, 0b000001, zdim]
 576     SVSHAPE0.submode2 = 0b010
 577     SVSHAPE0.mode = 0b01
 578     SVSHAPE0.skip = 0b00
 579     SVSHAPE0.offset = 0       # experiment with different offset, here
 580     SVSHAPE0.invxyz = [0,0,0] # inversion if desired
 581     # j+halfstep schedule
 582     SVSHAPE1 = SVSHAPE()
 583     SVSHAPE1.lims = [xdim, 0b000001, zdim]
 584     SVSHAPE1.submode2 = 0b010
 585     SVSHAPE1.mode = 0b01
 586     SVSHAPE1.skip = 0b01
 587     SVSHAPE1.offset = 0       # experiment with different offset, here
 588     SVSHAPE1.invxyz = [0,0,0] # inversion if desired
 589
 590     # enumerate over the iterator function, getting new indices
 591     schedule = []
 592     i0 = iterate_dct_inner_butterfly_indices(SVSHAPE0)
 593     i1 = iterate_dct_inner_butterfly_indices(SVSHAPE1)
 594     for idx, (jl, jh) in enumerate(zip(i0, i1)):
 595         schedule.append((jl, jh))
 596         if jl[1] == 0b111: # end
 597             break
 598
 599     # ok now pretty-print the results, with some debug output
 600     print ("inner butterfly")
 601     pprint_schedule(schedule, n)
 602     print ("")
 603
 604     ################
 605     # outer butterfly
 606     ################
 607
 608     # j schedule
 609     SVSHAPE0 = SVSHAPE()
 610     SVSHAPE0.lims = [xdim, 0b000010, zdim]
 611     SVSHAPE0.mode = 0b01
 612     SVSHAPE0.submode2 = 0b100
 613     SVSHAPE0.skip = 0b10
 614     SVSHAPE0.offset = 0       # experiment with different offset, here
 615     SVSHAPE0.invxyz = [1,0,0] # inversion if desired
 616     # j+halfstep schedule
 617     SVSHAPE1 = SVSHAPE()
 618     SVSHAPE1.lims = [xdim, 0b000010, zdim]
 619     SVSHAPE1.mode = 0b01
 620     SVSHAPE1.submode2 = 0b100
 621     SVSHAPE1.skip = 0b11
 622     SVSHAPE1.offset = 0       # experiment with different offset, here
 623     SVSHAPE1.invxyz = [1,0,0] # inversion if desired
 624
 625     # enumerate over the iterator function, getting new indices
 626     schedule = []
 627     i0 = iterate_dct_outer_butterfly_indices(SVSHAPE0)
 628     i1 = iterate_dct_outer_butterfly_indices(SVSHAPE1)
 629     for idx, (jl, jh) in enumerate(zip(i0, i1)):
 630         schedule.append((jl, jh))
 631         if jl[1] == 0b111: # end
 632             break
 633
 634     # ok now pretty-print the results, with some debug output
 635     print ("outer butterfly")
 636     pprint_schedule_outer(schedule, n)
 637
 638 # run the demo
 639 if __name__ == '__main__':
 640     demo()