src/openpower/decoder/isa/remap_dct_yield.py

   1 # DCT "REMAP" scheduler
   2 #
   3 # Modifications made to create an in-place iterative DCT:
   4 # Copyright (c) 2021 Luke Kenneth Casson Leighton <lkcl@lkcl.net>
   5 #
   6 # SPDX: LGPLv3+
   7 #
   8 # Original fastdctlee.py by Nayuki:
   9 # Copyright (c) 2020 Project Nayuki. (MIT License)
  10 # https://www.nayuki.io/page/fast-discrete-cosine-transform-algorithms
  11
  12 import math
  13
  14 # bits of the integer 'val'.
  15 def reverse_bits(val, width):
  16     result = 0
  17     for _ in range(width):
  18         result = (result << 1) | (val & 1)
  19         val >>= 1
  20     return result
  21
  22
  23 # iterative version of [recursively-applied] half-rev.
  24 # relies on the list lengths being power-of-two and the fact
  25 # that bit-inversion of a list of binary numbers is the same
  26 # as reversing the order of the list
  27 # this version is dead easy to implement in hardware.
  28 # a big surprise is that the half-reversal can be done with
  29 # such a simple XOR. the inverse operation is slightly trickier
  30 def halfrev2(vec, pre_rev=True):
  31     res = []
  32     for i in range(len(vec)):
  33         if pre_rev:
  34             res.append(i ^ (i>>1))
  35         else:
  36             ri = i
  37             bl = i.bit_length()
  38             for ji in range(1, bl):
  39                 ri ^= (i >> ji)
  40             res.append(vec[ri])
  41     return res
  42
  43
  44 # python "yield" can be iterated. use this to make it clear how
  45 # the indices are generated by using natural-looking nested loops
  46 def iterate_dct_inner_costable_indices(SVSHAPE):
  47     # get indices to iterate over, in the required order
  48     n = SVSHAPE.lims[0]
  49     mode = SVSHAPE.lims[1]
  50     print ("inner costable", mode)
  51     # creating lists of indices to iterate over in each dimension
  52     # has to be done dynamically, because it depends on the size
  53     # first, the size-based loop (which can be done statically)
  54     x_r = []
  55     size = 2
  56     while size <= n:
  57         x_r.append(size)
  58         size *= 2
  59     # invert order if requested
  60     if SVSHAPE.invxyz[0]:
  61         x_r.reverse()
  62
  63     if len(x_r) == 0:
  64         return
  65
  66     #print ("ri", ri)
  67     #print ("ji", ji)
  68
  69     # start an infinite (wrapping) loop
  70     skip = 0
  71     z_end = 1 # doesn't exist in this, only 2 loops
  72     k = 0
  73     while True:
  74         for size in x_r:           # loop over 3rd order dimension (size)
  75             x_end = size == x_r[-1]
  76             # y_r schedule depends on size
  77             halfsize = size // 2
  78             y_r = []
  79             for i in range(0, n, size):
  80                 y_r.append(i)
  81             # invert if requested
  82             if SVSHAPE.invxyz[1]: y_r.reverse()
  83             # two lists of half-range indices, e.g. j 0123, jr 7654
  84             j = list(range(0, halfsize))
  85             # invert if requested
  86             if SVSHAPE.invxyz[2]: j_r.reverse()
  87             #print ("xform jr", jr)
  88             # loop over 1st order dimension
  89             for ci, jl in enumerate(j):
  90                 y_end = jl == j[-1]
  91                 # now depending on MODE return the index.  inner butterfly
  92                 if SVSHAPE.skip == 0b00: # in [0b00, 0b10]:
  93                     result = k  # offset into COS table
  94                 elif SVSHAPE.skip == 0b10: #
  95                     result = ci # coefficient helper
  96                 elif SVSHAPE.skip == 0b11: #
  97                     result = size # coefficient helper
  98                 loopends = (z_end |
  99                            ((y_end and z_end)<<1) |
 100                             ((y_end and x_end and z_end)<<2))
 101
 102                 yield result + SVSHAPE.offset, loopends
 103                 k += 1
 104
 105 # python "yield" can be iterated. use this to make it clear how
 106 # the indices are generated by using natural-looking nested loops
 107 def iterate_dct_inner_butterfly_indices(SVSHAPE):
 108     # get indices to iterate over, in the required order
 109     n = SVSHAPE.lims[0]
 110     mode = SVSHAPE.lims[1]
 111     #print ("inner butterfly", mode, SVSHAPE.skip)
 112     # creating lists of indices to iterate over in each dimension
 113     # has to be done dynamically, because it depends on the size
 114     # first, the size-based loop (which can be done statically)
 115     x_r = []
 116     size = 2
 117     while size <= n:
 118         x_r.append(size)
 119         size *= 2
 120     # invert order if requested
 121     if SVSHAPE.invxyz[0]:
 122         x_r.reverse()
 123
 124     if len(x_r) == 0:
 125         return
 126
 127     # reference (read/write) the in-place data in *reverse-bit-order*
 128     ri = list(range(n))
 129     if SVSHAPE.submode2 == 0b01:
 130         levels = n.bit_length() - 1
 131         ri = [ri[reverse_bits(i, levels)] for i in range(n)]
 132
 133     # reference list for not needing to do data-swaps, just swap what
 134     # *indices* are referenced (two levels of indirection at the moment)
 135     # pre-reverse the data-swap list so that it *ends up* in the order 0123..
 136     ji = list(range(n))
 137     inplace_mode = SVSHAPE.submode2 == 0b01
 138     #                     and SVSHAPE.skip not in [0b10, 0b11]
 139     if inplace_mode:
 140         #print ("inplace mode")
 141         ji = halfrev2(ji, True)
 142
 143     #print ("ri", ri)
 144     #print ("ji", ji)
 145
 146     # start an infinite (wrapping) loop
 147     skip = 0
 148     k = 0
 149     k_start = 0
 150     while True:
 151         for size in x_r:           # loop over 3rd order dimension (size)
 152             x_end = size == x_r[-1]
 153             # y_r schedule depends on size
 154             halfsize = size // 2
 155             y_r = []
 156             for i in range(0, n, size):
 157                 y_r.append(i)
 158             # invert if requested
 159             if SVSHAPE.invxyz[1]: y_r.reverse()
 160             for i in y_r:       # loop over 2nd order dimension
 161                 y_end = i == y_r[-1]
 162                 # two lists of half-range indices, e.g. j 0123, jr 7654
 163                 j = list(range(i, i + halfsize))
 164                 jr = list(range(i+halfsize, i + size))
 165                 jr.reverse()
 166                 # invert if requested
 167                 if SVSHAPE.invxyz[2]: j_r.reverse()
 168                 hz2 = halfsize // 2 # zero stops reversing 1-item lists
 169                 # if you *really* want to do the in-place swapping manually,
 170                 # this allows you to do it.  good luck...
 171                 if SVSHAPE.submode2 == 0b01 and not inplace_mode:
 172                     #print ("swap mode")
 173                     jr = j_r[:hz2]
 174                 #print ("xform jr", jr)
 175                 # loop over 1st order dimension
 176                 k = k_start
 177                 for ci, (jl, jh) in enumerate(zip(j, jr)):
 178                     z_end = jl == j[-1]
 179                     # now depending on MODE return the index.  inner butterfly
 180                     if SVSHAPE.skip == 0b00: # in [0b00, 0b10]:
 181                         result = ri[ji[jl]]        # lower half
 182                     elif SVSHAPE.skip == 0b01: # in [0b01, 0b11]:
 183                         result = ri[ji[jh]] # upper half
 184                     elif mode == 4:
 185                         # COS table pre-generated mode
 186                         if SVSHAPE.skip == 0b10: #
 187                             result = k # cos table offset
 188                     else: # mode 2
 189                         # COS table generated on-demand ("Vertical-First") mode
 190                         if SVSHAPE.skip == 0b10: #
 191                             result = ci # coefficient helper
 192                         elif SVSHAPE.skip == 0b11: #
 193                             result = size # coefficient helper
 194                     loopends = (z_end |
 195                                ((y_end and z_end)<<1) |
 196                                 ((y_end and x_end and z_end)<<2))
 197
 198                     yield result + SVSHAPE.offset, loopends
 199                     k += 1
 200
 201                 # now in-place swap
 202                 if inplace_mode:
 203                     for ci, (jl, jh) in enumerate(zip(j[:hz2], jr[:hz2])):
 204                         jlh = jl+halfsize
 205                         #print ("inplace swap", jh, jlh)
 206                         tmp1, tmp2 = ji[jlh], ji[jh]
 207                         ji[jlh], ji[jh] = tmp2, tmp1
 208
 209             # new k_start point for cos tables( runs inside x_r loop NOT i loop)
 210             k_start += halfsize
 211
 212
 213 # python "yield" can be iterated. use this to make it clear how
 214 # the indices are generated by using natural-looking nested loops
 215 def iterate_dct_outer_butterfly_indices(SVSHAPE):
 216     # get indices to iterate over, in the required order
 217     n = SVSHAPE.lims[0]
 218     mode = SVSHAPE.lims[1]
 219     # createing lists of indices to iterate over in each dimension
 220     # has to be done dynamically, because it depends on the size
 221     # first, the size-based loop (which can be done statically)
 222     x_r = []
 223     size = n // 2
 224     while size >= 2:
 225         x_r.append(size)
 226         size //= 2
 227     # invert order if requested
 228     if SVSHAPE.invxyz[0]:
 229         x_r.reverse()
 230
 231     if len(x_r) == 0:
 232         return
 233
 234     #print ("outer butterfly")
 235
 236     # reference (read/write) the in-place data in *reverse-bit-order*
 237     ri = list(range(n))
 238     if SVSHAPE.submode2 == 0b11:
 239         levels = n.bit_length() - 1
 240         ri = [ri[reverse_bits(i, levels)] for i in range(n)]
 241
 242     # reference list for not needing to do data-swaps, just swap what
 243     # *indices* are referenced (two levels of indirection at the moment)
 244     # pre-reverse the data-swap list so that it *ends up* in the order 0123..
 245     ji = list(range(n))
 246     inplace_mode = False # need the space... SVSHAPE.skip in [0b10, 0b11]
 247     if inplace_mode:
 248         #print ("inplace mode", SVSHAPE.skip)
 249         ji = halfrev2(ji, True)
 250
 251     #print ("ri", ri)
 252     #print ("ji", ji)
 253
 254     # start an infinite (wrapping) loop
 255     skip = 0
 256     k = 0
 257     k_start = 0
 258     while True:
 259         for size in x_r:           # loop over 3rd order dimension (size)
 260             halfsize = size//2
 261             x_end = size == x_r[-1]
 262             y_r = list(range(0, halfsize))
 263             #print ("itersum", halfsize, size, y_r)
 264             # invert if requested
 265             if SVSHAPE.invxyz[1]: y_r.reverse()
 266             for i in y_r:       # loop over 2nd order dimension
 267                 y_end = i == y_r[-1]
 268                 # one list to create iterative-sum schedule
 269                 jr = list(range(i+halfsize, i+n-halfsize, size))
 270                 #print ("itersum     jr", i+halfsize, i+size, jr)
 271                 # invert if requested
 272                 if SVSHAPE.invxyz[2]: j_r.reverse()
 273                 hz2 = halfsize // 2 # zero stops reversing 1-item lists
 274                 k = k_start
 275                 for ci, jh in enumerate(jr):   # loop over 1st order dimension
 276                     z_end = jh == jr[-1]
 277                     #print ("     itersum", size, i, jh, jh+size)
 278                     if mode == 4:
 279                         # COS table pre-generated mode
 280                         if SVSHAPE.skip == 0b00: # in [0b00, 0b10]:
 281                             result = ri[ji[jh]]        # lower half
 282                         elif SVSHAPE.skip == 0b01: # in [0b01, 0b11]:
 283                             result = ri[ji[jh+size]] # upper half
 284                         elif SVSHAPE.skip == 0b10: #
 285                             result = k # cos table offset
 286                     else:
 287                         # COS table generated on-demand ("Vertical-First") mode
 288                         if SVSHAPE.skip == 0b00: # in [0b00, 0b10]:
 289                             result = ri[ji[jh]]        # lower half
 290                         elif SVSHAPE.skip == 0b01: # in [0b01, 0b11]:
 291                             result = ri[ji[jh+size]] # upper half
 292                         elif SVSHAPE.skip == 0b10: #
 293                             result = ci # coefficient helper
 294                         elif SVSHAPE.skip == 0b11: #
 295                             result = size # coefficient helper
 296                     loopends = (z_end |
 297                                ((y_end and z_end)<<1) |
 298                                 ((y_end and x_end and z_end)<<2))
 299
 300                     yield result + SVSHAPE.offset, loopends
 301                     k += 1
 302
 303                 # now in-place swap
 304                 if SVSHAPE.submode2 == 0b11 and inplace_mode:
 305                     j = list(range(i, i + halfsize))
 306                     jr = list(range(i+halfsize, i + size))
 307                     jr.reverse()
 308                     for ci, (jl, jh) in enumerate(zip(j[:hz2], jr[:hz2])):
 309                         jlh = jl+halfsize
 310                         #print ("inplace swap", jh, jlh)
 311                         tmp1, tmp2 = ji[jlh], ji[jh]
 312                         ji[jlh], ji[jh] = tmp2, tmp1
 313
 314             # new k_start point for cos tables( runs inside x_r loop NOT i loop)
 315             k_start += halfsize
 316
 317
 318 def pprint_schedule(schedule, n):
 319     size = 2
 320     idx = 0
 321     while size <= n:
 322         halfsize = size // 2
 323         tablestep = n // size
 324         print ("size %d halfsize %d tablestep %d" % \
 325                 (size, halfsize, tablestep))
 326         for i in range(0, n, size):
 327             prefix = "i %d\t" % i
 328             for j in range(i, i + halfsize):
 329                 (jl, je), (jh, he) = schedule[idx]
 330                 print ("  %-3d\t%s j=%-2d jh=%-2d "
 331                         "j[jl=%-2d] j[jh=%-2d]" % \
 332                                 (idx, prefix, j, j+halfsize,
 333                                       jl, jh,
 334                                 ),
 335                                 "end", bin(je)[2:], bin(je)[2:])
 336                 idx += 1
 337         size *= 2
 338
 339 def pprint_schedule_outer(schedule, n):
 340     size = 2
 341     idx = 0
 342     while size <= n//2:
 343         halfsize = size // 2
 344         tablestep = n // size
 345         print ("size %d halfsize %d tablestep %d" % \
 346                 (size, halfsize, tablestep))
 347         y_r = list(range(0, halfsize))
 348         for i in y_r:
 349             prefix = "i %d\t" % i
 350             jr = list(range(i+halfsize, i+n-halfsize, size))
 351             for j in jr:
 352                 (jl, je), (jh, he) = schedule[idx]
 353                 print ("  %-3d\t%s j=%-2d jh=%-2d "
 354                         "j[jl=%-2d] j[jh=%-2d]" % \
 355                                 (idx, prefix, j, j+halfsize,
 356                                       jl, jh,
 357                                 ),
 358                                 "end", bin(je)[2:], bin(je)[2:])
 359                 idx += 1
 360         size *= 2
 361
 362
 363 # totally cool *in-place* DCT algorithm using yield REMAPs
 364 def transform2(vec):
 365
 366     # Initialization
 367     n = len(vec)
 368     print ()
 369     print ("transform2", n)
 370     levels = n.bit_length() - 1
 371
 372     # set up dims
 373     xdim = n
 374
 375     # reference (read/write) the in-place data in *reverse-bit-order*
 376     ri = list(range(n))
 377     ri = [ri[reverse_bits(i, levels)] for i in range(n)]
 378
 379     # and pretend we LDed data in half-swapped *and* bit-reversed order as well
 380     # TODO: merge these two
 381     vec = halfrev2(vec, False)
 382     vec = [vec[ri[i]] for i in range(n)]
 383
 384     # create a cos table: not strictly necessary but here for illustrative
 385     # purposes, to demonstrate the point that it really *is* iterative.
 386     # this table could be cached and used multiple times rather than
 387     # computed every time.
 388     ctable = []
 389     size = n
 390     while size >= 2:
 391         halfsize = size // 2
 392         for ci in range(halfsize):
 393             coeff = (math.cos((ci + 0.5) * math.pi / size) * 2.0)
 394             ctable.append(coeff)
 395             print ("coeff", size,  "ci", ci, "k", len(ctable)-1,
 396                    "i/n", (ci+0.5)/size, coeff)
 397         size //= 2
 398
 399     # set up an SVSHAPE
 400     class SVSHAPE:
 401         pass
 402     # ci schedule
 403     SVSHAPE0 = SVSHAPE()
 404     SVSHAPE0.lims = [xdim, 4, 0]
 405     SVSHAPE0.mode = 0b01
 406     SVSHAPE0.submode2 = 0b01
 407     SVSHAPE0.skip = 0b10
 408     SVSHAPE0.offset = 0       # experiment with different offset, here
 409     SVSHAPE0.invxyz = [1,0,0] # inversion if desired
 410     # size schedule
 411     SVSHAPE1 = SVSHAPE()
 412     SVSHAPE1.lims = [xdim, 4, 0]
 413     SVSHAPE1.mode = 0b01
 414     SVSHAPE1.submode2 = 0b01
 415     SVSHAPE1.skip = 0b11
 416     SVSHAPE1.offset = 0       # experiment with different offset, here
 417     SVSHAPE1.invxyz = [1,0,0] # inversion if desired
 418     # k schedule
 419     SVSHAPE2 = SVSHAPE()
 420     SVSHAPE2.lims = [xdim, 4, 0]
 421     SVSHAPE2.mode = 0b01
 422     SVSHAPE2.submode2 = 0b01
 423     SVSHAPE2.skip = 0b00
 424     SVSHAPE2.offset = 0       # experiment with different offset, here
 425     SVSHAPE2.invxyz = [1,0,0] # inversion if desired
 426
 427     # enumerate over the iterator function, getting new indices
 428     i0 = iterate_dct_inner_costable_indices(SVSHAPE0)
 429     i1 = iterate_dct_inner_costable_indices(SVSHAPE1)
 430     i2 = iterate_dct_inner_costable_indices(SVSHAPE2)
 431     for ((ci, cie), (size, sze), (k, ke)) in \
 432                 zip(i0, i1, i2):
 433         print ("xform2 cos", ci, size, k)
 434         coeff = (math.cos((ci + 0.5) * math.pi / size) * 2.0)
 435         assert coeff == ctable[k]
 436         print ("coeff", size,  "ci", ci, "k", k,
 437                "i/n", (ci+0.5)/size, coeff,
 438                 "end", bin(cie), bin(sze), bin(ke))
 439         if cie == 0b111: # all loops end
 440             break
 441
 442     ################
 443     # INNER butterfly
 444     ################
 445
 446     # j schedule
 447     SVSHAPE0 = SVSHAPE()
 448     SVSHAPE0.lims = [xdim, 0b000001, 0]
 449     SVSHAPE0.mode = 0b01
 450     SVSHAPE0.submode2 = 0b01
 451     SVSHAPE0.skip = 0b00
 452     SVSHAPE0.offset = 0       # experiment with different offset, here
 453     SVSHAPE0.invxyz = [1,0,0] # inversion if desired
 454     # j+halfstep schedule
 455     SVSHAPE1 = SVSHAPE()
 456     SVSHAPE1.lims = [xdim, 0b000001, 0]
 457     SVSHAPE1.mode = 0b01
 458     SVSHAPE1.submode2 = 0b01
 459     SVSHAPE1.skip = 0b01
 460     SVSHAPE1.offset = 0       # experiment with different offset, here
 461     SVSHAPE1.invxyz = [1,0,0] # inversion if desired
 462     # ci schedule
 463     SVSHAPE2 = SVSHAPE()
 464     SVSHAPE2.lims = [xdim, 0b000001, 0]
 465     SVSHAPE2.mode = 0b01
 466     SVSHAPE2.submode2 = 0b01
 467     SVSHAPE2.skip = 0b10
 468     SVSHAPE2.offset = 0       # experiment with different offset, here
 469     SVSHAPE2.invxyz = [1,0,0] # inversion if desired
 470     # size schedule
 471     SVSHAPE3 = SVSHAPE()
 472     SVSHAPE3.lims = [xdim, 0b000001, 0]
 473     SVSHAPE3.mode = 0b01
 474     SVSHAPE3.submode2 = 0b01
 475     SVSHAPE3.skip = 0b11
 476     SVSHAPE3.offset = 0       # experiment with different offset, here
 477     SVSHAPE3.invxyz = [1,0,0] # inversion if desired
 478
 479     # enumerate over the iterator function, getting new indices
 480     i0 = iterate_dct_inner_butterfly_indices(SVSHAPE0)
 481     i1 = iterate_dct_inner_butterfly_indices(SVSHAPE1)
 482     i2 = iterate_dct_inner_butterfly_indices(SVSHAPE2)
 483     i3 = iterate_dct_inner_butterfly_indices(SVSHAPE3)
 484     for k, ((jl, jle), (jh, jhe), (ci, cie), (size, sze)) in \
 485                 enumerate(zip(i0, i1, i2, i3)):
 486         t1, t2 = vec[jl], vec[jh]
 487         print ("xform2", jl, jh, ci, size)
 488         coeff = (math.cos((ci + 0.5) * math.pi / size) * 2.0)
 489         #assert coeff == ctable[k]
 490         vec[jl] = t1 + t2
 491         vec[jh] = (t1 - t2) * (1/coeff)
 492         print ("coeff", size, "ci", ci,
 493                 "jl", jl, "jh", jh,
 494                "i/n", (ci+0.5)/size, coeff, vec[jl],
 495                                             vec[jh],
 496                 "end", bin(jle), bin(jhe))
 497         if jle == 0b111: # all loops end
 498             break
 499
 500     print("transform2 pre-itersum", vec)
 501
 502     # now things are in the right order for the outer butterfly.
 503
 504     # j schedule
 505     SVSHAPE0 = SVSHAPE()
 506     SVSHAPE0.lims = [xdim, 0b0000010, 0]
 507     SVSHAPE0.submode2 = 0b100
 508     SVSHAPE0.mode = 0b01
 509     SVSHAPE0.skip = 0b00
 510     SVSHAPE0.offset = 0       # experiment with different offset, here
 511     SVSHAPE0.invxyz = [0,0,0] # inversion if desired
 512     # j+halfstep schedule
 513     SVSHAPE1 = SVSHAPE()
 514     SVSHAPE1.lims = [xdim, 0b0000010, 0]
 515     SVSHAPE1.mode = 0b01
 516     SVSHAPE1.submode2 = 0b100
 517     SVSHAPE1.skip = 0b01
 518     SVSHAPE1.offset = 0       # experiment with different offset, here
 519     SVSHAPE1.invxyz = [0,0,0] # inversion if desired
 520
 521     # enumerate over the iterator function, getting new indices
 522     i0 = iterate_dct_outer_butterfly_indices(SVSHAPE0)
 523     i1 = iterate_dct_outer_butterfly_indices(SVSHAPE1)
 524     for k, ((jl, jle), (jh, jhe)) in enumerate(zip(i0, i1)):
 525         print ("itersum    jr", jl, jh,
 526                 "end", bin(jle), bin(jhe))
 527         vec[jl] += vec[jh]
 528         size //= 2
 529         if jle == 0b111: # all loops end
 530             break
 531
 532     print("transform2 result", vec)
 533
 534     return vec
 535
 536
 537 def demo():
 538     # set the dimension sizes here
 539     n = 8
 540     xdim = n
 541     ydim = 0 # not needed
 542     zdim = 0 # again, not needed
 543
 544
 545     ################
 546     # INNER butterfly
 547     ################
 548
 549     # set up an SVSHAPE
 550     class SVSHAPE:
 551         pass
 552     # j schedule
 553     SVSHAPE0 = SVSHAPE()
 554     SVSHAPE0.lims = [xdim, 0b000001, zdim]
 555     SVSHAPE0.submode2 = 0b010
 556     SVSHAPE0.mode = 0b01
 557     SVSHAPE0.skip = 0b00
 558     SVSHAPE0.offset = 0       # experiment with different offset, here
 559     SVSHAPE0.invxyz = [0,0,0] # inversion if desired
 560     # j+halfstep schedule
 561     SVSHAPE1 = SVSHAPE()
 562     SVSHAPE1.lims = [xdim, 0b000001, zdim]
 563     SVSHAPE1.submode2 = 0b010
 564     SVSHAPE1.mode = 0b01
 565     SVSHAPE1.skip = 0b01
 566     SVSHAPE1.offset = 0       # experiment with different offset, here
 567     SVSHAPE1.invxyz = [0,0,0] # inversion if desired
 568
 569     # enumerate over the iterator function, getting new indices
 570     schedule = []
 571     i0 = iterate_dct_inner_butterfly_indices(SVSHAPE0)
 572     i1 = iterate_dct_inner_butterfly_indices(SVSHAPE1)
 573     for idx, (jl, jh) in enumerate(zip(i0, i1)):
 574         schedule.append((jl, jh))
 575         if jl[1] == 0b111: # end
 576             break
 577
 578     # ok now pretty-print the results, with some debug output
 579     print ("inner butterfly")
 580     pprint_schedule(schedule, n)
 581     print ("")
 582
 583     ################
 584     # outer butterfly
 585     ################
 586
 587     # j schedule
 588     SVSHAPE0 = SVSHAPE()
 589     SVSHAPE0.lims = [xdim, 0b000010, zdim]
 590     SVSHAPE0.mode = 0b01
 591     SVSHAPE0.submode2 = 0b100
 592     SVSHAPE0.skip = 0b10
 593     SVSHAPE0.offset = 0       # experiment with different offset, here
 594     SVSHAPE0.invxyz = [1,0,0] # inversion if desired
 595     # j+halfstep schedule
 596     SVSHAPE1 = SVSHAPE()
 597     SVSHAPE1.lims = [xdim, 0b000010, zdim]
 598     SVSHAPE1.mode = 0b01
 599     SVSHAPE1.submode2 = 0b100
 600     SVSHAPE1.skip = 0b11
 601     SVSHAPE1.offset = 0       # experiment with different offset, here
 602     SVSHAPE1.invxyz = [1,0,0] # inversion if desired
 603
 604     # enumerate over the iterator function, getting new indices
 605     schedule = []
 606     i0 = iterate_dct_outer_butterfly_indices(SVSHAPE0)
 607     i1 = iterate_dct_outer_butterfly_indices(SVSHAPE1)
 608     for idx, (jl, jh) in enumerate(zip(i0, i1)):
 609         schedule.append((jl, jh))
 610         if jl[1] == 0b111: # end
 611             break
 612
 613     # ok now pretty-print the results, with some debug output
 614     print ("outer butterfly")
 615     pprint_schedule_outer(schedule, n)
 616
 617 # run the demo
 618 if __name__ == '__main__':
 619     demo()