src/openpower/decoder/isa/remap_dct_yield.py

   1 # DCT "REMAP" scheduler
   2 #
   3 # Modifications made to create an in-place iterative DCT:
   4 # Copyright (c) 2021 Luke Kenneth Casson Leighton <lkcl@lkcl.net>
   5 #
   6 # SPDX: LGPLv3+
   7 #
   8 # Original fastdctlee.py by Nayuki:
   9 # Copyright (c) 2020 Project Nayuki. (MIT License)
  10 # https://www.nayuki.io/page/fast-discrete-cosine-transform-algorithms
  11
  12 import math
  13
  14 # bits of the integer 'val'.
  15 def reverse_bits(val, width):
  16     result = 0
  17     for _ in range(width):
  18         result = (result << 1) | (val & 1)
  19         val >>= 1
  20     return result
  21
  22
  23 # iterative version of [recursively-applied] half-rev.
  24 # relies on the list lengths being power-of-two and the fact
  25 # that bit-inversion of a list of binary numbers is the same
  26 # as reversing the order of the list
  27 # this version is dead easy to implement in hardware.
  28 # a big surprise is that the half-reversal can be done with
  29 # such a simple XOR. the inverse operation is slightly trickier
  30 def halfrev2(vec, pre_rev=True):
  31     res = []
  32     for i in range(len(vec)):
  33         if pre_rev:
  34             res.append(i ^ (i>>1))
  35         else:
  36             ri = i
  37             bl = i.bit_length()
  38             for ji in range(1, bl):
  39                 ri ^= (i >> ji)
  40             res.append(vec[ri])
  41     return res
  42
  43
  44 # python "yield" can be iterated. use this to make it clear how
  45 # the indices are generated by using natural-looking nested loops
  46 def iterate_dct_inner_butterfly_indices(SVSHAPE):
  47     # get indices to iterate over, in the required order
  48     n = SVSHAPE.lims[0]
  49     # createing lists of indices to iterate over in each dimension
  50     # has to be done dynamically, because it depends on the size
  51     # first, the size-based loop (which can be done statically)
  52     x_r = []
  53     size = 2
  54     while size <= n:
  55         x_r.append(size)
  56         size *= 2
  57     # invert order if requested
  58     if SVSHAPE.invxyz[0]:
  59         x_r.reverse()
  60
  61     if len(x_r) == 0:
  62         return
  63
  64     # reference (read/write) the in-place data in *reverse-bit-order*
  65     ri = list(range(n))
  66     if SVSHAPE.submode2 == 0b01:
  67         levels = n.bit_length() - 1
  68         ri = [ri[reverse_bits(i, levels)] for i in range(n)]
  69
  70     # reference list for not needing to do data-swaps, just swap what
  71     # *indices* are referenced (two levels of indirection at the moment)
  72     # pre-reverse the data-swap list so that it *ends up* in the order 0123..
  73     ji = list(range(n))
  74     inplace_mode = SVSHAPE.submode2 == 0b01
  75     #                     and SVSHAPE.skip not in [0b10, 0b11]
  76     if inplace_mode:
  77         #print ("inplace mode")
  78         ji = halfrev2(ji, True)
  79
  80     #print ("ri", ri)
  81     #print ("ji", ji)
  82
  83     # start an infinite (wrapping) loop
  84     skip = 0
  85     while True:
  86         for size in x_r:           # loop over 3rd order dimension (size)
  87             x_end = size == x_r[-1]
  88             # y_r schedule depends on size
  89             halfsize = size // 2
  90             y_r = []
  91             for i in range(0, n, size):
  92                 y_r.append(i)
  93             # invert if requested
  94             if SVSHAPE.invxyz[1]: y_r.reverse()
  95             for i in y_r:       # loop over 2nd order dimension
  96                 y_end = i == y_r[-1]
  97                 # two lists of half-range indices, e.g. j 0123, jr 7654
  98                 j = list(range(i, i + halfsize))
  99                 jr = list(range(i+halfsize, i + size))
 100                 jr.reverse()
 101                 # invert if requested
 102                 if SVSHAPE.invxyz[2]: j_r.reverse()
 103                 hz2 = halfsize // 2 # zero stops reversing 1-item lists
 104                 # if you *really* want to do the in-place swapping manually,
 105                 # this allows you to do it.  good luck...
 106                 if SVSHAPE.submode2 == 0b01 and not inplace_mode:
 107                     #print ("swap mode")
 108                     jr = j_r[:hz2]
 109                 #print ("xform jr", jr)
 110                 # loop over 1st order dimension
 111                 for ci, (jl, jh) in enumerate(zip(j, jr)):
 112                     z_end = jl == j[-1]
 113                     # now depending on MODE return the index.  inner butterfly
 114                     if SVSHAPE.skip == 0b00: # in [0b00, 0b10]:
 115                         result = ri[ji[jl]]        # lower half
 116                     elif SVSHAPE.skip == 0b01: # in [0b01, 0b11]:
 117                         result = ri[ji[jh]] # upper half, reverse order
 118                     elif SVSHAPE.skip == 0b10: #
 119                         result = ci # coefficient helper
 120                     elif SVSHAPE.skip == 0b11: #
 121                         result = size # coefficient helper
 122                     loopends = (z_end |
 123                                ((y_end and z_end)<<1) |
 124                                 ((y_end and x_end and z_end)<<2))
 125
 126                     yield result + SVSHAPE.offset, loopends
 127
 128                 # now in-place swap
 129                 if inplace_mode:
 130                     for ci, (jl, jh) in enumerate(zip(j[:hz2], jr[:hz2])):
 131                         jlh = jl+halfsize
 132                         #print ("inplace swap", jh, jlh)
 133                         tmp1, tmp2 = ji[jlh], ji[jh]
 134                         ji[jlh], ji[jh] = tmp2, tmp1
 135
 136
 137 # python "yield" can be iterated. use this to make it clear how
 138 # the indices are generated by using natural-looking nested loops
 139 def iterate_dct_outer_butterfly_indices(SVSHAPE):
 140     # get indices to iterate over, in the required order
 141     n = SVSHAPE.lims[0]
 142     # createing lists of indices to iterate over in each dimension
 143     # has to be done dynamically, because it depends on the size
 144     # first, the size-based loop (which can be done statically)
 145     x_r = []
 146     size = n // 2
 147     while size >= 2:
 148         x_r.append(size)
 149         size //= 2
 150     # invert order if requested
 151     if SVSHAPE.invxyz[0]:
 152         x_r.reverse()
 153
 154     if len(x_r) == 0:
 155         return
 156
 157     #print ("outer butterfly")
 158
 159     # reference (read/write) the in-place data in *reverse-bit-order*
 160     ri = list(range(n))
 161     if SVSHAPE.submode2 == 0b11:
 162         levels = n.bit_length() - 1
 163         ri = [ri[reverse_bits(i, levels)] for i in range(n)]
 164
 165     # reference list for not needing to do data-swaps, just swap what
 166     # *indices* are referenced (two levels of indirection at the moment)
 167     # pre-reverse the data-swap list so that it *ends up* in the order 0123..
 168     ji = list(range(n))
 169     inplace_mode = False # need the space... SVSHAPE.skip in [0b10, 0b11]
 170     if inplace_mode:
 171         #print ("inplace mode", SVSHAPE.skip)
 172         ji = halfrev2(ji, True)
 173
 174     #print ("ri", ri)
 175     #print ("ji", ji)
 176
 177     # start an infinite (wrapping) loop
 178     skip = 0
 179     while True:
 180         for size in x_r:           # loop over 3rd order dimension (size)
 181             halfsize = size//2
 182             x_end = size == x_r[-1]
 183             y_r = list(range(0, halfsize))
 184             #print ("itersum", halfsize, size, y_r)
 185             # invert if requested
 186             if SVSHAPE.invxyz[1]: y_r.reverse()
 187             for i in y_r:       # loop over 2nd order dimension
 188                 y_end = i == y_r[-1]
 189                 # one list to create iterative-sum schedule
 190                 jr = list(range(i+halfsize, i+n-halfsize, size))
 191                 #print ("itersum     jr", i+halfsize, i+size, jr)
 192                 # invert if requested
 193                 if SVSHAPE.invxyz[2]: j_r.reverse()
 194                 hz2 = halfsize // 2 # zero stops reversing 1-item lists
 195                 for ci, jh in enumerate(jr):   # loop over 1st order dimension
 196                     z_end = jh == jr[-1]
 197                     #print ("     itersum", size, i, jh, jh+size)
 198                     if SVSHAPE.skip == 0b00: # in [0b00, 0b10]:
 199                         result = ri[ji[jh]]        # lower half
 200                     elif SVSHAPE.skip == 0b01: # in [0b01, 0b11]:
 201                         result = ri[ji[jh+size]] # upper half
 202                     elif SVSHAPE.skip == 0b10: #
 203                         result = ci # coefficient helper
 204                     elif SVSHAPE.skip == 0b11: #
 205                         result = size # coefficient helper
 206                     loopends = (z_end |
 207                                ((y_end and z_end)<<1) |
 208                                 ((y_end and x_end and z_end)<<2))
 209
 210                     yield result + SVSHAPE.offset, loopends
 211
 212                 # now in-place swap
 213                 if SVSHAPE.submode2 == 0b11 and inplace_mode:
 214                     j = list(range(i, i + halfsize))
 215                     jr = list(range(i+halfsize, i + size))
 216                     jr.reverse()
 217                     for ci, (jl, jh) in enumerate(zip(j[:hz2], jr[:hz2])):
 218                         jlh = jl+halfsize
 219                         #print ("inplace swap", jh, jlh)
 220                         tmp1, tmp2 = ji[jlh], ji[jh]
 221                         ji[jlh], ji[jh] = tmp2, tmp1
 222
 223
 224 def pprint_schedule(schedule, n):
 225     size = 2
 226     idx = 0
 227     while size <= n:
 228         halfsize = size // 2
 229         tablestep = n // size
 230         print ("size %d halfsize %d tablestep %d" % \
 231                 (size, halfsize, tablestep))
 232         for i in range(0, n, size):
 233             prefix = "i %d\t" % i
 234             for j in range(i, i + halfsize):
 235                 (jl, je), (jh, he) = schedule[idx]
 236                 print ("  %-3d\t%s j=%-2d jh=%-2d "
 237                         "j[jl=%-2d] j[jh=%-2d]" % \
 238                                 (idx, prefix, j, j+halfsize,
 239                                       jl, jh,
 240                                 ),
 241                                 "end", bin(je)[2:], bin(je)[2:])
 242                 idx += 1
 243         size *= 2
 244
 245 def pprint_schedule_outer(schedule, n):
 246     size = 2
 247     idx = 0
 248     while size <= n//2:
 249         halfsize = size // 2
 250         tablestep = n // size
 251         print ("size %d halfsize %d tablestep %d" % \
 252                 (size, halfsize, tablestep))
 253         y_r = list(range(0, halfsize))
 254         for i in y_r:
 255             prefix = "i %d\t" % i
 256             jr = list(range(i+halfsize, i+n-halfsize, size))
 257             for j in jr:
 258                 (jl, je), (jh, he) = schedule[idx]
 259                 print ("  %-3d\t%s j=%-2d jh=%-2d "
 260                         "j[jl=%-2d] j[jh=%-2d]" % \
 261                                 (idx, prefix, j, j+halfsize,
 262                                       jl, jh,
 263                                 ),
 264                                 "end", bin(je)[2:], bin(je)[2:])
 265                 idx += 1
 266         size *= 2
 267
 268
 269 # totally cool *in-place* DCT algorithm using yield REMAPs
 270 def transform2(vec):
 271
 272     # Initialization
 273     n = len(vec)
 274     print ()
 275     print ("transform2", n)
 276     levels = n.bit_length() - 1
 277
 278     # reference (read/write) the in-place data in *reverse-bit-order*
 279     ri = list(range(n))
 280     ri = [ri[reverse_bits(i, levels)] for i in range(n)]
 281
 282     # and pretend we LDed data in half-swapped *and* bit-reversed order as well
 283     # TODO: merge these two
 284     vec = halfrev2(vec, False)
 285     vec = [vec[ri[i]] for i in range(n)]
 286
 287     # create a cos table: not strictly necessary but here for illustrative
 288     # purposes, to demonstrate the point that it really *is* iterative.
 289     # this table could be cached and used multiple times rather than
 290     # computed every time.
 291     ctable = []
 292     size = n
 293     while size >= 2:
 294         halfsize = size // 2
 295         for i in range(n//size):
 296             for ci in range(halfsize):
 297                 ctable.append((math.cos((ci + 0.5) * math.pi / size) * 2.0))
 298         size //= 2
 299
 300     ################
 301     # INNER butterfly
 302     ################
 303     xdim = n
 304     ydim = 0
 305     zdim = 0
 306
 307     # set up an SVSHAPE
 308     class SVSHAPE:
 309         pass
 310     # j schedule
 311     SVSHAPE0 = SVSHAPE()
 312     SVSHAPE0.lims = [xdim, ydim, zdim]
 313     SVSHAPE0.mode = 0b01
 314     SVSHAPE0.submode2 = 0b01
 315     SVSHAPE0.skip = 0b00
 316     SVSHAPE0.offset = 0       # experiment with different offset, here
 317     SVSHAPE0.invxyz = [1,0,0] # inversion if desired
 318     # j+halfstep schedule
 319     SVSHAPE1 = SVSHAPE()
 320     SVSHAPE1.lims = [xdim, ydim, zdim]
 321     SVSHAPE1.mode = 0b01
 322     SVSHAPE1.submode2 = 0b01
 323     SVSHAPE1.skip = 0b01
 324     SVSHAPE1.offset = 0       # experiment with different offset, here
 325     SVSHAPE1.invxyz = [1,0,0] # inversion if desired
 326     # ci schedule
 327     SVSHAPE2 = SVSHAPE()
 328     SVSHAPE2.lims = [xdim, ydim, zdim]
 329     SVSHAPE2.mode = 0b01
 330     SVSHAPE2.submode2 = 0b01
 331     SVSHAPE2.skip = 0b10
 332     SVSHAPE2.offset = 0       # experiment with different offset, here
 333     SVSHAPE2.invxyz = [1,0,0] # inversion if desired
 334     # size schedule
 335     SVSHAPE3 = SVSHAPE()
 336     SVSHAPE3.lims = [xdim, ydim, zdim]
 337     SVSHAPE3.mode = 0b01
 338     SVSHAPE3.submode2 = 0b01
 339     SVSHAPE3.skip = 0b11
 340     SVSHAPE3.offset = 0       # experiment with different offset, here
 341     SVSHAPE3.invxyz = [1,0,0] # inversion if desired
 342
 343     # enumerate over the iterator function, getting new indices
 344     i0 = iterate_dct_inner_butterfly_indices(SVSHAPE0)
 345     i1 = iterate_dct_inner_butterfly_indices(SVSHAPE1)
 346     i2 = iterate_dct_inner_butterfly_indices(SVSHAPE2)
 347     i3 = iterate_dct_inner_butterfly_indices(SVSHAPE3)
 348     for k, ((jl, jle), (jh, jhe), (ci, cie), (size, sze)) in \
 349                 enumerate(zip(i0, i1, i2, i3)):
 350         t1, t2 = vec[jl], vec[jh]
 351         print ("xform2", jl, jh, ci, size)
 352         coeff = (math.cos((ci + 0.5) * math.pi / size) * 2.0)
 353         assert coeff == ctable[k]
 354         vec[jl] = t1 + t2
 355         vec[jh] = (t1 - t2) * (1/coeff)
 356         print ("coeff", size, i, "ci", ci,
 357                 "jl", jl, "jh", jh,
 358                "i/n", (ci+0.5)/size, coeff, vec[jl],
 359                                             vec[jh],
 360                 "end", bin(jle), bin(jhe))
 361         if jle == 0b111: # all loops end
 362             break
 363
 364     print("transform2 pre-itersum", vec)
 365
 366     # now things are in the right order for the outer butterfly.
 367
 368     # j schedule
 369     SVSHAPE0 = SVSHAPE()
 370     SVSHAPE0.lims = [xdim, ydim, zdim]
 371     SVSHAPE0.submode2 = 0b100
 372     SVSHAPE0.mode = 0b01
 373     SVSHAPE0.skip = 0b00
 374     SVSHAPE0.offset = 0       # experiment with different offset, here
 375     SVSHAPE0.invxyz = [0,0,0] # inversion if desired
 376     # j+halfstep schedule
 377     SVSHAPE1 = SVSHAPE()
 378     SVSHAPE1.lims = [xdim, ydim, zdim]
 379     SVSHAPE1.mode = 0b01
 380     SVSHAPE1.submode2 = 0b100
 381     SVSHAPE1.skip = 0b01
 382     SVSHAPE1.offset = 0       # experiment with different offset, here
 383     SVSHAPE1.invxyz = [0,0,0] # inversion if desired
 384
 385     # enumerate over the iterator function, getting new indices
 386     i0 = iterate_dct_outer_butterfly_indices(SVSHAPE0)
 387     i1 = iterate_dct_outer_butterfly_indices(SVSHAPE1)
 388     for k, ((jl, jle), (jh, jhe)) in enumerate(zip(i0, i1)):
 389         print ("itersum    jr", jl, jh,
 390                 "end", bin(jle), bin(jhe))
 391         vec[jl] += vec[jh]
 392         size //= 2
 393         if jle == 0b111: # all loops end
 394             break
 395
 396     print("transform2 result", vec)
 397
 398     return vec
 399
 400
 401 def demo():
 402     # set the dimension sizes here
 403     n = 8
 404     xdim = n
 405     ydim = 0 # not needed
 406     zdim = 0 # again, not needed
 407
 408
 409     ################
 410     # INNER butterfly
 411     ################
 412
 413     # set up an SVSHAPE
 414     class SVSHAPE:
 415         pass
 416     # j schedule
 417     SVSHAPE0 = SVSHAPE()
 418     SVSHAPE0.lims = [xdim, ydim, zdim]
 419     SVSHAPE0.submode2 = 0b010
 420     SVSHAPE0.mode = 0b01
 421     SVSHAPE0.skip = 0b00
 422     SVSHAPE0.offset = 0       # experiment with different offset, here
 423     SVSHAPE0.invxyz = [0,0,0] # inversion if desired
 424     # j+halfstep schedule
 425     SVSHAPE1 = SVSHAPE()
 426     SVSHAPE1.lims = [xdim, ydim, zdim]
 427     SVSHAPE1.submode2 = 0b010
 428     SVSHAPE1.mode = 0b01
 429     SVSHAPE1.skip = 0b01
 430     SVSHAPE1.offset = 0       # experiment with different offset, here
 431     SVSHAPE1.invxyz = [0,0,0] # inversion if desired
 432
 433     # enumerate over the iterator function, getting new indices
 434     schedule = []
 435     i0 = iterate_dct_inner_butterfly_indices(SVSHAPE0)
 436     i1 = iterate_dct_inner_butterfly_indices(SVSHAPE1)
 437     for idx, (jl, jh) in enumerate(zip(i0, i1)):
 438         schedule.append((jl, jh))
 439         if jl[1] == 0b111: # end
 440             break
 441
 442     # ok now pretty-print the results, with some debug output
 443     print ("inner butterfly")
 444     pprint_schedule(schedule, n)
 445     print ("")
 446
 447     ################
 448     # outer butterfly
 449     ################
 450
 451     # j schedule
 452     SVSHAPE0 = SVSHAPE()
 453     SVSHAPE0.lims = [xdim, ydim, zdim]
 454     SVSHAPE0.mode = 0b10
 455     SVSHAPE0.submode2 = 0b100
 456     SVSHAPE0.skip = 0b10
 457     SVSHAPE0.offset = 0       # experiment with different offset, here
 458     SVSHAPE0.invxyz = [1,0,0] # inversion if desired
 459     # j+halfstep schedule
 460     SVSHAPE1 = SVSHAPE()
 461     SVSHAPE1.lims = [xdim, ydim, zdim]
 462     SVSHAPE1.mode = 0b10
 463     SVSHAPE1.submode2 = 0b100
 464     SVSHAPE1.skip = 0b11
 465     SVSHAPE1.offset = 0       # experiment with different offset, here
 466     SVSHAPE1.invxyz = [1,0,0] # inversion if desired
 467
 468     # enumerate over the iterator function, getting new indices
 469     schedule = []
 470     i0 = iterate_dct_outer_butterfly_indices(SVSHAPE0)
 471     i1 = iterate_dct_outer_butterfly_indices(SVSHAPE1)
 472     for idx, (jl, jh) in enumerate(zip(i0, i1)):
 473         schedule.append((jl, jh))
 474         if jl[1] == 0b111: # end
 475             break
 476
 477     # ok now pretty-print the results, with some debug output
 478     print ("outer butterfly")
 479     pprint_schedule_outer(schedule, n)
 480
 481 # run the demo
 482 if __name__ == '__main__':
 483     demo()