src/openpower/decoder/isa/remap_dct_yield.py

   1 # DCT "REMAP" scheduler
   2 #
   3 # Modifications made to create an in-place iterative DCT:
   4 # Copyright (c) 2021 Luke Kenneth Casson Leighton <lkcl@lkcl.net>
   5 #
   6 # SPDX: LGPLv3+
   7 #
   8 # Original fastdctlee.py by Nayuki:
   9 # Copyright (c) 2020 Project Nayuki. (MIT License)
  10 # https://www.nayuki.io/page/fast-discrete-cosine-transform-algorithms
  11
  12 import math
  13
  14 # bits of the integer 'val'.
  15 def reverse_bits(val, width):
  16     result = 0
  17     for _ in range(width):
  18         result = (result << 1) | (val & 1)
  19         val >>= 1
  20     return result
  21
  22
  23 # iterative version of [recursively-applied] half-rev.
  24 # relies on the list lengths being power-of-two and the fact
  25 # that bit-inversion of a list of binary numbers is the same
  26 # as reversing the order of the list
  27 # this version is dead easy to implement in hardware.
  28 # a big surprise is that the half-reversal can be done with
  29 # such a simple XOR. the inverse operation is slightly trickier
  30 def halfrev2(vec, pre_rev=True):
  31     res = []
  32     for i in range(len(vec)):
  33         if pre_rev:
  34             res.append(i ^ (i>>1))
  35         else:
  36             ri = i
  37             bl = i.bit_length()
  38             for ji in range(1, bl):
  39                 if (1<<ji) & i:
  40                     ri ^= ((1<<ji)-1)
  41             res.append(vec[ri])
  42     return res
  43
  44
  45 # python "yield" can be iterated. use this to make it clear how
  46 # the indices are generated by using natural-looking nested loops
  47 def iterate_dct_inner_butterfly_indices(SVSHAPE):
  48     # get indices to iterate over, in the required order
  49     n = SVSHAPE.lims[0]
  50     # createing lists of indices to iterate over in each dimension
  51     # has to be done dynamically, because it depends on the size
  52     # first, the size-based loop (which can be done statically)
  53     x_r = []
  54     size = 2
  55     while size <= n:
  56         x_r.append(size)
  57         size *= 2
  58     # invert order if requested
  59     if SVSHAPE.invxyz[0]: x_r.reverse()
  60
  61     if len(x_r) == 0:
  62         return
  63
  64     # reference (read/write) the in-place data in *reverse-bit-order*
  65     ri = list(range(n))
  66     ri = [ri[reverse_bits(i, levels)] for i in range(n)]
  67
  68     # reference list for not needing to do data-swaps, just swap what
  69     # *indices* are referenced (two levels of indirection at the moment)
  70     # pre-reverse the data-swap list so that it *ends up* in the order 0123..
  71     ji = list(range(n))
  72     inplace_mode = SVSHAPE.skip not in [0b10, 0b11]
  73     if inplace_mode:
  74         ji = halfrev2(ji, True)
  75
  76     # start an infinite (wrapping) loop
  77     skip = 0
  78     while True:
  79         for size in x_r:           # loop over 3rd order dimension (size)
  80             x_end = size == x_r[-1]
  81             # y_r schedule depends on size
  82             halfsize = size // 2
  83             y_r = []
  84             for i in range(0, n, size):
  85                 y_r.append(i)
  86             # invert if requested
  87             if SVSHAPE.invxyz[1]: y_r.reverse()
  88             for i in y_r:       # loop over 2nd order dimension
  89                 y_end = i == y_r[-1]
  90                 k_r = []
  91                 j_r = []
  92                 k = 0
  93                 for j in range(i, i+halfsize):
  94                     k_r.append(k)
  95                     j_r.append(j)
  96                 # invert if requested
  97                 if SVSHAPE.invxyz[2]: k_r.reverse()
  98                 if SVSHAPE.invxyz[2]: j_r.reverse()
  99                 hz2 = halfsize // 2 # zero stops reversing 1-item lists
 100                 # if you *really* want to do the in-place swapping manually,
 101                 # this allows you to do it.  good luck...
 102                 if not inplace_mode:
 103                     jr = j_r[:hz2]
 104                 for j in j_r:   # loop over 1st order dimension
 105                     z_end = j == j_r[-1]
 106                     # now depending on MODE return the index
 107                     if SVSHAPE.skip in [0b00, 0b10]:
 108                         result = ri[ji[j]]        # lower half
 109                     elif SVSHAPE.skip in [0b01, 0b11]:
 110                         result = ri[ji[size-j-1]] # upper half, reverse order
 111                     loopends = (z_end |
 112                                ((y_end and z_end)<<1) |
 113                                 ((y_end and x_end and z_end)<<2))
 114
 115                     yield result + SVSHAPE.offset, loopends
 116
 117                 # now in-place swap
 118                 if inplace_mode:
 119                     for ci, (jl, jh) in enumerate(zip(j[:hz2], jr[:hz2])):
 120                         jlh = jl+halfsize
 121                         tmp1, tmp2 = ji[jlh], ji[jh]
 122                         ji[jlh], ji[jh] = tmp2, tmp1
 123
 124
 125 # totally cool *in-place* DCT algorithm
 126 def transform2(vec):
 127
 128     # Initialization
 129     n = len(vec)
 130     levels = n.bit_length() - 1
 131
 132     # and pretend we LDed data in half-swapped *and* bit-reversed order as well
 133     # TODO: merge these two
 134     vec = halfrev2(vec, False)
 135     vec = [vec[ri[i]] for i in range(n)]
 136
 137     # start the inner butterfly
 138     size = n
 139     while size >= 2:
 140         halfsize = size // 2
 141         tablestep = n // size
 142         ir = list(range(0, n, size))
 143         for i in ir:
 144             # two lists of half-range indices, e.g. j 0123, jr 7654
 145             j = list(range(i, i + halfsize))
 146             jr = list(range(i+halfsize, i + size))
 147             jr.reverse()
 148             for ci, (jl, jh) in enumerate(zip(j, jr)):
 149                 vec[ri[ji[jl]]] = t1 + t2
 150                 vec[ri[ji[jh]]] = (t1 - t2) * (1/coeff)
 151             hz2 = halfsize // 2 # can be zero which stops reversing 1-item lists
 152             for ci, (jl, jh) in enumerate(zip(j[:hz2], jr[:hz2])):
 153                 jlh = jl+halfsize
 154                 tmp1, tmp2 = ji[jlh], ji[jh]
 155                 ji[jlh], ji[jh] = tmp2, tmp1
 156         size //= 2
 157
 158 def dct_outer_butterfly(SVSHAPE):
 159     n = len(vec)
 160     size = n // 2
 161     while size >= 2:
 162         halfsize = size // 2
 163         ir = list(range(0, halfsize))
 164         print ("itersum", halfsize, size, ir)
 165         for i in ir:
 166             jr = list(range(i+halfsize, i+n-halfsize, size))
 167             print ("itersum    jr", i+halfsize, i+size, jr)
 168             for jh in jr:
 169                 vec[jh] += vec[jh+size]
 170                 print ("    itersum", size, i, jh, jh+size)
 171         size //= 2
 172
 173     return vec
 174