src/openpower/decoder/isa/fastdctlee.py

   1 #
   2 # Fast discrete cosine transform algorithms (Python)
   3 #
   4 # Copyright (c) 2020 Project Nayuki. (MIT License)
   5 # https://www.nayuki.io/page/fast-discrete-cosine-transform-algorithms
   6 #
   7 # License for original fastdctlee.py by Nayuki:
   8 #
   9 # Permission is hereby granted, free of charge, to any person obtaining
  10 # a copy of this software and associated documentation files (the
  11 # "Software"), to deal in the Software without restriction, including
  12 # without limitation the rights to use, copy, modify, merge, publish,
  13 # distribute, sublicense, and/or sell copies of the Software, and to
  14 # permit persons to whom the Software is furnished to do so, subject to
  15 # the following conditions:
  16 # - The above copyright notice and this permission notice shall be included in
  17 #   all copies or substantial portions of the Software.
  18 # - The Software is provided "as is", without warranty of any kind, express or
  19 #   implied, including but not limited to the warranties of merchantability,
  20 #   fitness for a particular purpose and noninfringement. In no event shall the
  21 #   authors or copyright holders be liable for any claim, damages or other
  22 #   liability, whether in an action of contract, tort or otherwise,
  23 #   arising from, out of or in connection with the Software or the use
  24 #   or other dealings in the Software.
  25 #
  26 #
  27 # Modifications made to create an in-place iterative DCT - SPDX: LGPLv3+
  28 # Copyright (c) 2021 Luke Kenneth Casson Leighton <lkcl@lkcl.net>
  29 #
  30 # The modifications made are firstly to create an iterative schedule,
  31 # rather than the more normal recursive algorithm.  Secondly, the
  32 # two butterflys are also separated out: inner butterfly does COS +/-
  33 # whilst outer butterfly does the iterative summing.
  34 #
  35 # However, to avoid data copying some additional tricks are played:
  36 # - firstly, the data is LOADed in bit-reversed order (which is normally
  37 #   covered by the recursive algorithm due to the odd-even reconstruction)
  38 #   but then to reference the data in the correct order an array of
  39 #   bit-reversed indices is created, as a level of indirection.
  40 #   the data is bit-reversed but so are the indices, making it all A-Ok.
  41 # - secondly, normally in DCT a 2nd target (copy) array is used where
  42 #   the top half is read in reverse order (7 6 5 4) and written out
  43 #   to the target 4 5 6 7.  the plan was to do this in two stages:
  44 #   write in-place in order 4 5 6 7 then swap afterwards (7-4), (6-5).
  45 #
  46 # End result is that once the first butterfly is done - bear in mind
  47 # it's in-place - the data is in the right order so that a second
  48 # dead-straightforward iterative sum can be done: again, in-place.
  49 # Really surprising.
  50
  51 import math
  52 from copy import deepcopy
  53
  54 # bits of the integer 'val'.
  55 def reverse_bits(val, width):
  56     result = 0
  57     for _ in range(width):
  58         result = (result << 1) | (val & 1)
  59         val >>= 1
  60     return result
  61
  62
  63 # DCT type II, unscaled. Algorithm by Byeong Gi Lee, 1984.
  64 # See: http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.118.3056&rep=rep1&type=pdf#page=34
  65 # original (recursive) algorithm by Nayuki
  66 def transform(vector, indent=0):
  67     idt = "   " * indent
  68     n = len(vector)
  69     if n == 1:
  70         return list(vector)
  71     elif n == 0 or n % 2 != 0:
  72         raise ValueError()
  73     else:
  74         half = n // 2
  75         alpha = [(vector[i] + vector[-(i + 1)]) for i in range(half)]
  76         beta  = [(vector[i] - vector[-(i + 1)]) /
  77                  (math.cos((i + 0.5) * math.pi / n) * 2.0)
  78                     for i in range(half)]
  79         alpha = transform(alpha)
  80         beta  = transform(beta )
  81         result = []
  82         for i in range(half - 1):
  83             result.append(alpha[i])
  84             result.append(beta[i] + beta[i + 1])
  85         result.append(alpha[-1])
  86         result.append(beta [-1])
  87         return result
  88
  89
  90 # modified (iterative) algorithm by lkcl, based on Nayuki original
  91 def transform(vector, indent=0):
  92     idt = "   " * indent
  93     n = len(vector)
  94     if n == 1:
  95         return list(vector)
  96     elif n == 0 or n % 2 != 0:
  97         raise ValueError()
  98     else:
  99         half = n // 2
 100         alpha = [0] * half
 101         beta = [0] * half
 102         print (idt, "xf", vector)
 103         print (idt, "coeff", n, "->", end=" ")
 104         for i in range(half):
 105             t1, t2 = vector[i], vector[n-i-1]
 106             k = (math.cos((i + 0.5) * math.pi / n) * 2.0)
 107             print (i, n-i-1, "i/n", (i+0.5)/n, ":", k, end= " ")
 108             alpha[i] = t1 + t2
 109             beta[i] = (t1 - t2) * (1/k)
 110         print ()
 111         print (idt, "n", n, "alpha", end=" ")
 112         for i in range(0, n, 2):
 113             print (i, i//2, alpha[i//2], end=" ")
 114         print()
 115         print (idt, "n", n, "beta", end=" ")
 116         for i in range(0, n, 2):
 117             print (i, beta[i//2], end=" ")
 118         print()
 119         alpha = transform(alpha, indent+1)
 120         beta  = transform(beta , indent+1)
 121         result = [0] * n
 122         for i in range(half):
 123             result[i*2] = alpha[i]
 124             result[i*2+1] = beta[i]
 125         print(idt, "merge", result)
 126         for i in range(half - 1):
 127             result[i*2+1] += result[i*2+3]
 128         print(idt, "result", result)
 129         return result
 130
 131 # reverse top half of a list, recursively.  the recursion can be
 132 # applied *after* or *before* the reversal of the top half.  these
 133 # are inverses of each other
 134 def halfrev(l, pre_rev=True):
 135     n = len(l)
 136     if n == 1:
 137         return l
 138     ll, lh = l[:n//2], l[n//2:]
 139     if pre_rev:
 140         ll, lh = halfrev(ll, pre_rev), halfrev(lh, pre_rev)
 141     lh.reverse()
 142     if not pre_rev:
 143         ll, lh = halfrev(ll, pre_rev), halfrev(lh, pre_rev)
 144     return ll + lh
 145
 146
 147 # totally cool *in-place* DCT algorithm
 148 def transform2(vec):
 149
 150     vec = deepcopy(vec)
 151     # Initialization
 152     n = len(vec)
 153     print ()
 154     print ("transform2", n)
 155     levels = n.bit_length() - 1
 156
 157     # reference (read/write) the in-place data in *reverse-bit-order*
 158     ri = list(range(n))
 159     ri = [ri[reverse_bits(i, levels)] for i in range(n)]
 160
 161     # reference list for not needing to do data-swaps, just swap what
 162     # *indices* are referenced (two levels of indirection at the moment)
 163     # pre-reverse the data-swap list so that it *ends up* in the order 0123..
 164     ji = list(range(n))
 165     ji = halfrev(ji, True)
 166
 167     # and pretend we LDed data in half-swapped *and* bit-reversed order as well
 168     # TODO: merge these two
 169     vec = halfrev(vec, False)
 170     vec = [vec[ri[i]] for i in range(n)]
 171
 172     # start the inner butterfly
 173     size = n
 174     while size >= 2:
 175         halfsize = size // 2
 176         tablestep = n // size
 177         ir = list(range(0, n, size))
 178         print ("  xform", size, ir)
 179         for i in ir:
 180             # two lists of half-range indices, e.g. j 0123, jr 7654
 181             j = list(range(i, i + halfsize))
 182             jr = list(range(i+halfsize, i + size))
 183             jr.reverse()
 184             print ("  xform jr", j, jr)
 185             for ci, (jl, jh) in enumerate(zip(j, jr)):
 186                 t1, t2 = vec[ri[ji[jl]]], vec[ri[ji[jh]]]
 187                 coeff = (math.cos((ci + 0.5) * math.pi / size) * 2.0)
 188                 # normally DCT would use jl+halfsize not jh, here.
 189                 # to be able to work in-place, the idea is to perform a
 190                 # swap afterwards.
 191                 vec[ri[ji[jl]]] = t1 + t2
 192                 vec[ri[ji[jh]]] = (t1 - t2) * (1/coeff)
 193                 print ("coeff", size, i, "ci", ci,
 194                         "jl", ri[jl], "jh", ri[jh],
 195                        "i/n", (ci+0.5)/size, coeff, vec[ri[ji[jl]]],
 196                                                     vec[ri[ji[jh]]])
 197             # instead of using jl+halfsize, perform a swap here.
 198             # use half of j/jr because actually jl+halfsize = reverse(j)
 199             hz2 = halfsize // 2 # can be zero which stops reversing 1-item lists
 200             for ci, (jl, jh) in enumerate(zip(j[:hz2], jr[:hz2])):
 201                 jlh = jl+halfsize
 202                 # swap indices
 203                 tmp1 = ji[jlh]
 204                 tmp2 = ji[jh]
 205                 ji[jlh] = tmp2
 206                 ji[jh] = tmp1
 207                 print ("     swap", size, i, ji[jlh], ji[jh])
 208         size //= 2
 209
 210     print("post-swapped", ri)
 211     print("ji-swapped", ji)
 212
 213     print("transform2 pre-itersum", vec)
 214
 215     # now things are in the right order for the outer butterfly.
 216     n = len(vec)
 217     size = n // 2
 218     while size >= 2:
 219         halfsize = size // 2
 220         ir = list(range(0, halfsize))
 221         print ("itersum", halfsize, size, ir)
 222         for i in ir:
 223             jr = list(range(i+halfsize, i+n-halfsize, size))
 224             print ("itersum    jr", i+halfsize, i+size, jr)
 225             for jh in jr:
 226                 vec[jh] += vec[jh+size]
 227                 print ("    itersum", size, i, jh, jh+size)
 228         size //= 2
 229
 230     print("transform2 result", vec)
 231
 232     return vec
 233
 234
 235 # DCT type III, unscaled. Algorithm by Byeong Gi Lee, 1984.
 236 # See: https://www.nayuki.io/res/fast-discrete-cosine-transform-algorithms/lee-new-algo-discrete-cosine-transform.pdf
 237 def inverse_transform(vector, root=True, indent=0):
 238     idt = "   " * indent
 239     if root:
 240         vector = list(vector)
 241         vector[0] /= 2
 242     n = len(vector)
 243     if n == 1:
 244         return vector, [0]
 245     elif n == 0 or n % 2 != 0:
 246         raise ValueError()
 247     else:
 248         half = n // 2
 249         alpha = [vector[0]]
 250         beta  = [vector[1]]
 251         for i in range(2, n, 2):
 252             alpha.append(vector[i])
 253             beta.append(vector[i - 1] + vector[i + 1])
 254         print (idt, "n", n, "alpha 0", end=" ")
 255         for i in range(2, n, 2):
 256             print (i, end=" ")
 257         print ("beta 1", end=" ")
 258         for i in range(2, n, 2):
 259             print ("%d+%d" % (i-1, i+1), end=" ")
 260         print()
 261         inverse_transform(alpha, False, indent+1)
 262         inverse_transform(beta , False, indent+1)
 263         for i in range(half):
 264             x = alpha[i]
 265             y = beta[i] / (math.cos((i + 0.5) * math.pi / n) * 2)
 266             vector[i] = x + y
 267             vector[-(i + 1)] = x - y
 268             print (idt, " v[%d] = alpha[%d]+beta[%d]" % (i, i, i))
 269             print (idt, " v[%d] = alpha[%d]-beta[%d]" % (n-i-1, i, i))
 270         return vector
 271
 272
 273 def inverse_transform2(vector, root=True):
 274     n = len(vector)
 275     if root:
 276         vector = list(vector)
 277     if n == 1:
 278         return vector
 279     elif n == 0 or n % 2 != 0:
 280         raise ValueError()
 281     else:
 282         half = n // 2
 283         alpha = [0]
 284         beta  = [1]
 285         for i in range(2, n, 2):
 286             alpha.append(i)
 287             beta.append(("add", i - 1, i + 1))
 288         inverse_transform2(alpha, False)
 289         inverse_transform2(beta , False)
 290         for i in range(half):
 291             x = alpha[i]
 292             y = ("cos", beta[i], i)
 293             vector[i] = ("add", x, y)
 294             vector[-(i + 1)] = ("sub", x, y)
 295         return vector
 296
 297
 298 # does the outer butterfly in a recursive fashion, used in an
 299 # intermediary development of the in-place DCT.
 300 def transform_itersum(vector, indent=0):
 301     idt = "   " * indent
 302     n = len(vector)
 303     if n == 1:
 304         return list(vector)
 305     elif n == 0 or n % 2 != 0:
 306         raise ValueError()
 307     else:
 308         half = n // 2
 309         alpha = [0] * half
 310         beta = [0] * half
 311         for i in range(half):
 312             t1, t2 = vector[i], vector[i+half]
 313             alpha[i] = t1
 314             beta[i] = t2
 315         alpha = transform_itersum(alpha, indent+1)
 316         beta  = transform_itersum(beta , indent+1)
 317         result = [0] * n
 318         for i in range(half):
 319             result[i*2] = alpha[i]
 320             result[i*2+1] = beta[i]
 321         print(idt, "iter-merge", result)
 322         for i in range(half - 1):
 323             result[i*2+1] += result[i*2+3]
 324         print(idt, "iter-result", result)
 325         return result
 326
 327
 328 # prints out an "add" schedule for the outer butterfly, recursively,
 329 # matching what transform_itersum does.
 330 def itersum_explore(vector, indent=0):
 331     idt = "   " * indent
 332     n = len(vector)
 333     if n == 1:
 334         return list(vector)
 335     elif n == 0 or n % 2 != 0:
 336         raise ValueError()
 337     else:
 338         half = n // 2
 339         alpha = [0] * half
 340         beta = [0] * half
 341         for i in range(half):
 342             t1, t2 = vector[i], vector[i+half]
 343             alpha[i] = t1
 344             beta[i] = t2
 345         alpha = itersum_explore(alpha, indent+1)
 346         beta  = itersum_explore(beta , indent+1)
 347         result = [0] * n
 348         for i in range(half):
 349             result[i*2] = alpha[i]
 350             result[i*2+1] = beta[i]
 351         print(idt, "iter-merge", result)
 352         for i in range(half - 1):
 353             result[i*2+1] = ("add", result[i*2+1], result[i*2+3])
 354         print(idt, "iter-result", result)
 355         return result
 356
 357
 358 # prints out the exact same outer butterfly but does so iteratively.
 359 # by comparing the output from itersum_explore and itersum_explore2
 360 # and by drawing out the resultant ADDs as a graph it was possible
 361 # to deduce what the heck was going on.
 362 def itersum_explore2(vec, indent=0):
 363     n = len(vec)
 364     size = n // 2
 365     while size >= 2:
 366         halfsize = size // 2
 367         ir = list(range(0, halfsize))
 368         print ("itersum", halfsize, size, ir)
 369         for i in ir:
 370             jr = list(range(i+halfsize, i+n-halfsize, size))
 371             print ("itersum    jr", i+halfsize, i+size, jr)
 372             for jh in jr:
 373                 vec[jh] = ("add", vec[jh], vec[jh+size])
 374                 print ("    itersum", size, i, jh, jh+size)
 375         size //= 2
 376
 377     return vec
 378
 379 if __name__ == '__main__':
 380     n = 16
 381     vec = list(range(n))
 382     levels = n.bit_length() - 1
 383     vec = [vec[reverse_bits(i, levels)] for i in range(n)]
 384     ops = itersum_explore(vec)
 385     for i, x in enumerate(ops):
 386         print (i, x)
 387
 388     n = 16
 389     vec = list(range(n))
 390     levels = n.bit_length() - 1
 391     ops = itersum_explore2(vec)
 392     for i, x in enumerate(ops):
 393         print (i, x)
 394
 395     # halfrev test
 396     vec = list(range(8))
 397     print ("orig vec", vec)
 398     vecr = halfrev(vec, True)
 399     print ("reversed", vecr)
 400     vecrr = halfrev(vecr, False)
 401     assert vec == vecrr
 402     vecrr = halfrev(vec, False)
 403     print ("pre-reversed", vecrr)