# Provenance: recovered from a whitespace-collapsed copy of git patch
#   eba460bcfdec209ba46f30aecfd8dfee193541df
#   From: Luke Kenneth Casson Leighton, Thu, 22 Jul 2021 15:12:43 +0100
#   Subject: [PATCH] add REMAP DCT yield schedule function, TODO
# which creates src/openpower/decoder/isa/remap_dct_yield.py
# (1 file changed, 174 insertions).
#
# NOTE(review): the copy this was recovered from had lost all text between
# one "<" and a later ">" character.  Definitions damaged by that loss are
# preserved verbatim as comments further down and are NOT reconstructed.

# DCT "REMAP" scheduler
#
# Modifications made to create an in-place iterative DCT:
# Copyright (c) 2021 Luke Kenneth Casson Leighton
#
# SPDX: LGPLv3+
#
# Original fastdctlee.py by Nayuki:
# Copyright (c) 2020 Project Nayuki. (MIT License)
# https://www.nayuki.io/page/fast-discrete-cosine-transform-algorithms

import math


# bits of the integer 'val'.
def reverse_bits(val, width):
    """Return the lowest `width` bits of `val` in reversed bit order.

    Bits of `val` above position `width - 1` are ignored.  A `width` of
    zero (or less) performs no iterations and returns 0.
    """
    result = 0
    for _ in range(width):
        # shift the result up and append the current lowest bit of val
        result = (result << 1) | (val & 1)
        val >>= 1
    return result


# iterative version of [recursively-applied] half-rev.
# relies on the list lengths being power-of-two and the fact
# that bit-inversion of a list of binary numbers is the same
# as reversing the order of the list
# this version is dead easy to implement in hardware.
# a big surprise is that the half-reversal can be done with
# such a simple XOR.  the inverse operation is slightly trickier
#
# NOTE(review): the end of halfrev2 is missing from the recovered text.
# What survives is kept below, commented out, exactly as found (only the
# indentation is inferred).  Restore it from the upstream repository
# rather than by guesswork.
#
# def halfrev2(vec, pre_rev=True):
#     res = []
#     for i in range(len(vec)):
#         if pre_rev:
#             res.append(i ^ (i>>1))
#         else:
#             ri = i
#             bl = i.bit_length()
#             for ji in range(1, bl):
#                 if (1<
#                 [... text lost here ...]

# NOTE(review): the "def" line and the set-up code of the function that
# followed halfrev2 are also missing; the surviving text resumes at
# "= 2:" (presumably the tail of "while size >= 2:" - TODO confirm).
# The fragment uses n, vec, ri, ji, t1, t2 and coeff, none of which are
# assigned in the surviving text.  Kept verbatim, commented out, with
# inferred indentation:
#
#     [... text lost here ...] = 2:
#         halfsize = size // 2
#         tablestep = n // size
#         ir = list(range(0, n, size))
#         for i in ir:
#             # two lists of half-range indices, e.g. j 0123, jr 7654
#             j = list(range(i, i + halfsize))
#             jr = list(range(i+halfsize, i + size))
#             jr.reverse()
#             for ci, (jl, jh) in enumerate(zip(j, jr)):
#                 vec[ri[ji[jl]]] = t1 + t2
#                 vec[ri[ji[jh]]] = (t1 - t2) * (1/coeff)
#             hz2 = halfsize // 2 # can be zero which stops reversing 1-item lists
#             for ci, (jl, jh) in enumerate(zip(j[:hz2], jr[:hz2])):
#                 jlh = jl+halfsize
#                 tmp1, tmp2 = ji[jlh], ji[jh]
#                 ji[jlh], ji[jh] = tmp2, tmp1
#         size //= 2


def dct_outer_butterfly(SVSHAPE):
    """Apply the outer-butterfly accumulation pass in place and return it.

    Starting with ``size = n // 2`` (``n`` being the sequence length) and
    halving until ``size`` drops below 2, every element at index ``jh`` in
    ``range(i + halfsize, i + n - halfsize, size)``, for each ``i`` below
    ``halfsize``, has the element ``size`` places further on added to it.

    The argument is modified in place and the same object is returned.
    Sequences shorter than 4 elements are returned unchanged.  A progress
    trace is printed for every step.

    NOTE(review): the original body read and returned a name ``vec`` that
    was never bound, so every call raised NameError.  The sole argument is
    now used as that vector.  The parameter name (and the "TODO" in the
    commit subject) suggest the author intends to pass something else here
    eventually - confirm against upstream before relying on this.

    NOTE(review): presumably the length must be a power of two, as stated
    for the other routines in this file; other lengths can index past the
    end of the sequence.
    """
    vec = SVSHAPE  # fix: 'vec' was previously an unbound name
    n = len(vec)
    size = n // 2
    while size >= 2:
        halfsize = size // 2
        ir = list(range(0, halfsize))
        print ("itersum", halfsize, size, ir)
        for i in ir:
            # destination indices for this offset, spaced 'size' apart
            jr = list(range(i+halfsize, i+n-halfsize, size))
            print ("itersum jr", i+halfsize, i+size, jr)
            for jh in jr:
                vec[jh] += vec[jh+size]
                print (" itersum", size, i, jh, jh+size)
        size //= 2

    return vec