From 4dcd159f94217422d7db8b0b3bd7f6ddc881a4c5 Mon Sep 17 00:00:00 2001 From: Luke Kenneth Casson Leighton Date: Tue, 20 Jul 2021 21:49:23 +0100 Subject: [PATCH] add iterative list-reversing algorithm, replace recursive variant. actually really simple (to implement in hardware) --- src/openpower/decoder/isa/fastdctlee.py | 56 +++++++++++++++++++++---- 1 file changed, 48 insertions(+), 8 deletions(-) diff --git a/src/openpower/decoder/isa/fastdctlee.py b/src/openpower/decoder/isa/fastdctlee.py index 4aef62e1..898cee43 100644 --- a/src/openpower/decoder/isa/fastdctlee.py +++ b/src/openpower/decoder/isa/fastdctlee.py @@ -1,6 +1,12 @@ # -# Fast discrete cosine transform algorithms (Python) +# Fast discrete cosine transform algorithm (Python) # +# Modifications made to create an in-place iterative DCT: +# Copyright (c) 2021 Luke Kenneth Casson Leighton +# +# License for modifications - SPDX: LGPLv3+ +# +# Original fastdctlee.py by Nayuki: # Copyright (c) 2020 Project Nayuki. (MIT License) # https://www.nayuki.io/page/fast-discrete-cosine-transform-algorithms # @@ -24,9 +30,6 @@ # or other dealings in the Software. # # -# Modifications made to create an in-place iterative DCT - SPDX: LGPLv3+ -# Copyright (c) 2021 Luke Kenneth Casson Leighton -# # The modifications made are firstly to create an iterative schedule, # rather than the more normal recursive algorithm. Secondly, the # two butterflys are also separated out: inner butterfly does COS +/- @@ -42,6 +45,12 @@ # the top half is read in reverse order (7 6 5 4) and written out # to the target 4 5 6 7. the plan was to do this in two stages: # write in-place in order 4 5 6 7 then swap afterwards (7-4), (6-5). +# however by leaving the data *in-place* and having subsequent +# loops refer to the data *where it now is*, the swap is avoided +# - thirdly, arrange for the data to be *pre-swapped* (in an inverse +# order of how it would have got there, if that makes sense), such +# that *when* it gets swapped, it ends up in the right order. +# given that that will be a LD operation it's no big deal. # # End result is that once the first butterfly is done - bear in mind # it's in-place - the data is in the right order so that a second @@ -130,7 +139,8 @@ def transform(vector, indent=0): # reverse top half of a list, recursively. the recursion can be # applied *after* or *before* the reversal of the top half. these -# are inverses of each other +# are inverses of each other. +# this function is unused except to test the iterative version (halfrev2) def halfrev(l, pre_rev=True): n = len(l) if n == 1: @@ -143,6 +153,26 @@ def halfrev(l, pre_rev=True): ll, lh = halfrev(ll, pre_rev), halfrev(lh, pre_rev) return ll + lh +# iterative version of [recursively-applied] half-rev. +# relies on the list lengths being power-of-two and the fact +# that bit-inversion of a list of binary numbers is the same +# as reversing the order of the list +# this version is dead easy to implement in hardware. +# a big surprise is that the half-reversal can be done with +# such a simple XOR. the inverse operation is slightly trickier +def halfrev2(vec, pre_rev=True): + res = [] + for i in range(len(vec)): + if pre_rev: + res.append(i ^ (i>>1)) + else: + ri = i + bl = i.bit_length() + for ji in range(1, bl): + if (1<