From b36fa1ffff00d521df1d3f901e7f63a7bfe2f23a Mon Sep 17 00:00:00 2001 From: Luke Kenneth Casson Leighton Date: Mon, 19 Jul 2021 16:24:48 +0100 Subject: [PATCH] do in-place swap --- src/openpower/decoder/isa/fastdct-test.py | 3 +-- src/openpower/decoder/isa/fastdctlee.py | 24 ++++++++++------------- 2 files changed, 11 insertions(+), 16 deletions(-) diff --git a/src/openpower/decoder/isa/fastdct-test.py b/src/openpower/decoder/isa/fastdct-test.py index 872cfb5c..7de2281e 100644 --- a/src/openpower/decoder/isa/fastdct-test.py +++ b/src/openpower/decoder/isa/fastdct-test.py @@ -28,13 +28,12 @@ import fastdctlee, naivedct class FastDctTest(unittest.TestCase): def test_fast_dct_lee_vs_naive(self): - for i in range(3, 4): + for i in range(3, 10): n = 2**i vector = FastDctTest.nonrandom_vector(n) expect = naivedct.transform(vector) original = fastdctlee.transform(vector) actual = fastdctlee.transform2(vector) - actual = original self.assertListAlmostEqual(actual, expect) expect = naivedct.inverse_transform(vector) actual = fastdctlee.inverse_transform(vector) diff --git a/src/openpower/decoder/isa/fastdctlee.py b/src/openpower/decoder/isa/fastdctlee.py index b27caa6c..ef879a89 100644 --- a/src/openpower/decoder/isa/fastdctlee.py +++ b/src/openpower/decoder/isa/fastdctlee.py @@ -24,7 +24,7 @@ # or other dealings in the Software. # # -# Modifications made to in-place iterative DCT - SPDX: LGPLv3+ +# Modifications made to create an in-place iterative DCT - SPDX: LGPLv3+ # Copyright (c) 2021 Luke Kenneth Casson Leighton # # The modifications made are firstly to create an iterative schedule, @@ -42,10 +42,7 @@ # the top half is read in reverse order (7 6 5 4) and written out # to the target 4 5 6 7. the plan was to do this in two stages: # write in-place in order 4 5 6 7 then swap afterwards (7-4), (6-5). -# the insight then was: to modify the *indirection* indices rather -# than swap the actual data, and then try the same trick as was done -# with the bit-reversed LOAD. by a bizarre twist of good fortune -# *that was not needed*! simply swapping the indices was enough! +# # End result is that once the first butterfly is done - bear in mind # it's in-place - the data is in the right order so that a second # dead-straightforward iterative sum can be done: again, in-place. @@ -147,7 +144,7 @@ def transform2(vec): ri = [ri[reverse_bits(i, levels)] for i in range(n)] # and pretend we LDed the data in bit-reversed order as well - vec = [vec[reverse_bits(i, levels)] for i in range(n)] + vec = [vec[ri[i]] for i in range(n)] size = n while size >= 2: @@ -166,7 +163,7 @@ def transform2(vec): coeff = (math.cos((ci + 0.5) * math.pi / size) * 2.0) # normally DCT would use jl+halfsize not jh, here. # to be able to work in-place, the idea is to perform a - # swap afterwards. however actually we swap the *indices* + # swap afterwards. vec[ri[jl]] = t1 + t2 vec[ri[jh]] = (t1 - t2) * (1/coeff) print ("coeff", size, i, k, "ci", ci, @@ -175,15 +172,14 @@ def transform2(vec): k += tablestep # instead of using jl+halfsize, perform a swap here. # use half of j/jr because actually jl+halfsize = reverse(j) - # actually: swap the *indices*... not the actual data. - # incredibly... bizarrely... this works *without* having - # to do anything else. hz2 = halfsize // 2 # can be zero which stops reversing 1-item lists for ci, (jl, jh) in enumerate(zip(j[:hz2], jr[:hz2])): - tmp = ri[jl+halfsize] - ri[jl+halfsize] = ri[jh] - ri[jh] = tmp - print (" swap", size, i, ri[jl+halfsize], ri[jh]) + jlh = jl+halfsize + # swap + tmp = vec[ri[jlh]] + vec[ri[jlh]] = vec[ri[jh]] + vec[ri[jh]] = tmp + print (" swap", size, i, ri[jlh], ri[jh]) size //= 2 print("post-swapped", ri) -- 2.30.2