From d70aa28e0fdfbc5098d14e14ebca62c52e0615e2 Mon Sep 17 00:00:00 2001 From: Luke Kenneth Casson Leighton Date: Thu, 29 Jul 2021 23:11:59 +0100 Subject: [PATCH] random experimentation landed eventually on a "pass" of iDCT with in-place half-swapping --- src/openpower/decoder/isa/fastdct-test.py | 2 +- src/openpower/decoder/isa/fastdctlee.py | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/openpower/decoder/isa/fastdct-test.py b/src/openpower/decoder/isa/fastdct-test.py index 72414d5c..c3aacd6b 100644 --- a/src/openpower/decoder/isa/fastdct-test.py +++ b/src/openpower/decoder/isa/fastdct-test.py @@ -28,7 +28,7 @@ import fastdctlee, naivedct, remap_dct_yield class FastDctTest(unittest.TestCase): def test_fast_dct_lee_vs_naive(self): - for i in range(3, 4): + for i in range(3, 10): n = 2**i vector = FastDctTest.nonrandom_vector(n) expect = naivedct.transform(vector) diff --git a/src/openpower/decoder/isa/fastdctlee.py b/src/openpower/decoder/isa/fastdctlee.py index 16de9cb7..c558aec6 100644 --- a/src/openpower/decoder/isa/fastdctlee.py +++ b/src/openpower/decoder/isa/fastdctlee.py @@ -359,8 +359,8 @@ def inverse_transform_iter(vec): vec[0] /= 2.0 print("transform2-inv pre-itersum", vec) - #vec = halfrev2(vec, True) vec = [vec[ri[i]] for i in range(n)] + vec = halfrev2(vec, True) #print("transform2-inv post-itersum-reorder", vec) # first the outer butterfly (iterative sum thing) @@ -375,9 +375,9 @@ def inverse_transform_iter(vec): jr.reverse() print ("itersum jr", i+halfsize, i+size, jr) for jh in jr: - x = vec[ri[jh]] - y = vec[ri[jh+size]] - vec[ri[jh+size]] = x + y + x = vec[ji[ri[jh]]] + y = vec[ji[ri[jh+size]]] + vec[ji[ri[jh+size]]] = x + y print (" itersum", size, i, jh, jh+size, x, y, "jh+sz", vec[ji[jh+size]]) size *= 2 @@ -386,7 +386,7 @@ def inverse_transform_iter(vec): # and pretend we LDed data in half-swapped *and* bit-reversed order as well # TODO: merge these two - vec = halfrev2(vec, True) + #vec = halfrev2(vec, True) ri = list(range(n)) print("transform2-inv post-reorder", vec) -- 2.30.2