From 770e5bca20d44f6ac47e2a08ab76f2ce0f70a739 Mon Sep 17 00:00:00 2001
From: Luke Kenneth Casson Leighton <lkcl@lkcl.net>
Date: Mon, 19 Jul 2021 15:59:03 +0100
Subject: [PATCH] annoying: missed out something in the unit test, not working
 yet

---
 src/openpower/decoder/isa/fastdct-test.py |  2 +-
 src/openpower/decoder/isa/fastdctlee.py   | 23 ++++++++---------------
 2 files changed, 9 insertions(+), 16 deletions(-)

diff --git a/src/openpower/decoder/isa/fastdct-test.py b/src/openpower/decoder/isa/fastdct-test.py
index 25a16f3d..872cfb5c 100644
--- a/src/openpower/decoder/isa/fastdct-test.py
+++ b/src/openpower/decoder/isa/fastdct-test.py
@@ -28,7 +28,7 @@ import fastdctlee, naivedct
 class FastDctTest(unittest.TestCase):
 
     def test_fast_dct_lee_vs_naive(self):
-        for i in range(3, 10):
+        for i in range(3, 4):
             n = 2**i
             vector = FastDctTest.nonrandom_vector(n)
             expect = naivedct.transform(vector)
diff --git a/src/openpower/decoder/isa/fastdctlee.py b/src/openpower/decoder/isa/fastdctlee.py
index 425c5e33..b27caa6c 100644
--- a/src/openpower/decoder/isa/fastdctlee.py
+++ b/src/openpower/decoder/isa/fastdctlee.py
@@ -149,12 +149,6 @@ def transform2(vec):
     # and pretend we LDed the data in bit-reversed order as well
     vec = [vec[reverse_bits(i, levels)] for i in range(n)]
 
-    # create cos coefficient table
-    coeffs = []
-    for ci in range(n):
-        coeffs.append((math.cos((ci + 0.5) * math.pi / n) * 2.0))
-
-    # start the inner butterfly
     size = n
     while size >= 2:
         halfsize = size // 2
@@ -164,18 +158,18 @@ def transform2(vec):
         for i in ir:
             k = 0
             j = list(range(i, i + halfsize))
-            print ("  xform j", j)
-            for ci, jl in enumerate(j):
-                jh = i+size-jl-1
+            jr = list(range(i+halfsize, i + size))
+            jr.reverse()
+            print ("  xform jr", j, jr)
+            for ci, (jl, jh) in enumerate(zip(j, jr)):
                 t1, t2 = vec[ri[jl]], vec[ri[jh]]
+                coeff = (math.cos((ci + 0.5) * math.pi / size) * 2.0)
                 # normally DCT would use jl+halfsize not jh, here.
                 # to be able to work in-place, the idea is to perform a
-                # high-half reverse/swap afterwards.  however actually
-                # we swap the *indices*
-                coeff = coeffs[k]
+                # swap afterwards.  in practice we swap the *indices* instead
                 vec[ri[jl]] = t1 + t2
                 vec[ri[jh]] = (t1 - t2) * (1/coeff)
-                print (" ", size, i, k, "ci", ci,
+                print ("coeff", size, i, k, "ci", ci,
                         "jl", ri[jl], "jh", ri[jh],
                        "i/n", (k+0.5)/size, coeff, vec[ri[jl]], vec[ri[jh]])
                 k += tablestep
@@ -185,8 +179,7 @@ def transform2(vec):
             # incredibly... bizarrely... this works *without* having
             # to do anything else.
             hz2 = halfsize // 2 # can be zero which stops reversing 1-item lists
-            for jl in j[:hz2]:
-                jh = i+size-jl-1
+            for ci, (jl, jh) in enumerate(zip(j[:hz2], jr[:hz2])):
                 tmp = ri[jl+halfsize]
                 ri[jl+halfsize] = ri[jh]
                 ri[jh] = tmp
-- 
2.30.2