From: Luke Kenneth Casson Leighton <lkcl@lkcl.net>
Date: Thu, 22 Jul 2021 16:31:29 +0000 (+0100)
Subject: half way through converting in-place dct to yield unit test
X-Git-Tag: xlen-bcd~232
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=688ef33ea0cd3481cc991e99e377bd9d19df4a42;p=openpower-isa.git

half way through converting in-place dct to yield unit test
---

diff --git a/src/openpower/decoder/isa/fastdct-test.py b/src/openpower/decoder/isa/fastdct-test.py
index be42d368..b1db7803 100644
--- a/src/openpower/decoder/isa/fastdct-test.py
+++ b/src/openpower/decoder/isa/fastdct-test.py
@@ -22,12 +22,12 @@
 #
 
 import math, random, unittest
-import fastdctlee, naivedct
+import fastdctlee, naivedct, remap_dct_yield
 
 
 class FastDctTest(unittest.TestCase):
 
-    def test_fast_dct_lee_vs_naive(self):
+    def tst_fast_dct_lee_vs_naive(self):
         for i in range(3, 10):
             n = 2**i
             vector = FastDctTest.nonrandom_vector(n)
@@ -39,6 +39,17 @@ class FastDctTest(unittest.TestCase):
             actual = fastdctlee.inverse_transform(vector)
             self.assertListAlmostEqual(actual, expect)
 
+    def test_yield_dct_lee_vs_naive(self):
+        for i in range(3, 10):
+            n = 2**i
+            vector = FastDctTest.nonrandom_vector(n)
+            expect = fastdctlee.transform2(vector)
+            actual = remap_dct_yield.transform2(vector)
+            self.assertListAlmostEqual(actual, expect)
+            expect = naivedct.inverse_transform(vector)
+            actual = fastdctlee.inverse_transform(vector)
+            self.assertListAlmostEqual(actual, expect)
+
     def notest_fast_dct_lee_invertibility(self):
         for i in range(1, 10):
             n = 2**i
diff --git a/src/openpower/decoder/isa/fastdctlee.py b/src/openpower/decoder/isa/fastdctlee.py
index f2641461..68678ab9 100644
--- a/src/openpower/decoder/isa/fastdctlee.py
+++ b/src/openpower/decoder/isa/fastdctlee.py
@@ -202,6 +202,9 @@ def transform2(vec):
     vec = halfrev2(vec, False)
     vec = [vec[ri[i]] for i in range(n)]
 
+    print ("ri", ri)
+    print ("ji", ji)
+
     # create a cos table: not strictly necessary but here for illustrative
     # purposes, to demonstrate the point that it really *is* iterative.
     # this table could be cached and used multiple times rather than
@@ -240,7 +243,7 @@ def transform2(vec):
                 vec[ri[ji[jl]]] = t1 + t2
                 vec[ri[ji[jh]]] = (t1 - t2) * (1/coeff)
                 print ("coeff", size, i, "ci", ci,
-                        "jl", ri[jl], "jh", ri[jh],
+                        "jl", ri[ji[jl]], "jh", ri[ji[jh]],
                        "i/n", (ci+0.5)/size, coeff, vec[ri[ji[jl]]],
                                                     vec[ri[ji[jh]]])
             # instead of using jl+halfsize, perform a swap here.
diff --git a/src/openpower/decoder/isa/remap_dct_yield.py b/src/openpower/decoder/isa/remap_dct_yield.py
index 63928d4d..e6598fa8 100644
--- a/src/openpower/decoder/isa/remap_dct_yield.py
+++ b/src/openpower/decoder/isa/remap_dct_yield.py
@@ -73,8 +73,12 @@ def iterate_dct_butterfly_indices(SVSHAPE):
     ji = list(range(n))
     inplace_mode = SVSHAPE.mode == 0b01 and SVSHAPE.skip not in [0b10, 0b11]
     if inplace_mode:
+        print ("inplace mode")
         ji = halfrev2(ji, True)
 
+    print ("ri", ri)
+    print ("ji", ji)
+
     # start an infinite (wrapping) loop
     skip = 0
     while True:
@@ -89,12 +93,10 @@ def iterate_dct_butterfly_indices(SVSHAPE):
             if SVSHAPE.invxyz[1]: y_r.reverse()
             for i in y_r:       # loop over 2nd order dimension
                 y_end = i == y_r[-1]
-                k_r = []
-                j_r = []
-                k = 0
-                for j in range(i, i+halfsize):
-                    k_r.append(k)
-                    j_r.append(j)
+                # two lists of half-range indices, e.g. j 0123, jr 7654
+                j = list(range(i, i + halfsize))
+                jr = list(range(i+halfsize, i + size))
+                jr.reverse()
                 # invert if requested
                 if SVSHAPE.invxyz[2]: k_r.reverse()
                 if SVSHAPE.invxyz[2]: j_r.reverse()
@@ -102,14 +104,16 @@ def iterate_dct_butterfly_indices(SVSHAPE):
                 # if you *really* want to do the in-place swapping manually,
                 # this allows you to do it.  good luck...
                 if SVSHAPE.mode == 0b01 and not inplace_mode:
+                    print ("swap mode")
                     jr = j_r[:hz2]
-                for j in j_r:   # loop over 1st order dimension
-                    z_end = j == j_r[-1]
+                print ("xform jr", jr)
+                for jl, jh in zip(j, jr):   # loop over 1st order dimension
+                    z_end = jl == j[-1]
                     # now depending on MODE return the index
                     if SVSHAPE.skip in [0b00, 0b10]:
-                        result = ri[ji[j]]        # lower half
+                        result = ri[ji[jl]]        # lower half
                     elif SVSHAPE.skip in [0b01, 0b11]:
-                        result = ri[ji[size-j-1]] # upper half, reverse order
+                        result = ri[ji[jh]] # upper half, reverse order
                     loopends = (z_end |
                                ((y_end and z_end)<<1) |
                                 ((y_end and x_end and z_end)<<2))
@@ -118,9 +122,9 @@ def iterate_dct_butterfly_indices(SVSHAPE):
 
                 # now in-place swap
                 if SVSHAPE.mode == 0b01 and inplace_mode:
-                    for ci, jl in enumerate(j_r[:hz2]):
-                        jh = size-jl-1 # upper half, reverse order
+                    for ci, (jl, jh) in enumerate(zip(j[:hz2], jr[:hz2])):
                         jlh = jl+halfsize
+                        #print ("inplace swap", jh, jlh)
                         tmp1, tmp2 = ji[jlh], ji[jh]
                         ji[jlh], ji[jh] = tmp2, tmp1
 
@@ -146,6 +150,102 @@ def pprint_schedule(schedule, n):
                 idx += 1
         size *= 2
 
+# totally cool *in-place* DCT algorithm using yield REMAPs
+def transform2(vec):
+
+    # Initialization
+    n = len(vec)
+    print ()
+    print ("transform2", n)
+    levels = n.bit_length() - 1
+
+    # reference (read/write) the in-place data in *reverse-bit-order*
+    ri = list(range(n))
+    ri = [ri[reverse_bits(i, levels)] for i in range(n)]
+
+    # and pretend we LDed data in half-swapped *and* bit-reversed order as well
+    # TODO: merge these two
+    vec = halfrev2(vec, False)
+    vec = [vec[ri[i]] for i in range(n)]
+
+    # create a cos table: not strictly necessary but here for illustrative
+    # purposes, to demonstrate the point that it really *is* iterative.
+    # this table could be cached and used multiple times rather than
+    # computed every time.
+    ctable = []
+    size = n
+    VL = 0
+    while size >= 2:
+        halfsize = size // 2
+        for i in range(n//size):
+            for ci in range(halfsize):
+                ctable.append((math.cos((ci + 0.5) * math.pi / size) * 2.0))
+                VL += 1
+        size //= 2
+
+    ################
+    # INNER butterfly
+    ################
+    xdim = n
+    ydim = 0
+    zdim = 0
+
+    # set up an SVSHAPE
+    class SVSHAPE:
+        pass
+    # j schedule
+    SVSHAPE0 = SVSHAPE()
+    SVSHAPE0.lims = [xdim, ydim, zdim]
+    SVSHAPE0.order = [0,1,2]  # experiment with different permutations, here
+    SVSHAPE0.mode = 0b01
+    SVSHAPE0.skip = 0b00
+    SVSHAPE0.offset = 0       # experiment with different offset, here
+    SVSHAPE0.invxyz = [1,0,0] # inversion if desired
+    # j+halfstep schedule
+    SVSHAPE1 = SVSHAPE()
+    SVSHAPE1.lims = [xdim, ydim, zdim]
+    SVSHAPE1.order = [0,1,2]  # experiment with different permutations, here
+    SVSHAPE1.mode = 0b01
+    SVSHAPE1.skip = 0b01
+    SVSHAPE1.offset = 0       # experiment with different offset, here
+    SVSHAPE1.invxyz = [1,0,0] # inversion if desired
+
+    # enumerate over the iterator function, getting new indices
+    i0 = iterate_dct_butterfly_indices(SVSHAPE0)
+    i1 = iterate_dct_butterfly_indices(SVSHAPE1)
+    for k, ((jl, jle), (jh, jhe)) in enumerate(zip(i0, i1)):
+        if k >= VL:
+            break
+        t1, t2 = vec[jl], vec[jh]
+        coeff = ctable[k]
+        vec[jl] = t1 + t2
+        vec[jh] = (t1 - t2) * (1/coeff)
+        print ("coeff", size, i, "ci", ci,
+                "jl", jl, "jh", jh,
+               "i/n", (ci+0.5)/size, coeff, vec[jl],
+                                            vec[jh])
+
+    print("transform2 pre-itersum", vec)
+
+    # now things are in the right order for the outer butterfly.
+    n = len(vec)
+    size = n // 2
+    while size >= 2:
+        halfsize = size // 2
+        ir = list(range(0, halfsize))
+        print ("itersum", halfsize, size, ir)
+        for i in ir:
+            jr = list(range(i+halfsize, i+n-halfsize, size))
+            print ("itersum    jr", i+halfsize, i+size, jr)
+            for jh in jr:
+                vec[jh] += vec[jh+size]
+                print ("    itersum", size, i, jh, jh+size)
+        size //= 2
+
+    print("transform2 result", vec)
+
+    return vec
+
 
 def demo():
     # set the dimension sizes here