add nayuki dct
authorLuke Kenneth Casson Leighton <lkcl@lkcl.net>
Sat, 17 Jul 2021 15:29:11 +0000 (16:29 +0100)
committerLuke Kenneth Casson Leighton <lkcl@lkcl.net>
Sat, 17 Jul 2021 15:29:11 +0000 (16:29 +0100)
src/openpower/decoder/isa/fastdct-test.py [new file with mode: 0644]
src/openpower/decoder/isa/fastdctlee.py [new file with mode: 0644]
src/openpower/decoder/isa/test_caller_svp64_fft.py

diff --git a/src/openpower/decoder/isa/fastdct-test.py b/src/openpower/decoder/isa/fastdct-test.py
new file mode 100644 (file)
index 0000000..60b65bd
--- /dev/null
@@ -0,0 +1,115 @@
+# 
+# Fast discrete cosine transform algorithms (Python)
+# 
+# Copyright (c) 2020 Project Nayuki. (MIT License)
+# https://www.nayuki.io/page/fast-discrete-cosine-transform-algorithms
+# 
+# Permission is hereby granted, free of charge, to any person obtaining a copy of
+# this software and associated documentation files (the "Software"), to deal in
+# the Software without restriction, including without limitation the rights to
+# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+# the Software, and to permit persons to whom the Software is furnished to do so,
+# subject to the following conditions:
+# - The above copyright notice and this permission notice shall be included in
+#   all copies or substantial portions of the Software.
+# - The Software is provided "as is", without warranty of any kind, express or
+#   implied, including but not limited to the warranties of merchantability,
+#   fitness for a particular purpose and noninfringement. In no event shall the
+#   authors or copyright holders be liable for any claim, damages or other
+#   liability, whether in an action of contract, tort or otherwise, arising from,
+#   out of or in connection with the Software or the use or other dealings in the
+#   Software.
+# 
+
+import math, random, unittest
+import fastdct8, fastdctfft, fastdctlee, naivedct
+
+
+class FastDctTest(unittest.TestCase):
+       
+       def test_fast_dct_lee_vs_naive(self):
+               for i in range(1, 12):
+                       n = 2**i
+                       vector = FastDctTest.random_vector(n)
+                       expect = naivedct.transform(vector)
+                       actual = fastdctlee.transform(vector)
+                       self.assertListAlmostEqual(actual, expect)
+                       expect = naivedct.inverse_transform(vector)
+                       actual = fastdctlee.inverse_transform(vector)
+                       self.assertListAlmostEqual(actual, expect)
+       
+       
+       def test_fast_dct_lee_invertibility(self):
+               for i in range(1, 18):
+                       n = 2**i
+                       vector = FastDctTest.random_vector(n)
+                       temp = fastdctlee.transform(vector)
+                       temp = fastdctlee.inverse_transform(temp)
+                       temp = [(val * 2.0 / n) for val in temp]
+                       self.assertListAlmostEqual(vector, temp)
+       
+       
+       def test_fast_dct8_vs_naive(self):
+               vector = FastDctTest.random_vector(8)
+               
+               expect = naivedct.transform(vector)
+               expect = [(val / (math.sqrt(8) if (i == 0) else 2))
+                       for (i, val) in enumerate(expect)]
+               actual = fastdct8.transform(vector)
+               self.assertListAlmostEqual(actual, expect)
+               
+               expect = [(val / (math.sqrt(2) if (i == 0) else 2))
+                       for (i, val) in enumerate(vector)]
+               expect = naivedct.inverse_transform(expect)
+               actual = fastdct8.inverse_transform(vector)
+               self.assertListAlmostEqual(actual, expect)
+       
+       
+       def test_fast_dct_fft_vs_naive(self):
+               prev = 0
+               for i in range(100 + 1):
+                       n = int(round(1000**(i / 100)))
+                       if n <= prev:
+                               continue
+                       prev = n
+                       vector = FastDctTest.random_vector(n)
+                       
+                       expect = naivedct.transform(vector)
+                       actual = fastdctfft.transform(vector)
+                       self.assertListAlmostEqual(actual, expect)
+                       
+                       expect = naivedct.inverse_transform(vector)
+                       actual = fastdctfft.inverse_transform(vector)
+                       self.assertListAlmostEqual(actual, expect)
+       
+       
+       def test_fast_dct_fft_invertibility(self):
+               prev = 0
+               for i in range(30 + 1):
+                       n = int(round(10000**(i / 30)))
+                       if n <= prev:
+                               continue
+                       prev = n
+                       vector = FastDctTest.random_vector(n)
+                       temp = fastdctfft.transform(vector)
+                       temp = fastdctfft.inverse_transform(temp)
+                       temp = [(val * 2.0 / n) for val in temp]
+                       self.assertListAlmostEqual(vector, temp)
+       
+       
+       def assertListAlmostEqual(self, actual, expect):
+               self.assertEqual(len(actual), len(expect))
+               for (x, y) in zip(actual, expect):
+                       self.assertAlmostEqual(x, y, delta=FastDctTest._EPSILON)
+       
+       
+       @staticmethod
+       def random_vector(n):
+               return [random.uniform(-1.0, 1.0) for _ in range(n)]
+       
+       
+       _EPSILON = 1e-9
+
+
+if __name__ == "__main__":
+       unittest.main()
diff --git a/src/openpower/decoder/isa/fastdctlee.py b/src/openpower/decoder/isa/fastdctlee.py
new file mode 100644 (file)
index 0000000..cf4386d
--- /dev/null
@@ -0,0 +1,76 @@
+# 
+# Fast discrete cosine transform algorithms (Python)
+# 
+# Copyright (c) 2020 Project Nayuki. (MIT License)
+# https://www.nayuki.io/page/fast-discrete-cosine-transform-algorithms
+# 
+# Permission is hereby granted, free of charge, to any person obtaining a copy of
+# this software and associated documentation files (the "Software"), to deal in
+# the Software without restriction, including without limitation the rights to
+# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+# the Software, and to permit persons to whom the Software is furnished to do so,
+# subject to the following conditions:
+# - The above copyright notice and this permission notice shall be included in
+#   all copies or substantial portions of the Software.
+# - The Software is provided "as is", without warranty of any kind, express or
+#   implied, including but not limited to the warranties of merchantability,
+#   fitness for a particular purpose and noninfringement. In no event shall the
+#   authors or copyright holders be liable for any claim, damages or other
+#   liability, whether in an action of contract, tort or otherwise, arising from,
+#   out of or in connection with the Software or the use or other dealings in the
+#   Software.
+# 
+
+import math
+
+
+# DCT type II, unscaled. Algorithm by Byeong Gi Lee, 1984.
+# See: http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.118.3056&rep=rep1&type=pdf#page=34
+def transform(vector):
+       n = len(vector)
+       if n == 1:
+               return list(vector)
+       elif n == 0 or n % 2 != 0:
+               raise ValueError()
+       else:
+               half = n // 2
+               alpha = [(vector[i] + vector[-(i + 1)]) for i in range(half)]
+               beta  = [(vector[i] - vector[-(i + 1)]) / (math.cos((i + 0.5) * math.pi / n) * 2.0)
+                       for i in range(half)]
+               alpha = transform(alpha)
+               beta  = transform(beta )
+               result = []
+               for i in range(half - 1):
+                       result.append(alpha[i])
+                       result.append(beta[i] + beta[i + 1])
+               result.append(alpha[-1])
+               result.append(beta [-1])
+               return result
+
+
+# DCT type III, unscaled. Algorithm by Byeong Gi Lee, 1984.
+# See: https://www.nayuki.io/res/fast-discrete-cosine-transform-algorithms/lee-new-algo-discrete-cosine-transform.pdf
+def inverse_transform(vector, root=True):
+       if root:
+               vector = list(vector)
+               vector[0] /= 2
+       n = len(vector)
+       if n == 1:
+               return vector
+       elif n == 0 or n % 2 != 0:
+               raise ValueError()
+       else:
+               half = n // 2
+               alpha = [vector[0]]
+               beta  = [vector[1]]
+               for i in range(2, n, 2):
+                       alpha.append(vector[i])
+                       beta.append(vector[i - 1] + vector[i + 1])
+               inverse_transform(alpha, False)
+               inverse_transform(beta , False)
+               for i in range(half):
+                       x = alpha[i]
+                       y = beta[i] / (math.cos((i + 0.5) * math.pi / n) * 2)
+                       vector[i] = x + y
+                       vector[-(i + 1)] = x - y
+               return vector
index 4febee386b4f54fe20635199c638ae417ead217c..b6a6efa65d78c4c61dbb6cea2178c1b5cd8890c0 100644 (file)
@@ -681,10 +681,12 @@ class FFTTestCase(FHDLTestCase):
                 self.assertEqual(sim.fpr(i+6), u)
 
     def test_sv_remap_fpmadds_fft_ldst(self):
-        """>>> lst = ["svshape 8, 1, 1, 1, 0",
-                     "svremap 31, 1, 0, 2, 0, 1, 0",
-                      "sv.ffmadds 2.v, 2.v, 2.v, 10.v"
-                     ]
+        """>>>lst = ["setvl 0, 0, 8, 0, 1, 1",
+                         "sv.lfsbr 0.v, 4(0), 20", # bit-reversed
+                         "svshape 8, 1, 1, 1, 0",
+                         "svremap 31, 1, 0, 2, 0, 1, 0",
+                         "sv.ffmadds 0.v, 0.v, 0.v, 8.v"
+
             runs a full in-place O(N log2 N) butterfly schedule for
             Discrete Fourier Transform, using bit-reversed LD/ST
         """
@@ -730,7 +732,8 @@ class FFTTestCase(FHDLTestCase):
             print ("mem dump")
             print (sim.mem.dump())
 
-            # work out the results with the twin mul/add-sub
+            # work out the results with the twin mul/add-sub,
+            # note bit-reverse mode requested
             res = transform_radix2(av, coe, reverse=True)
 
             for i, expected in enumerate(res):