From 12a15c4fe7ac571ed8feb114517ecb9ed9db5e1a Mon Sep 17 00:00:00 2001
From: Luke Kenneth Casson Leighton <lkcl@lkcl.net>
Date: Fri, 9 Jul 2021 17:03:31 +0100
Subject: [PATCH] add svstep variant of fpmadds fft test

---
 .../decoder/isa/test_caller_svp64_fft.py      | 81 +++++++++++++++++++
 1 file changed, 81 insertions(+)

diff --git a/src/openpower/decoder/isa/test_caller_svp64_fft.py b/src/openpower/decoder/isa/test_caller_svp64_fft.py
index df93c763..152b4f1b 100644
--- a/src/openpower/decoder/isa/test_caller_svp64_fft.py
+++ b/src/openpower/decoder/isa/test_caller_svp64_fft.py
@@ -146,6 +146,87 @@ class FFTTestCase(FHDLTestCase):
                 err = abs(actual - expected) / expected
                 self.assertTrue(err < 1e-7)
 
+    def test_sv_remap_fpmadds_fft_svstep(self):
+        """>>> lst = ["svremap 8, 1, 1, 1",
+                      "sv.ffmadds 2.v, 2.v, 2.v, 10.v"
+                     ]
+            runs a full in-place O(N log2 N) butterfly schedule for
+            Discrete Fourier Transform.
+
+            this is the twin "butterfly" mul-add-sub from Cooley-Tukey
+            https://en.wikipedia.org/wiki/Cooley%E2%80%93Tukey_FFT_algorithm#Data_reordering,_bit_reversal,_and_in-place_algorithms
+
+            there is the *option* to target a different location (non-in-place)
+            just in case.
+
+            SVP64 "REMAP" in Butterfly Mode is applied to a twin +/- FMAC
+            (3 inputs, 2 outputs)
+        """
+        lst = SVP64Asm( ["setvl 0, 0, 11, 1, 1, 1",
+                        "svremap 8, 1, 1, 1",
+                        "sv.ffmadds 0.v, 0.v, 0.v, 8.v",
+                        "setvl. 0, 0, 0, 1, 0, 0",
+                        "bc 4, 2, -16"
+                        ])
+        lst = list(lst)
+
+        # array and coefficients to test
+        av = [7.0, -9.8, 3.0, -32.3,
+              -2.0, 5.0, -9.8, 31.3] # array 0..7
+        coe = [-0.25, 0.5, 3.1, 6.2] # coefficients
+
+        # store in regfile
+        fprs = [0] * 32
+        for i, c in enumerate(coe):
+            fprs[i+8] = fp64toselectable(c)
+        for i, a in enumerate(av):
+            fprs[i+0] = fp64toselectable(a)
+
+        # set total. err don't know how to calculate how many there are...
+        # do it manually for now
+        VL = 0
+        size = 2
+        n = len(av)
+        while size <= n:
+            halfsize = size // 2
+            tablestep = n // size
+            for i in range(0, n, size):
+                for j in range(i, i + halfsize):
+                    VL += 1
+            size *= 2
+
+        # SVSTATE (calculated VL)
+        svstate = SVP64State()
+        svstate.vl[0:7] = VL # VL
+        svstate.maxvl[0:7] = VL # MAXVL
+        print ("SVSTATE", bin(svstate.spr.asint()))
+
+        with Program(lst, bigendian=False) as program:
+            sim = self.run_tst_program(program, svstate=svstate,
+                                       initial_fprs=fprs)
+            print ("spr svshape0", sim.spr['SVSHAPE0'])
+            print ("    xdimsz", sim.spr['SVSHAPE0'].xdimsz)
+            print ("    ydimsz", sim.spr['SVSHAPE0'].ydimsz)
+            print ("    zdimsz", sim.spr['SVSHAPE0'].zdimsz)
+            print ("spr svshape1", sim.spr['SVSHAPE1'])
+            print ("spr svshape2", sim.spr['SVSHAPE2'])
+            print ("spr svshape3", sim.spr['SVSHAPE3'])
+
+            # work out the results with the twin mul/add-sub
+            res = transform_radix2(av, coe)
+
+            for i, expected in enumerate(res):
+                print ("i", i, float(sim.fpr(i)), "expected", expected)
+            for i, expected in enumerate(res):
+                # convert to Power single
+                expected = DOUBLE2SINGLE(fp64toselectable(expected))
+                expected = float(expected)
+                actual = float(sim.fpr(i))
+                # approximate error calculation, good enough test
+                # reason: we are comparing FMAC against FMUL-plus-FADD-or-FSUB
+                # and the rounding is different
+                err = abs(actual - expected) / expected
+                self.assertTrue(err < 1e-7)
 
     def test_sv_fpmadds_fft(self):
         """>>> lst = ["sv.ffmadds 2.v, 2.v, 2.v, 10.v"
-- 
2.30.2