comments/variables-cleanup
[openpower-isa.git] / src / openpower / decoder / isa / test_caller_svp64_fft.py
index 346f9e4d8dbd9dbe954dbecfcd408fa7bae1c170..cf5915adf99b4348316ec7853376362df0ed6893 100644 (file)
@@ -1,5 +1,5 @@
 from nmigen import Module, Signal
-from nmigen.back.pysim import Simulator, Delay, Settle
+from nmigen.sim import Simulator, Delay, Settle
 from nmutil.formaltest import FHDLTestCase
 import unittest
 from openpower.decoder.power_decoder import (create_pdecode)
@@ -9,11 +9,15 @@ from openpower.decoder.selectable_int import SelectableInt
 from openpower.decoder.isa.test_caller import run_tst
 from openpower.sv.trans.svp64 import SVP64Asm
 from copy import deepcopy
-from openpower.decoder.helpers import fp64toselectable
-from openpower.decoder.isafunctions.double2single import DOUBLE2SINGLE
+from openpower.decoder.helpers import fp64toselectable, SINGLE
+from openpower.decoder.isafunctions.double2single import ISACallerFnHelper
 
+# really bad hack.  need to access the DOUBLE2SINGLE function auto-generated
+# from pseudo-code.
+fph = ISACallerFnHelper(XLEN=64)
 
-def transform_radix2(vec, exptable):
+
+def transform_radix2(vec, exptable, reverse=False):
     """
     # FFT and convolution test (Python), based on Project Nayuki
     #
@@ -34,7 +38,8 @@ def transform_radix2(vec, exptable):
     levels = n.bit_length() - 1
 
     # Copy with bit-reversed permutation
-    #vec = [vec[reverse_bits(i, levels)] for i in range(n)]
+    if reverse:
+        vec = [vec[reverse_bits(i, levels)] for i in range(n)]
 
     size = 2
     while size <= n:
@@ -61,7 +66,7 @@ def transform_radix2(vec, exptable):
     return vec
 
 
-def transform_radix2_complex(vec_r, vec_i, cos_r, sin_i):
+def transform_radix2_complex(vec_r, vec_i, cos_r, sin_i, reverse=False):
     """
     # FFT and convolution test (Python), based on Project Nayuki
     #
@@ -82,7 +87,8 @@ def transform_radix2_complex(vec_r, vec_i, cos_r, sin_i):
     levels = n.bit_length() - 1
 
     # Copy with bit-reversed permutation
-    #vec = [vec[reverse_bits(i, levels)] for i in range(n)]
+    if reverse:
+        vec = [vec[reverse_bits(i, levels)] for i in range(n)]
 
     size = 2
     while size <= n:
@@ -131,10 +137,63 @@ class FFTTestCase(FHDLTestCase):
         for i in range(32):
             self.assertEqual(sim.gpr(i), SelectableInt(expected[i], 64))
 
+    def test_sv_remap_fpmadds_fft_4(self):
+        """>>> lst = ["svshape 2, 1, 1, 1, 0",
+                     "svremap 31, 1, 0, 2, 0, 1, 0",
+                     "sv.ffmadds *0, *0, *0, *8"
+                     ]
+        minimal 2-point FFT butterfly: FPRs 0-1 hold the array, FPR 8 the
+        coefficient; FP results are compared against transform_radix2.
+        NOTE(review): intended as a cheap way to activate Rc=1 ("ffmadds.")
+        but the program below omits the "." and no CR result is checked
+        """
+        lst = SVP64Asm( ["svshape 2, 1, 1, 1, 0",
+                         "svremap 31, 1, 0, 2, 0, 1, 0",
+                        "sv.ffmadds *0, *0, *0, *8"
+                        ])
+        lst = list(lst)
+
+        # array and coefficients to test
+        av = [7.0, -9.8 ] # array 0..1
+        coe = [3.1] # coefficients
+
+        # store in regfile: coefficients from FPR 8 up, array from FPR 0 up
+        fprs = [0] * 32
+        for i, c in enumerate(coe):
+            fprs[i+8] = fp64toselectable(c)
+        for i, a in enumerate(av):
+            fprs[i+0] = fp64toselectable(a)
+
+        with Program(lst, bigendian=False) as program:
+            sim = self.run_tst_program(program, initial_fprs=fprs)
+            print ("spr svshape0", sim.spr['SVSHAPE0'])
+            print ("    xdimsz", sim.spr['SVSHAPE0'].xdimsz)
+            print ("    ydimsz", sim.spr['SVSHAPE0'].ydimsz)
+            print ("    zdimsz", sim.spr['SVSHAPE0'].zdimsz)
+            print ("spr svshape1", sim.spr['SVSHAPE1'])
+            print ("spr svshape2", sim.spr['SVSHAPE2'])
+            print ("spr svshape3", sim.spr['SVSHAPE3'])
+
+            # work out the results with the twin mul/add-sub
+            res = transform_radix2(av, coe)
+
+            for i, expected in enumerate(res):
+                print ("i", i, float(sim.fpr(i)), "expected", expected)
+            for i, expected in enumerate(res):
+                # convert to Power single
+                expected = fph.DOUBLE2SINGLE(fp64toselectable(expected))
+                expected = float(expected)
+                actual = float(sim.fpr(i))
+                # approximate (relative) error, good enough test: FMAC is
+                # compared against FMUL-plus-FADD-or-FSUB, rounding differs.
+                # abs() on the divisor: a negative expected must not pass
+                err = abs(actual - expected) / abs(expected)
+                self.assertTrue(err < 1e-7)
+
     def test_sv_remap_fpmadds_fft(self):
         """>>> lst = ["svshape 8, 1, 1, 1, 0",
                      "svremap 31, 1, 0, 2, 0, 1, 0",
-                      "sv.ffmadds 2.v, 2.v, 2.v, 10.v"
+                      "sv.ffmadds *2, *2, *2, *10"
                      ]
             runs a full in-place O(N log2 N) butterfly schedule for
             Discrete Fourier Transform.
@@ -150,7 +209,7 @@ class FFTTestCase(FHDLTestCase):
         """
         lst = SVP64Asm( ["svshape 8, 1, 1, 1, 0",
                          "svremap 31, 1, 0, 2, 0, 1, 0",
-                        "sv.ffmadds 0.v, 0.v, 0.v, 8.v"
+                        "sv.ffmadds *0, *0, *0, *8"
                         ])
         lst = list(lst)
 
@@ -183,7 +242,7 @@ class FFTTestCase(FHDLTestCase):
                 print ("i", i, float(sim.fpr(i)), "expected", expected)
             for i, expected in enumerate(res):
                 # convert to Power single
-                expected = DOUBLE2SINGLE(fp64toselectable(expected))
+                expected = fph.DOUBLE2SINGLE(fp64toselectable(expected))
                 expected = float(expected)
                 actual = float(sim.fpr(i))
                 # approximate error calculation, good enough test
@@ -196,9 +255,9 @@ class FFTTestCase(FHDLTestCase):
         """>>> lst = SVP64Asm( [
                             "svshape 8, 1, 1, 1, 1",
                              "svremap 31, 1, 0, 2, 0, 1, 0",
-                            "sv.ffmadds 0.v, 0.v, 0.v, 8.v",
+                            "sv.ffmadds *0, *0, *0, *8",
                             "setvl. 0, 0, 1, 1, 0, 0",
-                            "bc 4, 2, -16"
+                            "bc 6, 3, -16"
                             ])
             runs a full in-place O(N log2 N) butterfly schedule for
             Discrete Fourier Transform.  this version however uses
@@ -211,9 +270,9 @@ class FFTTestCase(FHDLTestCase):
         lst = SVP64Asm( [
                         "svshape 8, 1, 1, 1, 1",
                          "svremap 31, 1, 0, 2, 0, 1, 0",
-                        "sv.ffmadds 0.v, 0.v, 0.v, 8.v",
+                        "sv.ffmadds *0, *0, *0, *8",
                         "setvl. 0, 0, 1, 1, 0, 0",
-                        "bc 4, 2, -16"
+                        "bc 6, 3, -16"
                         ])
         lst = list(lst)
 
@@ -266,7 +325,7 @@ class FFTTestCase(FHDLTestCase):
                 print ("i", i, float(sim.fpr(i)), "expected", expected)
             for i, expected in enumerate(res):
                 # convert to Power single
-                expected = DOUBLE2SINGLE(fp64toselectable(expected))
+                expected = fph.DOUBLE2SINGLE(fp64toselectable(expected))
                 expected = float(expected)
                 actual = float(sim.fpr(i))
                 # approximate error calculation, good enough test
@@ -280,12 +339,12 @@ class FFTTestCase(FHDLTestCase):
                         "svshape 8, 1, 1, 1, 1",
                          # RA: jh (S1) RB: n/a RC: k (S2) RT: scalar EA: n/a
                          "svremap 5, 1, 0, 2, 0, 0, 1",
-                         "sv.fmuls 24, 0.v, 8.v",
+                         "sv.fmuls 24, *0, *8",
                          # RA: scal RB: jl (S0) RC: n/a RT: jl (S0) EA: jh (S1)
                          "svremap 26, 0, 0, 0, 0, 1, 1",
-                        "sv.ffadds 0.v, 24, 0.v",
+                        "sv.ffadds *0, 24, *0",
                         "setvl. 0, 0, 1, 1, 0, 0",
-                        "bc 4, 2, -28"
+                        "bc 6, 3, -28"
                             ])
 
             runs a full in-place O(N log2 N) butterfly schedule for
@@ -319,12 +378,12 @@ class FFTTestCase(FHDLTestCase):
                         "svshape 8, 1, 1, 1, 1",
                          # RA: jh (S1) RB: n/a RC: k (S2) RT: scalar EA: n/a
                          "svremap 5, 1, 0, 2, 0, 0, 0",
-                         "sv.fmuls 24, 0.v, 8.v",
+                         "sv.fmuls 24, *0, *8",
                          # RA: scal RB: jl (S0) RC: n/a RT: jl (S0) EA: jh (S1)
                          "svremap 26, 0, 0, 0, 0, 1, 0",
-                        "sv.ffadds 0.v, 24, 0.v",
+                        "sv.ffadds *0, 24, *0",
                         "setvl. 0, 0, 1, 1, 0, 0",
-                        "bc 4, 2, -28"
+                        "bc 6, 3, -28"
                         ])
         lst = list(lst)
 
@@ -377,7 +436,7 @@ class FFTTestCase(FHDLTestCase):
                 print ("i", i, float(sim.fpr(i)), "expected", expected)
             for i, expected in enumerate(res):
                 # convert to Power single
-                expected = DOUBLE2SINGLE(fp64toselectable(expected))
+                expected = fph.DOUBLE2SINGLE(fp64toselectable(expected))
                 expected = float(expected)
                 actual = float(sim.fpr(i))
                 # approximate error calculation, good enough test
@@ -387,7 +446,7 @@ class FFTTestCase(FHDLTestCase):
                 self.assertTrue(err < 1e-7)
 
     def test_sv_fpmadds_fft(self):
-        """>>> lst = ["sv.ffmadds 2.v, 2.v, 2.v, 10.v"
+        """>>> lst = ["sv.ffmadds *2, *2, *2, *10"
                         ]
             four in-place vector mul-adds, four in-place vector mul-subs
 
@@ -405,7 +464,7 @@ class FFTTestCase(FHDLTestCase):
                 fnmsubs FRT+vl, FRA, FRC, FRB+vl
 
         """
-        lst = SVP64Asm(["sv.ffmadds 2.v, 2.v, 2.v, 10.v"
+        lst = SVP64Asm(["sv.ffmadds *2, *2, *2, *10"
                         ])
         lst = list(lst)
 
@@ -422,8 +481,8 @@ class FFTTestCase(FHDLTestCase):
             mul = a * c
             t = b + mul
             u = b - mul
-            t = DOUBLE2SINGLE(fp64toselectable(t)) # convert to Power single
-            u = DOUBLE2SINGLE(fp64toselectable(u)) # from double
+            t = fph.DOUBLE2SINGLE(fp64toselectable(t)) # convert to Power single
+            u = fph.DOUBLE2SINGLE(fp64toselectable(u)) # from double
             res.append((t, u))
             print ("FFT", i, "in", a, b, "coeff", c, "mul", mul, "res", t, u)
 
@@ -442,7 +501,7 @@ class FFTTestCase(FHDLTestCase):
                 self.assertEqual(sim.fpr(i+6), u)
 
     def test_sv_ffadds_fft(self):
-        """>>> lst = ["sv.ffadds 2.v, 2.v, 2.v"
+        """>>> lst = ["sv.ffadds *2, *2, *2"
                         ]
             four in-place vector adds, four in-place vector subs
 
@@ -453,7 +512,7 @@ class FFTTestCase(FHDLTestCase):
                 fadds FRT   , FRB, FRA
                 fsubs FRT+vl, FRA, FRB+vl
         """
-        lst = SVP64Asm(["sv.ffadds 2.v, 2.v, 2.v"
+        lst = SVP64Asm(["sv.ffadds *2, *2, *2"
                         ])
         lst = list(lst)
 
@@ -467,8 +526,8 @@ class FFTTestCase(FHDLTestCase):
             fprs[i+6] = fp64toselectable(b)
             t = b + a
             u = b - a
-            t = DOUBLE2SINGLE(fp64toselectable(t)) # convert to Power single
-            u = DOUBLE2SINGLE(fp64toselectable(u)) # from double
+            t = fph.DOUBLE2SINGLE(fp64toselectable(t)) # convert to Power single
+            u = fph.DOUBLE2SINGLE(fp64toselectable(u)) # from double
             res.append((t, u))
             print ("FFT", i, "in", a, b, "res", t, u)
 
@@ -528,31 +587,37 @@ class FFTTestCase(FHDLTestCase):
 
             however it turns out that they can be *merged*, and for
             the first one (sv.fmadds/sv.fmsubs) the scalar arguments (RT, RB)
-            *ignore* their REMAPs (by definition), and for the second
-            one (sv.ffads) exactly the right REMAPs are also ignored!
+            *ignore* their REMAPs (by definition, because you can't REMAP
+            scalar operands), and for the second one (sv.ffadds) exactly the
+            right REMAPs are also ignored!
 
+            therefore we can merge:
+                "svremap 5, 1, 0, 2, 0, 0, 1",
+                "svremap 26, 0, 0, 0, 0, 1, 1",
+            into:
                 "svremap 31, 1, 0, 2, 0, 1, 1",
+            and save one instruction.
         """
         lst = SVP64Asm( [
-                        # set triple butterfly mode
+                        # set triple butterfly mode with persistent "REMAP"
                         "svshape 8, 1, 1, 1, 1",
-                        # tpre
                         "svremap 31, 1, 0, 2, 0, 1, 1",
-                        "sv.fmuls 24, 0.v, 16.v",    # mul1_r = r*cos_r
-                        "sv.fmadds 24, 8.v, 20.v, 24", # mul2_r = i*sin_i
+                        # tpre
+                        "sv.fmuls 24, *0, *16",    # mul1_r = r*cos_r
+                        "sv.fmadds 24, *8, *20, 24", # mul2_r = i*sin_i
                                                      # tpre = mul1_r + mul2_r
                         # tpim
-                        "sv.fmuls 26, 0.v, 20.v",    # mul1_i = r*sin_i
-                        "sv.fmsubs 26, 8.v, 16.v, 26", # mul2_i = i*cos_r
+                        "sv.fmuls 26, *0, *20",    # mul1_i = r*sin_i
+                        "sv.fmsubs 26, *8, *16, 26", # mul2_i = i*cos_r
                                                      # tpim = mul2_i - mul1_i
                         # vec_r jh/jl
-                        "sv.ffadds 0.v, 24, 0.v",    # vh/vl +/- tpre
+                        "sv.ffadds *0, 24, *0",    # vh/vl +/- tpre
                         # vec_i jh/jl
-                        "sv.ffadds 8.v, 26, 8.v",    # vh/vl +- tpim
+                        "sv.ffadds *8, 26, *8",    # vh/vl +- tpim
 
                         # svstep loop
                         "setvl. 0, 0, 1, 1, 0, 0",
-                        "bc 4, 2, -56"
+                        "bc 6, 3, -56"
                         ])
         lst = list(lst)
 
@@ -615,7 +680,7 @@ class FFTTestCase(FHDLTestCase):
                        "expected_i", expected_i)
             for i, (expected_r, expected_i) in enumerate(zip(res_r, res_i)):
                 # convert to Power single
-                expected_r = DOUBLE2SINGLE(fp64toselectable(expected_r ))
+                expected_r = fph.DOUBLE2SINGLE(fp64toselectable(expected_r ))
                 expected_r = float(expected_r)
                 actual_r = float(sim.fpr(i))
                 # approximate error calculation, good enough test
@@ -624,7 +689,7 @@ class FFTTestCase(FHDLTestCase):
                 err = abs(actual_r - expected_r ) / expected_r
                 self.assertTrue(err < 1e-6)
                 # convert to Power single
-                expected_i = DOUBLE2SINGLE(fp64toselectable(expected_i ))
+                expected_i = fph.DOUBLE2SINGLE(fp64toselectable(expected_i ))
                 expected_i = float(expected_i)
                 actual_i = float(sim.fpr(i+8))
                 # approximate error calculation, good enough test
@@ -634,12 +699,12 @@ class FFTTestCase(FHDLTestCase):
                 self.assertTrue(err < 1e-6)
 
     def test_sv_ffadds_fft_scalar(self):
-        """>>> lst = ["sv.ffadds 2.v, 12, 13"
+        """>>> lst = ["sv.ffadds *2, 12, 13"
                         ]
             four in-place vector adds and subs, but done with a scalar
             pair (fp12, fp13)
         """
-        lst = SVP64Asm(["sv.ffadds 2.v, 12, 13"
+        lst = SVP64Asm(["sv.ffadds *2, 12, 13"
                         ])
         lst = list(lst)
 
@@ -653,8 +718,8 @@ class FFTTestCase(FHDLTestCase):
         for i in range(4):
             t = scalar_b + scalar_a
             u = scalar_b - scalar_a
-            t = DOUBLE2SINGLE(fp64toselectable(t)) # convert to Power single
-            u = DOUBLE2SINGLE(fp64toselectable(u)) # from double
+            t = fph.DOUBLE2SINGLE(fp64toselectable(t)) # convert to Power single
+            u = fph.DOUBLE2SINGLE(fp64toselectable(u)) # from double
             res.append((t, u))
             print ("FFT", i, "res", t, u)
 
@@ -678,6 +743,76 @@ class FFTTestCase(FHDLTestCase):
                 self.assertEqual(sim.fpr(i+2), t)
                 self.assertEqual(sim.fpr(i+6), u)
 
+    def test_sv_remap_fpmadds_fft_ldst(self):
+        """>>> lst = ["svshape 8, 1, 1, 15, 0",
+                      "svremap 1, 0, 0, 0, 0, 0, 0",
+                      "sv.lfs/els *0, 4(0)",
+                      "svshape 8, 1, 1, 1, 0",
+                      "svremap 31, 1, 0, 2, 0, 1, 0",
+                      "sv.ffmadds *0, *0, *0, *8"]
+            runs a full in-place O(N log2 N) butterfly schedule for
+            Discrete Fourier Transform, using bit-reversed LD/ST
+        """
+        lst = SVP64Asm( ["svshape 8, 1, 1, 15, 0",
+                         "svremap 1, 0, 0, 0, 0, 0, 0",
+                         "sv.lfs/els *0, 4(0)",
+                         "svshape 8, 1, 1, 1, 0",
+                         "svremap 31, 1, 0, 2, 0, 1, 0",
+                         "sv.ffmadds *0, *0, *0, *8"
+                        ])
+        lst = list(lst)
+
+        # array and coefficients to test
+        av = [7.0, -9.8, 3.0, -32.3,
+              -2.0, 5.0, -9.8, 31.3] # array 0..7
+        coe = [-0.25, 0.5, 3.1, 6.2] # coefficients
+
+        # store in regfile (coefficients only: the array comes from memory)
+        fprs = [0] * 32
+        for i, c in enumerate(coe):
+            fprs[i+8] = fp64toselectable(c)
+        # store in memory: two SINGLE values packed per 8-byte entry
+        mem = {}
+        val = 0
+        for i, a in enumerate(av):
+            a = SINGLE(fp64toselectable(a)).value
+            shift = (i % 2) == 1
+            if shift == 0:
+                val = a
+            else:
+                mem[(i//2)*8] = val | (a << 32)
+
+        with Program(lst, bigendian=False) as program:
+            sim = self.run_tst_program(program, initial_mem=mem,
+                                                initial_fprs=fprs)
+            print ("spr svshape0", sim.spr['SVSHAPE0'])
+            print ("    xdimsz", sim.spr['SVSHAPE0'].xdimsz)
+            print ("    ydimsz", sim.spr['SVSHAPE0'].ydimsz)
+            print ("    zdimsz", sim.spr['SVSHAPE0'].zdimsz)
+            print ("spr svshape1", sim.spr['SVSHAPE1'])
+            print ("spr svshape2", sim.spr['SVSHAPE2'])
+            print ("spr svshape3", sim.spr['SVSHAPE3'])
+
+            print ("mem dump")
+            print (sim.mem.dump())
+
+            # work out the results with the twin mul/add-sub,
+            # note bit-reverse mode requested
+            res = transform_radix2(av, coe, reverse=True)
+
+            for i, expected in enumerate(res):
+                print ("i", i, float(sim.fpr(i)), "expected", expected)
+            for i, expected in enumerate(res):
+                # convert to Power single
+                expected = fph.DOUBLE2SINGLE(fp64toselectable(expected))
+                expected = float(expected)
+                actual = float(sim.fpr(i))
+                # approximate (relative) error, good enough test: FMAC is
+                # compared against FMUL-plus-FADD-or-FSUB, rounding differs.
+                # abs() on the divisor: a negative expected must not pass
+                err = abs(actual - expected) / abs(expected)
+                self.assertTrue(err < 1e-6)
+
     def run_tst_program(self, prog, initial_regs=None,
                               svstate=None,
                               initial_mem=None,