add SVP64 i-DCT unit test for inner butterfly, coefficients pre-computed
author Luke Kenneth Casson Leighton <lkcl@lkcl.net>
Sat, 31 Jul 2021 13:28:25 +0000 (14:28 +0100)
committer Luke Kenneth Casson Leighton <lkcl@lkcl.net>
Sat, 31 Jul 2021 13:28:25 +0000 (14:28 +0100)
at present

openpower/isa/simplev.mdwn
src/openpower/decoder/isa/test_caller_svp64_dct.py

index 2971ace1a827b30bddcb3fbc5af586be1cf5d5a8..c04301dc06a160ae8ee0059abeb90dc0596323e6 100644 (file)
@@ -166,14 +166,15 @@ Pseudo-code:
         SVSHAPE0[0:5] <- (0b0 || SVxd)   # xdim
         if (SVRM = 0b1010) | (SVRM = 0b1100) then
             SVSHAPE0[30:31] <- 0b11          # iDCT mode
+            SVSHAPE0[18:20] <- 0b011         # iDCT Inner Butterfly sub-mode
         else
             SVSHAPE0[30:31] <- 0b01          # DCT mode
+            SVSHAPE0[18:20] <- 0b001         # DCT Inner Butterfly sub-mode
+            SVSHAPE0[21:23] <- 0b001         # "inverse" on outer loop
         if (SVRM = 0b1100) | (SVRM = 0b0100) then
             SVSHAPE0[6:11] <- 0b000011       # (i)DCT Inner Butterfly mode 4
         else
             SVSHAPE0[6:11] <- 0b000001       # (i)DCT Inner Butterfly mode 2
-        SVSHAPE0[18:20] <- 0b001         # DCT Inner Butterfly sub-mode
-        SVSHAPE0[21:23] <- 0b001         # "inverse" on outer loop
         # copy
         SVSHAPE1[0:31] <- SVSHAPE0[0:31]
         SVSHAPE2[0:31] <- SVSHAPE0[0:31]
index 53537ffb83b7760c98345c7c3413487fd6a2d672..2bc6dd2c242b9380a51a655174907072d8c18a0e 100644 (file)
@@ -137,7 +137,7 @@ def transform_inner_radix2_idct(vec, ctable):
     levels = n.bit_length() - 1
 
     # pretend we LDed data in half-swapped order
-    vec = halfrev2(vec, True)
+    vec = halfrev2(vec, False)
 
     ################
     # INNER butterfly
@@ -375,6 +375,68 @@ class DCTTestCase(FHDLTestCase):
                 print ("err", i, err)
                 self.assertTrue(err < 1e-6)
 
+    def test_sv_remap_fpmadds_dct_inner_4(self):
+        """>>> lst = ["svshape 4, 1, 1, 10, 0",
+                      "svremap 27, 1, 0, 2, 0, 1, 0",
+                      "sv.fdmadds 0.v, 0.v, 0.v, 8.v"
+                     ]
+            runs a full in-place 4-long O(N log2 N) inner butterfly schedule
+            for inverse-DCT
+
+            SVP64 "REMAP" in Butterfly Mode is applied to a twin +/- FMAC
+            (3 inputs, 2 outputs)
+
+            Note that the coefficient (FRC) is not on a "schedule", it
+            is straight Vectorised (0123...) because DCT coefficients
+            cannot be shared between butterfly layers (due to +0.5)
+        """
+        lst = SVP64Asm( ["svshape 4, 1, 1, 10, 0",
+                         "svremap 27, 1, 0, 2, 0, 1, 0",
+                         "sv.fdmadds 0.v, 0.v, 0.v, 8.v"
+                        ])
+        lst = list(lst)
+
+        # array and coefficients to test
+        n = 4
+        levels = n.bit_length() - 1
+        coe = [-0.25, 0.5, 3.1, 6.2] # 4 coefficients
+        avi = [7.0, -0.8, 2.0, -2.3] # full input array, elements 0..3
+        av = halfrev2(avi, False)
+
+        # store in regfile
+        fprs = [0] * 32
+        for i, c in enumerate(coe):
+            fprs[i+8] = fp64toselectable(1.0 / c) # invert
+        for i, a in enumerate(av):
+            fprs[i+0] = fp64toselectable(a)
+
+        with Program(lst, bigendian=False) as program:
+            sim = self.run_tst_program(program, initial_fprs=fprs)
+            print ("spr svshape0", sim.spr['SVSHAPE0'])
+            print ("    xdimsz", sim.spr['SVSHAPE0'].xdimsz)
+            print ("    ydimsz", sim.spr['SVSHAPE0'].ydimsz)
+            print ("    zdimsz", sim.spr['SVSHAPE0'].zdimsz)
+            print ("spr svshape1", sim.spr['SVSHAPE1'])
+            print ("spr svshape2", sim.spr['SVSHAPE2'])
+            print ("spr svshape3", sim.spr['SVSHAPE3'])
+
+            # work out the results with the twin mul/add-sub
+            res = transform_inner_radix2_idct(avi, coe)
+
+            for i, expected in enumerate(res):
+                print ("i", i, float(sim.fpr(i)), "expected", expected)
+            for i, expected in enumerate(res):
+                # convert to Power single
+                expected = DOUBLE2SINGLE(fp64toselectable(expected))
+                expected = float(expected)
+                actual = float(sim.fpr(i))
+                # approximate error calculation, good enough test
+                # reason: we are comparing FMAC against FMUL-plus-FADD-or-FSUB
+                # and the rounding is different
+                err = abs((actual - expected) / expected)
+                print ("err", i, err)
+                self.assertTrue(err < 1e-6)
+
     def test_sv_remap_fpmadds_idct_outer_8(self):
         """>>> lst = ["svshape 8, 1, 1, 11, 0",
                      "svremap 27, 1, 0, 2, 0, 1, 0",