scale-up svshape pseudo-code for striding in DCT/FFT

author Luke Kenneth Casson Leighton <lkcl@lkcl.net>

Wed, 21 Sep 2022 17:18:14 +0000 (18:18 +0100)

committer Luke Kenneth Casson Leighton <lkcl@lkcl.net>

Wed, 21 Sep 2022 17:18:14 +0000 (18:18 +0100)
author Luke Kenneth Casson Leighton <lkcl@lkcl.net>
Wed, 21 Sep 2022 17:18:14 +0000 (18:18 +0100)
committer Luke Kenneth Casson Leighton <lkcl@lkcl.net>
Wed, 21 Sep 2022 17:18:14 +0000 (18:18 +0100)
diff --git a/openpower/isa/simplev.mdwn b/openpower/isa/simplev.mdwn

index e96acbf1bee464d1ff0a18d6fff5558d1416f4ff..ab7af687c7e5ee41b4567329dccdc973f1f5b6d2 100644 (file)
--- a/openpower/isa/simplev.mdwn
+++ b/openpower/isa/simplev.mdwn
@@ -105,6 +105,7 @@ Pseudo-code:
  
      # for convenience, VL to be calculated and stored in SVSTATE
      vlen <- [0] * 7
+    mscale[0:6] <- 0b0000001 # for scaling MAXVL
      itercount[0:6] <- [0] * 7
      SVSTATE[0:31] <- [0] * 32
      # only overwrite REMAP if "persistence" is zero
@@ -156,6 +157,7 @@ Pseudo-code:
          # for FRA and FRT
          SVSHAPE0[0:5] <- (0b0 || SVxd)   # xdim
          SVSHAPE0[12:17] <- (0b0 || SVzd)   # zdim - "striding" (2D FFT)
+        mscale <- (0b0 || SVzd) + 1
          SVSHAPE0[30:31] <- 0b01          # Butterfly mode
          # copy
          SVSHAPE1[0:31] <- SVSHAPE0[0:31]
@@ -181,6 +183,7 @@ Pseudo-code:
          # set up FRB and FRS
          SVSHAPE0[0:5] <- (0b0 || SVxd)   # xdim
          SVSHAPE0[12:17] <- (0b0 || SVzd)   # zdim - "striding" (2D DCT)
+        mscale <- (0b0 || SVzd) + 1
          if (SVrm = 0b1010) | (SVrm = 0b1100) then
              SVSHAPE0[30:31] <- 0b11          # iDCT mode
              SVSHAPE0[18:20] <- 0b011         # iDCT Inner Butterfly sub-mode
@@ -223,6 +226,7 @@ Pseudo-code:
          # set up FRB and FRS
          SVSHAPE0[0:5] <- (0b0 || SVxd)   # xdim
          SVSHAPE0[12:17] <- (0b0 || SVzd)   # zdim - "striding" (2D DCT)
+        mscale <- (0b0 || SVzd) + 1
          if (SVrm = 0b1011) then
              SVSHAPE0[30:31] <- 0b11      # iDCT mode
              SVSHAPE0[18:20] <- 0b011     # iDCT Outer Butterfly sub-mode
@@ -253,6 +257,7 @@ Pseudo-code:
          # set up FRB and FRS
          SVSHAPE0[0:5] <- (0b0 || SVxd)   # xdim
          SVSHAPE0[12:17] <- (0b0 || SVzd)   # zdim - "striding" (2D DCT)
+        mscale <- (0b0 || SVzd) + 1
          SVSHAPE0[30:31] <- 0b01          # DCT/FFT mode
          SVSHAPE0[6:11] <- 0b000100       # DCT Inner Butterfly COS-gen mode
          if (SVrm = 0b0101) then
@@ -269,6 +274,7 @@ Pseudo-code:
          # set up template in SVSHAPE0
          SVSHAPE0[0:5] <- (0b0 || SVxd)   # xdim
          SVSHAPE0[12:17] <- (0b0 || SVzd)   # zdim - "striding" (2D DCT)
+        mscale <- (0b0 || SVzd) + 1
          if (SVrm = 0b1110) then
              SVSHAPE0[18:20] <- 0b001     # DCT opposite half-swap
          if (SVrm = 0b1111) then
@@ -295,14 +301,17 @@ Pseudo-code:
          # set up template in SVSHAPE0, then copy to 1. only 2 needed
          SVSHAPE0[0:5] <- (0b0 || SVxd)   # xdim
          SVSHAPE0[12:17] <- (0b0 || SVzd)   # zdim - "striding" (2D DCT)
+        mscale <- (0b0 || SVzd) + 1
          SVSHAPE0[30:31] <- 0b10          # parallel reduce submode
          # copy
          SVSHAPE1[0:31] <- SVSHAPE0[0:31]
          # set up right operand (left operand 28:29 is zero)
          SVSHAPE1[28:29] <- 0b01           # right operand
      # set VL, MVL and Vertical-First
-    SVSTATE[0:6] <- vlen
-    SVSTATE[7:13] <- vlen
+    m[0:12] <- vlen * mscale
+    maxvl[0:6] <- m[6:12]
+    SVSTATE[0:6] <- vlen    # VL
+    SVSTATE[7:13] <- maxvl  # MAXVL
      SVSTATE[63] <- vf
  
  Special Registers Altered:
diff --git a/src/openpower/decoder/isa/test_caller_svp64_dct.py b/src/openpower/decoder/isa/test_caller_svp64_dct.py

index f2abe0c60aae0f7da1e26e36437c685c8a35be35..436755e4a27a0ef81f37cee42874f8d311055fb4 100644 (file)
--- a/src/openpower/decoder/isa/test_caller_svp64_dct.py
+++ b/src/openpower/decoder/isa/test_caller_svp64_dct.py
@@ -313,10 +313,10 @@ class DCTTestCase(FHDLTestCase):
                  self.assertEqual(sim.fpr(i+0), t)
                  self.assertEqual(sim.fpr(i+4), u)
  
-    def test_sv_remap_fpmadds_dct_inner_4(self):
+    def test_sv_remap_fpmadds_dct_inner_4(self, stride=1):
          """>>> lst = ["svshape 4, 1, 1, 2, 0",
                       "svremap 27, 1, 0, 2, 0, 1, 0",
-                        "sv.fdmadds *0, *0, *0, *8"
+                        "sv.fdmadds *0, *0, *0, *32"
                       ]
              runs a full in-place 4-long O(N log2 N) inner butterfly schedule
              for DCT
@@ -328,9 +328,9 @@ class DCTTestCase(FHDLTestCase):
              is straight Vectorised (0123...) because DCT coefficients
              cannot be shared between butterfly layers (due to +0.5)
          """
-        lst = SVP64Asm( ["svshape 4, 1, 1, 2, 0",
+        lst = SVP64Asm( ["svshape 4, 1, %d, 2, 0" % stride,
                           "svremap 27, 1, 0, 2, 0, 1, 0",
-                         "sv.fdmadds *0, *0, *0, *8"
+                         "sv.fdmadds *0, *0, *0, *32"
                          ])
          lst = list(lst)
  
@@ -347,11 +347,11 @@ class DCTTestCase(FHDLTestCase):
          av = [av[ri[i]] for i in range(n)]
  
          # store in regfile
-        fprs = [0] * 32
+        fprs = [0] * 64
          for i, c in enumerate(coe):
-            fprs[i+8] = fp64toselectable(1.0 / c) # invert
+            fprs[i*stride+32] = fp64toselectable(1.0 / c) # invert
          for i, a in enumerate(av):
-            fprs[i+0] = fp64toselectable(a)
+            fprs[i*stride+0] = fp64toselectable(a)
  
          with Program(lst, bigendian=False) as program:
              sim = self.run_tst_program(program, initial_fprs=fprs)
@@ -367,12 +367,13 @@ class DCTTestCase(FHDLTestCase):
              res = transform_inner_radix2_dct(avi, coe)
  
              for i, expected in enumerate(res):
-                print ("i", i, float(sim.fpr(i)), "expected", expected)
+                print ("i", i*stride, float(sim.fpr(i*stride)),
+                       "expected", expected)
              for i, expected in enumerate(res):
                  # convert to Power single
                  expected = fph.DOUBLE2SINGLE(fp64toselectable(expected))
                  expected = float(expected)
-                actual = float(sim.fpr(i))
+                actual = float(sim.fpr(i*stride))
                  # approximate error calculation, good enough test
                  # reason: we are comparing FMAC against FMUL-plus-FADD-or-FSUB
                  # and the rounding is different
author	Luke Kenneth Casson Leighton <lkcl@lkcl.net>
	Wed, 21 Sep 2022 17:18:14 +0000 (18:18 +0100)
committer	Luke Kenneth Casson Leighton <lkcl@lkcl.net>
	Wed, 21 Sep 2022 17:18:14 +0000 (18:18 +0100)
openpower/isa/simplev.mdwn		patch \| blob \| history
src/openpower/decoder/isa/test_caller_svp64_dct.py		patch \| blob \| history