add new DCT inner butterfly shorter COS-gen mode unit test
author: Luke Kenneth Casson Leighton <lkcl@lkcl.net>
Tue, 27 Jul 2021 15:17:52 +0000 (16:17 +0100)
committer: Luke Kenneth Casson Leighton <lkcl@lkcl.net>
Tue, 27 Jul 2021 15:17:52 +0000 (16:17 +0100)
openpower/isa/simplev.mdwn
src/openpower/decoder/isa/svshape.py
src/openpower/decoder/isa/test_caller_svp64_dct.py

index 6af33bb89a42ecc64e41d9ce953775fba830f1a8..fd0c242a57b07a2eed36b5f7ffc423a536231917 100644 (file)
@@ -90,6 +90,7 @@ Pseudo-code:
 
     # for convenience, VL to be calculated and stored in SVSTATE
     vlen <- [0] * 7
+    itercount[0:6] <- [0] * 7
     SVSTATE[0:31] <- [0] * 32
     # only overwrite REMAP if "persistence" is zero
     if (SVSTATE[62] = 0b0) then
@@ -220,7 +221,7 @@ Pseudo-code:
         # set up FRB and FRS
         SVSHAPE0[0:5] <- (0b0 || SVxd)   # xdim
         SVSHAPE0[30:31] <- 0b01          # DCT/FFT mode
-        SVSHAPE0[6:11] <- 0b000011       # DCT Inner Butterfly COS-gen mode
+        SVSHAPE0[6:11] <- 0b000100       # DCT Inner Butterfly COS-gen mode
         SVSHAPE0[21:23] <- 0b001         # "inverse" on outer loop
         # copy
         SVSHAPE1[0:31] <- SVSHAPE0[0:31]
index 75b392a028bbbc511947f92e30ff34743324dbc8..8275e05e1e2d888f46301d3c5a2a3725a83c2098 100644 (file)
@@ -125,7 +125,7 @@ class SVSHAPE(SelectableInt):
                 iterate_fn = iterate_dct_inner_butterfly_indices
             elif self.ydimsz == 3:
                 iterate_fn = iterate_dct_outer_butterfly_indices
-            elif self.ydimsz == 3:
+            elif self.ydimsz == 5:
                 iterate_fn = iterate_dct_inner_costable_indices
         # create a **NEW** iterator each time this is called
         return iterate_fn(deepcopy(self))
index dc0551a54993c70bafa097477edff8c0dab0899d..4b7c62b55bf884c934f7d31f06fc99164eb05cfe 100644 (file)
@@ -455,6 +455,81 @@ class DCTTestCase(FHDLTestCase):
                                         "err", err)
                 self.assertTrue(err < 1e-6)
 
+    def test_sv_remap_dct_cos_precompute_inner_8(self):
+        """pre-computes a DCT COS table, using the shorter costable
+        indices schedule
+
+        the simpler (scalar) version is in test_caller_transcendentals.py
+        (test_fp_coss_cvt), this is the SVP64 variant.  TODO: really
+        need the new version of fcfids which doesn't spam memory with
+        LD/STs.
+
+        the simulated program leaves its results in FPRs 80 onwards.
+        these are compared against a reference table computed in pure
+        python below: for size = 8, 4, 2 and ci in range(size // 2) the
+        expected value is 1.0 / (2.0 * cos((ci + 0.5) * pi / size)),
+        giving 4 + 2 + 1 = 7 entries.  each result must match to within
+        a relative error of 1e-6.
+        """
+        # the fourth svshape operand (5) presumably selects the DCT inner
+        # costable-indices schedule - TODO confirm against svshape.py.
+        #
+        # integer vectors are moved into FPRs by storing them to memory
+        # (sv.std) and loading them back (sv.lfd) before converting with
+        # sv.fcfids: this is the LD/ST traffic the docstring refers to.
+        #
+        # NOTE(review): the svstep comments label 4.v as "ci" and 16.v as
+        # "step", but further down 48.v (converted from 4.v) is used as the
+        # divisor ("div size") and 24.v (converted from 16.v) is the value
+        # that has 0.5 added.  the debug prints below likewise call 24.v
+        # "ci FP" and 48.v "size FP".  confirm which labelling is correct.
+        lst = SVP64Asm(["svshape 8, 1, 1, 5, 0",
+                        "svremap 0, 0, 0, 2, 0, 1, 1",
+                        "sv.svstep 4.v, 3, 1", # svstep get vector of ci
+                        "sv.svstep 16.v, 2, 1", # svstep get vector of step
+                        "addi 1, 0, 0x0000",
+                        "setvl 0, 0, 7, 0, 1, 1",
+                        "sv.std 4.v, 0(1)",
+                        "sv.lfd  64.v, 0(1)",
+                        "sv.fcfids 48.v, 64.v",
+                        "addi 1, 0, 0x0060",
+                        "sv.std 16.v, 0(1)",
+                        "sv.lfd  12.v, 0(1)",
+                        "sv.fcfids 24.v, 12.v",
+                        "sv.fadds 0.v, 24.v, 43", # plus 0.5
+                        "sv.fmuls 0.v, 0.v, 41", # times PI
+                        "sv.fdivs 0.v, 0.v, 48.v", # div size
+                        "sv.fcoss 80.v, 0.v",
+                        "sv.fdivs 80.v, 43, 80.v", # div 0.5 / x
+                     ])
+        lst = list(lst)
+
+        gprs = [0] * 32
+        fprs = [0] * 128
+        # constants
+        fprs[43] = fp64toselectable(0.5)         # 0.5
+        fprs[41] = fp64toselectable(math.pi) # pi
+        # FPR 44 is not referenced by any instruction in the listing above
+        fprs[44] = fp64toselectable(2.0)     # 2.0
+
+        n = 8
+
+        # reference table, computed in plain python: one coefficient per
+        # (size, ci) pair, with size halving from n down to 2
+        ctable = []
+        size = n
+        while size >= 2:
+            halfsize = size // 2
+            for ci in range(halfsize):
+                coeff = math.cos((ci + 0.5) * math.pi / size) * 2.0
+                ctable.append(coeff)
+                # the value after "i/n" is ci+0.5; the final (unlabelled)
+                # value is 1/coeff, i.e. what the program should produce
+                print ("coeff", "ci", ci, "size", size,
+                   "i/n", (ci+0.5), 1.0/coeff)
+            size //= 2
+
+        with Program(lst, bigendian=False) as program:
+            sim = self.run_tst_program(program, gprs, initial_fprs=fprs)
+            # debug output only: memory, then the intermediate FP vectors
+            print ("MEM")
+            sim.mem.dump()
+            print ("ci FP")
+            for i in range(len(ctable)):
+                actual = float(sim.fpr(i+24))
+                print ("i", i, actual)
+            print ("size FP")
+            for i in range(len(ctable)):
+                actual = float(sim.fpr(i+48))
+                print ("i", i, actual)
+            print ("temps")
+            for i in range(len(ctable)):
+                actual = float(sim.fpr(i))
+                print ("i", i, actual)
+            # the actual check: FPRs 80.. against 1/ctable[i], which is
+            # 0.5 / cos(...) and so matches the final fdivs (0.5 / fcoss)
+            for i in range(len(ctable)):
+                expected = 1.0/ctable[i]
+                actual = float(sim.fpr(i+80))
+                # relative (not absolute) error
+                err = abs((actual - expected) / expected)
+                print ("i", i, actual, "1/expect", 1/expected,
+                                        "expected", expected,
+                                        "err", err)
+                self.assertTrue(err < 1e-6)
+
+
     def run_tst_program(self, prog, initial_regs=None,
                               svstate=None,
                               initial_mem=None,