get butterfly RADIX2 SVP64 example working, breaks the fpmadds one though
author: Luke Kenneth Casson Leighton <lkcl@lkcl.net>
Wed, 7 Jul 2021 15:27:46 +0000 (16:27 +0100)
committer: Luke Kenneth Casson Leighton <lkcl@lkcl.net>
Wed, 7 Jul 2021 15:27:46 +0000 (16:27 +0100)
openpower/isa/svfparith.mdwn
src/openpower/decoder/isa/caller.py
src/openpower/decoder/isa/test_caller_svp64_fft.py

index 813ee2c5469d4484abe56a564546cdb2e5f1517d..70a246c86def4c2a9c74500858bb9b05a9ecf38c 100644 (file)
@@ -166,7 +166,7 @@ A-Form
 
 Pseudo-code:
 
-    FRT <- FPMULADD32(FRA, FRC, FRA, 1, 1)
+    FRT <- FPMULADD32(FRA, FRC, FRB, 1, 1)
     FRS <- FPMULADD32(FRA, FRC, FRB, -1, 1)
 
 Special Registers Altered:
index d0b80550190bc8b12b8f338654019a47516d542e..aeefe2575d8e8b399ba412de2c59ae443f400932 100644 (file)
@@ -1252,9 +1252,9 @@ class ISACaller:
                 if shape.mode == 0b01:
                     if i == 0:
                         yield self.dec2.o_step.eq(remap_idx)   # RT
-                        yield self.dec2.in1_step.eq(remap_idx) # RA
-                    elif i == 1:
                         yield self.dec2.in2_step.eq(remap_idx) # RB
+                    elif i == 1:
+                        yield self.dec2.in1_step.eq(remap_idx) # RA
                         yield self.dec2.o2_step.eq(remap_idx)  # EA (FRS)
                     elif i == 2:
                         yield self.dec2.in3_step.eq(remap_idx) # RC
index 23bd38e58176b7c8f0054071104492deadf33e96..3001050b244619ae996690190eeecad1101b096d 100644 (file)
@@ -51,12 +51,14 @@ def transform_radix2(vec, exptable):
                 # exact same actual computation, just embedded in
                 # triple-nested for-loops
                 jl, jh = j, j+halfsize
+                vjh = vec[jh]
                 temp1 = vec[jh] * exptable[k]
                 temp2 = vec[jl]
                 vec[jh] = temp2 - temp1
                 vec[jl] = temp2 + temp1
-                print ("transform_radix2 jl jh k", jl, jh, k,
-                       "temp1, temp2", temp1, temp2,
+                print ("xform jl jh k", jl, jh, k,
+                       "vj vjh ek", temp2, vjh, exptable[k],
+                       "t1, t2", temp1, temp2,
                        "v[jh] v[jl]", vec[jh], vec[jl])
                 k += tablestep
         size *= 2
@@ -103,9 +105,6 @@ class DecoderTestCase(FHDLTestCase):
         for i, a in enumerate(av):
             fprs[i+0] = fp64toselectable(a)
 
-        # work out the results with the twin mul/add-sub
-        res = transform_radix2(av, coe)
-
         # set total. err don't know how to calculate how many there are...
         # do it manually for now
         VL = 0
@@ -135,12 +134,22 @@ class DecoderTestCase(FHDLTestCase):
             print ("spr svshape1", sim.spr['SVSHAPE1'])
             print ("spr svshape2", sim.spr['SVSHAPE2'])
             print ("spr svshape3", sim.spr['SVSHAPE3'])
+
+            # work out the results with the twin mul/add-sub
+            res = transform_radix2(av, coe)
+
             for i, expected in enumerate(res):
                 print ("i", i, float(sim.fpr(i)), "expected", expected)
             for i, expected in enumerate(res):
                 # convert to Power single
                 expected = DOUBLE2SINGLE(fp64toselectable(expected))
-                self.assertEqual(sim.fpr(i), expected)
+                expected = float(expected)
+                actual = float(sim.fpr(i))
+                # approximate error calculation, good enough test
+                # reason: we are comparing FMAC against FMUL-plus-FADD-or-FSUB
+                # and the rounding is different
+                err = abs(actual - expected) / expected
+                self.assertTrue(err < 1e-7)
 
 
     def test_sv_fpmadds_fft(self):