reduce number of operands to ffmadds as well
author Luke Kenneth Casson Leighton <lkcl@lkcl.net>
Fri, 28 Apr 2023 15:40:49 +0000 (16:40 +0100)
committer Luke Kenneth Casson Leighton <lkcl@lkcl.net>
Fri, 28 Apr 2023 15:40:49 +0000 (16:40 +0100)
openpower/isa/svfparith.mdwn
openpower/isatables/RM-1P-3S1D.csv
openpower/isatables/minor_59.csv
src/openpower/decoder/isa/test_caller_svp64_dct.py
src/openpower/decoder/isa/test_caller_svp64_fft.py
src/openpower/decoder/power_insn.py
src/openpower/sv/sv_analysis.py

index 193d774d7711273e246db7543dfcaf53a6975aa8..49b43f4f495a20b782334f326dd0aa46039f5682 100644 (file)
@@ -183,13 +183,14 @@ Special Registers Altered:
 
 A-Form
 
-* ffmadds FRT,FRA,FRC,FRB (Rc=0)
-* ffmadds. FRT,FRA,FRC,FRB (Rc=1)
+* ffmadds FRT,FRA,FRB (Rc=0)
+* ffmadds. FRT,FRA,FRB (Rc=1)
 
 Pseudo-code:
 
-    FRT <- FPMULADD32(FRA, FRC, FRB, 1, 1)
-    FRS <- FPMULADD32(FRA, FRC, FRB, -1, 1)
+    tmp <- FRT
+    FRT <- FPMULADD32(tmp, FRA, FRB, 1, 1)
+    FRS <- FPMULADD32(tmp, FRA, FRB, -1, 1)
 
 Special Registers Altered:
 
index 3146c5ca31ffc39f13058d6ecd525702758ff90c..e1154917a17efc1ede12d8d8c6c3d9459f847cff 100644 (file)
@@ -46,7 +46,7 @@ dsrd.,NORMAL,,1P,EXTRA2,NO,d:RT;d:CR0,s:RA,s:RB,s:RC,RA,RB,RC,RT,0,CR0,0
 pcdec,NORMAL,,1P,EXTRA2,NO,d:RT;d:CR0,s:RA,s:RB,s:RC,RA,RB,RC,RT,0,CR0,0
 ternlogi,NORMAL,,1P,EXTRA2,NO,d:RT;d:CR0,s:RA,s:RB,s:RT,RA,RB,RT,RT,0,CR0,0
 ffmsubs,NORMAL,,1P,EXTRA2,NO,d:FRT;d:CR1,s:FRA,s:FRB,s:FRC,FRA,FRB,FRC,FRT,0,CR1,0
-ffmadds,NORMAL,,1P,EXTRA2,NO,d:FRT;d:CR1,s:FRA,s:FRB,s:FRC,FRA,FRB,FRC,FRT,0,CR1,0
+ffmadds,NORMAL,,1P,EXTRA2,NO,d:FRT;d:CR1,s:FRT,s:FRB,s:FRA,FRT,FRB,FRA,FRT,0,CR1,0
 ffnmsubs,NORMAL,,1P,EXTRA2,NO,d:FRT;d:CR1,s:FRA,s:FRB,s:FRC,FRA,FRB,FRC,FRT,0,CR1,0
 ffnmadds,NORMAL,,1P,EXTRA2,NO,d:FRT;d:CR1,s:FRA,s:FRB,s:FRC,FRA,FRB,FRC,FRT,0,CR1,0
 fdmadds,NORMAL,,1P,EXTRA2,NO,d:FRT;d:CR1,s:FRT,s:FRB,s:FRA,FRT,FRB,FRA,FRT,0,CR1,0
index 9aad016ddc100aa7a556f700aa5f5866b6083397..cf8622f8c96fcb1445b12b8b4c48c286f0301891 100644 (file)
@@ -15,7 +15,7 @@ opcode,unit,internal op,in1,in2,in3,out,CR in,CR out,inv A,inv out,cry in,cry ou
 -----11110,FPU,OP_FP_MADD,FRA,FRB,FRC,FRT,NONE,CR1,0,0,ZERO,0,NONE,0,0,0,0,1,0,RC_ONLY,0,0,fnmsubs,A,,,
 -----11111,FPU,OP_FP_MADD,FRA,FRB,FRC,FRT,NONE,CR1,0,0,ZERO,0,NONE,0,0,0,0,1,0,RC_ONLY,0,0,fnmadds,A,,,
 -----00100,FPU,OP_FP_MADD,FRA,FRB,FRC,FRT,NONE,CR1,0,0,ZERO,0,NONE,0,0,0,0,1,0,RC_ONLY,0,0,ffmsubs,A,,1,3-in 2-out: implicit FRS. unofficial until submitted and approved/renumbered by the opf isa wg
------00101,FPU,OP_FP_MADD,FRA,FRB,FRC,FRT,NONE,CR1,0,0,ZERO,0,NONE,0,0,0,0,1,0,RC_ONLY,0,0,ffmadds,A,,1,3-in 2-out: implicit FRS. unofficial until submitted and approved/renumbered by the opf isa wg
+-----00101,FPU,OP_FP_MADD,FRT,FRB,FRA,FRT,NONE,CR1,0,0,ZERO,0,NONE,0,0,0,0,1,0,RC_ONLY,0,0,ffmadds,A,,1,3-in 2-out: implicit FRS. unofficial until submitted and approved/renumbered by the opf isa wg
 -----00110,FPU,OP_FP_MADD,FRA,FRB,FRC,FRT,NONE,CR1,0,0,ZERO,0,NONE,0,0,0,0,1,0,RC_ONLY,0,0,ffnmsubs,A,,1,3-in 2-out: implicit FRS. unofficial until submitted and approved/renumbered by the opf isa wg
 -----00111,FPU,OP_FP_MADD,FRA,FRB,FRC,FRT,NONE,CR1,0,0,ZERO,0,NONE,0,0,0,0,1,0,RC_ONLY,0,0,ffnmadds,A,,1,3-in 2-out: implicit FRS. unofficial until submitted and approved/renumbered by the opf isa wg
 1111100000,FPU,OP_FPOP,FRA,FRB,NONE,FRT,NONE,CR1,0,0,ZERO,0,NONE,0,0,0,0,1,0,RC_ONLY,0,0,ffadds,X,,1,2-in 2-out: implicit FRS. unofficial until submitted and approved/renumbered by the opf isa wg
index b28705cde21c3f43bd79112271ad8e43e7e9be9b..0e3796b47a47a6aa80a8cb718a5684a84992a533 100644 (file)
@@ -308,7 +308,7 @@ class DCTTestCase(FHDLTestCase):
                 self.assertEqual(sim.fpr(i+0), t)
                 self.assertEqual(sim.fpr(i+4), u)
 
-    def tst_sv_remap_fpmadds_idct_outer_8(self, stride=2):
+    def test_sv_remap_fpmadds_idct_outer_8(self, stride=2):
         """>>> lst = ["svshape 8, 1, 1, 11, 0",
                      "svremap 27, 0, 1, 2, 1, 0, 0",
                          "sv.fadds *0, *0, *0"
@@ -367,7 +367,7 @@ class DCTTestCase(FHDLTestCase):
                 print("err", i, err)
                 self.assertTrue(err < 1e-6)
 
-    def tst_sv_remap_fpmadds_dct_outer_8(self, stride=2):
+    def test_sv_remap_fpmadds_dct_outer_8(self, stride=2):
         """>>> lst = ["svshape 8, 1, 1, 3, 0",
                      "svremap 27, 1, 0, 2, 0, 1, 0",
                          "sv.fadds *0, *0, *0"
@@ -419,7 +419,7 @@ class DCTTestCase(FHDLTestCase):
                 print("err", i, err)
                 self.assertTrue(err < 1e-6)
 
-    def tst_sv_remap_dct_cos_precompute_inner_8(self):
+    def test_sv_remap_dct_cos_precompute_inner_8(self):
         """pre-computes a DCT COS table, using the shorter costable
         indices schedule.  turns out, some COS values are repeated
         in each layer of the DCT butterfly.
@@ -657,7 +657,7 @@ class DCTTestCase(FHDLTestCase):
                 print("err", i, err)
                 self.assertTrue(err < 1e-5)
 
-    def tst_sv_remap_fpmadds_ldbrev_idct_8_mode_4(self):
+    def test_sv_remap_fpmadds_ldbrev_idct_8_mode_4(self):
         """>>> lst = [# LOAD bit-reversed with half-swap
                       "svshape 8, 1, 1, 14, 0",
                       "svremap 1, 0, 0, 0, 0, 0, 0",
@@ -668,7 +668,7 @@ class DCTTestCase(FHDLTestCase):
                       "sv.fadds *0, *0, *0",
                       # Inner butterfly, twin +/- MUL-ADD-SUB
                       "svshape 8, 1, 1, 12, 0",
-                      "sv.ffmadds *0, *0, *0, *8"
+                      "sv.ffmadds *0, *8, *0"
                      ]
             runs a full in-place 8-long O(N log2 N) Inverse-DCT, both
             inner and outer butterfly "REMAP" schedules, and using
@@ -684,7 +684,7 @@ class DCTTestCase(FHDLTestCase):
                         "svshape 8, 1, 1, 11, 0",
                         "sv.fadds *0, *0, *0",
                         "svshape 8, 1, 1, 12, 0",
-                        "sv.ffmadds *0, *0, *0, *8"
+                        "sv.ffmadds *0, *8, *0"
                         ])
         lst = list(lst)
 
index 14df7a31b02733169e157764b12201c6611f514d..649918a0b529a5e1e38a9673e34843ff86130e9e 100644 (file)
@@ -137,7 +137,7 @@ class FFTTestCase(FHDLTestCase):
     def test_sv_remap_fpmadds_fft_4(self):
         """>>> lst = ["svshape 2, 1, 1, 1, 0",
                      "svremap 31, 1, 0, 2, 0, 1, 0",
-                      "sv.ffmadds. *2, *2, *2, *10"
+                      "sv.ffmadds. *2, *10, *2"
                      ]
         this is a cheap (cheating) way to run a single "ffmadds." to
         get at least Rc=1 on sv.ffmadds to be activated. the results
@@ -146,7 +146,7 @@ class FFTTestCase(FHDLTestCase):
         """
         lst = SVP64Asm(["svshape 2, 1, 1, 1, 0",
                         "svremap 31, 1, 0, 2, 0, 1, 0",
-                        "sv.ffmadds *0, *0, *0, *8"
+                        "sv.ffmadds *0, *8, *0"
                         ])
         lst = list(lst)
 
@@ -190,7 +190,7 @@ class FFTTestCase(FHDLTestCase):
     def test_sv_remap_fpmadds_fft(self):
         """>>> lst = ["svshape 8, 1, 1, 1, 0",
                      "svremap 31, 1, 0, 2, 0, 1, 0",
-                      "sv.ffmadds *2, *2, *2, *10"
+                      "sv.ffmadds *2, *10, *2"
                      ]
             runs a full in-place O(N log2 N) butterfly schedule for
             Discrete Fourier Transform.
@@ -206,7 +206,7 @@ class FFTTestCase(FHDLTestCase):
         """
         lst = SVP64Asm(["svshape 8, 1, 1, 1, 0",
                         "svremap 31, 1, 0, 2, 0, 1, 0",
-                        "sv.ffmadds *0, *0, *0, *8"
+                        "sv.ffmadds *0, *8, *0"
                         ])
         lst = list(lst)
 
@@ -252,7 +252,7 @@ class FFTTestCase(FHDLTestCase):
         """>>> lst = SVP64Asm( [
                             "svshape 8, 1, 1, 1, 1",
                              "svremap 31, 1, 0, 2, 0, 1, 0",
-                            "sv.ffmadds *0, *0, *0, *8",
+                            "sv.ffmadds *0, *8, *0",
                             "svstep. 12, 1, 0",
                             "bc 6, 3, -16"
                             ])
@@ -267,7 +267,7 @@ class FFTTestCase(FHDLTestCase):
         lst = SVP64Asm([
             "svshape 8, 1, 1, 1, 1",
             "svremap 31, 1, 0, 2, 0, 1, 0",
-            "sv.ffmadds *0, *0, *0, *8",
+            "sv.ffmadds *0, *8, *0",
             "svstep. 27, 1, 0",
             "bc 6, 3, -16"
         ])
@@ -443,7 +443,7 @@ class FFTTestCase(FHDLTestCase):
                 self.assertTrue(err < 1e-7)
 
     def test_sv_fpmadds_fft(self):
-        """>>> lst = ["sv.ffmadds *2, *2, *2, *10"
+        """>>> lst = ["sv.ffmadds *2, *10, *2"
                         ]
             four in-place vector mul-adds, four in-place vector mul-subs
 
@@ -461,7 +461,7 @@ class FFTTestCase(FHDLTestCase):
                 fnmsubs FRT+vl, FRA, FRC, FRB+vl
 
         """
-        lst = SVP64Asm(["sv.ffmadds *2, *2, *2, *10"
+        lst = SVP64Asm(["sv.ffmadds *2, *10, *2"
                         ])
         lst = list(lst)
 
@@ -748,7 +748,7 @@ class FFTTestCase(FHDLTestCase):
                          "sv.lfs/els *0, 4(0)", 
                          "svshape 8, 1, 1, 1, 0",
                          "svremap 31, 1, 0, 2, 0, 1, 0",
-                         "sv.ffmadds *0, *0, *0, *8"
+                         "sv.ffmadds *0, *8, *0"
 
             runs a full in-place O(N log2 N) butterfly schedule for
             Discrete Fourier Transform, using bit-reversed LD/ST
@@ -758,7 +758,7 @@ class FFTTestCase(FHDLTestCase):
                         "sv.lfs/els *0, 4(0)",
                         "svshape 8, 1, 1, 1, 0",
                         "svremap 31, 1, 0, 2, 0, 1, 0",
-                        "sv.ffmadds *0, *0, *0, *8"
+                        "sv.ffmadds *0, *8, *0"
                         ])
         lst = list(lst)
 
index 1d22fa84fd87377aa6c06cf306e8dd690c977f46..483b7ac52c8a223aa4842f002b39a725cd0ebd1a 100644 (file)
@@ -618,8 +618,8 @@ class Operands:
             # The operands in the assembly syntax are FRT,FRA,FRC,FRB.
             # The real assembly order, however, is FRT,FRA,FRB,FRC.
             # The legacy assembler placed operands in syntax order.
-            "ffmadds": {"FRB": FMAOperandFRB, "FRC": FMAOperandFRC},
-            "ffmadds.": {"FRB": FMAOperandFRB, "FRC": FMAOperandFRC},
+            #"ffmadds": {"FRB": FMAOperandFRB, "FRC": FMAOperandFRC},
+            #"ffmadds.": {"FRB": FMAOperandFRB, "FRC": FMAOperandFRC},
             #"fdmadds": {"FRB": FMAOperandFRB, "FRC": FMAOperandFRC},
             #"fdmadds.": {"FRB": FMAOperandFRB, "FRC": FMAOperandFRC},
         }
index 69780eb9b7cc03d17019eb8f920e5e028c27057b..d0c6128bf9b2f034f13a5e2b109fed16b21f5f10 100644 (file)
@@ -597,7 +597,7 @@ def extra_classifier(insn_name, value, name, res, regs):
 
     elif value == 'RM-1P-3S1D':
         res['Etype'] = 'EXTRA2'  # RM EXTRA2 type
-        if regs == ['FRT', 'FRB', 'FRA', 'FRT', '', 'CR1']:  # fdmadds
+        if regs == ['FRT', 'FRB', 'FRA', 'FRT', '', 'CR1']:  # ffmadds/fdmadds
             res['0'] = 'd:FRT;d:CR1'  # FRT,CR1: Rdest1_EXTRA2
             res['1'] = 's:FRT'  # FRT: Rsrc1_EXTRA2
             res['2'] = 's:FRB'  # FRB: Rsrc2_EXTRA2