From 6fe2b6ccc37181c0f416df3706110ea609377746 Mon Sep 17 00:00:00 2001 From: Luke Kenneth Casson Leighton Date: Fri, 28 Apr 2023 16:40:49 +0100 Subject: [PATCH] reduce number of operands to ffmadds as well --- openpower/isa/svfparith.mdwn | 9 +++++---- openpower/isatables/RM-1P-3S1D.csv | 2 +- openpower/isatables/minor_59.csv | 2 +- .../decoder/isa/test_caller_svp64_dct.py | 12 +++++------ .../decoder/isa/test_caller_svp64_fft.py | 20 +++++++++---------- src/openpower/decoder/power_insn.py | 4 ++-- src/openpower/sv/sv_analysis.py | 2 +- 7 files changed, 26 insertions(+), 25 deletions(-) diff --git a/openpower/isa/svfparith.mdwn b/openpower/isa/svfparith.mdwn index 193d774d..49b43f4f 100644 --- a/openpower/isa/svfparith.mdwn +++ b/openpower/isa/svfparith.mdwn @@ -183,13 +183,14 @@ Special Registers Altered: A-Form -* ffmadds FRT,FRA,FRC,FRB (Rc=0) -* ffmadds. FRT,FRA,FRC,FRB (Rc=1) +* ffmadds FRT,FRA,FRB (Rc=0) +* ffmadds. FRT,FRA,FRB (Rc=1) Pseudo-code: - FRT <- FPMULADD32(FRA, FRC, FRB, 1, 1) - FRS <- FPMULADD32(FRA, FRC, FRB, -1, 1) + tmp <- FRT + FRT <- FPMULADD32(tmp, FRA, FRB, 1, 1) + FRS <- FPMULADD32(tmp, FRA, FRB, -1, 1) Special Registers Altered: diff --git a/openpower/isatables/RM-1P-3S1D.csv b/openpower/isatables/RM-1P-3S1D.csv index 3146c5ca..e1154917 100644 --- a/openpower/isatables/RM-1P-3S1D.csv +++ b/openpower/isatables/RM-1P-3S1D.csv @@ -46,7 +46,7 @@ dsrd.,NORMAL,,1P,EXTRA2,NO,d:RT;d:CR0,s:RA,s:RB,s:RC,RA,RB,RC,RT,0,CR0,0 pcdec,NORMAL,,1P,EXTRA2,NO,d:RT;d:CR0,s:RA,s:RB,s:RC,RA,RB,RC,RT,0,CR0,0 ternlogi,NORMAL,,1P,EXTRA2,NO,d:RT;d:CR0,s:RA,s:RB,s:RT,RA,RB,RT,RT,0,CR0,0 ffmsubs,NORMAL,,1P,EXTRA2,NO,d:FRT;d:CR1,s:FRA,s:FRB,s:FRC,FRA,FRB,FRC,FRT,0,CR1,0 -ffmadds,NORMAL,,1P,EXTRA2,NO,d:FRT;d:CR1,s:FRA,s:FRB,s:FRC,FRA,FRB,FRC,FRT,0,CR1,0 +ffmadds,NORMAL,,1P,EXTRA2,NO,d:FRT;d:CR1,s:FRT,s:FRB,s:FRA,FRT,FRB,FRA,FRT,0,CR1,0 ffnmsubs,NORMAL,,1P,EXTRA2,NO,d:FRT;d:CR1,s:FRA,s:FRB,s:FRC,FRA,FRB,FRC,FRT,0,CR1,0 ffnmadds,NORMAL,,1P,EXTRA2,NO,d:FRT;d:CR1,s:FRA,s:FRB,s:FRC,FRA,FRB,FRC,FRT,0,CR1,0 fdmadds,NORMAL,,1P,EXTRA2,NO,d:FRT;d:CR1,s:FRT,s:FRB,s:FRA,FRT,FRB,FRA,FRT,0,CR1,0 diff --git a/openpower/isatables/minor_59.csv b/openpower/isatables/minor_59.csv index 9aad016d..cf8622f8 100644 --- a/openpower/isatables/minor_59.csv +++ b/openpower/isatables/minor_59.csv @@ -15,7 +15,7 @@ opcode,unit,internal op,in1,in2,in3,out,CR in,CR out,inv A,inv out,cry in,cry ou -----11110,FPU,OP_FP_MADD,FRA,FRB,FRC,FRT,NONE,CR1,0,0,ZERO,0,NONE,0,0,0,0,1,0,RC_ONLY,0,0,fnmsubs,A,,, -----11111,FPU,OP_FP_MADD,FRA,FRB,FRC,FRT,NONE,CR1,0,0,ZERO,0,NONE,0,0,0,0,1,0,RC_ONLY,0,0,fnmadds,A,,, -----00100,FPU,OP_FP_MADD,FRA,FRB,FRC,FRT,NONE,CR1,0,0,ZERO,0,NONE,0,0,0,0,1,0,RC_ONLY,0,0,ffmsubs,A,,1,3-in 2-out: implicit FRS. unofficial until submitted and approved/renumbered by the opf isa wg ------00101,FPU,OP_FP_MADD,FRA,FRB,FRC,FRT,NONE,CR1,0,0,ZERO,0,NONE,0,0,0,0,1,0,RC_ONLY,0,0,ffmadds,A,,1,3-in 2-out: implicit FRS. unofficial until submitted and approved/renumbered by the opf isa wg +-----00101,FPU,OP_FP_MADD,FRT,FRB,FRA,FRT,NONE,CR1,0,0,ZERO,0,NONE,0,0,0,0,1,0,RC_ONLY,0,0,ffmadds,A,,1,3-in 2-out: implicit FRS. unofficial until submitted and approved/renumbered by the opf isa wg -----00110,FPU,OP_FP_MADD,FRA,FRB,FRC,FRT,NONE,CR1,0,0,ZERO,0,NONE,0,0,0,0,1,0,RC_ONLY,0,0,ffnmsubs,A,,1,3-in 2-out: implicit FRS. unofficial until submitted and approved/renumbered by the opf isa wg -----00111,FPU,OP_FP_MADD,FRA,FRB,FRC,FRT,NONE,CR1,0,0,ZERO,0,NONE,0,0,0,0,1,0,RC_ONLY,0,0,ffnmadds,A,,1,3-in 2-out: implicit FRS. unofficial until submitted and approved/renumbered by the opf isa wg 1111100000,FPU,OP_FPOP,FRA,FRB,NONE,FRT,NONE,CR1,0,0,ZERO,0,NONE,0,0,0,0,1,0,RC_ONLY,0,0,ffadds,X,,1,2-in 2-out: implicit FRS. unofficial until submitted and approved/renumbered by the opf isa wg diff --git a/src/openpower/decoder/isa/test_caller_svp64_dct.py b/src/openpower/decoder/isa/test_caller_svp64_dct.py index b28705cd..0e3796b4 100644 --- a/src/openpower/decoder/isa/test_caller_svp64_dct.py +++ b/src/openpower/decoder/isa/test_caller_svp64_dct.py @@ -308,7 +308,7 @@ class DCTTestCase(FHDLTestCase): self.assertEqual(sim.fpr(i+0), t) self.assertEqual(sim.fpr(i+4), u) - def tst_sv_remap_fpmadds_idct_outer_8(self, stride=2): + def test_sv_remap_fpmadds_idct_outer_8(self, stride=2): """>>> lst = ["svshape 8, 1, 1, 11, 0", "svremap 27, 0, 1, 2, 1, 0, 0", "sv.fadds *0, *0, *0" @@ -367,7 +367,7 @@ class DCTTestCase(FHDLTestCase): print("err", i, err) self.assertTrue(err < 1e-6) - def tst_sv_remap_fpmadds_dct_outer_8(self, stride=2): + def test_sv_remap_fpmadds_dct_outer_8(self, stride=2): """>>> lst = ["svshape 8, 1, 1, 3, 0", "svremap 27, 1, 0, 2, 0, 1, 0", "sv.fadds *0, *0, *0" @@ -419,7 +419,7 @@ class DCTTestCase(FHDLTestCase): print("err", i, err) self.assertTrue(err < 1e-6) - def tst_sv_remap_dct_cos_precompute_inner_8(self): + def test_sv_remap_dct_cos_precompute_inner_8(self): """pre-computes a DCT COS table, using the shorter costable indices schedule. turns out, some COS values are repeated in each layer of the DCT butterfly. @@ -657,7 +657,7 @@ class DCTTestCase(FHDLTestCase): print("err", i, err) self.assertTrue(err < 1e-5) - def tst_sv_remap_fpmadds_ldbrev_idct_8_mode_4(self): + def test_sv_remap_fpmadds_ldbrev_idct_8_mode_4(self): """>>> lst = [# LOAD bit-reversed with half-swap "svshape 8, 1, 1, 14, 0", "svremap 1, 0, 0, 0, 0, 0, 0", @@ -668,7 +668,7 @@ class DCTTestCase(FHDLTestCase): "sv.fadds *0, *0, *0", # Inner butterfly, twin +/- MUL-ADD-SUB "svshape 8, 1, 1, 12, 0", - "sv.ffmadds *0, *0, *0, *8" + "sv.ffmadds *0, *8, *0" ] runs a full in-place 8-long O(N log2 N) Inverse-DCT, both inner and outer butterfly "REMAP" schedules, and using @@ -684,7 +684,7 @@ class DCTTestCase(FHDLTestCase): "svshape 8, 1, 1, 11, 0", "sv.fadds *0, *0, *0", "svshape 8, 1, 1, 12, 0", - "sv.ffmadds *0, *0, *0, *8" + "sv.ffmadds *0, *8, *0" ]) lst = list(lst) diff --git a/src/openpower/decoder/isa/test_caller_svp64_fft.py b/src/openpower/decoder/isa/test_caller_svp64_fft.py index 14df7a31..649918a0 100644 --- a/src/openpower/decoder/isa/test_caller_svp64_fft.py +++ b/src/openpower/decoder/isa/test_caller_svp64_fft.py @@ -137,7 +137,7 @@ class FFTTestCase(FHDLTestCase): def test_sv_remap_fpmadds_fft_4(self): """>>> lst = ["svshape 2, 1, 1, 1, 0", "svremap 31, 1, 0, 2, 0, 1, 0", - "sv.ffmadds. *2, *2, *2, *10" + "sv.ffmadds. *2, *10, *2" ] this is a cheap (cheating) way to run a single "ffmadds." to get at least Rc=1 on sv.ffmadds to be activated. the results @@ -146,7 +146,7 @@ class FFTTestCase(FHDLTestCase): """ lst = SVP64Asm(["svshape 2, 1, 1, 1, 0", "svremap 31, 1, 0, 2, 0, 1, 0", - "sv.ffmadds *0, *0, *0, *8" + "sv.ffmadds *0, *8, *0" ]) lst = list(lst) @@ -190,7 +190,7 @@ class FFTTestCase(FHDLTestCase): def test_sv_remap_fpmadds_fft(self): """>>> lst = ["svshape 8, 1, 1, 1, 0", "svremap 31, 1, 0, 2, 0, 1, 0", - "sv.ffmadds *2, *2, *2, *10" + "sv.ffmadds *2, *10, *2" ] runs a full in-place O(N log2 N) butterfly schedule for Discrete Fourier Transform. @@ -206,7 +206,7 @@ class FFTTestCase(FHDLTestCase): """ lst = SVP64Asm(["svshape 8, 1, 1, 1, 0", "svremap 31, 1, 0, 2, 0, 1, 0", - "sv.ffmadds *0, *0, *0, *8" + "sv.ffmadds *0, *8, *0" ]) lst = list(lst) @@ -252,7 +252,7 @@ class FFTTestCase(FHDLTestCase): """>>> lst = SVP64Asm( [ "svshape 8, 1, 1, 1, 1", "svremap 31, 1, 0, 2, 0, 1, 0", - "sv.ffmadds *0, *0, *0, *8", + "sv.ffmadds *0, *8, *0", "svstep. 12, 1, 0", "bc 6, 3, -16" ]) @@ -267,7 +267,7 @@ class FFTTestCase(FHDLTestCase): lst = SVP64Asm([ "svshape 8, 1, 1, 1, 1", "svremap 31, 1, 0, 2, 0, 1, 0", - "sv.ffmadds *0, *0, *0, *8", + "sv.ffmadds *0, *8, *0", "svstep. 27, 1, 0", "bc 6, 3, -16" ]) @@ -443,7 +443,7 @@ class FFTTestCase(FHDLTestCase): self.assertTrue(err < 1e-7) def test_sv_fpmadds_fft(self): - """>>> lst = ["sv.ffmadds *2, *2, *2, *10" + """>>> lst = ["sv.ffmadds *2, *10, *2" ] four in-place vector mul-adds, four in-place vector mul-subs @@ -461,7 +461,7 @@ class FFTTestCase(FHDLTestCase): fnmsubs FRT+vl, FRA, FRC, FRB+vl """ - lst = SVP64Asm(["sv.ffmadds *2, *2, *2, *10" + lst = SVP64Asm(["sv.ffmadds *2, *10, *2" ]) lst = list(lst) @@ -748,7 +748,7 @@ class FFTTestCase(FHDLTestCase): "sv.lfs/els *0, 4(0)", "svshape 8, 1, 1, 1, 0", "svremap 31, 1, 0, 2, 0, 1, 0", - "sv.ffmadds *0, *0, *0, *8" + "sv.ffmadds *0, *8, *0" runs a full in-place O(N log2 N) butterfly schedule for Discrete Fourier Transform, using bit-reversed LD/ST @@ -758,7 +758,7 @@ class FFTTestCase(FHDLTestCase): "sv.lfs/els *0, 4(0)", "svshape 8, 1, 1, 1, 0", "svremap 31, 1, 0, 2, 0, 1, 0", - "sv.ffmadds *0, *0, *0, *8" + "sv.ffmadds *0, *8, *0" ]) lst = list(lst) diff --git a/src/openpower/decoder/power_insn.py b/src/openpower/decoder/power_insn.py index 1d22fa84..483b7ac5 100644 --- a/src/openpower/decoder/power_insn.py +++ b/src/openpower/decoder/power_insn.py @@ -618,8 +618,8 @@ class Operands: # The operands in the assembly syntax are FRT,FRA,FRC,FRB. # The real assembly order, however, is FRT,FRA,FRB,FRC. # The legacy assembler placed operands in syntax order. - "ffmadds": {"FRB": FMAOperandFRB, "FRC": FMAOperandFRC}, - "ffmadds.": {"FRB": FMAOperandFRB, "FRC": FMAOperandFRC}, + #"ffmadds": {"FRB": FMAOperandFRB, "FRC": FMAOperandFRC}, + #"ffmadds.": {"FRB": FMAOperandFRB, "FRC": FMAOperandFRC}, #"fdmadds": {"FRB": FMAOperandFRB, "FRC": FMAOperandFRC}, #"fdmadds.": {"FRB": FMAOperandFRB, "FRC": FMAOperandFRC}, } diff --git a/src/openpower/sv/sv_analysis.py b/src/openpower/sv/sv_analysis.py index 69780eb9..d0c6128b 100644 --- a/src/openpower/sv/sv_analysis.py +++ b/src/openpower/sv/sv_analysis.py @@ -597,7 +597,7 @@ def extra_classifier(insn_name, value, name, res, regs): elif value == 'RM-1P-3S1D': res['Etype'] = 'EXTRA2' # RM EXTRA2 type - if regs == ['FRT', 'FRB', 'FRA', 'FRT', '', 'CR1']: # fdmadds + if regs == ['FRT', 'FRB', 'FRA', 'FRT', '', 'CR1']: # ffmadds/fdmadds res['0'] = 'd:FRT;d:CR1' # FRT,CR1: Rdest1_EXTRA2 res['1'] = 's:FRT' # FRT: Rsrc1_EXTRA2 res['2'] = 's:FRB' # FRB: Rsrc2_EXTRA2 -- 2.30.2