From f07887871e71669b6cce2dc2aaac53f2b02a4621 Mon Sep 17 00:00:00 2001 From: Luke Kenneth Casson Leighton Date: Fri, 28 Apr 2023 08:53:40 +0100 Subject: [PATCH] reduce fdmadds down to only 3 operands, RT-overwrite, to save on operand space. it is still 3-in 2-out but FRC is now FRA --- openpower/isa/svfparith.mdwn | 12 ++++++------ openpower/isatables/RM-1P-3S1D.csv | 2 +- openpower/isatables/fields.text | 15 ++++++++++----- openpower/isatables/minor_59.csv | 2 +- src/openpower/decoder/isa/caller.py | 11 +++++++++++ .../decoder/isa/test_caller_svp64_dct.py | 18 +++++++++--------- src/openpower/decoder/power_decoder2.py | 13 +++++++++++++ src/openpower/decoder/power_enums.py | 7 ++++++- src/openpower/decoder/power_insn.py | 6 +++--- src/openpower/sv/sv_analysis.py | 7 ++++++- 10 files changed, 66 insertions(+), 27 deletions(-) diff --git a/openpower/isa/svfparith.mdwn b/openpower/isa/svfparith.mdwn index a7b05457..193d774d 100644 --- a/openpower/isa/svfparith.mdwn +++ b/openpower/isa/svfparith.mdwn @@ -161,16 +161,16 @@ Special Registers Altered: # [DRAFT] Floating Twin Multiply-Add DCT [Single] -A-Form +DCT-Form -* fdmadds FRT,FRA,FRC,FRB (Rc=0) -* fdmadds. FRT,FRA,FRC,FRB (Rc=1) +* fdmadds FRT,FRA,FRB (Rc=0) +* fdmadds. FRT,FRA,FRB (Rc=1) Pseudo-code: - FRS <- FPADD32(FRA, FRB) - sub <- FPSUB32(FRA, FRB) - FRT <- FPMUL32(FRC, sub) + FRS <- FPADD32(FRT, FRB) + sub <- FPSUB32(FRT, FRB) + FRT <- FPMUL32(FRA, sub) Special Registers Altered: diff --git a/openpower/isatables/RM-1P-3S1D.csv b/openpower/isatables/RM-1P-3S1D.csv index 526af3f9..3146c5ca 100644 --- a/openpower/isatables/RM-1P-3S1D.csv +++ b/openpower/isatables/RM-1P-3S1D.csv @@ -49,7 +49,7 @@ ffmsubs,NORMAL,,1P,EXTRA2,NO,d:FRT;d:CR1,s:FRA,s:FRB,s:FRC,FRA,FRB,FRC,FRT,0,CR1 ffmadds,NORMAL,,1P,EXTRA2,NO,d:FRT;d:CR1,s:FRA,s:FRB,s:FRC,FRA,FRB,FRC,FRT,0,CR1,0 ffnmsubs,NORMAL,,1P,EXTRA2,NO,d:FRT;d:CR1,s:FRA,s:FRB,s:FRC,FRA,FRB,FRC,FRT,0,CR1,0 ffnmadds,NORMAL,,1P,EXTRA2,NO,d:FRT;d:CR1,s:FRA,s:FRB,s:FRC,FRA,FRB,FRC,FRT,0,CR1,0 -fdmadds,NORMAL,,1P,EXTRA2,NO,d:FRT;d:CR1,s:FRA,s:FRB,s:FRC,FRA,FRB,FRC,FRT,0,CR1,0 +fdmadds,NORMAL,,1P,EXTRA2,NO,d:FRT;d:CR1,s:FRT,s:FRB,s:FRA,FRT,FRB,FRA,FRT,0,CR1,0 fmsubs,NORMAL,,1P,EXTRA2,NO,d:FRT;d:CR1,s:FRA,s:FRB,s:FRC,FRA,FRB,FRC,FRT,0,CR1,0 fmadds,NORMAL,,1P,EXTRA2,NO,d:FRT;d:CR1,s:FRA,s:FRB,s:FRC,FRA,FRB,FRC,FRT,0,CR1,0 fnmsubs,NORMAL,,1P,EXTRA2,NO,d:FRT;d:CR1,s:FRA,s:FRB,s:FRC,FRA,FRB,FRC,FRT,0,CR1,0 diff --git a/openpower/isatables/fields.text b/openpower/isatables/fields.text index b0f91cae..8c90cad7 100644 --- a/openpower/isatables/fields.text +++ b/openpower/isatables/fields.text @@ -135,6 +135,11 @@ | PO | FRT | FRA | FRB | XO | Rc | | PO | FRT | FRA | RB | XO | Rc | +# 1.6.7.1 DCT-FORM + + |0 |6 |11 |16 |21 |26 |31 | + | PO | FRT | FRA | FRB | // | XO | Rc | + # 1.6.8 XL-FORM |0 |6 |9 |11 |14 |16 |19|20|21 |31 | | PO | BT | BA | BB | XO | / | @@ -534,7 +539,7 @@ FRA (11:15) Field used to specify a FPR to be used as a source. - Formats: A, MM, X, Z22, Z23 + Formats: A, MM, X, Z22, Z23, DCT FRAp (11:15) Field used to specify an even/odd pair of FPRs to be concatenated and used as a source. @@ -542,7 +547,7 @@ FRB (16:20) Field used to specify an FPR to be used as a source. - Formats: A, MM, X, XFL, Z23 + Formats: A, MM, X, XFL, Z23, DCT FRBp (16:20) Field used to specify an even/odd pair of FPRs to be concatenated and used as a source. @@ -562,7 +567,7 @@ FRT (6:10) Field used to specify an FPR to be used as a tar- get. - Formats: A, D, MM, X, Z22, Z23 + Formats: A, D, MM, X, Z22, Z23, DCT FRTp (6:10) Field used to specify an even/odd pair of FPRs to be concatenated and used as a target. @@ -769,7 +774,7 @@ 1 Set Condition Register Field 0 or Field 1 as described in Section 2.3.1, 'Condition Regis- ter' on page 30. - Formats: A, M, MD, MDS, MM, VA2, X, XFL, XO, XS, Z22, Z23, SVL, XB, TLI + Formats: A, M, MD, MDS, MM, VA2, X, XFL, XO, XS, Z22, Z23, SVL, XB, TLI, DCT RIC (12:13) Field used to specify what types of entries to inval- idate for tlbie[l]. @@ -1070,7 +1075,7 @@ Formats: XX4 XO (26:30) Extended opcode field. - Formats: A, DX, VA2, SVL, CRB + Formats: A, DX, VA2, SVL, CRB, DCT XO (26:31) Extended opcode field. Formats: VA, SVM, SVRM, SVI diff --git a/openpower/isatables/minor_59.csv b/openpower/isatables/minor_59.csv index 09ccb05c..9aad016d 100644 --- a/openpower/isatables/minor_59.csv +++ b/openpower/isatables/minor_59.csv @@ -19,7 +19,7 @@ opcode,unit,internal op,in1,in2,in3,out,CR in,CR out,inv A,inv out,cry in,cry ou -----00110,FPU,OP_FP_MADD,FRA,FRB,FRC,FRT,NONE,CR1,0,0,ZERO,0,NONE,0,0,0,0,1,0,RC_ONLY,0,0,ffnmsubs,A,,1,3-in 2-out: implicit FRS. unofficial until submitted and approved/renumbered by the opf isa wg -----00111,FPU,OP_FP_MADD,FRA,FRB,FRC,FRT,NONE,CR1,0,0,ZERO,0,NONE,0,0,0,0,1,0,RC_ONLY,0,0,ffnmadds,A,,1,3-in 2-out: implicit FRS. unofficial until submitted and approved/renumbered by the opf isa wg 1111100000,FPU,OP_FPOP,FRA,FRB,NONE,FRT,NONE,CR1,0,0,ZERO,0,NONE,0,0,0,0,1,0,RC_ONLY,0,0,ffadds,X,,1,2-in 2-out: implicit FRS. unofficial until submitted and approved/renumbered by the opf isa wg ------11011,FPU,OP_FP_MADD,FRA,FRB,FRC,FRT,NONE,CR1,0,0,ZERO,0,NONE,0,0,0,0,1,0,RC_ONLY,0,0,fdmadds,A,,1,3-in 2-out: implicit FRS. unofficial until submitted and approved/renumbered by the opf isa wg +-----11011,FPU,OP_FP_MADD,FRT,FRB,FRA,FRT,NONE,CR1,0,0,ZERO,0,NONE,0,0,0,0,1,0,RC_ONLY,0,0,fdmadds,DCT,,1,3-in 2-out: implicit FRS. unofficial until submitted and approved/renumbered by the opf isa wg 1001001110,FPU,OP_FPOP,FRA,FRB,NONE,FRT,NONE,CR1,0,0,ZERO,0,NONE,0,0,0,0,1,0,RC_ONLY,0,0,fatan2s,X,,1,unofficial until submitted and approved/renumbered by the opf isa wg 1000001110,FPU,OP_FPOP,FRA,FRB,NONE,FRT,NONE,CR1,0,0,ZERO,0,NONE,0,0,0,0,1,0,RC_ONLY,0,0,fatan2pis,X,,1,unofficial until submitted and approved/renumbered by the opf isa wg 1111101101,FPU,OP_FPOP,FRA,FRB,NONE,FRT,NONE,CR1,0,0,ZERO,0,NONE,0,0,0,0,1,0,RC_ONLY,0,0,fpows,X,,1,unofficial until submitted and approved/renumbered by the opf isa wg diff --git a/src/openpower/decoder/isa/caller.py b/src/openpower/decoder/isa/caller.py index a8bb8487..2e6b6427 100644 --- a/src/openpower/decoder/isa/caller.py +++ b/src/openpower/decoder/isa/caller.py @@ -431,6 +431,8 @@ def get_idx_map(dec2, name): elif name == 'FRA': if in1_sel == In1Sel.FRA.value: return 1 + if in3_sel == In3Sel.FRA.value: + return 3 elif name == 'FRB': if in2_sel == In2Sel.FRB.value: return 2 @@ -442,6 +444,12 @@ def get_idx_map(dec2, name): return 1 if in3_sel == In3Sel.FRS.value: return 3 + elif name == 'FRT': + if in1_sel == In1Sel.FRT.value: + return 1 + elif name == 'RT': + if in1_sel == In1Sel.RT.value: + return 1 return None @@ -577,6 +585,9 @@ def get_out_map(dec2, name): elif name == 'FRA': if out_sel == OutSel.FRA.value: return True + elif name == 'FRS': + if out_sel == OutSel.FRS.value: + return True elif name == 'FRT': if out_sel == OutSel.FRT.value: return True diff --git a/src/openpower/decoder/isa/test_caller_svp64_dct.py b/src/openpower/decoder/isa/test_caller_svp64_dct.py index 78e47529..b28705cd 100644 --- a/src/openpower/decoder/isa/test_caller_svp64_dct.py +++ b/src/openpower/decoder/isa/test_caller_svp64_dct.py @@ -259,7 +259,7 @@ class DCTTestCase(FHDLTestCase): fadds FRT , FRB, FRA fsubs FRT+vl, FRA, FRB+vl """ - lst = SVP64Asm(["sv.fdmadds *0, *0, *0, *8" + lst = SVP64Asm(["sv.fdmadds *0, *8, *0" ]) lst = list(lst) @@ -308,7 +308,7 @@ class DCTTestCase(FHDLTestCase): self.assertEqual(sim.fpr(i+0), t) self.assertEqual(sim.fpr(i+4), u) - def test_sv_remap_fpmadds_idct_outer_8(self, stride=2): + def tst_sv_remap_fpmadds_idct_outer_8(self, stride=2): """>>> lst = ["svshape 8, 1, 1, 11, 0", "svremap 27, 0, 1, 2, 1, 0, 0", "sv.fadds *0, *0, *0" @@ -367,7 +367,7 @@ class DCTTestCase(FHDLTestCase): print("err", i, err) self.assertTrue(err < 1e-6) - def test_sv_remap_fpmadds_dct_outer_8(self, stride=2): + def tst_sv_remap_fpmadds_dct_outer_8(self, stride=2): """>>> lst = ["svshape 8, 1, 1, 3, 0", "svremap 27, 1, 0, 2, 0, 1, 0", "sv.fadds *0, *0, *0" @@ -419,7 +419,7 @@ class DCTTestCase(FHDLTestCase): print("err", i, err) self.assertTrue(err < 1e-6) - def test_sv_remap_dct_cos_precompute_inner_8(self): + def tst_sv_remap_dct_cos_precompute_inner_8(self): """pre-computes a DCT COS table, using the shorter costable indices schedule. turns out, some COS values are repeated in each layer of the DCT butterfly. @@ -498,7 +498,7 @@ class DCTTestCase(FHDLTestCase): def test_sv_remap_fpmadds_dct_8_mode_4(self, stride=2): """>>> lst = ["svremap 31, 1, 0, 2, 0, 1, 1", "svshape 8, 1, 1, 4, 0", - "sv.fdmadds *0, *0, *0, *8" + "sv.fdmadds *0, *16, *0" "svshape 8, 1, 1, 3, 0", "sv.fadds *0, *0, *0" ] @@ -508,7 +508,7 @@ class DCTTestCase(FHDLTestCase): """ lst = SVP64Asm(["svremap 31, 1, 0, 2, 0, 1, 1", "svshape 8, 1, %d, 4, 0" % stride, - "sv.fdmadds *0, *0, *0, *16", + "sv.fdmadds *0, *16, *0", "svshape 8, 1, %d, 3, 0" % stride, "sv.fadds *0, *0, *0" ]) @@ -573,7 +573,7 @@ class DCTTestCase(FHDLTestCase): # Inner butterfly, twin +/- MUL-ADD-SUB "svremap 31, 1, 0, 2, 0, 1, 1", "svshape 8, 1, 1, 4, 0", - "sv.fdmadds *0, *0, *0, *8" + "sv.fdmadds *0, *32, *0" # Outer butterfly, iterative sum "svshape 8, 1, 1, 3, 0", "sv.fadds *0, *0, *0" @@ -590,7 +590,7 @@ class DCTTestCase(FHDLTestCase): "sv.lfs/els *0, 4(1)", "svremap 31, 1, 0, 2, 0, 1, 1", "svshape 8, 1, %d, 4, 0" % stride, - "sv.fdmadds *0, *0, *0, *32", + "sv.fdmadds *0, *32, *0", "svshape 8, 1, %d, 3, 0" % stride, "sv.fadds *0, *0, *0" ]) @@ -657,7 +657,7 @@ class DCTTestCase(FHDLTestCase): print("err", i, err) self.assertTrue(err < 1e-5) - def test_sv_remap_fpmadds_ldbrev_idct_8_mode_4(self): + def tst_sv_remap_fpmadds_ldbrev_idct_8_mode_4(self): """>>> lst = [# LOAD bit-reversed with half-swap "svshape 8, 1, 1, 14, 0", "svremap 1, 0, 0, 0, 0, 0, 0", diff --git a/src/openpower/decoder/power_decoder2.py b/src/openpower/decoder/power_decoder2.py index 2b9af402..a243d825 100644 --- a/src/openpower/decoder/power_decoder2.py +++ b/src/openpower/decoder/power_decoder2.py @@ -163,6 +163,13 @@ class DecodeA(Elaboratable): comb += reg.data.eq(frs) comb += reg.ok.eq(1) + # select Register FRT field, + frt = Signal(5, reset_less=True) + comb += frt.eq(self.dec.FRT) + with m.If(self.sel_in == In1Sel.FRT): + comb += reg.data.eq(frt) + comb += reg.ok.eq(1) + # decode Fast-SPR based on instruction type with m.Switch(op.internal_op): @@ -365,6 +372,9 @@ class DecodeC(Elaboratable): with m.Case(In3Sel.FRS): comb += reg.data.eq(self.dec.FRS) comb += reg.ok.eq(1) + with m.Case(In3Sel.FRA): + comb += reg.data.eq(self.dec.FRA) + comb += reg.ok.eq(1) with m.Case(In3Sel.FRC): comb += reg.data.eq(self.dec.FRC) comb += reg.ok.eq(1) @@ -415,6 +425,9 @@ class DecodeOut(Elaboratable): with m.Case(OutSel.FRS): comb += reg.data.eq(self.dec.FRS) comb += reg.ok.eq(1) + with m.Case(OutSel.FRA): + comb += reg.data.eq(self.dec.FRA) + comb += reg.ok.eq(1) with m.Case(OutSel.FRT): comb += reg.data.eq(self.dec.FRT) comb += reg.ok.eq(1) diff --git a/src/openpower/decoder/power_enums.py b/src/openpower/decoder/power_enums.py index 0e7ceceb..399c1f46 100644 --- a/src/openpower/decoder/power_enums.py +++ b/src/openpower/decoder/power_enums.py @@ -171,6 +171,7 @@ class Form(Enum): MM = 43 # [f]minmax[s][.] CW = 44 CW2 = 45 + DCT = 46 # fdmadds # Simple-V svp64 fields https://libre-soc.org/openpower/sv/svp64/ @@ -922,7 +923,9 @@ class In1Sel(Enum): FRAp = FRA FRS = 6 FRSp = FRS - CIA = 7 # for addpcis + FRT = 7 + CIA = 8 # for addpcis + RT = 9 class In2Sel(Enum): @@ -961,6 +964,7 @@ class In3Sel(Enum): RC = 5 # for SVP64 bit-reverse LD/ST RT = 6 # for ternlog[i] RTp = RT + FRA = 7 class OutSel(Enum): @@ -976,6 +980,7 @@ class OutSel(Enum): FRSp = FRS RS = 7 RSp = RS + FRA = 8 @unique diff --git a/src/openpower/decoder/power_insn.py b/src/openpower/decoder/power_insn.py index 7330a802..1d22fa84 100644 --- a/src/openpower/decoder/power_insn.py +++ b/src/openpower/decoder/power_insn.py @@ -241,7 +241,7 @@ class PPCRecord: comment2: str = "" function: _Function = _Function.NONE intop: _MicrOp = _MicrOp.OP_ILLEGAL - in1: _In1Sel = _In1Sel.RA + in1: _In1Sel = _In1Sel.NONE in2: _In2Sel = _In2Sel.NONE in3: _In3Sel = _In3Sel.NONE out: _OutSel = _OutSel.NONE @@ -620,8 +620,8 @@ class Operands: # The legacy assembler placed operands in syntax order. "ffmadds": {"FRB": FMAOperandFRB, "FRC": FMAOperandFRC}, "ffmadds.": {"FRB": FMAOperandFRB, "FRC": FMAOperandFRC}, - "fdmadds": {"FRB": FMAOperandFRB, "FRC": FMAOperandFRC}, - "fdmadds.": {"FRB": FMAOperandFRB, "FRC": FMAOperandFRC}, + #"fdmadds": {"FRB": FMAOperandFRB, "FRC": FMAOperandFRC}, + #"fdmadds.": {"FRB": FMAOperandFRB, "FRC": FMAOperandFRC}, } custom_fields = { "SVi": NonZeroOperand, diff --git a/src/openpower/sv/sv_analysis.py b/src/openpower/sv/sv_analysis.py index 8b89212f..69780eb9 100644 --- a/src/openpower/sv/sv_analysis.py +++ b/src/openpower/sv/sv_analysis.py @@ -597,7 +597,12 @@ def extra_classifier(insn_name, value, name, res, regs): elif value == 'RM-1P-3S1D': res['Etype'] = 'EXTRA2' # RM EXTRA2 type - if regs == ['RA', 'RB', 'RC', 'RT', '', '']: # madd* + if regs == ['FRT', 'FRB', 'FRA', 'FRT', '', 'CR1']: # fdmadds + res['0'] = 'd:FRT;d:CR1' # FRT,CR1: Rdest1_EXTRA2 + res['1'] = 's:FRT' # FRT: Rsrc1_EXTRA2 + res['2'] = 's:FRB' # FRB: Rsrc2_EXTRA2 + res['3'] = 's:FRA' # FRA: Rsrc3_EXTRA2 + elif regs == ['RA', 'RB', 'RC', 'RT', '', '']: # madd* res['0'] = 'd:RT' # RT,CR0: Rdest1_EXTRA2 res['1'] = 's:RA' # RA: Rsrc1_EXTRA2 res['2'] = 's:RB' # RT: Rsrc2_EXTRA2 -- 2.30.2