# [DRAFT] Floating Twin Multiply-Add DCT [Single]
-A-Form
+DCT-Form
-* fdmadds FRT,FRA,FRC,FRB (Rc=0)
-* fdmadds. FRT,FRA,FRC,FRB (Rc=1)
+* fdmadds FRT,FRA,FRB (Rc=0)
+* fdmadds. FRT,FRA,FRB (Rc=1)
Pseudo-code:
- FRS <- FPADD32(FRA, FRB)
- sub <- FPSUB32(FRA, FRB)
- FRT <- FPMUL32(FRC, sub)
+ FRS <- FPADD32(FRT, FRB)
+ sub <- FPSUB32(FRT, FRB)
+ FRT <- FPMUL32(FRA, sub)
Special Registers Altered:
ffmadds,NORMAL,,1P,EXTRA2,NO,d:FRT;d:CR1,s:FRA,s:FRB,s:FRC,FRA,FRB,FRC,FRT,0,CR1,0
ffnmsubs,NORMAL,,1P,EXTRA2,NO,d:FRT;d:CR1,s:FRA,s:FRB,s:FRC,FRA,FRB,FRC,FRT,0,CR1,0
ffnmadds,NORMAL,,1P,EXTRA2,NO,d:FRT;d:CR1,s:FRA,s:FRB,s:FRC,FRA,FRB,FRC,FRT,0,CR1,0
-fdmadds,NORMAL,,1P,EXTRA2,NO,d:FRT;d:CR1,s:FRA,s:FRB,s:FRC,FRA,FRB,FRC,FRT,0,CR1,0
+fdmadds,NORMAL,,1P,EXTRA2,NO,d:FRT;d:CR1,s:FRT,s:FRB,s:FRA,FRT,FRB,FRA,FRT,0,CR1,0
fmsubs,NORMAL,,1P,EXTRA2,NO,d:FRT;d:CR1,s:FRA,s:FRB,s:FRC,FRA,FRB,FRC,FRT,0,CR1,0
fmadds,NORMAL,,1P,EXTRA2,NO,d:FRT;d:CR1,s:FRA,s:FRB,s:FRC,FRA,FRB,FRC,FRT,0,CR1,0
fnmsubs,NORMAL,,1P,EXTRA2,NO,d:FRT;d:CR1,s:FRA,s:FRB,s:FRC,FRA,FRB,FRC,FRT,0,CR1,0
| PO | FRT | FRA | FRB | XO | Rc |
| PO | FRT | FRA | RB | XO | Rc |
+# 1.6.7.1 DCT-FORM
+
+ |0 |6 |11 |16 |21 |26 |31 |
+ | PO | FRT | FRA | FRB | // | XO | Rc |
+
# 1.6.8 XL-FORM
|0 |6 |9 |11 |14 |16 |19|20|21 |31 |
| PO | BT | BA | BB | XO | / |
FRA (11:15)
Field used to specify a FPR to be used as a
source.
- Formats: A, MM, X, Z22, Z23
+ Formats: A, MM, X, Z22, Z23, DCT
FRAp (11:15)
Field used to specify an even/odd pair of FPRs to
be concatenated and used as a source.
FRB (16:20)
Field used to specify an FPR to be used as a
source.
- Formats: A, MM, X, XFL, Z23
+ Formats: A, MM, X, XFL, Z23, DCT
FRBp (16:20)
Field used to specify an even/odd pair of FPRs to
be concatenated and used as a source.
FRT (6:10)
Field used to specify an FPR to be used as a tar-
get.
- Formats: A, D, MM, X, Z22, Z23
+ Formats: A, D, MM, X, Z22, Z23, DCT
FRTp (6:10)
Field used to specify an even/odd pair of FPRs to
be concatenated and used as a target.
1 Set Condition Register Field 0 or Field 1 as
described in Section 2.3.1, 'Condition Regis-
ter' on page 30.
- Formats: A, M, MD, MDS, MM, VA2, X, XFL, XO, XS, Z22, Z23, SVL, XB, TLI
+ Formats: A, M, MD, MDS, MM, VA2, X, XFL, XO, XS, Z22, Z23, SVL, XB, TLI, DCT
RIC (12:13)
Field used to specify what types of entries to inval-
idate for tlbie[l].
Formats: XX4
XO (26:30)
Extended opcode field.
- Formats: A, DX, VA2, SVL, CRB
+ Formats: A, DX, VA2, SVL, CRB, DCT
XO (26:31)
Extended opcode field.
Formats: VA, SVM, SVRM, SVI
-----00110,FPU,OP_FP_MADD,FRA,FRB,FRC,FRT,NONE,CR1,0,0,ZERO,0,NONE,0,0,0,0,1,0,RC_ONLY,0,0,ffnmsubs,A,,1,3-in 2-out: implicit FRS. unofficial until submitted and approved/renumbered by the opf isa wg
-----00111,FPU,OP_FP_MADD,FRA,FRB,FRC,FRT,NONE,CR1,0,0,ZERO,0,NONE,0,0,0,0,1,0,RC_ONLY,0,0,ffnmadds,A,,1,3-in 2-out: implicit FRS. unofficial until submitted and approved/renumbered by the opf isa wg
1111100000,FPU,OP_FPOP,FRA,FRB,NONE,FRT,NONE,CR1,0,0,ZERO,0,NONE,0,0,0,0,1,0,RC_ONLY,0,0,ffadds,X,,1,2-in 2-out: implicit FRS. unofficial until submitted and approved/renumbered by the opf isa wg
------11011,FPU,OP_FP_MADD,FRA,FRB,FRC,FRT,NONE,CR1,0,0,ZERO,0,NONE,0,0,0,0,1,0,RC_ONLY,0,0,fdmadds,A,,1,3-in 2-out: implicit FRS. unofficial until submitted and approved/renumbered by the opf isa wg
+-----11011,FPU,OP_FP_MADD,FRT,FRB,FRA,FRT,NONE,CR1,0,0,ZERO,0,NONE,0,0,0,0,1,0,RC_ONLY,0,0,fdmadds,DCT,,1,3-in 2-out: implicit FRS. unofficial until submitted and approved/renumbered by the opf isa wg
1001001110,FPU,OP_FPOP,FRA,FRB,NONE,FRT,NONE,CR1,0,0,ZERO,0,NONE,0,0,0,0,1,0,RC_ONLY,0,0,fatan2s,X,,1,unofficial until submitted and approved/renumbered by the opf isa wg
1000001110,FPU,OP_FPOP,FRA,FRB,NONE,FRT,NONE,CR1,0,0,ZERO,0,NONE,0,0,0,0,1,0,RC_ONLY,0,0,fatan2pis,X,,1,unofficial until submitted and approved/renumbered by the opf isa wg
1111101101,FPU,OP_FPOP,FRA,FRB,NONE,FRT,NONE,CR1,0,0,ZERO,0,NONE,0,0,0,0,1,0,RC_ONLY,0,0,fpows,X,,1,unofficial until submitted and approved/renumbered by the opf isa wg
elif name == 'FRA':
if in1_sel == In1Sel.FRA.value:
return 1
+ if in3_sel == In3Sel.FRA.value:
+ return 3
elif name == 'FRB':
if in2_sel == In2Sel.FRB.value:
return 2
return 1
if in3_sel == In3Sel.FRS.value:
return 3
+ elif name == 'FRT':
+ if in1_sel == In1Sel.FRT.value:
+ return 1
+ elif name == 'RT':
+ if in1_sel == In1Sel.RT.value:
+ return 1
return None
elif name == 'FRA':
if out_sel == OutSel.FRA.value:
return True
+ elif name == 'FRS':
+ if out_sel == OutSel.FRS.value:
+ return True
elif name == 'FRT':
if out_sel == OutSel.FRT.value:
return True
fadds FRT , FRB, FRA
fsubs FRT+vl, FRA, FRB+vl
"""
- lst = SVP64Asm(["sv.fdmadds *0, *0, *0, *8"
+ lst = SVP64Asm(["sv.fdmadds *0, *8, *0"
])
lst = list(lst)
self.assertEqual(sim.fpr(i+0), t)
self.assertEqual(sim.fpr(i+4), u)
- def test_sv_remap_fpmadds_idct_outer_8(self, stride=2):
+ def tst_sv_remap_fpmadds_idct_outer_8(self, stride=2):
""">>> lst = ["svshape 8, 1, 1, 11, 0",
"svremap 27, 0, 1, 2, 1, 0, 0",
"sv.fadds *0, *0, *0"
print("err", i, err)
self.assertTrue(err < 1e-6)
- def test_sv_remap_fpmadds_dct_outer_8(self, stride=2):
+ def tst_sv_remap_fpmadds_dct_outer_8(self, stride=2):
""">>> lst = ["svshape 8, 1, 1, 3, 0",
"svremap 27, 1, 0, 2, 0, 1, 0",
"sv.fadds *0, *0, *0"
print("err", i, err)
self.assertTrue(err < 1e-6)
- def test_sv_remap_dct_cos_precompute_inner_8(self):
+ def tst_sv_remap_dct_cos_precompute_inner_8(self):
"""pre-computes a DCT COS table, using the shorter costable
indices schedule. turns out, some COS values are repeated
in each layer of the DCT butterfly.
def test_sv_remap_fpmadds_dct_8_mode_4(self, stride=2):
""">>> lst = ["svremap 31, 1, 0, 2, 0, 1, 1",
"svshape 8, 1, 1, 4, 0",
- "sv.fdmadds *0, *0, *0, *8"
+ "sv.fdmadds *0, *16, *0"
"svshape 8, 1, 1, 3, 0",
"sv.fadds *0, *0, *0"
]
"""
lst = SVP64Asm(["svremap 31, 1, 0, 2, 0, 1, 1",
"svshape 8, 1, %d, 4, 0" % stride,
- "sv.fdmadds *0, *0, *0, *16",
+ "sv.fdmadds *0, *16, *0",
"svshape 8, 1, %d, 3, 0" % stride,
"sv.fadds *0, *0, *0"
])
# Inner butterfly, twin +/- MUL-ADD-SUB
"svremap 31, 1, 0, 2, 0, 1, 1",
"svshape 8, 1, 1, 4, 0",
- "sv.fdmadds *0, *0, *0, *8"
+ "sv.fdmadds *0, *32, *0"
# Outer butterfly, iterative sum
"svshape 8, 1, 1, 3, 0",
"sv.fadds *0, *0, *0"
"sv.lfs/els *0, 4(1)",
"svremap 31, 1, 0, 2, 0, 1, 1",
"svshape 8, 1, %d, 4, 0" % stride,
- "sv.fdmadds *0, *0, *0, *32",
+ "sv.fdmadds *0, *32, *0",
"svshape 8, 1, %d, 3, 0" % stride,
"sv.fadds *0, *0, *0"
])
print("err", i, err)
self.assertTrue(err < 1e-5)
- def test_sv_remap_fpmadds_ldbrev_idct_8_mode_4(self):
+ def tst_sv_remap_fpmadds_ldbrev_idct_8_mode_4(self):
""">>> lst = [# LOAD bit-reversed with half-swap
"svshape 8, 1, 1, 14, 0",
"svremap 1, 0, 0, 0, 0, 0, 0",
comb += reg.data.eq(frs)
comb += reg.ok.eq(1)
+ # select Register FRT field,
+ frt = Signal(5, reset_less=True)
+ comb += frt.eq(self.dec.FRT)
+ with m.If(self.sel_in == In1Sel.FRT):
+ comb += reg.data.eq(frt)
+ comb += reg.ok.eq(1)
+
# decode Fast-SPR based on instruction type
with m.Switch(op.internal_op):
with m.Case(In3Sel.FRS):
comb += reg.data.eq(self.dec.FRS)
comb += reg.ok.eq(1)
+ with m.Case(In3Sel.FRA):
+ comb += reg.data.eq(self.dec.FRA)
+ comb += reg.ok.eq(1)
with m.Case(In3Sel.FRC):
comb += reg.data.eq(self.dec.FRC)
comb += reg.ok.eq(1)
with m.Case(OutSel.FRS):
comb += reg.data.eq(self.dec.FRS)
comb += reg.ok.eq(1)
+ with m.Case(OutSel.FRA):
+ comb += reg.data.eq(self.dec.FRA)
+ comb += reg.ok.eq(1)
with m.Case(OutSel.FRT):
comb += reg.data.eq(self.dec.FRT)
comb += reg.ok.eq(1)
MM = 43 # [f]minmax[s][.]
CW = 44
CW2 = 45
+ DCT = 46 # fdmadds
# Simple-V svp64 fields https://libre-soc.org/openpower/sv/svp64/
FRAp = FRA
FRS = 6
FRSp = FRS
- CIA = 7 # for addpcis
+ FRT = 7
+ CIA = 8 # for addpcis
+ RT = 9
class In2Sel(Enum):
RC = 5 # for SVP64 bit-reverse LD/ST
RT = 6 # for ternlog[i]
RTp = RT
+ FRA = 7
class OutSel(Enum):
FRSp = FRS
RS = 7
RSp = RS
+ FRA = 8
@unique
comment2: str = ""
function: _Function = _Function.NONE
intop: _MicrOp = _MicrOp.OP_ILLEGAL
- in1: _In1Sel = _In1Sel.RA
+ in1: _In1Sel = _In1Sel.NONE
in2: _In2Sel = _In2Sel.NONE
in3: _In3Sel = _In3Sel.NONE
out: _OutSel = _OutSel.NONE
# The legacy assembler placed operands in syntax order.
"ffmadds": {"FRB": FMAOperandFRB, "FRC": FMAOperandFRC},
"ffmadds.": {"FRB": FMAOperandFRB, "FRC": FMAOperandFRC},
- "fdmadds": {"FRB": FMAOperandFRB, "FRC": FMAOperandFRC},
- "fdmadds.": {"FRB": FMAOperandFRB, "FRC": FMAOperandFRC},
+ #"fdmadds": {"FRB": FMAOperandFRB, "FRC": FMAOperandFRC},
+ #"fdmadds.": {"FRB": FMAOperandFRB, "FRC": FMAOperandFRC},
}
custom_fields = {
"SVi": NonZeroOperand,
elif value == 'RM-1P-3S1D':
res['Etype'] = 'EXTRA2' # RM EXTRA2 type
- if regs == ['RA', 'RB', 'RC', 'RT', '', '']: # madd*
+ if regs == ['FRT', 'FRB', 'FRA', 'FRT', '', 'CR1']: # fdmadds
+ res['0'] = 'd:FRT;d:CR1' # FRT,CR1: Rdest1_EXTRA2
+ res['1'] = 's:FRT' # FRT: Rsrc1_EXTRA2
+ res['2'] = 's:FRB' # FRB: Rsrc2_EXTRA2
+ res['3'] = 's:FRA' # FRA: Rsrc3_EXTRA2
+ elif regs == ['RA', 'RB', 'RC', 'RT', '', '']: # madd*
res['0'] = 'd:RT' # RT: Rdest1_EXTRA2
res['1'] = 's:RA' # RA: Rsrc1_EXTRA2
res['2'] = 's:RB' # RB: Rsrc2_EXTRA2