From 345d0a51ab62fe336c4a81eb9a3544796ffea0e0 Mon Sep 17 00:00:00 2001
From: Javier Setoain
Date: Tue, 13 Nov 2018 17:32:54 +0000
Subject: [PATCH] arch-arm: Add SVE LD1RQ[BHWD]

Add both scalar+scalar and scalar+immediate versions.

Change-Id: If5fa1a71ab0dab93f9d35b544ea0899ece858bea
Signed-off-by: Giacomo Gabrielli
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/19170
Tested-by: kokoro
Reviewed-by: Andreas Sandberg
Maintainer: Andreas Sandberg
---
Reviewer notes (this area, between the three-dash line and the diffstat,
is not part of the commit message):

* Decoders (sve_2nd_level.isa)
  - decodeSveLoadBcastQuadSS and decodeSveLoadBcastQuadSI both return
    Unknown64 when bits 22:21 of the encoding are non-zero.
  - Fields read: zt = bits 4:0, rn = bits 9:5 (passed through makeSP),
    pg = bits 12:10; the SS form reads rm = bits 20:16, the SI form reads
    a 4-bit immediate from bits 19:16 and sign-extends it with sext<4>.
  - msz = bits 24:23 selects the mnemonic and element type:
    0 -> ld1rqb/uint8_t, 1 -> ld1rqh/uint16_t, 2 -> ld1rqw/uint32_t,
    3 -> ld1rqd/uint64_t.  Both template arguments are the same type,
    matching the '<%s, %s>' % (ttype, ttype) instantiations emitted by
    emitSveLoadAndReplQuad.
  - The trailing "return new Unknown64" after the switch is only reached
    if no case returns; msz is two bits wide and all four values are
    handled.

* Generator (sve_mem.isa, emitSveLoadAndReplQuad)
  - The access size is fixed at 16 bytes (memAccessSize = 16).
  - Effective address: XBase + imm * 16 for the immediate form,
    XBase + XOffset * sizeof(MemElemType) for the scalar form.
  - rden_code builds a 16-entry std::vector<bool>, initially all true,
    and clears the sizeof(RegElemType) entries that belong to each
    element i for which GpOp_x[i] is false.
  - memacc_code assembles one __uint128_t from memDataView, writing 0
    for elements whose GpOp_x[i] is false, then stores that value to
    every AA64FpDest_uq[i] for i below
    getCurSveVecLen<__uint128_t>(xc->tcBase()).
  - tpl_header / tpl_args name the two template parameters
    (RegElemType, MemElemType) that the generated code refers to.

* Operand types (operands.isa)
  - Adds 'sq' (__int128_t) and 'uq' (__uint128_t); the '_uq' suffix is
    what AA64FpDest_uq in memacc_code relies on.
  - NOTE(review): the unchanged context line "'tud' : 'std::array'" has
    no template arguments here; it is kept verbatim -- confirm it against
    the target operands.isa before applying.

 src/arch/arm/isa/formats/sve_2nd_level.isa | 50 ++++++++++++++++
 src/arch/arm/isa/insts/sve_mem.isa         | 69 ++++++++++++++++++++++
 src/arch/arm/isa/operands.isa              |  2 +
 3 files changed, 121 insertions(+)

diff --git a/src/arch/arm/isa/formats/sve_2nd_level.isa b/src/arch/arm/isa/formats/sve_2nd_level.isa
index 3bfae1d9b..c06d7f6a7 100644
--- a/src/arch/arm/isa/formats/sve_2nd_level.isa
+++ b/src/arch/arm/isa/formats/sve_2nd_level.isa
@@ -3049,12 +3049,62 @@ namespace Aarch64
     StaticInstPtr
     decodeSveLoadBcastQuadSS(ExtMachInst machInst)
     {
+        uint8_t num = bits(machInst, 22, 21);
+        if (num != 0x00) {
+            return new Unknown64(machInst);
+        }
+
+        IntRegIndex zt = (IntRegIndex)(uint8_t) bits(machInst, 4, 0);
+        IntRegIndex rn = makeSP((IntRegIndex)(uint8_t) bits(machInst, 9, 5));
+        IntRegIndex pg = (IntRegIndex)(uint8_t) bits(machInst, 12, 10);
+        IntRegIndex rm = (IntRegIndex)(uint8_t) bits(machInst, 20, 16);
+        uint8_t msz = bits(machInst, 24, 23);
+        switch (msz) {
+          case 0:
+            return new SveLd1RqSS<uint8_t, uint8_t>("ld1rqb",
+                machInst, zt, pg, rn, rm);
+          case 1:
+            return new SveLd1RqSS<uint16_t, uint16_t>("ld1rqh",
+                machInst, zt, pg, rn, rm);
+          case 2:
+            return new SveLd1RqSS<uint32_t, uint32_t>("ld1rqw",
+                machInst, zt, pg, rn, rm);
+          case 3:
+            return new SveLd1RqSS<uint64_t, uint64_t>("ld1rqd",
+                machInst, zt, pg, rn, rm);
+        }
+
         return new Unknown64(machInst);
     } // decodeSveLoadBcastQuadSS
 
     StaticInstPtr
     decodeSveLoadBcastQuadSI(ExtMachInst machInst)
     {
+        uint8_t num = bits(machInst, 22, 21);
+        if (num != 0x00) {
+            return new Unknown64(machInst);
+        }
+
+        IntRegIndex zt = (IntRegIndex)(uint8_t) bits(machInst, 4, 0);
+        IntRegIndex rn = makeSP((IntRegIndex)(uint8_t) bits(machInst, 9, 5));
+        IntRegIndex pg = (IntRegIndex)(uint8_t) bits(machInst, 12, 10);
+        uint64_t imm = sext<4>(bits(machInst, 19, 16));
+        uint8_t msz = bits(machInst, 24, 23);
+        switch (msz) {
+          case 0:
+            return new SveLd1RqSI<uint8_t, uint8_t>("ld1rqb",
+                machInst, zt, pg, rn, imm);
+          case 1:
+            return new SveLd1RqSI<uint16_t, uint16_t>("ld1rqh",
+                machInst, zt, pg, rn, imm);
+          case 2:
+            return new SveLd1RqSI<uint32_t, uint32_t>("ld1rqw",
+                machInst, zt, pg, rn, imm);
+          case 3:
+            return new SveLd1RqSI<uint64_t, uint64_t>("ld1rqd",
+                machInst, zt, pg, rn, imm);
+        }
+
         return new Unknown64(machInst);
     } // decodeSveLoadBcastQuadSI
 
diff --git a/src/arch/arm/isa/insts/sve_mem.isa b/src/arch/arm/isa/insts/sve_mem.isa
index 32a078dbd..d993122b1 100644
--- a/src/arch/arm/isa/insts/sve_mem.isa
+++ b/src/arch/arm/isa/insts/sve_mem.isa
@@ -1475,6 +1475,70 @@ let {{
                                        else 'SveStoreRegRegMicroop')
             exec_output += SveStructMemExecDeclare.subst(substDict)
 
+    # Generates definitions for SVE load-and-replicate quadword instructions
+    def emitSveLoadAndReplQuad(offsetIsImm):
+        global header_output, exec_output, decoders
+        tplHeader = 'template <class RegElemType, class MemElemType>'
+        tplArgs = '<RegElemType, MemElemType>'
+        eaCode = SPAlignmentCheckCode + '''
+        int memAccessSize = 16;
+        EA = XBase + '''
+        if offsetIsImm:
+            eaCode += '(((int64_t) this->imm) * 16);'
+        else:
+            eaCode += '(XOffset * sizeof(MemElemType));'
+        loadRdEnableCode = '''
+        eCount = 16/sizeof(RegElemType);
+        auto rdEn = std::vector<bool>(16, true);
+        for (int i = 0; i < eCount; ++i) {
+            if (!GpOp_x[i]) {
+                for (int j = 0; j < sizeof(RegElemType); ++j) {
+                    rdEn[sizeof(RegElemType) * i + j] = false;
+                }
+            }
+        }
+        '''
+        memAccCode = '''
+        __uint128_t qword;
+        RegElemType* qp = reinterpret_cast<RegElemType*>(&qword);
+        for (int i = 0; i < 16/sizeof(RegElemType); ++i) {
+            if (GpOp_x[i]) {
+                qp[i] = memDataView[i];
+            } else {
+                qp[i] = 0;
+            }
+        }
+        eCount = ArmStaticInst::getCurSveVecLen<__uint128_t>(
+                xc->tcBase());
+        for (int i = 0; i < eCount; ++i) {
+            AA64FpDest_uq[i] = qword;
+        }
+        '''
+        iop = InstObjParams('ld1rq',
+                'SveLd1RqSI' if offsetIsImm else 'SveLd1RqSS',
+                'SveContigMemSI' if offsetIsImm else 'SveContigMemSS',
+                {'tpl_header': tplHeader,
+                 'tpl_args': tplArgs,
+                 'rden_code': loadRdEnableCode,
+                 'memacc_code': memAccCode,
+                 'ea_code': sveEnabledCheckCode + eaCode,
+                 'fault_code': '',
+                 'fa_code': ''},
+                ['IsMemRef', 'IsLoad'])
+        if offsetIsImm:
+            header_output += SveContigMemSIOpDeclare.subst(iop)
+        else:
+            header_output += SveContigMemSSOpDeclare.subst(iop)
+        exec_output += (
+            SveContigLoadExecute.subst(iop) +
+            SveContigLoadInitiateAcc.subst(iop) +
+            SveContigLoadCompleteAcc.subst(iop))
+        for ttype in ('uint8_t', 'uint16_t', 'uint32_t', 'uint64_t'):
+            substDict = {'tpl_args': '<%s, %s>' % (ttype, ttype),
+                         'class_name': 'SveLd1RqSI' if offsetIsImm
+                                       else 'SveLd1RqSS'}
+            exec_output += SveContigMemExecDeclare.subst(substDict)
+
     # LD1[S]{B,H,W,D} (scalar plus immediate)
     # ST1[S]{B,H,W,D} (scalar plus immediate)
     # LDNF1[S]{B,H,W,D} (scalar plus immediate)
@@ -1487,6 +1551,11 @@ let {{
     # LD1R[S]{B,H,W,D}
     emitSveLoadAndRepl()
 
+    # LD1RQ{B,H,W,D} (scalar plus immediate)
+    emitSveLoadAndReplQuad(offsetIsImm = True)
+    # LD1RQ{B,H,W,D} (scalar plus scalar)
+    emitSveLoadAndReplQuad(offsetIsImm = False)
+
     # LD{2,3,4}{B,H,W,D} (scalar plus immediate)
     # ST{2,3,4}{B,H,W,D} (scalar plus immediate)
     emitSveStructMemInsts(offsetIsImm = True)
diff --git a/src/arch/arm/isa/operands.isa b/src/arch/arm/isa/operands.isa
index 5eae9b4b9..b0e5b3b8e 100644
--- a/src/arch/arm/isa/operands.isa
+++ b/src/arch/arm/isa/operands.isa
@@ -48,6 +48,8 @@ def operand_types {{
     'uw' : 'uint32_t',
     'sd' : 'int64_t',
     'ud' : 'uint64_t',
+    'sq' : '__int128_t',
+    'uq' : '__uint128_t',
     'tud' : 'std::array',
     'sf' : 'float',
     'df' : 'double',
-- 
2.30.2