From 2e47c6c5ed37dc1db0ea35f51b2f7d4afc0da45e Mon Sep 17 00:00:00 2001
From: Javier Setoain
Date: Wed, 4 Apr 2018 16:53:17 +0100
Subject: [PATCH] arch-arm: Add support for SVE load/store structures

Change-Id: I4d9cde18dfc3d478eacc156de6a4a9721eb9e2ff
Signed-off-by: Giacomo Gabrielli
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/13524
Tested-by: kokoro
Reviewed-by: Andreas Sandberg
Maintainer: Andreas Sandberg
---
 src/arch/arm/insts/sve_macromem.hh         | 289 +++++++++++++
 src/arch/arm/isa/formats/sve_2nd_level.isa |  50 +++
 src/arch/arm/isa/insts/sve_mem.isa         | 468 +++++++++++++++++++++
 src/arch/arm/isa/operands.isa              |  45 ++
 src/arch/arm/isa/templates/sve_mem.isa     | 446 ++++++++++++++++++++
 src/arch/arm/registers.hh                  |   7 +-
 6 files changed, 1304 insertions(+), 1 deletion(-)

diff --git a/src/arch/arm/insts/sve_macromem.hh b/src/arch/arm/insts/sve_macromem.hh
index b365dcb4b..861318122 100644
--- a/src/arch/arm/insts/sve_macromem.hh
+++ b/src/arch/arm/insts/sve_macromem.hh
@@ -45,6 +45,295 @@
 namespace ArmISA {
 
+template <typename Element,
+          template <typename> class MicroopLdMemType,
+          template <typename> class MicroopDeIntrlvType>
+class SveLdStructSS : public PredMacroOp
+{
+  protected:
+    IntRegIndex dest;
+    IntRegIndex gp;
+    IntRegIndex base;
+    IntRegIndex offset;
+    uint8_t numregs;
+
+  public:
+    SveLdStructSS(const char* mnem, ExtMachInst machInst, OpClass __opClass,
+            IntRegIndex _dest, IntRegIndex _gp, IntRegIndex _base,
+            IntRegIndex _offset, uint8_t _numregs)
+        : PredMacroOp(mnem, machInst, __opClass),
+          dest(_dest), gp(_gp), base(_base), offset(_offset),
+          numregs(_numregs)
+    {
+        numMicroops = numregs * 2;
+
+        microOps = new StaticInstPtr[numMicroops];
+
+        for (int i = 0; i < numregs; ++i) {
+            microOps[i] = new MicroopLdMemType<Element>(
+                    mnem, machInst, static_cast<IntRegIndex>(INTRLVREG0 + i),
+                    _gp, _base, _offset, _numregs, i);
+        }
+        for (int i = 0; i < numregs; ++i) {
+            microOps[i + numregs] = new MicroopDeIntrlvType<Element>(
+                    mnem, machInst, static_cast<IntRegIndex>((_dest + i) % 32),
+                    _numregs, i, this);
+        }
+
+        microOps[0]->setFirstMicroop();
+        microOps[numMicroops - 1]->setLastMicroop();
+
+        for (StaticInstPtr *uop = microOps; !(*uop)->isLastMicroop(); uop++) {
+            (*uop)->setDelayedCommit();
+        }
+    }
+
+    Fault
+    execute(ExecContext *, Trace::InstRecord *) const
+    {
+        panic("Execute method called when it shouldn't!");
+        return NoFault;
+    }
+
+    std::string
+    generateDisassembly(Addr pc, const SymbolTable *symtab) const
+    {
+        std::stringstream ss;
+        printMnemonic(ss, "", false);
+        ccprintf(ss, "{");
+        for (int i = 0; i < numregs; ++i) {
+            printVecReg(ss, (dest + i) % 32, true);
+            if (i < numregs - 1)
+                ccprintf(ss, ", ");
+        }
+        ccprintf(ss, "}, ");
+        printVecPredReg(ss, gp);
+        ccprintf(ss, "/z, [");
+        printIntReg(ss, base);
+        ccprintf(ss, ", ");
+        printIntReg(ss, offset);
+        ccprintf(ss, "]");
+        return ss.str();
+    }
+};
+
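+// A structure load macro-op expands into 2 * numregs micro-ops: e.g.,
+// "ld2b { z0.b, z1.b }, p0/z, [x0, x1]" becomes two memory micro-ops
+// that load consecutive vector-length chunks of the interleaved stream
+// into the INTRLVREG0/INTRLVREG1 temporaries, followed by two
+// de-interleave micro-ops that extract every other element into z0 and
+// z1 respectively.
+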
+template <typename Element,
+          template <typename> class MicroopStMemType,
+          template <typename> class MicroopIntrlvType>
+class SveStStructSS : public PredMacroOp
+{
+  protected:
+    IntRegIndex dest;
+    IntRegIndex gp;
+    IntRegIndex base;
+    IntRegIndex offset;
+    uint8_t numregs;
+
+  public:
+    SveStStructSS(const char* mnem, ExtMachInst machInst, OpClass __opClass,
+            IntRegIndex _dest, IntRegIndex _gp, IntRegIndex _base,
+            IntRegIndex _offset, uint8_t _numregs)
+        : PredMacroOp(mnem, machInst, __opClass),
+          dest(_dest), gp(_gp), base(_base), offset(_offset),
+          numregs(_numregs)
+    {
+        numMicroops = numregs * 2;
+
+        microOps = new StaticInstPtr[numMicroops];
+
+        for (int i = 0; i < numregs; ++i) {
+            microOps[i] = new MicroopIntrlvType<Element>(
+                    mnem, machInst, static_cast<IntRegIndex>(INTRLVREG0 + i),
+                    _dest, _numregs, i, this);
+        }
+
+        for (int i = 0; i < numregs; ++i) {
+            microOps[i + numregs] = new MicroopStMemType<Element>(
+                    mnem, machInst, static_cast<IntRegIndex>(INTRLVREG0 + i),
+                    _gp, _base, _offset, _numregs, i);
+        }
+
+        microOps[0]->setFirstMicroop();
+        microOps[numMicroops - 1]->setLastMicroop();
+
+        for (StaticInstPtr *uop = microOps; !(*uop)->isLastMicroop(); uop++) {
+            (*uop)->setDelayedCommit();
+        }
+    }
+
+    Fault
+    execute(ExecContext *, Trace::InstRecord *) const
+    {
+        panic("Execute method called when it shouldn't!");
+        return NoFault;
+    }
+
+    std::string
+    generateDisassembly(Addr pc, const SymbolTable *symtab) const
+    {
+        std::stringstream ss;
+        printMnemonic(ss, "", false);
+        ccprintf(ss, "{");
+        for (int i = 0; i < numregs; ++i) {
+            printVecReg(ss, (dest + i) % 32, true);
+            if (i < numregs - 1)
+                ccprintf(ss, ", ");
+        }
+        ccprintf(ss, "}, ");
+        printVecPredReg(ss, gp);
+        ccprintf(ss, ", [");
+        printIntReg(ss, base);
+        ccprintf(ss, ", ");
+        printIntReg(ss, offset);
+        ccprintf(ss, "]");
+        return ss.str();
+    }
+};
+
+template <typename Element,
+          template <typename> class MicroopLdMemType,
+          template <typename> class MicroopDeIntrlvType>
+class SveLdStructSI : public PredMacroOp
+{
+  protected:
+    IntRegIndex dest;
+    IntRegIndex gp;
+    IntRegIndex base;
+    int64_t imm;
+    uint8_t numregs;
+
+  public:
+    SveLdStructSI(const char* mnem, ExtMachInst machInst, OpClass __opClass,
+            IntRegIndex _dest, IntRegIndex _gp, IntRegIndex _base,
+            int64_t _imm, uint8_t _numregs)
+        : PredMacroOp(mnem, machInst, __opClass),
+          dest(_dest), gp(_gp), base(_base), imm(_imm), numregs(_numregs)
+    {
+        numMicroops = numregs * 2;
+
+        microOps = new StaticInstPtr[numMicroops];
+
+        for (int i = 0; i < numregs; ++i) {
+            microOps[i] = new MicroopLdMemType<Element>(
+                    mnem, machInst, static_cast<IntRegIndex>(INTRLVREG0 + i),
+                    _gp, _base, _imm, _numregs, i);
+        }
+        for (int i = 0; i < numregs; ++i) {
+            microOps[i + numregs] = new MicroopDeIntrlvType<Element>(
+                    mnem, machInst, static_cast<IntRegIndex>((_dest + i) % 32),
+                    _numregs, i, this);
+        }
+
+        microOps[0]->setFirstMicroop();
+        microOps[numMicroops - 1]->setLastMicroop();
+
+        for (StaticInstPtr *uop = microOps; !(*uop)->isLastMicroop(); uop++) {
+            (*uop)->setDelayedCommit();
+        }
+    }
+
+    Fault
+    execute(ExecContext *, Trace::InstRecord *) const
+    {
+        panic("Execute method called when it shouldn't!");
+        return NoFault;
+    }
+
+    std::string
+    generateDisassembly(Addr pc, const SymbolTable *symtab) const
+    {
+        std::stringstream ss;
+        printMnemonic(ss, "", false);
+        ccprintf(ss, "{");
+        for (int i = 0; i < numregs; ++i) {
+            printVecReg(ss, (dest + i) % 32, true);
+            if (i < numregs - 1)
+                ccprintf(ss, ", ");
+        }
+        ccprintf(ss, "}, ");
+        printVecPredReg(ss, gp);
+        ccprintf(ss, "/z, [");
+        printIntReg(ss, base);
+        if (imm != 0) {
+            ccprintf(ss, ", #%d, MUL VL", imm);
+        }
+        ccprintf(ss, "]");
+        return ss.str();
+    }
+};
+
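+// For the scalar-plus-immediate forms the decoder has already scaled
+// the immediate by the register count, so the "#imm, MUL VL" printed
+// by these macro-ops is the architectural immediate: e.g., an encoded
+// simm4 of 1 on ld2d is stored as imm = 2 and offsets the base by two
+// vector lengths.
+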
+template <typename Element,
+          template <typename> class MicroopStMemType,
+          template <typename> class MicroopIntrlvType>
+class SveStStructSI : public PredMacroOp
+{
+  protected:
+    IntRegIndex dest;
+    IntRegIndex gp;
+    IntRegIndex base;
+    int64_t imm;
+    uint8_t numregs;
+
+  public:
+    SveStStructSI(const char* mnem, ExtMachInst machInst, OpClass __opClass,
+            IntRegIndex _dest, IntRegIndex _gp, IntRegIndex _base,
+            int64_t _imm, uint8_t _numregs)
+        : PredMacroOp(mnem, machInst, __opClass),
+          dest(_dest), gp(_gp), base(_base), imm(_imm), numregs(_numregs)
+    {
+        numMicroops = numregs * 2;
+
+        microOps = new StaticInstPtr[numMicroops];
+
+        for (int i = 0; i < numregs; ++i) {
+            microOps[i] = new MicroopIntrlvType<Element>(
+                    mnem, machInst, static_cast<IntRegIndex>(INTRLVREG0 + i),
+                    _dest, _numregs, i, this);
+        }
+
+        for (int i = 0; i < numregs; ++i) {
+            microOps[i + numregs] = new MicroopStMemType<Element>(
+                    mnem, machInst, static_cast<IntRegIndex>(INTRLVREG0 + i),
+                    _gp, _base, _imm, _numregs, i);
+        }
+
+        microOps[0]->setFirstMicroop();
+        microOps[numMicroops - 1]->setLastMicroop();
+
+        for (StaticInstPtr *uop = microOps; !(*uop)->isLastMicroop(); uop++) {
+            (*uop)->setDelayedCommit();
+        }
+    }
+
+    Fault
+    execute(ExecContext *, Trace::InstRecord *) const
+    {
+        panic("Execute method called when it shouldn't!");
+        return NoFault;
+    }
+
+    std::string
+    generateDisassembly(Addr pc, const SymbolTable *symtab) const
+    {
+        std::stringstream ss;
+        printMnemonic(ss, "", false);
+        ccprintf(ss, "{");
+        for (int i = 0; i < numregs; ++i) {
+            printVecReg(ss, (dest + i) % 32, true);
+            if (i < numregs - 1)
+                ccprintf(ss, ", ");
+        }
+        ccprintf(ss, "}, ");
+        printVecPredReg(ss, gp);
+        ccprintf(ss, ", [");
+        printIntReg(ss, base);
+        if (imm != 0) {
+            ccprintf(ss, ", #%d, MUL VL", imm);
+        }
+        ccprintf(ss, "]");
+        return ss.str();
+    }
+};
+
 template <typename RegElemType, typename MemElemType,
           template <typename, typename> class MicroopType,
           template <typename, typename> class FirstFaultWritebackMicroopType>
diff --git a/src/arch/arm/isa/formats/sve_2nd_level.isa b/src/arch/arm/isa/formats/sve_2nd_level.isa
index 69d80e294..def17812d 100644
--- a/src/arch/arm/isa/formats/sve_2nd_level.isa
+++ b/src/arch/arm/isa/formats/sve_2nd_level.isa
@@ -3123,6 +3123,18 @@ namespace Aarch64
     StaticInstPtr
     decodeSveLoadStructsSS(ExtMachInst machInst)
     {
+        IntRegIndex zt = (IntRegIndex) (uint8_t) bits(machInst, 4, 0);
+        IntRegIndex rn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5);
+        IntRegIndex rm = (IntRegIndex) (uint8_t) bits(machInst, 20, 16);
+        IntRegIndex pg = (IntRegIndex) (uint8_t) bits(machInst, 12, 10);
+        uint8_t msz = bits(machInst, 24, 23);
+        uint8_t num = bits(machInst, 22, 21);
+
+        if (rm != 0x1f && num != 0) {
+            num++;
+            return decodeSveStructLoadSSInsts(msz, machInst,
+                    zt, pg, rn, rm, num);
+        }
         return new Unknown64(machInst);
     } // decodeSveLoadStructsSS
 
@@ -3135,6 +3147,19 @@ namespace Aarch64
     StaticInstPtr
     decodeSveLoadStructsSI(ExtMachInst machInst)
     {
+        IntRegIndex zt = (IntRegIndex) (uint8_t) bits(machInst, 4, 0);
+        IntRegIndex rn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5);
+        int64_t imm = sext<4>(bits(machInst, 19, 16));
+        IntRegIndex pg = (IntRegIndex) (uint8_t) bits(machInst, 12, 10);
+        uint8_t msz = bits(machInst, 24, 23);
+        uint8_t num = bits(machInst, 22, 21);
+
+        if (num != 0) {
+            num++;
+            imm *= num;
+            return decodeSveStructLoadSIInsts(msz, machInst,
+                    zt, pg, rn, imm, num);
+        }
         return new Unknown64(machInst);
     } // decodeSveLoadStructsSI
 
@@ -3331,12 +3356,37 @@ namespace Aarch64
     StaticInstPtr
     decodeSveStoreStructsSS(ExtMachInst machInst)
     {
+        IntRegIndex zt = (IntRegIndex) (uint8_t) bits(machInst, 4, 0);
+        IntRegIndex rn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5);
+        IntRegIndex rm = (IntRegIndex) (uint8_t) bits(machInst, 20, 16);
+        IntRegIndex pg = (IntRegIndex) (uint8_t) bits(machInst, 12, 10);
+        uint8_t msz = bits(machInst, 24, 23);
+        uint8_t num = bits(machInst, 22, 21);
+
+        if (rm != 0x1f && num != 0) {
+            num++;
+            return decodeSveStructStoreSSInsts(msz, machInst,
+                    zt, pg, rn, rm, num);
+        }
         return new Unknown64(machInst);
     } // decodeSveStoreStructsSS
 
     StaticInstPtr
     decodeSveStoreStructsSI(ExtMachInst machInst)
     {
+        IntRegIndex zt = (IntRegIndex) (uint8_t) bits(machInst, 4, 0);
+        IntRegIndex rn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5);
+        int64_t imm = sext<4>(bits(machInst, 19, 16));
+        IntRegIndex pg = (IntRegIndex) (uint8_t) bits(machInst, 12, 10);
+        uint8_t msz = bits(machInst, 24, 23);
+        uint8_t num = bits(machInst, 22, 21);
+
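+        // Example: msz = 0b01, num = 0b10 decodes to st3h; num is
+        // incremented below to the actual register count and the signed
+        // simm4 offset is scaled by it.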
+        if (num != 0) {
+            num++;
+            imm *= num;
+            return decodeSveStructStoreSIInsts(msz, machInst,
+                    zt, pg, rn, imm, num);
+        }
         return new Unknown64(machInst);
     } // decodeSveStoreStructsSI
diff --git a/src/arch/arm/isa/insts/sve_mem.isa b/src/arch/arm/isa/insts/sve_mem.isa
index e776deb59..32a078dbd 100644
--- a/src/arch/arm/isa/insts/sve_mem.isa
+++ b/src/arch/arm/isa/insts/sve_mem.isa
@@ -204,6 +204,238 @@ output header {{
 
 output decoder {{
 
+    template <typename Element>
+    StaticInstPtr
+    decodeSveStructLoadSIInstsByNReg(uint8_t esize, ExtMachInst machInst,
+            IntRegIndex zt, IntRegIndex pg, IntRegIndex xn,
+            int64_t imm, int numregs)
+    {
+        static const char* nm[5][4] = {
+            { nullptr, nullptr, nullptr, nullptr},
+            { nullptr, nullptr, nullptr, nullptr},
+            { "ld2b", "ld2h", "ld2w", "ld2d" },
+            { "ld3b", "ld3h", "ld3w", "ld3d" },
+            { "ld4b", "ld4h", "ld4w", "ld4d" } };
+
+        switch (numregs) {
+          case 2:
+            return new SveLdStructSI<Element,
+                    SveLoadRegImmMicroop,
+                    SveDeIntrlv2Microop>(
+                    nm[numregs][esize], machInst, MemReadOp,
+                    zt, pg, xn, imm, numregs);
+          case 3:
+            return new SveLdStructSI<Element,
+                    SveLoadRegImmMicroop,
+                    SveDeIntrlv3Microop>(
+                    nm[numregs][esize], machInst, MemReadOp,
+                    zt, pg, xn, imm, numregs);
+          case 4:
+            return new SveLdStructSI<Element,
+                    SveLoadRegImmMicroop,
+                    SveDeIntrlv4Microop>(
+                    nm[numregs][esize], machInst, MemReadOp,
+                    zt, pg, xn, imm, numregs);
+        }
+        return new Unknown64(machInst);
+    }
+
+    StaticInstPtr
+    decodeSveStructLoadSIInsts(uint8_t esize, ExtMachInst machInst,
+            IntRegIndex zt, IntRegIndex pg, IntRegIndex xn,
+            int64_t imm, int numregs)
+    {
+        switch (esize) {
+          case 0:
+            return decodeSveStructLoadSIInstsByNReg<uint8_t>(esize,
+                    machInst, zt, pg, xn, imm, numregs);
+          case 1:
+            return decodeSveStructLoadSIInstsByNReg<uint16_t>(esize,
+                    machInst, zt, pg, xn, imm, numregs);
+          case 2:
+            return decodeSveStructLoadSIInstsByNReg<uint32_t>(esize,
+                    machInst, zt, pg, xn, imm, numregs);
+          case 3:
+            return decodeSveStructLoadSIInstsByNReg<uint64_t>(esize,
+                    machInst, zt, pg, xn, imm, numregs);
+        }
+        return new Unknown64(machInst);
+    }
+
+    template <typename Element>
+    StaticInstPtr
+    decodeSveStructStoreSIInstsByNReg(uint8_t esize, ExtMachInst machInst,
+            IntRegIndex zt, IntRegIndex pg, IntRegIndex xn,
+            int64_t imm, int numregs)
+    {
+        static const char* nm[5][4] = {
+            { nullptr, nullptr, nullptr, nullptr},
+            { nullptr, nullptr, nullptr, nullptr},
+            { "st2b", "st2h", "st2w", "st2d" },
+            { "st3b", "st3h", "st3w", "st3d" },
+            { "st4b", "st4h", "st4w", "st4d" } };
+
+        switch (numregs) {
+          case 2:
+            return new SveStStructSI<Element,
+                    SveStoreRegImmMicroop,
+                    SveIntrlv2Microop>(
+                    nm[numregs][esize], machInst, MemWriteOp,
+                    zt, pg, xn, imm, numregs);
+          case 3:
+            return new SveStStructSI<Element,
+                    SveStoreRegImmMicroop,
+                    SveIntrlv3Microop>(
+                    nm[numregs][esize], machInst, MemWriteOp,
+                    zt, pg, xn, imm, numregs);
+          case 4:
+            return new SveStStructSI<Element,
+                    SveStoreRegImmMicroop,
+                    SveIntrlv4Microop>(
+                    nm[numregs][esize], machInst, MemWriteOp,
+                    zt, pg, xn, imm, numregs);
+        }
+        return new Unknown64(machInst);
+    }
+
+    StaticInstPtr
+    decodeSveStructStoreSIInsts(uint8_t esize, ExtMachInst machInst,
+            IntRegIndex zt, IntRegIndex pg, IntRegIndex xn,
+            int64_t imm, int numregs)
+    {
+        switch (esize) {
+          case 0:
+            return decodeSveStructStoreSIInstsByNReg<uint8_t>(esize,
+                    machInst, zt, pg, xn, imm, numregs);
+          case 1:
+            return decodeSveStructStoreSIInstsByNReg<uint16_t>(esize,
+                    machInst, zt, pg, xn, imm, numregs);
+          case 2:
+            return decodeSveStructStoreSIInstsByNReg<uint32_t>(esize,
+                    machInst, zt, pg, xn, imm, numregs);
+          case 3:
+            return decodeSveStructStoreSIInstsByNReg<uint64_t>(esize,
+                    machInst, zt, pg, xn, imm, numregs);
+        }
+        return new Unknown64(machInst);
+    }
+
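+    // The scalar-plus-scalar decoders below mirror the immediate forms:
+    // esize selects the element type and numregs selects the matching
+    // de/interleave micro-op; the offset comes from Xm and is scaled by
+    // sizeof(Element) when the effective address is computed.
+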
+    template <typename Element>
+    StaticInstPtr
+    decodeSveStructLoadSSInstsByNReg(uint8_t esize, ExtMachInst machInst,
+            IntRegIndex zt, IntRegIndex pg, IntRegIndex xn,
+            IntRegIndex xm, int numregs)
+    {
+        static const char* nm[5][4] = {
+            { nullptr, nullptr, nullptr, nullptr},
+            { nullptr, nullptr, nullptr, nullptr},
+            { "ld2b", "ld2h", "ld2w", "ld2d" },
+            { "ld3b", "ld3h", "ld3w", "ld3d" },
+            { "ld4b", "ld4h", "ld4w", "ld4d" } };
+
+        switch (numregs) {
+          case 2:
+            return new SveLdStructSS<Element,
+                    SveLoadRegRegMicroop,
+                    SveDeIntrlv2Microop>(
+                    nm[numregs][esize], machInst, MemReadOp,
+                    zt, pg, xn, xm, numregs);
+          case 3:
+            return new SveLdStructSS<Element,
+                    SveLoadRegRegMicroop,
+                    SveDeIntrlv3Microop>(
+                    nm[numregs][esize], machInst, MemReadOp,
+                    zt, pg, xn, xm, numregs);
+          case 4:
+            return new SveLdStructSS<Element,
+                    SveLoadRegRegMicroop,
+                    SveDeIntrlv4Microop>(
+                    nm[numregs][esize], machInst, MemReadOp,
+                    zt, pg, xn, xm, numregs);
+        }
+        return new Unknown64(machInst);
+    }
+
+    StaticInstPtr
+    decodeSveStructLoadSSInsts(uint8_t esize, ExtMachInst machInst,
+            IntRegIndex zt, IntRegIndex pg, IntRegIndex xn,
+            IntRegIndex xm, int numregs)
+    {
+        switch (esize) {
+          case 0:
+            return decodeSveStructLoadSSInstsByNReg<uint8_t>(esize,
+                    machInst, zt, pg, xn, xm, numregs);
+          case 1:
+            return decodeSveStructLoadSSInstsByNReg<uint16_t>(esize,
+                    machInst, zt, pg, xn, xm, numregs);
+          case 2:
+            return decodeSveStructLoadSSInstsByNReg<uint32_t>(esize,
+                    machInst, zt, pg, xn, xm, numregs);
+          case 3:
+            return decodeSveStructLoadSSInstsByNReg<uint64_t>(esize,
+                    machInst, zt, pg, xn, xm, numregs);
+        }
+        return new Unknown64(machInst);
+    }
+
+    template <typename Element>
+    StaticInstPtr
+    decodeSveStructStoreSSInstsByNReg(uint8_t esize, ExtMachInst machInst,
+            IntRegIndex zt, IntRegIndex pg, IntRegIndex xn,
+            IntRegIndex xm, int numregs)
+    {
+        static const char* nm[5][4] = {
+            { nullptr, nullptr, nullptr, nullptr},
+            { nullptr, nullptr, nullptr, nullptr},
+            { "st2b", "st2h", "st2w", "st2d" },
+            { "st3b", "st3h", "st3w", "st3d" },
+            { "st4b", "st4h", "st4w", "st4d" } };
+
+        switch (numregs) {
+          case 2:
+            return new SveStStructSS<Element,
+                    SveStoreRegRegMicroop,
+                    SveIntrlv2Microop>(
+                    nm[numregs][esize], machInst, MemWriteOp,
+                    zt, pg, xn, xm, numregs);
+          case 3:
+            return new SveStStructSS<Element,
+                    SveStoreRegRegMicroop,
+                    SveIntrlv3Microop>(
+                    nm[numregs][esize], machInst, MemWriteOp,
+                    zt, pg, xn, xm, numregs);
+          case 4:
+            return new SveStStructSS<Element,
+                    SveStoreRegRegMicroop,
+                    SveIntrlv4Microop>(
+                    nm[numregs][esize], machInst, MemWriteOp,
+                    zt, pg, xn, xm, numregs);
+        }
+        return new Unknown64(machInst);
+    }
+
+    StaticInstPtr
+    decodeSveStructStoreSSInsts(uint8_t esize, ExtMachInst machInst,
+            IntRegIndex zt, IntRegIndex pg, IntRegIndex xn,
+            IntRegIndex xm, int numregs)
+    {
+        switch (esize) {
+          case 0:
+            return decodeSveStructStoreSSInstsByNReg<uint8_t>(esize,
+                    machInst, zt, pg, xn, xm, numregs);
+          case 1:
+            return decodeSveStructStoreSSInstsByNReg<uint16_t>(esize,
+                    machInst, zt, pg, xn, xm, numregs);
+          case 2:
+            return decodeSveStructStoreSSInstsByNReg<uint32_t>(esize,
+                    machInst, zt, pg, xn, xm, numregs);
+          case 3:
+            return decodeSveStructStoreSSInstsByNReg<uint64_t>(esize,
+                    machInst, zt, pg, xn, xm, numregs);
+        }
+        return new Unknown64(machInst);
+    }
+
     StaticInstPtr
     decodeSveGatherLoadVIInsts(uint8_t dtype, ExtMachInst machInst,
             IntRegIndex zt, IntRegIndex pg, IntRegIndex zn,
@@ -1018,6 +1250,231 @@ let {{
         header_output += SveGatherLoadCpySrcVecMicroopDeclare.subst(iop)
         exec_output += SveGatherLoadCpySrcVecMicroopExecute.subst(iop)
 
+    def emitSveInterleaveMicroop():
+        global header_output, exec_output, decoders
+        code2 = sveEnabledCheckCode + '''
+        unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
+                xc->tcBase());
+        for (unsigned int i = 0; i < eCount; ++i) {
+            unsigned int absIdx = regIndex * eCount + i;
+            unsigned int srcIdx = absIdx / numRegs;
+            unsigned int srcVec = absIdx % numRegs;
+            if (srcVec == 0)
+                AA64FpDest_x[i] = AA64FpOp1V0S_x[srcIdx];
+            else if (srcVec == 1)
+                AA64FpDest_x[i] = AA64FpOp1V1S_x[srcIdx];
+        }'''
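+
+        # Interleave mapping: element i of interleaved register r takes
+        # element (r*eCount + i) / numRegs of source vector
+        # (r*eCount + i) % numRegs. E.g., with numRegs = 2 and eCount = 4:
+        # INTRLVREG0 = {z0[0], z1[0], z0[1], z1[1]} and
+        # INTRLVREG1 = {z0[2], z1[2], z0[3], z1[3]}.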
+        code3 = sveEnabledCheckCode + '''
+        unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
+                xc->tcBase());
+        for (unsigned int i = 0; i < eCount; ++i) {
+            unsigned int absIdx = regIndex * eCount + i;
+            unsigned int srcIdx = absIdx / numRegs;
+            unsigned int srcVec = absIdx % numRegs;
+            if (srcVec == 0)
+                AA64FpDest_x[i] = AA64FpOp1V0S_x[srcIdx];
+            else if (srcVec == 1)
+                AA64FpDest_x[i] = AA64FpOp1V1S_x[srcIdx];
+            else if (srcVec == 2)
+                AA64FpDest_x[i] = AA64FpOp1V2S_x[srcIdx];
+        }'''
+
+        code4 = sveEnabledCheckCode + '''
+        unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
+                xc->tcBase());
+        for (unsigned int i = 0; i < eCount; ++i) {
+            unsigned int absIdx = regIndex * eCount + i;
+            unsigned int srcIdx = absIdx / numRegs;
+            unsigned int srcVec = absIdx % numRegs;
+            if (srcVec == 0)
+                AA64FpDest_x[i] = AA64FpOp1V0S_x[srcIdx];
+            else if (srcVec == 1)
+                AA64FpDest_x[i] = AA64FpOp1V1S_x[srcIdx];
+            else if (srcVec == 2)
+                AA64FpDest_x[i] = AA64FpOp1V2S_x[srcIdx];
+            else if (srcVec == 3)
+                AA64FpDest_x[i] = AA64FpOp1V3S_x[srcIdx];
+        }'''
+
+        iop2 = InstObjParams('intrlv',
+                             'SveIntrlv2Microop',
+                             'MicroOp',
+                             {'code': code2},
+                             ['IsMicroop'])
+        iop3 = InstObjParams('intrlv',
+                             'SveIntrlv3Microop',
+                             'MicroOp',
+                             {'code': code3},
+                             ['IsMicroop'])
+        iop4 = InstObjParams('intrlv',
+                             'SveIntrlv4Microop',
+                             'MicroOp',
+                             {'code': code4},
+                             ['IsMicroop'])
+        header_output += SveIntrlvMicroopDeclare.subst(iop2);
+        header_output += SveIntrlvMicroopDeclare.subst(iop3);
+        header_output += SveIntrlvMicroopDeclare.subst(iop4);
+        exec_output += SveIntrlvMicroopExecute.subst(iop2);
+        exec_output += SveIntrlvMicroopExecute.subst(iop3);
+        exec_output += SveIntrlvMicroopExecute.subst(iop4);
+        for type in ('uint8_t', 'uint16_t', 'uint32_t', 'uint64_t'):
+            for nreg in range(2,5):
+                substDict = {'targs' : type,
+                             'class_name' : 'SveIntrlv' + str(nreg) + 'Microop'}
+                exec_output += SveIntrlvMicroopExecDeclare.subst(substDict)
+
+    def emitSveDeInterleaveMicroop():
+        global header_output, exec_output, decoders
+        code2 = sveEnabledCheckCode + '''
+        unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
+                xc->tcBase());
+        for (unsigned int i = 0; i < eCount; ++i) {
+            unsigned int absIdx = (regIndex + numRegs * i);
+            unsigned int srcIdx = absIdx % eCount;
+            unsigned int srcVec = absIdx / eCount;
+            if (srcVec == 0)
+                AA64FpDest_x[i] = AA64IntrlvReg0_x[srcIdx];
+            else if (srcVec == 1)
+                AA64FpDest_x[i] = AA64IntrlvReg1_x[srcIdx];
+        }'''
+
+        code3 = sveEnabledCheckCode + '''
+        unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
+                xc->tcBase());
+        for (unsigned int i = 0; i < eCount; ++i) {
+            unsigned int absIdx = (regIndex + numRegs * i);
+            unsigned int srcIdx = absIdx % eCount;
+            unsigned int srcVec = absIdx / eCount;
+            if (srcVec == 0)
+                AA64FpDest_x[i] = AA64IntrlvReg0_x[srcIdx];
+            else if (srcVec == 1)
+                AA64FpDest_x[i] = AA64IntrlvReg1_x[srcIdx];
+            else if (srcVec == 2)
+                AA64FpDest_x[i] = AA64IntrlvReg2_x[srcIdx];
+        }'''
+
+        code4 = sveEnabledCheckCode + '''
+        unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
+                xc->tcBase());
+        for (unsigned int i = 0; i < eCount; ++i) {
+            unsigned int absIdx = (regIndex + numRegs * i);
+            unsigned int srcIdx = absIdx % eCount;
+            unsigned int srcVec = absIdx / eCount;
+            if (srcVec == 0)
+                AA64FpDest_x[i] = AA64IntrlvReg0_x[srcIdx];
+            else if (srcVec == 1)
+                AA64FpDest_x[i] = AA64IntrlvReg1_x[srcIdx];
+            else if (srcVec == 2)
+                AA64FpDest_x[i] = AA64IntrlvReg2_x[srcIdx];
+            else if (srcVec == 3)
+                AA64FpDest_x[i] = AA64IntrlvReg3_x[srcIdx];
+        }'''
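+
+        # De-interleave mapping (the inverse): element i of architectural
+        # destination regIndex comes from stream position
+        # regIndex + numRegs*i. E.g., for a 2-register load with
+        # eCount = 4: z0 = {INTRLVREG0[0], INTRLVREG0[2],
+        # INTRLVREG1[0], INTRLVREG1[2]}.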
+
+        iop2 = InstObjParams('deintrlv',
+                             'SveDeIntrlv2Microop',
+                             'MicroOp',
+                             {'code': code2},
+                             ['IsMicroop'])
+        iop3 = InstObjParams('deintrlv',
+                             'SveDeIntrlv3Microop',
+                             'MicroOp',
+                             {'code': code3},
+                             ['IsMicroop'])
+        iop4 = InstObjParams('deintrlv',
+                             'SveDeIntrlv4Microop',
+                             'MicroOp',
+                             {'code': code4},
+                             ['IsMicroop'])
+        header_output += SveDeIntrlvMicroopDeclare.subst(iop2);
+        header_output += SveDeIntrlvMicroopDeclare.subst(iop3);
+        header_output += SveDeIntrlvMicroopDeclare.subst(iop4);
+        exec_output += SveIntrlvMicroopExecute.subst(iop2);
+        exec_output += SveIntrlvMicroopExecute.subst(iop3);
+        exec_output += SveIntrlvMicroopExecute.subst(iop4);
+        for type in ('uint8_t', 'uint16_t', 'uint32_t', 'uint64_t'):
+            for nreg in range(2,5):
+                substDict = {'targs' : type,
+                             'class_name' : 'SveDeIntrlv' + str(nreg) + 'Microop'}
+                exec_output += SveIntrlvMicroopExecDeclare.subst(substDict)
+
+    # Generates definitions for SVE struct load/store microops
+    def emitSveStructMemInsts(offsetIsImm):
+        global header_output, exec_output, decoders
+        eaCode = SPAlignmentCheckCode + '''
+        int memAccessSize = eCount * sizeof(Element);
+        EA = memAccessSize * regIndex + XBase + '''
+        if offsetIsImm:
+            eaCode += '((int64_t) this->imm * eCount * sizeof(Element))'
+        else:
+            eaCode += '(XOffset * sizeof(Element));'
+        loadMemAccCode = '''
+        for (int i = 0; i < eCount; i++) {
+            int gpIdx = (regIndex * eCount + i) / numRegs;
+            if (GpOp_x[gpIdx]) {
+                AA64FpDest_x[i] = memDataView[i];
+            } else {
+                AA64FpDest_x[i] = 0;
+            }
+        }
+        '''
+        storeMemAccCode = '''
+        for (int i = 0; i < eCount; i++) {
+            int gpIdx = (regIndex * eCount + i) / numRegs;
+            if (GpOp_x[gpIdx]) {
+                memDataView[i] = AA64FpDest_x[i];
+            } else {
+                memDataView[i] = 0;
+                for (int j = 0; j < sizeof(Element); j++) {
+                    wrEn[sizeof(Element) * i + j] = false;
+                }
+            }
+        }
+        '''
+        storeWrEnableCode = '''
+        auto wrEn = std::vector<bool>(sizeof(Element) * eCount, true);
+        '''
+        loadIop = InstObjParams('ldxx',
+            'SveLoadRegImmMicroop' if offsetIsImm else 'SveLoadRegRegMicroop',
+            'MicroOp',
+            {'targs': 'Element',
+             'memacc_code': loadMemAccCode,
+             'ea_code' : sveEnabledCheckCode + eaCode,
+             'fa_code' : ''},
+            ['IsMemRef', 'IsLoad', 'IsMicroop'])
+        storeIop = InstObjParams('stxx',
+            'SveStoreRegImmMicroop' if offsetIsImm
+            else 'SveStoreRegRegMicroop',
+            'MicroOp',
+            {'targs': 'Element',
+             'wren_code': storeWrEnableCode,
+             'memacc_code': storeMemAccCode,
+             'ea_code' : sveEnabledCheckCode + eaCode,
+             'fa_code' : ''},
+            ['IsMemRef', 'IsStore', 'IsMicroop'])
+        if offsetIsImm:
+            header_output += SveStructMemSIMicroopDeclare.subst(loadIop)
+            header_output += SveStructMemSIMicroopDeclare.subst(storeIop)
+        else:
+            header_output += SveStructMemSSMicroopDeclare.subst(loadIop)
+            header_output += SveStructMemSSMicroopDeclare.subst(storeIop)
+        exec_output += (
+            SveStructLoadExecute.subst(loadIop) +
+            SveStructLoadInitiateAcc.subst(loadIop) +
+            SveStructLoadCompleteAcc.subst(loadIop) +
+            SveStructStoreExecute.subst(storeIop) +
+            SveStructStoreInitiateAcc.subst(storeIop) +
+            SveStructStoreCompleteAcc.subst(storeIop))
+        tplArgs = ('uint8_t', 'uint16_t', 'uint32_t', 'uint64_t')
+        for type in tplArgs:
+            substDict = {'targs': type,
+                         'class_name': 'SveLoadRegImmMicroop' if offsetIsImm
+                                       else 'SveLoadRegRegMicroop'}
+            exec_output += SveStructMemExecDeclare.subst(substDict)
+            substDict['class_name'] = ('SveStoreRegImmMicroop' if offsetIsImm
+                                       else 'SveStoreRegRegMicroop')
+            exec_output += SveStructMemExecDeclare.subst(substDict)
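+
+    # Effective address of memory micro-op regIndex:
+    #   EA = XBase + offset + regIndex * VL
+    # where VL = eCount * sizeof(Element) and offset is the scaled
+    # immediate (already multiplied by numregs in the decoder) times VL,
+    # or XOffset * sizeof(Element) for the scalar-plus-scalar form.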
+
     # LD1[S]{B,H,W,D} (scalar plus immediate)
     # ST1[S]{B,H,W,D} (scalar plus immediate)
     # LDNF1[S]{B,H,W,D} (scalar plus immediate)
@@ -1030,6 +1487,13 @@ let {{
     # LD1R[S]{B,H,W,D}
     emitSveLoadAndRepl()
 
+    # LD{2,3,4}{B,H,W,D} (scalar plus immediate)
+    # ST{2,3,4}{B,H,W,D} (scalar plus immediate)
+    emitSveStructMemInsts(offsetIsImm = True)
+    # LD{2,3,4}{B,H,W,D} (scalar plus scalar)
+    # ST{2,3,4}{B,H,W,D} (scalar plus scalar)
+    emitSveStructMemInsts(offsetIsImm = False)
+
     # LDR (predicate), STR (predicate)
     emitSveMemFillSpill(True)
     # LDR (vector), STR (vector)
@@ -1049,4 +1513,8 @@ let {{
 
     # Source vector copy microop for gather loads
     emitSveGatherLoadCpySrcVecMicroop()
+
+    # ST/LD struct de/interleave microops
+    emitSveInterleaveMicroop()
+    emitSveDeInterleaveMicroop()
 }};
diff --git a/src/arch/arm/isa/operands.isa b/src/arch/arm/isa/operands.isa
index aaa64e7b0..5eae9b4b9 100644
--- a/src/arch/arm/isa/operands.isa
+++ b/src/arch/arm/isa/operands.isa
@@ -530,6 +530,51 @@ def operands {{
         'AA64FpDestQV1L': vectorRegElem('0', 'tud', zeroing = True)
     }),
 
+    # Temporary registers for SVE interleaving
+    'AA64IntrlvReg0': vectorReg('INTRLVREG0',
+                       {
+                        'AA64IntrlvReg0P0': vectorRegElem('0'),
+                        'AA64IntrlvReg0P1': vectorRegElem('1'),
+                        'AA64IntrlvReg0P2': vectorRegElem('2'),
+                        'AA64IntrlvReg0P3': vectorRegElem('3'),
+                        'AA64IntrlvReg0S': vectorRegElem('0', 'sf', zeroing = True),
+                        'AA64IntrlvReg0D': vectorRegElem('0', 'df', zeroing = True),
+                        'AA64IntrlvReg0Q': vectorRegElem('0', 'tud', zeroing = True)
+                       }),
+
+    'AA64IntrlvReg1': vectorReg('INTRLVREG1',
+                       {
+                        'AA64IntrlvReg1P0': vectorRegElem('0'),
+                        'AA64IntrlvReg1P1': vectorRegElem('1'),
+                        'AA64IntrlvReg1P2': vectorRegElem('2'),
+                        'AA64IntrlvReg1P3': vectorRegElem('3'),
+                        'AA64IntrlvReg1S': vectorRegElem('0', 'sf', zeroing = True),
+                        'AA64IntrlvReg1D': vectorRegElem('0', 'df', zeroing = True),
+                        'AA64IntrlvReg1Q': vectorRegElem('0', 'tud', zeroing = True)
+                       }),
+
+    'AA64IntrlvReg2': vectorReg('INTRLVREG2',
+                       {
+                        'AA64IntrlvReg2P0': vectorRegElem('0'),
+                        'AA64IntrlvReg2P1': vectorRegElem('1'),
+                        'AA64IntrlvReg2P2': vectorRegElem('2'),
+                        'AA64IntrlvReg2P3': vectorRegElem('3'),
+                        'AA64IntrlvReg2S': vectorRegElem('0', 'sf', zeroing = True),
+                        'AA64IntrlvReg2D': vectorRegElem('0', 'df', zeroing = True),
+                        'AA64IntrlvReg2Q': vectorRegElem('0', 'tud', zeroing = True)
+                       }),
+
+    'AA64IntrlvReg3': vectorReg('INTRLVREG3',
+                       {
+                        'AA64IntrlvReg3P0': vectorRegElem('0'),
+                        'AA64IntrlvReg3P1': vectorRegElem('1'),
+                        'AA64IntrlvReg3P2': vectorRegElem('2'),
+                        'AA64IntrlvReg3P3': vectorRegElem('3'),
+                        'AA64IntrlvReg3S': vectorRegElem('0', 'sf', zeroing = True),
+                        'AA64IntrlvReg3D': vectorRegElem('0', 'df', zeroing = True),
+                        'AA64IntrlvReg3Q': vectorRegElem('0', 'tud', zeroing = True)
+                       }),
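+
+    # These operands name the four extra physical vector registers
+    # (INTRLVREG0-3 in registers.hh) that the struct load/store
+    # micro-ops use as interleaving staging space.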
+
     'AA64FpDestMerge': vectorReg('dest',
                                  {
                                      'AA64FpDestMergeP0': vectorRegElem('0'),
diff --git a/src/arch/arm/isa/templates/sve_mem.isa b/src/arch/arm/isa/templates/sve_mem.isa
index 5e2e55333..dced5f4a5 100644
--- a/src/arch/arm/isa/templates/sve_mem.isa
+++ b/src/arch/arm/isa/templates/sve_mem.isa
@@ -815,3 +815,449 @@ def template SveGatherLoadCpySrcVecMicroopExecute {{
         return fault;
     }
 }};
+
+def template SveStructMemSIMicroopDeclare {{
+    template <class _Element>
+    class %(class_name)s : public %(base_class)s
+    {
+      protected:
+        typedef _Element Element;
+        typedef _Element TPElem;
+
+        IntRegIndex dest;
+        IntRegIndex gp;
+        IntRegIndex base;
+        int64_t imm;
+
+        uint8_t numRegs;
+        int regIndex;
+
+        unsigned memAccessFlags;
+
+        bool baseIsSP;
+
+      public:
+        %(class_name)s(const char* mnem, ExtMachInst machInst,
+                IntRegIndex _dest, IntRegIndex _gp, IntRegIndex _base,
+                int64_t _imm, uint8_t _numRegs, int _regIndex)
+            : %(base_class)s(mnem, machInst, %(op_class)s),
+              dest(_dest), gp(_gp), base(_base), imm(_imm),
+              numRegs(_numRegs), regIndex(_regIndex),
+              memAccessFlags(ArmISA::TLB::AllowUnaligned |
+                      ArmISA::TLB::MustBeOne)
+        {
+            %(constructor)s;
+            baseIsSP = isSP(_base);
+        }
+
+        Fault execute(ExecContext *, Trace::InstRecord *) const;
+        Fault initiateAcc(ExecContext *, Trace::InstRecord *) const;
+        Fault completeAcc(PacketPtr, ExecContext *, Trace::InstRecord *) const;
+
+        virtual void
+        annotateFault(ArmFault *fault)
+        {
+            %(fa_code)s
+        }
+
+        std::string
+        generateDisassembly(Addr pc, const SymbolTable *symtab) const
+        {
+            std::stringstream ss;
+            printMnemonic(ss, "", false);
+            ccprintf(ss, "{");
+            switch (dest) {
+              case INTRLVREG0:
+                ccprintf(ss, "INTRLV0");
+                break;
+              case INTRLVREG1:
+                ccprintf(ss, "INTRLV1");
+                break;
+              case INTRLVREG2:
+                ccprintf(ss, "INTRLV2");
+                break;
+              case INTRLVREG3:
+                ccprintf(ss, "INTRLV3");
+                break;
+              default:
+                printVecReg(ss, dest, true);
+                break;
+            }
+            ccprintf(ss, "}, ");
+            printVecPredReg(ss, gp);
+            if (_opClass == MemReadOp) {
+                ccprintf(ss, "/z");
+            }
+            ccprintf(ss, ", [");
+            printIntReg(ss, base);
+            if (imm != 0) {
+                ccprintf(ss, ", #%%d", imm * sizeof(Element));
+            }
+            ccprintf(ss, "] (uop reg %%d tfer)", regIndex);
+            return ss.str();
+        }
+    };
+}};
+
+def template SveStructMemExecDeclare {{
+    template
+    Fault %(class_name)s<%(targs)s>::execute(ExecContext *,
+            Trace::InstRecord *) const;
+
+    template
+    Fault %(class_name)s<%(targs)s>::initiateAcc(ExecContext *,
+            Trace::InstRecord *) const;
+
+    template
+    Fault %(class_name)s<%(targs)s>::completeAcc(PacketPtr,
+            ExecContext *, Trace::InstRecord *) const;
+}};
+
+def template SveStructLoadExecute {{
+    template <class Element>
+    Fault %(class_name)s<Element>::execute(ExecContext *xc,
+            Trace::InstRecord *traceData) const
+    {
+        Addr EA;
+        Fault fault = NoFault;
+        bool aarch64 M5_VAR_USED = true;
+        unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
+                xc->tcBase());
+
+        %(op_decl)s;
+        %(op_rd)s;
+        %(ea_code)s;
+
+        TheISA::VecRegContainer memData;
+        auto memDataView = memData.as<Element>();
+
+        if (fault == NoFault) {
+            fault = xc->readMem(EA, memData.raw_ptr<uint8_t>(),
+                    memAccessSize, this->memAccessFlags);
+            %(memacc_code)s;
+        }
+
+        if (fault == NoFault) {
+            %(op_wb)s;
+        }
+
+        return fault;
+    }
+}};
+
+def template SveStructLoadInitiateAcc {{
+    template <class Element>
+    Fault %(class_name)s<Element>::initiateAcc(ExecContext *xc,
+            Trace::InstRecord *traceData) const
+    {
+        Addr EA;
+        Fault fault = NoFault;
+        bool aarch64 M5_VAR_USED = true;
+        unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
+                xc->tcBase());
+
+        %(op_src_decl)s;
+        %(op_rd)s;
+
+        %(ea_code)s;
+
+        if (fault == NoFault) {
+            fault = xc->initiateMemRead(EA, memAccessSize,
+                    this->memAccessFlags);
+        }
+
+        return fault;
+    }
+}};
+
+def template SveStructLoadCompleteAcc {{
+    template <class Element>
+    Fault %(class_name)s<Element>::completeAcc(PacketPtr pkt,
+            ExecContext *xc, Trace::InstRecord *traceData) const
+    {
+        Fault fault = NoFault;
+        bool aarch64 M5_VAR_USED = true;
+        unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
+                xc->tcBase());
+
+        %(op_decl)s;
+        %(op_rd)s;
+
+        TheISA::VecRegContainer memData;
+        auto memDataView = memData.as<Element>();
+
+        memcpy(memData.raw_ptr<uint8_t>(), pkt->getPtr<uint8_t>(),
+                pkt->getSize());
+
+        if (fault == NoFault) {
+            %(memacc_code)s;
+        }
+
+        if (fault == NoFault) {
+            %(op_wb)s;
+        }
+
+        return fault;
+    }
+}};
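+
+// Predicated-out elements of a structure store are masked rather than
+// written: the wrEn byte-enable vector built by the store templates
+// below is cleared for every byte of an inactive element before being
+// passed to writeMem.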
+
+def template SveStructStoreExecute {{
+    template <class Element>
+    Fault %(class_name)s<Element>::execute(ExecContext *xc,
+            Trace::InstRecord *traceData) const
+    {
+        Addr EA;
+        Fault fault = NoFault;
+        bool aarch64 M5_VAR_USED = true;
+        unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
+                xc->tcBase());
+
+        %(op_decl)s;
+        %(op_rd)s;
+        %(ea_code)s;
+
+        TheISA::VecRegContainer memData;
+        auto memDataView = memData.as<Element>();
+
+        %(wren_code)s;
+
+        if (fault == NoFault) {
+            %(memacc_code)s;
+        }
+
+        if (fault == NoFault) {
+            fault = xc->writeMem(memData.raw_ptr<uint8_t>(), memAccessSize,
+                    EA, this->memAccessFlags, NULL, wrEn);
+        }
+
+        if (fault == NoFault) {
+            %(op_wb)s;
+        }
+
+        return fault;
+    }
+}};
+
+def template SveStructStoreInitiateAcc {{
+    template <class Element>
+    Fault %(class_name)s<Element>::initiateAcc(ExecContext *xc,
+            Trace::InstRecord *traceData) const
+    {
+        Addr EA;
+        Fault fault = NoFault;
+        bool aarch64 M5_VAR_USED = true;
+        unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
+                xc->tcBase());
+
+        %(op_decl)s;
+        %(op_rd)s;
+        %(ea_code)s;
+
+        TheISA::VecRegContainer memData;
+        auto memDataView = memData.as<Element>();
+
+        %(wren_code)s;
+
+        if (fault == NoFault) {
+            %(memacc_code)s;
+        }
+
+        if (fault == NoFault) {
+            fault = xc->writeMem(memData.raw_ptr<uint8_t>(), memAccessSize,
+                    EA, this->memAccessFlags, NULL, wrEn);
+        }
+
+        return fault;
+    }
+}};
+
+def template SveStructStoreCompleteAcc {{
+    template <class Element>
+    Fault %(class_name)s<Element>::completeAcc(PacketPtr pkt,
+            ExecContext *xc, Trace::InstRecord *traceData) const
+    {
+        return NoFault;
+    }
+}};
+
+def template SveStructMemSSMicroopDeclare {{
+    template <class _Element>
+    class %(class_name)s : public %(base_class)s
+    {
+      protected:
+        typedef _Element Element;
+        typedef _Element TPElem;
+
+        IntRegIndex dest;
+        IntRegIndex gp;
+        IntRegIndex base;
+        IntRegIndex offset;
+
+        uint8_t numRegs;
+        int regIndex;
+
+        unsigned memAccessFlags;
+
+        bool baseIsSP;
+
+      public:
+        %(class_name)s(const char* mnem, ExtMachInst machInst,
+                IntRegIndex _dest, IntRegIndex _gp, IntRegIndex _base,
+                IntRegIndex _offset, uint8_t _numRegs, int _regIndex)
+            : %(base_class)s(mnem, machInst, %(op_class)s),
+              dest(_dest), gp(_gp), base(_base), offset(_offset),
+              numRegs(_numRegs), regIndex(_regIndex),
+              memAccessFlags(ArmISA::TLB::AllowUnaligned |
+                      ArmISA::TLB::MustBeOne)
+        {
+            %(constructor)s;
+            baseIsSP = isSP(_base);
+        }
+
+        Fault execute(ExecContext *, Trace::InstRecord *) const;
+        Fault initiateAcc(ExecContext *, Trace::InstRecord *) const;
+        Fault completeAcc(PacketPtr, ExecContext *, Trace::InstRecord *) const;
+
+        virtual void
+        annotateFault(ArmFault *fault)
+        {
+            %(fa_code)s
+        }
+
+        std::string
+        generateDisassembly(Addr pc, const SymbolTable *symtab) const
+        {
+            std::stringstream ss;
+            printMnemonic(ss, "", false);
+            ccprintf(ss, "{");
+            switch (dest) {
+              case INTRLVREG0:
+                ccprintf(ss, "INTRLV0");
+                break;
+              case INTRLVREG1:
+                ccprintf(ss, "INTRLV1");
+                break;
+              case INTRLVREG2:
+                ccprintf(ss, "INTRLV2");
+                break;
+              case INTRLVREG3:
+                ccprintf(ss, "INTRLV3");
+                break;
+              default:
+                printVecReg(ss, dest, true);
+                break;
+            }
+            ccprintf(ss, "}, ");
+            printVecPredReg(ss, gp);
+            if (_opClass == MemReadOp) {
+                ccprintf(ss, "/z");
+            }
+            ccprintf(ss, ", [");
+            printIntReg(ss, base);
+            ccprintf(ss, ", ");
+            printIntReg(ss, offset);
+            ccprintf(ss, "] (uop reg %%d tfer)", regIndex);
+            return ss.str();
+        }
+    };
+}};
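+
+// The interleave/de-interleave micro-ops keep a pointer to their parent
+// macro-op only so that generateDisassembly() can print the complete
+// architectural instruction alongside the "(uop ...)" annotation.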
+
+def template SveIntrlvMicroopDeclare {{
+    template <class _Element>
+    class %(class_name)s : public %(base_class)s
+    {
+      protected:
+        typedef _Element Element;
+        typedef _Element TPElem;
+        IntRegIndex dest;
+        IntRegIndex op1;
+        uint8_t numRegs;
+        int regIndex;
+
+        StaticInst *macroOp;
+
+      public:
+        %(class_name)s(const char* mnem, ExtMachInst machInst,
+                IntRegIndex _dest, IntRegIndex _op1,
+                uint8_t _numRegs, int _regIndex, StaticInst *_macroOp)
+            : MicroOp(mnem, machInst, SimdAluOp),
+              dest(_dest), op1(_op1), numRegs(_numRegs), regIndex(_regIndex),
+              macroOp(_macroOp)
+        {
+            %(constructor)s;
+        }
+
+        Fault execute(ExecContext *, Trace::InstRecord *) const;
+
+        std::string
+        generateDisassembly(Addr pc, const SymbolTable *symtab) const
+        {
+            std::stringstream ss;
+            ccprintf(ss, "%%s", macroOp->disassemble(pc, symtab));
+            ccprintf(ss, " (uop interleave)");
+            return ss.str();
+        }
+    };
+}};
+
+def template SveDeIntrlvMicroopDeclare {{
+    template <class _Element>
+    class %(class_name)s : public %(base_class)s
+    {
+      protected:
+        typedef _Element Element;
+        typedef _Element TPElem;
+        IntRegIndex dest;
+        uint8_t numRegs;
+        int regIndex;
+
+        StaticInst *macroOp;
+
+      public:
+        %(class_name)s(const char* mnem, ExtMachInst machInst,
+                IntRegIndex _dest, uint8_t _numRegs, int _regIndex,
+                StaticInst *_macroOp)
+            : MicroOp(mnem, machInst, SimdAluOp),
+              dest(_dest), numRegs(_numRegs), regIndex(_regIndex),
+              macroOp(_macroOp)
+        {
+            %(constructor)s;
+        }
+
+        Fault execute(ExecContext *, Trace::InstRecord *) const;
+
+        std::string
+        generateDisassembly(Addr pc, const SymbolTable *symtab) const
+        {
+            std::stringstream ss;
+            ccprintf(ss, "%%s", macroOp->disassemble(pc, symtab));
+            ccprintf(ss, " (uop deinterleave)");
+            return ss.str();
+        }
+    };
+}};
+
+def template SveIntrlvMicroopExecDeclare {{
+    template
+    Fault %(class_name)s<%(targs)s>::execute(
+            ExecContext *, Trace::InstRecord *) const;
+}};
+
+def template SveIntrlvMicroopExecute {{
+    template <class Element>
+    Fault %(class_name)s<Element>::execute(ExecContext *xc,
+            Trace::InstRecord *traceData) const
+    {
+        Fault fault = NoFault;
+        %(op_decl)s;
+        %(op_rd)s;
+
+        %(code)s;
+        if (fault == NoFault)
+        {
+            %(op_wb)s;
+        }
+
+        return fault;
+    }
+}};
diff --git a/src/arch/arm/registers.hh b/src/arch/arm/registers.hh
index 3790d9d5c..4a8e960d4 100644
--- a/src/arch/arm/registers.hh
+++ b/src/arch/arm/registers.hh
@@ -85,15 +85,20 @@
 const int NumVecV7ArchRegs  = 64;
 const int NumVecV8ArchRegs  = 32;
 const int NumVecSpecialRegs = 8;
+const int NumVecIntrlvRegs = 4;
 
 const int NumIntRegs = NUM_INTREGS;
 const int NumFloatRegs = NumFloatV8ArchRegs + NumFloatSpecialRegs;
-const int NumVecRegs = NumVecV8ArchRegs + NumVecSpecialRegs;
+const int NumVecRegs = NumVecV8ArchRegs + NumVecSpecialRegs + NumVecIntrlvRegs;
 const int VECREG_UREG0 = 32;
 const int NumVecPredRegs = 18;  // P0-P15, FFR, UREG0
 const int PREDREG_FFR = 16;
 const int PREDREG_UREG0 = 17;
 const int NumCCRegs = NUM_CCREGS;
 const int NumMiscRegs = NUM_MISCREGS;
+const int INTRLVREG0 = NumVecV8ArchRegs + NumVecSpecialRegs;
+const int INTRLVREG1 = INTRLVREG0 + 1;
+const int INTRLVREG2 = INTRLVREG0 + 2;
+const int INTRLVREG3 = INTRLVREG0 + 3;
 
 #define ISA_HAS_CC_REGS
-- 
2.30.2