From 3cf4a04fceef321b5cd6ece9a4ff1814787a236d Mon Sep 17 00:00:00 2001 From: Giacomo Gabrielli Date: Tue, 23 Oct 2018 13:57:05 +0100 Subject: [PATCH] arch-arm: Add initial support for SVE gather/scatter loads/stores Change-Id: I891623015b47a39f61ed616f8896f32a7134c8e2 Signed-off-by: Giacomo Gabrielli Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/13521 Reviewed-by: Andreas Sandberg Maintainer: Andreas Sandberg Tested-by: kokoro --- src/arch/arm/insts/sve_macromem.hh | 224 ++++++++++ src/arch/arm/isa/formats/sve_2nd_level.isa | 458 ++++++++++++++++----- src/arch/arm/isa/includes.isa | 10 +- src/arch/arm/isa/insts/sve_mem.isa | 434 ++++++++++++++++++- src/arch/arm/isa/operands.isa | 35 +- src/arch/arm/isa/templates/sve_mem.isa | 341 ++++++++++++++- src/arch/arm/registers.hh | 1 + 7 files changed, 1403 insertions(+), 100 deletions(-) create mode 100644 src/arch/arm/insts/sve_macromem.hh diff --git a/src/arch/arm/insts/sve_macromem.hh b/src/arch/arm/insts/sve_macromem.hh new file mode 100644 index 000000000..a31af9b92 --- /dev/null +++ b/src/arch/arm/insts/sve_macromem.hh @@ -0,0 +1,224 @@ +/* + * Copyright (c) 2018 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ *
+ * Authors: Giacomo Gabrielli
+ */
+
+#ifndef __ARCH_ARM_SVE_MACROMEM_HH__
+#define __ARCH_ARM_SVE_MACROMEM_HH__
+
+#include "arch/arm/generated/decoder.hh"
+#include "arch/arm/insts/pred_inst.hh"
+
+namespace ArmISA {
+
+template <typename RegElemType, typename MemElemType,
+          template <typename, typename> class MicroopType>
+class SveIndexedMemVI : public PredMacroOp
+{
+  protected:
+    IntRegIndex dest;
+    IntRegIndex gp;
+    IntRegIndex base;
+    uint64_t imm;
+
+  public:
+    SveIndexedMemVI(const char *mnem, ExtMachInst machInst, OpClass __opClass,
+                    IntRegIndex _dest, IntRegIndex _gp, IntRegIndex _base,
+                    uint64_t _imm)
+        : PredMacroOp(mnem, machInst, __opClass),
+          dest(_dest), gp(_gp), base(_base), imm(_imm)
+    {
+        bool isLoad = (__opClass == MemReadOp);
+
+        int num_elems = ((machInst.sveLen + 1) * 16) / sizeof(RegElemType);
+
+        numMicroops = num_elems;
+        if (isLoad) {
+            numMicroops++;
+        }
+
+        microOps = new StaticInstPtr[numMicroops];
+
+        StaticInstPtr *uop = microOps;
+
+        if (isLoad) {
+            // The first microop of a gather load copies the source vector
+            // register used for address calculation to an auxiliary register,
+            // with all subsequent microops reading from the latter. This is
+            // needed to properly handle cases where the source vector
+            // register is the same as the destination register
+            *uop = new ArmISAInst::SveGatherLoadCpySrcVecMicroop(
+                mnem, machInst, _base, this);
+            uop++;
+        }
+
+        for (int i = 0; i < num_elems; i++, uop++) {
+            *uop = new MicroopType<RegElemType, MemElemType>(
+                mnem, machInst, __opClass, _dest, _gp,
+                isLoad ? (IntRegIndex) VECREG_UREG0 : _base, _imm, i,
+                num_elems);
+        }
+
+        --uop;
+        (*uop)->setLastMicroop();
+        microOps[0]->setFirstMicroop();
+
+        for (StaticInstPtr *uop = microOps; !(*uop)->isLastMicroop(); uop++) {
+            (*uop)->setDelayedCommit();
+        }
+    }
+
+    Fault
+    execute(ExecContext *, Trace::InstRecord *) const
+    {
+        panic("Execute method called when it shouldn't!");
+        return NoFault;
+    }
+
+    std::string
+    generateDisassembly(Addr pc, const SymbolTable *symtab) const
+    {
+        // TODO: add suffix to transfer and base registers
+        std::stringstream ss;
+        printMnemonic(ss, "", false);
+        ccprintf(ss, "{");
+        printVecReg(ss, dest, true);
+        ccprintf(ss, "}, ");
+        printVecPredReg(ss, gp);
+        ccprintf(ss, "/z, [");
+        printVecReg(ss, base, true);
+        if (imm != 0) {
+            ccprintf(ss, ", #%d", imm * sizeof(MemElemType));
+        }
+        ccprintf(ss, "]");
+        return ss.str();
+    }
+};
+
+template <typename RegElemType, typename MemElemType,
+          template <typename, typename> class MicroopType>
+class SveIndexedMemSV : public PredMacroOp
+{
+  protected:
+    IntRegIndex dest;
+    IntRegIndex gp;
+    IntRegIndex base;
+    IntRegIndex offset;
+
+    bool offsetIs32;
+    bool offsetIsSigned;
+    bool offsetIsScaled;
+
+  public:
+    SveIndexedMemSV(const char *mnem, ExtMachInst machInst, OpClass __opClass,
+                    IntRegIndex _dest, IntRegIndex _gp, IntRegIndex _base,
+                    IntRegIndex _offset, bool _offsetIs32,
+                    bool _offsetIsSigned, bool _offsetIsScaled)
+        : PredMacroOp(mnem, machInst, __opClass),
+          dest(_dest), gp(_gp), base(_base), offset(_offset),
+          offsetIs32(_offsetIs32), offsetIsSigned(_offsetIsSigned),
+          offsetIsScaled(_offsetIsScaled)
+    {
+        bool isLoad = (__opClass == MemReadOp);
+
+        int num_elems = ((machInst.sveLen + 1) * 16) / sizeof(RegElemType);
+
+        numMicroops = num_elems;
+        if (isLoad) {
+            numMicroops++;
+        }
+
+        microOps = new StaticInstPtr[numMicroops];
+
+        StaticInstPtr *uop = microOps;
+
+        if (isLoad) {
+            // The first microop of a gather load copies the source vector
+            // register used for address calculation to an auxiliary register,
+            // with all subsequent microops reading from the latter.
This is + // needed to properly handle cases where the source vector + // register is the same as the destination register + *uop = new ArmISAInst::SveGatherLoadCpySrcVecMicroop( + mnem, machInst, _offset, this); + uop++; + } + + for (int i = 0; i < num_elems; i++, uop++) { + *uop = new MicroopType( + mnem, machInst, __opClass, _dest, _gp, _base, + isLoad ? (IntRegIndex) VECREG_UREG0 : _offset, _offsetIs32, + _offsetIsSigned, _offsetIsScaled, i, num_elems); + } + + --uop; + (*uop)->setLastMicroop(); + microOps[0]->setFirstMicroop(); + + for (StaticInstPtr *uop = microOps; !(*uop)->isLastMicroop(); uop++) { + (*uop)->setDelayedCommit(); + } + } + + Fault + execute(ExecContext *, Trace::InstRecord *) const + { + panic("Execute method called when it shouldn't!"); + return NoFault; + } + + std::string + generateDisassembly(Addr pc, const SymbolTable *symtab) const + { + // TODO: add suffix to transfer and base registers + std::stringstream ss; + printMnemonic(ss, "", false); + ccprintf(ss, "{"); + printVecReg(ss, dest, true); + ccprintf(ss, "}, "); + printVecPredReg(ss, gp); + ccprintf(ss, "/z, ["); + printIntReg(ss, base); + ccprintf(ss, ", "); + printVecReg(ss, offset, true); + ccprintf(ss, "]"); + return ss.str(); + } +}; + +} // namespace ArmISA + +#endif // __ARCH_ARM_SVE_MACROMEM_HH__ diff --git a/src/arch/arm/isa/formats/sve_2nd_level.isa b/src/arch/arm/isa/formats/sve_2nd_level.isa index e81ab3ed7..7b2d3af49 100644 --- a/src/arch/arm/isa/formats/sve_2nd_level.isa +++ b/src/arch/arm/isa/formats/sve_2nd_level.isa @@ -2896,34 +2896,153 @@ namespace Aarch64 StaticInstPtr decodeSveMemGather32(ExtMachInst machInst) { - // TODO: for now only LDR and LD1R are implemented - if (bits(machInst, 22) && bits(machInst, 15)) { - IntRegIndex zt = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); - IntRegIndex rn = makeSP( - (IntRegIndex) (uint8_t) bits(machInst, 9, 5)); - uint64_t imm = bits(machInst, 21, 16); - IntRegIndex pg = (IntRegIndex) (uint8_t) bits(machInst, 12, 10); - uint8_t dtype = (bits(machInst, 24, 23) << 2) | - bits(machInst, 14, 13); - return decodeSveContigLoadSIInsts( - dtype, machInst, zt, pg, rn, imm, false, true); - } else if (bits(machInst, 24, 22) == 0x6 && - bits(machInst, 15, 13) == 0x0 && - bits(machInst, 4) == 0x0) { - IntRegIndex pt = (IntRegIndex) (uint8_t) bits(machInst, 3, 0); - IntRegIndex rn = makeSP( - (IntRegIndex) (uint8_t) bits(machInst, 9, 5)); - uint64_t imm = sext<9>((bits(machInst, 21, 16) << 3) | - bits(machInst, 12, 10)); - return new SveLdrPred(machInst, pt, rn, imm); - } else if (bits(machInst, 24, 22) == 0x6 && - bits(machInst, 15, 13) == 0x2) { - IntRegIndex zt = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); - IntRegIndex rn = makeSP( - (IntRegIndex) (uint8_t) bits(machInst, 9, 5)); - uint64_t imm = sext<9>((bits(machInst, 21, 16) << 3) | - bits(machInst, 12, 10)); - return new SveLdrVec(machInst, zt, rn, imm); + if (bits(machInst, 15)) { + if (bits(machInst, 22)) { + // SVE load and broadcast element + IntRegIndex zt = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex rn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + uint64_t imm = bits(machInst, 21, 16); + IntRegIndex pg = (IntRegIndex) (uint8_t) + bits(machInst, 12, 10); + uint8_t dtype = (bits(machInst, 24, 23) << 2) | + bits(machInst, 14, 13); + return decodeSveContigLoadSIInsts( + dtype, machInst, zt, pg, rn, imm, false, true); + } else { + if (bits(machInst, 21)) { + // SVE 32-bit gather load (vector plus immediate) + IntRegIndex zt = (IntRegIndex) (uint8_t) + bits(machInst, 4, 0); + 
IntRegIndex zn = (IntRegIndex) (uint8_t) + bits(machInst, 9, 5); + uint64_t imm = bits(machInst, 20, 16); + IntRegIndex pg = (IntRegIndex) (uint8_t) + bits(machInst, 12, 10); + uint8_t dtype = (bits(machInst, 24, 23) << 1) | + bits(machInst, 14); + uint8_t ff = bits(machInst, 13); + if (ff) { + return new Unknown64(machInst); + } + return decodeSveGatherLoadVIInsts( + dtype, machInst, zt, pg, zn, imm, true, ff); + } else { + uint8_t b14_13 = bits(machInst, 14, 13); + if (b14_13 == 0x2 && bits(machInst, 4) == 0) { + // TODO: SVE contiguous prefetch (scalar plus scalar) + return new Unknown64(machInst); + } else if (b14_13 == 0x3 && bits(machInst, 4) == 0) { + // TODO: SVE 32-bit gather prefetch (vector plus + // immediate) + return new Unknown64(machInst); + } + } + } + } else { + uint8_t b24_23 = bits(machInst, 24, 23); + if (b24_23 != 0x3 && bits(machInst, 21) == 0) { + // SVE 32-bit gather load (scalar plus 32-bit unscaled offsets) + IntRegIndex zt = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex rn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + IntRegIndex zm = (IntRegIndex) (uint8_t) + bits(machInst, 20, 16); + IntRegIndex pg = (IntRegIndex) (uint8_t) + bits(machInst, 12, 10); + uint8_t dtype = (bits(machInst, 24, 23) << 1) | + bits(machInst, 14); + uint8_t xs = bits(machInst, 22); + uint8_t ff = bits(machInst, 13); + if (ff) { + return new Unknown64(machInst); + } + return decodeSveGatherLoadSVInsts( + dtype, machInst, zt, pg, rn, zm, + true, true, xs, false, ff); + } + switch (b24_23) { + case 0x0: + if (bits(machInst, 21) && bits(machInst, 4) == 0) { + // TODO: SVE 32-bit gather prefetch (vector plus immediate) + break; + } + break; + case 0x1: + if (bits(machInst, 21)) { + // SVE 32-bit gather load halfwords (scalar plus 32-bit + // scaled offsets) + IntRegIndex zt = (IntRegIndex) (uint8_t) + bits(machInst, 4, 0); + IntRegIndex rn = (IntRegIndex) (uint8_t) + bits(machInst, 9, 5); + IntRegIndex zm = (IntRegIndex) (uint8_t) + bits(machInst, 20, 16); + IntRegIndex pg = (IntRegIndex) (uint8_t) + bits(machInst, 12, 10); + uint8_t xs = bits(machInst, 22); + uint8_t ff = bits(machInst, 13); + if (ff) { + return new Unknown64(machInst); + } + if (bits(machInst, 14)) { + return new SveIndexedMemSV( + "ld1", machInst, MemReadOp, zt, pg, rn, zm, + true, xs, true); + } else { + return new SveIndexedMemSV( + "ld1", machInst, MemReadOp, zt, pg, rn, zm, + true, xs, true); + } + } + break; + case 0x2: + if (bits(machInst, 21)) { + // SVE 32-bit gather load words (scalar plus 32-bit scaled + // offsets) + IntRegIndex zt = (IntRegIndex) (uint8_t) + bits(machInst, 4, 0); + IntRegIndex rn = (IntRegIndex) (uint8_t) + bits(machInst, 9, 5); + IntRegIndex zm = (IntRegIndex) (uint8_t) + bits(machInst, 20, 16); + IntRegIndex pg = (IntRegIndex) (uint8_t) + bits(machInst, 12, 10); + uint8_t xs = bits(machInst, 22); + uint8_t ff = bits(machInst, 13); + if (ff) { + return new Unknown64(machInst); + } + return new SveIndexedMemSV( + "ld1", machInst, MemReadOp, zt, pg, rn, zm, + true, xs, true); + } + break; + case 0x3: + if (bits(machInst, 22) == 0 && bits(machInst, 14, 13) == 0x0 && + bits(machInst, 4) == 0) { + // SVE load predicate register + IntRegIndex pt = (IntRegIndex) (uint8_t) + bits(machInst, 3, 0); + IntRegIndex rn = (IntRegIndex) (uint8_t) + bits(machInst, 9, 5); + uint64_t imm = sext<9>((bits(machInst, 21, 16) << 3) | + bits(machInst, 12, 10)); + return new SveLdrPred(machInst, pt, rn, imm); + } else if (bits(machInst, 22) == 0 && + bits(machInst, 14, 13) == 0x2) { + // SVE load 
vector register + IntRegIndex zt = (IntRegIndex) (uint8_t) + bits(machInst, 4, 0); + IntRegIndex rn = (IntRegIndex) (uint8_t) + bits(machInst, 9, 5); + uint64_t imm = sext<9>((bits(machInst, 21, 16) << 3) | + bits(machInst, 12, 10)); + return new SveLdrVec(machInst, zt, rn, imm); + } + break; + } } return new Unknown64(machInst); } // decodeSveMemGather32 @@ -3048,6 +3167,124 @@ namespace Aarch64 StaticInstPtr decodeSveMemGather64(ExtMachInst machInst) { + switch ((bits(machInst, 21) << 1) | bits(machInst, 15)) { + case 0x0: + { + // SVE 64-bit gather load (scalar plus unpacked 32-bit unscaled + // offsets) + IntRegIndex zt = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex rn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + IntRegIndex zm = (IntRegIndex) (uint8_t) + bits(machInst, 20, 16); + IntRegIndex pg = (IntRegIndex) (uint8_t) + bits(machInst, 12, 10); + uint8_t dtype = (bits(machInst, 24, 23) << 1) | + bits(machInst, 14); + uint8_t xs = bits(machInst, 22); + uint8_t ff = bits(machInst, 13); + if (ff) { + return new Unknown64(machInst); + } + return decodeSveGatherLoadSVInsts( + dtype, machInst, zt, pg, rn, zm, + false, true, xs, false, ff); + } + case 0x1: + if (bits(machInst, 22)) { + // SVE 64-bit gather load (scalar plus 64-bit unscaled offsets) + IntRegIndex zt = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex rn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + IntRegIndex zm = (IntRegIndex) (uint8_t) + bits(machInst, 20, 16); + IntRegIndex pg = (IntRegIndex) (uint8_t) + bits(machInst, 12, 10); + uint8_t dtype = (bits(machInst, 24, 23) << 1) | + bits(machInst, 14); + uint8_t ff = bits(machInst, 13); + if (ff) { + return new Unknown64(machInst); + } + return decodeSveGatherLoadSVInsts( + dtype, machInst, zt, pg, rn, zm, + false, false, false, false, ff); + } else { + if (bits(machInst, 14, 13) == 0x3 && bits(machInst, 4) == 0) { + // TODO: SVE 64-bit gather prefetch (vector plus immediate) + break; + } + } + break; + case 0x2: + if (bits(machInst, 24, 23) != 0x0) { + // SVE 64-bit gather load (scalar plus unpacked 32-bit scaled + // offsets) + IntRegIndex zt = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex rn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + IntRegIndex zm = (IntRegIndex) (uint8_t) + bits(machInst, 20, 16); + IntRegIndex pg = (IntRegIndex) (uint8_t) + bits(machInst, 12, 10); + uint8_t dtype = (bits(machInst, 24, 23) << 1) | + bits(machInst, 14); + uint8_t xs = bits(machInst, 22); + uint8_t ff = bits(machInst, 13); + if (ff) { + return new Unknown64(machInst); + } + return decodeSveGatherLoadSVInsts( + dtype, machInst, zt, pg, rn, zm, + false, true, xs, true, ff); + } else if (bits(machInst, 4) == 0) { + // TODO: SVE 64-bit gather prefetch (scalar plus unpacked + // 32-bit scaled offsets) + return new Unknown64(machInst); + } + break; + case 0x3: + if (bits(machInst, 22) == 0) { + // SVE 64-bit gather load (vector plus immediate) + IntRegIndex zt = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex zn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + uint64_t imm = bits(machInst, 20, 16); + IntRegIndex pg = (IntRegIndex) (uint8_t) + bits(machInst, 12, 10); + uint8_t dtype = (bits(machInst, 24, 23) << 1) | + bits(machInst, 14); + uint8_t ff = bits(machInst, 13); + if (ff) { + return new Unknown64(machInst); + } + return decodeSveGatherLoadVIInsts( + dtype, machInst, zt, pg, zn, imm, false, ff); + } else { + if (bits(machInst, 24, 23) != 0x0) { + // SVE 64-bit gather load (scalar plus 64-bit scaled + // offsets) + 
IntRegIndex zt = (IntRegIndex) (uint8_t) + bits(machInst, 4, 0); + IntRegIndex rn = (IntRegIndex) (uint8_t) + bits(machInst, 9, 5); + IntRegIndex zm = (IntRegIndex) (uint8_t) + bits(machInst, 20, 16); + IntRegIndex pg = (IntRegIndex) (uint8_t) + bits(machInst, 12, 10); + uint8_t dtype = (bits(machInst, 24, 23) << 1) | + bits(machInst, 14); + uint8_t ff = bits(machInst, 13); + if (ff) { + return new Unknown64(machInst); + } + return decodeSveGatherLoadSVInsts( + dtype, machInst, zt, pg, rn, zm, + false, false, false, true, ff); + } else if (bits(machInst, 4) == 0) { + // TODO: SVE 64-bit gather prefetch (scalar plus 64-bit + // scaled offsets) + break; + } + } + break; + } return new Unknown64(machInst); } // decodeSveMemGather64 @@ -3086,36 +3323,12 @@ namespace Aarch64 return new Unknown64(machInst); } // decodeSveContigNTStoreSS - StaticInstPtr - decodeSveScatterStore64SV32U(ExtMachInst machInst) - { - return new Unknown64(machInst); - } // decodeSveScatterStore64SV32U - - StaticInstPtr - decodeSveScatterStore64SV64U(ExtMachInst machInst) - { - return new Unknown64(machInst); - } // decodeSveScatterStore64SV64U - StaticInstPtr decodeSveContigNTStoreSI(ExtMachInst machInst) { return new Unknown64(machInst); } // decodeSveContigNTStoreSI - StaticInstPtr - decodeSveScatterStore64VI(ExtMachInst machInst) - { - return new Unknown64(machInst); - } // decodeSveScatterStore64VI - - StaticInstPtr - decodeSveScatterStore32SV32S(ExtMachInst machInst) - { - return new Unknown64(machInst); - } // decodeSveScatterStore32SV32S - StaticInstPtr decodeSveStoreStructsSS(ExtMachInst machInst) { @@ -3128,30 +3341,6 @@ namespace Aarch64 return new Unknown64(machInst); } // decodeSveStoreStructsSI - StaticInstPtr - decodeSveScatterStore32SV32U(ExtMachInst machInst) - { - return new Unknown64(machInst); - } // decodeSveScatterStore32SV32U - - StaticInstPtr - decodeSveScatterStore32VI(ExtMachInst machInst) - { - return new Unknown64(machInst); - } // decodeSveScatterStore32VI - - StaticInstPtr - decodeSveScatterStore64SV32S(ExtMachInst machInst) - { - return new Unknown64(machInst); - } // decodeSveScatterStore64SV32S - - StaticInstPtr - decodeSveScatterStore64SV64S(ExtMachInst machInst) - { - return new Unknown64(machInst); - } // decodeSveScatterStore64SV64S - StaticInstPtr decodeSveMemStore(ExtMachInst machInst) { @@ -3186,37 +3375,118 @@ namespace Aarch64 } case 0x4: case 0x6: - switch (bits(machInst, 22, 21)) { - case 0x0: - return decodeSveScatterStore64SV32U(machInst); - case 0x1: - if (bits(machInst, 24, 23) != 0x0) { - return decodeSveScatterStore64SV32S(machInst); - } - break; - case 0x2: - if (bits(machInst, 24, 23) != 0x3) { - return decodeSveScatterStore32SV32U(machInst); + { + IntRegIndex zt = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex rn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + IntRegIndex zm = (IntRegIndex) (uint8_t) + bits(machInst, 20, 16); + IntRegIndex pg = (IntRegIndex) (uint8_t) + bits(machInst, 12, 10); + uint8_t msz = bits(machInst, 24, 23); + uint8_t xs = bits(machInst, 22); + + switch (bits(machInst, 22, 21)) { + case 0x0: + // SVE 64-bit scatter store (scalar plus unpacked 32-bit + // unscaled offsets) + return decodeSveScatterStoreSVInsts( + msz, machInst, zt, pg, rn, zm, + false, true, xs, false); + case 0x1: + if (bits(machInst, 24, 23) != 0x0) { + // SVE 64-bit scatter store (scalar plus unpacked + // 32-bit scaled offsets) + return decodeSveScatterStoreSVInsts( + msz, machInst, zt, pg, rn, zm, + false, true, xs, true); + } + break; + case 0x2: + if 
(bits(machInst, 24, 23) != 0x3) { + // SVE 32-bit scatter store (scalar plus 32-bit + // unscaled offsets) + return decodeSveScatterStoreSVInsts( + msz, machInst, zt, pg, rn, zm, + true, true, xs, false); + } + break; + case 0x3: + // SVE 32-bit scatter store (scalar plus 32-bit scaled + // offsets) + return decodeSveScatterStoreSVInsts( + msz, machInst, zt, pg, rn, zm, + true, true, xs, true); } - break; - case 0x3: - return decodeSveScatterStore32SV32S(machInst); } break; case 0x5: switch (bits(machInst, 22, 21)) { case 0x0: - return decodeSveScatterStore64SV64U(machInst); + { + // SVE 64-bit scatter store (scalar plus 64-bit unscaled + // offsets) + IntRegIndex zt = (IntRegIndex) (uint8_t) + bits(machInst, 4, 0); + IntRegIndex rn = (IntRegIndex) (uint8_t) + bits(machInst, 9, 5); + IntRegIndex zm = (IntRegIndex) (uint8_t) + bits(machInst, 20, 16); + IntRegIndex pg = (IntRegIndex) (uint8_t) + bits(machInst, 12, 10); + uint8_t msz = bits(machInst, 24, 23); + + return decodeSveScatterStoreSVInsts( + msz, machInst, zt, pg, rn, zm, + false, false, false, false); + } case 0x1: if (bits(machInst, 24, 23) != 0x0) { - return decodeSveScatterStore64SV64S(machInst); + // SVE 64-bit scatter store (scalar plus 64-bit scaled + // offsets) + IntRegIndex zt = (IntRegIndex) (uint8_t) + bits(machInst, 4, 0); + IntRegIndex rn = (IntRegIndex) (uint8_t) + bits(machInst, 9, 5); + IntRegIndex zm = (IntRegIndex) (uint8_t) + bits(machInst, 20, 16); + IntRegIndex pg = (IntRegIndex) (uint8_t) + bits(machInst, 12, 10); + uint8_t msz = bits(machInst, 24, 23); + + return decodeSveScatterStoreSVInsts( + msz, machInst, zt, pg, rn, zm, + false, false, false, true); } break; case 0x2: - return decodeSveScatterStore64VI(machInst); + { + // SVE 64-bit scatter store (vector plus immediate) + IntRegIndex zt = (IntRegIndex) (uint8_t) + bits(machInst, 4, 0); + IntRegIndex zn = (IntRegIndex) (uint8_t) + bits(machInst, 9, 5); + uint64_t imm = bits(machInst, 20, 16); + IntRegIndex pg = (IntRegIndex) (uint8_t) + bits(machInst, 12, 10); + uint8_t msz = bits(machInst, 24, 23); + + return decodeSveScatterStoreVIInsts( + msz, machInst, zt, pg, zn, imm, false); + } case 0x3: if (bits(machInst, 24, 23) != 0x3) { - return decodeSveScatterStore64VI(machInst); + // SVE 32-bit scatter store (vector plus immediate) + IntRegIndex zt = (IntRegIndex) (uint8_t) + bits(machInst, 4, 0); + IntRegIndex zn = (IntRegIndex) (uint8_t) + bits(machInst, 9, 5); + uint64_t imm = bits(machInst, 20, 16); + IntRegIndex pg = (IntRegIndex) (uint8_t) + bits(machInst, 12, 10); + uint8_t msz = bits(machInst, 24, 23); + + return decodeSveScatterStoreVIInsts( + msz, machInst, zt, pg, zn, imm, true); } break; } diff --git a/src/arch/arm/isa/includes.isa b/src/arch/arm/isa/includes.isa index 9aef8c651..f054bc862 100644 --- a/src/arch/arm/isa/includes.isa +++ b/src/arch/arm/isa/includes.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010, 2012, 2017 ARM Limited +// Copyright (c) 2010, 2012, 2017-2018 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -68,9 +68,10 @@ output header {{ #include "arch/arm/insts/sve_mem.hh" #include "arch/arm/insts/vfp.hh" #include "arch/arm/isa_traits.hh" +#include "enums/DecoderFlavour.hh" #include "mem/packet.hh" #include "sim/faults.hh" -#include "enums/DecoderFlavour.hh" + }}; output decoder {{ @@ -80,11 +81,12 @@ output decoder {{ #include "arch/arm/decoder.hh" #include "arch/arm/faults.hh" +#include "arch/arm/insts/sve_macromem.hh" #include "arch/arm/intregs.hh" 
#include "arch/arm/isa_traits.hh" #include "arch/arm/utility.hh" -#include "base/loader/symtab.hh" #include "base/cprintf.hh" +#include "base/loader/symtab.hh" #include "cpu/thread_context.hh" using namespace ArmISA; @@ -102,8 +104,10 @@ output exec {{ #include "base/crc.hh" #include "cpu/base.hh" #include "sim/pseudo_inst.hh" + #if defined(linux) #include + #endif #include "base/cp_annotate.hh" diff --git a/src/arch/arm/isa/insts/sve_mem.isa b/src/arch/arm/isa/insts/sve_mem.isa index f4ca4c3c9..3102e800a 100644 --- a/src/arch/arm/isa/insts/sve_mem.isa +++ b/src/arch/arm/isa/insts/sve_mem.isa @@ -1,4 +1,4 @@ -// Copyright (c) 2017 ARM Limited +// Copyright (c) 2017-2018 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -204,6 +204,288 @@ output header {{ }}; +output decoder {{ + + StaticInstPtr + decodeSveGatherLoadVIInsts(uint8_t dtype, ExtMachInst machInst, + IntRegIndex zt, IntRegIndex pg, IntRegIndex zn, + uint64_t imm, bool esizeIs32, + bool firstFaulting) + { + const char* mn = firstFaulting ? "ldff1" : "ld1"; + switch (dtype) { + case 0x0: + if (esizeIs32) { + return new SveIndexedMemVI( + mn, machInst, MemReadOp, zt, pg, zn, imm); + } else { + return new SveIndexedMemVI( + mn, machInst, MemReadOp, zt, pg, zn, imm); + } + case 0x1: + if (esizeIs32) { + return new SveIndexedMemVI( + mn, machInst, MemReadOp, zt, pg, zn, imm); + } else { + return new SveIndexedMemVI( + mn, machInst, MemReadOp, zt, pg, zn, imm); + } + case 0x2: + if (esizeIs32) { + return new SveIndexedMemVI( + mn, machInst, MemReadOp, zt, pg, zn, imm); + } else { + return new SveIndexedMemVI( + mn, machInst, MemReadOp, zt, pg, zn, imm); + } + case 0x3: + if (esizeIs32) { + return new SveIndexedMemVI( + mn, machInst, MemReadOp, zt, pg, zn, imm); + } else { + return new SveIndexedMemVI( + mn, machInst, MemReadOp, zt, pg, zn, imm); + } + case 0x4: + if (esizeIs32) { + break; + } else { + return new SveIndexedMemVI( + mn, machInst, MemReadOp, zt, pg, zn, imm); + } + case 0x5: + if (esizeIs32) { + return new SveIndexedMemVI( + mn, machInst, MemReadOp, zt, pg, zn, imm); + } else { + return new SveIndexedMemVI( + mn, machInst, MemReadOp, zt, pg, zn, imm); + } + case 0x7: + if (esizeIs32) { + break; + } else { + return new SveIndexedMemVI( + mn, machInst, MemReadOp, zt, pg, zn, imm); + } + } + return new Unknown64(machInst); + } + + StaticInstPtr + decodeSveGatherLoadSVInsts(uint8_t dtype, ExtMachInst machInst, + IntRegIndex zt, IntRegIndex pg, IntRegIndex rn, + IntRegIndex zm, bool esizeIs32, bool offsetIs32, + bool offsetIsSigned, bool offsetIsScaled, + bool firstFaulting) + { + const char* mn = firstFaulting ? 
"ldff1" : "ld1"; + switch (dtype) { + case 0x0: + if (esizeIs32) { + return new SveIndexedMemSV( + mn, machInst, MemReadOp, zt, pg, rn, zm, + offsetIs32, offsetIsSigned, offsetIsScaled); + } else { + return new SveIndexedMemSV( + mn, machInst, MemReadOp, zt, pg, rn, zm, + offsetIs32, offsetIsSigned, offsetIsScaled); + } + case 0x1: + if (esizeIs32) { + return new SveIndexedMemSV( + mn, machInst, MemReadOp, zt, pg, rn, zm, + offsetIs32, offsetIsSigned, offsetIsScaled); + } else { + return new SveIndexedMemSV( + mn, machInst, MemReadOp, zt, pg, rn, zm, + offsetIs32, offsetIsSigned, offsetIsScaled); + } + case 0x2: + if (esizeIs32) { + return new SveIndexedMemSV( + mn, machInst, MemReadOp, zt, pg, rn, zm, + offsetIs32, offsetIsSigned, offsetIsScaled); + } else { + return new SveIndexedMemSV( + mn, machInst, MemReadOp, zt, pg, rn, zm, + offsetIs32, offsetIsSigned, offsetIsScaled); + } + case 0x3: + if (esizeIs32) { + return new SveIndexedMemSV( + mn, machInst, MemReadOp, zt, pg, rn, zm, + offsetIs32, offsetIsSigned, offsetIsScaled); + } else { + return new SveIndexedMemSV( + mn, machInst, MemReadOp, zt, pg, rn, zm, + offsetIs32, offsetIsSigned, offsetIsScaled); + } + case 0x4: + if (esizeIs32) { + break; + } else { + return new SveIndexedMemSV( + mn, machInst, MemReadOp, zt, pg, rn, zm, + offsetIs32, offsetIsSigned, offsetIsScaled); + } + case 0x5: + if (esizeIs32) { + return new SveIndexedMemSV( + mn, machInst, MemReadOp, zt, pg, rn, zm, + offsetIs32, offsetIsSigned, offsetIsScaled); + } else { + return new SveIndexedMemSV( + mn, machInst, MemReadOp, zt, pg, rn, zm, + offsetIs32, offsetIsSigned, offsetIsScaled); + } + case 0x7: + if (esizeIs32) { + break; + } else { + return new SveIndexedMemSV( + mn, machInst, MemReadOp, zt, pg, rn, zm, + offsetIs32, offsetIsSigned, offsetIsScaled); + } + } + return new Unknown64(machInst); + } + + StaticInstPtr + decodeSveScatterStoreVIInsts(uint8_t msz, ExtMachInst machInst, + IntRegIndex zt, IntRegIndex pg, + IntRegIndex zn, uint64_t imm, + bool esizeIs32) + { + const char* mn = "st1"; + switch (msz) { + case 0x0: + if (esizeIs32) { + return new SveIndexedMemVI( + mn, machInst, MemWriteOp, zt, pg, zn, imm); + } else { + return new SveIndexedMemVI( + mn, machInst, MemWriteOp, zt, pg, zn, imm); + } + case 0x1: + if (esizeIs32) { + return new SveIndexedMemVI( + mn, machInst, MemWriteOp, zt, pg, zn, imm); + } else { + return new SveIndexedMemVI( + mn, machInst, MemWriteOp, zt, pg, zn, imm); + } + case 0x2: + if (esizeIs32) { + return new SveIndexedMemVI( + mn, machInst, MemWriteOp, zt, pg, zn, imm); + } else { + return new SveIndexedMemVI( + mn, machInst, MemWriteOp, zt, pg, zn, imm); + } + case 0x3: + if (esizeIs32) { + break; + } else { + return new SveIndexedMemVI( + mn, machInst, MemWriteOp, zt, pg, zn, imm); + } + } + return new Unknown64(machInst); + } + + StaticInstPtr + decodeSveScatterStoreSVInsts(uint8_t msz, ExtMachInst machInst, + IntRegIndex zt, IntRegIndex pg, + IntRegIndex rn, IntRegIndex zm, + bool esizeIs32, bool offsetIs32, + bool offsetIsSigned, bool offsetIsScaled) + { + const char* mn = "st1"; + switch (msz) { + case 0x0: + if (esizeIs32) { + return new SveIndexedMemSV( + mn, machInst, MemWriteOp, zt, pg, rn, zm, + offsetIs32, offsetIsSigned, offsetIsScaled); + } else { + return new SveIndexedMemSV( + mn, machInst, MemWriteOp, zt, pg, rn, zm, + offsetIs32, offsetIsSigned, offsetIsScaled); + } + case 0x1: + if (esizeIs32) { + return new SveIndexedMemSV( + mn, machInst, MemWriteOp, zt, pg, rn, zm, + offsetIs32, offsetIsSigned, 
offsetIsScaled); + } else { + return new SveIndexedMemSV( + mn, machInst, MemWriteOp, zt, pg, rn, zm, + offsetIs32, offsetIsSigned, offsetIsScaled); + } + case 0x2: + if (esizeIs32) { + return new SveIndexedMemSV( + mn, machInst, MemWriteOp, zt, pg, rn, zm, + offsetIs32, offsetIsSigned, offsetIsScaled); + } else { + return new SveIndexedMemSV( + mn, machInst, MemWriteOp, zt, pg, rn, zm, + offsetIs32, offsetIsSigned, offsetIsScaled); + } + case 0x3: + if (esizeIs32) { + break; + } else { + return new SveIndexedMemSV( + mn, machInst, MemWriteOp, zt, pg, rn, zm, + offsetIs32, offsetIsSigned, offsetIsScaled); + } + } + return new Unknown64(machInst); + } + +}}; + + let {{ header_output = '' @@ -323,6 +605,31 @@ let {{ ('uint64_t', 'uint64_t'), ) + gatherLoadTplArgs = ( + ('int32_t', 'int8_t'), + ('int64_t', 'int8_t'), + ('uint32_t', 'uint8_t'), + ('uint64_t', 'uint8_t'), + ('int32_t', 'int16_t'), + ('int64_t', 'int16_t'), + ('uint32_t', 'uint16_t'), + ('uint64_t', 'uint16_t'), + ('int64_t', 'int32_t'), + ('uint32_t', 'uint32_t'), + ('uint64_t', 'uint32_t'), + ('uint64_t', 'uint64_t'), + ) + + scatterStoreTplArgs = ( + ('uint32_t', 'uint8_t'), + ('uint64_t', 'uint8_t'), + ('uint32_t', 'uint16_t'), + ('uint64_t', 'uint16_t'), + ('uint32_t', 'uint32_t'), + ('uint64_t', 'uint32_t'), + ('uint64_t', 'uint64_t'), + ) + # Generates definitions for SVE contiguous loads def emitSveContigMemInsts(offsetIsImm): global header_output, exec_output, decoders @@ -437,9 +744,124 @@ let {{ 'class_name': 'SveLoadAndRepl'} exec_output += SveContigMemExecDeclare.subst(substDict) + class IndexedAddrForm: + VEC_PLUS_IMM = 0 + SCA_PLUS_VEC = 1 + + # Generates definitions for the transfer microops of SVE indexed memory + # operations (gather loads, scatter stores) + def emitSveIndexedMemMicroops(indexed_addr_form): + assert indexed_addr_form in (IndexedAddrForm.VEC_PLUS_IMM, + IndexedAddrForm.SCA_PLUS_VEC) + global header_output, exec_output, decoders + tplHeader = 'template ' + tplArgs = '' + if indexed_addr_form == IndexedAddrForm.VEC_PLUS_IMM: + eaCode = ''' + EA = AA64FpBase_x[elemIndex] + imm * sizeof(MemElemType)''' + else: + eaCode = ''' + uint64_t offset = AA64FpOffset_x[elemIndex]; + if (offsetIs32) { + offset &= (1ULL << 32) - 1; + } + if (offsetIsSigned) { + offset = sext<32>(offset); + } + if (offsetIsScaled) { + offset *= sizeof(MemElemType); + } + EA = XBase + offset''' + loadMemAccCode = ''' + if (GpOp_x[elemIndex]) { + AA64FpDest_x[elemIndex] = memData; + } else { + AA64FpDest_x[elemIndex] = 0; + } + ''' + storeMemAccCode = ''' + memData = AA64FpDest_x[elemIndex]; + ''' + predCheckCode = 'GpOp_x[elemIndex]' + loadIop = InstObjParams('ld1', + ('SveGatherLoadVIMicroop' + if indexed_addr_form == IndexedAddrForm.VEC_PLUS_IMM + else 'SveGatherLoadSVMicroop'), + 'MicroOp', + {'tpl_header': tplHeader, + 'tpl_args': tplArgs, + 'memacc_code': loadMemAccCode, + 'ea_code' : sveEnabledCheckCode + eaCode, + 'pred_check_code' : predCheckCode, + 'fa_code' : ''}, + ['IsMicroop', 'IsMemRef', 'IsLoad']) + storeIop = InstObjParams('st1', + ('SveScatterStoreVIMicroop' + if indexed_addr_form == IndexedAddrForm.VEC_PLUS_IMM + else 'SveScatterStoreSVMicroop'), + 'MicroOp', + {'tpl_header': tplHeader, + 'tpl_args': tplArgs, + 'memacc_code': storeMemAccCode, + 'ea_code' : sveEnabledCheckCode + eaCode, + 'pred_check_code' : predCheckCode, + 'fa_code' : ''}, + ['IsMicroop', 'IsMemRef', 'IsStore']) + if indexed_addr_form == IndexedAddrForm.VEC_PLUS_IMM: + header_output += SveIndexedMemVIMicroopDeclare.subst(loadIop) + 
header_output += SveIndexedMemVIMicroopDeclare.subst(storeIop) + else: + header_output += SveIndexedMemSVMicroopDeclare.subst(loadIop) + header_output += SveIndexedMemSVMicroopDeclare.subst(storeIop) + exec_output += ( + SveGatherLoadMicroopExecute.subst(loadIop) + + SveGatherLoadMicroopInitiateAcc.subst(loadIop) + + SveGatherLoadMicroopCompleteAcc.subst(loadIop) + + SveScatterStoreMicroopExecute.subst(storeIop) + + SveScatterStoreMicroopInitiateAcc.subst(storeIop) + + SveScatterStoreMicroopCompleteAcc.subst(storeIop)) + for args in gatherLoadTplArgs: + substDict = {'tpl_args': '<%s>' % ', '.join(args), + 'class_name': ( + 'SveGatherLoadVIMicroop' + if indexed_addr_form == \ + IndexedAddrForm.VEC_PLUS_IMM + else 'SveGatherLoadSVMicroop')} + # TODO: this should become SveMemExecDeclare + exec_output += SveContigMemExecDeclare.subst(substDict) + for args in scatterStoreTplArgs: + substDict = {'tpl_args': '<%s>' % ', '.join(args), + 'class_name': ( + 'SveScatterStoreVIMicroop' + if indexed_addr_form == \ + IndexedAddrForm.VEC_PLUS_IMM + else 'SveScatterStoreSVMicroop')} + # TODO: this should become SveMemExecDeclare + exec_output += SveContigMemExecDeclare.subst(substDict) + + # Generates definitions for the first microop of SVE gather loads, required + # to propagate the source vector register to the transfer microops + def emitSveGatherLoadCpySrcVecMicroop(): + global header_output, exec_output, decoders + code = sveEnabledCheckCode + ''' + unsigned eCount = ArmStaticInst::getCurSveVecLen( + xc->tcBase()); + for (unsigned i = 0; i < eCount; i++) { + AA64FpUreg0_ub[i] = AA64FpOp1_ub[i]; + }''' + iop = InstObjParams('ld1', + 'SveGatherLoadCpySrcVecMicroop', + 'MicroOp', + {'code': code}, + ['IsMicroop']) + header_output += SveGatherLoadCpySrcVecMicroopDeclare.subst(iop) + exec_output += SveGatherLoadCpySrcVecMicroopExecute.subst(iop) + # LD1[S]{B,H,W,D} (scalar plus immediate) + # ST1[S]{B,H,W,D} (scalar plus immediate) emitSveContigMemInsts(True) # LD1[S]{B,H,W,D} (scalar plus scalar) + # ST1[S]{B,H,W,D} (scalar plus scalar) emitSveContigMemInsts(False) # LD1R[S]{B,H,W,D} @@ -450,4 +872,14 @@ let {{ # LDR (vector), STR (vector) emitSveMemFillSpill(False) + # LD1[S]{B,H,W,D} (vector plus immediate) + # ST1[S]{B,H,W,D} (vector plus immediate) + emitSveIndexedMemMicroops(IndexedAddrForm.VEC_PLUS_IMM) + # LD1[S]{B,H,W,D} (scalar plus vector) + # ST1[S]{B,H,W,D} (scalar plus vector) + emitSveIndexedMemMicroops(IndexedAddrForm.SCA_PLUS_VEC) + + # Source vector copy microop for gather loads + emitSveGatherLoadCpySrcVecMicroop() + }}; diff --git a/src/arch/arm/isa/operands.isa b/src/arch/arm/isa/operands.isa index 0a0469acc..a3b385756 100644 --- a/src/arch/arm/isa/operands.isa +++ b/src/arch/arm/isa/operands.isa @@ -1,5 +1,5 @@ // -*- mode:c++ -*- -// Copyright (c) 2010-2014, 2016 ARM Limited +// Copyright (c) 2010-2014, 2016-2018 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -541,6 +541,39 @@ def operands {{ 'AA64FpDestMergeQ': vectorRegElem('0', 'tud', zeroing = True) }), + 'AA64FpBase': vectorReg('base', + { + 'AA64FpBaseP0': vectorRegElem('0'), + 'AA64FpBaseP1': vectorRegElem('1'), + 'AA64FpBaseP2': vectorRegElem('2'), + 'AA64FpBaseP3': vectorRegElem('3'), + 'AA64FpBaseS': vectorRegElem('0', 'sf', zeroing = True), + 'AA64FpBaseD': vectorRegElem('0', 'df', zeroing = True), + 'AA64FpBaseQ': vectorRegElem('0', 'tud', zeroing = True) + }), + + 'AA64FpOffset': vectorReg('offset', + { + 'AA64FpOffsetP0': vectorRegElem('0'), + 
'AA64FpOffsetP1': vectorRegElem('1'), + 'AA64FpOffsetP2': vectorRegElem('2'), + 'AA64FpOffsetP3': vectorRegElem('3'), + 'AA64FpOffsetS': vectorRegElem('0', 'sf', zeroing = True), + 'AA64FpOffsetD': vectorRegElem('0', 'df', zeroing = True), + 'AA64FpOffsetQ': vectorRegElem('0', 'tud', zeroing = True) + }), + + 'AA64FpUreg0': vectorReg('VECREG_UREG0', + { + 'AA64FpUreg0P0': vectorRegElem('0'), + 'AA64FpUreg0P1': vectorRegElem('1'), + 'AA64FpUreg0P2': vectorRegElem('2'), + 'AA64FpUreg0P3': vectorRegElem('3'), + 'AA64FpUreg0S': vectorRegElem('0', 'sf', zeroing = True), + 'AA64FpUreg0D': vectorRegElem('0', 'df', zeroing = True), + 'AA64FpUreg0Q': vectorRegElem('0', 'tud', zeroing = True) + }), + # Predicate register operands 'GpOp': vecPredReg('gp'), 'POp1': vecPredReg('op1'), diff --git a/src/arch/arm/isa/templates/sve_mem.isa b/src/arch/arm/isa/templates/sve_mem.isa index 8471e44ba..2cdf2ffd4 100644 --- a/src/arch/arm/isa/templates/sve_mem.isa +++ b/src/arch/arm/isa/templates/sve_mem.isa @@ -1,4 +1,4 @@ -// Copyright (c) 2017 ARM Limited +// Copyright (c) 2017-2018 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -384,3 +384,342 @@ def template SveLoadAndReplCompleteAcc {{ } }}; +def template SveIndexedMemVIMicroopDeclare {{ + %(tpl_header)s + class %(class_name)s : public %(base_class)s + { + protected: + typedef RegElemType TPElem; + + IntRegIndex dest; + IntRegIndex gp; + IntRegIndex base; + uint64_t imm; + + int elemIndex; + int numElems; + + unsigned memAccessFlags; + + public: + %(class_name)s(const char* mnem, ExtMachInst machInst, + OpClass __opClass, IntRegIndex _dest, IntRegIndex _gp, + IntRegIndex _base, uint64_t _imm, int _elemIndex, int _numElems) + : %(base_class)s(mnem, machInst, %(op_class)s), + dest(_dest), gp(_gp), base(_base), imm(_imm), + elemIndex(_elemIndex), numElems(_numElems), + memAccessFlags(ArmISA::TLB::AllowUnaligned | + ArmISA::TLB::MustBeOne) + { + %(constructor)s; + if (_opClass == MemReadOp && elemIndex == 0) { + // The first micro-op is responsible for pinning the + // destination register + _destRegIdx[0].setNumPinnedWrites(numElems - 1); + } + } + + Fault execute(ExecContext *, Trace::InstRecord *) const; + Fault initiateAcc(ExecContext *, Trace::InstRecord *) const; + Fault completeAcc(PacketPtr, ExecContext *, Trace::InstRecord *) const; + + virtual void + annotateFault(ArmFault *fault) + { + %(fa_code)s + } + + std::string + generateDisassembly(Addr pc, const SymbolTable *symtab) const + { + // TODO: add suffix to transfer register + std::stringstream ss; + printMnemonic(ss, "", false); + ccprintf(ss, "{"); + printVecReg(ss, dest, true); + ccprintf(ss, "}, "); + printVecPredReg(ss, gp); + if (_opClass == MemReadOp) { + ccprintf(ss, "/z"); + } + ccprintf(ss, ", ["); + printVecReg(ss, base, true); + if (imm != 0) { + ccprintf(ss, ", #%d", imm * sizeof(MemElemType)); + } + ccprintf(ss, "] (uop elem %d tfer)", elemIndex); + return ss.str(); + } + }; +}}; + +def template SveIndexedMemSVMicroopDeclare {{ + %(tpl_header)s + class %(class_name)s : public %(base_class)s + { + protected: + typedef RegElemType TPElem; + + IntRegIndex dest; + IntRegIndex gp; + IntRegIndex base; + IntRegIndex offset; + + bool offsetIs32; + bool offsetIsSigned; + bool offsetIsScaled; + + int elemIndex; + int numElems; + + unsigned memAccessFlags; + + public: + %(class_name)s(const char* mnem, ExtMachInst machInst, + OpClass __opClass, IntRegIndex _dest, IntRegIndex _gp, + IntRegIndex _base, IntRegIndex _offset, bool 
_offsetIs32, + bool _offsetIsSigned, bool _offsetIsScaled, int _elemIndex, + int _numElems) + : %(base_class)s(mnem, machInst, %(op_class)s), + dest(_dest), gp(_gp), base(_base), offset(_offset), + offsetIs32(_offsetIs32), offsetIsSigned(_offsetIsSigned), + offsetIsScaled(_offsetIsScaled), elemIndex(_elemIndex), + numElems(_numElems), + memAccessFlags(ArmISA::TLB::AllowUnaligned | + ArmISA::TLB::MustBeOne) + { + %(constructor)s; + if (_opClass == MemReadOp && elemIndex == 0) { + // The first micro-op is responsible for pinning the + // destination register + _destRegIdx[0].setNumPinnedWrites(numElems - 1); + } + } + + Fault execute(ExecContext *, Trace::InstRecord *) const; + Fault initiateAcc(ExecContext *, Trace::InstRecord *) const; + Fault completeAcc(PacketPtr, ExecContext *, Trace::InstRecord *) const; + + virtual void + annotateFault(ArmFault *fault) + { + %(fa_code)s + } + + std::string + generateDisassembly(Addr pc, const SymbolTable *symtab) const + { + // TODO: add suffix to transfer and base registers + std::stringstream ss; + printMnemonic(ss, "", false); + ccprintf(ss, "{"); + printVecReg(ss, dest, true); + ccprintf(ss, "}, "); + printVecPredReg(ss, gp); + if (_opClass == MemReadOp) { + ccprintf(ss, "/z"); + } + ccprintf(ss, ", ["); + printIntReg(ss, base); + ccprintf(ss, ", "); + printVecReg(ss, offset, true); + ccprintf(ss, "] (uop elem %d tfer)", elemIndex); + return ss.str(); + } + }; +}}; + +def template SveGatherLoadMicroopExecute {{ + %(tpl_header)s + Fault %(class_name)s%(tpl_args)s::execute(ExecContext *xc, + Trace::InstRecord *traceData) const + { + Addr EA; + Fault fault = NoFault; + bool aarch64 M5_VAR_USED = true; + + %(op_decl)s; + %(op_rd)s; + %(ea_code)s; + + MemElemType memData; + + if (%(pred_check_code)s) { + fault = readMemAtomic(xc, traceData, EA, memData, + this->memAccessFlags); + } + + if (fault == NoFault) { + %(memacc_code)s; + %(op_wb)s; + } + + return fault; + } +}}; + +def template SveGatherLoadMicroopInitiateAcc {{ + %(tpl_header)s + Fault %(class_name)s%(tpl_args)s::initiateAcc(ExecContext *xc, + Trace::InstRecord *traceData) const + { + Addr EA; + Fault fault = NoFault; + bool aarch64 M5_VAR_USED = true; + + %(op_src_decl)s; + %(op_rd)s; + %(ea_code)s; + + MemElemType memData; + + if (%(pred_check_code)s) { + fault = initiateMemRead(xc, traceData, EA, memData, + this->memAccessFlags); + } else { + xc->setMemAccPredicate(false); + } + + return fault; + } +}}; + +def template SveGatherLoadMicroopCompleteAcc {{ + %(tpl_header)s + Fault %(class_name)s%(tpl_args)s::completeAcc(PacketPtr pkt, + ExecContext *xc, Trace::InstRecord *traceData) const + { + Fault fault = NoFault; + bool aarch64 M5_VAR_USED = true; + + %(op_decl)s; + %(op_rd)s; + + MemElemType memData = 0; + if (%(pred_check_code)s) { + getMem(pkt, memData, traceData); + } + + if (fault == NoFault) { + %(memacc_code)s; + } + + if (fault == NoFault) { + %(op_wb)s; + } + + return fault; + } +}}; + +def template SveScatterStoreMicroopExecute {{ + %(tpl_header)s + Fault %(class_name)s%(tpl_args)s::execute(ExecContext *xc, + Trace::InstRecord *traceData) const + { + Addr EA; + Fault fault = NoFault; + bool aarch64 M5_VAR_USED = true; + + %(op_decl)s; + %(op_rd)s; + %(ea_code)s; + + MemElemType memData; + %(memacc_code)s; + + if (%(pred_check_code)s) { + fault = writeMemAtomic(xc, traceData, memData, EA, + this->memAccessFlags, NULL); + } + + if (fault == NoFault) { + %(op_wb)s; + } + + return fault; + } +}}; + +def template SveScatterStoreMicroopInitiateAcc {{ + %(tpl_header)s + Fault 
%(class_name)s%(tpl_args)s::initiateAcc(ExecContext *xc, + Trace::InstRecord *traceData) const + { + Addr EA; + Fault fault = NoFault; + bool aarch64 M5_VAR_USED = true; + + %(op_decl)s; + %(op_rd)s; + %(ea_code)s; + + MemElemType memData; + %(memacc_code)s; + + if (%(pred_check_code)s) { + fault = writeMemTiming(xc, traceData, memData, EA, + this->memAccessFlags, NULL); + } else { + xc->setPredicate(false); + } + + return fault; + } +}}; + +def template SveScatterStoreMicroopCompleteAcc {{ + %(tpl_header)s + Fault %(class_name)s%(tpl_args)s::completeAcc(PacketPtr pkt, + ExecContext *xc, Trace::InstRecord *traceData) const + { + return NoFault; + } +}}; + +def template SveGatherLoadCpySrcVecMicroopDeclare {{ + class SveGatherLoadCpySrcVecMicroop : public MicroOp + { + protected: + IntRegIndex op1; + + StaticInst *macroOp; + + public: + SveGatherLoadCpySrcVecMicroop(const char* mnem, ExtMachInst machInst, + IntRegIndex _op1, StaticInst *_macroOp) + : MicroOp(mnem, machInst, SimdAluOp), op1(_op1), macroOp(_macroOp) + { + %(constructor)s; + } + + Fault execute(ExecContext *, Trace::InstRecord *) const; + + std::string + generateDisassembly(Addr pc, const SymbolTable *symtab) const + { + std::stringstream ss; + ccprintf(ss, "%s", macroOp->disassemble(pc, symtab)); + ccprintf(ss, " (uop src vec cpy)"); + return ss.str(); + } + }; +}}; + +def template SveGatherLoadCpySrcVecMicroopExecute {{ + Fault SveGatherLoadCpySrcVecMicroop::execute(ExecContext *xc, + Trace::InstRecord *traceData) const + { + Fault fault = NoFault; + %(op_decl)s; + %(op_rd)s; + + %(code)s; + if (fault == NoFault) + { + %(op_wb)s; + } + + return fault; + } +}}; diff --git a/src/arch/arm/registers.hh b/src/arch/arm/registers.hh index 8ee48edc0..8e6ce799b 100644 --- a/src/arch/arm/registers.hh +++ b/src/arch/arm/registers.hh @@ -88,6 +88,7 @@ const int NumVecSpecialRegs = 8; const int NumIntRegs = NUM_INTREGS; const int NumFloatRegs = NumFloatV8ArchRegs + NumFloatSpecialRegs; const int NumVecRegs = NumVecV8ArchRegs + NumVecSpecialRegs; +const int VECREG_UREG0 = 32; const int NumVecPredRegs = 17; // P0-P15, FFR const int PREDREG_FFR = 16; const int NumCCRegs = NUM_CCREGS; -- 2.30.2
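
Note (illustrative, not part of the patch): the SveIndexedMemVI/SveIndexedMemSV macro-ops expand into one transfer microop per vector element, plus one extra leading microop for gather loads that snapshots the address vector into the auxiliary register VECREG_UREG0 so that Zt may alias it. A minimal standalone sketch of the same counting logic, using a hypothetical helper name:

    // Sketch only: mirrors the numMicroops computation in the macro-op
    // constructors. machInst.sveLen encodes the vector length in 128-bit
    // granules minus one, so (sveLen + 1) * 16 is the length in bytes.
    #include <cstddef>

    static inline int
    numIndexedMemMicroops(unsigned sveLen, std::size_t regElemSize, bool isLoad)
    {
        // One transfer microop per element of the governing register.
        int numElems = static_cast<int>(((sveLen + 1) * 16) / regElemSize);
        // Gather loads prepend the source-vector copy microop.
        return isLoad ? numElems + 1 : numElems;
    }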
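Note (illustrative, not part of the patch): the scalar-plus-vector microops compute one effective address per active element from the scalar base and the corresponding element of the offset vector, applying the offsetIs32, offsetIsSigned and offsetIsScaled flags in that order. A standalone restatement of the eaCode template string, with a hypothetical function name:

    #include <cstdint>
    #include <cstddef>

    static inline uint64_t
    sveGatherElemAddr(uint64_t xBase, uint64_t offsetElem,
                      std::size_t memElemSize, bool offsetIs32,
                      bool offsetIsSigned, bool offsetIsScaled)
    {
        uint64_t offset = offsetElem;
        if (offsetIs32)
            offset &= (1ULL << 32) - 1;   // keep only the low 32 bits
        if (offsetIsSigned)               // equivalent to sext<32>(offset)
            offset = static_cast<uint64_t>(
                static_cast<int64_t>(static_cast<int32_t>(offset)));
        if (offsetIsScaled)
            offset *= memElemSize;        // scale by the memory element size
        return xBase + offset;
    }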