From 1836d58b3640c1830065af81328652d5fc03d81f Mon Sep 17 00:00:00 2001
From: Matt Sinclair
Date: Tue, 10 Jul 2018 02:58:28 -0400
Subject: [PATCH] arch-gcn3: add support for v_mbcnt_hi and v_mbcnt_lo

Change-Id: I1c70fe693c904f1abd7d5a2b99220c74a075eae5
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/29948
Maintainer: Anthony Gutierrez
Tested-by: kokoro
Reviewed-by: Matt Sinclair
---
 src/arch/gcn3/insts/instructions.cc | 76 +++++++++++++++++++++++++++--
 1 file changed, 72 insertions(+), 4 deletions(-)

diff --git a/src/arch/gcn3/insts/instructions.cc b/src/arch/gcn3/insts/instructions.cc
index 79e7ddacf..6ffd049f2 100644
--- a/src/arch/gcn3/insts/instructions.cc
+++ b/src/arch/gcn3/insts/instructions.cc
@@ -30309,8 +30309,42 @@ namespace Gcn3ISA
     void
     Inst_VOP3__V_MBCNT_LO_U32_B32::execute(GPUDynInstPtr gpuDynInst)
     {
-        panicUnimplemented();
-    }
+        /**
+         * For each active lane, threadMask has bits [lane-1:0] set; the
+         * result is popCount(src0 & bits(threadMask, 31, 0)) + src1.
+         * Lanes that are inactive in the exec mask are left unwritten.
+         */
+        Wavefront *wf = gpuDynInst->wavefront();
+        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
+        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
+        VecOperandU32 vdst(gpuDynInst, instData.VDST);
+
+        src0.readSrc();
+        src1.readSrc();
+
+        /**
+         * input modifiers are supported by FP operations only
+         */
+        assert(!(instData.ABS & 0x1));
+        assert(!(instData.ABS & 0x2));
+        assert(!(instData.ABS & 0x4));
+        assert(!(extData.NEG & 0x1));
+        assert(!(extData.NEG & 0x2));
+        assert(!(extData.NEG & 0x4));
+
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                // Build the mask with unsigned arithmetic: the signed
+                // form (1LL << lane) - 1LL would overflow for lane 63.
+                const uint64_t threadMask = (1ULL << lane) - 1ULL;
+                vdst[lane] = popCount(src0[lane] & bits(threadMask, 31, 0)) +
+                             src1[lane];
+            }
+        }
+
+        vdst.write();
+    } // execute
+    // --- Inst_VOP3__V_MBCNT_HI_U32_B32 class methods ---
 
     Inst_VOP3__V_MBCNT_HI_U32_B32::Inst_VOP3__V_MBCNT_HI_U32_B32(
           InFmt_VOP3 *iFmt)
@@ -30330,8 +30364,42 @@ namespace Gcn3ISA
     void
     Inst_VOP3__V_MBCNT_HI_U32_B32::execute(GPUDynInstPtr gpuDynInst)
     {
-        panicUnimplemented();
-    }
+        /**
+         * For each active lane, threadMask has bits [lane-1:0] set; the
+         * result is popCount(src0 & bits(threadMask, 63, 32)) + src1.
+         * Lanes that are inactive in the exec mask are left unwritten.
+         */
+        Wavefront *wf = gpuDynInst->wavefront();
+        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
+        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
+        VecOperandU32 vdst(gpuDynInst, instData.VDST);
+
+        src0.readSrc();
+        src1.readSrc();
+
+        /**
+         * input modifiers are supported by FP operations only
+         */
+        assert(!(instData.ABS & 0x1));
+        assert(!(instData.ABS & 0x2));
+        assert(!(instData.ABS & 0x4));
+        assert(!(extData.NEG & 0x1));
+        assert(!(extData.NEG & 0x2));
+        assert(!(extData.NEG & 0x4));
+
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                // Build the mask with unsigned arithmetic: the signed
+                // form (1LL << lane) - 1LL would overflow for lane 63.
+                const uint64_t threadMask = (1ULL << lane) - 1ULL;
+                vdst[lane] = popCount(src0[lane] & bits(threadMask, 63, 32)) +
+                             src1[lane];
+            }
+        }
+
+        vdst.write();
+    } // execute
+    // --- Inst_VOP3__V_LSHLREV_B64 class methods ---
 
     Inst_VOP3__V_LSHLREV_B64::Inst_VOP3__V_LSHLREV_B64(InFmt_VOP3 *iFmt)
         : Inst_VOP3(iFmt, "v_lshlrev_b64", false)
-- 
2.30.2