From 5f6ebe752e1e7e5b97b175478abbfb90b1ec5ff0 Mon Sep 17 00:00:00 2001
From: Kyle Roarty
Date: Tue, 10 Nov 2020 00:10:06 -0600
Subject: [PATCH] arch-gcn3: Implement flat_load_sbyte instruction

Change-Id: I3aa7547a393b9ecb4b3d4d107394c54d690a0ac2
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/37476
Reviewed-by: Matt Sinclair
Reviewed-by: Matthew Poremba
Maintainer: Matt Sinclair
Tested-by: kokoro
---
Notes (this section is ignored by `git am`):
  * execute(): if the wavefront's exec mask has no active lane, the
    VMem/LGKM issued counters and the rdGm/rdLm in-pipe counters are
    decremented, write operands are scheduled on the VRF, and the method
    returns.  Otherwise the 64-bit address operand is read, calcAddr() is
    called, and the request is issued to the global memory pipe; any
    non-global access ends in fatal().
  * initiateAcc(): calls initMemRead with element type VecElemI8.
  * completeAcc(): for every lane whose exec_mask bit is set, d_data is
    indexed as VecElemI8 and the element is cast to VecElemI32 before the
    destination vector register is written (presumably a sign-extending
    byte load -- confirm against the VecElemI8 typedef).
  * NOTE(review): the explicit template arguments <VecElemI8> and
    <VecElemI8*> were reconstructed after angle-bracket stripping; verify
    against the upstream change before applying.

 src/arch/gcn3/insts/instructions.cc | 46 ++++++++++++++++++++++++++++-
 1 file changed, 45 insertions(+), 1 deletion(-)

diff --git a/src/arch/gcn3/insts/instructions.cc b/src/arch/gcn3/insts/instructions.cc
index 64f0c36fd..93fc142eb 100644
--- a/src/arch/gcn3/insts/instructions.cc
+++ b/src/arch/gcn3/insts/instructions.cc
@@ -39497,17 +39497,61 @@ namespace Gcn3ISA
     void
     Inst_FLAT__FLAT_LOAD_SBYTE::execute(GPUDynInstPtr gpuDynInst)
     {
-        panicUnimplemented();
+        Wavefront *wf = gpuDynInst->wavefront();
+
+        if (wf->execMask().none()) {
+            wf->decVMemInstsIssued();
+            wf->decLGKMInstsIssued();
+            wf->rdGmReqsInPipe--;
+            wf->rdLmReqsInPipe--;
+            gpuDynInst->exec_mask = wf->execMask();
+            wf->computeUnit->vrf[wf->simdId]->
+                scheduleWriteOperandsFromLoad(wf, gpuDynInst);
+            return;
+        }
+
+        gpuDynInst->execUnitId = wf->execUnitId;
+        gpuDynInst->exec_mask = gpuDynInst->wavefront()->execMask();
+        gpuDynInst->latency.init(gpuDynInst->computeUnit());
+        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
+
+        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
+
+        addr.read();
+
+        calcAddr(gpuDynInst, addr);
+
+        if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
+            gpuDynInst->computeUnit()->globalMemoryPipe
+                .issueRequest(gpuDynInst);
+            wf->rdGmReqsInPipe--;
+            wf->outstandingReqsRdGm++;
+        } else {
+            fatal("Non global flat instructions not implemented yet.\n");
+        }
+
+        gpuDynInst->wavefront()->outstandingReqs++;
+        gpuDynInst->wavefront()->validateRequestCounters();
     }
 
     void
     Inst_FLAT__FLAT_LOAD_SBYTE::initiateAcc(GPUDynInstPtr gpuDynInst)
     {
+        initMemRead<VecElemI8>(gpuDynInst);
     } // initiateAcc
 
     void
     Inst_FLAT__FLAT_LOAD_SBYTE::completeAcc(GPUDynInstPtr gpuDynInst)
     {
+        VecOperandI32 vdst(gpuDynInst, extData.VDST);
+
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (gpuDynInst->exec_mask[lane]) {
+                vdst[lane] = (VecElemI32)((reinterpret_cast<VecElemI8*>(
+                    gpuDynInst->d_data))[lane]);
+            }
+        }
+        vdst.write();
     }
 
     Inst_FLAT__FLAT_LOAD_USHORT::Inst_FLAT__FLAT_LOAD_USHORT(InFmt_FLAT *iFmt)
-- 
2.30.2