From: Alexandru Dutu Date: Thu, 9 Nov 2017 07:20:54 +0000 (-0500) Subject: arch-gcn3: Implementation of flat atomic swap instruction X-Git-Tag: v20.1.0.0~469 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=07fcbf16fcda7c57975d8c0950d0850a3e7c2485;p=gem5.git arch-gcn3: Implementation of flat atomic swap instruction Change-Id: I9b9042899e65e8c9848b31c509eb2e3b13293e52 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/29937 Maintainer: Anthony Gutierrez Tested-by: kokoro Reviewed-by: Matt Sinclair --- diff --git a/src/arch/gcn3/insts/instructions.cc b/src/arch/gcn3/insts/instructions.cc index 2e39bf5c4..607e3c6f2 100644 --- a/src/arch/gcn3/insts/instructions.cc +++ b/src/arch/gcn3/insts/instructions.cc @@ -39231,8 +39231,80 @@ namespace Gcn3ISA void Inst_FLAT__FLAT_ATOMIC_SWAP::execute(GPUDynInstPtr gpuDynInst) { - panicUnimplemented(); - } + Wavefront *wf = gpuDynInst->wavefront(); + + if (wf->execMask().none()) { + wf->wrGmReqsInPipe--; + wf->rdGmReqsInPipe--; + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->exec_mask = wf->execMask(); + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); + + ConstVecOperandU64 addr(gpuDynInst, extData.ADDR); + + addr.read(); + + calcAddr(gpuDynInst, addr); + + if (gpuDynInst->executedAs() == Enums::SC_GLOBAL || + gpuDynInst->executedAs() == Enums::SC_PRIVATE) { + // TODO: additional address computation required for scratch + panic_if(gpuDynInst->executedAs() == Enums::SC_PRIVATE, + "Flats to private aperture not tested yet\n"); + gpuDynInst->computeUnit()->globalMemoryPipe. 
+ issueRequest(gpuDynInst); + wf->wrGmReqsInPipe--; + wf->outstandingReqsWrGm++; + wf->rdGmReqsInPipe--; + wf->outstandingReqsRdGm++; + } else { + fatal("Non global flat instructions not implemented yet.\n"); + } + + gpuDynInst->wavefront()->outstandingReqs++; + gpuDynInst->wavefront()->validateRequestCounters(); + + ConstVecOperandU32 data(gpuDynInst, extData.DATA); + + data.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + (reinterpret_cast<VecElemU32*>(gpuDynInst->a_data))[lane] + = data[lane]; + } + } + + } // execute + + void + Inst_FLAT__FLAT_ATOMIC_SWAP::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initAtomicAccess(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_ATOMIC_SWAP::completeAcc(GPUDynInstPtr gpuDynInst) + { + if (isAtomicRet()) { + VecOperandU32 vdst(gpuDynInst, extData.VDST); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + vdst[lane] = (reinterpret_cast<VecElemU32*>( + gpuDynInst->d_data))[lane]; + } + } + + vdst.write(); + } + } // completeAcc + + // --- Inst_FLAT__FLAT_ATOMIC_CMPSWAP class methods --- Inst_FLAT__FLAT_ATOMIC_CMPSWAP ::Inst_FLAT__FLAT_ATOMIC_CMPSWAP(InFmt_FLAT *iFmt) diff --git a/src/arch/gcn3/insts/instructions.hh b/src/arch/gcn3/insts/instructions.hh index ff0cfea85..b0cc37e8f 100644 --- a/src/arch/gcn3/insts/instructions.hh +++ b/src/arch/gcn3/insts/instructions.hh @@ -79949,9 +79949,9 @@ namespace Gcn3ISA case 0: //vgpr_addr return 8; case 1: //vgpr_src - return 32; + return 4; case 2: //vgpr_dst - return 32; + return 4; default: fatal("op idx %i out of bounds\n", opIdx); return -1; @@ -79991,6 +79991,8 @@ namespace Gcn3ISA } // isDstOperand void execute(GPUDynInstPtr) override; + void initiateAcc(GPUDynInstPtr) override; + void completeAcc(GPUDynInstPtr) override; }; // Inst_FLAT__FLAT_ATOMIC_SWAP class Inst_FLAT__FLAT_ATOMIC_CMPSWAP : public Inst_FLAT