From 62ec973244f5e905c3934b80cc75bfd51e63a8ed Mon Sep 17 00:00:00 2001 From: Kyle Roarty Date: Wed, 5 Aug 2020 13:24:56 -0500 Subject: [PATCH] arch-gcn3: Free registers when execMask = 0 Flat instructions free some of their registers through their memory requests, in particular a call to scheduleWriteOperandsFromLoad(), which gets called from GlobalMemPipeline::exec. When execMask is 0, the instruction doesn't issue a memory request. This patch adds in a call to scheduleWriteOperandsFromLoad() when execMask is 0 for Flat Load and AtomicReturn instructions, as those are the instructions that call scheduleWriteOperandsFromLoad() in the memory pipeline. This patch also adds in a missing return statement when execMask is 0 in one of the Flat instructions. Change-Id: I09296adb7401e7515d3cedceb780a5df4598b109 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/32234 Reviewed-by: Matt Sinclair Reviewed-by: Anthony Gutierrez Maintainer: Anthony Gutierrez Tested-by: kokoro --- src/arch/gcn3/insts/instructions.cc | 74 +++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) diff --git a/src/arch/gcn3/insts/instructions.cc b/src/arch/gcn3/insts/instructions.cc index 955d8013d..fd89ae291 100644 --- a/src/arch/gcn3/insts/instructions.cc +++ b/src/arch/gcn3/insts/instructions.cc @@ -39406,6 +39406,9 @@ namespace Gcn3ISA wf->decLGKMInstsIssued(); wf->rdGmReqsInPipe--; wf->rdLmReqsInPipe--; + gpuDynInst->exec_mask = wf->execMask(); + wf->computeUnit->vrf[wf->simdId]-> + scheduleWriteOperandsFromLoad(wf, gpuDynInst); return; } @@ -39504,6 +39507,9 @@ namespace Gcn3ISA wf->decLGKMInstsIssued(); wf->rdGmReqsInPipe--; wf->rdLmReqsInPipe--; + gpuDynInst->exec_mask = wf->execMask(); + wf->computeUnit->vrf[wf->simdId]-> + scheduleWriteOperandsFromLoad(wf, gpuDynInst); return; } @@ -39602,6 +39608,9 @@ namespace Gcn3ISA wf->decLGKMInstsIssued(); wf->rdGmReqsInPipe--; wf->rdLmReqsInPipe--; + gpuDynInst->exec_mask = wf->execMask(); + wf->computeUnit->vrf[wf->simdId]-> + 
scheduleWriteOperandsFromLoad(wf, gpuDynInst); return; } @@ -39672,6 +39681,9 @@ namespace Gcn3ISA wf->decLGKMInstsIssued(); wf->rdGmReqsInPipe--; wf->rdLmReqsInPipe--; + gpuDynInst->exec_mask = wf->execMask(); + wf->computeUnit->vrf[wf->simdId]-> + scheduleWriteOperandsFromLoad(wf, gpuDynInst); return; } @@ -39742,6 +39754,9 @@ namespace Gcn3ISA wf->decLGKMInstsIssued(); wf->rdGmReqsInPipe--; wf->rdLmReqsInPipe--; + gpuDynInst->exec_mask = wf->execMask(); + wf->computeUnit->vrf[wf->simdId]-> + scheduleWriteOperandsFromLoad(wf, gpuDynInst); return; } @@ -39821,6 +39836,10 @@ namespace Gcn3ISA wf->decLGKMInstsIssued(); wf->rdGmReqsInPipe--; wf->rdLmReqsInPipe--; + gpuDynInst->exec_mask = wf->execMask(); + wf->computeUnit->vrf[wf->simdId]-> + scheduleWriteOperandsFromLoad(wf, gpuDynInst); + return; } gpuDynInst->execUnitId = wf->execUnitId; @@ -40355,6 +40374,11 @@ namespace Gcn3ISA wf->decLGKMInstsIssued(); wf->wrGmReqsInPipe--; wf->rdGmReqsInPipe--; + if (instData.GLC) { + gpuDynInst->exec_mask = wf->execMask(); + wf->computeUnit->vrf[wf->simdId]-> + scheduleWriteOperandsFromLoad(wf, gpuDynInst); + } return; } @@ -40457,6 +40481,11 @@ namespace Gcn3ISA wf->decLGKMInstsIssued(); wf->wrGmReqsInPipe--; wf->rdGmReqsInPipe--; + if (instData.GLC) { + gpuDynInst->exec_mask = wf->execMask(); + wf->computeUnit->vrf[wf->simdId]-> + scheduleWriteOperandsFromLoad(wf, gpuDynInst); + } return; } @@ -40560,6 +40589,11 @@ namespace Gcn3ISA wf->decLGKMInstsIssued(); wf->wrGmReqsInPipe--; wf->rdGmReqsInPipe--; + if (instData.GLC) { + gpuDynInst->exec_mask = wf->execMask(); + wf->computeUnit->vrf[wf->simdId]-> + scheduleWriteOperandsFromLoad(wf, gpuDynInst); + } return; } @@ -40650,6 +40684,11 @@ namespace Gcn3ISA wf->decLGKMInstsIssued(); wf->wrGmReqsInPipe--; wf->rdGmReqsInPipe--; + if (instData.GLC) { + gpuDynInst->exec_mask = wf->execMask(); + wf->computeUnit->vrf[wf->simdId]-> + scheduleWriteOperandsFromLoad(wf, gpuDynInst); + } return; } @@ -40914,6 +40953,11 @@ namespace 
Gcn3ISA wf->decLGKMInstsIssued(); wf->wrGmReqsInPipe--; wf->rdGmReqsInPipe--; + if (instData.GLC) { + gpuDynInst->exec_mask = wf->execMask(); + wf->computeUnit->vrf[wf->simdId]-> + scheduleWriteOperandsFromLoad(wf, gpuDynInst); + } return; } @@ -41004,6 +41048,11 @@ namespace Gcn3ISA wf->decLGKMInstsIssued(); wf->wrGmReqsInPipe--; wf->rdGmReqsInPipe--; + if (instData.GLC) { + gpuDynInst->exec_mask = wf->execMask(); + wf->computeUnit->vrf[wf->simdId]-> + scheduleWriteOperandsFromLoad(wf, gpuDynInst); + } return; } @@ -41123,6 +41172,11 @@ namespace Gcn3ISA wf->decLGKMInstsIssued(); wf->wrGmReqsInPipe--; wf->rdGmReqsInPipe--; + if (instData.GLC) { + gpuDynInst->exec_mask = wf->execMask(); + wf->computeUnit->vrf[wf->simdId]-> + scheduleWriteOperandsFromLoad(wf, gpuDynInst); + } return; } @@ -41227,6 +41281,11 @@ namespace Gcn3ISA wf->decLGKMInstsIssued(); wf->wrGmReqsInPipe--; wf->rdGmReqsInPipe--; + if (instData.GLC) { + gpuDynInst->exec_mask = wf->execMask(); + wf->computeUnit->vrf[wf->simdId]-> + scheduleWriteOperandsFromLoad(wf, gpuDynInst); + } return; } @@ -41319,6 +41378,11 @@ namespace Gcn3ISA wf->decLGKMInstsIssued(); wf->wrGmReqsInPipe--; wf->rdGmReqsInPipe--; + if (instData.GLC) { + gpuDynInst->exec_mask = wf->execMask(); + wf->computeUnit->vrf[wf->simdId]-> + scheduleWriteOperandsFromLoad(wf, gpuDynInst); + } return; } @@ -41593,6 +41657,11 @@ namespace Gcn3ISA wf->decLGKMInstsIssued(); wf->wrGmReqsInPipe--; wf->rdGmReqsInPipe--; + if (instData.GLC) { + gpuDynInst->exec_mask = wf->execMask(); + wf->computeUnit->vrf[wf->simdId]-> + scheduleWriteOperandsFromLoad(wf, gpuDynInst); + } return; } @@ -41686,6 +41755,11 @@ namespace Gcn3ISA wf->decLGKMInstsIssued(); wf->wrGmReqsInPipe--; wf->rdGmReqsInPipe--; + if (instData.GLC) { + gpuDynInst->exec_mask = wf->execMask(); + wf->computeUnit->vrf[wf->simdId]-> + scheduleWriteOperandsFromLoad(wf, gpuDynInst); + } return; } -- 2.30.2