From 0983929a24e383efb16678df69364aec6b53cf2c Mon Sep 17 00:00:00 2001 From: Kyle Roarty Date: Wed, 12 Aug 2020 18:08:05 -0500 Subject: [PATCH] arch-gcn3: Update LmReqsInPipe in atomic flats when execMask=0 MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit In flat instructions, wrLmReqsInPipe/rdLmReqsInPipe are decremented in the calcAddr() function. However, the calcAddr() function is only called when execMask != 0. This patch adds statements to decrement wrLmReqsInPipe and rdLmReqsInPipe in all implemented atomic flats when execMask is 0. This fixes a scenario where vector local memory and flat instructions become unable to execute because LocalMemPipeline::isLMReqFIFOWrRdy always returns false in ScheduleStage::dispatchReady once too many atomic flats have executed with execMask equal to 0. Change-Id: I081cfd3faf74bbfcf0728445e7160fa2a76a6a7e Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/32614 Reviewed-by: Matt Sinclair Reviewed-by: Alexandru Duțu Maintainer: Anthony Gutierrez Tested-by: kokoro --- src/arch/gcn3/insts/instructions.cc | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/src/arch/gcn3/insts/instructions.cc b/src/arch/gcn3/insts/instructions.cc index fd89ae291..296dbadf1 100644 --- a/src/arch/gcn3/insts/instructions.cc +++ b/src/arch/gcn3/insts/instructions.cc @@ -40374,6 +40374,8 @@ namespace Gcn3ISA wf->decLGKMInstsIssued(); wf->wrGmReqsInPipe--; wf->rdGmReqsInPipe--; + wf->wrLmReqsInPipe--; + wf->rdLmReqsInPipe--; if (instData.GLC) { gpuDynInst->exec_mask = wf->execMask(); wf->computeUnit->vrf[wf->simdId]-> @@ -40481,6 +40483,8 @@ namespace Gcn3ISA wf->decLGKMInstsIssued(); wf->wrGmReqsInPipe--; wf->rdGmReqsInPipe--; + wf->wrLmReqsInPipe--; + wf->rdLmReqsInPipe--; if (instData.GLC) { gpuDynInst->exec_mask = wf->execMask(); wf->computeUnit->vrf[wf->simdId]-> @@ -40589,6 +40593,8 @@ namespace Gcn3ISA wf->decLGKMInstsIssued(); wf->wrGmReqsInPipe--; wf->rdGmReqsInPipe--; + 
wf->wrLmReqsInPipe--; + wf->rdLmReqsInPipe--; if (instData.GLC) { gpuDynInst->exec_mask = wf->execMask(); wf->computeUnit->vrf[wf->simdId]-> @@ -40684,6 +40690,8 @@ namespace Gcn3ISA wf->decLGKMInstsIssued(); wf->wrGmReqsInPipe--; wf->rdGmReqsInPipe--; + wf->wrLmReqsInPipe--; + wf->rdLmReqsInPipe--; if (instData.GLC) { gpuDynInst->exec_mask = wf->execMask(); wf->computeUnit->vrf[wf->simdId]-> @@ -40953,6 +40961,8 @@ namespace Gcn3ISA wf->decLGKMInstsIssued(); wf->wrGmReqsInPipe--; wf->rdGmReqsInPipe--; + wf->wrLmReqsInPipe--; + wf->rdLmReqsInPipe--; if (instData.GLC) { gpuDynInst->exec_mask = wf->execMask(); wf->computeUnit->vrf[wf->simdId]-> @@ -41048,6 +41058,8 @@ namespace Gcn3ISA wf->decLGKMInstsIssued(); wf->wrGmReqsInPipe--; wf->rdGmReqsInPipe--; + wf->wrLmReqsInPipe--; + wf->rdLmReqsInPipe--; if (instData.GLC) { gpuDynInst->exec_mask = wf->execMask(); wf->computeUnit->vrf[wf->simdId]-> @@ -41172,6 +41184,8 @@ namespace Gcn3ISA wf->decLGKMInstsIssued(); wf->wrGmReqsInPipe--; wf->rdGmReqsInPipe--; + wf->wrLmReqsInPipe--; + wf->rdLmReqsInPipe--; if (instData.GLC) { gpuDynInst->exec_mask = wf->execMask(); wf->computeUnit->vrf[wf->simdId]-> @@ -41281,6 +41295,8 @@ namespace Gcn3ISA wf->decLGKMInstsIssued(); wf->wrGmReqsInPipe--; wf->rdGmReqsInPipe--; + wf->wrLmReqsInPipe--; + wf->rdLmReqsInPipe--; if (instData.GLC) { gpuDynInst->exec_mask = wf->execMask(); wf->computeUnit->vrf[wf->simdId]-> @@ -41378,6 +41394,8 @@ namespace Gcn3ISA wf->decLGKMInstsIssued(); wf->wrGmReqsInPipe--; wf->rdGmReqsInPipe--; + wf->wrLmReqsInPipe--; + wf->rdLmReqsInPipe--; if (instData.GLC) { gpuDynInst->exec_mask = wf->execMask(); wf->computeUnit->vrf[wf->simdId]-> @@ -41657,6 +41675,8 @@ namespace Gcn3ISA wf->decLGKMInstsIssued(); wf->wrGmReqsInPipe--; wf->rdGmReqsInPipe--; + wf->wrLmReqsInPipe--; + wf->rdLmReqsInPipe--; if (instData.GLC) { gpuDynInst->exec_mask = wf->execMask(); wf->computeUnit->vrf[wf->simdId]-> @@ -41755,6 +41775,8 @@ namespace Gcn3ISA wf->decLGKMInstsIssued(); 
wf->wrGmReqsInPipe--; wf->rdGmReqsInPipe--; + wf->wrLmReqsInPipe--; + wf->rdLmReqsInPipe--; if (instData.GLC) { gpuDynInst->exec_mask = wf->execMask(); wf->computeUnit->vrf[wf->simdId]-> -- 2.30.2