From 62ec973244f5e905c3934b80cc75bfd51e63a8ed Mon Sep 17 00:00:00 2001
From: Kyle Roarty <kyleroarty1716@gmail.com>
Date: Wed, 5 Aug 2020 13:24:56 -0500
Subject: [PATCH] arch-gcn3: Free registers when execMask = 0

Flat instructions free some of their registers through their memory
requests, in particular a call to scheduleWriteOperandsFromLoad(),
which gets called from GlobalMemPipeline::exec.

When execMask is 0, the instruction doesn't issue a memory request, so
scheduleWriteOperandsFromLoad() is never called and those registers
are never freed.

This patch adds in a call to scheduleWriteOperandsFromLoad() when
execMask is 0 for Flat Load and AtomicReturn instructions, as those
are the instructions that call scheduleWriteOperandsFromLoad()
in the memory pipeline.

This patch also adds in a missing return statement when execMask is 0
in one of the Flat instructions.

Change-Id: I09296adb7401e7515d3cedceb780a5df4598b109
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/32234
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
Reviewed-by: Anthony Gutierrez <anthony.gutierrez@amd.com>
Maintainer: Anthony Gutierrez <anthony.gutierrez@amd.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/arch/gcn3/insts/instructions.cc | 74 +++++++++++++++++++++++++++++
 1 file changed, 74 insertions(+)

diff --git a/src/arch/gcn3/insts/instructions.cc b/src/arch/gcn3/insts/instructions.cc
index 955d8013d..fd89ae291 100644
--- a/src/arch/gcn3/insts/instructions.cc
+++ b/src/arch/gcn3/insts/instructions.cc
@@ -39406,6 +39406,9 @@ namespace Gcn3ISA
             wf->decLGKMInstsIssued();
             wf->rdGmReqsInPipe--;
             wf->rdLmReqsInPipe--;
+            gpuDynInst->exec_mask = wf->execMask();
+            wf->computeUnit->vrf[wf->simdId]->
+                scheduleWriteOperandsFromLoad(wf, gpuDynInst);
             return;
         }
 
@@ -39504,6 +39507,9 @@ namespace Gcn3ISA
             wf->decLGKMInstsIssued();
             wf->rdGmReqsInPipe--;
             wf->rdLmReqsInPipe--;
+            gpuDynInst->exec_mask = wf->execMask();
+            wf->computeUnit->vrf[wf->simdId]->
+                scheduleWriteOperandsFromLoad(wf, gpuDynInst);
             return;
         }
 
@@ -39602,6 +39608,9 @@ namespace Gcn3ISA
             wf->decLGKMInstsIssued();
             wf->rdGmReqsInPipe--;
             wf->rdLmReqsInPipe--;
+            gpuDynInst->exec_mask = wf->execMask();
+            wf->computeUnit->vrf[wf->simdId]->
+                scheduleWriteOperandsFromLoad(wf, gpuDynInst);
             return;
         }
 
@@ -39672,6 +39681,9 @@ namespace Gcn3ISA
             wf->decLGKMInstsIssued();
             wf->rdGmReqsInPipe--;
             wf->rdLmReqsInPipe--;
+            gpuDynInst->exec_mask = wf->execMask();
+            wf->computeUnit->vrf[wf->simdId]->
+                scheduleWriteOperandsFromLoad(wf, gpuDynInst);
             return;
         }
 
@@ -39742,6 +39754,9 @@ namespace Gcn3ISA
             wf->decLGKMInstsIssued();
             wf->rdGmReqsInPipe--;
             wf->rdLmReqsInPipe--;
+            gpuDynInst->exec_mask = wf->execMask();
+            wf->computeUnit->vrf[wf->simdId]->
+                scheduleWriteOperandsFromLoad(wf, gpuDynInst);
             return;
         }
 
@@ -39821,6 +39836,10 @@ namespace Gcn3ISA
             wf->decLGKMInstsIssued();
             wf->rdGmReqsInPipe--;
             wf->rdLmReqsInPipe--;
+            gpuDynInst->exec_mask = wf->execMask();
+            wf->computeUnit->vrf[wf->simdId]->
+                scheduleWriteOperandsFromLoad(wf, gpuDynInst);
+            return;
         }
 
         gpuDynInst->execUnitId = wf->execUnitId;
@@ -40355,6 +40374,11 @@ namespace Gcn3ISA
             wf->decLGKMInstsIssued();
             wf->wrGmReqsInPipe--;
             wf->rdGmReqsInPipe--;
+            if (instData.GLC) {
+                gpuDynInst->exec_mask = wf->execMask();
+                wf->computeUnit->vrf[wf->simdId]->
+                    scheduleWriteOperandsFromLoad(wf, gpuDynInst);
+            }
             return;
         }
 
@@ -40457,6 +40481,11 @@ namespace Gcn3ISA
             wf->decLGKMInstsIssued();
             wf->wrGmReqsInPipe--;
             wf->rdGmReqsInPipe--;
+            if (instData.GLC) {
+                gpuDynInst->exec_mask = wf->execMask();
+                wf->computeUnit->vrf[wf->simdId]->
+                    scheduleWriteOperandsFromLoad(wf, gpuDynInst);
+            }
             return;
         }
 
@@ -40560,6 +40589,11 @@ namespace Gcn3ISA
             wf->decLGKMInstsIssued();
             wf->wrGmReqsInPipe--;
             wf->rdGmReqsInPipe--;
+            if (instData.GLC) {
+                gpuDynInst->exec_mask = wf->execMask();
+                wf->computeUnit->vrf[wf->simdId]->
+                    scheduleWriteOperandsFromLoad(wf, gpuDynInst);
+            }
             return;
         }
 
@@ -40650,6 +40684,11 @@ namespace Gcn3ISA
             wf->decLGKMInstsIssued();
             wf->wrGmReqsInPipe--;
             wf->rdGmReqsInPipe--;
+            if (instData.GLC) {
+                gpuDynInst->exec_mask = wf->execMask();
+                wf->computeUnit->vrf[wf->simdId]->
+                    scheduleWriteOperandsFromLoad(wf, gpuDynInst);
+            }
             return;
         }
 
@@ -40914,6 +40953,11 @@ namespace Gcn3ISA
             wf->decLGKMInstsIssued();
             wf->wrGmReqsInPipe--;
             wf->rdGmReqsInPipe--;
+            if (instData.GLC) {
+                gpuDynInst->exec_mask = wf->execMask();
+                wf->computeUnit->vrf[wf->simdId]->
+                    scheduleWriteOperandsFromLoad(wf, gpuDynInst);
+            }
             return;
         }
 
@@ -41004,6 +41048,11 @@ namespace Gcn3ISA
             wf->decLGKMInstsIssued();
             wf->wrGmReqsInPipe--;
             wf->rdGmReqsInPipe--;
+            if (instData.GLC) {
+                gpuDynInst->exec_mask = wf->execMask();
+                wf->computeUnit->vrf[wf->simdId]->
+                    scheduleWriteOperandsFromLoad(wf, gpuDynInst);
+            }
             return;
         }
 
@@ -41123,6 +41172,11 @@ namespace Gcn3ISA
             wf->decLGKMInstsIssued();
             wf->wrGmReqsInPipe--;
             wf->rdGmReqsInPipe--;
+            if (instData.GLC) {
+                gpuDynInst->exec_mask = wf->execMask();
+                wf->computeUnit->vrf[wf->simdId]->
+                    scheduleWriteOperandsFromLoad(wf, gpuDynInst);
+            }
             return;
         }
 
@@ -41227,6 +41281,11 @@ namespace Gcn3ISA
             wf->decLGKMInstsIssued();
             wf->wrGmReqsInPipe--;
             wf->rdGmReqsInPipe--;
+            if (instData.GLC) {
+                gpuDynInst->exec_mask = wf->execMask();
+                wf->computeUnit->vrf[wf->simdId]->
+                    scheduleWriteOperandsFromLoad(wf, gpuDynInst);
+            }
             return;
         }
 
@@ -41319,6 +41378,11 @@ namespace Gcn3ISA
             wf->decLGKMInstsIssued();
             wf->wrGmReqsInPipe--;
             wf->rdGmReqsInPipe--;
+            if (instData.GLC) {
+                gpuDynInst->exec_mask = wf->execMask();
+                wf->computeUnit->vrf[wf->simdId]->
+                    scheduleWriteOperandsFromLoad(wf, gpuDynInst);
+            }
             return;
         }
 
@@ -41593,6 +41657,11 @@ namespace Gcn3ISA
             wf->decLGKMInstsIssued();
             wf->wrGmReqsInPipe--;
             wf->rdGmReqsInPipe--;
+            if (instData.GLC) {
+                gpuDynInst->exec_mask = wf->execMask();
+                wf->computeUnit->vrf[wf->simdId]->
+                    scheduleWriteOperandsFromLoad(wf, gpuDynInst);
+            }
             return;
         }
 
@@ -41686,6 +41755,11 @@ namespace Gcn3ISA
             wf->decLGKMInstsIssued();
             wf->wrGmReqsInPipe--;
             wf->rdGmReqsInPipe--;
+            if (instData.GLC) {
+                gpuDynInst->exec_mask = wf->execMask();
+                wf->computeUnit->vrf[wf->simdId]->
+                    scheduleWriteOperandsFromLoad(wf, gpuDynInst);
+            }
             return;
         }
 
-- 
2.30.2