gpu-compute: Fix FLAT insts decrementing lgkm count early
author Kyle Roarty <kyleroarty1716@gmail.com>
Thu, 24 Dec 2020 01:30:37 +0000 (19:30 -0600)
committer Matt Sinclair <mattdsinclair@gmail.com>
Thu, 7 Jan 2021 17:12:31 +0000 (17:12 +0000)
FLAT instructions used to decrement lgkm count on execute, while the
GCN3 ISA specifies that lgkm count should be decremented on data being
returned or data being written.

This patch changes it so that lgkm is decremented after initiateAcc (for
stores) and after completeAcc (for loads) to better reflect the ISA
definition.

This fixes a bug where waitcnts would be satisfied even though the
memory access wasn't completed, which led to instructions using the
wrong data.

Change-Id: I596cb031af9cda8d47a1b5e146e4a4ffd793d36c
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/38696
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
Reviewed-by: Matthew Poremba <matthew.poremba@amd.com>
Maintainer: Matt Sinclair <mattdsinclair@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
src/gpu-compute/global_memory_pipeline.cc
src/gpu-compute/gpu_dyn_inst.cc

index bcd93f886210d2381340c73d925c2e8cd1f11e35..a2b24e4b478d0d37fbd3f1a77fbd6bd026e35fb2 100644 (file)
@@ -130,6 +130,9 @@ GlobalMemPipeline::exec()
         DPRINTF(GPUMem, "CU%d: WF[%d][%d]: Completing global mem instr %s\n",
                 m->cu_id, m->simdId, m->wfSlotId, m->disassemble());
         m->completeAcc(m);
+        if (m->isFlat() && m->isLoad()) {
+            w->decLGKMInstsIssued();
+        }
         w->decVMemInstsIssued();
 
         if (m->isLoad() || m->isAtomicRet()) {
@@ -193,6 +196,10 @@ GlobalMemPipeline::exec()
                 mp->disassemble(), mp->seqNum());
         mp->initiateAcc(mp);
 
+        if (mp->isFlat() && mp->isStore()) {
+            mp->wavefront()->decLGKMInstsIssued();
+        }
+
         if (mp->isStore() && mp->isGlobalSeg()) {
             mp->wavefront()->decExpInstsIssued();
         }
index 03ed6895192c76a2aa2f693c4e2e181d5445436d..38e4ecf492a0eb1417de2e892b8a5ba550d9ac6d 100644 (file)
@@ -819,7 +819,6 @@ GPUDynInst::resolveFlatSegment(const VectorMask &mask)
     if (executedAs() == Enums::SC_GLOBAL) {
         // no transormation for global segment
         wavefront()->execUnitId =  wavefront()->flatGmUnitId;
-        wavefront()->decLGKMInstsIssued();
         if (isLoad()) {
             wavefront()->rdLmReqsInPipe--;
         } else if (isStore()) {