gpu-compute, arch-gcn3: Change how waitcnts are implemented

author Tony Gutierrez <anthony.gutierrez@amd.com>

Fri, 10 Aug 2018 19:01:12 +0000 (15:01 -0400)

committer Anthony Gutierrez <anthony.gutierrez@amd.com>

Fri, 17 Jul 2020 16:36:23 +0000 (16:36 +0000)
author Tony Gutierrez <anthony.gutierrez@amd.com>
Fri, 10 Aug 2018 19:01:12 +0000 (15:01 -0400)
committer Anthony Gutierrez <anthony.gutierrez@amd.com>
Fri, 17 Jul 2020 16:36:23 +0000 (16:36 +0000)
diff --git a/src/arch/gcn3/insts/instructions.cc b/src/arch/gcn3/insts/instructions.cc

index 9987fade013d2eade8c58d7f75ca179f8b06c900..7c2cf0e49a86267d3557779f8220374e54af9e48 100644 (file)
--- a/src/arch/gcn3/insts/instructions.cc
+++ b/src/arch/gcn3/insts/instructions.cc
@@ -32565,6 +32565,7 @@ namespace Gcn3ISA
  
          vdst.write();
  
+        wf->decLGKMInstsIssued();
          wf->rdLmReqsInPipe--;
          wf->validateRequestCounters();
      } // execute
@@ -32635,6 +32636,7 @@ namespace Gcn3ISA
  
          vdst.write();
  
+        wf->decLGKMInstsIssued();
          wf->rdLmReqsInPipe--;
          wf->validateRequestCounters();
      } // execute
@@ -39400,6 +39402,8 @@ namespace Gcn3ISA
          Wavefront *wf = gpuDynInst->wavefront();
  
          if (wf->execMask().none()) {
+            wf->decVMemInstsIssued();
+            wf->decLGKMInstsIssued();
              wf->rdGmReqsInPipe--;
              wf->rdLmReqsInPipe--;
              return;
@@ -39496,6 +39500,8 @@ namespace Gcn3ISA
          Wavefront *wf = gpuDynInst->wavefront();
  
          if (wf->execMask().none()) {
+            wf->decVMemInstsIssued();
+            wf->decLGKMInstsIssued();
              wf->rdGmReqsInPipe--;
              wf->rdLmReqsInPipe--;
              return;
@@ -39592,6 +39598,8 @@ namespace Gcn3ISA
          Wavefront *wf = gpuDynInst->wavefront();
  
          if (wf->execMask().none()) {
+            wf->decVMemInstsIssued();
+            wf->decLGKMInstsIssued();
              wf->rdGmReqsInPipe--;
              wf->rdLmReqsInPipe--;
              return;
@@ -39660,6 +39668,8 @@ namespace Gcn3ISA
          Wavefront *wf = gpuDynInst->wavefront();
  
          if (wf->execMask().none()) {
+            wf->decVMemInstsIssued();
+            wf->decLGKMInstsIssued();
              wf->rdGmReqsInPipe--;
              wf->rdLmReqsInPipe--;
              return;
@@ -39728,6 +39738,8 @@ namespace Gcn3ISA
          Wavefront *wf = gpuDynInst->wavefront();
  
          if (wf->execMask().none()) {
+            wf->decVMemInstsIssued();
+            wf->decLGKMInstsIssued();
              wf->rdGmReqsInPipe--;
              wf->rdLmReqsInPipe--;
              return;
@@ -39805,6 +39817,8 @@ namespace Gcn3ISA
          Wavefront *wf = gpuDynInst->wavefront();
  
          if (wf->execMask().none()) {
+            wf->decVMemInstsIssued();
+            wf->decLGKMInstsIssued();
              wf->rdGmReqsInPipe--;
              wf->rdLmReqsInPipe--;
          }
@@ -39884,6 +39898,8 @@ namespace Gcn3ISA
          Wavefront *wf = gpuDynInst->wavefront();
  
          if (wf->execMask().none()) {
+            wf->decVMemInstsIssued();
+            wf->decLGKMInstsIssued();
              wf->wrGmReqsInPipe--;
              wf->wrLmReqsInPipe--;
              return;
@@ -39952,6 +39968,8 @@ namespace Gcn3ISA
          Wavefront *wf = gpuDynInst->wavefront();
  
          if (wf->execMask().none()) {
+            wf->decVMemInstsIssued();
+            wf->decLGKMInstsIssued();
              wf->wrGmReqsInPipe--;
              wf->wrLmReqsInPipe--;
              return;
@@ -40021,6 +40039,8 @@ namespace Gcn3ISA
          Wavefront *wf = gpuDynInst->wavefront();
  
          if (wf->execMask().none()) {
+            wf->decVMemInstsIssued();
+            wf->decLGKMInstsIssued();
              wf->wrGmReqsInPipe--;
              wf->wrLmReqsInPipe--;
              return;
@@ -40090,6 +40110,8 @@ namespace Gcn3ISA
          Wavefront *wf = gpuDynInst->wavefront();
  
          if (wf->execMask().none()) {
+            wf->decVMemInstsIssued();
+            wf->decLGKMInstsIssued();
              wf->wrGmReqsInPipe--;
              wf->wrLmReqsInPipe--;
              return;
@@ -40159,6 +40181,8 @@ namespace Gcn3ISA
          Wavefront *wf = gpuDynInst->wavefront();
  
          if (wf->execMask().none()) {
+            wf->decVMemInstsIssued();
+            wf->decLGKMInstsIssued();
              wf->wrGmReqsInPipe--;
              wf->wrLmReqsInPipe--;
              return;
@@ -40237,6 +40261,8 @@ namespace Gcn3ISA
          Wavefront *wf = gpuDynInst->wavefront();
  
          if (wf->execMask().none()) {
+            wf->decVMemInstsIssued();
+            wf->decLGKMInstsIssued();
              wf->wrGmReqsInPipe--;
              wf->wrLmReqsInPipe--;
              return;
@@ -40325,6 +40351,8 @@ namespace Gcn3ISA
          Wavefront *wf = gpuDynInst->wavefront();
  
          if (wf->execMask().none()) {
+            wf->decVMemInstsIssued();
+            wf->decLGKMInstsIssued();
              wf->wrGmReqsInPipe--;
              wf->rdGmReqsInPipe--;
              return;
@@ -40425,6 +40453,8 @@ namespace Gcn3ISA
          Wavefront *wf = gpuDynInst->wavefront();
  
          if (wf->execMask().none()) {
+            wf->decVMemInstsIssued();
+            wf->decLGKMInstsIssued();
              wf->wrGmReqsInPipe--;
              wf->rdGmReqsInPipe--;
              return;
@@ -40526,6 +40556,8 @@ namespace Gcn3ISA
          Wavefront *wf = gpuDynInst->wavefront();
  
          if (wf->execMask().none()) {
+            wf->decVMemInstsIssued();
+            wf->decLGKMInstsIssued();
              wf->wrGmReqsInPipe--;
              wf->rdGmReqsInPipe--;
              return;
@@ -40893,6 +40925,8 @@ namespace Gcn3ISA
          Wavefront *wf = gpuDynInst->wavefront();
  
          if (wf->execMask().none()) {
+            wf->decVMemInstsIssued();
+            wf->decLGKMInstsIssued();
              wf->wrGmReqsInPipe--;
              wf->rdGmReqsInPipe--;
              return;
@@ -40995,6 +41029,8 @@ namespace Gcn3ISA
          Wavefront *wf = gpuDynInst->wavefront();
  
          if (wf->execMask().none()) {
+            wf->decVMemInstsIssued();
+            wf->decLGKMInstsIssued();
              wf->wrGmReqsInPipe--;
              wf->rdGmReqsInPipe--;
              return;
diff --git a/src/gpu-compute/global_memory_pipeline.cc b/src/gpu-compute/global_memory_pipeline.cc

index dcc80f061c0bc69921e4cae73154dc2b75887dee..9fc515aef7359f320088fb0d8502c6353ea420d3 100644 (file)
--- a/src/gpu-compute/global_memory_pipeline.cc
+++ b/src/gpu-compute/global_memory_pipeline.cc
@@ -130,6 +130,7 @@ GlobalMemPipeline::exec()
          DPRINTF(GPUMem, "CU%d: WF[%d][%d]: Completing global mem instr %s\n",
                  m->cu_id, m->simdId, m->wfSlotId, m->disassemble());
          m->completeAcc(m);
+        w->decVMemInstsIssued();
  
          if (m->isLoad() || m->isAtomicRet()) {
              w->computeUnit->vrf[w->simdId]->
diff --git a/src/gpu-compute/gpu_dyn_inst.cc b/src/gpu-compute/gpu_dyn_inst.cc

index 2a49522da990dfc3faae8185c5b713b74468cfce..03ed6895192c76a2aa2f693c4e2e181d5445436d 100644 (file)
--- a/src/gpu-compute/gpu_dyn_inst.cc
+++ b/src/gpu-compute/gpu_dyn_inst.cc
@@ -819,6 +819,7 @@ GPUDynInst::resolveFlatSegment(const VectorMask &mask)
      if (executedAs() == Enums::SC_GLOBAL) {
          // no transormation for global segment
          wavefront()->execUnitId =  wavefront()->flatGmUnitId;
+        wavefront()->decLGKMInstsIssued();
          if (isLoad()) {
              wavefront()->rdLmReqsInPipe--;
          } else if (isStore()) {
@@ -838,6 +839,7 @@ GPUDynInst::resolveFlatSegment(const VectorMask &mask)
              }
          }
          wavefront()->execUnitId =  wavefront()->flatLmUnitId;
+        wavefront()->decVMemInstsIssued();
          if (isLoad()) {
              wavefront()->rdGmReqsInPipe--;
          } else if (isStore()) {
@@ -897,6 +899,7 @@ GPUDynInst::resolveFlatSegment(const VectorMask &mask)
              }
          }
          wavefront()->execUnitId =  wavefront()->flatLmUnitId;
+        wavefront()->decLGKMInstsIssued();
          if (isLoad()) {
              wavefront()->rdGmReqsInPipe--;
          } else if (isStore()) {
diff --git a/src/gpu-compute/local_memory_pipeline.cc b/src/gpu-compute/local_memory_pipeline.cc

index df576907cca34102abecf9bbbd306ee380d1b809..ca090e956a9b04f4652c0f2e38bd219ebdece5a4 100644 (file)
--- a/src/gpu-compute/local_memory_pipeline.cc
+++ b/src/gpu-compute/local_memory_pipeline.cc
@@ -76,6 +76,7 @@ LocalMemPipeline::exec()
          DPRINTF(GPUMem, "CU%d: WF[%d][%d]: Completing local mem instr %s\n",
                  m->cu_id, m->simdId, m->wfSlotId, m->disassemble());
          m->completeAcc(m);
+        w->decLGKMInstsIssued();
  
          if (m->isLoad() || m->isAtomicRet()) {
              w->computeUnit->vrf[w->simdId]->
diff --git a/src/gpu-compute/scalar_memory_pipeline.cc b/src/gpu-compute/scalar_memory_pipeline.cc

index 35b4ca5e596f1c9d5d5cabfeb6b64455e9acfff6..5e4496d51da52c9eea647c28cf18234bc7d16895 100644 (file)
--- a/src/gpu-compute/scalar_memory_pipeline.cc
+++ b/src/gpu-compute/scalar_memory_pipeline.cc
@@ -85,6 +85,7 @@ ScalarMemPipeline::exec()
          }
  
          m->completeAcc(m);
+        w->decLGKMInstsIssued();
  
          if (m->isLoad() || m->isAtomic()) {
              returnedLoads.pop();
diff --git a/src/gpu-compute/schedule_stage.cc b/src/gpu-compute/schedule_stage.cc

index fb52b6dd1c2e0a0f99f23681f3530c853d6803cc..005e6f61e2fb06be7f4d8f644f3100be7662ef32 100644 (file)
--- a/src/gpu-compute/schedule_stage.cc
+++ b/src/gpu-compute/schedule_stage.cc
@@ -135,6 +135,15 @@ ScheduleStage::exec()
              // this wave spends in SCH stage.
              wf->schCycles++;
              addToSchListStalls[j]++;
+        } else {
+            if (gpu_dyn_inst->isScalar() || gpu_dyn_inst->isGroupSeg()) {
+                wf->incLGKMInstsIssued();
+            } else {
+                wf->incVMemInstsIssued();
+                if (gpu_dyn_inst->isFlat()) {
+                    wf->incLGKMInstsIssued();
+                }
+            }
          }
      }
  
diff --git a/src/gpu-compute/wavefront.cc b/src/gpu-compute/wavefront.cc

index f72cd50fda03bd69a18644071aa7f1046b209f1f..0e737db0ec2ec75981f69ab813cba3344eba8456 100644 (file)
--- a/src/gpu-compute/wavefront.cc
+++ b/src/gpu-compute/wavefront.cc
@@ -53,6 +53,7 @@ Wavefront::Wavefront(const Params *p)
    : SimObject(p), wfSlotId(p->wf_slot_id), simdId(p->simdId),
      maxIbSize(p->max_ib_size), _gpuISA(*this),
      vmWaitCnt(-1), expWaitCnt(-1), lgkmWaitCnt(-1),
+    vmemInstsIssued(0), expInstsIssued(0), lgkmInstsIssued(0),
      barId(WFBarrier::InvalidID)
  {
      lastTrace = 0;
@@ -1253,37 +1254,27 @@ Wavefront::waitCntsSatisfied()
          return false;
      }
  
-    // If we reach here, that means waitCnt instruction is executed and
-    // the waitcnts are set by the execute method. Check if waitcnts are
-    // satisfied.
-
-    // current number of vector memory ops in flight
-    int vm_cnt = outstandingReqsWrGm + outstandingReqsRdGm;
-
-    // current number of export insts or vector memory writes in flight
-    int exp_cnt = outstandingReqsWrGm;
-
-    // current number of scalar/LDS memory ops in flight
-    // we do not consider GDS/message ops
-    int lgkm_cnt = outstandingReqsWrLm + outstandingReqsRdLm +
-        scalarOutstandingReqsRdGm + scalarOutstandingReqsWrGm;
-
+    /**
+     * If we reach here, that means an s_waitcnt instruction was executed
+     * and the waitcnts are set by the execute method. Check if waitcnts
+     * are satisfied.
+     */
      if (vmWaitCnt != -1) {
-        if (vm_cnt > vmWaitCnt) {
+        if (vmemInstsIssued > vmWaitCnt) {
              // vmWaitCnt not satisfied
              return false;
          }
      }
  
      if (expWaitCnt != -1) {
-        if (exp_cnt > expWaitCnt) {
+        if (expInstsIssued > expWaitCnt) {
              // expWaitCnt not satisfied
              return false;
          }
      }
  
      if (lgkmWaitCnt != -1) {
-        if (lgkm_cnt > lgkmWaitCnt) {
+        if (lgkmInstsIssued > lgkmWaitCnt) {
              // lgkmWaitCnt not satisfied
              return false;
          }
@@ -1355,6 +1346,42 @@ Wavefront::clearWaitCnts()
      status = S_RUNNING;
  }
  
+void
+Wavefront::incVMemInstsIssued() // count one more issued vector-memory inst
+{
+    ++vmemInstsIssued; // compared against vmWaitCnt in waitCntsSatisfied()
+}
+
+void
+Wavefront::incExpInstsIssued() // NOTE(review): no caller visible; confirm use
+{
+    ++expInstsIssued; // compared against expWaitCnt in waitCntsSatisfied()
+}
+
+void
+Wavefront::incLGKMInstsIssued() // count one more issued LGKM instruction
+{
+    ++lgkmInstsIssued; // compared against lgkmWaitCnt in waitCntsSatisfied()
+}
+
+void
+Wavefront::decVMemInstsIssued() // drop one inst from the vector-memory count
+{
+    --vmemInstsIssued; // no underflow guard: unpaired calls go negative
+}
+
+void
+Wavefront::decExpInstsIssued() // NOTE(review): no caller visible; confirm use
+{
+    --expInstsIssued; // no underflow guard: unpaired calls go negative
+}
+
+void
+Wavefront::decLGKMInstsIssued() // drop one inst from the LGKM count
+{
+    --lgkmInstsIssued; // no underflow guard: unpaired calls go negative
+}
+
  Addr
  Wavefront::pc() const
  {
diff --git a/src/gpu-compute/wavefront.hh b/src/gpu-compute/wavefront.hh

index e07af0eccfa112c08578b09cdd56f941552c0473..34e45facf31bd712dc7ec514d5a664ee3b7d595f 100644 (file)
--- a/src/gpu-compute/wavefront.hh
+++ b/src/gpu-compute/wavefront.hh
@@ -304,6 +304,13 @@ class Wavefront : public SimObject
      void setWaitCnts(int vm_wait_cnt, int exp_wait_cnt, int lgkm_wait_cnt);
      void clearWaitCnts();
  
+    void incVMemInstsIssued();
+    void incExpInstsIssued();
+    void incLGKMInstsIssued();
+    void decVMemInstsIssued();
+    void decExpInstsIssued();
+    void decLGKMInstsIssued();
+
      /** Freeing VRF space */
      void freeRegisterFile();
  
@@ -343,6 +350,9 @@ class Wavefront : public SimObject
      int vmWaitCnt;
      int expWaitCnt;
      int lgkmWaitCnt;
+    int vmemInstsIssued;
+    int expInstsIssued;
+    int lgkmInstsIssued;
      status_e status;
      Addr _pc;
      VectorMask _execMask;
author	Tony Gutierrez <anthony.gutierrez@amd.com>
	Fri, 10 Aug 2018 19:01:12 +0000 (15:01 -0400)
committer	Anthony Gutierrez <anthony.gutierrez@amd.com>
	Fri, 17 Jul 2020 16:36:23 +0000 (16:36 +0000)
src/arch/gcn3/insts/instructions.cc		patch | blob | history
src/gpu-compute/global_memory_pipeline.cc		patch | blob | history
src/gpu-compute/gpu_dyn_inst.cc		patch | blob | history
src/gpu-compute/local_memory_pipeline.cc		patch | blob | history
src/gpu-compute/scalar_memory_pipeline.cc		patch | blob | history
src/gpu-compute/schedule_stage.cc		patch | blob | history
src/gpu-compute/wavefront.cc		patch | blob | history
src/gpu-compute/wavefront.hh		patch | blob | history