arch-gcn3: Implementation of s_sleep
author Alexandru Dutu <alexandru.dutu@amd.com>
Tue, 22 May 2018 18:09:54 +0000 (14:09 -0400)
committer Matt Sinclair <mattdsinclair@gmail.com>
Thu, 4 Feb 2021 00:07:10 +0000 (00:07 +0000)
This changeset implements the s_sleep instruction in a similar
way to s_waitcnt.

Change-Id: I4811c318ac2c76c485e2bfd9d93baa1205ecf183
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/39115
Maintainer: Matthew Poremba <matthew.poremba@amd.com>
Maintainer: Matt Sinclair <mattdsinclair@gmail.com>
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
src/arch/gcn3/insts/instructions.cc
src/gpu-compute/GPUStaticInstFlags.py
src/gpu-compute/gpu_dyn_inst.cc
src/gpu-compute/gpu_dyn_inst.hh
src/gpu-compute/gpu_static_inst.hh
src/gpu-compute/schedule_stage.cc
src/gpu-compute/scoreboard_check_stage.cc
src/gpu-compute/scoreboard_check_stage.hh
src/gpu-compute/wavefront.cc
src/gpu-compute/wavefront.hh

index 8c02951fd26fcf452805a3d78bba2610a4542edf..03b11ab4887da7937f1234faa4d66ee5be5997e1 100644 (file)
@@ -4187,6 +4187,8 @@ namespace Gcn3ISA
     Inst_SOPP__S_SLEEP::Inst_SOPP__S_SLEEP(InFmt_SOPP *iFmt)
         : Inst_SOPP(iFmt, "s_sleep")
     {
+        setFlag(ALU);
+        setFlag(Sleep);
     } // Inst_SOPP__S_SLEEP
 
     Inst_SOPP__S_SLEEP::~Inst_SOPP__S_SLEEP()
@@ -4197,8 +4199,12 @@ namespace Gcn3ISA
     void
     Inst_SOPP__S_SLEEP::execute(GPUDynInstPtr gpuDynInst)
     {
-        panicUnimplemented();
-    }
+        ScalarRegI32 simm16 = (ScalarRegI32)instData.SIMM16;
+        gpuDynInst->wavefront()->setStatus(Wavefront::S_STALLED_SLEEP);
+        // sleep duration is specified in multiples of 64 cycles
+        gpuDynInst->wavefront()->setSleepTime(64 * simm16);
+    } // execute
+    // --- Inst_SOPP__S_SETPRIO class methods ---
 
     Inst_SOPP__S_SETPRIO::Inst_SOPP__S_SETPRIO(InFmt_SOPP *iFmt)
         : Inst_SOPP(iFmt, "s_setprio")
index ad4c6c3f7bea16fe4502cbfc7cb7e9307e2d73cd..1dc143ce235eee23970b7c61dd4b87128e4a8cd3 100644 (file)
@@ -48,6 +48,7 @@ class GPUStaticInstFlags(Enum):
         'UnconditionalJump', #
         'SpecialOp',         # Special op
         'Waitcnt',           # Is a waitcnt instruction
+        'Sleep',             # Is a sleep instruction
 
         # Memory ops
         'MemBarrier',        # Barrier instruction
index a17a93fcd3125d66ce8dd94d4a97bb3636c77c62..b8276325d828d096ce2f8f370206744b2b1c8e44 100644 (file)
@@ -398,6 +398,12 @@ GPUDynInst::isWaitcnt() const
     return _staticInst->isWaitcnt();
 }
 
+bool
+GPUDynInst::isSleep() const
+{
+    return _staticInst->isSleep();
+}
+
 bool
 GPUDynInst::isBarrier() const
 {
index 8c7cf878758446cf36b078894eb3739e55eb3e32..851a46adda8eaadf2f13e9e91668d7acefb30f73 100644 (file)
@@ -180,6 +180,7 @@ class GPUDynInst : public GPUExecContext
     bool isUnconditionalJump() const;
     bool isSpecialOp() const;
     bool isWaitcnt() const;
+    bool isSleep() const;
 
     bool isBarrier() const;
     bool isMemSync() const;
index 88fd9f9913849c2622633dc966fd67bf08185b14..f973f2f4301ce88d559eed3017f115089ff17a78 100644 (file)
@@ -119,6 +119,7 @@ class GPUStaticInst : public GPUStaticInstFlags
 
     bool isSpecialOp() const { return _flags[SpecialOp]; }
     bool isWaitcnt() const { return _flags[Waitcnt]; }
+    bool isSleep() const { return _flags[Sleep]; }
 
     bool isBarrier() const { return _flags[MemBarrier]; }
     bool isMemSync() const { return _flags[MemSync]; }
index 02580fe7da1b1c46642a04e53ce622f1b6bf1010..8a2ea18294289b8c0cbec2f8e9650edbad593eee 100644 (file)
@@ -317,6 +317,9 @@ ScheduleStage::addToSchList(int exeType, const GPUDynInstPtr &gpu_dyn_inst)
         if (wf->isOldestInstWaitcnt()) {
             wf->setStatus(Wavefront::S_WAITCNT);
         }
+        if (wf->isOldestInstSleep()) {
+            wf->setStatus(Wavefront::S_STALLED_SLEEP);
+        }
         if (!gpu_dyn_inst->isScalar()) {
             computeUnit.vrf[wf->simdId]
                 ->scheduleReadOperands(wf, gpu_dyn_inst);
index c246279d6003b34f38be7e57fd14ab32f7327ead..08ce6a193bda9625aed60daa928b95b76de07edb 100644 (file)
@@ -92,6 +92,15 @@ ScoreboardCheckStage::ready(Wavefront *w, nonrdytype_e *rdyStatus,
         }
     }
 
+    // sleep instruction has been dispatched or executed: next
+    // instruction should be blocked until the sleep period expires.
+    if (w->getStatus() == Wavefront::S_STALLED_SLEEP) {
+        if (!w->sleepDone()) {
+            *rdyStatus = NRDY_SLEEP;
+            return false;
+        }
+    }
+
     // Is the wave waiting at a barrier. Check this condition BEFORE checking
     // for instruction buffer occupancy to avoid a deadlock when the barrier is
     // the last instruction in the instruction buffer.
@@ -143,7 +152,8 @@ ScoreboardCheckStage::ready(Wavefront *w, nonrdytype_e *rdyStatus,
     // through this logic and always return not ready.
     if (!(ii->isBarrier() || ii->isNop() || ii->isReturn() || ii->isBranch() ||
          ii->isALU() || ii->isLoad() || ii->isStore() || ii->isAtomic() ||
-         ii->isEndOfKernel() || ii->isMemSync() || ii->isFlat())) {
+         ii->isEndOfKernel() || ii->isMemSync() || ii->isFlat() ||
+         ii->isSleep())) {
         panic("next instruction: %s is of unknown type\n", ii->disassemble());
     }
 
index 419dffb9f8df8504e7666f462aff504acbd41f85..c2c114ad24ec1eee74dfb437af1aabe8d3b53052 100644 (file)
@@ -65,6 +65,7 @@ class ScoreboardCheckStage
         NRDY_WF_STOP,
         NRDY_IB_EMPTY,
         NRDY_WAIT_CNT,
+        NRDY_SLEEP,
         NRDY_BARRIER_WAIT,
         NRDY_VGPR_NRDY,
         NRDY_SGPR_NRDY,
index 343b5c9f2df23b151a44e55b60589d21d4ddae55..e442e2adb8b1eaaaa58286d62ee4f4a73c32a194 100644 (file)
@@ -49,7 +49,7 @@ Wavefront::Wavefront(const Params &p)
     maxIbSize(p.max_ib_size), _gpuISA(*this),
     vmWaitCnt(-1), expWaitCnt(-1), lgkmWaitCnt(-1),
     vmemInstsIssued(0), expInstsIssued(0), lgkmInstsIssued(0),
-    barId(WFBarrier::InvalidID), stats(this)
+    sleepCnt(0), barId(WFBarrier::InvalidID), stats(this)
 {
     lastTrace = 0;
     execUnitId = -1;
@@ -583,6 +583,20 @@ Wavefront::isLmInstruction(GPUDynInstPtr ii)
     return false;
 }
 
+bool
+Wavefront::isOldestInstSleep()
+{
+    if (instructionBuffer.empty())
+        return false;
+
+    GPUDynInstPtr ii = instructionBuffer.front();
+
+    if (ii->isSleep()) {
+        return true;
+    }
+    return false;
+}
+
 bool
 Wavefront::isOldestInstWaitcnt()
 {
@@ -1226,6 +1240,32 @@ Wavefront::waitCntsSatisfied()
     return true;
 }
 
+bool
+Wavefront::sleepDone()
+{
+    assert(status == S_STALLED_SLEEP);
+
+    // if the sleep count has not been set, then the sleep instruction has not
+    // been executed yet, so we will return false (sleep not done) without
+    // changing the wavefront status
+    if (sleepCnt == 0)
+        return false;
+
+    sleepCnt--;
+    if (sleepCnt != 0)
+        return false;
+
+    status = S_RUNNING;
+    return true;
+}
+
+void
+Wavefront::setSleepTime(int sleep_time)
+{
+    assert(sleepCnt == 0);
+    sleepCnt = sleep_time;
+}
+
 void
 Wavefront::setWaitCnts(int vm_wait_cnt, int exp_wait_cnt, int lgkm_wait_cnt)
 {
index 7b617c63fb798430716d81910f1501612583da8c..1edc6dc4988ccb14f2a63455e5d4d2a406b7d123 100644 (file)
@@ -68,6 +68,9 @@ class Wavefront : public SimObject
         S_RUNNING,
         // wavefront is stalled
         S_STALLED,
+
+        S_STALLED_SLEEP,
+
         /**
          * wavefront has unsatisfied wait counts
          *
@@ -134,6 +137,7 @@ class Wavefront : public SimObject
     bool isGmInstruction(GPUDynInstPtr ii);
     bool isLmInstruction(GPUDynInstPtr ii);
     bool isOldestInstWaitcnt();
+    bool isOldestInstSleep();
     bool isOldestInstGMem();
     bool isOldestInstLMem();
     bool isOldestInstPrivMem();
@@ -276,6 +280,9 @@ class Wavefront : public SimObject
     /** Freeing VRF space */
     void freeRegisterFile();
 
+    bool sleepDone();
+    void setSleepTime(int sleep_time);
+
     TheGpuISA::GPUISA&
     gpuISA()
     {
@@ -315,6 +322,7 @@ class Wavefront : public SimObject
     int vmemInstsIssued;
     int expInstsIssued;
     int lgkmInstsIssued;
+    int sleepCnt;
     status_e status;
     Addr _pc;
     VectorMask _execMask;