From: Alexandru Dutu Date: Tue, 22 May 2018 18:09:54 +0000 (-0400) Subject: arch-gcn3: Implementation of s_sleep X-Git-Tag: develop-gem5-snapshot~149 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=14d6e8fac4700036685dc19db837c3f3627bce23;p=gem5.git arch-gcn3: Implementation of s_sleep This changeset implements the s_sleep instruction in a similar way to s_waitcnt. Change-Id: I4811c318ac2c76c485e2bfd9d93baa1205ecf183 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/39115 Maintainer: Matthew Poremba Maintainer: Matt Sinclair Reviewed-by: Matt Sinclair Tested-by: kokoro --- diff --git a/src/arch/gcn3/insts/instructions.cc b/src/arch/gcn3/insts/instructions.cc index 8c02951fd..03b11ab48 100644 --- a/src/arch/gcn3/insts/instructions.cc +++ b/src/arch/gcn3/insts/instructions.cc @@ -4187,6 +4187,8 @@ namespace Gcn3ISA Inst_SOPP__S_SLEEP::Inst_SOPP__S_SLEEP(InFmt_SOPP *iFmt) : Inst_SOPP(iFmt, "s_sleep") { + setFlag(ALU); + setFlag(Sleep); } // Inst_SOPP__S_SLEEP Inst_SOPP__S_SLEEP::~Inst_SOPP__S_SLEEP() @@ -4197,8 +4199,12 @@ namespace Gcn3ISA void Inst_SOPP__S_SLEEP::execute(GPUDynInstPtr gpuDynInst) { - panicUnimplemented(); - } + ScalarRegI32 simm16 = (ScalarRegI32)instData.SIMM16; + gpuDynInst->wavefront()->setStatus(Wavefront::S_STALLED_SLEEP); + // sleep duration is specified in multiples of 64 cycles + gpuDynInst->wavefront()->setSleepTime(64 * simm16); + } // execute + // --- Inst_SOPP__S_SETPRIO class methods --- Inst_SOPP__S_SETPRIO::Inst_SOPP__S_SETPRIO(InFmt_SOPP *iFmt) : Inst_SOPP(iFmt, "s_setprio") diff --git a/src/gpu-compute/GPUStaticInstFlags.py b/src/gpu-compute/GPUStaticInstFlags.py index ad4c6c3f7..1dc143ce2 100644 --- a/src/gpu-compute/GPUStaticInstFlags.py +++ b/src/gpu-compute/GPUStaticInstFlags.py @@ -48,6 +48,7 @@ class GPUStaticInstFlags(Enum): 'UnconditionalJump', # 'SpecialOp', # Special op 'Waitcnt', # Is a waitcnt instruction + 'Sleep', # Is a sleep instruction # Memory ops 'MemBarrier', # Barrier instruction diff 
--git a/src/gpu-compute/gpu_dyn_inst.cc b/src/gpu-compute/gpu_dyn_inst.cc index a17a93fcd..b8276325d 100644 --- a/src/gpu-compute/gpu_dyn_inst.cc +++ b/src/gpu-compute/gpu_dyn_inst.cc @@ -398,6 +398,12 @@ GPUDynInst::isWaitcnt() const return _staticInst->isWaitcnt(); } +bool +GPUDynInst::isSleep() const +{ + return _staticInst->isSleep(); +} + bool GPUDynInst::isBarrier() const { diff --git a/src/gpu-compute/gpu_dyn_inst.hh b/src/gpu-compute/gpu_dyn_inst.hh index 8c7cf8787..851a46add 100644 --- a/src/gpu-compute/gpu_dyn_inst.hh +++ b/src/gpu-compute/gpu_dyn_inst.hh @@ -180,6 +180,7 @@ class GPUDynInst : public GPUExecContext bool isUnconditionalJump() const; bool isSpecialOp() const; bool isWaitcnt() const; + bool isSleep() const; bool isBarrier() const; bool isMemSync() const; diff --git a/src/gpu-compute/gpu_static_inst.hh b/src/gpu-compute/gpu_static_inst.hh index 88fd9f991..f973f2f43 100644 --- a/src/gpu-compute/gpu_static_inst.hh +++ b/src/gpu-compute/gpu_static_inst.hh @@ -119,6 +119,7 @@ class GPUStaticInst : public GPUStaticInstFlags bool isSpecialOp() const { return _flags[SpecialOp]; } bool isWaitcnt() const { return _flags[Waitcnt]; } + bool isSleep() const { return _flags[Sleep]; } bool isBarrier() const { return _flags[MemBarrier]; } bool isMemSync() const { return _flags[MemSync]; } diff --git a/src/gpu-compute/schedule_stage.cc b/src/gpu-compute/schedule_stage.cc index 02580fe7d..8a2ea1829 100644 --- a/src/gpu-compute/schedule_stage.cc +++ b/src/gpu-compute/schedule_stage.cc @@ -317,6 +317,9 @@ ScheduleStage::addToSchList(int exeType, const GPUDynInstPtr &gpu_dyn_inst) if (wf->isOldestInstWaitcnt()) { wf->setStatus(Wavefront::S_WAITCNT); } + if (wf->isOldestInstSleep()) { + wf->setStatus(Wavefront::S_STALLED_SLEEP); + } if (!gpu_dyn_inst->isScalar()) { computeUnit.vrf[wf->simdId] ->scheduleReadOperands(wf, gpu_dyn_inst); diff --git a/src/gpu-compute/scoreboard_check_stage.cc b/src/gpu-compute/scoreboard_check_stage.cc index c246279d6..08ce6a193 
100644 --- a/src/gpu-compute/scoreboard_check_stage.cc +++ b/src/gpu-compute/scoreboard_check_stage.cc @@ -92,6 +92,15 @@ ScoreboardCheckStage::ready(Wavefront *w, nonrdytype_e *rdyStatus, } } + // sleep instruction has been dispatched or executed: next + // instruction should be blocked until the sleep period expires. + if (w->getStatus() == Wavefront::S_STALLED_SLEEP) { + if (!w->sleepDone()) { + *rdyStatus = NRDY_SLEEP; + return false; + } + } + // Is the wave waiting at a barrier. Check this condition BEFORE checking // for instruction buffer occupancy to avoid a deadlock when the barrier is // the last instruction in the instruction buffer. @@ -143,7 +152,8 @@ ScoreboardCheckStage::ready(Wavefront *w, nonrdytype_e *rdyStatus, // through this logic and always return not ready. if (!(ii->isBarrier() || ii->isNop() || ii->isReturn() || ii->isBranch() || ii->isALU() || ii->isLoad() || ii->isStore() || ii->isAtomic() || - ii->isEndOfKernel() || ii->isMemSync() || ii->isFlat())) { + ii->isEndOfKernel() || ii->isMemSync() || ii->isFlat() || + ii->isSleep())) { panic("next instruction: %s is of unknown type\n", ii->disassemble()); } diff --git a/src/gpu-compute/scoreboard_check_stage.hh b/src/gpu-compute/scoreboard_check_stage.hh index 419dffb9f..c2c114ad2 100644 --- a/src/gpu-compute/scoreboard_check_stage.hh +++ b/src/gpu-compute/scoreboard_check_stage.hh @@ -65,6 +65,7 @@ class ScoreboardCheckStage NRDY_WF_STOP, NRDY_IB_EMPTY, NRDY_WAIT_CNT, + NRDY_SLEEP, NRDY_BARRIER_WAIT, NRDY_VGPR_NRDY, NRDY_SGPR_NRDY, diff --git a/src/gpu-compute/wavefront.cc b/src/gpu-compute/wavefront.cc index 343b5c9f2..e442e2adb 100644 --- a/src/gpu-compute/wavefront.cc +++ b/src/gpu-compute/wavefront.cc @@ -49,7 +49,7 @@ Wavefront::Wavefront(const Params &p) maxIbSize(p.max_ib_size), _gpuISA(*this), vmWaitCnt(-1), expWaitCnt(-1), lgkmWaitCnt(-1), vmemInstsIssued(0), expInstsIssued(0), lgkmInstsIssued(0), - barId(WFBarrier::InvalidID), stats(this) + sleepCnt(0), barId(WFBarrier::InvalidID), 
stats(this) { lastTrace = 0; execUnitId = -1; @@ -583,6 +583,20 @@ Wavefront::isLmInstruction(GPUDynInstPtr ii) return false; } +bool +Wavefront::isOldestInstSleep() +{ + if (instructionBuffer.empty()) + return false; + + GPUDynInstPtr ii = instructionBuffer.front(); + + if (ii->isSleep()) { + return true; + } + return false; +} + bool Wavefront::isOldestInstWaitcnt() { @@ -1226,6 +1240,32 @@ Wavefront::waitCntsSatisfied() return true; } +bool +Wavefront::sleepDone() +{ + assert(status == S_STALLED_SLEEP); + + // if the sleep count has not been set, then the sleep instruction has not + // been executed yet, so we will return false without setting the wavefront + // status + if (sleepCnt == 0) + return false; + + sleepCnt--; + if (sleepCnt != 0) + return false; + + status = S_RUNNING; + return true; +} + +void +Wavefront::setSleepTime(int sleep_time) +{ + assert(sleepCnt == 0); + sleepCnt = sleep_time; +} + void Wavefront::setWaitCnts(int vm_wait_cnt, int exp_wait_cnt, int lgkm_wait_cnt) { diff --git a/src/gpu-compute/wavefront.hh b/src/gpu-compute/wavefront.hh index 7b617c63f..1edc6dc49 100644 --- a/src/gpu-compute/wavefront.hh +++ b/src/gpu-compute/wavefront.hh @@ -68,6 +68,9 @@ class Wavefront : public SimObject S_RUNNING, // wavefront is stalled S_STALLED, + + S_STALLED_SLEEP, + /** * wavefront has unsatisfied wait counts * @@ -134,6 +137,7 @@ class Wavefront : public SimObject bool isGmInstruction(GPUDynInstPtr ii); bool isLmInstruction(GPUDynInstPtr ii); bool isOldestInstWaitcnt(); + bool isOldestInstSleep(); bool isOldestInstGMem(); bool isOldestInstLMem(); bool isOldestInstPrivMem(); @@ -276,6 +280,9 @@ class Wavefront : public SimObject /** Freeing VRF space */ void freeRegisterFile(); + bool sleepDone(); + void setSleepTime(int sleep_time); + TheGpuISA::GPUISA& gpuISA() { @@ -315,6 +322,7 @@ class Wavefront : public SimObject int vmemInstsIssued; int expInstsIssued; int lgkmInstsIssued; + int sleepCnt; status_e status; Addr _pc; VectorMask _execMask;