#include "gpu-compute/vector_register_file.hh"
#include "gpu-compute/wavefront.hh"
// Constructor (shown as a patch: '-' lines are the removed form, '+' lines
// the replacement).
// Removed form: took a ComputeUnitParams pointer and initialized
// computeUnit to nullptr.
// Added form: takes the params and a ComputeUnit by reference, binds
// computeUnit to cu and builds _name from cu.name() in the initializer
// list, and hands &cu to the stats member (presumably as its parent stats
// group -- confirm against the stats member's declaration).
// Both forms read the queue size and max wave requests from the params and
// start the inflight store/load counters at zero.
-GlobalMemPipeline::GlobalMemPipeline(const ComputeUnitParams* p) :
- computeUnit(nullptr), gmQueueSize(p->global_mem_queue_size),
- maxWaveRequests(p->max_wave_requests), inflightStores(0),
- inflightLoads(0)
+GlobalMemPipeline::GlobalMemPipeline(const ComputeUnitParams &p,
+ ComputeUnit &cu)
+ : computeUnit(cu), _name(cu.name() + ".GlobalMemPipeline"),
+ gmQueueSize(p.global_mem_queue_size),
+ maxWaveRequests(p.max_wave_requests), inflightStores(0),
+ inflightLoads(0), stats(&cu)
{
}
// init(): caches globalMemSize from the compute unit's shader.
// The removed form also received the ComputeUnit pointer and assigned
// computeUnit and _name here; in the added form both are set in the
// constructor's initializer list, so init() takes no argument and
// computeUnit is accessed with '.' rather than '->'.
void
-GlobalMemPipeline::init(ComputeUnit *cu)
+GlobalMemPipeline::init()
{
- computeUnit = cu;
- globalMemSize = computeUnit->shader->globalMemSize;
- _name = computeUnit->name() + ".GlobalMemPipeline";
+ globalMemSize = computeUnit.shader->globalMemSize;
}
bool
}
- if (m && m->latency.rdy() && computeUnit->glbMemToVrfBus.rdy() &&
- accessVrf && (computeUnit->shader->coissue_return ||
- computeUnit->vectorGlobalMemUnit.rdy())) {
+ if (m && m->latency.rdy() && computeUnit.glbMemToVrfBus.rdy() &&
+ accessVrf && (computeUnit.shader->coissue_return ||
+ computeUnit.vectorGlobalMemUnit.rdy())) {
w = m->wavefront();
DPRINTF(GPUMem, "CU%d: WF[%d][%d]: Completing global mem instr %s\n",
m->cu_id, m->simdId, m->wfSlotId, m->disassemble());
m->completeAcc(m);
+ if (m->isFlat()) {
+ w->decLGKMInstsIssued();
+ }
+ w->decVMemInstsIssued();
if (m->isLoad() || m->isAtomicRet()) {
w->computeUnit->vrf[w->simdId]->
Tick accessTime = curTick() - m->getAccessTime();
// Decrement outstanding requests count
- computeUnit->shader->ScheduleAdd(&w->outstandingReqs, m->time, -1);
+ computeUnit.shader->ScheduleAdd(&w->outstandingReqs, m->time, -1);
if (m->isStore() || m->isAtomic() || m->isMemSync()) {
- computeUnit->shader->sampleStore(accessTime);
- computeUnit->shader->ScheduleAdd(&w->outstandingReqsWrGm,
+ computeUnit.shader->sampleStore(accessTime);
+ computeUnit.shader->ScheduleAdd(&w->outstandingReqsWrGm,
m->time, -1);
}
if (m->isLoad() || m->isAtomic() || m->isMemSync()) {
- computeUnit->shader->sampleLoad(accessTime);
- computeUnit->shader->ScheduleAdd(&w->outstandingReqsRdGm,
+ computeUnit.shader->sampleLoad(accessTime);
+ computeUnit.shader->ScheduleAdd(&w->outstandingReqsRdGm,
m->time, -1);
}
// going all the way to memory and stats for individual cache
// blocks generated by the instruction.
m->profileRoundTripTime(curTick(), InstMemoryHop::Complete);
- computeUnit->shader->sampleInstRoundTrip(m->getRoundTripTime());
- computeUnit->shader->sampleLineRoundTrip(m->getLineAddressTime());
+ computeUnit.shader->sampleInstRoundTrip(m->getRoundTripTime());
+ computeUnit.shader->sampleLineRoundTrip(m->getLineAddressTime());
// Mark write bus busy for appropriate amount of time
- computeUnit->glbMemToVrfBus.set(m->time);
- if (!computeUnit->shader->coissue_return)
+ computeUnit.glbMemToVrfBus.set(m->time);
+ if (!computeUnit.shader->coissue_return)
w->computeUnit->vectorGlobalMemUnit.set(m->time);
}
mp->disassemble(), mp->seqNum());
mp->initiateAcc(mp);
+ if (mp->isStore() && mp->isGlobalSeg()) {
+ mp->wavefront()->decExpInstsIssued();
+ }
+
if (((mp->isMemSync() && !mp->isEndOfKernel()) || !mp->isMemSync())) {
/**
* if we are not in out-of-order data delivery mode
* correctly.
*/
handleResponse(mp);
- computeUnit->getTokenManager()->recvTokens(1);
+ computeUnit.getTokenManager()->recvTokens(1);
}
gmIssuedRequests.pop();
DPRINTF(GPUMem, "CU%d: WF[%d][%d] Popping 0 mem_op = \n",
- computeUnit->cu_id, mp->simdId, mp->wfSlotId);
+ computeUnit.cu_id, mp->simdId, mp->wfSlotId);
}
}
mem_req->second.second = true;
}
// Statistics registration (shown as a patch).
// Removed form: regStats() set the name and description of
// loadVrfBankConflictCycles by hand, naming it
// name() + ".load_vrf_bank_conflict_cycles".
// Added form: the nested GlobalMemPipelineStats constructor creates a
// Stats::Group called "GlobalMemPipeline" under `parent` and registers the
// same counter through ADD_STAT with an equivalent description.
// NOTE(review): the resulting stat name is presumably derived from the
// member identifier by ADD_STAT and so may differ from the old
// ".load_vrf_bank_conflict_cycles" suffix -- confirm if stat consumers
// depend on the old name.
-void
-GlobalMemPipeline::regStats()
+GlobalMemPipeline::
+GlobalMemPipelineStats::GlobalMemPipelineStats(Stats::Group *parent)
+ : Stats::Group(parent, "GlobalMemPipeline"),
+ ADD_STAT(loadVrfBankConflictCycles, "total number of cycles GM data "
+ "are delayed before updating the VRF")
{
- loadVrfBankConflictCycles
- .name(name() + ".load_vrf_bank_conflict_cycles")
- .desc("total number of cycles GM data are delayed before updating "
- "the VRF")
- ;
}