From 30dbd90783498fe654b49eea437ef084222bc2af Mon Sep 17 00:00:00 2001 From: eavivi Date: Mon, 31 Aug 2020 12:10:19 -0700 Subject: [PATCH] cpu-o3: convert fetch to new style stats Change-Id: Ib50a303570ac1dd45ff11a32a823f47a6c4c02cd Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/33815 Reviewed-by: Jason Lowe-Power Maintainer: Jason Lowe-Power Tested-by: kokoro --- src/cpu/o3/cpu.cc | 1 - src/cpu/o3/fetch.hh | 115 +++++++++--------- src/cpu/o3/fetch_impl.hh | 255 +++++++++++++++++---------------------- 3 files changed, 173 insertions(+), 198 deletions(-) diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc index d0a387cfc..a525ea4c2 100644 --- a/src/cpu/o3/cpu.cc +++ b/src/cpu/o3/cpu.cc @@ -441,7 +441,6 @@ FullO3CPU::regStats() .precision(6); totalIpc = sum(committedInsts) / numCycles; - this->fetch.regStats(); this->decode.regStats(); this->rename.regStats(); this->iew.regStats(); diff --git a/src/cpu/o3/fetch.hh b/src/cpu/o3/fetch.hh index 77c6336bd..16f0c5eb6 100644 --- a/src/cpu/o3/fetch.hh +++ b/src/cpu/o3/fetch.hh @@ -222,8 +222,6 @@ class DefaultFetch /** Returns the name of fetch. */ std::string name() const; - /** Registers statistics. */ - void regStats(); /** Registers probes. */ void regProbePoints(); @@ -330,7 +328,8 @@ class DefaultFetch const DynInstPtr squashInst, ThreadID tid); /** Squashes a specific thread and resets the PC. Also tells the CPU to - * remove any instructions between fetch and decode that should be sqaushed. + * remove any instructions between fetch and decode + * that should be squashed. */ void squashFromDecode(const TheISA::PCState &newPC, const DynInstPtr squashInst, @@ -546,57 +545,65 @@ class DefaultFetch /** Event used to delay fault generation of translation faults */ FinishTranslationEvent finishTranslationEvent; - // @todo: Consider making these vectors and tracking on a per thread basis. - /** Stat for total number of cycles stalled due to an icache miss. 
*/ - Stats::Scalar icacheStallCycles; - /** Stat for total number of fetched instructions. */ - Stats::Scalar fetchedInsts; - /** Total number of fetched branches. */ - Stats::Scalar fetchedBranches; - /** Stat for total number of predicted branches. */ - Stats::Scalar predictedBranches; - /** Stat for total number of cycles spent fetching. */ - Stats::Scalar fetchCycles; - /** Stat for total number of cycles spent squashing. */ - Stats::Scalar fetchSquashCycles; - /** Stat for total number of cycles spent waiting for translation */ - Stats::Scalar fetchTlbCycles; - /** Stat for total number of cycles spent blocked due to other stages in - * the pipeline. - */ - Stats::Scalar fetchIdleCycles; - /** Total number of cycles spent blocked. */ - Stats::Scalar fetchBlockedCycles; - /** Total number of cycles spent in any other state. */ - Stats::Scalar fetchMiscStallCycles; - /** Total number of cycles spent in waiting for drains. */ - Stats::Scalar fetchPendingDrainCycles; - /** Total number of stall cycles caused by no active threads to run. */ - Stats::Scalar fetchNoActiveThreadStallCycles; - /** Total number of stall cycles caused by pending traps. */ - Stats::Scalar fetchPendingTrapStallCycles; - /** Total number of stall cycles caused by pending quiesce instructions. */ - Stats::Scalar fetchPendingQuiesceStallCycles; - /** Total number of stall cycles caused by I-cache wait retrys. */ - Stats::Scalar fetchIcacheWaitRetryStallCycles; - /** Stat for total number of fetched cache lines. */ - Stats::Scalar fetchedCacheLines; - /** Total number of outstanding icache accesses that were dropped - * due to a squash. - */ - Stats::Scalar fetchIcacheSquashes; - /** Total number of outstanding tlb accesses that were dropped - * due to a squash. - */ - Stats::Scalar fetchTlbSquashes; - /** Distribution of number of instructions fetched each cycle. */ - Stats::Distribution fetchNisnDist; - /** Rate of how often fetch was idle. 
*/ - Stats::Formula idleRate; - /** Number of branch fetches per cycle. */ - Stats::Formula branchRate; - /** Number of instruction fetched per cycle. */ - Stats::Formula fetchRate; + protected: + struct FetchStatGroup : public Stats::Group + { + FetchStatGroup(O3CPU *cpu, DefaultFetch *fetch); + // @todo: Consider making these + // vectors and tracking on a per thread basis. + /** Stat for total number of cycles stalled due to an icache miss. */ + Stats::Scalar icacheStallCycles; + /** Stat for total number of fetched instructions. */ + Stats::Scalar insts; + /** Total number of fetched branches. */ + Stats::Scalar branches; + /** Stat for total number of predicted branches. */ + Stats::Scalar predictedBranches; + /** Stat for total number of cycles spent fetching. */ + Stats::Scalar cycles; + /** Stat for total number of cycles spent squashing. */ + Stats::Scalar squashCycles; + /** Stat for total number of cycles spent waiting for translation */ + Stats::Scalar tlbCycles; + /** Stat for total number of cycles + * spent blocked due to other stages in + * the pipeline. + */ + Stats::Scalar idleCycles; + /** Total number of cycles spent blocked. */ + Stats::Scalar blockedCycles; + /** Total number of cycles spent in any other state. */ + Stats::Scalar miscStallCycles; + /** Total number of cycles spent waiting for drains. */ + Stats::Scalar pendingDrainCycles; + /** Total number of stall cycles caused by no active threads to run. */ + Stats::Scalar noActiveThreadStallCycles; + /** Total number of stall cycles caused by pending traps. */ + Stats::Scalar pendingTrapStallCycles; + /** Total number of stall cycles + * caused by pending quiesce instructions. */ + Stats::Scalar pendingQuiesceStallCycles; + /** Total number of stall cycles caused by I-cache wait retries. */ + Stats::Scalar icacheWaitRetryStallCycles; + /** Stat for total number of fetched cache lines. 
*/ + Stats::Scalar cacheLines; + /** Total number of outstanding icache accesses that were dropped + * due to a squash. + */ + Stats::Scalar icacheSquashes; + /** Total number of outstanding tlb accesses that were dropped + * due to a squash. + */ + Stats::Scalar tlbSquashes; + /** Distribution of number of instructions fetched each cycle. */ + Stats::Distribution nisnDist; + /** Rate of how often fetch was idle. */ + Stats::Formula idleRate; + /** Number of branch fetches per cycle. */ + Stats::Formula branchRate; + /** Number of instruction fetched per cycle. */ + Stats::Formula rate; + } fetchStats; }; #endif //__CPU_O3_FETCH_HH__ diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh index 314e0147e..f449cac47 100644 --- a/src/cpu/o3/fetch_impl.hh +++ b/src/cpu/o3/fetch_impl.hh @@ -94,7 +94,7 @@ DefaultFetch::DefaultFetch(O3CPU *_cpu, DerivO3CPUParams *params) numThreads(params->numThreads), numFetchingThreads(params->smtNumFetchingThreads), icachePort(this, _cpu), - finishTranslationEvent(this) + finishTranslationEvent(this), fetchStats(_cpu, this) { if (numThreads > Impl::MaxThreads) fatal("numThreads (%d) is larger than compiled limit (%d),\n" @@ -163,128 +163,97 @@ DefaultFetch::regProbePoints() } template -void -DefaultFetch::regStats() +DefaultFetch:: +FetchStatGroup::FetchStatGroup(O3CPU *cpu, DefaultFetch *fetch) + : Stats::Group(cpu, "fetch"), + ADD_STAT(icacheStallCycles, + "Number of cycles fetch is stalled on an Icache miss"), + ADD_STAT(insts, "Number of instructions fetch has processed"), + ADD_STAT(branches, "Number of branches that fetch encountered"), + ADD_STAT(predictedBranches, + "Number of branches that fetch has predicted taken"), + ADD_STAT(cycles, + "Number of cycles fetch has run and was not squashing or blocked"), + ADD_STAT(squashCycles, "Number of cycles fetch has spent squashing"), + ADD_STAT(tlbCycles, + "Number of cycles fetch has spent waiting for tlb"), + ADD_STAT(idleCycles, "Number of cycles fetch was idle"), + 
ADD_STAT(blockedCycles, "Number of cycles fetch has spent blocked"), + ADD_STAT(miscStallCycles, + "Number of cycles fetch has spent waiting on interrupts, " + "or bad addresses, or out of MSHRs"), + ADD_STAT(pendingDrainCycles, + "Number of cycles fetch has spent waiting on pipes to drain"), + ADD_STAT(noActiveThreadStallCycles, + "Number of stall cycles due to no active thread to fetch from"), + ADD_STAT(pendingTrapStallCycles, + "Number of stall cycles due to pending traps"), + ADD_STAT(pendingQuiesceStallCycles, + "Number of stall cycles due to pending quiesce instructions"), + ADD_STAT(icacheWaitRetryStallCycles, + "Number of stall cycles due to full MSHR"), + ADD_STAT(cacheLines, "Number of cache lines fetched"), + ADD_STAT(icacheSquashes, + "Number of outstanding Icache misses that were squashed"), + ADD_STAT(tlbSquashes, + "Number of outstanding ITLB misses that were squashed"), + ADD_STAT(nisnDist, + "Number of instructions fetched each cycle (Total)"), + ADD_STAT(idleRate, "Percent of cycles fetch was idle", + idleCycles * 100 / cpu->numCycles), + ADD_STAT(branchRate, "Number of branch fetches per cycle", + branches / cpu->numCycles), + ADD_STAT(rate, "Number of inst fetches per cycle", + insts / cpu->numCycles) { - icacheStallCycles - .name(name() + ".icacheStallCycles") - .desc("Number of cycles fetch is stalled on an Icache miss") - .prereq(icacheStallCycles); - - fetchedInsts - .name(name() + ".Insts") - .desc("Number of instructions fetch has processed") - .prereq(fetchedInsts); - - fetchedBranches - .name(name() + ".Branches") - .desc("Number of branches that fetch encountered") - .prereq(fetchedBranches); - - predictedBranches - .name(name() + ".predictedBranches") - .desc("Number of branches that fetch has predicted taken") - .prereq(predictedBranches); - - fetchCycles - .name(name() + ".Cycles") - .desc("Number of cycles fetch has run and was not squashing or" - " blocked") - .prereq(fetchCycles); - - fetchSquashCycles - .name(name() + 
".SquashCycles") - .desc("Number of cycles fetch has spent squashing") - .prereq(fetchSquashCycles); - - fetchTlbCycles - .name(name() + ".TlbCycles") - .desc("Number of cycles fetch has spent waiting for tlb") - .prereq(fetchTlbCycles); - - fetchIdleCycles - .name(name() + ".IdleCycles") - .desc("Number of cycles fetch was idle") - .prereq(fetchIdleCycles); - - fetchBlockedCycles - .name(name() + ".BlockedCycles") - .desc("Number of cycles fetch has spent blocked") - .prereq(fetchBlockedCycles); - - fetchedCacheLines - .name(name() + ".CacheLines") - .desc("Number of cache lines fetched") - .prereq(fetchedCacheLines); - - fetchMiscStallCycles - .name(name() + ".MiscStallCycles") - .desc("Number of cycles fetch has spent waiting on interrupts, or " - "bad addresses, or out of MSHRs") - .prereq(fetchMiscStallCycles); - - fetchPendingDrainCycles - .name(name() + ".PendingDrainCycles") - .desc("Number of cycles fetch has spent waiting on pipes to drain") - .prereq(fetchPendingDrainCycles); - - fetchNoActiveThreadStallCycles - .name(name() + ".NoActiveThreadStallCycles") - .desc("Number of stall cycles due to no active thread to fetch from") - .prereq(fetchNoActiveThreadStallCycles); - - fetchPendingTrapStallCycles - .name(name() + ".PendingTrapStallCycles") - .desc("Number of stall cycles due to pending traps") - .prereq(fetchPendingTrapStallCycles); - - fetchPendingQuiesceStallCycles - .name(name() + ".PendingQuiesceStallCycles") - .desc("Number of stall cycles due to pending quiesce instructions") - .prereq(fetchPendingQuiesceStallCycles); - - fetchIcacheWaitRetryStallCycles - .name(name() + ".IcacheWaitRetryStallCycles") - .desc("Number of stall cycles due to full MSHR") - .prereq(fetchIcacheWaitRetryStallCycles); - - fetchIcacheSquashes - .name(name() + ".IcacheSquashes") - .desc("Number of outstanding Icache misses that were squashed") - .prereq(fetchIcacheSquashes); - - fetchTlbSquashes - .name(name() + ".ItlbSquashes") - .desc("Number of outstanding ITLB misses 
that were squashed") - .prereq(fetchTlbSquashes); - - fetchNisnDist - .init(/* base value */ 0, - /* last value */ fetchWidth, + icacheStallCycles + .prereq(icacheStallCycles); + insts + .prereq(insts); + branches + .prereq(branches); + predictedBranches + .prereq(predictedBranches); + cycles + .prereq(cycles); + squashCycles + .prereq(squashCycles); + tlbCycles + .prereq(tlbCycles); + idleCycles + .prereq(idleCycles); + blockedCycles + .prereq(blockedCycles); + cacheLines + .prereq(cacheLines); + miscStallCycles + .prereq(miscStallCycles); + pendingDrainCycles + .prereq(pendingDrainCycles); + noActiveThreadStallCycles + .prereq(noActiveThreadStallCycles); + pendingTrapStallCycles + .prereq(pendingTrapStallCycles); + pendingQuiesceStallCycles + .prereq(pendingQuiesceStallCycles); + icacheWaitRetryStallCycles + .prereq(icacheWaitRetryStallCycles); + icacheSquashes + .prereq(icacheSquashes); + tlbSquashes + .prereq(tlbSquashes); + nisnDist + .init(/* base value */ 0, + /* last value */ fetch->fetchWidth, /* bucket size */ 1) - .name(name() + ".rateDist") - .desc("Number of instructions fetched each cycle (Total)") - .flags(Stats::pdf); - - idleRate - .name(name() + ".idleRate") - .desc("Percent of cycles fetch was idle") - .prereq(idleRate); - idleRate = fetchIdleCycles * 100 / cpu->numCycles; - - branchRate - .name(name() + ".branchRate") - .desc("Number of branch fetches per cycle") - .flags(Stats::total); - branchRate = fetchedBranches / cpu->numCycles; - - fetchRate - .name(name() + ".rate") - .desc("Number of inst fetches per cycle") - .flags(Stats::total); - fetchRate = fetchedInsts / cpu->numCycles; + .flags(Stats::pdf); + idleRate + .prereq(idleRate); + branchRate + .flags(Stats::total); + rate + .flags(Stats::total); } - template void DefaultFetch::setTimeBuffer(TimeBuffer *time_buffer) @@ -393,7 +362,7 @@ DefaultFetch::processCacheCompletion(PacketPtr pkt) // to return. 
if (fetchStatus[tid] != IcacheWaitResponse || pkt->req != memReq[tid]) { - ++fetchIcacheSquashes; + ++fetchStats.icacheSquashes; delete pkt; return; } @@ -586,10 +555,10 @@ DefaultFetch::lookupAndUpdateNextPC( inst->setPredTarg(nextPC); inst->setPredTaken(predict_taken); - ++fetchedBranches; + ++fetchStats.branches; if (predict_taken) { - ++predictedBranches; + ++fetchStats.predictedBranches; } return predict_taken; @@ -662,7 +631,7 @@ DefaultFetch::finishTranslation(const Fault &fault, mem_req->getVaddr() != memReq[tid]->getVaddr()) { DPRINTF(Fetch, "[tid:%i] Ignoring itlb completed after squash\n", tid); - ++fetchTlbSquashes; + ++fetchStats.tlbSquashes; return; } @@ -688,7 +657,7 @@ DefaultFetch::finishTranslation(const Fault &fault, fetchBufferValid[tid] = false; DPRINTF(Fetch, "Fetch: Doing instruction read.\n"); - fetchedCacheLines++; + fetchStats.cacheLines++; // Access the cache. if (!icachePort.sendTimingReq(data_pkt)) { @@ -801,7 +770,7 @@ DefaultFetch::doSquash(const TheISA::PCState &newPC, // some opportunities to handle interrupts may be missed. delayedCommit[tid] = true; - ++fetchSquashCycles; + ++fetchStats.squashCycles; } template @@ -930,7 +899,7 @@ DefaultFetch::tick() } // Record number of instructions fetched this cycle for distribution. - fetchNisnDist.sample(numInst); + fetchStats.nisnDist.sample(numInst); if (status_change) { // Change the fetch stage status if there was a status change. 
@@ -1202,23 +1171,23 @@ DefaultFetch::fetch(bool &status_change) fetchCacheLine(fetchAddr, tid, thisPC.instAddr()); if (fetchStatus[tid] == IcacheWaitResponse) - ++icacheStallCycles; + ++fetchStats.icacheStallCycles; else if (fetchStatus[tid] == ItlbWait) - ++fetchTlbCycles; + ++fetchStats.tlbCycles; else - ++fetchMiscStallCycles; + ++fetchStats.miscStallCycles; return; } else if ((checkInterrupt(thisPC.instAddr()) && !delayedCommit[tid])) { // Stall CPU if an interrupt is posted and we're not issuing // an delayed commit micro-op currently (delayed commit instructions // are not interruptable by interrupts, only faults) - ++fetchMiscStallCycles; + ++fetchStats.miscStallCycles; DPRINTF(Fetch, "[tid:%i] Fetch is stalled!\n", tid); return; } } else { if (fetchStatus[tid] == Idle) { - ++fetchIdleCycles; + ++fetchStats.idleCycles; DPRINTF(Fetch, "[tid:%i] Fetch is idle!\n", tid); } @@ -1226,7 +1195,7 @@ DefaultFetch::fetch(bool &status_change) return; } - ++fetchCycles; + ++fetchStats.cycles; TheISA::PCState nextPC = thisPC; @@ -1296,7 +1265,7 @@ DefaultFetch::fetch(bool &status_change) staticInst = decoder[tid]->decode(thisPC); // Increment stat of fetched instructions. 
- ++fetchedInsts; + ++fetchStats.insts; if (staticInst->isMacroop()) { curMacroop = staticInst; @@ -1625,35 +1594,35 @@ DefaultFetch::profileStall(ThreadID tid) { // @todo Per-thread stats if (stalls[tid].drain) { - ++fetchPendingDrainCycles; + ++fetchStats.pendingDrainCycles; DPRINTF(Fetch, "Fetch is waiting for a drain!\n"); } else if (activeThreads->empty()) { - ++fetchNoActiveThreadStallCycles; + ++fetchStats.noActiveThreadStallCycles; DPRINTF(Fetch, "Fetch has no active thread!\n"); } else if (fetchStatus[tid] == Blocked) { - ++fetchBlockedCycles; + ++fetchStats.blockedCycles; DPRINTF(Fetch, "[tid:%i] Fetch is blocked!\n", tid); } else if (fetchStatus[tid] == Squashing) { - ++fetchSquashCycles; + ++fetchStats.squashCycles; DPRINTF(Fetch, "[tid:%i] Fetch is squashing!\n", tid); } else if (fetchStatus[tid] == IcacheWaitResponse) { - ++icacheStallCycles; + ++fetchStats.icacheStallCycles; DPRINTF(Fetch, "[tid:%i] Fetch is waiting cache response!\n", tid); } else if (fetchStatus[tid] == ItlbWait) { - ++fetchTlbCycles; + ++fetchStats.tlbCycles; DPRINTF(Fetch, "[tid:%i] Fetch is waiting ITLB walk to " "finish!\n", tid); } else if (fetchStatus[tid] == TrapPending) { - ++fetchPendingTrapStallCycles; + ++fetchStats.pendingTrapStallCycles; DPRINTF(Fetch, "[tid:%i] Fetch is waiting for a pending trap!\n", tid); } else if (fetchStatus[tid] == QuiescePending) { - ++fetchPendingQuiesceStallCycles; + ++fetchStats.pendingQuiesceStallCycles; DPRINTF(Fetch, "[tid:%i] Fetch is waiting for a pending quiesce " "instruction!\n", tid); } else if (fetchStatus[tid] == IcacheWaitRetry) { - ++fetchIcacheWaitRetryStallCycles; + ++fetchStats.icacheWaitRetryStallCycles; DPRINTF(Fetch, "[tid:%i] Fetch is waiting for an I-cache retry!\n", tid); } else if (fetchStatus[tid] == NoGoodAddr) { -- 2.30.2