From c7e7b890586ac6cf24f2ea6d8f3db936f0fe6e6d Mon Sep 17 00:00:00 2001 From: Geoffrey Blake Date: Sun, 10 Jul 2011 12:56:08 -0500 Subject: [PATCH] O3: Fix up pipelining icache accesses in fetch stage to function properly Fixed up the patch from Yasuko Watanabe that enabled pipelining of fetch accesses to icache to work with recent changes to main repository. Also added the ability for the fetch stage to delay issuing the fault-carrying nop when a pipelined fetch causes a fault and no fetch bandwidth is available until the next cycle. --- src/cpu/o3/fetch.hh | 61 +++++++++++++ src/cpu/o3/fetch_impl.hh | 180 ++++++++++++++++++++++++++++++++------- 2 files changed, 209 insertions(+), 32 deletions(-) diff --git a/src/cpu/o3/fetch.hh b/src/cpu/o3/fetch.hh index 90fe5334a..7b9be7b67 100644 --- a/src/cpu/o3/fetch.hh +++ b/src/cpu/o3/fetch.hh @@ -150,6 +150,45 @@ class DefaultFetch } }; + private: + /* Event to delay delivery of a fetch translation result in case of + * a fault and the nop to carry the fault cannot be generated + * immediately */ + class FinishTranslationEvent : public Event + { + private: + DefaultFetch *fetch; + Fault fault; + RequestPtr req; + + public: + FinishTranslationEvent(DefaultFetch *_fetch) + : fetch(_fetch) + {} + + void setFault(Fault _fault) + { + fault = _fault; + } + + void setReq(RequestPtr _req) + { + req = _req; + } + + /** Process the delayed finish translation */ + void process() + { + assert(fetch->numInst < fetch->fetchWidth); + fetch->finishTranslation(fault, req); + } + + const char *description() const + { + return "FullO3CPU FetchFinishTranslation"; + } + }; + public: /** Overall fetch status. Used to determine if the CPU can * deschedule itsef due to a lack of activity. @@ -363,6 +402,12 @@ class DefaultFetch * policy. */ ThreadID branchCount(); + /** Pipeline the next I-cache access to the current one. */ + void pipelineIcacheAccesses(ThreadID tid); + + /** Profile the reasons of fetch stall. 
*/ + void profileStall(ThreadID tid); + private: /** Pointer to the O3CPU. */ O3CPU *cpu; @@ -497,6 +542,12 @@ class DefaultFetch /** Records if fetch is switched out. */ bool switchedOut; + /** Set to true if a pipelined I-cache request should be issued. */ + bool issuePipelinedIfetch[Impl::MaxThreads]; + + /** Event used to delay fault generation of translation faults */ + FinishTranslationEvent finishTranslationEvent; + // @todo: Consider making these vectors and tracking on a per thread basis. /** Stat for total number of cycles stalled due to an icache miss. */ Stats::Scalar icacheStallCycles; @@ -520,6 +571,16 @@ class DefaultFetch Stats::Scalar fetchBlockedCycles; /** Total number of cycles spent in any other state. */ Stats::Scalar fetchMiscStallCycles; + /** Total number of cycles spent in waiting for drains. */ + Stats::Scalar fetchPendingDrainCycles; + /** Total number of stall cycles caused by no active threads to run. */ + Stats::Scalar fetchNoActiveThreadStallCycles; + /** Total number of stall cycles caused by pending traps. */ + Stats::Scalar fetchPendingTrapStallCycles; + /** Total number of stall cycles caused by pending quiesce instructions. */ + Stats::Scalar fetchPendingQuiesceStallCycles; + /** Total number of stall cycles caused by I-cache wait retries. */ + Stats::Scalar fetchIcacheWaitRetryStallCycles; /** Stat for total number of fetched cache lines. 
*/ Stats::Scalar fetchedCacheLines; /** Total number of outstanding icache accesses that were dropped diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh index 118f132ca..c58892e84 100644 --- a/src/cpu/o3/fetch_impl.hh +++ b/src/cpu/o3/fetch_impl.hh @@ -49,6 +49,7 @@ #include "base/types.hh" #include "config/the_isa.hh" #include "config/use_checker.hh" +#include "cpu/base.hh" #include "cpu/checker/cpu.hh" #include "cpu/o3/fetch.hh" #include "cpu/exetrace.hh" @@ -59,6 +60,7 @@ #include "params/DerivO3CPU.hh" #include "sim/byteswap.hh" #include "sim/core.hh" +#include "sim/eventq.hh" #if FULL_SYSTEM #include "arch/tlb.hh" @@ -135,6 +137,7 @@ DefaultFetch::DefaultFetch(O3CPU *_cpu, DerivO3CPUParams *params) : cpu(_cpu), branchPred(params), predecoder(NULL), + numInst(0), decodeToFetchDelay(params->decodeToFetchDelay), renameToFetchDelay(params->renameToFetchDelay), iewToFetchDelay(params->iewToFetchDelay), @@ -147,7 +150,8 @@ DefaultFetch::DefaultFetch(O3CPU *_cpu, DerivO3CPUParams *params) numFetchingThreads(params->smtNumFetchingThreads), interruptPending(false), drainPending(false), - switchedOut(false) + switchedOut(false), + finishTranslationEvent(this) { if (numThreads > Impl::MaxThreads) fatal("numThreads (%d) is larger than compiled limit (%d),\n" @@ -268,6 +272,31 @@ DefaultFetch::regStats() "bad addresses, or out of MSHRs") .prereq(fetchMiscStallCycles); + fetchPendingDrainCycles + .name(name() + ".PendingDrainCycles") + .desc("Number of cycles fetch has spent waiting on pipes to drain") + .prereq(fetchPendingDrainCycles); + + fetchNoActiveThreadStallCycles + .name(name() + ".NoActiveThreadStallCycles") + .desc("Number of stall cycles due to no active thread to fetch from") + .prereq(fetchNoActiveThreadStallCycles); + + fetchPendingTrapStallCycles + .name(name() + ".PendingTrapStallCycles") + .desc("Number of stall cycles due to pending traps") + .prereq(fetchPendingTrapStallCycles); + + fetchPendingQuiesceStallCycles + .name(name() + 
".PendingQuiesceStallCycles") + .desc("Number of stall cycles due to pending quiesce instructions") + .prereq(fetchPendingQuiesceStallCycles); + + fetchIcacheWaitRetryStallCycles + .name(name() + ".IcacheWaitRetryStallCycles") + .desc("Number of stall cycles due to full MSHR") + .prereq(fetchIcacheWaitRetryStallCycles); + fetchIcacheSquashes .name(name() + ".IcacheSquashes") .desc("Number of outstanding Icache misses that were squashed") @@ -675,8 +704,15 @@ DefaultFetch::finishTranslation(Fault fault, RequestPtr mem_req) fetchStatus[tid] = IcacheWaitResponse; } } else { + if (!(numInst < fetchWidth)) { + assert(!finishTranslationEvent.scheduled()); + finishTranslationEvent.setFault(fault); + finishTranslationEvent.setReq(mem_req); + cpu->schedule(finishTranslationEvent, cpu->nextCycle(curTick() + cpu->ticks(1))); + return; + } DPRINTF(Fetch, "[tid:%i] Got back req with addr %#x but expected %#x\n", - mem_req->getVaddr(), memReq[tid]->getVaddr()); + tid, mem_req->getVaddr(), memReq[tid]->getVaddr()); // Translation faulted, icache request won't be sent. delete mem_req; memReq[tid] = NULL; @@ -851,6 +887,10 @@ DefaultFetch::tick() wroteToTimeBuffer = false; + for (ThreadID i = 0; i < Impl::MaxThreads; ++i) { + issuePipelinedIfetch[i] = false; + } + while (threads != end) { ThreadID tid = *threads++; @@ -862,10 +902,7 @@ DefaultFetch::tick() DPRINTF(Fetch, "Running stage.\n"); - // Reset the number of the instruction we're fetching. - numInst = 0; - -#if FULL_SYSTEM + #if FULL_SYSTEM if (fromCommit->commitInfo[0].interruptPending) { interruptPending = true; } @@ -895,6 +932,16 @@ DefaultFetch::tick() cpu->activityThisCycle(); } + + // Issue the next I-cache request if possible. + for (ThreadID i = 0; i < Impl::MaxThreads; ++i) { + if (issuePipelinedIfetch[i]) { + pipelineIcacheAccesses(i); + } + } + + // Reset the number of the instruction we've fetched. 
+ numInst = 0; } template @@ -1099,10 +1146,13 @@ DefaultFetch::fetch(bool &status_change) ThreadID tid = getFetchingThread(fetchPolicy); if (tid == InvalidThreadID || drainPending) { - DPRINTF(Fetch,"There are no more threads available to fetch from.\n"); - // Breaks looping condition in tick() threadFetched = numFetchingThreads; + + if (numThreads == 1) { // @todo Per-thread stats + profileStall(0); + } + return; } @@ -1157,32 +1207,9 @@ DefaultFetch::fetch(bool &status_change) if (fetchStatus[tid] == Idle) { ++fetchIdleCycles; DPRINTF(Fetch, "[tid:%i]: Fetch is idle!\n", tid); - } else if (fetchStatus[tid] == Blocked) { - ++fetchBlockedCycles; - DPRINTF(Fetch, "[tid:%i]: Fetch is blocked!\n", tid); - } else if (fetchStatus[tid] == Squashing) { - ++fetchSquashCycles; - DPRINTF(Fetch, "[tid:%i]: Fetch is squashing!\n", tid); - } else if (fetchStatus[tid] == IcacheWaitResponse) { - ++icacheStallCycles; - DPRINTF(Fetch, "[tid:%i]: Fetch is waiting cache response!\n", - tid); - } else if (fetchStatus[tid] == ItlbWait) { - DPRINTF(Fetch, "[tid:%i]: Fetch is waiting ITLB walk to " - "finish! \n", tid); - ++fetchTlbCycles; - } else if (fetchStatus[tid] == TrapPending) { - DPRINTF(Fetch, "[tid:%i]: Fetch is waiting for a pending trap\n", - tid); - } else if (fetchStatus[tid] == NoGoodAddr) { - DPRINTF(Fetch, "[tid:%i]: Fetch predicted non-executable address\n", - tid); } - - - // Status is Idle, Squashing, Blocked, ItlbWait or IcacheWaitResponse - // so fetch should do nothing. + // Status is Idle, so fetch should do nothing. 
return; } @@ -1329,6 +1356,17 @@ DefaultFetch::fetch(bool &status_change) } pc[tid] = thisPC; + + // pipeline a fetch if we're crossing a cache boundary and not in + // a state that would preclude fetching + fetchAddr = (thisPC.instAddr() + pcOffset) & BaseCPU::PCMask; + Addr block_PC = icacheBlockAlignPC(fetchAddr); + issuePipelinedIfetch[tid] = block_PC != cacheDataPC[tid] && + fetchStatus[tid] != IcacheWaitResponse && + fetchStatus[tid] != ItlbWait && + fetchStatus[tid] != IcacheWaitRetry && + fetchStatus[tid] != QuiescePending && + !curMacroop; } template @@ -1511,3 +1549,81 @@ DefaultFetch::branchCount() panic("Branch Count Fetch policy unimplemented\n"); return InvalidThreadID; } + +template +void +DefaultFetch::pipelineIcacheAccesses(ThreadID tid) +{ + if (!issuePipelinedIfetch[tid]) { + return; + } + + // The next PC to access. + TheISA::PCState thisPC = pc[tid]; + + if (isRomMicroPC(thisPC.microPC())) { + return; + } + + Addr pcOffset = fetchOffset[tid]; + Addr fetchAddr = (thisPC.instAddr() + pcOffset) & BaseCPU::PCMask; + + // Align the fetch PC so its at the start of a cache block. + Addr block_PC = icacheBlockAlignPC(fetchAddr); + + // Unless buffer already got the block, fetch it from icache. 
+ if (!(cacheDataValid[tid] && block_PC == cacheDataPC[tid])) { + DPRINTF(Fetch, "[tid:%i]: Issuing a pipelined I-cache access, " + "starting at PC %s.\n", tid, thisPC); + + fetchCacheLine(fetchAddr, tid, thisPC.instAddr()); + } +} + +template +void +DefaultFetch::profileStall(ThreadID tid) { + DPRINTF(Fetch,"There are no more threads available to fetch from.\n"); + + // @todo Per-thread stats + + if (drainPending) { + ++fetchPendingDrainCycles; + DPRINTF(Fetch, "Fetch is waiting for a drain!\n"); + } else if (activeThreads->empty()) { + ++fetchNoActiveThreadStallCycles; + DPRINTF(Fetch, "Fetch has no active thread!\n"); + } else if (fetchStatus[tid] == Blocked) { + ++fetchBlockedCycles; + DPRINTF(Fetch, "[tid:%i]: Fetch is blocked!\n", tid); + } else if (fetchStatus[tid] == Squashing) { + ++fetchSquashCycles; + DPRINTF(Fetch, "[tid:%i]: Fetch is squashing!\n", tid); + } else if (fetchStatus[tid] == IcacheWaitResponse) { + ++icacheStallCycles; + DPRINTF(Fetch, "[tid:%i]: Fetch is waiting cache response!\n", + tid); + } else if (fetchStatus[tid] == ItlbWait) { + ++fetchTlbCycles; + DPRINTF(Fetch, "[tid:%i]: Fetch is waiting ITLB walk to " + "finish!\n", tid); + } else if (fetchStatus[tid] == TrapPending) { + ++fetchPendingTrapStallCycles; + DPRINTF(Fetch, "[tid:%i]: Fetch is waiting for a pending trap!\n", + tid); + } else if (fetchStatus[tid] == QuiescePending) { + ++fetchPendingQuiesceStallCycles; + DPRINTF(Fetch, "[tid:%i]: Fetch is waiting for a pending quiesce " + "instruction!\n", tid); + } else if (fetchStatus[tid] == IcacheWaitRetry) { + ++fetchIcacheWaitRetryStallCycles; + DPRINTF(Fetch, "[tid:%i]: Fetch is waiting for an I-cache retry!\n", + tid); + } else if (fetchStatus[tid] == NoGoodAddr) { + DPRINTF(Fetch, "[tid:%i]: Fetch predicted non-executable address\n", + tid); + } else { + DPRINTF(Fetch, "[tid:%i]: Unexpected fetch stall reason (Status: %i).\n", + tid, fetchStatus[tid]); + } +} -- 2.30.2