From 6babda7123be5e69db137e77589d88c768c19345 Mon Sep 17 00:00:00 2001 From: Steve Reinhardt Date: Sat, 30 Jun 2007 13:34:16 -0700 Subject: [PATCH] Fix up a few statistics problems. Stats pretty much line up with old code, except: - bug in old code included L1 latency in L2 miss time, making it too high - UniCoherence did cache-to-cache transfers even from non-owner caches, so occasionally the icache would get a block from the dcache not the L2 - L2 can now receive ReadExReq from L1 since L1s have coherence --HG-- extra : convert_revision : 5052c1a1767b5a662f30a88f16012165a73b791c --- src/mem/cache/base_cache.cc | 54 +++++++++++++++++--------------- src/mem/cache/base_cache.hh | 6 ++-- src/mem/cache/cache_impl.hh | 21 +++++++------ src/mem/cache/miss/mshr.cc | 20 ++++++------ src/mem/cache/miss/mshr.hh | 10 +++--- src/mem/cache/miss/mshr_queue.cc | 4 +-- src/mem/cache/miss/mshr_queue.hh | 6 ++-- src/mem/tport.hh | 2 +- 8 files changed, 65 insertions(+), 58 deletions(-) diff --git a/src/mem/cache/base_cache.cc b/src/mem/cache/base_cache.cc index 870658675..ec9e1cf9b 100644 --- a/src/mem/cache/base_cache.cc +++ b/src/mem/cache/base_cache.cc @@ -150,20 +150,29 @@ BaseCache::regStats() ; } +// These macros make it easier to sum the right subset of commands and +// to change the subset of commands that are considered "demand" vs +// "non-demand" +#define SUM_DEMAND(s) \ + (s[MemCmd::ReadReq] + s[MemCmd::WriteReq] + s[MemCmd::ReadExReq]) + +// should writebacks be included here? prior code was inconsistent... +#define SUM_NON_DEMAND(s) \ + (s[MemCmd::SoftPFReq] + s[MemCmd::HardPFReq]) + demandHits .name(name() + ".demand_hits") .desc("number of demand (read+write) hits") .flags(total) ; - demandHits = hits[MemCmd::ReadReq] + hits[MemCmd::WriteReq]; + demandHits = SUM_DEMAND(hits); overallHits .name(name() + ".overall_hits") .desc("number of overall hits") .flags(total) ; - overallHits = demandHits + hits[MemCmd::SoftPFReq] + hits[MemCmd::HardPFReq] - + hits[MemCmd::Writeback]; + overallHits = demandHits + SUM_NON_DEMAND(hits); // Miss statistics for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) { @@ -183,15 +192,14 @@ BaseCache::regStats() .desc("number of demand (read+write) misses") .flags(total) ; - demandMisses = misses[MemCmd::ReadReq] + misses[MemCmd::WriteReq]; + demandMisses = SUM_DEMAND(misses); overallMisses .name(name() + ".overall_misses") .desc("number of overall misses") .flags(total) ; - overallMisses = demandMisses + misses[MemCmd::SoftPFReq] + - misses[MemCmd::HardPFReq] + misses[MemCmd::Writeback]; + overallMisses = demandMisses + SUM_NON_DEMAND(misses); // Miss latency statistics for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) { @@ -211,15 +219,14 @@ BaseCache::regStats() .desc("number of demand (read+write) miss cycles") .flags(total) ; - demandMissLatency = missLatency[MemCmd::ReadReq] + missLatency[MemCmd::WriteReq]; + demandMissLatency = SUM_DEMAND(missLatency); overallMissLatency .name(name() + ".overall_miss_latency") .desc("number of overall miss cycles") .flags(total) ; - overallMissLatency = demandMissLatency + missLatency[MemCmd::SoftPFReq] + - missLatency[MemCmd::HardPFReq]; + overallMissLatency = demandMissLatency + SUM_NON_DEMAND(missLatency); // access formulas for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) { @@ -368,15 +375,14 @@ BaseCache::regStats() .desc("number of demand (read+write) MSHR hits") .flags(total) ; - demandMshrHits = mshr_hits[MemCmd::ReadReq] + mshr_hits[MemCmd::WriteReq]; + demandMshrHits = SUM_DEMAND(mshr_hits); overallMshrHits .name(name() + ".overall_mshr_hits") .desc("number of overall MSHR hits") .flags(total) ; - overallMshrHits = demandMshrHits + mshr_hits[MemCmd::SoftPFReq] + - mshr_hits[MemCmd::HardPFReq]; + overallMshrHits = demandMshrHits + SUM_NON_DEMAND(mshr_hits); // MSHR miss statistics for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) { @@ -396,15 +402,14 @@ BaseCache::regStats() .desc("number of demand (read+write) MSHR misses") .flags(total) ; - demandMshrMisses = mshr_misses[MemCmd::ReadReq] + mshr_misses[MemCmd::WriteReq]; + demandMshrMisses = SUM_DEMAND(mshr_misses); overallMshrMisses .name(name() + ".overall_mshr_misses") .desc("number of overall MSHR misses") .flags(total) ; - overallMshrMisses = demandMshrMisses + mshr_misses[MemCmd::SoftPFReq] + - mshr_misses[MemCmd::HardPFReq]; + overallMshrMisses = demandMshrMisses + SUM_NON_DEMAND(mshr_misses); // MSHR miss latency statistics for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) { @@ -424,16 +429,15 @@ BaseCache::regStats() .desc("number of demand (read+write) MSHR miss cycles") .flags(total) ; - demandMshrMissLatency = mshr_miss_latency[MemCmd::ReadReq] - + mshr_miss_latency[MemCmd::WriteReq]; + demandMshrMissLatency = SUM_DEMAND(mshr_miss_latency); overallMshrMissLatency .name(name() + ".overall_mshr_miss_latency") .desc("number of overall MSHR miss cycles") .flags(total) ; - overallMshrMissLatency = demandMshrMissLatency + - mshr_miss_latency[MemCmd::SoftPFReq] + mshr_miss_latency[MemCmd::HardPFReq]; + overallMshrMissLatency = + demandMshrMissLatency + SUM_NON_DEMAND(mshr_miss_latency); // MSHR uncacheable statistics for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) { @@ -453,9 +457,8 @@ BaseCache::regStats() .desc("number of overall MSHR uncacheable misses") .flags(total) ; - overallMshrUncacheable = mshr_uncacheable[MemCmd::ReadReq] - + mshr_uncacheable[MemCmd::WriteReq] + mshr_uncacheable[MemCmd::SoftPFReq] - + mshr_uncacheable[MemCmd::HardPFReq]; + overallMshrUncacheable = + SUM_DEMAND(mshr_uncacheable) + SUM_NON_DEMAND(mshr_uncacheable); // MSHR miss latency statistics for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) { @@ -475,10 +478,9 @@ BaseCache::regStats() .desc("number of overall MSHR uncacheable cycles") .flags(total) ; - overallMshrUncacheableLatency = mshr_uncacheable_lat[MemCmd::ReadReq] - + mshr_uncacheable_lat[MemCmd::WriteReq] - + mshr_uncacheable_lat[MemCmd::SoftPFReq] - + mshr_uncacheable_lat[MemCmd::HardPFReq]; + overallMshrUncacheableLatency = + SUM_DEMAND(mshr_uncacheable_lat) + + SUM_NON_DEMAND(mshr_uncacheable_lat); #if 0 // MSHR access formulas diff --git a/src/mem/cache/base_cache.hh b/src/mem/cache/base_cache.hh index 09484a14a..fcc040bd9 100644 --- a/src/mem/cache/base_cache.hh +++ b/src/mem/cache/base_cache.hh @@ -476,10 +476,10 @@ class BaseCache : public MemObject } } - Tick nextMSHRReadyTick() + Tick nextMSHRReadyTime() { - return std::min(mshrQueue.nextMSHRReadyTick(), - writeBuffer.nextMSHRReadyTick()); + return std::min(mshrQueue.nextMSHRReadyTime(), + writeBuffer.nextMSHRReadyTime()); } /** diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh index 1823ea6b9..568e7ff63 100644 --- a/src/mem/cache/cache_impl.hh +++ b/src/mem/cache/cache_impl.hh @@ -615,7 +615,7 @@ Cache::satisfyMSHR(MSHR *mshr, PacketPtr pkt, if (!target->pkt->req->isUncacheable()) { missLatency[target->pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/] += - completion_time - target->time; + completion_time - target->recvTime; } target->pkt->makeTimingResponse(); cpuSidePort->respond(target->pkt, completion_time); @@ -668,11 +668,14 @@ Cache::handleResponse(PacketPtr pkt) // Can we deallocate MSHR when done? bool deallocate = false; + // Initial target is used just for stats + MSHR::Target *initial_tgt = mshr->getTarget(); + int stats_cmd_idx = initial_tgt->pkt->cmdToIndex(); + Tick miss_latency = curTick - initial_tgt->recvTime; + if (mshr->isCacheFill) { -#if 0 - mshr_miss_latency[mshr->originalCmd.toInt()][0/*pkt->req->getThreadNum()*/] += - curTick - pkt->time; -#endif + mshr_miss_latency[stats_cmd_idx][0/*pkt->req->getThreadNum()*/] += + miss_latency; DPRINTF(Cache, "Block for addr %x being updated in Cache\n", pkt->getAddr()); BlkType *blk = tags->findBlock(pkt->getAddr()); @@ -698,8 +701,8 @@ Cache::handleResponse(PacketPtr pkt) } } else { if (pkt->req->isUncacheable()) { - mshr_uncacheable_lat[pkt->cmd.toInt()][0/*pkt->req->getThreadNum()*/] += - curTick - pkt->time; + mshr_uncacheable_lat[stats_cmd_idx][0/*pkt->req->getThreadNum()*/] += + miss_latency; } while (mshr->hasTargets()) { @@ -1262,8 +1265,8 @@ Cache::MemSidePort::sendPacket() // tried to send packet... if it was successful (no retry), see if // we need to rerequest bus or not if (!waitingOnRetry) { - Tick nextReady = std::min(deferredPacketReadyTick(), - myCache()->nextMSHRReadyTick()); + Tick nextReady = std::min(deferredPacketReadyTime(), + myCache()->nextMSHRReadyTime()); // @TODO: need to facotr in prefetch requests here somehow if (nextReady != MaxTick) { DPRINTF(CachePort, "more packets to send @ %d\n", nextReady); diff --git a/src/mem/cache/miss/mshr.cc b/src/mem/cache/miss/mshr.cc index 63b3cacc2..5d5e63f90 100644 --- a/src/mem/cache/miss/mshr.cc +++ b/src/mem/cache/miss/mshr.cc @@ -56,11 +56,11 @@ MSHR::MSHR() void MSHR::allocate(Addr _addr, int _size, PacketPtr target, - Tick when, Counter _order) + Tick whenReady, Counter _order) { addr = _addr; size = _size; - readyTick = when; + readyTime = whenReady; order = _order; assert(target); isCacheFill = false; @@ -71,7 +71,7 @@ MSHR::allocate(Addr _addr, int _size, PacketPtr target, ntargets = 1; // Don't know of a case where we would allocate a new MSHR for a // snoop (mem-side request), so set cpuSide to true here. - targets.push_back(Target(target, when, _order, true)); + targets.push_back(Target(target, whenReady, _order, true)); assert(deferredTargets.empty()); deferredNeedsExclusive = false; pendingInvalidate = false; @@ -94,33 +94,33 @@ MSHR::deallocate() * Adds a target to an MSHR */ void -MSHR::allocateTarget(PacketPtr target, Tick when, Counter _order) +MSHR::allocateTarget(PacketPtr target, Tick whenReady, Counter _order) { if (inService) { if (!deferredTargets.empty() || pendingInvalidate || (!needsExclusive && target->needsExclusive())) { // need to put on deferred list - deferredTargets.push_back(Target(target, when, _order, true)); + deferredTargets.push_back(Target(target, whenReady, _order, true)); if (target->needsExclusive()) { deferredNeedsExclusive = true; } } else { // still OK to append to outstanding request - targets.push_back(Target(target, when, _order, true)); + targets.push_back(Target(target, whenReady, _order, true)); } } else { if (target->needsExclusive()) { needsExclusive = true; } - targets.push_back(Target(target, when, _order, true)); + targets.push_back(Target(target, whenReady, _order, true)); } ++ntargets; } void -MSHR::allocateSnoopTarget(PacketPtr pkt, Tick when, Counter _order) +MSHR::allocateSnoopTarget(PacketPtr pkt, Tick whenReady, Counter _order) { assert(inService); // don't bother to call otherwise @@ -137,7 +137,7 @@ MSHR::allocateSnoopTarget(PacketPtr pkt, Tick when, Counter _order) if (needsExclusive || pkt->needsExclusive()) { // actual target device (typ. PhysicalMemory) will delete the // packet on reception, so we need to save a copy here - targets.push_back(Target(new Packet(pkt), when, _order, false)); + targets.push_back(Target(new Packet(pkt), whenReady, _order, false)); ++ntargets; if (needsExclusive) { @@ -177,7 +177,7 @@ MSHR::promoteDeferredTargets() pendingShared = false; deferredNeedsExclusive = false; order = targets.front().order; - readyTick = std::max(curTick, targets.front().time); + readyTime = std::max(curTick, targets.front().readyTime); return true; } diff --git a/src/mem/cache/miss/mshr.hh b/src/mem/cache/miss/mshr.hh index 4db7b1cfe..293f290b8 100644 --- a/src/mem/cache/miss/mshr.hh +++ b/src/mem/cache/miss/mshr.hh @@ -54,15 +54,17 @@ class MSHR : public Packet::SenderState class Target { public: - Tick time; //!< Time when request was received (for stats) + Tick recvTime; //!< Time when request was received (for stats) + Tick readyTime; //!< Time when request is ready to be serviced Counter order; //!< Global order (for memory consistency mgmt) PacketPtr pkt; //!< Pending request packet. bool cpuSide; //!< Did request come from cpu side or mem side? bool isCpuSide() { return cpuSide; } - Target(PacketPtr _pkt, Tick _time, Counter _order, bool _cpuSide) - : time(_time), order(_order), pkt(_pkt), cpuSide(_cpuSide) + Target(PacketPtr _pkt, Tick _readyTime, Counter _order, bool _cpuSide) + : recvTime(curTick), readyTime(_readyTime), order(_order), + pkt(_pkt), cpuSide(_cpuSide) {} }; @@ -81,7 +83,7 @@ class MSHR : public Packet::SenderState MSHRQueue *queue; /** Cycle when ready to issue */ - Tick readyTick; + Tick readyTime; /** Order number assigned by the miss queue. */ Counter order; diff --git a/src/mem/cache/miss/mshr_queue.cc b/src/mem/cache/miss/mshr_queue.cc index 18184bd20..56ec62a7d 100644 --- a/src/mem/cache/miss/mshr_queue.cc +++ b/src/mem/cache/miss/mshr_queue.cc @@ -111,14 +111,14 @@ MSHRQueue::findPending(Addr addr, int size) const MSHR::Iterator MSHRQueue::addToReadyList(MSHR *mshr) { - if (readyList.empty() || readyList.back()->readyTick <= mshr->readyTick) { + if (readyList.empty() || readyList.back()->readyTime <= mshr->readyTime) { return readyList.insert(readyList.end(), mshr); } MSHR::Iterator i = readyList.begin(); MSHR::Iterator end = readyList.end(); for (; i != end; ++i) { - if ((*i)->readyTick > mshr->readyTick) { + if ((*i)->readyTime > mshr->readyTime) { return readyList.insert(i, mshr); } } diff --git a/src/mem/cache/miss/mshr_queue.hh b/src/mem/cache/miss/mshr_queue.hh index fd61dec8b..1f1d59e98 100644 --- a/src/mem/cache/miss/mshr_queue.hh +++ b/src/mem/cache/miss/mshr_queue.hh @@ -193,15 +193,15 @@ class MSHRQueue */ MSHR *getNextMSHR() const { - if (readyList.empty() || readyList.front()->readyTick > curTick) { + if (readyList.empty() || readyList.front()->readyTime > curTick) { return NULL; } return readyList.front(); } - Tick nextMSHRReadyTick() const + Tick nextMSHRReadyTime() const { - return readyList.empty() ? MaxTick : readyList.front()->readyTick; + return readyList.empty() ? MaxTick : readyList.front()->readyTime; } }; diff --git a/src/mem/tport.hh b/src/mem/tport.hh index bfed29f34..bc9da6c44 100644 --- a/src/mem/tport.hh +++ b/src/mem/tport.hh @@ -105,7 +105,7 @@ class SimpleTimingPort : public Port bool deferredPacketReady() { return !transmitList.empty() && transmitList.front().tick <= curTick; } - Tick deferredPacketReadyTick() + Tick deferredPacketReadyTime() { return transmitList.empty() ? MaxTick : transmitList.front().tick; } void schedSendEvent(Tick when) -- 2.30.2