From 91178600947e174041f46f54e4241cedd01bbb34 Mon Sep 17 00:00:00 2001
From: Steve Reinhardt
Date: Sat, 21 Jul 2007 13:45:17 -0700
Subject: [PATCH] Several more fixes for multi-level timing coherence.

- Add "deferred snoop" flag to Packet so upper-level caches can
  distinguish whether lower-level cache request was in-service or not
  at the time of the original snoop.
- Revamp response handling to properly handle deferred snoops on
  non-cache-fill requests (i.e. upgrades).
- Make sure forwarded writebacks are kept in write buffer at
  lower-level caches so they get snooped properly.

--HG--
extra : convert_revision : 17f8a3772a1ae31a16991a53f8225ddf54d31fc9
---
 src/mem/cache/base_cache.hh |  26 ++--
 src/mem/cache/cache_impl.hh | 195 ++++++++++++++++++------------------
 src/mem/cache/miss/mshr.cc  |  18 ++--
 src/mem/cache/miss/mshr.hh  |   2 +-
 src/mem/packet.hh           |   3 +
 5 files changed, 122 insertions(+), 122 deletions(-)

diff --git a/src/mem/cache/base_cache.hh b/src/mem/cache/base_cache.hh
index 46414974b..719ab0245 100644
--- a/src/mem/cache/base_cache.hh
+++ b/src/mem/cache/base_cache.hh
@@ -410,28 +410,28 @@ class BaseCache : public MemObject
     MSHR *allocateMissBuffer(PacketPtr pkt, Tick time, bool requestBus)
     {
+        assert(!pkt->req->isUncacheable());
         return allocateBufferInternal(&mshrQueue,
                                       blockAlign(pkt->getAddr()), blkSize,
                                       pkt, time, requestBus);
     }
 
-    MSHR *allocateBuffer(PacketPtr pkt, Tick time, bool requestBus)
+    MSHR *allocateWriteBuffer(PacketPtr pkt, Tick time, bool requestBus)
     {
-        MSHRQueue *mq = NULL;
-
-        if (pkt->isWrite() && !pkt->isRead()) {
-            /**
-             * @todo Add write merging here.
-             */
-            mq = &writeBuffer;
-        } else {
-            mq = &mshrQueue;
-        }
-
-        return allocateBufferInternal(mq, pkt->getAddr(), pkt->getSize(),
+        assert(pkt->isWrite() && !pkt->isRead());
+        return allocateBufferInternal(&writeBuffer,
+                                      pkt->getAddr(), pkt->getSize(),
                                       pkt, time, requestBus);
     }
 
+    MSHR *allocateUncachedReadBuffer(PacketPtr pkt, Tick time, bool requestBus)
+    {
+        assert(pkt->req->isUncacheable());
+        assert(pkt->isRead());
+        return allocateBufferInternal(&mshrQueue,
+                                      pkt->getAddr(), pkt->getSize(),
+                                      pkt, time, requestBus);
+    }
 
     /**
      * Returns true if the cache is blocked for accesses.
diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh
index c069d8ba9..b78360d4a 100644
--- a/src/mem/cache/cache_impl.hh
+++ b/src/mem/cache/cache_impl.hh
@@ -369,7 +369,12 @@ Cache::timingAccess(PacketPtr pkt)
     }
 
     if (pkt->req->isUncacheable()) {
-        allocateBuffer(pkt, time, true);
+        // writes go in write buffer, reads use MSHR
+        if (pkt->isWrite() && !pkt->isRead()) {
+            allocateWriteBuffer(pkt, time, true);
+        } else {
+            allocateUncachedReadBuffer(pkt, time, true);
+        }
         assert(pkt->needsResponse()); // else we should delete it here??
         return true;
     }
@@ -417,7 +422,7 @@ Cache::timingAccess(PacketPtr pkt)
         // copy writebacks to write buffer
         while (!writebacks.empty()) {
             PacketPtr wbPkt = writebacks.front();
-            allocateBuffer(wbPkt, time, true);
+            allocateWriteBuffer(wbPkt, time, true);
             writebacks.pop_front();
         }
 #endif
@@ -458,7 +463,11 @@ Cache::timingAccess(PacketPtr pkt)
 
             // always mark as cache fill for now... if we implement
             // no-write-allocate or bypass accesses this will have to
            // be changed.
-            allocateMissBuffer(pkt, time, true);
+            if (pkt->cmd == MemCmd::Writeback) {
+                allocateWriteBuffer(pkt, time, true);
+            } else {
+                allocateMissBuffer(pkt, time, true);
+            }
         }
     }
@@ -492,6 +501,10 @@ Cache::getBusPacket(PacketPtr cpu_pkt, BlkType *blk,
     assert(cpu_pkt->needsResponse());
 
     MemCmd cmd;
+    // @TODO make useUpgrades a parameter.
+    // Note that ownership protocols require upgrade, otherwise a
+    // write miss on a shared owned block will generate a ReadExcl,
+    // which will clobber the owned copy.
     const bool useUpgrades = true;
     if (blkValid && useUpgrades) {
         // only reason to be here is that blk is shared
@@ -648,62 +661,6 @@ Cache::functionalAccess(PacketPtr pkt,
 
 /////////////////////////////////////////////////////
 
-template
-bool
-Cache::satisfyMSHR(MSHR *mshr, PacketPtr pkt,
-                   BlkType *blk)
-{
-    // respond to MSHR targets, if any
-
-    // First offset for critical word first calculations
-    int initial_offset = 0;
-
-    if (mshr->hasTargets()) {
-        initial_offset = mshr->getTarget()->pkt->getOffset(blkSize);
-    }
-
-    while (mshr->hasTargets()) {
-        MSHR::Target *target = mshr->getTarget();
-
-        if (target->isCpuSide()) {
-            satisfyCpuSideRequest(target->pkt, blk);
-            // How many bytes pass the first request is this one
-            int transfer_offset =
-                target->pkt->getOffset(blkSize) - initial_offset;
-            if (transfer_offset < 0) {
-                transfer_offset += blkSize;
-            }
-
-            // If critical word (no offset) return first word time
-            Tick completion_time = tags->getHitLatency() +
-                transfer_offset ? pkt->finishTime : pkt->firstWordTime;
-
-            if (!target->pkt->req->isUncacheable()) {
-                missLatency[target->pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/] +=
-                    completion_time - target->recvTime;
-            }
-            target->pkt->makeTimingResponse();
-            cpuSidePort->respond(target->pkt, completion_time);
-        } else {
-            // response to snoop request
-            DPRINTF(Cache, "processing deferred snoop...\n");
-            handleSnoop(target->pkt, blk, true, true);
-        }
-
-        mshr->popTarget();
-    }
-
-    if (mshr->promoteDeferredTargets()) {
-        MSHRQueue *mq = mshr->queue;
-        mq->markPending(mshr);
-        requestMemSideBus((RequestCause)mq->index, pkt->finishTime);
-        return false;
-    }
-
-    return true;
-}
-
-
 template
 void
 Cache::handleResponse(PacketPtr pkt)
@@ -730,68 +687,105 @@ Cache::handleResponse(PacketPtr pkt)
         noTargetMSHR = NULL;
     }
 
-    // Can we deallocate MSHR when done?
-    bool deallocate = false;
-
     // Initial target is used just for stats
     MSHR::Target *initial_tgt = mshr->getTarget();
+    BlkType *blk = tags->findBlock(pkt->getAddr());
     int stats_cmd_idx = initial_tgt->pkt->cmdToIndex();
     Tick miss_latency = curTick - initial_tgt->recvTime;
+    PacketList writebacks;
 
-    if (mshr->isCacheFill) {
+    if (pkt->req->isUncacheable()) {
+        mshr_uncacheable_lat[stats_cmd_idx][0/*pkt->req->getThreadNum()*/] +=
+            miss_latency;
+    } else {
         mshr_miss_latency[stats_cmd_idx][0/*pkt->req->getThreadNum()*/] +=
             miss_latency;
+    }
+
+    if (mshr->isCacheFill) {
         DPRINTF(Cache, "Block for addr %x being updated in Cache\n",
                 pkt->getAddr());
-        BlkType *blk = tags->findBlock(pkt->getAddr());
 
         // give mshr a chance to do some dirty work
        mshr->handleFill(pkt, blk);
 
-        PacketList writebacks;
         blk = handleFill(pkt, blk, writebacks);
-        deallocate = satisfyMSHR(mshr, pkt, blk);
-        // copy writebacks to write buffer
-        while (!writebacks.empty()) {
-            PacketPtr wbPkt = writebacks.front();
-            allocateBuffer(wbPkt, time, true);
-            writebacks.pop_front();
-        }
-        // if we used temp block, clear it out
-        if (blk == tempBlock) {
-            if (blk->isDirty()) {
-                allocateBuffer(writebackBlk(blk), time, true);
-            }
-            tags->invalidateBlk(blk);
-        }
-    } else {
-        if (pkt->req->isUncacheable()) {
-            mshr_uncacheable_lat[stats_cmd_idx][0/*pkt->req->getThreadNum()*/] +=
-                miss_latency;
-        }
+        assert(blk != NULL);
+    }
 
-        while (mshr->hasTargets()) {
-            MSHR::Target *target = mshr->getTarget();
-            assert(target->isCpuSide());
-            mshr->popTarget();
-            if (pkt->isRead()) {
-                target->pkt->setData(pkt->getPtr());
+    // First offset for critical word first calculations
+    int initial_offset = 0;
+
+    if (mshr->hasTargets()) {
+        initial_offset = mshr->getTarget()->pkt->getOffset(blkSize);
+    }
+
+    while (mshr->hasTargets()) {
+        MSHR::Target *target = mshr->getTarget();
+
+        if (target->isCpuSide()) {
+            Tick completion_time;
+            if (blk != NULL) {
+                satisfyCpuSideRequest(target->pkt, blk);
+                // How many bytes pass the first request is this one
+                int transfer_offset =
+                    target->pkt->getOffset(blkSize) - initial_offset;
+                if (transfer_offset < 0) {
+                    transfer_offset += blkSize;
+                }
+
+                // If critical word (no offset) return first word time
+                completion_time = tags->getHitLatency() +
+                    transfer_offset ? pkt->finishTime : pkt->firstWordTime;
+
+                if (!target->pkt->req->isUncacheable()) {
+                    missLatency[target->pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/] +=
+                        completion_time - target->recvTime;
+                }
+            } else {
+                // not a cache fill, just forwarding response
+                completion_time = tags->getHitLatency() + pkt->finishTime;
+                if (pkt->isRead()) {
+                    target->pkt->setData(pkt->getPtr());
+                }
             }
             target->pkt->makeTimingResponse();
-            cpuSidePort->respond(target->pkt, time);
+            cpuSidePort->respond(target->pkt, completion_time);
+        } else {
+            // response to snoop request
+            DPRINTF(Cache, "processing deferred snoop...\n");
+            handleSnoop(target->pkt, blk, true, true);
         }
-        assert(!mshr->hasTargets());
-        deallocate = true;
-    }
 
-    delete pkt;
+        mshr->popTarget();
+    }
 
-    if (deallocate) {
+    if (mshr->promoteDeferredTargets()) {
+        MSHRQueue *mq = mshr->queue;
+        mq->markPending(mshr);
+        requestMemSideBus((RequestCause)mq->index, pkt->finishTime);
+    } else {
         mq->deallocate(mshr);
         if (wasFull && !mq->isFull()) {
             clearBlocked((BlockedCause)mq->index);
         }
     }
+
+    // copy writebacks to write buffer
+    while (!writebacks.empty()) {
+        PacketPtr wbPkt = writebacks.front();
+        allocateWriteBuffer(wbPkt, time, true);
+        writebacks.pop_front();
+    }
+    // if we used temp block, clear it out
+    if (blk == tempBlock) {
+        if (blk->isDirty()) {
+            allocateWriteBuffer(writebackBlk(blk), time, true);
+        }
+        tags->invalidateBlk(blk);
+    }
+
+    delete pkt;
 }
@@ -933,6 +927,9 @@ Cache::handleSnoop(PacketPtr pkt, BlkType *blk,
     if (is_timing) {
         Packet *snoopPkt = new Packet(pkt, true);  // clear flags
         snoopPkt->setExpressSnoop();
+        if (is_deferred) {
+            snoopPkt->setDeferredSnoop();
+        }
         snoopPkt->senderState = new ForwardResponseRecord(pkt, this);
         cpuSidePort->sendTiming(snoopPkt);
         if (snoopPkt->memInhibitAsserted()) {
@@ -1020,12 +1017,11 @@ Cache::snoopTiming(PacketPtr pkt)
 
     MSHR *mshr = mshrQueue.findMatch(blk_addr);
     // better not be snooping a request that conflicts with something
     // we have outstanding...
-    if (mshr && mshr->inService) {
+    if (mshr && mshr->handleSnoop(pkt, order++)) {
         DPRINTF(Cache, "Deferring snoop on in-service MSHR to blk %x\n",
                 blk_addr);
-        mshr->allocateSnoopTarget(pkt, curTick, order++);
         if (mshr->getNumTargets() > numTarget)
-            warn("allocating bonus target for snoop"); //handle later
+            warn("allocating bonus target for snoop"); //handle later
         return;
     }
@@ -1226,6 +1222,7 @@ template
 bool
 Cache::CpuSidePort::recvTiming(PacketPtr pkt)
 {
+    // illegal to block responses... can lead to deadlock
     if (pkt->isRequest() && blocked) {
         DPRINTF(Cache,"Scheduling a retry while blocked\n");
         mustSendRetry = true;
diff --git a/src/mem/cache/miss/mshr.cc b/src/mem/cache/miss/mshr.cc
index 5d5e63f90..7ba3789fe 100644
--- a/src/mem/cache/miss/mshr.cc
+++ b/src/mem/cache/miss/mshr.cc
@@ -119,25 +119,23 @@ MSHR::allocateTarget(PacketPtr target, Tick whenReady, Counter _order)
     ++ntargets;
 }
 
-void
-MSHR::allocateSnoopTarget(PacketPtr pkt, Tick whenReady, Counter _order)
+bool
+MSHR::handleSnoop(PacketPtr pkt, Counter _order)
 {
-    assert(inService); // don't bother to call otherwise
+    if (!inService || (pkt->isExpressSnoop() && !pkt->isDeferredSnoop())) {
+        return false;
+    }
 
     if (pendingInvalidate) {
         // a prior snoop has already appended an invalidation, so
         // logically we don't have the block anymore...
-        return;
+        return true;
     }
 
-    DPRINTF(Cache, "deferred snoop on %x: %s %s\n", addr,
-            needsExclusive ? "needsExclusive" : "",
-            pkt->needsExclusive() ? "pkt->needsExclusive()" : "");
-
     if (needsExclusive || pkt->needsExclusive()) {
         // actual target device (typ. PhysicalMemory) will delete the
         // packet on reception, so we need to save a copy here
-        targets.push_back(Target(new Packet(pkt), whenReady, _order, false));
+        targets.push_back(Target(new Packet(pkt), curTick, _order, false));
         ++ntargets;
 
         if (needsExclusive) {
@@ -157,6 +155,8 @@ MSHR::allocateSnoopTarget(PacketPtr pkt, Tick whenReady, Counter _order)
         pendingShared = true;
         pkt->assertShared();
     }
+
+    return true;
 }
 
 
diff --git a/src/mem/cache/miss/mshr.hh b/src/mem/cache/miss/mshr.hh
index a27f465aa..9c6a8cf33 100644
--- a/src/mem/cache/miss/mshr.hh
+++ b/src/mem/cache/miss/mshr.hh
@@ -162,7 +162,7 @@ public:
      * @param target The target.
      */
     void allocateTarget(PacketPtr target, Tick when, Counter order);
-    void allocateSnoopTarget(PacketPtr target, Tick when, Counter order);
+    bool handleSnoop(PacketPtr target, Counter order);
 
     /** A simple constructor. */
     MSHR();
diff --git a/src/mem/packet.hh b/src/mem/packet.hh
index 036bd3fd7..8063c7ae7 100644
--- a/src/mem/packet.hh
+++ b/src/mem/packet.hh
@@ -257,6 +257,7 @@ class Packet : public FastAlloc
         Shared,
         // Special control flags
         ExpressSnoop,
+        DeferredSnoop,
         NUM_PACKET_FLAGS
     };
 
@@ -322,6 +323,8 @@ class Packet : public FastAlloc
     // Special control flags
     void setExpressSnoop() { flags[ExpressSnoop] = true; }
     bool isExpressSnoop() { return flags[ExpressSnoop]; }
+    void setDeferredSnoop() { flags[DeferredSnoop] = true; }
+    bool isDeferredSnoop() { return flags[DeferredSnoop]; }
 
     // Network error conditions... encapsulate them as methods since
     // their encoding keeps changing (from result field to command
-- 
2.30.2