mem: Add clean evicts to improve snoop filter tracking
author Ali Jafri <ali.jafri@arm.com>
Fri, 3 Jul 2015 14:14:37 +0000 (10:14 -0400)
committer Ali Jafri <ali.jafri@arm.com>
Fri, 3 Jul 2015 14:14:37 +0000 (10:14 -0400)
This patch adds eviction notices to the caches, to provide accurate
tracking of cache blocks in snoop filters. We add the CleanEvict
message to the memory hierarchy and use both CleanEvicts and
Writebacks with BLOCK_CACHED flags to propagate notice of clean and
dirty evictions respectively, down the memory hierarchy. Note that the
BLOCK_CACHED flag indicates whether there exist any copies of the
evicted block in the caches above the evicting cache.

The purpose of the CleanEvict message is to notify snoop filters of
silent evictions in the relevant caches. The CleanEvict message
behaves much like a Writeback. CleanEvict is a write and a request but
unlike a Writeback, CleanEvict does not have data and does not need
exclusive access to the block. The cache generates the CleanEvict
message on a fill resulting in eviction of a clean block. Before
travelling downwards CleanEvict requests generate zero-time snoop
requests to check if the same block is cached in upper levels of the
memory hierarchy. If the block exists, the cache discards the
CleanEvict message. The snoops check the tags, writeback queue and the
MSHRs of upper level caches in a manner similar to snoops generated
from HardPFReqs. Currently CleanEvicts keep travelling towards main
memory unless they encounter the block corresponding to their address
or reach main memory (since we have no well defined point of
serialisation). Main memory simply discards CleanEvict messages.

We have modified the behavior of Writebacks, such that they generate
snoops to check for the presence of blocks in upper level caches. It
is possible in our current implementation for a lower level cache to be
writing back a block while a shared copy of the same block exists in
the upper level cache. If the snoops find the same block in upper
level caches, we set the BLOCK_CACHED flag in the Writeback message.

We have also added logic to account for interaction of other message
types with CleanEvicts waiting in the writeback queue. A simple
example is of a response arriving at a cache removing any CleanEvicts
to the same address from the cache's writeback queue.

src/mem/abstract_mem.cc
src/mem/cache/cache.hh
src/mem/cache/cache_impl.hh
src/mem/cache/prefetch/base.cc
src/mem/coherent_xbar.cc
src/mem/coherent_xbar.hh
src/mem/dram_ctrl.cc
src/mem/packet.cc
src/mem/packet.hh
src/mem/snoop_filter.cc

index ec1be04e1161e70bc4f1fc8e242d35b5ff4a3094..4690a5d8091fb2f5e42808e7fcf6424b35356496 100644 (file)
@@ -322,15 +322,21 @@ AbstractMemory::checkLockedAddrList(PacketPtr pkt)
 void
 AbstractMemory::access(PacketPtr pkt)
 {
-    assert(AddrRange(pkt->getAddr(),
-                     pkt->getAddr() + pkt->getSize() - 1).isSubset(range));
-
     if (pkt->memInhibitAsserted()) {
         DPRINTF(MemoryAccess, "mem inhibited on 0x%x: not responding\n",
                 pkt->getAddr());
         return;
     }
 
+    if (pkt->cmd == MemCmd::CleanEvict) {
+        DPRINTF(MemoryAccess, "CleanEvict  on 0x%x: not responding\n",
+                pkt->getAddr());
+      return;
+    }
+
+    assert(AddrRange(pkt->getAddr(),
+                     pkt->getAddr() + (pkt->getSize() - 1)).isSubset(range));
+
     uint8_t *hostAddr = pmemAddr + pkt->getAddr() - range.start();
 
     if (pkt->cmd == MemCmd::SwapReq) {
index 24a067eced2475edeb8a2e5a2c6f2edfea14b3dc..4c70d3a4040436812c05af586b602663249d0e87 100644 (file)
@@ -245,6 +245,11 @@ class Cache : public BaseCache
      */
     bool recvTimingReq(PacketPtr pkt);
 
+    /**
+     * Insert writebacks into the write buffer
+     */
+    void doWritebacks(PacketList& writebacks, Tick forward_time);
+
     /**
      * Handles a response (cache line fill/write ack) from the bus.
      * @param pkt The response packet
@@ -308,6 +313,13 @@ class Cache : public BaseCache
      */
     PacketPtr writebackBlk(CacheBlk *blk);
 
+    /**
+     * Create a CleanEvict request for the given block.
+     * @param blk The block to evict.
+     * @return The CleanEvict request for the block.
+     */
+    PacketPtr cleanEvictBlk(CacheBlk *blk);
+
 
     void memWriteback();
     void memInvalidate();
@@ -358,6 +370,12 @@ class Cache : public BaseCache
      */
     MSHR *getNextMSHR();
 
+    /**
+     * Send up a snoop request and find cached copies. If cached copies are
+     * found, set the BLOCK_CACHED flag in pkt.
+     */
+    bool isCachedAbove(const PacketPtr pkt) const;
+
     /**
      * Selects an outstanding request to service.  Called when the
      * cache gets granted the downstream bus in timing mode.
index 9c5070ffa009eeccd67aad488dda4c09e96fe312..117596d9b4f898972cea77f3ceca26ce426fccec 100644 (file)
@@ -334,6 +334,36 @@ Cache::access(PacketPtr pkt, CacheBlk *&blk, Cycles &lat,
             pkt->getAddr(), pkt->getSize(), pkt->isSecure() ? "s" : "ns",
             blk ? "hit " + blk->print() : "miss");
 
+
+    if (pkt->evictingBlock()) {
+        // We check for presence of block in above caches before issuing
+        // Writeback or CleanEvict to write buffer. Therefore the only
+        // possible cases can be of a CleanEvict packet coming from above
+        // encountering a Writeback generated in this cache peer cache and
+        // waiting in the write buffer. Cases of upper level peer caches
+        // generating CleanEvict and Writeback or simply CleanEvict and
+        // CleanEvict almost simultaneously will be caught by snoops sent out
+        // by crossbar.
+        std::vector<MSHR *> outgoing;
+        if (writeBuffer.findMatches(pkt->getAddr(), pkt->isSecure(),
+                                   outgoing)) {
+            assert(outgoing.size() == 1);
+            PacketPtr wbPkt = outgoing[0]->getTarget()->pkt;
+            assert(pkt->cmd == MemCmd::CleanEvict &&
+                   wbPkt->cmd == MemCmd::Writeback);
+            // As the CleanEvict is coming from above, it would have snooped
+            // into other peer caches of the same level while traversing the
+            // crossbar. If a copy of the block had been found, the CleanEvict
+            // would have been deleted in the crossbar. Now that the
+            // CleanEvict is here we can be sure none of the other upper level
+            // caches connected to this cache have the block, so we can clear
+            // the BLOCK_CACHED flag in the Writeback if set and discard the
+            // CleanEvict by returning true.
+            wbPkt->clearBlockCached();
+            return true;
+        }
+    }
+
     // Writeback handling is special case.  We can write the block into
     // the cache without having a writeable copy (or any copy at all).
     if (pkt->cmd == MemCmd::Writeback) {
@@ -363,6 +393,19 @@ Cache::access(PacketPtr pkt, CacheBlk *&blk, Cycles &lat,
         DPRINTF(Cache, "%s new state is %s\n", __func__, blk->print());
         incHitCount(pkt);
         return true;
+    } else if (pkt->cmd == MemCmd::CleanEvict) {
+        if (blk != NULL) {
+            // Found the block in the tags, need to stop CleanEvict from
+            // propagating further down the hierarchy. Returning true will
+            // treat the CleanEvict like a satisfied write request and delete
+            // it.
+            return true;
+        }
+        // We didn't find the block here, propagate the CleanEvict further
+        // down the memory hierarchy. Returning false will treat the CleanEvict
+        // like a Writeback which could not find a replaceable block so has to
+        // go to next level.
+        return false;
     } else if ((blk != NULL) &&
                (pkt->needsExclusive() ? blk->isWritable()
                                       : blk->isReadable())) {
@@ -394,6 +437,41 @@ class ForwardResponseRecord : public Packet::SenderState
     ForwardResponseRecord() {}
 };
 
+void
+Cache::doWritebacks(PacketList& writebacks, Tick forward_time)
+{
+    while (!writebacks.empty()) {
+        PacketPtr wbPkt = writebacks.front();
+        // forward_time comes from the caller and is passed unchanged to
+        // every allocateWriteBuffer call below. isCachedAbove is called for
+        // both Writebacks and CleanEvicts: when it returns true a Writeback
+        // gets the BLOCK_CACHED flag set, while a CleanEvict is discarded.
+        if (isCachedAbove(wbPkt)) {
+            if (wbPkt->cmd == MemCmd::CleanEvict) {
+                // Delete CleanEvict because cached copies exist above. The
+                // packet destructor will delete the request object because
+                // this is a non-snoop request packet which does not require a
+                // response.
+                delete wbPkt;
+            } else {
+                // Set BLOCK_CACHED flag in Writeback and send below, so that
+                // the Writeback does not reset the bit corresponding to this
+                // address in the snoop filter below.
+                wbPkt->setBlockCached();
+                allocateWriteBuffer(wbPkt, forward_time, true);
+            }
+        } else {
+            // The block is not cached above, so send the packet below. A
+            // CleanEvict, or a Writeback whose BLOCK_CACHED flag is clear,
+            // will reset the bit corresponding to this address in the snoop
+            // filter below.
+            allocateWriteBuffer(wbPkt, forward_time, true);
+        }
+        writebacks.pop_front();
+    }
+}
+
+
 void
 Cache::recvTimingSnoopResp(PacketPtr pkt)
 {
@@ -510,7 +588,7 @@ Cache::recvTimingReq(PacketPtr pkt)
 
         /// @todo nominally we should just delete the packet here,
         /// however, until 4-phase stuff we can't because sending
-        /// cache is still relying on it
+        /// cache is still relying on it.
         pendingDelete.push_back(pkt);
 
         // no need to take any action in this particular cache as the
@@ -537,13 +615,7 @@ Cache::recvTimingReq(PacketPtr pkt)
 
         // copy writebacks to write buffer here to ensure they logically
         // proceed anything happening below
-        while (!writebacks.empty()) {
-            PacketPtr wbPkt = writebacks.front();
-            // We use forwardLatency here because we are copying
-            // writebacks to write buffer.
-            allocateWriteBuffer(wbPkt, forward_time, true);
-            writebacks.pop_front();
-        }
+        doWritebacks(writebacks, forward_time);
     }
 
     // Here we charge the headerDelay that takes into account the latencies
@@ -591,8 +663,10 @@ Cache::recvTimingReq(PacketPtr pkt)
             cpuSidePort->schedTimingResp(pkt, request_time);
         } else {
             /// @todo nominally we should just delete the packet here,
-            /// however, until 4-phase stuff we can't because sending
-            /// cache is still relying on it
+            /// however, until 4-phase stuff we can't because sending cache is
+            /// still relying on it. If the block is found in access(),
+            /// CleanEvict and Writeback messages will be deleted here as
+            /// well.
             pendingDelete.push_back(pkt);
         }
     } else {
@@ -660,31 +734,38 @@ Cache::recvTimingReq(PacketPtr pkt)
 
             // Coalesce unless it was a software prefetch (see above).
             if (pkt) {
-                DPRINTF(Cache, "%s coalescing MSHR for %s addr %#llx size %d\n",
-                        __func__, pkt->cmdString(), pkt->getAddr(),
-                        pkt->getSize());
-
-                assert(pkt->req->masterId() < system->maxMasters());
-                mshr_hits[pkt->cmdToIndex()][pkt->req->masterId()]++;
-                if (mshr->threadNum != 0/*pkt->req->threadId()*/) {
-                    mshr->threadNum = -1;
-                }
-                // We use forward_time here because it is the same
-                // considering new targets. We have multiple requests for the
-                // same address here. It specifies the latency to allocate an
-                // internal buffer and to schedule an event to the queued
-                // port and also takes into account the additional delay of
-                // the xbar.
-                mshr->allocateTarget(pkt, forward_time, order++);
-                if (mshr->getNumTargets() == numTarget) {
-                    noTargetMSHR = mshr;
-                    setBlocked(Blocked_NoTargets);
-                    // need to be careful with this... if this mshr isn't
-                    // ready yet (i.e. time > curTick()), we don't want to
-                    // move it ahead of mshrs that are ready
-                    // mshrQueue.moveToFront(mshr);
+                assert(pkt->cmd != MemCmd::Writeback);
+                // CleanEvicts corresponding to blocks which have outstanding
+                // requests in MSHRs can be deleted here.
+                if (pkt->cmd == MemCmd::CleanEvict) {
+                    pendingDelete.push_back(pkt);
+                } else {
+                    DPRINTF(Cache, "%s coalescing MSHR for %s addr %#llx size %d\n",
+                            __func__, pkt->cmdString(), pkt->getAddr(),
+                            pkt->getSize());
+
+                    assert(pkt->req->masterId() < system->maxMasters());
+                    mshr_hits[pkt->cmdToIndex()][pkt->req->masterId()]++;
+                    if (mshr->threadNum != 0/*pkt->req->threadId()*/) {
+                        mshr->threadNum = -1;
+                    }
+                    // We use forward_time here because it is the same
+                    // considering new targets. We have multiple
+                    // requests for the same address here. It
+                    // specifies the latency to allocate an internal
+                    // buffer and to schedule an event to the queued
+                    // port and also takes into account the additional
+                    // delay of the xbar.
+                    mshr->allocateTarget(pkt, forward_time, order++);
+                    if (mshr->getNumTargets() == numTarget) {
+                        noTargetMSHR = mshr;
+                        setBlocked(Blocked_NoTargets);
+                        // need to be careful with this... if this mshr isn't
+                        // ready yet (i.e. time > curTick()), we don't want to
+                        // move it ahead of mshrs that are ready
+                        // mshrQueue.moveToFront(mshr);
+                    }
                 }
-
                 // We should call the prefetcher reguardless if the request is
                 // satisfied or not, reguardless if the request is in the MSHR or
                 // not.  The request could be a ReadReq hit, but still not
@@ -707,7 +788,7 @@ Cache::recvTimingReq(PacketPtr pkt)
                 mshr_misses[pkt->cmdToIndex()][pkt->req->masterId()]++;
             }
 
-            if (pkt->cmd == MemCmd::Writeback ||
+            if (pkt->evictingBlock() ||
                 (pkt->req->isUncacheable() && pkt->isWrite())) {
                 // We use forward_time here because there is an
                 // uncached memory write, forwarded to WriteBuffer. It
@@ -782,7 +863,8 @@ Cache::getBusPacket(PacketPtr cpu_pkt, CacheBlk *blk,
     }
 
     if (!blkValid &&
-        (cpu_pkt->cmd == MemCmd::Writeback || cpu_pkt->isUpgrade())) {
+        (cpu_pkt->isUpgrade() ||
+         cpu_pkt->evictingBlock())) {
         // Writebacks that weren't allocated in access() and upgrades
         // from upper-level caches that missed completely just go
         // through.
@@ -834,8 +916,9 @@ Cache::getBusPacket(PacketPtr cpu_pkt, CacheBlk *blk,
     assert(pkt->getAddr() == blockAlign(pkt->getAddr()));
 
     pkt->allocate();
-    DPRINTF(Cache, "%s created %s addr %#llx size %d\n",
-            __func__, pkt->cmdString(), pkt->getAddr(), pkt->getSize());
+    DPRINTF(Cache, "%s created %s from %s for  addr %#llx size %d\n",
+            __func__, pkt->cmdString(), cpu_pkt->cmdString(), pkt->getAddr(),
+            pkt->getSize());
     return pkt;
 }
 
@@ -1302,19 +1385,28 @@ Cache::recvTimingResp(PacketPtr pkt)
     pkt->headerDelay = pkt->payloadDelay = 0;
 
     // copy writebacks to write buffer
-    while (!writebacks.empty()) {
-        PacketPtr wbPkt = writebacks.front();
-        allocateWriteBuffer(wbPkt, clockEdge(forwardLatency), true);
-        writebacks.pop_front();
-    }
-    // if we used temp block, clear it out
-    if (blk == tempBlock) {
+    doWritebacks(writebacks, forward_time);
+
+    // if we used temp block, check to see if its valid and then clear it out
+    if (blk == tempBlock && tempBlock->isValid()) {
+        // We use forwardLatency here because we are copying
+        // Writebacks/CleanEvicts to write buffer. It specifies the latency to
+        // allocate an internal buffer and to schedule an event to the
+        // queued port.
         if (blk->isDirty()) {
-            // We use forwardLatency here because we are copying
-            // writebacks to write buffer. It specifies the latency to
-            // allocate an internal buffer and to schedule an event to the
-            // queued port.
-            allocateWriteBuffer(writebackBlk(blk), forward_time, true);
+            PacketPtr wbPkt = writebackBlk(blk);
+            allocateWriteBuffer(wbPkt, forward_time, true);
+            // Set BLOCK_CACHED flag if cached above.
+            if (isCachedAbove(wbPkt))
+                wbPkt->setBlockCached();
+        } else {
+            PacketPtr wcPkt = cleanEvictBlk(blk);
+            // Check to see if block is cached above. If not allocate
+            // write buffer
+            if (isCachedAbove(wcPkt))
+                delete wcPkt;
+            else
+                allocateWriteBuffer(wcPkt, forward_time, true);
         }
         blk->invalidate();
     }
@@ -1352,6 +1444,30 @@ Cache::writebackBlk(CacheBlk *blk)
     return writeback;
 }
 
+PacketPtr
+Cache::cleanEvictBlk(CacheBlk *blk)
+{
+    assert(blk && blk->isValid() && !blk->isDirty());
+    // Snoop filter notice; NOTE(review): request size is blkSize, not zero
+    Request *req =
+        new Request(tags->regenerateBlkAddr(blk->tag, blk->set), blkSize, 0,
+                    Request::wbMasterId);
+    if (blk->isSecure())
+        req->setFlags(Request::SECURE);
+
+    req->taskId(blk->task_id);
+    blk->task_id = ContextSwitchTaskId::Unknown;
+    blk->tickInserted = curTick();
+
+    PacketPtr pkt = new Packet(req, MemCmd::CleanEvict);
+    pkt->allocate();
+    DPRINTF(Cache, "%s%s %x Create CleanEvict\n", pkt->cmdString(),
+            pkt->req->isInstFetch() ? " (ifetch)" : "",
+            pkt->getAddr());
+
+    return pkt;
+}
+
 void
 Cache::memWriteback()
 {
@@ -1434,9 +1550,13 @@ Cache::allocateBlock(Addr addr, bool is_secure, PacketList &writebacks)
                     addr, is_secure ? "s" : "ns",
                     blk->isDirty() ? "writeback" : "clean");
 
+            // Will send up Writeback/CleanEvict snoops via isCachedAbove
+            // when pushing this writeback list into the write buffer.
             if (blk->isDirty()) {
                 // Save writeback packet for handling by caller
                 writebacks.push_back(writebackBlk(blk));
+            } else {
+                writebacks.push_back(cleanEvictBlk(blk));
             }
         }
     }
@@ -1460,6 +1580,12 @@ Cache::handleFill(PacketPtr pkt, CacheBlk *blk, PacketList &writebacks)
     CacheBlk::State old_state = blk ? blk->status : 0;
 #endif
 
+    // When handling a fill, discard any CleanEvicts for the
+    // same address in write buffer.
+    Addr M5_VAR_USED blk_addr = blockAlign(pkt->getAddr());
+    std::vector<MSHR *> M5_VAR_USED wbs;
+    assert (!writeBuffer.findMatches(blk_addr, is_secure, wbs));
+
     if (blk == NULL) {
         // better have read new data...
         assert(pkt->hasData());
@@ -1633,9 +1759,9 @@ Cache::handleSnoop(PacketPtr pkt, CacheBlk *blk, bool is_timing,
             if (snoopPkt.sharedAsserted()) {
                 pkt->assertShared();
             }
-            // If this request is a prefetch or clean evict and an
-            // upper level signals block present, make sure to
-            // propagate the block presence to the requester.
+            // If this request is a prefetch or clean evict and an upper level
+            // signals block present, make sure to propagate the block
+            // presence to the requester.
             if (snoopPkt.isBlockCached()) {
                 pkt->setBlockCached();
             }
@@ -1674,9 +1800,9 @@ Cache::handleSnoop(PacketPtr pkt, CacheBlk *blk, bool is_timing,
     // MemCmd::HardPFReq is only observed by upstream caches.  After missing
     // above and in it's own cache, a new MemCmd::ReadReq is created that
     // downstream caches observe.
-    if (pkt->cmd == MemCmd::HardPFReq) {
-        DPRINTF(Cache, "Squashing prefetch from lower cache %#x\n",
-                pkt->getAddr());
+    if (pkt->mustCheckAbove()) {
+        DPRINTF(Cache, "Found addr %#llx in upper level cache for snoop %s from"
+                " lower cache\n", pkt->getAddr(), pkt->cmdString());
         pkt->setBlockCached();
         return;
     }
@@ -1754,7 +1880,7 @@ Cache::recvTimingSnoopReq(PacketPtr pkt)
     assert(!system->bypassCaches());
 
     // no need to snoop writebacks or requests that are not in range
-    if (pkt->cmd == MemCmd::Writeback || !inRange(pkt->getAddr())) {
+    if (!inRange(pkt->getAddr())) {
         return;
     }
 
@@ -1764,11 +1890,12 @@ Cache::recvTimingSnoopReq(PacketPtr pkt)
     Addr blk_addr = blockAlign(pkt->getAddr());
     MSHR *mshr = mshrQueue.findMatch(blk_addr, is_secure);
 
-    // Squash any prefetch requests from below on MSHR hits
-    if (mshr && pkt->cmd == MemCmd::HardPFReq) {
-        DPRINTF(Cache, "Setting block present to squash prefetch from"
+    // Inform request(Prefetch, CleanEvict or Writeback) from below of
+    // MSHR hit, set setBlockCached.
+    if (mshr && pkt->mustCheckAbove()) {
+        DPRINTF(Cache, "Setting block cached for %s from"
                 "lower cache on mshr hit %#x\n",
-                pkt->getAddr());
+                pkt->cmdString(), pkt->getAddr());
         pkt->setBlockCached();
         return;
     }
@@ -1795,28 +1922,60 @@ Cache::recvTimingSnoopReq(PacketPtr pkt)
         // We should only ever find a single match
         assert(writebacks.size() == 1);
         MSHR *wb_entry = writebacks[0];
+        // Expect to see only Writebacks and/or CleanEvicts here, both of
+        // which should not be generated for uncacheable data.
         assert(!wb_entry->isUncacheable());
+        // There should only be a single request responsible for generating
+        // Writebacks/CleanEvicts.
         assert(wb_entry->getNumTargets() == 1);
         PacketPtr wb_pkt = wb_entry->getTarget()->pkt;
-        assert(wb_pkt->cmd == MemCmd::Writeback);
+        assert(wb_pkt->evictingBlock());
+
+        if (pkt->evictingBlock()) {
+            // if the block is found in the write queue, set the BLOCK_CACHED
+            // flag for Writeback/CleanEvict snoop. On return the snoop will
+            // propagate the BLOCK_CACHED flag in Writeback packets and prevent
+            // any CleanEvicts from travelling down the memory hierarchy.
+            pkt->setBlockCached();
+            DPRINTF(Cache, "Squashing %s from lower cache on writequeue hit"
+                    " %#x\n", pkt->cmdString(), pkt->getAddr());
+            return;
+        }
 
-        assert(!pkt->memInhibitAsserted());
-        pkt->assertMemInhibit();
-        if (!pkt->needsExclusive()) {
-            pkt->assertShared();
-            // the writeback is no longer the exclusive copy in the system
-            wb_pkt->clearSupplyExclusive();
+        if (wb_pkt->cmd == MemCmd::Writeback) {
+            assert(!pkt->memInhibitAsserted());
+            pkt->assertMemInhibit();
+            if (!pkt->needsExclusive()) {
+                pkt->assertShared();
+                // the writeback is no longer the exclusive copy in
+                // the system
+                wb_pkt->clearSupplyExclusive();
+            } else {
+                // if we're not asserting the shared line, we need to
+                // invalidate our copy.  we'll do that below as long as
+                // the packet's invalidate flag is set...
+                assert(pkt->isInvalidate());
+            }
+            doTimingSupplyResponse(pkt, wb_pkt->getConstPtr<uint8_t>(),
+                                   false, false);
         } else {
-            // if we're not asserting the shared line, we need to
-            // invalidate our copy.  we'll do that below as long as
-            // the packet's invalidate flag is set...
-            assert(pkt->isInvalidate());
+            assert(wb_pkt->cmd == MemCmd::CleanEvict);
+            // The cache technically holds the block until the
+            // corresponding CleanEvict message reaches the crossbar
+            // below. Therefore when a snoop encounters a CleanEvict
+            // message we must set assertShared (just like when it
+            // encounters a Writeback) to avoid the snoop filter
+            // prematurely clearing the holder bit in the crossbar
+            // below
+            if (!pkt->needsExclusive())
+                pkt->assertShared();
+            else
+                assert(pkt->isInvalidate());
         }
-        doTimingSupplyResponse(pkt, wb_pkt->getConstPtr<uint8_t>(),
-                               false, false);
 
         if (pkt->isInvalidate()) {
             // Invalidation trumps our writeback... discard here
+            // Note: markInService will remove entry from writeback buffer.
             markInService(wb_entry, false);
             delete wb_pkt;
         }
@@ -1844,8 +2003,11 @@ Cache::recvAtomicSnoop(PacketPtr pkt)
     // Snoops shouldn't happen when bypassing caches
     assert(!system->bypassCaches());
 
-    // no need to snoop writebacks or requests that are not in range
-    if (pkt->cmd == MemCmd::Writeback || !inRange(pkt->getAddr())) {
+    // no need to snoop writebacks or requests that are not in range. In
+    // atomic we have no Writebacks/CleanEvicts queued and no prefetches,
+    // hence there is no need to snoop upwards and determine if they are
+    // present above.
+    if (pkt->evictingBlock() || !inRange(pkt->getAddr())) {
         return 0;
     }
 
@@ -1938,6 +2100,29 @@ Cache::getNextMSHR()
     return NULL;
 }
 
+bool
+Cache::isCachedAbove(const PacketPtr pkt) const
+{
+    if (isTopLevel)
+        return false;
+    // Mirroring the flow of HardPFReqs, send a CleanEvict/Writeback snoop
+    // into the upper level caches to check for copies of the same block.
+    // The BLOCK_CACHED result is read back from the local snoop_pkt and
+    // returned; this function does not set the flag on pkt, so callers
+    // decide whether to flag a Writeback or drop a CleanEvict.
+
+    Packet snoop_pkt(pkt, true, false);
+    snoop_pkt.setExpressSnoop();
+    // Assert that packet is either Writeback or CleanEvict and not a prefetch
+    // request because prefetch requests need an MSHR and may generate a snoop
+    // response.
+    assert(pkt->evictingBlock());
+    snoop_pkt.senderState = NULL;
+    cpuSidePort->sendTimingSnoopReq(&snoop_pkt);
+    // Writeback/CleanEvict snoops do not generate a separate snoop response.
+    assert(!(snoop_pkt.memInhibitAsserted()));
+    return snoop_pkt.isBlockCached();
+}
 
 PacketPtr
 Cache::getTimingPacket()
@@ -1955,62 +2140,69 @@ Cache::getTimingPacket()
     DPRINTF(CachePort, "%s %s for addr %#llx size %d\n", __func__,
             tgt_pkt->cmdString(), tgt_pkt->getAddr(), tgt_pkt->getSize());
 
-    if (mshr->isForwardNoResponse()) {
-        // no response expected, just forward packet as it is
-        assert(tags->findBlock(mshr->blkAddr, mshr->isSecure) == NULL);
-        pkt = tgt_pkt;
-    } else {
-        CacheBlk *blk = tags->findBlock(mshr->blkAddr, mshr->isSecure);
-
-        if (tgt_pkt->cmd == MemCmd::HardPFReq && forwardSnoops) {
-            // We need to check the caches above us to verify that
-            // they don't have a copy of this block in the dirty state
-            // at the moment. Without this check we could get a stale
-            // copy from memory that might get used in place of the
-            // dirty one.
-            Packet snoop_pkt(tgt_pkt, true, false);
-            snoop_pkt.setExpressSnoop();
-            snoop_pkt.senderState = mshr;
-            cpuSidePort->sendTimingSnoopReq(&snoop_pkt);
-
-            // Check to see if the prefetch was squashed by an upper cache (to
-            // prevent us from grabbing the line) or if a Check to see if a
-            // writeback arrived between the time the prefetch was placed in
-            // the MSHRs and when it was selected to be sent or if the
-            // prefetch was squashed by an upper cache.
-
-            // It is important to check msmInhibitAsserted before
-            // prefetchSquashed. If another cache has asserted MEM_INGIBIT, it
-            // will be sending a response which will arrive at the MSHR
-            // allocated ofr this request. Checking the prefetchSquash first
-            // may result in the MSHR being prematurely deallocated.
-
-            if (snoop_pkt.memInhibitAsserted()) {
-                // If we are getting a non-shared response it is dirty
-                bool pending_dirty_resp = !snoop_pkt.sharedAsserted();
-                markInService(mshr, pending_dirty_resp);
-                DPRINTF(Cache, "Upward snoop of prefetch for addr"
-                        " %#x (%s) hit\n",
-                        tgt_pkt->getAddr(), tgt_pkt->isSecure()? "s": "ns");
-                return NULL;
-            }
+    CacheBlk *blk = tags->findBlock(mshr->blkAddr, mshr->isSecure);
+
+    if (tgt_pkt->cmd == MemCmd::HardPFReq && forwardSnoops) {
+        // We need to check the caches above us to verify that
+        // they don't have a copy of this block in the dirty state
+        // at the moment. Without this check we could get a stale
+        // copy from memory that might get used in place of the
+        // dirty one.
+        Packet snoop_pkt(tgt_pkt, true, false);
+        snoop_pkt.setExpressSnoop();
+        snoop_pkt.senderState = mshr;
+        cpuSidePort->sendTimingSnoopReq(&snoop_pkt);
+
+        // Check to see if the prefetch was squashed by an upper cache (to
+        // prevent us from grabbing the line) or if a Check to see if a
+        // writeback arrived between the time the prefetch was placed in
+        // the MSHRs and when it was selected to be sent or if the
+        // prefetch was squashed by an upper cache.
+
+        // It is important to check memInhibitAsserted before
+        // prefetchSquashed. If another cache has asserted MEM_INGIBIT, it
+        // will be sending a response which will arrive at the MSHR
+        // allocated ofr this request. Checking the prefetchSquash first
+        // may result in the MSHR being prematurely deallocated.
+
+        if (snoop_pkt.memInhibitAsserted()) {
+            // If we are getting a non-shared response it is dirty
+            bool pending_dirty_resp = !snoop_pkt.sharedAsserted();
+            markInService(mshr, pending_dirty_resp);
+            DPRINTF(Cache, "Upward snoop of prefetch for addr"
+                    " %#x (%s) hit\n",
+                    tgt_pkt->getAddr(), tgt_pkt->isSecure()? "s": "ns");
+            return NULL;
+        }
 
-            if (snoop_pkt.isBlockCached() || blk != NULL) {
-                DPRINTF(Cache, "Block present, prefetch squashed by cache.  "
-                               "Deallocating mshr target %#x.\n",
-                        mshr->blkAddr);
+        if (snoop_pkt.isBlockCached() || blk != NULL) {
+            DPRINTF(Cache, "Block present, prefetch squashed by cache.  "
+                    "Deallocating mshr target %#x.\n",
+                    mshr->blkAddr);
 
-                // Deallocate the mshr target
+            // Deallocate the mshr target
+            if (tgt_pkt->cmd != MemCmd::Writeback) {
                 if (mshr->queue->forceDeallocateTarget(mshr)) {
                     // Clear block if this deallocation resulted freed an
                     // mshr when all had previously been utilized
                     clearBlocked((BlockedCause)(mshr->queue->index));
                 }
                 return NULL;
+            } else {
+                // If this is a Writeback, and the snoops indicate that the blk
+                // is cached above, set the BLOCK_CACHED flag in the Writeback
+                // packet, so that it does not reset the bits corresponding to
+                // this block in the snoop filter below.
+                tgt_pkt->setBlockCached();
             }
-
         }
+    }
 
+    if (mshr->isForwardNoResponse()) {
+        // no response expected, just forward packet as it is
+        assert(tags->findBlock(mshr->blkAddr, mshr->isSecure) == NULL);
+        pkt = tgt_pkt;
+    } else {
         pkt = getBusPacket(tgt_pkt, blk, mshr->needsExclusive());
 
         mshr->isForward = (pkt == NULL);
index 3ab2d76a6a84c45444f70c3d68453e38834df8fc..de4eaca01597251632b38ac6b1354526c4b337be 100644 (file)
@@ -93,6 +93,7 @@ BasePrefetcher::observeAccess(const PacketPtr &pkt) const
     if (!fetch && read && !onRead) return false;
     if (!fetch && !read && !onWrite) return false;
     if (!fetch && !read && inv) return false;
+    if (pkt->cmd == MemCmd::CleanEvict) return false;
 
     if (onMiss) {
         return !inCache(addr, is_secure) &&
index cefad31613268a08b89075f0eefede942e5259fd..b58511db689082409314a84604ce2bbdcc9dbc71 100644 (file)
@@ -138,6 +138,12 @@ CoherentXBar::init()
 bool
 CoherentXBar::recvTimingReq(PacketPtr pkt, PortID slave_port_id)
 {
+    // @todo temporary hack to deal with memory corruption issue until
+    // 4-phase transactions are complete
+    for (int x = 0; x < pendingDelete.size(); x++)
+        delete pendingDelete[x];
+    pendingDelete.clear();
+
     // determine the source port based on the id
     SlavePort *src_port = slavePorts[slave_port_id];
 
@@ -201,6 +207,19 @@ CoherentXBar::recvTimingReq(PacketPtr pkt, PortID slave_port_id)
         }
     }
 
+    // forwardTiming snooped into peer caches of the sender, and if
+    // this is a clean evict, but the packet is found in a cache, do
+    // not forward it
+    if (pkt->cmd == MemCmd::CleanEvict && pkt->isBlockCached()) {
+        DPRINTF(CoherentXBar, "recvTimingReq: Clean evict 0x%x still cached, "
+                "not forwarding\n", pkt->getAddr());
+
+        // update the layer state and schedule an idle event
+        reqLayers[master_port_id]->succeededTiming(packetFinishTime);
+        pendingDelete.push_back(pkt);
+        return true;
+    }
+
     // remember if the packet will generate a snoop response
     const bool expect_snoop_resp = !is_inhibited && pkt->memInhibitAsserted();
     const bool expect_response = pkt->needsResponse() &&
index 3cf10689c068edf1527dbc72244c858f8ec950b6..24506d22b5175cbbf7595e41c3860c8978bd5b00 100644 (file)
@@ -275,6 +275,13 @@ class CoherentXBar : public BaseXBar
     /** Cycles of snoop response latency.*/
     const Cycles snoopResponseLatency;
 
+    /**
+     * @todo this is a temporary workaround until the 4-phase code is committed.
+     * upstream caches need this packet until true is returned, so hold it for
+     * deletion until a subsequent call
+     */
+    std::vector<PacketPtr> pendingDelete;
+
     /** Function called by the port when the crossbar is recieving a Timing
       request packet.*/
     bool recvTimingReq(PacketPtr pkt, PortID slave_port_id);
index 733a7390f2c0127b7cecc2d713519787e4ff7536..196f599af66b05e3bf499decbb3a3cf3d25a7198 100644 (file)
@@ -643,9 +643,10 @@ DRAMCtrl::recvTimingReq(PacketPtr pkt)
     DPRINTF(DRAM, "recvTimingReq: request %s addr %lld size %d\n",
             pkt->cmdString(), pkt->getAddr(), pkt->getSize());
 
-    // simply drop inhibited packets for now
-    if (pkt->memInhibitAsserted()) {
-        DPRINTF(DRAM, "Inhibited packet -- Dropping it now\n");
+    // simply drop inhibited packets and clean evictions
+    if (pkt->memInhibitAsserted() ||
+        pkt->cmd == MemCmd::CleanEvict) {
+        DPRINTF(DRAM, "Inhibited packet or clean evict -- Dropping it now\n");
         pendingDelete.push_back(pkt);
         return true;
     }
index ecc2feb26cd495781496e45052fb7226bba8a58b..68a5e6dc2b2eff480a3d9affbe51591850bdcc7a 100644 (file)
@@ -86,6 +86,8 @@ MemCmd::commandInfo[] =
     /* Writeback */
     { SET4(IsWrite, NeedsExclusive, IsRequest, HasData),
             InvalidCmd, "Writeback" },
+    /* CleanEvict */
+    { SET2(IsWrite, IsRequest), InvalidCmd, "CleanEvict" },
     /* SoftPFReq */
     { SET4(IsRead, IsRequest, IsSWPrefetch, NeedsResponse),
             SoftPFResp, "SoftPFReq" },
index 3f10458e7cfc226666567429033ebb498f9555d1..192136aba9ca8a09dd1b0514faeb0437c2d3b023 100644 (file)
@@ -87,6 +87,7 @@ class MemCmd
         WriteReq,
         WriteResp,
         Writeback,
+        CleanEvict,
         SoftPFReq,
         HardPFReq,
         SoftPFResp,
@@ -508,6 +509,7 @@ class Packet : public Printable
     bool suppressFuncError() const  { return flags.isSet(SUPPRESS_FUNC_ERROR); }
     void setBlockCached()          { flags.set(BLOCK_CACHED); }
     bool isBlockCached() const     { return flags.isSet(BLOCK_CACHED); }
+    void clearBlockCached()        { flags.clear(BLOCK_CACHED); }
 
     // Network error conditions... encapsulate them as methods since
     // their encoding keeps changing (from result field to command
@@ -936,6 +938,27 @@ class Packet : public Printable
                                other->getPtr<uint8_t>() : NULL);
     }
 
+    /**
+     * Is this request notification of a clean or dirty eviction from the cache.
+     **/
+    bool
+    evictingBlock() const
+    {
+        return (cmd == MemCmd::Writeback ||
+                cmd == MemCmd::CleanEvict);
+    }
+
+    /**
+     * Does the request need to check for cached copies of the same block
+     * in the memory hierarchy above.
+     **/
+    bool
+    mustCheckAbove() const
+    {
+        return (cmd == MemCmd::HardPFReq ||
+                evictingBlock());
+    }
+
     /**
      * Check a functional request against a memory value represented
      * by a base/size pair and an associated data array. If the
index 9ff591065bb08bf9246c1591caaa997bd3a4da01..48587c8ee6a3d149bc8d396b931a24b83b630221 100755 (executable)
@@ -134,7 +134,8 @@ SnoopFilter::updateRequest(const Packet* cpkt, const SlavePort& slave_port,
             // Writebacks -> the sender does not have the line anymore
             sf_item.holder &= ~req_port;
         } else {
-            assert(0 == "Handle non-writeback, here");
+            // @todo Add CleanEvicts
+            assert(cpkt->cmd == MemCmd::CleanEvict);
         }
         DPRINTF(SnoopFilter, "%s:   new SF value %x.%x\n",
                 __func__,  sf_item.requested, sf_item.holder);
@@ -174,8 +175,13 @@ SnoopFilter::lookupSnoop(const Packet* cpkt)
         else
             hitMultiSnoops++;
     }
-
-    assert(cpkt->isInvalidate() == cpkt->needsExclusive());
+    // ReadEx and Writes require both invalidation and exclusivity, while reads
+    // require neither. Writebacks on the other hand require exclusivity but
+    // not the invalidation. Previously Writebacks did not generate upward
+    // snoops so this was never an issue. Now that Writebacks generate snoops
+    // we need to special case for Writebacks.
+    assert(cpkt->cmd == MemCmd::Writeback ||
+           (cpkt->isInvalidate() == cpkt->needsExclusive()));
     if (cpkt->isInvalidate() && !sf_item.requested) {
         // Early clear of the holder, if no other request is currently going on
         // @todo: This should possibly be updated even though we do not filter