refactor code for the packet, get rid of packet_impl.hh
[gem5.git] / src / mem / cache / cache_impl.hh
index 0d625054c8e2439ce1b93e0862b9271f433ebcbc..ea30dbba67d7cb62b9d52426751e05e97a86f0b0 100644 (file)
@@ -60,12 +60,11 @@ doTimingAccess(Packet *pkt, CachePort *cachePort, bool isCpuSide)
 {
     if (isCpuSide)
     {
-        if (pkt->isWrite() && (pkt->req->getFlags() & LOCKED)) {
+        if (pkt->isWrite() && (pkt->req->isLocked())) {
             pkt->req->setScResult(1);
         }
-        if (!(pkt->flags & SATISFIED)) {
-            access(pkt);
-        }
+        access(pkt);
+
     }
     else
     {
@@ -73,7 +72,7 @@ doTimingAccess(Packet *pkt, CachePort *cachePort, bool isCpuSide)
             handleResponse(pkt);
         else {
             //Check if we should do the snoop
-            if (pkt->flags && SNOOP_COMMIT)
+            if (pkt->flags & SNOOP_COMMIT)
                 snoop(pkt);
         }
     }
@@ -87,11 +86,6 @@ doAtomicAccess(Packet *pkt, bool isCpuSide)
 {
     if (isCpuSide)
     {
-        //Temporary solution to LL/SC
-        if (pkt->isWrite() && (pkt->req->getFlags() & LOCKED)) {
-            pkt->req->setScResult(1);
-        }
-
         probe(pkt, true, NULL);
         //TEMP ALWAYS SUCCES FOR NOW
         pkt->result = Packet::Success;
@@ -101,7 +95,7 @@ doAtomicAccess(Packet *pkt, bool isCpuSide)
         if (pkt->isResponse())
             handleResponse(pkt);
         else
-            snoopProbe(pkt);
+            return snoopProbe(pkt);
     }
     //Fix this timing info
     return hitLatency;
@@ -117,11 +111,6 @@ doFunctionalAccess(Packet *pkt, bool isCpuSide)
         //TEMP USE CPU?THREAD 0 0
         pkt->req->setThreadContext(0,0);
 
-        //Temporary solution to LL/SC
-        if (pkt->isWrite() && (pkt->req->getFlags() & LOCKED)) {
-            assert("Can't handle LL/SC on functional path\n");
-        }
-
         probe(pkt, false, memSidePort);
         //TEMP ALWAYS SUCCESFUL FOR NOW
         pkt->result = Packet::Success;
@@ -149,14 +138,9 @@ Cache(const std::string &_name,
       prefetchAccess(params.prefetchAccess),
       tags(params.tags), missQueue(params.missQueue),
       coherence(params.coherence), prefetcher(params.prefetcher),
-      doCopy(params.doCopy), blockOnCopy(params.blockOnCopy)
+      hitLatency(params.hitLatency)
 {
-//FIX BUS POINTERS
-//    if (params.in == NULL) {
-        topLevelCache = true;
-//    }
-//PLEASE FIX THIS, BUS SIZES NOT BEING USED
-        tags->setCache(this, blkSize, 1/*params.out->width, params.out->clockRate*/);
+    tags->setCache(this);
     tags->setPrefetcher(prefetcher);
     missQueue->setCache(this);
     missQueue->setPrefetcher(prefetcher);
@@ -164,10 +148,8 @@ Cache(const std::string &_name,
     prefetcher->setCache(this);
     prefetcher->setTags(tags);
     prefetcher->setBuffer(missQueue);
-#if 0
-    invalidatePkt = new Packet;
-    invalidatePkt->cmd = Packet::InvalidateReq;
-#endif
+    invalidateReq = new Request((Addr) NULL, blkSize, 0);
+    invalidatePkt = new Packet(invalidateReq, Packet::InvalidateReq, 0);
 }
 
 template<class TagStore, class Buffering, class Coherence>
@@ -196,20 +178,6 @@ Cache<TagStore,Buffering,Coherence>::access(PacketPtr &pkt)
         prefetcher->handleMiss(pkt, curTick);
     }
     if (!pkt->req->isUncacheable()) {
-        if (pkt->isInvalidate() && !pkt->isRead()
-            && !pkt->isWrite()) {
-            //Upgrade or Invalidate
-            //Look into what happens if two slave caches on bus
-            DPRINTF(Cache, "%s %x ? blk_addr: %x\n", pkt->cmdString(),
-                    pkt->getAddr() & (((ULL(1))<<48)-1),
-                    pkt->getAddr() & ~((Addr)blkSize - 1));
-
-            //@todo Should this return latency have the hit latency in it?
-//         respond(pkt,curTick+lat);
-            pkt->flags |= SATISFIED;
-//            return MA_HIT; //@todo, return values
-            return true;
-        }
         blk = tags->handleAccess(pkt, lat, writebacks);
     } else {
         size = pkt->getSize();
@@ -219,7 +187,8 @@ Cache<TagStore,Buffering,Coherence>::access(PacketPtr &pkt)
     /** @todo make the fast write alloc (wh64) work with coherence. */
     /** @todo Do we want to do fast writes for writebacks as well? */
     if (!blk && pkt->getSize() >= blkSize && coherence->allowFastWrites() &&
-        (pkt->cmd == Packet::WriteReq || pkt->cmd == Packet::WriteInvalidateReq) ) {
+        (pkt->cmd == Packet::WriteReq
+         || pkt->cmd == Packet::WriteInvalidateReq) ) {
         // not outstanding misses, can do this
         MSHR* outstanding_miss = missQueue->findMSHR(pkt->getAddr());
         if (pkt->cmd == Packet::WriteInvalidateReq || !outstanding_miss) {
@@ -236,16 +205,19 @@ Cache<TagStore,Buffering,Coherence>::access(PacketPtr &pkt)
         missQueue->doWriteback(writebacks.front());
         writebacks.pop_front();
     }
-    DPRINTF(Cache, "%s %x %s blk_addr: %x pc %x\n", pkt->cmdString(),
+    DPRINTF(Cache, "%s %x %s blk_addr: %x\n", pkt->cmdString(),
             pkt->getAddr() & (((ULL(1))<<48)-1), (blk) ? "hit" : "miss",
-            pkt->getAddr() & ~((Addr)blkSize - 1), pkt->req->getPC());
+            pkt->getAddr() & ~((Addr)blkSize - 1));
     if (blk) {
         // Hit
         hits[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++;
         // clear dirty bit if write through
         if (pkt->needsResponse())
             respond(pkt, curTick+lat);
-//     return MA_HIT;
+        if (pkt->cmd == Packet::Writeback) {
+            //Signal that you can kill the pkt/req
+            pkt->flags |= SATISFIED;
+        }
         return true;
     }
 
@@ -269,10 +241,12 @@ template<class TagStore, class Buffering, class Coherence>
 Packet *
 Cache<TagStore,Buffering,Coherence>::getPacket()
 {
+    assert(missQueue->havePending());
     Packet * pkt = missQueue->getPacket();
     if (pkt) {
         if (!pkt->req->isUncacheable()) {
-            if (pkt->cmd == Packet::HardPFReq) misses[Packet::HardPFReq][0/*pkt->req->getThreadNum()*/]++;
+            if (pkt->cmd == Packet::HardPFReq)
+                misses[Packet::HardPFReq][0/*pkt->req->getThreadNum()*/]++;
             BlkType *blk = tags->findBlock(pkt);
             Packet::Command cmd = coherence->getBusCmd(pkt->cmd,
                                               (blk)? blk->status : 0);
@@ -287,15 +261,45 @@ Cache<TagStore,Buffering,Coherence>::getPacket()
 
 template<class TagStore, class Buffering, class Coherence>
 void
-Cache<TagStore,Buffering,Coherence>::sendResult(PacketPtr &pkt, bool success)
+Cache<TagStore,Buffering,Coherence>::sendResult(PacketPtr &pkt, MSHR* mshr,
+                                                bool success)
 {
-    if (success) {
-        missQueue->markInService(pkt);
-          //Temp Hack for UPGRADES
-          if (pkt->cmd == Packet::UpgradeReq) {
-              handleResponse(pkt);
-          }
+    if (success && !(pkt && (pkt->flags & NACKED_LINE))) {
+        if (!mshr->pkt->needsResponse()
+            && !(mshr->pkt->cmd == Packet::UpgradeReq)
+            && (pkt && (pkt->flags & SATISFIED))) {
+            //Writeback, clean up the non copy version of the packet
+            delete pkt;
+        }
+        missQueue->markInService(mshr->pkt, mshr);
+        //Temp Hack for UPGRADES
+        if (mshr->pkt && mshr->pkt->cmd == Packet::UpgradeReq) {
+            assert(pkt);  //Upgrades need to be fixed
+            pkt->flags &= ~CACHE_LINE_FILL;
+            BlkType *blk = tags->findBlock(pkt);
+            CacheBlk::State old_state = (blk) ? blk->status : 0;
+            CacheBlk::State new_state = coherence->getNewState(pkt,old_state);
+            if (old_state != new_state)
+                DPRINTF(Cache, "Block for blk addr %x moving from "
+                        "state %i to %i\n",
+                        pkt->getAddr() & (((ULL(1))<<48)-1),
+                        old_state, new_state);
+            //Set the state on the upgrade
+            memcpy(pkt->getPtr<uint8_t>(), blk->data, blkSize);
+            PacketList writebacks;
+            tags->handleFill(blk, mshr, new_state, writebacks, pkt);
+            assert(writebacks.empty());
+            missQueue->handleResponse(pkt, curTick + hitLatency);
+        }
     } else if (pkt && !pkt->req->isUncacheable()) {
+        pkt->flags &= ~NACKED_LINE;
+        pkt->flags &= ~SATISFIED;
+        pkt->flags &= ~SNOOP_COMMIT;
+
+//Rmove copy from mshr
+        delete mshr->pkt;
+        mshr->pkt = pkt;
+
         missQueue->restoreOrigCmd(pkt);
     }
 }
@@ -306,6 +310,18 @@ Cache<TagStore,Buffering,Coherence>::handleResponse(Packet * &pkt)
 {
     BlkType *blk = NULL;
     if (pkt->senderState) {
+        //Delete temp copy in MSHR, restore it.
+        delete ((MSHR*)pkt->senderState)->pkt;
+        ((MSHR*)pkt->senderState)->pkt = pkt;
+        if (pkt->result == Packet::Nacked) {
+            //pkt->reinitFromRequest();
+            warn("NACKs from devices not connected to the same bus "
+                 "not implemented\n");
+            return;
+        }
+        if (pkt->result == Packet::BadAddress) {
+            //Make the response a Bad address and send it
+        }
 //     MemDebug::cacheResponse(pkt);
         DPRINTF(Cache, "Handling reponse to %x, blk addr: %x\n",pkt->getAddr(),
                 pkt->getAddr() & (((ULL(1))<<48)-1));
@@ -314,9 +330,14 @@ Cache<TagStore,Buffering,Coherence>::handleResponse(Packet * &pkt)
             blk = tags->findBlock(pkt);
             CacheBlk::State old_state = (blk) ? blk->status : 0;
             PacketList writebacks;
+            CacheBlk::State new_state = coherence->getNewState(pkt,old_state);
+            if (old_state != new_state)
+                DPRINTF(Cache, "Block for blk addr %x moving from "
+                        "state %i to %i\n",
+                        pkt->getAddr() & (((ULL(1))<<48)-1),
+                        old_state, new_state);
             blk = tags->handleFill(blk, (MSHR*)pkt->senderState,
-                                   coherence->getNewState(pkt,old_state),
-                                   writebacks, pkt);
+                                   new_state, writebacks, pkt);
             while (!writebacks.empty()) {
                     missQueue->doWriteback(writebacks.front());
                     writebacks.pop_front();
@@ -327,47 +348,19 @@ Cache<TagStore,Buffering,Coherence>::handleResponse(Packet * &pkt)
 }
 
 template<class TagStore, class Buffering, class Coherence>
-void
-Cache<TagStore,Buffering,Coherence>::pseudoFill(Addr addr)
+Packet *
+Cache<TagStore,Buffering,Coherence>::getCoherencePacket()
 {
-    // Need to temporarily move this blk into MSHRs
-    MSHR *mshr = missQueue->allocateTargetList(addr);
-    int lat;
-    PacketList dummy;
-    // Read the data into the mshr
-    BlkType *blk = tags->handleAccess(mshr->pkt, lat, dummy, false);
-    assert(dummy.empty());
-    assert(mshr->pkt->flags & SATISFIED);
-    // can overload order since it isn't used on non pending blocks
-    mshr->order = blk->status;
-    // temporarily remove the block from the cache.
-    tags->invalidateBlk(addr);
+    return coherence->getPacket();
 }
 
 template<class TagStore, class Buffering, class Coherence>
 void
-Cache<TagStore,Buffering,Coherence>::pseudoFill(MSHR *mshr)
+Cache<TagStore,Buffering,Coherence>::sendCoherenceResult(Packet* &pkt,
+                                                         MSHR *cshr,
+                                                         bool success)
 {
-    // Need to temporarily move this blk into MSHRs
-    assert(mshr->pkt->cmd == Packet::ReadReq);
-    int lat;
-    PacketList dummy;
-    // Read the data into the mshr
-    BlkType *blk = tags->handleAccess(mshr->pkt, lat, dummy, false);
-    assert(dummy.empty());
-    assert(mshr->pkt->flags & SATISFIED);
-    // can overload order since it isn't used on non pending blocks
-    mshr->order = blk->status;
-    // temporarily remove the block from the cache.
-    tags->invalidateBlk(mshr->pkt->getAddr());
-}
-
-
-template<class TagStore, class Buffering, class Coherence>
-Packet *
-Cache<TagStore,Buffering,Coherence>::getCoherencePacket()
-{
-    return coherence->getPacket();
+    coherence->sendResult(pkt, cshr, success);
 }
 
 
@@ -375,40 +368,59 @@ template<class TagStore, class Buffering, class Coherence>
 void
 Cache<TagStore,Buffering,Coherence>::snoop(Packet * &pkt)
 {
+    if (pkt->req->isUncacheable()) {
+        //Can't get a hit on an uncacheable address
+        //Revisit this for multi level coherence
+        return;
+    }
+
+    //Send a timing (true) invalidate up if the protocol calls for it
+    coherence->propogateInvalidate(pkt, true);
+
     Addr blk_addr = pkt->getAddr() & ~(Addr(blkSize-1));
     BlkType *blk = tags->findBlock(pkt);
     MSHR *mshr = missQueue->findMSHR(blk_addr);
-    if (isTopLevel() && coherence->hasProtocol()) { //@todo Move this into handle bus req
-        //If we find an mshr, and it is in service, we need to NACK or invalidate
+    if (coherence->hasProtocol() || pkt->isInvalidate()) {
+        //@todo Move this into handle bus req
+        //If we find an mshr, and it is in service, we need to NACK or
+        //invalidate
         if (mshr) {
             if (mshr->inService) {
                 if ((mshr->pkt->isInvalidate() || !mshr->pkt->isCacheFill())
-                    && (pkt->cmd != Packet::InvalidateReq && pkt->cmd != Packet::WriteInvalidateReq)) {
-                    //If the outstanding request was an invalidate (upgrade,readex,..)
-                    //Then we need to ACK the request until we get the data
-                    //Also NACK if the outstanding request is not a cachefill (writeback)
+                    && (pkt->cmd != Packet::InvalidateReq
+                        && pkt->cmd != Packet::WriteInvalidateReq)) {
+                    //If the outstanding request was an invalidate
+                    //(upgrade,readex,..)  Then we need to ACK the request
+                    //until we get the data Also NACK if the outstanding
+                    //request is not a cachefill (writeback)
+                    assert(!(pkt->flags & SATISFIED));
                     pkt->flags |= SATISFIED;
                     pkt->flags |= NACKED_LINE;
-                    assert("Don't detect these on the other side yet\n");
-                    respondToSnoop(pkt, curTick + hitLatency);
+                    ///@todo NACK's from other levels
+                    //warn("NACKs from devices not connected to the same bus "
+                    //"not implemented\n");
+                    //respondToSnoop(pkt, curTick + hitLatency);
                     return;
                 }
                 else {
-                    //The supplier will be someone else, because we are waiting for
-                    //the data.  This should cause this cache to be forced to go to
-                    //the shared state, not the exclusive even though the shared line
-                    //won't be asserted.  But for now we will just invlidate ourselves
-                    //and allow the other cache to go into the exclusive state.
-                    //@todo Make it so a read to a pending read doesn't invalidate.
-                    //@todo Make it so that a read to a pending read can't be exclusive now.
+                    //The supplier will be someone else, because we are
+                    //waiting for the data.  This should cause this cache to
+                    //be forced to go to the shared state, not the exclusive
+                    //even though the shared line won't be asserted.  But for
+                    //now we will just invlidate ourselves and allow the other
+                    //cache to go into the exclusive state.  @todo Make it so
+                    //a read to a pending read doesn't invalidate.  @todo Make
+                    //it so that a read to a pending read can't be exclusive
+                    //now.
 
                     //Set the address so find match works
-                    assert("Don't have invalidates yet\n");
+                    //panic("Don't have invalidates yet\n");
                     invalidatePkt->addrOverride(pkt->getAddr());
 
                     //Append the invalidate on
                     missQueue->addTarget(mshr,invalidatePkt);
-                    DPRINTF(Cache, "Appending Invalidate to blk_addr: %x\n", pkt->getAddr() & (((ULL(1))<<48)-1));
+                    DPRINTF(Cache, "Appending Invalidate to blk_addr: %x\n",
+                            pkt->getAddr() & (((ULL(1))<<48)-1));
                     return;
                 }
             }
@@ -416,7 +428,8 @@ Cache<TagStore,Buffering,Coherence>::snoop(Packet * &pkt)
         //We also need to check the writeback buffers and handle those
         std::vector<MSHR *> writebacks;
         if (missQueue->findWrites(blk_addr, writebacks)) {
-            DPRINTF(Cache, "Snoop hit in writeback to blk_addr: %x\n", pkt->getAddr() & (((ULL(1))<<48)-1));
+            DPRINTF(Cache, "Snoop hit in writeback to blk_addr: %x\n",
+                    pkt->getAddr() & (((ULL(1))<<48)-1));
 
             //Look through writebacks for any non-uncachable writes, use that
             for (int i=0; i<writebacks.size(); i++) {
@@ -426,6 +439,7 @@ Cache<TagStore,Buffering,Coherence>::snoop(Packet * &pkt)
                     if (pkt->isRead()) {
                         //Only Upgrades don't get here
                         //Supply the data
+                        assert(!(pkt->flags & SATISFIED));
                         pkt->flags |= SATISFIED;
 
                         //If we are in an exclusive protocol, make it ask again
@@ -433,7 +447,7 @@ Cache<TagStore,Buffering,Coherence>::snoop(Packet * &pkt)
                         pkt->flags |= SHARED_LINE;
 
                         assert(pkt->isRead());
-                        Addr offset = pkt->getAddr() & ~(blkSize - 1);
+                        Addr offset = pkt->getAddr() & (blkSize - 1);
                         assert(offset < blkSize);
                         assert(pkt->getSize() <= blkSize);
                         assert(offset + pkt->getSize() <=blkSize);
@@ -443,8 +457,9 @@ Cache<TagStore,Buffering,Coherence>::snoop(Packet * &pkt)
                     }
 
                     if (pkt->isInvalidate()) {
-                        //This must be an upgrade or other cache will take ownership
-                        missQueue->markInService(mshr->pkt);
+                        //This must be an upgrade or other cache will take
+                        //ownership
+                        missQueue->markInService(mshr->pkt, mshr);
                     }
                     return;
                 }
@@ -454,10 +469,17 @@ Cache<TagStore,Buffering,Coherence>::snoop(Packet * &pkt)
     CacheBlk::State new_state;
     bool satisfy = coherence->handleBusRequest(pkt,blk,mshr, new_state);
     if (satisfy) {
+        DPRINTF(Cache, "Cache snooped a %s request for addr %x and "
+                "now supplying data, new state is %i\n",
+                pkt->cmdString(), blk_addr, new_state);
+
         tags->handleSnoop(blk, new_state, pkt);
         respondToSnoop(pkt, curTick + hitLatency);
         return;
     }
+    if (blk)
+        DPRINTF(Cache, "Cache snooped a %s request for addr %x, "
+                "new state is %i\n", pkt->cmdString(), blk_addr, new_state);
     tags->handleSnoop(blk, new_state);
 }
 
@@ -470,12 +492,13 @@ Cache<TagStore,Buffering,Coherence>::snoopResponse(Packet * &pkt)
         //Need to mark it as not in service, and retry for bus
         assert(0); //Yeah, we saw a NACK come through
 
-        //For now this should never get called, we return false when we see a NACK
-        //instead, by doing this we allow the bus_blocked mechanism to handle the retry
-        //For now it retrys in just 2 cycles, need to figure out how to change that
-        //Eventually we will want to also have success come in as a parameter
-        //Need to make sure that we handle the functionality that happens on successufl
-        //return of the sendAddr function
+        //For now this should never get called, we return false when we see a
+        //NACK instead, by doing this we allow the bus_blocked mechanism to
+        //handle the retry For now it retrys in just 2 cycles, need to figure
+        //out how to change that Eventually we will want to also have success
+        //come in as a parameter Need to make sure that we handle the
+        //functionality that happens on successufl return of the sendAddr
+        //function
     }
 }
 
@@ -492,7 +515,8 @@ Cache<TagStore,Buffering,Coherence>::invalidateBlk(Addr addr)
  */
 template<class TagStore, class Buffering, class Coherence>
 Tick
-Cache<TagStore,Buffering,Coherence>::probe(Packet * &pkt, bool update, CachePort* otherSidePort)
+Cache<TagStore,Buffering,Coherence>::probe(Packet * &pkt, bool update,
+                                           CachePort* otherSidePort)
 {
 //    MemDebug::cacheProbe(pkt);
     if (!pkt->req->isUncacheable()) {
@@ -507,141 +531,183 @@ Cache<TagStore,Buffering,Coherence>::probe(Packet * &pkt, bool update, CachePort
         }
     }
 
+    if (!update && (pkt->isWrite() || (otherSidePort == cpuSidePort))) {
+        // Still need to change data in all locations.
+        otherSidePort->sendFunctional(pkt);
+        if (pkt->isRead() && pkt->result == Packet::Success)
+            return 0;
+    }
+
     PacketList writebacks;
     int lat;
     BlkType *blk = tags->handleAccess(pkt, lat, writebacks, update);
 
-    if (!blk) {
-        // Need to check for outstanding misses and writes
-        Addr blk_addr = pkt->getAddr() & ~(blkSize - 1);
-
-        // There can only be one matching outstanding miss.
-        MSHR* mshr = missQueue->findMSHR(blk_addr);
-
-        // There can be many matching outstanding writes.
-        std::vector<MSHR*> writes;
-        missQueue->findWrites(blk_addr, writes);
-
-        if (!update) {
-                otherSidePort->sendFunctional(pkt);
-
-            // Check for data in MSHR and writebuffer.
-            if (mshr) {
-                warn("Found outstanding miss on an non-update probe");
-                MSHR::TargetList *targets = mshr->getTargetList();
-                MSHR::TargetList::iterator i = targets->begin();
-                MSHR::TargetList::iterator end = targets->end();
-                for (; i != end; ++i) {
-                    Packet * target = *i;
-                    // If the target contains data, and it overlaps the
-                    // probed request, need to update data
-                    if (target->isWrite() && target->intersect(pkt)) {
-                        uint8_t* pkt_data;
-                        uint8_t* write_data;
-                        int data_size;
-                        if (target->getAddr() < pkt->getAddr()) {
-                            int offset = pkt->getAddr() - target->getAddr();
-                            pkt_data = pkt->getPtr<uint8_t>();
-                            write_data = target->getPtr<uint8_t>() + offset;
-                            data_size = target->getSize() - offset;
-                            assert(data_size > 0);
-                            if (data_size > pkt->getSize())
-                                data_size = pkt->getSize();
-                        } else {
-                            int offset = target->getAddr() - pkt->getAddr();
-                            pkt_data = pkt->getPtr<uint8_t>() + offset;
-                            write_data = target->getPtr<uint8_t>();
-                            data_size = pkt->getSize() - offset;
-                            assert(data_size > pkt->getSize());
-                            if (data_size > target->getSize())
-                                data_size = target->getSize();
-                        }
-
-                        if (pkt->isWrite()) {
-                            memcpy(pkt_data, write_data, data_size);
-                        } else {
-                            memcpy(write_data, pkt_data, data_size);
-                        }
-                    }
-                }
-            }
-            for (int i = 0; i < writes.size(); ++i) {
-                Packet * write = writes[i]->pkt;
-                if (write->intersect(pkt)) {
-                    warn("Found outstanding write on an non-update probe");
+    DPRINTF(Cache, "%s %x %s blk_addr: %x\n", pkt->cmdString(),
+            pkt->getAddr() & (((ULL(1))<<48)-1), (blk) ? "hit" : "miss",
+            pkt->getAddr() & ~((Addr)blkSize - 1));
+
+
+    // Need to check for outstanding misses and writes
+    Addr blk_addr = pkt->getAddr() & ~(blkSize - 1);
+
+    // There can only be one matching outstanding miss.
+    MSHR* mshr = missQueue->findMSHR(blk_addr);
+
+    // There can be many matching outstanding writes.
+    std::vector<MSHR*> writes;
+    missQueue->findWrites(blk_addr, writes);
+
+    if (!update) {
+        // Check for data in MSHR and writebuffer.
+        if (mshr) {
+            warn("Found outstanding miss on an non-update probe");
+            MSHR::TargetList *targets = mshr->getTargetList();
+            MSHR::TargetList::iterator i = targets->begin();
+            MSHR::TargetList::iterator end = targets->end();
+            for (; i != end; ++i) {
+                Packet * target = *i;
+                // If the target contains data, and it overlaps the
+                // probed request, need to update data
+                if (target->isWrite() && target->intersect(pkt)) {
                     uint8_t* pkt_data;
                     uint8_t* write_data;
                     int data_size;
-                    if (write->getAddr() < pkt->getAddr()) {
-                        int offset = pkt->getAddr() - write->getAddr();
+                    if (target->getAddr() < pkt->getAddr()) {
+                        int offset = pkt->getAddr() - target->getAddr();
                         pkt_data = pkt->getPtr<uint8_t>();
-                        write_data = write->getPtr<uint8_t>() + offset;
-                        data_size = write->getSize() - offset;
+                        write_data = target->getPtr<uint8_t>() + offset;
+                        data_size = target->getSize() - offset;
                         assert(data_size > 0);
                         if (data_size > pkt->getSize())
                             data_size = pkt->getSize();
                     } else {
-                        int offset = write->getAddr() - pkt->getAddr();
+                        int offset = target->getAddr() - pkt->getAddr();
                         pkt_data = pkt->getPtr<uint8_t>() + offset;
-                        write_data = write->getPtr<uint8_t>();
+                        write_data = target->getPtr<uint8_t>();
                         data_size = pkt->getSize() - offset;
-                        assert(data_size > pkt->getSize());
-                        if (data_size > write->getSize())
-                            data_size = write->getSize();
+                        assert(data_size >= pkt->getSize());
+                        if (data_size > target->getSize())
+                            data_size = target->getSize();
                     }
 
                     if (pkt->isWrite()) {
                         memcpy(pkt_data, write_data, data_size);
                     } else {
+                        pkt->flags |= SATISFIED;
+                        pkt->result = Packet::Success;
                         memcpy(write_data, pkt_data, data_size);
                     }
-
                 }
             }
-            return 0;
-        } else {
-            // update the cache state and statistics
-            if (mshr || !writes.empty()){
-                // Can't handle it, return pktuest unsatisfied.
-                return 0;
+        }
+        for (int i = 0; i < writes.size(); ++i) {
+            Packet * write = writes[i]->pkt;
+            if (write->intersect(pkt)) {
+                warn("Found outstanding write on an non-update probe");
+                uint8_t* pkt_data;
+                uint8_t* write_data;
+                int data_size;
+                if (write->getAddr() < pkt->getAddr()) {
+                    int offset = pkt->getAddr() - write->getAddr();
+                    pkt_data = pkt->getPtr<uint8_t>();
+                    write_data = write->getPtr<uint8_t>() + offset;
+                    data_size = write->getSize() - offset;
+                    assert(data_size > 0);
+                    if (data_size > pkt->getSize())
+                        data_size = pkt->getSize();
+                } else {
+                    int offset = write->getAddr() - pkt->getAddr();
+                    pkt_data = pkt->getPtr<uint8_t>() + offset;
+                    write_data = write->getPtr<uint8_t>();
+                    data_size = pkt->getSize() - offset;
+                    assert(data_size >= pkt->getSize());
+                    if (data_size > write->getSize())
+                        data_size = write->getSize();
+                }
+
+                if (pkt->isWrite()) {
+                    memcpy(pkt_data, write_data, data_size);
+                } else {
+                    pkt->flags |= SATISFIED;
+                    pkt->result = Packet::Success;
+                    memcpy(write_data, pkt_data, data_size);
+                }
+
             }
-            if (!pkt->req->isUncacheable()) {
+        }
+        if (pkt->isRead()
+            && pkt->result != Packet::Success
+            && otherSidePort == memSidePort) {
+            otherSidePort->sendFunctional(pkt);
+            assert(pkt->result == Packet::Success);
+        }
+        return 0;
+    } else if (!blk) {
+        // update the cache state and statistics
+        if (mshr || !writes.empty()){
+            // Can't handle it, return pktuest unsatisfied.
+            panic("Atomic access ran into outstanding MSHR's or WB's!");
+        }
+        if (!pkt->req->isUncacheable()) {
                 // Fetch the cache block to fill
-                BlkType *blk = tags->findBlock(pkt);
-                Packet::Command temp_cmd = coherence->getBusCmd(pkt->cmd,
-                                                   (blk)? blk->status : 0);
+            BlkType *blk = tags->findBlock(pkt);
+            Packet::Command temp_cmd = coherence->getBusCmd(pkt->cmd,
+                                                            (blk)? blk->status : 0);
 
-                Packet * busPkt = new Packet(pkt->req,temp_cmd, -1, blkSize);
+            Packet * busPkt = new Packet(pkt->req,temp_cmd, -1, blkSize);
 
-                busPkt->allocate();
+            busPkt->allocate();
 
-                busPkt->time = curTick;
+            busPkt->time = curTick;
 
-                lat = memSidePort->sendAtomic(busPkt);
+            DPRINTF(Cache, "Sending a atomic %s for %x blk_addr: %x\n",
+                    busPkt->cmdString(),
+                    busPkt->getAddr() & (((ULL(1))<<48)-1),
+                    busPkt->getAddr() & ~((Addr)blkSize - 1));
 
-                //Be sure to flip the response to a request for coherence
+            lat = memSidePort->sendAtomic(busPkt);
+
+            //Be sure to flip the response to a request for coherence
+            if (busPkt->needsResponse()) {
                 busPkt->makeAtomicResponse();
+            }
 
 /*             if (!(busPkt->flags & SATISFIED)) {
-                    // blocked at a higher level, just return
-                    return 0;
-                }
+// blocked at a higher level, just return
+return 0;
+}
 
 */             misses[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++;
 
-                CacheBlk::State old_state = (blk) ? blk->status : 0;
-                tags->handleFill(blk, busPkt,
-                                 coherence->getNewState(busPkt, old_state),
-                                 writebacks, pkt);
-                // Handle writebacks if needed
-                while (!writebacks.empty()){
-                    memSidePort->sendAtomic(writebacks.front());
-                    writebacks.pop_front();
-                }
-                return lat + hitLatency;
-            } else {
-                return memSidePort->sendAtomic(pkt);
+            CacheBlk::State old_state = (blk) ? blk->status : 0;
+            CacheBlk::State new_state =
+                coherence->getNewState(busPkt, old_state);
+            DPRINTF(Cache,
+                        "Receive response:%s for blk addr %x in state %i\n",
+                    busPkt->cmdString(),
+                    busPkt->getAddr() & (((ULL(1))<<48)-1), old_state);
+            if (old_state != new_state)
+                    DPRINTF(Cache, "Block for blk addr %x moving from "
+                            "state %i to %i\n",
+                            busPkt->getAddr() & (((ULL(1))<<48)-1),
+                            old_state, new_state);
+
+            tags->handleFill(blk, busPkt,
+                             new_state,
+                             writebacks, pkt);
+            //Free the packet
+            delete busPkt;
+
+            // Handle writebacks if needed
+            while (!writebacks.empty()){
+                Packet *wbPkt = writebacks.front();
+                memSidePort->sendAtomic(wbPkt);
+                writebacks.pop_front();
+                delete wbPkt;
             }
+                return lat + hitLatency;
+        } else {
+            return memSidePort->sendAtomic(pkt);
         }
     } else {
         // There was a cache hit.
@@ -651,13 +717,9 @@ Cache<TagStore,Buffering,Coherence>::probe(Packet * &pkt, bool update, CachePort
             writebacks.pop_front();
         }
 
-        if (update) {
-            hits[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++;
-        } else if (pkt->isWrite()) {
-            // Still need to change data in all locations.
-            return otherSidePort->sendAtomic(pkt);
-        }
-        return curTick + lat;
+        hits[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++;
+
+        return hitLatency;
     }
     fatal("Probe not handled.\n");
     return 0;
@@ -667,16 +729,27 @@ template<class TagStore, class Buffering, class Coherence>
 Tick
 Cache<TagStore,Buffering,Coherence>::snoopProbe(PacketPtr &pkt)
 {
-        Addr blk_addr = pkt->getAddr() & ~(Addr(blkSize-1));
-        BlkType *blk = tags->findBlock(pkt);
-        MSHR *mshr = missQueue->findMSHR(blk_addr);
-        CacheBlk::State new_state = 0;
-        bool satisfy = coherence->handleBusRequest(pkt,blk,mshr, new_state);
-        if (satisfy) {
+    //Send a atomic (false) invalidate up if the protocol calls for it
+    coherence->propogateInvalidate(pkt, false);
+
+    Addr blk_addr = pkt->getAddr() & ~(Addr(blkSize-1));
+    BlkType *blk = tags->findBlock(pkt);
+    MSHR *mshr = missQueue->findMSHR(blk_addr);
+    CacheBlk::State new_state = 0;
+    bool satisfy = coherence->handleBusRequest(pkt,blk,mshr, new_state);
+    if (satisfy) {
+        DPRINTF(Cache, "Cache snooped a %s request for addr %x and "
+                "now supplying data, new state is %i\n",
+                pkt->cmdString(), blk_addr, new_state);
+
             tags->handleSnoop(blk, new_state, pkt);
             return hitLatency;
-        }
-        tags->handleSnoop(blk, new_state);
-        return 0;
+    }
+    if (blk)
+        DPRINTF(Cache, "Cache snooped a %s request for addr %x, "
+                "new state is %i\n",
+                    pkt->cmdString(), blk_addr, new_state);
+    tags->handleSnoop(blk, new_state);
+    return 0;
 }