Port: Make getAddrRanges const

[gem5.git] / src / mem / cache / cache_impl.hh
diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh

index d8630c1f5d6f71cb82d82e5d45736408f98165e7..f7901261ff6f7e238ac3d649a21853b2d73d5f09 100644 (file)
--- a/src/mem/cache/cache_impl.hh
+++ b/src/mem/cache/cache_impl.hh
@@ -1,5 +1,18 @@
  /*
+ * Copyright (c) 2010-2012 ARM Limited
+ * All rights reserved.
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
   * Copyright (c) 2002-2005 The Regents of The University of Michigan
+ * Copyright (c) 2010 Advanced Micro Devices, Inc.
   * All rights reserved.
   *
   * Redistribution and use in source and binary forms, with or without
@@ -37,21 +50,22 @@
   * Cache definitions.
   */
  
-#include "base/fast_alloc.hh"
  #include "base/misc.hh"
  #include "base/range.hh"
  #include "base/types.hh"
+#include "debug/Cache.hh"
+#include "debug/CachePort.hh"
+#include "mem/cache/prefetch/base.hh"
  #include "mem/cache/blk.hh"
  #include "mem/cache/cache.hh"
  #include "mem/cache/mshr.hh"
-#include "mem/cache/prefetch/base.hh"
  #include "sim/sim_exit.hh"
  
  template<class TagStore>
-Cache<TagStore>::Cache(const Params *p, TagStore *tags, BasePrefetcher *pf)
+Cache<TagStore>::Cache(const Params *p, TagStore *tags)
      : BaseCache(p),
        tags(tags),
-      prefetcher(pf),
+      prefetcher(p->prefetcher),
        doFastWrites(true),
        prefetchOnAccess(p->prefetch_on_access)
  {
@@ -62,8 +76,6 @@ Cache<TagStore>::Cache(const Params *p, TagStore *tags, BasePrefetcher *pf)
                                    "CpuSidePort");
      memSidePort = new MemSidePort(p->name + "-mem_side_port", this,
                                    "MemSidePort");
-    cpuSidePort->setOtherPort(memSidePort);
-    memSidePort->setOtherPort(cpuSidePort);
  
      tags->setCache(this);
      if (prefetcher)
@@ -76,40 +88,8 @@ Cache<TagStore>::regStats()
  {
      BaseCache::regStats();
      tags->regStats(name());
-    if (prefetcher)
-        prefetcher->regStats(name());
  }
  
-template<class TagStore>
-Port *
-Cache<TagStore>::getPort(const std::string &if_name, int idx)
-{
-    if (if_name == "" || if_name == "cpu_side") {
-        return cpuSidePort;
-    } else if (if_name == "mem_side") {
-        return memSidePort;
-    } else if (if_name == "functional") {
-        CpuSidePort *funcPort =
-            new CpuSidePort(name() + "-cpu_side_funcport", this,
-                            "CpuSideFuncPort");
-        funcPort->setOtherPort(memSidePort);
-        return funcPort;
-    } else {
-        panic("Port name %s unrecognized\n", if_name);
-    }
-}
-
-template<class TagStore>
-void
-Cache<TagStore>::deletePortRefs(Port *p)
-{
-    if (cpuSidePort == p || memSidePort == p)
-        panic("Can only delete functional ports\n");
-
-    delete p;
-}
-
-
  template<class TagStore>
  void
  Cache<TagStore>::cmpAndSwap(BlkType *blk, PacketPtr pkt)
@@ -152,16 +132,18 @@ Cache<TagStore>::cmpAndSwap(BlkType *blk, PacketPtr pkt)
  
  template<class TagStore>
  void
-Cache<TagStore>::satisfyCpuSideRequest(PacketPtr pkt, BlkType *blk)
+Cache<TagStore>::satisfyCpuSideRequest(PacketPtr pkt, BlkType *blk,
+                                       bool deferred_response,
+                                       bool pending_downgrade)
  {
-    assert(blk);
+    assert(blk && blk->isValid());
      // Occasionally this is not true... if we are a lower-level cache
      // satisfying a string of Read and ReadEx requests from
      // upper-level caches, a Read will mark the block as shared but we
      // can satisfy a following ReadEx anyway since we can rely on the
      // Read requester(s) to have buffered the ReadEx snoop and to
      // invalidate their blocks after receiving them.
-    // assert(pkt->needsExclusive() ? blk->isWritable() : blk->isValid());
+    // assert(!pkt->needsExclusive() || blk->isWritable());
      assert(pkt->getOffset(blkSize) + pkt->getSize() <= blkSize);
  
      // Check RMW operations first since both isRead() and
@@ -169,9 +151,9 @@ Cache<TagStore>::satisfyCpuSideRequest(PacketPtr pkt, BlkType *blk)
      if (pkt->cmd == MemCmd::SwapReq) {
          cmpAndSwap(blk, pkt);
      } else if (pkt->isWrite()) {
-        blk->status |= BlkDirty;
          if (blk->checkWrite(pkt)) {
              pkt->writeDataToBlock(blk->data, blkSize);
+            blk->status |= BlkDirty;
          }
      } else if (pkt->isRead()) {
          if (pkt->isLLSC()) {
@@ -182,20 +164,50 @@ Cache<TagStore>::satisfyCpuSideRequest(PacketPtr pkt, BlkType *blk)
              // special handling for coherent block requests from
              // upper-level caches
              if (pkt->needsExclusive()) {
-                // on ReadExReq we give up our copy
+                // if we have a dirty copy, make sure the recipient
+                // keeps it marked dirty
+                if (blk->isDirty()) {
+                    pkt->assertMemInhibit();
+                }
+                // on ReadExReq we give up our copy unconditionally
                  tags->invalidateBlk(blk);
+            } else if (blk->isWritable() && !pending_downgrade
+                       && !pkt->sharedAsserted()) {
+                // we can give the requester an exclusive copy (by not
+                // asserting shared line) on a read request if:
+                // - we have an exclusive copy at this level (& below)
+                // - we don't have a pending snoop from below
+                //   signaling another read request
+                // - no other cache above has a copy (otherwise it
+                //   would have asserted shared line on request)
+                
+                if (blk->isDirty()) {
+                    // special considerations if we're owner:
+                    if (!deferred_response && !isTopLevel) {
+                        // if we are responding immediately and can
+                        // signal that we're transferring ownership
+                        // along with exclusivity, do so
+                        pkt->assertMemInhibit();
+                        blk->status &= ~BlkDirty;
+                    } else {
+                        // if we're responding after our own miss,
+                        // there's a window where the recipient didn't
+                        // know it was getting ownership and may not
+                        // have responded to snoops correctly, so we
+                        // can't pass off ownership *or* exclusivity
+                        pkt->assertShared();
+                    }
+                }
              } else {
-                // on ReadReq we create shareable copies here and in
-                // the requester
+                // otherwise only respond with a shared copy
                  pkt->assertShared();
-                blk->status &= ~BlkWritable;
              }
          }
      } else {
          // Not a read or write... must be an upgrade.  it's OK
          // to just ack those as long as we have an exclusive
          // copy at this level.
-        assert(pkt->cmd == MemCmd::UpgradeReq);
+        assert(pkt->isUpgrade());
          tags->invalidateBlk(blk);
      }
  }
@@ -210,9 +222,9 @@ Cache<TagStore>::satisfyCpuSideRequest(PacketPtr pkt, BlkType *blk)
  
  template<class TagStore>
  void
-Cache<TagStore>::markInService(MSHR *mshr)
+Cache<TagStore>::markInService(MSHR *mshr, PacketPtr pkt)
  {
-    markInServiceInternal(mshr);
+    markInServiceInternal(mshr, pkt);
  #if 0
          if (mshr->originalCmd == MemCmd::HardPFReq) {
              DPRINTF(HWPrefetch, "%s:Marking a HW_PF in service\n",
@@ -260,13 +272,23 @@ bool
  Cache<TagStore>::access(PacketPtr pkt, BlkType *&blk,
                          int &lat, PacketList &writebacks)
  {
-    if (pkt->req->isUncacheable())  {
+    if (pkt->req->isUncacheable()) {
+        if (pkt->req->isClearLL()) {
+            tags->clearLocks();
+        } else if (pkt->isWrite()) {
+           blk = tags->findBlock(pkt->getAddr());
+           if (blk != NULL) {
+               tags->invalidateBlk(blk);
+           }
+        }
+
          blk = NULL;
          lat = hitLatency;
          return false;
      }
  
-    blk = tags->accessBlock(pkt->getAddr(), lat);
+    int id = pkt->req->hasContextId() ? pkt->req->contextId() : -1;
+    blk = tags->accessBlock(pkt->getAddr(), lat, id);
  
      DPRINTF(Cache, "%s%s %x %s\n", pkt->cmdString(),
              pkt->req->isInstFetch() ? " (ifetch)" : "",
@@ -276,7 +298,7 @@ Cache<TagStore>::access(PacketPtr pkt, BlkType *&blk,
  
          if (pkt->needsExclusive() ? blk->isWritable() : blk->isReadable()) {
              // OK to satisfy access
-            hits[pkt->cmdToIndex()][0/*pkt->req->threadId()*/]++;
+            incHitCount(pkt);
              satisfyCpuSideRequest(pkt, blk);
              return true;
          }
@@ -299,14 +321,18 @@ Cache<TagStore>::access(PacketPtr pkt, BlkType *&blk,
                  incMissCount(pkt);
                  return false;
              }
-            tags->insertBlock(pkt->getAddr(), blk);
+            int id = pkt->req->masterId();
+            tags->insertBlock(pkt->getAddr(), blk, id);
              blk->status = BlkValid | BlkReadable;
          }
          std::memcpy(blk->data, pkt->getPtr<uint8_t>(), blkSize);
          blk->status |= BlkDirty;
+        if (pkt->isSupplyExclusive()) {
+            blk->status |= BlkWritable;
+        }
          // nothing else to do; writeback doesn't expect response
          assert(!pkt->needsResponse());
-        hits[pkt->cmdToIndex()][0/*pkt->req->threadId()*/]++;
+        incHitCount(pkt);
          return true;
      }
  
@@ -322,10 +348,10 @@ Cache<TagStore>::access(PacketPtr pkt, BlkType *&blk,
  }
  
  
-class ForwardResponseRecord : public Packet::SenderState, public FastAlloc
+class ForwardResponseRecord : public Packet::SenderState
  {
      Packet::SenderState *prevSenderState;
-    int prevSrc;
+    PortID prevSrc;
  #ifndef NDEBUG
      BaseCache *cache;
  #endif
@@ -352,14 +378,31 @@ Cache<TagStore>::timingAccess(PacketPtr pkt)
  //@todo Add back in MemDebug Calls
  //    MemDebug::cacheAccess(pkt);
  
+
+    /// @todo temporary hack to deal with memory corruption issue until
+    /// 4-phase transactions are complete
+    for (int x = 0; x < pendingDelete.size(); x++)
+        delete pendingDelete[x];
+    pendingDelete.clear();
+
      // we charge hitLatency for doing just about anything here
-    Tick time =  curTick + hitLatency;
+    Tick time =  curTick() + hitLatency;
  
      if (pkt->isResponse()) {
          // must be cache-to-cache response from upper to lower level
          ForwardResponseRecord *rec =
              dynamic_cast<ForwardResponseRecord *>(pkt->senderState);
-        assert(rec != NULL);
+
+        if (rec == NULL) {
+            assert(pkt->cmd == MemCmd::HardPFResp);
+            // Check if it's a prefetch response and handle it. We shouldn't
+            // get any other kinds of responses without FRRs.
+            DPRINTF(Cache, "Got prefetch response from above for addr %#x\n",
+                    pkt->getAddr());
+            handleResponse(pkt);
+            return true;
+        }
+
          rec->restore(pkt, this);
          delete rec;
          memSidePort->respond(pkt, time);
@@ -380,16 +423,29 @@ Cache<TagStore>::timingAccess(PacketPtr pkt)
              Packet *snoopPkt = new Packet(pkt, true);  // clear flags
              snoopPkt->setExpressSnoop();
              snoopPkt->assertMemInhibit();
-            memSidePort->sendTiming(snoopPkt);
+            memSidePort->sendTimingReq(snoopPkt);
              // main memory will delete snoopPkt
          }
          // since we're the official target but we aren't responding,
          // delete the packet now.
-        delete pkt;
+
+        /// @todo nominally we should just delete the packet here,
+        /// however, until 4-phase stuff we can't because sending
+        /// cache is still relying on it
+        pendingDelete.push_back(pkt);
          return true;
      }
  
      if (pkt->req->isUncacheable()) {
+        if (pkt->req->isClearLL()) {
+            tags->clearLocks();
+        } else if (pkt->isWrite()) {
+            BlkType *blk = tags->findBlock(pkt->getAddr());
+            if (blk != NULL) {
+                tags->invalidateBlk(blk);
+            }
+        }
+
          // writes go in write buffer, reads use MSHR
          if (pkt->isWrite() && !pkt->isRead()) {
              allocateWriteBuffer(pkt, time, true);
@@ -434,28 +490,32 @@ Cache<TagStore>::timingAccess(PacketPtr pkt)
      bool needsResponse = pkt->needsResponse();
  
      if (satisfied) {
-        if (needsResponse) {
-            pkt->makeTimingResponse();
-            cpuSidePort->respond(pkt, curTick+lat);
-        } else {
-            delete pkt;
-        }
-
          if (prefetcher && (prefetchOnAccess || (blk && blk->wasPrefetched()))) {
              if (blk)
                  blk->status &= ~BlkHWPrefetched;
              next_pf_time = prefetcher->notify(pkt, time);
          }
+
+        if (needsResponse) {
+            pkt->makeTimingResponse();
+            cpuSidePort->respond(pkt, curTick()+lat);
+        } else {
+            /// @todo nominally we should just delete the packet here,
+            /// however, until 4-phase stuff we can't because sending
+            /// cache is still relying on it
+            pendingDelete.push_back(pkt);
+        }
      } else {
          // miss
  
-        Addr blk_addr = pkt->getAddr() & ~(Addr(blkSize-1));
+        Addr blk_addr = blockAlign(pkt->getAddr());
          MSHR *mshr = mshrQueue.findMatch(blk_addr);
  
          if (mshr) {
              // MSHR hit
              //@todo remove hw_pf here
-            mshr_hits[pkt->cmdToIndex()][0/*pkt->req->threadId()*/]++;
+            assert(pkt->req->masterId() < system->maxMasters());
+            mshr_hits[pkt->cmdToIndex()][pkt->req->masterId()]++;
              if (mshr->threadNum != 0/*pkt->req->threadId()*/) {
                  mshr->threadNum = -1;
              }
@@ -464,13 +524,14 @@ Cache<TagStore>::timingAccess(PacketPtr pkt)
                  noTargetMSHR = mshr;
                  setBlocked(Blocked_NoTargets);
                  // need to be careful with this... if this mshr isn't
-                // ready yet (i.e. time > curTick_, we don't want to
+                // ready yet (i.e. time > curTick()), we don't want to
                  // move it ahead of mshrs that are ready
                  // mshrQueue.moveToFront(mshr);
              }
          } else {
              // no MSHR
-            mshr_misses[pkt->cmdToIndex()][0/*pkt->req->threadId()*/]++;
+            assert(pkt->req->masterId() < system->maxMasters());
+            mshr_misses[pkt->cmdToIndex()][pkt->req->masterId()]++;
              // always mark as cache fill for now... if we implement
              // no-write-allocate or bypass accesses this will have to
              // be changed.
@@ -529,12 +590,12 @@ Cache<TagStore>::getBusPacket(PacketPtr cpu_pkt, BlkType *blk,
      bool blkValid = blk && blk->isValid();
  
      if (cpu_pkt->req->isUncacheable()) {
-        assert(blk == NULL);
+        //assert(blk == NULL);
          return NULL;
      }
  
-    if (!blkValid && (cpu_pkt->cmd == MemCmd::Writeback ||
-                      cpu_pkt->cmd == MemCmd::UpgradeReq)) {
+    if (!blkValid &&
+        (cpu_pkt->cmd == MemCmd::Writeback || cpu_pkt->isUpgrade())) {
          // Writebacks that weren't allocated in access() and upgrades
          // from upper-level caches that missed completely just go
          // through.
@@ -553,12 +614,12 @@ Cache<TagStore>::getBusPacket(PacketPtr cpu_pkt, BlkType *blk,
          // only reason to be here is that blk is shared
          // (read-only) and we need exclusive
          assert(needsExclusive && !blk->isWritable());
-        cmd = MemCmd::UpgradeReq;
+        cmd = cpu_pkt->isLLSC() ? MemCmd::SCUpgradeReq : MemCmd::UpgradeReq;
      } else {
          // block is invalid
          cmd = needsExclusive ? MemCmd::ReadExReq : MemCmd::ReadReq;
      }
-    PacketPtr pkt = new Packet(cpu_pkt->req, cmd, Packet::Broadcast, blkSize);
+    PacketPtr pkt = new Packet(cpu_pkt->req, cmd, blkSize);
  
      pkt->allocate();
      return pkt;
@@ -688,28 +749,57 @@ Cache<TagStore>::atomicAccess(PacketPtr pkt)
  
  template<class TagStore>
  void
-Cache<TagStore>::functionalAccess(PacketPtr pkt,
-                                  CachePort *incomingPort,
-                                  CachePort *otherSidePort)
+Cache<TagStore>::functionalAccess(PacketPtr pkt, bool fromCpuSide)
  {
-    Addr blk_addr = pkt->getAddr() & ~(blkSize - 1);
+    Addr blk_addr = blockAlign(pkt->getAddr());
      BlkType *blk = tags->findBlock(pkt->getAddr());
+    MSHR *mshr = mshrQueue.findMatch(blk_addr);
  
      pkt->pushLabel(name());
  
      CacheBlkPrintWrapper cbpw(blk);
-    bool done =
-        (blk && pkt->checkFunctional(&cbpw, blk_addr, blkSize, blk->data))
-        || incomingPort->checkFunctional(pkt)
+
+    // Note that just because an L2/L3 has valid data doesn't mean an
+    // L1 doesn't have a more up-to-date modified copy that still
+    // needs to be found.  As a result we always update the request if
+    // we have it, but only declare it satisfied if we are the owner.
+
+    // see if we have data at all (owned or otherwise)
+    bool have_data = blk && blk->isValid()
+        && pkt->checkFunctional(&cbpw, blk_addr, blkSize, blk->data);
+
+    // data we have is dirty if marked as such or if valid & ownership
+    // pending due to outstanding UpgradeReq
+    bool have_dirty =
+        have_data && (blk->isDirty() ||
+                      (mshr && mshr->inService && mshr->isPendingDirty()));
+
+    bool done = have_dirty
+        || cpuSidePort->checkFunctional(pkt)
          || mshrQueue.checkFunctional(pkt, blk_addr)
          || writeBuffer.checkFunctional(pkt, blk_addr)
-        || otherSidePort->checkFunctional(pkt);
+        || memSidePort->checkFunctional(pkt);
+
+    DPRINTF(Cache, "functional %s %x %s%s%s\n",
+            pkt->cmdString(), pkt->getAddr(),
+            (blk && blk->isValid()) ? "valid " : "",
+            have_data ? "data " : "", done ? "done " : "");
  
      // We're leaving the cache, so pop cache->name() label
      pkt->popLabel();
  
-    if (!done) {
-        otherSidePort->sendFunctional(pkt);
+    if (done) {
+        pkt->makeResponse();
+    } else {
+        // if it came as a request from the CPU side then make sure it
+        // continues towards the memory side
+        if (fromCpuSide) {
+            memSidePort->sendFunctional(pkt);
+        } else if (forwardSnoops && cpuSidePort->isSnooping()) {
+            // if it came from the memory side, it must be a snoop request
+            // and we should only forward it if we are forwarding snoops
+            cpuSidePort->sendFunctionalSnoop(pkt);
+        }
      }
  }
  
@@ -725,7 +815,7 @@ template<class TagStore>
  void
  Cache<TagStore>::handleResponse(PacketPtr pkt)
  {
-    Tick time = curTick + hitLatency;
+    Tick time = curTick() + hitLatency;
      MSHR *mshr = dynamic_cast<MSHR*>(pkt->senderState);
      bool is_error = pkt->isError();
  
@@ -757,14 +847,16 @@ Cache<TagStore>::handleResponse(PacketPtr pkt)
      MSHR::Target *initial_tgt = mshr->getTarget();
      BlkType *blk = tags->findBlock(pkt->getAddr());
      int stats_cmd_idx = initial_tgt->pkt->cmdToIndex();
-    Tick miss_latency = curTick - initial_tgt->recvTime;
+    Tick miss_latency = curTick() - initial_tgt->recvTime;
      PacketList writebacks;
  
      if (pkt->req->isUncacheable()) {
-        mshr_uncacheable_lat[stats_cmd_idx][0/*pkt->req->threadId()*/] +=
+        assert(pkt->req->masterId() < system->maxMasters());
+        mshr_uncacheable_lat[stats_cmd_idx][pkt->req->masterId()] +=
              miss_latency;
      } else {
-        mshr_miss_latency[stats_cmd_idx][0/*pkt->req->threadId()*/] +=
+        assert(pkt->req->masterId() < system->maxMasters());
+        mshr_miss_latency[stats_cmd_idx][pkt->req->masterId()] +=
              miss_latency;
      }
  
@@ -796,7 +888,8 @@ Cache<TagStore>::handleResponse(PacketPtr pkt)
            case MSHR::Target::FromCPU:
              Tick completion_time;
              if (is_fill) {
-                satisfyCpuSideRequest(target->pkt, blk);
+                satisfyCpuSideRequest(target->pkt, blk,
+                                      true, mshr->hasPostDowngrade());
                  // How many bytes past the first request is this one
                  int transfer_offset =
                      target->pkt->getOffset(blkSize) - initial_offset;
@@ -809,8 +902,17 @@ Cache<TagStore>::handleResponse(PacketPtr pkt)
                      (transfer_offset ? pkt->finishTime : pkt->firstWordTime);
  
                  assert(!target->pkt->req->isUncacheable());
-                missLatency[target->pkt->cmdToIndex()][0/*pkt->req->threadId()*/] +=
+
+                assert(target->pkt->req->masterId() < system->maxMasters());
+                missLatency[target->pkt->cmdToIndex()][target->pkt->req->masterId()] +=
                      completion_time - target->recvTime;
+            } else if (pkt->cmd == MemCmd::UpgradeFailResp) {
+                // failed StoreCond upgrade
+                assert(target->pkt->cmd == MemCmd::StoreCondReq ||
+                       target->pkt->cmd == MemCmd::StoreCondFailReq ||
+                       target->pkt->cmd == MemCmd::SCUpgradeFailReq);
+                completion_time = tags->getHitLatency() + pkt->finishTime;
+                target->pkt->req->setExtraData(0);
              } else {
                  // not a cache fill, just forwarding response
                  completion_time = tags->getHitLatency() + pkt->finishTime;
@@ -822,12 +924,11 @@ Cache<TagStore>::handleResponse(PacketPtr pkt)
              // if this packet is an error copy that to the new packet
              if (is_error)
                  target->pkt->copyError(pkt);
-            if (pkt->isInvalidate()) {
+            if (target->pkt->cmd == MemCmd::ReadResp &&
+                (pkt->isInvalidate() || mshr->hasPostInvalidate())) {
                  // If intermediate cache got ReadRespWithInvalidate,
                  // propagate that.  Response should not have
                  // isInvalidate() set otherwise.
-                assert(target->pkt->cmd == MemCmd::ReadResp);
-                assert(pkt->cmd == MemCmd::ReadRespWithInvalidate);
                  target->pkt->cmd = MemCmd::ReadRespWithInvalidate;
              }
              cpuSidePort->respond(target->pkt, completion_time);
@@ -846,8 +947,9 @@ Cache<TagStore>::handleResponse(PacketPtr pkt)
              assert(!is_error);
              // response to snoop request
              DPRINTF(Cache, "processing deferred snoop...\n");
+            assert(!(pkt->isInvalidate() && !mshr->hasPostInvalidate()));
              handleSnoop(target->pkt, blk, true, true,
-                        mshr->pendingInvalidate || pkt->isInvalidate());
+                        mshr->hasPostInvalidate());
              break;
  
            default:
@@ -857,14 +959,20 @@ Cache<TagStore>::handleResponse(PacketPtr pkt)
          mshr->popTarget();
      }
  
-    if (pkt->isInvalidate()) {
-        tags->invalidateBlk(blk);
+    if (blk) {
+        if (pkt->isInvalidate() || mshr->hasPostInvalidate()) {
+            tags->invalidateBlk(blk);
+        } else if (mshr->hasPostDowngrade()) {
+            blk->status &= ~BlkWritable;
+        }
      }
  
      if (mshr->promoteDeferredTargets()) {
          // avoid later read getting stale data while write miss is
          // outstanding.. see comment in timingAccess()
-        blk->status &= ~BlkReadable;
+        if (blk) {
+            blk->status &= ~BlkReadable;
+        }
          MSHRQueue *mq = mshr->queue;
          mq->markPending(mshr);
          requestMemSideBus((RequestCause)mq->index, pkt->finishTime);
@@ -886,6 +994,7 @@ Cache<TagStore>::handleResponse(PacketPtr pkt)
          if (blk->isDirty()) {
              allocateWriteBuffer(writebackBlk(blk), time, true);
          }
+        blk->status &= ~BlkValid;
          tags->invalidateBlk(blk);
      }
  
@@ -901,11 +1010,15 @@ Cache<TagStore>::writebackBlk(BlkType *blk)
  {
      assert(blk && blk->isValid() && blk->isDirty());
  
-    writebacks[0/*pkt->req->threadId()*/]++;
+    writebacks[Request::wbMasterId]++;
  
      Request *writebackReq =
-        new Request(tags->regenerateBlkAddr(blk->tag, blk->set), blkSize, 0);
-    PacketPtr writeback = new Packet(writebackReq, MemCmd::Writeback, -1);
+        new Request(tags->regenerateBlkAddr(blk->tag, blk->set), blkSize, 0,
+                Request::wbMasterId);
+    PacketPtr writeback = new Packet(writebackReq, MemCmd::Writeback);
+    if (blk->isWritable()) {
+        writeback->setSupplyExclusive();
+    }
      writeback->allocate();
      std::memcpy(writeback->getPtr<uint8_t>(), blk->data, blkSize);
  
@@ -976,20 +1089,34 @@ Cache<TagStore>::handleFill(PacketPtr pkt, BlkType *blk,
              tempBlock->tag = tags->extractTag(addr);
              DPRINTF(Cache, "using temp block for %x\n", addr);
          } else {
-            tags->insertBlock(addr, blk);
+            int id = pkt->req->masterId();
+            tags->insertBlock(pkt->getAddr(), blk, id);
          }
+
+        // starting from scratch with a new block
+        blk->status = 0;
      } else {
          // existing block... probably an upgrade
          assert(blk->tag == tags->extractTag(addr));
          // either we're getting new data or the block should already be valid
          assert(pkt->hasData() || blk->isValid());
+        // don't clear block status... if block is already dirty we
+        // don't want to lose that
      }
  
+    blk->status |= BlkValid | BlkReadable;
+
      if (!pkt->sharedAsserted()) {
-        blk->status = BlkValid | BlkReadable | BlkWritable;
-    } else {
-        assert(!pkt->needsExclusive());
-        blk->status = BlkValid | BlkReadable;
+        blk->status |= BlkWritable;
+        // If we got this via cache-to-cache transfer (i.e., from a
+        // cache that was an owner) and took away that owner's copy,
+        // then we need to write it back.  Normally this happens
+        // anyway as a side effect of getting a copy to write it, but
+        // there are cases (such as failed store conditionals or
+        // compare-and-swaps) where we'll demand an exclusive copy but
+        // end up not writing it.
+        if (pkt->memInhibitAsserted())
+            blk->status |= BlkDirty;
      }
  
      DPRINTF(Cache, "Block addr %x moving from state %i to %i\n",
@@ -1020,14 +1147,8 @@ doTimingSupplyResponse(PacketPtr req_pkt, uint8_t *blk_data,
  {
      // timing-mode snoop responses require a new packet, unless we
      // already made a copy...
-    PacketPtr pkt = already_copied ? req_pkt : new Packet(req_pkt, true);
-    if (!req_pkt->isInvalidate()) {
-        // note that we're ignoring the shared flag on req_pkt... it's
-        // basically irrelevant, as we'll always assert shared unless
-        // it's an exclusive request, in which case the shared line
-        // should never be asserted1
-        pkt->assertShared();
-    }
+    PacketPtr pkt = already_copied ? req_pkt : new Packet(req_pkt);
+    assert(req_pkt->isInvalidate() || pkt->sharedAsserted());
      pkt->allocate();
      pkt->makeTimingResponse();
      if (pkt->isRead()) {
@@ -1043,7 +1164,7 @@ doTimingSupplyResponse(PacketPtr req_pkt, uint8_t *blk_data,
          // invalidate it.
          pkt->cmd = MemCmd::ReadRespWithInvalidate;
      }
-    memSidePort->respond(pkt, curTick + hitLatency);
+    memSidePort->respond(pkt, curTick() + hitLatency);
  }
  
  template<class TagStore>
@@ -1058,36 +1179,39 @@ Cache<TagStore>::handleSnoop(PacketPtr pkt, BlkType *blk,
      assert(!(pending_inval && !is_deferred));
      assert(pkt->isRequest());
  
+    // the packet may get modified if we or a forwarded snooper
+    // responds in atomic mode, so remember a few things about the
+    // original packet up front
+    bool invalidate = pkt->isInvalidate();
+    bool M5_VAR_USED needs_exclusive = pkt->needsExclusive();
+
      if (forwardSnoops) {
          // first propagate snoop upward to see if anyone above us wants to
          // handle it.  save & restore packet src since it will get
          // rewritten to be relative to cpu-side bus (if any)
          bool alreadyResponded = pkt->memInhibitAsserted();
          if (is_timing) {
-            Packet *snoopPkt = new Packet(pkt, true);  // clear flags
-            snoopPkt->setExpressSnoop();
-            snoopPkt->senderState = new ForwardResponseRecord(pkt, this);
-            cpuSidePort->sendTiming(snoopPkt);
-            if (snoopPkt->memInhibitAsserted()) {
+            Packet snoopPkt(pkt, true);  // clear flags
+            snoopPkt.setExpressSnoop();
+            snoopPkt.senderState = new ForwardResponseRecord(pkt, this);
+            cpuSidePort->sendTimingSnoopReq(&snoopPkt);
+            if (snoopPkt.memInhibitAsserted()) {
                  // cache-to-cache response from some upper cache
                  assert(!alreadyResponded);
                  pkt->assertMemInhibit();
              } else {
-                delete snoopPkt->senderState;
+                delete snoopPkt.senderState;
              }
-            if (snoopPkt->sharedAsserted()) {
+            if (snoopPkt.sharedAsserted()) {
                  pkt->assertShared();
              }
-            delete snoopPkt;
          } else {
-            int origSrc = pkt->getSrc();
-            cpuSidePort->sendAtomic(pkt);
+            cpuSidePort->sendAtomicSnoop(pkt);
              if (!alreadyResponded && pkt->memInhibitAsserted()) {
                  // cache-to-cache response from some upper cache:
                  // forward response to original requester
                  assert(pkt->isResponse());
              }
-            pkt->setSrc(origSrc);
          }
      }
  
@@ -1100,10 +1224,9 @@ Cache<TagStore>::handleSnoop(PacketPtr pkt, BlkType *blk,
      // and then do it later
      bool respond = blk->isDirty() && pkt->needsResponse();
      bool have_exclusive = blk->isWritable();
-    bool invalidate = pkt->isInvalidate();
  
-    if (pkt->isRead() && !pkt->isInvalidate()) {
-        assert(!pkt->needsExclusive());
+    if (pkt->isRead() && !invalidate) {
+        assert(!needs_exclusive);
          pkt->assertShared();
          int bits_to_clear = BlkWritable;
          const bool haveOwnershipState = true; // for now
@@ -1162,7 +1285,7 @@ Cache<TagStore>::snoopTiming(PacketPtr pkt)
  
      BlkType *blk = tags->findBlock(pkt->getAddr());
  
-    Addr blk_addr = pkt->getAddr() & ~(Addr(blkSize-1));
+    Addr blk_addr = blockAlign(pkt->getAddr());
      MSHR *mshr = mshrQueue.findMatch(blk_addr);
  
      // Let the MSHR itself track the snoop and decide whether we want
@@ -1182,8 +1305,10 @@ Cache<TagStore>::snoopTiming(PacketPtr pkt)
                  pkt->getAddr());
  
          //Look through writebacks for any non-uncachable writes, use that
-        for (int i = 0; i < writebacks.size(); i++) {
-            mshr = writebacks[i];
+        if (writebacks.size()) {
+            // We should only ever find a single match
+            assert(writebacks.size() == 1);
+            mshr = writebacks[0];
              assert(!mshr->isUncacheable());
              assert(mshr->getNumTargets() == 1);
              PacketPtr wb_pkt = mshr->getTarget()->pkt;
@@ -1193,6 +1318,8 @@ Cache<TagStore>::snoopTiming(PacketPtr pkt)
              pkt->assertMemInhibit();
              if (!pkt->needsExclusive()) {
                  pkt->assertShared();
+                // the writeback is no longer the exclusive copy in the system
+                wb_pkt->clearSupplyExclusive();
              } else {
                  // if we're not asserting the shared line, we need to
                  // invalidate our copy.  we'll do that below as long as
@@ -1207,19 +1334,25 @@ Cache<TagStore>::snoopTiming(PacketPtr pkt)
                  markInService(mshr);
                  delete wb_pkt;
              }
-
-            // If this was a shared writeback, there may still be
-            // other shared copies above that require invalidation.
-            // We could be more selective and return here if the
-            // request is non-exclusive or if the writeback is
-            // exclusive.
-            break;
-        }
+        } // writebacks.size()
      }
  
+    // If this was a shared writeback, there may still be
+    // other shared copies above that require invalidation.
+    // We could be more selective and return here if the
+    // request is non-exclusive or if the writeback is
+    // exclusive.
      handleSnoop(pkt, blk, true, false, false);
  }
  
+template<class TagStore>
+bool
+Cache<TagStore>::CpuSidePort::recvTimingSnoopResp(PacketPtr pkt)
+{
+    // Express snoop responses from master to slave, e.g., from L1 to L2
+    cache->timingAccess(pkt);
+    return true;
+}
  
  template<class TagStore>
  Tick
@@ -1297,17 +1430,23 @@ Cache<TagStore>::getNextMSHR()
  
      // fall through... no pending requests.  Try a prefetch.
      assert(!miss_mshr && !write_mshr);
-    if (!mshrQueue.isFull()) {
+    if (prefetcher && !mshrQueue.isFull()) {
          // If we have a miss queue slot, we can try a prefetch
          PacketPtr pkt = prefetcher->getPacket();
          if (pkt) {
              Addr pf_addr = blockAlign(pkt->getAddr());
-            if (!tags->findBlock(pf_addr) && !mshrQueue.findMatch(pf_addr)) {
+            if (!tags->findBlock(pf_addr) && !mshrQueue.findMatch(pf_addr) &&
+                                             !writeBuffer.findMatch(pf_addr)) {
                  // Update statistic on number of prefetches issued
                  // (hwpf_mshr_misses)
-                mshr_misses[pkt->cmdToIndex()][0/*pkt->req->threadId()*/]++;
+                assert(pkt->req->masterId() < system->maxMasters());
+                mshr_misses[pkt->cmdToIndex()][pkt->req->masterId()]++;
                  // Don't request bus, since we already have it
-                return allocateMissBuffer(pkt, curTick, false);
+                return allocateMissBuffer(pkt, curTick(), false);
+            } else {
+                // free the request and packet
+                delete pkt->req;
+                delete pkt;
              }
          }
      }
@@ -1330,12 +1469,50 @@ Cache<TagStore>::getTimingPacket()
      PacketPtr tgt_pkt = mshr->getTarget()->pkt;
      PacketPtr pkt = NULL;
  
-    if (mshr->isForwardNoResponse()) {
+    if (tgt_pkt->cmd == MemCmd::SCUpgradeFailReq ||
+        tgt_pkt->cmd == MemCmd::StoreCondFailReq) {
+        // SCUpgradeReq or StoreCondReq saw invalidation while queued
+        // in MSHR, so now that we are getting around to processing
+        // it, just treat it as if we got a failure response
+        pkt = new Packet(tgt_pkt);
+        pkt->cmd = MemCmd::UpgradeFailResp;
+        pkt->senderState = mshr;
+        pkt->firstWordTime = pkt->finishTime = curTick();
+        handleResponse(pkt);
+        return NULL;
+    } else if (mshr->isForwardNoResponse()) {
          // no response expected, just forward packet as it is
          assert(tags->findBlock(mshr->addr) == NULL);
          pkt = tgt_pkt;
      } else {
          BlkType *blk = tags->findBlock(mshr->addr);
+
+        if (tgt_pkt->cmd == MemCmd::HardPFReq) {
+            // It might be possible for a writeback to arrive between
+            // the time the prefetch is placed in the MSHRs and when
+            // it's selected to send... if so, this assert will catch
+            // that, and then we'll have to figure out what to do.
+            assert(blk == NULL);
+
+            // We need to check the caches above us to verify that they don't have
+            // a copy of this block in the dirty state at the moment. Without this
+            // check we could get a stale copy from memory that might get used
+            // in place of the dirty one.
+            PacketPtr snoop_pkt = new Packet(tgt_pkt, true);
+            snoop_pkt->setExpressSnoop();
+            snoop_pkt->senderState = mshr;
+            cpuSidePort->sendTimingSnoopReq(snoop_pkt);
+
+            if (snoop_pkt->memInhibitAsserted()) {
+                markInService(mshr, snoop_pkt);
+                DPRINTF(Cache, "Upward snoop of prefetch for addr %#x hit\n",
+                        tgt_pkt->getAddr());
+                delete snoop_pkt;
+                return NULL;
+            }
+            delete snoop_pkt;
+        }
+
          pkt = getBusPacket(tgt_pkt, blk, mshr->needsExclusive());
  
          mshr->isForward = (pkt == NULL);
@@ -1373,6 +1550,35 @@ Cache<TagStore>::nextMSHRReadyTime()
      return nextReady;
  }
  
+template<class TagStore>
+void
+Cache<TagStore>::serialize(std::ostream &os)
+{
+    warn("*** Creating checkpoints with caches is not supported. ***\n");
+    warn("    Remove any caches before taking checkpoints\n");
+    warn("    This checkpoint will not restore correctly and dirty data in "
+         "the cache will be lost!\n");
+
+    // Since we don't write back the data dirty in the caches to the physical
+    // memory if caches exist in the system we won't be able to restore
+    // from the checkpoint as any data dirty in the caches will be lost.
+
+    bool bad_checkpoint = true;
+    SERIALIZE_SCALAR(bad_checkpoint);
+}
+
+template<class TagStore>
+void
+Cache<TagStore>::unserialize(Checkpoint *cp, const std::string &section)
+{
+    bool bad_checkpoint;
+    UNSERIALIZE_SCALAR(bad_checkpoint);
+    if (bad_checkpoint) {
+        fatal("Restoring from checkpoints with caches is not supported in the "
+              "classic memory system. Please remove any caches before taking "
+              "checkpoints.\n");
+    }
+}
  
  ///////////////
  //
@@ -1381,54 +1587,48 @@ Cache<TagStore>::nextMSHRReadyTime()
  ///////////////
  
  template<class TagStore>
-void
-Cache<TagStore>::CpuSidePort::
-getDeviceAddressRanges(AddrRangeList &resp, bool &snoop)
+AddrRangeList
+Cache<TagStore>::CpuSidePort::getAddrRanges() const
  {
-    // CPU side port doesn't snoop; it's a target only.  It can
-    // potentially respond to any address.
-    snoop = false;
-    resp.push_back(myCache()->getAddrRange());
+    return cache->getAddrRanges();
  }
  
-
  template<class TagStore>
  bool
-Cache<TagStore>::CpuSidePort::recvTiming(PacketPtr pkt)
+Cache<TagStore>::CpuSidePort::recvTimingReq(PacketPtr pkt)
  {
-    // illegal to block responses... can lead to deadlock
-    if (pkt->isRequest() && !pkt->memInhibitAsserted() && blocked) {
+    // always let inhibited requests through even if blocked
+    if (!pkt->memInhibitAsserted() && blocked) {
          DPRINTF(Cache,"Scheduling a retry while blocked\n");
          mustSendRetry = true;
          return false;
      }
  
-    myCache()->timingAccess(pkt);
+    cache->timingAccess(pkt);
      return true;
  }
  
-
  template<class TagStore>
  Tick
  Cache<TagStore>::CpuSidePort::recvAtomic(PacketPtr pkt)
  {
-    return myCache()->atomicAccess(pkt);
+    // atomic request
+    return cache->atomicAccess(pkt);
  }
  
-
  template<class TagStore>
  void
  Cache<TagStore>::CpuSidePort::recvFunctional(PacketPtr pkt)
  {
-    myCache()->functionalAccess(pkt, this, otherPort);
+    // functional request
+    cache->functionalAccess(pkt, true);
  }
  
-
  template<class TagStore>
  Cache<TagStore>::
  CpuSidePort::CpuSidePort(const std::string &_name, Cache<TagStore> *_cache,
                           const std::string &_label)
-    : BaseCache::CachePort(_name, _cache, _label)
+    : BaseCache::CacheSlavePort(_name, _cache, _label), cache(_cache)
  {
  }
  
@@ -1438,21 +1638,9 @@ CpuSidePort::CpuSidePort(const std::string &_name, Cache<TagStore> *_cache,
  //
  ///////////////
  
-template<class TagStore>
-void
-Cache<TagStore>::MemSidePort::
-getDeviceAddressRanges(AddrRangeList &resp, bool &snoop)
-{
-    // Memory-side port always snoops, but never passes requests
-    // through to targets on the cpu side (so we don't add anything to
-    // the address range list).
-    snoop = true;
-}
-
-
  template<class TagStore>
  bool
-Cache<TagStore>::MemSidePort::recvTiming(PacketPtr pkt)
+Cache<TagStore>::MemSidePort::recvTimingResp(PacketPtr pkt)
  {
      // this needs to be fixed so that the cache updates the mshr and sends the
      // packet back out on the link, but it probably won't happen so until this
@@ -1460,60 +1648,48 @@ Cache<TagStore>::MemSidePort::recvTiming(PacketPtr pkt)
      if (pkt->wasNacked())
          panic("Need to implement cache resending nacked packets!\n");
  
-    if (pkt->isRequest() && blocked) {
-        DPRINTF(Cache,"Scheduling a retry while blocked\n");
-        mustSendRetry = true;
-        return false;
-    }
-
-    if (pkt->isResponse()) {
-        myCache()->handleResponse(pkt);
-    } else {
-        myCache()->snoopTiming(pkt);
-    }
+    cache->handleResponse(pkt);
      return true;
  }
  
+// Express snooping requests to memside port
+template<class TagStore>
+void
+Cache<TagStore>::MemSidePort::recvTimingSnoopReq(PacketPtr pkt)
+{
+    // handle snooping requests
+    cache->snoopTiming(pkt);
+}
  
  template<class TagStore>
  Tick
-Cache<TagStore>::MemSidePort::recvAtomic(PacketPtr pkt)
+Cache<TagStore>::MemSidePort::recvAtomicSnoop(PacketPtr pkt)
  {
-    // in atomic mode, responses go back to the sender via the
-    // function return from sendAtomic(), not via a separate
-    // sendAtomic() from the responder.  Thus we should never see a
-    // response packet in recvAtomic() (anywhere, not just here).
-    assert(!pkt->isResponse());
-    return myCache()->snoopAtomic(pkt);
+    // atomic snoop
+    return cache->snoopAtomic(pkt);
  }
  
-
  template<class TagStore>
  void
-Cache<TagStore>::MemSidePort::recvFunctional(PacketPtr pkt)
+Cache<TagStore>::MemSidePort::recvFunctionalSnoop(PacketPtr pkt)
  {
-    myCache()->functionalAccess(pkt, this, otherPort);
+    // functional snoop (note that in contrast to atomic we don't have
+    // a specific functionalSnoop method, as they have the same
+    // behaviour regardless)
+    cache->functionalAccess(pkt, false);
  }
  
-
-
  template<class TagStore>
  void
-Cache<TagStore>::MemSidePort::sendPacket()
+Cache<TagStore>::MemSidePacketQueue::sendDeferredPacket()
  {
-    // if we have responses that are ready, they take precedence
+    // if we have a response packet waiting we have to start with that
      if (deferredPacketReady()) {
-        bool success = sendTiming(transmitList.front().pkt);
-
-        if (success) {
-            //send successful, remove packet
-            transmitList.pop_front();
-        }
-
-        waitingOnRetry = !success;
+        // use the normal approach from the timing port
+        trySendTiming();
      } else {
-        // check for non-response packets (requests & writebacks)
-        PacketPtr pkt = myCache()->getTimingPacket();
+        // check for request packets (requests & writebacks)
+        PacketPtr pkt = cache.getTimingPacket();
          if (pkt == NULL) {
              // can happen if e.g. we attempt a writeback and fail, but
              // before the retry, the writeback is eliminated because
@@ -1522,65 +1698,40 @@ Cache<TagStore>::MemSidePort::sendPacket()
          } else {
              MSHR *mshr = dynamic_cast<MSHR*>(pkt->senderState);
  
-            bool success = sendTiming(pkt);
+            waitingOnRetry = !masterPort.sendTimingReq(pkt);
  
-            waitingOnRetry = !success;
              if (waitingOnRetry) {
                  DPRINTF(CachePort, "now waiting on a retry\n");
                  if (!mshr->isForwardNoResponse()) {
+                    // we are awaiting a retry, but we
+                    // delete the packet and will be creating a new packet
+                    // when we get the opportunity
                      delete pkt;
                  }
+                // note that we have now masked any requestBus and
+                // schedSendEvent (we will wait for a retry before
+                // doing anything), and this is so even if we do not
+                // care about this packet and might override it before
+                // it gets retried
              } else {
-                myCache()->markInService(mshr);
+                cache.markInService(mshr, pkt);
              }
          }
      }
  
-
-    // tried to send packet... if it was successful (no retry), see if
-    // we need to rerequest bus or not
+    // if we succeeded and are not waiting for a retry, schedule the
+    // next send, not only looking at the response transmit list, but
+    // also considering when the next MSHR is ready
      if (!waitingOnRetry) {
-        Tick nextReady = std::min(deferredPacketReadyTime(),
-                                  myCache()->nextMSHRReadyTime());
-        // @TODO: need to facotr in prefetch requests here somehow
-        if (nextReady != MaxTick) {
-            DPRINTF(CachePort, "more packets to send @ %d\n", nextReady);
-            schedule(sendEvent, std::max(nextReady, curTick + 1));
-        } else {
-            // no more to send right now: if we're draining, we may be done
-            if (drainEvent) {
-                drainEvent->process();
-                drainEvent = NULL;
-            }
-        }
+        scheduleSend(cache.nextMSHRReadyTime());
      }
  }
  
-template<class TagStore>
-void
-Cache<TagStore>::MemSidePort::recvRetry()
-{
-    assert(waitingOnRetry);
-    sendPacket();
-}
-
-
-template<class TagStore>
-void
-Cache<TagStore>::MemSidePort::processSendEvent()
-{
-    assert(!waitingOnRetry);
-    sendPacket();
-}
-
-
  template<class TagStore>
  Cache<TagStore>::
  MemSidePort::MemSidePort(const std::string &_name, Cache<TagStore> *_cache,
                           const std::string &_label)
-    : BaseCache::CachePort(_name, _cache, _label)
+    : BaseCache::CacheMasterPort(_name, _cache, _queue),
+      _queue(*_cache, *this, _label), cache(_cache)
  {
-    // override default send event from SimpleTimingPort
-    delete sendEvent;
-    sendEvent = new SendEvent(this);
  }