cpu, mem: Make software prefetches non-blocking

author Curtis Dunham <Curtis.Dunham@arm.com>

Tue, 13 May 2014 17:20:49 +0000 (12:20 -0500)

committer Curtis Dunham <Curtis.Dunham@arm.com>

Tue, 13 May 2014 17:20:49 +0000 (12:20 -0500)
author Curtis Dunham <Curtis.Dunham@arm.com>
Tue, 13 May 2014 17:20:49 +0000 (12:20 -0500)
committer Curtis Dunham <Curtis.Dunham@arm.com>
Tue, 13 May 2014 17:20:49 +0000 (12:20 -0500)
diff --git a/src/mem/cache/blk.hh b/src/mem/cache/blk.hh

index d38281e48d25403540e6800ed6132c498421bf28..626b4818dc5f8c622a25fac478cbcc95ff2d2bbe 100644 (file)
--- a/src/mem/cache/blk.hh
+++ b/src/mem/cache/blk.hh
@@ -1,5 +1,5 @@
  /*
- * Copyright (c) 2012-2013 ARM Limited
+ * Copyright (c) 2012-2014 ARM Limited
   * All rights reserved.
   *
   * The license below extends only to copyright in the software and shall
diff --git a/src/mem/cache/cache.hh b/src/mem/cache/cache.hh

index 3d7fc8fe3bd4e509d682cc19a3e960edf9e53e18..0ee1e353a09479dcc8f6da36ee77f7b84b8d94cd 100644 (file)
--- a/src/mem/cache/cache.hh
+++ b/src/mem/cache/cache.hh
@@ -1,5 +1,5 @@
  /*
- * Copyright (c) 2012-2013 ARM Limited
+ * Copyright (c) 2012-2014 ARM Limited
   * All rights reserved.
   *
   * The license below extends only to copyright in the software and shall
diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh

index 34dacdf9f1b220428954dcd842af2888c1511f6c..91cb5a4e3ceae665e124a07280ab7d81a5883032 100644 (file)
--- a/src/mem/cache/cache_impl.hh
+++ b/src/mem/cache/cache_impl.hh
@@ -473,8 +473,15 @@ Cache<TagStore>::recvTimingReq(PacketPtr pkt)
          // @todo: someone should pay for this
          pkt->busFirstWordDelay = pkt->busLastWordDelay = 0;
  
-        // writes go in write buffer, reads use MSHR
-        if (pkt->isWrite() && !pkt->isRead()) {
+        // writes go in write buffer, reads use MSHR,
+        // prefetches are acknowledged (responded to) and dropped
+        if (pkt->cmd.isPrefetch()) {
+            // prefetching (cache loading) uncacheable data is nonsensical
+            pkt->makeTimingResponse();
+            std::memset(pkt->getPtr<uint8_t>(), 0xFF, pkt->getSize());
+            cpuSidePort->schedTimingResp(pkt, clockEdge(hitLatency));
+            return true;
+        } else if (pkt->isWrite() && !pkt->isRead()) {
              allocateWriteBuffer(pkt, time, true);
          } else {
              allocateUncachedReadBuffer(pkt, time, true);
@@ -521,7 +528,10 @@ Cache<TagStore>::recvTimingReq(PacketPtr pkt)
          if (prefetcher && (prefetchOnAccess || (blk && blk->wasPrefetched()))) {
              if (blk)
                  blk->status &= ~BlkHWPrefetched;
-            next_pf_time = prefetcher->notify(pkt, time);
+
+            // Don't notify on SWPrefetch
+            if (!pkt->cmd.isSWPrefetch())
+                next_pf_time = prefetcher->notify(pkt, time);
          }
  
          if (needsResponse) {
@@ -544,36 +554,80 @@ Cache<TagStore>::recvTimingReq(PacketPtr pkt)
          Addr blk_addr = blockAlign(pkt->getAddr());
          MSHR *mshr = mshrQueue.findMatch(blk_addr, pkt->isSecure());
  
+        // Software prefetch handling:
+        // To keep the core from waiting on data it won't look at
+        // anyway, send back a response with dummy data. Miss handling
+        // will continue asynchronously. Unfortunately, the core will
+        // insist upon freeing the original Packet/Request, so we have to
+        // create a new pair with a different lifecycle. Note that this
+        // processing happens before any MSHR munging on behalf of
+        // this request because this new Request will be the one stored
+        // into the MSHRs, not the original.
+        if (pkt->cmd.isSWPrefetch() && isTopLevel) {
+            assert(needsResponse);
+            assert(pkt->req->hasPaddr());
+
+            // There's no reason to add a prefetch as an additional target
+            // to an existing MSHR.  If an outstanding request is already
+            // in progress, there is nothing for the prefetch to do.
+            // If this is the case, we don't even create a request at all.
+            PacketPtr pf = mshr ? NULL : new Packet(pkt);
+
+            if (pf) {
+                pf->req = new Request(pkt->req->getPaddr(),
+                                      pkt->req->getSize(),
+                                      pkt->req->getFlags(),
+                                      pkt->req->masterId());
+                // The core will clean up prior senderState; we need our own.
+                pf->senderState = NULL;
+            }
+
+            pkt->makeTimingResponse();
+            // for debugging, set all the bits in the response data
+            // (also keeps valgrind from complaining when debugging settings
+            //  print out instruction results)
+            std::memset(pkt->getPtr<uint8_t>(), 0xFF, pkt->getSize());
+            cpuSidePort->schedTimingResp(pkt, clockEdge(lat));
+
+            pkt = pf;
+        }
+
          if (mshr) {
              /// MSHR hit
              /// @note writebacks will be checked in getNextMSHR()
              /// for any conflicting requests to the same block
  
              //@todo remove hw_pf here
-            assert(pkt->req->masterId() < system->maxMasters());
-            mshr_hits[pkt->cmdToIndex()][pkt->req->masterId()]++;
-            if (mshr->threadNum != 0/*pkt->req->threadId()*/) {
-                mshr->threadNum = -1;
-            }
-            mshr->allocateTarget(pkt, time, order++);
-            if (mshr->getNumTargets() == numTarget) {
-                noTargetMSHR = mshr;
-                setBlocked(Blocked_NoTargets);
-                // need to be careful with this... if this mshr isn't
-                // ready yet (i.e. time > curTick()_, we don't want to
-                // move it ahead of mshrs that are ready
-                // mshrQueue.moveToFront(mshr);
-            }
  
-            // We should call the prefetcher reguardless if the request is
-            // satisfied or not, reguardless if the request is in the MSHR or
-            // not.  The request could be a ReadReq hit, but still not
-            // satisfied (potentially because of a prior write to the same
-            // cache line.  So, even when not satisfied, tehre is an MSHR
-            // already allocated for this, we need to let the prefetcher know
-            // about the request
-            if (prefetcher) {
-                next_pf_time = prefetcher->notify(pkt, time);
+            // Coalesce unless it was a software prefetch (see above).
+            if (pkt) {
+                assert(pkt->req->masterId() < system->maxMasters());
+                mshr_hits[pkt->cmdToIndex()][pkt->req->masterId()]++;
+                if (mshr->threadNum != 0/*pkt->req->threadId()*/) {
+                    mshr->threadNum = -1;
+                }
+                mshr->allocateTarget(pkt, time, order++);
+                if (mshr->getNumTargets() == numTarget) {
+                    noTargetMSHR = mshr;
+                    setBlocked(Blocked_NoTargets);
+                    // need to be careful with this... if this mshr isn't
+                    // ready yet (i.e. time > curTick()), we don't want to
+                    // move it ahead of mshrs that are ready
+                    // mshrQueue.moveToFront(mshr);
+                }
+
+                // We should call the prefetcher regardless if the request is
+                // satisfied or not, regardless if the request is in the MSHR or
+                // not.  The request could be a ReadReq hit, but still not
+                // satisfied (potentially because of a prior write to the same
+                // cache line).  So, even when not satisfied, there is an MSHR
+                // already allocated for this, we need to let the prefetcher know
+                // about the request
+                if (prefetcher) {
+                    // Don't notify on SWPrefetch
+                    if (!pkt->cmd.isSWPrefetch())
+                        next_pf_time = prefetcher->notify(pkt, time);
+                }
              }
          } else {
              // no MSHR
@@ -609,7 +663,9 @@ Cache<TagStore>::recvTimingReq(PacketPtr pkt)
              }
  
              if (prefetcher) {
-                next_pf_time = prefetcher->notify(pkt, time);
+                // Don't notify on SWPrefetch
+                if (!pkt->cmd.isSWPrefetch())
+                    next_pf_time = prefetcher->notify(pkt, time);
              }
          }
      }
@@ -963,6 +1019,17 @@ Cache<TagStore>::recvTimingResp(PacketPtr pkt)
          switch (target->source) {
            case MSHR::Target::FromCPU:
              Tick completion_time;
+
+            // Software prefetch handling for cache closest to core
+            if (target->pkt->cmd.isSWPrefetch() && isTopLevel) {
+                // a software prefetch would have already been ack'd immediately
+                // with dummy data so the core would be able to retire it.
+                // this request completes right here, so we deallocate it.
+                delete target->pkt->req;
+                delete target->pkt;
+                break; // skip response
+            }
+
              if (is_fill) {
                  satisfyCpuSideRequest(target->pkt, blk,
                                        true, mshr->hasPostDowngrade());
diff --git a/src/mem/packet.hh b/src/mem/packet.hh

index 0c6abe909cf198883c16d60edd1c741042b2ea2d..f93725fcbfb3ddf2ff26c329d674994565083a8e 100644 (file)
--- a/src/mem/packet.hh
+++ b/src/mem/packet.hh
@@ -196,6 +196,10 @@ class MemCmd
      bool hasData() const        { return testCmdAttrib(HasData); }
      bool isReadWrite() const    { return isRead() && isWrite(); }
      bool isLLSC() const         { return testCmdAttrib(IsLlsc); }
+    bool isSWPrefetch() const   { return testCmdAttrib(IsSWPrefetch); }
+    bool isHWPrefetch() const   { return testCmdAttrib(IsHWPrefetch); }
+    bool isPrefetch() const     { return testCmdAttrib(IsSWPrefetch) ||
+                                         testCmdAttrib(IsHWPrefetch); }
      bool isError() const        { return testCmdAttrib(IsError); }
      bool isPrint() const        { return testCmdAttrib(IsPrint); }
      bool isFlush() const        { return testCmdAttrib(IsFlush); }
@@ -677,6 +681,8 @@ class Packet : public Printable
          if (cmd == MemCmd::ReadReq) {
              if (req->isLLSC()) {
                  cmd = MemCmd::LoadLockedReq;
+            } else if (req->isPrefetch()) {
+                cmd = MemCmd::SoftPFReq;
              }
          } else if (cmd == MemCmd::WriteReq) {
              if (req->isLLSC()) {
author	Curtis Dunham <Curtis.Dunham@arm.com>
	Tue, 13 May 2014 17:20:49 +0000 (12:20 -0500)
committer	Curtis Dunham <Curtis.Dunham@arm.com>
	Tue, 13 May 2014 17:20:49 +0000 (12:20 -0500)
src/mem/cache/blk.hh		patch \| blob \| history
src/mem/cache/cache.hh		patch \| blob \| history
src/mem/cache/cache_impl.hh		patch \| blob \| history
src/mem/packet.hh		patch \| blob \| history