mem: Clarify usage of latency in the cache

author Marco Balboni <Marco.Balboni@ARM.com>

Wed, 11 Feb 2015 15:23:36 +0000 (10:23 -0500)

committer Marco Balboni <Marco.Balboni@ARM.com>

Wed, 11 Feb 2015 15:23:36 +0000 (10:23 -0500)
author Marco Balboni <Marco.Balboni@ARM.com>
Wed, 11 Feb 2015 15:23:36 +0000 (10:23 -0500)
committer Marco Balboni <Marco.Balboni@ARM.com>
Wed, 11 Feb 2015 15:23:36 +0000 (10:23 -0500)
diff --git a/src/mem/cache/base.cc b/src/mem/cache/base.cc

index d89517b9ce333165a8829e47cde7cdae84f3f0ec..78e2ca9abec3d0763a21822879c8d58e070f2678 100644 (file)
--- a/src/mem/cache/base.cc
+++ b/src/mem/cache/base.cc
@@ -72,7 +72,9 @@ BaseCache::BaseCache(const Params *p)
        writeBuffer("write buffer", p->write_buffers, p->mshrs+1000, 0,
                    MSHRQueue_WriteBuffer),
        blkSize(p->system->cacheLineSize()),
-      hitLatency(p->hit_latency),
+      lookupLatency(p->hit_latency),
+      forwardLatency(p->hit_latency),
+      fillLatency(p->response_latency),
        responseLatency(p->response_latency),
        numTarget(p->tgts_per_mshr),
        forwardSnoops(p->forward_snoops),
diff --git a/src/mem/cache/base.hh b/src/mem/cache/base.hh

index 0be6b79447d7b65b646a6bcc315248cee92bf04d..beb81896130e905a7a9284f33271a44f589f4d32 100644 (file)
--- a/src/mem/cache/base.hh
+++ b/src/mem/cache/base.hh
@@ -1,5 +1,5 @@
  /*
- * Copyright (c) 2012-2013 ARM Limited
+ * Copyright (c) 2012-2013, 2015 ARM Limited
   * All rights reserved.
   *
   * The license below extends only to copyright in the software and shall
@@ -202,6 +202,17 @@ class BaseCache : public MemObject
      /** Write/writeback buffer */
      MSHRQueue writeBuffer;
  
+    /**
+     * Allocate a buffer, passing the time indicating when to schedule an
+     * event to the queued port to go and ask the MSHR and write queue
+     * if they have packets to send.
+     *
+     * allocateBufferInternal() function is called in:
+     * - MSHR allocateWriteBuffer (uncached write forwarded to WriteBuffer);
+     * - MSHR allocateMissBuffer (cacheable miss in MSHR queue);
+     * - MSHR allocateUncachedReadBuffer (uncached read allocated in MSHR
+     *   queue)
+     */
      MSHR *allocateBufferInternal(MSHRQueue *mq, Addr addr, int size,
                                   PacketPtr pkt, Tick time, bool requestBus)
      {
@@ -251,15 +262,25 @@ class BaseCache : public MemObject
      const unsigned blkSize;
  
      /**
-     * The latency of a hit in this device.
+     * The latency of tag lookup of a cache. It occurs when there is
+     * an access to the cache.
       */
-    const Cycles hitLatency;
+    const Cycles lookupLatency;
+
+    /**
+     * This is the forward latency of the cache. It occurs when there
+     * is a cache miss and a request is forwarded downstream, in
+     * particular an outbound miss.
+     */
+    const Cycles forwardLatency;
+
+    /** The latency to fill a cache block */
+    const Cycles fillLatency;
  
      /**
-     * The latency of sending reponse to its upper level cache/core on a
-     * linefill. In most contemporary processors, the return path on a cache
-     * miss is much quicker that the hit latency. The responseLatency parameter
-     * tries to capture this latency.
+     * The latency of sending a response to its upper level cache/core on
+     * a linefill. The responseLatency parameter captures this
+     * latency.
       */
      const Cycles responseLatency;
  
diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh

index c671deb683b07a4c122e2d2892073f7bf70e8cd1..2fb0baaa4484d3fd0b6d6008810bf1284cbd5c90 100644 (file)
--- a/src/mem/cache/cache_impl.hh
+++ b/src/mem/cache/cache_impl.hh
@@ -1,5 +1,5 @@
  /*
- * Copyright (c) 2010-2014 ARM Limited
+ * Copyright (c) 2010-2015 ARM Limited
   * All rights reserved.
   *
   * The license below extends only to copyright in the software and shall
@@ -314,11 +314,14 @@ Cache<TagStore>::access(PacketPtr pkt, BlkType *&blk,
      if (pkt->req->isUncacheable()) {
          uncacheableFlush(pkt);
          blk = NULL;
-        lat = hitLatency;
+        // lookupLatency is the latency in case the request is uncacheable.
+        lat = lookupLatency;
          return false;
      }
  
      int id = pkt->req->hasContextId() ? pkt->req->contextId() : -1;
+    // Here lat is the value passed as parameter to accessBlock() function
+    // that can modify its value.
      blk = tags->accessBlock(pkt->getAddr(), pkt->isSecure(), lat, id);
  
      DPRINTF(Cache, "%s%s %x (%s) %s %s\n", pkt->cmdString(),
@@ -392,7 +395,6 @@ Cache<TagStore>::recvTimingSnoopResp(PacketPtr pkt)
  {
      DPRINTF(Cache, "%s for %s address %x size %d\n", __func__,
              pkt->cmdString(), pkt->getAddr(), pkt->getSize());
-    Tick time = clockEdge(hitLatency);
  
      assert(pkt->isResponse());
  
@@ -418,7 +420,10 @@ Cache<TagStore>::recvTimingSnoopResp(PacketPtr pkt)
      delete rec;
      // @todo someone should pay for this
      pkt->firstWordDelay = pkt->lastWordDelay = 0;
-    memSidePort->schedTimingSnoopResp(pkt, time);
+    // forwardLatency is set here because there is a response from an
+    // upper level cache.
+    memSidePort->schedTimingSnoopResp(pkt, clockEdge(forwardLatency));
+
  }
  
  template<class TagStore>
@@ -449,9 +454,6 @@ Cache<TagStore>::recvTimingReq(PacketPtr pkt)
          delete pendingDelete[x];
      pendingDelete.clear();
  
-    // we charge hitLatency for doing just about anything here
-    Tick time = clockEdge(hitLatency);
-
      assert(pkt->isRequest());
  
      // Just forward the packet if caches are disabled.
@@ -527,21 +529,34 @@ Cache<TagStore>::recvTimingReq(PacketPtr pkt)
              // prefetching (cache loading) uncacheable data is nonsensical
              pkt->makeTimingResponse();
              std::memset(pkt->getPtr<uint8_t>(), 0xFF, pkt->getSize());
-            cpuSidePort->schedTimingResp(pkt, clockEdge(hitLatency));
+            // We use lookupLatency here because the request is uncacheable
+            cpuSidePort->schedTimingResp(pkt, clockEdge(lookupLatency));
              return true;
          } else if (pkt->isWrite() && !pkt->isRead()) {
-            allocateWriteBuffer(pkt, time, true);
+            // We use forwardLatency here because there is an uncached
+            // memory write, forwarded to WriteBuffer. It specifies the
+            // latency to allocate an internal buffer and to schedule an
+            // event to the queued port.
+            allocateWriteBuffer(pkt, clockEdge(forwardLatency), true);
          } else {
-            allocateUncachedReadBuffer(pkt, time, true);
+            // We use forwardLatency here because there is an uncached
+            // memory read, allocated to MSHR queue (it requires the same
+            // time of forwarding to WriteBuffer, in our assumption). It
+            // specifies the latency to allocate an internal buffer and to
+            // schedule an event to the queued port.
+            allocateUncachedReadBuffer(pkt, clockEdge(forwardLatency), true);
          }
          assert(pkt->needsResponse()); // else we should delete it here??
          return true;
      }
  
-    Cycles lat = hitLatency;
+    // We use lookupLatency here because it is used to specify the latency
+    // to access.
+    Cycles lat = lookupLatency;
      BlkType *blk = NULL;
      PacketList writebacks;
-
+    // Note that lat is passed by reference here. The function access() calls
+    // accessBlock() which can modify lat value.
      bool satisfied = access(pkt, blk, lat, writebacks);
  
      // track time of availability of next prefetch, if any
@@ -565,6 +580,13 @@ Cache<TagStore>::recvTimingReq(PacketPtr pkt)
              pkt->makeTimingResponse();
              // @todo: Make someone pay for this
              pkt->firstWordDelay = pkt->lastWordDelay = 0;
+
+            // In this case we are considering lat neglecting
+            // responseLatency, modelling hit latency just as
+            // lookupLatency. We pass lat by reference to access(),
+            // which calls accessBlock() function. If it is a hit,
+            // accessBlock() can modify lat to override the
+            // lookupLatency value.
              cpuSidePort->schedTimingResp(pkt, clockEdge(lat));
          } else {
              /// @todo nominally we should just delete the packet here,
@@ -638,7 +660,12 @@ Cache<TagStore>::recvTimingReq(PacketPtr pkt)
                  if (mshr->threadNum != 0/*pkt->req->threadId()*/) {
                      mshr->threadNum = -1;
                  }
-                mshr->allocateTarget(pkt, time, order++);
+                // We use forwardLatency here because it is the same
+                // considering new targets. We have multiple requests for the
+                // same address here. It specifies the latency to allocate an
+                // internal buffer and to schedule an event to the queued
+                // port.
+                mshr->allocateTarget(pkt, clockEdge(forwardLatency), order++);
                  if (mshr->getNumTargets() == numTarget) {
                      noTargetMSHR = mshr;
                      setBlocked(Blocked_NoTargets);
@@ -669,7 +696,11 @@ Cache<TagStore>::recvTimingReq(PacketPtr pkt)
              // no-write-allocate or bypass accesses this will have to
              // be changed.
              if (pkt->cmd == MemCmd::Writeback) {
-                allocateWriteBuffer(pkt, time, true);
+                // We use forwardLatency here because there is an
+                // uncached memory write, forwarded to WriteBuffer. It
+                // specifies the latency to allocate an internal buffer and to
+                // schedule an event to the queued port.
+                allocateWriteBuffer(pkt, clockEdge(forwardLatency), true);
              } else {
                  if (blk && blk->isValid()) {
                      // If we have a write miss to a valid block, we
@@ -691,8 +722,13 @@ Cache<TagStore>::recvTimingReq(PacketPtr pkt)
                      assert(!blk->isWritable());
                      blk->status &= ~BlkReadable;
                  }
-
-                allocateMissBuffer(pkt, time, true);
+                // Here we are using forwardLatency, modelling the latency of
+                // a miss (outbound) just as forwardLatency, neglecting the
+                // lookupLatency component. In this case this latency value
+                // specifies the latency to allocate an internal buffer and to
+                // schedule an event to the queued port, when a cacheable miss
+                // is forwarded to MSHR queue.
+                allocateMissBuffer(pkt, clockEdge(forwardLatency), true);
              }
  
              if (prefetcher) {
@@ -702,14 +738,17 @@ Cache<TagStore>::recvTimingReq(PacketPtr pkt)
              }
          }
      }
-
+    // Here we consider just forward latency.
      if (next_pf_time != MaxTick)
-        requestMemSideBus(Request_PF, std::max(time, next_pf_time));
-
+        requestMemSideBus(Request_PF, std::max(clockEdge(forwardLatency),
+                                                next_pf_time));
      // copy writebacks to write buffer
      while (!writebacks.empty()) {
          PacketPtr wbPkt = writebacks.front();
-        allocateWriteBuffer(wbPkt, time, true);
+        // We use forwardLatency here because we are copying writebacks
+        // to write buffer. It specifies the latency to allocate an internal
+        // buffer and to schedule an event to the queued port.
+        allocateWriteBuffer(wbPkt, clockEdge(forwardLatency), true);
          writebacks.pop_front();
      }
  
@@ -778,8 +817,8 @@ template<class TagStore>
  Tick
  Cache<TagStore>::recvAtomic(PacketPtr pkt)
  {
-    Cycles lat = hitLatency;
-
+    // We are in atomic mode so we pay just for lookupLatency here.
+    Cycles lat = lookupLatency;
      // @TODO: make this a parameter
      bool last_level_cache = false;
  
@@ -996,7 +1035,6 @@ Cache<TagStore>::recvTimingResp(PacketPtr pkt)
  {
      assert(pkt->isResponse());
  
-    Tick time = clockEdge(hitLatency);
      MSHR *mshr = dynamic_cast<MSHR*>(pkt->senderState);
      bool is_error = pkt->isError();
  
@@ -1221,13 +1259,18 @@ Cache<TagStore>::recvTimingResp(PacketPtr pkt)
      // copy writebacks to write buffer
      while (!writebacks.empty()) {
          PacketPtr wbPkt = writebacks.front();
-        allocateWriteBuffer(wbPkt, time, true);
+        allocateWriteBuffer(wbPkt, clockEdge(forwardLatency), true);
          writebacks.pop_front();
      }
      // if we used temp block, clear it out
      if (blk == tempBlock) {
          if (blk->isDirty()) {
-            allocateWriteBuffer(writebackBlk(blk), time, true);
+            // We use forwardLatency here because we are copying
+            // writebacks to write buffer. It specifies the latency to
+            // allocate an internal buffer and to schedule an event to the
+            // queued port.
+            allocateWriteBuffer(writebackBlk(blk), clockEdge(forwardLatency),
+                                 true);
          }
          blk->invalidate();
      }
@@ -1467,8 +1510,8 @@ Cache<TagStore>::handleFill(PacketPtr pkt, BlkType *blk,
          assert(pkt->hasData());
          std::memcpy(blk->data, pkt->getConstPtr<uint8_t>(), blkSize);
      }
-
-    blk->whenReady = clockEdge() + responseLatency * clockPeriod() +
+    // We pay for fillLatency here.
+    blk->whenReady = clockEdge() + fillLatency * clockPeriod() +
          pkt->lastWordDelay;
  
      return blk;
@@ -1521,7 +1564,8 @@ doTimingSupplyResponse(PacketPtr req_pkt, const uint8_t *blk_data,
      }
      DPRINTF(Cache, "%s created response: %s address %x size %d\n",
              __func__, pkt->cmdString(), pkt->getAddr(), pkt->getSize());
-    memSidePort->schedTimingSnoopResp(pkt, clockEdge(hitLatency));
+    // We model a snoop just considering forwardLatency
+    memSidePort->schedTimingSnoopResp(pkt, clockEdge(forwardLatency));
  }
  
  template<class TagStore>
@@ -1794,7 +1838,8 @@ Cache<TagStore>::recvAtomicSnoop(PacketPtr pkt)
  
      BlkType *blk = tags->findBlock(pkt->getAddr(), pkt->isSecure());
      handleSnoop(pkt, blk, false, false, false);
-    return hitLatency * clockPeriod();
+    // We consider forwardLatency here because a snoop occurs in atomic mode
+    return forwardLatency * clockPeriod();
  }
  
  
diff --git a/src/mem/cache/tags/base.cc b/src/mem/cache/tags/base.cc

index 47a43fb7ec55dd6534497d2857076e6b09802e9e..8d2322e5142768f7bdcbe6896c950da6d6fb0777 100644 (file)
--- a/src/mem/cache/tags/base.cc
+++ b/src/mem/cache/tags/base.cc
@@ -55,7 +55,7 @@ using namespace std;
  
  BaseTags::BaseTags(const Params *p)
      : ClockedObject(p), blkSize(p->block_size), size(p->size),
-      hitLatency(p->hit_latency), cache(nullptr), warmupBound(0),
+      accessLatency(p->hit_latency), cache(nullptr), warmupBound(0),
        warmedUp(false), numBlocks(0)
  {
  }
diff --git a/src/mem/cache/tags/base.hh b/src/mem/cache/tags/base.hh

index 9e1fb197219af26edbbdd26589589923efb63ae2..03b6cfed83d2b354932a8b8f957187ec8ef20a62 100644 (file)
--- a/src/mem/cache/tags/base.hh
+++ b/src/mem/cache/tags/base.hh
@@ -68,9 +68,8 @@ class BaseTags : public ClockedObject
      const unsigned blkSize;
      /** The size of the cache. */
      const unsigned size;
-    /** The hit latency of the cache. */
-    const Cycles hitLatency;
-
+    /** The access latency of the cache. */
+    const Cycles accessLatency;
      /** Pointer to the parent cache. */
      BaseCache *cache;
  
diff --git a/src/mem/cache/tags/base_set_assoc.cc b/src/mem/cache/tags/base_set_assoc.cc

index bb0c20141e5398141d18c6a0edd82dec00233f01..3c8371edb7f66ad30cabd5229e75b26db8412ec1 100644 (file)
--- a/src/mem/cache/tags/base_set_assoc.cc
+++ b/src/mem/cache/tags/base_set_assoc.cc
@@ -68,9 +68,6 @@ BaseSetAssoc::BaseSetAssoc(const Params *p)
      if (assoc <= 0) {
          fatal("associativity must be greater than zero");
      }
-    if (hitLatency <= 0) {
-        fatal("access latency must be greater than zero");
-    }
  
      blkMask = blkSize - 1;
      setShift = floorLog2(blkSize);
diff --git a/src/mem/cache/tags/base_set_assoc.hh b/src/mem/cache/tags/base_set_assoc.hh

index ac575d2ffb3057c18ad5e62cc692579cff115392..0107aafafb74c28185dc2ac992dc1403733b382f 100644 (file)
--- a/src/mem/cache/tags/base_set_assoc.hh
+++ b/src/mem/cache/tags/base_set_assoc.hh
@@ -178,7 +178,7 @@ public:
          Addr tag = extractTag(addr);
          int set = extractSet(addr);
          BlkType *blk = sets[set].findBlk(tag, is_secure);
-        lat = hitLatency;
+        lat = accessLatency;;
  
          // Access all tags in parallel, hence one in each way.  The data side
          // either accesses all blocks in parallel, or one block sequentially on
@@ -195,7 +195,7 @@ public:
          if (blk != NULL) {
              if (blk->whenReady > curTick()
                  && cache->ticksToCycles(blk->whenReady - curTick())
-                > hitLatency) {
+                > accessLatency) {
                  lat = cache->ticksToCycles(blk->whenReady - curTick());
              }
              blk->refCount += 1;
@@ -342,14 +342,6 @@ public:
          return ((tag << tagShift) | ((Addr)set << setShift));
      }
  
-    /**
-     * Return the hit latency.
-     * @return the hit latency.
-     */
-    Cycles getHitLatency() const
-    {
-        return hitLatency;
-    }
      /**
       *iterated through all blocks and clear all locks
       *Needed to clear all lock tracking at once
diff --git a/src/mem/cache/tags/fa_lru.cc b/src/mem/cache/tags/fa_lru.cc

index 6a63da673c78666fbf45328dea906d7688a13ff0..ffe2cbf252af5a7935a8b7275966bb7d838ad9d1 100644 (file)
--- a/src/mem/cache/tags/fa_lru.cc
+++ b/src/mem/cache/tags/fa_lru.cc
@@ -60,8 +60,6 @@ FALRU::FALRU(const Params *p)
      if (!isPowerOf2(blkSize))
          fatal("cache block size (in bytes) `%d' must be a power of two",
                blkSize);
-    if (!(hitLatency > 0))
-        fatal("Access latency in cycles must be at least one cycle");
      if (!isPowerOf2(size))
          fatal("Cache Size must be power of 2 for now");
  
@@ -202,7 +200,7 @@ FALRU::accessBlock(Addr addr, bool is_secure, Cycles &lat, int context_src,
          *inCache = tmp_in_cache;
      }
  
-    lat = hitLatency;
+    lat = accessLatency;
      //assert(check());
      return blk;
  }
diff --git a/src/mem/cache/tags/fa_lru.hh b/src/mem/cache/tags/fa_lru.hh

index ef13b2c79ee5af61946f4950ec1c99b9051a20a8..07a31c154495fa67867c814ffc4453bbee2aa032 100644 (file)
--- a/src/mem/cache/tags/fa_lru.hh
+++ b/src/mem/cache/tags/fa_lru.hh
@@ -209,15 +209,6 @@ public:
  
      void insertBlock(PacketPtr pkt, BlkType *blk);
  
-    /**
-     * Return the hit latency of this cache.
-     * @return The hit latency.
-     */
-    Cycles getHitLatency() const
-    {
-        return hitLatency;
-    }
-
      /**
       * Return the block size of this cache.
       * @return The block size.
author	Marco Balboni <Marco.Balboni@ARM.com>
	Wed, 11 Feb 2015 15:23:36 +0000 (10:23 -0500)
committer	Marco Balboni <Marco.Balboni@ARM.com>
	Wed, 11 Feb 2015 15:23:36 +0000 (10:23 -0500)
src/mem/cache/base.cc		patch \| blob \| history
src/mem/cache/base.hh		patch \| blob \| history
src/mem/cache/cache_impl.hh		patch \| blob \| history
src/mem/cache/tags/base.cc		patch \| blob \| history
src/mem/cache/tags/base.hh		patch \| blob \| history
src/mem/cache/tags/base_set_assoc.cc		patch \| blob \| history
src/mem/cache/tags/base_set_assoc.hh		patch \| blob \| history
src/mem/cache/tags/fa_lru.cc		patch \| blob \| history
src/mem/cache/tags/fa_lru.hh		patch \| blob \| history