mem: track per-request latencies and access depths in the cache hierarchy

author Matt Horsnell <matt.horsnell@ARM.com>

Fri, 24 Jan 2014 21:29:30 +0000 (15:29 -0600)

committer Matt Horsnell <matt.horsnell@ARM.com>

Fri, 24 Jan 2014 21:29:30 +0000 (15:29 -0600)
author Matt Horsnell <matt.horsnell@ARM.com>
Fri, 24 Jan 2014 21:29:30 +0000 (15:29 -0600)
committer Matt Horsnell <matt.horsnell@ARM.com>
Fri, 24 Jan 2014 21:29:30 +0000 (15:29 -0600)
diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh

index b35dd80f391a6e8de7be5293ef657008f913142f..b121ba70733877f9860481ab0c7bcbf90e3dcbc1 100644 (file)
--- a/src/cpu/o3/fetch_impl.hh
+++ b/src/cpu/o3/fetch_impl.hh
@@ -400,6 +400,7 @@ DefaultFetch<Impl>::processCacheCompletion(PacketPtr pkt)
          fetchStatus[tid] = IcacheAccessComplete;
      }
  
+    pkt->req->setAccessLatency();
      // Reset the mem req to NULL.
      delete pkt->req;
      delete pkt;
diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh

index 77b67ac6992512cbfaab6fd96d95b0da56617347..ade0769951fc1382ab5ed26c12e14e3ff0573f53 100644 (file)
--- a/src/cpu/o3/lsq_unit_impl.hh
+++ b/src/cpu/o3/lsq_unit_impl.hh
@@ -129,6 +129,8 @@ LSQUnit<Impl>::completeDataAccess(PacketPtr pkt)
          delete state->mainPkt->req;
          delete state->mainPkt;
      }
+
+    pkt->req->setAccessLatency();
      delete state;
      delete pkt->req;
      delete pkt;
diff --git a/src/cpu/simple/timing.cc b/src/cpu/simple/timing.cc

index 744bf839773a3cf0cebd3d00a95751c6a98d86ea..9253d8005fe5f3613ff356affa050da1ac54e632 100644 (file)
--- a/src/cpu/simple/timing.cc
+++ b/src/cpu/simple/timing.cc
@@ -646,7 +646,6 @@ TimingSimpleCPU::completeIfetch(PacketPtr pkt)
  
      // received a response from the icache: execute the received
      // instruction
-
      assert(!pkt || !pkt->isError());
      assert(_status == IcacheWaitResponse);
  
@@ -655,6 +654,10 @@ TimingSimpleCPU::completeIfetch(PacketPtr pkt)
      numCycles += curCycle() - previousCycle;
      previousCycle = curCycle();
  
+    if (pkt)
+        pkt->req->setAccessLatency();
+
+
      preExecute();
      if (curStaticInst && curStaticInst->isMemRef()) {
          // load or store: just send to dcache
@@ -749,6 +752,7 @@ TimingSimpleCPU::completeDataAccess(PacketPtr pkt)
      assert(_status == DcacheWaitResponse || _status == DTBWaitResponse ||
             pkt->req->getFlags().isSet(Request::NO_ACCESS));
  
+    pkt->req->setAccessLatency();
      numCycles += curCycle() - previousCycle;
      previousCycle = curCycle();
  
diff --git a/src/cpu/translation.hh b/src/cpu/translation.hh

index 90fffa03d56525116dce21ad5cc8251e31cd6079..c05cc86a04060e5db88eca81686917fbdcb1fb85 100644 (file)
--- a/src/cpu/translation.hh
+++ b/src/cpu/translation.hh
@@ -256,6 +256,7 @@ class DataTranslation : public BaseTLB::Translation
          assert(mode == state->mode);
          if (state->finish(fault, index)) {
              xc->finishTranslation(state);
+            req->setTranslateLatency();
          }
          delete this;
      }
diff --git a/src/mem/cache/base.hh b/src/mem/cache/base.hh

index dce40e9157217cc279927cbbc60337b9732ee419..50ba396b27c7e9506b6869742cad277501bd6cca 100644 (file)
--- a/src/mem/cache/base.hh
+++ b/src/mem/cache/base.hh
@@ -568,7 +568,7 @@ class BaseCache : public MemObject
      {
          assert(pkt->req->masterId() < system->maxMasters());
          misses[pkt->cmdToIndex()][pkt->req->masterId()]++;
-
+        pkt->req->incAccessDepth();
          if (missCount) {
              --missCount;
              if (missCount == 0)
diff --git a/src/mem/request.hh b/src/mem/request.hh

index a0ff50910663b92a870e5190bde431382c904bd4..54b671645cccffb1fba835490f807bd9be235988 100644 (file)
--- a/src/mem/request.hh
+++ b/src/mem/request.hh
@@ -1,5 +1,5 @@
  /*
- * Copyright (c) 2012 ARM Limited
+ * Copyright (c) 2012-2013 ARM Limited
   * All rights reserved
   *
   * The license below extends only to copyright in the software and shall
@@ -244,6 +244,7 @@ class Request
       *  default constructor.)
       */
      Request()
+        : translateDelta(0), accessDelta(0), depth(0)
      {}
  
      /**
@@ -304,6 +305,9 @@ class Request
          _flags.set(flags);
          privateFlags.clear(~STICKY_PRIVATE_FLAGS);
          privateFlags.set(VALID_PADDR|VALID_SIZE);
+        depth = 0;
+        accessDelta = 0;
+        //translateDelta = 0;
      }
  
      void
@@ -331,6 +335,9 @@ class Request
          _flags.set(flags);
          privateFlags.clear(~STICKY_PRIVATE_FLAGS);
          privateFlags.set(VALID_VADDR|VALID_SIZE|VALID_PC);
+        depth = 0;
+        accessDelta = 0;
+        translateDelta = 0;
      }
  
      /**
@@ -381,6 +388,23 @@ class Request
          return _paddr;
      }
  
+    /**
+     * Time for the TLB/table walker to successfully translate this request.
+     */
+    Tick translateDelta;
+
+    /**
+     * Access latency to complete this memory transaction not including
+     * translation time.
+     */
+    Tick accessDelta;
+
+    /**
+     * Level of the cache hierarchy where this request was responded to
+     * (e.g. 0 = L1; 1 = L2).
+     */
+    int depth;
+
      /**
       *  Accessor for size.
       */
@@ -535,6 +559,26 @@ class Request
          return _pc;
      }
  
+    /**
+     * Increment/Get the depth at which this request is responded to. The
+     * depth is currently incremented on each miss in any cache level.
+     */
+    void incAccessDepth() { depth++; }
+    int getAccessDepth() const { return depth; }
+
+    /**
+     * Set/Get the time taken for this request to be successfully translated.
+     */
+    void setTranslateLatency() { translateDelta = curTick() - _time; }
+    Tick getTranslateLatency() const { return translateDelta; }
+
+    /**
+     * Set/Get the time taken to complete this request's access, not including
+     *  the time to successfully translate the request.
+     */
+    void setAccessLatency() { accessDelta = curTick() - _time - translateDelta; }
+    Tick getAccessLatency() const { return accessDelta; }
+
      /** Accessor functions for flags.  Note that these are for testing
         only; setting flags should be done via setFlags(). */
      bool isUncacheable() const { return _flags.isSet(UNCACHEABLE); }
author	Matt Horsnell <matt.horsnell@ARM.com>
	Fri, 24 Jan 2014 21:29:30 +0000 (15:29 -0600)
committer	Matt Horsnell <matt.horsnell@ARM.com>
	Fri, 24 Jan 2014 21:29:30 +0000 (15:29 -0600)
src/cpu/o3/fetch_impl.hh		patch \| blob \| history
src/cpu/o3/lsq_unit_impl.hh		patch \| blob \| history
src/cpu/simple/timing.cc		patch \| blob \| history
src/cpu/translation.hh		patch \| blob \| history
src/mem/cache/base.hh		patch \| blob \| history
src/mem/request.hh		patch \| blob \| history