cache: Make caches sharing-aware and add occupancy stats.

author Lisa Hsu <Lisa.Hsu@amd.com>

Tue, 23 Feb 2010 17:34:22 +0000 (09:34 -0800)

committer Lisa Hsu <Lisa.Hsu@amd.com>

Tue, 23 Feb 2010 17:34:22 +0000 (09:34 -0800)
author Lisa Hsu <Lisa.Hsu@amd.com>
Tue, 23 Feb 2010 17:34:22 +0000 (09:34 -0800)
committer Lisa Hsu <Lisa.Hsu@amd.com>
Tue, 23 Feb 2010 17:34:22 +0000 (09:34 -0800)
diff --git a/configs/example/se.py b/configs/example/se.py

index c490ed6b611c551ed9696ae56fabac148d0c4655..7c09bcc5c04b377d1ae87151010541eff14fea76 100644 (file)
--- a/configs/example/se.py
+++ b/configs/example/se.py
@@ -151,6 +151,7 @@ if options.l2cache:
      system.tol2bus = Bus()
      system.l2.cpu_side = system.tol2bus.port
      system.l2.mem_side = system.membus.port
+    system.l2.num_cpus = np
  
  for i in xrange(np):
      if options.caches:
diff --git a/src/mem/cache/BaseCache.py b/src/mem/cache/BaseCache.py

index 5ded0540088c1575fd088d79a68fc13c85dca8e7..dffac2234f224e19edff14e9cd81a6dd8ef99a6f 100644 (file)
--- a/src/mem/cache/BaseCache.py
+++ b/src/mem/cache/BaseCache.py
@@ -44,6 +44,7 @@ class BaseCache(MemObject):
      prioritizeRequests = Param.Bool(False,
          "always service demand misses first")
      repl = Param.Repl(NULL, "replacement policy")
+    num_cpus =  Param.Int(1, "number of cpus sharing this cache")
      size = Param.MemorySize("capacity in bytes")
      forward_snoops = Param.Bool(True,
          "forward snoops from mem side to cpu side")
diff --git a/src/mem/cache/base.cc b/src/mem/cache/base.cc

index fe1f580bddf8a20c8cd979be4e79f185970d6eb7..70bc51cda400b04a93c065e573ff0958c08561aa 100644 (file)
--- a/src/mem/cache/base.cc
+++ b/src/mem/cache/base.cc
@@ -62,7 +62,8 @@ BaseCache::BaseCache(const Params *p)
        noTargetMSHR(NULL),
        missCount(p->max_miss_count),
        drainEvent(NULL),
-      addrRange(p->addr_range)
+      addrRange(p->addr_range),
+      _numCpus(p->num_cpus)
  {
  }
  
@@ -148,7 +149,11 @@ BaseCache::regStats()
          const string &cstr = cmd.toString();
  
          hits[access_idx]
-            .init(maxThreadsPerCPU)
+#if FULL_SYSTEM
+            .init(_numCpus + 1)
+#else
+            .init(_numCpus)
+#endif
              .name(name() + "." + cstr + "_hits")
              .desc("number of " + cstr + " hits")
              .flags(total | nozero | nonan)
@@ -185,7 +190,11 @@ BaseCache::regStats()
          const string &cstr = cmd.toString();
  
          misses[access_idx]
-            .init(maxThreadsPerCPU)
+#if FULL_SYSTEM
+            .init(_numCpus + 1)
+#else
+            .init(_numCpus)
+#endif
              .name(name() + "." + cstr + "_misses")
              .desc("number of " + cstr + " misses")
              .flags(total | nozero | nonan)
diff --git a/src/mem/cache/base.hh b/src/mem/cache/base.hh

index c245fecd24e6235ab319069b2c106a55dd76c5d2..62e8ae1260be222414902871d8705a4646b4aec9 100644 (file)
--- a/src/mem/cache/base.hh
+++ b/src/mem/cache/base.hh
@@ -47,6 +47,7 @@
  #include "base/statistics.hh"
  #include "base/trace.hh"
  #include "base/types.hh"
+#include "config/full_system.hh"
  #include "mem/cache/mshr_queue.hh"
  #include "mem/mem_object.hh"
  #include "mem/packet.hh"
@@ -219,7 +220,11 @@ class BaseCache : public MemObject
       * Normally this is all possible memory addresses. */
      Range<Addr> addrRange;
  
+    /** number of cpus sharing this cache - from config file */
+    int _numCpus;
+
    public:
+    int numCpus() { return _numCpus; }
      // Statistics
      /**
       * @addtogroup CacheStatistics
@@ -481,9 +486,25 @@ class BaseCache : public MemObject
  
      virtual bool inMissQueue(Addr addr) = 0;
  
-    void incMissCount(PacketPtr pkt)
+    void incMissCount(PacketPtr pkt, int id)
      {
-        misses[pkt->cmdToIndex()][0/*pkt->req->threadId()*/]++;
+
+        if (pkt->cmd == MemCmd::Writeback) {
+            assert(id == -1);
+            misses[pkt->cmdToIndex()][0]++;
+            /* same thing for writeback hits as misses - no context id
+             * available, meanwhile writeback hit/miss stats are not used
+             * in any aggregate hit/miss calculations, so just lump them all
+             * in bucket 0 */
+#if FULL_SYSTEM
+        } else if (id == -1) {
+            // Device accesses have id -1
+            // lump device accesses into their own bucket
+            misses[pkt->cmdToIndex()][_numCpus]++;
+#endif
+        } else {
+            misses[pkt->cmdToIndex()][id % _numCpus]++;
+        }
  
          if (missCount) {
              --missCount;
@@ -491,6 +512,29 @@ class BaseCache : public MemObject
                  exitSimLoop("A cache reached the maximum miss count");
          }
      }
+    void incHitCount(PacketPtr pkt, int id)
+    {
+
+        /* Writeback requests don't have a context id associated with
+         * them, so attributing a hit to a -1 context id is obviously a
+         * problem.  I've noticed in the stats that hits are split into
+         * demand and non-demand hits - neither of which include writeback
+         * hits, so here, I'll just put the writeback hits into bucket 0
+         * since it won't mess with any other stats -hsul */
+        if (pkt->cmd == MemCmd::Writeback) {
+            assert(id == -1);
+            hits[pkt->cmdToIndex()][0]++;
+#if FULL_SYSTEM
+        } else if (id == -1) {
+            // Device accesses have id -1
+            // lump device accesses into their own bucket
+            hits[pkt->cmdToIndex()][_numCpus]++;
+#endif
+        } else {
+            /* the % is necessary in case there are switch cpus */
+            hits[pkt->cmdToIndex()][id % _numCpus]++;
+        }
+    }
  
  };
  
diff --git a/src/mem/cache/blk.hh b/src/mem/cache/blk.hh

index 4f023e848ed57af6ed4c50037914d7e1a4f9fdd5..bf78a226876628dbe99cfff2261b710a2ff585e7 100644 (file)
--- a/src/mem/cache/blk.hh
+++ b/src/mem/cache/blk.hh
@@ -104,6 +104,9 @@ class CacheBlk
      /** Number of references to this block since it was brought in. */
      int refCount;
  
+    /** holds the context source ID of the requestor for this block. */
+    int contextSrc;
+
    protected:
      /**
       * Represents that the indicated thread context has a "lock" on
@@ -133,7 +136,7 @@ class CacheBlk
  
      CacheBlk()
          : asid(-1), tag(0), data(0) ,size(0), status(0), whenReady(0),
-          set(-1), isTouched(false), refCount(0)
+          set(-1), isTouched(false), refCount(0), contextSrc(-1)
      {}
  
      /**
diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh

index 2397a17c52ffe427354e5f88c3a404629838036f..206361f8833709db0bc5c38404c213ae2c6dca8c 100644 (file)
--- a/src/mem/cache/cache_impl.hh
+++ b/src/mem/cache/cache_impl.hh
@@ -277,7 +277,7 @@ Cache<TagStore>::access(PacketPtr pkt, BlkType *&blk,
  
          if (pkt->needsExclusive() ? blk->isWritable() : blk->isReadable()) {
              // OK to satisfy access
-            hits[pkt->cmdToIndex()][0/*pkt->req->threadId()*/]++;
+            incHitCount(pkt, id);
              satisfyCpuSideRequest(pkt, blk);
              return true;
          }
@@ -297,7 +297,7 @@ Cache<TagStore>::access(PacketPtr pkt, BlkType *&blk,
              if (blk == NULL) {
                  // no replaceable block available, give up.
                  // writeback will be forwarded to next level.
-                incMissCount(pkt);
+                incMissCount(pkt, id);
                  return false;
              }
              int id = pkt->req->hasContextId() ? pkt->req->contextId() : -1;
@@ -308,11 +308,11 @@ Cache<TagStore>::access(PacketPtr pkt, BlkType *&blk,
          blk->status |= BlkDirty;
          // nothing else to do; writeback doesn't expect response
          assert(!pkt->needsResponse());
-        hits[pkt->cmdToIndex()][0/*pkt->req->threadId()*/]++;
+        incHitCount(pkt, id);
          return true;
      }
  
-    incMissCount(pkt);
+    incMissCount(pkt, id);
  
      if (blk == NULL && pkt->isLLSC() && pkt->isWrite()) {
          // complete miss on store conditional... just give up now
diff --git a/src/mem/cache/tags/base.cc b/src/mem/cache/tags/base.cc

index e18026a21d957399e63eb3798176857dee85079e..8c6c145ca46188ace1cdafb67f8b2b670c902640 100644 (file)
--- a/src/mem/cache/tags/base.cc
+++ b/src/mem/cache/tags/base.cc
@@ -87,5 +87,20 @@ BaseTags::regStats(const string &name)
          .desc("Cycle when the warmup percentage was hit.")
          ;
  
+    occupancies
+        .init(cache->numCpus())
+        .name(name + ".occ_blocks")
+        .desc("Average occupied blocks per context")
+        .flags(nozero | nonan)
+        ;
+
+    avgOccs
+        .name(name + ".occ_%")
+        .desc("Average percentage of cache occupancy")
+        .flags(nozero)
+        ;
+
+    avgOccs = occupancies / Stats::constant(numBlocks);
+
      registerExitCallback(new BaseTagsCallback(this));
  }
diff --git a/src/mem/cache/tags/base.hh b/src/mem/cache/tags/base.hh

index 46c7186b13a3dd3e9ba807b5827931f6b8678493..fc847029032570013165a3b51999d09c7105a5da 100644 (file)
--- a/src/mem/cache/tags/base.hh
+++ b/src/mem/cache/tags/base.hh
@@ -63,6 +63,9 @@ class BaseTags
      /** Marked true when the cache is warmed up. */
      bool warmedUp;
  
+    /** the number of blocks in the cache */
+    unsigned numBlocks;
+
      // Statistics
      /**
       * @addtogroup CacheStatistics
@@ -92,6 +95,13 @@ class BaseTags
  
      /** The cycle that the warmup percentage was hit. */
      Stats::Scalar warmupCycle;
+
+    /** Average occupancy of each context/cpu using the cache */
+    Stats::AverageVector occupancies;
+
+    /** Average occ % of each context/cpu using the cache */
+    Stats::Formula avgOccs;
+
      /**
       * @}
       */
diff --git a/src/mem/cache/tags/fa_lru.cc b/src/mem/cache/tags/fa_lru.cc

index 808f9e25a86da1f20a5433e2a7374bc7a718f5ed..d13ba49739cee1414db5941a5bfbfba025e008aa 100644 (file)
--- a/src/mem/cache/tags/fa_lru.cc
+++ b/src/mem/cache/tags/fa_lru.cc
@@ -43,8 +43,7 @@
  using namespace std;
  
  FALRU::FALRU(unsigned _blkSize, unsigned _size, unsigned hit_latency)
-    : blkSize(_blkSize), size(_size),
-      numBlks(size/blkSize), hitLatency(hit_latency)
+    : blkSize(_blkSize), size(_size), hitLatency(hit_latency)
  {
      if (!isPowerOf2(blkSize))
          fatal("cache block size (in bytes) `%d' must be a power of two",
@@ -65,23 +64,24 @@ FALRU::FALRU(unsigned _blkSize, unsigned _size, unsigned hit_latency)
  
      warmedUp = false;
      warmupBound = size/blkSize;
+    numBlocks = size/blkSize;
  
-    blks = new FALRUBlk[numBlks];
+    blks = new FALRUBlk[numBlocks];
      head = &(blks[0]);
-    tail = &(blks[numBlks-1]);
+    tail = &(blks[numBlocks-1]);
  
      head->prev = NULL;
      head->next = &(blks[1]);
      head->inCache = cacheMask;
  
-    tail->prev = &(blks[numBlks-2]);
+    tail->prev = &(blks[numBlocks-2]);
      tail->next = NULL;
      tail->inCache = 0;
  
      unsigned index = (1 << 17) / blkSize;
      unsigned j = 0;
      int flags = cacheMask;
-    for (unsigned i = 1; i < numBlks - 1; i++) {
+    for (unsigned i = 1; i < numBlocks - 1; i++) {
          blks[i].inCache = flags;
          if (i == index - 1){
              cacheBoundaries[j] = &(blks[i]);
@@ -94,7 +94,7 @@ FALRU::FALRU(unsigned _blkSize, unsigned _size, unsigned hit_latency)
          blks[i].isTouched = false;
      }
      assert(j == numCaches);
-    assert(index == numBlks);
+    assert(index == numBlocks);
      //assert(check());
  }
  
diff --git a/src/mem/cache/tags/fa_lru.hh b/src/mem/cache/tags/fa_lru.hh

index b20d25d2bfc78316afb3855c7451219b1b438f32..5047da12a4b9009b3d4ce46147fcb0e9c4650de7 100644 (file)
--- a/src/mem/cache/tags/fa_lru.hh
+++ b/src/mem/cache/tags/fa_lru.hh
@@ -84,8 +84,6 @@ class FALRU : public BaseTags
      const unsigned blkSize;
      /** The size of the cache. */
      const unsigned size;
-    /** The number of blocks in the cache. */
-    const unsigned numBlks; // calculated internally
      /** The hit latency of the cache. */
      const unsigned hitLatency;
  
diff --git a/src/mem/cache/tags/iic.cc b/src/mem/cache/tags/iic.cc

index a8ef4e6fb2ea182912749ffe5f70e881e4537041..f9afa5839820a8e06474cbeb0d85cb8575a1ac5a 100644 (file)
--- a/src/mem/cache/tags/iic.cc
+++ b/src/mem/cache/tags/iic.cc
@@ -60,7 +60,6 @@ IIC::IIC(IIC::Params &params) :
      tagShift(floorLog2(blkSize)), blkMask(blkSize - 1),
      subShift(floorLog2(subSize)), subMask(numSub - 1),
      hashDelay(params.hashDelay),
-    numBlocks(params.size/subSize),
      numTags(hashSets * assoc + params.size/blkSize -1),
      numSecondary(params.size/blkSize),
      tagNull(numTags),
@@ -88,6 +87,7 @@ IIC::IIC(IIC::Params &params) :
  
      warmedUp = false;
      warmupBound = params.size/blkSize;
+    numBlocks = params.size/subSize;
  
      // Replacement Policy Initialization
      repl = params.rp;
diff --git a/src/mem/cache/tags/iic.hh b/src/mem/cache/tags/iic.hh

index c96cdaf3e43639b5bf285f122931fc06f9555227..5b12128c6c9ec9c5ed82922114c87ee0d25a8ec2 100644 (file)
--- a/src/mem/cache/tags/iic.hh
+++ b/src/mem/cache/tags/iic.hh
@@ -197,8 +197,6 @@ class IIC : public BaseTags
  
      /** The latency of a hash lookup. */
      const unsigned hashDelay;
-    /** The number of data blocks. */
-    const unsigned numBlocks;
      /** The total number of tags in primary and secondary. */
      const unsigned numTags;
      /** The number of tags in the secondary tag store. */
diff --git a/src/mem/cache/tags/lru.cc b/src/mem/cache/tags/lru.cc

index 81d44b0c06f7c454c2283b763d87a2fcbdead211..0da2a72e926ac9a51f31cb5f8b13d5cedb7b8a9e 100644 (file)
--- a/src/mem/cache/tags/lru.cc
+++ b/src/mem/cache/tags/lru.cc
@@ -74,7 +74,8 @@ LRU::LRU(unsigned _numSets, unsigned _blkSize, unsigned _assoc,
      sets = new CacheSet[numSets];
      blks = new BlkType[numSets * assoc];
      // allocate data storage in one big chunk
-    dataBlks = new uint8_t[numSets*assoc*blkSize];
+    numBlocks = numSets * assoc;
+    dataBlks = new uint8_t[numBlocks * blkSize];
  
      unsigned blkIndex = 0;       // index into blks array
      for (unsigned i = 0; i < numSets; ++i) {
@@ -157,6 +158,12 @@ LRU::findVictim(Addr addr, PacketList &writebacks)
          ++sampledRefs;
          blk->refCount = 0;
  
+        // deal with evicted block
+        if (blk->contextSrc != -1) {
+            occupancies[blk->contextSrc % cache->numCpus()]--;
+            blk->contextSrc = -1;
+        }
+
          DPRINTF(CacheRepl, "set %x: selecting blk %x for replacement\n",
                  set, regenerateBlkAddr(blk->tag, set));
      }
@@ -178,6 +185,12 @@ LRU::insertBlock(Addr addr, BlkType *blk, int context_src)
      // Set tag for new block.  Caller is responsible for setting status.
      blk->tag = extractTag(addr);
  
+    // deal with what we are bringing in
+    if (context_src != -1) {
+        occupancies[context_src % cache->numCpus()]++;
+        blk->contextSrc = context_src;
+    }
+
      unsigned set = extractSet(addr);
      sets[set].moveToHead(blk);
  }
@@ -190,6 +203,10 @@ LRU::invalidateBlk(BlkType *blk)
          blk->isTouched = false;
          blk->clearLoadLocks();
          tagsInUse--;
+        if (blk->contextSrc != -1) {
+            occupancies[blk->contextSrc % cache->numCpus()]--;
+            blk->contextSrc = -1;
+        }
      }
  }
  
diff --git a/tests/configs/memtest.py b/tests/configs/memtest.py

index 93ea4cc0ec9485c85104d749dc2d7484ac29c156..d75bd3d8c6cb0ee35f5bab008d0d0bb035bad41c 100644 (file)
--- a/tests/configs/memtest.py
+++ b/tests/configs/memtest.py
@@ -63,6 +63,7 @@ system = System(cpu = cpus, funcmem = PhysicalMemory(),
  system.toL2Bus = Bus(clock="500GHz", width=16)
  system.l2c = L2(size='64kB', assoc=8)
  system.l2c.cpu_side = system.toL2Bus.port
+system.l2c.num_cpus = nb_cores
  
  # connect l2c to membus
  system.l2c.mem_side = system.membus.port
diff --git a/tests/configs/o3-timing-mp.py b/tests/configs/o3-timing-mp.py

index 59776d5c3eeb2ec371176bd6ece7311f91a412d2..b5c720ddaa4e5af3a2fc2258692fd1871a44678b 100644 (file)
--- a/tests/configs/o3-timing-mp.py
+++ b/tests/configs/o3-timing-mp.py
@@ -62,6 +62,7 @@ Bus())
  system.toL2Bus = Bus()
  system.l2c = L2(size='4MB', assoc=8)
  system.l2c.cpu_side = system.toL2Bus.port
+system.l2c.num_cpus = nb_cores
  
  # connect l2c to membus
  system.l2c.mem_side = system.membus.port
diff --git a/tests/configs/simple-atomic-mp.py b/tests/configs/simple-atomic-mp.py

index bc0ced25099b18a2b45aaf5baa13f22241bb1637..75ffefd0828bb6c889e98c38de6ba8e7eae31804 100644 (file)
--- a/tests/configs/simple-atomic-mp.py
+++ b/tests/configs/simple-atomic-mp.py
@@ -61,6 +61,7 @@ Bus())
  system.toL2Bus = Bus()
  system.l2c = L2(size='4MB', assoc=8)
  system.l2c.cpu_side = system.toL2Bus.port
+system.l2c.num_cpus = nb_cores
  
  # connect l2c to membus
  system.l2c.mem_side = system.membus.port
diff --git a/tests/configs/simple-timing-mp.py b/tests/configs/simple-timing-mp.py

index 0b400e6b7f9632eee3c7da6f08bb227cabafc842..7a8da70bbf2c0147e1290090897d6ed33d9b365c 100644 (file)
--- a/tests/configs/simple-timing-mp.py
+++ b/tests/configs/simple-timing-mp.py
@@ -61,6 +61,7 @@ Bus())
  system.toL2Bus = Bus()
  system.l2c = L2(size='4MB', assoc=8)
  system.l2c.cpu_side = system.toL2Bus.port
+system.l2c.num_cpus = nb_cores
  
  # connect l2c to membus
  system.l2c.mem_side = system.membus.port
diff --git a/tests/configs/tsunami-o3-dual.py b/tests/configs/tsunami-o3-dual.py

index 76aca38062a64b10662ce932f39b32bcaea04df4..d19dc9c261c557dbe80fdba0ddd7d0f730d5ba2e 100644 (file)
--- a/tests/configs/tsunami-o3-dual.py
+++ b/tests/configs/tsunami-o3-dual.py
@@ -85,6 +85,7 @@ system.iocache.mem_side = system.membus.port
  system.l2c = L2(size='4MB', assoc=8)
  system.l2c.cpu_side = system.toL2Bus.port
  system.l2c.mem_side = system.membus.port
+system.l2c.num_cpus = 2
  
  #connect up the cpu and l1s
  for c in cpus:
diff --git a/tests/configs/tsunami-simple-atomic-dual.py b/tests/configs/tsunami-simple-atomic-dual.py

index dfbdd101df5ec0cb75cb1a1a5a8a0cb2160f5d68..d78a09db458f6e6312d81b72557869cf107263b5 100644 (file)
--- a/tests/configs/tsunami-simple-atomic-dual.py
+++ b/tests/configs/tsunami-simple-atomic-dual.py
@@ -83,6 +83,7 @@ system.toL2Bus = Bus()
  system.l2c = L2(size='4MB', assoc=8)
  system.l2c.cpu_side = system.toL2Bus.port
  system.l2c.mem_side = system.membus.port
+system.l2c.num_cpus = 2
  
  #connect up the cpu and l1s
  for c in cpus:
diff --git a/tests/configs/tsunami-simple-timing-dual.py b/tests/configs/tsunami-simple-timing-dual.py

index ce17475e34fa3dabde55529388b7804f902cc1f7..13b7bf32eec8ac8303f5578c69a5cb802c4d44a7 100644 (file)
--- a/tests/configs/tsunami-simple-timing-dual.py
+++ b/tests/configs/tsunami-simple-timing-dual.py
@@ -83,6 +83,7 @@ system.toL2Bus = Bus()
  system.l2c = L2(size='4MB', assoc=8)
  system.l2c.cpu_side = system.toL2Bus.port
  system.l2c.mem_side = system.membus.port
+system.l2c.num_cpus = 2
  
  #connect up the cpu and l1s
  for c in cpus:
author	Lisa Hsu <Lisa.Hsu@amd.com>
	Tue, 23 Feb 2010 17:34:22 +0000 (09:34 -0800)
committer	Lisa Hsu <Lisa.Hsu@amd.com>
	Tue, 23 Feb 2010 17:34:22 +0000 (09:34 -0800)
configs/example/se.py		patch \| blob \| history
src/mem/cache/BaseCache.py		patch \| blob \| history
src/mem/cache/base.cc		patch \| blob \| history
src/mem/cache/base.hh		patch \| blob \| history
src/mem/cache/blk.hh		patch \| blob \| history
src/mem/cache/cache_impl.hh		patch \| blob \| history
src/mem/cache/tags/base.cc		patch \| blob \| history
src/mem/cache/tags/base.hh		patch \| blob \| history
src/mem/cache/tags/fa_lru.cc		patch \| blob \| history
src/mem/cache/tags/fa_lru.hh		patch \| blob \| history
src/mem/cache/tags/iic.cc		patch \| blob \| history
src/mem/cache/tags/iic.hh		patch \| blob \| history
src/mem/cache/tags/lru.cc		patch \| blob \| history
tests/configs/memtest.py		patch \| blob \| history
tests/configs/o3-timing-mp.py		patch \| blob \| history
tests/configs/simple-atomic-mp.py		patch \| blob \| history
tests/configs/simple-timing-mp.py		patch \| blob \| history
tests/configs/tsunami-o3-dual.py		patch \| blob \| history
tests/configs/tsunami-simple-atomic-dual.py		patch \| blob \| history
tests/configs/tsunami-simple-timing-dual.py		patch \| blob \| history