system.tol2bus = Bus()
system.l2.cpu_side = system.tol2bus.port
system.l2.mem_side = system.membus.port
+ system.l2.num_cpus = np
for i in xrange(np):
    if options.caches:
prioritizeRequests = Param.Bool(False,
"always service demand misses first")
repl = Param.Repl(NULL, "replacement policy")
+ num_cpus = Param.Int(1, "number of cpus sharing this cache")
size = Param.MemorySize("capacity in bytes")
forward_snoops = Param.Bool(True,
"forward snoops from mem side to cpu side")
noTargetMSHR(NULL),
missCount(p->max_miss_count),
drainEvent(NULL),
- addrRange(p->addr_range)
+ addrRange(p->addr_range),
+ _numCpus(p->num_cpus)
{
}
const string &cstr = cmd.toString();
hits[access_idx]
- .init(maxThreadsPerCPU)
+#if FULL_SYSTEM
+ .init(_numCpus + 1)
+#else
+ .init(_numCpus)
+#endif
.name(name() + "." + cstr + "_hits")
.desc("number of " + cstr + " hits")
.flags(total | nozero | nonan)
const string &cstr = cmd.toString();
misses[access_idx]
- .init(maxThreadsPerCPU)
+#if FULL_SYSTEM
+ .init(_numCpus + 1)
+#else
+ .init(_numCpus)
+#endif
.name(name() + "." + cstr + "_misses")
.desc("number of " + cstr + " misses")
.flags(total | nozero | nonan)
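With this change, each per-command hit/miss vector gets one bucket per CPU, plus an extra bucket for device accesses in full-system builds. A minimal sketch of the sizing rule (statBuckets and fullSystem are illustrative names, not part of the patch):

    // One stat bucket per CPU, plus one for device accesses (id == -1)
    // in FULL_SYSTEM builds.
    static unsigned statBuckets(unsigned numCpus, bool fullSystem)
    {
        return fullSystem ? numCpus + 1 : numCpus;
    }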
#include "base/statistics.hh"
#include "base/trace.hh"
#include "base/types.hh"
+#include "config/full_system.hh"
#include "mem/cache/mshr_queue.hh"
#include "mem/mem_object.hh"
#include "mem/packet.hh"
* Normally this is all possible memory addresses. */
Range<Addr> addrRange;
+ /** Number of CPUs sharing this cache (set in the config file). */
+ int _numCpus;
+
public:
+ int numCpus() { return _numCpus; }
// Statistics
/**
* @addtogroup CacheStatistics
virtual bool inMissQueue(Addr addr) = 0;
- void incMissCount(PacketPtr pkt)
+ void incMissCount(PacketPtr pkt, int id)
{
- misses[pkt->cmdToIndex()][0/*pkt->req->threadId()*/]++;
+
+ if (pkt->cmd == MemCmd::Writeback) {
+ assert(id == -1);
+ misses[pkt->cmdToIndex()][0]++;
+ /* Writeback misses get the same treatment as writeback hits:
+ * no context id is available, and writeback hit/miss stats are
+ * not used in any aggregate hit/miss calculations, so just lump
+ * them all into bucket 0. */
+#if FULL_SYSTEM
+ } else if (id == -1) {
+ // Device accesses have id -1
+ // lump device accesses into their own bucket
+ misses[pkt->cmdToIndex()][_numCpus]++;
+#endif
+ } else {
+ misses[pkt->cmdToIndex()][id % _numCpus]++;
+ }
if (missCount) {
--missCount;
if (missCount == 0)
exitSimLoop("A cache reached the maximum miss count");
}
}
+ void incHitCount(PacketPtr pkt, int id)
+ {
+
+ /* Writeback requests don't have a context id associated with
+ * them, so attributing a hit to a -1 context id is a problem.
+ * The stats split hits into demand and non-demand hits, neither
+ * of which includes writeback hits, so putting writeback hits
+ * into bucket 0 won't skew any other stats. -hsul */
+ if (pkt->cmd == MemCmd::Writeback) {
+ assert(id == -1);
+ hits[pkt->cmdToIndex()][0]++;
+#if FULL_SYSTEM
+ } else if (id == -1) {
+ // Device accesses have id -1
+ // lump device accesses into their own bucket
+ hits[pkt->cmdToIndex()][_numCpus]++;
+#endif
+ } else {
+ /* The % is necessary because switch cpus can have context ids beyond _numCpus. */
+ hits[pkt->cmdToIndex()][id % _numCpus]++;
+ }
+ }
};
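Taken together, incHitCount() and incMissCount() route every access to a bucket by the same rule. A stand-alone sketch of that routing, using illustrative names (statBucket, isWriteback, fullSystem) rather than anything from the patch:

    #include <cassert>

    // Writebacks (always id == -1) are lumped into bucket 0; in
    // full-system mode, device accesses (also id == -1) get the extra
    // bucket at index numCpus; CPU accesses fold into [0, numCpus),
    // the modulo keeping switch-cpu context ids in range.
    static int statBucket(int id, bool isWriteback, int numCpus, bool fullSystem)
    {
        if (isWriteback) {
            assert(id == -1);
            return 0;
        }
        if (fullSystem && id == -1)
            return numCpus;
        return id % numCpus;
    }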
/** Number of references to this block since it was brought in. */
int refCount;
+ /** holds the context source ID of the requestor for this block. */
+ int contextSrc;
+
protected:
/**
* Represents that the indicated thread context has a "lock" on
CacheBlk()
: asid(-1), tag(0), data(0), size(0), status(0), whenReady(0),
- set(-1), isTouched(false), refCount(0)
+ set(-1), isTouched(false), refCount(0), contextSrc(-1)
{}
/**
if (pkt->needsExclusive() ? blk->isWritable() : blk->isReadable()) {
// OK to satisfy access
- hits[pkt->cmdToIndex()][0/*pkt->req->threadId()*/]++;
+ incHitCount(pkt, id);
satisfyCpuSideRequest(pkt, blk);
return true;
}
if (blk == NULL) {
// no replaceable block available, give up.
// writeback will be forwarded to next level.
- incMissCount(pkt);
+ incMissCount(pkt, id);
return false;
}
int id = pkt->req->hasContextId() ? pkt->req->contextId() : -1;
blk->status |= BlkDirty;
// nothing else to do; writeback doesn't expect response
assert(!pkt->needsResponse());
- hits[pkt->cmdToIndex()][0/*pkt->req->threadId()*/]++;
+ incHitCount(pkt, id);
return true;
}
- incMissCount(pkt);
+ incMissCount(pkt, id);
if (blk == NULL && pkt->isLLSC() && pkt->isWrite()) {
// complete miss on store conditional... just give up now
.desc("Cycle when the warmup percentage was hit.")
;
+ occupancies
+ .init(cache->numCpus())
+ .name(name + ".occ_blocks")
+ .desc("Average occupied blocks per context")
+ .flags(nozero | nonan)
+ ;
+
+ avgOccs
+ .name(name + ".occ_%")
+ .desc("Average percentage of cache occupancy")
+ .flags(nozero)
+ ;
+
+ avgOccs = occupancies / Stats::constant(numBlocks);
+
registerExitCallback(new BaseTagsCallback(this));
}
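As a sanity check of the occ_% formula, with illustrative numbers: a 4MB cache of 64B blocks has numBlocks = 65536, so a context holding 16384 blocks on average reports occ_blocks = 16384 and occ_% = 16384 / 65536 = 0.25:

    // Illustrative numbers only -- occ_% is just occupancies / numBlocks.
    unsigned numBlocks = (4 * 1024 * 1024) / 64;  // 65536 blocks
    double occBlocks = 16384.0;                   // avg blocks held by one context
    double occPct = occBlocks / numBlocks;        // 0.25, i.e. 25% of the cache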
/** Marked true when the cache is warmed up. */
bool warmedUp;
+ /** The number of blocks in the cache. */
+ unsigned numBlocks;
+
// Statistics
/**
* @addtogroup CacheStatistics
/** The cycle that the warmup percentage was hit. */
Stats::Scalar warmupCycle;
+
+ /** Average occupancy of each context/cpu using the cache */
+ Stats::AverageVector occupancies;
+
+ /** Average occ % of each context/cpu using the cache */
+ Stats::Formula avgOccs;
+
/**
* @}
*/
using namespace std;
FALRU::FALRU(unsigned _blkSize, unsigned _size, unsigned hit_latency)
- : blkSize(_blkSize), size(_size),
- numBlks(size/blkSize), hitLatency(hit_latency)
+ : blkSize(_blkSize), size(_size), hitLatency(hit_latency)
{
if (!isPowerOf2(blkSize))
fatal("cache block size (in bytes) `%d' must be a power of two",
warmedUp = false;
warmupBound = size/blkSize;
+ numBlocks = size/blkSize;
- blks = new FALRUBlk[numBlks];
+ blks = new FALRUBlk[numBlocks];
head = &(blks[0]);
- tail = &(blks[numBlks-1]);
+ tail = &(blks[numBlocks-1]);
head->prev = NULL;
head->next = &(blks[1]);
head->inCache = cacheMask;
- tail->prev = &(blks[numBlks-2]);
+ tail->prev = &(blks[numBlocks-2]);
tail->next = NULL;
tail->inCache = 0;
unsigned index = (1 << 17) / blkSize;
unsigned j = 0;
int flags = cacheMask;
- for (unsigned i = 1; i < numBlks - 1; i++) {
+ for (unsigned i = 1; i < numBlocks - 1; i++) {
blks[i].inCache = flags;
if (i == index - 1){
cacheBoundaries[j] = &(blks[i]);
blks[i].isTouched = false;
}
assert(j == numCaches);
- assert(index == numBlks);
+ assert(index == numBlocks);
//assert(check());
}
const unsigned blkSize;
/** The size of the cache. */
const unsigned size;
- /** The number of blocks in the cache. */
- const unsigned numBlks; // calculated internally
/** The hit latency of the cache. */
const unsigned hitLatency;
tagShift(floorLog2(blkSize)), blkMask(blkSize - 1),
subShift(floorLog2(subSize)), subMask(numSub - 1),
hashDelay(params.hashDelay),
- numBlocks(params.size/subSize),
numTags(hashSets * assoc + params.size/blkSize -1),
numSecondary(params.size/blkSize),
tagNull(numTags),
warmedUp = false;
warmupBound = params.size/blkSize;
+ numBlocks = params.size/subSize;
// Replacement Policy Initialization
repl = params.rp;
/** The latency of a hash lookup. */
const unsigned hashDelay;
- /** The number of data blocks. */
- const unsigned numBlocks;
/** The total number of tags in primary and secondary. */
const unsigned numTags;
/** The number of tags in the secondary tag store. */
sets = new CacheSet[numSets];
blks = new BlkType[numSets * assoc];
// allocate data storage in one big chunk
- dataBlks = new uint8_t[numSets*assoc*blkSize];
+ numBlocks = numSets * assoc;
+ dataBlks = new uint8_t[numBlocks * blkSize];
unsigned blkIndex = 0; // index into blks array
for (unsigned i = 0; i < numSets; ++i) {
++sampledRefs;
blk->refCount = 0;
+ // update occupancy bookkeeping for the evicted block
+ if (blk->contextSrc != -1) {
+ occupancies[blk->contextSrc % cache->numCpus()]--;
+ blk->contextSrc = -1;
+ }
+
DPRINTF(CacheRepl, "set %x: selecting blk %x for replacement\n",
set, regenerateBlkAddr(blk->tag, set));
}
// Set tag for new block. Caller is responsible for setting status.
blk->tag = extractTag(addr);
+ // update occupancy bookkeeping for the block being brought in
+ if (context_src != -1) {
+ occupancies[context_src % cache->numCpus()]++;
+ blk->contextSrc = context_src;
+ }
+
unsigned set = extractSet(addr);
sets[set].moveToHead(blk);
}
blk->isTouched = false;
blk->clearLoadLocks();
tagsInUse--;
+ if (blk->contextSrc != -1) {
+ occupancies[blk->contextSrc % cache->numCpus()]--;
+ blk->contextSrc = -1;
+ }
}
}
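The insert and invalidate/eviction hooks above maintain a simple invariant: occupancies[ctx] always equals the number of live blocks whose contextSrc maps to ctx. A toy model of that bookkeeping, with made-up names (ToyBlock, ToyOccupancy) standing in for the patch's classes:

    #include <vector>

    struct ToyBlock { int contextSrc = -1; };  // -1 means unowned

    struct ToyOccupancy {
        std::vector<long> counts;  // live blocks per context
        int numCpus;
        explicit ToyOccupancy(int n) : counts(n, 0), numCpus(n) {}

        // Mirror of insertBlock(): credit the inserting context.
        void onInsert(ToyBlock &blk, int context_src) {
            if (context_src != -1) {
                counts[context_src % numCpus]++;
                blk.contextSrc = context_src;
            }
        }
        // Mirror of eviction/invalidateBlk(): release the credit.
        void onRelease(ToyBlock &blk) {
            if (blk.contextSrc != -1) {
                counts[blk.contextSrc % numCpus]--;
                blk.contextSrc = -1;
            }
        }
    };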
system.toL2Bus = Bus(clock="500GHz", width=16)
system.l2c = L2(size='64kB', assoc=8)
system.l2c.cpu_side = system.toL2Bus.port
+system.l2c.num_cpus = nb_cores
# connect l2c to membus
system.l2c.mem_side = system.membus.port
system.toL2Bus = Bus()
system.l2c = L2(size='4MB', assoc=8)
system.l2c.cpu_side = system.toL2Bus.port
+system.l2c.num_cpus = nb_cores
# connect l2c to membus
system.l2c.mem_side = system.membus.port
system.l2c = L2(size='4MB', assoc=8)
system.l2c.cpu_side = system.toL2Bus.port
system.l2c.mem_side = system.membus.port
+system.l2c.num_cpus = 2
#connect up the cpu and l1s
for c in cpus: