From 85e8779de78ed913bb6d2a794bee5252d719b0e5 Mon Sep 17 00:00:00 2001 From: Dam Sunwoo Date: Fri, 24 Jan 2014 15:29:30 -0600 Subject: [PATCH] mem: per-thread cache occupancy and per-block ages This patch enables tracking of cache occupancy per thread, along with the age (in buckets) of each cache block. Cache occupancy stats are recalculated on each stat dump. --- src/arch/arm/table_walker.cc | 2 ++ src/arch/arm/tlb.cc | 3 +++ src/cpu/base_dyn_inst.hh | 4 ++++ src/cpu/o3/fetch_impl.hh | 2 ++ src/cpu/simple/atomic.cc | 3 +++ src/cpu/simple/timing.cc | 5 +++++ src/dev/dma_device.cc | 1 + src/mem/cache/blk.hh | 12 ++++++++-- src/mem/cache/cache_impl.hh | 6 +++++ src/mem/cache/prefetch/base.cc | 1 + src/mem/cache/tags/base.cc | 23 +++++++++++++++++++ src/mem/cache/tags/base.hh | 22 +++++++++++++++++++ src/mem/cache/tags/lru.cc | 40 ++++++++++++++++++++++++++++++++++ src/mem/cache/tags/lru.hh | 5 +++++ src/mem/request.hh | 23 ++++++++++++++++++- 15 files changed, 149 insertions(+), 3 deletions(-) diff --git a/src/arch/arm/table_walker.cc b/src/arch/arm/table_walker.cc index 9755299ff..d419fdec5 100644 --- a/src/arch/arm/table_walker.cc +++ b/src/arch/arm/table_walker.cc @@ -308,6 +308,7 @@ TableWalker::processWalk() f = currState->fault; } else { RequestPtr req = new Request(l1desc_addr, sizeof(uint32_t), flag, masterId); + req->taskId(ContextSwitchTaskId::DMA); PacketPtr pkt = new Packet(req, MemCmd::ReadReq); pkt->dataStatic((uint8_t*)&currState->l1Desc.data); port.sendFunctional(pkt); @@ -653,6 +654,7 @@ TableWalker::doL1Descriptor() } else { RequestPtr req = new Request(l2desc_addr, sizeof(uint32_t), 0, masterId); + req->taskId(ContextSwitchTaskId::DMA); PacketPtr pkt = new Packet(req, MemCmd::ReadReq); pkt->dataStatic((uint8_t*)&currState->l2Desc.data); port.sendFunctional(pkt); diff --git a/src/arch/arm/tlb.cc b/src/arch/arm/tlb.cc index 107901f99..805898576 100644 --- a/src/arch/arm/tlb.cc +++ b/src/arch/arm/tlb.cc @@ -54,6 +54,7 @@ #include "base/inifile.hh" #include
"base/str.hh" #include "base/trace.hh" +#include "cpu/base.hh" #include "cpu/thread_context.hh" #include "debug/Checkpoint.hh" #include "debug/TLB.hh" @@ -477,6 +478,8 @@ TLB::translateFs(RequestPtr req, ThreadContext *tc, Mode mode, if (is_priv) req->setFlags(Request::PRIVILEGED); + req->taskId(tc->getCpuPtr()->taskId()); + DPRINTF(TLBVerbose, "CPSR is priv:%d UserMode:%d\n", isPriv, flags & UserMode); // If this is a clrex instruction, provide a PA of 0 with no fault diff --git a/src/cpu/base_dyn_inst.hh b/src/cpu/base_dyn_inst.hh index b7b076820..f12a89bbd 100644 --- a/src/cpu/base_dyn_inst.hh +++ b/src/cpu/base_dyn_inst.hh @@ -890,6 +890,8 @@ BaseDynInst::readMem(Addr addr, uint8_t *data, req = new Request(asid, addr, size, flags, masterId(), this->pc.instAddr(), thread->contextId(), threadNumber); + req->taskId(cpu->taskId()); + // Only split the request if the ISA supports unaligned accesses. if (TheISA::HasUnalignedMemAcc) { splitRequest(req, sreqLow, sreqHigh); @@ -953,6 +955,8 @@ BaseDynInst::writeMem(uint8_t *data, unsigned size, req = new Request(asid, addr, size, flags, masterId(), this->pc.instAddr(), thread->contextId(), threadNumber); + req->taskId(cpu->taskId()); + // Only split the request if the ISA supports unaligned accesses. 
if (TheISA::HasUnalignedMemAcc) { splitRequest(req, sreqLow, sreqHigh); diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh index 5b04c2a25..a81125da6 100644 --- a/src/cpu/o3/fetch_impl.hh +++ b/src/cpu/o3/fetch_impl.hh @@ -604,6 +604,8 @@ DefaultFetch::fetchCacheLine(Addr vaddr, ThreadID tid, Addr pc) Request::INST_FETCH, cpu->instMasterId(), pc, cpu->thread[tid]->contextId(), tid); + mem_req->taskId(cpu->taskId()); + memReq[tid] = mem_req; // Initiate translation of the icache block diff --git a/src/cpu/simple/atomic.cc b/src/cpu/simple/atomic.cc index 13c4b9bd3..617e845a5 100644 --- a/src/cpu/simple/atomic.cc +++ b/src/cpu/simple/atomic.cc @@ -301,6 +301,7 @@ AtomicSimpleCPU::readMem(Addr addr, uint8_t * data, dcache_latency = 0; + req->taskId(taskId()); while (1) { req->setVirt(0, addr, size, flags, dataMasterId(), thread->pcState().instAddr()); @@ -387,6 +388,7 @@ AtomicSimpleCPU::writeMem(uint8_t *data, unsigned size, dcache_latency = 0; + req->taskId(taskId()); while(1) { req->setVirt(0, addr, size, flags, dataMasterId(), thread->pcState().instAddr()); @@ -492,6 +494,7 @@ AtomicSimpleCPU::tick() bool needToFetch = !isRomMicroPC(pcState.microPC()) && !curMacroStaticInst; if (needToFetch) { + ifetch_req.taskId(taskId()); setupFetchRequest(&ifetch_req); fault = thread->itb->translateAtomic(&ifetch_req, tc, BaseTLB::Execute); diff --git a/src/cpu/simple/timing.cc b/src/cpu/simple/timing.cc index 9253d8005..7996a6ddd 100644 --- a/src/cpu/simple/timing.cc +++ b/src/cpu/simple/timing.cc @@ -415,6 +415,8 @@ TimingSimpleCPU::readMem(Addr addr, uint8_t *data, RequestPtr req = new Request(asid, addr, size, flags, dataMasterId(), pc, _cpuId, tid); + req->taskId(taskId()); + Addr split_addr = roundDown(addr + size - 1, block_size); assert(split_addr <= addr || split_addr - addr < block_size); @@ -484,6 +486,8 @@ TimingSimpleCPU::writeMem(uint8_t *data, unsigned size, RequestPtr req = new Request(asid, addr, size, flags, dataMasterId(), pc, _cpuId, tid); + 
req->taskId(taskId()); + Addr split_addr = roundDown(addr + size - 1, block_size); assert(split_addr <= addr || split_addr - addr < block_size); @@ -561,6 +565,7 @@ TimingSimpleCPU::fetch() if (needToFetch) { _status = BaseSimpleCPU::Running; Request *ifetch_req = new Request(); + ifetch_req->taskId(taskId()); ifetch_req->setThreadContext(_cpuId, /* thread ID */ 0); setupFetchRequest(ifetch_req); DPRINTF(SimpleCPU, "Translating address %#x\n", ifetch_req->getVaddr()); diff --git a/src/dev/dma_device.cc b/src/dev/dma_device.cc index 5eb5e1f9d..5033c3617 100644 --- a/src/dev/dma_device.cc +++ b/src/dev/dma_device.cc @@ -166,6 +166,7 @@ DmaPort::dmaAction(Packet::Command cmd, Addr addr, int size, Event *event, for (ChunkGenerator gen(addr, size, sys->cacheLineSize()); !gen.done(); gen.next()) { Request *req = new Request(gen.addr(), gen.size(), flag, masterId); + req->taskId(ContextSwitchTaskId::DMA); PacketPtr pkt = new Packet(req, cmd); // Increment the data pointer on a write diff --git a/src/mem/cache/blk.hh b/src/mem/cache/blk.hh index 4a89f3892..47cd305c0 100644 --- a/src/mem/cache/blk.hh +++ b/src/mem/cache/blk.hh @@ -80,6 +80,9 @@ enum CacheBlkStatusBits { class CacheBlk { public: + /** Task Id associated with this block */ + uint32_t task_id; + /** The address space ID of this block. */ int asid; /** Data block tag value. */ @@ -119,6 +122,8 @@ class CacheBlk /** holds the source requestor ID for this block. 
*/ int srcMasterId; + Tick tickInserted; /**< Tick of last insert/invalidate/writeback */ + protected: /** * Represents that the indicated thread context has a "lock" on @@ -162,9 +167,11 @@ class CacheBlk public: CacheBlk() - : asid(-1), tag(0), data(0) ,size(0), status(0), whenReady(0), + : task_id(ContextSwitchTaskId::Unknown), + asid(-1), tag(0), data(0) ,size(0), status(0), whenReady(0), set(-1), isTouched(false), refCount(0), - srcMasterId(Request::invldMasterId) + srcMasterId(Request::invldMasterId), + tickInserted(0) {} /** @@ -182,6 +189,7 @@ class CacheBlk whenReady = rhs.whenReady; set = rhs.set; refCount = rhs.refCount; + task_id = rhs.task_id; return *this; } diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh index 6d7011819..e86b3d704 100644 --- a/src/mem/cache/cache_impl.hh +++ b/src/mem/cache/cache_impl.hh @@ -1074,6 +1074,11 @@ Cache::writebackBlk(BlkType *blk) Request *writebackReq = new Request(tags->regenerateBlkAddr(blk->tag, blk->set), blkSize, 0, Request::wbMasterId); + + writebackReq->taskId(blk->task_id); + blk->task_id= ContextSwitchTaskId::Unknown; + blk->tickInserted = curTick(); + PacketPtr writeback = new Packet(writebackReq, MemCmd::Writeback); if (blk->isWritable()) { writeback->setSupplyExclusive(); @@ -1120,6 +1125,7 @@ Cache::writebackVisitor(BlkType &blk) Request request(tags->regenerateBlkAddr(blk.tag, blk.set), blkSize, 0, Request::funcMasterId); + request.taskId(blk.task_id); Packet packet(&request, MemCmd::WriteReq); packet.dataStatic(blk.data); diff --git a/src/mem/cache/prefetch/base.cc b/src/mem/cache/prefetch/base.cc index 6463f78f8..ed7b63f82 100644 --- a/src/mem/cache/prefetch/base.cc +++ b/src/mem/cache/prefetch/base.cc @@ -247,6 +247,7 @@ BasePrefetcher::notify(PacketPtr &pkt, Tick tick) // create a prefetch memreq Request *prefetchReq = new Request(*addrIter, blkSize, 0, masterId); + prefetchReq->taskId(ContextSwitchTaskId::Prefetcher); PacketPtr prefetch = new Packet(prefetchReq, MemCmd::HardPFReq); prefetch->allocate(); diff --git
a/src/mem/cache/tags/base.cc b/src/mem/cache/tags/base.cc index 947bd05de..b669a5b06 100644 --- a/src/mem/cache/tags/base.cc +++ b/src/mem/cache/tags/base.cc @@ -125,5 +125,28 @@ BaseTags::regStats() avgOccs = occupancies / Stats::constant(numBlocks); + occupanciesTaskId + .init(ContextSwitchTaskId::NumTaskId) + .name(name() + ".occ_task_id_blocks") + .desc("Occupied blocks per task id") + .flags(nozero | nonan) + ; + + ageTaskId + .init(ContextSwitchTaskId::NumTaskId, 5) + .name(name() + ".age_task_id_blocks") + .desc("Occupied blocks per task id, per block age") + .flags(nozero | nonan) + ; + + percentOccsTaskId + .name(name() + ".occ_task_id_percent") + .desc("Percentage of cache occupancy per task id") + .flags(nozero) + ; + + percentOccsTaskId = occupanciesTaskId / Stats::constant(numBlocks); + + registerDumpCallback(new BaseTagsDumpCallback(this)); registerExitCallback(new BaseTagsCallback(this)); } diff --git a/src/mem/cache/tags/base.hh b/src/mem/cache/tags/base.hh index 8ce7d972a..e8c71f01f 100644 --- a/src/mem/cache/tags/base.hh +++ b/src/mem/cache/tags/base.hh @@ -121,6 +121,15 @@ class BaseTags : public ClockedObject /** Average occ % of each requestor using the cache */ Stats::Formula avgOccs; + /** Number of occupied blocks for each task id */ + Stats::Vector occupanciesTaskId; + + /** Occupied blocks for each task id, split into 5 block-age buckets */ + Stats::Vector2d ageTaskId; + + /** Occ % of each task id using the cache */ + Stats::Formula percentOccsTaskId; + /** * @} */ @@ -151,6 +160,11 @@ class BaseTags : public ClockedObject */ virtual void cleanupRefs() {} + /** + * Computes stats just prior to dump event + */ + virtual void computeStats() {} + /** *iterated through all blocks and clear all locks *Needed to clear all lock tracking at once @@ -171,4 +185,12 @@ class BaseTagsCallback : public Callback virtual void process() { tags->cleanupRefs(); }; }; +class BaseTagsDumpCallback : public Callback +{ + BaseTags *tags; + public: + BaseTagsDumpCallback(BaseTags *t) :
tags(t) {} + virtual void process() { tags->computeStats(); }; +}; + #endif //__BASE_TAGS_HH__ diff --git a/src/mem/cache/tags/lru.cc b/src/mem/cache/tags/lru.cc index db0cc0839..6b05744af 100644 --- a/src/mem/cache/tags/lru.cc +++ b/src/mem/cache/tags/lru.cc @@ -176,6 +176,7 @@ LRU::insertBlock(PacketPtr pkt, BlkType *blk) { Addr addr = pkt->getAddr(); MasterID master_id = pkt->req->masterId(); + uint32_t task_id = pkt->req->taskId(); if (!blk->isTouched) { tagsInUse++; blk->isTouched = true; @@ -210,6 +211,8 @@ LRU::insertBlock(PacketPtr pkt, BlkType *blk) assert(master_id < cache->system->maxMasters()); occupancies[master_id]++; blk->srcMasterId = master_id; + blk->task_id = task_id; + blk->tickInserted = curTick(); unsigned set = extractSet(addr); sets[set].moveToHead(blk); @@ -224,6 +227,8 @@ LRU::invalidate(BlkType *blk) assert(blk->srcMasterId < cache->system->maxMasters()); occupancies[blk->srcMasterId]--; blk->srcMasterId = Request::invldMasterId; + blk->task_id = ContextSwitchTaskId::Unknown; + blk->tickInserted = curTick(); // should be evicted before valid blocks unsigned set = blk->set; @@ -270,3 +275,38 @@ LRU::cleanupRefs() } } } + +void +LRU::computeStats() +{ + for (unsigned i = 0; i < ContextSwitchTaskId::NumTaskId; ++i) { + occupanciesTaskId[i] = 0; + for (unsigned j = 0; j < 5; ++j) { + ageTaskId[i][j] = 0; + } + } + + for (unsigned i = 0; i < numSets * assoc; ++i) { + if (blks[i].isValid()) { + assert(blks[i].task_id < ContextSwitchTaskId::NumTaskId); + occupanciesTaskId[blks[i].task_id]++; + assert(blks[i].tickInserted <= curTick()); // no underflow + Tick age = curTick() - blks[i].tickInserted; + + int age_index; + if (age / SimClock::Int::us < 10) { // <10us + age_index = 0; + } else if (age / SimClock::Int::us < 100) { // <100us + age_index = 1; + } else if (age / SimClock::Int::ms < 1) { // <1ms + age_index = 2; + } else if (age / SimClock::Int::ms < 10) { // <10ms + age_index = 3; + } else + age_index = 4; // >=10ms + + ageTaskId[blks[i].task_id][age_index]++; + } + } +} + diff
--git a/src/mem/cache/tags/lru.hh b/src/mem/cache/tags/lru.hh index af7f8665d..68c29b754 100644 --- a/src/mem/cache/tags/lru.hh +++ b/src/mem/cache/tags/lru.hh @@ -252,6 +252,11 @@ public: */ virtual std::string print() const; + /** + * Called prior to dumping stats to compute task occupancy + */ + virtual void computeStats(); + /** * Visit each block in the tag store and apply a visitor to the * block. diff --git a/src/mem/request.hh b/src/mem/request.hh index 54b671645..fb21e3ff3 100644 --- a/src/mem/request.hh +++ b/src/mem/request.hh @@ -219,6 +219,11 @@ class Request */ Tick _time; + /** + * The task id associated with this request + */ + uint32_t _taskId; + /** The address space ID. */ int _asid; @@ -244,7 +249,8 @@ class Request * default constructor.) */ Request() - : translateDelta(0), accessDelta(0), depth(0) + : _taskId(ContextSwitchTaskId::Unknown), + translateDelta(0), accessDelta(0), depth(0) {} /** @@ -253,16 +259,19 @@ class Request * These fields are adequate to perform a request. */ Request(Addr paddr, int size, Flags flags, MasterID mid) + : _taskId(ContextSwitchTaskId::Unknown) { setPhys(paddr, size, flags, mid); } Request(Addr paddr, int size, Flags flags, MasterID mid, Tick time) + : _taskId(ContextSwitchTaskId::Unknown) { setPhys(paddr, size, flags, mid, time); } Request(Addr paddr, int size, Flags flags, MasterID mid, Tick time, Addr pc) + : _taskId(ContextSwitchTaskId::Unknown) { setPhys(paddr, size, flags, mid, time); privateFlags.set(VALID_PC); @@ -271,6 +280,7 @@ class Request Request(int asid, Addr vaddr, int size, Flags flags, MasterID mid, Addr pc, int cid, ThreadID tid) + : _taskId(ContextSwitchTaskId::Unknown) { setVirt(asid, vaddr, size, flags, mid, pc); setThreadContext(cid, tid); @@ -477,6 +487,17 @@ class Request return _masterId; } + uint32_t + taskId() const + { + return _taskId; + } + + void + taskId(uint32_t id) { + _taskId = id; + } + /** Accessor function for asid.*/ int getAsid() -- 2.30.2