From: Andreas Sandberg Date: Mon, 23 Sep 2019 17:16:26 +0000 (+0100) Subject: mem-cache: Switch to new-style stats X-Git-Tag: v19.0.0.0~511 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=0d98a7170f50d07d737a6a78e32e6ac3021262ec;p=gem5.git mem-cache: Switch to new-style stats This change puts cache and tag stats into a Stats::Group struct. This makes it easier to identify stat updates (they are prefixed with stats.) and adds hierarchy information for output formats that need it. Change-Id: I2b8e9138f1cb977abb445ec864d80a79b588481d Signed-off-by: Andreas Sandberg Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/21140 Reviewed-by: Nikos Nikoleris Maintainer: Nikos Nikoleris Tested-by: kokoro --- diff --git a/src/mem/cache/base.cc b/src/mem/cache/base.cc index bc29c8cee..20266963a 100644 --- a/src/mem/cache/base.cc +++ b/src/mem/cache/base.cc @@ -107,7 +107,8 @@ BaseCache::BaseCache(const BaseCacheParams *p, unsigned blk_size) noTargetMSHR(nullptr), missCount(p->max_miss_count), addrRanges(p->addr_ranges.begin(), p->addr_ranges.end()), - system(p->system) + system(p->system), + stats(*this) { // the MSHR queue has no reserve entries as we check the MSHR // queue on every single allocation, whereas the write queue has @@ -273,7 +274,7 @@ BaseCache::handleTimingReqMiss(PacketPtr pkt, MSHR *mshr, CacheBlk *blk, pkt->print()); assert(pkt->req->masterId() < system->maxMasters()); - mshr_hits[pkt->cmdToIndex()][pkt->req->masterId()]++; + stats.cmdStats(pkt).mshr_hits[pkt->req->masterId()]++; // We use forward_time here because it is the same // considering new targets. 
We have multiple @@ -297,7 +298,7 @@ BaseCache::handleTimingReqMiss(PacketPtr pkt, MSHR *mshr, CacheBlk *blk, } else { // no MSHR assert(pkt->req->masterId() < system->maxMasters()); - mshr_misses[pkt->cmdToIndex()][pkt->req->masterId()]++; + stats.cmdStats(pkt).mshr_misses[pkt->req->masterId()]++; if (pkt->isEviction() || pkt->cmd == MemCmd::WriteClean) { // We use forward_time here because there is an @@ -440,18 +441,16 @@ BaseCache::recvTimingResp(PacketPtr pkt) } // Initial target is used just for stats - QueueEntry::Target *initial_tgt = mshr->getTarget(); - int stats_cmd_idx = initial_tgt->pkt->cmdToIndex(); - Tick miss_latency = curTick() - initial_tgt->recvTime; - + const QueueEntry::Target *initial_tgt = mshr->getTarget(); + const Tick miss_latency = curTick() - initial_tgt->recvTime; if (pkt->req->isUncacheable()) { assert(pkt->req->masterId() < system->maxMasters()); - mshr_uncacheable_lat[stats_cmd_idx][pkt->req->masterId()] += - miss_latency; + stats.cmdStats(initial_tgt->pkt) + .mshr_uncacheable_lat[pkt->req->masterId()] += miss_latency; } else { assert(pkt->req->masterId() < system->maxMasters()); - mshr_miss_latency[stats_cmd_idx][pkt->req->masterId()] += - miss_latency; + stats.cmdStats(initial_tgt->pkt) + .mshr_miss_latency[pkt->req->masterId()] += miss_latency; } PacketList writebacks; @@ -779,7 +778,7 @@ BaseCache::getNextQueueEntry() // Update statistic on number of prefetches issued // (hwpf_mshr_misses) assert(pkt->req->masterId() < system->maxMasters()); - mshr_misses[pkt->cmdToIndex()][pkt->req->masterId()]++; + stats.cmdStats(pkt).mshr_misses[pkt->req->masterId()]++; // allocate an MSHR and return it, note // that we send the packet straight away, so do not @@ -866,7 +865,7 @@ BaseCache::updateCompressionData(CacheBlk *blk, const uint64_t* data, } // Update the number of data expansions - dataExpansions++; + stats.dataExpansions++; DPRINTF(CacheComp, "Data expansion: expanding [%s] from %d to %d bits" "\n", blk->print(), prev_size, 
compression_size); @@ -885,7 +884,7 @@ BaseCache::updateCompressionData(CacheBlk *blk, const uint64_t* data, for (const auto& evict_blk : evict_blks) { if (evict_blk->isValid()) { if (evict_blk->wasPrefetched()) { - unusedPrefetches++; + stats.unusedPrefetches++; } evictBlock(evict_blk, writebacks); } @@ -1499,14 +1498,14 @@ BaseCache::allocateBlock(const PacketPtr pkt, PacketList &writebacks) addr, is_secure); if (blk->wasPrefetched()) { - unusedPrefetches++; + stats.unusedPrefetches++; } evictBlock(blk, writebacks); } } - replacements++; + stats.replacements++; } // If using a compressor, set compression data. This must be done before @@ -1550,7 +1549,7 @@ BaseCache::writebackBlk(CacheBlk *blk) "Writeback from read-only cache"); assert(blk && blk->isValid() && (blk->isDirty() || writebackClean)); - writebacks[Request::wbMasterId]++; + stats.writebacks[Request::wbMasterId]++; RequestPtr req = std::make_shared( regenerateBlkAddr(blk), blkSize, 0, Request::wbMasterId); @@ -1873,248 +1872,350 @@ BaseCache::unserialize(CheckpointIn &cp) } } -void -BaseCache::regStats() + +BaseCache::CacheCmdStats::CacheCmdStats(BaseCache &c, + const std::string &name) + : Stats::Group(&c), cache(c), + + hits( + this, (name + "_hits").c_str(), + ("number of " + name + " hits").c_str()), + misses( + this, (name + "_misses").c_str(), + ("number of " + name + " misses").c_str()), + missLatency( + this, (name + "_miss_latency").c_str(), + ("number of " + name + " miss cycles").c_str()), + accesses( + this, (name + "_accesses").c_str(), + ("number of " + name + " accesses(hits+misses)").c_str()), + missRate( + this, (name + "_miss_rate").c_str(), + ("miss rate for " + name + " accesses").c_str()), + avgMissLatency( + this, (name + "_avg_miss_latency").c_str(), + ("average " + name + " miss latency").c_str()), + mshr_hits( + this, (name + "_mshr_hits").c_str(), + ("number of " + name + " MSHR hits").c_str()), + mshr_misses( + this, (name + "_mshr_misses").c_str(), + ("number of " + name + " 
MSHR misses").c_str()), + mshr_uncacheable( + this, (name + "_mshr_uncacheable").c_str(), + ("number of " + name + " MSHR uncacheable").c_str()), + mshr_miss_latency( + this, (name + "_mshr_miss_latency").c_str(), + ("number of " + name + " MSHR miss cycles").c_str()), + mshr_uncacheable_lat( + this, (name + "_mshr_uncacheable_latency").c_str(), + ("number of " + name + " MSHR uncacheable cycles").c_str()), + mshrMissRate( + this, (name + "_mshr_miss_rate").c_str(), + ("mshr miss rate for " + name + " accesses").c_str()), + avgMshrMissLatency( + this, (name + "_avg_mshr_miss_latency").c_str(), + ("average " + name + " mshr miss latency").c_str()), + avgMshrUncacheableLatency( + this, (name + "_avg_mshr_uncacheable_latency").c_str(), + ("average " + name + " mshr uncacheable latency").c_str()) { - ClockedObject::regStats(); +} +void +BaseCache::CacheCmdStats::regStatsFromParent() +{ using namespace Stats; - // Hit statistics - for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) { - MemCmd cmd(access_idx); - const string &cstr = cmd.toString(); - - hits[access_idx] - .init(system->maxMasters()) - .name(name() + "." + cstr + "_hits") - .desc("number of " + cstr + " hits") - .flags(total | nozero | nonan) - ; - for (int i = 0; i < system->maxMasters(); i++) { - hits[access_idx].subname(i, system->getMasterName(i)); - } - } - -// These macros make it easier to sum the right subset of commands and -// to change the subset of commands that are considered "demand" vs -// "non-demand" -#define SUM_DEMAND(s) \ - (s[MemCmd::ReadReq] + s[MemCmd::WriteReq] + s[MemCmd::WriteLineReq] + \ - s[MemCmd::ReadExReq] + s[MemCmd::ReadCleanReq] + s[MemCmd::ReadSharedReq]) + Stats::Group::regStats(); + System *system = cache.system; + const auto max_masters = system->maxMasters(); -// should writebacks be included here? prior code was inconsistent... 
-#define SUM_NON_DEMAND(s) \ - (s[MemCmd::SoftPFReq] + s[MemCmd::HardPFReq] + s[MemCmd::SoftPFExReq]) + hits + .init(max_masters) + .flags(total | nozero | nonan) + ; + for (int i = 0; i < max_masters; i++) { + hits.subname(i, system->getMasterName(i)); + } - demandHits - .name(name() + ".demand_hits") - .desc("number of demand (read+write) hits") + // Miss statistics + misses + .init(max_masters) .flags(total | nozero | nonan) ; - demandHits = SUM_DEMAND(hits); - for (int i = 0; i < system->maxMasters(); i++) { - demandHits.subname(i, system->getMasterName(i)); + for (int i = 0; i < max_masters; i++) { + misses.subname(i, system->getMasterName(i)); } - overallHits - .name(name() + ".overall_hits") - .desc("number of overall hits") + // Miss latency statistics + missLatency + .init(max_masters) .flags(total | nozero | nonan) ; - overallHits = demandHits + SUM_NON_DEMAND(hits); - for (int i = 0; i < system->maxMasters(); i++) { - overallHits.subname(i, system->getMasterName(i)); + for (int i = 0; i < max_masters; i++) { + missLatency.subname(i, system->getMasterName(i)); } - // Miss statistics - for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) { - MemCmd cmd(access_idx); - const string &cstr = cmd.toString(); - - misses[access_idx] - .init(system->maxMasters()) - .name(name() + "." 
+ cstr + "_misses") - .desc("number of " + cstr + " misses") - .flags(total | nozero | nonan) - ; - for (int i = 0; i < system->maxMasters(); i++) { - misses[access_idx].subname(i, system->getMasterName(i)); - } + // access formulas + accesses.flags(total | nozero | nonan); + accesses = hits + misses; + for (int i = 0; i < max_masters; i++) { + accesses.subname(i, system->getMasterName(i)); + } + + // miss rate formulas + missRate.flags(total | nozero | nonan); + missRate = misses / accesses; + for (int i = 0; i < max_masters; i++) { + missRate.subname(i, system->getMasterName(i)); + } + + // miss latency formulas + avgMissLatency.flags(total | nozero | nonan); + avgMissLatency = missLatency / misses; + for (int i = 0; i < max_masters; i++) { + avgMissLatency.subname(i, system->getMasterName(i)); } - demandMisses - .name(name() + ".demand_misses") - .desc("number of demand (read+write) misses") + // MSHR statistics + // MSHR hit statistics + mshr_hits + .init(max_masters) .flags(total | nozero | nonan) ; - demandMisses = SUM_DEMAND(misses); - for (int i = 0; i < system->maxMasters(); i++) { - demandMisses.subname(i, system->getMasterName(i)); + for (int i = 0; i < max_masters; i++) { + mshr_hits.subname(i, system->getMasterName(i)); } - overallMisses - .name(name() + ".overall_misses") - .desc("number of overall misses") + // MSHR miss statistics + mshr_misses + .init(max_masters) .flags(total | nozero | nonan) ; - overallMisses = demandMisses + SUM_NON_DEMAND(misses); - for (int i = 0; i < system->maxMasters(); i++) { - overallMisses.subname(i, system->getMasterName(i)); + for (int i = 0; i < max_masters; i++) { + mshr_misses.subname(i, system->getMasterName(i)); } - // Miss latency statistics - for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) { - MemCmd cmd(access_idx); - const string &cstr = cmd.toString(); - - missLatency[access_idx] - .init(system->maxMasters()) - .name(name() + "." 
+ cstr + "_miss_latency") - .desc("number of " + cstr + " miss cycles") - .flags(total | nozero | nonan) - ; - for (int i = 0; i < system->maxMasters(); i++) { - missLatency[access_idx].subname(i, system->getMasterName(i)); - } + // MSHR miss latency statistics + mshr_miss_latency + .init(max_masters) + .flags(total | nozero | nonan) + ; + for (int i = 0; i < max_masters; i++) { + mshr_miss_latency.subname(i, system->getMasterName(i)); } - demandMissLatency - .name(name() + ".demand_miss_latency") - .desc("number of demand (read+write) miss cycles") + // MSHR uncacheable statistics + mshr_uncacheable + .init(max_masters) .flags(total | nozero | nonan) ; - demandMissLatency = SUM_DEMAND(missLatency); - for (int i = 0; i < system->maxMasters(); i++) { - demandMissLatency.subname(i, system->getMasterName(i)); + for (int i = 0; i < max_masters; i++) { + mshr_uncacheable.subname(i, system->getMasterName(i)); } - overallMissLatency - .name(name() + ".overall_miss_latency") - .desc("number of overall miss cycles") + // MSHR miss latency statistics + mshr_uncacheable_lat + .init(max_masters) .flags(total | nozero | nonan) ; - overallMissLatency = demandMissLatency + SUM_NON_DEMAND(missLatency); - for (int i = 0; i < system->maxMasters(); i++) { - overallMissLatency.subname(i, system->getMasterName(i)); + for (int i = 0; i < max_masters; i++) { + mshr_uncacheable_lat.subname(i, system->getMasterName(i)); } - // access formulas - for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) { - MemCmd cmd(access_idx); - const string &cstr = cmd.toString(); - - accesses[access_idx] - .name(name() + "." 
+ cstr + "_accesses") - .desc("number of " + cstr + " accesses(hits+misses)") - .flags(total | nozero | nonan) - ; - accesses[access_idx] = hits[access_idx] + misses[access_idx]; - - for (int i = 0; i < system->maxMasters(); i++) { - accesses[access_idx].subname(i, system->getMasterName(i)); - } + // MSHR miss rate formulas + mshrMissRate.flags(total | nozero | nonan); + mshrMissRate = mshr_misses / accesses; + + for (int i = 0; i < max_masters; i++) { + mshrMissRate.subname(i, system->getMasterName(i)); } - demandAccesses - .name(name() + ".demand_accesses") - .desc("number of demand (read+write) accesses") - .flags(total | nozero | nonan) - ; + // mshrMiss latency formulas + avgMshrMissLatency.flags(total | nozero | nonan); + avgMshrMissLatency = mshr_miss_latency / mshr_misses; + for (int i = 0; i < max_masters; i++) { + avgMshrMissLatency.subname(i, system->getMasterName(i)); + } + + // mshrUncacheable latency formulas + avgMshrUncacheableLatency.flags(total | nozero | nonan); + avgMshrUncacheableLatency = mshr_uncacheable_lat / mshr_uncacheable; + for (int i = 0; i < max_masters; i++) { + avgMshrUncacheableLatency.subname(i, system->getMasterName(i)); + } +} + +BaseCache::CacheStats::CacheStats(BaseCache &c) + : Stats::Group(&c), cache(c), + + demandHits(this, "demand_hits", "number of demand (read+write) hits"), + + overallHits(this, "overall_hits", "number of overall hits"), + demandMisses(this, "demand_misses", + "number of demand (read+write) misses"), + overallMisses(this, "overall_misses", "number of overall misses"), + demandMissLatency(this, "demand_miss_latency", + "number of demand (read+write) miss cycles"), + overallMissLatency(this, "overall_miss_latency", + "number of overall miss cycles"), + demandAccesses(this, "demand_accesses", + "number of demand (read+write) accesses"), + overallAccesses(this, "overall_accesses", + "number of overall (read+write) accesses"), + demandMissRate(this, "demand_miss_rate", + "miss rate for demand accesses"), + 
overallMissRate(this, "overall_miss_rate", + "miss rate for overall accesses"), + demandAvgMissLatency(this, "demand_avg_miss_latency", + "average overall miss latency"), + overallAvgMissLatency(this, "overall_avg_miss_latency", + "average overall miss latency"), + blocked_cycles(this, "blocked_cycles", + "number of cycles access was blocked"), + blocked_causes(this, "blocked", "number of cycles access was blocked"), + avg_blocked(this, "avg_blocked_cycles", + "average number of cycles each access was blocked"), + unusedPrefetches(this, "unused_prefetches", + "number of HardPF blocks evicted w/o reference"), + writebacks(this, "writebacks", "number of writebacks"), + demandMshrHits(this, "demand_mshr_hits", + "number of demand (read+write) MSHR hits"), + overallMshrHits(this, "overall_mshr_hits", + "number of overall MSHR hits"), + demandMshrMisses(this, "demand_mshr_misses", + "number of demand (read+write) MSHR misses"), + overallMshrMisses(this, "overall_mshr_misses", + "number of overall MSHR misses"), + overallMshrUncacheable(this, "overall_mshr_uncacheable_misses", + "number of overall MSHR uncacheable misses"), + demandMshrMissLatency(this, "demand_mshr_miss_latency", + "number of demand (read+write) MSHR miss cycles"), + overallMshrMissLatency(this, "overall_mshr_miss_latency", + "number of overall MSHR miss cycles"), + overallMshrUncacheableLatency(this, "overall_mshr_uncacheable_latency", + "number of overall MSHR uncacheable cycles"), + demandMshrMissRate(this, "demand_mshr_miss_rate", + "mshr miss rate for demand accesses"), + overallMshrMissRate(this, "overall_mshr_miss_rate", + "mshr miss rate for overall accesses"), + demandAvgMshrMissLatency(this, "demand_avg_mshr_miss_latency", + "average overall mshr miss latency"), + overallAvgMshrMissLatency(this, "overall_avg_mshr_miss_latency", + "average overall mshr miss latency"), + overallAvgMshrUncacheableLatency( + this, "overall_avg_mshr_uncacheable_latency", + "average overall mshr uncacheable 
latency"), + replacements(this, "replacements", "number of replacements"), + + dataExpansions(this, "data_expansions", "number of data expansions"), + cmd(MemCmd::NUM_MEM_CMDS) +{ + for (int idx = 0; idx < MemCmd::NUM_MEM_CMDS; ++idx) + cmd[idx].reset(new CacheCmdStats(c, MemCmd(idx).toString())); +} + +void +BaseCache::CacheStats::regStats() +{ + using namespace Stats; + + Stats::Group::regStats(); + + System *system = cache.system; + const auto max_masters = system->maxMasters(); + + for (auto &cs : cmd) + cs->regStatsFromParent(); + +// These macros make it easier to sum the right subset of commands and +// to change the subset of commands that are considered "demand" vs +// "non-demand" +#define SUM_DEMAND(s) \ + (cmd[MemCmd::ReadReq]->s + cmd[MemCmd::WriteReq]->s + \ + cmd[MemCmd::WriteLineReq]->s + cmd[MemCmd::ReadExReq]->s + \ + cmd[MemCmd::ReadCleanReq]->s + cmd[MemCmd::ReadSharedReq]->s) + +// should writebacks be included here? prior code was inconsistent... +#define SUM_NON_DEMAND(s) \ + (cmd[MemCmd::SoftPFReq]->s + cmd[MemCmd::HardPFReq]->s + \ + cmd[MemCmd::SoftPFExReq]->s) + + demandHits.flags(total | nozero | nonan); + demandHits = SUM_DEMAND(hits); + for (int i = 0; i < max_masters; i++) { + demandHits.subname(i, system->getMasterName(i)); + } + + overallHits.flags(total | nozero | nonan); + overallHits = demandHits + SUM_NON_DEMAND(hits); + for (int i = 0; i < max_masters; i++) { + overallHits.subname(i, system->getMasterName(i)); + } + + demandMisses.flags(total | nozero | nonan); + demandMisses = SUM_DEMAND(misses); + for (int i = 0; i < max_masters; i++) { + demandMisses.subname(i, system->getMasterName(i)); + } + + overallMisses.flags(total | nozero | nonan); + overallMisses = demandMisses + SUM_NON_DEMAND(misses); + for (int i = 0; i < max_masters; i++) { + overallMisses.subname(i, system->getMasterName(i)); + } + + demandMissLatency.flags(total | nozero | nonan); + demandMissLatency = SUM_DEMAND(missLatency); + for (int i = 0; i < 
max_masters; i++) { + demandMissLatency.subname(i, system->getMasterName(i)); + } + + overallMissLatency.flags(total | nozero | nonan); + overallMissLatency = demandMissLatency + SUM_NON_DEMAND(missLatency); + for (int i = 0; i < max_masters; i++) { + overallMissLatency.subname(i, system->getMasterName(i)); + } + + demandAccesses.flags(total | nozero | nonan); demandAccesses = demandHits + demandMisses; - for (int i = 0; i < system->maxMasters(); i++) { + for (int i = 0; i < max_masters; i++) { demandAccesses.subname(i, system->getMasterName(i)); } - overallAccesses - .name(name() + ".overall_accesses") - .desc("number of overall (read+write) accesses") - .flags(total | nozero | nonan) - ; + overallAccesses.flags(total | nozero | nonan); overallAccesses = overallHits + overallMisses; - for (int i = 0; i < system->maxMasters(); i++) { + for (int i = 0; i < max_masters; i++) { overallAccesses.subname(i, system->getMasterName(i)); } - // miss rate formulas - for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) { - MemCmd cmd(access_idx); - const string &cstr = cmd.toString(); - - missRate[access_idx] - .name(name() + "." 
+ cstr + "_miss_rate") - .desc("miss rate for " + cstr + " accesses") - .flags(total | nozero | nonan) - ; - missRate[access_idx] = misses[access_idx] / accesses[access_idx]; - - for (int i = 0; i < system->maxMasters(); i++) { - missRate[access_idx].subname(i, system->getMasterName(i)); - } - } - - demandMissRate - .name(name() + ".demand_miss_rate") - .desc("miss rate for demand accesses") - .flags(total | nozero | nonan) - ; + demandMissRate.flags(total | nozero | nonan); demandMissRate = demandMisses / demandAccesses; - for (int i = 0; i < system->maxMasters(); i++) { + for (int i = 0; i < max_masters; i++) { demandMissRate.subname(i, system->getMasterName(i)); } - overallMissRate - .name(name() + ".overall_miss_rate") - .desc("miss rate for overall accesses") - .flags(total | nozero | nonan) - ; + overallMissRate.flags(total | nozero | nonan); overallMissRate = overallMisses / overallAccesses; - for (int i = 0; i < system->maxMasters(); i++) { + for (int i = 0; i < max_masters; i++) { overallMissRate.subname(i, system->getMasterName(i)); } - // miss latency formulas - for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) { - MemCmd cmd(access_idx); - const string &cstr = cmd.toString(); - - avgMissLatency[access_idx] - .name(name() + "." 
+ cstr + "_avg_miss_latency") - .desc("average " + cstr + " miss latency") - .flags(total | nozero | nonan) - ; - avgMissLatency[access_idx] = - missLatency[access_idx] / misses[access_idx]; - - for (int i = 0; i < system->maxMasters(); i++) { - avgMissLatency[access_idx].subname(i, system->getMasterName(i)); - } - } - - demandAvgMissLatency - .name(name() + ".demand_avg_miss_latency") - .desc("average overall miss latency") - .flags(total | nozero | nonan) - ; + demandAvgMissLatency.flags(total | nozero | nonan); demandAvgMissLatency = demandMissLatency / demandMisses; - for (int i = 0; i < system->maxMasters(); i++) { + for (int i = 0; i < max_masters; i++) { demandAvgMissLatency.subname(i, system->getMasterName(i)); } - overallAvgMissLatency - .name(name() + ".overall_avg_miss_latency") - .desc("average overall miss latency") - .flags(total | nozero | nonan) - ; + overallAvgMissLatency.flags(total | nozero | nonan); overallAvgMissLatency = overallMissLatency / overallMisses; - for (int i = 0; i < system->maxMasters(); i++) { + for (int i = 0; i < max_masters; i++) { overallAvgMissLatency.subname(i, system->getMasterName(i)); } blocked_cycles.init(NUM_BLOCKED_CAUSES); blocked_cycles - .name(name() + ".blocked_cycles") - .desc("number of cycles access was blocked") .subname(Blocked_NoMSHRs, "no_mshrs") .subname(Blocked_NoTargets, "no_targets") ; @@ -2122,320 +2223,111 @@ BaseCache::regStats() blocked_causes.init(NUM_BLOCKED_CAUSES); blocked_causes - .name(name() + ".blocked") - .desc("number of cycles access was blocked") .subname(Blocked_NoMSHRs, "no_mshrs") .subname(Blocked_NoTargets, "no_targets") ; avg_blocked - .name(name() + ".avg_blocked_cycles") - .desc("average number of cycles each access was blocked") .subname(Blocked_NoMSHRs, "no_mshrs") .subname(Blocked_NoTargets, "no_targets") ; - avg_blocked = blocked_cycles / blocked_causes; - unusedPrefetches - .name(name() + ".unused_prefetches") - .desc("number of HardPF blocks evicted w/o reference") - 
.flags(nozero) - ; + unusedPrefetches.flags(nozero); writebacks - .init(system->maxMasters()) - .name(name() + ".writebacks") - .desc("number of writebacks") + .init(max_masters) .flags(total | nozero | nonan) ; - for (int i = 0; i < system->maxMasters(); i++) { + for (int i = 0; i < max_masters; i++) { writebacks.subname(i, system->getMasterName(i)); } - // MSHR statistics - // MSHR hit statistics - for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) { - MemCmd cmd(access_idx); - const string &cstr = cmd.toString(); - - mshr_hits[access_idx] - .init(system->maxMasters()) - .name(name() + "." + cstr + "_mshr_hits") - .desc("number of " + cstr + " MSHR hits") - .flags(total | nozero | nonan) - ; - for (int i = 0; i < system->maxMasters(); i++) { - mshr_hits[access_idx].subname(i, system->getMasterName(i)); - } - } - - demandMshrHits - .name(name() + ".demand_mshr_hits") - .desc("number of demand (read+write) MSHR hits") - .flags(total | nozero | nonan) - ; + demandMshrHits.flags(total | nozero | nonan); demandMshrHits = SUM_DEMAND(mshr_hits); - for (int i = 0; i < system->maxMasters(); i++) { + for (int i = 0; i < max_masters; i++) { demandMshrHits.subname(i, system->getMasterName(i)); } - overallMshrHits - .name(name() + ".overall_mshr_hits") - .desc("number of overall MSHR hits") - .flags(total | nozero | nonan) - ; + overallMshrHits.flags(total | nozero | nonan); overallMshrHits = demandMshrHits + SUM_NON_DEMAND(mshr_hits); - for (int i = 0; i < system->maxMasters(); i++) { + for (int i = 0; i < max_masters; i++) { overallMshrHits.subname(i, system->getMasterName(i)); } - // MSHR miss statistics - for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) { - MemCmd cmd(access_idx); - const string &cstr = cmd.toString(); - - mshr_misses[access_idx] - .init(system->maxMasters()) - .name(name() + "." 
+ cstr + "_mshr_misses") - .desc("number of " + cstr + " MSHR misses") - .flags(total | nozero | nonan) - ; - for (int i = 0; i < system->maxMasters(); i++) { - mshr_misses[access_idx].subname(i, system->getMasterName(i)); - } - } - - demandMshrMisses - .name(name() + ".demand_mshr_misses") - .desc("number of demand (read+write) MSHR misses") - .flags(total | nozero | nonan) - ; + demandMshrMisses.flags(total | nozero | nonan); demandMshrMisses = SUM_DEMAND(mshr_misses); - for (int i = 0; i < system->maxMasters(); i++) { + for (int i = 0; i < max_masters; i++) { demandMshrMisses.subname(i, system->getMasterName(i)); } - overallMshrMisses - .name(name() + ".overall_mshr_misses") - .desc("number of overall MSHR misses") - .flags(total | nozero | nonan) - ; + overallMshrMisses.flags(total | nozero | nonan); overallMshrMisses = demandMshrMisses + SUM_NON_DEMAND(mshr_misses); - for (int i = 0; i < system->maxMasters(); i++) { + for (int i = 0; i < max_masters; i++) { overallMshrMisses.subname(i, system->getMasterName(i)); } - // MSHR miss latency statistics - for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) { - MemCmd cmd(access_idx); - const string &cstr = cmd.toString(); - - mshr_miss_latency[access_idx] - .init(system->maxMasters()) - .name(name() + "." 
+ cstr + "_mshr_miss_latency") - .desc("number of " + cstr + " MSHR miss cycles") - .flags(total | nozero | nonan) - ; - for (int i = 0; i < system->maxMasters(); i++) { - mshr_miss_latency[access_idx].subname(i, system->getMasterName(i)); - } - } - - demandMshrMissLatency - .name(name() + ".demand_mshr_miss_latency") - .desc("number of demand (read+write) MSHR miss cycles") - .flags(total | nozero | nonan) - ; + demandMshrMissLatency.flags(total | nozero | nonan); demandMshrMissLatency = SUM_DEMAND(mshr_miss_latency); - for (int i = 0; i < system->maxMasters(); i++) { + for (int i = 0; i < max_masters; i++) { demandMshrMissLatency.subname(i, system->getMasterName(i)); } - overallMshrMissLatency - .name(name() + ".overall_mshr_miss_latency") - .desc("number of overall MSHR miss cycles") - .flags(total | nozero | nonan) - ; + overallMshrMissLatency.flags(total | nozero | nonan); overallMshrMissLatency = demandMshrMissLatency + SUM_NON_DEMAND(mshr_miss_latency); - for (int i = 0; i < system->maxMasters(); i++) { + for (int i = 0; i < max_masters; i++) { overallMshrMissLatency.subname(i, system->getMasterName(i)); } - // MSHR uncacheable statistics - for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) { - MemCmd cmd(access_idx); - const string &cstr = cmd.toString(); - - mshr_uncacheable[access_idx] - .init(system->maxMasters()) - .name(name() + "." 
+ cstr + "_mshr_uncacheable") - .desc("number of " + cstr + " MSHR uncacheable") - .flags(total | nozero | nonan) - ; - for (int i = 0; i < system->maxMasters(); i++) { - mshr_uncacheable[access_idx].subname(i, system->getMasterName(i)); - } - } - - overallMshrUncacheable - .name(name() + ".overall_mshr_uncacheable_misses") - .desc("number of overall MSHR uncacheable misses") - .flags(total | nozero | nonan) - ; + overallMshrUncacheable.flags(total | nozero | nonan); overallMshrUncacheable = SUM_DEMAND(mshr_uncacheable) + SUM_NON_DEMAND(mshr_uncacheable); - for (int i = 0; i < system->maxMasters(); i++) { + for (int i = 0; i < max_masters; i++) { overallMshrUncacheable.subname(i, system->getMasterName(i)); } - // MSHR miss latency statistics - for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) { - MemCmd cmd(access_idx); - const string &cstr = cmd.toString(); - - mshr_uncacheable_lat[access_idx] - .init(system->maxMasters()) - .name(name() + "." + cstr + "_mshr_uncacheable_latency") - .desc("number of " + cstr + " MSHR uncacheable cycles") - .flags(total | nozero | nonan) - ; - for (int i = 0; i < system->maxMasters(); i++) { - mshr_uncacheable_lat[access_idx].subname( - i, system->getMasterName(i)); - } - } - overallMshrUncacheableLatency - .name(name() + ".overall_mshr_uncacheable_latency") - .desc("number of overall MSHR uncacheable cycles") - .flags(total | nozero | nonan) - ; + overallMshrUncacheableLatency.flags(total | nozero | nonan); overallMshrUncacheableLatency = SUM_DEMAND(mshr_uncacheable_lat) + SUM_NON_DEMAND(mshr_uncacheable_lat); - for (int i = 0; i < system->maxMasters(); i++) { + for (int i = 0; i < max_masters; i++) { overallMshrUncacheableLatency.subname(i, system->getMasterName(i)); } - // MSHR miss rate formulas - for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) { - MemCmd cmd(access_idx); - const string &cstr = cmd.toString(); - - mshrMissRate[access_idx] - .name(name() + "." 
+ cstr + "_mshr_miss_rate") - .desc("mshr miss rate for " + cstr + " accesses") - .flags(total | nozero | nonan) - ; - mshrMissRate[access_idx] = - mshr_misses[access_idx] / accesses[access_idx]; - - for (int i = 0; i < system->maxMasters(); i++) { - mshrMissRate[access_idx].subname(i, system->getMasterName(i)); - } - } - - demandMshrMissRate - .name(name() + ".demand_mshr_miss_rate") - .desc("mshr miss rate for demand accesses") - .flags(total | nozero | nonan) - ; + demandMshrMissRate.flags(total | nozero | nonan); demandMshrMissRate = demandMshrMisses / demandAccesses; - for (int i = 0; i < system->maxMasters(); i++) { + for (int i = 0; i < max_masters; i++) { demandMshrMissRate.subname(i, system->getMasterName(i)); } - overallMshrMissRate - .name(name() + ".overall_mshr_miss_rate") - .desc("mshr miss rate for overall accesses") - .flags(total | nozero | nonan) - ; + overallMshrMissRate.flags(total | nozero | nonan); overallMshrMissRate = overallMshrMisses / overallAccesses; - for (int i = 0; i < system->maxMasters(); i++) { + for (int i = 0; i < max_masters; i++) { overallMshrMissRate.subname(i, system->getMasterName(i)); } - // mshrMiss latency formulas - for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) { - MemCmd cmd(access_idx); - const string &cstr = cmd.toString(); - - avgMshrMissLatency[access_idx] - .name(name() + "." 
+ cstr + "_avg_mshr_miss_latency") - .desc("average " + cstr + " mshr miss latency") - .flags(total | nozero | nonan) - ; - avgMshrMissLatency[access_idx] = - mshr_miss_latency[access_idx] / mshr_misses[access_idx]; - - for (int i = 0; i < system->maxMasters(); i++) { - avgMshrMissLatency[access_idx].subname( - i, system->getMasterName(i)); - } - } - - demandAvgMshrMissLatency - .name(name() + ".demand_avg_mshr_miss_latency") - .desc("average overall mshr miss latency") - .flags(total | nozero | nonan) - ; + demandAvgMshrMissLatency.flags(total | nozero | nonan); demandAvgMshrMissLatency = demandMshrMissLatency / demandMshrMisses; - for (int i = 0; i < system->maxMasters(); i++) { + for (int i = 0; i < max_masters; i++) { demandAvgMshrMissLatency.subname(i, system->getMasterName(i)); } - overallAvgMshrMissLatency - .name(name() + ".overall_avg_mshr_miss_latency") - .desc("average overall mshr miss latency") - .flags(total | nozero | nonan) - ; + overallAvgMshrMissLatency.flags(total | nozero | nonan); overallAvgMshrMissLatency = overallMshrMissLatency / overallMshrMisses; - for (int i = 0; i < system->maxMasters(); i++) { + for (int i = 0; i < max_masters; i++) { overallAvgMshrMissLatency.subname(i, system->getMasterName(i)); } - // mshrUncacheable latency formulas - for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) { - MemCmd cmd(access_idx); - const string &cstr = cmd.toString(); - - avgMshrUncacheableLatency[access_idx] - .name(name() + "." 
+ cstr + "_avg_mshr_uncacheable_latency") - .desc("average " + cstr + " mshr uncacheable latency") - .flags(total | nozero | nonan) - ; - avgMshrUncacheableLatency[access_idx] = - mshr_uncacheable_lat[access_idx] / mshr_uncacheable[access_idx]; - - for (int i = 0; i < system->maxMasters(); i++) { - avgMshrUncacheableLatency[access_idx].subname( - i, system->getMasterName(i)); - } - } - - overallAvgMshrUncacheableLatency - .name(name() + ".overall_avg_mshr_uncacheable_latency") - .desc("average overall mshr uncacheable latency") - .flags(total | nozero | nonan) - ; + overallAvgMshrUncacheableLatency.flags(total | nozero | nonan); overallAvgMshrUncacheableLatency = overallMshrUncacheableLatency / overallMshrUncacheable; - for (int i = 0; i < system->maxMasters(); i++) { + for (int i = 0; i < max_masters; i++) { overallAvgMshrUncacheableLatency.subname(i, system->getMasterName(i)); } - replacements - .name(name() + ".replacements") - .desc("number of replacements") - ; - - dataExpansions - .name(name() + ".data_expansions") - .desc("number of data expansions") - .flags(nozero | nonan) - ; + dataExpansions.flags(nozero | nonan); } void diff --git a/src/mem/cache/base.hh b/src/mem/cache/base.hh index ceb356a1a..cd467c8ad 100644 --- a/src/mem/cache/base.hh +++ b/src/mem/cache/base.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2013, 2015-2016, 2018 ARM Limited + * Copyright (c) 2012-2013, 2015-2016, 2018-2019 ARM Limited * All rights reserved. * * The license below extends only to copyright in the software and shall @@ -912,137 +912,155 @@ class BaseCache : public ClockedObject /** System we are currently operating in. */ System *system; - // Statistics - /** - * @addtogroup CacheStatistics - * @{ - */ - - /** Number of hits per thread for each type of command. - @sa Packet::Command */ - Stats::Vector hits[MemCmd::NUM_MEM_CMDS]; - /** Number of hits for demand accesses. */ - Stats::Formula demandHits; - /** Number of hit for all accesses. 
*/ - Stats::Formula overallHits; - - /** Number of misses per thread for each type of command. - @sa Packet::Command */ - Stats::Vector misses[MemCmd::NUM_MEM_CMDS]; - /** Number of misses for demand accesses. */ - Stats::Formula demandMisses; - /** Number of misses for all accesses. */ - Stats::Formula overallMisses; - - /** - * Total number of cycles per thread/command spent waiting for a miss. - * Used to calculate the average miss latency. - */ - Stats::Vector missLatency[MemCmd::NUM_MEM_CMDS]; - /** Total number of cycles spent waiting for demand misses. */ - Stats::Formula demandMissLatency; - /** Total number of cycles spent waiting for all misses. */ - Stats::Formula overallMissLatency; - - /** The number of accesses per command and thread. */ - Stats::Formula accesses[MemCmd::NUM_MEM_CMDS]; - /** The number of demand accesses. */ - Stats::Formula demandAccesses; - /** The number of overall accesses. */ - Stats::Formula overallAccesses; - - /** The miss rate per command and thread. */ - Stats::Formula missRate[MemCmd::NUM_MEM_CMDS]; - /** The miss rate of all demand accesses. */ - Stats::Formula demandMissRate; - /** The miss rate for all accesses. */ - Stats::Formula overallMissRate; - - /** The average miss latency per command and thread. */ - Stats::Formula avgMissLatency[MemCmd::NUM_MEM_CMDS]; - /** The average miss latency for demand misses. */ - Stats::Formula demandAvgMissLatency; - /** The average miss latency for all misses. */ - Stats::Formula overallAvgMissLatency; - - /** The total number of cycles blocked for each blocked cause. */ - Stats::Vector blocked_cycles; - /** The number of times this cache blocked for each blocked cause. */ - Stats::Vector blocked_causes; - - /** The average number of cycles blocked for each blocked cause. */ - Stats::Formula avg_blocked; - - /** The number of times a HW-prefetched block is evicted w/o reference. */ - Stats::Scalar unusedPrefetches; - - /** Number of blocks written back per thread. 
*/ - Stats::Vector writebacks; - - /** Number of misses that hit in the MSHRs per command and thread. */ - Stats::Vector mshr_hits[MemCmd::NUM_MEM_CMDS]; - /** Demand misses that hit in the MSHRs. */ - Stats::Formula demandMshrHits; - /** Total number of misses that hit in the MSHRs. */ - Stats::Formula overallMshrHits; - - /** Number of misses that miss in the MSHRs, per command and thread. */ - Stats::Vector mshr_misses[MemCmd::NUM_MEM_CMDS]; - /** Demand misses that miss in the MSHRs. */ - Stats::Formula demandMshrMisses; - /** Total number of misses that miss in the MSHRs. */ - Stats::Formula overallMshrMisses; - - /** Number of misses that miss in the MSHRs, per command and thread. */ - Stats::Vector mshr_uncacheable[MemCmd::NUM_MEM_CMDS]; - /** Total number of misses that miss in the MSHRs. */ - Stats::Formula overallMshrUncacheable; - - /** Total cycle latency of each MSHR miss, per command and thread. */ - Stats::Vector mshr_miss_latency[MemCmd::NUM_MEM_CMDS]; - /** Total cycle latency of demand MSHR misses. */ - Stats::Formula demandMshrMissLatency; - /** Total cycle latency of overall MSHR misses. */ - Stats::Formula overallMshrMissLatency; - - /** Total cycle latency of each MSHR miss, per command and thread. */ - Stats::Vector mshr_uncacheable_lat[MemCmd::NUM_MEM_CMDS]; - /** Total cycle latency of overall MSHR misses. */ - Stats::Formula overallMshrUncacheableLatency; - - /** The miss rate in the MSHRs pre command and thread. */ - Stats::Formula mshrMissRate[MemCmd::NUM_MEM_CMDS]; - /** The demand miss rate in the MSHRs. */ - Stats::Formula demandMshrMissRate; - /** The overall miss rate in the MSHRs. */ - Stats::Formula overallMshrMissRate; - - /** The average latency of an MSHR miss, per command and thread. */ - Stats::Formula avgMshrMissLatency[MemCmd::NUM_MEM_CMDS]; - /** The average latency of a demand MSHR miss. */ - Stats::Formula demandAvgMshrMissLatency; - /** The average overall latency of an MSHR miss. 
*/ - Stats::Formula overallAvgMshrMissLatency; - - /** The average latency of an MSHR miss, per command and thread. */ - Stats::Formula avgMshrUncacheableLatency[MemCmd::NUM_MEM_CMDS]; - /** The average overall latency of an MSHR miss. */ - Stats::Formula overallAvgMshrUncacheableLatency; - - /** Number of replacements of valid blocks. */ - Stats::Scalar replacements; - - /** Number of data expansions. */ - Stats::Scalar dataExpansions; - - /** - * @} - */ - - /** - * Register stats for this object. - */ - void regStats() override; + struct CacheCmdStats : public Stats::Group + { + CacheCmdStats(BaseCache &c, const std::string &name); + + /** + * Callback to register stats from parent + * CacheStats::regStats(). We can't use the normal flow since + * there is no guaranteed order and CacheStats::regStats() + * needs to rely on these stats being initialised. + */ + void regStatsFromParent(); + + const BaseCache &cache; + + /** Number of hits per thread for each type of command. + @sa Packet::Command */ + Stats::Vector hits; + /** Number of misses per thread for each type of command. + @sa Packet::Command */ + Stats::Vector misses; + /** + * Total number of cycles per thread/command spent waiting for a miss. + * Used to calculate the average miss latency. + */ + Stats::Vector missLatency; + /** The number of accesses per command and thread. */ + Stats::Formula accesses; + /** The miss rate per command and thread. */ + Stats::Formula missRate; + /** The average miss latency per command and thread. */ + Stats::Formula avgMissLatency; + /** Number of misses that hit in the MSHRs per command and thread. */ + Stats::Vector mshr_hits; + /** Number of misses that miss in the MSHRs, per command and thread. */ + Stats::Vector mshr_misses; + /** Number of misses that miss in the MSHRs, per command and thread. */ + Stats::Vector mshr_uncacheable; + /** Total cycle latency of each MSHR miss, per command and thread.
*/ + Stats::Vector mshr_miss_latency; + /** Total cycle latency of each MSHR miss, per command and thread. */ + Stats::Vector mshr_uncacheable_lat; + /** The miss rate in the MSHRs per command and thread. */ + Stats::Formula mshrMissRate; + /** The average latency of an MSHR miss, per command and thread. */ + Stats::Formula avgMshrMissLatency; + /** The average latency of an MSHR miss, per command and thread. */ + Stats::Formula avgMshrUncacheableLatency; + }; + + struct CacheStats : public Stats::Group + { + CacheStats(BaseCache &c); + + void regStats() override; + + CacheCmdStats &cmdStats(const PacketPtr p) { + return *cmd[p->cmdToIndex()]; + } + + const BaseCache &cache; + + /** Number of hits for demand accesses. */ + Stats::Formula demandHits; + /** Number of hits for all accesses. */ + Stats::Formula overallHits; + + /** Number of misses for demand accesses. */ + Stats::Formula demandMisses; + /** Number of misses for all accesses. */ + Stats::Formula overallMisses; + + /** Total number of cycles spent waiting for demand misses. */ + Stats::Formula demandMissLatency; + /** Total number of cycles spent waiting for all misses. */ + Stats::Formula overallMissLatency; + + /** The number of demand accesses. */ + Stats::Formula demandAccesses; + /** The number of overall accesses. */ + Stats::Formula overallAccesses; + + /** The miss rate of all demand accesses. */ + Stats::Formula demandMissRate; + /** The miss rate for all accesses. */ + Stats::Formula overallMissRate; + + /** The average miss latency for demand misses. */ + Stats::Formula demandAvgMissLatency; + /** The average miss latency for all misses. */ + Stats::Formula overallAvgMissLatency; + + /** The total number of cycles blocked for each blocked cause. */ + Stats::Vector blocked_cycles; + /** The number of times this cache blocked for each blocked cause. */ + Stats::Vector blocked_causes; + + /** The average number of cycles blocked for each blocked cause.
*/ + Stats::Formula avg_blocked; + + /** The number of times a HW-prefetched block is evicted w/o + * reference. */ + Stats::Scalar unusedPrefetches; + + /** Number of blocks written back per thread. */ + Stats::Vector writebacks; + + /** Demand misses that hit in the MSHRs. */ + Stats::Formula demandMshrHits; + /** Total number of misses that hit in the MSHRs. */ + Stats::Formula overallMshrHits; + + /** Demand misses that miss in the MSHRs. */ + Stats::Formula demandMshrMisses; + /** Total number of misses that miss in the MSHRs. */ + Stats::Formula overallMshrMisses; + + /** Total number of misses that miss in the MSHRs. */ + Stats::Formula overallMshrUncacheable; + + /** Total cycle latency of demand MSHR misses. */ + Stats::Formula demandMshrMissLatency; + /** Total cycle latency of overall MSHR misses. */ + Stats::Formula overallMshrMissLatency; + + /** Total cycle latency of overall MSHR misses. */ + Stats::Formula overallMshrUncacheableLatency; + + /** The demand miss rate in the MSHRs. */ + Stats::Formula demandMshrMissRate; + /** The overall miss rate in the MSHRs. */ + Stats::Formula overallMshrMissRate; + + /** The average latency of a demand MSHR miss. */ + Stats::Formula demandAvgMshrMissLatency; + /** The average overall latency of an MSHR miss. */ + Stats::Formula overallAvgMshrMissLatency; + + /** The average overall latency of an MSHR miss. */ + Stats::Formula overallAvgMshrUncacheableLatency; + + /** Number of replacements of valid blocks. */ + Stats::Scalar replacements; + + /** Number of data expansions. */ + Stats::Scalar dataExpansions; + + /** Per-command statistics */ + std::vector<std::unique_ptr<CacheCmdStats>> cmd; + } stats; /** Registers probes.
*/ void regProbePoints() override; @@ -1135,7 +1153,7 @@ class BaseCache : public ClockedObject { uint8_t flag = 1 << cause; if (blocked == 0) { - blocked_causes[cause]++; + stats.blocked_causes[cause]++; blockedCycle = curCycle(); cpuSidePort.setBlocked(); } @@ -1156,7 +1174,7 @@ class BaseCache : public ClockedObject blocked &= ~flag; DPRINTF(Cache,"Unblocking for cause %d, mask=%d\n", cause, blocked); if (blocked == 0) { - blocked_cycles[cause] += curCycle() - blockedCycle; + stats.blocked_cycles[cause] += curCycle() - blockedCycle; cpuSidePort.clearBlocked(); } } @@ -1194,7 +1212,7 @@ class BaseCache : public ClockedObject void incMissCount(PacketPtr pkt) { assert(pkt->req->masterId() < system->maxMasters()); - misses[pkt->cmdToIndex()][pkt->req->masterId()]++; + stats.cmdStats(pkt).misses[pkt->req->masterId()]++; pkt->req->incAccessDepth(); if (missCount) { --missCount; @@ -1205,8 +1223,7 @@ class BaseCache : public ClockedObject void incHitCount(PacketPtr pkt) { assert(pkt->req->masterId() < system->maxMasters()); - hits[pkt->cmdToIndex()][pkt->req->masterId()]++; - + stats.cmdStats(pkt).hits[pkt->req->masterId()]++; } /** diff --git a/src/mem/cache/cache.cc b/src/mem/cache/cache.cc index b054cd43d..e7dd5efc9 100644 --- a/src/mem/cache/cache.cc +++ b/src/mem/cache/cache.cc @@ -334,7 +334,7 @@ Cache::handleTimingReqMiss(PacketPtr pkt, CacheBlk *blk, Tick forward_time, // should have flushed and have no valid block assert(!blk || !blk->isValid()); - mshr_uncacheable[pkt->cmdToIndex()][pkt->req->masterId()]++; + stats.cmdStats(pkt).mshr_uncacheable[pkt->req->masterId()]++; if (pkt->isWrite()) { allocateWriteBuffer(pkt, forward_time); @@ -776,7 +776,8 @@ Cache::serviceMSHRTargets(MSHR *mshr, const PacketPtr pkt, CacheBlk *blk) assert(!tgt_pkt->req->isUncacheable()); assert(tgt_pkt->req->masterId() < system->maxMasters()); - missLatency[tgt_pkt->cmdToIndex()][tgt_pkt->req->masterId()] += + stats.cmdStats(tgt_pkt) + .missLatency[tgt_pkt->req->masterId()] += 
completion_time - target.recvTime; } else if (pkt->cmd == MemCmd::UpgradeFailResp) { // failed StoreCond upgrade diff --git a/src/mem/cache/noncoherent_cache.cc b/src/mem/cache/noncoherent_cache.cc index 9a2a1db9d..f25f76846 100644 --- a/src/mem/cache/noncoherent_cache.cc +++ b/src/mem/cache/noncoherent_cache.cc @@ -278,7 +278,7 @@ NoncoherentCache::serviceMSHRTargets(MSHR *mshr, const PacketPtr pkt, (transfer_offset ? pkt->payloadDelay : 0); assert(tgt_pkt->req->masterId() < system->maxMasters()); - missLatency[tgt_pkt->cmdToIndex()][tgt_pkt->req->masterId()] += + stats.cmdStats(tgt_pkt).missLatency[tgt_pkt->req->masterId()] += completion_time - target.recvTime; tgt_pkt->makeTimingResponse(); diff --git a/src/mem/cache/tags/base.cc b/src/mem/cache/tags/base.cc index 4855ebd51..cbfdff2df 100644 --- a/src/mem/cache/tags/base.cc +++ b/src/mem/cache/tags/base.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013,2016,2018 ARM Limited + * Copyright (c) 2013,2016,2018-2019 ARM Limited * All rights reserved. 
* * The license below extends only to copyright in the software and shall @@ -64,8 +64,10 @@ BaseTags::BaseTags(const Params *p) system(p->system), indexingPolicy(p->indexing_policy), warmupBound((p->warmup_percentage/100.0) * (p->size / p->block_size)), warmedUp(false), numBlocks(p->size / p->block_size), - dataBlks(new uint8_t[p->size]) // Allocate data storage in one big chunk + dataBlks(new uint8_t[p->size]), // Allocate data storage in one big chunk + stats(*this) { + registerExitCallback(new BaseTagsCallback(this)); } ReplaceableEntry* @@ -108,21 +110,21 @@ BaseTags::insertBlock(const PacketPtr pkt, CacheBlk *blk) // Deal with what we are bringing in MasterID master_id = pkt->req->masterId(); assert(master_id < system->maxMasters()); - occupancies[master_id]++; + stats.occupancies[master_id]++; // Insert block with tag, src master id and task id blk->insert(extractTag(pkt->getAddr()), pkt->isSecure(), master_id, pkt->req->taskId()); // Check if cache warm up is done - if (!warmedUp && tagsInUse.value() >= warmupBound) { + if (!warmedUp && stats.tagsInUse.value() >= warmupBound) { warmedUp = true; - warmupCycle = curTick(); + stats.warmupCycle = curTick(); } // We only need to write into one tag and one data block. 
- tagAccesses += 1; - dataAccesses += 1; + stats.tagAccesses += 1; + stats.dataAccesses += 1; } Addr @@ -135,8 +137,8 @@ void BaseTags::cleanupRefsVisitor(CacheBlk &blk) { if (blk.isValid()) { - totalRefs += blk.refCount; - ++sampledRefs; + stats.totalRefs += blk.refCount; + ++stats.sampledRefs; } } @@ -151,7 +153,7 @@ BaseTags::computeStatsVisitor(CacheBlk &blk) { if (blk.isValid()) { assert(blk.task_id < ContextSwitchTaskId::NumTaskId); - occupanciesTaskId[blk.task_id]++; + stats.occupanciesTaskId[blk.task_id]++; assert(blk.tickInserted <= curTick()); Tick age = curTick() - blk.tickInserted; @@ -167,7 +169,7 @@ BaseTags::computeStatsVisitor(CacheBlk &blk) } else age_index = 4; // >10ms - ageTaskId[blk.task_id][age_index]++; + stats.ageTaskId[blk.task_id][age_index]++; } } @@ -175,9 +177,9 @@ void BaseTags::computeStats() { for (unsigned i = 0; i < ContextSwitchTaskId::NumTaskId; ++i) { - occupanciesTaskId[i] = 0; + stats.occupanciesTaskId[i] = 0; for (unsigned j = 0; j < 5; ++j) { - ageTaskId[i][j] = 0; + stats.ageTaskId[i][j] = 0; } } @@ -201,93 +203,79 @@ BaseTags::print() return str; } -void -BaseTags::regStats() +BaseTags::BaseTagStats::BaseTagStats(BaseTags &_tags) + : Stats::Group(&_tags), + tags(_tags), + + tagsInUse(this, "tagsinuse", + "Cycle average of tags in use"), + totalRefs(this, "total_refs", + "Total number of references to valid blocks."), + sampledRefs(this, "sampled_refs", + "Sample count of references to valid blocks."), + avgRefs(this, "avg_refs", + "Average number of references to valid blocks."), + warmupCycle(this, "warmup_cycle", + "Cycle when the warmup percentage was hit."), + occupancies(this, "occ_blocks", + "Average occupied blocks per requestor"), + avgOccs(this, "occ_percent", + "Average percentage of cache occupancy"), + occupanciesTaskId(this, "occ_task_id_blocks", + "Occupied blocks per task id"), + ageTaskId(this, "age_task_id_blocks", "Occupied blocks per task id"), + percentOccsTaskId(this, "occ_task_id_percent", + 
"Percentage of cache occupancy per task id"), + tagAccesses(this, "tag_accesses", "Number of tag accesses"), + dataAccesses(this, "data_accesses", "Number of data accesses") { - ClockedObject::regStats(); +} +void +BaseTags::BaseTagStats::regStats() +{ using namespace Stats; - tagsInUse - .name(name() + ".tagsinuse") - .desc("Cycle average of tags in use") - ; - - totalRefs - .name(name() + ".total_refs") - .desc("Total number of references to valid blocks.") - ; - - sampledRefs - .name(name() + ".sampled_refs") - .desc("Sample count of references to valid blocks.") - ; - - avgRefs - .name(name() + ".avg_refs") - .desc("Average number of references to valid blocks.") - ; + Stats::Group::regStats(); - avgRefs = totalRefs/sampledRefs; + System *system = tags.system; - warmupCycle - .name(name() + ".warmup_cycle") - .desc("Cycle when the warmup percentage was hit.") - ; + avgRefs = totalRefs / sampledRefs; occupancies .init(system->maxMasters()) - .name(name() + ".occ_blocks") - .desc("Average occupied blocks per requestor") .flags(nozero | nonan) ; for (int i = 0; i < system->maxMasters(); i++) { occupancies.subname(i, system->getMasterName(i)); } - avgOccs - .name(name() + ".occ_percent") - .desc("Average percentage of cache occupancy") - .flags(nozero | total) - ; + avgOccs.flags(nozero | total); for (int i = 0; i < system->maxMasters(); i++) { avgOccs.subname(i, system->getMasterName(i)); } - avgOccs = occupancies / Stats::constant(numBlocks); + avgOccs = occupancies / Stats::constant(tags.numBlocks); occupanciesTaskId .init(ContextSwitchTaskId::NumTaskId) - .name(name() + ".occ_task_id_blocks") - .desc("Occupied blocks per task id") .flags(nozero | nonan) ; ageTaskId .init(ContextSwitchTaskId::NumTaskId, 5) - .name(name() + ".age_task_id_blocks") - .desc("Occupied blocks per task id") .flags(nozero | nonan) ; - percentOccsTaskId - .name(name() + ".occ_task_id_percent") - .desc("Percentage of cache occupancy per task id") - .flags(nozero) - ; - - percentOccsTaskId 
= occupanciesTaskId / Stats::constant(numBlocks); + percentOccsTaskId.flags(nozero); - tagAccesses - .name(name() + ".tag_accesses") - .desc("Number of tag accesses") - ; + percentOccsTaskId = occupanciesTaskId / Stats::constant(tags.numBlocks); +} - dataAccesses - .name(name() + ".data_accesses") - .desc("Number of data accesses") - ; +void +BaseTags::BaseTagStats::preDumpStats() +{ + Stats::Group::preDumpStats(); - registerDumpCallback(new BaseTagsDumpCallback(this)); - registerExitCallback(new BaseTagsCallback(this)); + tags.computeStats(); } diff --git a/src/mem/cache/tags/base.hh b/src/mem/cache/tags/base.hh index ae9cab87e..0a9f16bd3 100644 --- a/src/mem/cache/tags/base.hh +++ b/src/mem/cache/tags/base.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2014,2016-2018 ARM Limited + * Copyright (c) 2012-2014,2016-2019 ARM Limited * All rights reserved. * * The license below extends only to copyright in the software and shall @@ -102,58 +102,60 @@ class BaseTags : public ClockedObject /** The data blocks, 1 per cache block. */ std::unique_ptr<uint8_t[]> dataBlks; - // Statistics /** * TODO: It would be good if these stats were acquired after warmup. - * @addtogroup CacheStatistics - * @{ */ + struct BaseTagStats : public Stats::Group + { + BaseTagStats(BaseTags &tags); - /** Per cycle average of the number of tags that hold valid data. */ - Stats::Average tagsInUse; + void regStats() override; + void preDumpStats() override; - /** The total number of references to a block before it is replaced. */ - Stats::Scalar totalRefs; + BaseTags &tags; - /** - * The number of reference counts sampled. This is different from - * replacements because we sample all the valid blocks when the simulator - * exits. - */ - Stats::Scalar sampledRefs; + /** Per cycle average of the number of tags that hold valid data. */ + Stats::Average tagsInUse; - /** - * Average number of references to a block before is was replaced. - * @todo This should change to an average stat once we have them.
- */ - Stats::Formula avgRefs; + /** The total number of references to a block before it is replaced. */ + Stats::Scalar totalRefs; - /** The cycle that the warmup percentage was hit. 0 on failure. */ - Stats::Scalar warmupCycle; + /** + * The number of reference counts sampled. This is different + * from replacements because we sample all the valid blocks + * when the simulator exits. + */ + Stats::Scalar sampledRefs; - /** Average occupancy of each requestor using the cache */ - Stats::AverageVector occupancies; + /** + * Average number of references to a block before it was replaced. + * @todo This should change to an average stat once we have them. + */ + Stats::Formula avgRefs; - /** Average occ % of each requestor using the cache */ - Stats::Formula avgOccs; + /** The cycle that the warmup percentage was hit. 0 on failure. */ + Stats::Scalar warmupCycle; - /** Occupancy of each context/cpu using the cache */ - Stats::Vector occupanciesTaskId; + /** Average occupancy of each requestor using the cache */ + Stats::AverageVector occupancies; - /** Occupancy of each context/cpu using the cache */ - Stats::Vector2d ageTaskId; + /** Average occ % of each requestor using the cache */ + Stats::Formula avgOccs; - /** Occ % of each context/cpu using the cache */ - Stats::Formula percentOccsTaskId; + /** Occupancy of each context/cpu using the cache */ + Stats::Vector occupanciesTaskId; - /** Number of tags consulted over all accesses. */ - Stats::Scalar tagAccesses; - /** Number of data blocks consulted over all accesses. */ - Stats::Scalar dataAccesses; + /** Occupancy of each context/cpu using the cache */ + Stats::Vector2d ageTaskId; - /** - * @} - */ + /** Occ % of each context/cpu using the cache */ + Stats::Formula percentOccsTaskId; + + /** Number of tags consulted over all accesses. */ + Stats::Scalar tagAccesses; + /** Number of data blocks consulted over all accesses.
*/ + Stats::Scalar dataAccesses; + } stats; public: typedef BaseTagsParams Params; @@ -171,11 +173,6 @@ class BaseTags : public ClockedObject */ virtual void tagsInit() = 0; - /** - * Register local statistics. - */ - void regStats(); - /** * Average in the reference count for valid blocks when the simulation * exits. @@ -259,9 +256,9 @@ class BaseTags : public ClockedObject assert(blk); assert(blk->isValid()); - occupancies[blk->srcMasterId]--; - totalRefs += blk->refCount; - sampledRefs++; + stats.occupancies[blk->srcMasterId]--; + stats.totalRefs += blk->refCount; + stats.sampledRefs++; blk->invalidate(); } @@ -367,12 +364,4 @@ class BaseTagsCallback : public Callback virtual void process() { tags->cleanupRefs(); }; }; -class BaseTagsDumpCallback : public Callback -{ - BaseTags *tags; - public: - BaseTagsDumpCallback(BaseTags *t) : tags(t) {} - virtual void process() { tags->computeStats(); }; -}; - #endif //__MEM_CACHE_TAGS_BASE_HH__ diff --git a/src/mem/cache/tags/base_set_assoc.cc b/src/mem/cache/tags/base_set_assoc.cc index 1b53ef050..1934ef4e3 100644 --- a/src/mem/cache/tags/base_set_assoc.cc +++ b/src/mem/cache/tags/base_set_assoc.cc @@ -87,7 +87,7 @@ BaseSetAssoc::invalidate(CacheBlk *blk) BaseTags::invalidate(blk); // Decrease the number of tags in use - tagsInUse--; + stats.tagsInUse--; // Invalidate replacement data replacementPolicy->invalidate(blk->replacementData); diff --git a/src/mem/cache/tags/base_set_assoc.hh b/src/mem/cache/tags/base_set_assoc.hh index f58f93951..efb08caa6 100644 --- a/src/mem/cache/tags/base_set_assoc.hh +++ b/src/mem/cache/tags/base_set_assoc.hh @@ -131,13 +131,13 @@ class BaseSetAssoc : public BaseTags // Access all tags in parallel, hence one in each way. The data side // either accesses all blocks in parallel, or one block sequentially on // a hit. Sequential access with a miss doesn't access data. 
- tagAccesses += allocAssoc; + stats.tagAccesses += allocAssoc; if (sequentialAccess) { if (blk != nullptr) { - dataAccesses += 1; + stats.dataAccesses += 1; } } else { - dataAccesses += allocAssoc; + stats.dataAccesses += allocAssoc; } // If a cache hit @@ -195,7 +195,7 @@ class BaseSetAssoc : public BaseTags BaseTags::insertBlock(pkt, blk); // Increment tag counter - tagsInUse++; + stats.tagsInUse++; // Update replacement policy replacementPolicy->reset(blk->replacementData); diff --git a/src/mem/cache/tags/fa_lru.cc b/src/mem/cache/tags/fa_lru.cc index 0ebef4be7..5738dcb50 100644 --- a/src/mem/cache/tags/fa_lru.cc +++ b/src/mem/cache/tags/fa_lru.cc @@ -131,7 +131,7 @@ FALRU::invalidate(CacheBlk *blk) BaseTags::invalidate(blk); // Decrease the number of tags in use - tagsInUse--; + stats.tagsInUse--; // Move the block to the tail to make it the next victim moveToTail((FALRUBlk*)blk); @@ -220,7 +220,7 @@ FALRU::insertBlock(const PacketPtr pkt, CacheBlk *blk) BaseTags::insertBlock(pkt, blk); // Increment tag counter - tagsInUse++; + stats.tagsInUse++; // New block is the MRU moveToHead(falruBlk); diff --git a/src/mem/cache/tags/sector_tags.cc b/src/mem/cache/tags/sector_tags.cc index 1098885c2..77fb53cda 100644 --- a/src/mem/cache/tags/sector_tags.cc +++ b/src/mem/cache/tags/sector_tags.cc @@ -125,7 +125,7 @@ SectorTags::invalidate(CacheBlk *blk) // in the sector. if (!sector_blk->isValid()) { // Decrease the number of tags in use - tagsInUse--; + stats.tagsInUse--; // Invalidate replacement data, as we're invalidating the sector replacementPolicy->invalidate(sector_blk->replacementData); @@ -140,13 +140,13 @@ SectorTags::accessBlock(Addr addr, bool is_secure, Cycles &lat) // Access all tags in parallel, hence one in each way. The data side // either accesses all blocks in parallel, or one block sequentially on // a hit. Sequential access with a miss doesn't access data. 
- tagAccesses += allocAssoc; + stats.tagAccesses += allocAssoc; if (sequentialAccess) { if (blk != nullptr) { - dataAccesses += 1; + stats.dataAccesses += 1; } } else { - dataAccesses += allocAssoc*numBlocksPerSector; + stats.dataAccesses += allocAssoc*numBlocksPerSector; } // If a cache hit @@ -183,7 +183,7 @@ SectorTags::insertBlock(const PacketPtr pkt, CacheBlk *blk) replacementPolicy->touch(sector_blk->replacementData); } else { // Increment tag counter - tagsInUse++; + stats.tagsInUse++; // A new entry resets the replacement data replacementPolicy->reset(sector_blk->replacementData);