From 765ba547a203dc3ba78d9e1aaf6186da7b2a003c Mon Sep 17 00:00:00 2001 From: Hoa Nguyen Date: Fri, 16 Oct 2020 00:38:44 -0700 Subject: [PATCH] cpu-o3,stats: Update stats style for cpu.hh and cpu.cc Change-Id: If4ddaf6a9a84ea71fa19f5ca6d2e5294ec9a0b23 Signed-off-by: Hoa Nguyen Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/36195 Reviewed-by: Daniel Carvalho Maintainer: Bobby R. Bruce Tested-by: kokoro --- src/cpu/o3/cpu.cc | 154 +++++++++++++++++++--------------------------- src/cpu/o3/cpu.hh | 93 ++++++++++++++-------------- 2 files changed, 112 insertions(+), 135 deletions(-) diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc index 8461d0609..de14ecbeb 100644 --- a/src/cpu/o3/cpu.cc +++ b/src/cpu/o3/cpu.cc @@ -72,12 +72,6 @@ BaseO3CPU::BaseO3CPU(const BaseCPUParams &params) { } -void -BaseO3CPU::regStats() -{ - BaseCPU::regStats(); -} - template FullO3CPU::FullO3CPU(const DerivO3CPUParams &params) : BaseO3CPU(params), @@ -126,7 +120,8 @@ FullO3CPU::FullO3CPU(const DerivO3CPUParams &params) globalSeqNum(1), system(params.system), - lastRunningCycle(curCycle()) + lastRunningCycle(curCycle()), + cpuStats(this) { fatal_if(FullSystem && params.numThreads > 1, "SMT is not supported in O3 in full system mode currently."); @@ -383,28 +378,45 @@ FullO3CPU::regProbePoints() } template -void -FullO3CPU::regStats() +FullO3CPU:: +FullO3CPUStats::FullO3CPUStats(FullO3CPU *cpu) + : Stats::Group(cpu), + ADD_STAT(timesIdled, + "Number of times that the entire CPU went into an idle state " + "and unscheduled itself"), + ADD_STAT(idleCycles, + "Total number of cycles that the CPU has spent unscheduled due " + "to idling"), + ADD_STAT(quiesceCycles, + "Total number of cycles that CPU has spent quiesced or waiting " + "for an interrupt"), + ADD_STAT(committedInsts, "Number of Instructions Simulated"), + ADD_STAT(committedOps, "Number of Ops (including micro ops) Simulated"), + ADD_STAT(cpi, "CPI: Cycles Per Instruction"), + ADD_STAT(totalCpi, "CPI: Total CPI of All Threads"), +
ADD_STAT(ipc, "IPC: Instructions Per Cycle"), + ADD_STAT(totalIpc, "IPC: Total IPC of All Threads"), + ADD_STAT(intRegfileReads, "Number of integer regfile reads"), + ADD_STAT(intRegfileWrites, "Number of integer regfile writes"), + ADD_STAT(fpRegfileReads, "Number of floating regfile reads"), + ADD_STAT(fpRegfileWrites, "Number of floating regfile writes"), + ADD_STAT(vecRegfileReads, "number of vector regfile reads"), + ADD_STAT(vecRegfileWrites, "number of vector regfile writes"), + ADD_STAT(vecPredRegfileReads, "number of predicate regfile reads"), + ADD_STAT(vecPredRegfileWrites, "number of predicate regfile writes"), + ADD_STAT(ccRegfileReads, "number of cc regfile reads"), + ADD_STAT(ccRegfileWrites, "number of cc regfile writes"), + ADD_STAT(miscRegfileReads, "number of misc regfile reads"), + ADD_STAT(miscRegfileWrites, "number of misc regfile writes") { - BaseO3CPU::regStats(); - // Register any of the O3CPU's stats here. timesIdled - .name(name() + ".timesIdled") - .desc("Number of times that the entire CPU went into an idle state and" - " unscheduled itself") .prereq(timesIdled); idleCycles - .name(name() + ".idleCycles") - .desc("Total number of cycles that the CPU has spent unscheduled due " - "to idling") .prereq(idleCycles); quiesceCycles - .name(name() + ".quiesceCycles") - .desc("Total number of cycles that CPU has spent quiesced or waiting " - "for an interrupt") .prereq(quiesceCycles); // Number of Instructions simulated @@ -412,99 +424,63 @@ FullO3CPU::regStats() // Should probably be in Base CPU but need templated // MaxThreads so put in here instead committedInsts - .init(numThreads) - .name(name() + ".committedInsts") - .desc("Number of Instructions Simulated") + .init(cpu->numThreads) .flags(Stats::total); committedOps - .init(numThreads) - .name(name() + ".committedOps") - .desc("Number of Ops (including micro ops) Simulated") + .init(cpu->numThreads) .flags(Stats::total); cpi - .name(name() + ".cpi") - .desc("CPI: Cycles Per Instruction") 
.precision(6); - cpi = numCycles / committedInsts; + cpi = cpu->numCycles / committedInsts; totalCpi - .name(name() + ".cpi_total") - .desc("CPI: Total CPI of All Threads") .precision(6); - totalCpi = numCycles / sum(committedInsts); + totalCpi = cpu->numCycles / sum(committedInsts); ipc - .name(name() + ".ipc") - .desc("IPC: Instructions Per Cycle") .precision(6); - ipc = committedInsts / numCycles; + ipc = committedInsts / cpu->numCycles; totalIpc - .name(name() + ".ipc_total") - .desc("IPC: Total IPC of All Threads") .precision(6); - totalIpc = sum(committedInsts) / numCycles; + totalIpc = sum(committedInsts) / cpu->numCycles; intRegfileReads - .name(name() + ".int_regfile_reads") - .desc("number of integer regfile reads") .prereq(intRegfileReads); intRegfileWrites - .name(name() + ".int_regfile_writes") - .desc("number of integer regfile writes") .prereq(intRegfileWrites); fpRegfileReads - .name(name() + ".fp_regfile_reads") - .desc("number of floating regfile reads") .prereq(fpRegfileReads); fpRegfileWrites - .name(name() + ".fp_regfile_writes") - .desc("number of floating regfile writes") .prereq(fpRegfileWrites); vecRegfileReads - .name(name() + ".vec_regfile_reads") - .desc("number of vector regfile reads") .prereq(vecRegfileReads); vecRegfileWrites - .name(name() + ".vec_regfile_writes") - .desc("number of vector regfile writes") .prereq(vecRegfileWrites); vecPredRegfileReads - .name(name() + ".pred_regfile_reads") - .desc("number of predicate regfile reads") .prereq(vecPredRegfileReads); vecPredRegfileWrites - .name(name() + ".pred_regfile_writes") - .desc("number of predicate regfile writes") .prereq(vecPredRegfileWrites); ccRegfileReads - .name(name() + ".cc_regfile_reads") - .desc("number of cc regfile reads") .prereq(ccRegfileReads); ccRegfileWrites - .name(name() + ".cc_regfile_writes") - .desc("number of cc regfile writes") .prereq(ccRegfileWrites); miscRegfileReads - .name(name() + ".misc_regfile_reads") - .desc("number of misc regfile reads") 
.prereq(miscRegfileReads); miscRegfileWrites - .name(name() + ".misc_regfile_writes") - .desc("number of misc regfile writes") .prereq(miscRegfileWrites); } @@ -554,7 +530,7 @@ FullO3CPU::tick() } else if (!activityRec.active() || _status == Idle) { DPRINTF(O3CPU, "Idle!\n"); lastRunningCycle = curCycle(); - timesIdled++; + cpuStats.timesIdled++; } else { schedule(tickEvent, clockEdge(Cycles(1))); DPRINTF(O3CPU, "Scheduling next tick!\n"); @@ -699,7 +675,7 @@ FullO3CPU::activateContext(ThreadID tid) // @todo: This is an oddity that is only here to match the stats if (cycles != 0) --cycles; - quiesceCycles += cycles; + cpuStats.quiesceCycles += cycles; lastActivatedCycle = curTick(); @@ -1155,7 +1131,7 @@ template RegVal FullO3CPU::readMiscReg(int misc_reg, ThreadID tid) { - miscRegfileReads++; + cpuStats.miscRegfileReads++; return this->isa[tid]->readMiscReg(misc_reg); } @@ -1170,7 +1146,7 @@ template void FullO3CPU::setMiscReg(int misc_reg, RegVal val, ThreadID tid) { - miscRegfileWrites++; + cpuStats.miscRegfileWrites++; this->isa[tid]->setMiscReg(misc_reg, val); } @@ -1178,7 +1154,7 @@ template RegVal FullO3CPU::readIntReg(PhysRegIdPtr phys_reg) { - intRegfileReads++; + cpuStats.intRegfileReads++; return regFile.readIntReg(phys_reg); } @@ -1186,7 +1162,7 @@ template RegVal FullO3CPU::readFloatReg(PhysRegIdPtr phys_reg) { - fpRegfileReads++; + cpuStats.fpRegfileReads++; return regFile.readFloatReg(phys_reg); } @@ -1195,7 +1171,7 @@ auto FullO3CPU::readVecReg(PhysRegIdPtr phys_reg) const -> const VecRegContainer& { - vecRegfileReads++; + cpuStats.vecRegfileReads++; return regFile.readVecReg(phys_reg); } @@ -1204,7 +1180,7 @@ auto FullO3CPU::getWritableVecReg(PhysRegIdPtr phys_reg) -> VecRegContainer& { - vecRegfileWrites++; + cpuStats.vecRegfileWrites++; return regFile.getWritableVecReg(phys_reg); } @@ -1212,7 +1188,7 @@ template auto FullO3CPU::readVecElem(PhysRegIdPtr phys_reg) const -> const VecElem& { - vecRegfileReads++; + cpuStats.vecRegfileReads++; return 
regFile.readVecElem(phys_reg); } @@ -1221,7 +1197,7 @@ auto FullO3CPU::readVecPredReg(PhysRegIdPtr phys_reg) const -> const VecPredRegContainer& { - vecPredRegfileReads++; + cpuStats.vecPredRegfileReads++; return regFile.readVecPredReg(phys_reg); } @@ -1230,7 +1206,7 @@ auto FullO3CPU::getWritableVecPredReg(PhysRegIdPtr phys_reg) -> VecPredRegContainer& { - vecPredRegfileWrites++; + cpuStats.vecPredRegfileWrites++; return regFile.getWritableVecPredReg(phys_reg); } @@ -1238,7 +1214,7 @@ template RegVal FullO3CPU::readCCReg(PhysRegIdPtr phys_reg) { - ccRegfileReads++; + cpuStats.ccRegfileReads++; return regFile.readCCReg(phys_reg); } @@ -1246,7 +1222,7 @@ template void FullO3CPU::setIntReg(PhysRegIdPtr phys_reg, RegVal val) { - intRegfileWrites++; + cpuStats.intRegfileWrites++; regFile.setIntReg(phys_reg, val); } @@ -1254,7 +1230,7 @@ template void FullO3CPU::setFloatReg(PhysRegIdPtr phys_reg, RegVal val) { - fpRegfileWrites++; + cpuStats.fpRegfileWrites++; regFile.setFloatReg(phys_reg, val); } @@ -1262,7 +1238,7 @@ template void FullO3CPU::setVecReg(PhysRegIdPtr phys_reg, const VecRegContainer& val) { - vecRegfileWrites++; + cpuStats.vecRegfileWrites++; regFile.setVecReg(phys_reg, val); } @@ -1270,7 +1246,7 @@ template void FullO3CPU::setVecElem(PhysRegIdPtr phys_reg, const VecElem& val) { - vecRegfileWrites++; + cpuStats.vecRegfileWrites++; regFile.setVecElem(phys_reg, val); } @@ -1279,7 +1255,7 @@ void FullO3CPU::setVecPredReg(PhysRegIdPtr phys_reg, const VecPredRegContainer& val) { - vecPredRegfileWrites++; + cpuStats.vecPredRegfileWrites++; regFile.setVecPredReg(phys_reg, val); } @@ -1287,7 +1263,7 @@ template void FullO3CPU::setCCReg(PhysRegIdPtr phys_reg, RegVal val) { - ccRegfileWrites++; + cpuStats.ccRegfileWrites++; regFile.setCCReg(phys_reg, val); } @@ -1295,7 +1271,7 @@ template RegVal FullO3CPU::readArchIntReg(int reg_idx, ThreadID tid) { - intRegfileReads++; + cpuStats.intRegfileReads++; PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( 
RegId(IntRegClass, reg_idx)); @@ -1306,7 +1282,7 @@ template RegVal FullO3CPU::readArchFloatReg(int reg_idx, ThreadID tid) { - fpRegfileReads++; + cpuStats.fpRegfileReads++; PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( RegId(FloatRegClass, reg_idx)); @@ -1367,7 +1343,7 @@ template RegVal FullO3CPU::readArchCCReg(int reg_idx, ThreadID tid) { - ccRegfileReads++; + cpuStats.ccRegfileReads++; PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( RegId(CCRegClass, reg_idx)); @@ -1378,7 +1354,7 @@ template void FullO3CPU::setArchIntReg(int reg_idx, RegVal val, ThreadID tid) { - intRegfileWrites++; + cpuStats.intRegfileWrites++; PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( RegId(IntRegClass, reg_idx)); @@ -1389,7 +1365,7 @@ template void FullO3CPU::setArchFloatReg(int reg_idx, RegVal val, ThreadID tid) { - fpRegfileWrites++; + cpuStats.fpRegfileWrites++; PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( RegId(FloatRegClass, reg_idx)); @@ -1430,7 +1406,7 @@ template void FullO3CPU::setArchCCReg(int reg_idx, RegVal val, ThreadID tid) { - ccRegfileWrites++; + cpuStats.ccRegfileWrites++; PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( RegId(CCRegClass, reg_idx)); @@ -1497,7 +1473,7 @@ FullO3CPU::instDone(ThreadID tid, const DynInstPtr &inst) if (!inst->isMicroop() || inst->isLastMicroop()) { thread[tid]->numInst++; thread[tid]->threadStats.numInsts++; - committedInsts[tid]++; + cpuStats.committedInsts[tid]++; system->totalNumInsts++; // Check for instruction-count-based events. 
@@ -1505,7 +1481,7 @@ FullO3CPU::instDone(ThreadID tid, const DynInstPtr &inst) } thread[tid]->numOp++; thread[tid]->threadStats.numOps++; - committedOps[tid]++; + cpuStats.committedOps[tid]++; probeInstCommit(inst->staticInst, inst->instAddr()); } @@ -1688,7 +1664,7 @@ FullO3CPU::wakeCPU() // @todo: This is an oddity that is only here to match the stats if (cycles > 1) { --cycles; - idleCycles += cycles; + cpuStats.idleCycles += cycles; numCycles += cycles; } diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh index 57c855b98..b68ad9c96 100644 --- a/src/cpu/o3/cpu.hh +++ b/src/cpu/o3/cpu.hh @@ -80,8 +80,6 @@ class BaseO3CPU : public BaseCPU //Stuff that's pretty ISA independent will go here. public: BaseO3CPU(const BaseCPUParams &params); - - void regStats(); }; /** @@ -182,9 +180,6 @@ class FullO3CPU : public BaseO3CPU /** Destructor. */ ~FullO3CPU(); - /** Registers statistics. */ - void regStats() override; - ProbePointArg *ppInstAccessComplete; ProbePointArg > *ppDataAccessComplete; @@ -362,7 +357,7 @@ class FullO3CPU : public BaseO3CPU VecLaneT readVecLane(PhysRegIdPtr phys_reg) const { - vecRegfileReads++; + cpuStats.vecRegfileReads++; return regFile.readVecLane(phys_reg); } @@ -373,7 +368,7 @@ class FullO3CPU : public BaseO3CPU VecLaneT readVecLane(PhysRegIdPtr phys_reg) const { - vecRegfileReads++; + cpuStats.vecRegfileReads++; return regFile.readVecLane(phys_reg); } @@ -382,7 +377,7 @@ class FullO3CPU : public BaseO3CPU void setVecLane(PhysRegIdPtr phys_reg, const LD& val) { - vecRegfileWrites++; + cpuStats.vecRegfileWrites++; return regFile.setVecLane(phys_reg, val); } @@ -733,44 +728,50 @@ class FullO3CPU : public BaseO3CPU return this->iew.ldstQueue.getDataPort(); } - /** Stat for total number of times the CPU is descheduled. */ - Stats::Scalar timesIdled; - /** Stat for total number of cycles the CPU spends descheduled.
*/ - Stats::Scalar idleCycles; - /** Stat for total number of cycles the CPU spends descheduled due to a - * quiesce operation or waiting for an interrupt. */ - Stats::Scalar quiesceCycles; - /** Stat for the number of committed instructions per thread. */ - Stats::Vector committedInsts; - /** Stat for the number of committed ops (including micro ops) per thread. */ - Stats::Vector committedOps; - /** Stat for the CPI per thread. */ - Stats::Formula cpi; - /** Stat for the total CPI. */ - Stats::Formula totalCpi; - /** Stat for the IPC per thread. */ - Stats::Formula ipc; - /** Stat for the total IPC. */ - Stats::Formula totalIpc; - - //number of integer register file accesses - Stats::Scalar intRegfileReads; - Stats::Scalar intRegfileWrites; - //number of float register file accesses - Stats::Scalar fpRegfileReads; - Stats::Scalar fpRegfileWrites; - //number of vector register file accesses - mutable Stats::Scalar vecRegfileReads; - Stats::Scalar vecRegfileWrites; - //number of predicate register file accesses - mutable Stats::Scalar vecPredRegfileReads; - Stats::Scalar vecPredRegfileWrites; - //number of CC register file accesses - Stats::Scalar ccRegfileReads; - Stats::Scalar ccRegfileWrites; - //number of misc - Stats::Scalar miscRegfileReads; - Stats::Scalar miscRegfileWrites; + struct FullO3CPUStats : public Stats::Group + { + FullO3CPUStats(FullO3CPU *cpu); + + /** Stat for total number of times the CPU is descheduled. */ + Stats::Scalar timesIdled; + /** Stat for total number of cycles the CPU spends descheduled. */ + Stats::Scalar idleCycles; + /** Stat for total number of cycles the CPU spends descheduled due to a + * quiesce operation or waiting for an interrupt. */ + Stats::Scalar quiesceCycles; + /** Stat for the number of committed instructions per thread. */ + Stats::Vector committedInsts; + /** Stat for the number of committed ops (including micro ops) per + * thread. */ + Stats::Vector committedOps; + /** Stat for the CPI per thread. 
*/ + Stats::Formula cpi; + /** Stat for the total CPI. */ + Stats::Formula totalCpi; + /** Stat for the IPC per thread. */ + Stats::Formula ipc; + /** Stat for the total IPC. */ + Stats::Formula totalIpc; + + //number of integer register file accesses + Stats::Scalar intRegfileReads; + Stats::Scalar intRegfileWrites; + //number of float register file accesses + Stats::Scalar fpRegfileReads; + Stats::Scalar fpRegfileWrites; + //number of vector register file accesses + mutable Stats::Scalar vecRegfileReads; + Stats::Scalar vecRegfileWrites; + //number of predicate register file accesses + mutable Stats::Scalar vecPredRegfileReads; + Stats::Scalar vecPredRegfileWrites; + //number of CC register file accesses + Stats::Scalar ccRegfileReads; + Stats::Scalar ccRegfileWrites; + //number of misc + Stats::Scalar miscRegfileReads; + Stats::Scalar miscRegfileWrites; + } cpuStats; public: // hardware transactional memory -- 2.30.2