From: Hoa Nguyen Date: Thu, 15 Oct 2020 09:20:00 +0000 (-0700) Subject: cpu-o3,stats: Update stats style for iew and iew_impl X-Git-Tag: develop-gem5-snapshot~444 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=13176f9fd060a7e29012595f6490878d7833a561;p=gem5.git cpu-o3,stats: Update stats style for iew and iew_impl Change-Id: Ie213aeb402fee5f015f10c9c03e5b9c02ba1f3fe Signed-off-by: Hoa Nguyen Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/36095 Reviewed-by: Daniel Carvalho Maintainer: Bobby R. Bruce Tested-by: kokoro --- diff --git a/src/cpu/o3/iew.hh b/src/cpu/o3/iew.hh index 07f986d6d..683902a5e 100644 --- a/src/cpu/o3/iew.hh +++ b/src/cpu/o3/iew.hh @@ -426,70 +426,81 @@ class DefaultIEW /** Maximum size of the skid buffer. */ unsigned skidBufferMax; - /** Stat for total number of idle cycles. */ - Stats::Scalar iewIdleCycles; - /** Stat for total number of squashing cycles. */ - Stats::Scalar iewSquashCycles; - /** Stat for total number of blocking cycles. */ - Stats::Scalar iewBlockCycles; - /** Stat for total number of unblocking cycles. */ - Stats::Scalar iewUnblockCycles; - /** Stat for total number of instructions dispatched. */ - Stats::Scalar iewDispatchedInsts; - /** Stat for total number of squashed instructions dispatch skips. */ - Stats::Scalar iewDispSquashedInsts; - /** Stat for total number of dispatched load instructions. */ - Stats::Scalar iewDispLoadInsts; - /** Stat for total number of dispatched store instructions. */ - Stats::Scalar iewDispStoreInsts; - /** Stat for total number of dispatched non speculative instructions. */ - Stats::Scalar iewDispNonSpecInsts; - /** Stat for number of times the IQ becomes full. */ - Stats::Scalar iewIQFullEvents; - /** Stat for number of times the LSQ becomes full. */ - Stats::Scalar iewLSQFullEvents; - /** Stat for total number of memory ordering violation events. */ - Stats::Scalar memOrderViolationEvents; - /** Stat for total number of incorrect predicted taken branches. 
*/ - Stats::Scalar predictedTakenIncorrect; - /** Stat for total number of incorrect predicted not taken branches. */ - Stats::Scalar predictedNotTakenIncorrect; - /** Stat for total number of mispredicted branches detected at execute. */ - Stats::Formula branchMispredicts; - - /** Stat for total number of executed instructions. */ - Stats::Scalar iewExecutedInsts; - /** Stat for total number of executed load instructions. */ - Stats::Vector iewExecLoadInsts; - /** Stat for total number of executed store instructions. */ -// Stats::Scalar iewExecStoreInsts; - /** Stat for total number of squashed instructions skipped at execute. */ - Stats::Scalar iewExecSquashedInsts; - /** Number of executed software prefetches. */ - Stats::Vector iewExecutedSwp; - /** Number of executed nops. */ - Stats::Vector iewExecutedNop; - /** Number of executed meomory references. */ - Stats::Vector iewExecutedRefs; - /** Number of executed branches. */ - Stats::Vector iewExecutedBranches; - /** Number of executed store instructions. */ - Stats::Formula iewExecStoreInsts; - /** Number of instructions executed per cycle. */ - Stats::Formula iewExecRate; - - /** Number of instructions sent to commit. */ - Stats::Vector iewInstsToCommit; - /** Number of instructions that writeback. */ - Stats::Vector writebackCount; - /** Number of instructions that wake consumers. */ - Stats::Vector producerInst; - /** Number of instructions that wake up from producers. */ - Stats::Vector consumerInst; - /** Number of instructions per cycle written back. */ - Stats::Formula wbRate; - /** Average number of woken instructions per writeback. */ - Stats::Formula wbFanout; + + struct IEWStats : public Stats::Group + { + IEWStats(O3CPU *cpu); + + /** Stat for total number of idle cycles. */ + Stats::Scalar idleCycles; + /** Stat for total number of squashing cycles. */ + Stats::Scalar squashCycles; + /** Stat for total number of blocking cycles. 
*/ + Stats::Scalar blockCycles; + /** Stat for total number of unblocking cycles. */ + Stats::Scalar unblockCycles; + /** Stat for total number of instructions dispatched. */ + Stats::Scalar dispatchedInsts; + /** Stat for total number of squashed instructions dispatch skips. */ + Stats::Scalar dispSquashedInsts; + /** Stat for total number of dispatched load instructions. */ + Stats::Scalar dispLoadInsts; + /** Stat for total number of dispatched store instructions. */ + Stats::Scalar dispStoreInsts; + /** Stat for total number of dispatched non speculative instructions. */ + Stats::Scalar dispNonSpecInsts; + /** Stat for number of times the IQ becomes full. */ + Stats::Scalar iqFullEvents; + /** Stat for number of times the LSQ becomes full. */ + Stats::Scalar lsqFullEvents; + /** Stat for total number of memory ordering violation events. */ + Stats::Scalar memOrderViolationEvents; + /** Stat for total number of incorrect predicted taken branches. */ + Stats::Scalar predictedTakenIncorrect; + /** Stat for total number of incorrect predicted not taken branches. */ + Stats::Scalar predictedNotTakenIncorrect; + /** Stat for total number of mispredicted branches detected at + * execute. */ + Stats::Formula branchMispredicts; + + struct ExecutedInstStats : public Stats::Group + { + ExecutedInstStats(O3CPU* cpu); + + /** Stat for total number of executed instructions. */ + Stats::Scalar numInsts; + /** Stat for total number of executed load instructions. */ + Stats::Vector numLoadInsts; + /** Stat for total number of squashed instructions skipped at + * execute. */ + Stats::Scalar numSquashedInsts; + /** Number of executed software prefetches. */ + Stats::Vector numSwp; + /** Number of executed nops. */ + Stats::Vector numNop; + /** Number of executed memory references. */ + Stats::Vector numRefs; + /** Number of executed branches. */ + Stats::Vector numBranches; + /** Number of executed store instructions. 
*/ + Stats::Formula numStoreInsts; + /** Number of instructions executed per cycle. */ + Stats::Formula numRate; + } executedInstStats; + + /** Number of instructions sent to commit. */ + Stats::Vector instsToCommit; + /** Number of instructions that writeback. */ + Stats::Vector writebackCount; + /** Number of instructions that wake consumers. */ + Stats::Vector producerInst; + /** Number of instructions that wake up from producers. */ + Stats::Vector consumerInst; + /** Number of instructions per cycle written back. */ + Stats::Formula wbRate; + /** Average number of woken instructions per writeback. */ + Stats::Formula wbFanout; + } iewStats; }; #endif // __CPU_O3_IEW_HH__ diff --git a/src/cpu/o3/iew_impl.hh b/src/cpu/o3/iew_impl.hh index 0eec64444..fc89a8910 100644 --- a/src/cpu/o3/iew_impl.hh +++ b/src/cpu/o3/iew_impl.hh @@ -77,7 +77,8 @@ DefaultIEW::DefaultIEW(O3CPU *_cpu, const DerivO3CPUParams &params) wbNumInst(0), wbCycle(0), wbWidth(params.wbWidth), - numThreads(params.numThreads) + numThreads(params.numThreads), + iewStats(cpu) { if (dispatchWidth > Impl::MaxWidth) fatal("dispatchWidth (%d) is larger than compiled limit (%d),\n" @@ -140,162 +141,113 @@ DefaultIEW::regProbePoints() } template -void -DefaultIEW::regStats() +DefaultIEW:: +IEWStats::IEWStats(O3CPU *cpu) + : Stats::Group(cpu), + ADD_STAT(idleCycles, "Number of cycles IEW is idle"), + ADD_STAT(squashCycles, "Number of cycles IEW is squashing"), + ADD_STAT(blockCycles, "Number of cycles IEW is blocking"), + ADD_STAT(unblockCycles, "Number of cycles IEW is unblocking"), + ADD_STAT(dispatchedInsts, "Number of instructions dispatched to IQ"), + ADD_STAT(dispSquashedInsts, + "Number of squashed instructions skipped by dispatch"), + ADD_STAT(dispLoadInsts, "Number of dispatched load instructions"), + ADD_STAT(dispStoreInsts, "Number of dispatched store instructions"), + ADD_STAT(dispNonSpecInsts, + "Number of dispatched non-speculative instructions"), + ADD_STAT(iqFullEvents, + "Number of times the IQ 
has become full, causing a stall"), + ADD_STAT(lsqFullEvents, + "Number of times the LSQ has become full, causing a stall"), + ADD_STAT(memOrderViolationEvents, "Number of memory order violations"), + ADD_STAT(predictedTakenIncorrect, + "Number of branches that were predicted taken incorrectly"), + ADD_STAT(predictedNotTakenIncorrect, + "Number of branches that were predicted not taken incorrectly"), + ADD_STAT(branchMispredicts, + "Number of branch mispredicts detected at execute", + predictedTakenIncorrect + predictedNotTakenIncorrect), + executedInstStats(cpu), + ADD_STAT(instsToCommit, "Cumulative count of insts sent to commit"), + ADD_STAT(writebackCount, "Cumulative count of insts written-back"), + ADD_STAT(producerInst, "Number of instructions producing a value"), + ADD_STAT(consumerInst, "Number of instructions consuming a value"), + ADD_STAT(wbRate, "Insts written-back per cycle"), + ADD_STAT(wbFanout, "Average fanout of values written-back") { - using namespace Stats; - - instQueue.regStats(); - - iewIdleCycles - .name(name() + ".iewIdleCycles") - .desc("Number of cycles IEW is idle"); - - iewSquashCycles - .name(name() + ".iewSquashCycles") - .desc("Number of cycles IEW is squashing"); - - iewBlockCycles - .name(name() + ".iewBlockCycles") - .desc("Number of cycles IEW is blocking"); - - iewUnblockCycles - .name(name() + ".iewUnblockCycles") - .desc("Number of cycles IEW is unblocking"); - - iewDispatchedInsts - .name(name() + ".iewDispatchedInsts") - .desc("Number of instructions dispatched to IQ"); - - iewDispSquashedInsts - .name(name() + ".iewDispSquashedInsts") - .desc("Number of squashed instructions skipped by dispatch"); - - iewDispLoadInsts - .name(name() + ".iewDispLoadInsts") - .desc("Number of dispatched load instructions"); - - iewDispStoreInsts - .name(name() + ".iewDispStoreInsts") - .desc("Number of dispatched store instructions"); - - iewDispNonSpecInsts - .name(name() + ".iewDispNonSpecInsts") - .desc("Number of dispatched 
non-speculative instructions"); - - iewIQFullEvents - .name(name() + ".iewIQFullEvents") - .desc("Number of times the IQ has become full, causing a stall"); - - iewLSQFullEvents - .name(name() + ".iewLSQFullEvents") - .desc("Number of times the LSQ has become full, causing a stall"); - - memOrderViolationEvents - .name(name() + ".memOrderViolationEvents") - .desc("Number of memory order violations"); - - predictedTakenIncorrect - .name(name() + ".predictedTakenIncorrect") - .desc("Number of branches that were predicted taken incorrectly"); - - predictedNotTakenIncorrect - .name(name() + ".predictedNotTakenIncorrect") - .desc("Number of branches that were predicted not taken incorrectly"); - - branchMispredicts - .name(name() + ".branchMispredicts") - .desc("Number of branch mispredicts detected at execute"); - - branchMispredicts = predictedTakenIncorrect + predictedNotTakenIncorrect; - - iewExecutedInsts - .name(name() + ".iewExecutedInsts") - .desc("Number of executed instructions"); - - iewExecLoadInsts - .init(cpu->numThreads) - .name(name() + ".iewExecLoadInsts") - .desc("Number of load instructions executed") - .flags(total); - - iewExecSquashedInsts - .name(name() + ".iewExecSquashedInsts") - .desc("Number of squashed instructions skipped in execute"); - - iewExecutedSwp + instsToCommit .init(cpu->numThreads) - .name(name() + ".exec_swp") - .desc("number of swp insts executed") - .flags(total); + .flags(Stats::total); - iewExecutedNop + writebackCount .init(cpu->numThreads) - .name(name() + ".exec_nop") - .desc("number of nop insts executed") - .flags(total); + .flags(Stats::total); - iewExecutedRefs + producerInst .init(cpu->numThreads) - .name(name() + ".exec_refs") - .desc("number of memory reference insts executed") - .flags(total); + .flags(Stats::total); - iewExecutedBranches + consumerInst .init(cpu->numThreads) - .name(name() + ".exec_branches") - .desc("Number of branches executed") - .flags(total); + .flags(Stats::total); - iewExecStoreInsts - 
.name(name() + ".exec_stores") - .desc("Number of stores executed") - .flags(total); - iewExecStoreInsts = iewExecutedRefs - iewExecLoadInsts; + wbRate + .flags(Stats::total); + wbRate = writebackCount / cpu->numCycles; - iewExecRate - .name(name() + ".exec_rate") - .desc("Inst execution rate") - .flags(total); + wbFanout + .flags(Stats::total); + wbFanout = producerInst / consumerInst; +} - iewExecRate = iewExecutedInsts / cpu->numCycles; +template +DefaultIEW::IEWStats:: +ExecutedInstStats::ExecutedInstStats(O3CPU *cpu) + : Stats::Group(cpu), + ADD_STAT(numInsts, "Number of executed instructions"), + ADD_STAT(numLoadInsts, "Number of load instructions executed"), + ADD_STAT(numSquashedInsts, + "Number of squashed instructions skipped in execute"), + ADD_STAT(numSwp, "Number of swp insts executed"), + ADD_STAT(numNop, "Number of nop insts executed"), + ADD_STAT(numRefs, "Number of memory reference insts executed"), + ADD_STAT(numBranches, "Number of branches executed"), + ADD_STAT(numStoreInsts, "Number of stores executed"), + ADD_STAT(numRate, "Inst execution rate", numInsts / cpu->numCycles) +{ + numLoadInsts + .init(cpu->numThreads) + .flags(Stats::total); - iewInstsToCommit + numSwp .init(cpu->numThreads) - .name(name() + ".wb_sent") - .desc("cumulative count of insts sent to commit") - .flags(total); + .flags(Stats::total); - writebackCount + numNop .init(cpu->numThreads) - .name(name() + ".wb_count") - .desc("cumulative count of insts written-back") - .flags(total); + .flags(Stats::total); - producerInst + numRefs .init(cpu->numThreads) - .name(name() + ".wb_producers") - .desc("num instructions producing a value") - .flags(total); + .flags(Stats::total); - consumerInst + numBranches .init(cpu->numThreads) - .name(name() + ".wb_consumers") - .desc("num instructions consuming a value") - .flags(total); + .flags(Stats::total); - wbFanout - .name(name() + ".wb_fanout") - .desc("average fanout of values written-back") - .flags(total); + numStoreInsts + 
.flags(Stats::total); + numStoreInsts = numRefs - numLoadInsts; - wbFanout = producerInst / consumerInst; + numRate + .flags(Stats::total); +} - wbRate - .name(name() + ".wb_rate") - .desc("insts written-back per cycle") - .flags(total); - wbRate = writebackCount / cpu->numCycles; +template +void +DefaultIEW::regStats() +{ + instQueue.regStats(); } template @@ -917,10 +869,10 @@ DefaultIEW::dispatch(ThreadID tid) // check if stall conditions have passed if (dispatchStatus[tid] == Blocked) { - ++iewBlockCycles; + ++iewStats.blockCycles; } else if (dispatchStatus[tid] == Squashing) { - ++iewSquashCycles; + ++iewStats.squashCycles; } // Dispatch should try to dispatch as many instructions as its bandwidth @@ -941,7 +893,7 @@ DefaultIEW::dispatch(ThreadID tid) // the rest of unblocking. dispatchInsts(tid); - ++iewUnblockCycles; + ++iewStats.unblockCycles; if (validInstsFromRename()) { // Add the current inputs to the skid buffer so they can be @@ -998,7 +950,7 @@ DefaultIEW::dispatchInsts(ThreadID tid) DPRINTF(IEW, "[tid:%i] Issue: Squashed instruction encountered, " "not adding to IQ.\n", tid); - ++iewDispSquashedInsts; + ++iewStats.dispSquashedInsts; insts_to_dispatch.pop(); @@ -1027,7 +979,7 @@ DefaultIEW::dispatchInsts(ThreadID tid) // get full in the IQ. toRename->iewUnblock[tid] = false; - ++iewIQFullEvents; + ++iewStats.iqFullEvents; break; } @@ -1046,7 +998,7 @@ DefaultIEW::dispatchInsts(ThreadID tid) // get full in the IQ. 
toRename->iewUnblock[tid] = false; - ++iewLSQFullEvents; + ++iewStats.lsqFullEvents; break; } @@ -1071,7 +1023,7 @@ DefaultIEW::dispatchInsts(ThreadID tid) ldstQueue.insertStore(inst); - ++iewDispStoreInsts; + ++iewStats.dispStoreInsts; // AMOs need to be set as "canCommit()" // so that commit can process them when they reach the @@ -1080,7 +1032,7 @@ DefaultIEW::dispatchInsts(ThreadID tid) instQueue.insertNonSpec(inst); add_to_iq = false; - ++iewDispNonSpecInsts; + ++iewStats.dispNonSpecInsts; toRename->iewInfo[tid].dispatchedToSQ++; } else if (inst->isLoad()) { @@ -1091,7 +1043,7 @@ DefaultIEW::dispatchInsts(ThreadID tid) // memory access. ldstQueue.insertLoad(inst); - ++iewDispLoadInsts; + ++iewStats.dispLoadInsts; add_to_iq = true; @@ -1102,7 +1054,7 @@ DefaultIEW::dispatchInsts(ThreadID tid) ldstQueue.insertStore(inst); - ++iewDispStoreInsts; + ++iewStats.dispStoreInsts; if (inst->isStoreConditional()) { // Store conditionals need to be set as "canCommit()" @@ -1113,7 +1065,7 @@ DefaultIEW::dispatchInsts(ThreadID tid) instQueue.insertNonSpec(inst); add_to_iq = false; - ++iewDispNonSpecInsts; + ++iewStats.dispNonSpecInsts; } else { add_to_iq = true; } @@ -1134,7 +1086,7 @@ DefaultIEW::dispatchInsts(ThreadID tid) instQueue.recordProducer(inst); - iewExecutedNop[tid]++; + iewStats.executedInstStats.numNop[tid]++; add_to_iq = false; } else { @@ -1152,7 +1104,7 @@ DefaultIEW::dispatchInsts(ThreadID tid) // Specifically insert it as nonspeculative. instQueue.insertNonSpec(inst); - ++iewDispNonSpecInsts; + ++iewStats.dispNonSpecInsts; add_to_iq = false; } @@ -1167,7 +1119,7 @@ DefaultIEW::dispatchInsts(ThreadID tid) toRename->iewInfo[tid].dispatched++; - ++iewDispatchedInsts; + ++iewStats.dispatchedInsts; #if TRACING_ON inst->dispatchTick = curTick() - inst->fetchTick; @@ -1263,7 +1215,7 @@ DefaultIEW::executeInsts() // commit any squashed instructions. I like the latter a bit more. 
inst->setCanCommit(); - ++iewExecSquashedInsts; + ++iewStats.executedInstStats.numSquashedInsts; continue; } @@ -1395,9 +1347,9 @@ DefaultIEW::executeInsts() ppMispredict->notify(inst); if (inst->readPredTaken()) { - predictedTakenIncorrect++; + iewStats.predictedTakenIncorrect++; } else { - predictedNotTakenIncorrect++; + iewStats.predictedNotTakenIncorrect++; } } else if (ldstQueue.violation(tid)) { assert(inst->isMemRef()); @@ -1420,7 +1372,7 @@ DefaultIEW::executeInsts() // Squash. squashDueToMemOrder(violator, tid); - ++memOrderViolationEvents; + ++iewStats.memOrderViolationEvents; } } else { // Reset any state associated with redirects that will not @@ -1437,7 +1389,7 @@ DefaultIEW::executeInsts() DPRINTF(IEW, "Violation will not be handled because " "already squashing\n"); - ++memOrderViolationEvents; + ++iewStats.memOrderViolationEvents; } } } @@ -1477,7 +1429,7 @@ DefaultIEW::writebackInsts() DPRINTF(IEW, "Sending instructions to commit, [sn:%lli] PC %s.\n", inst->seqNum, inst->pcState()); - iewInstsToCommit[tid]++; + iewStats.instsToCommit[tid]++; // Notify potential listeners that execution is complete for this // instruction. 
ppToCommit->notify(inst); @@ -1502,10 +1454,10 @@ DefaultIEW::writebackInsts() } if (dependents) { - producerInst[tid]++; - consumerInst[tid]+= dependents; + iewStats.producerInst[tid]++; + iewStats.consumerInst[tid]+= dependents; } - writebackCount[tid]++; + iewStats.writebackCount[tid]++; } } } @@ -1646,7 +1598,7 @@ DefaultIEW::updateExeInstStats(const DynInstPtr& inst) { ThreadID tid = inst->threadNumber; - iewExecutedInsts++; + iewStats.executedInstStats.numInsts++; #if TRACING_ON if (DTRACE(O3PipeView)) { @@ -1658,16 +1610,16 @@ DefaultIEW::updateExeInstStats(const DynInstPtr& inst) // Control operations // if (inst->isControl()) - iewExecutedBranches[tid]++; + iewStats.executedInstStats.numBranches[tid]++; // // Memory operations // if (inst->isMemRef()) { - iewExecutedRefs[tid]++; + iewStats.executedInstStats.numRefs[tid]++; if (inst->isLoad()) { - iewExecLoadInsts[tid]++; + iewStats.executedInstStats.numLoadInsts[tid]++; } } } @@ -1702,9 +1654,9 @@ DefaultIEW::checkMisprediction(const DynInstPtr& inst) squashDueToBranch(inst, tid); if (inst->readPredTaken()) { - predictedTakenIncorrect++; + iewStats.predictedTakenIncorrect++; } else { - predictedNotTakenIncorrect++; + iewStats.predictedNotTakenIncorrect++; } } }