From c4a87f874a69535f70c0f6f2733ea716e32c70cf Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Fri, 19 May 2006 15:37:52 -0400 Subject: [PATCH] Move activity tracking code into its own class. Now the CPU no longer has to keep track of the activity tracking internals; it just calls advance() on the class and uses it to tell if it should deschedule itself. SConscript: Split off activity/idling code into its own class to do the processing separately. cpu/o3/alpha_cpu_builder.cc: cpu/o3/alpha_params.hh: Activity stuff. This is mostly for debugging and may be removed later on (or changed to enable/disable activity idling). cpu/o3/cpu.cc: Move activity idling stuff mostly into its own class, so it no longer clutters this file. cpu/o3/cpu.hh: Move activity idling stuff into its own class. python/m5/objects/AlphaFullCPU.py: Add parameter for initial activity value. --HG-- extra : convert_revision : f32f7cc03895dc07ab57ddba78c5402a1a8b0f1a --- SConscript | 1 + cpu/activity.cc | 122 ++++++++++++++++++++++ cpu/activity.hh | 67 ++++++++++++ cpu/o3/alpha_cpu_builder.cc | 3 + cpu/o3/alpha_params.hh | 2 + cpu/o3/cpu.cc | 167 +++++++++--------------------- cpu/o3/cpu.hh | 80 +++++--------- python/m5/objects/AlphaFullCPU.py | 2 +- 8 files changed, 271 insertions(+), 173 deletions(-) create mode 100644 cpu/activity.cc create mode 100644 cpu/activity.hh diff --git a/SConscript b/SConscript index 5546e6f71..e5ca7c380 100644 --- a/SConscript +++ b/SConscript @@ -80,6 +80,7 @@ base_sources = Split(''' base/stats/visit.cc base/stats/text.cc + cpu/activity.cc cpu/base.cc cpu/base_dyn_inst.cc cpu/cpu_exec_context.cc diff --git a/cpu/activity.cc b/cpu/activity.cc new file mode 100644 index 000000000..6dcb6e341 --- /dev/null +++ b/cpu/activity.cc @@ -0,0 +1,122 @@ + +#include "base/timebuf.hh" +#include "cpu/activity.hh" + +ActivityRecorder::ActivityRecorder(int num_stages, int longest_latency, + int activity) + : activityBuffer(longest_latency, 0), longestLatency(longest_latency), + 
activityCount(activity), numStages(num_stages) +{ + stageActive = new bool[numStages]; + memset(stageActive, 0, numStages); +} + +void +ActivityRecorder::activity() +{ + if (activityBuffer[0]) { + return; + } + + activityBuffer[0] = true; + + ++activityCount; + + DPRINTF(Activity, "Activity: %i\n", activityCount); +} + +void +ActivityRecorder::advance() +{ + if (activityBuffer[-longestLatency]) { + --activityCount; + + assert(activityCount >= 0); + + DPRINTF(Activity, "Activity: %i\n", activityCount); + + if (activityCount == 0) { + DPRINTF(Activity, "No activity left!\n"); + } + } + + activityBuffer.advance(); +} + +void +ActivityRecorder::activateStage(const int idx) +{ + if (!stageActive[idx]) { + ++activityCount; + + stageActive[idx] = true; + + DPRINTF(Activity, "Activity: %i\n", activityCount); + } else { + DPRINTF(Activity, "Stage %i already active.\n", idx); + } + +// assert(activityCount < longestLatency + numStages + 1); +} + +void +ActivityRecorder::deactivateStage(const int idx) +{ + if (stageActive[idx]) { + --activityCount; + + stageActive[idx] = false; + + DPRINTF(Activity, "Activity: %i\n", activityCount); + } else { + DPRINTF(Activity, "Stage %i already inactive.\n", idx); + } + + assert(activityCount >= 0); +} + +void +ActivityRecorder::reset() +{ + activityCount = 0; + memset(stageActive, 0, numStages); + for (int i = 0; i < longestLatency + 1; ++i) + activityBuffer.advance(); +} + +void +ActivityRecorder::dump() +{ + for (int i = 0; i <= longestLatency; ++i) { + cprintf("[Idx:%i %i] ", i, activityBuffer[-i]); + } + + cprintf("\n"); + + for (int i = 0; i < numStages; ++i) { + cprintf("[Stage:%i %i]\n", i, stageActive[i]); + } + + cprintf("\n"); + + cprintf("Activity count: %i\n", activityCount); +} + +void +ActivityRecorder::validate() +{ + int count = 0; + for (int i = 0; i <= longestLatency; ++i) { + if (activityBuffer[-i]) { + count++; + } + } + + for (int i = 0; i < numStages; ++i) { + if (stageActive[i]) { + count++; + } + } + + assert(count 
== activityCount); +} diff --git a/cpu/activity.hh b/cpu/activity.hh new file mode 100644 index 000000000..2d53dc4bb --- /dev/null +++ b/cpu/activity.hh @@ -0,0 +1,67 @@ + +#ifndef __CPU_ACTIVITY_HH__ +#define __CPU_ACTIVITY_HH__ + +#include "base/timebuf.hh" +#include "base/trace.hh" + +class ActivityRecorder { + public: + ActivityRecorder(int num_stages, int longest_latency, int count); + + /** Records that there is activity this cycle. */ + void activity(); + /** Advances the activity buffer, decrementing the activityCount if active + * communication just left the time buffer. The owner should then check + * active() to decide whether to deschedule itself. + */ + void advance(); + /** Marks a stage as active. */ + void activateStage(const int idx); + /** Deactivates a stage. */ + void deactivateStage(const int idx); + + int getActivityCount() { return activityCount; } + + void setActivityCount(int count) + { activityCount = count; } + + bool active() { return activityCount; } + + void reset(); + + void dump(); + + void validate(); + + private: + /** Time buffer that tracks whether any cycle has active communication + * in it. It should be as long as the longest communication + * latency in the system. Each time any time buffer is written, + * the activity buffer should also be written to. The + * activityBuffer is advanced along with all the other time + * buffers, so it should have a 1 somewhere in it only if there + * is active communication in a time buffer. + */ + TimeBuffer activityBuffer; + + int longestLatency; + + /** Tracks how many stages and cycles of time buffer have + * activity. Stages increment this count when they switch to + * active, and decrement it when they switch to + * inactive. Whenever a cycle that previously had no information + * is written in the time buffer, this is incremented. When a + * cycle that had information exits the time buffer due to age, + * this count is decremented. 
When the count is 0, there is no + * activity in the CPU, and it can be descheduled. + */ + int activityCount; + + int numStages; + + /** Records which stages are active/inactive. */ + bool *stageActive; +}; + +#endif // __CPU_ACTIVITY_HH__ diff --git a/cpu/o3/alpha_cpu_builder.cc b/cpu/o3/alpha_cpu_builder.cc index 0f9116d71..b0d812edc 100644 --- a/cpu/o3/alpha_cpu_builder.cc +++ b/cpu/o3/alpha_cpu_builder.cc @@ -48,6 +48,7 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(DerivAlphaFullCPU) Param clock; Param numThreads; +Param activity; #if FULL_SYSTEM SimObjectParam system; @@ -156,6 +157,7 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivAlphaFullCPU) INIT_PARAM(clock, "clock speed"), INIT_PARAM(numThreads, "number of HW thread contexts"), + INIT_PARAM_DFLT(activity, "Initial activity count", 0), #if FULL_SYSTEM INIT_PARAM(system, "System object"), @@ -301,6 +303,7 @@ CREATE_SIM_OBJECT(DerivAlphaFullCPU) params->name = getInstanceName(); params->numberOfThreads = actual_num_threads; + params->activity = activity; #if FULL_SYSTEM params->system = system; diff --git a/cpu/o3/alpha_params.hh b/cpu/o3/alpha_params.hh index b8ebae21e..e3acf2c05 100644 --- a/cpu/o3/alpha_params.hh +++ b/cpu/o3/alpha_params.hh @@ -64,6 +64,8 @@ class AlphaSimpleParams : public BaseFullCPU::Params BaseCPU *checker; + unsigned activity; + // // Caches // diff --git a/cpu/o3/cpu.cc b/cpu/o3/cpu.cc index 9a46f2e7c..8d72bdc41 100644 --- a/cpu/o3/cpu.cc +++ b/cpu/o3/cpu.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2005 The Regents of The University of Michigan + * Copyright (c) 2004-2006 The Regents of The University of Michigan * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -33,8 +33,8 @@ #else #include "sim/process.hh" #endif -#include "sim/root.hh" +#include "cpu/activity.hh" #include "cpu/checker/cpu.hh" #include "cpu/cpu_exec_context.hh" #include "cpu/exec_context.hh" @@ -42,6 +42,7 @@ #include "cpu/o3/alpha_impl.hh" #include "cpu/o3/cpu.hh" +#include "sim/root.hh" #include "sim/stat_control.hh" using namespace std; @@ -104,16 +105,15 @@ FullO3CPU::FullO3CPU(Params *params) TheISA::NumMiscRegs * number_of_threads, TheISA::ZeroReg), - // What to pass to these time buffers? // For now just have these time buffers be pretty big. - // @todo: Make these time buffer sizes parameters. + // @todo: Make these time buffer sizes parameters or derived + // from latencies timeBuffer(5, 5), fetchQueue(5, 5), decodeQueue(5, 5), renameQueue(5, 5), iewQueue(5, 5), - activityBuffer(5, 0), - activityCount(0), + activityRec(NumStages, 10, params->activity), globalSeqNum(1), @@ -150,9 +150,9 @@ FullO3CPU::FullO3CPU(Params *params) tids.resize(number_of_threads); #endif - // The stages also need their CPU pointer setup. However this must be - // done at the upper level CPU because they have pointers to the upper - // level CPU, and not this FullO3CPU. + // The stages also need their CPU pointer setup. However this + // must be done at the upper level CPU because they have pointers + // to the upper level CPU, and not this FullO3CPU. // Set up Pointers to the activeThreads list for each stage fetch.setActiveThreads(&activeThreads); @@ -207,11 +207,11 @@ FullO3CPU::FullO3CPU(Params *params) commitRenameMap[tid].init(TheISA::NumIntRegs, params->numPhysIntRegs, - lreg_idx, //Index for Logical. Regs + lreg_idx, //Index for Logical. 
Regs TheISA::NumFloatRegs, params->numPhysFloatRegs, - freg_idx, //Index for Float Regs + freg_idx, //Index for Float Regs TheISA::NumMiscRegs, @@ -223,11 +223,11 @@ FullO3CPU::FullO3CPU(Params *params) renameMap[tid].init(TheISA::NumIntRegs, params->numPhysIntRegs, - lreg_idx, //Index for Logical. Regs + lreg_idx, //Index for Logical. Regs TheISA::NumFloatRegs, params->numPhysFloatRegs, - freg_idx, //Index for Float Regs + freg_idx, //Index for Float Regs TheISA::NumMiscRegs, @@ -258,10 +258,6 @@ FullO3CPU::FullO3CPU(Params *params) lastRunningCycle = curTick; - for (int i = 0; i < NumStages; ++i) { - stageActive[i] = false; - } - contextSwitch = false; } @@ -336,7 +332,7 @@ FullO3CPU::tick() ++numCycles; - activity = false; +// activity = false; //Tick each of the stages fetch.tick(); @@ -361,14 +357,22 @@ FullO3CPU::tick() renameQueue.advance(); iewQueue.advance(); - advanceActivityBuffer(); + activityRec.advance(); if (removeInstsThisCycle) { cleanUpRemovedInsts(); } - if (_status != SwitchedOut && activityCount && !tickEvent.scheduled()) { - tickEvent.schedule(curTick + cycles(1)); + if (!tickEvent.scheduled()) { + if (_status == SwitchedOut) { + // increment stat + lastRunningCycle = curTick; + } else if (!activityRec.active()) { + lastRunningCycle = curTick; + timesIdled++; + } else { + tickEvent.schedule(curTick + cycles(1)); + } } #if !FULL_SYSTEM @@ -592,7 +596,7 @@ FullO3CPU::activateContext(int tid, int delay) // Be sure to signal that there's some activity so the CPU doesn't // deschedule itself. - activityThisCycle(); + activityRec.activity(); fetch.wakeFromQuiesce(); _status = Running; @@ -669,13 +673,18 @@ FullO3CPU::switchOut(Sampler *_sampler) rename.switchOut(); iew.switchOut(); commit.switchOut(); + + // Wake the CPU and record activity so everything can drain out if + // the CPU is currently idle. 
+ wakeCPU(); + activityRec.activity(); } template void FullO3CPU::signalSwitched() { - if (++switchCount == 5) { + if (++switchCount == NumStages) { fetch.doSwitchOut(); rename.doSwitchOut(); commit.doSwitchOut(); @@ -699,18 +708,16 @@ template void FullO3CPU::takeOverFrom(BaseCPU *oldCPU) { - // Flush out any old data from the activity buffers. - for (int i = 0; i < 6; ++i) { + // Flush out any old data from the time buffers. + for (int i = 0; i < 10; ++i) { timeBuffer.advance(); fetchQueue.advance(); decodeQueue.advance(); renameQueue.advance(); iewQueue.advance(); - activityBuffer.advance(); } - activityCount = 0; - bzero(&stageActive, sizeof(stageActive)); + activityRec.reset(); BaseCPU::takeOverFrom(oldCPU); @@ -722,23 +729,23 @@ FullO3CPU::takeOverFrom(BaseCPU *oldCPU) assert(!tickEvent.scheduled()); - // @todo: Figure out how to properly select the tid to put onto the active threads list. + // @todo: Figure out how to properly select the tid to put onto + // the active threads list. int tid = 0; list::iterator isActive = find( activeThreads.begin(), activeThreads.end(), tid); if (isActive == activeThreads.end()) { - //May Need to Re-code this if the delay variable is the - //delay needed for thread to activate + //May Need to Re-code this if the delay variable is the delay + //needed for thread to activate DPRINTF(FullCPU, "Adding Thread %i to active threads list\n", tid); activeThreads.push_back(tid); } - // Set all status's to active, schedule the - // CPU's tick event. + // Set all statuses to active, schedule the CPU's tick event. 
// @todo: Fix up statuses so this is handled properly for (int i = 0; i < execContexts.size(); ++i) { ExecContext *xc = execContexts[i]; @@ -850,10 +857,6 @@ template void FullO3CPU::setArchIntReg(int reg_idx, uint64_t val, unsigned tid) { - if (reg_idx == TheISA::ZeroReg) { - warn("Setting r31 through ArchIntReg in CPU, cycle %i\n", curTick); - } - PhysRegIndex phys_reg = commitRenameMap[tid].lookup(reg_idx); regFile.setIntReg(phys_reg, val); @@ -1049,8 +1052,8 @@ FullO3CPU::squashInstIt(const ListIt &instIt, const unsigned &tid) // Mark it as squashed. (*instIt)->setSquashed(); - //@todo: Formulate a consistent method for deleting - //instructions from the instruction list + // @todo: Formulate a consistent method for deleting + // instructions from the instruction list // Remove the instruction from the list. removeList.push(instIt); } @@ -1074,14 +1077,14 @@ FullO3CPU::cleanUpRemovedInsts() removeInstsThisCycle = false; } - +/* template void FullO3CPU::removeAllInsts() { instList.clear(); } - +*/ template void FullO3CPU::dumpInsts() @@ -1102,96 +1105,28 @@ FullO3CPU::dumpInsts() ++num; } } - +/* template void FullO3CPU::wakeDependents(DynInstPtr &inst) { iew.wakeDependents(inst); } - +*/ template void FullO3CPU::wakeCPU() { - if (activityCount || tickEvent.scheduled()) { - return; - } - - idleCycles += curTick - lastRunningCycle; - - tickEvent.schedule(curTick); -} - -template -void -FullO3CPU::activityThisCycle() -{ - if (activityBuffer[0]) { + if (activityRec.active() || tickEvent.scheduled()) { + DPRINTF(Activity, "CPU already running.\n"); return; } - activityBuffer[0] = true; - activity = true; - ++activityCount; - - DPRINTF(Activity, "Activity: %i\n", activityCount); -} - -template -void -FullO3CPU::advanceActivityBuffer() -{ - if (activityBuffer[-5]) { - --activityCount; - - assert(activityCount >= 0); - - DPRINTF(Activity, "Activity: %i\n", activityCount); - - if (activityCount == 0) { - DPRINTF(FullCPU, "No activity left, going to idle!\n"); - 
lastRunningCycle = curTick; - timesIdled++; - } - } - - activityBuffer.advance(); -} - -template -void -FullO3CPU::activateStage(const StageIdx idx) -{ - if (!stageActive[idx]) { - ++activityCount; - - stageActive[idx] = true; - - DPRINTF(Activity, "Activity: %i\n", activityCount); - } else { - DPRINTF(Activity, "Stage %i already active.\n", idx); - } - - // @todo: Number is hardcoded for now. Replace with parameter. - assert(activityCount < 15); -} - -template -void -FullO3CPU::deactivateStage(const StageIdx idx) -{ - if (stageActive[idx]) { - --activityCount; - - stageActive[idx] = false; + DPRINTF(Activity, "Waking up CPU\n"); - DPRINTF(Activity, "Activity: %i\n", activityCount); - } else { - DPRINTF(Activity, "Stage %i already inactive.\n", idx); - } + idleCycles += (curTick - 1) - lastRunningCycle; - assert(activityCount >= 0); + tickEvent.schedule(curTick); } template diff --git a/cpu/o3/cpu.hh b/cpu/o3/cpu.hh index 789729e61..8db65d501 100644 --- a/cpu/o3/cpu.hh +++ b/cpu/o3/cpu.hh @@ -26,8 +26,8 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -#ifndef __CPU_O3_FULL_CPU_HH__ -#define __CPU_O3_FULL_CPU_HH__ +#ifndef __CPU_O3_CPU_HH__ +#define __CPU_O3_CPU_HH__ #include #include @@ -38,6 +38,7 @@ #include "base/statistics.hh" #include "base/timebuf.hh" #include "config/full_system.hh" +#include "cpu/activity.hh" #include "cpu/base.hh" #include "cpu/cpu_exec_context.hh" #include "cpu/o3/comm.hh" @@ -70,7 +71,7 @@ template class FullO3CPU : public BaseFullCPU { public: - //Put typedefs from the Impl here. + // Typedefs from the Impl here. typedef typename Impl::CPUPol CPUPolicy; typedef typename Impl::Params Params; typedef typename Impl::DynInstPtr DynInstPtr; @@ -191,20 +192,18 @@ class FullO3CPU : public BaseFullCPU * Note: this is a virtual function. 
CPU-Specific * functionality defined in derived classes */ - virtual void syscall(int tid) {} + virtual void syscall(int tid) { panic("Unimplemented!"); } /** Check if there are any system calls pending. */ void checkSyscalls(); /** Switches out this CPU. - * @todo: Implement this. */ void switchOut(Sampler *sampler); void signalSwitched(); /** Takes over from another CPU. - * @todo: Implement this. */ void takeOverFrom(BaseCPU *oldCPU); @@ -299,12 +298,8 @@ class FullO3CPU : public BaseFullCPU /** Add Instructions to the CPU Remove List*/ void addToRemoveList(DynInstPtr &inst); - /** Remove an instruction from the front of the list. It is expected - * that there are no instructions in front of it (that is, none are older - * than the instruction being removed). Used when retiring instructions. - * @todo: Remove the argument to this function, and just have it remove - * last instruction once it's verified that commit has the same ordering - * as the instruction list. + /** Remove an instruction from the front end of the list. There's + * no restriction on location of the instruction. */ void removeFrontInst(DynInstPtr &inst); @@ -319,15 +314,15 @@ class FullO3CPU : public BaseFullCPU void cleanUpRemovedInsts(); /** Remove all instructions from the list. */ - void removeAllInsts(); +// void removeAllInsts(); void dumpInsts(); /** Basically a wrapper function so that instructions executed at - * commit can tell the instruction queue that they have completed. - * Eventually this hack should be removed. + * commit can tell the instruction queue that they have + * completed. Eventually this hack should be removed. */ - void wakeDependents(DynInstPtr &inst); +// void wakeDependents(DynInstPtr &inst); public: /** List of all the instructions in flight. 
*/ @@ -338,12 +333,12 @@ class FullO3CPU : public BaseFullCPU */ std::queue removeList; -//#ifdef DEBUG +#ifdef DEBUG std::set snList; -//#endif +#endif - /** Records if instructions need to be removed this cycle due to being - * retired or squashed. + /** Records if instructions need to be removed this cycle due to + * being retired or squashed. */ bool removeInstsThisCycle; @@ -425,46 +420,19 @@ class FullO3CPU : public BaseFullCPU /** The IEW stage's instruction queue. */ TimeBuffer iewQueue; - private: - /** Time buffer that tracks if any cycles has active communication in them. - * It should be as long as the longest communication latency in the system. - * Each time any time buffer is written, the activity buffer should also - * be written to. The activityBuffer is advanced along with all the other - * time buffers, so it should always have a 1 somewhere in it only if there - * is active communication in a time buffer. - */ - TimeBuffer activityBuffer; - - /** Tracks how many stages and cycles of time buffer have activity. Stages - * increment this count when they switch to active, and decrement it when - * they switch to inactive. Whenever a cycle that previously had no - * information is written in the time buffer, this is incremented. When - * a cycle that had information exits the time buffer due to age, this - * count is decremented. When the count is 0, there is no activity in the - * CPU, and it can be descheduled. - */ - int activityCount; + public: + ActivityRecorder activityRec; - /** Records if there has been activity this cycle. */ - bool activity; + void activityThisCycle() { activityRec.activity(); } - /** Records which stages are active/inactive. */ - bool stageActive[NumStages]; + void activateStage(const StageIdx idx) + { activityRec.activateStage(idx); } + + void deactivateStage(const StageIdx idx) + { activityRec.deactivateStage(idx); } - public: /** Wakes the CPU, rescheduling the CPU if it's not already active. 
*/ void wakeCPU(); - /** Records that there is activity this cycle. */ - void activityThisCycle(); - /** Advances the activity buffer, decrementing the activityCount if active - * communication just left the time buffer, and descheduling the CPU if - * there is no activity. - */ - void advanceActivityBuffer(); - /** Marks a stage as active. */ - void activateStage(const StageIdx idx); - /** Deactivates a stage. */ - void deactivateStage(const StageIdx idx); /** Gets a free thread id. Use if thread ids change across system. */ int getFreeTid(); @@ -550,4 +518,4 @@ class FullO3CPU : public BaseFullCPU Stats::Formula totalIpc; }; -#endif +#endif // __CPU_O3_CPU_HH__ diff --git a/python/m5/objects/AlphaFullCPU.py b/python/m5/objects/AlphaFullCPU.py index 1541b9494..d719bf783 100644 --- a/python/m5/objects/AlphaFullCPU.py +++ b/python/m5/objects/AlphaFullCPU.py @@ -3,7 +3,7 @@ from BaseCPU import BaseCPU class DerivAlphaFullCPU(BaseCPU): type = 'DerivAlphaFullCPU' - + activity = Param.Unsigned("Initial count") numThreads = Param.Unsigned("number of HW thread contexts") if not build_env['FULL_SYSTEM']: -- 2.30.2