From 74e8abd37ecd637a607f90e36aed1a3a16eea7da Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Thu, 24 Aug 2006 17:29:34 -0400 Subject: [PATCH] Switch out fixups for the CPUs. cpu/cpu_exec_context.cc: Be sure to switch over the kernel stats so things don't get messed up. This may lead to weird stats files for sampling runs (detailed stats should be correct, regardless of which kernel stats this is defined on). cpu/o3/cpu.cc: Updates for switching out. Also include a bunch of debug info if needed. cpu/o3/fetch_impl.hh: Switch out properly. cpu/o3/inst_queue.hh: cpu/o3/inst_queue_impl.hh: Comment out unused stats (they made the stats file huge). cpu/o3/lsq_unit.hh: cpu/o3/lsq_unit_impl.hh: Add in new stat. cpu/o3/rename.hh: Fix up for switching out. cpu/o3/rename_impl.hh: Fix up for switching out. Be sure to mark any Misc regs as ready if their renamed inst got squashed from being switched out. cpu/ozone/cpu_impl.hh: cpu/simple/cpu.cc: Switch out fixup. sim/eventq.hh: Make CPU switching more immediate. Also comment out the assertion, as it doesn't apply if we're putting it on an inst-based queue. 
--HG-- extra : convert_revision : f40ed40604738993f061e0c628810ff37a920562 --- cpu/cpu_exec_context.cc | 5 ++++ cpu/o3/cpu.cc | 40 ++++++++++++++++++++++++++ cpu/o3/fetch_impl.hh | 3 +- cpu/o3/inst_queue.hh | 4 +-- cpu/o3/inst_queue_impl.hh | 7 +++-- cpu/o3/lsq_unit.hh | 3 ++ cpu/o3/lsq_unit_impl.hh | 5 ++++ cpu/o3/rename.hh | 2 ++ cpu/o3/rename_impl.hh | 8 +++++- cpu/ozone/cpu_impl.hh | 59 +++++++++++++++++++++++++++++---------- cpu/simple/cpu.cc | 31 +++++++++++++++----- sim/eventq.hh | 6 ++-- 12 files changed, 142 insertions(+), 31 deletions(-) diff --git a/cpu/cpu_exec_context.cc b/cpu/cpu_exec_context.cc index 0dcf149fd..9f151dd6a 100644 --- a/cpu/cpu_exec_context.cc +++ b/cpu/cpu_exec_context.cc @@ -183,6 +183,11 @@ CPUExecContext::takeOverFrom(ExecContext *oldContext) if (quiesceEvent) { quiesceEvent->xc = proxy; } + + Kernel::Statistics *stats = oldContext->getKernelStats(); + if (stats) { + kernelStats = stats; + } #endif storeCondFailures = 0; diff --git a/cpu/o3/cpu.cc b/cpu/o3/cpu.cc index 0025d4144..88de6c746 100644 --- a/cpu/o3/cpu.cc +++ b/cpu/o3/cpu.cc @@ -599,8 +599,11 @@ FullO3CPU::activateContext(int tid, int delay) // Be sure to signal that there's some activity so the CPU doesn't // deschedule itself. activityRec.activity(); + +#if FULL_SYSTEM if (thread[tid]->quiesceEvent && thread[tid]->quiesceEvent->scheduled()) thread[tid]->quiesceEvent->deschedule(); +#endif fetch.wakeFromQuiesce(); @@ -671,6 +674,8 @@ template void FullO3CPU::switchOut(Sampler *_sampler) { + DPRINTF(FullCPU, "Switching out\n"); + BaseCPU::switchOut(_sampler); sampler = _sampler; switchCount = 0; fetch.switchOut(); @@ -694,6 +699,41 @@ FullO3CPU::signalSwitched() rename.doSwitchOut(); commit.doSwitchOut(); instList.clear(); + +#ifndef NDEBUG + PhysRegIndex renamed_reg; + // First loop through the integer registers. 
+ for (int i = 0; i < AlphaISA::NumIntRegs; ++i) { + renamed_reg = renameMap[0].lookup(i); + assert(renamed_reg == commitRenameMap[0].lookup(i)); + + DPRINTF(FullCPU, "FullCPU: Checking if register %i is ready.\n", + renamed_reg); + + assert(scoreboard.getReg(renamed_reg)); + } + + // Then loop through the floating point registers. + for (int i = 0; i < AlphaISA::NumFloatRegs; ++i) { + renamed_reg = renameMap[0].lookup(i + AlphaISA::FP_Base_DepTag); + assert(renamed_reg == commitRenameMap[0].lookup(i + AlphaISA::FP_Base_DepTag)); + + DPRINTF(FullCPU, "FullCPU: Checking if register %i is ready.\n", + renamed_reg); + + assert(scoreboard.getReg(renamed_reg)); + } + + for (int i = 0; i < AlphaISA::NumMiscRegs; ++i) { + renamed_reg = i + ((Params *)params)->numPhysFloatRegs + ((Params *)params)->numPhysIntRegs; + + DPRINTF(FullCPU, "FullCPU: Checking if register %i is ready.\n", + renamed_reg); + + assert(scoreboard.getReg(renamed_reg)); + } +#endif + while (!removeList.empty()) { removeList.pop(); } diff --git a/cpu/o3/fetch_impl.hh b/cpu/o3/fetch_impl.hh index cc09c4a41..7a3292dbe 100644 --- a/cpu/o3/fetch_impl.hh +++ b/cpu/o3/fetch_impl.hh @@ -391,6 +391,7 @@ DefaultFetch::takeOverFrom() wroteToTimeBuffer = false; _status = Inactive; switchedOut = false; + interruptPending = false; branchPred.takeOverFrom(); } @@ -469,7 +470,7 @@ DefaultFetch::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid unsigned flags = 0; #endif // FULL_SYSTEM - if (interruptPending && flags == 0) { + if (isSwitchedOut() || (interruptPending && flags == 0)) { // Hold off fetch from getting new instructions while an interrupt // is pending. return false; diff --git a/cpu/o3/inst_queue.hh b/cpu/o3/inst_queue.hh index 80cd71f0d..e96fbc667 100644 --- a/cpu/o3/inst_queue.hh +++ b/cpu/o3/inst_queue.hh @@ -474,11 +474,11 @@ class InstructionQueue Stats::Scalar<> iqSquashedNonSpecRemoved; /** Distribution of number of instructions in the queue. 
*/ - Stats::VectorDistribution<> queueResDist; +// Stats::VectorDistribution<> queueResDist; /** Distribution of the number of instructions issued. */ Stats::Distribution<> numIssuedDist; /** Distribution of the cycles it takes to issue an instruction. */ - Stats::VectorDistribution<> issueDelayDist; +// Stats::VectorDistribution<> issueDelayDist; /** Number of times an instruction could not be issued because a * FU was busy. diff --git a/cpu/o3/inst_queue_impl.hh b/cpu/o3/inst_queue_impl.hh index 72cb0d708..b6b06ca77 100644 --- a/cpu/o3/inst_queue_impl.hh +++ b/cpu/o3/inst_queue_impl.hh @@ -230,7 +230,7 @@ InstructionQueue::regStats() .name(name() + ".iqSquashedNonSpecRemoved") .desc("Number of squashed non-spec instructions that were removed") .prereq(iqSquashedNonSpecRemoved); - +/* queueResDist .init(Num_OpClasses, 0, 99, 2) .name(name() + ".IQ:residence:") @@ -240,6 +240,7 @@ InstructionQueue::regStats() for (int i = 0; i < Num_OpClasses; ++i) { queueResDist.subname(i, opClassStrings[i]); } +*/ numIssuedDist .init(0,totalWidth,1) .name(name() + ".ISSUE:issued_per_cycle") @@ -268,7 +269,7 @@ InstructionQueue::regStats() // // How long did instructions for a particular FU type wait prior to issue // - +/* issueDelayDist .init(Num_OpClasses,0,99,2) .name(name() + ".ISSUE:") @@ -281,7 +282,7 @@ InstructionQueue::regStats() subname << opClassStrings[i] << "_delay"; issueDelayDist.subname(i, subname.str()); } - +*/ issueRate .name(name() + ".ISSUE:rate") .desc("Inst issue rate") diff --git a/cpu/o3/lsq_unit.hh b/cpu/o3/lsq_unit.hh index fe174a97d..1db6dc02d 100644 --- a/cpu/o3/lsq_unit.hh +++ b/cpu/o3/lsq_unit.hh @@ -382,6 +382,9 @@ class LSQUnit { * ignored due to the instruction already being squashed. */ Stats::Scalar<> lsqIgnoredResponses; + /** Total number of memory ordering violations. */ + Stats::Scalar<> lsqMemOrderViolation; + /** Total number of squashed stores. 
*/ Stats::Scalar<> lsqSquashedStores; diff --git a/cpu/o3/lsq_unit_impl.hh b/cpu/o3/lsq_unit_impl.hh index 5cc3078f8..7086c381e 100644 --- a/cpu/o3/lsq_unit_impl.hh +++ b/cpu/o3/lsq_unit_impl.hh @@ -144,6 +144,10 @@ LSQUnit::regStats() .name(name() + ".ignoredResponses") .desc("Number of memory responses ignored because the instruction is squashed"); + lsqMemOrderViolation + .name(name() + ".memOrderViolation") + .desc("Number of memory ordering violations"); + lsqSquashedStores .name(name() + ".squashedStores") .desc("Number of stores squashed"); @@ -495,6 +499,7 @@ LSQUnit::executeStore(DynInstPtr &store_inst) // A load incorrectly passed this store. Squash and refetch. // For now return a fault to show that it was unsuccessful. memDepViolator = loadQueue[load_idx]; + ++lsqMemOrderViolation; return genMachineCheckFault(); } diff --git a/cpu/o3/rename.hh b/cpu/o3/rename.hh index 4912431ad..5769dbd37 100644 --- a/cpu/o3/rename.hh +++ b/cpu/o3/rename.hh @@ -411,6 +411,8 @@ class DefaultRename /** The maximum skid buffer size. */ unsigned skidBufferMax; + PhysRegIndex maxPhysicalRegs; + /** Enum to record the source of a structure full stall. Can come from * either ROB, IQ, LSQ, and it is priortized in that order. */ diff --git a/cpu/o3/rename_impl.hh b/cpu/o3/rename_impl.hh index 93f5b3504..49627e3d4 100644 --- a/cpu/o3/rename_impl.hh +++ b/cpu/o3/rename_impl.hh @@ -40,7 +40,8 @@ DefaultRename::DefaultRename(Params *params) commitToRenameDelay(params->commitToRenameDelay), renameWidth(params->renameWidth), commitWidth(params->commitWidth), - numThreads(params->numberOfThreads) + numThreads(params->numberOfThreads), + maxPhysicalRegs(params->numPhysIntRegs + params->numPhysFloatRegs) { _status = Inactive; @@ -283,6 +284,11 @@ DefaultRename::doSwitchOut() // Put the renamed physical register back on the free list. freeList->addReg(hb_it->newPhysReg); + // Be sure to mark its register as ready if it's a misc register. 
+ if (hb_it->newPhysReg >= maxPhysicalRegs) { + scoreboard->setReg(hb_it->newPhysReg); + } + historyBuffer[i].erase(hb_it++); } insts[i].clear(); diff --git a/cpu/ozone/cpu_impl.hh b/cpu/ozone/cpu_impl.hh index 050bdb9a3..1a0de29f5 100644 --- a/cpu/ozone/cpu_impl.hh +++ b/cpu/ozone/cpu_impl.hh @@ -184,7 +184,9 @@ OzoneCPU::OzoneCPU(Params *p) globalSeqNum = 1; +#if FULL_SYSTEM checkInterrupts = false; +#endif lockFlag = 0; @@ -213,6 +215,7 @@ template void OzoneCPU::switchOut(Sampler *_sampler) { + BaseCPU::switchOut(_sampler); sampler = _sampler; switchCount = 0; // Front end needs state from back end, so switch out the back end first. @@ -234,6 +237,16 @@ OzoneCPU::signalSwitched() checker->switchOut(sampler); _status = SwitchedOut; +#ifndef NDEBUG + // Loop through all registers + for (int i = 0; i < AlphaISA::TotalNumRegs; ++i) { + assert(thread.renameTable[i] == frontEnd->renameTable[i]); + + assert(thread.renameTable[i] == backEnd->renameTable[i]); + + DPRINTF(OzoneCPU, "Checking if register %i matches.\n", i); + } +#endif if (tickEvent.scheduled()) tickEvent.squash(); @@ -256,9 +269,16 @@ OzoneCPU::takeOverFrom(BaseCPU *oldCPU) frontEnd->takeOverFrom(); assert(!tickEvent.scheduled()); +#ifndef NDEBUG + // Check rename table. + for (int i = 0; i < TheISA::TotalNumRegs; ++i) { + assert(thread.renameTable[i]->isResultReady()); + } +#endif + // @todo: Fix hardcoded number // Clear out any old information in time buffer. 
- for (int i = 0; i < 6; ++i) { + for (int i = 0; i < 15; ++i) { comm.advance(); } @@ -291,8 +311,10 @@ OzoneCPU::activateContext(int thread_num, int delay) scheduleTickEvent(delay); _status = Running; thread._status = ExecContext::Active; +#if FULL_SYSTEM if (thread.quiesceEvent && thread.quiesceEvent->scheduled()) thread.quiesceEvent->deschedule(); +#endif frontEnd->wakeFromQuiesce(); } @@ -369,7 +391,7 @@ template void OzoneCPU::resetStats() { - startNumInst = numInst; +// startNumInst = numInst; notIdleFraction = (_status != Idle); } @@ -777,7 +799,9 @@ OzoneCPU::OzoneXC::halt() template void OzoneCPU::OzoneXC::dumpFuncProfile() -{ } +{ + thread->dumpFuncProfile(); +} #endif template @@ -797,6 +821,7 @@ OzoneCPU::OzoneXC::takeOverFrom(ExecContext *old_context) copyArchRegs(old_context); setCpuId(old_context->readCpuId()); + thread->inst = old_context->getInst(); #if !FULL_SYSTEM setFuncExeInst(old_context->readFuncExeInst()); #else @@ -869,16 +894,14 @@ template void OzoneCPU::OzoneXC::profileClear() { - if (thread->profile) - thread->profile->clear(); + thread->profileClear(); } template void OzoneCPU::OzoneXC::profileSample() { - if (thread->profile) - thread->profile->sample(thread->profileNode, thread->profilePC); + thread->profileSample(); } #endif @@ -906,14 +929,20 @@ OzoneCPU::OzoneXC::copyArchRegs(ExecContext *xc) cpu->frontEnd->setPC(thread->PC); cpu->frontEnd->setNextPC(thread->nextPC); - for (int i = 0; i < TheISA::TotalNumRegs; ++i) { - if (i < TheISA::FP_Base_DepTag) { - thread->renameTable[i]->setIntResult(xc->readIntReg(i)); - } else if (i < (TheISA::FP_Base_DepTag + TheISA::NumFloatRegs)) { - int fp_idx = i - TheISA::FP_Base_DepTag; - thread->renameTable[i]->setDoubleResult( - xc->readFloatRegDouble(fp_idx)); - } + // First loop through the integer registers. 
+ for (int i = 0; i < TheISA::NumIntRegs; ++i) { +/* DPRINTF(OzoneCPU, "Copying over register %i, had data %lli, " + "now has data %lli.\n", + i, thread->renameTable[i]->readIntResult(), + xc->readIntReg(i)); +*/ + thread->renameTable[i]->setIntResult(xc->readIntReg(i)); + } + + // Then loop through the floating point registers. + for (int i = 0; i < TheISA::NumFloatRegs; ++i) { + int fp_idx = i + TheISA::FP_Base_DepTag; + thread->renameTable[fp_idx]->setIntResult(xc->readFloatRegInt(i)); } #if !FULL_SYSTEM diff --git a/cpu/simple/cpu.cc b/cpu/simple/cpu.cc index 0a4b3c3e4..eb19115b2 100644 --- a/cpu/simple/cpu.cc +++ b/cpu/simple/cpu.cc @@ -181,7 +181,9 @@ SimpleCPU::switchOut(Sampler *s) _status = SwitchedOut; if (tickEvent.scheduled()) - tickEvent.squash(); + tickEvent.deschedule(); + + assert(!tickEvent.scheduled()); sampler->signalSwitched(); } @@ -294,7 +296,7 @@ SimpleCPU::regStats() void SimpleCPU::resetStats() { - startNumInst = numInst; +// startNumInst = numInst; notIdleFraction = (_status != Idle); } @@ -352,6 +354,7 @@ SimpleCPU::copySrcTranslate(Addr src) Fault fault = cpuXC->translateDataReadReq(memReq); if (fault == NoFault) { + panic("We can't copy!"); cpuXC->copySrcAddr = src; cpuXC->copySrcPhysAddr = memReq->paddr + offset; } else { @@ -600,6 +603,8 @@ SimpleCPU::dbg_vtophys(Addr addr) void SimpleCPU::processCacheCompletion() { + Fault fault; + switch (status()) { case IcacheMissStall: icacheStallCycles += curTick - lastIcacheStall; @@ -618,12 +623,17 @@ SimpleCPU::processCacheCompletion() break; case DcacheMissSwitch: if (memReq->cmd.isRead()) { - curStaticInst->execute(this,traceData); + fault = curStaticInst->execute(this,traceData); if (traceData) traceData->finalize(); + } else { + fault = NoFault; } + assert(fault == NoFault); + assert(!tickEvent.scheduled()); _status = SwitchedOut; sampler->signalSwitched(); + return; case SwitchedOut: // If this CPU has been switched out due to sampling/warm-up, // ignore any further status changes (e.g., 
due to cache @@ -787,9 +797,10 @@ SimpleCPU::tick() } if (cpuXC->profile) { - bool usermode = - (cpuXC->readMiscReg(AlphaISA::IPR_DTB_CM) & 0x18) != 0; - cpuXC->profilePC = usermode ? 1 : cpuXC->readPC(); +// bool usermode = +// (cpuXC->readMiscReg(AlphaISA::IPR_DTB_CM) & 0x18) != 0; +// cpuXC->profilePC = usermode ? 1 : cpuXC->readPC(); + cpuXC->profilePC = cpuXC->readPC(); ProfileNode *node = cpuXC->profile->consume(xcProxy, inst); if (node) cpuXC->profileNode = node; @@ -849,8 +860,10 @@ SimpleCPU::tick() status() == Idle || status() == DcacheMissStall); - if (status() == Running && !tickEvent.scheduled()) + if (status() == Running && !tickEvent.scheduled()) { + assert(_status != SwitchedOut); tickEvent.schedule(curTick + cycles(1)); + } } //////////////////////////////////////////////////////////////////////// @@ -863,6 +876,7 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(SimpleCPU) Param max_insts_all_threads; Param max_loads_any_thread; Param max_loads_all_threads; + Param stats_reset_inst; Param progress_interval; #if FULL_SYSTEM @@ -897,6 +911,8 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(SimpleCPU) "terminate when any thread reaches this load count"), INIT_PARAM(max_loads_all_threads, "terminate when all threads have reached this load count"), + INIT_PARAM(stats_reset_inst, + "instruction to reset stats on"), INIT_PARAM_DFLT(progress_interval, "CPU Progress interval", 0), #if FULL_SYSTEM @@ -930,6 +946,7 @@ CREATE_SIM_OBJECT(SimpleCPU) params->max_insts_all_threads = max_insts_all_threads; params->max_loads_any_thread = max_loads_any_thread; params->max_loads_all_threads = max_loads_all_threads; + params->stats_reset_inst = stats_reset_inst; params->deferRegistration = defer_registration; params->clock = clock; params->functionTrace = function_trace; diff --git a/sim/eventq.hh b/sim/eventq.hh index 5fc73bb53..b9a0abc12 100644 --- a/sim/eventq.hh +++ b/sim/eventq.hh @@ -43,6 +43,7 @@ #include "sim/host.hh" // for Tick #include "base/fast_alloc.hh" +#include "base/misc.hh" 
#include "base/trace.hh" #include "sim/serialize.hh" @@ -131,7 +132,7 @@ class Event : public Serializable, public FastAlloc /// same cycle (after unscheduling the old CPU's tick event). /// The switch needs to come before any tick events to make /// sure we don't tick both CPUs in the same cycle. - CPU_Switch_Pri = 31, + CPU_Switch_Pri = -31, /// Serailization needs to occur before tick events also, so /// that a serialize/unserialize is identical to an on-line @@ -344,7 +345,8 @@ inline void Event::schedule(Tick t) { assert(!scheduled()); - assert(t >= curTick); +// if (t < curTick) +// warn("t is less than curTick, ensure you don't want cycles"); setFlags(Scheduled); #if TRACING_ON -- 2.30.2