From: Kevin Lim Date: Tue, 16 May 2006 18:06:35 +0000 (-0400) Subject: Updates for sampler, checker, and general correctness. X-Git-Tag: m5_2.0_beta1~36^2~108^2~26 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=ef6e2eb3c4dbf337df7380ae93360c13140f11f6;p=gem5.git Updates for sampler, checker, and general correctness. cpu/o3/alpha_cpu.hh: Update for sampler to work properly. Also code cleanup. cpu/o3/alpha_cpu_builder.cc: cpu/o3/alpha_dyn_inst.hh: Updates to support the checker. cpu/o3/alpha_cpu_impl.hh: Updates to support the checker. Also general code cleanup. cpu/o3/alpha_dyn_inst_impl.hh: Code cleanup. cpu/o3/alpha_params.hh: Updates to support the checker. Also supports trap latencies set through the parameters. cpu/o3/commit.hh: Supports sampler, checker. Code cleanup. cpu/o3/commit_impl.hh: Updates to support the sampler and checker, as well as general code cleanup. cpu/o3/cpu.cc: cpu/o3/cpu.hh: Support sampler and checker. cpu/o3/decode_impl.hh: Supports sampler. cpu/o3/fetch.hh: Supports sampler. Also update to hold the youngest valid SN fetch has seen to ensure that the entire pipeline has been drained. cpu/o3/fetch_impl.hh: Sampler updates. Also be sure not to fetch from uncached space (bad path). cpu/o3/iew.hh: cpu/o3/iew_impl.hh: Sampler updates. cpu/o3/lsq_unit_impl.hh: Supports checker. cpu/o3/regfile.hh: No need for accessing xcProxies directly. cpu/o3/rename.hh: cpu/o3/rename_impl.hh: Sampler support. 
--HG-- extra : convert_revision : 03881885dd50ebbca13ef31f31492fd4ef59121c --- diff --git a/cpu/o3/alpha_cpu.hh b/cpu/o3/alpha_cpu.hh index dfdf092ed..f70793aaa 100644 --- a/cpu/o3/alpha_cpu.hh +++ b/cpu/o3/alpha_cpu.hh @@ -34,6 +34,8 @@ #include "cpu/o3/cpu.hh" #include "sim/byteswap.hh" +class EndQuiesceEvent; + template class AlphaFullCPU : public FullO3CPU { @@ -61,7 +63,7 @@ class AlphaFullCPU : public FullO3CPU Tick lastActivate; Tick lastSuspend; - Event *quiesceEvent; + EndQuiesceEvent *quiesceEvent; virtual BaseCPU *getCpuPtr() { return cpu; } @@ -112,10 +114,8 @@ class AlphaFullCPU : public FullO3CPU virtual void unserialize(Checkpoint *cp, const std::string §ion); #if FULL_SYSTEM - virtual Event *getQuiesceEvent(); + virtual EndQuiesceEvent *getQuiesceEvent(); - // Not necessarily the best location for these... - // Having an extra function just to read these is obnoxious virtual Tick readLastActivate(); virtual Tick readLastSuspend(); @@ -125,17 +125,12 @@ class AlphaFullCPU : public FullO3CPU virtual int getThreadNum() { return thread->tid; } - // Also somewhat obnoxious. Really only used for the TLB fault. - // However, may be quite useful in SPARC. virtual TheISA::MachInst getInst(); virtual void copyArchRegs(ExecContext *xc); virtual void clearArchRegs(); - // - // New accessors for new decoder. - // virtual uint64_t readIntReg(int reg_idx); virtual float readFloatRegSingle(int reg_idx); @@ -172,9 +167,7 @@ class AlphaFullCPU : public FullO3CPU virtual Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val); - // Also not necessarily the best location for these two. - // Hopefully will go away once we decide upon where st cond - // failures goes. + // @todo: Figure out where these store cond failures should go. 
virtual unsigned readStCondFailures() { return thread->storeCondFailures; } virtual void setStCondFailures(unsigned sc_failures) { thread->storeCondFailures = sc_failures; } @@ -183,27 +176,27 @@ class AlphaFullCPU : public FullO3CPU virtual bool inPalMode() { return TheISA::PcPAL(cpu->readPC(thread->tid)); } #endif - // Only really makes sense for old CPU model. Still could be useful though. + // Only really makes sense for old CPU model. Lots of code + // outside the CPU still checks this function, so it will + // always return false to keep everything working. virtual bool misspeculating() { return false; } #if !FULL_SYSTEM virtual IntReg getSyscallArg(int i); - // used to shift args for indirect syscall virtual void setSyscallArg(int i, IntReg val); virtual void setSyscallReturn(SyscallReturn return_value); virtual void syscall() { return cpu->syscall(thread->tid); } - // Same with st cond failures. virtual Counter readFuncExeInst() { return thread->funcExeInst; } #endif }; - friend class AlphaXC; +// friend class AlphaXC; - std::vector xcProxies; +// std::vector xcProxies; #if FULL_SYSTEM /** ITB pointer. */ @@ -216,13 +209,6 @@ class AlphaFullCPU : public FullO3CPU void regStats(); #if FULL_SYSTEM - //Note that the interrupt stuff from the base CPU might be somewhat - //ISA specific (ie NumInterruptLevels). These functions might not - //be needed in FullCPU though. -// void post_interrupt(int int_num, int index); -// void clear_interrupt(int int_num, int index); -// void clear_interrupts(); - /** Translates instruction requestion. */ Fault translateInstReq(MemReqPtr &req) { @@ -273,11 +259,6 @@ class AlphaFullCPU : public FullO3CPU } #endif - - // Later on may want to remove this misc stuff from the regfile and - // have it handled at this level. This would be similar to moving certain - // IPRs into the devices themselves. Might prove to be an issue when - // trying to rename source/destination registers... 
MiscReg readMiscReg(int misc_reg, unsigned tid); MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault, unsigned tid); @@ -302,18 +283,21 @@ class AlphaFullCPU : public FullO3CPU /** Traps to handle given fault. */ void trap(Fault fault, unsigned tid); - bool simPalCheck(int palFunc); + bool simPalCheck(int palFunc, unsigned tid); /** Processes any interrupts. */ void processInterrupts(); + + /** Halts the CPU. */ + void halt() { panic("Halt not implemented!\n"); } #endif #if !FULL_SYSTEM - // Need to change these into regfile calls that directly set a certain - // register. Actually, these functions should handle most of this - // functionality by themselves; should look up the rename and then - // set the register. + /** Executes a syscall. + * @todo: Determine if this needs to be virtual. + */ + void syscall(int thread_num); /** Gets a syscall argument. */ IntReg getSyscallArg(int i, int tid); @@ -322,25 +306,12 @@ class AlphaFullCPU : public FullO3CPU /** Sets the return value of a syscall. */ void setSyscallReturn(SyscallReturn return_value, int tid); - - /** Executes a syscall. - * @todo: Determine if this needs to be virtual. - */ - virtual void syscall(int thread_num); - -#endif - - public: -#if FULL_SYSTEM - /** Halts the CPU. */ - void halt() { panic("Halt not implemented!\n"); } #endif - /** Old CPU read from memory function. No longer used. */ + /** Read from memory function. */ template Fault read(MemReqPtr &req, T &data) { -// panic("CPU READ NOT IMPLEMENTED W/NEW MEMORY\n"); #if 0 #if FULL_SYSTEM && defined(TARGET_ALPHA) if (req->flags & LOCKED) { @@ -350,10 +321,14 @@ class AlphaFullCPU : public FullO3CPU #endif #endif Fault error; + +#if FULL_SYSTEM + // @todo: Fix this LL/SC hack. 
if (req->flags & LOCKED) { lockAddr = req->paddr; lockFlag = true; } +#endif error = this->mem->read(req, data); data = gtoh(data); @@ -367,7 +342,7 @@ class AlphaFullCPU : public FullO3CPU return this->iew.ldstQueue.read(req, data, load_idx); } - /** Old CPU write to memory function. No longer used. */ + /** Write to memory function. */ template Fault write(MemReqPtr &req, T &data) { @@ -420,11 +395,13 @@ class AlphaFullCPU : public FullO3CPU #endif #endif +#if FULL_SYSTEM + // @todo: Fix this LL/SC hack. if (req->flags & LOCKED) { if (req->flags & UNCACHEABLE) { req->result = 2; } else { - if (this->lockFlag/* && this->lockAddr == req->paddr*/) { + if (this->lockFlag) { req->result = 1; } else { req->result = 0; @@ -432,6 +409,7 @@ class AlphaFullCPU : public FullO3CPU } } } +#endif return this->mem->write(req, (T)htog(data)); } @@ -444,6 +422,7 @@ class AlphaFullCPU : public FullO3CPU } Addr lockAddr; + bool lockFlag; }; diff --git a/cpu/o3/alpha_cpu_builder.cc b/cpu/o3/alpha_cpu_builder.cc index d676a69c1..0f9116d71 100644 --- a/cpu/o3/alpha_cpu_builder.cc +++ b/cpu/o3/alpha_cpu_builder.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2005 The Regents of The University of Michigan + * Copyright (c) 2004-2006 The Regents of The University of Michigan * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -61,6 +61,8 @@ SimObjectVectorParam workload; SimObjectParam mem; +SimObjectParam checker; + Param max_insts_any_thread; Param max_insts_all_threads; Param max_loads_any_thread; @@ -103,6 +105,8 @@ Param iewToCommitDelay; Param renameToROBDelay; Param commitWidth; Param squashWidth; +Param trapLatency; +Param fetchTrapLatency; Param localPredictorSize; Param localCtrBits; @@ -165,6 +169,8 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivAlphaFullCPU) INIT_PARAM_DFLT(mem, "Memory", NULL), + INIT_PARAM_DFLT(checker, "Checker CPU", NULL), + INIT_PARAM_DFLT(max_insts_any_thread, "Terminate when any thread reaches this inst count", 0), @@ -223,6 +229,8 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivAlphaFullCPU) INIT_PARAM(renameToROBDelay, "Rename to reorder buffer delay"), INIT_PARAM(commitWidth, "Commit width"), INIT_PARAM(squashWidth, "Squash width"), + INIT_PARAM_DFLT(trapLatency, "Number of cycles before the trap is handled", 6), + INIT_PARAM_DFLT(fetchTrapLatency, "Number of cycles before the fetch trap is handled", 12), INIT_PARAM(localPredictorSize, "Size of local predictor"), INIT_PARAM(localCtrBits, "Bits per counter"), @@ -301,12 +309,13 @@ CREATE_SIM_OBJECT(DerivAlphaFullCPU) params->dtb = dtb; #else params->workload = workload; - //@todo: change to pageTable // params->pTable = page_table; #endif // FULL_SYSTEM params->mem = mem; + params->checker = checker; + params->max_insts_any_thread = max_insts_any_thread; params->max_insts_all_threads = max_insts_all_threads; params->max_loads_any_thread = max_loads_any_thread; @@ -351,7 +360,8 @@ CREATE_SIM_OBJECT(DerivAlphaFullCPU) params->renameToROBDelay = renameToROBDelay; params->commitWidth = commitWidth; params->squashWidth = squashWidth; - + params->trapLatency = trapLatency; + params->fetchTrapLatency = fetchTrapLatency; params->localPredictorSize = localPredictorSize; params->localCtrBits = localCtrBits; diff --git a/cpu/o3/alpha_cpu_impl.hh 
b/cpu/o3/alpha_cpu_impl.hh index 7a2d5d2b9..856fcb1c8 100644 --- a/cpu/o3/alpha_cpu_impl.hh +++ b/cpu/o3/alpha_cpu_impl.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2005 The Regents of The University of Michigan + * Copyright (c) 2004-2006 The Regents of The University of Michigan * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -30,10 +30,9 @@ #include "base/cprintf.hh" #include "base/statistics.hh" #include "base/timebuf.hh" +#include "cpu/checker/exec_context.hh" #include "cpu/quiesce_event.hh" -#include "mem/cache/cache.hh" // for dynamic cast #include "mem/mem_interface.hh" -#include "sim/builder.hh" #include "sim/sim_events.hh" #include "sim/stats.hh" @@ -63,11 +62,9 @@ AlphaFullCPU::AlphaFullCPU(Params *params) for (int i = 0; i < this->numThreads; ++i) { #if FULL_SYSTEM - assert(i == 0); + assert(this->numThreads == 1); this->thread[i] = new Thread(this, 0, params->mem); -// this->system->execContexts[i] = this->thread[i]->getXCProxy(); this->thread[i]->setStatus(ExecContext::Suspended); - #else if (i < params->workload.size()) { DPRINTF(FullCPU, "FullCPU: Workload[%i]'s starting PC is %#x, " @@ -91,19 +88,27 @@ AlphaFullCPU::AlphaFullCPU(Params *params) this->thread[i]->numInst = 0; - xcProxies.push_back(new AlphaXC); + ExecContext *xc_proxy; - xcProxies[i]->cpu = this; - xcProxies[i]->thread = this->thread[i]; + AlphaXC *alpha_xc_proxy = new AlphaXC; - xcProxies[i]->quiesceEvent = new EndQuiesceEvent(xcProxies[i]); - xcProxies[i]->lastActivate = 0; - xcProxies[i]->lastSuspend = 0; + if (params->checker) { + xc_proxy = new CheckerExecContext(alpha_xc_proxy, this->checker); + } else { + xc_proxy = alpha_xc_proxy; + } + alpha_xc_proxy->cpu = this; + alpha_xc_proxy->thread = this->thread[i]; - this->thread[i]->xcProxy = xcProxies[i]; + alpha_xc_proxy->quiesceEvent = + new EndQuiesceEvent(xc_proxy); + alpha_xc_proxy->lastActivate = 0; + alpha_xc_proxy->lastSuspend = 0; - 
this->execContexts.push_back(this->thread[i]->getXCProxy()); + this->thread[i]->xcProxy = xc_proxy; + + this->execContexts.push_back(xc_proxy); } @@ -144,6 +149,7 @@ template void AlphaFullCPU::AlphaXC::dumpFuncProfile() { + // Currently not supported } #endif @@ -167,6 +173,18 @@ AlphaFullCPU::AlphaXC::takeOverFrom(ExecContext *old_context) thread->funcExeInst = old_context->readFuncExeInst(); #endif + EndQuiesceEvent *other_quiesce = old_context->getQuiesceEvent(); + if (other_quiesce) { + // Point the quiesce event's XC at this XC so that it wakes up + // the proper CPU. + other_quiesce->xc = this; + } + if (thread->quiesceEvent) { + thread->quiesceEvent->xc = this; + } +// storeCondFailures = 0; + cpu->lockFlag = false; + old_context->setStatus(ExecContext::Unallocated); thread->inSyscall = false; @@ -178,7 +196,7 @@ void AlphaFullCPU::AlphaXC::activate(int delay) { DPRINTF(FullCPU, "Calling activate on AlphaXC\n"); -// warn("Calling activate on AlphaXC"); + if (thread->status() == ExecContext::Active) return; @@ -200,7 +218,7 @@ void AlphaFullCPU::AlphaXC::suspend() { DPRINTF(FullCPU, "Calling suspend on AlphaXC\n"); -// warn("Calling suspend on AlphaXC"); + if (thread->status() == ExecContext::Suspended) return; @@ -224,7 +242,7 @@ void AlphaFullCPU::AlphaXC::deallocate() { DPRINTF(FullCPU, "Calling deallocate on AlphaXC\n"); -// warn("Calling deallocate on AlphaXC"); + if (thread->status() == ExecContext::Unallocated) return; @@ -237,7 +255,7 @@ void AlphaFullCPU::AlphaXC::halt() { DPRINTF(FullCPU, "Calling halt on AlphaXC\n"); -// warn("Calling halt on AlphaXC"); + if (thread->status() == ExecContext::Halted) return; @@ -254,6 +272,7 @@ template void AlphaFullCPU::AlphaXC::serialize(std::ostream &os) {} + template void AlphaFullCPU::AlphaXC::unserialize(Checkpoint *cp, const std::string §ion) @@ -261,7 +280,7 @@ AlphaFullCPU::AlphaXC::unserialize(Checkpoint *cp, const std::string § #if FULL_SYSTEM template -Event * +EndQuiesceEvent * 
AlphaFullCPU::AlphaXC::getQuiesceEvent() { return quiesceEvent; @@ -345,9 +364,6 @@ void AlphaFullCPU::AlphaXC::clearArchRegs() {} -// -// New accessors for new decoder. -// template uint64_t AlphaFullCPU::AlphaXC::readIntReg(int reg_idx) @@ -503,26 +519,6 @@ AlphaFullCPU::AlphaXC::setSyscallReturn(SyscallReturn return_value) cpu->setSyscallReturn(return_value, thread->tid); } -template -void -AlphaFullCPU::syscall(int tid) -{ - DPRINTF(FullCPU, "AlphaFullCPU: [tid:%i] Executing syscall().\n\n", tid); - - DPRINTF(Activity,"Activity: syscall() called.\n"); - - // Temporarily increase this by one to account for the syscall - // instruction. - ++(this->thread[tid]->funcExeInst); - - // Execute the actual syscall. - this->thread[tid]->syscall(); - - // Decrease funcExeInst by one as the normal commit will handle - // incrementing it. - --(this->thread[tid]->funcExeInst); -} - #endif // FULL_SYSTEM template @@ -544,14 +540,7 @@ template Fault AlphaFullCPU::setMiscReg(int misc_reg, const MiscReg &val, unsigned tid) { - // I think that these registers should always be set, regardless of what - // mode the thread is in. The main difference is if the thread needs to - // squash as a result of the write, which is controlled by the AlphaXC. 
-// if (!this->thread[tid]->trapPending) { - return this->regFile.setMiscReg(misc_reg, val, tid); -// } else { -// return NoFault; -// } + return this->regFile.setMiscReg(misc_reg, val, tid); } template @@ -559,18 +548,13 @@ Fault AlphaFullCPU::setMiscRegWithEffect(int misc_reg, const MiscReg &val, unsigned tid) { -// if (!this->thread[tid]->trapPending) { - return this->regFile.setMiscRegWithEffect(misc_reg, val, tid); -// } else { -// return NoFault; -// } + return this->regFile.setMiscRegWithEffect(misc_reg, val, tid); } template void AlphaFullCPU::squashFromXC(unsigned tid) { -// this->thread[tid]->trapPending = true; this->thread[tid]->inSyscall = true; this->commit.generateXCEvent(tid); } @@ -585,7 +569,8 @@ AlphaFullCPU::post_interrupt(int int_num, int index) if (this->thread[0]->status() == ExecContext::Suspended) { DPRINTF(IPI,"Suspended Processor awoke\n"); - xcProxies[0]->activate(); +// xcProxies[0]->activate(); + this->execContexts[0]->activate(); } } @@ -607,31 +592,24 @@ template Fault AlphaFullCPU::hwrei(unsigned tid) { -#if 0 - if (!inPalMode(this->readPC(tid))) - return new AlphaISA::UnimplementedOpcodeFault; - - setNextPC(cpu->readMiscReg(AlphaISA::IPR_EXC_ADDR, tid), tid); + // Need to clear the lock flag upon returning from an interrupt. + this->lockFlag = false; - cpu->kernelStats->hwrei(); + this->kernelStats->hwrei(); -// if ((this->regFile.miscRegs[tid].readReg(AlphaISA::IPR_EXC_ADDR) & 1) == 0) -// AlphaISA::swap_palshadow(®s, false); + this->checkInterrupts = true; - cpu->checkInterrupts = true; -#endif -// panic("Do not call this function!"); - // Need to clear the lock flag upon returning from an interrupt. - this->lockFlag = false; // FIXME: XXX check for interrupts? 
XXX return NoFault; } template bool -AlphaFullCPU::simPalCheck(int palFunc) +AlphaFullCPU::simPalCheck(int palFunc, unsigned tid) { -// kernelStats.callpal(palFunc); + if (this->kernelStats) + this->kernelStats->callpal(palFunc, + this->execContexts[tid]); switch (palFunc) { case PAL::halt: @@ -650,47 +628,11 @@ AlphaFullCPU::simPalCheck(int palFunc) return true; } -// Probably shouldn't be able to switch to the trap handler as quickly as -// this. Also needs to get the exception restart address from the commit -// stage. template void AlphaFullCPU::trap(Fault fault, unsigned tid) { - - fault->invoke(this->xcProxies[tid]); -/* // Keep in mind that a trap may be initiated by fetch if there's a TLB - // miss - uint64_t PC = this->commit.readCommitPC(); - - DPRINTF(Fault, "Fault %s\n", fault->name()); - this->recordEvent(csprintf("Fault %s", fault->name())); - - //kernelStats.fault(fault); - - if (fault->isA()) - panic("Arithmetic traps are unimplemented!"); - - // exception restart address - Get the commit PC - if (!fault->isA() || !inPalMode(PC)) - this->regFile.miscRegs.setReg(AlphaISA::IPR_EXC_ADDR, PC); - - if (fault->isA() || fault->isA()) - // || fault == InterruptFault && !PC_PAL(regs.pc) - { - // traps... skip faulting instruction - AlphaISA::MiscReg ipr_exc_addr = - this->regFile.miscRegs.readReg(AlphaISA::IPR_EXC_ADDR); - this->regFile.miscRegs.setReg(AlphaISA::IPR_EXC_ADDR, - ipr_exc_addr + 4); - } - - if (!inPalMode(PC)) - swapPALShadow(true); - - this->regFile.setPC(this->regFile.miscRegs.readReg(AlphaISA::IPR_PAL_BASE) + - (dynamic_cast(fault.get()))->vect(), 0); - this->regFile.setNextPC(PC + sizeof(MachInst), 0);*/ + fault->invoke(this->execContexts[tid]); } template @@ -700,6 +642,8 @@ AlphaFullCPU::processInterrupts() // Check for interrupts here. For now can copy the code that // exists within isa_fullsys_traits.hh. Also assume that thread 0 // is the one that handles the interrupts. + // @todo: Possibly consolidate the interrupt checking code. 
+ // @todo: Allow other threads to handle interrupts. // Check if there are any outstanding interrupts //Handle the interrupts @@ -738,6 +682,10 @@ AlphaFullCPU::processInterrupts() if (ipl && ipl > this->readMiscReg(IPR_IPLR, 0)) { this->setMiscReg(IPR_ISR, summary, 0); this->setMiscReg(IPR_INTID, ipl, 0); + if (this->checker) { + this->checker->cpuXCBase()->setMiscReg(IPR_ISR, summary); + this->checker->cpuXCBase()->setMiscReg(IPR_INTID, ipl); + } this->trap(Fault(new InterruptFault), 0); DPRINTF(Flow, "Interrupt! IPLR=%d ipl=%d summary=%x\n", this->readMiscReg(IPR_IPLR, 0), ipl, summary); @@ -747,6 +695,27 @@ AlphaFullCPU::processInterrupts() #endif // FULL_SYSTEM #if !FULL_SYSTEM + +template +void +AlphaFullCPU::syscall(int tid) +{ + DPRINTF(FullCPU, "AlphaFullCPU: [tid:%i] Executing syscall().\n\n", tid); + + DPRINTF(Activity,"Activity: syscall() called.\n"); + + // Temporarily increase this by one to account for the syscall + // instruction. + ++(this->thread[tid]->funcExeInst); + + // Execute the actual syscall. + this->thread[tid]->syscall(); + + // Decrease funcExeInst by one as the normal commit will handle + // incrementing it. + --(this->thread[tid]->funcExeInst); +} + template TheISA::IntReg AlphaFullCPU::getSyscallArg(int i, int tid) diff --git a/cpu/o3/alpha_dyn_inst.hh b/cpu/o3/alpha_dyn_inst.hh index 24774bd0a..1c5b738aa 100644 --- a/cpu/o3/alpha_dyn_inst.hh +++ b/cpu/o3/alpha_dyn_inst.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2005 The Regents of The University of Michigan + * Copyright (c) 2004-2006 The Regents of The University of Michigan * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -35,14 +35,11 @@ #include "cpu/o3/alpha_impl.hh" /** - * Mostly implementation & ISA specific AlphaDynInst. As with most other classes - * in the new CPU model, it is templated on the Impl to allow for passing in of - * all types, such as the CPU type and the ISA type. 
The AlphaDynInst serves - * as the primary interface to the CPU; it plays the role that the ExecContext - * does for the old CPU and the SimpleCPU. The goal is to abstract ExecContext - * purely into an interface, and have it forward calls to the appropriate - * CPU interface, which in the new CPU model's case would be this AlphaDynInst, - * or any other high level implementation specific DynInst. + * Mostly implementation & ISA specific AlphaDynInst. As with most + * other classes in the new CPU model, it is templated on the Impl to + * allow for passing in of all types, such as the CPU type and the ISA + * type. The AlphaDynInst serves as the primary interface to the CPU + * for instructions that are executing. */ template class AlphaDynInst : public BaseDynInst @@ -78,8 +75,10 @@ class AlphaDynInst : public BaseDynInst /** Executes the instruction.*/ Fault execute(); + /** Initiates the access. Only valid for memory operations. */ Fault initiateAcc(); + /** Completes the access. Only valid for memory operations. */ Fault completeAcc(); private: @@ -100,6 +99,7 @@ class AlphaDynInst : public BaseDynInst Fault setMiscReg(int misc_reg, const MiscReg &val) { + this->instResult.integer = val; return this->cpu->setMiscReg(misc_reg, val, this->threadNumber); } @@ -126,8 +126,6 @@ class AlphaDynInst : public BaseDynInst void syscall(); #endif - - private: /** Physical register index of the destination registers of this * instruction. @@ -247,9 +245,9 @@ class AlphaDynInst : public BaseDynInst } public: - /** Calculates EA part of a memory instruction. Currently unused, though - * it may be useful in the future when memory instructions aren't - * executed with the EA calculation and the memory access being atomic. + /** Calculates EA part of a memory instruction. Currently unused, + * though it may be useful in the future if we want to split + * memory operations into EA calculation and memory access parts. 
*/ Fault calcEA() { @@ -257,8 +255,8 @@ class AlphaDynInst : public BaseDynInst } /** Does the memory access part of a memory instruction. Currently unused, - * though it may be useful in the future when memory instructions aren't - * executed with the EA calculation and the memory access being atomic. + * though it may be useful in the future if we want to split + * memory operations into EA calculation and memory access parts. */ Fault memAccess() { diff --git a/cpu/o3/alpha_dyn_inst_impl.hh b/cpu/o3/alpha_dyn_inst_impl.hh index b5999f8d1..541d5ab82 100644 --- a/cpu/o3/alpha_dyn_inst_impl.hh +++ b/cpu/o3/alpha_dyn_inst_impl.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2005 The Regents of The University of Michigan + * Copyright (c) 2004-2006 The Regents of The University of Michigan * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -124,13 +124,9 @@ AlphaDynInst::hwrei() this->setNextPC(this->cpu->readMiscReg(AlphaISA::IPR_EXC_ADDR, this->threadNumber)); - this->cpu->kernelStats->hwrei(); - // Tell CPU to clear any state it needs to if a hwrei is taken. this->cpu->hwrei(this->threadNumber); - this->cpu->checkInterrupts = true; - // FIXME: XXX check for interrupts? XXX return NoFault; } @@ -167,7 +163,7 @@ template bool AlphaDynInst::simPalCheck(int palFunc) { - return this->cpu->simPalCheck(palFunc); + return this->cpu->simPalCheck(palFunc, this->threadNumber); } #else template diff --git a/cpu/o3/alpha_params.hh b/cpu/o3/alpha_params.hh index 04b790815..b8ebae21e 100644 --- a/cpu/o3/alpha_params.hh +++ b/cpu/o3/alpha_params.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2005 The Regents of The University of Michigan + * Copyright (c) 2004-2006 The Regents of The University of Michigan * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -62,6 +62,8 @@ class AlphaSimpleParams : public BaseFullCPU::Params FunctionalMemory *mem; + BaseCPU *checker; + // // Caches // @@ -117,6 +119,8 @@ class AlphaSimpleParams : public BaseFullCPU::Params unsigned renameToROBDelay; unsigned commitWidth; unsigned squashWidth; + Tick trapLatency; + Tick fetchTrapLatency; // // Branch predictor (BP & BTB) diff --git a/cpu/o3/commit.hh b/cpu/o3/commit.hh index 028bd5295..73eccd2b0 100644 --- a/cpu/o3/commit.hh +++ b/cpu/o3/commit.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2005 The Regents of The University of Michigan + * Copyright (c) 2004-2006 The Regents of The University of Michigan * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -40,25 +40,27 @@ template class O3ThreadState; /** - * DefaultCommit handles single threaded and SMT commit. Its width is specified - * by the parameters; each cycle it tries to commit that many instructions. The - * SMT policy decides which thread it tries to commit instructions from. Non- - * speculative instructions must reach the head of the ROB before they are - * ready to execute; once they reach the head, commit will broadcast the - * instruction's sequence number to the previous stages so that they can issue/ - * execute the instruction. Only one non-speculative instruction is handled per - * cycle. Commit is responsible for handling all back-end initiated redirects. - * It receives the redirect, and then broadcasts it to all stages, indicating - * the sequence number they should squash until, and any necessary branch mis- - * prediction information as well. It priortizes redirects by instruction's age, - * only broadcasting a redirect if it corresponds to an instruction that should - * currently be in the ROB. 
This is done by tracking the sequence number of the - * youngest instruction in the ROB, which gets updated to any squashing - * instruction's sequence number, and only broadcasting a redirect if it - * corresponds to an older instruction. Commit also supports multiple cycle - * squashing, to model a ROB that can only remove a certain number of - * instructions per cycle. Eventually traps and interrupts will most likely - * be handled here as well. + * DefaultCommit handles single threaded and SMT commit. Its width is + * specified by the parameters; each cycle it tries to commit that + * many instructions. The SMT policy decides which thread it tries to + * commit instructions from. Non- speculative instructions must reach + * the head of the ROB before they are ready to execute; once they + * reach the head, commit will broadcast the instruction's sequence + * number to the previous stages so that they can issue/ execute the + * instruction. Only one non-speculative instruction is handled per + * cycle. Commit is responsible for handling all back-end initiated + * redirects. It receives the redirect, and then broadcasts it to all + * stages, indicating the sequence number they should squash until, + * and any necessary branch misprediction information as well. It + * priortizes redirects by instruction's age, only broadcasting a + * redirect if it corresponds to an instruction that should currently + * be in the ROB. This is done by tracking the sequence number of the + * youngest instruction in the ROB, which gets updated to any + * squashing instruction's sequence number, and only broadcasting a + * redirect if it corresponds to an older instruction. Commit also + * supports multiple cycle squashing, to model a ROB that can only + * remove a certain number of instructions per cycle. Eventually traps + * and interrupts will most likely be handled here as well. 
*/ template class DefaultCommit @@ -78,6 +80,7 @@ class DefaultCommit typedef typename CPUPol::IEWStruct IEWStruct; typedef typename CPUPol::RenameStruct RenameStruct; + typedef typename CPUPol::Fetch Fetch; typedef typename CPUPol::IEW IEW; typedef O3ThreadState Thread; @@ -155,11 +158,16 @@ class DefaultCommit /** Sets the pointer to the queue coming from IEW. */ void setIEWQueue(TimeBuffer *iq_ptr); + void setFetchStage(Fetch *fetch_stage); + + Fetch *fetchStage; + /** Sets the poitner to the IEW stage. */ void setIEWStage(IEW *iew_stage); - /** The pointer to the IEW stage. Used solely to ensure that syscalls do - * not execute until all stores have written back. + /** The pointer to the IEW stage. Used solely to ensure that + * various events (traps, interrupts, syscalls) do not occur until + * all stores have written back. */ IEW *iewStage; @@ -177,6 +185,8 @@ class DefaultCommit void switchOut(); + void doSwitchOut(); + void takeOverFrom(); /** Ticks the commit stage, which tries to commit instructions. */ @@ -213,13 +223,12 @@ class DefaultCommit */ bool changedROBEntries(); + void squashAll(unsigned tid); + void squashFromTrap(unsigned tid); void squashFromXC(unsigned tid); - void squashInFlightInsts(unsigned tid); - - private: /** Commits as many instructions as possible. */ void commitInsts(); @@ -246,8 +255,10 @@ class DefaultCommit int oldestReady(); public: - /** Returns the PC of the head instruction of the ROB. */ - uint64_t readPC(); + /** Returns the PC of the head instruction of the ROB. + * @todo: Probably remove this function as it returns only thread 0. + */ + uint64_t readPC() { return PC[0]; } uint64_t readPC(unsigned tid) { return PC[tid]; } @@ -257,9 +268,6 @@ class DefaultCommit void setNextPC(uint64_t val, unsigned tid) { nextPC[tid] = val; } - /** Sets that the ROB is currently squashing. */ - void setSquashing(unsigned tid); - private: /** Time buffer interface. 
*/ TimeBuffer *timeBuffer; @@ -299,10 +307,10 @@ class DefaultCommit std::vector thread; - private: Fault fetchFault; - InstSeqNum fetchFaultSN; + int fetchTrapWait; + /** Records that commit has written to the time buffer this cycle. Used for * the CPU to determine if it can deschedule itself if there is no activity. */ @@ -355,11 +363,13 @@ class DefaultCommit /** Number of Active Threads */ unsigned numThreads; + bool switchPending; bool switchedOut; Tick trapLatency; Tick fetchTrapLatency; + Tick fetchFaultTick; Addr PC[Impl::MaxThreads]; @@ -390,27 +400,26 @@ class DefaultCommit * speculative instruction reaching the head of the ROB. */ Stats::Scalar<> commitNonSpecStalls; - /** Stat for the total number of committed branches. */ -// Stats::Scalar<> commitCommittedBranches; - /** Stat for the total number of committed loads. */ -// Stats::Scalar<> commitCommittedLoads; - /** Stat for the total number of committed memory references. */ -// Stats::Scalar<> commitCommittedMemRefs; /** Stat for the total number of branch mispredicts that caused a squash. */ Stats::Scalar<> branchMispredicts; /** Distribution of the number of committed instructions each cycle. */ Stats::Distribution<> numCommittedDist; - // total number of instructions committed - Stats::Vector<> stat_com_inst; - Stats::Vector<> stat_com_swp; - Stats::Vector<> stat_com_refs; - Stats::Vector<> stat_com_loads; - Stats::Vector<> stat_com_membars; - Stats::Vector<> stat_com_branches; - - Stats::Scalar<> commit_eligible_samples; - Stats::Vector<> commit_eligible; + /** Total number of instructions committed. */ + Stats::Vector<> statComInst; + /** Total number of software prefetches committed. */ + Stats::Vector<> statComSwp; + /** Stat for the total number of committed memory references. */ + Stats::Vector<> statComRefs; + /** Stat for the total number of committed loads. */ + Stats::Vector<> statComLoads; + /** Total number of committed memory barriers. 
*/ + Stats::Vector<> statComMembars; + /** Total number of committed branches. */ + Stats::Vector<> statComBranches; + + Stats::Scalar<> commitEligibleSamples; + Stats::Vector<> commitEligible; }; #endif // __CPU_O3_COMMIT_HH__ diff --git a/cpu/o3/commit_impl.hh b/cpu/o3/commit_impl.hh index 034565f90..170f5b01f 100644 --- a/cpu/o3/commit_impl.hh +++ b/cpu/o3/commit_impl.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2005 The Regents of The University of Michigan + * Copyright (c) 2004-2006 The Regents of The University of Michigan * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -36,6 +36,7 @@ #include "base/loader/symtab.hh" #include "base/timebuf.hh" +#include "cpu/checker/cpu.hh" #include "cpu/exetrace.hh" #include "cpu/o3/commit.hh" #include "cpu/o3/thread_state.hh" @@ -54,7 +55,8 @@ template void DefaultCommit::TrapEvent::process() { - // This will get reset if it was switched out. + // This will get reset by commit if it was switched out at the + // time of this event processing. commit->trapSquash[tid] = true; } @@ -77,7 +79,9 @@ DefaultCommit::DefaultCommit(Params *params) iewWidth(params->executeWidth), commitWidth(params->commitWidth), numThreads(params->numberOfThreads), - switchedOut(false) + switchedOut(false), + trapLatency(params->trapLatency), + fetchTrapLatency(params->fetchTrapLatency) { _status = Active; _nextStatus = Inactive; @@ -117,9 +121,6 @@ DefaultCommit::DefaultCommit(Params *params) xcSquash[i] = false; } - // Hardcoded trap latency. 
- trapLatency = 6; - fetchTrapLatency = 12; fetchFaultTick = 0; fetchTrapWait = 0; } @@ -153,20 +154,6 @@ DefaultCommit::regStats() .desc("The number of times commit has been forced to stall to " "communicate backwards") .prereq(commitNonSpecStalls); -/* - commitCommittedBranches - .name(name() + ".commitCommittedBranches") - .desc("The number of committed branches") - .prereq(commitCommittedBranches); - commitCommittedLoads - .name(name() + ".commitCommittedLoads") - .desc("The number of committed loads") - .prereq(commitCommittedLoads); - commitCommittedMemRefs - .name(name() + ".commitCommittedMemRefs") - .desc("The number of committed memory references") - .prereq(commitCommittedMemRefs); -*/ branchMispredicts .name(name() + ".branchMispredicts") .desc("The number of times a branch was mispredicted") @@ -178,42 +165,42 @@ DefaultCommit::regStats() .flags(Stats::pdf) ; - stat_com_inst + statComInst .init(cpu->number_of_threads) .name(name() + ".COM:count") .desc("Number of instructions committed") .flags(total) ; - stat_com_swp + statComSwp .init(cpu->number_of_threads) .name(name() + ".COM:swp_count") .desc("Number of s/w prefetches committed") .flags(total) ; - stat_com_refs + statComRefs .init(cpu->number_of_threads) .name(name() + ".COM:refs") .desc("Number of memory references committed") .flags(total) ; - stat_com_loads + statComLoads .init(cpu->number_of_threads) .name(name() + ".COM:loads") .desc("Number of loads committed") .flags(total) ; - stat_com_membars + statComMembars .init(cpu->number_of_threads) .name(name() + ".COM:membars") .desc("Number of memory barriers committed") .flags(total) ; - stat_com_branches + statComBranches .init(cpu->number_of_threads) .name(name() + ".COM:branches") .desc("Number of branches committed") @@ -233,14 +220,14 @@ DefaultCommit::regStats() // -> The standard deviation is computed only over cycles where // we reached the BW limit // - commit_eligible + commitEligible .init(cpu->number_of_threads) .name(name() + 
".COM:bw_limited") .desc("number of insts not committed due to BW limits") .flags(total) ; - commit_eligible_samples + commitEligibleSamples .name(name() + ".COM:bw_lim_events") .desc("number cycles where commit BW limit reached") ; @@ -257,8 +244,8 @@ DefaultCommit::setCPU(FullCPU *cpu_ptr) // the simulation, so it starts as active. cpu->activateStage(FullCPU::CommitIdx); - trapLatency = cpu->cycles(6); - fetchTrapLatency = cpu->cycles(12); + trapLatency = cpu->cycles(trapLatency); + fetchTrapLatency = cpu->cycles(fetchTrapLatency); } template @@ -315,6 +302,13 @@ DefaultCommit::setIEWQueue(TimeBuffer *iq_ptr) fromIEW = iewQueue->getWire(-iewToCommitDelay); } +template +void +DefaultCommit::setFetchStage(Fetch *fetch_stage) +{ + fetchStage = fetch_stage; +} + template void DefaultCommit::setIEWStage(IEW *iew_stage) @@ -369,6 +363,15 @@ template void DefaultCommit::switchOut() { + switchPending = true; +} + +template +void +DefaultCommit::doSwitchOut() +{ + switchedOut = true; + switchPending = false; rob->switchOut(); } @@ -376,6 +379,7 @@ template void DefaultCommit::takeOverFrom() { + switchedOut = false; _status = Active; _nextStatus = Inactive; for (int i=0; i < numThreads; i++) { @@ -392,9 +396,17 @@ template void DefaultCommit::updateStatus() { - if (commitStatus[0] == TrapPending || - commitStatus[0] == FetchTrapPending) { - _nextStatus = Active; + // reset ROB changed variable + list::iterator threads = (*activeThreads).begin(); + while (threads != (*activeThreads).end()) { + unsigned tid = *threads++; + changedROBNumEntries[tid] = false; + + // Also check if any of the threads has a trap pending + if (commitStatus[tid] == TrapPending || + commitStatus[tid] == FetchTrapPending) { + _nextStatus = Active; + } } if (_nextStatus == Inactive && _status == Active) { @@ -406,13 +418,6 @@ DefaultCommit::updateStatus() } _status = _nextStatus; - - // reset ROB changed variable - list::iterator threads = (*activeThreads).begin(); - while (threads != 
(*activeThreads).end()) { - unsigned tid = *threads++; - changedROBNumEntries[tid] = false; - } } template @@ -488,14 +493,14 @@ DefaultCommit::generateXCEvent(unsigned tid) template void -DefaultCommit::squashFromTrap(unsigned tid) +DefaultCommit::squashAll(unsigned tid) { // If we want to include the squashing instruction in the squash, // then use one older sequence number. // Hopefully this doesn't mess things up. Basically I want to squash // all instructions of this thread. InstSeqNum squashed_inst = rob->isEmpty() ? - 0 : rob->readHeadInst(tid)->seqNum - 1; + 0 : rob->readHeadInst(tid)->seqNum - 1;; // All younger instructions will be squashed. Set the sequence // number as the youngest instruction in the ROB (0 in this case. @@ -518,21 +523,22 @@ DefaultCommit::squashFromTrap(unsigned tid) toIEW->commitInfo[tid].branchMispredict = false; -// toIEW->commitInfo[tid].branchTaken = fromIEW->branchTaken[tid]; - toIEW->commitInfo[tid].nextPC = PC[tid]; +} + +template +void +DefaultCommit::squashFromTrap(unsigned tid) +{ + squashAll(tid); DPRINTF(Commit, "Squashing from trap, restarting at PC %#x\n", PC[tid]); - // Hopefully nobody tries to use the mispredPC becuase I said there - // wasn't a branch mispredict. -// toIEW->commitInfo[tid].mispredPC = fromIEW->mispredPC[tid]; thread[tid]->trapPending = false; thread[tid]->inSyscall = false; trapSquash[tid] = false; - // Not sure what to set this to... commitStatus[tid] = ROBSquashing; cpu->activityThisCycle(); @@ -543,49 +549,13 @@ template void DefaultCommit::squashFromXC(unsigned tid) { - // For now these are identical. In the future, the squash from trap - // might execute the trap prior to the squash. - - // If we want to include the squashing instruction in the squash, - // then use one older sequence number. - // Hopefully this doesn't mess things up. Basically I want to squash - // all instructions of this thread. - InstSeqNum squashed_inst = rob->isEmpty() ? 
- 0 : rob->readHeadInst(tid)->seqNum - 1;; - - // All younger instructions will be squashed. Set the sequence - // number as the youngest instruction in the ROB (0 in this case. - // Hopefully nothing breaks.) - youngestSeqNum[tid] = 0; - - rob->squash(squashed_inst, tid); - changedROBNumEntries[tid] = true; - - // Send back the sequence number of the squashed instruction. - toIEW->commitInfo[tid].doneSeqNum = squashed_inst; - - // Send back the squash signal to tell stages that they should - // squash. - toIEW->commitInfo[tid].squash = true; - - // Send back the rob squashing signal so other stages know that - // the ROB is in the process of squashing. - toIEW->commitInfo[tid].robSquashing = true; - - toIEW->commitInfo[tid].branchMispredict = false; - -// toIEW->commitInfo[tid].branchTaken = fromIEW->branchTaken[tid]; - - toIEW->commitInfo[tid].nextPC = PC[tid]; + squashAll(tid); DPRINTF(Commit, "Squashing from XC, restarting at PC %#x\n", PC[tid]); - // Hopefully nobody tries to use the mispredPC becuase I said there - // wasn't a branch mispredict. -// toIEW->commitInfo[tid].mispredPC = fromIEW->mispredPC[tid]; thread[tid]->inSyscall = false; assert(!thread[tid]->trapPending); - // Not sure what to set this to... + commitStatus[tid] = ROBSquashing; cpu->activityThisCycle(); @@ -594,22 +564,6 @@ DefaultCommit::squashFromXC(unsigned tid) ++squashCounter; } -template -void -DefaultCommit::squashInFlightInsts(unsigned tid) -{ - // @todo: Fix this hardcoded number. - for (int i = 0; i < -5; ++i) { - for (int j = 0; j < (*iewQueue)[i].size; ++j) { - DynInstPtr inst = (*iewQueue)[i].insts[j]; - if (inst->threadNumber == tid && - !inst->isSquashed()) { - inst->setSquashed(); - } - } - } -} - template void DefaultCommit::tick() @@ -617,13 +571,15 @@ DefaultCommit::tick() wroteToTimeBuffer = false; _nextStatus = Inactive; - // If the ROB is currently in its squash sequence, then continue - // to squash. In this case, commit does not do anything. Otherwise - // run commit. 
+ if (switchPending && rob->isEmpty() && !iewStage->hasStoresToWB()) { + cpu->signalSwitched(); + return; + } + list::iterator threads = (*activeThreads).begin(); - // Maybe this should be dependent upon any of the commits actually - // squashing. + // Check if any of the threads are done squashing. Change the + // status if they are done. while (threads != (*activeThreads).end()) { unsigned tid = *threads++; @@ -673,7 +629,7 @@ DefaultCommit::tick() if (wroteToTimeBuffer) { - DPRINTF(Activity,"Activity This Cycle.\n"); + DPRINTF(Activity, "Activity This Cycle.\n"); cpu->activityThisCycle(); } @@ -689,28 +645,23 @@ DefaultCommit::commit() // Check for interrupts ////////////////////////////////////// - // Process interrupts if interrupts are enabled and not in PAL mode. - // Take the PC from commit and write it to the IPR, then squash. The - // interrupt completing will take care of restoring the PC from that value - // in the IPR. Look at IPR[EXC_ADDR]; - // hwrei() is what resets the PC to the place where instruction execution - // beings again. #if FULL_SYSTEM -//#if 0 + // Process interrupts if interrupts are enabled, not in PAL mode, + // and no other traps or external squashes are currently pending. + // @todo: Allow other threads to handle interrupts. if (cpu->checkInterrupts && cpu->check_interrupts() && !cpu->inPalMode(readPC()) && !trapSquash[0] && !xcSquash[0]) { -// commitStatus[0] = TrapPending; + // Tell fetch that there is an interrupt pending. This will + // make fetch wait until it sees a non PAL-mode PC, at which + // point it stops fetching instructions. toIEW->commitInfo[0].interruptPending = true; - if (rob->isEmpty() && !iewStage->hasStoresToWB()) { - // Will need to squash all instructions currently in flight and have - // the interrupt handler restart at the last non-committed inst. - // Most of that can be handled through the trap() function. 
The - // processInterrupts() function really just checks for interrupts - // and then calls trap() if there is an interrupt present. + // Wait until the ROB is empty and all stores have drained in + // order to enter the interrupt. + if (rob->isEmpty() && !iewStage->hasStoresToWB()) { // Not sure which thread should be the one to interrupt. For now // always do thread 0. assert(!thread[0]->inSyscall); @@ -738,26 +689,27 @@ DefaultCommit::commit() #endif // FULL_SYSTEM //////////////////////////////////// - // Check for squash signal, handle that first + // Check for any possible squashes, handle them first //////////////////////////////////// - // Check if the IEW stage is telling the ROB to squash. list::iterator threads = (*activeThreads).begin(); while (threads != (*activeThreads).end()) { unsigned tid = *threads++; if (fromFetch->fetchFault && commitStatus[0] != TrapPending) { - // Record the fault. Wait until it's empty in the ROB. Then handle the trap. - // Ignore it if there's already a trap pending as fetch will be redirected. + // Record the fault. Wait until it's empty in the ROB. + // Then handle the trap. Ignore it if there's already a + // trap pending as fetch will be redirected. fetchFault = fromFetch->fetchFault; - fetchFaultSN = fromFetch->fetchFaultSN; fetchFaultTick = curTick + fetchTrapLatency; commitStatus[0] = FetchTrapPending; DPRINTF(Commit, "Fault from fetch recorded. Will trap if the " "ROB empties without squashing the fault.\n"); fetchTrapWait = 0; } + + // Fetch may tell commit to clear the trap if it's been squashed. 
if (fromFetch->clearFetchFault) { DPRINTF(Commit, "Received clear fetch fault signal\n"); fetchTrapWait = 0; @@ -783,10 +735,6 @@ DefaultCommit::commit() commitStatus[tid] != TrapPending && fromIEW->squashedSeqNum[tid] <= youngestSeqNum[tid]) { - DPRINTF(Commit, "[tid:%u]: Squashing instructions in the " - "ROB.\n", - tid); - DPRINTF(Commit, "[tid:%i]: Squashing due to PC %#x [sn:%i]\n", tid, fromIEW->mispredPC[tid], @@ -814,11 +762,8 @@ DefaultCommit::commit() rob->squash(squashed_inst, tid); changedROBNumEntries[tid] = true; - // Send back the sequence number of the squashed instruction. toIEW->commitInfo[tid].doneSeqNum = squashed_inst; - // Send back the squash signal to tell stages that they should - // squash. toIEW->commitInfo[tid].squash = true; // Send back the rob squashing signal so other stages know that @@ -833,11 +778,7 @@ DefaultCommit::commit() toIEW->commitInfo[tid].nextPC = fromIEW->nextPC[tid]; - DPRINTF(Commit, "Squashing from IEW, restarting at PC %#x\n", - fromIEW->nextPC[tid]); - - toIEW->commitInfo[tid].mispredPC = - fromIEW->mispredPC[tid]; + toIEW->commitInfo[tid].mispredPC = fromIEW->mispredPC[tid]; if (toIEW->commitInfo[tid].branchMispredict) { ++branchMispredicts; @@ -882,10 +823,11 @@ DefaultCommit::commitInsts() { //////////////////////////////////// // Handle commit - // Note that commit will be handled prior to the ROB so that the ROB - // only tries to commit instructions it has in this current cycle, and - // not instructions it is writing in during this cycle. - // Can't commit and squash things at the same time... + // Note that commit will be handled prior to putting new + // instructions in the ROB so that the ROB only tries to commit + // instructions it has in this current cycle, and not instructions + // it is writing in during this cycle. Can't commit and squash + // things at the same time... 
//////////////////////////////////// DPRINTF(Commit, "Trying to commit instructions in the ROB.\n"); @@ -894,51 +836,58 @@ DefaultCommit::commitInsts() DynInstPtr head_inst; #if FULL_SYSTEM - if (commitStatus[0] == FetchTrapPending) { + // Not the best way to check if the front end is empty, but it should + // work. + // @todo: Try to avoid directly accessing fetch. + if (commitStatus[0] == FetchTrapPending && rob->isEmpty()) { DPRINTF(Commit, "Fault from fetch is pending.\n"); - if (rob->isEmpty()) { - fetchTrapWait++; - if (fetchTrapWait > 10000000) { - panic("Fetch trap has been pending for a long time!"); - } - if (fetchFaultTick > curTick) { - DPRINTF(Commit, "Not enough cycles since fault, fault will " - "happen on %lli\n", - fetchFaultTick); - cpu->activityThisCycle(); - return; - } else if (iewStage->hasStoresToWB()) { - DPRINTF(Commit, "IEW still has stores to WB. Waiting until " - "they are completed. fetchTrapWait:%i\n", - fetchTrapWait); - cpu->activityThisCycle(); - return; - } else if (cpu->inPalMode(readPC())) { - DPRINTF(Commit, "In pal mode right now. fetchTrapWait:%i\n", - fetchTrapWait); - return; - } - fetchTrapWait = 0; - DPRINTF(Commit, "ROB is empty, handling fetch trap.\n"); - assert(!thread[0]->inSyscall); + fetchTrapWait++; + if (fetchTrapWait > 10000000) { + panic("Fetch trap has been pending for a long time!"); + } + if (fetchFaultTick > curTick) { + DPRINTF(Commit, "Not enough cycles since fault, fault will " + "happen on %lli\n", + fetchFaultTick); + cpu->activityThisCycle(); + return; + } else if (iewStage->hasStoresToWB()) { + DPRINTF(Commit, "IEW still has stores to WB. Waiting until " + "they are completed. fetchTrapWait:%i\n", + fetchTrapWait); + cpu->activityThisCycle(); + return; + } else if (cpu->inPalMode(readPC())) { + DPRINTF(Commit, "In pal mode right now. fetchTrapWait:%i\n", + fetchTrapWait); + return; + } else if (fetchStage->getYoungestSN() > youngestSeqNum[0]) { + DPRINTF(Commit, "Waiting for front end to drain. 
fetchTrapWait:%i\n", + fetchTrapWait); + return; + } + fetchTrapWait = 0; + DPRINTF(Commit, "ROB is empty, handling fetch trap.\n"); - thread[0]->inSyscall = true; + assert(!thread[0]->inSyscall); - // Consider holding onto the trap and waiting until the trap event - // happens for this to be executed. - cpu->trap(fetchFault, 0); + thread[0]->inSyscall = true; - // Exit state update mode to avoid accidental updating. - thread[0]->inSyscall = false; + // Consider holding onto the trap and waiting until the trap event + // happens for this to be executed. + cpu->trap(fetchFault, 0); - commitStatus[0] = TrapPending; - // Set it up so that we squash next cycle - trapSquash[0] = true; - return; - } + // Exit state update mode to avoid accidental updating. + thread[0]->inSyscall = false; + + commitStatus[0] = TrapPending; + // Set it up so that we squash next cycle + trapSquash[0] = true; + return; } #endif + // Commit as many instructions as possible until the commit bandwidth // limit is reached, or it becomes impossible to commit any more. while (num_committed < commitWidth) { @@ -956,16 +905,13 @@ DefaultCommit::commitInsts() DPRINTF(Commit, "Trying to commit head instruction, [sn:%i] [tid:%i]\n", head_inst->seqNum, tid); - // If the head instruction is squashed, it is ready to retire at any - // time. However, we need to avoid updating any other state - // incorrectly if it's already been squashed. + // If the head instruction is squashed, it is ready to retire + // (be removed from the ROB) at any time. if (head_inst->isSquashed()) { DPRINTF(Commit, "Retiring squashed instruction from " "ROB.\n"); - // Tell ROB to retire head instruction. This retires the head - // inst in the ROB without affecting any other stages. rob->retireHead(commit_thread); ++commitSquashedInsts; @@ -989,7 +935,6 @@ DefaultCommit::commitInsts() if (commit_success) { ++num_committed; - // Record that the number of ROB entries has changed. 
changedROBNumEntries[tid] = true; // Set the doneSeqNum to the youngest committed instruction. @@ -1009,8 +954,11 @@ DefaultCommit::commitInsts() int count = 0; Addr oldpc; do { + // Debug statement. Checks to make sure we're not + // currently updating state while handling PC events. if (count == 0) - assert(!thread[tid]->inSyscall && !thread[tid]->trapPending); + assert(!thread[tid]->inSyscall && + !thread[tid]->trapPending); oldpc = PC[tid]; cpu->system->pcEventQueue.service( thread[tid]->getXCProxy()); @@ -1034,7 +982,7 @@ DefaultCommit::commitInsts() numCommittedDist.sample(num_committed); if (num_committed == commitWidth) { - commit_eligible[0]++; + commitEligible[0]++; } } @@ -1042,13 +990,12 @@ template bool DefaultCommit::commitHead(DynInstPtr &head_inst, unsigned inst_num) { - // Make sure instruction is valid assert(head_inst); int tid = head_inst->threadNumber; - // If the instruction is not executed yet, then it is a non-speculative - // or store inst. Signal backwards that it should be executed. + // If the instruction is not executed yet, then it will need extra + // handling. Signal backwards that it should be executed. if (!head_inst->isExecuted()) { // Keep this number correct. We have not yet actually executed // and committed this instruction. @@ -1059,10 +1006,16 @@ DefaultCommit::commitHead(DynInstPtr &head_inst, unsigned inst_num) if (head_inst->isNonSpeculative() || head_inst->isMemBarrier() || head_inst->isWriteBarrier()) { + + DPRINTF(Commit, "Encountered a barrier or non-speculative " + "instruction [sn:%lli] at the head of the ROB, PC %#x.\n", + head_inst->seqNum, head_inst->readPC()); + #if !FULL_SYSTEM - // Hack to make sure syscalls aren't executed until all stores - // write back their data. This direct communication shouldn't - // be used for anything other than this. + // Hack to make sure syscalls/memory barriers/quiesces + // aren't executed until all stores write back their data. 
+ // This direct communication shouldn't be used for + // anything other than this. if (inst_num > 0 || iewStage->hasStoresToWB()) #else if ((head_inst->isMemBarrier() || head_inst->isWriteBarrier() || @@ -1074,11 +1027,6 @@ DefaultCommit::commitHead(DynInstPtr &head_inst, unsigned inst_num) return false; } - DPRINTF(Commit, "Encountered a barrier or non-speculative " - "instruction [sn:%lli] at the head of the ROB, PC %#x.\n", - head_inst->seqNum, head_inst->readPC()); - - // Send back the non-speculative instruction's sequence number. toIEW->commitInfo[tid].nonSpecSeqNum = head_inst->seqNum; // Change the instruction so it won't try to commit again until @@ -1093,7 +1041,7 @@ DefaultCommit::commitHead(DynInstPtr &head_inst, unsigned inst_num) head_inst->seqNum, head_inst->readPC()); // Send back the non-speculative instruction's sequence - // number. Maybe just tell the lsq to re-execute the load. + // number. Tell the lsq to re-execute the load. toIEW->commitInfo[tid].nonSpecSeqNum = head_inst->seqNum; toIEW->commitInfo[tid].uncached = true; toIEW->commitInfo[tid].uncachedLoad = head_inst; @@ -1107,76 +1055,77 @@ DefaultCommit::commitHead(DynInstPtr &head_inst, unsigned inst_num) } } - // Now check if it's one of the special trap or barrier or - // serializing instructions. - if (head_inst->isThreadSync())/* || -// head_inst->isMemBarrier() || -head_inst->isWriteBarrier())*/ - { + if (head_inst->isThreadSync()) { // Not handled for now. - panic("Barrier instructions are not handled yet.\n"); + panic("Thread sync instructions are not handled yet.\n"); } + // Stores mark themselves as completed. if (!head_inst->isStore()) { head_inst->setCompleted(); } + // Use checker prior to updating anything due to traps or PC + // based events. + if (cpu->checker) { + cpu->checker->tick(head_inst); + } + // Check if the instruction caused a fault. If so, trap. 
Fault inst_fault = head_inst->getFault(); if (inst_fault != NoFault) { - if (!head_inst->isNop()) { + head_inst->setCompleted(); #if FULL_SYSTEM - DPRINTF(Commit, "Inst [sn:%lli] PC %#x has a fault\n", - head_inst->seqNum, head_inst->readPC()); + DPRINTF(Commit, "Inst [sn:%lli] PC %#x has a fault\n", + head_inst->seqNum, head_inst->readPC()); - if (iewStage->hasStoresToWB()) { - DPRINTF(Commit, "Stores outstanding, fault must wait.\n"); - return false; - } + if (iewStage->hasStoresToWB() || inst_num > 0) { + DPRINTF(Commit, "Stores outstanding, fault must wait.\n"); + return false; + } - assert(!thread[tid]->inSyscall); + if (cpu->checker && head_inst->isStore()) { + cpu->checker->tick(head_inst); + } - thread[tid]->inSyscall = true; + assert(!thread[tid]->inSyscall); - // Hack for now; DTB will sometimes need the machine instruction - // for when faults happen. So we will set it here, prior to the - // DTB possibly needing it for this translation. - thread[tid]->setInst( - static_cast(head_inst->staticInst->machInst)); + // Mark that we're in state update mode so that the trap's + // execution doesn't generate extra squashes. + thread[tid]->inSyscall = true; - // Consider holding onto the trap and waiting until the trap event - // happens for this to be executed. - cpu->trap(inst_fault, tid); + // DTB will sometimes need the machine instruction for when + // faults happen. So we will set it here, prior to the DTB + // possibly needing it for its fault. + thread[tid]->setInst( + static_cast(head_inst->staticInst->machInst)); - // Exit state update mode to avoid accidental updating. - thread[tid]->inSyscall = false; + // Execute the trap. Although it's slightly unrealistic in + // terms of timing (as it doesn't wait for the full timing of + // the trap event to complete before updating state), it's + // needed to update the state as soon as possible. This + // prevents external agents from changing any specific state + // that the trap need. 
+ cpu->trap(inst_fault, tid); - commitStatus[tid] = TrapPending; + // Exit state update mode to avoid accidental updating. + thread[tid]->inSyscall = false; - // Generate trap squash event. - generateTrapEvent(tid); + commitStatus[tid] = TrapPending; - return false; -#else // !FULL_SYSTEM - panic("fault (%d) detected @ PC %08p", inst_fault, - head_inst->PC); -#endif // FULL_SYSTEM - } - } + // Generate trap squash event. + generateTrapEvent(tid); - // Check if we're really ready to commit. If not then return false. - // I'm pretty sure all instructions should be able to commit if they've - // reached this far. For now leave this in as a check. - if (!rob->isHeadReady(tid)) { - panic("Unable to commit head instruction!\n"); return false; +#else // !FULL_SYSTEM + panic("fault (%d) detected @ PC %08p", inst_fault, + head_inst->PC); +#endif // FULL_SYSTEM } updateComInstStats(head_inst); - // Now that the instruction is going to be committed, finalize its - // trace data. if (head_inst->traceData) { head_inst->traceData->setFetchSeq(head_inst->seqNum); head_inst->traceData->setCPSeq(thread[tid]->numInst); @@ -1201,13 +1150,7 @@ template void DefaultCommit::getInsts() { - ////////////////////////////////////// - // Handle ROB functions - ////////////////////////////////////// - - // Read any renamed instructions and place them into the ROB. Do this - // prior to squashing to avoid having instructions in the ROB that - // don't get squashed properly. + // Read any renamed instructions and place them into the ROB. 
int insts_to_process = min((int)renameWidth, fromRename->size); for (int inst_num = 0; inst_num < insts_to_process; ++inst_num) @@ -1246,7 +1189,8 @@ DefaultCommit::markCompletedInsts() ++inst_num) { if (!fromIEW->insts[inst_num]->isSquashed()) { - DPRINTF(Commit, "[tid:%i]: Marking PC %#x, SN %i ready within ROB.\n", + DPRINTF(Commit, "[tid:%i]: Marking PC %#x, [sn:%lli] ready " + "within ROB.\n", fromIEW->insts[inst_num]->threadNumber, fromIEW->insts[inst_num]->readPC(), fromIEW->insts[inst_num]->seqNum); @@ -1257,30 +1201,6 @@ DefaultCommit::markCompletedInsts() } } -template -uint64_t -DefaultCommit::readPC() -{ - // @todo: Fix this single thread hack. - return PC[0]; -} - -template -void -DefaultCommit::setSquashing(unsigned tid) -{ - if (_status == Inactive) { - DPRINTF(Activity, "Activating stage.\n"); - _status = Active; - cpu->activateStage(FullCPU::CommitIdx); - } - - if (commitStatus[tid] != ROBSquashing) { - commitStatus[tid] = ROBSquashing; - ++squashCounter; - } -} - template bool DefaultCommit::robDoneSquashing() @@ -1308,39 +1228,39 @@ DefaultCommit::updateComInstStats(DynInstPtr &inst) // #ifdef TARGET_ALPHA if (inst->isDataPrefetch()) { - stat_com_swp[thread]++; + statComSwp[thread]++; } else { - stat_com_inst[thread]++; + statComInst[thread]++; } #else - stat_com_inst[thread]++; + statComInst[thread]++; #endif // // Control Instructions // if (inst->isControl()) - stat_com_branches[thread]++; + statComBranches[thread]++; // // Memory references // if (inst->isMemRef()) { - stat_com_refs[thread]++; + statComRefs[thread]++; if (inst->isLoad()) { - stat_com_loads[thread]++; + statComLoads[thread]++; } } if (inst->isMemBarrier()) { - stat_com_membars[thread]++; + statComMembars[thread]++; } } //////////////////////////////////////// // // -// SMT COMMIT POLICY MAITAINED HERE // +// SMT COMMIT POLICY MAINTAINED HERE // // // //////////////////////////////////////// template diff --git a/cpu/o3/cpu.cc b/cpu/o3/cpu.cc index 59308d6a9..9a46f2e7c 100644 
--- a/cpu/o3/cpu.cc +++ b/cpu/o3/cpu.cc @@ -35,6 +35,7 @@ #endif #include "sim/root.hh" +#include "cpu/checker/cpu.hh" #include "cpu/cpu_exec_context.hh" #include "cpu/exec_context.hh" #include "cpu/o3/alpha_dyn_inst.hh" @@ -76,7 +77,6 @@ FullO3CPU::TickEvent::description() return "FullO3CPU tick event"; } -//Call constructor to all the pipeline stages here template FullO3CPU::FullO3CPU(Params *params) : BaseFullCPU(params), @@ -126,13 +126,25 @@ FullO3CPU::FullO3CPU(Params *params) // pTable(params->pTable), mem(params->workload[0]->getMemory()), #endif // FULL_SYSTEM - + switchCount(0), icacheInterface(params->icacheInterface), dcacheInterface(params->dcacheInterface), - deferRegistration(params->deferRegistration) + deferRegistration(params->deferRegistration), + numThreads(number_of_threads) { _status = Idle; + if (params->checker) { + BaseCPU *temp_checker = params->checker; + checker = dynamic_cast *>(temp_checker); + checker->setMemory(mem); +#if FULL_SYSTEM + checker->setSystem(params->system); +#endif + } else { + checker = NULL; + } + #if !FULL_SYSTEM thread.resize(number_of_threads); tids.resize(number_of_threads); @@ -168,20 +180,18 @@ FullO3CPU::FullO3CPU(Params *params) commit.setIEWQueue(&iewQueue); commit.setRenameQueue(&renameQueue); + commit.setFetchStage(&fetch); commit.setIEWStage(&iew); rename.setIEWStage(&iew); rename.setCommitStage(&commit); - //Make Sure That this a Valid Architeture - //@todo: move this up in constructor - numThreads = number_of_threads; - #if !FULL_SYSTEM int active_threads = params->workload.size(); #else int active_threads = 1; #endif + //Make Sure That this is a Valid Architecture assert(params->numPhysIntRegs >= numThreads * TheISA::NumIntRegs); assert(params->numPhysFloatRegs >= numThreads * TheISA::NumFloatRegs); @@ -357,7 +367,7 @@ FullO3CPU::tick() cleanUpRemovedInsts(); } - if (activityCount && !tickEvent.scheduled()) { + if (_status != SwitchedOut && activityCount && !tickEvent.scheduled()) { 
tickEvent.schedule(curTick + cycles(1)); } @@ -380,13 +390,7 @@ FullO3CPU::init() for (int i = 0; i < number_of_threads; ++i) thread[i]->inSyscall = true; - - // Need to do a copy of the xc->regs into the CPU's regfile so - // that it can start properly. - for (int tid=0; tid < number_of_threads; tid++) { - // Need to do a copy of the xc->regs into the CPU's regfile so - // that it can start properly. #if FULL_SYSTEM ExecContext *src_xc = execContexts[tid]; #else @@ -406,8 +410,7 @@ FullO3CPU::init() for (int i = 0; i < number_of_threads; ++i) thread[i]->inSyscall = false; - // Probably should just make a call to all the stages to init stage, - // regardless of whether or not they need it. Keeps it more independent. + // Initialize stages. fetch.initStage(); iew.initStage(); rename.initStage(); @@ -570,7 +573,6 @@ template void FullO3CPU::activateContext(int tid, int delay) { - // Needs to set each stage to running as well. list::iterator isActive = find( activeThreads.begin(), activeThreads.end(), tid); @@ -658,30 +660,46 @@ FullO3CPU::haltContext(int tid) template void -FullO3CPU::switchOut(Sampler *sampler) +FullO3CPU::switchOut(Sampler *_sampler) { -// panic("FullO3CPU does not have a switch out function.\n"); + sampler = _sampler; + switchCount = 0; fetch.switchOut(); decode.switchOut(); rename.switchOut(); iew.switchOut(); commit.switchOut(); +} - instList.clear(); - while (!removeList.empty()) { - removeList.pop(); - } +template +void +FullO3CPU::signalSwitched() +{ + if (++switchCount == 5) { + fetch.doSwitchOut(); + rename.doSwitchOut(); + commit.doSwitchOut(); + instList.clear(); + while (!removeList.empty()) { + removeList.pop(); + } - if (tickEvent.scheduled()) - tickEvent.squash(); - sampler->signalSwitched(); - _status = SwitchedOut; + if (checker) + checker->switchOut(sampler); + + if (tickEvent.scheduled()) + tickEvent.squash(); + sampler->signalSwitched(); + _status = SwitchedOut; + } + assert(switchCount <= 5); } template void 
FullO3CPU::takeOverFrom(BaseCPU *oldCPU) { + // Flush out any old data from the activity buffers. for (int i = 0; i < 6; ++i) { timeBuffer.advance(); fetchQueue.advance(); @@ -733,13 +751,6 @@ FullO3CPU::takeOverFrom(BaseCPU *oldCPU) tickEvent.schedule(curTick); } -template -InstSeqNum -FullO3CPU::getAndIncrementInstSeq() -{ - return globalSeqNum++; -} - template uint64_t FullO3CPU::readIntReg(int reg_idx) @@ -982,14 +993,9 @@ FullO3CPU::removeInstsNotInROB(unsigned tid) while (inst_it != end_it) { assert(!instList.empty()); - bool break_loop = (inst_it == instList.begin()); - squashInstIt(inst_it, tid); inst_it--; - - if (break_loop) - break; } // If the ROB was empty, then we actually need to remove the first @@ -1095,8 +1101,6 @@ FullO3CPU::dumpInsts() inst_list_it++; ++num; } - - } template diff --git a/cpu/o3/cpu.hh b/cpu/o3/cpu.hh index 621ddf541..789729e61 100644 --- a/cpu/o3/cpu.hh +++ b/cpu/o3/cpu.hh @@ -46,6 +46,8 @@ #include "cpu/o3/thread_state.hh" #include "sim/process.hh" +template +class Checker; class ExecContext; class MemInterface; class Process; @@ -199,13 +201,16 @@ class FullO3CPU : public BaseFullCPU */ void switchOut(Sampler *sampler); + void signalSwitched(); + /** Takes over from another CPU. * @todo: Implement this. */ void takeOverFrom(BaseCPU *oldCPU); /** Get the current instruction sequence number, and increment it. */ - InstSeqNum getAndIncrementInstSeq(); + InstSeqNum getAndIncrementInstSeq() + { return globalSeqNum++; } #if FULL_SYSTEM /** Check if this address is a valid instruction address. */ @@ -333,9 +338,9 @@ class FullO3CPU : public BaseFullCPU */ std::queue removeList; -#ifdef DEBUG +//#ifdef DEBUG std::set snList; -#endif +//#endif /** Records if instructions need to be removed this cycle due to being * retired or squashed. @@ -474,6 +479,8 @@ class FullO3CPU : public BaseFullCPU /** The global sequence number counter. */ InstSeqNum globalSeqNum; + Checker *checker; + #if FULL_SYSTEM /** Pointer to the system. 
*/ System *system; @@ -484,12 +491,16 @@ class FullO3CPU : public BaseFullCPU PhysicalMemory *physmem; #endif - // List of all ExecContexts. - std::vector thread; - /** Pointer to memory. */ FunctionalMemory *mem; + Sampler *sampler; + + int switchCount; + + // List of all ExecContexts. + std::vector thread; + #if 0 /** Page table pointer. */ PageTable *pTable; diff --git a/cpu/o3/decode_impl.hh b/cpu/o3/decode_impl.hh index caa97067b..a419a8932 100644 --- a/cpu/o3/decode_impl.hh +++ b/cpu/o3/decode_impl.hh @@ -166,6 +166,7 @@ template void DefaultDecode::switchOut() { + cpu->signalSwitched(); } template diff --git a/cpu/o3/fetch.hh b/cpu/o3/fetch.hh index 6074831c6..b03d4afe3 100644 --- a/cpu/o3/fetch.hh +++ b/cpu/o3/fetch.hh @@ -165,6 +165,8 @@ class DefaultFetch void switchOut(); + void doSwitchOut(); + void takeOverFrom(); bool isSwitchedOut() { return switchedOut; } @@ -371,6 +373,11 @@ class DefaultFetch bool switchedOut; + public: + InstSeqNum &getYoungestSN() { return youngestSN; } + private: + InstSeqNum youngestSN; + #if !FULL_SYSTEM /** Page table pointer. */ // PageTable *pTable; diff --git a/cpu/o3/fetch_impl.hh b/cpu/o3/fetch_impl.hh index 92f923c65..b4ff69d89 100644 --- a/cpu/o3/fetch_impl.hh +++ b/cpu/o3/fetch_impl.hh @@ -372,6 +372,13 @@ void DefaultFetch::switchOut() { switchedOut = true; + cpu->signalSwitched(); +} + +template +void +DefaultFetch::doSwitchOut() +{ branchPred.switchOut(); } @@ -474,7 +481,7 @@ DefaultFetch::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid unsigned flags = 0; #endif // FULL_SYSTEM - if (interruptPending && flags == 0) { + if (interruptPending && flags == 0 || switchedOut) { // Hold off fetch from getting new instructions while an interrupt // is pending. return false; @@ -508,7 +515,8 @@ DefaultFetch::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid // instruction. 
if (fault == NoFault) { #if FULL_SYSTEM - if (cpu->system->memctrl->badaddr(memReq[tid]->paddr)) { + if (cpu->system->memctrl->badaddr(memReq[tid]->paddr) || + memReq[tid]->flags & UNCACHEABLE) { DPRINTF(Fetch, "Fetch: Bad address %#x (hopefully on a " "misspeculating path!", memReq[tid]->paddr); @@ -625,8 +633,8 @@ DefaultFetch::doSquash(const Addr &new_PC, unsigned tid) template void DefaultFetch::squashFromDecode(const Addr &new_PC, - const InstSeqNum &seq_num, - unsigned tid) + const InstSeqNum &seq_num, + unsigned tid) { DPRINTF(Fetch, "[tid:%i]: Squashing from decode.\n",tid); @@ -635,6 +643,7 @@ DefaultFetch::squashFromDecode(const Addr &new_PC, // Tell the CPU to remove any instructions that are in flight between // fetch and decode. cpu->removeInstsUntil(seq_num, tid); + youngestSN = seq_num; } template @@ -820,6 +829,7 @@ DefaultFetch::checkSignalsAndUpdate(unsigned tid) // In any case, squash. squash(fromCommit->commitInfo[tid].nextPC,tid); + youngestSN = fromCommit->commitInfo[tid].doneSeqNum; // Also check if there's a mispredict that happened. if (fromCommit->commitInfo[tid].branchMispredict) { @@ -999,6 +1009,8 @@ DefaultFetch::fetch(bool &status_change) // Get a sequence number. inst_seq = cpu->getAndIncrementInstSeq(); + youngestSN = inst_seq; + // Make sure this is a valid index. 
assert(offset <= cacheBlkSize - instSize); diff --git a/cpu/o3/iew.hh b/cpu/o3/iew.hh index ae0ba6a21..72be25668 100644 --- a/cpu/o3/iew.hh +++ b/cpu/o3/iew.hh @@ -159,6 +159,8 @@ class DefaultIEW void switchOut(); + void doSwitchOut(); + void takeOverFrom(); bool isSwitchedOut() { return switchedOut; } diff --git a/cpu/o3/iew_impl.hh b/cpu/o3/iew_impl.hh index 42d83ee72..cbd7396f7 100644 --- a/cpu/o3/iew_impl.hh +++ b/cpu/o3/iew_impl.hh @@ -55,7 +55,11 @@ DefaultIEW::LdWritebackEvent::process() //iewStage->ldstQueue.removeMSHR(inst->threadNumber,inst->seqNum); - if (inst->isSquashed() || iewStage->isSwitchedOut()) { + if (iewStage->isSwitchedOut()) { + inst = NULL; + return; + } else if (inst->isSquashed()) { + iewStage->wakeCPU(); inst = NULL; return; } @@ -440,8 +444,16 @@ DefaultIEW::setPageTable(PageTable *pt_ptr) template void DefaultIEW::switchOut() +{ + cpu->signalSwitched(); +} + +template +void +DefaultIEW::doSwitchOut() { switchedOut = true; + instQueue.switchOut(); ldstQueue.switchOut(); fuPool->switchOut(); diff --git a/cpu/o3/lsq_unit_impl.hh b/cpu/o3/lsq_unit_impl.hh index 3bb9a81f8..dca808ac9 100644 --- a/cpu/o3/lsq_unit_impl.hh +++ b/cpu/o3/lsq_unit_impl.hh @@ -26,6 +26,7 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ +#include "cpu/checker/cpu.hh" #include "cpu/o3/lsq_unit.hh" #include "base/str.hh" @@ -690,6 +691,9 @@ LSQUnit::writebackStores() } if (!(req->flags & LOCKED)) { storeQueue[storeWBIdx].inst->setCompleted(); + if (cpu->checker) { + cpu->checker->tick(storeQueue[storeWBIdx].inst); + } } if (dcacheInterface) { @@ -937,6 +941,11 @@ LSQUnit::completeStore(int store_idx) stallingStoreIsn = 0; iewStage->replayMemInst(loadQueue[stallingLoadIdx]); } + + storeQueue[store_idx].inst->setCompleted(); + if (cpu->checker) { + cpu->checker->tick(storeQueue[store_idx].inst); + } } template diff --git a/cpu/o3/regfile.hh b/cpu/o3/regfile.hh index 78674c32c..ed1238d36 100644 --- a/cpu/o3/regfile.hh +++ b/cpu/o3/regfile.hh @@ -200,7 +200,7 @@ class PhysRegFile unsigned thread_id) { return miscRegs[thread_id].readRegWithEffect(misc_reg, fault, - cpu->xcProxies[thread_id]); + cpu->xcBase(thread_id)); } Fault setMiscReg(int misc_reg, const MiscReg &val, unsigned thread_id) @@ -212,7 +212,7 @@ class PhysRegFile unsigned thread_id) { return miscRegs[thread_id].setRegWithEffect(misc_reg, val, - cpu->xcProxies[thread_id]); + cpu->xcBase(thread_id)); } #if FULL_SYSTEM diff --git a/cpu/o3/rename.hh b/cpu/o3/rename.hh index 4c5c46356..dd2cb0c18 100644 --- a/cpu/o3/rename.hh +++ b/cpu/o3/rename.hh @@ -155,6 +155,8 @@ class DefaultRename void switchOut(); + void doSwitchOut(); + void takeOverFrom(); /** Squashes all instructions in a thread. */ diff --git a/cpu/o3/rename_impl.hh b/cpu/o3/rename_impl.hh index d41058deb..db4bb2ffe 100644 --- a/cpu/o3/rename_impl.hh +++ b/cpu/o3/rename_impl.hh @@ -261,6 +261,13 @@ DefaultRename::setScoreboard(Scoreboard *_scoreboard) template void DefaultRename::switchOut() +{ + cpu->signalSwitched(); +} + +template +void +DefaultRename::doSwitchOut() { for (int i = 0; i < numThreads; i++) { typename list::iterator hb_it = historyBuffer[i].begin();