X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fcpu%2Finorder%2Fcpu.cc;h=5a02f94d9bcc969b8ca0fcf80ab666860596aea8;hb=7d0344704a9ecc566d82ad43ec44b4becbaf4d77;hp=97748deba1e3e4fd489a37641b1a1e669534fb72;hpb=1a451cd2c5ec20c27c39a1cd3e3b5422c2b4f679;p=gem5.git diff --git a/src/cpu/inorder/cpu.cc b/src/cpu/inorder/cpu.cc index 97748deba..5a02f94d9 100644 --- a/src/cpu/inorder/cpu.cc +++ b/src/cpu/inorder/cpu.cc @@ -1,4 +1,17 @@ /* + * Copyright (c) 2012 ARM Limited + * Copyright (c) 2013 Advanced Micro Devices, Inc. + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * * Copyright (c) 2007 MIPS Technologies, Inc. * All rights reserved. * @@ -33,8 +46,8 @@ #include "arch/utility.hh" #include "base/bigint.hh" -#include "config/full_system.hh" #include "config/the_isa.hh" +#include "cpu/inorder/resources/cache_unit.hh" #include "cpu/inorder/resources/resource_list.hh" #include "cpu/inorder/cpu.hh" #include "cpu/inorder/first_stage.hh" @@ -46,21 +59,22 @@ #include "cpu/activity.hh" #include "cpu/base.hh" #include "cpu/exetrace.hh" +#include "cpu/quiesce_event.hh" +#include "cpu/reg_class.hh" #include "cpu/simple_thread.hh" #include "cpu/thread_context.hh" #include "debug/Activity.hh" #include "debug/InOrderCPU.hh" +#include "debug/InOrderCachePort.hh" +#include "debug/Interrupt.hh" +#include "debug/Quiesce.hh" #include "debug/RefCount.hh" #include "debug/SkedCache.hh" -#include "mem/translating_port.hh" #include "params/InOrderCPU.hh" +#include "sim/full_system.hh" #include "sim/process.hh" #include "sim/stat_control.hh" - -#if FULL_SYSTEM -#include "cpu/quiesce_event.hh" #include "sim/system.hh" -#endif #if THE_ISA == ALPHA_ISA #include "arch/alpha/osfpal.hh" @@ -70,6 +84,30 @@ using namespace std; using namespace TheISA; using namespace ThePipeline; +InOrderCPU::CachePort::CachePort(CacheUnit *_cacheUnit, + const std::string& name) : + MasterPort(_cacheUnit->name() + name, _cacheUnit->cpu), + cacheUnit(_cacheUnit) +{ } + +bool +InOrderCPU::CachePort::recvTimingResp(Packet *pkt) +{ + if (pkt->isError()) + DPRINTF(InOrderCachePort, "Got error packet back for address: %x\n", + pkt->getAddr()); + else + cacheUnit->processCacheCompletion(pkt); + + return true; +} + +void +InOrderCPU::CachePort::recvRetry() +{ + cacheUnit->recvRetry(); +} + InOrderCPU::TickEvent::TickEvent(InOrderCPU *c) : Event(CPU_Tick_Pri), cpu(c) { } @@ -83,16 +121,15 @@ InOrderCPU::TickEvent::process() const char * -InOrderCPU::TickEvent::description() +InOrderCPU::TickEvent::description() const { return "InOrderCPU tick event"; } InOrderCPU::CPUEvent::CPUEvent(InOrderCPU *_cpu, CPUEventType e_type, Fault fault, ThreadID _tid, DynInstPtr inst, - unsigned event_pri_offset) - : Event(Event::Priority((unsigned int)CPU_Tick_Pri + event_pri_offset)), - cpu(_cpu) + CPUEventPri event_pri) + : Event(event_pri), cpu(_cpu) { setEvent(e_type, fault, _tid, inst); } @@ -106,7 +143,7 @@ std::string InOrderCPU::eventNames[NumCPUEvents] = "HaltThread", "SuspendThread", "Trap", - "InstGraduated", + 
"Syscall", "SquashFromMemStall", "UpdatePCs" }; @@ -118,6 +155,7 @@ InOrderCPU::CPUEvent::process() { case ActivateThread: cpu->activateThread(tid); + cpu->resPool->activateThread(tid); break; case ActivateNextReadyThread: @@ -126,22 +164,35 @@ InOrderCPU::CPUEvent::process() case DeactivateThread: cpu->deactivateThread(tid); + cpu->resPool->deactivateThread(tid); break; case HaltThread: cpu->haltThread(tid); + cpu->resPool->deactivateThread(tid); break; case SuspendThread: cpu->suspendThread(tid); + cpu->resPool->suspendThread(tid); break; case SquashFromMemStall: cpu->squashDueToMemStall(inst->squashingStage, inst->seqNum, tid); + cpu->resPool->squashDueToMemStall(inst, inst->squashingStage, + inst->seqNum, tid); break; case Trap: - cpu->trapCPU(fault, tid, inst); + DPRINTF(InOrderCPU, "Trapping CPU\n"); + cpu->trap(fault, tid, inst); + cpu->resPool->trap(fault, tid, inst); + cpu->trapPending[tid] = false; + break; + + case Syscall: + cpu->syscall(inst->syscallNum, tid); + cpu->resPool->trap(fault, tid, inst); break; default: @@ -154,16 +205,16 @@ InOrderCPU::CPUEvent::process() const char * -InOrderCPU::CPUEvent::description() +InOrderCPU::CPUEvent::description() const { return "InOrderCPU event"; } void -InOrderCPU::CPUEvent::scheduleEvent(int delay) +InOrderCPU::CPUEvent::scheduleEvent(Cycles delay) { assert(!scheduled() || squashed()); - cpu->reschedule(this, cpu->nextCycle(curTick() + cpu->ticks(delay)), true); + cpu->reschedule(this, cpu->clockEdge(delay), true); } void @@ -180,98 +231,90 @@ InOrderCPU::InOrderCPU(Params *params) _status(Idle), tickEvent(this), stageWidth(params->stageWidth), + resPool(new ResourcePool(this, params)), + isa(numThreads, NULL), timeBuffer(2 , 2), + dataPort(resPool->getDataUnit(), ".dcache_port"), + instPort(resPool->getInstUnit(), ".icache_port"), removeInstsThisCycle(false), activityRec(params->name, NumStages, 10, params->activity), -#if FULL_SYSTEM system(params->system), - physmem(system->physmem), -#endif // FULL_SYSTEM #ifdef DEBUG cpuEventNum(0), resReqCount(0), #endif // DEBUG - switchCount(0), - deferRegistration(false/*params->deferRegistration*/), + drainCount(0), stageTracing(params->stageTracing), + lastRunningCycle(0), instsPerSwitch(0) { - ThreadID active_threads; cpu_params = params; - resPool = new ResourcePool(this, params); - // Resize for Multithreading CPUs thread.resize(numThreads); -#if FULL_SYSTEM - active_threads = 1; -#else - active_threads = params->workload.size(); - - if (active_threads > MaxThreads) { - panic("Workload Size too large. Increase the 'MaxThreads'" - "in your InOrder implementation or " - "edit your workload size."); - } + ThreadID active_threads = params->workload.size(); + if (FullSystem) { + active_threads = 1; + } else { + active_threads = params->workload.size(); - - if (active_threads > 1) { - threadModel = (InOrderCPU::ThreadModel) params->threadModel; - - if (threadModel == SMT) { - DPRINTF(InOrderCPU, "Setting Thread Model to SMT.\n"); - } else if (threadModel == SwitchOnCacheMiss) { - DPRINTF(InOrderCPU, "Setting Thread Model to " - "Switch On Cache Miss\n"); + if (active_threads > MaxThreads) { + panic("Workload Size too large. Increase the 'MaxThreads'" + "in your InOrder implementation or " + "edit your workload size."); } - - } else { - threadModel = Single; - } - - - -#endif - // Bind the fetch & data ports from the resource pool. 
- fetchPortIdx = resPool->getPortIdx(params->fetchMemPort); - if (fetchPortIdx == 0) { - fatal("Unable to find port to fetch instructions from.\n"); - } - dataPortIdx = resPool->getPortIdx(params->dataMemPort); - if (dataPortIdx == 0) { - fatal("Unable to find port for data.\n"); + if (active_threads > 1) { + threadModel = (InOrderCPU::ThreadModel) params->threadModel; + + if (threadModel == SMT) { + DPRINTF(InOrderCPU, "Setting Thread Model to SMT.\n"); + } else if (threadModel == SwitchOnCacheMiss) { + DPRINTF(InOrderCPU, "Setting Thread Model to " + "Switch On Cache Miss\n"); + } + + } else { + threadModel = Single; + } } for (ThreadID tid = 0; tid < numThreads; ++tid) { -#if FULL_SYSTEM - // SMT is not supported in FS mode yet. - assert(numThreads == 1); - thread[tid] = new Thread(this, 0); -#else - if (tid < (ThreadID)params->workload.size()) { - DPRINTF(InOrderCPU, "Workload[%i] process is %#x\n", - tid, params->workload[tid]->prog_fname); - thread[tid] = - new Thread(this, tid, params->workload[tid]); + isa[tid] = params->isa[tid]; + pc[tid].set(0); + lastCommittedPC[tid].set(0); + + if (FullSystem) { + // SMT is not supported in FS mode yet. + assert(numThreads == 1); + thread[tid] = new Thread(this, 0, NULL); } else { - //Allocate Empty thread so M5 can use later - //when scheduling threads to CPU - Process* dummy_proc = params->workload[0]; - thread[tid] = new Thread(this, tid, dummy_proc); + if (tid < (ThreadID)params->workload.size()) { + DPRINTF(InOrderCPU, "Workload[%i] process is %#x\n", + tid, params->workload[tid]->progName()); + thread[tid] = + new Thread(this, tid, params->workload[tid]); + } else { + //Allocate Empty thread so M5 can use later + //when scheduling threads to CPU + Process* dummy_proc = params->workload[0]; + thread[tid] = new Thread(this, tid, dummy_proc); + } + + // Eventually set this with parameters... + asid[tid] = tid; } - - // Eventually set this with parameters... - asid[tid] = tid; -#endif // Setup the TC that will serve as the interface to the threads/CPU. InOrderThreadContext *tc = new InOrderThreadContext; tc->cpu = this; tc->thread = thread[tid]; + // Setup quiesce event. + this->thread[tid]->quiesceEvent = new EndQuiesceEvent(tc); + // Give the thread the TC. thread[tid]->tc = tc; thread[tid]->setFuncExeInst(0); @@ -318,7 +361,10 @@ InOrderCPU::InOrderCPU(Params *params) memset(intRegs[tid], 0, sizeof(intRegs[tid])); memset(floatRegs.i[tid], 0, sizeof(floatRegs.i[tid])); - isa[tid].clear(); +#ifdef ISA_HAS_CC_REGS + memset(ccRegs[tid], 0, sizeof(ccRegs[tid])); +#endif + isa[tid]->clear(); // Define dummy instructions and resource requests to be used. dummyInst[tid] = new InOrderDynInst(this, @@ -328,6 +374,25 @@ InOrderCPU::InOrderCPU(Params *params) asid[tid]); dummyReq[tid] = new ResourceRequest(resPool->getResource(0)); + + + if (FullSystem) { + // Use this dummy inst to force squashing behind every instruction + // in pipeline + dummyTrapInst[tid] = new InOrderDynInst(this, NULL, 0, 0, 0); + dummyTrapInst[tid]->seqNum = 0; + dummyTrapInst[tid]->squashSeqNum = 0; + dummyTrapInst[tid]->setTid(tid); + } + + trapPending[tid] = false; + + } + + // InOrderCPU always requires an interrupt controller. 
+ if (!params->switched_out && !interrupts) { + fatal("InOrderCPU %s has no interrupt controller.\n" + "Ensure createInterruptController() is called.\n", name()); } dummyReqInst = new InOrderDynInst(this, NULL, 0, 0, 0); @@ -340,28 +405,23 @@ InOrderCPU::InOrderCPU(Params *params) endOfSkedIt = skedCache.end(); frontEndSked = createFrontEndSked(); - - lastRunningCycle = curTick(); + faultSked = createFaultSked(); - // Reset CPU to reset state. -#if FULL_SYSTEM - Fault resetFault = new ResetFault(); - resetFault->invoke(tcBase()); -#endif + lastRunningCycle = curCycle(); + lockAddr = 0; + lockFlag = false; // Schedule First Tick Event, CPU will reschedule itself from here on out. - scheduleTickEvent(0); + scheduleTickEvent(Cycles(0)); } InOrderCPU::~InOrderCPU() { delete resPool; - std::map::iterator sked_it = - skedCache.begin(); - std::map::iterator sked_end = - skedCache.end(); + SkedCacheIt sked_it = skedCache.begin(); + SkedCacheIt sked_end = skedCache.end(); while (sked_it != sked_end) { delete (*sked_it).second; @@ -370,7 +430,7 @@ InOrderCPU::~InOrderCPU() skedCache.clear(); } -std::map InOrderCPU::skedCache; +m5::hash_map InOrderCPU::skedCache; RSkedPtr InOrderCPU::createFrontEndSked() @@ -391,8 +451,18 @@ InOrderCPU::createFrontEndSked() D.needs(FetchSeq, FetchSeqUnit::UpdateTargetPC); - DPRINTF(SkedCache, "Resource Sked created for instruction \"front_end\"\n"); + DPRINTF(SkedCache, "Resource Sked created for instruction Front End\n"); + + return res_sked; +} +RSkedPtr +InOrderCPU::createFaultSked() +{ + RSkedPtr res_sked = new ResourceSked(); + StageScheduler W(res_sked, NumStages - 1); + W.needs(Grad, GraduationUnit::CheckFault); + DPRINTF(SkedCache, "Resource Sked created for instruction Faults\n"); return res_sked; } @@ -420,12 +490,15 @@ InOrderCPU::createBackEndSked(DynInstPtr inst) } // EXECUTE + X.needs(RegManager, UseDefUnit::MarkDestRegs); for (int idx=0; idx < inst->numSrcRegs(); idx++) { if (!idx || !inst->isStore()) { X.needs(RegManager, UseDefUnit::ReadSrcReg, idx); } } + //@todo: schedule non-spec insts to operate on this cycle + // as long as all previous insts are done if ( inst->isNonSpeculative() ) { // skip execution of non speculative insts until later } else if ( inst->isMemRef() ) { @@ -438,40 +511,77 @@ InOrderCPU::createBackEndSked(DynInstPtr inst) X.needs(ExecUnit, ExecutionUnit::ExecuteInst); } - if (inst->opClass() == IntMultOp || inst->opClass() == IntDivOp) { - X.needs(MDU, MultDivUnit::EndMultDiv); - } - // MEMORY - if ( inst->isLoad() ) { - M.needs(DCache, CacheUnit::InitiateReadData); - } else if ( inst->isStore() ) { - if ( inst->numSrcRegs() >= 2 ) { - M.needs(RegManager, UseDefUnit::ReadSrcReg, 1); + if (!inst->isNonSpeculative()) { + if (inst->opClass() == IntMultOp || inst->opClass() == IntDivOp) { + M.needs(MDU, MultDivUnit::EndMultDiv); } - M.needs(AGEN, AGENUnit::GenerateAddr); - M.needs(DCache, CacheUnit::InitiateWriteData); - } - - // WRITEBACK - if ( inst->isLoad() ) { - W.needs(DCache, CacheUnit::CompleteReadData); - } else if ( inst->isStore() ) { - W.needs(DCache, CacheUnit::CompleteWriteData); + if ( inst->isLoad() ) { + M.needs(DCache, CacheUnit::InitiateReadData); + if (inst->splitInst) + M.needs(DCache, CacheUnit::InitSecondSplitRead); + } else if ( inst->isStore() ) { + for (int i = 1; i < inst->numSrcRegs(); i++ ) { + M.needs(RegManager, UseDefUnit::ReadSrcReg, i); + } + M.needs(AGEN, AGENUnit::GenerateAddr); + M.needs(DCache, CacheUnit::InitiateWriteData); + if (inst->splitInst) + M.needs(DCache, 
CacheUnit::InitSecondSplitWrite); + } } - if ( inst->isNonSpeculative() ) { - if ( inst->isMemRef() ) fatal("Non-Speculative Memory Instruction"); - W.needs(ExecUnit, ExecutionUnit::ExecuteInst); + // WRITEBACK + if (!inst->isNonSpeculative()) { + if ( inst->isLoad() ) { + W.needs(DCache, CacheUnit::CompleteReadData); + if (inst->splitInst) + W.needs(DCache, CacheUnit::CompleteSecondSplitRead); + } else if ( inst->isStore() ) { + W.needs(DCache, CacheUnit::CompleteWriteData); + if (inst->splitInst) + W.needs(DCache, CacheUnit::CompleteSecondSplitWrite); + } + } else { + // Finally, Execute Speculative Data + if (inst->isMemRef()) { + if (inst->isLoad()) { + W.needs(AGEN, AGENUnit::GenerateAddr); + W.needs(DCache, CacheUnit::InitiateReadData); + if (inst->splitInst) + W.needs(DCache, CacheUnit::InitSecondSplitRead); + W.needs(DCache, CacheUnit::CompleteReadData); + if (inst->splitInst) + W.needs(DCache, CacheUnit::CompleteSecondSplitRead); + } else if (inst->isStore()) { + if ( inst->numSrcRegs() >= 2 ) { + W.needs(RegManager, UseDefUnit::ReadSrcReg, 1); + } + W.needs(AGEN, AGENUnit::GenerateAddr); + W.needs(DCache, CacheUnit::InitiateWriteData); + if (inst->splitInst) + W.needs(DCache, CacheUnit::InitSecondSplitWrite); + W.needs(DCache, CacheUnit::CompleteWriteData); + if (inst->splitInst) + W.needs(DCache, CacheUnit::CompleteSecondSplitWrite); + } + } else { + W.needs(ExecUnit, ExecutionUnit::ExecuteInst); + } } - W.needs(Grad, GraduationUnit::GraduateInst); + W.needs(Grad, GraduationUnit::CheckFault); for (int idx=0; idx < inst->numDestRegs(); idx++) { W.needs(RegManager, UseDefUnit::WriteDestReg, idx); } + if (inst->isControl()) + W.needs(BPred, BranchPredictor::UpdatePredictor); + + W.needs(Grad, GraduationUnit::GraduateInst); + // Insert Back Schedule into our cache of // resource schedules addToSkedCache(inst, res_sked); @@ -564,16 +674,21 @@ InOrderCPU::regStats() committedInsts .init(numThreads) .name(name() + ".committedInsts") - .desc("Number of Instructions Simulated (Per-Thread)"); + .desc("Number of Instructions committed (Per-Thread)"); + + committedOps + .init(numThreads) + .name(name() + ".committedOps") + .desc("Number of Ops committed (Per-Thread)"); smtCommittedInsts .init(numThreads) .name(name() + ".smtCommittedInsts") - .desc("Number of SMT Instructions Simulated (Per-Thread)"); + .desc("Number of SMT Instructions committed (Per-Thread)"); totalCommittedInsts .name(name() + ".committedInsts_total") - .desc("Number of Instructions Simulated (Total)"); + .desc("Number of Instructions committed (Total)"); cpi .name(name() + ".cpi") @@ -622,8 +737,9 @@ InOrderCPU::tick() ++numCycles; + checkForInterrupts(); + bool pipes_idle = true; - //Tick each of the stages for (int stNum=NumStages - 1; stNum >= 0 ; stNum--) { pipelineStage[stNum]->tick(); @@ -651,17 +767,17 @@ InOrderCPU::tick() if (!tickEvent.scheduled()) { if (_status == SwitchedOut) { // increment stat - lastRunningCycle = curTick(); + lastRunningCycle = curCycle(); } else if (!activityRec.active()) { DPRINTF(InOrderCPU, "sleeping CPU.\n"); - lastRunningCycle = curTick(); + lastRunningCycle = curCycle(); timesIdled++; } else { //Tick next_tick = curTick() + cycles(1); //tickEvent.schedule(next_tick); - schedule(&tickEvent, nextCycle(curTick() + 1)); + schedule(&tickEvent, clockEdge(Cycles(1))); DPRINTF(InOrderCPU, "Scheduled CPU for next tick @ %i.\n", - nextCycle(curTick() + 1)); + clockEdge(Cycles(1))); } } @@ -673,41 +789,50 @@ InOrderCPU::tick() void InOrderCPU::init() { - if (!deferRegistration) { - 
registerThreadContexts(); - } + BaseCPU::init(); - // Set inSyscall so that the CPU doesn't squash when initially - // setting up registers. - for (ThreadID tid = 0; tid < numThreads; ++tid) - thread[tid]->inSyscall = true; + for (ThreadID tid = 0; tid < numThreads; ++tid) { + // Set noSquashFromTC so that the CPU doesn't squash when initially + // setting up registers. + thread[tid]->noSquashFromTC = true; + // Initialise the ThreadContext's memory proxies + thread[tid]->initMemProxies(thread[tid]->getTC()); + } -#if FULL_SYSTEM - for (ThreadID tid = 0; tid < numThreads; tid++) { - ThreadContext *src_tc = threadContexts[tid]; - TheISA::initCPU(src_tc, src_tc->contextId()); + if (FullSystem && !params()->switched_out) { + for (ThreadID tid = 0; tid < numThreads; tid++) { + ThreadContext *src_tc = threadContexts[tid]; + TheISA::initCPU(src_tc, src_tc->contextId()); + } } -#endif - // Clear inSyscall. + // Clear noSquashFromTC. for (ThreadID tid = 0; tid < numThreads; ++tid) - thread[tid]->inSyscall = false; + thread[tid]->noSquashFromTC = false; // Call Initializiation Routine for Resource Pool resPool->init(); } -Port* -InOrderCPU::getPort(const std::string &if_name, int idx) +void +InOrderCPU::verifyMemoryMode() const { - return resPool->getPort(if_name, idx); + if (!system->isTimingMode()) { + fatal("The in-order CPU requires the memory system to be in " + "'timing' mode.\n"); + } } -#if FULL_SYSTEM Fault InOrderCPU::hwrei(ThreadID tid) { - panic("hwrei: Unimplemented"); +#if THE_ISA == ALPHA_ISA + // Need to clear the lock flag upon returning from an interrupt. + setMiscRegNoEffect(AlphaISA::MISCREG_LOCKFLAG, false, tid); + + thread[tid]->kernelStats->hwrei(); + // FIXME: XXX check for interrupts? XXX +#endif return NoFault; } @@ -716,11 +841,65 @@ InOrderCPU::hwrei(ThreadID tid) bool InOrderCPU::simPalCheck(int palFunc, ThreadID tid) { - panic("simPalCheck: Unimplemented"); +#if THE_ISA == ALPHA_ISA + if (this->thread[tid]->kernelStats) + this->thread[tid]->kernelStats->callpal(palFunc, + this->threadContexts[tid]); + + switch (palFunc) { + case PAL::halt: + halt(); + if (--System::numSystemsRunning == 0) + exitSimLoop("all cpus halted"); + break; + case PAL::bpt: + case PAL::bugchk: + if (this->system->breakpoint()) + return false; + break; + } +#endif return true; } +void +InOrderCPU::checkForInterrupts() +{ + for (int i = 0; i < threadContexts.size(); i++) { + ThreadContext *tc = threadContexts[i]; + + if (interrupts->checkInterrupts(tc)) { + Fault interrupt = interrupts->getInterrupt(tc); + + if (interrupt != NoFault) { + DPRINTF(Interrupt, "Processing Intterupt for [tid:%i].\n", + tc->threadId()); + + ThreadID tid = tc->threadId(); + interrupts->updateIntrInfo(tc); + + // Squash from Last Stage in Pipeline + unsigned last_stage = NumStages - 1; + dummyTrapInst[tid]->squashingStage = last_stage; + pipelineStage[last_stage]->setupSquash(dummyTrapInst[tid], + tid); + + // By default, setupSquash will always squash from stage + 1 + pipelineStage[BackEndStartStage - 1]->setupSquash(dummyTrapInst[tid], + tid); + + // Schedule Squash Through-out Resource Pool + resPool->scheduleEvent( + (InOrderCPU::CPUEventType)ResourcePool::SquashAll, + dummyTrapInst[tid], Cycles(0)); + + // Finally, Setup Trap to happen at end of cycle + trapContext(interrupt, tid, dummyTrapInst[tid]); + } + } + } +} Fault InOrderCPU::getInterrupts() @@ -729,7 +908,6 @@ InOrderCPU::getInterrupts() return interrupts->getInterrupt(threadContexts[0]); } - void InOrderCPU::processInterrupts(Fault interrupt) { @@ -748,33 
+926,24 @@ InOrderCPU::processInterrupts(Fault interrupt) trap(interrupt, threadContexts[0]->contextId(), dummyBufferInst); } - -void -InOrderCPU::updateMemPorts() -{ - // Update all ThreadContext's memory ports (Functional/Virtual - // Ports) - ThreadID size = thread.size(); - for (ThreadID i = 0; i < size; ++i) - thread[i]->connectMemPorts(thread[i]->getTC()); -} -#endif - void -InOrderCPU::trap(Fault fault, ThreadID tid, DynInstPtr inst, int delay) +InOrderCPU::trapContext(Fault fault, ThreadID tid, DynInstPtr inst, + Cycles delay) { - //@ Squash Pipeline during TRAP scheduleCpuEvent(Trap, fault, tid, inst, delay); + trapPending[tid] = true; } void -InOrderCPU::trapCPU(Fault fault, ThreadID tid, DynInstPtr inst) +InOrderCPU::trap(Fault fault, ThreadID tid, DynInstPtr inst) { fault->invoke(tcBase(tid), inst->staticInst); + removePipelineStalls(tid); } void -InOrderCPU::squashFromMemStall(DynInstPtr inst, ThreadID tid, int delay) +InOrderCPU::squashFromMemStall(DynInstPtr inst, ThreadID tid, + Cycles delay) { scheduleCpuEvent(SquashFromMemStall, NoFault, tid, inst, delay); } @@ -802,25 +971,21 @@ InOrderCPU::squashDueToMemStall(int stage_num, InstSeqNum seq_num, void InOrderCPU::scheduleCpuEvent(CPUEventType c_event, Fault fault, ThreadID tid, DynInstPtr inst, - unsigned delay, unsigned event_pri_offset) + Cycles delay, CPUEventPri event_pri) { CPUEvent *cpu_event = new CPUEvent(this, c_event, fault, tid, inst, - event_pri_offset); + event_pri); - Tick sked_tick = nextCycle(curTick() + ticks(delay)); - if (delay >= 0) { - DPRINTF(InOrderCPU, "Scheduling CPU Event (%s) for cycle %i, [tid:%i].\n", - eventNames[c_event], curTick() + delay, tid); - schedule(cpu_event, sked_tick); - } else { - cpu_event->process(); - cpuEventRemoveList.push(cpu_event); - } + Tick sked_tick = clockEdge(delay); + DPRINTF(InOrderCPU, "Scheduling CPU Event (%s) for cycle %i, [tid:%i].\n", + eventNames[c_event], curTick() + delay, tid); + schedule(cpu_event, sked_tick); // Broadcast event to the Resource Pool // Need to reset tid just in case this is a dummy instruction inst->setTid(tid); - resPool->scheduleEvent(c_event, inst, 0, 0, tid); + // @todo: Is this really right? Should the delay not be passed on? + resPool->scheduleEvent(c_event, inst, Cycles(0), 0, tid); } bool @@ -860,7 +1025,7 @@ InOrderCPU::activateNextReadyThread() activateThread(ready_tid); // Activate in Resource Pool - resPool->activateAll(ready_tid); + resPool->activateThread(ready_tid); list::iterator ready_it = std::find(readyThreads.begin(), readyThreads.end(), ready_tid); @@ -924,7 +1089,7 @@ InOrderCPU::activateThreadInPipeline(ThreadID tid) } void -InOrderCPU::deactivateContext(ThreadID tid, int delay) +InOrderCPU::deactivateContext(ThreadID tid, Cycles delay) { DPRINTF(InOrderCPU,"[tid:%i]: Deactivating ...\n", tid); @@ -980,7 +1145,6 @@ InOrderCPU::updateThreadPriority() //DEFAULT TO ROUND ROBIN SCHEME //e.g. 
Move highest priority to end of thread list list::iterator list_begin = activeThreads.begin(); - list::iterator list_end = activeThreads.end(); unsigned high_thread = *list_begin; @@ -1007,7 +1171,7 @@ InOrderCPU::tickThreadStats() } void -InOrderCPU::activateContext(ThreadID tid, int delay) +InOrderCPU::activateContext(ThreadID tid, Cycles delay) { DPRINTF(InOrderCPU,"[tid:%i]: Activating ...\n", tid); @@ -1022,15 +1186,12 @@ InOrderCPU::activateContext(ThreadID tid, int delay) } void -InOrderCPU::activateNextReadyContext(int delay) +InOrderCPU::activateNextReadyContext(Cycles delay) { DPRINTF(InOrderCPU,"Activating next ready thread\n"); - // NOTE: Add 5 to the event priority so that we always activate - // threads after we've finished deactivating, squashing,etc. - // other threads scheduleCpuEvent(ActivateNextReadyThread, NoFault, 0/*tid*/, dummyInst[0], - delay, 5); + delay, ActivateNextReadyThread_Pri); // Be sure to signal that there's some activity so the CPU doesn't // deschedule itself. @@ -1040,11 +1201,11 @@ InOrderCPU::activateNextReadyContext(int delay) } void -InOrderCPU::haltContext(ThreadID tid, int delay) +InOrderCPU::haltContext(ThreadID tid) { DPRINTF(InOrderCPU, "[tid:%i]: Calling Halt Context...\n", tid); - scheduleCpuEvent(HaltThread, NoFault, tid, dummyInst[tid], delay); + scheduleCpuEvent(HaltThread, NoFault, tid, dummyInst[tid]); activityRec.activity(); } @@ -1065,9 +1226,9 @@ InOrderCPU::haltThread(ThreadID tid) } void -InOrderCPU::suspendContext(ThreadID tid, int delay) +InOrderCPU::suspendContext(ThreadID tid) { - scheduleCpuEvent(SuspendThread, NoFault, tid, dummyInst[tid], delay); + scheduleCpuEvent(SuspendThread, NoFault, tid, dummyInst[tid]); } void @@ -1097,42 +1258,118 @@ InOrderCPU::getPipeStage(int stage_num) return pipelineStage[stage_num]; } + +RegIndex +InOrderCPU::flattenRegIdx(RegIndex reg_idx, RegClass ®_type, ThreadID tid) +{ + RegIndex rel_idx; + + reg_type = regIdxToClass(reg_idx, &rel_idx); + + switch (reg_type) { + case IntRegClass: + return isa[tid]->flattenIntIndex(rel_idx); + + case FloatRegClass: + return isa[tid]->flattenFloatIndex(rel_idx); + + case MiscRegClass: + return rel_idx; + + default: + panic("register %d out of range\n", reg_idx); + } +} + uint64_t -InOrderCPU::readIntReg(int reg_idx, ThreadID tid) +InOrderCPU::readIntReg(RegIndex reg_idx, ThreadID tid) { + DPRINTF(IntRegs, "[tid:%i]: Reading Int. Reg %i as %x\n", + tid, reg_idx, intRegs[tid][reg_idx]); + return intRegs[tid][reg_idx]; } FloatReg -InOrderCPU::readFloatReg(int reg_idx, ThreadID tid) +InOrderCPU::readFloatReg(RegIndex reg_idx, ThreadID tid) { + DPRINTF(FloatRegs, "[tid:%i]: Reading Float Reg %i as %x, %08f\n", + tid, reg_idx, floatRegs.i[tid][reg_idx], floatRegs.f[tid][reg_idx]); + return floatRegs.f[tid][reg_idx]; } FloatRegBits -InOrderCPU::readFloatRegBits(int reg_idx, ThreadID tid) -{; +InOrderCPU::readFloatRegBits(RegIndex reg_idx, ThreadID tid) +{ + DPRINTF(FloatRegs, "[tid:%i]: Reading Float Reg %i as %x, %08f\n", + tid, reg_idx, floatRegs.i[tid][reg_idx], floatRegs.f[tid][reg_idx]); + return floatRegs.i[tid][reg_idx]; } +CCReg +InOrderCPU::readCCReg(RegIndex reg_idx, ThreadID tid) +{ +#ifdef ISA_HAS_CC_REGS + DPRINTF(CCRegs, "[tid:%i]: Reading CC. 
Reg %i as %x\n", + tid, reg_idx, ccRegs[tid][reg_idx]); + + return ccRegs[tid][reg_idx]; +#else + panic("readCCReg: ISA does not have CC regs\n"); +#endif +} + void -InOrderCPU::setIntReg(int reg_idx, uint64_t val, ThreadID tid) +InOrderCPU::setIntReg(RegIndex reg_idx, uint64_t val, ThreadID tid) { - intRegs[tid][reg_idx] = val; + if (reg_idx == TheISA::ZeroReg) { + DPRINTF(IntRegs, "[tid:%i]: Ignoring Setting of ISA-ZeroReg " + "(Int. Reg %i) to %x\n", tid, reg_idx, val); + return; + } else { + DPRINTF(IntRegs, "[tid:%i]: Setting Int. Reg %i to %x\n", + tid, reg_idx, val); + + intRegs[tid][reg_idx] = val; + } } void -InOrderCPU::setFloatReg(int reg_idx, FloatReg val, ThreadID tid) +InOrderCPU::setFloatReg(RegIndex reg_idx, FloatReg val, ThreadID tid) { floatRegs.f[tid][reg_idx] = val; + DPRINTF(FloatRegs, "[tid:%i]: Setting Float. Reg %i bits to " + "%x, %08f\n", + tid, reg_idx, + floatRegs.i[tid][reg_idx], + floatRegs.f[tid][reg_idx]); } void -InOrderCPU::setFloatRegBits(int reg_idx, FloatRegBits val, ThreadID tid) +InOrderCPU::setFloatRegBits(RegIndex reg_idx, FloatRegBits val, ThreadID tid) { floatRegs.i[tid][reg_idx] = val; + DPRINTF(FloatRegs, "[tid:%i]: Setting Float. Reg %i bits to " + "%x, %08f\n", + tid, reg_idx, + floatRegs.i[tid][reg_idx], + floatRegs.f[tid][reg_idx]); +} + +void +InOrderCPU::setCCReg(RegIndex reg_idx, CCReg val, ThreadID tid) +{ +#ifdef ISA_HAS_CC_REGS + DPRINTF(CCRegs, "[tid:%i]: Setting CC. Reg %i to %x\n", + tid, reg_idx, val); + ccRegs[tid][reg_idx] = val; +#else + panic("readCCReg: ISA does not have CC regs\n"); +#endif } uint64_t @@ -1143,18 +1380,25 @@ InOrderCPU::readRegOtherThread(unsigned reg_idx, ThreadID tid) tid = TheISA::getTargetThread(tcBase(tid)); } - if (reg_idx < FP_Base_DepTag) { + RegIndex rel_idx; + + switch (regIdxToClass(reg_idx, &rel_idx)) { + case IntRegClass: // Integer Register File - return readIntReg(reg_idx, tid); - } else if (reg_idx < Ctrl_Base_DepTag) { + return readIntReg(rel_idx, tid); + + case FloatRegClass: // Float Register File - reg_idx -= FP_Base_DepTag; - return readFloatRegBits(reg_idx, tid); - } else { - reg_idx -= Ctrl_Base_DepTag; - return readMiscReg(reg_idx, tid); // Misc. Register File + return readFloatRegBits(rel_idx, tid); + + case MiscRegClass: + return readMiscReg(rel_idx, tid); // Misc. Register File + + default: + panic("register %d out of range\n", reg_idx); } } + void InOrderCPU::setRegOtherThread(unsigned reg_idx, const MiscReg &val, ThreadID tid) @@ -1164,44 +1408,54 @@ InOrderCPU::setRegOtherThread(unsigned reg_idx, const MiscReg &val, tid = TheISA::getTargetThread(tcBase(tid)); } - if (reg_idx < FP_Base_DepTag) { // Integer Register File - setIntReg(reg_idx, val, tid); - } else if (reg_idx < Ctrl_Base_DepTag) { // Float Register File - reg_idx -= FP_Base_DepTag; - setFloatRegBits(reg_idx, val, tid); - } else { - reg_idx -= Ctrl_Base_DepTag; - setMiscReg(reg_idx, val, tid); // Misc. Register File + RegIndex rel_idx; + + switch (regIdxToClass(reg_idx, &rel_idx)) { + case IntRegClass: + setIntReg(rel_idx, val, tid); + break; + + case FloatRegClass: + setFloatRegBits(rel_idx, val, tid); + break; + + case CCRegClass: + setCCReg(rel_idx, val, tid); + break; + + case MiscRegClass: + setMiscReg(rel_idx, val, tid); // Misc. 
Register File + break; } } MiscReg InOrderCPU::readMiscRegNoEffect(int misc_reg, ThreadID tid) { - return isa[tid].readMiscRegNoEffect(misc_reg); + return isa[tid]->readMiscRegNoEffect(misc_reg); } MiscReg InOrderCPU::readMiscReg(int misc_reg, ThreadID tid) { - return isa[tid].readMiscReg(misc_reg, tcBase(tid)); + return isa[tid]->readMiscReg(misc_reg, tcBase(tid)); } void InOrderCPU::setMiscRegNoEffect(int misc_reg, const MiscReg &val, ThreadID tid) { - isa[tid].setMiscRegNoEffect(misc_reg, val); + isa[tid]->setMiscRegNoEffect(misc_reg, val); } void InOrderCPU::setMiscReg(int misc_reg, const MiscReg &val, ThreadID tid) { - isa[tid].setMiscReg(misc_reg, val, tcBase(tid)); + isa[tid]->setMiscReg(misc_reg, val, tcBase(tid)); } InOrderCPU::ListIt -InOrderCPU::addInst(DynInstPtr &inst) +InOrderCPU::addInst(DynInstPtr inst) { ThreadID tid = inst->readTid(); @@ -1210,6 +1464,24 @@ InOrderCPU::addInst(DynInstPtr &inst) return --(instList[tid].end()); } +InOrderCPU::ListIt +InOrderCPU::findInst(InstSeqNum seq_num, ThreadID tid) +{ + ListIt it = instList[tid].begin(); + ListIt end = instList[tid].end(); + + while (it != end) { + if ((*it)->seqNum == seq_num) + return it; + else if ((*it)->seqNum > seq_num) + break; + + it++; + } + + return instList[tid].end(); +} + void InOrderCPU::updateContextSwitchStats() { @@ -1222,11 +1494,18 @@ InOrderCPU::updateContextSwitchStats() void InOrderCPU::instDone(DynInstPtr inst, ThreadID tid) { - // Set the CPU's PCs - This contributes to the precise state of the CPU + // Set the nextPC to be fetched if this is the last instruction + // committed + // ======== + // This contributes to the precise state of the CPU // which can be used when restoring a thread to the CPU after after any // type of context switching activity (fork, exception, etc.) - pcState(inst->pcState(), tid); + TheISA::PCState comm_pc = inst->pcState(); + lastCommittedPC[tid] = comm_pc; + TheISA::advancePC(comm_pc, inst->staticInst); + pcState(comm_pc, tid); + //@todo: may be unnecessary with new-ISA-specific branch handling code if (inst->isControl()) { thread[tid]->lastGradIsBranch = true; thread[tid]->lastBranchPC = inst->pcState(); @@ -1251,19 +1530,26 @@ InOrderCPU::instDone(DynInstPtr inst, ThreadID tid) // Increment thread-state's instruction count thread[tid]->numInst++; + thread[tid]->numOp++; // Increment thread-state's instruction stats thread[tid]->numInsts++; + thread[tid]->numOps++; // Count committed insts per thread stats - committedInsts[tid]++; + if (!inst->isMicroop() || inst->isLastMicroop()) { + committedInsts[tid]++; - // Count total insts committed stat - totalCommittedInsts++; + // Count total insts committed stat + totalCommittedInsts++; + } + + committedOps[tid]++; // Count SMT-committed insts per thread stat if (numActiveThreads() > 1) { - smtCommittedInsts[tid]++; + if (!inst->isMicroop() || inst->isLastMicroop()) + smtCommittedInsts[tid]++; } // Instruction-Mix Stats @@ -1284,12 +1570,7 @@ InOrderCPU::instDone(DynInstPtr inst, ThreadID tid) } // Check for instruction-count-based events. 
- comInstEventQueue[tid]->serviceEvents(thread[tid]->numInst); - - // Broadcast to other resources an instruction - // has been completed - resPool->scheduleEvent((CPUEventType)ResourcePool::InstGraduated, inst, - 0, 0, tid); + comInstEventQueue[tid]->serviceEvents(thread[tid]->numOp); // Finally, remove instruction from CPU removeInst(inst); @@ -1298,7 +1579,7 @@ InOrderCPU::instDone(DynInstPtr inst, ThreadID tid) // currently unused function, but substitute repetitive code w/this function // call void -InOrderCPU::addToRemoveList(DynInstPtr &inst) +InOrderCPU::addToRemoveList(DynInstPtr inst) { removeInstsThisCycle = true; if (!inst->isRemoveList()) { @@ -1316,7 +1597,7 @@ InOrderCPU::addToRemoveList(DynInstPtr &inst) } void -InOrderCPU::removeInst(DynInstPtr &inst) +InOrderCPU::removeInst(DynInstPtr inst) { DPRINTF(InOrderCPU, "Removing graduated instruction [tid:%i] PC %s " "[sn:%lli]\n", @@ -1369,29 +1650,31 @@ InOrderCPU::removeInstsUntil(const InstSeqNum &seq_num, ThreadID tid) inline void -InOrderCPU::squashInstIt(const ListIt &instIt, ThreadID tid) +InOrderCPU::squashInstIt(const ListIt inst_it, ThreadID tid) { - if ((*instIt)->threadNumber == tid) { + DynInstPtr inst = (*inst_it); + if (inst->threadNumber == tid) { DPRINTF(InOrderCPU, "Squashing instruction, " "[tid:%i] [sn:%lli] PC %s\n", - (*instIt)->threadNumber, - (*instIt)->seqNum, - (*instIt)->pcState()); + inst->threadNumber, + inst->seqNum, + inst->pcState()); - (*instIt)->setSquashed(); + inst->setSquashed(); + archRegDepMap[tid].remove(inst); - if (!(*instIt)->isRemoveList()) { + if (!inst->isRemoveList()) { DPRINTF(InOrderCPU, "Pushing instruction [tid:%i] PC %s " "[sn:%lli] to remove list\n", - (*instIt)->threadNumber, (*instIt)->pcState(), - (*instIt)->seqNum); - (*instIt)->setRemoveList(); - removeList.push(instIt); + inst->threadNumber, inst->pcState(), + inst->seqNum); + inst->setRemoveList(); + removeList.push(inst_it); } else { DPRINTF(InOrderCPU, "Ignoring instruction removal for [tid:%i]" " PC %s [sn:%lli], already on remove list\n", - (*instIt)->threadNumber, (*instIt)->pcState(), - (*instIt)->seqNum); + inst->threadNumber, inst->pcState(), + inst->seqNum); } } @@ -1413,9 +1696,7 @@ InOrderCPU::cleanUpRemovedInsts() ThreadID tid = inst->threadNumber; // Remove From Register Dependency Map, If Necessary - archRegDepMap[(*removeList.front())->threadNumber]. - remove((*removeList.front())); - + // archRegDepMap[tid].remove(inst); // Clear if Non-Speculative if (inst->staticInst && @@ -1424,6 +1705,8 @@ InOrderCPU::cleanUpRemovedInsts() nonSpecInstActive[tid] = false; } + inst->onInstList = false; + instList[tid].erase(removeList.front()); removeList.pop(); @@ -1474,7 +1757,9 @@ InOrderCPU::wakeCPU() DPRINTF(Activity, "Waking up CPU\n"); - Tick extra_cycles = tickToCycles((curTick() - 1) - lastRunningCycle); + Tick extra_cycles = curCycle() - lastRunningCycle; + if (extra_cycles != 0) + --extra_cycles; idleCycles += extra_cycles; for (int stage_num = 0; stage_num < NumStages; stage_num++) { @@ -1483,11 +1768,10 @@ InOrderCPU::wakeCPU() numCycles += extra_cycles; - schedule(&tickEvent, nextCycle(curTick())); + schedule(&tickEvent, clockEdge()); } -#if FULL_SYSTEM - +// Lots of copied full system code...place into BaseCPU class? 
void InOrderCPU::wakeup() { @@ -1499,9 +1783,25 @@ InOrderCPU::wakeup() DPRINTF(Quiesce, "Suspended Processor woken\n"); threadContexts[0]->activate(); } -#endif -#if !FULL_SYSTEM +void +InOrderCPU::syscallContext(Fault fault, ThreadID tid, DynInstPtr inst, + Cycles delay) +{ + // Syscall must be non-speculative, so squash from last stage + unsigned squash_stage = NumStages - 1; + inst->setSquashInfo(squash_stage); + + // Squash In Pipeline Stage + pipelineStage[squash_stage]->setupSquash(inst, tid); + + // Schedule Squash Through-out Resource Pool + resPool->scheduleEvent( + (InOrderCPU::CPUEventType)ResourcePool::SquashAll, inst, + Cycles(0)); + scheduleCpuEvent(Syscall, fault, tid, inst, delay, Syscall_Pri); +} + void InOrderCPU::syscall(int64_t callnum, ThreadID tid) { @@ -1523,13 +1823,11 @@ InOrderCPU::syscall(int64_t callnum, ThreadID tid) // Clear Non-Speculative Block Variable nonSpecInstActive[tid] = false; } -#endif TheISA::TLB* InOrderCPU::getITBPtr() { - CacheUnit *itb_res = - dynamic_cast(resPool->getResource(fetchPortIdx)); + CacheUnit *itb_res = resPool->getInstUnit(); return itb_res->tlb(); } @@ -1537,30 +1835,26 @@ InOrderCPU::getITBPtr() TheISA::TLB* InOrderCPU::getDTBPtr() { - CacheUnit *dtb_res = - dynamic_cast(resPool->getResource(dataPortIdx)); - return dtb_res->tlb(); + return resPool->getDataUnit()->tlb(); +} + +TheISA::Decoder * +InOrderCPU::getDecoderPtr(unsigned tid) +{ + return resPool->getInstUnit()->decoder[tid]; } Fault InOrderCPU::read(DynInstPtr inst, Addr addr, uint8_t *data, unsigned size, unsigned flags) { - //@TODO: Generalize name "CacheUnit" to "MemUnit" just in case - // you want to run w/out caches? - CacheUnit *cache_res = - dynamic_cast(resPool->getResource(dataPortIdx)); - - return cache_res->read(inst, addr, data, size, flags); + return resPool->getDataUnit()->read(inst, addr, data, size, flags); } Fault InOrderCPU::write(DynInstPtr inst, uint8_t *data, unsigned size, Addr addr, unsigned flags, uint64_t *write_res) { - //@TODO: Generalize name "CacheUnit" to "MemUnit" just in case - // you want to run w/out caches? - CacheUnit *cache_res = - dynamic_cast(resPool->getResource(dataPortIdx)); - return cache_res->write(inst, data, size, addr, flags, write_res); + return resPool->getDataUnit()->write(inst, data, size, addr, flags, + write_res); }
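
The new InOrderCPU::CachePort added at the top of this patch is a thin forwarding layer: timing responses and retries arriving from the memory system are handed straight to the owning CacheUnit. Below is a minimal, self-contained sketch of that delegation pattern; Packet and CacheUnit here are simplified stand-ins for illustration, not gem5's real classes.

#include <cstdio>

// Simplified stand-ins for gem5's Packet and CacheUnit; only the members
// needed to show the forwarding pattern are modeled.
struct Packet {
    bool error;
    unsigned long addr;
    bool isError() const { return error; }
    unsigned long getAddr() const { return addr; }
};

struct CacheUnit {
    void processCacheCompletion(Packet *pkt) {
        std::printf("completion for addr %#lx\n", pkt->getAddr());
    }
    void recvRetry() { std::printf("retrying blocked request\n"); }
};

// The port holds no policy of its own: every callback is delegated to the
// cache unit, mirroring what InOrderCPU::CachePort does in the patch.
class CachePort {
  public:
    explicit CachePort(CacheUnit *unit) : cacheUnit(unit) {}

    bool recvTimingResp(Packet *pkt) {
        if (pkt->isError())
            std::printf("error packet for addr %#lx\n", pkt->getAddr());
        else
            cacheUnit->processCacheCompletion(pkt);
        return true;    // the response is always consumed
    }

    void recvRetry() { cacheUnit->recvRetry(); }

  private:
    CacheUnit *cacheUnit;
};

int main()
{
    CacheUnit unit;
    CachePort port(&unit);
    Packet ok = {false, 0x1000};
    Packet bad = {true, 0x2000};
    port.recvTimingResp(&ok);
    port.recvTimingResp(&bad);
    port.recvRetry();
    return 0;
}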
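
readRegOtherThread() and setRegOtherThread() no longer compare a unified register index against FP_Base_DepTag / Ctrl_Base_DepTag offsets; they ask regIdxToClass() for the register class plus a class-relative index and then dispatch on the class. The sketch below models only that classification step; the register-file sizes and the enum values are illustrative assumptions, not gem5's actual layout or types.

#include <cassert>
#include <cstdio>

enum RegClass { IntRegClass, FloatRegClass, CCRegClass, MiscRegClass };

// Illustrative register-file sizes; gem5 derives these from the ISA.
const unsigned NumIntRegs   = 32;
const unsigned NumFloatRegs = 32;
const unsigned NumCCRegs    = 8;

// Map a unified register index to (class, index relative to that class),
// which is the job regIdxToClass() performs in the patched code.
RegClass regIdxToClass(unsigned reg_idx, unsigned *rel_idx)
{
    if (reg_idx < NumIntRegs) {
        *rel_idx = reg_idx;
        return IntRegClass;
    } else if (reg_idx < NumIntRegs + NumFloatRegs) {
        *rel_idx = reg_idx - NumIntRegs;
        return FloatRegClass;
    } else if (reg_idx < NumIntRegs + NumFloatRegs + NumCCRegs) {
        *rel_idx = reg_idx - NumIntRegs - NumFloatRegs;
        return CCRegClass;
    }
    *rel_idx = reg_idx - NumIntRegs - NumFloatRegs - NumCCRegs;
    return MiscRegClass;
}

int main()
{
    unsigned rel;
    assert(regIdxToClass(5, &rel) == IntRegClass && rel == 5);
    assert(regIdxToClass(40, &rel) == FloatRegClass && rel == 8);
    assert(regIdxToClass(70, &rel) == CCRegClass && rel == 6);
    std::printf("all classifications as expected\n");
    return 0;
}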
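
Scheduling calls throughout the patch switch from raw tick arithmetic, nextCycle(curTick() + ticks(delay)), to Cycles-typed delays resolved with clockEdge(delay). The toy model below shows the contract the patched code relies on, namely aligning to the next clock edge and then advancing a whole number of cycles, under the assumption of a fixed clock period; gem5's real implementation lives in its clocked-object base class and is not reproduced here.

#include <cassert>
#include <cstdint>

typedef uint64_t Tick;
typedef uint64_t Cycles;

struct ClockedObjectModel {
    Tick period;    // ticks per clock cycle
    Tick now;       // current simulated tick

    // Round the current time up to a cycle boundary, then advance by
    // 'delay' whole cycles: the behavior clockEdge() is used for above.
    Tick clockEdge(Cycles delay) const {
        Tick aligned = ((now + period - 1) / period) * period;
        return aligned + delay * period;
    }
};

int main()
{
    ClockedObjectModel cpu{500, 1250};   // 500-tick clock, mid-cycle "now"
    assert(cpu.clockEdge(0) == 1500);    // next edge
    assert(cpu.clockEdge(2) == 2500);    // two cycles past that edge
    cpu.now = 1500;                      // exactly on an edge
    assert(cpu.clockEdge(1) == 2000);
    return 0;
}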