From cbfbb7bc56630ddefb95625a6da87b3c1da9599d Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Wed, 2 Aug 2006 12:05:34 -0400 Subject: [PATCH] Updates to bring CPU portion of m5 up-to-date with newmem. --HG-- extra : convert_revision : 00e6eefb24e6ffd9c7c5d8165db26fbf6199fdc4 --- base/timebuf.hh | 5 + cpu/base_dyn_inst.cc | 35 ++---- cpu/base_dyn_inst.hh | 193 +++++++++++++---------------- cpu/o3/alpha_cpu_builder.cc | 33 ++--- cpu/o3/alpha_cpu_impl.hh | 2 +- cpu/o3/alpha_params.hh | 14 ++- cpu/o3/commit.hh | 13 -- cpu/o3/commit_impl.hh | 62 +++------ cpu/o3/cpu.cc | 66 ++++++++-- cpu/o3/cpu.hh | 6 + cpu/o3/decode_impl.hh | 9 +- cpu/o3/fetch.hh | 6 + cpu/o3/fetch_impl.hh | 19 ++- cpu/o3/iew.hh | 107 ++++++++++------ cpu/o3/iew_impl.hh | 157 +++++++++++------------ cpu/o3/inst_queue.hh | 2 - cpu/o3/inst_queue_impl.hh | 23 +--- cpu/o3/lsq.hh | 3 + cpu/o3/lsq_impl.hh | 10 ++ cpu/o3/lsq_unit.hh | 53 +++++--- cpu/o3/lsq_unit_impl.hh | 47 ++++++- cpu/o3/mem_dep_unit.cc | 2 + cpu/o3/mem_dep_unit_impl.hh | 14 +++ cpu/o3/regfile.hh | 16 ++- cpu/o3/rename_impl.hh | 4 +- cpu/o3/rob.hh | 2 +- cpu/o3/rob_impl.hh | 14 +-- cpu/ozone/cpu.hh | 45 ++++--- cpu/ozone/cpu_impl.hh | 53 ++++---- cpu/ozone/inorder_back_end_impl.hh | 2 +- cpu/ozone/inst_queue_impl.hh | 8 +- cpu/ozone/lw_back_end.hh | 78 ++++++------ cpu/ozone/lw_back_end_impl.hh | 138 +++++++++++---------- cpu/ozone/lw_lsq.hh | 2 +- cpu/ozone/thread_state.hh | 2 - cpu/thread_state.hh | 1 + python/m5/objects/AlphaFullCPU.py | 13 +- 37 files changed, 685 insertions(+), 574 deletions(-) diff --git a/base/timebuf.hh b/base/timebuf.hh index f6b5b2781..db34528d8 100644 --- a/base/timebuf.hh +++ b/base/timebuf.hh @@ -212,6 +212,11 @@ class TimeBuffer { return wire(this, 0); } + + int getSize() + { + return size; + } }; #endif // __BASE_TIMEBUF_HH__ diff --git a/cpu/base_dyn_inst.cc b/cpu/base_dyn_inst.cc index 64a995689..1a52279cc 100644 --- a/cpu/base_dyn_inst.cc +++ b/cpu/base_dyn_inst.cc @@ -100,32 +100,15 @@ 
BaseDynInst::initVars() readyRegs = 0; - completed = false; - resultReady = false; - canIssue = false; - issued = false; - executed = false; - canCommit = false; - committed = false; - squashed = false; - squashedInIQ = false; - squashedInLSQ = false; - squashedInROB = false; + instResult.integer = 0; + + status.reset(); + eaCalcDone = false; memOpDone = false; + lqIdx = -1; sqIdx = -1; - reachedCommit = false; - - blockingInst = false; - recoverInst = false; - - iqEntry = false; - robEntry = false; - - serializeBefore = false; - serializeAfter = false; - serializeHandled = false; // Eventually make this a parameter. threadNumber = 0; @@ -395,7 +378,7 @@ void BaseDynInst::markSrcRegReady() { if (++readyRegs == numSrcRegs()) { - canIssue = true; + status.set(CanIssue); } } @@ -403,13 +386,9 @@ template void BaseDynInst::markSrcRegReady(RegIndex src_idx) { - ++readyRegs; - _readySrcRegIdx[src_idx] = true; - if (readyRegs == numSrcRegs()) { - canIssue = true; - } + markSrcRegReady(); } template diff --git a/cpu/base_dyn_inst.hh b/cpu/base_dyn_inst.hh index 388ea4a8d..01f6be185 100644 --- a/cpu/base_dyn_inst.hh +++ b/cpu/base_dyn_inst.hh @@ -127,56 +127,34 @@ class BaseDynInst : public FastAlloc, public RefCounted /** The sequence number of the instruction. */ InstSeqNum seqNum; - /** Is the instruction in the IQ */ - bool iqEntry; - - /** Is the instruction in the ROB */ - bool robEntry; - - /** Is the instruction in the LSQ */ - bool lsqEntry; - - /** Is the instruction completed. */ - bool completed; - - /** Is the instruction's result ready. */ - bool resultReady; - - /** Can this instruction issue. */ - bool canIssue; - - /** Has this instruction issued. */ - bool issued; - - /** Has this instruction executed (or made it through execute) yet. */ - bool executed; - - /** Can this instruction commit. */ - bool canCommit; - - /** Is this instruction committed. */ - bool committed; - - /** Is this instruction squashed. 
*/ - bool squashed; - - /** Is this instruction squashed in the instruction queue. */ - bool squashedInIQ; - - /** Is this instruction squashed in the instruction queue. */ - bool squashedInLSQ; - - /** Is this instruction squashed in the instruction queue. */ - bool squashedInROB; - - /** Is this a recover instruction. */ - bool recoverInst; - - /** Is this a thread blocking instruction. */ - bool blockingInst; /* this inst has called thread_block() */ + enum Status { + IqEntry, /// Instruction is in the IQ + RobEntry, /// Instruction is in the ROB + LsqEntry, /// Instruction is in the LSQ + Completed, /// Instruction has completed + ResultReady, /// Instruction has its result + CanIssue, /// Instruction can issue and execute + Issued, /// Instruction has issued + Executed, /// Instruction has executed + CanCommit, /// Instruction can commit + AtCommit, /// Instruction has reached commit + Committed, /// Instruction has committed + Squashed, /// Instruction is squashed + SquashedInIQ, /// Instruction is squashed in the IQ + SquashedInLSQ, /// Instruction is squashed in the LSQ + SquashedInROB, /// Instruction is squashed in the ROB + RecoverInst, /// Is a recover instruction + BlockingInst, /// Is a blocking instruction + ThreadsyncWait, /// Is a thread synchronization instruction + SerializeBefore, /// Needs to serialize on + /// instructions ahead of it + SerializeAfter, /// Needs to serialize instructions behind it + SerializeHandled, /// Serialization has been handled + NumStatus + }; - /** Is this a thread syncrhonization instruction. */ - bool threadsyncWait; + /** The status of this BaseDynInst. Several bits can be set. */ + std::bitset status; /** The thread this instruction is from. 
*/ short threadNumber; @@ -351,9 +329,9 @@ class BaseDynInst : public FastAlloc, public RefCounted bool isThreadSync() const { return staticInst->isThreadSync(); } bool isSerializing() const { return staticInst->isSerializing(); } bool isSerializeBefore() const - { return staticInst->isSerializeBefore() || serializeBefore; } + { return staticInst->isSerializeBefore() || status[SerializeBefore]; } bool isSerializeAfter() const - { return staticInst->isSerializeAfter() || serializeAfter; } + { return staticInst->isSerializeAfter() || status[SerializeAfter]; } bool isMemBarrier() const { return staticInst->isMemBarrier(); } bool isWriteBarrier() const { return staticInst->isWriteBarrier(); } bool isNonSpeculative() const { return staticInst->isNonSpeculative(); } @@ -362,41 +340,32 @@ class BaseDynInst : public FastAlloc, public RefCounted bool isUnverifiable() const { return staticInst->isUnverifiable(); } /** Temporarily sets this instruction as a serialize before instruction. */ - void setSerializeBefore() { serializeBefore = true; } + void setSerializeBefore() { status.set(SerializeBefore); } /** Clears the serializeBefore part of this instruction. */ - void clearSerializeBefore() { serializeBefore = false; } + void clearSerializeBefore() { status.reset(SerializeBefore); } /** Checks if this serializeBefore is only temporarily set. */ - bool isTempSerializeBefore() { return serializeBefore; } - - /** Tracks if instruction has been externally set as serializeBefore. */ - bool serializeBefore; + bool isTempSerializeBefore() { return status[SerializeBefore]; } /** Temporarily sets this instruction as a serialize after instruction. 
*/ - void setSerializeAfter() { serializeAfter = true; } + void setSerializeAfter() { status.set(SerializeAfter); } /** Clears the serializeAfter part of this instruction.*/ - void clearSerializeAfter() { serializeAfter = false; } + void clearSerializeAfter() { status.reset(SerializeAfter); } /** Checks if this serializeAfter is only temporarily set. */ - bool isTempSerializeAfter() { return serializeAfter; } + bool isTempSerializeAfter() { return status[SerializeAfter]; } - /** Tracks if instruction has been externally set as serializeAfter. */ - bool serializeAfter; + /** Sets the serialization part of this instruction as handled. */ + void setSerializeHandled() { status.set(SerializeHandled); } /** Checks if the serialization part of this instruction has been * handled. This does not apply to the temporary serializing * state; it only applies to this instruction's own permanent * serializing state. */ - bool isSerializeHandled() { return serializeHandled; } - - /** Sets the serialization part of this instruction as handled. */ - void setSerializeHandled() { serializeHandled = true; } - - /** Whether or not the serialization of this instruction has been handled. */ - bool serializeHandled; + bool isSerializeHandled() { return status[SerializeHandled]; } /** Returns the opclass of this instruction. */ OpClass opClass() const { return staticInst->opClass(); } @@ -463,106 +432,112 @@ class BaseDynInst : public FastAlloc, public RefCounted } /** Sets this instruction as completed. */ - void setCompleted() { completed = true; } + void setCompleted() { status.set(Completed); } /** Returns whether or not this instruction is completed. */ - bool isCompleted() const { return completed; } + bool isCompleted() const { return status[Completed]; } - void setResultReady() { resultReady = true; } + /** Marks the result as ready. 
*/ + void setResultReady() { status.set(ResultReady); } - bool isResultReady() const { return resultReady; } + /** Returns whether or not the result is ready. */ + bool isResultReady() const { return status[ResultReady]; } /** Sets this instruction as ready to issue. */ - void setCanIssue() { canIssue = true; } + void setCanIssue() { status.set(CanIssue); } /** Returns whether or not this instruction is ready to issue. */ - bool readyToIssue() const { return canIssue; } + bool readyToIssue() const { return status[CanIssue]; } /** Sets this instruction as issued from the IQ. */ - void setIssued() { issued = true; } + void setIssued() { status.set(Issued); } /** Returns whether or not this instruction has issued. */ - bool isIssued() const { return issued; } + bool isIssued() const { return status[Issued]; } /** Sets this instruction as executed. */ - void setExecuted() { executed = true; } + void setExecuted() { status.set(Executed); } /** Returns whether or not this instruction has executed. */ - bool isExecuted() const { return executed; } + bool isExecuted() const { return status[Executed]; } /** Sets this instruction as ready to commit. */ - void setCanCommit() { canCommit = true; } + void setCanCommit() { status.set(CanCommit); } /** Clears this instruction as being ready to commit. */ - void clearCanCommit() { canCommit = false; } + void clearCanCommit() { status.reset(CanCommit); } /** Returns whether or not this instruction is ready to commit. */ - bool readyToCommit() const { return canCommit; } + bool readyToCommit() const { return status[CanCommit]; } + + void setAtCommit() { status.set(AtCommit); } + + bool isAtCommit() { return status[AtCommit]; } /** Sets this instruction as committed. */ - void setCommitted() { committed = true; } + void setCommitted() { status.set(Committed); } /** Returns whether or not this instruction is committed. 
*/ - bool isCommitted() const { return committed; } + bool isCommitted() const { return status[Committed]; } /** Sets this instruction as squashed. */ - void setSquashed() { squashed = true; } + void setSquashed() { status.set(Squashed); } /** Returns whether or not this instruction is squashed. */ - bool isSquashed() const { return squashed; } + bool isSquashed() const { return status[Squashed]; } //Instruction Queue Entry //----------------------- /** Sets this instruction as a entry the IQ. */ - void setInIQ() { iqEntry = true; } + void setInIQ() { status.set(IqEntry); } /** Sets this instruction as a entry the IQ. */ - void removeInIQ() { iqEntry = false; } + void clearInIQ() { status.reset(IqEntry); } + + /** Returns whether or not this instruction has issued. */ + bool isInIQ() const { return status[IqEntry]; } /** Sets this instruction as squashed in the IQ. */ - void setSquashedInIQ() { squashedInIQ = true; squashed = true;} + void setSquashedInIQ() { status.set(SquashedInIQ); status.set(Squashed);} /** Returns whether or not this instruction is squashed in the IQ. */ - bool isSquashedInIQ() const { return squashedInIQ; } - - /** Returns whether or not this instruction has issued. */ - bool isInIQ() const { return iqEntry; } + bool isSquashedInIQ() const { return status[SquashedInIQ]; } //Load / Store Queue Functions //----------------------- /** Sets this instruction as a entry the LSQ. */ - void setInLSQ() { lsqEntry = true; } + void setInLSQ() { status.set(LsqEntry); } /** Sets this instruction as a entry the LSQ. */ - void removeInLSQ() { lsqEntry = false; } + void removeInLSQ() { status.reset(LsqEntry); } + + /** Returns whether or not this instruction is in the LSQ. */ + bool isInLSQ() const { return status[LsqEntry]; } /** Sets this instruction as squashed in the LSQ. */ - void setSquashedInLSQ() { squashedInLSQ = true;} + void setSquashedInLSQ() { status.set(SquashedInLSQ);} /** Returns whether or not this instruction is squashed in the LSQ. 
*/ - bool isSquashedInLSQ() const { return squashedInLSQ; } - - /** Returns whether or not this instruction is in the LSQ. */ - bool isInLSQ() const { return lsqEntry; } + bool isSquashedInLSQ() const { return status[SquashedInLSQ]; } //Reorder Buffer Functions //----------------------- /** Sets this instruction as a entry the ROB. */ - void setInROB() { robEntry = true; } + void setInROB() { status.set(RobEntry); } /** Sets this instruction as a entry the ROB. */ - void removeInROB() { robEntry = false; } + void clearInROB() { status.reset(RobEntry); } + + /** Returns whether or not this instruction is in the ROB. */ + bool isInROB() const { return status[RobEntry]; } /** Sets this instruction as squashed in the ROB. */ - void setSquashedInROB() { squashedInROB = true; } + void setSquashedInROB() { status.set(SquashedInROB); } /** Returns whether or not this instruction is squashed in the ROB. */ - bool isSquashedInROB() const { return squashedInROB; } - - /** Returns whether or not this instruction is in the ROB. */ - bool isInROB() const { return robEntry; } + bool isSquashedInROB() const { return status[SquashedInROB]; } /** Read the PC of this instruction. */ const Addr readPC() const { return PC; } @@ -619,8 +594,6 @@ class BaseDynInst : public FastAlloc, public RefCounted /** Store queue index. */ int16_t sqIdx; - bool reachedCommit; - /** Iterator pointing to this BaseDynInst in the list of all insts. 
*/ ListIt instListIt; @@ -636,7 +609,7 @@ template inline Fault BaseDynInst::read(Addr addr, T &data, unsigned flags) { - if (executed) { + if (status[Executed]) { fault = cpu->read(req, data, lqIdx); return fault; } diff --git a/cpu/o3/alpha_cpu_builder.cc b/cpu/o3/alpha_cpu_builder.cc index 08d42cd46..c563fbef3 100644 --- a/cpu/o3/alpha_cpu_builder.cc +++ b/cpu/o3/alpha_cpu_builder.cc @@ -94,12 +94,10 @@ Param renameWidth; Param commitToIEWDelay; Param renameToIEWDelay; Param issueToExecuteDelay; +Param dispatchWidth; Param issueWidth; -Param executeWidth; -Param executeIntWidth; -Param executeFloatWidth; -Param executeBranchWidth; -Param executeMemoryWidth; +Param wbWidth; +Param wbDepth; SimObjectParam fuPool; Param iewToCommitDelay; @@ -109,6 +107,9 @@ Param squashWidth; Param trapLatency; Param fetchTrapLatency; +Param backComSize; +Param forwardComSize; + Param predType; Param localPredictorSize; Param localCtrBits; @@ -219,12 +220,10 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivAlphaFullCPU) "Issue/Execute/Writeback delay"), INIT_PARAM(issueToExecuteDelay, "Issue to execute delay (internal" "to the IEW stage)"), + INIT_PARAM(dispatchWidth, "Dispatch width"), INIT_PARAM(issueWidth, "Issue width"), - INIT_PARAM(executeWidth, "Execute width"), - INIT_PARAM(executeIntWidth, "Integer execute width"), - INIT_PARAM(executeFloatWidth, "Floating point execute width"), - INIT_PARAM(executeBranchWidth, "Branch execute width"), - INIT_PARAM(executeMemoryWidth, "Memory execute width"), + INIT_PARAM(wbWidth, "Writeback width"), + INIT_PARAM(wbDepth, "Writeback depth (number of cycles it can buffer)"), INIT_PARAM_DFLT(fuPool, "Functional unit pool", NULL), INIT_PARAM(iewToCommitDelay, "Issue/Execute/Writeback to commit " @@ -235,6 +234,9 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivAlphaFullCPU) INIT_PARAM_DFLT(trapLatency, "Number of cycles before the trap is handled", 6), INIT_PARAM_DFLT(fetchTrapLatency, "Number of cycles before the fetch trap is handled", 12), + 
INIT_PARAM(backComSize, "Time buffer size for backwards communication"), + INIT_PARAM(forwardComSize, "Time buffer size for forward communication"), + INIT_PARAM(predType, "Type of branch predictor ('local', 'tournament')"), INIT_PARAM(localPredictorSize, "Size of local predictor"), INIT_PARAM(localCtrBits, "Bits per counter"), @@ -353,12 +355,10 @@ CREATE_SIM_OBJECT(DerivAlphaFullCPU) params->commitToIEWDelay = commitToIEWDelay; params->renameToIEWDelay = renameToIEWDelay; params->issueToExecuteDelay = issueToExecuteDelay; + params->dispatchWidth = dispatchWidth; params->issueWidth = issueWidth; - params->executeWidth = executeWidth; - params->executeIntWidth = executeIntWidth; - params->executeFloatWidth = executeFloatWidth; - params->executeBranchWidth = executeBranchWidth; - params->executeMemoryWidth = executeMemoryWidth; + params->wbWidth = wbWidth; + params->wbDepth = wbDepth; params->fuPool = fuPool; params->iewToCommitDelay = iewToCommitDelay; @@ -368,6 +368,9 @@ CREATE_SIM_OBJECT(DerivAlphaFullCPU) params->trapLatency = trapLatency; params->fetchTrapLatency = fetchTrapLatency; + params->backComSize = backComSize; + params->forwardComSize = forwardComSize; + params->predType = predType; params->localPredictorSize = localPredictorSize; params->localCtrBits = localCtrBits; diff --git a/cpu/o3/alpha_cpu_impl.hh b/cpu/o3/alpha_cpu_impl.hh index f39fdf6b6..1bf0652cd 100644 --- a/cpu/o3/alpha_cpu_impl.hh +++ b/cpu/o3/alpha_cpu_impl.hh @@ -383,7 +383,7 @@ AlphaFullCPU::AlphaXC::copyArchRegs(ExecContext *xc) } // Copy the misc regs. - cpu->regFile.miscRegs[tid].copyMiscRegs(xc); + TheISA::copyMiscRegs(xc, this); // Then finally set the PC and the next PC. 
cpu->setPC(xc->readPC(), tid); diff --git a/cpu/o3/alpha_params.hh b/cpu/o3/alpha_params.hh index f0836a9fd..4ab130d02 100644 --- a/cpu/o3/alpha_params.hh +++ b/cpu/o3/alpha_params.hh @@ -106,12 +106,10 @@ class AlphaSimpleParams : public BaseFullCPU::Params unsigned commitToIEWDelay; unsigned renameToIEWDelay; unsigned issueToExecuteDelay; + unsigned dispatchWidth; unsigned issueWidth; - unsigned executeWidth; - unsigned executeIntWidth; - unsigned executeFloatWidth; - unsigned executeBranchWidth; - unsigned executeMemoryWidth; + unsigned wbWidth; + unsigned wbDepth; FUPool *fuPool; // @@ -124,6 +122,12 @@ class AlphaSimpleParams : public BaseFullCPU::Params Tick trapLatency; Tick fetchTrapLatency; + // + // Timebuffer sizes + // + unsigned backComSize; + unsigned forwardComSize; + // // Branch predictor (BP, BTB, RAS) // diff --git a/cpu/o3/commit.hh b/cpu/o3/commit.hh index d93822394..b153effc4 100644 --- a/cpu/o3/commit.hh +++ b/cpu/o3/commit.hh @@ -160,10 +160,6 @@ class DefaultCommit /** Sets the pointer to the queue coming from IEW. */ void setIEWQueue(TimeBuffer *iq_ptr); - void setFetchStage(Fetch *fetch_stage); - - Fetch *fetchStage; - /** Sets the pointer to the IEW stage. */ void setIEWStage(IEW *iew_stage); @@ -367,11 +363,6 @@ class DefaultCommit */ unsigned renameWidth; - /** IEW width, in instructions. Used so ROB knows how many - * instructions to get from the IEW instruction queue. - */ - unsigned iewWidth; - /** Commit width, in instructions. */ unsigned commitWidth; @@ -392,10 +383,6 @@ class DefaultCommit */ Tick trapLatency; - Tick fetchTrapLatency; - - Tick fetchFaultTick; - /** The commit PC of each thread. Refers to the instruction that * is currently being processed/committed. 
*/ diff --git a/cpu/o3/commit_impl.hh b/cpu/o3/commit_impl.hh index 798f30294..364e685c2 100644 --- a/cpu/o3/commit_impl.hh +++ b/cpu/o3/commit_impl.hh @@ -71,12 +71,10 @@ DefaultCommit::DefaultCommit(Params *params) renameToROBDelay(params->renameToROBDelay), fetchToCommitDelay(params->commitToFetchDelay), renameWidth(params->renameWidth), - iewWidth(params->executeWidth), commitWidth(params->commitWidth), numThreads(params->numberOfThreads), switchedOut(false), - trapLatency(params->trapLatency), - fetchTrapLatency(params->fetchTrapLatency) + trapLatency(params->trapLatency) { _status = Active; _nextStatus = Inactive; @@ -114,10 +112,8 @@ DefaultCommit::DefaultCommit(Params *params) changedROBNumEntries[i] = false; trapSquash[i] = false; xcSquash[i] = false; + PC[i] = nextPC[i] = 0; } - - fetchFaultTick = 0; - fetchTrapWait = 0; } template @@ -240,7 +236,6 @@ DefaultCommit::setCPU(FullCPU *cpu_ptr) cpu->activateStage(FullCPU::CommitIdx); trapLatency = cpu->cycles(trapLatency); - fetchTrapLatency = cpu->cycles(fetchTrapLatency); } template @@ -297,13 +292,6 @@ DefaultCommit::setIEWQueue(TimeBuffer *iq_ptr) fromIEW = iewQueue->getWire(-iewToCommitDelay); } -template -void -DefaultCommit::setFetchStage(Fetch *fetch_stage) -{ - fetchStage = fetch_stage; -} - template void DefaultCommit::setIEWStage(IEW *iew_stage) @@ -431,7 +419,7 @@ DefaultCommit::setNextStatus() } } - assert(squashes == squashCounter); + squashCounter = squashes; // If commit is currently squashing, then it will have activity for the // next cycle. Set its next status as active. 
@@ -536,8 +524,6 @@ DefaultCommit::squashFromTrap(unsigned tid) commitStatus[tid] = ROBSquashing; cpu->activityThisCycle(); - - ++squashCounter; } template @@ -555,8 +541,6 @@ DefaultCommit::squashFromXC(unsigned tid) cpu->activityThisCycle(); xcSquash[tid] = false; - - ++squashCounter; } template @@ -571,6 +555,9 @@ DefaultCommit::tick() return; } + if ((*activeThreads).size() <=0) + return; + list::iterator threads = (*activeThreads).begin(); // Check if any of the threads are done squashing. Change the @@ -582,10 +569,12 @@ DefaultCommit::tick() if (rob->isDoneSquashing(tid)) { commitStatus[tid] = Running; - --squashCounter; } else { DPRINTF(Commit,"[tid:%u]: Still Squashing, cannot commit any" - "insts this cycle.\n", tid); + " insts this cycle.\n", tid); + rob->doSquash(tid); + toIEW->commitInfo[tid].robSquashing = true; + wroteToTimeBuffer = true; } } } @@ -691,29 +680,7 @@ DefaultCommit::commit() while (threads != (*activeThreads).end()) { unsigned tid = *threads++; -/* - if (fromFetch->fetchFault && commitStatus[0] != TrapPending) { - // Record the fault. Wait until it's empty in the ROB. - // Then handle the trap. Ignore it if there's already a - // trap pending as fetch will be redirected. - fetchFault = fromFetch->fetchFault; - fetchFaultTick = curTick + fetchTrapLatency; - commitStatus[0] = FetchTrapPending; - DPRINTF(Commit, "Fault from fetch recorded. Will trap if the " - "ROB empties without squashing the fault.\n"); - fetchTrapWait = 0; - } - // Fetch may tell commit to clear the trap if it's been squashed. - if (fromFetch->clearFetchFault) { - DPRINTF(Commit, "Received clear fetch fault signal\n"); - fetchTrapWait = 0; - if (commitStatus[0] == FetchTrapPending) { - DPRINTF(Commit, "Clearing fault from fetch\n"); - commitStatus[0] = Running; - } - } -*/ // Not sure which one takes priority. I think if we have // both, that's a bad sign. 
if (trapSquash[tid] == true) { @@ -741,8 +708,6 @@ DefaultCommit::commit() commitStatus[tid] = ROBSquashing; - ++squashCounter; - // If we want to include the squashing instruction in the squash, // then use one older sequence number. InstSeqNum squashed_inst = fromIEW->squashedSeqNum[tid]; @@ -944,7 +909,7 @@ DefaultCommit::commitHead(DynInstPtr &head_inst, unsigned inst_num) // and committed this instruction. thread[tid]->funcExeInst--; - head_inst->reachedCommit = true; + head_inst->setAtCommit(); if (head_inst->isNonSpeculative() || head_inst->isStoreConditional() || @@ -1060,7 +1025,7 @@ DefaultCommit::commitHead(DynInstPtr &head_inst, unsigned inst_num) // Generate trap squash event. generateTrapEvent(tid); - +// warn("%lli fault (%d) handled @ PC %08p", curTick, inst_fault->name(), head_inst->readPC()); return false; #else // !FULL_SYSTEM panic("fault (%d) detected @ PC %08p", inst_fault, @@ -1083,6 +1048,9 @@ DefaultCommit::commitHead(DynInstPtr &head_inst, unsigned inst_num) head_inst->renamedDestRegIdx(i)); } + if (head_inst->isCopy()) + panic("Should not commit any copy instructions!"); + // Finally clear the head ROB entry. rob->retireHead(tid); diff --git a/cpu/o3/cpu.cc b/cpu/o3/cpu.cc index 8d72bdc41..f1571e61b 100644 --- a/cpu/o3/cpu.cc +++ b/cpu/o3/cpu.cc @@ -108,12 +108,14 @@ FullO3CPU::FullO3CPU(Params *params) // For now just have these time buffers be pretty big. 
// @todo: Make these time buffer sizes parameters or derived // from latencies - timeBuffer(5, 5), - fetchQueue(5, 5), - decodeQueue(5, 5), - renameQueue(5, 5), - iewQueue(5, 5), - activityRec(NumStages, 10, params->activity), + timeBuffer(params->backComSize, params->forwardComSize), + fetchQueue(params->backComSize, params->forwardComSize), + decodeQueue(params->backComSize, params->forwardComSize), + renameQueue(params->backComSize, params->forwardComSize), + iewQueue(params->backComSize, params->forwardComSize), + activityRec(NumStages, + params->backComSize + params->forwardComSize, + params->activity), globalSeqNum(1), @@ -180,7 +182,6 @@ FullO3CPU::FullO3CPU(Params *params) commit.setIEWQueue(&iewQueue); commit.setRenameQueue(&renameQueue); - commit.setFetchStage(&fetch); commit.setIEWStage(&iew); rename.setIEWStage(&iew); rename.setCommitStage(&commit); @@ -709,7 +710,7 @@ void FullO3CPU::takeOverFrom(BaseCPU *oldCPU) { // Flush out any old data from the time buffers. - for (int i = 0; i < 10; ++i) { + for (int i = 0; i < timeBuffer.getSize(); ++i) { timeBuffer.advance(); fetchQueue.advance(); decodeQueue.advance(); @@ -758,6 +759,46 @@ FullO3CPU::takeOverFrom(BaseCPU *oldCPU) tickEvent.schedule(curTick); } +/* +template +void +FullO3CPU::serialize(std::ostream &os) +{ + BaseCPU::serialize(os); + nameOut(os, csprintf("%s.tickEvent", name())); + tickEvent.serialize(os); + + // Use SimpleThread's ability to checkpoint to make it easier to + // write out the registers. Also make this static so it doesn't + // get instantiated multiple times (causes a panic in statistics). 
+ static SimpleThread temp; + + for (int i = 0; i < thread.size(); i++) { + nameOut(os, csprintf("%s.xc.%i", name(), i)); + temp.copyXC(thread[i]->getXC()); + temp.serialize(os); + } +} + +template +void +FullO3CPU::unserialize(Checkpoint *cp, const std::string §ion) +{ + BaseCPU::unserialize(cp, section); + tickEvent.unserialize(cp, csprintf("%s.tickEvent", section)); + + // Use SimpleThread's ability to checkpoint to make it easier to + // read in the registers. Also make this static so it doesn't + // get instantiated multiple times (causes a panic in statistics). + static SimpleThread temp; + + for (int i = 0; i < thread.size(); i++) { + temp.copyXC(thread[i]->getXC()); + temp.unserialize(cp, csprintf("%s.xc.%i", section, i)); + thread[i]->getXC()->copyArchRegs(temp.getXC()); + } +} +*/ template uint64_t FullO3CPU::readIntReg(int reg_idx) @@ -866,7 +907,8 @@ template void FullO3CPU::setArchFloatRegSingle(int reg_idx, float val, unsigned tid) { - PhysRegIndex phys_reg = commitRenameMap[tid].lookup(reg_idx); + int idx = reg_idx + TheISA::FP_Base_DepTag; + PhysRegIndex phys_reg = commitRenameMap[tid].lookup(idx); regFile.setFloatRegSingle(phys_reg, val); } @@ -875,7 +917,8 @@ template void FullO3CPU::setArchFloatRegDouble(int reg_idx, double val, unsigned tid) { - PhysRegIndex phys_reg = commitRenameMap[tid].lookup(reg_idx); + int idx = reg_idx + TheISA::FP_Base_DepTag; + PhysRegIndex phys_reg = commitRenameMap[tid].lookup(idx); regFile.setFloatRegDouble(phys_reg, val); } @@ -884,7 +927,8 @@ template void FullO3CPU::setArchFloatRegInt(int reg_idx, uint64_t val, unsigned tid) { - PhysRegIndex phys_reg = commitRenameMap[tid].lookup(reg_idx); + int idx = reg_idx + TheISA::FP_Base_DepTag; + PhysRegIndex phys_reg = commitRenameMap[tid].lookup(idx); regFile.setFloatRegInt(phys_reg, val); } diff --git a/cpu/o3/cpu.hh b/cpu/o3/cpu.hh index f4b19bfb3..ef5c9ae53 100644 --- a/cpu/o3/cpu.hh +++ b/cpu/o3/cpu.hh @@ -63,6 +63,12 @@ class BaseFullCPU : public BaseCPU void 
regStats(); + /** Sets this CPU's ID. */ + void setCpuId(int id) { cpu_id = id; } + + /** Reads this CPU's ID. */ + int readCpuId() { return cpu_id; } + protected: int cpu_id; }; diff --git a/cpu/o3/decode_impl.hh b/cpu/o3/decode_impl.hh index 0b686375e..e1af4d423 100644 --- a/cpu/o3/decode_impl.hh +++ b/cpu/o3/decode_impl.hh @@ -278,7 +278,7 @@ DefaultDecode::squash(DynInstPtr &inst, unsigned tid) toFetch->decodeInfo[tid].doneSeqNum = inst->seqNum; toFetch->decodeInfo[tid].predIncorrect = true; toFetch->decodeInfo[tid].squash = true; - toFetch->decodeInfo[tid].nextPC = inst->readNextPC(); + toFetch->decodeInfo[tid].nextPC = inst->branchTarget(); toFetch->decodeInfo[tid].branchTaken = inst->readNextPC() != (inst->readPC() + sizeof(TheISA::MachInst)); @@ -294,7 +294,7 @@ DefaultDecode::squash(DynInstPtr &inst, unsigned tid) for (int i=0; isize; i++) { if (fromFetch->insts[i]->threadNumber == tid && fromFetch->insts[i]->seqNum > inst->seqNum) { - fromFetch->insts[i]->squashed = true; + fromFetch->insts[i]->setSquashed(); } } @@ -343,7 +343,7 @@ DefaultDecode::squash(unsigned tid) for (int i=0; isize; i++) { if (fromFetch->insts[i]->threadNumber == tid) { - fromFetch->insts[i]->squashed = true; + fromFetch->insts[i]->setSquashed(); squash_count++; } } @@ -721,9 +721,8 @@ DefaultDecode::decodeInsts(unsigned tid) // Go ahead and compute any PC-relative branches. if (inst->isDirectCtrl() && inst->isUncondCtrl()) { ++decodeBranchResolved; - inst->setNextPC(inst->branchTarget()); - if (inst->mispredicted()) { + if (inst->branchTarget() != inst->readPredTarg()) { ++decodeBranchMispred; // Might want to set some sort of boolean and just do diff --git a/cpu/o3/fetch.hh b/cpu/o3/fetch.hh index 92a87ab54..0bde56ce9 100644 --- a/cpu/o3/fetch.hh +++ b/cpu/o3/fetch.hh @@ -358,6 +358,12 @@ class DefaultFetch /** The cache line being fetched. */ uint8_t *cacheData[Impl::MaxThreads]; + /** The PC of the cacheline that has been loaded. 
*/ + Addr cacheDataPC[Impl::MaxThreads]; + + /** Whether or not the cache data is valid. */ + bool cacheDataValid[Impl::MaxThreads]; + /** Size of instructions. */ int instSize; diff --git a/cpu/o3/fetch_impl.hh b/cpu/o3/fetch_impl.hh index a309bd49a..cc09c4a41 100644 --- a/cpu/o3/fetch_impl.hh +++ b/cpu/o3/fetch_impl.hh @@ -138,6 +138,8 @@ DefaultFetch::DefaultFetch(Params *params) // Create space to store a cache line. cacheData[tid] = new uint8_t[cacheBlkSize]; + cacheDataPC[tid] = 0; + cacheDataValid[tid] = false; stalls[tid].decode = 0; stalls[tid].rename = 0; @@ -334,6 +336,7 @@ DefaultFetch::processCacheCompletion(MemReqPtr &req) // Wake up the CPU (if it went to sleep and was waiting on this completion // event). cpu->wakeCPU(); + cacheDataValid[tid] = true; DPRINTF(Activity, "[tid:%u] Activating fetch due to cache completion\n", tid); @@ -466,7 +469,7 @@ DefaultFetch::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid unsigned flags = 0; #endif // FULL_SYSTEM - if (interruptPending && flags == 0 || switchedOut) { + if (interruptPending && flags == 0) { // Hold off fetch from getting new instructions while an interrupt // is pending. return false; @@ -475,6 +478,11 @@ DefaultFetch::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid // Align the fetch PC so it's at the start of a cache block. fetch_PC = icacheBlockAlignPC(fetch_PC); + // If we've already got the block, no need to try to fetch it again. + if (cacheDataValid[tid] && fetch_PC == cacheDataPC[tid]) { + return true; + } + // Setup the memReq to do a read of the first instruction's address. // Set the appropriate read size and flags as well. 
memReq[tid] = new MemReq(); @@ -525,6 +533,9 @@ DefaultFetch::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid MemAccessResult result = icacheInterface->access(memReq[tid]); + cacheDataPC[tid] = fetch_PC; + cacheDataValid[tid] = false; + fetchedCacheLines++; // If the cache missed, then schedule an event to wake @@ -1002,8 +1013,8 @@ DefaultFetch::fetch(bool &status_change) fetch_PC = next_PC; if (instruction->isQuiesce()) { - warn("%lli: Quiesce instruction encountered, halting fetch!", - curTick); +// warn("%lli: Quiesce instruction encountered, halting fetch!", +// curTick); fetchStatus[tid] = QuiescePending; ++numInst; status_change = true; @@ -1067,7 +1078,7 @@ DefaultFetch::fetch(bool &status_change) fetchStatus[tid] = TrapPending; status_change = true; - warn("%lli fault (%d) detected @ PC %08p", curTick, fault, PC[tid]); +// warn("%lli fault (%d) detected @ PC %08p", curTick, fault, PC[tid]); #else // !FULL_SYSTEM fatal("fault (%d) detected @ PC %08p", fault, PC[tid]); #endif // FULL_SYSTEM diff --git a/cpu/o3/iew.hh b/cpu/o3/iew.hh index eda6a6bc0..d21c573fe 100644 --- a/cpu/o3/iew.hh +++ b/cpu/o3/iew.hh @@ -224,6 +224,47 @@ class DefaultIEW /** Returns if the LSQ has any stores to writeback. 
*/ bool hasStoresToWB() { return ldstQueue.hasStoresToWB(); } + void incrWb(InstSeqNum &sn) + { + if (++wbOutstanding == wbMax) + ableToIssue = false; + DPRINTF(IEW, "wbOutstanding: %i\n", wbOutstanding); + assert(wbOutstanding <= wbMax); +#ifdef DEBUG + wbList.insert(sn); +#endif + } + + void decrWb(InstSeqNum &sn) + { + if (wbOutstanding-- == wbMax) + ableToIssue = true; + DPRINTF(IEW, "wbOutstanding: %i\n", wbOutstanding); + assert(wbOutstanding >= 0); +#ifdef DEBUG + assert(wbList.find(sn) != wbList.end()); + wbList.erase(sn); +#endif + } + +#ifdef DEBUG + std::set wbList; + + void dumpWb() + { + std::set::iterator wb_it = wbList.begin(); + while (wb_it != wbList.end()) { + cprintf("[sn:%lli]\n", + (*wb_it)); + wb_it++; + } + } +#endif + + bool canIssue() { return ableToIssue; } + + bool ableToIssue; + private: /** Sends commit proper information for a squash due to a branch * mispredict. @@ -281,6 +322,9 @@ class DefaultIEW /** Processes inputs and changes state accordingly. */ void checkSignalsAndUpdate(unsigned tid); + /** Removes instructions from rename from a thread's instruction list. */ + void emptyRenameInsts(unsigned tid); + /** Sorts instructions coming from rename into lists separated by thread. */ void sortInsts(); @@ -401,20 +445,12 @@ class DefaultIEW */ unsigned issueToExecuteDelay; - /** Width of issue's read path, in instructions. The read path is both - * the skid buffer and the rename instruction queue. - * Note to self: is this really different than issueWidth? - */ - unsigned issueReadWidth; + /** Width of dispatch, in instructions. */ + unsigned dispatchWidth; /** Width of issue, in instructions. */ unsigned issueWidth; - /** Width of execute, in instructions. Might make more sense to break - * down into FP vs int. - */ - unsigned executeWidth; - /** Index into queue of instructions being written back. 
*/ unsigned wbNumInst; @@ -425,6 +461,17 @@ class DefaultIEW */ unsigned wbCycle; + /** Number of instructions in flight that will writeback. */ + int wbOutstanding; + + /** Writeback width. */ + unsigned wbWidth; + + /** Writeback width * writeback depth, where writeback depth is + * the number of cycles of writing back instructions that can be + * buffered. */ + unsigned wbMax; + /** Number of active threads. */ unsigned numThreads; @@ -459,14 +506,6 @@ class DefaultIEW Stats::Scalar<> iewIQFullEvents; /** Stat for number of times the LSQ becomes full. */ Stats::Scalar<> iewLSQFullEvents; - /** Stat for total number of executed instructions. */ - Stats::Scalar<> iewExecutedInsts; - /** Stat for total number of executed load instructions. */ - Stats::Vector<> iewExecLoadInsts; - /** Stat for total number of executed store instructions. */ -// Stats::Scalar<> iewExecStoreInsts; - /** Stat for total number of squashed instructions skipped at execute. */ - Stats::Scalar<> iewExecSquashedInsts; /** Stat for total number of memory ordering violation events. */ Stats::Scalar<> memOrderViolationEvents; /** Stat for total number of incorrect predicted taken branches. */ @@ -476,28 +515,27 @@ class DefaultIEW /** Stat for total number of mispredicted branches detected at execute. */ Stats::Formula branchMispredicts; + /** Stat for total number of executed instructions. */ + Stats::Scalar<> iewExecutedInsts; + /** Stat for total number of executed load instructions. */ + Stats::Vector<> iewExecLoadInsts; + /** Stat for total number of executed store instructions. */ +// Stats::Scalar<> iewExecStoreInsts; + /** Stat for total number of squashed instructions skipped at execute. */ + Stats::Scalar<> iewExecSquashedInsts; /** Number of executed software prefetches. */ - Stats::Vector<> exeSwp; + Stats::Vector<> iewExecutedSwp; /** Number of executed nops. */ - Stats::Vector<> exeNop; + Stats::Vector<> iewExecutedNop; /** Number of executed meomory references. 
*/ - Stats::Vector<> exeRefs; + Stats::Vector<> iewExecutedRefs; /** Number of executed branches. */ - Stats::Vector<> exeBranches; - -// Stats::Vector<> issued_ops; -/* - Stats::Vector<> stat_fu_busy; - Stats::Vector2d<> stat_fuBusy; - Stats::Vector<> dist_unissued; - Stats::Vector2d<> stat_issued_inst_type; -*/ - /** Number of instructions issued per cycle. */ - Stats::Formula issueRate; + Stats::Vector<> iewExecutedBranches; /** Number of executed store instructions. */ Stats::Formula iewExecStoreInsts; -// Stats::Formula issue_op_rate; -// Stats::Formula fu_busy_rate; + /** Number of instructions executed per cycle. */ + Stats::Formula iewExecRate; + /** Number of instructions sent to commit. */ Stats::Vector<> iewInstsToCommit; /** Number of instructions that writeback. */ @@ -510,7 +548,6 @@ class DefaultIEW * to resource contention. */ Stats::Vector<> wbPenalized; - /** Number of instructions per cycle written back. */ Stats::Formula wbRate; /** Average number of woken instructions per writeback. */ diff --git a/cpu/o3/iew_impl.hh b/cpu/o3/iew_impl.hh index 3ed20cb75..102be4f8d 100644 --- a/cpu/o3/iew_impl.hh +++ b/cpu/o3/iew_impl.hh @@ -56,9 +56,11 @@ DefaultIEW::LdWritebackEvent::process() //iewStage->ldstQueue.removeMSHR(inst->threadNumber,inst->seqNum); if (iewStage->isSwitchedOut()) { + iewStage->decrWb(inst->seqNum); inst = NULL; return; } else if (inst->isSquashed()) { + iewStage->decrWb(inst->seqNum); iewStage->wakeCPU(); inst = NULL; return; @@ -93,16 +95,17 @@ DefaultIEW::LdWritebackEvent::description() template DefaultIEW::DefaultIEW(Params *params) : // @todo: Make this into a parameter. 
- issueToExecQueue(5, 5), + issueToExecQueue(params->backComSize, params->forwardComSize), instQueue(params), ldstQueue(params), fuPool(params->fuPool), commitToIEWDelay(params->commitToIEWDelay), renameToIEWDelay(params->renameToIEWDelay), issueToExecuteDelay(params->issueToExecuteDelay), - issueReadWidth(params->issueWidth), + dispatchWidth(params->dispatchWidth), issueWidth(params->issueWidth), - executeWidth(params->executeWidth), + wbOutstanding(0), + wbWidth(params->wbWidth), numThreads(params->numberOfThreads), switchedOut(false) { @@ -125,8 +128,12 @@ DefaultIEW::DefaultIEW(Params *params) fetchRedirect[i] = false; } + wbMax = wbWidth * params->wbDepth; + updateLSQNextCycle = false; + ableToIssue = true; + skidBufferMax = (3 * (renameToIEWDelay * params->renameWidth)) + issueWidth; } @@ -144,6 +151,7 @@ DefaultIEW::regStats() using namespace Stats; instQueue.regStats(); + ldstQueue.regStats(); iewIdleCycles .name(name() + ".iewIdleCycles") @@ -189,20 +197,6 @@ DefaultIEW::regStats() .name(name() + ".iewLSQFullEvents") .desc("Number of times the LSQ has become full, causing a stall"); - iewExecutedInsts - .name(name() + ".iewExecutedInsts") - .desc("Number of executed instructions"); - - iewExecLoadInsts - .init(cpu->number_of_threads) - .name(name() + ".iewExecLoadInsts") - .desc("Number of load instructions executed") - .flags(total); - - iewExecSquashedInsts - .name(name() + ".iewExecSquashedInsts") - .desc("Number of squashed instructions skipped in execute"); - memOrderViolationEvents .name(name() + ".memOrderViolationEvents") .desc("Number of memory order violations"); @@ -221,47 +215,49 @@ DefaultIEW::regStats() branchMispredicts = predictedTakenIncorrect + predictedNotTakenIncorrect; - exeSwp + iewExecutedInsts + .name(name() + ".iewExecutedInsts") + .desc("Number of executed instructions"); + + iewExecLoadInsts + .init(cpu->number_of_threads) + .name(name() + ".iewExecLoadInsts") + .desc("Number of load instructions executed") + .flags(total); + + 
iewExecSquashedInsts + .name(name() + ".iewExecSquashedInsts") + .desc("Number of squashed instructions skipped in execute"); + + iewExecutedSwp .init(cpu->number_of_threads) .name(name() + ".EXEC:swp") .desc("number of swp insts executed") - .flags(total) - ; + .flags(total); - exeNop + iewExecutedNop .init(cpu->number_of_threads) .name(name() + ".EXEC:nop") .desc("number of nop insts executed") - .flags(total) - ; + .flags(total); - exeRefs + iewExecutedRefs .init(cpu->number_of_threads) .name(name() + ".EXEC:refs") .desc("number of memory reference insts executed") - .flags(total) - ; + .flags(total); - exeBranches + iewExecutedBranches .init(cpu->number_of_threads) .name(name() + ".EXEC:branches") .desc("Number of branches executed") - .flags(total) - ; - - issueRate - .name(name() + ".EXEC:rate") - .desc("Inst execution rate") - .flags(total) - ; - issueRate = iewExecutedInsts / cpu->numCycles; + .flags(total); iewExecStoreInsts .name(name() + ".EXEC:stores") .desc("Number of stores executed") - .flags(total) - ; - iewExecStoreInsts = exeRefs - iewExecLoadInsts; + .flags(total); + iewExecStoreInsts = iewExecutedRefs - iewExecLoadInsts; /* for (int i=0; i::regStats() .init(cpu->number_of_threads) .name(name() + ".WB:sent") .desc("cumulative count of insts sent to commit") - .flags(total) - ; + .flags(total); writebackCount .init(cpu->number_of_threads) .name(name() + ".WB:count") .desc("cumulative count of insts written-back") - .flags(total) - ; + .flags(total); producerInst .init(cpu->number_of_threads) .name(name() + ".WB:producers") .desc("num instructions producing a value") - .flags(total) - ; + .flags(total); consumerInst .init(cpu->number_of_threads) .name(name() + ".WB:consumers") .desc("num instructions consuming a value") - .flags(total) - ; + .flags(total); wbPenalized .init(cpu->number_of_threads) .name(name() + ".WB:penalized") .desc("number of instrctions required to write to 'other' IQ") - .flags(total) - ; + .flags(total); wbPenalizedRate 
.name(name() + ".WB:penalized_rate") .desc ("fraction of instructions written-back that wrote to 'other' IQ") - .flags(total) - ; + .flags(total); wbPenalizedRate = wbPenalized / writebackCount; wbFanout .name(name() + ".WB:fanout") .desc("average fanout of values written-back") - .flags(total) - ; + .flags(total); wbFanout = producerInst / consumerInst; wbRate .name(name() + ".WB:rate") .desc("insts written-back per cycle") - .flags(total) - ; + .flags(total); wbRate = writebackCount / cpu->numCycles; } @@ -481,8 +469,7 @@ DefaultIEW::takeOverFrom() updateLSQNextCycle = false; - // @todo: Fix hardcoded number - for (int i = 0; i < 6; ++i) { + for (int i = 0; i < issueToExecQueue.getSize(); ++i) { issueToExecQueue.advance(); } } @@ -515,16 +502,7 @@ DefaultIEW::squash(unsigned tid) skidBuffer[tid].pop(); } - while (!insts[tid].empty()) { - if (insts[tid].front()->isLoad() || - insts[tid].front()->isStore() ) { - toRename->iewInfo[tid].dispatchedToLSQ++; - } - - toRename->iewInfo[tid].dispatched++; - - insts[tid].pop(); - } + emptyRenameInsts(tid); } template @@ -650,14 +628,16 @@ DefaultIEW::instToCommit(DynInstPtr &inst) // free slot. while ((*iewQueue)[wbCycle].insts[wbNumInst]) { ++wbNumInst; - if (wbNumInst == issueWidth) { + if (wbNumInst == wbWidth) { ++wbCycle; wbNumInst = 0; } - assert(wbCycle < 5); + assert((wbCycle * wbWidth + wbNumInst) <= wbMax); } + DPRINTF(IEW, "Current wb cycle: %i, width: %i, numInst: %i\nwbActual:%i\n", + wbCycle, wbWidth, wbNumInst, wbCycle * wbWidth + wbNumInst); // Add finished instruction to queue to commit. 
(*iewQueue)[wbCycle].insts[wbNumInst] = inst; (*iewQueue)[wbCycle].size++; @@ -670,7 +650,7 @@ DefaultIEW::validInstsFromRename() unsigned inst_count = 0; for (int i=0; isize; i++) { - if (!fromRename->insts[i]->squashed) + if (!fromRename->insts[i]->isSquashed()) inst_count++; } @@ -858,10 +838,12 @@ DefaultIEW::checkSignalsAndUpdate(unsigned tid) } if (fromCommit->commitInfo[tid].robSquashing) { - DPRINTF(IEW, "[tid:%i]: ROB is still squashing.\n"); + DPRINTF(IEW, "[tid:%i]: ROB is still squashing.\n", tid); dispatchStatus[tid] = Squashing; + emptyRenameInsts(tid); + wroteToTimeBuffer = true; return; } @@ -910,6 +892,22 @@ DefaultIEW::sortInsts() } } +template +void +DefaultIEW::emptyRenameInsts(unsigned tid) +{ + while (!insts[tid].empty()) { + if (insts[tid].front()->isLoad() || + insts[tid].front()->isStore() ) { + toRename->iewInfo[tid].dispatchedToLSQ++; + } + + toRename->iewInfo[tid].dispatched++; + + insts[tid].pop(); + } +} + template void DefaultIEW::wakeCPU() @@ -1010,7 +1008,7 @@ DefaultIEW::dispatchInsts(unsigned tid) // Loop through the instructions, putting them in the instruction // queue. 
for ( ; dis_num_inst < insts_to_add && - dis_num_inst < issueReadWidth; + dis_num_inst < dispatchWidth; ++dis_num_inst) { inst = insts_to_dispatch.front(); @@ -1149,7 +1147,7 @@ DefaultIEW::dispatchInsts(unsigned tid) instQueue.recordProducer(inst); - exeNop[tid]++; + iewExecutedNop[tid]++; add_to_iq = false; } else if (inst->isExecuted()) { @@ -1263,6 +1261,7 @@ DefaultIEW::executeInsts() ++iewExecSquashedInsts; + decrWb(inst->seqNum); continue; } @@ -1399,8 +1398,8 @@ DefaultIEW::writebackInsts() DynInstPtr inst = toCommit->insts[inst_num]; int tid = inst->threadNumber; - DPRINTF(IEW, "Sending instructions to commit, PC %#x.\n", - inst->readPC()); + DPRINTF(IEW, "Sending instructions to commit, [sn:%lli] PC %#x.\n", + inst->seqNum, inst->readPC()); iewInstsToCommit[tid]++; @@ -1425,6 +1424,8 @@ DefaultIEW::writebackInsts() } writebackCount[tid]++; } + + decrWb(inst->seqNum); } } @@ -1561,7 +1562,7 @@ DefaultIEW::updateExeInstStats(DynInstPtr &inst) // #ifdef TARGET_ALPHA if (inst->isDataPrefetch()) - exeSwp[thread_number]++; + iewExecutedSwp[thread_number]++; else iewExecutedInsts++; #else @@ -1572,13 +1573,13 @@ DefaultIEW::updateExeInstStats(DynInstPtr &inst) // Control operations // if (inst->isControl()) - exeBranches[thread_number]++; + iewExecutedBranches[thread_number]++; // // Memory operations // if (inst->isMemRef()) { - exeRefs[thread_number]++; + iewExecutedRefs[thread_number]++; if (inst->isLoad()) { iewExecLoadInsts[thread_number]++; diff --git a/cpu/o3/inst_queue.hh b/cpu/o3/inst_queue.hh index 4802cbaf4..80cd71f0d 100644 --- a/cpu/o3/inst_queue.hh +++ b/cpu/o3/inst_queue.hh @@ -490,8 +490,6 @@ class InstructionQueue /** Number of instructions issued per cycle. */ Stats::Formula issueRate; -// Stats::Formula issue_stores; -// Stats::Formula issue_op_rate; /** Number of times the FU was busy. */ Stats::Vector<> fuBusy; /** Number of times the FU was busy per instruction issued. 
*/ diff --git a/cpu/o3/inst_queue_impl.hh b/cpu/o3/inst_queue_impl.hh index d677a259c..72cb0d708 100644 --- a/cpu/o3/inst_queue_impl.hh +++ b/cpu/o3/inst_queue_impl.hh @@ -288,22 +288,7 @@ InstructionQueue::regStats() .flags(total) ; issueRate = iqInstsIssued / cpu->numCycles; -/* - issue_stores - .name(name() + ".ISSUE:stores") - .desc("Number of stores issued") - .flags(total) - ; - issue_stores = exe_refs - exe_loads; -*/ -/* - issue_op_rate - .name(name() + ".ISSUE:op_rate") - .desc("Operation issue rate") - .flags(total) - ; - issue_op_rate = issued_ops / numCycles; -*/ + statFuBusy .init(Num_OpClasses) .name(name() + ".ISSUE:fu_full") @@ -700,6 +685,7 @@ InstructionQueue::scheduleReadyInsts() int total_issued = 0; while (total_issued < totalWidth && + iewStage->canIssue() && order_it != order_end_it) { OpClass op_class = (*order_it).queueType; @@ -790,13 +776,14 @@ InstructionQueue::scheduleReadyInsts() // complete. ++freeEntries; count[tid]--; - issuing_inst->removeInIQ(); + issuing_inst->clearInIQ(); } else { memDepUnit[tid].issue(issuing_inst); } listOrder.erase(order_it++); statIssuedInstType[tid][op_class]++; + iewStage->incrWb(issuing_inst->seqNum); } else { statFuBusy[op_class]++; fuBusy[tid]++; @@ -1096,7 +1083,7 @@ InstructionQueue::doSquash(unsigned tid) // inst will flow through the rest of the pipeline. squashed_inst->setIssued(); squashed_inst->setCanCommit(); - squashed_inst->removeInIQ(); + squashed_inst->clearInIQ(); //Update Thread IQ Count count[squashed_inst->threadNumber]--; diff --git a/cpu/o3/lsq.hh b/cpu/o3/lsq.hh index b321d4590..c67225bc0 100644 --- a/cpu/o3/lsq.hh +++ b/cpu/o3/lsq.hh @@ -62,6 +62,9 @@ class LSQ { /** Returns the name of the LSQ. */ std::string name() const; + /** Registers the statistics for each LSQ Unit. */ + void regStats(); + /** Sets the pointer to the list of active threads. */ void setActiveThreads(std::list *at_ptr); /** Sets the CPU pointer. 
*/ diff --git a/cpu/o3/lsq_impl.hh b/cpu/o3/lsq_impl.hh index a6ad27522..a8a55af1a 100644 --- a/cpu/o3/lsq_impl.hh +++ b/cpu/o3/lsq_impl.hh @@ -104,6 +104,16 @@ LSQ::name() const return iewStage->name() + ".lsq"; } +template +void +LSQ::regStats() +{ + //Initialize LSQs + for (int tid=0; tid < numThreads; tid++) { + thread[tid].regStats(); + } +} + template void LSQ::setActiveThreads(list *at_ptr) diff --git a/cpu/o3/lsq_unit.hh b/cpu/o3/lsq_unit.hh index a6afff743..fe174a97d 100644 --- a/cpu/o3/lsq_unit.hh +++ b/cpu/o3/lsq_unit.hh @@ -101,6 +101,9 @@ class LSQUnit { /** Returns the name of the LSQ unit. */ std::string name() const; + /** Registers statistics. */ + void regStats(); + /** Sets the CPU pointer. */ void setCPU(FullCPU *cpu_ptr) { cpu = cpu_ptr; } @@ -153,9 +156,6 @@ class LSQUnit { /** Writes back stores. */ void writebackStores(); - // @todo: Include stats in the LSQ unit. - //void regStats(); - /** Clears all the entries in the LQ. */ void clearLQ(); @@ -369,25 +369,34 @@ class LSQUnit { // Will also need how many read/write ports the Dcache has. Or keep track // of that in stage that is one level up, and only call executeLoad/Store // the appropriate number of times. -/* - // total number of loads forwaded from LSQ stores - Stats::Vector<> lsq_forw_loads; + /** Total number of loads forwaded from LSQ stores. */ + Stats::Scalar<> lsqForwLoads; + + /** Total number of loads ignored due to invalid addresses. */ + Stats::Scalar<> invAddrLoads; + + /** Total number of squashed loads. */ + Stats::Scalar<> lsqSquashedLoads; - // total number of loads ignored due to invalid addresses - Stats::Vector<> inv_addr_loads; + /** Total number of responses from the memory system that are + * ignored due to the instruction already being squashed. */ + Stats::Scalar<> lsqIgnoredResponses; - // total number of software prefetches ignored due to invalid addresses - Stats::Vector<> inv_addr_swpfs; + /** Total number of squashed stores. 
*/ + Stats::Scalar<> lsqSquashedStores; - // total non-speculative bogus addresses seen (debug var) - Counter sim_invalid_addrs; - Stats::Vector<> fu_busy; //cumulative fu busy + /** Total number of software prefetches ignored due to invalid addresses. */ + Stats::Scalar<> invAddrSwpfs; - // ready loads blocked due to memory disambiguation - Stats::Vector<> lsq_blocked_loads; + /** Ready loads blocked due to partial store-forwarding. */ + Stats::Scalar<> lsqBlockedLoads; + + /** Number of loads that were rescheduled. */ + Stats::Scalar<> lsqRescheduledLoads; + + /** Number of times the LSQ is blocked due to the cache. */ + Stats::Scalar<> lsqCacheBlocked; - Stats::Scalar<> lsqInversion; -*/ public: /** Executes the load at the given index. */ template @@ -441,8 +450,9 @@ LSQUnit::read(MemReqPtr &req, T &data, int load_idx) // at the head of the LSQ and are ready to commit (at the head of the ROB // too). if (req->flags & UNCACHEABLE && - (load_idx != loadHead || !loadQueue[load_idx]->reachedCommit)) { + (load_idx != loadHead || !loadQueue[load_idx]->isAtCommit())) { iewStage->rescheduleMemInst(loadQueue[load_idx]); + ++lsqRescheduledLoads; return TheISA::genMachineCheckFault(); } @@ -552,6 +562,8 @@ LSQUnit::read(MemReqPtr &req, T &data, int load_idx) // Tell IQ/mem dep unit that this instruction will need to be // rescheduled eventually iewStage->rescheduleMemInst(loadQueue[load_idx]); + iewStage->decrWb(loadQueue[load_idx]->seqNum); + ++lsqRescheduledLoads; // Do not generate a writeback event as this instruction is not // complete. 
@@ -559,6 +571,7 @@ LSQUnit::read(MemReqPtr &req, T &data, int load_idx) "Store idx %i to load addr %#x\n", store_idx, req->vaddr); + ++lsqBlockedLoads; return NoFault; } } @@ -579,6 +592,10 @@ LSQUnit::read(MemReqPtr &req, T &data, int load_idx) // if we have a cache, do cache access too if (fault == NoFault && dcacheInterface) { if (dcacheInterface->isBlocked()) { + ++lsqCacheBlocked; + + iewStage->decrWb(inst->seqNum); + // There's an older load that's already going to squash. if (isLoadBlocked && blockedLoadSeqNum < inst->seqNum) return NoFault; diff --git a/cpu/o3/lsq_unit_impl.hh b/cpu/o3/lsq_unit_impl.hh index 4ee8bb234..5cc3078f8 100644 --- a/cpu/o3/lsq_unit_impl.hh +++ b/cpu/o3/lsq_unit_impl.hh @@ -124,6 +124,47 @@ LSQUnit::name() const } } +template +void +LSQUnit::regStats() +{ + lsqForwLoads + .name(name() + ".forwLoads") + .desc("Number of loads that had data forwarded from stores"); + + invAddrLoads + .name(name() + ".invAddrLoads") + .desc("Number of loads ignored due to an invalid address"); + + lsqSquashedLoads + .name(name() + ".squashedLoads") + .desc("Number of loads squashed"); + + lsqIgnoredResponses + .name(name() + ".ignoredResponses") + .desc("Number of memory responses ignored because the instruction is squashed"); + + lsqSquashedStores + .name(name() + ".squashedStores") + .desc("Number of stores squashed"); + + invAddrSwpfs + .name(name() + ".invAddrSwpfs") + .desc("Number of software prefetches ignored due to an invalid address"); + + lsqBlockedLoads + .name(name() + ".blockedLoads") + .desc("Number of blocked loads due to partial load-store forwarding"); + + lsqRescheduledLoads + .name(name() + ".rescheduledLoads") + .desc("Number of loads that were rescheduled"); + + lsqCacheBlocked + .name(name() + ".cacheBlocked") + .desc("Number of times an access to memory failed due to the cache being blocked"); +} + template void LSQUnit::clearLQ() @@ -548,6 +589,7 @@ LSQUnit::writebackStores() if (dcacheInterface && 
dcacheInterface->isBlocked()) { DPRINTF(LSQUnit, "Unable to write back any more stores, cache" " is blocked!\n"); + ++lsqCacheBlocked; break; } @@ -705,7 +747,7 @@ LSQUnit::squash(const InstSeqNum &squashed_num) } // Clear the smart pointer to make sure it is decremented. - loadQueue[load_idx]->squashed = true; + loadQueue[load_idx]->setSquashed(); loadQueue[load_idx] = NULL; --loads; @@ -748,7 +790,7 @@ LSQUnit::squash(const InstSeqNum &squashed_num) } // Clear the smart pointer to make sure it is decremented. - storeQueue[store_idx].inst->squashed = true; + storeQueue[store_idx].inst->setSquashed(); storeQueue[store_idx].inst = NULL; storeQueue[store_idx].canWB = 0; @@ -765,6 +807,7 @@ LSQUnit::squash(const InstSeqNum &squashed_num) storeTail = store_idx; decrStIdx(store_idx); + ++lsqSquashedStores; } } diff --git a/cpu/o3/mem_dep_unit.cc b/cpu/o3/mem_dep_unit.cc index ccdd1a515..b0f91d44f 100644 --- a/cpu/o3/mem_dep_unit.cc +++ b/cpu/o3/mem_dep_unit.cc @@ -35,6 +35,7 @@ // AlphaSimpleImpl. template class MemDepUnit; +#ifdef DEBUG template <> int MemDepUnit::MemDepEntry::memdep_count = 0; @@ -44,3 +45,4 @@ MemDepUnit::MemDepEntry::memdep_insert = 0; template <> int MemDepUnit::MemDepEntry::memdep_erase = 0; +#endif diff --git a/cpu/o3/mem_dep_unit_impl.hh b/cpu/o3/mem_dep_unit_impl.hh index 595e9293f..bfe694bd8 100644 --- a/cpu/o3/mem_dep_unit_impl.hh +++ b/cpu/o3/mem_dep_unit_impl.hh @@ -59,7 +59,9 @@ MemDepUnit::~MemDepUnit() } } +#ifdef DEBUG assert(MemDepEntry::memdep_count == 0); +#endif } template @@ -141,7 +143,9 @@ MemDepUnit::insert(DynInstPtr &inst) // Add the MemDepEntry to the hash. memDepHash.insert( std::pair(inst->seqNum, inst_entry)); +#ifdef DEBUG MemDepEntry::memdep_insert++; +#endif instList[tid].push_back(inst); @@ -227,7 +231,9 @@ MemDepUnit::insertNonSpec(DynInstPtr &inst) // Insert the MemDepEntry into the hash. 
memDepHash.insert( std::pair(inst->seqNum, inst_entry)); +#ifdef DEBUG MemDepEntry::memdep_insert++; +#endif // Add the instruction to the list. instList[tid].push_back(inst); @@ -275,7 +281,9 @@ MemDepUnit::insertBarrier(DynInstPtr &barr_inst) // Add the MemDepEntry to the hash. memDepHash.insert( std::pair(barr_sn, inst_entry)); +#ifdef DEBUG MemDepEntry::memdep_insert++; +#endif // Add the instruction to the instruction list. instList[tid].push_back(barr_inst); @@ -375,7 +383,9 @@ MemDepUnit::completed(DynInstPtr &inst) (*hash_it).second = NULL; memDepHash.erase(hash_it); +#ifdef DEBUG MemDepEntry::memdep_erase++; +#endif } template @@ -470,7 +480,9 @@ MemDepUnit::squash(const InstSeqNum &squashed_num, (*hash_it).second = NULL; memDepHash.erase(hash_it); +#ifdef DEBUG MemDepEntry::memdep_erase++; +#endif instList[tid].erase(squash_it--); } @@ -551,5 +563,7 @@ MemDepUnit::dumpLists() cprintf("Memory dependence hash size: %i\n", memDepHash.size()); +#ifdef DEBUG cprintf("Memory dependence entries: %i\n", MemDepEntry::memdep_count); +#endif } diff --git a/cpu/o3/regfile.hh b/cpu/o3/regfile.hh index ed1238d36..76c43d3a1 100644 --- a/cpu/o3/regfile.hh +++ b/cpu/o3/regfile.hh @@ -223,10 +223,10 @@ class PhysRegFile public: /** (signed) integer register file. */ - std::vector intRegFile; + IntReg *intRegFile; /** Floating point register file. */ - std::vector floatRegFile; + FloatReg *floatRegFile; /** Miscellaneous register file. 
*/ MiscRegFile miscRegs[Impl::MaxThreads]; @@ -256,11 +256,15 @@ PhysRegFile::PhysRegFile(unsigned _numPhysicalIntRegs, : numPhysicalIntRegs(_numPhysicalIntRegs), numPhysicalFloatRegs(_numPhysicalFloatRegs) { - intRegFile.resize(numPhysicalIntRegs); - floatRegFile.resize(numPhysicalFloatRegs); + intRegFile = new IntReg[numPhysicalIntRegs]; + floatRegFile = new FloatReg[numPhysicalFloatRegs]; - //memset(intRegFile, 0, sizeof(*intRegFile)); - //memset(floatRegFile, 0, sizeof(*floatRegFile)); + for (int i = 0; i < Impl::MaxThreads; ++i) { + miscRegs[i].clear(); + } + + memset(intRegFile, 0, sizeof(*intRegFile) * numPhysicalIntRegs); + memset(floatRegFile, 0, sizeof(*floatRegFile) * numPhysicalFloatRegs); } #endif diff --git a/cpu/o3/rename_impl.hh b/cpu/o3/rename_impl.hh index 829c99584..93f5b3504 100644 --- a/cpu/o3/rename_impl.hh +++ b/cpu/o3/rename_impl.hh @@ -348,7 +348,7 @@ DefaultRename::squash(unsigned tid) for (int i=0; isize; i++) { if (fromDecode->insts[i]->threadNumber == tid) { - fromDecode->insts[i]->squashed = true; + fromDecode->insts[i]->setSquashed(); wroteToTimeBuffer = true; squashCount++; } @@ -1029,7 +1029,7 @@ DefaultRename::validInsts() unsigned inst_count = 0; for (int i=0; isize; i++) { - if (!fromDecode->insts[i]->squashed) + if (!fromDecode->insts[i]->isSquashed()) inst_count++; } diff --git a/cpu/o3/rob.hh b/cpu/o3/rob.hh index bdbdde32f..2043e0b34 100644 --- a/cpu/o3/rob.hh +++ b/cpu/o3/rob.hh @@ -305,7 +305,7 @@ class ROB private: /** The sequence number of the squashed instruction. */ - InstSeqNum squashedSeqNum; + InstSeqNum squashedSeqNum[Impl::MaxThreads]; /** Is the ROB done squashing.
*/ bool doneSquashing[Impl::MaxThreads]; diff --git a/cpu/o3/rob_impl.hh b/cpu/o3/rob_impl.hh index 25e0c80fd..62c4d9cf7 100644 --- a/cpu/o3/rob_impl.hh +++ b/cpu/o3/rob_impl.hh @@ -38,10 +38,10 @@ ROB::ROB(unsigned _numEntries, unsigned _squashWidth, : numEntries(_numEntries), squashWidth(_squashWidth), numInstsInROB(0), - squashedSeqNum(0), numThreads(_numThreads) { for (int tid=0; tid < numThreads; tid++) { + squashedSeqNum[tid] = 0; doneSquashing[tid] = true; threadEntries[tid] = 0; } @@ -274,7 +274,7 @@ ROB::retireHead(unsigned tid) --numInstsInROB; --threadEntries[tid]; - head_inst->removeInROB(); + head_inst->clearInROB(); head_inst->setCommitted(); instList[tid].erase(head_it); @@ -349,11 +349,11 @@ void ROB::doSquash(unsigned tid) { DPRINTF(ROB, "[tid:%u]: Squashing instructions until [sn:%i].\n", - tid, squashedSeqNum); + tid, squashedSeqNum[tid]); assert(squashIt[tid] != instList[tid].end()); - if ((*squashIt[tid])->seqNum < squashedSeqNum) { + if ((*squashIt[tid])->seqNum < squashedSeqNum[tid]) { DPRINTF(ROB, "[tid:%u]: Done squashing instructions.\n", tid); @@ -368,7 +368,7 @@ ROB::doSquash(unsigned tid) for (int numSquashed = 0; numSquashed < squashWidth && squashIt[tid] != instList[tid].end() && - (*squashIt[tid])->seqNum > squashedSeqNum; + (*squashIt[tid])->seqNum > squashedSeqNum[tid]; ++numSquashed) { DPRINTF(ROB, "[tid:%u]: Squashing instruction PC %#x, seq num %i.\n", @@ -405,7 +405,7 @@ ROB::doSquash(unsigned tid) // Check if ROB is done squashing. 
- if ((*squashIt[tid])->seqNum <= squashedSeqNum) { + if ((*squashIt[tid])->seqNum <= squashedSeqNum[tid]) { DPRINTF(ROB, "[tid:%u]: Done squashing instructions.\n", tid); @@ -517,7 +517,7 @@ ROB::squash(InstSeqNum squash_num,unsigned tid) doneSquashing[tid] = false; - squashedSeqNum = squash_num; + squashedSeqNum[tid] = squash_num; if (!instList[tid].empty()) { InstIt tail_thread = instList[tid].end(); diff --git a/cpu/ozone/cpu.hh b/cpu/ozone/cpu.hh index 5af2b02b2..c272528b1 100644 --- a/cpu/ozone/cpu.hh +++ b/cpu/ozone/cpu.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2005 The Regents of The University of Michigan + * Copyright (c) 2006 The Regents of The University of Michigan * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -79,13 +79,13 @@ template class Checker; /** - * Declaration of Out-of-Order CPU class. Basically it is a SimpleCPU with - * simple out-of-order capabilities added to it. It is still a 1 CPI machine - * (?), but is capable of handling cache misses. Basically it models having - * a ROB/IQ by only allowing a certain amount of instructions to execute while - * the cache miss is outstanding. + * Light weight out of order CPU model that approximates an out of + * order CPU. It is separated into a front end and a back end, with + * the template parameter Impl describing the classes used for each. + * The goal is to be able to specify through the Impl the class to use + * for the front end and back end, with different classes used to + * model different levels of detail. */ - template class OzoneCPU : public BaseCPU { @@ -98,6 +98,11 @@ class OzoneCPU : public BaseCPU typedef TheISA::MiscReg MiscReg; public: + /** + * The ExecContext for this CPU, which is used to provide the + * CPU's interface to any external objects. Internally most of + * the CPU state is stored within the OzoneThreadState class. 
+ */ class OzoneXC : public ExecContext { public: OzoneCPU *cpu; @@ -235,14 +240,19 @@ class OzoneCPU : public BaseCPU #endif }; - // execution context proxy + // ExecContext for OzoneCPU OzoneXC ozoneXC; + + // ExecContext pointer that will be given to any external objects. ExecContext *xcProxy; + + // ExecContext pointer to the CheckerCPU's ExecContext. ExecContext *checkerXC; typedef OzoneThreadState ImplState; private: + // Committed thread state for the OzoneCPU. OzoneThreadState thread; public: @@ -280,12 +290,6 @@ class OzoneCPU : public BaseCPU tickEvent.squash(); } - private: - Trace::InstRecord *traceData; - - template - void trace_data(T data); - public: enum Status { Running, @@ -361,6 +365,7 @@ class OzoneCPU : public BaseCPU FrontEnd *frontEnd; BackEnd *backEnd; + private: Status status() const { return _status; } void setStatus(Status new_status) { _status = new_status; } @@ -392,12 +397,11 @@ class OzoneCPU : public BaseCPU // number of idle cycles Stats::Average<> notIdleFraction; Stats::Formula idleFraction; - public: + public: virtual void serialize(std::ostream &os); virtual void unserialize(Checkpoint *cp, const std::string §ion); - #if FULL_SYSTEM bool validInstAddr(Addr addr) { return true; } bool validDataAddr(Addr addr) { return true; } @@ -585,12 +589,9 @@ class OzoneCPU : public BaseCPU Fault copy(Addr dest); - InstSeqNum globalSeqNum; - public: void squashFromXC(); - // @todo: This can be a useful debug function. Implement it. 
void dumpInsts() { frontEnd->dumpInsts(); } #if FULL_SYSTEM @@ -608,7 +609,6 @@ class OzoneCPU : public BaseCPU ExecContext *xcBase() { return xcProxy; } - bool decoupledFrontEnd; struct CommStruct { InstSeqNum doneSeqNum; InstSeqNum nonSpecSeqNum; @@ -617,8 +617,13 @@ class OzoneCPU : public BaseCPU bool stall; }; + + InstSeqNum globalSeqNum; + TimeBuffer comm; + bool decoupledFrontEnd; + bool lockFlag; Stats::Scalar<> quiesceCycles; diff --git a/cpu/ozone/cpu_impl.hh b/cpu/ozone/cpu_impl.hh index 5675da3a8..4f41f220a 100644 --- a/cpu/ozone/cpu_impl.hh +++ b/cpu/ozone/cpu_impl.hh @@ -26,9 +26,6 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -//#include -//#include - #include "arch/isa_traits.hh" // For MachInst #include "base/trace.hh" #include "config/full_system.hh" @@ -39,7 +36,6 @@ #include "cpu/ozone/cpu.hh" #include "cpu/quiesce_event.hh" #include "cpu/static_inst.hh" -//#include "mem/base_mem.hh" #include "mem/mem_interface.hh" #include "sim/sim_object.hh" #include "sim/stats.hh" @@ -50,7 +46,6 @@ #include "arch/alpha/tlb.hh" #include "arch/vtophys.hh" #include "base/callback.hh" -//#include "base/remote_gdb.hh" #include "cpu/profile.hh" #include "kern/kernel_stats.hh" #include "mem/functional/memory_control.hh" @@ -66,15 +61,6 @@ using namespace TheISA; -template -template -void -OzoneCPU::trace_data(T data) { - if (traceData) { - traceData->setData(data); - } -} - template OzoneCPU::TickEvent::TickEvent(OzoneCPU *c, int w) : Event(&mainEventQueue, CPU_Tick_Pri), cpu(c), width(w) @@ -104,7 +90,7 @@ OzoneCPU::OzoneCPU(Params *p) : BaseCPU(p), thread(this, 0, p->workload[0], 0), tickEvent(this, p->width), mem(p->workload[0]->getMemory()), #endif - comm(5, 5) + comm(5, 5), decoupledFrontEnd(p->decoupledFrontEnd) { frontEnd = new FrontEnd(p); backEnd = new BackEnd(p); @@ -112,6 +98,9 @@ OzoneCPU::OzoneCPU(Params *p) _status = Idle; if (p->checker) { + // If checker is being used, get the checker from the params + // pointer, make 
the Checker's ExecContext, and setup the + // xcProxy to point to it. BaseCPU *temp_checker = p->checker; checker = dynamic_cast *>(temp_checker); checker->setMemory(mem); @@ -122,11 +111,17 @@ OzoneCPU::OzoneCPU(Params *p) thread.xcProxy = checkerXC; xcProxy = checkerXC; } else { + // If checker is not being used, then the xcProxy points + // directly to the CPU's ExecContext. checker = NULL; thread.xcProxy = &ozoneXC; xcProxy = &ozoneXC; } + // Add xcProxy to CPU list of ExecContexts. + execContexts.push_back(xcProxy); + + // Give the OzoneXC pointers to the CPU and the thread state. ozoneXC.cpu = this; ozoneXC.thread = &thread; @@ -134,7 +129,7 @@ OzoneCPU::OzoneCPU(Params *p) thread.setStatus(ExecContext::Suspended); #if FULL_SYSTEM - /***** All thread state stuff *****/ + // Setup thread state stuff. thread.cpu = this; thread.tid = 0; thread.mem = p->mem; @@ -171,8 +166,7 @@ OzoneCPU::OzoneCPU(Params *p) numInst = 0; startNumInst = 0; - execContexts.push_back(xcProxy); - + // Give pointers to the front and back end to all things they may need. frontEnd->setCPU(this); backEnd->setCPU(this); @@ -188,12 +182,13 @@ OzoneCPU::OzoneCPU(Params *p) frontEnd->setBackEnd(backEnd); backEnd->setFrontEnd(frontEnd); - decoupledFrontEnd = p->decoupledFrontEnd; - globalSeqNum = 1; checkInterrupts = false; + lockFlag = 0; + + // Setup rename table, initializing all values to ready. for (int i = 0; i < TheISA::TotalNumRegs; ++i) { thread.renameTable[i] = new DynInst(this); thread.renameTable[i]->setResultReady(); @@ -206,8 +201,6 @@ OzoneCPU::OzoneCPU(Params *p) // pTable = p->pTable; #endif - lockFlag = 0; - DPRINTF(OzoneCPU, "OzoneCPU: Created Ozone cpu object.\n"); } @@ -231,14 +224,20 @@ template void OzoneCPU::signalSwitched() { + // Only complete the switchout when both the front end and back + // end have signalled they are ready to switch. 
if (++switchCount == 2) { backEnd->doSwitchOut(); frontEnd->doSwitchOut(); + if (checker) checker->switchOut(sampler); + _status = SwitchedOut; + if (tickEvent.scheduled()) tickEvent.squash(); + sampler->signalSwitched(); } assert(switchCount <= 2); @@ -793,6 +792,7 @@ OzoneCPU::OzoneXC::takeOverFrom(ExecContext *old_context) thread->quiesceEvent->xc = this; } + // Copy kernel stats pointer from old context. thread->kernelStats = old_context->getKernelStats(); // storeCondFailures = 0; cpu->lockFlag = false; @@ -814,7 +814,11 @@ OzoneCPU::OzoneXC::regStats(const std::string &name) template void OzoneCPU::OzoneXC::serialize(std::ostream &os) -{ } +{ + // Once serialization is added, serialize the quiesce event and + // kernel stats. Will need to make sure there aren't multiple + // things that serialize them. +} template void @@ -867,7 +871,6 @@ OzoneCPU::OzoneXC::getThreadNum() return thread->tid; } -// Also somewhat obnoxious. Really only used for the TLB fault. template TheISA::MachInst OzoneCPU::OzoneXC::getInst() @@ -901,7 +904,7 @@ OzoneCPU::OzoneXC::copyArchRegs(ExecContext *xc) // Need to copy the XC values into the current rename table, // copy the misc regs. 
- thread->regs.miscRegs.copyMiscRegs(xc); + TheISA::copyMiscRegs(xc, this); } template diff --git a/cpu/ozone/inorder_back_end_impl.hh b/cpu/ozone/inorder_back_end_impl.hh index 5a378ec76..cc92ec92f 100644 --- a/cpu/ozone/inorder_back_end_impl.hh +++ b/cpu/ozone/inorder_back_end_impl.hh @@ -257,7 +257,7 @@ InorderBackEnd::executeInsts() } inst->setExecuted(); - inst->setCompleted(); + inst->setResultReady(); inst->setCanCommit(); instList.pop_front(); diff --git a/cpu/ozone/inst_queue_impl.hh b/cpu/ozone/inst_queue_impl.hh index 0523c68d6..1b9fcdc84 100644 --- a/cpu/ozone/inst_queue_impl.hh +++ b/cpu/ozone/inst_queue_impl.hh @@ -848,13 +848,13 @@ template void InstQueue::addReadyMemInst(DynInstPtr &ready_inst) { - OpClass op_class = ready_inst->opClass(); +// OpClass op_class = ready_inst->opClass(); readyInsts.push(ready_inst); DPRINTF(IQ, "Instruction is ready to issue, putting it onto " "the ready list, PC %#x opclass:%i [sn:%lli].\n", - ready_inst->readPC(), op_class, ready_inst->seqNum); + ready_inst->readPC(), ready_inst->opClass(), ready_inst->seqNum); } /* template @@ -1175,11 +1175,11 @@ InstQueue::addIfReady(DynInstPtr &inst) return; } - OpClass op_class = inst->opClass(); +// OpClass op_class = inst->opClass(); DPRINTF(IQ, "Instruction is ready to issue, putting it onto " "the ready list, PC %#x opclass:%i [sn:%lli].\n", - inst->readPC(), op_class, inst->seqNum); + inst->readPC(), inst->opClass(), inst->seqNum); readyInsts.push(inst); } diff --git a/cpu/ozone/lw_back_end.hh b/cpu/ozone/lw_back_end.hh index 1c03ffb73..19f2b2b61 100644 --- a/cpu/ozone/lw_back_end.hh +++ b/cpu/ozone/lw_back_end.hh @@ -369,37 +369,37 @@ class LWBackEnd /* Stats::Scalar<> dcacheStallCycles; Counter lastDcacheStall; */ - Stats::Vector<> rob_cap_events; - Stats::Vector<> rob_cap_inst_count; - Stats::Vector<> iq_cap_events; - Stats::Vector<> iq_cap_inst_count; + Stats::Vector<> robCapEvents; + Stats::Vector<> robCapInstCount; + Stats::Vector<> iqCapEvents; + Stats::Vector<> 
iqCapInstCount; // total number of instructions executed - Stats::Vector<> exe_inst; - Stats::Vector<> exe_swp; - Stats::Vector<> exe_nop; - Stats::Vector<> exe_refs; - Stats::Vector<> exe_loads; - Stats::Vector<> exe_branches; + Stats::Vector<> exeInst; + Stats::Vector<> exeSwp; + Stats::Vector<> exeNop; + Stats::Vector<> exeRefs; + Stats::Vector<> exeLoads; + Stats::Vector<> exeBranches; - Stats::Vector<> issued_ops; + Stats::Vector<> issuedOps; // total number of loads forwaded from LSQ stores - Stats::Vector<> lsq_forw_loads; + Stats::Vector<> lsqForwLoads; // total number of loads ignored due to invalid addresses - Stats::Vector<> inv_addr_loads; + Stats::Vector<> invAddrLoads; // total number of software prefetches ignored due to invalid addresses - Stats::Vector<> inv_addr_swpfs; + Stats::Vector<> invAddrSwpfs; // ready loads blocked due to memory disambiguation - Stats::Vector<> lsq_blocked_loads; + Stats::Vector<> lsqBlockedLoads; Stats::Scalar<> lsqInversion; - Stats::Vector<> n_issued_dist; - Stats::VectorDistribution<> issue_delay_dist; + Stats::Vector<> nIssuedDist; + Stats::VectorDistribution<> issueDelayDist; - Stats::VectorDistribution<> queue_res_dist; + Stats::VectorDistribution<> queueResDist; /* Stats::Vector<> stat_fu_busy; Stats::Vector2d<> stat_fuBusy; @@ -417,37 +417,37 @@ class LWBackEnd Stats::Formula commit_ipb; Stats::Formula lsq_inv_rate; */ - Stats::Vector<> writeback_count; - Stats::Vector<> producer_inst; - Stats::Vector<> consumer_inst; - Stats::Vector<> wb_penalized; + Stats::Vector<> writebackCount; + Stats::Vector<> producerInst; + Stats::Vector<> consumerInst; + Stats::Vector<> wbPenalized; - Stats::Formula wb_rate; - Stats::Formula wb_fanout; - Stats::Formula wb_penalized_rate; + Stats::Formula wbRate; + Stats::Formula wbFanout; + Stats::Formula wbPenalizedRate; // total number of instructions committed - Stats::Vector<> stat_com_inst; - Stats::Vector<> stat_com_swp; - Stats::Vector<> stat_com_refs; - Stats::Vector<> 
stat_com_loads; - Stats::Vector<> stat_com_membars; - Stats::Vector<> stat_com_branches; + Stats::Vector<> statComInst; + Stats::Vector<> statComSwp; + Stats::Vector<> statComRefs; + Stats::Vector<> statComLoads; + Stats::Vector<> statComMembars; + Stats::Vector<> statComBranches; - Stats::Distribution<> n_committed_dist; + Stats::Distribution<> nCommittedDist; - Stats::Scalar<> commit_eligible_samples; - Stats::Vector<> commit_eligible; + Stats::Scalar<> commitEligibleSamples; + Stats::Vector<> commitEligible; Stats::Vector<> squashedInsts; Stats::Vector<> ROBSquashedInsts; - Stats::Scalar<> ROB_fcount; - Stats::Formula ROB_full_rate; + Stats::Scalar<> ROBFcount; + Stats::Formula ROBFullRate; - Stats::Vector<> ROB_count; // cumulative ROB occupancy - Stats::Formula ROB_occ_rate; - Stats::VectorDistribution<> ROB_occ_dist; + Stats::Vector<> ROBCount; // cumulative ROB occupancy + Stats::Formula ROBOccRate; + Stats::VectorDistribution<> ROBOccDist; public: void dumpInsts(); diff --git a/cpu/ozone/lw_back_end_impl.hh b/cpu/ozone/lw_back_end_impl.hh index 41b4ea24b..18b2e8f47 100644 --- a/cpu/ozone/lw_back_end_impl.hh +++ b/cpu/ozone/lw_back_end_impl.hh @@ -251,78 +251,77 @@ void LWBackEnd::regStats() { using namespace Stats; - rob_cap_events + robCapEvents .init(cpu->number_of_threads) .name(name() + ".ROB:cap_events") .desc("number of cycles where ROB cap was active") .flags(total) ; - rob_cap_inst_count + robCapInstCount .init(cpu->number_of_threads) .name(name() + ".ROB:cap_inst") .desc("number of instructions held up by ROB cap") .flags(total) ; - iq_cap_events + iqCapEvents .init(cpu->number_of_threads) .name(name() +".IQ:cap_events" ) .desc("number of cycles where IQ cap was active") .flags(total) ; - iq_cap_inst_count + iqCapInstCount .init(cpu->number_of_threads) .name(name() + ".IQ:cap_inst") .desc("number of instructions held up by IQ cap") .flags(total) ; - - exe_inst + exeInst .init(cpu->number_of_threads) .name(name() + ".ISSUE:count") .desc("number of 
insts issued") .flags(total) ; - exe_swp + exeSwp .init(cpu->number_of_threads) .name(name() + ".ISSUE:swp") .desc("number of swp insts issued") .flags(total) ; - exe_nop + exeNop .init(cpu->number_of_threads) .name(name() + ".ISSUE:nop") .desc("number of nop insts issued") .flags(total) ; - exe_refs + exeRefs .init(cpu->number_of_threads) .name(name() + ".ISSUE:refs") .desc("number of memory reference insts issued") .flags(total) ; - exe_loads + exeLoads .init(cpu->number_of_threads) .name(name() + ".ISSUE:loads") .desc("number of load insts issued") .flags(total) ; - exe_branches + exeBranches .init(cpu->number_of_threads) .name(name() + ".ISSUE:branches") .desc("Number of branches issued") .flags(total) ; - issued_ops + issuedOps .init(cpu->number_of_threads) .name(name() + ".ISSUE:op_count") .desc("number of insts issued") @@ -339,28 +338,28 @@ LWBackEnd::regStats() // // Other stats // - lsq_forw_loads + lsqForwLoads .init(cpu->number_of_threads) .name(name() + ".LSQ:forw_loads") .desc("number of loads forwarded via LSQ") .flags(total) ; - inv_addr_loads + invAddrLoads .init(cpu->number_of_threads) .name(name() + ".ISSUE:addr_loads") .desc("number of invalid-address loads") .flags(total) ; - inv_addr_swpfs + invAddrSwpfs .init(cpu->number_of_threads) .name(name() + ".ISSUE:addr_swpfs") .desc("number of invalid-address SW prefetches") .flags(total) ; - lsq_blocked_loads + lsqBlockedLoads .init(cpu->number_of_threads) .name(name() + ".LSQ:blocked_loads") .desc("number of ready loads not issued due to memory disambiguation") @@ -372,51 +371,51 @@ LWBackEnd::regStats() .desc("Number of times LSQ instruction issued early") ; - n_issued_dist + nIssuedDist .init(issueWidth + 1) .name(name() + ".ISSUE:issued_per_cycle") .desc("Number of insts issued each cycle") .flags(total | pdf | dist) ; - issue_delay_dist + issueDelayDist .init(Num_OpClasses,0,99,2) .name(name() + ".ISSUE:") .desc("cycles from operands ready to issue") .flags(pdf | cdf) ; - queue_res_dist + 
queueResDist .init(Num_OpClasses, 0, 99, 2) .name(name() + ".IQ:residence:") .desc("cycles from dispatch to issue") .flags(total | pdf | cdf ) ; for (int i = 0; i < Num_OpClasses; ++i) { - queue_res_dist.subname(i, opClassStrings[i]); + queueResDist.subname(i, opClassStrings[i]); } - writeback_count + writebackCount .init(cpu->number_of_threads) .name(name() + ".WB:count") .desc("cumulative count of insts written-back") .flags(total) ; - producer_inst + producerInst .init(cpu->number_of_threads) .name(name() + ".WB:producers") .desc("num instructions producing a value") .flags(total) ; - consumer_inst + consumerInst .init(cpu->number_of_threads) .name(name() + ".WB:consumers") .desc("num instructions consuming a value") .flags(total) ; - wb_penalized + wbPenalized .init(cpu->number_of_threads) .name(name() + ".WB:penalized") .desc("number of instrctions required to write to 'other' IQ") @@ -424,71 +423,71 @@ LWBackEnd::regStats() ; - wb_penalized_rate + wbPenalizedRate .name(name() + ".WB:penalized_rate") .desc ("fraction of instructions written-back that wrote to 'other' IQ") .flags(total) ; - wb_penalized_rate = wb_penalized / writeback_count; + wbPenalizedRate = wbPenalized / writebackCount; - wb_fanout + wbFanout .name(name() + ".WB:fanout") .desc("average fanout of values written-back") .flags(total) ; - wb_fanout = producer_inst / consumer_inst; + wbFanout = producerInst / consumerInst; - wb_rate + wbRate .name(name() + ".WB:rate") .desc("insts written-back per cycle") .flags(total) ; - wb_rate = writeback_count / cpu->numCycles; + wbRate = writebackCount / cpu->numCycles; - stat_com_inst + statComInst .init(cpu->number_of_threads) .name(name() + ".COM:count") .desc("Number of instructions committed") .flags(total) ; - stat_com_swp + statComSwp .init(cpu->number_of_threads) .name(name() + ".COM:swp_count") .desc("Number of s/w prefetches committed") .flags(total) ; - stat_com_refs + statComRefs .init(cpu->number_of_threads) .name(name() + ".COM:refs") 
.desc("Number of memory references committed") .flags(total) ; - stat_com_loads + statComLoads .init(cpu->number_of_threads) .name(name() + ".COM:loads") .desc("Number of loads committed") .flags(total) ; - stat_com_membars + statComMembars .init(cpu->number_of_threads) .name(name() + ".COM:membars") .desc("Number of memory barriers committed") .flags(total) ; - stat_com_branches + statComBranches .init(cpu->number_of_threads) .name(name() + ".COM:branches") .desc("Number of branches committed") .flags(total) ; - n_committed_dist + nCommittedDist .init(0,commitWidth,1) .name(name() + ".COM:committed_per_cycle") .desc("Number of insts commited each cycle") @@ -508,14 +507,14 @@ LWBackEnd::regStats() // -> The standard deviation is computed only over cycles where // we reached the BW limit // - commit_eligible + commitEligible .init(cpu->number_of_threads) .name(name() + ".COM:bw_limited") .desc("number of insts not committed due to BW limits") .flags(total) ; - commit_eligible_samples + commitEligibleSamples .name(name() + ".COM:bw_lim_events") .desc("number cycles where commit BW limit reached") ; @@ -532,32 +531,32 @@ LWBackEnd::regStats() .desc("Number of instructions removed from inst list when they reached the head of the ROB") ; - ROB_fcount + ROBFcount .name(name() + ".ROB:full_count") .desc("number of cycles where ROB was full") ; - ROB_count + ROBCount .init(cpu->number_of_threads) .name(name() + ".ROB:occupancy") .desc(name() + ".ROB occupancy (cumulative)") .flags(total) ; - ROB_full_rate + ROBFullRate .name(name() + ".ROB:full_rate") .desc("ROB full per cycle") ; - ROB_full_rate = ROB_fcount / cpu->numCycles; + ROBFullRate = ROBFcount / cpu->numCycles; - ROB_occ_rate + ROBOccRate .name(name() + ".ROB:occ_rate") .desc("ROB occupancy rate") .flags(total) ; - ROB_occ_rate = ROB_count / cpu->numCycles; + ROBOccRate = ROBCount / cpu->numCycles; - ROB_occ_dist + ROBOccDist .init(cpu->number_of_threads,0,numROBEntries,2) .name(name() + ".ROB:occ_dist") 
.desc("ROB Occupancy per cycle") @@ -660,7 +659,7 @@ LWBackEnd::tick() return; } - ROB_count[0]+= numInsts; + ROBCount[0]+= numInsts; wbCycle = 0; @@ -980,8 +979,8 @@ LWBackEnd::executeInsts() } } - issued_ops[0]+= num_executed; - n_issued_dist[num_executed]++; + issuedOps[0]+= num_executed; + nIssuedDist[num_executed]++; } template @@ -1002,13 +1001,13 @@ LWBackEnd::instToCommit(DynInstPtr &inst) inst->setResultReady(); int dependents = wakeDependents(inst); if (dependents) { - producer_inst[0]++; - consumer_inst[0]+= dependents; + producerInst[0]++; + consumerInst[0]+= dependents; } } } - writeback_count[0]++; + writebackCount[0]++; } #if 0 template @@ -1076,7 +1075,7 @@ LWBackEnd::commitInst(int inst_num) thread->setPC(inst->readPC()); thread->setNextPC(inst->readNextPC()); - inst->reachedCommit = true; + inst->setAtCommit(); // If the instruction is not executed yet, then it is a non-speculative // or store inst. Signal backwards that it should be executed. @@ -1229,6 +1228,9 @@ LWBackEnd::commitInst(int inst_num) inst->traceData = NULL; } + if (inst->isCopy()) + panic("Should not commit any copy instructions!"); + inst->clearDependents(); frontEnd->addFreeRegs(freed_regs); @@ -1292,7 +1294,7 @@ LWBackEnd::commitInsts() break; } } - n_committed_dist.sample(inst_num); + nCommittedDist.sample(inst_num); } template @@ -1344,7 +1346,7 @@ LWBackEnd::squash(const InstSeqNum &sn) (*insts_it)->setCanCommit(); - (*insts_it)->removeInROB(); + (*insts_it)->clearInROB(); for (int i = 0; i < (*insts_it)->numDestRegs(); ++i) { DynInstPtr prev_dest = (*insts_it)->getPrevDestInst(i); @@ -1522,27 +1524,27 @@ LWBackEnd::updateExeInstStats(DynInstPtr &inst) // #ifdef TARGET_ALPHA if (inst->isDataPrefetch()) - exe_swp[thread_number]++; + exeSwp[thread_number]++; else - exe_inst[thread_number]++; + exeInst[thread_number]++; #else - exe_inst[thread_number]++; + exeInst[thread_number]++; #endif // // Control operations // if (inst->isControl()) - exe_branches[thread_number]++; + 
exeBranches[thread_number]++; // // Memory operations // if (inst->isMemRef()) { - exe_refs[thread_number]++; + exeRefs[thread_number]++; if (inst->isLoad()) - exe_loads[thread_number]++; + exeLoads[thread_number]++; } } @@ -1562,33 +1564,33 @@ LWBackEnd::updateComInstStats(DynInstPtr &inst) // #ifdef TARGET_ALPHA if (inst->isDataPrefetch()) { - stat_com_swp[tid]++; + statComSwp[tid]++; } else { - stat_com_inst[tid]++; + statComInst[tid]++; } #else - stat_com_inst[tid]++; + statComInst[tid]++; #endif // // Control Instructions // if (inst->isControl()) - stat_com_branches[tid]++; + statComBranches[tid]++; // // Memory references // if (inst->isMemRef()) { - stat_com_refs[tid]++; + statComRefs[tid]++; if (inst->isLoad()) { - stat_com_loads[tid]++; + statComLoads[tid]++; } } if (inst->isMemBarrier()) { - stat_com_membars[tid]++; + statComMembars[tid]++; } } diff --git a/cpu/ozone/lw_lsq.hh b/cpu/ozone/lw_lsq.hh index 6fe343b42..c0bf0b0fe 100644 --- a/cpu/ozone/lw_lsq.hh +++ b/cpu/ozone/lw_lsq.hh @@ -447,7 +447,7 @@ OzoneLWLSQ::read(MemReqPtr &req, T &data, int load_idx) // too). // @todo: Fix uncached accesses. 
if (req->flags & UNCACHEABLE && - (inst != loadQueue.back() || !inst->reachedCommit)) { + (inst != loadQueue.back() || !inst->isAtCommit())) { DPRINTF(OzoneLSQ, "[sn:%lli] Uncached load and not head of " "commit/LSQ!\n", inst->seqNum); diff --git a/cpu/ozone/thread_state.hh b/cpu/ozone/thread_state.hh index c86c3a720..f104dff23 100644 --- a/cpu/ozone/thread_state.hh +++ b/cpu/ozone/thread_state.hh @@ -182,8 +182,6 @@ struct OzoneThreadState : public ThreadState { void setNextPC(uint64_t val) { nextPC = val; } - bool misspeculating() { return false; } - void setInst(TheISA::MachInst _inst) { inst = _inst; } Counter readFuncExeInst() { return funcExeInst; } diff --git a/cpu/thread_state.hh b/cpu/thread_state.hh index e09cb12fd..12146bd11 100644 --- a/cpu/thread_state.hh +++ b/cpu/thread_state.hh @@ -60,6 +60,7 @@ struct ThreadState { : cpuId(_cpuId), tid(_tid), mem(_mem), process(_process), asid(_asid) #endif { + numInst = 0; funcExeInst = 0; storeCondFailures = 0; } diff --git a/python/m5/objects/AlphaFullCPU.py b/python/m5/objects/AlphaFullCPU.py index 043c3c08f..015e9d872 100644 --- a/python/m5/objects/AlphaFullCPU.py +++ b/python/m5/objects/AlphaFullCPU.py @@ -39,12 +39,10 @@ class DerivAlphaFullCPU(BaseCPU): "Issue/Execute/Writeback delay") issueToExecuteDelay = Param.Unsigned("Issue to execute delay (internal " "to the IEW stage)") - issueWidth = Param.Unsigned("Issue width") - executeWidth = Param.Unsigned("Execute width") - executeIntWidth = Param.Unsigned("Integer execute width") - executeFloatWidth = Param.Unsigned("Floating point execute width") - executeBranchWidth = Param.Unsigned("Branch execute width") - executeMemoryWidth = Param.Unsigned("Memory execute width") + dispatchWidth = Param.Unsigned(8, "Dispatch width") + issueWidth = Param.Unsigned(8, "Issue width") + wbWidth = Param.Unsigned(8, "Writeback width") + wbDepth = Param.Unsigned(1, "Writeback depth") fuPool = Param.FUPool(NULL, "Functional Unit pool") iewToCommitDelay = 
Param.Unsigned("Issue/Execute/Writeback to commit " @@ -55,6 +53,9 @@ class DerivAlphaFullCPU(BaseCPU): trapLatency = Param.Tick("Trap latency") fetchTrapLatency = Param.Tick("Fetch trap latency") + backComSize = Param.Unsigned(5, "Time buffer size for backwards communication") + forwardComSize = Param.Unsigned(5, "Time buffer size for forward communication") + predType = Param.String("Branch predictor type ('local', 'tournament')") localPredictorSize = Param.Unsigned("Size of local predictor") localCtrBits = Param.Unsigned("Bits per counter") -- 2.30.2