/*
+ * Copyright (c) 2012, 2014 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder. You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
* Copyright (c) 2004-2006 The Regents of The University of Michigan
* All rights reserved.
*
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * Authors: Kevin Lim
*/
+#ifndef __CPU_O3_DECODE_IMPL_HH__
+#define __CPU_O3_DECODE_IMPL_HH__
+
#include "arch/types.hh"
#include "base/trace.hh"
#include "config/the_isa.hh"
#include "cpu/inst_seq.hh"
#include "debug/Activity.hh"
#include "debug/Decode.hh"
+#include "debug/O3PipeView.hh"
#include "params/DerivO3CPU.hh"
#include "sim/full_system.hh"
using std::list;
template<class Impl>
-DefaultDecode<Impl>::DefaultDecode(O3CPU *_cpu, DerivO3CPUParams *params)
+DefaultDecode<Impl>::DefaultDecode(O3CPU *_cpu, const DerivO3CPUParams ¶ms)
: cpu(_cpu),
- renameToDecodeDelay(params->renameToDecodeDelay),
- iewToDecodeDelay(params->iewToDecodeDelay),
- commitToDecodeDelay(params->commitToDecodeDelay),
- fetchToDecodeDelay(params->fetchToDecodeDelay),
- decodeWidth(params->decodeWidth),
- numThreads(params->numThreads)
+ renameToDecodeDelay(params.renameToDecodeDelay),
+ iewToDecodeDelay(params.iewToDecodeDelay),
+ commitToDecodeDelay(params.commitToDecodeDelay),
+ fetchToDecodeDelay(params.fetchToDecodeDelay),
+ decodeWidth(params.decodeWidth),
+ numThreads(params.numThreads),
+ stats(_cpu)
+{
+ if (decodeWidth > Impl::MaxWidth)
+ fatal("decodeWidth (%d) is larger than compiled limit (%d),\n"
+ "\tincrease MaxWidth in src/cpu/o3/impl.hh\n",
+ decodeWidth, static_cast<int>(Impl::MaxWidth));
+
+ // @todo: Make into a parameter
+ skidBufferMax = (fetchToDecodeDelay + 1) * params.fetchWidth;
+ for (int tid = 0; tid < Impl::MaxThreads; tid++) {
+ stalls[tid] = {false};
+ decodeStatus[tid] = Idle;
+ bdelayDoneSeqNum[tid] = 0;
+ squashInst[tid] = nullptr;
+ squashAfterDelaySlot[tid] = 0;
+ }
+}
+
+template<class Impl>
+void
+DefaultDecode<Impl>::startupStage()
+{ // Startup has no work of its own: it only delegates to resetStage().
+ resetStage();
+}
+
+template<class Impl>
+void
+DefaultDecode<Impl>::clearStates(ThreadID tid)
+{ // Returns thread `tid` to Idle and clears its rename stall flag.
+ decodeStatus[tid] = Idle;
+ stalls[tid].rename = false;
+}
+
+template<class Impl>
+void
+DefaultDecode<Impl>::resetStage()
{
_status = Inactive;
decodeStatus[tid] = Idle;
stalls[tid].rename = false;
- stalls[tid].iew = false;
- stalls[tid].commit = false;
}
-
- // @todo: Make into a parameter
- skidBufferMax = (fetchToDecodeDelay + 1) * params->fetchWidth;
}
template <class Impl>
}
template <class Impl>
-void
-DefaultDecode<Impl>::regStats()
+DefaultDecode<Impl>::DecodeStats::DecodeStats(O3CPU *cpu)
+ : Stats::Group(cpu, "decode"),
+ ADD_STAT(idleCycles, "Number of cycles decode is idle"),
+ ADD_STAT(blockedCycles, "Number of cycles decode is blocked"),
+ ADD_STAT(runCycles, "Number of cycles decode is running"),
+ ADD_STAT(unblockCycles, "Number of cycles decode is unblocking"),
+ ADD_STAT(squashCycles, "Number of cycles decode is squashing"),
+ ADD_STAT(branchResolved, "Number of times decode resolved a "
+ " branch"),
+ ADD_STAT(branchMispred, "Number of times decode detected a branch"
+ " misprediction"),
+ ADD_STAT(controlMispred,"Number of times decode detected an"
+ " instruction incorrectly predicted as a control"),
+ ADD_STAT(decodedInsts, "Number of instructions handled by decode"),
+ ADD_STAT(squashedInsts, "Number of squashed instructions handled"
+ " by decode")
{
- decodeIdleCycles
- .name(name() + ".IdleCycles")
- .desc("Number of cycles decode is idle")
- .prereq(decodeIdleCycles);
- decodeBlockedCycles
- .name(name() + ".BlockedCycles")
- .desc("Number of cycles decode is blocked")
- .prereq(decodeBlockedCycles);
- decodeRunCycles
- .name(name() + ".RunCycles")
- .desc("Number of cycles decode is running")
- .prereq(decodeRunCycles);
- decodeUnblockCycles
- .name(name() + ".UnblockCycles")
- .desc("Number of cycles decode is unblocking")
- .prereq(decodeUnblockCycles);
- decodeSquashCycles
- .name(name() + ".SquashCycles")
- .desc("Number of cycles decode is squashing")
- .prereq(decodeSquashCycles);
- decodeBranchResolved
- .name(name() + ".BranchResolved")
- .desc("Number of times decode resolved a branch")
- .prereq(decodeBranchResolved);
- decodeBranchMispred
- .name(name() + ".BranchMispred")
- .desc("Number of times decode detected a branch misprediction")
- .prereq(decodeBranchMispred);
- decodeControlMispred
- .name(name() + ".ControlMispred")
- .desc("Number of times decode detected an instruction incorrectly"
- " predicted as a control")
- .prereq(decodeControlMispred);
- decodeDecodedInsts
- .name(name() + ".DecodedInsts")
- .desc("Number of instructions handled by decode")
- .prereq(decodeDecodedInsts);
- decodeSquashedInsts
- .name(name() + ".SquashedInsts")
- .desc("Number of squashed instructions handled by decode")
- .prereq(decodeSquashedInsts);
+ idleCycles.prereq(idleCycles);
+ blockedCycles.prereq(blockedCycles);
+ runCycles.prereq(runCycles);
+ unblockCycles.prereq(unblockCycles);
+ squashCycles.prereq(squashCycles);
+ branchResolved.prereq(branchResolved);
+ branchMispred.prereq(branchMispred);
+ controlMispred.prereq(controlMispred);
+ decodedInsts.prereq(decodedInsts);
+ squashedInsts.prereq(squashedInsts);
}
template<class Impl>
}
template <class Impl>
-bool
-DefaultDecode<Impl>::drain()
+void
+DefaultDecode<Impl>::drainSanityCheck() const // asserts decode holds no instructions
{
- // Decode is done draining at any time.
- cpu->signalDrained();
- return true;
+ for (ThreadID tid = 0; tid < numThreads; ++tid) {
+ assert(insts[tid].empty()); // nothing left in this thread's insts queue
+ assert(skidBuffer[tid].empty()); // nothing left in this thread's skid buffer
+ }
}
template <class Impl>
-void
-DefaultDecode<Impl>::takeOverFrom()
+bool
+DefaultDecode<Impl>::isDrained() const // true only when every thread is empty and Running or Idle
{
- _status = Inactive;
-
- // Be sure to reset state and clear out any old instructions.
for (ThreadID tid = 0; tid < numThreads; ++tid) {
- decodeStatus[tid] = Idle;
-
- stalls[tid].rename = false;
- stalls[tid].iew = false;
- stalls[tid].commit = false;
- while (!insts[tid].empty())
- insts[tid].pop();
- while (!skidBuffer[tid].empty())
- skidBuffer[tid].pop();
- branchCount[tid] = 0;
+ if (!insts[tid].empty() || !skidBuffer[tid].empty() ||
+ (decodeStatus[tid] != Running && decodeStatus[tid] != Idle))
+ return false; // queued instructions remain, or the status is neither Running nor Idle
}
- wroteToTimeBuffer = false;
+ return true; // no thread in [0, numThreads) failed the check above
}
template<class Impl>
bool ret_val = false;
if (stalls[tid].rename) {
- DPRINTF(Decode,"[tid:%i]: Stall fom Rename stage detected.\n", tid);
- ret_val = true;
- } else if (stalls[tid].iew) {
- DPRINTF(Decode,"[tid:%i]: Stall fom IEW stage detected.\n", tid);
- ret_val = true;
- } else if (stalls[tid].commit) {
- DPRINTF(Decode,"[tid:%i]: Stall fom Commit stage detected.\n", tid);
+ DPRINTF(Decode,"[tid:%i] Stall fom Rename stage detected.\n", tid);
ret_val = true;
}
bool
DefaultDecode<Impl>::block(ThreadID tid)
{
- DPRINTF(Decode, "[tid:%u]: Blocking.\n", tid);
+ DPRINTF(Decode, "[tid:%i] Blocking.\n", tid);
// Add the current inputs to the skid buffer so they can be
// reprocessed when this stage unblocks.
// Set the status to Blocked.
decodeStatus[tid] = Blocked;
- if (decodeStatus[tid] != Unblocking) {
+ if (toFetch->decodeUnblock[tid]) {
+ toFetch->decodeUnblock[tid] = false;
+ } else {
toFetch->decodeBlock[tid] = true;
wroteToTimeBuffer = true;
}
{
// Decode is done unblocking only if the skid buffer is empty.
if (skidBuffer[tid].empty()) {
- DPRINTF(Decode, "[tid:%u]: Done unblocking.\n", tid);
+ DPRINTF(Decode, "[tid:%i] Done unblocking.\n", tid);
toFetch->decodeUnblock[tid] = true;
wroteToTimeBuffer = true;
return true;
}
- DPRINTF(Decode, "[tid:%u]: Currently unblocking.\n", tid);
+ DPRINTF(Decode, "[tid:%i] Currently unblocking.\n", tid);
return false;
}
template<class Impl>
void
-DefaultDecode<Impl>::squash(DynInstPtr &inst, ThreadID tid)
+DefaultDecode<Impl>::squash(const DynInstPtr &inst, ThreadID tid)
{
- DPRINTF(Decode, "[tid:%i]: [sn:%i] Squashing due to incorrect branch "
+ DPRINTF(Decode, "[tid:%i] [sn:%llu] Squashing due to incorrect branch "
"prediction detected at decode.\n", tid, inst->seqNum);
// Send back mispredict information.
unsigned
DefaultDecode<Impl>::squash(ThreadID tid)
{
- DPRINTF(Decode, "[tid:%i]: Squashing.\n",tid);
+ DPRINTF(Decode, "[tid:%i] Squashing.\n",tid);
if (decodeStatus[tid] == Blocked ||
decodeStatus[tid] == Unblocking) {
assert(tid == inst->threadNumber);
- DPRINTF(Decode,"Inserting [sn:%lli] PC: %s into decode skidBuffer %i\n",
- inst->seqNum, inst->pcState(), inst->threadNumber);
-
skidBuffer[tid].push(inst);
+
+ DPRINTF(Decode,"Inserting [tid:%d][sn:%lli] PC: %s into decode skidBuffer %i\n",
+ inst->threadNumber, inst->seqNum, inst->pcState(), skidBuffer[tid].size());
}
// @todo: Eventually need to enforce this by not letting a thread
assert(stalls[tid].rename);
stalls[tid].rename = false;
}
-
- if (fromIEW->iewBlock[tid]) {
- stalls[tid].iew = true;
- }
-
- if (fromIEW->iewUnblock[tid]) {
- assert(stalls[tid].iew);
- stalls[tid].iew = false;
- }
-
- if (fromCommit->commitBlock[tid]) {
- stalls[tid].commit = true;
- }
-
- if (fromCommit->commitUnblock[tid]) {
- assert(stalls[tid].commit);
- stalls[tid].commit = false;
- }
}
template <class Impl>
// Check squash signals from commit.
if (fromCommit->commitInfo[tid].squash) {
- DPRINTF(Decode, "[tid:%u]: Squashing instructions due to squash "
+ DPRINTF(Decode, "[tid:%i] Squashing instructions due to squash "
"from commit.\n", tid);
squash(tid);
return true;
}
- // Check ROB squash signals from commit.
- if (fromCommit->commitInfo[tid].robSquashing) {
- DPRINTF(Decode, "[tid:%u]: ROB is still squashing.\n", tid);
-
- // Continue to squash.
- decodeStatus[tid] = Squashing;
-
- return true;
- }
-
if (checkStall(tid)) {
return block(tid);
}
if (decodeStatus[tid] == Blocked) {
- DPRINTF(Decode, "[tid:%u]: Done blocking, switching to unblocking.\n",
+ DPRINTF(Decode, "[tid:%i] Done blocking, switching to unblocking.\n",
tid);
decodeStatus[tid] = Unblocking;
if (decodeStatus[tid] == Squashing) {
// Switch status to running if decode isn't being told to block or
// squash this cycle.
- DPRINTF(Decode, "[tid:%u]: Done squashing, switching to running.\n",
+ DPRINTF(Decode, "[tid:%i] Done squashing, switching to running.\n",
tid);
decodeStatus[tid] = Running;
// check if stall conditions have passed
if (decodeStatus[tid] == Blocked) {
- ++decodeBlockedCycles;
+ ++stats.blockedCycles;
} else if (decodeStatus[tid] == Squashing) {
- ++decodeSquashCycles;
+ ++stats.squashCycles;
}
// Decode should try to decode as many instructions as its bandwidth
// will allow, as long as it is not currently blocked.
if (decodeStatus[tid] == Running ||
decodeStatus[tid] == Idle) {
- DPRINTF(Decode, "[tid:%u]: Not blocked, so attempting to run "
+ DPRINTF(Decode, "[tid:%i] Not blocked, so attempting to run "
"stage.\n",tid);
decodeInsts(tid);
skidBuffer[tid].size() : insts[tid].size();
if (insts_available == 0) {
- DPRINTF(Decode, "[tid:%u] Nothing to do, breaking out"
+ DPRINTF(Decode, "[tid:%i] Nothing to do, breaking out"
" early.\n",tid);
// Should I change the status to idle?
- ++decodeIdleCycles;
+ ++stats.idleCycles;
return;
} else if (decodeStatus[tid] == Unblocking) {
- DPRINTF(Decode, "[tid:%u] Unblocking, removing insts from skid "
+ DPRINTF(Decode, "[tid:%i] Unblocking, removing insts from skid "
"buffer.\n",tid);
- ++decodeUnblockCycles;
+ ++stats.unblockCycles;
} else if (decodeStatus[tid] == Running) {
- ++decodeRunCycles;
+ ++stats.runCycles;
}
- DynInstPtr inst;
-
std::queue<DynInstPtr>
&insts_to_decode = decodeStatus[tid] == Unblocking ?
skidBuffer[tid] : insts[tid];
- DPRINTF(Decode, "[tid:%u]: Sending instruction to rename.\n",tid);
+ DPRINTF(Decode, "[tid:%i] Sending instruction to rename.\n",tid);
while (insts_available > 0 && toRenameIndex < decodeWidth) {
assert(!insts_to_decode.empty());
- inst = insts_to_decode.front();
+ DynInstPtr inst = std::move(insts_to_decode.front());
insts_to_decode.pop();
- DPRINTF(Decode, "[tid:%u]: Processing instruction [sn:%lli] with "
+ DPRINTF(Decode, "[tid:%i] Processing instruction [sn:%lli] with "
"PC %s\n", tid, inst->seqNum, inst->pcState());
if (inst->isSquashed()) {
- DPRINTF(Decode, "[tid:%u]: Instruction %i with PC %s is "
+ DPRINTF(Decode, "[tid:%i] Instruction %i with PC %s is "
"squashed, skipping.\n",
tid, inst->seqNum, inst->pcState());
- ++decodeSquashedInsts;
+ ++stats.squashedInsts;
--insts_available;
++(toRename->size);
++toRenameIndex;
- ++decodeDecodedInsts;
+ ++stats.decodedInsts;
--insts_available;
#if TRACING_ON
- inst->decodeTick = curTick() - inst->fetchTick;
+ if (DTRACE(O3PipeView)) {
+ inst->decodeTick = curTick() - inst->fetchTick;
+ }
#endif
// Ensure that if it was predicted as a branch, it really is a
if (inst->readPredTaken() && !inst->isControl()) {
panic("Instruction predicted as a branch!");
- ++decodeControlMispred;
+ ++stats.controlMispred;
// Might want to set some sort of boolean and just do
// a check at the end
}
// Go ahead and compute any PC-relative branches.
- if (inst->isDirectCtrl() && inst->isUncondCtrl()) {
- ++decodeBranchResolved;
+ // This includes direct unconditional control and
+ // direct conditional control that is predicted taken.
+ if (inst->isDirectCtrl() &&
+ (inst->isUncondCtrl() || inst->readPredTaken()))
+ {
+ ++stats.branchResolved;
if (!(inst->branchTarget() == inst->readPredTarg())) {
- ++decodeBranchMispred;
+ ++stats.branchMispred;
// Might want to set some sort of boolean and just do
// a check at the end
squash(inst, inst->threadNumber);
TheISA::PCState target = inst->branchTarget();
- DPRINTF(Decode, "[sn:%i]: Updating predictions: PredPC: %s\n",
- inst->seqNum, target);
+ DPRINTF(Decode,
+ "[tid:%i] [sn:%llu] "
+ "Updating predictions: Wrong predicted target: %s \
+ PredPC: %s\n",
+ tid, inst->seqNum, inst->readPredTarg(), target);
//The micro pc after an instruction level branch should be 0
inst->setPredTarg(target);
break;
wroteToTimeBuffer = true;
}
}
+
+#endif//__CPU_O3_DECODE_IMPL_HH__