From: Gabe Black Date: Sat, 16 Dec 2006 12:32:06 +0000 (-0500) Subject: Made branch delay slots get squashed, and passed back an NPC and NNPC to start fetchi... X-Git-Tag: m5_2.0_beta3~274^2~8 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=37b9966eb466b1655f0d4e604bafa729a3aaea6a;p=gem5.git Made branch delay slots get squashed, and passed back an NPC and NNPC to start fetching from. --HG-- extra : convert_revision : a2e4845fedf113b5a2fd92d3d28ce5b006278103 --- diff --git a/src/cpu/o3/comm.hh b/src/cpu/o3/comm.hh index 4683c77af..d96919007 100644 --- a/src/cpu/o3/comm.hh +++ b/src/cpu/o3/comm.hh @@ -90,6 +90,7 @@ struct DefaultIEWDefaultCommit { bool squashDelaySlot[Impl::MaxThreads]; uint64_t mispredPC[Impl::MaxThreads]; uint64_t nextPC[Impl::MaxThreads]; + uint64_t nextNPC[Impl::MaxThreads]; InstSeqNum squashedSeqNum[Impl::MaxThreads]; bool includeSquashInst[Impl::MaxThreads]; @@ -121,6 +122,7 @@ struct TimeBufStruct { bool branchTaken; uint64_t mispredPC; uint64_t nextPC; + uint64_t nextNPC; unsigned branchCount; }; @@ -160,6 +162,7 @@ struct TimeBufStruct { bool branchTaken; uint64_t mispredPC; uint64_t nextPC; + uint64_t nextNPC; // Represents the instruction that has either been retired or // squashed. Similar to having a single bus that broadcasts the diff --git a/src/cpu/o3/commit_impl.hh b/src/cpu/o3/commit_impl.hh index 3178410a8..194138efc 100644 --- a/src/cpu/o3/commit_impl.hh +++ b/src/cpu/o3/commit_impl.hh @@ -514,6 +514,7 @@ DefaultCommit::squashAll(unsigned tid) toIEW->commitInfo[tid].branchMispredict = false; toIEW->commitInfo[tid].nextPC = PC[tid]; + toIEW->commitInfo[tid].nextNPC = nextPC[tid]; } template @@ -770,6 +771,7 @@ DefaultCommit::commit() fromIEW->branchTaken[tid]; toIEW->commitInfo[tid].nextPC = fromIEW->nextPC[tid]; + toIEW->commitInfo[tid].nextNPC = fromIEW->nextNPC[tid]; toIEW->commitInfo[tid].mispredPC = fromIEW->mispredPC[tid]; diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc index 4056d876f..5616ba398 100644 --- a/src/cpu/o3/cpu.cc +++ b/src/cpu/o3/cpu.cc @@ -700,7 +700,7 @@ FullO3CPU::removeThread(unsigned tid) // Squash Throughout Pipeline InstSeqNum squash_seq_num = commit.rob->readHeadInst(tid)->seqNum; - fetch.squash(0, squash_seq_num, true, tid); + fetch.squash(0, sizeof(TheISA::MachInst), squash_seq_num, true, tid); decode.squash(tid); rename.squash(squash_seq_num, tid); iew.squash(tid); diff --git a/src/cpu/o3/fetch.hh b/src/cpu/o3/fetch.hh index 04016347a..4f5a161e0 100644 --- a/src/cpu/o3/fetch.hh +++ b/src/cpu/o3/fetch.hh @@ -239,13 +239,13 @@ class DefaultFetch bool fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid); /** Squashes a specific thread and resets the PC. */ - inline void doSquash(const Addr &new_PC, unsigned tid); + inline void doSquash(const Addr &new_PC, const Addr &new_NPC, unsigned tid); /** Squashes a specific thread and resets the PC. Also tells the CPU to * remove any instructions between fetch and decode that should be sqaushed. */ - void squashFromDecode(const Addr &new_PC, const InstSeqNum &seq_num, - unsigned tid); + void squashFromDecode(const Addr &new_PC, const Addr &new_NPC, + const InstSeqNum &seq_num, unsigned tid); /** Checks if a thread is stalled. */ bool checkStall(unsigned tid) const; @@ -259,7 +259,8 @@ class DefaultFetch * remove any instructions that are not in the ROB. The source of this * squash should be the commit stage. */ - void squash(const Addr &new_PC, const InstSeqNum &seq_num, + void squash(const Addr &new_PC, const Addr &new_NPC, + const InstSeqNum &seq_num, bool squash_delay_slot, unsigned tid); /** Ticks the fetch stage, processing all inputs signals and fetching diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh index 6cff52429..5cd2e3514 100644 --- a/src/cpu/o3/fetch_impl.hh +++ b/src/cpu/o3/fetch_impl.hh @@ -319,9 +319,7 @@ DefaultFetch::initStage() for (int tid = 0; tid < numThreads; tid++) { PC[tid] = cpu->readPC(tid); nextPC[tid] = cpu->readNextPC(tid); -#if ISA_HAS_DELAY_SLOT nextNPC[tid] = cpu->readNextNPC(tid); -#endif } // Size of cache block. @@ -504,14 +502,14 @@ DefaultFetch::lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC, if (!inst->isControl()) { #if ISA_HAS_DELAY_SLOT - Addr cur_PC = next_PC; - next_PC = cur_PC + instSize; //next_NPC; - next_NPC = cur_PC + (2 * instSize);//next_NPC + instSize; - inst->setPredTarg(next_NPC); + next_PC = next_NPC; + next_NPC = next_NPC + instSize; + inst->setPredTarg(next_PC, next_NPC); #else next_PC = next_PC + instSize; - inst->setPredTarg(next_PC); + inst->setPredTarg(next_PC, next_PC + sizeof(TheISA::MachInst)); #endif + inst->setPredTaken(false); return false; } @@ -521,36 +519,29 @@ DefaultFetch::lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC, predict_taken = branchPred.predict(inst, pred_PC, tid); if (predict_taken) { - DPRINTF(Fetch, "[tid:%i]: Branch predicted to be true.\n", tid); + DPRINTF(Fetch, "[tid:%i]: Branch predicted to be taken.\n", tid); } else { - DPRINTF(Fetch, "[tid:%i]: Branch predicted to be false.\n", tid); + DPRINTF(Fetch, "[tid:%i]: Branch predicted to be not taken.\n", tid); } + next_PC = next_NPC; if (predict_taken) { - next_PC = next_NPC; next_NPC = pred_PC; - // Update delay slot info ++delaySlotInfo[tid].numInsts; delaySlotInfo[tid].targetAddr = pred_PC; DPRINTF(Fetch, "[tid:%i]: %i delay slot inst(s) to process.\n", tid, delaySlotInfo[tid].numInsts); - } else { // !predict_taken - if (inst->isCondDelaySlot()) { - next_PC = pred_PC; - // The delay slot is skipped here if there is on - // prediction - } else { - next_PC = next_NPC; - // No need to declare a delay slot here since - // there is no for the pred. target to jump - } - + } else { next_NPC = next_NPC + instSize; } #else predict_taken = branchPred.predict(inst, next_PC, tid); #endif + DPRINTF(Fetch, "[tid:%i]: Branch predicted to go to %#x and then %#x.\n", + tid, next_PC, next_NPC); + inst->setPredTarg(next_PC, next_NPC); + inst->setPredTaken(predict_taken); ++fetchedBranches; @@ -671,14 +662,15 @@ DefaultFetch::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid template inline void -DefaultFetch::doSquash(const Addr &new_PC, unsigned tid) +DefaultFetch::doSquash(const Addr &new_PC, + const Addr &new_NPC, unsigned tid) { - DPRINTF(Fetch, "[tid:%i]: Squashing, setting PC to: %#x.\n", - tid, new_PC); + DPRINTF(Fetch, "[tid:%i]: Squashing, setting PC to: %#x, NPC to: %#x.\n", + tid, new_PC, new_NPC); PC[tid] = new_PC; - nextPC[tid] = new_PC + instSize; - nextNPC[tid] = new_PC + (2 * instSize); + nextPC[tid] = new_NPC; + nextNPC[tid] = new_NPC + instSize; // Clear the icache miss if it's outstanding. if (fetchStatus[tid] == IcacheWaitResponse) { @@ -704,13 +696,13 @@ DefaultFetch::doSquash(const Addr &new_PC, unsigned tid) template void -DefaultFetch::squashFromDecode(const Addr &new_PC, +DefaultFetch::squashFromDecode(const Addr &new_PC, const Addr &new_NPC, const InstSeqNum &seq_num, unsigned tid) { DPRINTF(Fetch, "[tid:%i]: Squashing from decode.\n",tid); - doSquash(new_PC, tid); + doSquash(new_PC, new_NPC, tid); #if ISA_HAS_DELAY_SLOT if (seq_num <= delaySlotInfo[tid].branchSeqNum) { @@ -793,12 +785,13 @@ DefaultFetch::updateFetchStatus() template void -DefaultFetch::squash(const Addr &new_PC, const InstSeqNum &seq_num, +DefaultFetch::squash(const Addr &new_PC, const Addr &new_NPC, + const InstSeqNum &seq_num, bool squash_delay_slot, unsigned tid) { DPRINTF(Fetch, "[tid:%u]: Squash from commit.\n",tid); - doSquash(new_PC, tid); + doSquash(new_PC, new_NPC, tid); #if ISA_HAS_DELAY_SLOT if (seq_num <= delaySlotInfo[tid].branchSeqNum) { @@ -928,6 +921,7 @@ DefaultFetch::checkSignalsAndUpdate(unsigned tid) #endif // In any case, squash. squash(fromCommit->commitInfo[tid].nextPC, + fromCommit->commitInfo[tid].nextNPC, doneSeqNum, fromCommit->commitInfo[tid].squashDelaySlot, tid); @@ -985,6 +979,7 @@ DefaultFetch::checkSignalsAndUpdate(unsigned tid) #endif // Squash unless we're already squashing squashFromDecode(fromDecode->decodeInfo[tid].nextPC, + fromDecode->decodeInfo[tid].nextNPC, doneSeqNum, tid); @@ -1041,6 +1036,8 @@ DefaultFetch::fetch(bool &status_change) // The current PC. Addr &fetch_PC = PC[tid]; + Addr &fetch_NPC = nextPC[tid]; + // Fault code for memory access. Fault fault = NoFault; @@ -1097,7 +1094,8 @@ DefaultFetch::fetch(bool &status_change) } Addr next_PC = fetch_PC; - Addr next_NPC = next_PC + instSize; + Addr next_NPC = fetch_NPC; + InstSeqNum inst_seq; MachInst inst; ExtMachInst ext_inst; @@ -1144,8 +1142,9 @@ DefaultFetch::fetch(bool &status_change) #endif // Create a new DynInst from the instruction fetched. - DynInstPtr instruction = new DynInst(ext_inst, fetch_PC, - next_PC, + DynInstPtr instruction = new DynInst(ext_inst, + fetch_PC, fetch_NPC, + next_PC, next_NPC, inst_seq, cpu); instruction->setTid(tid); @@ -1243,9 +1242,9 @@ DefaultFetch::fetch(bool &status_change) if (delaySlotInfo[tid].targetReady && delaySlotInfo[tid].numInsts == 0) { // Set PC to target - PC[tid] = delaySlotInfo[tid].targetAddr; //next_PC - nextPC[tid] = next_PC + instSize; //next_NPC - nextNPC[tid] = next_PC + (2 * instSize); + PC[tid] = next_PC; + nextPC[tid] = next_NPC; + nextNPC[tid] = next_NPC + instSize; delaySlotInfo[tid].targetReady = false; } else { diff --git a/src/cpu/o3/iew_impl.hh b/src/cpu/o3/iew_impl.hh index 85db68576..24c8484b4 100644 --- a/src/cpu/o3/iew_impl.hh +++ b/src/cpu/o3/iew_impl.hh @@ -481,25 +481,28 @@ DefaultIEW::squashDueToBranch(DynInstPtr &inst, unsigned tid) toCommit->branchMispredict[tid] = true; #if ISA_HAS_DELAY_SLOT + int instSize = sizeof(TheISA::MachInst); bool branch_taken = - (inst->readNextNPC() != (inst->readPC() + 2 * sizeof(TheISA::MachInst)) && - inst->readNextNPC() != (inst->readPC() + 3 * sizeof(TheISA::MachInst))); + !(inst->readNextPC() + instSize == inst->readNextNPC() && + (inst->readNextPC() == inst->readPC() + instSize || + inst->readNextPC() == inst->readPC() + 2 * instSize)); DPRINTF(Sparc, "Branch taken = %s [sn:%i]\n", branch_taken ? "true": "false", inst->seqNum); toCommit->branchTaken[tid] = branch_taken; - bool squashDelaySlot = - (inst->readNextPC() != inst->readPC() + sizeof(TheISA::MachInst)); + bool squashDelaySlot = true; +// (inst->readNextPC() != inst->readPC() + sizeof(TheISA::MachInst)); DPRINTF(Sparc, "Squash delay slot = %s [sn:%i]\n", squashDelaySlot ? "true": "false", inst->seqNum); toCommit->squashDelaySlot[tid] = squashDelaySlot; //If we're squashing the delay slot, we need to pick back up at NextPC. //Otherwise, NextPC isn't being squashed, so we should pick back up at //NextNPC. - if (squashDelaySlot) + if (squashDelaySlot) { toCommit->nextPC[tid] = inst->readNextPC(); - else + toCommit->nextNPC[tid] = inst->readNextNPC(); + } else toCommit->nextPC[tid] = inst->readNextNPC(); #else toCommit->branchTaken[tid] = inst->readNextPC() != @@ -522,6 +525,9 @@ DefaultIEW::squashDueToMemOrder(DynInstPtr &inst, unsigned tid) toCommit->squash[tid] = true; toCommit->squashedSeqNum[tid] = inst->seqNum; toCommit->nextPC[tid] = inst->readNextPC(); +#if ISA_HAS_DELAY_SLOT + toCommit->nextNPC[tid] = inst->readNextNPC(); +#endif toCommit->includeSquashInst[tid] = false; @@ -538,6 +544,9 @@ DefaultIEW::squashDueToMemBlocked(DynInstPtr &inst, unsigned tid) toCommit->squash[tid] = true; toCommit->squashedSeqNum[tid] = inst->seqNum; toCommit->nextPC[tid] = inst->readPC(); +#if ISA_HAS_DELAY_SLOT + toCommit->nextNPC[tid] = inst->readNextNPC(); +#endif // Must include the broadcasted SN in the squash. toCommit->includeSquashInst[tid] = true; @@ -1342,6 +1351,7 @@ DefaultIEW::executeInsts() fetchRedirect[tid] = true; DPRINTF(IEW, "Execute: Branch mispredict detected.\n"); + DPRINTF(IEW, "Predicted target was %#x.\n", inst->predPC); #if ISA_HAS_DELAY_SLOT DPRINTF(IEW, "Execute: Redirecting fetch to PC: %#x.\n", inst->nextNPC); @@ -1352,7 +1362,7 @@ DefaultIEW::executeInsts() // If incorrect, then signal the ROB that it must be squashed. squashDueToBranch(inst, tid); - if (inst->predTaken()) { + if (inst->readPredTaken()) { predictedTakenIncorrect++; } else { predictedNotTakenIncorrect++; diff --git a/src/cpu/o3/sparc/dyn_inst.hh b/src/cpu/o3/sparc/dyn_inst.hh index fda99cb6c..e95ae2fd5 100644 --- a/src/cpu/o3/sparc/dyn_inst.hh +++ b/src/cpu/o3/sparc/dyn_inst.hh @@ -56,8 +56,8 @@ class SparcDynInst : public BaseDynInst public: /** BaseDynInst constructor given a binary instruction. */ - SparcDynInst(TheISA::ExtMachInst inst, Addr PC, - Addr Pred_PC, InstSeqNum seq_num, O3CPU *cpu); + SparcDynInst(TheISA::ExtMachInst inst, Addr PC, Addr NPC, + Addr Pred_PC, Addr Pred_NPC, InstSeqNum seq_num, O3CPU *cpu); /** BaseDynInst constructor given a static inst pointer. */ SparcDynInst(StaticInstPtr &_staticInst); diff --git a/src/cpu/o3/sparc/dyn_inst_impl.hh b/src/cpu/o3/sparc/dyn_inst_impl.hh index b830ee7bd..c4d30b6f4 100644 --- a/src/cpu/o3/sparc/dyn_inst_impl.hh +++ b/src/cpu/o3/sparc/dyn_inst_impl.hh @@ -31,9 +31,10 @@ #include "cpu/o3/sparc/dyn_inst.hh" template -SparcDynInst::SparcDynInst(TheISA::ExtMachInst inst, Addr PC, - Addr Pred_PC, InstSeqNum seq_num, O3CPU *cpu) - : BaseDynInst(inst, PC, Pred_PC, seq_num, cpu) +SparcDynInst::SparcDynInst(TheISA::ExtMachInst inst, + Addr PC, Addr NPC, Addr Pred_PC, Addr Pred_NPC, + InstSeqNum seq_num, O3CPU *cpu) + : BaseDynInst(inst, PC, NPC, Pred_PC, Pred_NPC, seq_num, cpu) { initVars(); }