bool squashDelaySlot[Impl::MaxThreads];
uint64_t mispredPC[Impl::MaxThreads];
uint64_t nextPC[Impl::MaxThreads];
+ uint64_t nextNPC[Impl::MaxThreads];
InstSeqNum squashedSeqNum[Impl::MaxThreads];
bool includeSquashInst[Impl::MaxThreads];
bool branchTaken;
uint64_t mispredPC;
uint64_t nextPC;
+ uint64_t nextNPC;
unsigned branchCount;
};
bool branchTaken;
uint64_t mispredPC;
uint64_t nextPC;
+ uint64_t nextNPC;
// Represents the instruction that has either been retired or
// squashed. Similar to having a single bus that broadcasts the
toIEW->commitInfo[tid].branchMispredict = false;
toIEW->commitInfo[tid].nextPC = PC[tid];
+ toIEW->commitInfo[tid].nextNPC = nextPC[tid];
}
template <class Impl>
fromIEW->branchTaken[tid];
toIEW->commitInfo[tid].nextPC = fromIEW->nextPC[tid];
+ toIEW->commitInfo[tid].nextNPC = fromIEW->nextNPC[tid];
toIEW->commitInfo[tid].mispredPC = fromIEW->mispredPC[tid];
// Squash Throughout Pipeline
InstSeqNum squash_seq_num = commit.rob->readHeadInst(tid)->seqNum;
- fetch.squash(0, squash_seq_num, true, tid);
+ fetch.squash(0, sizeof(TheISA::MachInst), squash_seq_num, true, tid);
decode.squash(tid);
rename.squash(squash_seq_num, tid);
iew.squash(tid);
bool fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid);
/** Squashes a specific thread and resets the PC. */
- inline void doSquash(const Addr &new_PC, unsigned tid);
+ inline void doSquash(const Addr &new_PC, const Addr &new_NPC, unsigned tid);
/** Squashes a specific thread and resets the PC. Also tells the CPU to
* remove any instructions between fetch and decode that should be squashed.
*/
- void squashFromDecode(const Addr &new_PC, const InstSeqNum &seq_num,
- unsigned tid);
+ void squashFromDecode(const Addr &new_PC, const Addr &new_NPC,
+ const InstSeqNum &seq_num, unsigned tid);
/** Checks if a thread is stalled. */
bool checkStall(unsigned tid) const;
* remove any instructions that are not in the ROB. The source of this
* squash should be the commit stage.
*/
- void squash(const Addr &new_PC, const InstSeqNum &seq_num,
+ void squash(const Addr &new_PC, const Addr &new_NPC,
+ const InstSeqNum &seq_num,
bool squash_delay_slot, unsigned tid);
/** Ticks the fetch stage, processing all inputs signals and fetching
for (int tid = 0; tid < numThreads; tid++) {
PC[tid] = cpu->readPC(tid);
nextPC[tid] = cpu->readNextPC(tid);
-#if ISA_HAS_DELAY_SLOT
nextNPC[tid] = cpu->readNextNPC(tid);
-#endif
}
// Size of cache block.
if (!inst->isControl()) {
#if ISA_HAS_DELAY_SLOT
- Addr cur_PC = next_PC;
- next_PC = cur_PC + instSize; //next_NPC;
- next_NPC = cur_PC + (2 * instSize);//next_NPC + instSize;
- inst->setPredTarg(next_NPC);
+ next_PC = next_NPC;
+ next_NPC = next_NPC + instSize;
+ inst->setPredTarg(next_PC, next_NPC);
#else
next_PC = next_PC + instSize;
- inst->setPredTarg(next_PC);
+ inst->setPredTarg(next_PC, next_PC + sizeof(TheISA::MachInst));
#endif
+ inst->setPredTaken(false);
return false;
}
predict_taken = branchPred.predict(inst, pred_PC, tid);
if (predict_taken) {
- DPRINTF(Fetch, "[tid:%i]: Branch predicted to be true.\n", tid);
+ DPRINTF(Fetch, "[tid:%i]: Branch predicted to be taken.\n", tid);
} else {
- DPRINTF(Fetch, "[tid:%i]: Branch predicted to be false.\n", tid);
+ DPRINTF(Fetch, "[tid:%i]: Branch predicted to be not taken.\n", tid);
}
+ next_PC = next_NPC;
if (predict_taken) {
- next_PC = next_NPC;
next_NPC = pred_PC;
-
// Update delay slot info
++delaySlotInfo[tid].numInsts;
delaySlotInfo[tid].targetAddr = pred_PC;
DPRINTF(Fetch, "[tid:%i]: %i delay slot inst(s) to process.\n", tid,
delaySlotInfo[tid].numInsts);
- } else { // !predict_taken
- if (inst->isCondDelaySlot()) {
- next_PC = pred_PC;
- // The delay slot is skipped here if there is on
- // prediction
- } else {
- next_PC = next_NPC;
- // No need to declare a delay slot here since
- // there is no for the pred. target to jump
- }
-
+ } else {
next_NPC = next_NPC + instSize;
}
#else
predict_taken = branchPred.predict(inst, next_PC, tid);
#endif
+ DPRINTF(Fetch, "[tid:%i]: Branch predicted to go to %#x and then %#x.\n",
+ tid, next_PC, next_NPC);
+ inst->setPredTarg(next_PC, next_NPC);
+ inst->setPredTaken(predict_taken);
++fetchedBranches;
template <class Impl>
inline void
-DefaultFetch<Impl>::doSquash(const Addr &new_PC, unsigned tid)
+DefaultFetch<Impl>::doSquash(const Addr &new_PC,
+ const Addr &new_NPC, unsigned tid)
{
- DPRINTF(Fetch, "[tid:%i]: Squashing, setting PC to: %#x.\n",
- tid, new_PC);
+ DPRINTF(Fetch, "[tid:%i]: Squashing, setting PC to: %#x, NPC to: %#x.\n",
+ tid, new_PC, new_NPC);
PC[tid] = new_PC;
- nextPC[tid] = new_PC + instSize;
- nextNPC[tid] = new_PC + (2 * instSize);
+ nextPC[tid] = new_NPC;
+ nextNPC[tid] = new_NPC + instSize;
// Clear the icache miss if it's outstanding.
if (fetchStatus[tid] == IcacheWaitResponse) {
template<class Impl>
void
-DefaultFetch<Impl>::squashFromDecode(const Addr &new_PC,
+DefaultFetch<Impl>::squashFromDecode(const Addr &new_PC, const Addr &new_NPC,
const InstSeqNum &seq_num,
unsigned tid)
{
DPRINTF(Fetch, "[tid:%i]: Squashing from decode.\n",tid);
- doSquash(new_PC, tid);
+ doSquash(new_PC, new_NPC, tid);
#if ISA_HAS_DELAY_SLOT
if (seq_num <= delaySlotInfo[tid].branchSeqNum) {
template <class Impl>
void
-DefaultFetch<Impl>::squash(const Addr &new_PC, const InstSeqNum &seq_num,
+DefaultFetch<Impl>::squash(const Addr &new_PC, const Addr &new_NPC,
+ const InstSeqNum &seq_num,
bool squash_delay_slot, unsigned tid)
{
DPRINTF(Fetch, "[tid:%u]: Squash from commit.\n",tid);
- doSquash(new_PC, tid);
+ doSquash(new_PC, new_NPC, tid);
#if ISA_HAS_DELAY_SLOT
if (seq_num <= delaySlotInfo[tid].branchSeqNum) {
#endif
// In any case, squash.
squash(fromCommit->commitInfo[tid].nextPC,
+ fromCommit->commitInfo[tid].nextNPC,
doneSeqNum,
fromCommit->commitInfo[tid].squashDelaySlot,
tid);
#endif
// Squash unless we're already squashing
squashFromDecode(fromDecode->decodeInfo[tid].nextPC,
+ fromDecode->decodeInfo[tid].nextNPC,
doneSeqNum,
tid);
// The current PC.
Addr &fetch_PC = PC[tid];
+ Addr &fetch_NPC = nextPC[tid];
+
// Fault code for memory access.
Fault fault = NoFault;
}
Addr next_PC = fetch_PC;
- Addr next_NPC = next_PC + instSize;
+ Addr next_NPC = fetch_NPC;
+
InstSeqNum inst_seq;
MachInst inst;
ExtMachInst ext_inst;
#endif
// Create a new DynInst from the instruction fetched.
- DynInstPtr instruction = new DynInst(ext_inst, fetch_PC,
- next_PC,
+ DynInstPtr instruction = new DynInst(ext_inst,
+ fetch_PC, fetch_NPC,
+ next_PC, next_NPC,
inst_seq, cpu);
instruction->setTid(tid);
if (delaySlotInfo[tid].targetReady &&
delaySlotInfo[tid].numInsts == 0) {
// Set PC to target
- PC[tid] = delaySlotInfo[tid].targetAddr; //next_PC
- nextPC[tid] = next_PC + instSize; //next_NPC
- nextNPC[tid] = next_PC + (2 * instSize);
+ PC[tid] = next_PC;
+ nextPC[tid] = next_NPC;
+ nextNPC[tid] = next_NPC + instSize;
delaySlotInfo[tid].targetReady = false;
} else {
toCommit->branchMispredict[tid] = true;
#if ISA_HAS_DELAY_SLOT
+ int instSize = sizeof(TheISA::MachInst);
bool branch_taken =
- (inst->readNextNPC() != (inst->readPC() + 2 * sizeof(TheISA::MachInst)) &&
- inst->readNextNPC() != (inst->readPC() + 3 * sizeof(TheISA::MachInst)));
+ !(inst->readNextPC() + instSize == inst->readNextNPC() &&
+ (inst->readNextPC() == inst->readPC() + instSize ||
+ inst->readNextPC() == inst->readPC() + 2 * instSize));
DPRINTF(Sparc, "Branch taken = %s [sn:%i]\n",
branch_taken ? "true": "false", inst->seqNum);
toCommit->branchTaken[tid] = branch_taken;
- bool squashDelaySlot =
- (inst->readNextPC() != inst->readPC() + sizeof(TheISA::MachInst));
+ bool squashDelaySlot = true;
+// (inst->readNextPC() != inst->readPC() + sizeof(TheISA::MachInst));
DPRINTF(Sparc, "Squash delay slot = %s [sn:%i]\n",
squashDelaySlot ? "true": "false", inst->seqNum);
toCommit->squashDelaySlot[tid] = squashDelaySlot;
//If we're squashing the delay slot, we need to pick back up at NextPC.
//Otherwise, NextPC isn't being squashed, so we should pick back up at
//NextNPC.
- if (squashDelaySlot)
+ if (squashDelaySlot) {
toCommit->nextPC[tid] = inst->readNextPC();
- else
+ toCommit->nextNPC[tid] = inst->readNextNPC();
+ } else
toCommit->nextPC[tid] = inst->readNextNPC();
#else
toCommit->branchTaken[tid] = inst->readNextPC() !=
toCommit->squash[tid] = true;
toCommit->squashedSeqNum[tid] = inst->seqNum;
toCommit->nextPC[tid] = inst->readNextPC();
+#if ISA_HAS_DELAY_SLOT
+ toCommit->nextNPC[tid] = inst->readNextNPC();
+#endif
toCommit->includeSquashInst[tid] = false;
toCommit->squash[tid] = true;
toCommit->squashedSeqNum[tid] = inst->seqNum;
toCommit->nextPC[tid] = inst->readPC();
+#if ISA_HAS_DELAY_SLOT
+ toCommit->nextNPC[tid] = inst->readNextNPC();
+#endif
// Must include the broadcasted SN in the squash.
toCommit->includeSquashInst[tid] = true;
fetchRedirect[tid] = true;
DPRINTF(IEW, "Execute: Branch mispredict detected.\n");
+ DPRINTF(IEW, "Predicted target was %#x.\n", inst->predPC);
#if ISA_HAS_DELAY_SLOT
DPRINTF(IEW, "Execute: Redirecting fetch to PC: %#x.\n",
inst->nextNPC);
// If incorrect, then signal the ROB that it must be squashed.
squashDueToBranch(inst, tid);
- if (inst->predTaken()) {
+ if (inst->readPredTaken()) {
predictedTakenIncorrect++;
} else {
predictedNotTakenIncorrect++;
public:
/** BaseDynInst constructor given a binary instruction. */
- SparcDynInst(TheISA::ExtMachInst inst, Addr PC,
- Addr Pred_PC, InstSeqNum seq_num, O3CPU *cpu);
+ SparcDynInst(TheISA::ExtMachInst inst, Addr PC, Addr NPC,
+ Addr Pred_PC, Addr Pred_NPC, InstSeqNum seq_num, O3CPU *cpu);
/** BaseDynInst constructor given a static inst pointer. */
SparcDynInst(StaticInstPtr &_staticInst);
#include "cpu/o3/sparc/dyn_inst.hh"
/**
 * SparcDynInst constructor taking a binary (ExtMachInst) instruction.
 *
 * The constructor performs no work of its own beyond delegating: every
 * argument is forwarded unchanged, in order, to the BaseDynInst<Impl>
 * constructor, after which initVars() is invoked.
 *
 * In the updated signature the base class receives two additional
 * addresses, NPC and Pred_NPC, alongside PC and Pred_PC.
 * NOTE(review): NPC / Pred_NPC presumably are the actual and predicted
 * "next next" PCs needed for delay-slot handling -- confirm against the
 * BaseDynInst constructor.
 */
template <class Impl>
-SparcDynInst<Impl>::SparcDynInst(TheISA::ExtMachInst inst, Addr PC,
- Addr Pred_PC, InstSeqNum seq_num, O3CPU *cpu)
- : BaseDynInst<Impl>(inst, PC, Pred_PC, seq_num, cpu)
+SparcDynInst<Impl>::SparcDynInst(TheISA::ExtMachInst inst,
+ Addr PC, Addr NPC, Addr Pred_PC, Addr Pred_NPC,
+ InstSeqNum seq_num, O3CPU *cpu)
+ : BaseDynInst<Impl>(inst, PC, NPC, Pred_PC, Pred_NPC, seq_num, cpu)
{
// Base-class construction has already consumed all arguments; the only
// step done here is the call to initVars().
initVars();
}