inorder-bpred: edits to handle non-delay-slot ISAs
author Korey Sewell <ksewell@umich.edu>
Tue, 12 May 2009 19:01:14 +0000 (15:01 -0400)
committer Korey Sewell <ksewell@umich.edu>
Tue, 12 May 2009 19:01:14 +0000 (15:01 -0400)
Changes so that InOrder can work for a non-delay-slot ISA like Alpha. Typically, the changes have to do with handling misspeculated branches at different points in the pipeline.

src/arch/alpha/process.cc
src/cpu/inorder/inorder_dyn_inst.hh
src/cpu/inorder/pipeline_stage.cc
src/cpu/inorder/resources/bpred_unit.cc
src/cpu/inorder/resources/branch_predictor.cc
src/cpu/inorder/resources/execution_unit.cc
src/cpu/inorder/resources/fetch_seq_unit.cc
src/cpu/o3/thread_context.hh

index 093d83d8a4131aa36e911f81ff4c4243517c92e4..93df459ae25d91c58ed4eb9d70c7096aa1138829 100644 (file)
@@ -166,11 +166,11 @@ AlphaLiveProcess::argsInit(int intSize, int pageSize)
     tc->setPC(prog_entry);
     tc->setNextPC(prog_entry + sizeof(MachInst));
 
-#if THE_ISA != ALPHA_ISA //e.g. MIPS or Sparc
+    // MIPS/Sparc need NNPC for delay slot handling, while
+    // Alpha has no delay slots... However, CPU models
+    // cycle PCs by PC=NPC, NPC=NNPC, etc. so setting this
+    // here ensures CPU-Model Compatibility across board
     tc->setNextNPC(prog_entry + (2 * sizeof(MachInst)));
-#endif
-
-
 }
 
 void
index 12a9a41760c1ad62a46fc8df9d4f2beee651a91b..8e88fc583b6a112ee82beb61bb7e78fdd45d1043 100644 (file)
@@ -264,6 +264,12 @@ class InOrderDynInst : public FastAlloc, public RefCounted
     /** Predicted next PC. */
     Addr predPC;
 
+    /** Predicted next NPC. */
+    Addr predNPC;
+
+    /** Predicted next microPC */
+    Addr predMicroPC;
+
     /** Address to fetch from */
     Addr fetchAddr;
 
@@ -506,7 +512,14 @@ class InOrderDynInst : public FastAlloc, public RefCounted
     /** Returns the next NPC.  This could be the speculative next NPC if it is
      *  called prior to the actual branch target being calculated.
      */
-    Addr readNextNPC() { return nextNPC; }
+    Addr readNextNPC()
+    {
+#if ISA_HAS_DELAY_SLOT
+        return nextNPC;
+#else
+        return nextPC + sizeof(TheISA::MachInst);
+#endif
+    }
 
     /** Set the next PC of this instruction (its actual target). */
     void setNextNPC(uint64_t val) { nextNPC = val; }
@@ -522,19 +535,26 @@ class InOrderDynInst : public FastAlloc, public RefCounted
     /** Returns the predicted target of the branch. */
     Addr readPredTarg() { return predPC; }
 
+    /** Returns the predicted PC immediately after the branch. */
+    Addr readPredPC() { return predPC; }
+
+    /** Returns the predicted PC two instructions after the branch */
+    Addr readPredNPC() { return predNPC; }
+
+    /** Returns the predicted micro PC after the branch */
+    Addr readPredMicroPC() { return predMicroPC; }
+
     /** Returns whether the instruction was predicted taken or not. */
     bool predTaken() { return predictTaken; }
 
     /** Returns whether the instruction mispredicted. */
     bool mispredicted()
     {
-        // Special case since a not-taken, cond. delay slot, effectively
-        // nullifies the delay slot instruction
-        if (isCondDelaySlot() && !predictTaken) {
-            return predPC != nextPC;
-        } else {
-            return predPC != nextNPC;
-        }
+#if ISA_HAS_DELAY_SLOT
+        return predPC != nextNPC;
+#else
+        return predPC != nextPC;
+#endif
     }
 
     /** Returns whether the instruction mispredicted. */
index cb69464b0aee663aac295b9c034ce349394ad7f8..d8e26f725e564ae446abc3425fc28104099bfae6 100644 (file)
@@ -342,13 +342,21 @@ PipelineStage::squashDueToBranch(DynInstPtr &inst, unsigned tid)
         toPrevStages->stageInfo[stageNum][tid].doneSeqNum = inst->seqNum;
         toPrevStages->stageInfo[stageNum][tid].squash = true;
         toPrevStages->stageInfo[stageNum][tid].nextPC = inst->readPredTarg();
+
+
+#if ISA_HAS_DELAY_SLOT
         toPrevStages->stageInfo[stageNum][tid].branchTaken = inst->readNextNPC() !=
             (inst->readNextPC() + sizeof(TheISA::MachInst));
         toPrevStages->stageInfo[stageNum][tid].bdelayDoneSeqNum = inst->bdelaySeqNum;
-
-        DPRINTF(InOrderStage, "Target being re-set to %08p\n", inst->readPredTarg());
         InstSeqNum squash_seq_num = inst->bdelaySeqNum;
+#else
+        toPrevStages->stageInfo[stageNum][tid].branchTaken = inst->readNextPC() !=
+            (inst->readPC() + sizeof(TheISA::MachInst));
+        toPrevStages->stageInfo[stageNum][tid].bdelayDoneSeqNum = inst->seqNum;
+        InstSeqNum squash_seq_num = inst->seqNum;
+#endif
 
+        DPRINTF(InOrderStage, "Target being re-set to %08p\n", inst->readPredTarg());
         DPRINTF(InOrderStage, "[tid:%i]: Squashing after [sn:%i], due to [sn:%i] "
                 "branch.\n", tid, squash_seq_num, inst->seqNum);
 
index 66d0779a2a14a8805e3d69be1cebb1c2af9abacf..df6b33792719d9449f0ec27a3603cd1a3c73a2aa 100644 (file)
@@ -196,7 +196,7 @@ BPredUnit::predict(DynInstPtr &inst, Addr &PC, unsigned tid)
             predict_record.RASIndex = RAS[tid].topIdx();
             predict_record.RASTarget = target;
 
-           assert(predict_record.RASIndex < 16);
+            assert(predict_record.RASIndex < 16);
 
             RAS[tid].pop();
 
@@ -219,14 +219,14 @@ BPredUnit::predict(DynInstPtr &inst, Addr &PC, unsigned tid)
             }
 
             if (inst->isCall() &&
-                 inst->isUncondCtrl() &&
-                   inst->isDirectCtrl()) {
-               target = inst->branchTarget();
+                inst->isUncondCtrl() &&
+                inst->isDirectCtrl()) {
+                target = inst->branchTarget();
 
                 DPRINTF(Fetch, "BranchPred: [tid:%i]: Setting %#x predicted"
                         " target to %#x.\n",
                         tid, inst->readPC(), target);
-           } else if (BTB.valid(PC, tid)) {
+            } else if (BTB.valid(PC, tid)) {
                 ++BTBHits;
 
                 // If it's not a return, use the BTB to get the target addr.
@@ -248,7 +248,12 @@ BPredUnit::predict(DynInstPtr &inst, Addr &PC, unsigned tid)
         PC = target;
         inst->setPredTarg(target);
     } else {
+#if ISA_HAS_DELAY_SLOT
+        // This value will be inst->PC + 4 (nextPC)
+        // Delay Slot archs need this to be inst->PC + 8 (nextNPC)
+        // so we increment one more time here.
         PC = PC + sizeof(MachInst);
+#endif
         inst->setPredTarg(PC);
     }
 
index 511a0ac82fd060aa875ae76ddab383797314376b..d8c0730afb7855f80529a2ab81f088b962859279 100644 (file)
@@ -78,12 +78,12 @@ BranchPredictor::execute(int slot_num)
             Addr pred_PC = inst->readNextPC();
 
             if (inst->isControl()) {
-                // If predicted, the pred_PC will be updated to new target value
                 // If not, the pred_PC be updated to pc+8
+                // If predicted, the pred_PC will be updated to new target value
                 bool predict_taken = branchPred.predict(inst, pred_PC, tid);
 
                 if (predict_taken) {
-                    DPRINTF(Resource, "[tid:%i]: [sn:%i]: Branch predicted true.\n",
+                    DPRINTF(InOrderBPred, "[tid:%i]: [sn:%i]: Branch predicted true.\n",
                             tid, seq_num);
 
                     inst->setPredTarg(pred_PC);
index 843adb5b0d9335b81b51552412766dea8e9d19b9..60cbac8af30e838ccbaa8d1e448ae8109149b1f5 100644 (file)
@@ -76,7 +76,7 @@ ExecutionUnit::execute(int slot_num)
       case ExecuteInst:
         {
             if (inst->isMemRef()) {
-                fatal("%s not configured to handle memory ops.\n", resName);
+                panic("%s not configured to handle memory ops.\n", resName);
             } else if (inst->isControl()) {
                 // Evaluate Branch
                 fault = inst->execute();
@@ -111,23 +111,33 @@ ExecutionUnit::execute(int slot_num)
                                         "[sn:%i] PC %#x mispredicted as not taken.\n", tid,
                                         seq_num, inst->PC);
                             } else {
+#if ISA_HAS_DELAY_SLOT
                                 inst->bdelaySeqNum = seq_num + 1;
-
+                                inst->setPredTarg(inst->nextNPC);
+#else
+                                inst->bdelaySeqNum = seq_num;
+                                inst->setPredTarg(inst->nextPC);
+#endif
                                 DPRINTF(InOrderExecute, "[tid:%i]: Misprediction detected at "
                                         "[sn:%i] PC %#x,\n\t squashing after delay slot "
                                         "instruction [sn:%i].\n",
                                         tid, seq_num, inst->PC, inst->bdelaySeqNum);
                                 DPRINTF(InOrderStall, "STALL: [tid:%i]: Branch "
                                         "misprediction at %#x\n", tid, inst->PC);
-                                inst->setPredTarg(inst->nextNPC);
                             }
 
                             DPRINTF(InOrderExecute, "[tid:%i] Redirecting fetch to %#x.\n", tid,
                                     inst->readPredTarg());
 
                         } else if(inst->isIndirectCtrl()){
+#if ISA_HAS_DELAY_SLOT
                             inst->setPredTarg(inst->nextNPC);
                             inst->bdelaySeqNum = seq_num + 1;
+#else
+                            inst->setPredTarg(inst->nextPC);
+                            inst->bdelaySeqNum = seq_num;
+#endif
+
                             DPRINTF(InOrderExecute, "[tid:%i] Redirecting fetch to %#x.\n", tid,
                                     inst->readPredTarg());
                         } else {
@@ -151,7 +161,13 @@ ExecutionUnit::execute(int slot_num)
                         } else {
                             predictedNotTakenIncorrect++;
                         }
+                    } else {
+                        DPRINTF(InOrderExecute, "[tid:%i]: [sn:%i]: Prediction Correct.\n",
+                                inst->readTid(), seq_num, inst->readIntResult(0));
                     }
+
+                    DPRINTF(InOrderExecute, "[tid:%i]: [sn:%i]: The result of execution is 0x%x.\n",
+                            inst->readTid(), seq_num, inst->readIntResult(0));
                     exec_req->done();
                 } else {
                     warn("inst [sn:%i] had a %s fault", seq_num, fault->name());
@@ -164,8 +180,8 @@ ExecutionUnit::execute(int slot_num)
                     inst->setExecuted();
                     exec_req->done();
 
-                    DPRINTF(InOrderExecute, "[tid:%i]: The result of execution is 0x%x.\n",
-                            inst->readTid(), inst->readIntResult(0));
+                    DPRINTF(InOrderExecute, "[tid:%i]: [sn:%i]: The result of execution is 0x%x.\n",
+                            inst->readTid(), seq_num, inst->readIntResult(0));
                 } else {
                     warn("inst [sn:%i] had a %s fault", seq_num, fault->name());
                     cpu->trap(fault, tid);
index 444252e1b339a7eb58e5d98a95bc6d77aa8f1a13..69610ae58f872bb65c351d3cbfae68f404fa7e23 100644 (file)
@@ -96,13 +96,16 @@ FetchSeqUnit::execute(int slot_num)
                 inst->setNextPC(PC[tid] + instSize);
                 inst->setNextNPC(PC[tid] + (instSize * 2));
 
+#if ISA_HAS_DELAY_SLOT
                 inst->setPredTarg(inst->readNextNPC());
-
+#else
+                inst->setPredTarg(inst->readNextPC());
+#endif
                 inst->setMemAddr(PC[tid]);
                 inst->setSeqNum(cpu->getAndIncrementInstSeq(tid));
 
-                DPRINTF(InOrderFetchSeq, "[tid:%i]: Assigning [sn:%i] to PC %08p\n", tid,
-                        inst->seqNum, inst->readPC());
+                DPRINTF(InOrderFetchSeq, "[tid:%i]: Assigning [sn:%i] to PC %08p, NPC %08p, NNPC %08p\n", tid,
+                        inst->seqNum, inst->readPC(), inst->readNextPC(), inst->readNextNPC());
 
                 if (delaySlotInfo[tid].numInsts > 0) {
                     --delaySlotInfo[tid].numInsts;
@@ -150,30 +153,37 @@ FetchSeqUnit::execute(int slot_num)
 
                     squashAfterInst(inst, stage_num, tid);
                 } else if (!inst->isCondDelaySlot() && !inst->predTaken()) {
-                // Not-Taken Control
+                    // Not-Taken Control
                     DPRINTF(InOrderFetchSeq, "[tid:%i]: [sn:%i]: Predicted Not-Taken Control "
                             "inst. updating PC to %08p\n", tid, inst->seqNum,
                             inst->readNextPC());
-
+#if ISA_HAS_DELAY_SLOT
                     ++delaySlotInfo[tid].numInsts;
                     delaySlotInfo[tid].targetReady = false;
                     delaySlotInfo[tid].targetAddr = inst->readNextNPC();
-
+#else
+                    assert(delaySlotInfo[tid].numInsts == 0);
+#endif
                 } else if (inst->predTaken()) {
-                // Taken Control
+                    // Taken Control
+#if ISA_HAS_DELAY_SLOT
                     ++delaySlotInfo[tid].numInsts;
                     delaySlotInfo[tid].targetReady = false;
                     delaySlotInfo[tid].targetAddr = inst->readPredTarg();
 
                     DPRINTF(InOrderFetchSeq, "[tid:%i]: [sn:%i] Updating delay slot target "
                             "to PC %08p\n", tid, inst->seqNum, inst->readPredTarg());
-
-                    // Set-Up Squash Through-Out Pipeline
-                    DPRINTF(InOrderFetchSeq, "[tid:%i] Setting up squash to start from stage %i, after [sn:%i].\n",
-                            tid, stage_num, seq_num + 1);
                     inst->bdelaySeqNum = seq_num + 1;
+#else
+                    inst->bdelaySeqNum = seq_num;
+                    assert(delaySlotInfo[tid].numInsts == 0);
+#endif
+
                     inst->squashingStage = stage_num;
 
+                    DPRINTF(InOrderFetchSeq, "[tid:%i] Setting up squash to start from stage %i, after [sn:%i].\n",
+                            tid, stage_num, inst->bdelaySeqNum);
+
                     // Do Squashing
                     squashAfterInst(inst, stage_num, tid);
                 }
@@ -239,6 +249,10 @@ FetchSeqUnit::squash(DynInstPtr inst, int squash_stage,
             DPRINTF(InOrderFetchSeq, "[tid:%i]: Setting PC to %08p.\n",
                     tid, PC[tid]);
         } else {
+#if !ISA_HAS_DELAY_SLOT
+            assert(0);
+#endif
+
             delaySlotInfo[tid].numInsts = 1;
             delaySlotInfo[tid].targetReady = false;
             delaySlotInfo[tid].targetAddr = (inst->procDelaySlotOnMispred) ? inst->branchTarget() : new_PC;
index e7c9c3b8fa65ec13663b08a4760c79a8c664d80f..b10305d5d66502ba90e186dfe76383ac20c38cce 100755 (executable)
@@ -265,9 +265,6 @@ class O3ThreadContext : public ThreadContext
 
     virtual void setNextNPC(uint64_t val)
     {
-#if THE_ISA == ALPHA_ISA
-        panic("Not supported on Alpha!");
-#endif
         this->cpu->setNextNPC(val, this->thread->threadId());
     }
 };