Made branch delay slots get squashed, and passed back an NPC and NNPC to start fetching from.

author Gabe Black <gblack@eecs.umich.edu>

Sat, 16 Dec 2006 12:32:06 +0000 (07:32 -0500)

committer Gabe Black <gblack@eecs.umich.edu>

Sat, 16 Dec 2006 12:32:06 +0000 (07:32 -0500)
author Gabe Black <gblack@eecs.umich.edu>
Sat, 16 Dec 2006 12:32:06 +0000 (07:32 -0500)
committer Gabe Black <gblack@eecs.umich.edu>
Sat, 16 Dec 2006 12:32:06 +0000 (07:32 -0500)
diff --git a/src/cpu/o3/comm.hh b/src/cpu/o3/comm.hh

index 4683c77afacdee08a6a28e2682feb8adc7eafe22..d9691900746ce420422e0d48f5ca4452d4e4a9c5 100644 (file)
--- a/src/cpu/o3/comm.hh
+++ b/src/cpu/o3/comm.hh
@@ -90,6 +90,7 @@ struct DefaultIEWDefaultCommit {
      bool squashDelaySlot[Impl::MaxThreads];
      uint64_t mispredPC[Impl::MaxThreads];
      uint64_t nextPC[Impl::MaxThreads];
+    uint64_t nextNPC[Impl::MaxThreads];
      InstSeqNum squashedSeqNum[Impl::MaxThreads];
  
      bool includeSquashInst[Impl::MaxThreads];
@@ -121,6 +122,7 @@ struct TimeBufStruct {
          bool branchTaken;
          uint64_t mispredPC;
          uint64_t nextPC;
+        uint64_t nextNPC;
  
          unsigned branchCount;
      };
@@ -160,6 +162,7 @@ struct TimeBufStruct {
          bool branchTaken;
          uint64_t mispredPC;
          uint64_t nextPC;
+        uint64_t nextNPC;
  
          // Represents the instruction that has either been retired or
          // squashed.  Similar to having a single bus that broadcasts the
diff --git a/src/cpu/o3/commit_impl.hh b/src/cpu/o3/commit_impl.hh

index 3178410a835cc9a1ef4f947b5459fdb4bb01bb37..194138efc218b06af8b45d3a2df20a1be793688b 100644 (file)
--- a/src/cpu/o3/commit_impl.hh
+++ b/src/cpu/o3/commit_impl.hh
@@ -514,6 +514,7 @@ DefaultCommit<Impl>::squashAll(unsigned tid)
      toIEW->commitInfo[tid].branchMispredict = false;
  
      toIEW->commitInfo[tid].nextPC = PC[tid];
+    toIEW->commitInfo[tid].nextNPC = nextPC[tid];
  }
  
  template <class Impl>
@@ -770,6 +771,7 @@ DefaultCommit<Impl>::commit()
                  fromIEW->branchTaken[tid];
  
              toIEW->commitInfo[tid].nextPC = fromIEW->nextPC[tid];
+            toIEW->commitInfo[tid].nextNPC = fromIEW->nextNPC[tid];
  
              toIEW->commitInfo[tid].mispredPC = fromIEW->mispredPC[tid];
  
diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc

index 4056d876f8070a4d28a7db0e77b0ebbaac49f43b..5616ba3985c7632cfa82a68ac4b4086f6feab64f 100644 (file)
--- a/src/cpu/o3/cpu.cc
+++ b/src/cpu/o3/cpu.cc
@@ -700,7 +700,7 @@ FullO3CPU<Impl>::removeThread(unsigned tid)
  
      // Squash Throughout Pipeline
      InstSeqNum squash_seq_num = commit.rob->readHeadInst(tid)->seqNum;
-    fetch.squash(0, squash_seq_num, true, tid);
+    fetch.squash(0, sizeof(TheISA::MachInst), squash_seq_num, true, tid);
      decode.squash(tid);
      rename.squash(squash_seq_num, tid);
      iew.squash(tid);
diff --git a/src/cpu/o3/fetch.hh b/src/cpu/o3/fetch.hh

index 04016347a4ef316c3f35cb0d4cf5475805b455f4..4f5a161e08cd308665cf419a816977e850aee013 100644 (file)
--- a/src/cpu/o3/fetch.hh
+++ b/src/cpu/o3/fetch.hh
@@ -239,13 +239,13 @@ class DefaultFetch
      bool fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid);
  
      /** Squashes a specific thread and resets the PC. */
-    inline void doSquash(const Addr &new_PC, unsigned tid);
+    inline void doSquash(const Addr &new_PC, const Addr &new_NPC, unsigned tid);
  
      /** Squashes a specific thread and resets the PC. Also tells the CPU to
       * remove any instructions between fetch and decode that should be sqaushed.
       */
-    void squashFromDecode(const Addr &new_PC, const InstSeqNum &seq_num,
-                          unsigned tid);
+    void squashFromDecode(const Addr &new_PC, const Addr &new_NPC,
+                          const InstSeqNum &seq_num, unsigned tid);
  
      /** Checks if a thread is stalled. */
      bool checkStall(unsigned tid) const;
@@ -259,7 +259,8 @@ class DefaultFetch
       * remove any instructions that are not in the ROB. The source of this
       * squash should be the commit stage.
       */
-    void squash(const Addr &new_PC, const InstSeqNum &seq_num,
+    void squash(const Addr &new_PC, const Addr &new_NPC,
+                const InstSeqNum &seq_num,
                  bool squash_delay_slot, unsigned tid);
  
      /** Ticks the fetch stage, processing all inputs signals and fetching
diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh

index 6cff5242903c5caeb84bea157338641eddd813d0..5cd2e3514a00ff05a7b3a054600ad523051f10a6 100644 (file)
--- a/src/cpu/o3/fetch_impl.hh
+++ b/src/cpu/o3/fetch_impl.hh
@@ -319,9 +319,7 @@ DefaultFetch<Impl>::initStage()
      for (int tid = 0; tid < numThreads; tid++) {
          PC[tid] = cpu->readPC(tid);
          nextPC[tid] = cpu->readNextPC(tid);
-#if ISA_HAS_DELAY_SLOT
          nextNPC[tid] = cpu->readNextNPC(tid);
-#endif
      }
  
      // Size of cache block.
@@ -504,14 +502,14 @@ DefaultFetch<Impl>::lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC,
  
      if (!inst->isControl()) {
  #if ISA_HAS_DELAY_SLOT
-        Addr cur_PC = next_PC;
-        next_PC  = cur_PC + instSize;      //next_NPC;
-        next_NPC = cur_PC + (2 * instSize);//next_NPC + instSize;
-        inst->setPredTarg(next_NPC);
+        next_PC  = next_NPC;
+        next_NPC = next_NPC + instSize;
+        inst->setPredTarg(next_PC, next_NPC);
  #else
          next_PC = next_PC + instSize;
-        inst->setPredTarg(next_PC);
+        inst->setPredTarg(next_PC, next_PC + sizeof(TheISA::MachInst));
  #endif
+        inst->setPredTaken(false);
          return false;
      }
  
@@ -521,36 +519,29 @@ DefaultFetch<Impl>::lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC,
      predict_taken = branchPred.predict(inst, pred_PC, tid);
  
      if (predict_taken) {
-        DPRINTF(Fetch, "[tid:%i]: Branch predicted to be true.\n", tid);
+        DPRINTF(Fetch, "[tid:%i]: Branch predicted to be taken.\n", tid);
      } else {
-        DPRINTF(Fetch, "[tid:%i]: Branch predicted to be false.\n", tid);
+        DPRINTF(Fetch, "[tid:%i]: Branch predicted to be not taken.\n", tid);
      }
  
+    next_PC = next_NPC;
      if (predict_taken) {
-        next_PC = next_NPC;
          next_NPC = pred_PC;
-
          // Update delay slot info
          ++delaySlotInfo[tid].numInsts;
          delaySlotInfo[tid].targetAddr = pred_PC;
          DPRINTF(Fetch, "[tid:%i]: %i delay slot inst(s) to process.\n", tid,
                  delaySlotInfo[tid].numInsts);
-    } else { // !predict_taken
-        if (inst->isCondDelaySlot()) {
-            next_PC = pred_PC;
-            // The delay slot is skipped here if there is on
-            // prediction
-        } else {
-            next_PC = next_NPC;
-            // No need to declare a delay slot here since
-            // there is no for the pred. target to jump
-        }
-
+    } else {
          next_NPC = next_NPC + instSize;
      }
  #else
      predict_taken = branchPred.predict(inst, next_PC, tid);
  #endif
+    DPRINTF(Fetch, "[tid:%i]: Branch predicted to go to %#x and then %#x.\n",
+            tid, next_PC, next_NPC);
+    inst->setPredTarg(next_PC, next_NPC);
+    inst->setPredTaken(predict_taken);
  
      ++fetchedBranches;
  
@@ -671,14 +662,15 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid
  
  template <class Impl>
  inline void
-DefaultFetch<Impl>::doSquash(const Addr &new_PC, unsigned tid)
+DefaultFetch<Impl>::doSquash(const Addr &new_PC,
+        const Addr &new_NPC, unsigned tid)
  {
-    DPRINTF(Fetch, "[tid:%i]: Squashing, setting PC to: %#x.\n",
-            tid, new_PC);
+    DPRINTF(Fetch, "[tid:%i]: Squashing, setting PC to: %#x, NPC to: %#x.\n",
+            tid, new_PC, new_NPC);
  
      PC[tid] = new_PC;
-    nextPC[tid] = new_PC + instSize;
-    nextNPC[tid] = new_PC + (2 * instSize);
+    nextPC[tid] = new_NPC;
+    nextNPC[tid] = new_NPC + instSize;
  
      // Clear the icache miss if it's outstanding.
      if (fetchStatus[tid] == IcacheWaitResponse) {
@@ -704,13 +696,13 @@ DefaultFetch<Impl>::doSquash(const Addr &new_PC, unsigned tid)
  
  template<class Impl>
  void
-DefaultFetch<Impl>::squashFromDecode(const Addr &new_PC,
+DefaultFetch<Impl>::squashFromDecode(const Addr &new_PC, const Addr &new_NPC,
                                       const InstSeqNum &seq_num,
                                       unsigned tid)
  {
      DPRINTF(Fetch, "[tid:%i]: Squashing from decode.\n",tid);
  
-    doSquash(new_PC, tid);
+    doSquash(new_PC, new_NPC, tid);
  
  #if ISA_HAS_DELAY_SLOT
      if (seq_num <=  delaySlotInfo[tid].branchSeqNum) {
@@ -793,12 +785,13 @@ DefaultFetch<Impl>::updateFetchStatus()
  
  template <class Impl>
  void
-DefaultFetch<Impl>::squash(const Addr &new_PC, const InstSeqNum &seq_num,
+DefaultFetch<Impl>::squash(const Addr &new_PC, const Addr &new_NPC,
+                           const InstSeqNum &seq_num,
                             bool squash_delay_slot, unsigned tid)
  {
      DPRINTF(Fetch, "[tid:%u]: Squash from commit.\n",tid);
  
-    doSquash(new_PC, tid);
+    doSquash(new_PC, new_NPC, tid);
  
  #if ISA_HAS_DELAY_SLOT
      if (seq_num <=  delaySlotInfo[tid].branchSeqNum) {
@@ -928,6 +921,7 @@ DefaultFetch<Impl>::checkSignalsAndUpdate(unsigned tid)
  #endif
          // In any case, squash.
          squash(fromCommit->commitInfo[tid].nextPC,
+               fromCommit->commitInfo[tid].nextNPC,
                 doneSeqNum,
                 fromCommit->commitInfo[tid].squashDelaySlot,
                 tid);
@@ -985,6 +979,7 @@ DefaultFetch<Impl>::checkSignalsAndUpdate(unsigned tid)
  #endif
              // Squash unless we're already squashing
              squashFromDecode(fromDecode->decodeInfo[tid].nextPC,
+                             fromDecode->decodeInfo[tid].nextNPC,
                               doneSeqNum,
                               tid);
  
@@ -1041,6 +1036,8 @@ DefaultFetch<Impl>::fetch(bool &status_change)
      // The current PC.
      Addr &fetch_PC = PC[tid];
  
+    Addr &fetch_NPC = nextPC[tid];
+
      // Fault code for memory access.
      Fault fault = NoFault;
  
@@ -1097,7 +1094,8 @@ DefaultFetch<Impl>::fetch(bool &status_change)
      }
  
      Addr next_PC = fetch_PC;
-    Addr next_NPC = next_PC + instSize;
+    Addr next_NPC = fetch_NPC;
+
      InstSeqNum inst_seq;
      MachInst inst;
      ExtMachInst ext_inst;
@@ -1144,8 +1142,9 @@ DefaultFetch<Impl>::fetch(bool &status_change)
  #endif
  
              // Create a new DynInst from the instruction fetched.
-            DynInstPtr instruction = new DynInst(ext_inst, fetch_PC,
-                                                 next_PC,
+            DynInstPtr instruction = new DynInst(ext_inst,
+                                                 fetch_PC, fetch_NPC,
+                                                 next_PC, next_NPC,
                                                   inst_seq, cpu);
              instruction->setTid(tid);
  
@@ -1243,9 +1242,9 @@ DefaultFetch<Impl>::fetch(bool &status_change)
          if (delaySlotInfo[tid].targetReady &&
              delaySlotInfo[tid].numInsts == 0) {
              // Set PC to target
-            PC[tid] = delaySlotInfo[tid].targetAddr; //next_PC
-            nextPC[tid] = next_PC + instSize;        //next_NPC
-            nextNPC[tid] = next_PC + (2 * instSize);
+            PC[tid] = next_PC;
+            nextPC[tid] = next_NPC;
+            nextNPC[tid] = next_NPC + instSize;
  
              delaySlotInfo[tid].targetReady = false;
          } else {
diff --git a/src/cpu/o3/iew_impl.hh b/src/cpu/o3/iew_impl.hh

index 85db68576490a35f9cf5a7cae6a484d64d4bc82e..24c8484b48ad57d72ae429c58ea8fb37ce6bd29d 100644 (file)
--- a/src/cpu/o3/iew_impl.hh
+++ b/src/cpu/o3/iew_impl.hh
@@ -481,25 +481,28 @@ DefaultIEW<Impl>::squashDueToBranch(DynInstPtr &inst, unsigned tid)
      toCommit->branchMispredict[tid] = true;
  
  #if ISA_HAS_DELAY_SLOT
+    int instSize = sizeof(TheISA::MachInst);
      bool branch_taken =
-        (inst->readNextNPC() != (inst->readPC() + 2 * sizeof(TheISA::MachInst)) &&
-         inst->readNextNPC() != (inst->readPC() + 3 * sizeof(TheISA::MachInst)));
+        !(inst->readNextPC() + instSize == inst->readNextNPC() &&
+          (inst->readNextPC() == inst->readPC() + instSize ||
+           inst->readNextPC() == inst->readPC() + 2 * instSize));
      DPRINTF(Sparc, "Branch taken = %s [sn:%i]\n",
              branch_taken ? "true": "false", inst->seqNum);
  
      toCommit->branchTaken[tid] = branch_taken;
  
-    bool squashDelaySlot =
-        (inst->readNextPC() != inst->readPC() + sizeof(TheISA::MachInst));
+    bool squashDelaySlot = true;
+//     (inst->readNextPC() != inst->readPC() + sizeof(TheISA::MachInst));
      DPRINTF(Sparc, "Squash delay slot = %s [sn:%i]\n",
              squashDelaySlot ? "true": "false", inst->seqNum);
      toCommit->squashDelaySlot[tid] = squashDelaySlot;
      //If we're squashing the delay slot, we need to pick back up at NextPC.
      //Otherwise, NextPC isn't being squashed, so we should pick back up at
      //NextNPC.
-    if (squashDelaySlot)
+    if (squashDelaySlot) {
          toCommit->nextPC[tid] = inst->readNextPC();
-    else
+        toCommit->nextNPC[tid] = inst->readNextNPC();
+    } else
          toCommit->nextPC[tid] = inst->readNextNPC();
  #else
      toCommit->branchTaken[tid] = inst->readNextPC() !=
@@ -522,6 +525,9 @@ DefaultIEW<Impl>::squashDueToMemOrder(DynInstPtr &inst, unsigned tid)
      toCommit->squash[tid] = true;
      toCommit->squashedSeqNum[tid] = inst->seqNum;
      toCommit->nextPC[tid] = inst->readNextPC();
+#if ISA_HAS_DELAY_SLOT
+    toCommit->nextNPC[tid] = inst->readNextNPC();
+#endif
  
      toCommit->includeSquashInst[tid] = false;
  
@@ -538,6 +544,9 @@ DefaultIEW<Impl>::squashDueToMemBlocked(DynInstPtr &inst, unsigned tid)
      toCommit->squash[tid] = true;
      toCommit->squashedSeqNum[tid] = inst->seqNum;
      toCommit->nextPC[tid] = inst->readPC();
+#if ISA_HAS_DELAY_SLOT
+    toCommit->nextNPC[tid] = inst->readNextNPC();
+#endif
  
      // Must include the broadcasted SN in the squash.
      toCommit->includeSquashInst[tid] = true;
@@ -1342,6 +1351,7 @@ DefaultIEW<Impl>::executeInsts()
                  fetchRedirect[tid] = true;
  
                  DPRINTF(IEW, "Execute: Branch mispredict detected.\n");
+                DPRINTF(IEW, "Predicted target was %#x.\n", inst->predPC);
  #if ISA_HAS_DELAY_SLOT
                  DPRINTF(IEW, "Execute: Redirecting fetch to PC: %#x.\n",
                          inst->nextNPC);
@@ -1352,7 +1362,7 @@ DefaultIEW<Impl>::executeInsts()
                  // If incorrect, then signal the ROB that it must be squashed.
                  squashDueToBranch(inst, tid);
  
-                if (inst->predTaken()) {
+                if (inst->readPredTaken()) {
                      predictedTakenIncorrect++;
                  } else {
                      predictedNotTakenIncorrect++;
diff --git a/src/cpu/o3/sparc/dyn_inst.hh b/src/cpu/o3/sparc/dyn_inst.hh

index fda99cb6c1948947caa7a12498e1ecd3033d1ee3..e95ae2fd5eecfb447022a6544231da7e8b62217c 100644 (file)
--- a/src/cpu/o3/sparc/dyn_inst.hh
+++ b/src/cpu/o3/sparc/dyn_inst.hh
@@ -56,8 +56,8 @@ class SparcDynInst : public BaseDynInst<Impl>
  
    public:
      /** BaseDynInst constructor given a binary instruction. */
-    SparcDynInst(TheISA::ExtMachInst inst, Addr PC,
-            Addr Pred_PC, InstSeqNum seq_num, O3CPU *cpu);
+    SparcDynInst(TheISA::ExtMachInst inst, Addr PC, Addr NPC,
+            Addr Pred_PC, Addr Pred_NPC, InstSeqNum seq_num, O3CPU *cpu);
  
      /** BaseDynInst constructor given a static inst pointer. */
      SparcDynInst(StaticInstPtr &_staticInst);
diff --git a/src/cpu/o3/sparc/dyn_inst_impl.hh b/src/cpu/o3/sparc/dyn_inst_impl.hh

index b830ee7bd44c5beb9f2f451547e630d969891f64..c4d30b6f4167973f82c3c874aebe685a42cc9be2 100644 (file)
--- a/src/cpu/o3/sparc/dyn_inst_impl.hh
+++ b/src/cpu/o3/sparc/dyn_inst_impl.hh
@@ -31,9 +31,10 @@
  #include "cpu/o3/sparc/dyn_inst.hh"
  
  template <class Impl>
-SparcDynInst<Impl>::SparcDynInst(TheISA::ExtMachInst inst, Addr PC,
-        Addr Pred_PC, InstSeqNum seq_num, O3CPU *cpu)
-    : BaseDynInst<Impl>(inst, PC, Pred_PC, seq_num, cpu)
+SparcDynInst<Impl>::SparcDynInst(TheISA::ExtMachInst inst,
+        Addr PC, Addr NPC, Addr Pred_PC, Addr Pred_NPC,
+        InstSeqNum seq_num, O3CPU *cpu)
+    : BaseDynInst<Impl>(inst, PC, NPC, Pred_PC, Pred_NPC, seq_num, cpu)
  {
      initVars();
  }
author	Gabe Black <gblack@eecs.umich.edu>
	Sat, 16 Dec 2006 12:32:06 +0000 (07:32 -0500)
committer	Gabe Black <gblack@eecs.umich.edu>
	Sat, 16 Dec 2006 12:32:06 +0000 (07:32 -0500)
src/cpu/o3/comm.hh		patch | blob | history
src/cpu/o3/commit_impl.hh		patch | blob | history
src/cpu/o3/cpu.cc		patch | blob | history
src/cpu/o3/fetch.hh		patch | blob | history
src/cpu/o3/fetch_impl.hh		patch | blob | history
src/cpu/o3/iew_impl.hh		patch | blob | history
src/cpu/o3/sparc/dyn_inst.hh		patch | blob | history
src/cpu/o3/sparc/dyn_inst_impl.hh		patch | blob | history