cpu: Fixed the indirect branch predictor GHR handling
authorPau Cabre <pau.cabre@metempsy.com>
Fri, 1 Mar 2019 16:32:16 +0000 (17:32 +0100)
committerJairo Balart <jairo.balart@metempsy.com>
Wed, 27 Mar 2019 09:55:09 +0000 (09:55 +0000)
The internal indirect predictor global history was not being updated
properly, resulting in higher than expected miss rates

Also added a parameter to set the size of the indirect predictor GHR

Change-Id: Ibc797816974cba6719da65122801e8919559a003
Signed-off-by: Pau Cabre <pau.cabre@metempsy.com>
Reported-by: Daniel Carvalho <odanrc@yahoo.com.br>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/16928
Reviewed-by: Daniel Carvalho <odanrc@yahoo.com.br>
Reviewed-by: Andrea Mondelli <Andrea.Mondelli@ucf.edu>
Reviewed-by: Sudhanshu Jha <sudhanshu.jha@arm.com>
Reviewed-by: Andreas Sandberg <andreas.sandberg@arm.com>
Maintainer: Andreas Sandberg <andreas.sandberg@arm.com>

src/cpu/pred/BranchPredictor.py
src/cpu/pred/bpred_unit.cc
src/cpu/pred/indirect.cc
src/cpu/pred/indirect.hh

index 85b225c3bd3a502f129bd6d8024c87cc23d0803e..edcb5869856d51f1144fbe46174129dc742e8b14 100644 (file)
@@ -51,6 +51,7 @@ class BranchPredictor(SimObject):
     indirectTagSize = Param.Unsigned(16, "Indirect target cache tag bits")
     indirectPathLength = Param.Unsigned(3,
         "Previous indirect targets to use for path history")
+    indirectGHRBits = Param.Unsigned(13, "Indirect GHR number of bits")
 
 
 
index 2bfd901400f6baa46da4a889f1dd3a2f549c1409..33604799d0ade0390a5f1205ab004c7634031160 100644 (file)
@@ -70,7 +70,8 @@ BPredUnit::BPredUnit(const Params *params)
             params->indirectTagSize,
             params->indirectPathLength,
             params->instShiftAmt,
-            params->numThreads),
+            params->numThreads,
+            params->indirectGHRBits),
       instShiftAmt(params->instShiftAmt)
 {
     for (auto& r : RAS)
@@ -207,8 +208,9 @@ BPredUnit::predict(const StaticInstPtr &inst, const InstSeqNum &seqNum,
                 " predicted %i for PC %s\n", tid, seqNum,  pred_taken, pc);
     }
 
+    const bool orig_pred_taken = pred_taken;
     if (useIndirect) {
-        iPred.updateDirectionInfo(tid, pred_taken, indirect_history);
+        iPred.genIndirectInfo(tid, indirect_history);
     }
 
     DPRINTF(Branch, "[tid:%i]: [sn:%i] Creating prediction history "
@@ -317,6 +319,15 @@ BPredUnit::predict(const StaticInstPtr &inst, const InstSeqNum &seqNum,
 
     pc = target;
 
+    if (useIndirect) {
+        // Update the indirect predictor with the direction prediction
+        // Note that this happens after indirect lookup, so it does not use
+        // the new information
+        // Note also that we use orig_pred_taken instead of pred_taken in
+        // as this is the actual outcome of the direction prediction
+        iPred.updateDirectionInfo(tid, orig_pred_taken);
+    }
+
     predHist[tid].push_front(predict_record);
 
     DPRINTF(Branch, "[tid:%i]: [sn:%i]: History entry added."
@@ -340,7 +351,9 @@ BPredUnit::update(const InstSeqNum &done_sn, ThreadID tid)
                     predHist[tid].back().inst,
                     predHist[tid].back().target);
 
-        iPred.commit(done_sn, tid, predHist[tid].back().indirectHistory);
+        if (useIndirect) {
+            iPred.commit(done_sn, tid, predHist[tid].back().indirectHistory);
+        }
 
         predHist[tid].pop_back();
     }
@@ -351,7 +364,10 @@ BPredUnit::squash(const InstSeqNum &squashed_sn, ThreadID tid)
 {
     History &pred_hist = predHist[tid];
 
-    iPred.squash(squashed_sn, tid);
+    if (useIndirect) {
+        iPred.squash(squashed_sn, tid);
+    }
+
     while (!pred_hist.empty() &&
            pred_hist.front().seqNum > squashed_sn) {
         if (pred_hist.front().usedRAS) {
@@ -372,7 +388,7 @@ BPredUnit::squash(const InstSeqNum &squashed_sn, ThreadID tid)
         // This call should delete the bpHistory.
         squash(tid, pred_hist.front().bpHistory);
         if (useIndirect) {
-            iPred.deleteDirectionInfo(tid, pred_hist.front().indirectHistory);
+            iPred.deleteIndirectInfo(tid, pred_hist.front().indirectHistory);
         }
 
         DPRINTF(Branch, "[tid:%i]: Removing history for [sn:%i] "
@@ -469,7 +485,9 @@ BPredUnit::squash(const InstSeqNum &squashed_sn,
             }
             if (hist_it->wasIndirect) {
                 ++indirectMispredicted;
-                iPred.recordTarget(hist_it->seqNum, corrTarget, tid);
+                iPred.recordTarget(
+                    hist_it->seqNum, pred_hist.front().indirectHistory,
+                    corrTarget, tid);
             } else {
                 DPRINTF(Branch,"[tid: %i] BTB Update called for [sn:%i]"
                         " PC: %s\n", tid,hist_it->seqNum, hist_it->pc);
index 6690abbd6f5a3c854897916aab8fccf73beef74d..2f88566b42109e9a27f110e6f2400baa630a4867 100644 (file)
 IndirectPredictor::IndirectPredictor(bool hash_ghr, bool hash_targets,
     unsigned num_sets, unsigned num_ways,
     unsigned tag_bits, unsigned path_len, unsigned inst_shift,
-    unsigned num_threads)
+    unsigned num_threads, unsigned ghr_size)
     : hashGHR(hash_ghr), hashTargets(hash_targets),
       numSets(num_sets), numWays(num_ways), tagBits(tag_bits),
-      pathLength(path_len), instShift(inst_shift)
+      pathLength(path_len), instShift(inst_shift),
+      ghrNumBits(ghr_size), ghrMask((1 << ghr_size)-1)
 {
     if (!isPowerOf2(numSets)) {
       panic("Indirect predictor requires power of 2 number of sets");
@@ -51,18 +52,26 @@ IndirectPredictor::IndirectPredictor(bool hash_ghr, bool hash_targets,
     for (unsigned i = 0; i < numSets; i++) {
         targetCache[i].resize(numWays);
     }
+
+    fatal_if(ghrNumBits > (sizeof(ThreadInfo::ghr)*8), "ghr_size is too big");
 }
 
 void
-IndirectPredictor::updateDirectionInfo(ThreadID tid, bool taken,
-    void* & indirect_history)
+IndirectPredictor::genIndirectInfo(ThreadID tid, void* & indirect_history)
 {
     // record the GHR as it was before this prediction
     // It will be used to recover the history in case this prediction is
     // wrong or belongs to bad path
     indirect_history = new unsigned(threadInfo[tid].ghr);
+}
 
-    threadInfo[tid].ghr <<= taken;
+void
+IndirectPredictor::updateDirectionInfo(
+    ThreadID tid, bool actually_taken)
+{
+    threadInfo[tid].ghr <<= 1;
+    threadInfo[tid].ghr |= actually_taken;
+    threadInfo[tid].ghr &= ghrMask;
 }
 
 void
@@ -71,6 +80,7 @@ IndirectPredictor::changeDirectionPrediction(ThreadID tid,
 {
     unsigned * previousGhr = static_cast<unsigned *>(indirect_history);
     threadInfo[tid].ghr = ((*previousGhr) << 1) + actually_taken;
+    threadInfo[tid].ghr &= ghrMask;
 }
 
 bool
@@ -147,7 +157,7 @@ IndirectPredictor::squash(InstSeqNum seq_num, ThreadID tid)
 }
 
 void
-IndirectPredictor::deleteDirectionInfo(ThreadID tid, void * indirect_history)
+IndirectPredictor::deleteIndirectInfo(ThreadID tid, void * indirect_history)
 {
     unsigned * previousGhr = static_cast<unsigned *>(indirect_history);
     threadInfo[tid].ghr = *previousGhr;
@@ -156,16 +166,19 @@ IndirectPredictor::deleteDirectionInfo(ThreadID tid, void * indirect_history)
 }
 
 void
-IndirectPredictor::recordTarget(InstSeqNum seq_num,
-        const TheISA::PCState& target, ThreadID tid)
+IndirectPredictor::recordTarget(
+    InstSeqNum seq_num, void * indirect_history, const TheISA::PCState& target,
+    ThreadID tid)
 {
     ThreadInfo &t_info = threadInfo[tid];
 
+    unsigned * ghr = static_cast<unsigned *>(indirect_history);
+
     // Should have just squashed so this branch should be the oldest
     auto hist_entry = *(t_info.pathHist.rbegin());
     // Temporarily pop it off the history so we can calculate the set
     t_info.pathHist.pop_back();
-    Addr set_index = getSetIndex(hist_entry.pcAddr, t_info.ghr, tid);
+    Addr set_index = getSetIndex(hist_entry.pcAddr, *ghr, tid);
     Addr tag = getTag(hist_entry.pcAddr);
     hist_entry.targetAddr = target.instAddr();
     t_info.pathHist.push_back(hist_entry);
index 226a77848e323b55834232dd5f656df988ae369d..b3c3c4cf61fff4128dccfc03adc72e28788d35e8 100644 (file)
@@ -43,19 +43,20 @@ class IndirectPredictor
     IndirectPredictor(bool hash_ghr, bool hash_targets,
                       unsigned num_sets, unsigned num_ways,
                       unsigned tag_bits, unsigned path_len,
-                      unsigned inst_shift, unsigned num_threads);
+                      unsigned inst_shift, unsigned num_threads,
+                      unsigned ghr_size);
     bool lookup(Addr br_addr, TheISA::PCState& br_target, ThreadID tid);
     void recordIndirect(Addr br_addr, Addr tgt_addr, InstSeqNum seq_num,
                         ThreadID tid);
     void commit(InstSeqNum seq_num, ThreadID tid, void * indirect_history);
     void squash(InstSeqNum seq_num, ThreadID tid);
-    void recordTarget(InstSeqNum seq_num, const TheISA::PCState& target,
-                      ThreadID tid);
-    void updateDirectionInfo(ThreadID tid, bool taken,
-                             void* & indirect_history);
+    void recordTarget(InstSeqNum seq_num, void * indirect_history,
+                      const TheISA::PCState& target, ThreadID tid);
+    void genIndirectInfo(ThreadID tid, void* & indirect_history);
+    void updateDirectionInfo(ThreadID tid, bool actually_taken);
+    void deleteIndirectInfo(ThreadID tid, void * indirect_history);
     void changeDirectionPrediction(ThreadID tid, void * indirect_history,
                                    bool actually_taken);
-    void deleteDirectionInfo(ThreadID tid, void * indirect_history);
 
   private:
     const bool hashGHR;
@@ -65,6 +66,8 @@ class IndirectPredictor
     const unsigned tagBits;
     const unsigned pathLength;
     const unsigned instShift;
+    const unsigned ghrNumBits;
+    const unsigned ghrMask;
 
     struct IPredEntry
     {