From 78f1f4d8f9267b5345f5a34f113eabae2a2faac9 Mon Sep 17 00:00:00 2001 From: Pau Cabre Date: Fri, 1 Mar 2019 17:32:16 +0100 Subject: [PATCH] cpu: Fixed the indirect branch predictor GHR handling The internal indirect predictor global history was not being updated properly, resulting in higher than expected miss rates Also added a parameter to set the size of the indirect predictor GHR Change-Id: Ibc797816974cba6719da65122801e8919559a003 Signed-off-by: Pau Cabre Reported-by: Daniel Carvalho Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/16928 Reviewed-by: Daniel Carvalho Reviewed-by: Andrea Mondelli Reviewed-by: Sudhanshu Jha Reviewed-by: Andreas Sandberg Maintainer: Andreas Sandberg --- src/cpu/pred/BranchPredictor.py | 1 + src/cpu/pred/bpred_unit.cc | 30 ++++++++++++++++++++++++------ src/cpu/pred/indirect.cc | 31 ++++++++++++++++++++++--------- src/cpu/pred/indirect.hh | 15 +++++++++------ 4 files changed, 56 insertions(+), 21 deletions(-) diff --git a/src/cpu/pred/BranchPredictor.py b/src/cpu/pred/BranchPredictor.py index 85b225c3b..edcb58698 100644 --- a/src/cpu/pred/BranchPredictor.py +++ b/src/cpu/pred/BranchPredictor.py @@ -51,6 +51,7 @@ class BranchPredictor(SimObject): indirectTagSize = Param.Unsigned(16, "Indirect target cache tag bits") indirectPathLength = Param.Unsigned(3, "Previous indirect targets to use for path history") + indirectGHRBits = Param.Unsigned(13, "Indirect GHR number of bits") diff --git a/src/cpu/pred/bpred_unit.cc b/src/cpu/pred/bpred_unit.cc index 2bfd90140..33604799d 100644 --- a/src/cpu/pred/bpred_unit.cc +++ b/src/cpu/pred/bpred_unit.cc @@ -70,7 +70,8 @@ BPredUnit::BPredUnit(const Params *params) params->indirectTagSize, params->indirectPathLength, params->instShiftAmt, - params->numThreads), + params->numThreads, + params->indirectGHRBits), instShiftAmt(params->instShiftAmt) { for (auto& r : RAS) @@ -207,8 +208,9 @@ BPredUnit::predict(const StaticInstPtr &inst, const InstSeqNum &seqNum, " predicted %i for PC %s\n", tid, seqNum, pred_taken, pc); } + const bool orig_pred_taken = pred_taken; if (useIndirect) { - iPred.updateDirectionInfo(tid, pred_taken, indirect_history); + iPred.genIndirectInfo(tid, indirect_history); } DPRINTF(Branch, "[tid:%i]: [sn:%i] Creating prediction history " @@ -317,6 +319,15 @@ BPredUnit::predict(const StaticInstPtr &inst, const InstSeqNum &seqNum, pc = target; + if (useIndirect) { + // Update the indirect predictor with the direction prediction + // Note that this happens after indirect lookup, so it does not use + // the new information + // Note also that we use orig_pred_taken instead of pred_taken in + // as this is the actual outcome of the direction prediction + iPred.updateDirectionInfo(tid, orig_pred_taken); + } + predHist[tid].push_front(predict_record); DPRINTF(Branch, "[tid:%i]: [sn:%i]: History entry added." @@ -340,7 +351,9 @@ BPredUnit::update(const InstSeqNum &done_sn, ThreadID tid) predHist[tid].back().inst, predHist[tid].back().target); - iPred.commit(done_sn, tid, predHist[tid].back().indirectHistory); + if (useIndirect) { + iPred.commit(done_sn, tid, predHist[tid].back().indirectHistory); + } predHist[tid].pop_back(); } @@ -351,7 +364,10 @@ BPredUnit::squash(const InstSeqNum &squashed_sn, ThreadID tid) { History &pred_hist = predHist[tid]; - iPred.squash(squashed_sn, tid); + if (useIndirect) { + iPred.squash(squashed_sn, tid); + } + while (!pred_hist.empty() && pred_hist.front().seqNum > squashed_sn) { if (pred_hist.front().usedRAS) { @@ -372,7 +388,7 @@ BPredUnit::squash(const InstSeqNum &squashed_sn, ThreadID tid) // This call should delete the bpHistory. squash(tid, pred_hist.front().bpHistory); if (useIndirect) { - iPred.deleteDirectionInfo(tid, pred_hist.front().indirectHistory); + iPred.deleteIndirectInfo(tid, pred_hist.front().indirectHistory); } DPRINTF(Branch, "[tid:%i]: Removing history for [sn:%i] " @@ -469,7 +485,9 @@ BPredUnit::squash(const InstSeqNum &squashed_sn, } if (hist_it->wasIndirect) { ++indirectMispredicted; - iPred.recordTarget(hist_it->seqNum, corrTarget, tid); + iPred.recordTarget( + hist_it->seqNum, pred_hist.front().indirectHistory, + corrTarget, tid); } else { DPRINTF(Branch,"[tid: %i] BTB Update called for [sn:%i]" " PC: %s\n", tid,hist_it->seqNum, hist_it->pc); diff --git a/src/cpu/pred/indirect.cc b/src/cpu/pred/indirect.cc index 6690abbd6..2f88566b4 100644 --- a/src/cpu/pred/indirect.cc +++ b/src/cpu/pred/indirect.cc @@ -36,10 +36,11 @@ IndirectPredictor::IndirectPredictor(bool hash_ghr, bool hash_targets, unsigned num_sets, unsigned num_ways, unsigned tag_bits, unsigned path_len, unsigned inst_shift, - unsigned num_threads) + unsigned num_threads, unsigned ghr_size) : hashGHR(hash_ghr), hashTargets(hash_targets), numSets(num_sets), numWays(num_ways), tagBits(tag_bits), - pathLength(path_len), instShift(inst_shift) + pathLength(path_len), instShift(inst_shift), + ghrNumBits(ghr_size), ghrMask((1 << ghr_size)-1) { if (!isPowerOf2(numSets)) { panic("Indirect predictor requires power of 2 number of sets"); @@ -51,18 +52,26 @@ IndirectPredictor::IndirectPredictor(bool hash_ghr, bool hash_targets, for (unsigned i = 0; i < numSets; i++) { targetCache[i].resize(numWays); } + + fatal_if(ghrNumBits > (sizeof(ThreadInfo::ghr)*8), "ghr_size is too big"); } void -IndirectPredictor::updateDirectionInfo(ThreadID tid, bool taken, - void* & indirect_history) +IndirectPredictor::genIndirectInfo(ThreadID tid, void* & indirect_history) { // record the GHR as it was before this prediction // It will be used to recover the history in case this prediction is // wrong or belongs to bad path indirect_history = new unsigned(threadInfo[tid].ghr); +} - threadInfo[tid].ghr <<= taken; +void +IndirectPredictor::updateDirectionInfo( + ThreadID tid, bool actually_taken) +{ + threadInfo[tid].ghr <<= 1; + threadInfo[tid].ghr |= actually_taken; + threadInfo[tid].ghr &= ghrMask; } void @@ -71,6 +80,7 @@ IndirectPredictor::changeDirectionPrediction(ThreadID tid, { unsigned * previousGhr = static_cast(indirect_history); threadInfo[tid].ghr = ((*previousGhr) << 1) + actually_taken; + threadInfo[tid].ghr &= ghrMask; } bool @@ -147,7 +157,7 @@ IndirectPredictor::squash(InstSeqNum seq_num, ThreadID tid) } void -IndirectPredictor::deleteDirectionInfo(ThreadID tid, void * indirect_history) +IndirectPredictor::deleteIndirectInfo(ThreadID tid, void * indirect_history) { unsigned * previousGhr = static_cast(indirect_history); threadInfo[tid].ghr = *previousGhr; @@ -156,16 +166,19 @@ IndirectPredictor::deleteDirectionInfo(ThreadID tid, void * indirect_history) } void -IndirectPredictor::recordTarget(InstSeqNum seq_num, - const TheISA::PCState& target, ThreadID tid) +IndirectPredictor::recordTarget( + InstSeqNum seq_num, void * indirect_history, const TheISA::PCState& target, + ThreadID tid) { ThreadInfo &t_info = threadInfo[tid]; + unsigned * ghr = static_cast(indirect_history); + // Should have just squashed so this branch should be the oldest auto hist_entry = *(t_info.pathHist.rbegin()); // Temporarily pop it off the history so we can calculate the set t_info.pathHist.pop_back(); - Addr set_index = getSetIndex(hist_entry.pcAddr, t_info.ghr, tid); + Addr set_index = getSetIndex(hist_entry.pcAddr, *ghr, tid); Addr tag = getTag(hist_entry.pcAddr); hist_entry.targetAddr = target.instAddr(); t_info.pathHist.push_back(hist_entry); diff --git a/src/cpu/pred/indirect.hh b/src/cpu/pred/indirect.hh index 226a77848..b3c3c4cf6 100644 --- a/src/cpu/pred/indirect.hh +++ b/src/cpu/pred/indirect.hh @@ -43,19 +43,20 @@ class IndirectPredictor IndirectPredictor(bool hash_ghr, bool hash_targets, unsigned num_sets, unsigned num_ways, unsigned tag_bits, unsigned path_len, - unsigned inst_shift, unsigned num_threads); + unsigned inst_shift, unsigned num_threads, + unsigned ghr_size); bool lookup(Addr br_addr, TheISA::PCState& br_target, ThreadID tid); void recordIndirect(Addr br_addr, Addr tgt_addr, InstSeqNum seq_num, ThreadID tid); void commit(InstSeqNum seq_num, ThreadID tid, void * indirect_history); void squash(InstSeqNum seq_num, ThreadID tid); - void recordTarget(InstSeqNum seq_num, const TheISA::PCState& target, - ThreadID tid); - void updateDirectionInfo(ThreadID tid, bool taken, - void* & indirect_history); + void recordTarget(InstSeqNum seq_num, void * indirect_history, + const TheISA::PCState& target, ThreadID tid); + void genIndirectInfo(ThreadID tid, void* & indirect_history); + void updateDirectionInfo(ThreadID tid, bool actually_taken); + void deleteIndirectInfo(ThreadID tid, void * indirect_history); void changeDirectionPrediction(ThreadID tid, void * indirect_history, bool actually_taken); - void deleteDirectionInfo(ThreadID tid, void * indirect_history); private: const bool hashGHR; @@ -65,6 +66,8 @@ class IndirectPredictor const unsigned tagBits; const unsigned pathLength; const unsigned instShift; + const unsigned ghrNumBits; + const unsigned ghrMask; struct IPredEntry { -- 2.30.2