From: Arthur Perais Date: Wed, 21 Dec 2016 21:07:16 +0000 (-0600) Subject: cpu: disallow speculative update of branch predictor tables (o3) X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=497cc2d373d1559aaae0263635b88f670fd239cd;p=gem5.git cpu: disallow speculative update of branch predictor tables (o3) The Minor and o3 cpu models share the branch prediction code. Minor relies on the BPredUnit::squash() function to update the branch predictor tables on a branch mispre- diction. This is fine because Minor executes in-order, so the update is on the correct path. However, this causes the branch predictor to be updated on out-of-order branch mispredictions when using the o3 model, which should not be the case. This patch guards against speculative update of the branch prediction tables. On a branch misprediction, BPredUnit::squash() calls BpredUnit::update(..., squashed = true). The underlying branch predictor tests against the value of squashed. If it is true, it restores any speculatively updated internal state it might have (e.g., global/local branch history), then returns. If false, it updates its prediction tables. Previously, exist- ing predictors did not test against the "squashed" parameter. To accomodate for this change, the Minor model must now call BPredUnit::squash() then BPredUnit::update(..., squashed = false) on branch mispredictions. Before, calling BpredUnit::squash() performed the prediction tables update. The effect is a slight MPKI improvement when using the o3 model. A further patch should perform the same modifications for the indirect target predictor and BTB (less critical). Signed-off-by: Jason Lowe-Power --- diff --git a/src/cpu/minor/fetch2.cc b/src/cpu/minor/fetch2.cc index 394fe8549..15d362da7 100644 --- a/src/cpu/minor/fetch2.cc +++ b/src/cpu/minor/fetch2.cc @@ -152,6 +152,10 @@ Fetch2::updateBranchPrediction(const BranchData &branch) DPRINTF(Branch, "Unpredicted branch seen inst: %s\n", *inst); branchPredictor.squash(inst->id.fetchSeqNum, branch.target, true, inst->id.threadId); + // Update after squashing to accomodate O3CPU + // using the branch prediction code. + branchPredictor.update(inst->id.fetchSeqNum, + inst->id.threadId); break; case BranchData::CorrectlyPredictedBranch: /* Predicted taken, was taken */ @@ -164,6 +168,10 @@ Fetch2::updateBranchPrediction(const BranchData &branch) DPRINTF(Branch, "Branch mis-predicted inst: %s\n", *inst); branchPredictor.squash(inst->id.fetchSeqNum, branch.target /* Not used */, false, inst->id.threadId); + // Update after squashing to accomodate O3CPU + // using the branch prediction code. + branchPredictor.update(inst->id.fetchSeqNum, + inst->id.threadId); break; case BranchData::BadlyPredictedBranchTarget: /* Predicted taken, was taken but to a different target */ diff --git a/src/cpu/pred/2bit_local.cc b/src/cpu/pred/2bit_local.cc index 9e1c781c5..6f821e94c 100644 --- a/src/cpu/pred/2bit_local.cc +++ b/src/cpu/pred/2bit_local.cc @@ -123,6 +123,12 @@ LocalBP::update(ThreadID tid, Addr branch_addr, bool taken, void *bp_history, assert(bp_history == NULL); unsigned local_predictor_idx; + // No state to restore, and we do not update on the wrong + // path. + if (squashed) { + return; + } + // Update the local predictor. local_predictor_idx = getLocalIndex(branch_addr); diff --git a/src/cpu/pred/2bit_local.hh b/src/cpu/pred/2bit_local.hh index e3f87491b..30327b7bc 100644 --- a/src/cpu/pred/2bit_local.hh +++ b/src/cpu/pred/2bit_local.hh @@ -94,9 +94,6 @@ class LocalBP : public BPredUnit void update(ThreadID tid, Addr branch_addr, bool taken, void *bp_history, bool squashed); - void retireSquashed(ThreadID tid, void *bp_history) - { assert(bp_history == NULL); } - void squash(ThreadID tid, void *bp_history) { assert(bp_history == NULL); } diff --git a/src/cpu/pred/bi_mode.cc b/src/cpu/pred/bi_mode.cc index 48e798d96..355d0d8f7 100644 --- a/src/cpu/pred/bi_mode.cc +++ b/src/cpu/pred/bi_mode.cc @@ -162,78 +162,69 @@ void BiModeBP::update(ThreadID tid, Addr branchAddr, bool taken, void *bpHistory, bool squashed) { - if (bpHistory) { - BPHistory *history = static_cast(bpHistory); + assert(bpHistory); - unsigned choiceHistoryIdx = ((branchAddr >> instShiftAmt) - & choiceHistoryMask); - unsigned globalHistoryIdx = (((branchAddr >> instShiftAmt) - ^ history->globalHistoryReg) - & globalHistoryMask); + BPHistory *history = static_cast(bpHistory); - assert(choiceHistoryIdx < choicePredictorSize); - assert(globalHistoryIdx < globalPredictorSize); + // We do not update the counters speculatively on a squash. + // We just restore the global history register. + if (squashed) { + globalHistoryReg[tid] = (history->globalHistoryReg << 1) | taken; + return; + } - if (history->takenUsed) { - // if the taken array's prediction was used, update it - if (taken) { - takenCounters[globalHistoryIdx].increment(); - } else { - takenCounters[globalHistoryIdx].decrement(); - } + unsigned choiceHistoryIdx = ((branchAddr >> instShiftAmt) + & choiceHistoryMask); + unsigned globalHistoryIdx = (((branchAddr >> instShiftAmt) + ^ history->globalHistoryReg) + & globalHistoryMask); + + assert(choiceHistoryIdx < choicePredictorSize); + assert(globalHistoryIdx < globalPredictorSize); + + if (history->takenUsed) { + // if the taken array's prediction was used, update it + if (taken) { + takenCounters[globalHistoryIdx].increment(); } else { - // if the not-taken array's prediction was used, update it - if (taken) { - notTakenCounters[globalHistoryIdx].increment(); - } else { - notTakenCounters[globalHistoryIdx].decrement(); - } + takenCounters[globalHistoryIdx].decrement(); } - - if (history->finalPred == taken) { - /* If the final prediction matches the actual branch's - * outcome and the choice predictor matches the final - * outcome, we update the choice predictor, otherwise it - * is not updated. While the designers of the bi-mode - * predictor don't explicity say why this is done, one - * can infer that it is to preserve the choice predictor's - * bias with respect to the branch being predicted; afterall, - * the whole point of the bi-mode predictor is to identify the - * atypical case when a branch deviates from its bias. - */ - if (history->finalPred == history->takenUsed) { - if (taken) { - choiceCounters[choiceHistoryIdx].increment(); - } else { - choiceCounters[choiceHistoryIdx].decrement(); - } - } + } else { + // if the not-taken array's prediction was used, update it + if (taken) { + notTakenCounters[globalHistoryIdx].increment(); } else { - // always update the choice predictor on an incorrect prediction + notTakenCounters[globalHistoryIdx].decrement(); + } + } + + if (history->finalPred == taken) { + /* If the final prediction matches the actual branch's + * outcome and the choice predictor matches the final + * outcome, we update the choice predictor, otherwise it + * is not updated. While the designers of the bi-mode + * predictor don't explicity say why this is done, one + * can infer that it is to preserve the choice predictor's + * bias with respect to the branch being predicted; afterall, + * the whole point of the bi-mode predictor is to identify the + * atypical case when a branch deviates from its bias. + */ + if (history->finalPred == history->takenUsed) { if (taken) { choiceCounters[choiceHistoryIdx].increment(); } else { choiceCounters[choiceHistoryIdx].decrement(); } } - - if (squashed) { - if (taken) { - globalHistoryReg[tid] = (history->globalHistoryReg << 1) | 1; - } else { - globalHistoryReg[tid] = (history->globalHistoryReg << 1); - } - globalHistoryReg[tid] &= historyRegisterMask; + } else { + // always update the choice predictor on an incorrect prediction + if (taken) { + choiceCounters[choiceHistoryIdx].increment(); } else { - delete history; + choiceCounters[choiceHistoryIdx].decrement(); } } -} -void -BiModeBP::retireSquashed(ThreadID tid, void *bp_history) -{ - BPHistory *history = static_cast(bp_history); delete history; } diff --git a/src/cpu/pred/bi_mode.hh b/src/cpu/pred/bi_mode.hh index 96b3b2ef7..7b091d111 100644 --- a/src/cpu/pred/bi_mode.hh +++ b/src/cpu/pred/bi_mode.hh @@ -63,7 +63,6 @@ class BiModeBP : public BPredUnit void btbUpdate(ThreadID tid, Addr branch_addr, void * &bp_history); void update(ThreadID tid, Addr branch_addr, bool taken, void *bp_history, bool squashed); - void retireSquashed(ThreadID tid, void *bp_history); unsigned getGHR(ThreadID tid, void *bp_history) const; private: diff --git a/src/cpu/pred/bpred_unit.cc b/src/cpu/pred/bpred_unit.cc index 660475017..05770cba9 100644 --- a/src/cpu/pred/bpred_unit.cc +++ b/src/cpu/pred/bpred_unit.cc @@ -330,13 +330,9 @@ BPredUnit::update(const InstSeqNum &done_sn, ThreadID tid) while (!predHist[tid].empty() && predHist[tid].back().seqNum <= done_sn) { // Update the branch predictor with the correct results. - if (!predHist[tid].back().wasSquashed) { - update(tid, predHist[tid].back().pc, - predHist[tid].back().predTaken, - predHist[tid].back().bpHistory, false); - } else { - retireSquashed(tid, predHist[tid].back().bpHistory); - } + update(tid, predHist[tid].back().pc, + predHist[tid].back().predTaken, + predHist[tid].back().bpHistory, false); predHist[tid].pop_back(); } @@ -430,12 +426,23 @@ BPredUnit::squash(const InstSeqNum &squashed_sn, tid, hist_it->seqNum); } - // Have to get GHR here because the update deletes bpHistory + // Get the underlying Global History Register unsigned ghr = getGHR(tid, hist_it->bpHistory); + // There are separate functions for in-order and out-of-order + // branch prediction, but not for update. Therefore, this + // call should take into account that the mispredicted branch may + // be on the wrong path (i.e., OoO execution), and that the counter + // counter table(s) should not be updated. Thus, this call should + // restore the state of the underlying predictor, for instance the + // local/global histories. The counter tables will be updated when + // the branch actually commits. + + // Remember the correct direction for the update at commit. + pred_hist.front().predTaken = actually_taken; + update(tid, (*hist_it).pc, actually_taken, pred_hist.front().bpHistory, true); - hist_it->wasSquashed = true; if (actually_taken) { if (hist_it->wasReturn && !hist_it->usedRAS) { diff --git a/src/cpu/pred/bpred_unit.hh b/src/cpu/pred/bpred_unit.hh index 3f9cbc057..b890dc332 100644 --- a/src/cpu/pred/bpred_unit.hh +++ b/src/cpu/pred/bpred_unit.hh @@ -179,14 +179,6 @@ class BPredUnit : public SimObject */ virtual void update(ThreadID tid, Addr instPC, bool taken, void *bp_history, bool squashed) = 0; - /** - * Deletes the associated history with a branch, performs no predictor - * updates. Used for branches that mispredict and update tables but - * are still speculative and later retire. - * @param bp_history History to delete associated with this predictor - */ - virtual void retireSquashed(ThreadID tid, void *bp_history) = 0; - /** * Updates the BTB with the target of a branch. * @param inst_PC The branch's PC that will be updated. @@ -211,7 +203,7 @@ class BPredUnit : public SimObject ThreadID _tid) : seqNum(seq_num), pc(instPC), bpHistory(bp_history), RASTarget(0), RASIndex(0), tid(_tid), predTaken(pred_taken), usedRAS(0), pushedRAS(0), - wasCall(0), wasReturn(0), wasSquashed(0), wasIndirect(0) + wasCall(0), wasReturn(0), wasIndirect(0) {} bool operator==(const PredictorHistory &entry) const { @@ -254,9 +246,6 @@ class BPredUnit : public SimObject /** Whether or not the instruction was a return. */ bool wasReturn; - /** Whether this instruction has already mispredicted/updated bp */ - bool wasSquashed; - /** Wether this instruction was an indirect branch */ bool wasIndirect; }; diff --git a/src/cpu/pred/tournament.cc b/src/cpu/pred/tournament.cc index 96f03eb30..c0c1c12cf 100644 --- a/src/cpu/pred/tournament.cc +++ b/src/cpu/pred/tournament.cc @@ -267,100 +267,77 @@ void TournamentBP::update(ThreadID tid, Addr branch_addr, bool taken, void *bp_history, bool squashed) { - unsigned local_history_idx; - unsigned local_predictor_idx M5_VAR_USED; - unsigned local_predictor_hist; + assert(bp_history); - // Get the local predictor's current prediction - local_history_idx = calcLocHistIdx(branch_addr); - local_predictor_hist = localHistoryTable[local_history_idx]; - local_predictor_idx = local_predictor_hist & localPredictorMask; - - if (bp_history) { - BPHistory *history = static_cast(bp_history); - // Update may also be called if the Branch target is incorrect even if - // the prediction is correct. In that case do not update the counters. - bool historyPred = false; - unsigned old_local_pred_index = history->localHistory & - localPredictorMask; - - bool old_local_pred_valid = history->localHistory != - invalidPredictorIndex; + BPHistory *history = static_cast(bp_history); - assert(old_local_pred_index < localPredictorSize); + unsigned local_history_idx = calcLocHistIdx(branch_addr); - if (history->globalUsed) { - historyPred = history->globalPredTaken; - } else { - historyPred = history->localPredTaken; - } - if (historyPred != taken || !squashed) { - // Update the choice predictor to tell it which one was correct if - // there was a prediction. - if (history->localPredTaken != history->globalPredTaken) { - // If the local prediction matches the actual outcome, - // decerement the counter. Otherwise increment the - // counter. - unsigned choice_predictor_idx = - history->globalHistory & choiceHistoryMask; - if (history->localPredTaken == taken) { - choiceCtrs[choice_predictor_idx].decrement(); - } else if (history->globalPredTaken == taken) { - choiceCtrs[choice_predictor_idx].increment(); - } - - } - - // Update the counters and local history with the proper - // resolution of the branch. Global history is updated - // speculatively and restored upon squash() calls, so it does not - // need to be updated. - unsigned global_predictor_idx = - history->globalHistory & globalHistoryMask; - if (taken) { - globalCtrs[global_predictor_idx].increment(); - if (old_local_pred_valid) { - localCtrs[old_local_pred_index].increment(); - } - } else { - globalCtrs[global_predictor_idx].decrement(); - if (old_local_pred_valid) { - localCtrs[old_local_pred_index].decrement(); - } - } - } - if (squashed) { - if (taken) { - globalHistory[tid] = (history->globalHistory << 1) | 1; - globalHistory[tid] = globalHistory[tid] & historyRegisterMask; - if (old_local_pred_valid) { - localHistoryTable[local_history_idx] = - (history->localHistory << 1) | 1; - } - } else { - globalHistory[tid] = (history->globalHistory << 1); - globalHistory[tid] = globalHistory[tid] & historyRegisterMask; - if (old_local_pred_valid) { - localHistoryTable[local_history_idx] = - history->localHistory << 1; - } - } + assert(local_history_idx < localHistoryTableSize); - } else { - // We're done with this history, now delete it. - delete history; + // Unconditional branches do not use local history. + bool old_local_pred_valid = history->localHistory != + invalidPredictorIndex; + + // If this is a misprediction, restore the speculatively + // updated state (global history register and local history) + // and update again. + if (squashed) { + // Global history restore and update + globalHistory[tid] = (history->globalHistory << 1) | taken; + globalHistory[tid] &= historyRegisterMask; + + // Local history restore and update. + if (old_local_pred_valid) { + localHistoryTable[local_history_idx] = + (history->localHistory << 1) | taken; } - } - assert(local_history_idx < localHistoryTableSize); + return; + } + unsigned old_local_pred_index = history->localHistory & + localPredictorMask; + + assert(old_local_pred_index < localPredictorSize); + + // Update the choice predictor to tell it which one was correct if + // there was a prediction. + if (history->localPredTaken != history->globalPredTaken && + old_local_pred_valid) + { + // If the local prediction matches the actual outcome, + // decrement the counter. Otherwise increment the + // counter. + unsigned choice_predictor_idx = + history->globalHistory & choiceHistoryMask; + if (history->localPredTaken == taken) { + choiceCtrs[choice_predictor_idx].decrement(); + } else if (history->globalPredTaken == taken) { + choiceCtrs[choice_predictor_idx].increment(); + } + } -} + // Update the counters with the proper + // resolution of the branch. Histories are updated + // speculatively, restored upon squash() calls, and + // recomputed upon update(squash = true) calls, + // so they do not need to be updated. + unsigned global_predictor_idx = + history->globalHistory & globalHistoryMask; + if (taken) { + globalCtrs[global_predictor_idx].increment(); + if (old_local_pred_valid) { + localCtrs[old_local_pred_index].increment(); + } + } else { + globalCtrs[global_predictor_idx].decrement(); + if (old_local_pred_valid) { + localCtrs[old_local_pred_index].decrement(); + } + } -void -TournamentBP::retireSquashed(ThreadID tid, void *bp_history) -{ - BPHistory *history = static_cast(bp_history); + // We're done with this history, now delete it. delete history; } diff --git a/src/cpu/pred/tournament.hh b/src/cpu/pred/tournament.hh index 0febd21bc..c4c092687 100644 --- a/src/cpu/pred/tournament.hh +++ b/src/cpu/pred/tournament.hh @@ -105,8 +105,6 @@ class TournamentBP : public BPredUnit void update(ThreadID tid, Addr branch_addr, bool taken, void *bp_history, bool squashed); - void retireSquashed(ThreadID tid, void *bp_history); - /** * Restores the global branch history on a squash. * @param bp_history Pointer to the BPHistory object that has the