From 4ba89236f00e1d18e7785da57941f25abfed6033 Mon Sep 17 00:00:00 2001 From: Jairo Balart Date: Sat, 5 Jan 2019 10:24:17 +0100 Subject: [PATCH] cpu: Made TAGE a SimObject that can be used by other predictors The TAGE implementation is now a SimObject so that other branch predictors can easily use it. It has also been updated with the latest available TAGE implementation from Andre Seznec: http://www.irisa.fr/alf/downloads/seznec/TAGE-GSC-IMLI.tar Change-Id: I2251b8b2d7f94124f9955f52b917dc3b064f090e Reviewed-on: https://gem5-review.googlesource.com/c/15317 Reviewed-by: Giacomo Travaglini Maintainer: Andreas Sandberg --- src/cpu/pred/2bit_local.cc | 2 +- src/cpu/pred/2bit_local.hh | 2 +- src/cpu/pred/BranchPredictor.py | 46 +- src/cpu/pred/SConscript | 1 + src/cpu/pred/bi_mode.cc | 2 +- src/cpu/pred/bi_mode.hh | 2 +- src/cpu/pred/bpred_unit.cc | 11 +- src/cpu/pred/bpred_unit.hh | 20 +- src/cpu/pred/ltage.cc | 125 ++--- src/cpu/pred/ltage.hh | 42 +- src/cpu/pred/tage.cc | 641 ++----------------------- src/cpu/pred/tage.hh | 357 +------------- src/cpu/pred/tage_base.cc | 803 ++++++++++++++++++++++++++++++++ src/cpu/pred/tage_base.hh | 500 ++++++++++++++++++++ src/cpu/pred/tournament.cc | 3 +- src/cpu/pred/tournament.hh | 5 +- 16 files changed, 1502 insertions(+), 1060 deletions(-) create mode 100644 src/cpu/pred/tage_base.cc create mode 100644 src/cpu/pred/tage_base.hh diff --git a/src/cpu/pred/2bit_local.cc b/src/cpu/pred/2bit_local.cc index cd97f2bdf..47ca6514d 100644 --- a/src/cpu/pred/2bit_local.cc +++ b/src/cpu/pred/2bit_local.cc @@ -119,7 +119,7 @@ LocalBP::lookup(ThreadID tid, Addr branch_addr, void * &bp_history) void LocalBP::update(ThreadID tid, Addr branch_addr, bool taken, void *bp_history, - bool squashed) + bool squashed, const StaticInstPtr & inst, Addr corrTarget) { assert(bp_history == NULL); unsigned local_predictor_idx; diff --git a/src/cpu/pred/2bit_local.hh b/src/cpu/pred/2bit_local.hh index 30327b7bc..764a66c74 100644 --- 
a/src/cpu/pred/2bit_local.hh +++ b/src/cpu/pred/2bit_local.hh @@ -92,7 +92,7 @@ class LocalBP : public BPredUnit * @param taken Whether or not the branch was taken. */ void update(ThreadID tid, Addr branch_addr, bool taken, void *bp_history, - bool squashed); + bool squashed, const StaticInstPtr & inst, Addr corrTarget); void squash(ThreadID tid, void *bp_history) { assert(bp_history == NULL); } diff --git a/src/cpu/pred/BranchPredictor.py b/src/cpu/pred/BranchPredictor.py index 4d7323e49..06856865a 100644 --- a/src/cpu/pred/BranchPredictor.py +++ b/src/cpu/pred/BranchPredictor.py @@ -87,12 +87,14 @@ class BiModeBP(BranchPredictor): choicePredictorSize = Param.Unsigned(8192, "Size of choice predictor") choiceCtrBits = Param.Unsigned(2, "Bits of choice counters") -# TAGE branch predictor as described in https://www.jilp.org/vol8/v8paper1.pdf -# The default sizes below are for the 8C-TAGE configuration (63.5 Kbits) -class TAGE(BranchPredictor): - type = 'TAGE' - cxx_class = 'TAGE' - cxx_header = "cpu/pred/tage.hh" +class TAGEBase(SimObject): + type = 'TAGEBase' + cxx_class = 'TAGEBase' + cxx_header = "cpu/pred/tage_base.hh" + + numThreads = Param.Unsigned(Parent.numThreads, "Number of threads") + instShiftAmt = Param.Unsigned(Parent.instShiftAmt, + "Number of bits to shift instructions by") nHistoryTables = Param.Unsigned(7, "Number of history tables") minHist = Param.Unsigned(5, "Minimum history size of TAGE") @@ -115,8 +117,33 @@ class TAGE(BranchPredictor): pathHistBits = Param.Unsigned(16, "Path history size") logUResetPeriod = Param.Unsigned(18, "Log period in number of branches to reset TAGE useful counters") + numUseAltOnNa = Param.Unsigned(1, "Number of USE_ALT_ON_NA counters") useAltOnNaBits = Param.Unsigned(4, "Size of the USE_ALT_ON_NA counter") + maxNumAlloc = Param.Unsigned(1, + "Max number of TAGE entries allocted on mispredict") + + # List of enabled TAGE tables. 
If empty, all are enabled + noSkip = VectorParam.Bool([], "Vector of enabled TAGE tables") + + speculativeHistUpdate = Param.Bool(True, + "Use speculative update for histories") + +# TAGE branch predictor as described in https://www.jilp.org/vol8/v8paper1.pdf +# The default sizes below are for the 8C-TAGE configuration (63.5 Kbits) +class TAGE(BranchPredictor): + type = 'TAGE' + cxx_class = 'TAGE' + cxx_header = "cpu/pred/tage.hh" + tage = Param.TAGEBase(TAGEBase(), "Tage object") + +class LTAGE_TAGE(TAGEBase): + nHistoryTables = 12 + minHist = 4 + maxHist = 640 + tagTableTagWidths = [0, 7, 7, 8, 8, 9, 10, 11, 12, 12, 13, 14, 15] + logTagTableSizes = [14, 10, 10, 11, 11, 11, 11, 10, 10, 10, 10, 9, 9] + logUResetPeriod = 19 # LTAGE branch predictor as described in # https://www.irisa.fr/caps/people/seznec/L-TAGE.pdf @@ -127,12 +154,7 @@ class LTAGE(TAGE): cxx_class = 'LTAGE' cxx_header = "cpu/pred/ltage.hh" - nHistoryTables = 12 - minHist = 4 - maxHist = 640 - tagTableTagWidths = [0, 7, 7, 8, 8, 9, 10, 11, 12, 12, 13, 14, 15] - logTagTableSizes = [14, 10, 10, 11, 11, 11, 11, 10, 10, 10, 10, 9, 9] - logUResetPeriod = 19 + tage = LTAGE_TAGE() logSizeLoopPred = Param.Unsigned(8, "Log size of the loop predictor") withLoopBits = Param.Unsigned(7, "Size of the WITHLOOP counter") diff --git a/src/cpu/pred/SConscript b/src/cpu/pred/SConscript index b0e63096a..96451f253 100644 --- a/src/cpu/pred/SConscript +++ b/src/cpu/pred/SConscript @@ -43,6 +43,7 @@ Source('indirect.cc') Source('ras.cc') Source('tournament.cc') Source ('bi_mode.cc') +Source('tage_base.cc') Source('tage.cc') Source('ltage.cc') DebugFlag('FreeList') diff --git a/src/cpu/pred/bi_mode.cc b/src/cpu/pred/bi_mode.cc index e8fda7387..18286a702 100644 --- a/src/cpu/pred/bi_mode.cc +++ b/src/cpu/pred/bi_mode.cc @@ -161,7 +161,7 @@ BiModeBP::btbUpdate(ThreadID tid, Addr branchAddr, void * &bpHistory) */ void BiModeBP::update(ThreadID tid, Addr branchAddr, bool taken, void *bpHistory, - bool squashed) + bool 
squashed, const StaticInstPtr & inst, Addr corrTarget) { assert(bpHistory); diff --git a/src/cpu/pred/bi_mode.hh b/src/cpu/pred/bi_mode.hh index 7b091d111..34766b599 100644 --- a/src/cpu/pred/bi_mode.hh +++ b/src/cpu/pred/bi_mode.hh @@ -62,7 +62,7 @@ class BiModeBP : public BPredUnit bool lookup(ThreadID tid, Addr branch_addr, void * &bp_history); void btbUpdate(ThreadID tid, Addr branch_addr, void * &bp_history); void update(ThreadID tid, Addr branch_addr, bool taken, void *bp_history, - bool squashed); + bool squashed, const StaticInstPtr & inst, Addr corrTarget); unsigned getGHR(ThreadID tid, void *bp_history) const; private: diff --git a/src/cpu/pred/bpred_unit.cc b/src/cpu/pred/bpred_unit.cc index 05770cba9..455773cce 100644 --- a/src/cpu/pred/bpred_unit.cc +++ b/src/cpu/pred/bpred_unit.cc @@ -210,7 +210,7 @@ BPredUnit::predict(const StaticInstPtr &inst, const InstSeqNum &seqNum, "for PC %s\n", tid, seqNum, pc); PredictorHistory predict_record(seqNum, pc.instAddr(), - pred_taken, bp_history, tid); + pred_taken, bp_history, tid, inst); // Now lookup in the BTB or RAS. if (pred_taken) { @@ -309,6 +309,7 @@ BPredUnit::predict(const StaticInstPtr &inst, const InstSeqNum &seqNum, } TheISA::advancePC(target, inst); } + predict_record.target = target.instAddr(); pc = target; @@ -332,7 +333,9 @@ BPredUnit::update(const InstSeqNum &done_sn, ThreadID tid) // Update the branch predictor with the correct results. update(tid, predHist[tid].back().pc, predHist[tid].back().predTaken, - predHist[tid].back().bpHistory, false); + predHist[tid].back().bpHistory, false, + predHist[tid].back().inst, + predHist[tid].back().target); predHist[tid].pop_back(); } @@ -440,9 +443,11 @@ BPredUnit::squash(const InstSeqNum &squashed_sn, // Remember the correct direction for the update at commit. 
pred_hist.front().predTaken = actually_taken; + pred_hist.front().target = corrTarget.instAddr(); update(tid, (*hist_it).pc, actually_taken, - pred_hist.front().bpHistory, true); + pred_hist.front().bpHistory, true, pred_hist.front().inst, + corrTarget.instAddr()); if (actually_taken) { if (hist_it->wasReturn && !hist_it->usedRAS) { diff --git a/src/cpu/pred/bpred_unit.hh b/src/cpu/pred/bpred_unit.hh index b890dc332..9a75bbde0 100644 --- a/src/cpu/pred/bpred_unit.hh +++ b/src/cpu/pred/bpred_unit.hh @@ -175,10 +175,15 @@ class BPredUnit : public SimObject * associated with the branch lookup that is being updated. * @param squashed Set to true when this function is called during a * squash operation. + * @param inst Static instruction information + * @param corrTarget The resolved target of the branch (only needed + * for squashed branches) * @todo Make this update flexible enough to handle a global predictor. */ virtual void update(ThreadID tid, Addr instPC, bool taken, - void *bp_history, bool squashed) = 0; + void *bp_history, bool squashed, + const StaticInstPtr & inst = StaticInst::nullStaticInstPtr, + Addr corrTarget = MaxAddr) = 0; /** * Updates the BTB with the target of a branch. * @param inst_PC The branch's PC that will be updated. @@ -200,10 +205,11 @@ class BPredUnit : public SimObject */ PredictorHistory(const InstSeqNum &seq_num, Addr instPC, bool pred_taken, void *bp_history, - ThreadID _tid) + ThreadID _tid, const StaticInstPtr & inst) : seqNum(seq_num), pc(instPC), bpHistory(bp_history), RASTarget(0), RASIndex(0), tid(_tid), predTaken(pred_taken), usedRAS(0), pushedRAS(0), - wasCall(0), wasReturn(0), wasIndirect(0) + wasCall(0), wasReturn(0), wasIndirect(0), + target(MaxAddr), inst(inst) {} bool operator==(const PredictorHistory &entry) const { @@ -248,6 +254,14 @@ class BPredUnit : public SimObject /** Wether this instruction was an indirect branch */ bool wasIndirect; + + /** Target of the branch. 
First it is predicted, and fixed later + * if necessary + */ + Addr target; + + /** The branch instruction */ + const StaticInstPtr inst; }; typedef std::deque History; diff --git a/src/cpu/pred/ltage.cc b/src/cpu/pred/ltage.cc index 56e1555cf..22945ecda 100644 --- a/src/cpu/pred/ltage.cc +++ b/src/cpu/pred/ltage.cc @@ -168,9 +168,10 @@ LTAGE::loopUpdate(Addr pc, bool taken, LTageBranchInfo* bi) ltable[idx].confidence = 0; ltable[idx].currentIter = 0; return; - } else if (bi->loopPred != bi->tagePred) { + } else if (bi->loopPred != bi->tageBranchInfo->tagePred) { DPRINTF(LTage, "Loop Prediction success:%lx\n",pc); - unsignedCtrUpdate(ltable[idx].age, true, loopTableAgeBits); + TAGEBase::unsignedCtrUpdate(ltable[idx].age, true, + loopTableAgeBits); } } @@ -190,7 +191,7 @@ LTAGE::loopUpdate(Addr pc, bool taken, LTageBranchInfo* bi) if (ltable[idx].currentIter == ltable[idx].numIter) { DPRINTF(LTage, "Loop End predicted successfully:%lx\n", pc); - unsignedCtrUpdate(ltable[idx].confidence, true, + TAGEBase::unsignedCtrUpdate(ltable[idx].confidence, true, loopTableConfidenceBits); //just do not predict when the loop count is 1 or 2 if (ltable[idx].numIter < 3) { @@ -218,10 +219,11 @@ LTAGE::loopUpdate(Addr pc, bool taken, LTageBranchInfo* bi) } } else if (useDirectionBit ? - ((bi->loopPredValid ? bi->loopPred : bi->tagePred) != taken) : + ((bi->loopPredValid ? 
+ bi->loopPred : bi->tageBranchInfo->tagePred) != taken) : taken) { //try to allocate an entry on taken branch - int nrand = random_mt.random(); + int nrand = TAGEBase::getRandom(); for (int i = 0; i < (1 << logLoopTableAssoc); i++) { int loop_hit = (nrand + i) & ((1 << logLoopTableAssoc) - 1); idx = bi->loopIndex + loop_hit; @@ -248,10 +250,11 @@ LTAGE::loopUpdate(Addr pc, bool taken, LTageBranchInfo* bi) bool LTAGE::predict(ThreadID tid, Addr branch_pc, bool cond_branch, void* &b) { - LTageBranchInfo *bi = new LTageBranchInfo(nHistoryTables+1); + LTageBranchInfo *bi = new LTageBranchInfo(*tage); b = (void*)(bi); - bool pred_taken = tagePredict(tid, branch_pc, cond_branch, bi); + bool pred_taken = tage->tagePredict(tid, branch_pc, cond_branch, + bi->tageBranchInfo); if (cond_branch) { // loop prediction @@ -259,12 +262,13 @@ LTAGE::predict(ThreadID tid, Addr branch_pc, bool cond_branch, void* &b) if ((loopUseCounter >= 0) && bi->loopPredValid) { pred_taken = bi->loopPred; - bi->provider = LOOP; + bi->tageBranchInfo->provider = LOOP; } DPRINTF(LTage, "Predict for %lx: taken?:%d, loopTaken?:%d, " "loopValid?:%d, loopUseCounter:%d, tagePred:%d, altPred:%d\n", branch_pc, pred_taken, bi->loopPred, bi->loopPredValid, - loopUseCounter, bi->tagePred, bi->altTaken); + loopUseCounter, bi->tageBranchInfo->tagePred, + bi->tageBranchInfo->altTaken); if (useSpeculation) { specLoopUpdate(pred_taken, bi); @@ -275,40 +279,68 @@ LTAGE::predict(ThreadID tid, Addr branch_pc, bool cond_branch, void* &b) } void -LTAGE::condBranchUpdate(Addr branch_pc, bool taken, - TageBranchInfo* tage_bi, int nrand) +LTAGE::update(ThreadID tid, Addr branch_pc, bool taken, void* bp_history, + bool squashed, const StaticInstPtr & inst, Addr corrTarget) { - LTageBranchInfo* bi = static_cast(tage_bi); - - if (useSpeculation) { - // recalculate loop prediction without speculation - // It is ok to overwrite the loop prediction fields in bi - // as the stats have already been updated with the previous - // 
values - bi->loopPred = getLoop(branch_pc, bi, false); + assert(bp_history); + + LTageBranchInfo* bi = static_cast(bp_history); + + if (squashed) { + if (tage->isSpeculativeUpdateEnabled()) { + // This restores the global history, then update it + // and recomputes the folded histories. + tage->squash(tid, taken, bi->tageBranchInfo, corrTarget); + squashLoop(bi); + } + return; } - if (bi->loopPredValid) { - if (bi->tagePred != bi->loopPred) { - ctrUpdate(loopUseCounter, - (bi->loopPred == taken), - withLoopBits); + int nrand = TAGEBase::getRandom() & 3; + if (bi->tageBranchInfo->condBranch) { + DPRINTF(LTage, "Updating tables for branch:%lx; taken?:%d\n", + branch_pc, taken); + tage->updateStats(taken, bi->tageBranchInfo); + // update stats + if (bi->tageBranchInfo->provider == LOOP) { + if (taken == bi->loopPred) { + loopPredictorCorrect++; + } else { + loopPredictorWrong++; + } } + // cond Branch Update + if (useSpeculation) { + // recalculate loop prediction without speculation + // It is ok to overwrite the loop prediction fields in bi + // as the stats have already been updated with the previous + // values + bi->loopPred = getLoop(branch_pc, bi, false); + } + if (bi->loopPredValid) { + if (bi->tageBranchInfo->tagePred != bi->loopPred) { + TAGEBase::ctrUpdate(loopUseCounter, + (bi->loopPred == taken), + withLoopBits); + } + } + + loopUpdate(branch_pc, taken, bi); + + tage->condBranchUpdate(tid, branch_pc, taken, bi->tageBranchInfo, + nrand, corrTarget); } - loopUpdate(branch_pc, taken, bi); + tage->updateHistories(tid, branch_pc, taken, bi->tageBranchInfo, false, + inst, corrTarget); - TAGE::condBranchUpdate(branch_pc, taken, bi, nrand); + delete bi; } void -LTAGE::squash(ThreadID tid, bool taken, void *bp_history) +LTAGE::squashLoop(LTageBranchInfo* bi) { - TAGE::squash(tid, taken, bp_history); - - LTageBranchInfo* bi = (LTageBranchInfo*)(bp_history); - - if (bi->condBranch) { + if (bi->tageBranchInfo->condBranch) { if (bi->loopHit >= 0) { int idx = 
finallindex(bi->loopIndex, bi->loopLowPcBits, @@ -322,37 +354,14 @@ void LTAGE::squash(ThreadID tid, void *bp_history) { LTageBranchInfo* bi = (LTageBranchInfo*)(bp_history); - if (bi->condBranch) { - if (bi->loopHit >= 0) { - int idx = finallindex(bi->loopIndex, - bi->loopLowPcBits, - bi->loopHit); - ltable[idx].currentIterSpec = bi->currentIter; - } + + if (bi->tageBranchInfo->condBranch) { + squashLoop(bi); } TAGE::squash(tid, bp_history); } - -void -LTAGE::updateStats(bool taken, TageBranchInfo* bi) -{ - TAGE::updateStats(taken, bi); - - LTageBranchInfo * ltage_bi = static_cast(bi); - - if (ltage_bi->provider == LOOP) { - if (taken == ltage_bi->loopPred) { - loopPredictorCorrect++; - } else { - loopPredictorWrong++; - } - } -} - - - void LTAGE::regStats() { diff --git a/src/cpu/pred/ltage.hh b/src/cpu/pred/ltage.hh index 94ff96832..c749cba2c 100644 --- a/src/cpu/pred/ltage.hh +++ b/src/cpu/pred/ltage.hh @@ -66,6 +66,9 @@ class LTAGE: public TAGE // Base class methods. void squash(ThreadID tid, void *bp_history) override; + void update(ThreadID tid, Addr branch_addr, bool taken, void *bp_history, + bool squashed, const StaticInstPtr & inst, + Addr corrTarget) override; void regStats() override; @@ -88,7 +91,7 @@ class LTAGE: public TAGE // more provider types enum { - LOOP = LAST_TAGE_PROVIDER_TYPE + 1 + LOOP = TAGEBase::LAST_TAGE_PROVIDER_TYPE + 1 }; // Primary branch history entry @@ -103,12 +106,15 @@ class LTAGE: public TAGE int loopLowPcBits; // only for useHashing int loopHit; - LTageBranchInfo(int sz) - : TageBranchInfo(sz), + LTageBranchInfo(TAGEBase &tage) + : TageBranchInfo(tage), loopTag(0), currentIter(0), loopPred(false), loopPredValid(false), loopIndex(0), loopLowPcBits(0), loopHit(0) {} + + virtual ~LTageBranchInfo() + {} }; /** @@ -156,17 +162,6 @@ class LTAGE: public TAGE */ void specLoopUpdate(bool taken, LTageBranchInfo* bi); - /** - * Update LTAGE for conditional branches. - * @param branch_pc The unshifted branch PC. 
- * @param taken Actual branch outcome. - * @param bi Pointer to information on the prediction - * recorded at prediction time. - * @nrand Random int number from 0 to 3 - */ - void condBranchUpdate( - Addr branch_pc, bool taken, TageBranchInfo* bi, int nrand) override; - /** * Get a branch prediction from LTAGE. *NOT* an override of * BpredUnit::predict(). @@ -184,24 +179,9 @@ class LTAGE: public TAGE * Restores speculatively updated path and direction histories. * Also recomputes compressed (folded) histories based on the * correct branch outcome. - * This version of squash() is called once on a branch misprediction. - * @param tid The Thread ID to select the histories to rollback. - * @param taken The correct branch outcome. - * @param bp_history Wrapping pointer to TageBranchInfo (to allow - * storing derived class prediction information in the - * base class). - * @post bp_history points to valid memory. - */ - void squash( - ThreadID tid, bool taken, void *bp_history) override; - - /** - * Update the stats - * @param taken Actual branch outcome - * @param bi Pointer to information on the prediction - * recorded at prediction time. + * @param bi Branch information. 
*/ - void updateStats(bool taken, TageBranchInfo* bi) override; + void squashLoop(LTageBranchInfo * bi); const unsigned logSizeLoopPred; const unsigned loopTableAgeBits; diff --git a/src/cpu/pred/tage.cc b/src/cpu/pred/tage.cc index 061f808e8..50beb20ca 100644 --- a/src/cpu/pred/tage.cc +++ b/src/cpu/pred/tage.cc @@ -47,222 +47,8 @@ #include "debug/Fetch.hh" #include "debug/Tage.hh" -TAGE::TAGE(const TAGEParams *params) - : BPredUnit(params), - logRatioBiModalHystEntries(params->logRatioBiModalHystEntries), - nHistoryTables(params->nHistoryTables), - tagTableCounterBits(params->tagTableCounterBits), - tagTableUBits(params->tagTableUBits), - histBufferSize(params->histBufferSize), - minHist(params->minHist), - maxHist(params->maxHist), - pathHistBits(params->pathHistBits), - tagTableTagWidths(params->tagTableTagWidths), - logTagTableSizes(params->logTagTableSizes), - threadHistory(params->numThreads), - logUResetPeriod(params->logUResetPeriod), - useAltOnNaBits(params->useAltOnNaBits) +TAGE::TAGE(const TAGEParams *params) : BPredUnit(params), tage(params->tage) { - // Current method for periodically resetting the u counter bits only - // works for 1 or 2 bits - // Also make sure that it is not 0 - assert(tagTableUBits <= 2 && (tagTableUBits > 0)); - - // we use int type for the path history, so it cannot be more than - // its size - assert(pathHistBits <= (sizeof(int)*8)); - - // initialize the counter to half of the period - assert(logUResetPeriod != 0); - tCounter = ULL(1) << (logUResetPeriod - 1); - - assert(params->histBufferSize > params->maxHist * 2); - useAltPredForNewlyAllocated = 0; - - for (auto& history : threadHistory) { - history.pathHist = 0; - history.globalHistory = new uint8_t[histBufferSize]; - history.gHist = history.globalHistory; - memset(history.gHist, 0, histBufferSize); - history.ptGhist = 0; - } - - histLengths = new int [nHistoryTables+1]; - histLengths[1] = minHist; - histLengths[nHistoryTables] = maxHist; - - for (int i = 2; i <= 
nHistoryTables; i++) { - histLengths[i] = (int) (((double) minHist * - pow ((double) (maxHist) / (double) minHist, - (double) (i - 1) / (double) ((nHistoryTables- 1)))) - + 0.5); - } - - assert(tagTableTagWidths.size() == (nHistoryTables+1)); - assert(logTagTableSizes.size() == (nHistoryTables+1)); - - // First entry is for the Bimodal table and it is untagged in this - // implementation - assert(tagTableTagWidths[0] == 0); - - for (auto& history : threadHistory) { - history.computeIndices = new FoldedHistory[nHistoryTables+1]; - history.computeTags[0] = new FoldedHistory[nHistoryTables+1]; - history.computeTags[1] = new FoldedHistory[nHistoryTables+1]; - - for (int i = 1; i <= nHistoryTables; i++) { - history.computeIndices[i].init( - histLengths[i], (logTagTableSizes[i])); - history.computeTags[0][i].init( - history.computeIndices[i].origLength, tagTableTagWidths[i]); - history.computeTags[1][i].init( - history.computeIndices[i].origLength, tagTableTagWidths[i]-1); - DPRINTF(Tage, "HistLength:%d, TTSize:%d, TTTWidth:%d\n", - histLengths[i], logTagTableSizes[i], tagTableTagWidths[i]); - } - } - - const uint64_t bimodalTableSize = ULL(1) << logTagTableSizes[0]; - btablePrediction.resize(bimodalTableSize, false); - btableHysteresis.resize(bimodalTableSize >> logRatioBiModalHystEntries, - true); - - gtable = new TageEntry*[nHistoryTables + 1]; - for (int i = 1; i <= nHistoryTables; i++) { - gtable[i] = new TageEntry[1<<(logTagTableSizes[i])]; - } - - tableIndices = new int [nHistoryTables+1]; - tableTags = new int [nHistoryTables+1]; -} - -int -TAGE::bindex(Addr pc_in) const -{ - return ((pc_in >> instShiftAmt) & ((ULL(1) << (logTagTableSizes[0])) - 1)); -} - -int -TAGE::F(int A, int size, int bank) const -{ - int A1, A2; - - A = A & ((ULL(1) << size) - 1); - A1 = (A & ((ULL(1) << logTagTableSizes[bank]) - 1)); - A2 = (A >> logTagTableSizes[bank]); - A2 = ((A2 << bank) & ((ULL(1) << logTagTableSizes[bank]) - 1)) - + (A2 >> (logTagTableSizes[bank] - bank)); - A = A1 ^ 
A2; - A = ((A << bank) & ((ULL(1) << logTagTableSizes[bank]) - 1)) - + (A >> (logTagTableSizes[bank] - bank)); - return (A); -} - - -// gindex computes a full hash of pc, ghist and pathHist -int -TAGE::gindex(ThreadID tid, Addr pc, int bank) const -{ - int index; - int hlen = (histLengths[bank] > pathHistBits) ? pathHistBits : - histLengths[bank]; - const Addr shiftedPc = pc >> instShiftAmt; - index = - shiftedPc ^ - (shiftedPc >> ((int) abs(logTagTableSizes[bank] - bank) + 1)) ^ - threadHistory[tid].computeIndices[bank].comp ^ - F(threadHistory[tid].pathHist, hlen, bank); - - return (index & ((ULL(1) << (logTagTableSizes[bank])) - 1)); -} - - -// Tag computation -uint16_t -TAGE::gtag(ThreadID tid, Addr pc, int bank) const -{ - int tag = (pc >> instShiftAmt) ^ - threadHistory[tid].computeTags[0][bank].comp ^ - (threadHistory[tid].computeTags[1][bank].comp << 1); - - return (tag & ((ULL(1) << tagTableTagWidths[bank]) - 1)); -} - - -// Up-down saturating counter -void -TAGE::ctrUpdate(int8_t & ctr, bool taken, int nbits) -{ - assert(nbits <= sizeof(int8_t) << 3); - if (taken) { - if (ctr < ((1 << (nbits - 1)) - 1)) - ctr++; - } else { - if (ctr > -(1 << (nbits - 1))) - ctr--; - } -} - -// Up-down unsigned saturating counter -void -TAGE::unsignedCtrUpdate(uint8_t & ctr, bool up, unsigned nbits) -{ - assert(nbits <= sizeof(uint8_t) << 3); - if (up) { - if (ctr < ((1 << nbits) - 1)) - ctr++; - } else { - if (ctr) - ctr--; - } -} - -// Bimodal prediction -bool -TAGE::getBimodePred(Addr pc, TageBranchInfo* bi) const -{ - return btablePrediction[bi->bimodalIndex]; -} - - -// Update the bimodal predictor: a hysteresis bit is shared among N prediction -// bits (N = 2 ^ logRatioBiModalHystEntries) -void -TAGE::baseUpdate(Addr pc, bool taken, TageBranchInfo* bi) -{ - int inter = (btablePrediction[bi->bimodalIndex] << 1) - + btableHysteresis[bi->bimodalIndex >> logRatioBiModalHystEntries]; - if (taken) { - if (inter < 3) - inter++; - } else if (inter > 0) { - inter--; - } - 
const bool pred = inter >> 1; - const bool hyst = inter & 1; - btablePrediction[bi->bimodalIndex] = pred; - btableHysteresis[bi->bimodalIndex >> logRatioBiModalHystEntries] = hyst; - DPRINTF(Tage, "Updating branch %lx, pred:%d, hyst:%d\n", pc, pred, hyst); -} - -// shifting the global history: we manage the history in a big table in order -// to reduce simulation time -void -TAGE::updateGHist(uint8_t * &h, bool dir, uint8_t * tab, int &pt) -{ - if (pt == 0) { - DPRINTF(Tage, "Rolling over the histories\n"); - // Copy beginning of globalHistoryBuffer to end, such that - // the last maxHist outcomes are still reachable - // through pt[0 .. maxHist - 1]. - for (int i = 0; i < maxHist; i++) - tab[histBufferSize - maxHist + i] = tab[i]; - pt = histBufferSize - maxHist; - h = &tab[pt]; - } - pt--; - h--; - h[0] = (dir) ? 1 : 0; } // Get GHR for hashing indirect predictor @@ -272,297 +58,56 @@ unsigned TAGE::getGHR(ThreadID tid, void *bp_history) const { TageBranchInfo* bi = static_cast(bp_history); - unsigned val = 0; - for (unsigned i = 0; i < 32; i++) { - // Make sure we don't go out of bounds - int gh_offset = bi->ptGhist + i; - assert(&(threadHistory[tid].globalHistory[gh_offset]) < - threadHistory[tid].globalHistory + histBufferSize); - val |= ((threadHistory[tid].globalHistory[gh_offset] & 0x1) << i); - } - - return val; -} - -//prediction -bool -TAGE::predict(ThreadID tid, Addr branch_pc, bool cond_branch, void* &b) -{ - TageBranchInfo *bi = new TageBranchInfo(nHistoryTables+1); - b = (void*)(bi); - return tagePredict(tid, branch_pc, cond_branch, bi); -} - -bool -TAGE::tagePredict(ThreadID tid, Addr branch_pc, - bool cond_branch, TageBranchInfo* bi) -{ - Addr pc = branch_pc; - bool pred_taken = true; - - if (cond_branch) { - // TAGE prediction - - // computes the table addresses and the partial tags - for (int i = 1; i <= nHistoryTables; i++) { - tableIndices[i] = gindex(tid, pc, i); - bi->tableIndices[i] = tableIndices[i]; - tableTags[i] = gtag(tid, pc, i); - 
bi->tableTags[i] = tableTags[i]; - } - - bi->bimodalIndex = bindex(pc); - - bi->hitBank = 0; - bi->altBank = 0; - //Look for the bank with longest matching history - for (int i = nHistoryTables; i > 0; i--) { - if (gtable[i][tableIndices[i]].tag == tableTags[i]) { - bi->hitBank = i; - bi->hitBankIndex = tableIndices[bi->hitBank]; - break; - } - } - //Look for the alternate bank - for (int i = bi->hitBank - 1; i > 0; i--) { - if (gtable[i][tableIndices[i]].tag == tableTags[i]) { - bi->altBank = i; - bi->altBankIndex = tableIndices[bi->altBank]; - break; - } - } - //computes the prediction and the alternate prediction - if (bi->hitBank > 0) { - if (bi->altBank > 0) { - bi->altTaken = - gtable[bi->altBank][tableIndices[bi->altBank]].ctr >= 0; - }else { - bi->altTaken = getBimodePred(pc, bi); - } - - bi->longestMatchPred = - gtable[bi->hitBank][tableIndices[bi->hitBank]].ctr >= 0; - bi->pseudoNewAlloc = - abs(2 * gtable[bi->hitBank][bi->hitBankIndex].ctr + 1) <= 1; - - //if the entry is recognized as a newly allocated entry and - //useAltPredForNewlyAllocated is positive use the alternate - //prediction - if ((useAltPredForNewlyAllocated < 0) || ! bi->pseudoNewAlloc) { - bi->tagePred = bi->longestMatchPred; - bi->provider = TAGE_LONGEST_MATCH; - } else { - bi->tagePred = bi->altTaken; - bi->provider = bi->altBank ? 
TAGE_ALT_MATCH - : BIMODAL_ALT_MATCH; - } - } else { - bi->altTaken = getBimodePred(pc, bi); - bi->tagePred = bi->altTaken; - bi->longestMatchPred = bi->altTaken; - bi->provider = BIMODAL_ONLY; - } - //end TAGE prediction - - pred_taken = (bi->tagePred); - DPRINTF(Tage, "Predict for %lx: taken?:%d, tagePred:%d, altPred:%d\n", - branch_pc, pred_taken, bi->tagePred, bi->altTaken); - } - bi->branchPC = branch_pc; - bi->condBranch = cond_branch; - return pred_taken; + return tage->getGHR(tid, bi->tageBranchInfo); } // PREDICTOR UPDATE void TAGE::update(ThreadID tid, Addr branch_pc, bool taken, void* bp_history, - bool squashed) + bool squashed, const StaticInstPtr & inst, Addr corrTarget) { assert(bp_history); TageBranchInfo *bi = static_cast(bp_history); + assert(corrTarget != MaxAddr); + if (squashed) { // This restores the global history, then update it // and recomputes the folded histories. - squash(tid, taken, bp_history); + tage->squash(tid, taken, bi->tageBranchInfo, corrTarget); return; } - int nrand = random_mt.random(0,3); - if (bi->condBranch) { + int nrand = TAGEBase::getRandom() & 3; + if (bi->tageBranchInfo->condBranch) { DPRINTF(Tage, "Updating tables for branch:%lx; taken?:%d\n", branch_pc, taken); - updateStats(taken, bi); - condBranchUpdate(branch_pc, taken, bi, nrand); - } - if (!squashed) { - delete bi; + tage->updateStats(taken, bi->tageBranchInfo); + tage->condBranchUpdate(tid, branch_pc, taken, bi->tageBranchInfo, + nrand, corrTarget); } -} - -void -TAGE::condBranchUpdate(Addr branch_pc, bool taken, - TageBranchInfo* bi, int nrand) -{ - // TAGE UPDATE - // try to allocate a new entries only if prediction was wrong - bool longest_match_pred = false; - bool alloc = (bi->tagePred != taken) && (bi->hitBank < nHistoryTables); - if (bi->hitBank > 0) { - // Manage the selection between longest matching and alternate - // matching for "pseudo"-newly allocated longest matching entry - longest_match_pred = bi->longestMatchPred; - bool PseudoNewAlloc = 
bi->pseudoNewAlloc; - // an entry is considered as newly allocated if its prediction - // counter is weak - if (PseudoNewAlloc) { - if (longest_match_pred == taken) { - alloc = false; - } - // if it was delivering the correct prediction, no need to - // allocate new entry even if the overall prediction was false - if (longest_match_pred != bi->altTaken) { - ctrUpdate(useAltPredForNewlyAllocated, - bi->altTaken == taken, useAltOnNaBits); - } - } - } - - if (alloc) { - // is there some "unuseful" entry to allocate - uint8_t min = 1; - for (int i = nHistoryTables; i > bi->hitBank; i--) { - if (gtable[i][bi->tableIndices[i]].u < min) { - min = gtable[i][bi->tableIndices[i]].u; - } - } - - // we allocate an entry with a longer history - // to avoid ping-pong, we do not choose systematically the next - // entry, but among the 3 next entries - int Y = nrand & - ((ULL(1) << (nHistoryTables - bi->hitBank - 1)) - 1); - int X = bi->hitBank + 1; - if (Y & 1) { - X++; - if (Y & 2) - X++; - } - // No entry available, forces one to be available - if (min > 0) { - gtable[X][bi->tableIndices[X]].u = 0; - } + tage->updateHistories(tid, branch_pc, taken, bi->tageBranchInfo, false, + inst, corrTarget); - //Allocate only one entry - for (int i = X; i <= nHistoryTables; i++) { - if ((gtable[i][bi->tableIndices[i]].u == 0)) { - gtable[i][bi->tableIndices[i]].tag = bi->tableTags[i]; - gtable[i][bi->tableIndices[i]].ctr = (taken) ? 
0 : -1; - break; - } - } - } - //periodic reset of u: reset is not complete but bit by bit - tCounter++; - if ((tCounter & ((ULL(1) << logUResetPeriod) - 1)) == 0) { - // reset least significant bit - // most significant bit becomes least significant bit - for (int i = 1; i <= nHistoryTables; i++) { - for (int j = 0; j < (ULL(1) << logTagTableSizes[i]); j++) { - gtable[i][j].u = gtable[i][j].u >> 1; - } - } - } - - if (bi->hitBank > 0) { - DPRINTF(Tage, "Updating tag table entry (%d,%d) for branch %lx\n", - bi->hitBank, bi->hitBankIndex, branch_pc); - ctrUpdate(gtable[bi->hitBank][bi->hitBankIndex].ctr, taken, - tagTableCounterBits); - // if the provider entry is not certified to be useful also update - // the alternate prediction - if (gtable[bi->hitBank][bi->hitBankIndex].u == 0) { - if (bi->altBank > 0) { - ctrUpdate(gtable[bi->altBank][bi->altBankIndex].ctr, taken, - tagTableCounterBits); - DPRINTF(Tage, "Updating tag table entry (%d,%d) for" - " branch %lx\n", bi->hitBank, bi->hitBankIndex, - branch_pc); - } - if (bi->altBank == 0) { - baseUpdate(branch_pc, taken, bi); - } - } - - // update the u counter - if (bi->tagePred != bi->altTaken) { - unsignedCtrUpdate(gtable[bi->hitBank][bi->hitBankIndex].u, - bi->tagePred == taken, tagTableUBits); - } - } else { - baseUpdate(branch_pc, taken, bi); - } -} - -void -TAGE::updateHistories(ThreadID tid, Addr branch_pc, bool taken, void* b) -{ - TageBranchInfo* bi = (TageBranchInfo*)(b); - ThreadHistory& tHist = threadHistory[tid]; - // UPDATE HISTORIES - bool pathbit = ((branch_pc >> instShiftAmt) & 1); - //on a squash, return pointers to this and recompute indices. 
- //update user history - updateGHist(tHist.gHist, taken, tHist.globalHistory, tHist.ptGhist); - tHist.pathHist = (tHist.pathHist << 1) + pathbit; - tHist.pathHist = (tHist.pathHist & ((ULL(1) << pathHistBits) - 1)); - - bi->ptGhist = tHist.ptGhist; - bi->pathHist = tHist.pathHist; - //prepare next index and tag computations for user branchs - for (int i = 1; i <= nHistoryTables; i++) - { - bi->ci[i] = tHist.computeIndices[i].comp; - bi->ct0[i] = tHist.computeTags[0][i].comp; - bi->ct1[i] = tHist.computeTags[1][i].comp; - tHist.computeIndices[i].update(tHist.gHist); - tHist.computeTags[0][i].update(tHist.gHist); - tHist.computeTags[1][i].update(tHist.gHist); - } - DPRINTF(Tage, "Updating global histories with branch:%lx; taken?:%d, " - "path Hist: %x; pointer:%d\n", branch_pc, taken, tHist.pathHist, - tHist.ptGhist); + delete bi; } void -TAGE::squash(ThreadID tid, bool taken, void *bp_history) +TAGE::squash(ThreadID tid, void *bp_history) { - TageBranchInfo* bi = (TageBranchInfo*)(bp_history); - ThreadHistory& tHist = threadHistory[tid]; - DPRINTF(Tage, "Restoring branch info: %lx; taken? %d; PathHistory:%x, " - "pointer:%d\n", bi->branchPC,taken, bi->pathHist, bi->ptGhist); - tHist.pathHist = bi->pathHist; - tHist.ptGhist = bi->ptGhist; - tHist.gHist = &(tHist.globalHistory[tHist.ptGhist]); - tHist.gHist[0] = (taken ? 
1 : 0); - for (int i = 1; i <= nHistoryTables; i++) { - tHist.computeIndices[i].comp = bi->ci[i]; - tHist.computeTags[0][i].comp = bi->ct0[i]; - tHist.computeTags[1][i].comp = bi->ct1[i]; - tHist.computeIndices[i].update(tHist.gHist); - tHist.computeTags[0][i].update(tHist.gHist); - tHist.computeTags[1][i].update(tHist.gHist); - } + TageBranchInfo *bi = static_cast<TageBranchInfo*>(bp_history); + DPRINTF(Tage, "Deleting branch info: %lx\n", bi->tageBranchInfo->branchPC); + delete bi; } -void -TAGE::squash(ThreadID tid, void *bp_history) +bool +TAGE::predict(ThreadID tid, Addr branch_pc, bool cond_branch, void* &b) { - TageBranchInfo* bi = (TageBranchInfo*)(bp_history); - DPRINTF(Tage, "Deleting branch info: %lx\n", bi->branchPC); - delete bi; + TageBranchInfo *bi = new TageBranchInfo(*tage); + b = (void*)(bi); + return tage->tagePredict(tid, branch_pc, cond_branch, bi->tageBranchInfo); } bool @@ -570,10 +115,11 @@ TAGE::lookup(ThreadID tid, Addr branch_pc, void* &bp_history) { bool retval = predict(tid, branch_pc, true, bp_history); + TageBranchInfo *bi = static_cast<TageBranchInfo*>(bp_history); + DPRINTF(Tage, "Lookup branch: %lx; predict:%d\n", branch_pc, retval); - updateHistories(tid, branch_pc, retval, bp_history); - assert(threadHistory[tid].gHist == - &threadHistory[tid].globalHistory[threadHistory[tid].ptGhist]); + + tage->updateHistories(tid, branch_pc, retval, bi->tageBranchInfo, true); return retval; } @@ -581,19 +127,8 @@ TAGE::lookup(ThreadID tid, Addr branch_pc, void* &bp_history) void TAGE::btbUpdate(ThreadID tid, Addr branch_pc, void* &bp_history) { - TageBranchInfo* bi = (TageBranchInfo*) bp_history; - ThreadHistory& tHist = threadHistory[tid]; - DPRINTF(Tage, "BTB miss resets prediction: %lx\n", branch_pc); - assert(tHist.gHist == &tHist.globalHistory[tHist.ptGhist]); - tHist.gHist[0] = 0; - for (int i = 1; i <= nHistoryTables; i++) { - bi->ci[i] = tHist.computeIndices[i].comp; - bi->ct0[i] = tHist.computeTags[0][i].comp; - bi->ct1[i] = tHist.computeTags[1][i].comp; - 
tHist.computeIndices[i].update(tHist.gHist); - tHist.computeTags[0][i].update(tHist.gHist); - tHist.computeTags[1][i].update(tHist.gHist); - } + TageBranchInfo *bi = static_cast<TageBranchInfo*>(bp_history); + tage->btbUpdate(tid, branch_pc, bi->tageBranchInfo); } void @@ -601,122 +136,8 @@ TAGE::uncondBranch(ThreadID tid, Addr br_pc, void* &bp_history) { DPRINTF(Tage, "UnConditionalBranch: %lx\n", br_pc); predict(tid, br_pc, false, bp_history); - updateHistories(tid, br_pc, true, bp_history); - assert(threadHistory[tid].gHist == - &threadHistory[tid].globalHistory[threadHistory[tid].ptGhist]); -} - -void -TAGE::updateStats(bool taken, TageBranchInfo* bi) -{ - if (taken == bi->tagePred) { - // correct prediction - switch (bi->provider) { - case BIMODAL_ONLY: tageBimodalProviderCorrect++; break; - case TAGE_LONGEST_MATCH: tageLongestMatchProviderCorrect++; break; - case BIMODAL_ALT_MATCH: bimodalAltMatchProviderCorrect++; break; - case TAGE_ALT_MATCH: tageAltMatchProviderCorrect++; break; - } - } else { - // wrong prediction - switch (bi->provider) { - case BIMODAL_ONLY: tageBimodalProviderWrong++; break; - case TAGE_LONGEST_MATCH: - tageLongestMatchProviderWrong++; - if (bi->altTaken == taken) { - tageAltMatchProviderWouldHaveHit++; - } - break; - case BIMODAL_ALT_MATCH: - bimodalAltMatchProviderWrong++; - break; - case TAGE_ALT_MATCH: - tageAltMatchProviderWrong++; - break; - } - - switch (bi->provider) { - case BIMODAL_ALT_MATCH: - case TAGE_ALT_MATCH: - if (bi->longestMatchPred == taken) { - tageLongestMatchProviderWouldHaveHit++; - } - } - } - - switch (bi->provider) { - case TAGE_LONGEST_MATCH: - case TAGE_ALT_MATCH: - tageLongestMatchProvider[bi->hitBank]++; - tageAltMatchProvider[bi->altBank]++; - break; - } -} - -void -TAGE::regStats() -{ - BPredUnit::regStats(); - - tageLongestMatchProviderCorrect - .name(name() + ".tageLongestMatchProviderCorrect") - .desc("Number of times TAGE Longest Match is the provider and " - "the prediction is correct"); - - 
tageAltMatchProviderCorrect - .name(name() + ".tageAltMatchProviderCorrect") - .desc("Number of times TAGE Alt Match is the provider and " - "the prediction is correct"); - - bimodalAltMatchProviderCorrect - .name(name() + ".bimodalAltMatchProviderCorrect") - .desc("Number of times TAGE Alt Match is the bimodal and it is the " - "provider and the prediction is correct"); - - tageBimodalProviderCorrect - .name(name() + ".tageBimodalProviderCorrect") - .desc("Number of times there are no hits on the TAGE tables " - "and the bimodal prediction is correct"); - - tageLongestMatchProviderWrong - .name(name() + ".tageLongestMatchProviderWrong") - .desc("Number of times TAGE Longest Match is the provider and " - "the prediction is wrong"); - - tageAltMatchProviderWrong - .name(name() + ".tageAltMatchProviderWrong") - .desc("Number of times TAGE Alt Match is the provider and " - "the prediction is wrong"); - - bimodalAltMatchProviderWrong - .name(name() + ".bimodalAltMatchProviderWrong") - .desc("Number of times TAGE Alt Match is the bimodal and it is the " - "provider and the prediction is wrong"); - - tageBimodalProviderWrong - .name(name() + ".tageBimodalProviderWrong") - .desc("Number of times there are no hits on the TAGE tables " - "and the bimodal prediction is wrong"); - - tageAltMatchProviderWouldHaveHit - .name(name() + ".tageAltMatchProviderWouldHaveHit") - .desc("Number of times TAGE Longest Match is the provider, " - "the prediction is wrong and Alt Match prediction was correct"); - - tageLongestMatchProviderWouldHaveHit - .name(name() + ".tageLongestMatchProviderWouldHaveHit") - .desc("Number of times TAGE Alt Match is the provider, the " - "prediction is wrong and Longest Match prediction was correct"); - - tageLongestMatchProvider - .init(nHistoryTables + 1) - .name(name() + ".tageLongestMatchProvider") - .desc("TAGE provider for longest match"); - - tageAltMatchProvider - .init(nHistoryTables + 1) - .name(name() + ".tageAltMatchProvider") - .desc("TAGE 
provider for alt match"); + TageBranchInfo *bi = static_cast<TageBranchInfo*>(bp_history); + tage->updateHistories(tid, br_pc, true, bi->tageBranchInfo, true); } TAGE* diff --git a/src/cpu/pred/tage.hh b/src/cpu/pred/tage.hh index c66c28cb1..b75051dd7 100644 --- a/src/cpu/pred/tage.hh +++ b/src/cpu/pred/tage.hh @@ -55,358 +55,41 @@ #include "base/types.hh" #include "cpu/pred/bpred_unit.hh" +#include "cpu/pred/tage_base.hh" #include "params/TAGE.hh" class TAGE: public BPredUnit { - public: - TAGE(const TAGEParams *params); - - // Base class methods. - void uncondBranch(ThreadID tid, Addr br_pc, void* &bp_history) override; - bool lookup(ThreadID tid, Addr branch_addr, void* &bp_history) override; - void btbUpdate(ThreadID tid, Addr branch_addr, void* &bp_history) override; - void update(ThreadID tid, Addr branch_addr, bool taken, void *bp_history, - bool squashed) override; - virtual void squash(ThreadID tid, void *bp_history) override; - unsigned getGHR(ThreadID tid, void *bp_history) const override; - - virtual void regStats() override; - protected: - // Prediction Structures - - // Tage Entry - struct TageEntry - { - int8_t ctr; - uint16_t tag; - uint8_t u; - TageEntry() : ctr(0), tag(0), u(0) { } - }; + TAGEBase *tage; - // Folded History Table - compressed history - // to mix with instruction PC to index partially - // tagged tables. 
- struct FoldedHistory - { - unsigned comp; - int compLength; - int origLength; - int outpoint; + struct TageBranchInfo { + TAGEBase::BranchInfo *tageBranchInfo; - void init(int original_length, int compressed_length) - { - comp = 0; - origLength = original_length; - compLength = compressed_length; - outpoint = original_length % compressed_length; - } - - void update(uint8_t * h) - { - comp = (comp << 1) | h[0]; - comp ^= h[origLength] << outpoint; - comp ^= (comp >> compLength); - comp &= (ULL(1) << compLength) - 1; - } - }; - - // provider type - enum { - BIMODAL_ONLY = 0, - TAGE_LONGEST_MATCH, - BIMODAL_ALT_MATCH, - TAGE_ALT_MATCH, - LAST_TAGE_PROVIDER_TYPE = TAGE_ALT_MATCH - }; - - // Primary branch history entry - struct TageBranchInfo - { - int pathHist; - int ptGhist; - int hitBank; - int hitBankIndex; - int altBank; - int altBankIndex; - int bimodalIndex; - - bool tagePred; - bool altTaken; - bool condBranch; - bool longestMatchPred; - bool pseudoNewAlloc; - Addr branchPC; - - // Pointer to dynamically allocated storage - // to save table indices and folded histories. - // To do one call to new instead of five. - int *storage; - - // Pointers to actual saved array within the dynamically - // allocated storage. 
- int *tableIndices; - int *tableTags; - int *ci; - int *ct0; - int *ct1; - - // for stats purposes - unsigned provider; - - TageBranchInfo(int sz) - : pathHist(0), ptGhist(0), - hitBank(0), hitBankIndex(0), - altBank(0), altBankIndex(0), - bimodalIndex(0), - tagePred(false), altTaken(false), - condBranch(false), longestMatchPred(false), - pseudoNewAlloc(false), branchPC(0), - provider(-1) - { - storage = new int [sz * 5]; - tableIndices = storage; - tableTags = storage + sz; - ci = tableTags + sz; - ct0 = ci + sz; - ct1 = ct0 + sz; - } + TageBranchInfo(TAGEBase &tage) : tageBranchInfo(tage.makeBranchInfo()) + {} virtual ~TageBranchInfo() { - delete[] storage; + delete tageBranchInfo; } }; - /** - * Computes the index used to access the - * bimodal table. - * @param pc_in The unshifted branch PC. - */ - int bindex(Addr pc_in) const; - - /** - * Computes the index used to access a - * partially tagged table. - * @param tid The thread ID used to select the - * global histories to use. - * @param pc The unshifted branch PC. - * @param bank The partially tagged table to access. - */ - inline int gindex(ThreadID tid, Addr pc, int bank) const; - - /** - * Utility function to shuffle the path history - * depending on which tagged table we are accessing. - * @param phist The path history. - * @param size Number of path history bits to use. - * @param bank The partially tagged table to access. - */ - int F(int phist, int size, int bank) const; - - /** - * Computes the partial tag of a tagged table. - * @param tid the thread ID used to select the - * global histories to use. - * @param pc The unshifted branch PC. - * @param bank The partially tagged table to access. - */ - inline uint16_t gtag(ThreadID tid, Addr pc, int bank) const; - - /** - * Updates a direction counter based on the actual - * branch outcome. - * @param ctr Reference to counter to update. - * @param taken Actual branch outcome. - * @param nbits Counter width. 
- */ - void ctrUpdate(int8_t & ctr, bool taken, int nbits); - - /** - * Updates an unsigned counter based on up/down parameter - * @param ctr Reference to counter to update. - * @param up Boolean indicating if the counter is incremented/decremented - * If true it is incremented, if false it is decremented - * @param nbits Counter width. - */ - void unsignedCtrUpdate(uint8_t & ctr, bool up, unsigned nbits); - - /** - * Get a branch prediction from the bimodal - * predictor. - * @param pc The unshifted branch PC. - * @param bi Pointer to information on the - * prediction. - */ - bool getBimodePred(Addr pc, TageBranchInfo* bi) const; - - /** - * Updates the bimodal predictor. - * @param pc The unshifted branch PC. - * @param taken The actual branch outcome. - * @param bi Pointer to information on the prediction - * recorded at prediction time. - */ - void baseUpdate(Addr pc, bool taken, TageBranchInfo* bi); - - /** - * (Speculatively) updates the global branch history. - * @param h Reference to pointer to global branch history. - * @param dir (Predicted) outcome to update the histories - * with. - * @param tab - * @param PT Reference to path history. - */ - void updateGHist(uint8_t * &h, bool dir, uint8_t * tab, int &PT); - - /** - * Get a branch prediction from TAGE. *NOT* an override of - * BpredUnit::predict(). - * @param tid The thread ID to select the global - * histories to use. - * @param branch_pc The unshifted branch PC. - * @param cond_branch True if the branch is conditional. - * @param b Reference to wrapping pointer to allow storing - * derived class prediction information in the base class. - */ - virtual bool predict( - ThreadID tid, Addr branch_pc, bool cond_branch, void* &b); - - /** - * Update TAGE. Called at execute to repair histories on a misprediction - * and at commit to update the tables. - * @param tid The thread ID to select the global - * histories to use. - * @param branch_pc The unshifted branch PC. - * @param taken Actual branch outcome. 
- * @param bi Pointer to information on the prediction - * recorded at prediction time. - */ - void update(ThreadID tid, Addr branch_pc, bool taken, TageBranchInfo* bi); - - /** - * (Speculatively) updates global histories (path and direction). - * Also recomputes compressed (folded) histories based on the - * branch direction. - * @param tid The thread ID to select the histories - * to update. - * @param branch_pc The unshifted branch PC. - * @param taken (Predicted) branch direction. - * @param b Wrapping pointer to TageBranchInfo (to allow - * storing derived class prediction information in the - * base class). - */ - void updateHistories(ThreadID tid, Addr branch_pc, bool taken, void* b); - - /** - * Restores speculatively updated path and direction histories. - * Also recomputes compressed (folded) histories based on the - * correct branch outcome. - * This version of squash() is called once on a branch misprediction. - * @param tid The Thread ID to select the histories to rollback. - * @param taken The correct branch outcome. - * @param bp_history Wrapping pointer to TageBranchInfo (to allow - * storing derived class prediction information in the - * base class). - * @post bp_history points to valid memory. - */ - virtual void squash(ThreadID tid, bool taken, void *bp_history); - - /** - * Update TAGE for conditional branches. - * @param branch_pc The unshifted branch PC. - * @param taken Actual branch outcome. - * @param bi Pointer to information on the prediction - * recorded at prediction time. - * @nrand Random int number from 0 to 3 - */ - virtual void condBranchUpdate( - Addr branch_pc, bool taken, TageBranchInfo* bi, int nrand); - - /** - * TAGE prediction called from TAGE::predict - * @param tid The thread ID to select the global - * histories to use. - * @param branch_pc The unshifted branch PC. - * @param cond_branch True if the branch is conditional. 
- * @param bi Pointer to the TageBranchInfo - */ - bool tagePredict( - ThreadID tid, Addr branch_pc, bool cond_branch, TageBranchInfo* bi); - - /** - * Update the stats - * @param taken Actual branch outcome - * @param bi Pointer to information on the prediction - * recorded at prediction time. - */ - virtual void updateStats(bool taken, TageBranchInfo* bi); - - const unsigned logRatioBiModalHystEntries; - const unsigned nHistoryTables; - const unsigned tagTableCounterBits; - const unsigned tagTableUBits; - const unsigned histBufferSize; - const unsigned minHist; - const unsigned maxHist; - const unsigned pathHistBits; - - const std::vector<unsigned> tagTableTagWidths; - const std::vector<int> logTagTableSizes; - - std::vector<bool> btablePrediction; - std::vector<bool> btableHysteresis; - TageEntry **gtable; - - // Keep per-thread histories to - // support SMT. - struct ThreadHistory { - // Speculative path history - // (LSB of branch address) - int pathHist; - - // Speculative branch direction - // history (circular buffer) - // @TODO Convert to std::vector - uint8_t *globalHistory; - - // Pointer to most recent branch outcome - uint8_t* gHist; - - // Index to most recent branch outcome - int ptGhist; - - // Speculative folded histories. 
- FoldedHistory *computeIndices; - FoldedHistory *computeTags[2]; - }; - - std::vector<ThreadHistory> threadHistory; - - int *histLengths; - int *tableIndices; - int *tableTags; - - int8_t useAltPredForNewlyAllocated; - uint64_t tCounter; - uint64_t logUResetPeriod; - unsigned useAltOnNaBits; + virtual bool predict(ThreadID tid, Addr branch_pc, bool cond_branch, + void* &b); + public: - // stats - Stats::Scalar tageLongestMatchProviderCorrect; - Stats::Scalar tageAltMatchProviderCorrect; - Stats::Scalar bimodalAltMatchProviderCorrect; - Stats::Scalar tageBimodalProviderCorrect; - Stats::Scalar tageLongestMatchProviderWrong; - Stats::Scalar tageAltMatchProviderWrong; - Stats::Scalar bimodalAltMatchProviderWrong; - Stats::Scalar tageBimodalProviderWrong; - Stats::Scalar tageAltMatchProviderWouldHaveHit; - Stats::Scalar tageLongestMatchProviderWouldHaveHit; + TAGE(const TAGEParams *params); - Stats::Vector tageLongestMatchProvider; - Stats::Vector tageAltMatchProvider; + // Base class methods. + void uncondBranch(ThreadID tid, Addr br_pc, void* &bp_history) override; + bool lookup(ThreadID tid, Addr branch_addr, void* &bp_history) override; + void btbUpdate(ThreadID tid, Addr branch_addr, void* &bp_history) override; + void update(ThreadID tid, Addr branch_addr, bool taken, void *bp_history, + bool squashed, const StaticInstPtr & inst, + Addr corrTarget) override; + virtual void squash(ThreadID tid, void *bp_history) override; + unsigned getGHR(ThreadID tid, void *bp_history) const override; }; #endif // __CPU_PRED_TAGE diff --git a/src/cpu/pred/tage_base.cc b/src/cpu/pred/tage_base.cc new file mode 100644 index 000000000..95be541d3 --- /dev/null +++ b/src/cpu/pred/tage_base.cc @@ -0,0 +1,803 @@ +/* + * Copyright (c) 2014 The University of Wisconsin + * + * Copyright (c) 2006 INRIA (Institut National de Recherche en + * Informatique et en Automatique / French National Research Institute + * for Computer Science and Applied Mathematics) + * + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Vignyan Reddy, Dibakar Gope and Arthur Perais, + * from André Seznec's code. 
+ */ + +/* @file + * Implementation of a TAGE branch predictor + */ + +#include "cpu/pred/tage_base.hh" + +#include "base/intmath.hh" +#include "base/logging.hh" +#include "base/random.hh" +#include "debug/Fetch.hh" +#include "debug/Tage.hh" + +TAGEBase::TAGEBase(const TAGEBaseParams *p) + : SimObject(p), + logRatioBiModalHystEntries(p->logRatioBiModalHystEntries), + nHistoryTables(p->nHistoryTables), + tagTableCounterBits(p->tagTableCounterBits), + tagTableUBits(p->tagTableUBits), + histBufferSize(p->histBufferSize), + minHist(p->minHist), + maxHist(p->maxHist), + pathHistBits(p->pathHistBits), + tagTableTagWidths(p->tagTableTagWidths), + logTagTableSizes(p->logTagTableSizes), + threadHistory(p->numThreads), + logUResetPeriod(p->logUResetPeriod), + numUseAltOnNa(p->numUseAltOnNa), + useAltOnNaBits(p->useAltOnNaBits), + maxNumAlloc(p->maxNumAlloc), + noSkip(p->noSkip), + speculativeHistUpdate(p->speculativeHistUpdate), + instShiftAmt(p->instShiftAmt) +{ + if (noSkip.empty()) { + // Set all the table to enabled by default + noSkip.resize(nHistoryTables + 1, true); + } +} + +TAGEBase::BranchInfo* +TAGEBase::makeBranchInfo() { + return new BranchInfo(*this); +} + +void +TAGEBase::init() +{ + // Current method for periodically resetting the u counter bits only + // works for 1 or 2 bits + // Also make sure that it is not 0 + assert(tagTableUBits <= 2 && (tagTableUBits > 0)); + + // we use int type for the path history, so it cannot be more than + // its size + assert(pathHistBits <= (sizeof(int)*8)); + + // initialize the counter to half of the period + assert(logUResetPeriod != 0); + tCounter = ULL(1) << (logUResetPeriod - 1); + + assert(histBufferSize > maxHist * 2); + + useAltPredForNewlyAllocated.resize(numUseAltOnNa, 0); + + for (auto& history : threadHistory) { + history.pathHist = 0; + history.globalHistory = new uint8_t[histBufferSize]; + history.gHist = history.globalHistory; + memset(history.gHist, 0, histBufferSize); + history.ptGhist = 0; + } + + 
histLengths = new int [nHistoryTables+1]; + + calculateParameters(); + + assert(tagTableTagWidths.size() == (nHistoryTables+1)); + assert(logTagTableSizes.size() == (nHistoryTables+1)); + + // First entry is for the Bimodal table and it is untagged in this + // implementation + assert(tagTableTagWidths[0] == 0); + + for (auto& history : threadHistory) { + history.computeIndices = new FoldedHistory[nHistoryTables+1]; + history.computeTags[0] = new FoldedHistory[nHistoryTables+1]; + history.computeTags[1] = new FoldedHistory[nHistoryTables+1]; + + initFoldedHistories(history); + } + + const uint64_t bimodalTableSize = ULL(1) << logTagTableSizes[0]; + btablePrediction.resize(bimodalTableSize, false); + btableHysteresis.resize(bimodalTableSize >> logRatioBiModalHystEntries, + true); + + gtable = new TageEntry*[nHistoryTables + 1]; + buildTageTables(); + + tableIndices = new int [nHistoryTables+1]; + tableTags = new int [nHistoryTables+1]; +} + +void +TAGEBase::initFoldedHistories(ThreadHistory & history) +{ + for (int i = 1; i <= nHistoryTables; i++) { + history.computeIndices[i].init( + histLengths[i], (logTagTableSizes[i])); + history.computeTags[0][i].init( + history.computeIndices[i].origLength, tagTableTagWidths[i]); + history.computeTags[1][i].init( + history.computeIndices[i].origLength, tagTableTagWidths[i]-1); + DPRINTF(Tage, "HistLength:%d, TTSize:%d, TTTWidth:%d\n", + histLengths[i], logTagTableSizes[i], tagTableTagWidths[i]); + } +} + +void +TAGEBase::buildTageTables() +{ + for (int i = 1; i <= nHistoryTables; i++) { + gtable[i] = new TageEntry[1<<(logTagTableSizes[i])]; + } +} + +void +TAGEBase::calculateParameters() +{ + histLengths[1] = minHist; + histLengths[nHistoryTables] = maxHist; + + for (int i = 2; i <= nHistoryTables; i++) { + histLengths[i] = (int) (((double) minHist * + pow ((double) (maxHist) / (double) minHist, + (double) (i - 1) / (double) ((nHistoryTables- 1)))) + + 0.5); + } +} + +void +TAGEBase::btbUpdate(ThreadID tid, Addr branch_pc, 
BranchInfo* &bi) +{ + if (speculativeHistUpdate) { + ThreadHistory& tHist = threadHistory[tid]; + DPRINTF(Tage, "BTB miss resets prediction: %lx\n", branch_pc); + assert(tHist.gHist == &tHist.globalHistory[tHist.ptGhist]); + tHist.gHist[0] = 0; + for (int i = 1; i <= nHistoryTables; i++) { + tHist.computeIndices[i].comp = bi->ci[i]; + tHist.computeTags[0][i].comp = bi->ct0[i]; + tHist.computeTags[1][i].comp = bi->ct1[i]; + tHist.computeIndices[i].update(tHist.gHist); + tHist.computeTags[0][i].update(tHist.gHist); + tHist.computeTags[1][i].update(tHist.gHist); + } + } +} + +int +TAGEBase::bindex(Addr pc_in) const +{ + return ((pc_in >> instShiftAmt) & ((ULL(1) << (logTagTableSizes[0])) - 1)); +} + +int +TAGEBase::F(int A, int size, int bank) const +{ + int A1, A2; + + A = A & ((ULL(1) << size) - 1); + A1 = (A & ((ULL(1) << logTagTableSizes[bank]) - 1)); + A2 = (A >> logTagTableSizes[bank]); + A2 = ((A2 << bank) & ((ULL(1) << logTagTableSizes[bank]) - 1)) + + (A2 >> (logTagTableSizes[bank] - bank)); + A = A1 ^ A2; + A = ((A << bank) & ((ULL(1) << logTagTableSizes[bank]) - 1)) + + (A >> (logTagTableSizes[bank] - bank)); + return (A); +} + +// gindex computes a full hash of pc, ghist and pathHist +int +TAGEBase::gindex(ThreadID tid, Addr pc, int bank) const +{ + int index; + int hlen = (histLengths[bank] > pathHistBits) ? 
pathHistBits : + histLengths[bank]; + const unsigned int shiftedPc = pc >> instShiftAmt; + index = + shiftedPc ^ + (shiftedPc >> ((int) abs(logTagTableSizes[bank] - bank) + 1)) ^ + threadHistory[tid].computeIndices[bank].comp ^ + F(threadHistory[tid].pathHist, hlen, bank); + + return (index & ((ULL(1) << (logTagTableSizes[bank])) - 1)); +} + + +// Tag computation +uint16_t +TAGEBase::gtag(ThreadID tid, Addr pc, int bank) const +{ + int tag = (pc >> instShiftAmt) ^ + threadHistory[tid].computeTags[0][bank].comp ^ + (threadHistory[tid].computeTags[1][bank].comp << 1); + + return (tag & ((ULL(1) << tagTableTagWidths[bank]) - 1)); +} + + +// Up-down saturating counter +template<typename T> +void +TAGEBase::ctrUpdate(T & ctr, bool taken, int nbits) +{ + assert(nbits <= sizeof(T) << 3); + if (taken) { + if (ctr < ((1 << (nbits - 1)) - 1)) + ctr++; + } else { + if (ctr > -(1 << (nbits - 1))) + ctr--; + } +} + +// int8_t and int versions of this function may be needed +template void TAGEBase::ctrUpdate(int8_t & ctr, bool taken, int nbits); +template void TAGEBase::ctrUpdate(int & ctr, bool taken, int nbits); + +// Up-down unsigned saturating counter +void +TAGEBase::unsignedCtrUpdate(uint8_t & ctr, bool up, unsigned nbits) +{ + assert(nbits <= sizeof(uint8_t) << 3); + if (up) { + if (ctr < ((1 << nbits) - 1)) + ctr++; + } else { + if (ctr) + ctr--; + } +} + +// Bimodal prediction +bool +TAGEBase::getBimodePred(Addr pc, BranchInfo* bi) const +{ + return btablePrediction[bi->bimodalIndex]; +} + + +// Update the bimodal predictor: a hysteresis bit is shared among N prediction +// bits (N = 2 ^ logRatioBiModalHystEntries) +void +TAGEBase::baseUpdate(Addr pc, bool taken, BranchInfo* bi) +{ + int inter = (btablePrediction[bi->bimodalIndex] << 1) + + btableHysteresis[bi->bimodalIndex >> logRatioBiModalHystEntries]; + if (taken) { + if (inter < 3) + inter++; + } else if (inter > 0) { + inter--; + } + const bool pred = inter >> 1; + const bool hyst = inter & 1; + 
btablePrediction[bi->bimodalIndex] = pred; + btableHysteresis[bi->bimodalIndex >> logRatioBiModalHystEntries] = hyst; + DPRINTF(Tage, "Updating branch %lx, pred:%d, hyst:%d\n", pc, pred, hyst); +} + +// shifting the global history: we manage the history in a big table in order +// to reduce simulation time +void +TAGEBase::updateGHist(uint8_t * &h, bool dir, uint8_t * tab, int &pt) +{ + if (pt == 0) { + DPRINTF(Tage, "Rolling over the histories\n"); + // Copy beginning of globalHistoryBuffer to end, such that + // the last maxHist outcomes are still reachable + // through pt[0 .. maxHist - 1]. + for (int i = 0; i < maxHist; i++) + tab[histBufferSize - maxHist + i] = tab[i]; + pt = histBufferSize - maxHist; + h = &tab[pt]; + } + pt--; + h--; + h[0] = (dir) ? 1 : 0; +} + +void +TAGEBase::calculateIndicesAndTags(ThreadID tid, Addr branch_pc, + BranchInfo* bi) +{ + // computes the table addresses and the partial tags + for (int i = 1; i <= nHistoryTables; i++) { + tableIndices[i] = gindex(tid, branch_pc, i); + bi->tableIndices[i] = tableIndices[i]; + tableTags[i] = gtag(tid, branch_pc, i); + bi->tableTags[i] = tableTags[i]; + } +} + +unsigned +TAGEBase::getUseAltIdx(BranchInfo* bi) +{ + // There is only 1 counter on the base TAGE implementation + return 0; +} + +bool +TAGEBase::tagePredict(ThreadID tid, Addr branch_pc, + bool cond_branch, BranchInfo* bi) +{ + Addr pc = branch_pc; + bool pred_taken = true; + + if (cond_branch) { + // TAGE prediction + + calculateIndicesAndTags(tid, pc, bi); + + bi->bimodalIndex = bindex(pc); + + bi->hitBank = 0; + bi->altBank = 0; + //Look for the bank with longest matching history + for (int i = nHistoryTables; i > 0; i--) { + if (noSkip[i] && + gtable[i][tableIndices[i]].tag == tableTags[i]) { + bi->hitBank = i; + bi->hitBankIndex = tableIndices[bi->hitBank]; + break; + } + } + //Look for the alternate bank + for (int i = bi->hitBank - 1; i > 0; i--) { + if (noSkip[i] && + gtable[i][tableIndices[i]].tag == tableTags[i]) { + 
bi->altBank = i; + bi->altBankIndex = tableIndices[bi->altBank]; + break; + } + } + //computes the prediction and the alternate prediction + if (bi->hitBank > 0) { + if (bi->altBank > 0) { + bi->altTaken = + gtable[bi->altBank][tableIndices[bi->altBank]].ctr >= 0; + extraAltCalc(bi); + }else { + bi->altTaken = getBimodePred(pc, bi); + } + + bi->longestMatchPred = + gtable[bi->hitBank][tableIndices[bi->hitBank]].ctr >= 0; + bi->pseudoNewAlloc = + abs(2 * gtable[bi->hitBank][bi->hitBankIndex].ctr + 1) <= 1; + + //if the entry is recognized as a newly allocated entry and + //useAltPredForNewlyAllocated is positive use the alternate + //prediction + if ((useAltPredForNewlyAllocated[getUseAltIdx(bi)] < 0) || + ! bi->pseudoNewAlloc) { + bi->tagePred = bi->longestMatchPred; + bi->provider = TAGE_LONGEST_MATCH; + } else { + bi->tagePred = bi->altTaken; + bi->provider = bi->altBank ? TAGE_ALT_MATCH + : BIMODAL_ALT_MATCH; + } + } else { + bi->altTaken = getBimodePred(pc, bi); + bi->tagePred = bi->altTaken; + bi->longestMatchPred = bi->altTaken; + bi->provider = BIMODAL_ONLY; + } + //end TAGE prediction + + pred_taken = (bi->tagePred); + DPRINTF(Tage, "Predict for %lx: taken?:%d, tagePred:%d, altPred:%d\n", + branch_pc, pred_taken, bi->tagePred, bi->altTaken); + } + bi->branchPC = branch_pc; + bi->condBranch = cond_branch; + return pred_taken; +} + +void +TAGEBase::adjustAlloc(bool & alloc, bool taken) +{ + // Nothing for this base class implementation +} + +void +TAGEBase::handleAllocAndUReset(bool alloc, bool taken, BranchInfo* bi, + int nrand) +{ + if (alloc) { + // is there some "unuseful" entry to allocate + uint8_t min = 1; + for (int i = nHistoryTables; i > bi->hitBank; i--) { + if (gtable[i][bi->tableIndices[i]].u < min) { + min = gtable[i][bi->tableIndices[i]].u; + } + } + + // we allocate an entry with a longer history + // to avoid ping-pong, we do not choose systematically the next + // entry, but among the 3 next entries + int Y = nrand & + ((ULL(1) << 
(nHistoryTables - bi->hitBank - 1)) - 1); + int X = bi->hitBank + 1; + if (Y & 1) { + X++; + if (Y & 2) + X++; + } + // No entry available, forces one to be available + if (min > 0) { + gtable[X][bi->tableIndices[X]].u = 0; + } + + + //Allocate entries + unsigned numAllocated = 0; + for (int i = X; i <= nHistoryTables; i++) { + if ((gtable[i][bi->tableIndices[i]].u == 0)) { + gtable[i][bi->tableIndices[i]].tag = bi->tableTags[i]; + gtable[i][bi->tableIndices[i]].ctr = (taken) ? 0 : -1; + ++numAllocated; + if (numAllocated == maxNumAlloc) { + break; + } + } + } + } + + tCounter++; + + handleUReset(); +} + +void +TAGEBase::handleUReset() +{ + //periodic reset of u: reset is not complete but bit by bit + if ((tCounter & ((ULL(1) << logUResetPeriod) - 1)) == 0) { + // reset least significant bit + // most significant bit becomes least significant bit + for (int i = 1; i <= nHistoryTables; i++) { + for (int j = 0; j < (ULL(1) << logTagTableSizes[i]); j++) { + resetUctr(gtable[i][j].u); + } + } + } +} + +void +TAGEBase::resetUctr(uint8_t & u) +{ + u >>= 1; +} + +void +TAGEBase::condBranchUpdate(ThreadID tid, Addr branch_pc, bool taken, + BranchInfo* bi, int nrand, Addr corrTarget) +{ + // TAGE UPDATE + // try to allocate a new entries only if prediction was wrong + bool alloc = (bi->tagePred != taken) && (bi->hitBank < nHistoryTables); + if (bi->hitBank > 0) { + // Manage the selection between longest matching and alternate + // matching for "pseudo"-newly allocated longest matching entry + bool PseudoNewAlloc = bi->pseudoNewAlloc; + // an entry is considered as newly allocated if its prediction + // counter is weak + if (PseudoNewAlloc) { + if (bi->longestMatchPred == taken) { + alloc = false; + } + // if it was delivering the correct prediction, no need to + // allocate new entry even if the overall prediction was false + if (bi->longestMatchPred != bi->altTaken) { + ctrUpdate(useAltPredForNewlyAllocated[getUseAltIdx(bi)], + bi->altTaken == taken, useAltOnNaBits); + } 
+ } + } + + adjustAlloc(alloc, taken); + + handleAllocAndUReset(alloc, taken, bi, nrand); + + handleTAGEUpdate(branch_pc, taken, bi); +} + +void +TAGEBase::handleTAGEUpdate(Addr branch_pc, bool taken, BranchInfo* bi) +{ + if (bi->hitBank > 0) { + DPRINTF(Tage, "Updating tag table entry (%d,%d) for branch %lx\n", + bi->hitBank, bi->hitBankIndex, branch_pc); + ctrUpdate(gtable[bi->hitBank][bi->hitBankIndex].ctr, taken, + tagTableCounterBits); + // if the provider entry is not certified to be useful also update + // the alternate prediction + if (gtable[bi->hitBank][bi->hitBankIndex].u == 0) { + if (bi->altBank > 0) { + ctrUpdate(gtable[bi->altBank][bi->altBankIndex].ctr, taken, + tagTableCounterBits); + DPRINTF(Tage, "Updating tag table entry (%d,%d) for" + " branch %lx\n", bi->hitBank, bi->hitBankIndex, + branch_pc); + } + if (bi->altBank == 0) { + baseUpdate(branch_pc, taken, bi); + } + } + + // update the u counter + if (bi->tagePred != bi->altTaken) { + unsignedCtrUpdate(gtable[bi->hitBank][bi->hitBankIndex].u, + bi->tagePred == taken, tagTableUBits); + } + } else { + baseUpdate(branch_pc, taken, bi); + } +} + +void +TAGEBase::updateHistories(ThreadID tid, Addr branch_pc, bool taken, + BranchInfo* bi, bool speculative, + const StaticInstPtr &inst, Addr target) +{ + if (speculative != speculativeHistUpdate) { + return; + } + ThreadHistory& tHist = threadHistory[tid]; + // UPDATE HISTORIES + bool pathbit = ((branch_pc >> instShiftAmt) & 1); + //on a squash, return pointers to this and recompute indices. 
+ //update user history + updateGHist(tHist.gHist, taken, tHist.globalHistory, tHist.ptGhist); + tHist.pathHist = (tHist.pathHist << 1) + pathbit; + tHist.pathHist = (tHist.pathHist & ((ULL(1) << pathHistBits) - 1)); + + if (speculative) { + bi->ptGhist = tHist.ptGhist; + bi->pathHist = tHist.pathHist; + } + + //prepare next index and tag computations for user branchs + for (int i = 1; i <= nHistoryTables; i++) + { + if (speculative) { + bi->ci[i] = tHist.computeIndices[i].comp; + bi->ct0[i] = tHist.computeTags[0][i].comp; + bi->ct1[i] = tHist.computeTags[1][i].comp; + } + tHist.computeIndices[i].update(tHist.gHist); + tHist.computeTags[0][i].update(tHist.gHist); + tHist.computeTags[1][i].update(tHist.gHist); + } + DPRINTF(Tage, "Updating global histories with branch:%lx; taken?:%d, " + "path Hist: %x; pointer:%d\n", branch_pc, taken, tHist.pathHist, + tHist.ptGhist); + assert(threadHistory[tid].gHist == + &threadHistory[tid].globalHistory[threadHistory[tid].ptGhist]); +} + +void +TAGEBase::squash(ThreadID tid, bool taken, TAGEBase::BranchInfo *bi, + Addr target) +{ + if (!speculativeHistUpdate) { + /* If there are no speculative updates, no actions are needed */ + return; + } + + ThreadHistory& tHist = threadHistory[tid]; + DPRINTF(Tage, "Restoring branch info: %lx; taken? %d; PathHistory:%x, " + "pointer:%d\n", bi->branchPC,taken, bi->pathHist, bi->ptGhist); + tHist.pathHist = bi->pathHist; + tHist.ptGhist = bi->ptGhist; + tHist.gHist = &(tHist.globalHistory[tHist.ptGhist]); + tHist.gHist[0] = (taken ? 1 : 0); + for (int i = 1; i <= nHistoryTables; i++) { + tHist.computeIndices[i].comp = bi->ci[i]; + tHist.computeTags[0][i].comp = bi->ct0[i]; + tHist.computeTags[1][i].comp = bi->ct1[i]; + tHist.computeIndices[i].update(tHist.gHist); + tHist.computeTags[0][i].update(tHist.gHist); + tHist.computeTags[1][i].update(tHist.gHist); + } +} + +void +TAGEBase::extraAltCalc(BranchInfo* bi) +{ + // do nothing. 
This is only used in some derived classes + return; +} + +int +TAGEBase::getRandom() +{ + return random_mt.random(); +} + +void +TAGEBase::updateStats(bool taken, BranchInfo* bi) +{ + if (taken == bi->tagePred) { + // correct prediction + switch (bi->provider) { + case BIMODAL_ONLY: tageBimodalProviderCorrect++; break; + case TAGE_LONGEST_MATCH: tageLongestMatchProviderCorrect++; break; + case BIMODAL_ALT_MATCH: bimodalAltMatchProviderCorrect++; break; + case TAGE_ALT_MATCH: tageAltMatchProviderCorrect++; break; + } + } else { + // wrong prediction + switch (bi->provider) { + case BIMODAL_ONLY: tageBimodalProviderWrong++; break; + case TAGE_LONGEST_MATCH: + tageLongestMatchProviderWrong++; + if (bi->altTaken == taken) { + tageAltMatchProviderWouldHaveHit++; + } + break; + case BIMODAL_ALT_MATCH: + bimodalAltMatchProviderWrong++; + break; + case TAGE_ALT_MATCH: + tageAltMatchProviderWrong++; + break; + } + + switch (bi->provider) { + case BIMODAL_ALT_MATCH: + case TAGE_ALT_MATCH: + if (bi->longestMatchPred == taken) { + tageLongestMatchProviderWouldHaveHit++; + } + } + } + + switch (bi->provider) { + case TAGE_LONGEST_MATCH: + case TAGE_ALT_MATCH: + tageLongestMatchProvider[bi->hitBank]++; + tageAltMatchProvider[bi->altBank]++; + break; + } +} + +unsigned +TAGEBase::getGHR(ThreadID tid, BranchInfo *bi) const +{ + unsigned val = 0; + for (unsigned i = 0; i < 32; i++) { + // Make sure we don't go out of bounds + int gh_offset = bi->ptGhist + i; + assert(&(threadHistory[tid].globalHistory[gh_offset]) < + threadHistory[tid].globalHistory + histBufferSize); + val |= ((threadHistory[tid].globalHistory[gh_offset] & 0x1) << i); + } + + return val; +} + +void +TAGEBase::regStats() +{ + tageLongestMatchProviderCorrect + .name(name() + ".tageLongestMatchProviderCorrect") + .desc("Number of times TAGE Longest Match is the provider and " + "the prediction is correct"); + + tageAltMatchProviderCorrect + .name(name() + ".tageAltMatchProviderCorrect") + .desc("Number of times TAGE 
Alt Match is the provider and " + "the prediction is correct"); + + bimodalAltMatchProviderCorrect + .name(name() + ".bimodalAltMatchProviderCorrect") + .desc("Number of times TAGE Alt Match is the bimodal and it is the " + "provider and the prediction is correct"); + + tageBimodalProviderCorrect + .name(name() + ".tageBimodalProviderCorrect") + .desc("Number of times there are no hits on the TAGE tables " + "and the bimodal prediction is correct"); + + tageLongestMatchProviderWrong + .name(name() + ".tageLongestMatchProviderWrong") + .desc("Number of times TAGE Longest Match is the provider and " + "the prediction is wrong"); + + tageAltMatchProviderWrong + .name(name() + ".tageAltMatchProviderWrong") + .desc("Number of times TAGE Alt Match is the provider and " + "the prediction is wrong"); + + bimodalAltMatchProviderWrong + .name(name() + ".bimodalAltMatchProviderWrong") + .desc("Number of times TAGE Alt Match is the bimodal and it is the " + "provider and the prediction is wrong"); + + tageBimodalProviderWrong + .name(name() + ".tageBimodalProviderWrong") + .desc("Number of times there are no hits on the TAGE tables " + "and the bimodal prediction is wrong"); + + tageAltMatchProviderWouldHaveHit + .name(name() + ".tageAltMatchProviderWouldHaveHit") + .desc("Number of times TAGE Longest Match is the provider, " + "the prediction is wrong and Alt Match prediction was correct"); + + tageLongestMatchProviderWouldHaveHit + .name(name() + ".tageLongestMatchProviderWouldHaveHit") + .desc("Number of times TAGE Alt Match is the provider, the " + "prediction is wrong and Longest Match prediction was correct"); + + tageLongestMatchProvider + .init(nHistoryTables + 1) + .name(name() + ".tageLongestMatchProvider") + .desc("TAGE provider for longest match"); + + tageAltMatchProvider + .init(nHistoryTables + 1) + .name(name() + ".tageAltMatchProvider") + .desc("TAGE provider for alt match"); +} + +int8_t +TAGEBase::getCtr(int hitBank, int hitBankIndex) const +{ + return 
gtable[hitBank][hitBankIndex].ctr; +} + +unsigned +TAGEBase::getTageCtrBits() const +{ + return tagTableCounterBits; +} + +int +TAGEBase::getPathHist(ThreadID tid) const +{ + return threadHistory[tid].pathHist; +} + +bool +TAGEBase::isSpeculativeUpdateEnabled() const +{ + return speculativeHistUpdate; +} + +TAGEBase* +TAGEBaseParams::create() +{ + return new TAGEBase(this); +} diff --git a/src/cpu/pred/tage_base.hh b/src/cpu/pred/tage_base.hh new file mode 100644 index 000000000..ea1d827cb --- /dev/null +++ b/src/cpu/pred/tage_base.hh @@ -0,0 +1,500 @@ +/* + * Copyright (c) 2014 The University of Wisconsin + * + * Copyright (c) 2006 INRIA (Institut National de Recherche en + * Informatique et en Automatique / French National Research Institute + * for Computer Science and Applied Mathematics) + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Vignyan Reddy, Dibakar Gope and Arthur Perais, + * from André Seznec's code. + */ + +/* @file + * Implementation of a TAGE branch predictor. TAGE is a global-history based + * branch predictor. It features a PC-indexed bimodal predictor and N + * partially tagged tables, indexed with a hash of the PC and the global + * branch history. The different lengths of global branch history used to + * index the partially tagged tables grow geometrically. A small path history + * is also used in the hash. + * + * All TAGE tables are accessed in parallel, and the one using the longest + * history that matches provides the prediction (some exceptions apply). + * Entries are allocated in components using a longer history than the + * one that predicted when the prediction is incorrect. + */ + +#ifndef __CPU_PRED_TAGE_BASE +#define __CPU_PRED_TAGE_BASE + +#include + +#include "base/statistics.hh" +#include "cpu/static_inst.hh" +#include "params/TAGEBase.hh" +#include "sim/sim_object.hh" + +class TAGEBase : public SimObject +{ + public: + TAGEBase(const TAGEBaseParams *p); + void regStats() override; + void init() override; + + protected: + // Prediction Structures + + // Tage Entry + struct TageEntry + { + int8_t ctr; + uint16_t tag; + uint8_t u; + TageEntry() : ctr(0), tag(0), u(0) { } + }; + + // Folded History Table - compressed history + // to mix with instruction PC to index partially + // tagged tables. 
+ struct FoldedHistory + { + unsigned comp; + int compLength; + int origLength; + int outpoint; + int bufferSize; + + FoldedHistory() + { + comp = 0; + } + + void init(int original_length, int compressed_length) + { + origLength = original_length; + compLength = compressed_length; + outpoint = original_length % compressed_length; + } + + void update(uint8_t * h) + { + comp = (comp << 1) | h[0]; + comp ^= h[origLength] << outpoint; + comp ^= (comp >> compLength); + comp &= (ULL(1) << compLength) - 1; + } + }; + + public: + + // provider type + enum { + BIMODAL_ONLY = 0, + TAGE_LONGEST_MATCH, + BIMODAL_ALT_MATCH, + TAGE_ALT_MATCH, + LAST_TAGE_PROVIDER_TYPE = TAGE_ALT_MATCH + }; + + // Primary branch history entry + struct BranchInfo + { + int pathHist; + int ptGhist; + int hitBank; + int hitBankIndex; + int altBank; + int altBankIndex; + int bimodalIndex; + + bool tagePred; + bool altTaken; + bool condBranch; + bool longestMatchPred; + bool pseudoNewAlloc; + Addr branchPC; + + // Pointer to dynamically allocated storage + // to save table indices and folded histories. + // To do one call to new instead of five. + int *storage; + + // Pointers to actual saved array within the dynamically + // allocated storage. + int *tableIndices; + int *tableTags; + int *ci; + int *ct0; + int *ct1; + + // for stats purposes + unsigned provider; + + BranchInfo(const TAGEBase &tage) + : pathHist(0), ptGhist(0), + hitBank(0), hitBankIndex(0), + altBank(0), altBankIndex(0), + bimodalIndex(0), + tagePred(false), altTaken(false), + condBranch(false), longestMatchPred(false), + pseudoNewAlloc(false), branchPC(0), + provider(-1) + { + int sz = tage.nHistoryTables + 1; + storage = new int [sz * 5]; + tableIndices = storage; + tableTags = storage + sz; + ci = tableTags + sz; + ct0 = ci + sz; + ct1 = ct0 + sz; + } + + virtual ~BranchInfo() + { + delete[] storage; + } + }; + + virtual BranchInfo *makeBranchInfo(); + + /** + * Computes the index used to access the + * bimodal table. 
+ * @param pc_in The unshifted branch PC. + */ + virtual int bindex(Addr pc_in) const; + + /** + * Computes the index used to access a + * partially tagged table. + * @param tid The thread ID used to select the + * global histories to use. + * @param pc The unshifted branch PC. + * @param bank The partially tagged table to access. + */ + virtual int gindex(ThreadID tid, Addr pc, int bank) const; + + /** + * Utility function to shuffle the path history + * depending on which tagged table we are accessing. + * @param phist The path history. + * @param size Number of path history bits to use. + * @param bank The partially tagged table to access. + */ + virtual int F(int phist, int size, int bank) const; + + /** + * Computes the partial tag of a tagged table. + * @param tid the thread ID used to select the + * global histories to use. + * @param pc The unshifted branch PC. + * @param bank The partially tagged table to access. + */ + virtual uint16_t gtag(ThreadID tid, Addr pc, int bank) const; + + /** + * Updates a direction counter based on the actual + * branch outcome. + * @param ctr Reference to counter to update. + * @param taken Actual branch outcome. + * @param nbits Counter width. + */ + template + static void ctrUpdate(T & ctr, bool taken, int nbits); + + /** + * Updates an unsigned counter based on up/down parameter + * @param ctr Reference to counter to update. + * @param up Boolean indicating if the counter is incremented/decremented + * If true it is incremented, if false it is decremented + * @param nbits Counter width. + */ + static void unsignedCtrUpdate(uint8_t & ctr, bool up, unsigned nbits); + + /** + * Get a branch prediction from the bimodal + * predictor. + * @param pc The unshifted branch PC. + * @param bi Pointer to information on the + * prediction. + */ + virtual bool getBimodePred(Addr pc, BranchInfo* bi) const; + + /** + * Updates the bimodal predictor. + * @param pc The unshifted branch PC. + * @param taken The actual branch outcome. 
+ * @param bi Pointer to information on the prediction + * recorded at prediction time. + */ + void baseUpdate(Addr pc, bool taken, BranchInfo* bi); + + /** + * (Speculatively) updates the global branch history. + * @param h Reference to pointer to global branch history. + * @param dir (Predicted) outcome to update the histories + * with. + * @param tab + * @param PT Reference to path history. + */ + void updateGHist(uint8_t * &h, bool dir, uint8_t * tab, int &PT); + + /** + * Update TAGE. Called at execute to repair histories on a misprediction + * and at commit to update the tables. + * @param tid The thread ID to select the global + * histories to use. + * @param branch_pc The unshifted branch PC. + * @param taken Actual branch outcome. + * @param bi Pointer to information on the prediction + * recorded at prediction time. + */ + void update(ThreadID tid, Addr branch_pc, bool taken, BranchInfo* bi); + + /** + * (Speculatively) updates global histories (path and direction). + * Also recomputes compressed (folded) histories based on the + * branch direction. + * @param tid The thread ID to select the histories + * to update. + * @param branch_pc The unshifted branch PC. + * @param taken (Predicted) branch direction. + * @param b Wrapping pointer to BranchInfo (to allow + * storing derived class prediction information in the + * base class). + */ + virtual void updateHistories( + ThreadID tid, Addr branch_pc, bool taken, BranchInfo* b, + bool speculative, + const StaticInstPtr & inst = StaticInst::nullStaticInstPtr, + Addr target = MaxAddr); + + /** + * Restores speculatively updated path and direction histories. + * Also recomputes compressed (folded) histories based on the + * correct branch outcome. + * This version of squash() is called once on a branch misprediction. + * @param tid The Thread ID to select the histories to rollback. + * @param taken The correct branch outcome. 
+ * @param bp_history Wrapping pointer to BranchInfo (to allow + * storing derived class prediction information in the + * base class). + * @param target The correct branch target + * @post bp_history points to valid memory. + */ + virtual void squash( + ThreadID tid, bool taken, BranchInfo *bi, Addr target); + + /** + * Update TAGE for conditional branches. + * @param branch_pc The unshifted branch PC. + * @param taken Actual branch outcome. + * @param bi Pointer to information on the prediction + * recorded at prediction time. + * @nrand Random int number from 0 to 3 + * @param corrTarget The correct branch target + */ + virtual void condBranchUpdate( + ThreadID tid, Addr branch_pc, bool taken, BranchInfo* bi, + int nrand, Addr corrTarget); + + /** + * TAGE prediction called from TAGE::predict + * @param tid The thread ID to select the global + * histories to use. + * @param branch_pc The unshifted branch PC. + * @param cond_branch True if the branch is conditional. + * @param bi Pointer to the BranchInfo + */ + bool tagePredict( + ThreadID tid, Addr branch_pc, bool cond_branch, BranchInfo* bi); + + /** + * Update the stats + * @param taken Actual branch outcome + * @param bi Pointer to information on the prediction + * recorded at prediction time. 
+ */ + virtual void updateStats(bool taken, BranchInfo* bi); + + /** + * Instantiates the TAGE table entries + */ + virtual void buildTageTables(); + + /** + * Calculates the history lengths + * and some other paramters in derived classes + */ + virtual void calculateParameters(); + + /** + * On a prediction, calculates the TAGE indices and tags for + * all the different history lengths + */ + virtual void calculateIndicesAndTags( + ThreadID tid, Addr branch_pc, BranchInfo* bi); + + /** + * Calculation of the index for useAltPredForNewlyAllocated + * On this base TAGE implementation it is always 0 + */ + virtual unsigned getUseAltIdx(BranchInfo* bi); + + /** + * Extra calculation to tell whether TAGE allocaitons may happen or not + * on an update + * For this base TAGE implementation it does nothing + */ + virtual void adjustAlloc(bool & alloc, bool taken); + + /** + * Handles Allocation and U bits reset on an update + */ + virtual void handleAllocAndUReset( + bool alloc, bool taken, BranchInfo* bi, int nrand); + + /** + * Handles the U bits reset + */ + virtual void handleUReset(); + + /** + * Handles the update of the TAGE entries + */ + virtual void handleTAGEUpdate( + Addr branch_pc, bool taken, BranchInfo* bi); + + /** + * Algorithm for resetting a single U counter + */ + virtual void resetUctr(uint8_t & u); + + /** + * Extra steps for calculating altTaken + * For this base TAGE class it does nothing + */ + virtual void extraAltCalc(BranchInfo* bi); + + /** + * Algorithm for returning a random number + * This base TAGE class just uses random_mt, but some derived classes + * may want to use a more realistic implementation or force some values + */ + static int getRandom(); + + void btbUpdate(ThreadID tid, Addr branch_addr, BranchInfo* &bi); + unsigned getGHR(ThreadID tid, BranchInfo *bi) const; + int8_t getCtr(int hitBank, int hitBankIndex) const; + unsigned getTageCtrBits() const; + int getPathHist(ThreadID tid) const; + bool isSpeculativeUpdateEnabled() 
const; + + protected: + const unsigned logRatioBiModalHystEntries; + const unsigned nHistoryTables; + const unsigned tagTableCounterBits; + const unsigned tagTableUBits; + const unsigned histBufferSize; + const unsigned minHist; + const unsigned maxHist; + const unsigned pathHistBits; + + std::vector tagTableTagWidths; + std::vector logTagTableSizes; + + std::vector btablePrediction; + std::vector btableHysteresis; + TageEntry **gtable; + + // Keep per-thread histories to + // support SMT. + struct ThreadHistory { + // Speculative path history + // (LSB of branch address) + int pathHist; + + // Speculative branch direction + // history (circular buffer) + // @TODO Convert to std::vector + uint8_t *globalHistory; + + // Pointer to most recent branch outcome + uint8_t* gHist; + + // Index to most recent branch outcome + int ptGhist; + + // Speculative folded histories. + FoldedHistory *computeIndices; + FoldedHistory *computeTags[2]; + }; + + std::vector threadHistory; + + /** + * Initialization of the folded histories + */ + virtual void initFoldedHistories(ThreadHistory & history); + + int *histLengths; + int *tableIndices; + int *tableTags; + + std::vector useAltPredForNewlyAllocated; + int64_t tCounter; + uint64_t logUResetPeriod; + unsigned numUseAltOnNa; + unsigned useAltOnNaBits; + unsigned maxNumAlloc; + + // Tells which tables are active + // (for the base TAGE implementation all are active) + // Some other classes use this for handling associativity + std::vector noSkip; + + const bool speculativeHistUpdate; + + const unsigned instShiftAmt; + + // stats + Stats::Scalar tageLongestMatchProviderCorrect; + Stats::Scalar tageAltMatchProviderCorrect; + Stats::Scalar bimodalAltMatchProviderCorrect; + Stats::Scalar tageBimodalProviderCorrect; + Stats::Scalar tageLongestMatchProviderWrong; + Stats::Scalar tageAltMatchProviderWrong; + Stats::Scalar bimodalAltMatchProviderWrong; + Stats::Scalar tageBimodalProviderWrong; + Stats::Scalar 
tageAltMatchProviderWouldHaveHit; + Stats::Scalar tageLongestMatchProviderWouldHaveHit; + + Stats::Vector tageLongestMatchProvider; + Stats::Vector tageAltMatchProvider; +}; + +#endif // __CPU_PRED_TAGE_BASE diff --git a/src/cpu/pred/tournament.cc b/src/cpu/pred/tournament.cc index 276338151..6ed208b1a 100644 --- a/src/cpu/pred/tournament.cc +++ b/src/cpu/pred/tournament.cc @@ -266,7 +266,8 @@ TournamentBP::uncondBranch(ThreadID tid, Addr pc, void * &bp_history) void TournamentBP::update(ThreadID tid, Addr branch_addr, bool taken, - void *bp_history, bool squashed) + void *bp_history, bool squashed, + const StaticInstPtr & inst, Addr corrTarget) { assert(bp_history); diff --git a/src/cpu/pred/tournament.hh b/src/cpu/pred/tournament.hh index c4c092687..e1e50816e 100644 --- a/src/cpu/pred/tournament.hh +++ b/src/cpu/pred/tournament.hh @@ -101,9 +101,12 @@ class TournamentBP : public BPredUnit * when the branch was predicted. * @param squashed is set when this function is called during a squash * operation. + * @param inst Static instruction information + * @param corrTarget Resolved target of the branch (only needed if + * squashed) */ void update(ThreadID tid, Addr branch_addr, bool taken, void *bp_history, - bool squashed); + bool squashed, const StaticInstPtr & inst, Addr corrTarget); /** * Restores the global branch history on a squash. -- 2.30.2