From 7e48e92888879b7ab81fba82c09ff6f662bb024d Mon Sep 17 00:00:00 2001 From: Pau Cabre Date: Sun, 11 Nov 2018 23:43:33 +0100 Subject: [PATCH] cpu: Fixes on the loop predictor part of LTAGE Fixed the following fields of the loop predictor entries as described in the LTAGE paper: - Age counter (it was 3 bits and it should be 8 bits) - Tag (it was 16 bits and it should be 14 bits). Also sometimes it used int variables and sometimes uint16_t, leading to wrong behaviour - Confidence counter (it was 2 bits in some parts of the code and 3 bits in some other parts. It should be 2 bits) - Iteration counters (they were 16 bits and they should be 14 bits) All the new sizes are now configurable Change-Id: I8884c7454c1e510b65160eb4d5749d3259d34096 Signed-off-by: Pau Cabre Reviewed-on: https://gem5-review.googlesource.com/c/14216 Reviewed-by: Jason Lowe-Power Maintainer: Jason Lowe-Power --- src/cpu/pred/BranchPredictor.py | 6 +++++ src/cpu/pred/ltage.cc | 47 ++++++++++++++++++++++++++------- src/cpu/pred/ltage.hh | 19 ++++++++++++- 3 files changed, 61 insertions(+), 11 deletions(-) diff --git a/src/cpu/pred/BranchPredictor.py b/src/cpu/pred/BranchPredictor.py index 1eeecde95..a42819419 100644 --- a/src/cpu/pred/BranchPredictor.py +++ b/src/cpu/pred/BranchPredictor.py @@ -106,3 +106,9 @@ class LTAGE(BranchPredictor): maxHist = Param.Unsigned(640, "Maximum history size of LTAGE") minTagWidth = Param.Unsigned(7, "Minimum tag size in tag tables") + loopTableAgeBits = Param.Unsigned(8, "Number of age bits per loop entry") + loopTableConfidenceBits = Param.Unsigned(2, + "Number of confidence bits per loop entry") + loopTableTagBits = Param.Unsigned(14, "Number of tag bits per loop entry") + loopTableIterBits = Param.Unsigned(14, "Nuber of iteration bits per loop") + diff --git a/src/cpu/pred/ltage.cc b/src/cpu/pred/ltage.cc index 8d20d5056..85ae2b304 100644 --- a/src/cpu/pred/ltage.cc +++ b/src/cpu/pred/ltage.cc @@ -59,8 +59,20 @@ LTAGE::LTAGE(const LTAGEParams *params) 
minHist(params->minHist), maxHist(params->maxHist), minTagWidth(params->minTagWidth), + loopTableAgeBits(params->loopTableAgeBits), + loopTableConfidenceBits(params->loopTableConfidenceBits), + loopTableTagBits(params->loopTableTagBits), + loopTableIterBits(params->loopTableIterBits), + confidenceThreshold((1 << loopTableConfidenceBits) - 1), + loopTagMask((1 << loopTableTagBits) - 1), + loopNumIterMask((1 << loopTableIterBits) - 1), threadHistory(params->numThreads) { + // we use uint16_t type for these vales, so they cannot be more than + // 16 bits + assert(loopTableTagBits <= 16); + assert(loopTableIterBits <= 16); + assert(params->histBufferSize > params->maxHist * 2); useAltPredForNewlyAllocated = 0; logTick = 19; @@ -212,6 +224,20 @@ LTAGE::ctrUpdate(int8_t & ctr, bool taken, int nbits) } } +// Up-down unsigned saturating counter +void +LTAGE::unsignedCtrUpdate(uint8_t & ctr, bool up, unsigned nbits) +{ + assert(nbits <= sizeof(uint8_t) << 3); + if (up) { + if (ctr < ((1 << nbits) - 1)) + ctr++; + } else { + if (ctr) + ctr--; + } +} + // Bimodal prediction bool LTAGE::getBimodePred(Addr pc, BranchInfo* bi) const @@ -248,12 +274,13 @@ LTAGE::getLoop(Addr pc, BranchInfo* bi) const bi->loopHit = -1; bi->loopPredValid = false; bi->loopIndex = lindex(pc); - bi->loopTag = ((pc) >> (instShiftAmt + logSizeLoopPred - 2)); + bi->loopTag = ((pc) >> (instShiftAmt + logSizeLoopPred - 2)) & loopTagMask; for (int i = 0; i < 4; i++) { if (ltable[bi->loopIndex + i].tag == bi->loopTag) { bi->loopHit = i; - bi->loopPredValid = (ltable[bi->loopIndex + i].confidence >= 3); + bi->loopPredValid = + ltable[bi->loopIndex + i].confidence == confidenceThreshold; bi->currentIter = ltable[bi->loopIndex + i].currentIterSpec; if (ltable[bi->loopIndex + i].currentIterSpec + 1 == ltable[bi->loopIndex + i].numIter) { @@ -274,7 +301,8 @@ LTAGE::specLoopUpdate(Addr pc, bool taken, BranchInfo* bi) if (taken != ltable[index].dir) { ltable[index].currentIterSpec = 0; } else { - 
ltable[index].currentIterSpec++; + ltable[index].currentIterSpec = + (ltable[index].currentIterSpec + 1) & loopNumIterMask; } } } @@ -295,12 +323,12 @@ LTAGE::loopUpdate(Addr pc, bool taken, BranchInfo* bi) return; } else if (bi->loopPred != bi->tagePred) { DPRINTF(LTage, "Loop Prediction success:%lx\n",pc); - if (ltable[idx].age < 7) - ltable[idx].age++; + unsignedCtrUpdate(ltable[idx].age, true, loopTableAgeBits); } } - ltable[idx].currentIter++; + ltable[idx].currentIter = + (ltable[idx].currentIter + 1) & loopNumIterMask; if (ltable[idx].currentIter > ltable[idx].numIter) { ltable[idx].confidence = 0; if (ltable[idx].numIter != 0) { @@ -315,9 +343,8 @@ LTAGE::loopUpdate(Addr pc, bool taken, BranchInfo* bi) if (ltable[idx].currentIter == ltable[idx].numIter) { DPRINTF(LTage, "Loop End predicted successfully:%lx\n", pc); - if (ltable[idx].confidence < 7) { - ltable[idx].confidence++; - } + unsignedCtrUpdate(ltable[idx].confidence, true, + loopTableConfidenceBits); //just do not predict when the loop count is 1 or 2 if (ltable[idx].numIter < 3) { // free the entry @@ -355,7 +382,7 @@ LTAGE::loopUpdate(Addr pc, bool taken, BranchInfo* bi) ltable[idx].dir = !taken; ltable[idx].tag = bi->loopTag; ltable[idx].numIter = 0; - ltable[idx].age = 7; + ltable[idx].age = (1 << loopTableAgeBits) - 1; ltable[idx].confidence = 0; ltable[idx].currentIter = 1; break; diff --git a/src/cpu/pred/ltage.hh b/src/cpu/pred/ltage.hh index a810fb5fd..2119156b7 100644 --- a/src/cpu/pred/ltage.hh +++ b/src/cpu/pred/ltage.hh @@ -135,7 +135,7 @@ class LTAGE: public BPredUnit int altBank; int altBankIndex; int bimodalIndex; - int loopTag; + uint16_t loopTag; uint16_t currentIter; bool tagePred; @@ -237,6 +237,15 @@ class LTAGE: public BPredUnit */ void ctrUpdate(int8_t & ctr, bool taken, int nbits); + /** + * Updates an unsigned counter based on up/down parameter + * @param ctr Reference to counter to update. 
+ * @param up Boolean indicating if the counter is incremented/decremented + * If true it is incremented, if false it is decremented + * @param nbits Counter width. + */ + void unsignedCtrUpdate(uint8_t & ctr, bool up, unsigned nbits); + /** * Get a branch prediction from the bimodal * predictor. @@ -355,6 +364,14 @@ class LTAGE: public BPredUnit const unsigned minHist; const unsigned maxHist; const unsigned minTagWidth; + const unsigned loopTableAgeBits; + const unsigned loopTableConfidenceBits; + const unsigned loopTableTagBits; + const unsigned loopTableIterBits; + + const uint8_t confidenceThreshold; + const uint16_t loopTagMask; + const uint16_t loopNumIterMask; std::vector btablePrediction; std::vector btableHysteresis; -- 2.30.2