cpu: split LTAGE implementation into a base TAGE and a derived LTAGE
[gem5.git] / src / cpu / pred / ltage.cc
index 86b9e99932549ee04d1c0ac5b78c75b0bcaa5dee..3be01936e98b8a1b8af0b358897e4e165b211a6c 100644 (file)
 #include "debug/LTage.hh"
 
 LTAGE::LTAGE(const LTAGEParams *params)
-  : BPredUnit(params),
-    logSizeBiMP(params->logSizeBiMP),
-    logSizeTagTables(params->logSizeTagTables),
+  : TAGE(params),
     logSizeLoopPred(params->logSizeLoopPred),
-    nHistoryTables(params->nHistoryTables),
-    tagTableCounterBits(params->tagTableCounterBits),
-    histBufferSize(params->histBufferSize),
-    minHist(params->minHist),
-    maxHist(params->maxHist),
-    minTagWidth(params->minTagWidth),
-    threadHistory(params->numThreads)
+    loopTableAgeBits(params->loopTableAgeBits),
+    loopTableConfidenceBits(params->loopTableConfidenceBits),
+    loopTableTagBits(params->loopTableTagBits),
+    loopTableIterBits(params->loopTableIterBits),
+    logLoopTableAssoc(params->logLoopTableAssoc),
+    confidenceThreshold((1 << loopTableConfidenceBits) - 1),
+    loopTagMask((1 << loopTableTagBits) - 1),
+    loopNumIterMask((1 << loopTableIterBits) - 1),
+    loopUseCounter(0),
+    withLoopBits(params->withLoopBits)
 {
-    assert(params->histBufferSize > params->maxHist * 2);
-    useAltPredForNewlyAllocated = 0;
-    logTick = 19;
-    tCounter = ULL(1) << (logTick - 1);
-
-    for (auto& history : threadHistory) {
-        history.pathHist = 0;
-        history.globalHistory = new uint8_t[histBufferSize];
-        history.gHist = history.globalHistory;
-        memset(history.gHist, 0, histBufferSize);
-        history.ptGhist = 0;
-    }
-
-    histLengths = new int [nHistoryTables+1];
-    histLengths[1] = minHist;
-    histLengths[nHistoryTables] = maxHist;
+    // we use uint16_t type for these vales, so they cannot be more than
+    // 16 bits
+    assert(loopTableTagBits <= 16);
+    assert(loopTableIterBits <= 16);
 
-    for (int i = 2; i <= nHistoryTables; i++) {
-        histLengths[i] = (int) (((double) minHist *
-                    pow ((double) (maxHist) / (double) minHist,
-                        (double) (i - 1) / (double) ((nHistoryTables- 1))))
-                    + 0.5);
-    }
+    assert(logSizeLoopPred >= logLoopTableAssoc);
 
-    tagWidths[1] = minTagWidth;
-    tagWidths[2] = minTagWidth;
-    tagWidths[3] = minTagWidth + 1;
-    tagWidths[4] = minTagWidth + 1;
-    tagWidths[5] = minTagWidth + 2;
-    tagWidths[6] = minTagWidth + 3;
-    tagWidths[7] = minTagWidth + 4;
-    tagWidths[8] = minTagWidth + 5;
-    tagWidths[9] = minTagWidth + 5;
-    tagWidths[10] = minTagWidth + 6;
-    tagWidths[11] = minTagWidth + 7;
-    tagWidths[12] = minTagWidth + 8;
-
-    for (int i = 1; i <= 2; i++)
-        tagTableSizes[i] = logSizeTagTables - 1;
-    for (int i = 3; i <= 6; i++)
-        tagTableSizes[i] = logSizeTagTables;
-    for (int i = 7; i <= 10; i++)
-        tagTableSizes[i] = logSizeTagTables - 1;
-    for (int i = 11; i <= 12; i++)
-        tagTableSizes[i] = logSizeTagTables - 2;
-
-    for (auto& history : threadHistory) {
-        history.computeIndices = new FoldedHistory[nHistoryTables+1];
-        history.computeTags[0] = new FoldedHistory[nHistoryTables+1];
-        history.computeTags[1] = new FoldedHistory[nHistoryTables+1];
-
-        for (int i = 1; i <= nHistoryTables; i++) {
-            history.computeIndices[i].init(histLengths[i], (tagTableSizes[i]));
-            history.computeTags[0][i].init(
-                history.computeIndices[i].origLength, tagWidths[i]);
-            history.computeTags[1][i].init(
-                history.computeIndices[i].origLength, tagWidths[i] - 1);
-            DPRINTF(LTage, "HistLength:%d, TTSize:%d, TTTWidth:%d\n",
-                    histLengths[i], tagTableSizes[i], tagWidths[i]);
-        }
-    }
-
-    btable = new BimodalEntry[ULL(1) << logSizeBiMP];
     ltable = new LoopEntry[ULL(1) << logSizeLoopPred];
-    gtable = new TageEntry*[nHistoryTables + 1];
-    for (int i = 1; i <= nHistoryTables; i++) {
-        gtable[i] = new TageEntry[1<<(tagTableSizes[i])];
-    }
-
-    tableIndices = new int [nHistoryTables+1];
-    tableTags = new int [nHistoryTables+1];
-
-    loopUseCounter = 0;
-}
-
-int
-LTAGE::bindex(Addr pc_in) const
-{
-    return ((pc_in >> instShiftAmt) & ((ULL(1) << (logSizeBiMP)) - 1));
 }
 
 int
 LTAGE::lindex(Addr pc_in) const
 {
-    return (((pc_in >> instShiftAmt) &
-             ((ULL(1) << (logSizeLoopPred - 2)) - 1)) << 2);
-}
-
-int
-LTAGE::F(int A, int size, int bank) const
-{
-    int A1, A2;
-
-    A = A & ((ULL(1) << size) - 1);
-    A1 = (A & ((ULL(1) << tagTableSizes[bank]) - 1));
-    A2 = (A >> tagTableSizes[bank]);
-    A2 = ((A2 << bank) & ((ULL(1) << tagTableSizes[bank]) - 1))
-       + (A2 >> (tagTableSizes[bank] - bank));
-    A = A1 ^ A2;
-    A = ((A << bank) & ((ULL(1) << tagTableSizes[bank]) - 1))
-      + (A >> (tagTableSizes[bank] - bank));
-    return (A);
-}
-
-
-// gindex computes a full hash of pc, ghist and pathHist
-int
-LTAGE::gindex(ThreadID tid, Addr pc, int bank) const
-{
-    int index;
-    int hlen = (histLengths[bank] > 16) ? 16 : histLengths[bank];
-    index =
-        (pc >> instShiftAmt) ^
-        ((pc >> instShiftAmt) >> ((int) abs(tagTableSizes[bank] - bank) + 1)) ^
-        threadHistory[tid].computeIndices[bank].comp ^
-        F(threadHistory[tid].pathHist, hlen, bank);
-
-    return (index & ((ULL(1) << (tagTableSizes[bank])) - 1));
-}
-
-
-// Tag computation
-uint16_t
-LTAGE::gtag(ThreadID tid, Addr pc, int bank) const
-{
-    int tag = (pc >> instShiftAmt) ^
-              threadHistory[tid].computeTags[0][bank].comp ^
-              (threadHistory[tid].computeTags[1][bank].comp << 1);
-
-    return (tag & ((ULL(1) << tagWidths[bank]) - 1));
-}
-
-
-// Up-down saturating counter
-void
-LTAGE::ctrUpdate(int8_t & ctr, bool taken, int nbits)
-{
-    assert(nbits <= sizeof(int8_t) << 3);
-    if (taken) {
-        if (ctr < ((1 << (nbits - 1)) - 1))
-            ctr++;
-    } else {
-        if (ctr > -(1 << (nbits - 1)))
-            ctr--;
-    }
-}
-
-// Bimodal prediction
-bool
-LTAGE::getBimodePred(Addr pc, BranchInfo* bi) const
-{
-    return (btable[bi->bimodalIndex].pred > 0);
+    // The loop table is implemented as a linear table
+    // If associativity is N (N being 1 << logLoopTableAssoc),
+    // the first N entries are for set 0, the next N entries are for set 1,
+    // and so on.
+    // Thus, this function calculates the set and then it gets left shifted
+    // by logLoopTableAssoc in order to return the index of the first of the
+    // N entries of the set
+    Addr mask = (ULL(1) << (logSizeLoopPred - logLoopTableAssoc)) - 1;
+    return (((pc_in >> instShiftAmt) & mask) << logLoopTableAssoc);
 }
 
-
-// Update the bimodal predictor: a hysteresis bit is shared among 4 prediction
-// bits
-void
-LTAGE::baseUpdate(Addr pc, bool taken, BranchInfo* bi)
-{
-    int inter = (btable[bi->bimodalIndex].pred << 1)
-              + btable[bi->bimodalIndex ].hyst;
-    if (taken) {
-        if (inter < 3)
-            inter++;
-    } else if (inter > 0) {
-        inter--;
-    }
-    btable[bi->bimodalIndex].pred = inter >> 1;
-    btable[bi->bimodalIndex].hyst = (inter & 1);
-    DPRINTF(LTage, "Updating branch %lx, pred:%d, hyst:%d\n",
-            pc, btable[bi->bimodalIndex].pred,btable[bi->bimodalIndex].hyst);
-}
-
-
 //loop prediction: only used if high confidence
 bool
-LTAGE::getLoop(Addr pc, BranchInfo* bi) const
+LTAGE::getLoop(Addr pc, LTageBranchInfo* bi) const
 {
     bi->loopHit = -1;
     bi->loopPredValid = false;
     bi->loopIndex = lindex(pc);
-    bi->loopTag = ((pc) >> (instShiftAmt + logSizeLoopPred - 2));
+    unsigned pcShift = instShiftAmt + logSizeLoopPred - logLoopTableAssoc;
+    bi->loopTag = ((pc) >> pcShift) & loopTagMask;
 
-    for (int i = 0; i < 4; i++) {
+    for (int i = 0; i < (1 << logLoopTableAssoc); i++) {
         if (ltable[bi->loopIndex + i].tag == bi->loopTag) {
             bi->loopHit = i;
-            bi->loopPredValid = (ltable[bi->loopIndex + i].confidence >= 3);
+            bi->loopPredValid =
+                ltable[bi->loopIndex + i].confidence == confidenceThreshold;
             bi->currentIter = ltable[bi->loopIndex + i].currentIterSpec;
             if (ltable[bi->loopIndex + i].currentIterSpec + 1 ==
                 ltable[bi->loopIndex + i].numIter) {
@@ -261,20 +113,21 @@ LTAGE::getLoop(Addr pc, BranchInfo* bi) const
 }
 
 void
-LTAGE::specLoopUpdate(Addr pc, bool taken, BranchInfo* bi)
+LTAGE::specLoopUpdate(Addr pc, bool taken, LTageBranchInfo* bi)
 {
     if (bi->loopHit>=0) {
         int index = lindex(pc);
         if (taken != ltable[index].dir) {
             ltable[index].currentIterSpec = 0;
         } else {
-            ltable[index].currentIterSpec++;
+            ltable[index].currentIterSpec =
+                (ltable[index].currentIterSpec + 1) & loopNumIterMask;
         }
     }
 }
 
 void
-LTAGE::loopUpdate(Addr pc, bool taken, BranchInfo* bi)
+LTAGE::loopUpdate(Addr pc, bool taken, LTageBranchInfo* bi)
 {
     int idx = bi->loopIndex + bi->loopHit;
     if (bi->loopHit >= 0) {
@@ -289,12 +142,12 @@ LTAGE::loopUpdate(Addr pc, bool taken, BranchInfo* bi)
                 return;
             } else if (bi->loopPred != bi->tagePred) {
                 DPRINTF(LTage, "Loop Prediction success:%lx\n",pc);
-                if (ltable[idx].age < 7)
-                    ltable[idx].age++;
+                unsignedCtrUpdate(ltable[idx].age, true, loopTableAgeBits);
             }
         }
 
-        ltable[idx].currentIter++;
+        ltable[idx].currentIter =
+            (ltable[idx].currentIter + 1) & loopNumIterMask;
         if (ltable[idx].currentIter > ltable[idx].numIter) {
             ltable[idx].confidence = 0;
             if (ltable[idx].numIter != 0) {
@@ -309,9 +162,8 @@ LTAGE::loopUpdate(Addr pc, bool taken, BranchInfo* bi)
             if (ltable[idx].currentIter == ltable[idx].numIter) {
                 DPRINTF(LTage, "Loop End predicted successfully:%lx\n", pc);
 
-                if (ltable[idx].confidence < 7) {
-                    ltable[idx].confidence++;
-                }
+                unsignedCtrUpdate(ltable[idx].confidence, true,
+                                  loopTableConfidenceBits);
                 //just do not predict when the loop count is 1 or 2
                 if (ltable[idx].numIter < 3) {
                     // free the entry
@@ -340,8 +192,8 @@ LTAGE::loopUpdate(Addr pc, bool taken, BranchInfo* bi)
     } else if (taken) {
         //try to allocate an entry on taken branch
         int nrand = random_mt.random<int>();
-        for (int i = 0; i < 4; i++) {
-            int loop_hit = (nrand + i) & 3;
+        for (int i = 0; i < (1 << logLoopTableAssoc); i++) {
+            int loop_hit = (nrand + i) & ((1 << logLoopTableAssoc) - 1);
             idx = bi->loopIndex + loop_hit;
             if (ltable[idx].age == 0) {
                 DPRINTF(LTage, "Allocating loop pred entry for branch %lx\n",
@@ -349,7 +201,7 @@ LTAGE::loopUpdate(Addr pc, bool taken, BranchInfo* bi)
                 ltable[idx].dir = !taken;
                 ltable[idx].tag = bi->loopTag;
                 ltable[idx].numIter = 0;
-                ltable[idx].age = 7;
+                ltable[idx].age = (1 << loopTableAgeBits) - 1;
                 ltable[idx].confidence = 0;
                 ltable[idx].currentIter = 1;
                 break;
@@ -362,320 +214,57 @@ LTAGE::loopUpdate(Addr pc, bool taken, BranchInfo* bi)
 
 }
 
-// shifting the global history:  we manage the history in a big table in order
-// to reduce simulation time
-void
-LTAGE::updateGHist(uint8_t * &h, bool dir, uint8_t * tab, int &pt)
-{
-    if (pt == 0) {
-        DPRINTF(LTage, "Rolling over the histories\n");
-         // Copy beginning of globalHistoryBuffer to end, such that
-         // the last maxHist outcomes are still reachable
-         // through pt[0 .. maxHist - 1].
-         for (int i = 0; i < maxHist; i++)
-             tab[histBufferSize - maxHist + i] = tab[i];
-         pt =  histBufferSize - maxHist;
-         h = &tab[pt];
-    }
-    pt--;
-    h--;
-    h[0] = (dir) ? 1 : 0;
-}
-
-// Get GHR for hashing indirect predictor
-// Build history backwards from pointer in
-// bp_history.
-unsigned
-LTAGE::getGHR(ThreadID tid, void *bp_history) const
-{
-    BranchInfo* bi = static_cast<BranchInfo*>(bp_history);
-    unsigned val = 0;
-    for (unsigned i = 0; i < 32; i++) {
-        // Make sure we don't go out of bounds
-        int gh_offset = bi->ptGhist + i;
-        assert(&(threadHistory[tid].globalHistory[gh_offset]) <
-               threadHistory[tid].globalHistory + histBufferSize);
-        val |= ((threadHistory[tid].globalHistory[gh_offset] & 0x1) << i);
-    }
-
-    return val;
-}
-
 //prediction
 bool
 LTAGE::predict(ThreadID tid, Addr branch_pc, bool cond_branch, void* &b)
 {
-    BranchInfo *bi = new BranchInfo(nHistoryTables+1);
+    LTageBranchInfo *bi = new LTageBranchInfo(nHistoryTables+1);
     b = (void*)(bi);
-    Addr pc = branch_pc;
-    bool pred_taken = true;
-    bi->loopHit = -1;
-
-    if (cond_branch) {
-        // TAGE prediction
-
-        // computes the table addresses and the partial tags
-        for (int i = 1; i <= nHistoryTables; i++) {
-            tableIndices[i] = gindex(tid, pc, i);
-            bi->tableIndices[i] = tableIndices[i];
-            tableTags[i] = gtag(tid, pc, i);
-            bi->tableTags[i] = tableTags[i];
-        }
 
-        bi->bimodalIndex = bindex(pc);
+    bool pred_taken = tagePredict(tid, branch_pc, cond_branch, bi);
 
-        bi->hitBank = 0;
-        bi->altBank = 0;
-        //Look for the bank with longest matching history
-        for (int i = nHistoryTables; i > 0; i--) {
-            if (gtable[i][tableIndices[i]].tag == tableTags[i]) {
-                bi->hitBank = i;
-                bi->hitBankIndex = tableIndices[bi->hitBank];
-                break;
-            }
-        }
-        //Look for the alternate bank
-        for (int i = bi->hitBank - 1; i > 0; i--) {
-            if (gtable[i][tableIndices[i]].tag == tableTags[i]) {
-                bi->altBank = i;
-                bi->altBankIndex = tableIndices[bi->altBank];
-                break;
-            }
-        }
-        //computes the prediction and the alternate prediction
-        if (bi->hitBank > 0) {
-            if (bi->altBank > 0) {
-                bi->altTaken =
-                    gtable[bi->altBank][tableIndices[bi->altBank]].ctr >= 0;
-            }else {
-                bi->altTaken = getBimodePred(pc, bi);
-            }
+    if (cond_branch) {
+        bi->loopPred = getLoop(branch_pc, bi); // loop prediction
 
-            bi->longestMatchPred =
-                gtable[bi->hitBank][tableIndices[bi->hitBank]].ctr >= 0;
-            bi->pseudoNewAlloc =
-                abs(2 * gtable[bi->hitBank][bi->hitBankIndex].ctr + 1) <= 1;
-
-            //if the entry is recognized as a newly allocated entry and
-            //useAltPredForNewlyAllocated is positive use the alternate
-            //prediction
-            if ((useAltPredForNewlyAllocated < 0)
-                   || abs(2 *
-                   gtable[bi->hitBank][tableIndices[bi->hitBank]].ctr + 1) > 1)
-                bi->tagePred = bi->longestMatchPred;
-            else
-                bi->tagePred = bi->altTaken;
-        } else {
-            bi->altTaken = getBimodePred(pc, bi);
-            bi->tagePred = bi->altTaken;
-            bi->longestMatchPred = bi->altTaken;
+        if ((loopUseCounter >= 0) && bi->loopPredValid) {
+            pred_taken = bi->loopPred;
         }
-        //end TAGE prediction
-
-        bi->loopPred = getLoop(pc, bi);        // loop prediction
-
-        pred_taken = (((loopUseCounter >= 0) && bi->loopPredValid)) ?
-                     (bi->loopPred): (bi->tagePred);
         DPRINTF(LTage, "Predict for %lx: taken?:%d, loopTaken?:%d, "
                 "loopValid?:%d, loopUseCounter:%d, tagePred:%d, altPred:%d\n",
                 branch_pc, pred_taken, bi->loopPred, bi->loopPredValid,
                 loopUseCounter, bi->tagePred, bi->altTaken);
     }
-    bi->branchPC = branch_pc;
-    bi->condBranch = cond_branch;
+
     specLoopUpdate(branch_pc, pred_taken, bi);
     return pred_taken;
 }
 
-// PREDICTOR UPDATE
 void
-LTAGE::update(ThreadID tid, Addr branch_pc, bool taken, void* bp_history,
-              bool squashed)
+LTAGE::condBranchUpdate(Addr branch_pc, bool taken,
+                        TageBranchInfo* tage_bi, int nrand)
 {
-    assert(bp_history);
+    LTageBranchInfo* bi = static_cast<LTageBranchInfo*>(tage_bi);
 
-    BranchInfo *bi = static_cast<BranchInfo*>(bp_history);
-
-    if (squashed) {
-        // This restores the global history, then update it
-        // and recomputes the folded histories.
-        squash(tid, taken, bp_history);
-        return;
-    }
+    // first update the loop predictor
+    loopUpdate(branch_pc, taken, bi);
 
-    int nrand  = random_mt.random<int>(0,3);
-    Addr pc = branch_pc;
-    if (bi->condBranch) {
-        DPRINTF(LTage, "Updating tables for branch:%lx; taken?:%d\n",
-                branch_pc, taken);
-        // first update the loop predictor
-        loopUpdate(pc, taken, bi);
-
-        if (bi->loopPredValid) {
-            if (bi->tagePred != bi->loopPred) {
-                ctrUpdate(loopUseCounter, (bi->loopPred== taken), 7);
-            }
+    if (bi->loopPredValid) {
+        if (bi->tagePred != bi->loopPred) {
+            ctrUpdate(loopUseCounter,
+                      (bi->loopPred == taken),
+                      withLoopBits);
         }
-
-        // TAGE UPDATE
-        // try to allocate a  new entries only if prediction was wrong
-        bool longest_match_pred = false;
-        bool alloc = (bi->tagePred != taken) && (bi->hitBank < nHistoryTables);
-        if (bi->hitBank > 0) {
-            // Manage the selection between longest matching and alternate
-            // matching for "pseudo"-newly allocated longest matching entry
-             longest_match_pred = bi->longestMatchPred;
-            bool PseudoNewAlloc = bi->pseudoNewAlloc;
-            // an entry is considered as newly allocated if its prediction
-            // counter is weak
-            if (PseudoNewAlloc) {
-                if (longest_match_pred == taken) {
-                    alloc = false;
-                }
-                // if it was delivering the correct prediction, no need to
-                // allocate new entry even if the overall prediction was false
-                if (longest_match_pred != bi->altTaken) {
-                    ctrUpdate(useAltPredForNewlyAllocated,
-                         bi->altTaken == taken, 4);
-                }
-            }
-        }
-
-        if (alloc) {
-            // is there some "unuseful" entry to allocate
-            int8_t min = 1;
-            for (int i = nHistoryTables; i > bi->hitBank; i--) {
-                if (gtable[i][bi->tableIndices[i]].u < min) {
-                    min = gtable[i][bi->tableIndices[i]].u;
-                }
-            }
-
-            // we allocate an entry with a longer history
-            // to  avoid ping-pong, we do not choose systematically the next
-            // entry, but among the 3 next entries
-            int Y = nrand &
-                ((ULL(1) << (nHistoryTables - bi->hitBank - 1)) - 1);
-            int X = bi->hitBank + 1;
-            if (Y & 1) {
-                X++;
-                if (Y & 2)
-                    X++;
-            }
-            // No entry available, forces one to be available
-            if (min > 0) {
-                gtable[X][bi->tableIndices[X]].u = 0;
-            }
-
-
-            //Allocate only  one entry
-            for (int i = X; i <= nHistoryTables; i++) {
-                if ((gtable[i][bi->tableIndices[i]].u == 0)) {
-                    gtable[i][bi->tableIndices[i]].tag = bi->tableTags[i];
-                    gtable[i][bi->tableIndices[i]].ctr = (taken) ? 0 : -1;
-                    gtable[i][bi->tableIndices[i]].u = 0; //?
-                }
-            }
-        }
-        //periodic reset of u: reset is not complete but bit by bit
-        tCounter++;
-        if ((tCounter & ((ULL(1) << logTick) - 1)) == 0) {
-            // reset least significant bit
-            // most significant bit becomes least significant bit
-            for (int i = 1; i <= nHistoryTables; i++) {
-                for (int j = 0; j < (ULL(1) << tagTableSizes[i]); j++) {
-                    gtable[i][j].u = gtable[i][j].u >> 1;
-                }
-            }
-        }
-
-        if (bi->hitBank > 0) {
-            DPRINTF(LTage, "Updating tag table entry (%d,%d) for branch %lx\n",
-                    bi->hitBank, bi->hitBankIndex, branch_pc);
-            ctrUpdate(gtable[bi->hitBank][bi->hitBankIndex].ctr, taken,
-                      tagTableCounterBits);
-            // if the provider entry is not certified to be useful also update
-            // the alternate prediction
-            if (gtable[bi->hitBank][bi->hitBankIndex].u == 0) {
-                if (bi->altBank > 0) {
-                    ctrUpdate(gtable[bi->altBank][bi->altBankIndex].ctr, taken,
-                              tagTableCounterBits);
-                    DPRINTF(LTage, "Updating tag table entry (%d,%d) for"
-                            " branch %lx\n", bi->hitBank, bi->hitBankIndex,
-                            branch_pc);
-                }
-                if (bi->altBank == 0) {
-                    baseUpdate(pc, taken, bi);
-                }
-            }
-
-            // update the u counter
-            if (longest_match_pred != bi->altTaken) {
-                if (longest_match_pred == taken) {
-                    if (gtable[bi->hitBank][bi->hitBankIndex].u < 1) {
-                        gtable[bi->hitBank][bi->hitBankIndex].u++;
-                    }
-                }
-            }
-        } else {
-            baseUpdate(pc, taken, bi);
-        }
-
-        //END PREDICTOR UPDATE
-    }
-    if (!squashed) {
-        delete bi;
     }
-}
 
-void
-LTAGE::updateHistories(ThreadID tid, Addr branch_pc, bool taken, void* b)
-{
-    BranchInfo* bi = (BranchInfo*)(b);
-    ThreadHistory& tHist = threadHistory[tid];
-    //  UPDATE HISTORIES
-    bool pathbit = ((branch_pc >> instShiftAmt) & 1);
-    //on a squash, return pointers to this and recompute indices.
-    //update user history
-    updateGHist(tHist.gHist, taken, tHist.globalHistory, tHist.ptGhist);
-    tHist.pathHist = (tHist.pathHist << 1) + pathbit;
-    tHist.pathHist = (tHist.pathHist & ((ULL(1) << 16) - 1));
-
-    bi->ptGhist = tHist.ptGhist;
-    bi->pathHist = tHist.pathHist;
-    //prepare next index and tag computations for user branchs
-    for (int i = 1; i <= nHistoryTables; i++)
-    {
-        bi->ci[i]  = tHist.computeIndices[i].comp;
-        bi->ct0[i] = tHist.computeTags[0][i].comp;
-        bi->ct1[i] = tHist.computeTags[1][i].comp;
-        tHist.computeIndices[i].update(tHist.gHist);
-        tHist.computeTags[0][i].update(tHist.gHist);
-        tHist.computeTags[1][i].update(tHist.gHist);
-    }
-    DPRINTF(LTage, "Updating global histories with branch:%lx; taken?:%d, "
-            "path Hist: %x; pointer:%d\n", branch_pc, taken, tHist.pathHist,
-            tHist.ptGhist);
+    TAGE::condBranchUpdate(branch_pc, taken, bi, nrand);
 }
 
 void
 LTAGE::squash(ThreadID tid, bool taken, void *bp_history)
 {
-    BranchInfo* bi = (BranchInfo*)(bp_history);
-    ThreadHistory& tHist = threadHistory[tid];
-    DPRINTF(LTage, "Restoring branch info: %lx; taken? %d; PathHistory:%x, "
-            "pointer:%d\n", bi->branchPC,taken, bi->pathHist, bi->ptGhist);
-    tHist.pathHist = bi->pathHist;
-    tHist.ptGhist = bi->ptGhist;
-    tHist.gHist = &(tHist.globalHistory[tHist.ptGhist]);
-    tHist.gHist[0] = (taken ? 1 : 0);
-    for (int i = 1; i <= nHistoryTables; i++) {
-        tHist.computeIndices[i].comp = bi->ci[i];
-        tHist.computeTags[0][i].comp = bi->ct0[i];
-        tHist.computeTags[1][i].comp = bi->ct1[i];
-        tHist.computeIndices[i].update(tHist.gHist);
-        tHist.computeTags[0][i].update(tHist.gHist);
-        tHist.computeTags[1][i].update(tHist.gHist);
-    }
+    TAGE::squash(tid, taken, bp_history);
+
+    LTageBranchInfo* bi = (LTageBranchInfo*)(bp_history);
 
     if (bi->condBranch) {
         if (bi->loopHit >= 0) {
@@ -683,14 +272,12 @@ LTAGE::squash(ThreadID tid, bool taken, void *bp_history)
             ltable[idx].currentIterSpec = bi->currentIter;
         }
     }
-
 }
 
 void
 LTAGE::squash(ThreadID tid, void *bp_history)
 {
-    BranchInfo* bi = (BranchInfo*)(bp_history);
-    DPRINTF(LTage, "Deleting branch info: %lx\n", bi->branchPC);
+    LTageBranchInfo* bi = (LTageBranchInfo*)(bp_history);
     if (bi->condBranch) {
         if (bi->loopHit >= 0) {
             int idx = bi->loopIndex + bi->loopHit;
@@ -698,48 +285,7 @@ LTAGE::squash(ThreadID tid, void *bp_history)
         }
     }
 
-    delete bi;
-}
-
-bool
-LTAGE::lookup(ThreadID tid, Addr branch_pc, void* &bp_history)
-{
-    bool retval = predict(tid, branch_pc, true, bp_history);
-
-    DPRINTF(LTage, "Lookup branch: %lx; predict:%d\n", branch_pc, retval);
-    updateHistories(tid, branch_pc, retval, bp_history);
-    assert(threadHistory[tid].gHist ==
-           &threadHistory[tid].globalHistory[threadHistory[tid].ptGhist]);
-
-    return retval;
-}
-
-void
-LTAGE::btbUpdate(ThreadID tid, Addr branch_pc, void* &bp_history)
-{
-    BranchInfo* bi = (BranchInfo*) bp_history;
-    ThreadHistory& tHist = threadHistory[tid];
-    DPRINTF(LTage, "BTB miss resets prediction: %lx\n", branch_pc);
-    assert(tHist.gHist == &tHist.globalHistory[tHist.ptGhist]);
-    tHist.gHist[0] = 0;
-    for (int i = 1; i <= nHistoryTables; i++) {
-        tHist.computeIndices[i].comp = bi->ci[i];
-        tHist.computeTags[0][i].comp = bi->ct0[i];
-        tHist.computeTags[1][i].comp = bi->ct1[i];
-        tHist.computeIndices[i].update(tHist.gHist);
-        tHist.computeTags[0][i].update(tHist.gHist);
-        tHist.computeTags[1][i].update(tHist.gHist);
-    }
-}
-
-void
-LTAGE::uncondBranch(ThreadID tid, Addr br_pc, void* &bp_history)
-{
-    DPRINTF(LTage, "UnConditionalBranch: %lx\n", br_pc);
-    predict(tid, br_pc, false, bp_history);
-    updateHistories(tid, br_pc, true, bp_history);
-    assert(threadHistory[tid].gHist ==
-           &threadHistory[tid].globalHistory[threadHistory[tid].ptGhist]);
+    TAGE::squash(tid, bp_history);
 }
 
 LTAGE*