2 * Copyright (c) 2014 The University of Wisconsin
4 * Copyright (c) 2006 INRIA (Institut National de Recherche en
5 * Informatique et en Automatique / French National Research Institute
6 * for Computer Science and Applied Mathematics)
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions are
12 * met: redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer;
14 * redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution;
17 * neither the name of the copyright holders nor the names of its
18 * contributors may be used to endorse or promote products derived from
19 * this software without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 * Authors: Vignyan Reddy, Dibakar Gope and Arthur Perais,
34 * from André Seznec's code.
38 * Implementation of a TAGE branch predictor
41 #include "cpu/pred/tage.hh"
43 #include "base/intmath.hh"
44 #include "base/logging.hh"
45 #include "base/random.hh"
46 #include "base/trace.hh"
47 #include "debug/Fetch.hh"
48 #include "debug/Tage.hh"
50 TAGE::TAGE(const TAGEParams
*params
)
52 logRatioBiModalHystEntries(params
->logRatioBiModalHystEntries
),
53 nHistoryTables(params
->nHistoryTables
),
54 tagTableCounterBits(params
->tagTableCounterBits
),
55 tagTableUBits(params
->tagTableUBits
),
56 histBufferSize(params
->histBufferSize
),
57 minHist(params
->minHist
),
58 maxHist(params
->maxHist
),
59 pathHistBits(params
->pathHistBits
),
60 tagTableTagWidths(params
->tagTableTagWidths
),
61 logTagTableSizes(params
->logTagTableSizes
),
62 threadHistory(params
->numThreads
),
63 logUResetPeriod(params
->logUResetPeriod
),
64 useAltOnNaBits(params
->useAltOnNaBits
)
66 // Current method for periodically resetting the u counter bits only
67 // works for 1 or 2 bits
68 // Also make sure that it is not 0
69 assert(tagTableUBits
<= 2 && (tagTableUBits
> 0));
71 // we use int type for the path history, so it cannot be more than
73 assert(pathHistBits
<= (sizeof(int)*8));
75 // initialize the counter to half of the period
76 assert(logUResetPeriod
!= 0);
77 tCounter
= ULL(1) << (logUResetPeriod
- 1);
79 assert(params
->histBufferSize
> params
->maxHist
* 2);
80 useAltPredForNewlyAllocated
= 0;
82 for (auto& history
: threadHistory
) {
84 history
.globalHistory
= new uint8_t[histBufferSize
];
85 history
.gHist
= history
.globalHistory
;
86 memset(history
.gHist
, 0, histBufferSize
);
90 histLengths
= new int [nHistoryTables
+1];
91 histLengths
[1] = minHist
;
92 histLengths
[nHistoryTables
] = maxHist
;
94 for (int i
= 2; i
<= nHistoryTables
; i
++) {
95 histLengths
[i
] = (int) (((double) minHist
*
96 pow ((double) (maxHist
) / (double) minHist
,
97 (double) (i
- 1) / (double) ((nHistoryTables
- 1))))
101 assert(tagTableTagWidths
.size() == (nHistoryTables
+1));
102 assert(logTagTableSizes
.size() == (nHistoryTables
+1));
104 // First entry is for the Bimodal table and it is untagged in this
106 assert(tagTableTagWidths
[0] == 0);
108 for (auto& history
: threadHistory
) {
109 history
.computeIndices
= new FoldedHistory
[nHistoryTables
+1];
110 history
.computeTags
[0] = new FoldedHistory
[nHistoryTables
+1];
111 history
.computeTags
[1] = new FoldedHistory
[nHistoryTables
+1];
113 for (int i
= 1; i
<= nHistoryTables
; i
++) {
114 history
.computeIndices
[i
].init(
115 histLengths
[i
], (logTagTableSizes
[i
]));
116 history
.computeTags
[0][i
].init(
117 history
.computeIndices
[i
].origLength
, tagTableTagWidths
[i
]);
118 history
.computeTags
[1][i
].init(
119 history
.computeIndices
[i
].origLength
, tagTableTagWidths
[i
]-1);
120 DPRINTF(Tage
, "HistLength:%d, TTSize:%d, TTTWidth:%d\n",
121 histLengths
[i
], logTagTableSizes
[i
], tagTableTagWidths
[i
]);
125 const uint64_t bimodalTableSize
= ULL(1) << logTagTableSizes
[0];
126 btablePrediction
.resize(bimodalTableSize
, false);
127 btableHysteresis
.resize(bimodalTableSize
>> logRatioBiModalHystEntries
,
130 gtable
= new TageEntry
*[nHistoryTables
+ 1];
131 for (int i
= 1; i
<= nHistoryTables
; i
++) {
132 gtable
[i
] = new TageEntry
[1<<(logTagTableSizes
[i
])];
135 tableIndices
= new int [nHistoryTables
+1];
136 tableTags
= new int [nHistoryTables
+1];
140 TAGE::bindex(Addr pc_in
) const
142 return ((pc_in
>> instShiftAmt
) & ((ULL(1) << (logTagTableSizes
[0])) - 1));
146 TAGE::F(int A
, int size
, int bank
) const
150 A
= A
& ((ULL(1) << size
) - 1);
151 A1
= (A
& ((ULL(1) << logTagTableSizes
[bank
]) - 1));
152 A2
= (A
>> logTagTableSizes
[bank
]);
153 A2
= ((A2
<< bank
) & ((ULL(1) << logTagTableSizes
[bank
]) - 1))
154 + (A2
>> (logTagTableSizes
[bank
] - bank
));
156 A
= ((A
<< bank
) & ((ULL(1) << logTagTableSizes
[bank
]) - 1))
157 + (A
>> (logTagTableSizes
[bank
] - bank
));
162 // gindex computes a full hash of pc, ghist and pathHist
164 TAGE::gindex(ThreadID tid
, Addr pc
, int bank
) const
167 int hlen
= (histLengths
[bank
] > pathHistBits
) ? pathHistBits
:
169 const Addr shiftedPc
= pc
>> instShiftAmt
;
172 (shiftedPc
>> ((int) abs(logTagTableSizes
[bank
] - bank
) + 1)) ^
173 threadHistory
[tid
].computeIndices
[bank
].comp
^
174 F(threadHistory
[tid
].pathHist
, hlen
, bank
);
176 return (index
& ((ULL(1) << (logTagTableSizes
[bank
])) - 1));
182 TAGE::gtag(ThreadID tid
, Addr pc
, int bank
) const
184 int tag
= (pc
>> instShiftAmt
) ^
185 threadHistory
[tid
].computeTags
[0][bank
].comp
^
186 (threadHistory
[tid
].computeTags
[1][bank
].comp
<< 1);
188 return (tag
& ((ULL(1) << tagTableTagWidths
[bank
]) - 1));
192 // Up-down saturating counter
194 TAGE::ctrUpdate(int8_t & ctr
, bool taken
, int nbits
)
196 assert(nbits
<= sizeof(int8_t) << 3);
198 if (ctr
< ((1 << (nbits
- 1)) - 1))
201 if (ctr
> -(1 << (nbits
- 1)))
206 // Up-down unsigned saturating counter
208 TAGE::unsignedCtrUpdate(uint8_t & ctr
, bool up
, unsigned nbits
)
210 assert(nbits
<= sizeof(uint8_t) << 3);
212 if (ctr
< ((1 << nbits
) - 1))
220 // Bimodal prediction
222 TAGE::getBimodePred(Addr pc
, TageBranchInfo
* bi
) const
224 return btablePrediction
[bi
->bimodalIndex
];
228 // Update the bimodal predictor: a hysteresis bit is shared among N prediction
229 // bits (N = 2 ^ logRatioBiModalHystEntries)
231 TAGE::baseUpdate(Addr pc
, bool taken
, TageBranchInfo
* bi
)
233 int inter
= (btablePrediction
[bi
->bimodalIndex
] << 1)
234 + btableHysteresis
[bi
->bimodalIndex
>> logRatioBiModalHystEntries
];
238 } else if (inter
> 0) {
241 const bool pred
= inter
>> 1;
242 const bool hyst
= inter
& 1;
243 btablePrediction
[bi
->bimodalIndex
] = pred
;
244 btableHysteresis
[bi
->bimodalIndex
>> logRatioBiModalHystEntries
] = hyst
;
245 DPRINTF(Tage
, "Updating branch %lx, pred:%d, hyst:%d\n", pc
, pred
, hyst
);
248 // shifting the global history: we manage the history in a big table in order
249 // to reduce simulation time
251 TAGE::updateGHist(uint8_t * &h
, bool dir
, uint8_t * tab
, int &pt
)
254 DPRINTF(Tage
, "Rolling over the histories\n");
255 // Copy beginning of globalHistoryBuffer to end, such that
256 // the last maxHist outcomes are still reachable
257 // through pt[0 .. maxHist - 1].
258 for (int i
= 0; i
< maxHist
; i
++)
259 tab
[histBufferSize
- maxHist
+ i
] = tab
[i
];
260 pt
= histBufferSize
- maxHist
;
265 h
[0] = (dir
) ? 1 : 0;
268 // Get GHR for hashing indirect predictor
269 // Build history backwards from pointer in
272 TAGE::getGHR(ThreadID tid
, void *bp_history
) const
274 TageBranchInfo
* bi
= static_cast<TageBranchInfo
*>(bp_history
);
276 for (unsigned i
= 0; i
< 32; i
++) {
277 // Make sure we don't go out of bounds
278 int gh_offset
= bi
->ptGhist
+ i
;
279 assert(&(threadHistory
[tid
].globalHistory
[gh_offset
]) <
280 threadHistory
[tid
].globalHistory
+ histBufferSize
);
281 val
|= ((threadHistory
[tid
].globalHistory
[gh_offset
] & 0x1) << i
);
289 TAGE::predict(ThreadID tid
, Addr branch_pc
, bool cond_branch
, void* &b
)
291 TageBranchInfo
*bi
= new TageBranchInfo(nHistoryTables
+1);
293 return tagePredict(tid
, branch_pc
, cond_branch
, bi
);
297 TAGE::tagePredict(ThreadID tid
, Addr branch_pc
,
298 bool cond_branch
, TageBranchInfo
* bi
)
301 bool pred_taken
= true;
306 // computes the table addresses and the partial tags
307 for (int i
= 1; i
<= nHistoryTables
; i
++) {
308 tableIndices
[i
] = gindex(tid
, pc
, i
);
309 bi
->tableIndices
[i
] = tableIndices
[i
];
310 tableTags
[i
] = gtag(tid
, pc
, i
);
311 bi
->tableTags
[i
] = tableTags
[i
];
314 bi
->bimodalIndex
= bindex(pc
);
318 //Look for the bank with longest matching history
319 for (int i
= nHistoryTables
; i
> 0; i
--) {
320 if (gtable
[i
][tableIndices
[i
]].tag
== tableTags
[i
]) {
322 bi
->hitBankIndex
= tableIndices
[bi
->hitBank
];
326 //Look for the alternate bank
327 for (int i
= bi
->hitBank
- 1; i
> 0; i
--) {
328 if (gtable
[i
][tableIndices
[i
]].tag
== tableTags
[i
]) {
330 bi
->altBankIndex
= tableIndices
[bi
->altBank
];
334 //computes the prediction and the alternate prediction
335 if (bi
->hitBank
> 0) {
336 if (bi
->altBank
> 0) {
338 gtable
[bi
->altBank
][tableIndices
[bi
->altBank
]].ctr
>= 0;
340 bi
->altTaken
= getBimodePred(pc
, bi
);
343 bi
->longestMatchPred
=
344 gtable
[bi
->hitBank
][tableIndices
[bi
->hitBank
]].ctr
>= 0;
346 abs(2 * gtable
[bi
->hitBank
][bi
->hitBankIndex
].ctr
+ 1) <= 1;
348 //if the entry is recognized as a newly allocated entry and
349 //useAltPredForNewlyAllocated is positive use the alternate
351 if ((useAltPredForNewlyAllocated
< 0)
353 gtable
[bi
->hitBank
][tableIndices
[bi
->hitBank
]].ctr
+ 1) > 1)
354 bi
->tagePred
= bi
->longestMatchPred
;
356 bi
->tagePred
= bi
->altTaken
;
358 bi
->altTaken
= getBimodePred(pc
, bi
);
359 bi
->tagePred
= bi
->altTaken
;
360 bi
->longestMatchPred
= bi
->altTaken
;
362 //end TAGE prediction
364 pred_taken
= (bi
->tagePred
);
365 DPRINTF(Tage
, "Predict for %lx: taken?:%d, tagePred:%d, altPred:%d\n",
366 branch_pc
, pred_taken
, bi
->tagePred
, bi
->altTaken
);
368 bi
->branchPC
= branch_pc
;
369 bi
->condBranch
= cond_branch
;
375 TAGE::update(ThreadID tid
, Addr branch_pc
, bool taken
, void* bp_history
,
380 TageBranchInfo
*bi
= static_cast<TageBranchInfo
*>(bp_history
);
383 // This restores the global history, then update it
384 // and recomputes the folded histories.
385 squash(tid
, taken
, bp_history
);
389 int nrand
= random_mt
.random
<int>(0,3);
390 if (bi
->condBranch
) {
391 DPRINTF(Tage
, "Updating tables for branch:%lx; taken?:%d\n",
393 condBranchUpdate(branch_pc
, taken
, bi
, nrand
);
401 TAGE::condBranchUpdate(Addr branch_pc
, bool taken
,
402 TageBranchInfo
* bi
, int nrand
)
405 // try to allocate a new entries only if prediction was wrong
406 bool longest_match_pred
= false;
407 bool alloc
= (bi
->tagePred
!= taken
) && (bi
->hitBank
< nHistoryTables
);
408 if (bi
->hitBank
> 0) {
409 // Manage the selection between longest matching and alternate
410 // matching for "pseudo"-newly allocated longest matching entry
411 longest_match_pred
= bi
->longestMatchPred
;
412 bool PseudoNewAlloc
= bi
->pseudoNewAlloc
;
413 // an entry is considered as newly allocated if its prediction
415 if (PseudoNewAlloc
) {
416 if (longest_match_pred
== taken
) {
419 // if it was delivering the correct prediction, no need to
420 // allocate new entry even if the overall prediction was false
421 if (longest_match_pred
!= bi
->altTaken
) {
422 ctrUpdate(useAltPredForNewlyAllocated
,
423 bi
->altTaken
== taken
, useAltOnNaBits
);
429 // is there some "unuseful" entry to allocate
431 for (int i
= nHistoryTables
; i
> bi
->hitBank
; i
--) {
432 if (gtable
[i
][bi
->tableIndices
[i
]].u
< min
) {
433 min
= gtable
[i
][bi
->tableIndices
[i
]].u
;
437 // we allocate an entry with a longer history
438 // to avoid ping-pong, we do not choose systematically the next
439 // entry, but among the 3 next entries
441 ((ULL(1) << (nHistoryTables
- bi
->hitBank
- 1)) - 1);
442 int X
= bi
->hitBank
+ 1;
448 // No entry available, forces one to be available
450 gtable
[X
][bi
->tableIndices
[X
]].u
= 0;
454 //Allocate only one entry
455 for (int i
= X
; i
<= nHistoryTables
; i
++) {
456 if ((gtable
[i
][bi
->tableIndices
[i
]].u
== 0)) {
457 gtable
[i
][bi
->tableIndices
[i
]].tag
= bi
->tableTags
[i
];
458 gtable
[i
][bi
->tableIndices
[i
]].ctr
= (taken
) ? 0 : -1;
463 //periodic reset of u: reset is not complete but bit by bit
465 if ((tCounter
& ((ULL(1) << logUResetPeriod
) - 1)) == 0) {
466 // reset least significant bit
467 // most significant bit becomes least significant bit
468 for (int i
= 1; i
<= nHistoryTables
; i
++) {
469 for (int j
= 0; j
< (ULL(1) << logTagTableSizes
[i
]); j
++) {
470 gtable
[i
][j
].u
= gtable
[i
][j
].u
>> 1;
475 if (bi
->hitBank
> 0) {
476 DPRINTF(Tage
, "Updating tag table entry (%d,%d) for branch %lx\n",
477 bi
->hitBank
, bi
->hitBankIndex
, branch_pc
);
478 ctrUpdate(gtable
[bi
->hitBank
][bi
->hitBankIndex
].ctr
, taken
,
479 tagTableCounterBits
);
480 // if the provider entry is not certified to be useful also update
481 // the alternate prediction
482 if (gtable
[bi
->hitBank
][bi
->hitBankIndex
].u
== 0) {
483 if (bi
->altBank
> 0) {
484 ctrUpdate(gtable
[bi
->altBank
][bi
->altBankIndex
].ctr
, taken
,
485 tagTableCounterBits
);
486 DPRINTF(Tage
, "Updating tag table entry (%d,%d) for"
487 " branch %lx\n", bi
->hitBank
, bi
->hitBankIndex
,
490 if (bi
->altBank
== 0) {
491 baseUpdate(branch_pc
, taken
, bi
);
495 // update the u counter
496 if (bi
->tagePred
!= bi
->altTaken
) {
497 unsignedCtrUpdate(gtable
[bi
->hitBank
][bi
->hitBankIndex
].u
,
498 bi
->tagePred
== taken
, tagTableUBits
);
501 baseUpdate(branch_pc
, taken
, bi
);
506 TAGE::updateHistories(ThreadID tid
, Addr branch_pc
, bool taken
, void* b
)
508 TageBranchInfo
* bi
= (TageBranchInfo
*)(b
);
509 ThreadHistory
& tHist
= threadHistory
[tid
];
511 bool pathbit
= ((branch_pc
>> instShiftAmt
) & 1);
512 //on a squash, return pointers to this and recompute indices.
513 //update user history
514 updateGHist(tHist
.gHist
, taken
, tHist
.globalHistory
, tHist
.ptGhist
);
515 tHist
.pathHist
= (tHist
.pathHist
<< 1) + pathbit
;
516 tHist
.pathHist
= (tHist
.pathHist
& ((ULL(1) << pathHistBits
) - 1));
518 bi
->ptGhist
= tHist
.ptGhist
;
519 bi
->pathHist
= tHist
.pathHist
;
520 //prepare next index and tag computations for user branchs
521 for (int i
= 1; i
<= nHistoryTables
; i
++)
523 bi
->ci
[i
] = tHist
.computeIndices
[i
].comp
;
524 bi
->ct0
[i
] = tHist
.computeTags
[0][i
].comp
;
525 bi
->ct1
[i
] = tHist
.computeTags
[1][i
].comp
;
526 tHist
.computeIndices
[i
].update(tHist
.gHist
);
527 tHist
.computeTags
[0][i
].update(tHist
.gHist
);
528 tHist
.computeTags
[1][i
].update(tHist
.gHist
);
530 DPRINTF(Tage
, "Updating global histories with branch:%lx; taken?:%d, "
531 "path Hist: %x; pointer:%d\n", branch_pc
, taken
, tHist
.pathHist
,
536 TAGE::squash(ThreadID tid
, bool taken
, void *bp_history
)
538 TageBranchInfo
* bi
= (TageBranchInfo
*)(bp_history
);
539 ThreadHistory
& tHist
= threadHistory
[tid
];
540 DPRINTF(Tage
, "Restoring branch info: %lx; taken? %d; PathHistory:%x, "
541 "pointer:%d\n", bi
->branchPC
,taken
, bi
->pathHist
, bi
->ptGhist
);
542 tHist
.pathHist
= bi
->pathHist
;
543 tHist
.ptGhist
= bi
->ptGhist
;
544 tHist
.gHist
= &(tHist
.globalHistory
[tHist
.ptGhist
]);
545 tHist
.gHist
[0] = (taken
? 1 : 0);
546 for (int i
= 1; i
<= nHistoryTables
; i
++) {
547 tHist
.computeIndices
[i
].comp
= bi
->ci
[i
];
548 tHist
.computeTags
[0][i
].comp
= bi
->ct0
[i
];
549 tHist
.computeTags
[1][i
].comp
= bi
->ct1
[i
];
550 tHist
.computeIndices
[i
].update(tHist
.gHist
);
551 tHist
.computeTags
[0][i
].update(tHist
.gHist
);
552 tHist
.computeTags
[1][i
].update(tHist
.gHist
);
557 TAGE::squash(ThreadID tid
, void *bp_history
)
559 TageBranchInfo
* bi
= (TageBranchInfo
*)(bp_history
);
560 DPRINTF(Tage
, "Deleting branch info: %lx\n", bi
->branchPC
);
565 TAGE::lookup(ThreadID tid
, Addr branch_pc
, void* &bp_history
)
567 bool retval
= predict(tid
, branch_pc
, true, bp_history
);
569 DPRINTF(Tage
, "Lookup branch: %lx; predict:%d\n", branch_pc
, retval
);
570 updateHistories(tid
, branch_pc
, retval
, bp_history
);
571 assert(threadHistory
[tid
].gHist
==
572 &threadHistory
[tid
].globalHistory
[threadHistory
[tid
].ptGhist
]);
578 TAGE::btbUpdate(ThreadID tid
, Addr branch_pc
, void* &bp_history
)
580 TageBranchInfo
* bi
= (TageBranchInfo
*) bp_history
;
581 ThreadHistory
& tHist
= threadHistory
[tid
];
582 DPRINTF(Tage
, "BTB miss resets prediction: %lx\n", branch_pc
);
583 assert(tHist
.gHist
== &tHist
.globalHistory
[tHist
.ptGhist
]);
585 for (int i
= 1; i
<= nHistoryTables
; i
++) {
586 tHist
.computeIndices
[i
].comp
= bi
->ci
[i
];
587 tHist
.computeTags
[0][i
].comp
= bi
->ct0
[i
];
588 tHist
.computeTags
[1][i
].comp
= bi
->ct1
[i
];
589 tHist
.computeIndices
[i
].update(tHist
.gHist
);
590 tHist
.computeTags
[0][i
].update(tHist
.gHist
);
591 tHist
.computeTags
[1][i
].update(tHist
.gHist
);
596 TAGE::uncondBranch(ThreadID tid
, Addr br_pc
, void* &bp_history
)
598 DPRINTF(Tage
, "UnConditionalBranch: %lx\n", br_pc
);
599 predict(tid
, br_pc
, false, bp_history
);
600 updateHistories(tid
, br_pc
, true, bp_history
);
601 assert(threadHistory
[tid
].gHist
==
602 &threadHistory
[tid
].globalHistory
[threadHistory
[tid
].ptGhist
]);
608 return new TAGE(this);