2 * Copyright (c) 2014 The University of Wisconsin
4 * Copyright (c) 2006 INRIA (Institut National de Recherche en
5 * Informatique et en Automatique / French National Research Institute
6 * for Computer Science and Applied Mathematics)
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions are
12 * met: redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer;
14 * redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution;
17 * neither the name of the copyright holders nor the names of its
18 * contributors may be used to endorse or promote products derived from
19 * this software without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 * Authors: Vignyan Reddy, Dibakar Gope and Arthur Perais,
34 * from André Seznec's code.
38 * Implementation of a L-TAGE branch predictor. TAGE is a global-history based
39 * branch predictor. It features a PC-indexed bimodal predictor and N
40 * partially tagged tables, indexed with a hash of the PC and the global
41 * branch history. The different lengths of global branch history used to
42 * index the partially tagged tables grow geometrically. A small path history
43 * is also used in the hash. L-TAGE also features a loop predictor that records
44 * iteration count of loops and predicts accordingly.
46 * All TAGE tables are accessed in parallel, and the one using the longest
47 * history that matches provides the prediction (some exceptions apply).
48 * Entries are allocated in components using a longer history than the
49 * one that predicted when the prediction is incorrect.
52 #ifndef __CPU_PRED_LTAGE
53 #define __CPU_PRED_LTAGE
57 #include "base/types.hh"
58 #include "cpu/pred/bpred_unit.hh"
59 #include "params/LTAGE.hh"
61 class LTAGE: public BPredUnit
64 LTAGE(const LTAGEParams *params);
66 // Base class methods.
67 void uncondBranch(ThreadID tid, Addr br_pc, void* &bp_history) override;
68 bool lookup(ThreadID tid, Addr branch_addr, void* &bp_history) override;
69 void btbUpdate(ThreadID tid, Addr branch_addr, void* &bp_history) override;
70 void update(ThreadID tid, Addr branch_addr, bool taken, void *bp_history,
71 bool squashed) override;
72 void squash(ThreadID tid, void *bp_history) override;
73 unsigned getGHR(ThreadID tid, void *bp_history) const override;
76 // Prediction Structures
77 // Loop Predictor Entry
82 uint16_t currentIterSpec;
88 LoopEntry() : numIter(0), currentIter(0), currentIterSpec(0),
89 confidence(0), tag(0), age(0), dir(0) { }
98 TageEntry() : ctr(0), tag(0), u(0) { }
101 // Folded History Table - compressed history
102 // to mix with instruction PC to index partially
111 void init(int original_length, int compressed_length)
114 origLength = original_length;
115 compLength = compressed_length;
116 outpoint = original_length % compressed_length;
119 void update(uint8_t * h)
121 comp = (comp << 1) | h[0];
122 comp ^= h[origLength] << outpoint;
123 comp ^= (comp >> compLength);
124 comp &= (ULL(1) << compLength) - 1;
128 // Primary branch history entry
139 uint16_t currentIter;
148 bool longestMatchPred;
152 // Pointer to dynamically allocated storage
153 // to save table indices and folded histories.
154 // To do one call to new instead of five.
157 // Pointers to actual saved array within the dynamically
158 // allocated storage.
166 : pathHist(0), ptGhist(0),
167 hitBank(0), hitBankIndex(0),
168 altBank(0), altBankIndex(0),
169 bimodalIndex(0), loopTag(0), currentIter(0),
170 tagePred(false), altTaken(false), loopPred(false),
171 loopPredValid(false), loopIndex(0), loopHit(0),
172 condBranch(false), longestMatchPred(false),
173 pseudoNewAlloc(false), branchPC(0)
175 storage = new int [sz * 5];
176 tableIndices = storage;
177 tableTags = storage + sz;
190 * Computes the index used to access the
192 * @param pc_in The unshifted branch PC.
194 int bindex(Addr pc_in) const;
197 * Computes the index used to access the
199 * @param pc_in The unshifted branch PC.
201 int lindex(Addr pc_in) const;
204 * Computes the index used to access a
205 * partially tagged table.
206 * @param tid The thread ID used to select the
207 * global histories to use.
208 * @param pc The unshifted branch PC.
209 * @param bank The partially tagged table to access.
211 inline int gindex(ThreadID tid, Addr pc, int bank) const;
214 * Utility function to shuffle the path history
215 * depending on which tagged table we are accessing.
216 * @param phist The path history.
217 * @param size Number of path history bits to use.
218 * @param bank The partially tagged table to access.
220 int F(int phist, int size, int bank) const;
223 * Computes the partial tag of a tagged table.
224 * @param tid the thread ID used to select the
225 * global histories to use.
226 * @param pc The unshifted branch PC.
227 * @param bank The partially tagged table to access.
229 inline uint16_t gtag(ThreadID tid, Addr pc, int bank) const;
232 * Updates a direction counter based on the actual
234 * @param ctr Reference to counter to update.
235 * @param taken Actual branch outcome.
236 * @param nbits Counter width.
238 void ctrUpdate(int8_t & ctr, bool taken, int nbits);
241 * Get a branch prediction from the bimodal
243 * @param pc The unshifted branch PC.
244 * @param bi Pointer to information on the
247 bool getBimodePred(Addr pc, BranchInfo* bi) const;
250 * Updates the bimodal predictor.
251 * @param pc The unshifted branch PC.
252 * @param taken The actual branch outcome.
253 * @param bi Pointer to information on the prediction
254 * recorded at prediction time.
256 void baseUpdate(Addr pc, bool taken, BranchInfo* bi);
259 * Get a branch prediction from the loop
261 * @param pc The unshifted branch PC.
262 * @param bi Pointer to information on the
265 bool getLoop(Addr pc, BranchInfo* bi) const;
268 * Updates the loop predictor.
269 * @param pc The unshifted branch PC.
270 * @param taken The actual branch outcome.
271 * @param bi Pointer to information on the
272 * prediction recorded at prediction time.
274 void loopUpdate(Addr pc, bool Taken, BranchInfo* bi);
277 * (Speculatively) updates the global branch history.
278 * @param h Reference to pointer to global branch history.
279 * @param dir (Predicted) outcome to update the histories
282 * @param PT Reference to path history.
284 void updateGHist(uint8_t * &h, bool dir, uint8_t * tab, int &PT);
287 * Get a branch prediction from L-TAGE. *NOT* an override of
288 * BpredUnit::predict().
289 * @param tid The thread ID to select the global
291 * @param branch_pc The unshifted branch PC.
292 * @param cond_branch True if the branch is conditional.
293 * @param b Reference to wrapping pointer to allow storing
294 * derived class prediction information in the base class.
296 bool predict(ThreadID tid, Addr branch_pc, bool cond_branch, void* &b);
299 * Update L-TAGE. Called at execute to repair histories on a misprediction
300 * and at commit to update the tables.
301 * @param tid The thread ID to select the global
303 * @param branch_pc The unshifted branch PC.
304 * @param taken Actual branch outcome.
305 * @param bi Pointer to information on the prediction
306 * recorded at prediction time.
308 void update(ThreadID tid, Addr branch_pc, bool taken, BranchInfo* bi);
311 * (Speculatively) updates global histories (path and direction).
312 * Also recomputes compressed (folded) histories based on the
314 * @param tid The thread ID to select the histories
316 * @param branch_pc The unshifted branch PC.
317 * @param taken (Predicted) branch direction.
318 * @param b Wrapping pointer to BranchInfo (to allow
319 * storing derived class prediction information in the
322 void updateHistories(ThreadID tid, Addr branch_pc, bool taken, void* b);
325 * Restores speculatively updated path and direction histories.
326 * Also recomputes compressed (folded) histories based on the
327 * correct branch outcome.
328 * This version of squash() is called once on a branch misprediction.
329 * @param tid The Thread ID to select the histories to rollback.
330 * @param taken The correct branch outcome.
331 * @param bp_history Wrapping pointer to BranchInfo (to allow
332 * storing derived class prediction information in the
334 * @post bp_history points to valid memory.
336 void squash(ThreadID tid, bool taken, void *bp_history);
339 * Speculatively updates the loop predictor
341 * @param pc The unshifted branch PC.
342 * @param taken The predicted branch outcome.
343 * @param bi Pointer to information on the prediction
344 * recorded at prediction time.
346 void specLoopUpdate(Addr pc, bool taken, BranchInfo* bi);
348 const unsigned logSizeBiMP;
349 const unsigned logRatioBiModalHystEntries;
350 const unsigned logSizeTagTables;
351 const unsigned logSizeLoopPred;
352 const unsigned nHistoryTables;
353 const unsigned tagTableCounterBits;
354 const unsigned histBufferSize;
355 const unsigned minHist;
356 const unsigned maxHist;
357 const unsigned minTagWidth;
359 std::vector<bool> btablePrediction;
360 std::vector<bool> btableHysteresis;
364 // Keep per-thread histories to
366 struct ThreadHistory {
367 // Speculative path history
368 // (LSB of branch address)
371 // Speculative branch direction
372 // history (circular buffer)
373 // @TODO Convert to std::vector<bool>
374 uint8_t *globalHistory;
376 // Pointer to most recent branch outcome
379 // Index to most recent branch outcome
382 // Speculative folded histories.
383 FoldedHistory *computeIndices;
384 FoldedHistory *computeTags[2];
387 std::vector<ThreadHistory> threadHistory;
390 int tagTableSizes[15];
395 int8_t loopUseCounter;
396 int8_t useAltPredForNewlyAllocated;
401 #endif // __CPU_PRED_LTAGE