cpu: Fixed ratio of pred to hyst bits for LTAGE Bimodal
[gem5.git] / src / cpu / pred / ltage.hh
1 /*
2 * Copyright (c) 2014 The University of Wisconsin
3 *
4 * Copyright (c) 2006 INRIA (Institut National de Recherche en
5 * Informatique et en Automatique / French National Research Institute
6 * for Computer Science and Applied Mathematics)
7 *
8 * All rights reserved.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions are
12 * met: redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer;
14 * redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution;
17 * neither the name of the copyright holders nor the names of its
18 * contributors may be used to endorse or promote products derived from
19 * this software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 *
33 * Authors: Vignyan Reddy, Dibakar Gope and Arthur Perais,
34 * from André Seznec's code.
35 */
36
37 /* @file
38 * Implementation of a L-TAGE branch predictor. TAGE is a global-history based
39 * branch predictor. It features a PC-indexed bimodal predictor and N
40 * partially tagged tables, indexed with a hash of the PC and the global
41 * branch history. The different lengths of global branch history used to
42 * index the partially tagged tables grow geometrically. A small path history
43 * is also used in the hash. L-TAGE also features a loop predictor that records
44 * iteration count of loops and predicts accordingly.
45 *
46 * All TAGE tables are accessed in parallel, and the one using the longest
47 * history that matches provides the prediction (some exceptions apply).
48 * Entries are allocated in components using a longer history than the
49 * one that predicted when the prediction is incorrect.
50 */
51
52 #ifndef __CPU_PRED_LTAGE
53 #define __CPU_PRED_LTAGE
54
55 #include <vector>
56
57 #include "base/types.hh"
58 #include "cpu/pred/bpred_unit.hh"
59 #include "params/LTAGE.hh"
60
61 class LTAGE: public BPredUnit
62 {
63 public:
64 LTAGE(const LTAGEParams *params);
65
66 // Base class methods.
67 void uncondBranch(ThreadID tid, Addr br_pc, void* &bp_history) override;
68 bool lookup(ThreadID tid, Addr branch_addr, void* &bp_history) override;
69 void btbUpdate(ThreadID tid, Addr branch_addr, void* &bp_history) override;
70 void update(ThreadID tid, Addr branch_addr, bool taken, void *bp_history,
71 bool squashed) override;
72 void squash(ThreadID tid, void *bp_history) override;
73 unsigned getGHR(ThreadID tid, void *bp_history) const override;
74
75 private:
76 // Prediction Structures
77 // Loop Predictor Entry
78 struct LoopEntry
79 {
80 uint16_t numIter;
81 uint16_t currentIter;
82 uint16_t currentIterSpec;
83 uint8_t confidence;
84 uint16_t tag;
85 uint8_t age;
86 bool dir;
87
88 LoopEntry() : numIter(0), currentIter(0), currentIterSpec(0),
89 confidence(0), tag(0), age(0), dir(0) { }
90 };
91
92 // Tage Entry
93 struct TageEntry
94 {
95 int8_t ctr;
96 uint16_t tag;
97 int8_t u;
98 TageEntry() : ctr(0), tag(0), u(0) { }
99 };
100
101 // Folded History Table - compressed history
102 // to mix with instruction PC to index partially
103 // tagged tables.
104 struct FoldedHistory
105 {
106 unsigned comp;
107 int compLength;
108 int origLength;
109 int outpoint;
110
111 void init(int original_length, int compressed_length)
112 {
113 comp = 0;
114 origLength = original_length;
115 compLength = compressed_length;
116 outpoint = original_length % compressed_length;
117 }
118
119 void update(uint8_t * h)
120 {
121 comp = (comp << 1) | h[0];
122 comp ^= h[origLength] << outpoint;
123 comp ^= (comp >> compLength);
124 comp &= (ULL(1) << compLength) - 1;
125 }
126 };
127
128 // Primary branch history entry
129 struct BranchInfo
130 {
131 int pathHist;
132 int ptGhist;
133 int hitBank;
134 int hitBankIndex;
135 int altBank;
136 int altBankIndex;
137 int bimodalIndex;
138 int loopTag;
139 uint16_t currentIter;
140
141 bool tagePred;
142 bool altTaken;
143 bool loopPred;
144 bool loopPredValid;
145 int loopIndex;
146 int loopHit;
147 bool condBranch;
148 bool longestMatchPred;
149 bool pseudoNewAlloc;
150 Addr branchPC;
151
152 // Pointer to dynamically allocated storage
153 // to save table indices and folded histories.
154 // To do one call to new instead of five.
155 int *storage;
156
157 // Pointers to actual saved array within the dynamically
158 // allocated storage.
159 int *tableIndices;
160 int *tableTags;
161 int *ci;
162 int *ct0;
163 int *ct1;
164
165 BranchInfo(int sz)
166 : pathHist(0), ptGhist(0),
167 hitBank(0), hitBankIndex(0),
168 altBank(0), altBankIndex(0),
169 bimodalIndex(0), loopTag(0), currentIter(0),
170 tagePred(false), altTaken(false), loopPred(false),
171 loopPredValid(false), loopIndex(0), loopHit(0),
172 condBranch(false), longestMatchPred(false),
173 pseudoNewAlloc(false), branchPC(0)
174 {
175 storage = new int [sz * 5];
176 tableIndices = storage;
177 tableTags = storage + sz;
178 ci = tableTags + sz;
179 ct0 = ci + sz;
180 ct1 = ct0 + sz;
181 }
182
183 ~BranchInfo()
184 {
185 delete[] storage;
186 }
187 };
188
189 /**
190 * Computes the index used to access the
191 * bimodal table.
192 * @param pc_in The unshifted branch PC.
193 */
194 int bindex(Addr pc_in) const;
195
196 /**
197 * Computes the index used to access the
198 * loop predictor.
199 * @param pc_in The unshifted branch PC.
200 */
201 int lindex(Addr pc_in) const;
202
203 /**
204 * Computes the index used to access a
205 * partially tagged table.
206 * @param tid The thread ID used to select the
207 * global histories to use.
208 * @param pc The unshifted branch PC.
209 * @param bank The partially tagged table to access.
210 */
211 inline int gindex(ThreadID tid, Addr pc, int bank) const;
212
213 /**
214 * Utility function to shuffle the path history
215 * depending on which tagged table we are accessing.
216 * @param phist The path history.
217 * @param size Number of path history bits to use.
218 * @param bank The partially tagged table to access.
219 */
220 int F(int phist, int size, int bank) const;
221
222 /**
223 * Computes the partial tag of a tagged table.
224 * @param tid the thread ID used to select the
225 * global histories to use.
226 * @param pc The unshifted branch PC.
227 * @param bank The partially tagged table to access.
228 */
229 inline uint16_t gtag(ThreadID tid, Addr pc, int bank) const;
230
231 /**
232 * Updates a direction counter based on the actual
233 * branch outcome.
234 * @param ctr Reference to counter to update.
235 * @param taken Actual branch outcome.
236 * @param nbits Counter width.
237 */
238 void ctrUpdate(int8_t & ctr, bool taken, int nbits);
239
240 /**
241 * Get a branch prediction from the bimodal
242 * predictor.
243 * @param pc The unshifted branch PC.
244 * @param bi Pointer to information on the
245 * prediction.
246 */
247 bool getBimodePred(Addr pc, BranchInfo* bi) const;
248
249 /**
250 * Updates the bimodal predictor.
251 * @param pc The unshifted branch PC.
252 * @param taken The actual branch outcome.
253 * @param bi Pointer to information on the prediction
254 * recorded at prediction time.
255 */
256 void baseUpdate(Addr pc, bool taken, BranchInfo* bi);
257
258 /**
259 * Get a branch prediction from the loop
260 * predictor.
261 * @param pc The unshifted branch PC.
262 * @param bi Pointer to information on the
263 * prediction.
264 */
265 bool getLoop(Addr pc, BranchInfo* bi) const;
266
267 /**
268 * Updates the loop predictor.
269 * @param pc The unshifted branch PC.
270 * @param taken The actual branch outcome.
271 * @param bi Pointer to information on the
272 * prediction recorded at prediction time.
273 */
274 void loopUpdate(Addr pc, bool Taken, BranchInfo* bi);
275
276 /**
277 * (Speculatively) updates the global branch history.
278 * @param h Reference to pointer to global branch history.
279 * @param dir (Predicted) outcome to update the histories
280 * with.
281 * @param tab
282 * @param PT Reference to path history.
283 */
284 void updateGHist(uint8_t * &h, bool dir, uint8_t * tab, int &PT);
285
286 /**
287 * Get a branch prediction from L-TAGE. *NOT* an override of
288 * BpredUnit::predict().
289 * @param tid The thread ID to select the global
290 * histories to use.
291 * @param branch_pc The unshifted branch PC.
292 * @param cond_branch True if the branch is conditional.
293 * @param b Reference to wrapping pointer to allow storing
294 * derived class prediction information in the base class.
295 */
296 bool predict(ThreadID tid, Addr branch_pc, bool cond_branch, void* &b);
297
298 /**
299 * Update L-TAGE. Called at execute to repair histories on a misprediction
300 * and at commit to update the tables.
301 * @param tid The thread ID to select the global
302 * histories to use.
303 * @param branch_pc The unshifted branch PC.
304 * @param taken Actual branch outcome.
305 * @param bi Pointer to information on the prediction
306 * recorded at prediction time.
307 */
308 void update(ThreadID tid, Addr branch_pc, bool taken, BranchInfo* bi);
309
310 /**
311 * (Speculatively) updates global histories (path and direction).
312 * Also recomputes compressed (folded) histories based on the
313 * branch direction.
314 * @param tid The thread ID to select the histories
315 * to update.
316 * @param branch_pc The unshifted branch PC.
317 * @param taken (Predicted) branch direction.
318 * @param b Wrapping pointer to BranchInfo (to allow
319 * storing derived class prediction information in the
320 * base class).
321 */
322 void updateHistories(ThreadID tid, Addr branch_pc, bool taken, void* b);
323
324 /**
325 * Restores speculatively updated path and direction histories.
326 * Also recomputes compressed (folded) histories based on the
327 * correct branch outcome.
328 * This version of squash() is called once on a branch misprediction.
329 * @param tid The Thread ID to select the histories to rollback.
330 * @param taken The correct branch outcome.
331 * @param bp_history Wrapping pointer to BranchInfo (to allow
332 * storing derived class prediction information in the
333 * base class).
334 * @post bp_history points to valid memory.
335 */
336 void squash(ThreadID tid, bool taken, void *bp_history);
337
338 /**
339 * Speculatively updates the loop predictor
340 * iteration count.
341 * @param pc The unshifted branch PC.
342 * @param taken The predicted branch outcome.
343 * @param bi Pointer to information on the prediction
344 * recorded at prediction time.
345 */
346 void specLoopUpdate(Addr pc, bool taken, BranchInfo* bi);
347
348 const unsigned logSizeBiMP;
349 const unsigned logRatioBiModalHystEntries;
350 const unsigned logSizeTagTables;
351 const unsigned logSizeLoopPred;
352 const unsigned nHistoryTables;
353 const unsigned tagTableCounterBits;
354 const unsigned histBufferSize;
355 const unsigned minHist;
356 const unsigned maxHist;
357 const unsigned minTagWidth;
358
359 std::vector<bool> btablePrediction;
360 std::vector<bool> btableHysteresis;
361 TageEntry **gtable;
362 LoopEntry *ltable;
363
364 // Keep per-thread histories to
365 // support SMT.
366 struct ThreadHistory {
367 // Speculative path history
368 // (LSB of branch address)
369 int pathHist;
370
371 // Speculative branch direction
372 // history (circular buffer)
373 // @TODO Convert to std::vector<bool>
374 uint8_t *globalHistory;
375
376 // Pointer to most recent branch outcome
377 uint8_t* gHist;
378
379 // Index to most recent branch outcome
380 int ptGhist;
381
382 // Speculative folded histories.
383 FoldedHistory *computeIndices;
384 FoldedHistory *computeTags[2];
385 };
386
387 std::vector<ThreadHistory> threadHistory;
388
389 int tagWidths[15];
390 int tagTableSizes[15];
391 int *histLengths;
392 int *tableIndices;
393 int *tableTags;
394
395 int8_t loopUseCounter;
396 int8_t useAltPredForNewlyAllocated;
397 int tCounter;
398 int logTick;
399 };
400
401 #endif // __CPU_PRED_LTAGE