cfglayout.c (function_tail_eff_head): Rename to ...
[gcc.git] / gcc / predict.c
1 /* Branch prediction routines for the GNU compiler.
2 Copyright (C) 2000, 2001, 2002 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 2, or (at your option) any later
9 version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
19 02111-1307, USA. */
20
21 /* References:
22
23 [1] "Branch Prediction for Free"
24 Ball and Larus; PLDI '93.
25 [2] "Static Branch Frequency and Program Profile Analysis"
26 Wu and Larus; MICRO-27.
27 [3] "Corpus-based Static Branch Prediction"
28 Calder, Grunwald, Lindsay, Martin, Mozer, and Zorn; PLDI '95. */
29
30
31 #include "config.h"
32 #include "system.h"
33 #include "tree.h"
34 #include "rtl.h"
35 #include "tm_p.h"
36 #include "hard-reg-set.h"
37 #include "basic-block.h"
38 #include "insn-config.h"
39 #include "regs.h"
40 #include "flags.h"
41 #include "output.h"
42 #include "function.h"
43 #include "except.h"
44 #include "toplev.h"
45 #include "recog.h"
46 #include "expr.h"
47 #include "predict.h"
48 #include "real.h"
49
50 /* real constants: 0, 1, 1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE, 0.5,
51 REAL_BB_FREQ_MAX. */
52 static REAL_VALUE_TYPE real_zero, real_one, real_almost_one, real_br_prob_base,
53 real_one_half, real_bb_freq_max;
54
55 /* Random guesstimation given names. */
56 #define PROB_NEVER (0)
57 #define PROB_VERY_UNLIKELY (REG_BR_PROB_BASE / 10 - 1)
58 #define PROB_UNLIKELY (REG_BR_PROB_BASE * 4 / 10 - 1)
59 #define PROB_EVEN (REG_BR_PROB_BASE / 2)
60 #define PROB_LIKELY (REG_BR_PROB_BASE - PROB_UNLIKELY)
61 #define PROB_VERY_LIKELY (REG_BR_PROB_BASE - PROB_VERY_UNLIKELY)
62 #define PROB_ALWAYS (REG_BR_PROB_BASE)
63
64 static bool predicted_by_p PARAMS ((basic_block,
65 enum br_predictor));
66 static void combine_predictions_for_insn PARAMS ((rtx, basic_block));
67 static void dump_prediction PARAMS ((enum br_predictor, int,
68 basic_block, int));
69 static void estimate_loops_at_level PARAMS ((struct loop *loop));
70 static void propagate_freq PARAMS ((basic_block));
71 static void estimate_bb_frequencies PARAMS ((struct loops *));
72 static void counts_to_freqs PARAMS ((void));
73 static void process_note_predictions PARAMS ((basic_block, int *, int *,
74 sbitmap *));
75 static void process_note_prediction PARAMS ((basic_block, int *, int *,
76 sbitmap *, int, int));
77 static bool last_basic_block_p PARAMS ((basic_block));
78
/* Information we hold about each branch predictor.
   Filled using information from predict.def.  */

struct predictor_info
{
  const char *const name;	/* Name used in the debugging dumps.  */
  const int hitrate;		/* Expected hitrate used by
				   predict_insn_def call.  */
  const int flags;
};

/* Use given predictor without Dempster-Shafer theory if it matches
   using first_match heuristics.  */
#define PRED_FLAG_FIRST_MATCH 1

/* Recompute hitrate in percent to our representation.  */

#define HITRATE(VAL) ((int) ((VAL) * REG_BR_PROB_BASE + 50) / 100)

/* Expand each DEF_PREDICTOR entry of predict.def into one table row,
   indexed by the corresponding enum br_predictor value.  */
#define DEF_PREDICTOR(ENUM, NAME, HITRATE, FLAGS) {NAME, HITRATE, FLAGS},
static const struct predictor_info predictor_info[]= {
#include "predict.def"

  /* Upper bound on predictors.  */
  {NULL, 0, 0}
};
#undef DEF_PREDICTOR
106 /* Return true if the one of outgoing edges is already predicted by
107 PREDICTOR. */
108
109 static bool
110 predicted_by_p (bb, predictor)
111 basic_block bb;
112 enum br_predictor predictor;
113 {
114 rtx note;
115 if (!INSN_P (bb->end))
116 return false;
117 for (note = REG_NOTES (bb->end); note; note = XEXP (note, 1))
118 if (REG_NOTE_KIND (note) == REG_BR_PRED
119 && INTVAL (XEXP (XEXP (note, 0), 0)) == (int)predictor)
120 return true;
121 return false;
122 }
123
/* Attach a REG_BR_PRED note to INSN recording that PREDICTOR predicts
   the branch to be taken with PROBABILITY (scaled by REG_BR_PROB_BASE).
   INSN must be a conditional jump; aborts otherwise.  */

void
predict_insn (insn, predictor, probability)
     rtx insn;
     int probability;
     enum br_predictor predictor;
{
  if (!any_condjump_p (insn))
    abort ();

  /* The note payload is a CONCAT of (predictor, probability); it is
     pushed onto the front of the insn's note list.  */
  REG_NOTES (insn)
    = gen_rtx_EXPR_LIST (REG_BR_PRED,
			 gen_rtx_CONCAT (VOIDmode,
					 GEN_INT ((int) predictor),
					 GEN_INT ((int) probability)),
			 REG_NOTES (insn));
}
140
141 /* Predict insn by given predictor. */
142
143 void
144 predict_insn_def (insn, predictor, taken)
145 rtx insn;
146 enum br_predictor predictor;
147 enum prediction taken;
148 {
149 int probability = predictor_info[(int) predictor].hitrate;
150
151 if (taken != TAKEN)
152 probability = REG_BR_PROB_BASE - probability;
153
154 predict_insn (insn, predictor, probability);
155 }
156
157 /* Predict edge E with given probability if possible. */
158
159 void
160 predict_edge (e, predictor, probability)
161 edge e;
162 int probability;
163 enum br_predictor predictor;
164 {
165 rtx last_insn;
166 last_insn = e->src->end;
167
168 /* We can store the branch prediction information only about
169 conditional jumps. */
170 if (!any_condjump_p (last_insn))
171 return;
172
173 /* We always store probability of branching. */
174 if (e->flags & EDGE_FALLTHRU)
175 probability = REG_BR_PROB_BASE - probability;
176
177 predict_insn (last_insn, predictor, probability);
178 }
179
180 /* Predict edge E by given predictor if possible. */
181
182 void
183 predict_edge_def (e, predictor, taken)
184 edge e;
185 enum br_predictor predictor;
186 enum prediction taken;
187 {
188 int probability = predictor_info[(int) predictor].hitrate;
189
190 if (taken != TAKEN)
191 probability = REG_BR_PROB_BASE - probability;
192
193 predict_edge (e, predictor, probability);
194 }
195
196 /* Invert all branch predictions or probability notes in the INSN. This needs
197 to be done each time we invert the condition used by the jump. */
198
199 void
200 invert_br_probabilities (insn)
201 rtx insn;
202 {
203 rtx note;
204
205 for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
206 if (REG_NOTE_KIND (note) == REG_BR_PROB)
207 XEXP (note, 0) = GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (note, 0)));
208 else if (REG_NOTE_KIND (note) == REG_BR_PRED)
209 XEXP (XEXP (note, 0), 1)
210 = GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (XEXP (note, 0), 1)));
211 }
212
/* Dump information about the branch prediction to the output file.
   PREDICTOR/PROBABILITY describe the prediction; USED tells whether it
   was used in the final combination or ignored.  No-op unless a dump
   file is open.  */

static void
dump_prediction (predictor, probability, bb, used)
     enum br_predictor predictor;
     int probability;
     basic_block bb;
     int used;
{
  edge e = bb->succ;

  if (!rtl_dump_file)
    return;

  /* Find the branch (non-fallthru) edge so its profile count can be
     reported alongside the prediction.  */
  while (e && (e->flags & EDGE_FALLTHRU))
    e = e->succ_next;

  fprintf (rtl_dump_file, " %s heuristics%s: %.1f%%",
	   predictor_info[predictor].name,
	   used ? "" : " (ignored)", probability * 100.0 / REG_BR_PROB_BASE);

  /* With profile feedback, also report measured execution counts.  */
  if (bb->count)
    {
      fprintf (rtl_dump_file, " exec ");
      fprintf (rtl_dump_file, HOST_WIDEST_INT_PRINT_DEC, bb->count);
      if (e)
	{
	  fprintf (rtl_dump_file, " hit ");
	  fprintf (rtl_dump_file, HOST_WIDEST_INT_PRINT_DEC, e->count);
	  fprintf (rtl_dump_file, " (%.1f%%)", e->count * 100.0 / bb->count);
	}
    }

  fprintf (rtl_dump_file, "\n");
}
248
/* Combine all REG_BR_PRED notes into single probability and attach REG_BR_PROB
   note if not already present.  Remove now useless REG_BR_PRED notes.  */

static void
combine_predictions_for_insn (insn, bb)
     rtx insn;
     basic_block bb;
{
  rtx prob_note = find_reg_note (insn, REG_BR_PROB, 0);
  rtx *pnote = &REG_NOTES (insn);
  rtx note;
  int best_probability = PROB_EVEN;
  int best_predictor = END_PREDICTORS;
  int combined_probability = REG_BR_PROB_BASE / 2;
  int d;
  bool first_match = false;
  bool found = false;

  if (rtl_dump_file)
    fprintf (rtl_dump_file, "Predictions for insn %i bb %i\n", INSN_UID (insn),
	     bb->index);

  /* We implement "first match" heuristics and use probability guessed
     by predictor with smallest index.  In the future we will use better
     probability combination techniques.  */
  for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
    if (REG_NOTE_KIND (note) == REG_BR_PRED)
      {
	int predictor = INTVAL (XEXP (XEXP (note, 0), 0));
	int probability = INTVAL (XEXP (XEXP (note, 0), 1));

	found = true;
	/* Predictors with smaller enum values have priority; remember
	   the strongest one seen for the first-match heuristic.  */
	if (best_predictor > predictor)
	  best_probability = probability, best_predictor = predictor;

	/* Dempster-Shafer combination of the running probability with
	   this note's probability; D is the denominator
	   c*p + (1-c)*(1-p), scaled by REG_BR_PROB_BASE.  */
	d = (combined_probability * probability
	     + (REG_BR_PROB_BASE - combined_probability)
	     * (REG_BR_PROB_BASE - probability));

	/* Use FP math to avoid overflows of 32bit integers.  */
	if (d == 0)
	  /* If one probability is 0% and one 100%, avoid division by zero.  */
	  combined_probability = REG_BR_PROB_BASE / 2;
	else
	  combined_probability = (((double) combined_probability) * probability
				  * REG_BR_PROB_BASE / d + 0.5);
      }

  /* Decide which heuristic to use.  In case we didn't match anything,
     use no_prediction heuristic, in case we did match, use either
     first match or Dempster-Shafer theory depending on the flags.  */

  if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
    first_match = true;

  if (!found)
    dump_prediction (PRED_NO_PREDICTION, combined_probability, bb, true);
  else
    {
      dump_prediction (PRED_DS_THEORY, combined_probability, bb, !first_match);
      dump_prediction (PRED_FIRST_MATCH, best_probability, bb, first_match);
    }

  if (first_match)
    combined_probability = best_probability;
  dump_prediction (PRED_COMBINED, combined_probability, bb, true);

  /* Walk the note list, dumping and splicing out every REG_BR_PRED
     note; PNOTE points at the link to the current note so deletion is
     a single pointer update.  */
  while (*pnote)
    {
      if (REG_NOTE_KIND (*pnote) == REG_BR_PRED)
	{
	  int predictor = INTVAL (XEXP (XEXP (*pnote, 0), 0));
	  int probability = INTVAL (XEXP (XEXP (*pnote, 0), 1));

	  dump_prediction (predictor, probability, bb,
			   !first_match || best_predictor == predictor);
	  *pnote = XEXP (*pnote, 1);
	}
      else
	pnote = &XEXP (*pnote, 1);
    }

  if (!prob_note)
    {
      REG_NOTES (insn)
	= gen_rtx_EXPR_LIST (REG_BR_PROB,
			     GEN_INT (combined_probability), REG_NOTES (insn));

      /* Save the prediction into CFG in case we are seeing non-degenerated
	 conditional jump.  */
      if (bb->succ->succ_next)
	{
	  BRANCH_EDGE (bb)->probability = combined_probability;
	  FALLTHRU_EDGE (bb)->probability
	    = REG_BR_PROB_BASE - combined_probability;
	}
    }
}
347
/* Statically estimate the probability that a branch will be taken.
   ??? In the next revision there will be a number of other predictors added
   from the above references.  Further, each heuristic will be factored out
   into its own function for clarity (and to facilitate the combination of
   predictions).  */

void
estimate_probability (loops_info)
     struct loops *loops_info;
{
  sbitmap *dominators, *post_dominators;
  int i;

  dominators = sbitmap_vector_alloc (n_basic_blocks, n_basic_blocks);
  post_dominators = sbitmap_vector_alloc (n_basic_blocks, n_basic_blocks);
  calculate_dominance_info (NULL, dominators, CDI_DOMINATORS);
  calculate_dominance_info (NULL, post_dominators, CDI_POST_DOMINATORS);

  /* Try to predict out blocks in a loop that are not part of a
     natural loop.  */
  for (i = 0; i < loops_info->num; i++)
    {
      int j;
      int exits;
      struct loop *loop = &loops_info->array[i];

      flow_loop_scan (loops_info, loop, LOOP_EXIT_EDGES);
      exits = loop->num_exits;

      for (j = loop->first->index; j <= loop->last->index; ++j)
	if (TEST_BIT (loop->nodes, j))
	  {
	    int header_found = 0;
	    edge e;

	    /* Bypass loop heuristics on continue statement.  These
	       statements construct loops via "non-loop" constructs
	       in the source language and are better to be handled
	       separately.  */
	    if (predicted_by_p (BASIC_BLOCK (j), PRED_CONTINUE))
	      continue;

	    /* Loop branch heuristics - predict an edge back to a
	       loop's head as taken.  */
	    for (e = BASIC_BLOCK(j)->succ; e; e = e->succ_next)
	      if (e->dest == loop->header
		  && e->src == loop->latch)
		{
		  header_found = 1;
		  predict_edge_def (e, PRED_LOOP_BRANCH, TAKEN);
		}

	    /* Loop exit heuristics - predict an edge exiting the loop if the
	       conditional has no loop header successors as not taken.  The
	       miss probability is split evenly across the EXITS exit edges.  */
	    if (!header_found)
	      for (e = BASIC_BLOCK(j)->succ; e; e = e->succ_next)
		if (e->dest->index < 0
		    || !TEST_BIT (loop->nodes, e->dest->index))
		  predict_edge
		    (e, PRED_LOOP_EXIT,
		     (REG_BR_PROB_BASE
		      - predictor_info [(int) PRED_LOOP_EXIT].hitrate)
		     / exits);
	  }
    }

  /* Attempt to predict conditional jumps using a number of heuristics.  */
  for (i = 0; i < n_basic_blocks; i++)
    {
      basic_block bb = BASIC_BLOCK (i);
      rtx last_insn = bb->end;
      rtx cond, earliest;
      edge e;

      if (GET_CODE (last_insn) != JUMP_INSN || ! any_condjump_p (last_insn))
	continue;

      for (e = bb->succ; e; e = e->succ_next)
	{
	  /* Predict early returns to be probable, as we've already taken
	     care for error returns and others are often used for fast paths
	     through the function.  */
	  if ((e->dest == EXIT_BLOCK_PTR
	       || (e->dest->succ && !e->dest->succ->succ_next
		   && e->dest->succ->dest == EXIT_BLOCK_PTR))
	      && !predicted_by_p (bb, PRED_NULL_RETURN)
	      && !predicted_by_p (bb, PRED_CONST_RETURN)
	      && !predicted_by_p (bb, PRED_NEGATIVE_RETURN)
	      && !last_basic_block_p (e->dest))
	    predict_edge_def (e, PRED_EARLY_RETURN, TAKEN);

	  /* Look for block we are guarding (ie we dominate it,
	     but it doesn't postdominate us).  */
	  if (e->dest != EXIT_BLOCK_PTR && e->dest != bb
	      && TEST_BIT (dominators[e->dest->index], e->src->index)
	      && !TEST_BIT (post_dominators[e->src->index], e->dest->index))
	    {
	      rtx insn;

	      /* The call heuristic claims that a guarded function call
		 is improbable.  This is because such calls are often used
		 to signal exceptional situations such as printing error
		 messages.  */
	      for (insn = e->dest->head; insn != NEXT_INSN (e->dest->end);
		   insn = NEXT_INSN (insn))
		if (GET_CODE (insn) == CALL_INSN
		    /* Constant and pure calls are hardly used to signal
		       something exceptional.  */
		    && ! CONST_OR_PURE_CALL_P (insn))
		  {
		    predict_edge_def (e, PRED_CALL, NOT_TAKEN);
		    break;
		  }
	    }
	}

      cond = get_condition (last_insn, &earliest);
      if (! cond)
	continue;

      /* Try "pointer heuristic."
	 A comparison ptr == 0 is predicted as false.
	 Similarly, a comparison ptr1 == ptr2 is predicted as false.  */
      if (GET_RTX_CLASS (GET_CODE (cond)) == '<'
	  && ((REG_P (XEXP (cond, 0)) && REG_POINTER (XEXP (cond, 0)))
	      || (REG_P (XEXP (cond, 1)) && REG_POINTER (XEXP (cond, 1)))))
	{
	  if (GET_CODE (cond) == EQ)
	    predict_insn_def (last_insn, PRED_POINTER, NOT_TAKEN);
	  else if (GET_CODE (cond) == NE)
	    predict_insn_def (last_insn, PRED_POINTER, TAKEN);
	}
      else

      /* Try "opcode heuristic."
	 EQ tests are usually false and NE tests are usually true.  Also,
	 most quantities are positive, so we can make the appropriate guesses
	 about signed comparisons against zero.  */
	switch (GET_CODE (cond))
	  {
	  case CONST_INT:
	    /* Unconditional branch.  */
	    predict_insn_def (last_insn, PRED_UNCONDITIONAL,
			      cond == const0_rtx ? NOT_TAKEN : TAKEN);
	    break;

	  case EQ:
	  case UNEQ:
	    /* Floating point comparisons appear to behave in a very
	       unpredictable way because of special role of = tests in
	       FP code.  */
	    if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
	      ;
	    /* Comparisons with 0 are often used for booleans and there is
	       nothing useful to predict about them.  */
	    else if (XEXP (cond, 1) == const0_rtx
		     || XEXP (cond, 0) == const0_rtx)
	      ;
	    else
	      predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, NOT_TAKEN);
	    break;

	  case NE:
	  case LTGT:
	    /* Floating point comparisons appear to behave in a very
	       unpredictable way because of special role of = tests in
	       FP code.  */
	    if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
	      ;
	    /* Comparisons with 0 are often used for booleans and there is
	       nothing useful to predict about them.  */
	    else if (XEXP (cond, 1) == const0_rtx
		     || XEXP (cond, 0) == const0_rtx)
	      ;
	    else
	      predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, TAKEN);
	    break;

	  case ORDERED:
	    predict_insn_def (last_insn, PRED_FPOPCODE, TAKEN);
	    break;

	  case UNORDERED:
	    predict_insn_def (last_insn, PRED_FPOPCODE, NOT_TAKEN);
	    break;

	  case LE:
	  case LT:
	    /* Comparison against 0/1/-1 likely tests sign; most values
	       are positive.  */
	    if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
		|| XEXP (cond, 1) == constm1_rtx)
	      predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, NOT_TAKEN);
	    break;

	  case GE:
	  case GT:
	    if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
		|| XEXP (cond, 1) == constm1_rtx)
	      predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, TAKEN);
	    break;

	  default:
	    break;
	  }
    }

  /* Attach the combined probability to each conditional jump.  */
  for (i = 0; i < n_basic_blocks; i++)
    if (GET_CODE (BLOCK_END (i)) == JUMP_INSN
	&& any_condjump_p (BLOCK_END (i))
	&& BASIC_BLOCK (i)->succ->succ_next != NULL)
      combine_predictions_for_insn (BLOCK_END (i), BASIC_BLOCK (i));

  sbitmap_vector_free (post_dominators);
  sbitmap_vector_free (dominators);

  estimate_bb_frequencies (loops_info);
}
565 \f
566 /* __builtin_expect dropped tokens into the insn stream describing expected
567 values of registers. Generate branch probabilities based off these
568 values. */
569
570 void
571 expected_value_to_br_prob ()
572 {
573 rtx insn, cond, ev = NULL_RTX, ev_reg = NULL_RTX;
574
575 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
576 {
577 switch (GET_CODE (insn))
578 {
579 case NOTE:
580 /* Look for expected value notes. */
581 if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_EXPECTED_VALUE)
582 {
583 ev = NOTE_EXPECTED_VALUE (insn);
584 ev_reg = XEXP (ev, 0);
585 delete_insn (insn);
586 }
587 continue;
588
589 case CODE_LABEL:
590 /* Never propagate across labels. */
591 ev = NULL_RTX;
592 continue;
593
594 case JUMP_INSN:
595 /* Look for simple conditional branches. If we haven't got an
596 expected value yet, no point going further. */
597 if (GET_CODE (insn) != JUMP_INSN || ev == NULL_RTX
598 || ! any_condjump_p (insn))
599 continue;
600 break;
601
602 default:
603 /* Look for insns that clobber the EV register. */
604 if (ev && reg_set_p (ev_reg, insn))
605 ev = NULL_RTX;
606 continue;
607 }
608
609 /* Collect the branch condition, hopefully relative to EV_REG. */
610 /* ??? At present we'll miss things like
611 (expected_value (eq r70 0))
612 (set r71 -1)
613 (set r80 (lt r70 r71))
614 (set pc (if_then_else (ne r80 0) ...))
615 as canonicalize_condition will render this to us as
616 (lt r70, r71)
617 Could use cselib to try and reduce this further. */
618 cond = XEXP (SET_SRC (pc_set (insn)), 0);
619 cond = canonicalize_condition (insn, cond, 0, NULL, ev_reg);
620 if (! cond || XEXP (cond, 0) != ev_reg
621 || GET_CODE (XEXP (cond, 1)) != CONST_INT)
622 continue;
623
624 /* Substitute and simplify. Given that the expression we're
625 building involves two constants, we should wind up with either
626 true or false. */
627 cond = gen_rtx_fmt_ee (GET_CODE (cond), VOIDmode,
628 XEXP (ev, 1), XEXP (cond, 1));
629 cond = simplify_rtx (cond);
630
631 /* Turn the condition into a scaled branch probability. */
632 if (cond != const_true_rtx && cond != const0_rtx)
633 abort ();
634 predict_insn_def (insn, PRED_BUILTIN_EXPECT,
635 cond == const_true_rtx ? TAKEN : NOT_TAKEN);
636 }
637 }
638 \f
639 /* Check whether this is the last basic block of function. Commonly tehre
640 is one extra common cleanup block. */
641 static bool
642 last_basic_block_p (bb)
643 basic_block bb;
644 {
645 return (bb->index == n_basic_blocks - 1
646 || (bb->index == n_basic_blocks - 2
647 && bb->succ && !bb->succ->succ_next
648 && bb->succ->dest->index == n_basic_blocks - 1));
649 }
650
/* Sets branch probabilities according to PREDiction and FLAGS.  HEADS[bb->index]
   should be index of basic block in that we need to alter branch predictions
   (i.e. the first of our dominators such that we do not post-dominate it)
   (but we fill this information on demand, so -1 may be there in case this
   was not needed yet).  */

static void
process_note_prediction (bb, heads, dominators, post_dominators, pred, flags)
     basic_block bb;
     int *heads;
     int *dominators;
     sbitmap *post_dominators;
     int pred;
     int flags;
{
  edge e;
  int y;
  bool taken;

  taken = flags & IS_TAKEN;

  if (heads[bb->index] < 0)
    {
      /* This is first time we need this field in heads array; so
	 find first dominator that we do not post-dominate (we are
	 using already known members of heads array).  */
      int ai = bb->index;
      int next_ai = dominators[bb->index];
      int head;

      /* Climb the dominator tree while BB still post-dominates the
	 current block; temporarily link each visited block to its
	 child so the fill-in loop below can walk the path back.  */
      while (heads[next_ai] < 0)
	{
	  if (!TEST_BIT (post_dominators[next_ai], bb->index))
	    break;
	  heads[next_ai] = ai;
	  ai = next_ai;
	  next_ai = dominators[next_ai];
	}
      if (!TEST_BIT (post_dominators[next_ai], bb->index))
	head = next_ai;
      else
	head = heads[next_ai];
      /* Path compression: record the found head in every block on the
	 walked path so later queries are O(1).  */
      while (next_ai != bb->index)
	{
	  next_ai = ai;
	  ai = heads[ai];
	  heads[next_ai] = head;
	}
    }
  y = heads[bb->index];

  /* Now find the edge that leads to our branch and apply the prediction.  */

  /* Out-of-range sentinel (set for the entry block) means there is no
     branch to predict.  */
  if (y == n_basic_blocks)
    return;
  for (e = BASIC_BLOCK (y)->succ; e; e = e->succ_next)
    if (e->dest->index >= 0
	&& TEST_BIT (post_dominators[e->dest->index], bb->index))
      predict_edge_def (e, pred, taken);
}
711
/* Gathers NOTE_INSN_PREDICTIONs in given basic block and turns them
   into branch probabilities.  For description of heads array, see
   process_note_prediction.  */

static void
process_note_predictions (bb, heads, dominators, post_dominators)
     basic_block bb;
     int *heads;
     int *dominators;
     sbitmap *post_dominators;
{
  rtx insn;
  edge e;

  /* Additionally, we check here for blocks with no successors.  */
  int contained_noreturn_call = 0;
  int was_bb_head = 0;
  int noreturn_block = 1;

  /* Scan the block backwards; the loop stops at the first non-note
     insn once the block head has been passed.  */
  for (insn = bb->end; insn;
       was_bb_head |= (insn == bb->head), insn = PREV_INSN (insn))
    {
      if (GET_CODE (insn) != NOTE)
	{
	  if (was_bb_head)
	    break;
	  else
	    {
	      /* Noreturn calls cause program to exit, therefore they are
		 always predicted as not taken.  */
	      if (GET_CODE (insn) == CALL_INSN
		  && find_reg_note (insn, REG_NORETURN, NULL))
		contained_noreturn_call = 1;
	      continue;
	    }
	}
      if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_PREDICTION)
	{
	  int alg = (int) NOTE_PREDICTION_ALG (insn);
	  /* Process single prediction note.  */
	  process_note_prediction (bb,
				   heads,
				   dominators,
				   post_dominators,
				   alg, (int) NOTE_PREDICTION_FLAGS (insn));
	  delete_insn (insn);
	}
    }
  /* A block whose outgoing edges are all fake has no real successors.
     NOTE(review): noreturn_block is computed here but never read in
     this function — looks like dead code; confirm before removing.  */
  for (e = bb->succ; e; e = e->succ_next)
    if (!(e->flags & EDGE_FAKE))
      noreturn_block = 0;
  if (contained_noreturn_call)
    {
      /* This block ended from other reasons than because of return.
	 If it is because of noreturn call, this should certainly not
	 be taken.  Otherwise it is probably some error recovery.  */
      process_note_prediction (bb,
			       heads,
			       dominators,
			       post_dominators, PRED_NORETURN, NOT_TAKEN);
    }
}
774
/* Gathers NOTE_INSN_PREDICTIONs and turns them into
   branch probabilities.  */

void
note_prediction_to_br_prob ()
{
  int i;
  sbitmap *post_dominators;
  int *dominators, *heads;

  /* To enable handling of noreturn blocks.  */
  add_noreturn_fake_exit_edges ();
  connect_infinite_loops_to_exit ();

  /* DOMINATORS maps each block to its immediate dominator; -1 (all
     bits set per int) marks "not yet computed" entries in HEADS.  */
  dominators = xmalloc (sizeof (int) * n_basic_blocks);
  memset (dominators, -1, sizeof (int) * n_basic_blocks);
  post_dominators = sbitmap_vector_alloc (n_basic_blocks, n_basic_blocks);
  calculate_dominance_info (NULL, post_dominators, CDI_POST_DOMINATORS);
  calculate_dominance_info (dominators, NULL, CDI_DOMINATORS);

  heads = xmalloc (sizeof (int) * n_basic_blocks);
  memset (heads, -1, sizeof (int) * n_basic_blocks);
  /* The first block gets the out-of-range sentinel, meaning "nothing
     to predict" (see process_note_prediction).  */
  heads[0] = n_basic_blocks;

  /* Process all prediction notes.  */

  for (i = 0; i < n_basic_blocks; ++i)
    {
      basic_block bb = BASIC_BLOCK (i);
      process_note_predictions (bb, heads, dominators, post_dominators);
    }

  sbitmap_vector_free (post_dominators);
  free (dominators);
  free (heads);

  remove_fake_edges ();
}
813 \f
/* This is used to carry information about basic blocks.  It is
   attached to the AUX field of the standard CFG block.  */

typedef struct block_info_def
{
  /* Estimated frequency of execution of basic_block.  */
  REAL_VALUE_TYPE frequency;

  /* To keep queue of basic blocks to process.  */
  basic_block next;

  /* True if block needs to be visited in propagate_freq.  */
  int tovisit:1;

  /* Number of predecessors we need to visit first.  */
  int npredecessors;
} *block_info;
831
/* Similar information for edges.  */
typedef struct edge_info_def
{
  /* In case edge is a loopback edge, the probability edge will be reached
     in case header is.  Estimated number of iterations of the loop can be
     then computed as 1 / (1 - back_edge_prob).  */
  REAL_VALUE_TYPE back_edge_prob;
  /* True if the edge is a loopback edge in the natural loop.  */
  int back_edge:1;
} *edge_info;

/* Accessors for the per-block / per-edge payloads stored in AUX.  */
#define BLOCK_INFO(B)	((block_info) (B)->aux)
#define EDGE_INFO(E)	((edge_info) (E)->aux)
845
/* Helper function for estimate_bb_frequencies.
   Propagate the frequencies for loops headed by HEAD.  Works over the
   blocks whose BLOCK_INFO->tovisit flag is set, in topological order
   of non-back edges, solving frequency = sum of incoming edge
   frequencies scaled by 1 / (1 - cyclic_probability).  */

static void
propagate_freq (head)
     basic_block head;
{
  basic_block bb = head;
  basic_block last = bb;
  edge e;
  basic_block nextbb;
  int n;

  /* For each basic block we need to visit count number of his predecessors
     we need to visit first.  */
  for (n = 0; n < n_basic_blocks; n++)
    {
      basic_block bb = BASIC_BLOCK (n);
      if (BLOCK_INFO (bb)->tovisit)
	{
	  int count = 0;

	  /* Back edges (EDGE_DFS_BACK) do not gate readiness; edges
	     from unvisited blocks that are not marked as loop back
	     edges indicate an irreducible region.  */
	  for (e = bb->pred; e; e = e->pred_next)
	    if (BLOCK_INFO (e->src)->tovisit && !(e->flags & EDGE_DFS_BACK))
	      count++;
	    else if (BLOCK_INFO (e->src)->tovisit
		     && rtl_dump_file && !EDGE_INFO (e)->back_edge)
	      fprintf (rtl_dump_file,
		       "Irreducible region hit, ignoring edge to %i->%i\n",
		       e->src->index, bb->index);
	  BLOCK_INFO (bb)->npredecessors = count;
	}
    }

  /* The region head starts with frequency 1.  */
  memcpy (&BLOCK_INFO (head)->frequency, &real_one, sizeof (real_one));
  /* Process the worklist; it is threaded through BLOCK_INFO->next with
     NEXTBB as the front and LAST as the tail.  */
  for (; bb; bb = nextbb)
    {
      REAL_VALUE_TYPE cyclic_probability, frequency;

      memcpy (&cyclic_probability, &real_zero, sizeof (real_zero));
      memcpy (&frequency, &real_zero, sizeof (real_zero));

      nextbb = BLOCK_INFO (bb)->next;
      BLOCK_INFO (bb)->next = NULL;

      /* Compute frequency of basic block.  */
      if (bb != head)
	{
#ifdef ENABLE_CHECKING
	  /* All non-back predecessors must already be visited.  */
	  for (e = bb->pred; e; e = e->pred_next)
	    if (BLOCK_INFO (e->src)->tovisit && !(e->flags & EDGE_DFS_BACK))
	      abort ();
#endif

	  for (e = bb->pred; e; e = e->pred_next)
	    if (EDGE_INFO (e)->back_edge)
	      {
		REAL_ARITHMETIC (cyclic_probability, PLUS_EXPR,
				 cyclic_probability,
				 EDGE_INFO (e)->back_edge_prob);
	      }
	    else if (!(e->flags & EDGE_DFS_BACK))
	      {
		REAL_VALUE_TYPE tmp;

		/* frequency += (e->probability
				  * BLOCK_INFO (e->src)->frequency /
				  REG_BR_PROB_BASE);  */

		REAL_VALUE_FROM_INT (tmp, e->probability, 0,
				     TYPE_MODE (double_type_node));
		REAL_ARITHMETIC (tmp, MULT_EXPR, tmp,
				 BLOCK_INFO (e->src)->frequency);
		REAL_ARITHMETIC (tmp, RDIV_EXPR, tmp, real_br_prob_base);
		REAL_ARITHMETIC (frequency, PLUS_EXPR, frequency, tmp);
	      }

	  /* Clamp the cyclic probability below 1 so the division
	     below stays finite.  */
	  if (REAL_VALUES_LESS (real_almost_one, cyclic_probability))
	    memcpy (&cyclic_probability, &real_almost_one, sizeof (real_zero));

	  /* BLOCK_INFO (bb)->frequency = frequency / (1 - cyclic_probability)
	   */

	  REAL_ARITHMETIC (cyclic_probability, MINUS_EXPR, real_one,
			   cyclic_probability);
	  REAL_ARITHMETIC (BLOCK_INFO (bb)->frequency,
			   RDIV_EXPR, frequency, cyclic_probability);
	}

      BLOCK_INFO (bb)->tovisit = 0;

      /* Compute back edge frequencies.  */
      for (e = bb->succ; e; e = e->succ_next)
	if (e->dest == head)
	  {
	    REAL_VALUE_TYPE tmp;

	    /* EDGE_INFO (e)->back_edge_prob
		  = ((e->probability * BLOCK_INFO (bb)->frequency)
		     / REG_BR_PROB_BASE); */
	    REAL_VALUE_FROM_INT (tmp, e->probability, 0,
				 TYPE_MODE (double_type_node));
	    REAL_ARITHMETIC (tmp, MULT_EXPR, tmp,
			     BLOCK_INFO (bb)->frequency);
	    REAL_ARITHMETIC (EDGE_INFO (e)->back_edge_prob,
			     RDIV_EXPR, tmp, real_br_prob_base);

	  }

      /* Propagate to successor blocks: enqueue each successor whose
	 last pending predecessor was this block.  */
      for (e = bb->succ; e; e = e->succ_next)
	if (!(e->flags & EDGE_DFS_BACK)
	    && BLOCK_INFO (e->dest)->npredecessors)
	  {
	    BLOCK_INFO (e->dest)->npredecessors--;
	    if (!BLOCK_INFO (e->dest)->npredecessors)
	      {
		if (!nextbb)
		  nextbb = e->dest;
		else
		  BLOCK_INFO (last)->next = e->dest;

		last = e->dest;
	      }
	  }
    }
}
973
974 /* Estimate probabilities of loopback edges in loops at same nest level. */
975
976 static void
977 estimate_loops_at_level (first_loop)
978 struct loop *first_loop;
979 {
980 struct loop *l, *loop = first_loop;
981
982 for (loop = first_loop; loop; loop = loop->next)
983 {
984 int n;
985 edge e;
986
987 estimate_loops_at_level (loop->inner);
988
989 /* Find current loop back edge and mark it. */
990 for (e = loop->latch->succ; e->dest != loop->header; e = e->succ_next)
991 ;
992
993 EDGE_INFO (e)->back_edge = 1;
994
995 /* In case the loop header is shared, ensure that it is the last
996 one sharing the same header, so we avoid redundant work. */
997 if (loop->shared)
998 {
999 for (l = loop->next; l; l = l->next)
1000 if (l->header == loop->header)
1001 break;
1002
1003 if (l)
1004 continue;
1005 }
1006
1007 /* Now merge all nodes of all loops with given header as not visited. */
1008 for (l = loop->shared ? first_loop : loop; l != loop->next; l = l->next)
1009 if (loop->header == l->header)
1010 EXECUTE_IF_SET_IN_SBITMAP (l->nodes, 0, n,
1011 BLOCK_INFO (BASIC_BLOCK (n))->tovisit = 1
1012 );
1013
1014 propagate_freq (loop->header);
1015 }
1016 }
1017
/* Convert counts measured by profile driven feedback to frequencies.  */

static void
counts_to_freqs ()
{
  HOST_WIDEST_INT count_max = 1;
  int i;

  /* Find the maximal block count; starting from 1 keeps the division
     below safe even when all counts are zero.  */
  for (i = 0; i < n_basic_blocks; i++)
    count_max = MAX (BASIC_BLOCK (i)->count, count_max);

  /* Scale every block's count into the 0..BB_FREQ_MAX range; the
     indices -2 and -1 select the entry and exit pseudo-blocks.  */
  for (i = -2; i < n_basic_blocks; i++)
    {
      basic_block bb;

      if (i == -2)
	bb = ENTRY_BLOCK_PTR;
      else if (i == -1)
	bb = EXIT_BLOCK_PTR;
      else
	bb = BASIC_BLOCK (i);

      /* Adding count_max / 2 rounds to nearest.  */
      bb->frequency = (bb->count * BB_FREQ_MAX + count_max / 2) / count_max;
    }
}
1043
/* Return true if function is likely to be expensive, so there is no point to
   optimize performance of prologue, epilogue or do inlining at the expense
   of code size growth.  THRESHOLD is the limit of number of instructions
   function can execute at average to be still considered not expensive.  */

bool
expensive_function_p (threshold)
     int threshold;
{
  unsigned int sum = 0;
  int i;
  unsigned int limit;

  /* We can not compute accurately for large thresholds due to scaled
     frequencies.  */
  if (threshold > BB_FREQ_MAX)
    abort ();

  /* Frequencies are out of range.  This either means that function contains
     internal loop executing more than BB_FREQ_MAX times or profile feedback
     is available and function has not been executed at all.  */
  if (ENTRY_BLOCK_PTR->frequency == 0)
    return true;

  /* Maximally BB_FREQ_MAX^2 so overflow won't happen.  */
  limit = ENTRY_BLOCK_PTR->frequency * threshold;
  for (i = 0; i < n_basic_blocks; i++)
    {
      basic_block bb = BASIC_BLOCK (i);
      rtx insn;

      /* Weight each active insn by its block's estimated frequency and
	 bail out early once the running total exceeds the limit.  */
      for (insn = bb->head; insn != NEXT_INSN (bb->end);
	   insn = NEXT_INSN (insn))
	if (active_insn_p (insn))
	  {
	    sum += bb->frequency;
	    if (sum > limit)
	      return true;
	  }
    }

  return false;
}
1087
/* Estimate basic block frequencies from the branch probabilities
   attached to the edges, by propagating probabilities through LOOPS
   from the innermost level outwards.  When real profile feedback is
   available, the measured counts are used instead.  */

static void
estimate_bb_frequencies (loops)
     struct loops *loops;
{
  int i;
  REAL_VALUE_TYPE freq_max;
  enum machine_mode double_mode = TYPE_MODE (double_type_node);

  /* Initialize the file-scope real constants used by the frequency
     propagation: 0, 1, REG_BR_PROB_BASE and BB_FREQ_MAX.  */
  REAL_VALUE_FROM_INT (real_zero, 0, 0, double_mode);
  REAL_VALUE_FROM_INT (real_one, 1, 0, double_mode);
  REAL_VALUE_FROM_INT (real_br_prob_base, REG_BR_PROB_BASE, 0, double_mode);
  REAL_VALUE_FROM_INT (real_bb_freq_max, BB_FREQ_MAX, 0, double_mode);
  REAL_VALUE_FROM_INT (real_one_half, 2, 0, double_mode);

  /* real_one_half was loaded with 2; dividing 1 by it yields 0.5,
     used later for round-to-nearest.  */
  REAL_ARITHMETIC (real_one_half, RDIV_EXPR, real_one, real_one_half);

  /* real_almost_one = 1 - 1/REG_BR_PROB_BASE, i.e. the largest
     representable probability strictly below certainty.  */
  REAL_ARITHMETIC (real_almost_one, RDIV_EXPR, real_one, real_br_prob_base);
  REAL_ARITHMETIC (real_almost_one, MINUS_EXPR, real_one, real_almost_one);

  mark_dfs_back_edges ();
  /* With profile feedback the measured counts are authoritative;
     convert them directly and skip the estimation entirely.  */
  if (flag_branch_probabilities)
    {
      counts_to_freqs ();
      return;
    }

  /* Fill in the probability values in flowgraph based on the REG_BR_PROB
     notes.  Blocks whose probabilities were not set by branch prediction
     get their outgoing edges distributed evenly below.  */
  for (i = 0; i < n_basic_blocks; i++)
    {
      rtx last_insn = BLOCK_END (i);

      if (GET_CODE (last_insn) != JUMP_INSN || !any_condjump_p (last_insn)
	  /* Avoid handling of conditional jumps jumping to fallthru edge.  */
	  || BASIC_BLOCK (i)->succ->succ_next == NULL)
	{
	  /* We can predict only conditional jumps at the moment.
	     Expect each edge to be equally probable.
	     ?? In the future we want to make abnormal edges improbable.  */
	  int nedges = 0;
	  edge e;

	  /* Count the outgoing edges, but stop early if any edge already
	     carries a probability — then this block is left alone.  */
	  for (e = BASIC_BLOCK (i)->succ; e; e = e->succ_next)
	    {
	      nedges++;
	      if (e->probability != 0)
		break;
	    }
	  /* No edge had a probability: split REG_BR_PROB_BASE evenly
	     (rounded to nearest) across all NEDGES edges.  */
	  if (!e)
	    for (e = BASIC_BLOCK (i)->succ; e; e = e->succ_next)
	      e->probability = (REG_BR_PROB_BASE + nedges / 2) / nedges;
	}
    }

  /* The function is always entered, so its single entry edge is taken
     with certainty.  */
  ENTRY_BLOCK_PTR->succ->probability = REG_BR_PROB_BASE;

  /* Set up block info for each basic block.  The aux fields allocated
     here back the BLOCK_INFO / EDGE_INFO accessors and are freed at the
     bottom of this function.  */
  alloc_aux_for_blocks (sizeof (struct block_info_def));
  alloc_aux_for_edges (sizeof (struct edge_info_def));
  for (i = -2; i < n_basic_blocks; i++)
    {
      edge e;
      basic_block bb;

      /* Indices -2 and -1 stand for the ENTRY and EXIT blocks.  */
      if (i == -2)
	bb = ENTRY_BLOCK_PTR;
      else if (i == -1)
	bb = EXIT_BLOCK_PTR;
      else
	bb = BASIC_BLOCK (i);

      BLOCK_INFO (bb)->tovisit = 0;
      /* Seed each edge's back_edge_prob with its probability scaled
	 into [0, 1] (divided by REG_BR_PROB_BASE).  */
      for (e = bb->succ; e; e = e->succ_next)
	{

	  REAL_VALUE_FROM_INT (EDGE_INFO (e)->back_edge_prob,
			       e->probability, 0, double_mode);
	  REAL_ARITHMETIC (EDGE_INFO (e)->back_edge_prob,
			   RDIV_EXPR, EDGE_INFO (e)->back_edge_prob,
			   real_br_prob_base);
	}
    }

  /* First compute probabilities locally for each loop from innermost
     to outermost to examine probabilities for back edges.  */
  estimate_loops_at_level (loops->tree_root);

  /* Now fake loop around whole function to finalize probabilities.  */
  for (i = 0; i < n_basic_blocks; i++)
    BLOCK_INFO (BASIC_BLOCK (i))->tovisit = 1;

  BLOCK_INFO (ENTRY_BLOCK_PTR)->tovisit = 1;
  BLOCK_INFO (EXIT_BLOCK_PTR)->tovisit = 1;
  propagate_freq (ENTRY_BLOCK_PTR);

  /* Find the maximum propagated frequency, which defines the scale.  */
  memcpy (&freq_max, &real_zero, sizeof (real_zero));
  for (i = 0; i < n_basic_blocks; i++)
    if (REAL_VALUES_LESS (freq_max, BLOCK_INFO (BASIC_BLOCK (i))->frequency))
      memcpy (&freq_max, &BLOCK_INFO (BASIC_BLOCK (i))->frequency,
	      sizeof (freq_max));

  /* Scale every block's frequency into [0, BB_FREQ_MAX], rounding to
     nearest by adding 0.5 before the truncating fix.  */
  for (i = -2; i < n_basic_blocks; i++)
    {
      basic_block bb;
      REAL_VALUE_TYPE tmp;

      if (i == -2)
	bb = ENTRY_BLOCK_PTR;
      else if (i == -1)
	bb = EXIT_BLOCK_PTR;
      else
	bb = BASIC_BLOCK (i);

      /* tmp = frequency * BB_FREQ_MAX / freq_max + 0.5  */
      REAL_ARITHMETIC (tmp, MULT_EXPR, BLOCK_INFO (bb)->frequency,
		       real_bb_freq_max);
      REAL_ARITHMETIC (tmp, RDIV_EXPR, tmp, freq_max);
      REAL_ARITHMETIC (tmp, PLUS_EXPR, tmp, real_one_half);
      bb->frequency = REAL_VALUE_UNSIGNED_FIX (tmp);
    }

  free_aux_for_blocks ();
  free_aux_for_edges ();
}