Makefile.in (LIBGCC_DEPS): Add gcov headers.
[gcc.git] / gcc / predict.c
1 /* Branch prediction routines for the GNU compiler.
2 Copyright (C) 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 2, or (at your option) any later
9 version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
19 02111-1307, USA. */
20
21 /* References:
22
23 [1] "Branch Prediction for Free"
24 Ball and Larus; PLDI '93.
25 [2] "Static Branch Frequency and Program Profile Analysis"
26 Wu and Larus; MICRO-27.
27 [3] "Corpus-based Static Branch Prediction"
28 Calder, Grunwald, Lindsay, Martin, Mozer, and Zorn; PLDI '95. */
29
30
31 #include "config.h"
32 #include "system.h"
33 #include "coretypes.h"
34 #include "tm.h"
35 #include "tree.h"
36 #include "rtl.h"
37 #include "tm_p.h"
38 #include "hard-reg-set.h"
39 #include "basic-block.h"
40 #include "insn-config.h"
41 #include "regs.h"
42 #include "flags.h"
43 #include "output.h"
44 #include "function.h"
45 #include "except.h"
46 #include "toplev.h"
47 #include "recog.h"
48 #include "expr.h"
49 #include "predict.h"
50 #include "coverage.h"
51 #include "sreal.h"
52 #include "params.h"
53 #include "target.h"
54 #include "loop.h"
55 #include "cfgloop.h"
56
57 /* real constants: 0, 1, 1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE,
58 1/REG_BR_PROB_BASE, 0.5, BB_FREQ_MAX. */
59 static sreal real_zero, real_one, real_almost_one, real_br_prob_base,
60 real_inv_br_prob_base, real_one_half, real_bb_freq_max;
61
62 /* Random guesstimation given names. */
63 #define PROB_VERY_UNLIKELY (REG_BR_PROB_BASE / 10 - 1)
64 #define PROB_EVEN (REG_BR_PROB_BASE / 2)
65 #define PROB_VERY_LIKELY (REG_BR_PROB_BASE - PROB_VERY_UNLIKELY)
66 #define PROB_ALWAYS (REG_BR_PROB_BASE)
67
68 static bool predicted_by_p PARAMS ((basic_block,
69 enum br_predictor));
70 static void combine_predictions_for_insn PARAMS ((rtx, basic_block));
71 static void dump_prediction PARAMS ((enum br_predictor, int,
72 basic_block, int));
73 static void estimate_loops_at_level PARAMS ((struct loop *loop));
74 static void propagate_freq PARAMS ((struct loop *));
75 static void estimate_bb_frequencies PARAMS ((struct loops *));
76 static void counts_to_freqs PARAMS ((void));
77 static void process_note_predictions PARAMS ((basic_block, int *,
78 dominance_info,
79 dominance_info));
80 static void process_note_prediction PARAMS ((basic_block, int *,
81 dominance_info,
82 dominance_info, int, int));
83 static bool last_basic_block_p PARAMS ((basic_block));
84 static void compute_function_frequency PARAMS ((void));
85 static void choose_function_section PARAMS ((void));
86 static bool can_predict_insn_p PARAMS ((rtx));
87
88 /* Information we hold about each branch predictor.
89 Filled using information from predict.def. */
90
91 struct predictor_info
92 {
93 const char *const name; /* Name used in the debugging dumps. */
94 const int hitrate; /* Expected hitrate used by
95 predict_insn_def call. */
96 const int flags;
97 };
98
99 /* Use given predictor without Dempster-Shaffer theory if it matches
100 using first_match heuristics. */
101 #define PRED_FLAG_FIRST_MATCH 1
102
103 /* Recompute hitrate in percent to our representation. */
104
105 #define HITRATE(VAL) ((int) ((VAL) * REG_BR_PROB_BASE + 50) / 100)
106
107 #define DEF_PREDICTOR(ENUM, NAME, HITRATE, FLAGS) {NAME, HITRATE, FLAGS},
108 static const struct predictor_info predictor_info[]= {
109 #include "predict.def"
110
111 /* Upper bound on predictors. */
112 {NULL, 0, 0}
113 };
114 #undef DEF_PREDICTOR
115
116 /* Return true in case BB can be CPU intensive and should be optimized
117 for maximal performance. */
118
119 bool
120 maybe_hot_bb_p (bb)
121 basic_block bb;
122 {
123 if (profile_info && flag_branch_probabilities
124 && (bb->count
125 < profile_info->sum_max / PARAM_VALUE (HOT_BB_COUNT_FRACTION)))
126 return false;
127 if (bb->frequency < BB_FREQ_MAX / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION))
128 return false;
129 return true;
130 }
131
132 /* Return true in case BB is cold and should be optimized for size. */
133
134 bool
135 probably_cold_bb_p (bb)
136 basic_block bb;
137 {
138 if (profile_info && flag_branch_probabilities
139 && (bb->count
140 < profile_info->sum_max / PARAM_VALUE (HOT_BB_COUNT_FRACTION)))
141 return true;
142 if (bb->frequency < BB_FREQ_MAX / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION))
143 return true;
144 return false;
145 }
146
147 /* Return true in case BB is probably never executed. */
148 bool
149 probably_never_executed_bb_p (bb)
150 basic_block bb;
151 {
152 if (profile_info && flag_branch_probabilities)
153 return ((bb->count + profile_info->runs / 2) / profile_info->runs) == 0;
154 return false;
155 }
156
157 /* Return true if the one of outgoing edges is already predicted by
158 PREDICTOR. */
159
160 static bool
161 predicted_by_p (bb, predictor)
162 basic_block bb;
163 enum br_predictor predictor;
164 {
165 rtx note;
166 if (!INSN_P (bb->end))
167 return false;
168 for (note = REG_NOTES (bb->end); note; note = XEXP (note, 1))
169 if (REG_NOTE_KIND (note) == REG_BR_PRED
170 && INTVAL (XEXP (XEXP (note, 0), 0)) == (int)predictor)
171 return true;
172 return false;
173 }
174
175 void
176 predict_insn (insn, predictor, probability)
177 rtx insn;
178 int probability;
179 enum br_predictor predictor;
180 {
181 if (!any_condjump_p (insn))
182 abort ();
183 if (!flag_guess_branch_prob)
184 return;
185
186 REG_NOTES (insn)
187 = gen_rtx_EXPR_LIST (REG_BR_PRED,
188 gen_rtx_CONCAT (VOIDmode,
189 GEN_INT ((int) predictor),
190 GEN_INT ((int) probability)),
191 REG_NOTES (insn));
192 }
193
194 /* Predict insn by given predictor. */
195
196 void
197 predict_insn_def (insn, predictor, taken)
198 rtx insn;
199 enum br_predictor predictor;
200 enum prediction taken;
201 {
202 int probability = predictor_info[(int) predictor].hitrate;
203
204 if (taken != TAKEN)
205 probability = REG_BR_PROB_BASE - probability;
206
207 predict_insn (insn, predictor, probability);
208 }
209
210 /* Predict edge E with given probability if possible. */
211
212 void
213 predict_edge (e, predictor, probability)
214 edge e;
215 int probability;
216 enum br_predictor predictor;
217 {
218 rtx last_insn;
219 last_insn = e->src->end;
220
221 /* We can store the branch prediction information only about
222 conditional jumps. */
223 if (!any_condjump_p (last_insn))
224 return;
225
226 /* We always store probability of branching. */
227 if (e->flags & EDGE_FALLTHRU)
228 probability = REG_BR_PROB_BASE - probability;
229
230 predict_insn (last_insn, predictor, probability);
231 }
232
233 /* Return true when we can store prediction on insn INSN.
234 At the moment we represent predictions only on conditional
235 jumps, not at computed jump or other complicated cases. */
236 static bool
237 can_predict_insn_p (insn)
238 rtx insn;
239 {
240 return (GET_CODE (insn) == JUMP_INSN
241 && any_condjump_p (insn)
242 && BLOCK_FOR_INSN (insn)->succ->succ_next);
243 }
244
245 /* Predict edge E by given predictor if possible. */
246
247 void
248 predict_edge_def (e, predictor, taken)
249 edge e;
250 enum br_predictor predictor;
251 enum prediction taken;
252 {
253 int probability = predictor_info[(int) predictor].hitrate;
254
255 if (taken != TAKEN)
256 probability = REG_BR_PROB_BASE - probability;
257
258 predict_edge (e, predictor, probability);
259 }
260
261 /* Invert all branch predictions or probability notes in the INSN. This needs
262 to be done each time we invert the condition used by the jump. */
263
264 void
265 invert_br_probabilities (insn)
266 rtx insn;
267 {
268 rtx note;
269
270 for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
271 if (REG_NOTE_KIND (note) == REG_BR_PROB)
272 XEXP (note, 0) = GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (note, 0)));
273 else if (REG_NOTE_KIND (note) == REG_BR_PRED)
274 XEXP (XEXP (note, 0), 1)
275 = GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (XEXP (note, 0), 1)));
276 }
277
278 /* Dump information about the branch prediction to the output file. */
279
280 static void
281 dump_prediction (predictor, probability, bb, used)
282 enum br_predictor predictor;
283 int probability;
284 basic_block bb;
285 int used;
286 {
287 edge e = bb->succ;
288
289 if (!rtl_dump_file)
290 return;
291
292 while (e && (e->flags & EDGE_FALLTHRU))
293 e = e->succ_next;
294
295 fprintf (rtl_dump_file, " %s heuristics%s: %.1f%%",
296 predictor_info[predictor].name,
297 used ? "" : " (ignored)", probability * 100.0 / REG_BR_PROB_BASE);
298
299 if (bb->count)
300 {
301 fprintf (rtl_dump_file, " exec ");
302 fprintf (rtl_dump_file, HOST_WIDEST_INT_PRINT_DEC, bb->count);
303 if (e)
304 {
305 fprintf (rtl_dump_file, " hit ");
306 fprintf (rtl_dump_file, HOST_WIDEST_INT_PRINT_DEC, e->count);
307 fprintf (rtl_dump_file, " (%.1f%%)", e->count * 100.0 / bb->count);
308 }
309 }
310
311 fprintf (rtl_dump_file, "\n");
312 }
313
314 /* Combine all REG_BR_PRED notes into single probability and attach REG_BR_PROB
315 note if not already present. Remove now useless REG_BR_PRED notes. */
316
317 static void
318 combine_predictions_for_insn (insn, bb)
319 rtx insn;
320 basic_block bb;
321 {
322 rtx prob_note = find_reg_note (insn, REG_BR_PROB, 0);
323 rtx *pnote = &REG_NOTES (insn);
324 rtx note;
325 int best_probability = PROB_EVEN;
326 int best_predictor = END_PREDICTORS;
327 int combined_probability = REG_BR_PROB_BASE / 2;
328 int d;
329 bool first_match = false;
330 bool found = false;
331
332 if (rtl_dump_file)
333 fprintf (rtl_dump_file, "Predictions for insn %i bb %i\n", INSN_UID (insn),
334 bb->index);
335
336 /* We implement "first match" heuristics and use probability guessed
337 by predictor with smallest index. In the future we will use better
338 probability combination techniques. */
339 for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
340 if (REG_NOTE_KIND (note) == REG_BR_PRED)
341 {
342 int predictor = INTVAL (XEXP (XEXP (note, 0), 0));
343 int probability = INTVAL (XEXP (XEXP (note, 0), 1));
344
345 found = true;
346 if (best_predictor > predictor)
347 best_probability = probability, best_predictor = predictor;
348
349 d = (combined_probability * probability
350 + (REG_BR_PROB_BASE - combined_probability)
351 * (REG_BR_PROB_BASE - probability));
352
353 /* Use FP math to avoid overflows of 32bit integers. */
354 if (d == 0)
355 /* If one probability is 0% and one 100%, avoid division by zero. */
356 combined_probability = REG_BR_PROB_BASE / 2;
357 else
358 combined_probability = (((double) combined_probability) * probability
359 * REG_BR_PROB_BASE / d + 0.5);
360 }
361
362 /* Decide which heuristic to use. In case we didn't match anything,
363 use no_prediction heuristic, in case we did match, use either
364 first match or Dempster-Shaffer theory depending on the flags. */
365
366 if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
367 first_match = true;
368
369 if (!found)
370 dump_prediction (PRED_NO_PREDICTION, combined_probability, bb, true);
371 else
372 {
373 dump_prediction (PRED_DS_THEORY, combined_probability, bb, !first_match);
374 dump_prediction (PRED_FIRST_MATCH, best_probability, bb, first_match);
375 }
376
377 if (first_match)
378 combined_probability = best_probability;
379 dump_prediction (PRED_COMBINED, combined_probability, bb, true);
380
381 while (*pnote)
382 {
383 if (REG_NOTE_KIND (*pnote) == REG_BR_PRED)
384 {
385 int predictor = INTVAL (XEXP (XEXP (*pnote, 0), 0));
386 int probability = INTVAL (XEXP (XEXP (*pnote, 0), 1));
387
388 dump_prediction (predictor, probability, bb,
389 !first_match || best_predictor == predictor);
390 *pnote = XEXP (*pnote, 1);
391 }
392 else
393 pnote = &XEXP (*pnote, 1);
394 }
395
396 if (!prob_note)
397 {
398 REG_NOTES (insn)
399 = gen_rtx_EXPR_LIST (REG_BR_PROB,
400 GEN_INT (combined_probability), REG_NOTES (insn));
401
402 /* Save the prediction into CFG in case we are seeing non-degenerated
403 conditional jump. */
404 if (bb->succ->succ_next)
405 {
406 BRANCH_EDGE (bb)->probability = combined_probability;
407 FALLTHRU_EDGE (bb)->probability
408 = REG_BR_PROB_BASE - combined_probability;
409 }
410 }
411 }
412
413 /* Statically estimate the probability that a branch will be taken.
414 ??? In the next revision there will be a number of other predictors added
415 from the above references. Further, each heuristic will be factored out
416 into its own function for clarity (and to facilitate the combination of
417 predictions). */
418
419 void
420 estimate_probability (loops_info)
421 struct loops *loops_info;
422 {
423 dominance_info dominators, post_dominators;
424 basic_block bb;
425 unsigned i;
426
427 connect_infinite_loops_to_exit ();
428 dominators = calculate_dominance_info (CDI_DOMINATORS);
429 post_dominators = calculate_dominance_info (CDI_POST_DOMINATORS);
430
431 /* Try to predict out blocks in a loop that are not part of a
432 natural loop. */
433 for (i = 1; i < loops_info->num; i++)
434 {
435 basic_block bb, *bbs;
436 unsigned j;
437 int exits;
438 struct loop *loop = loops_info->parray[i];
439 struct loop_desc desc;
440 unsigned HOST_WIDE_INT niter;
441
442 flow_loop_scan (loops_info, loop, LOOP_EXIT_EDGES);
443 exits = loop->num_exits;
444
445 if (simple_loop_p (loops_info, loop, &desc)
446 && desc.const_iter)
447 {
448 int prob;
449 niter = desc.niter + 1;
450 if (niter == 0) /* We might overflow here. */
451 niter = desc.niter;
452
453 prob = (REG_BR_PROB_BASE
454 - (REG_BR_PROB_BASE + niter /2) / niter);
455 /* Branch prediction algorithm gives 0 frequency for everything
456 after the end of loop for loop having 0 probability to finish. */
457 if (prob == REG_BR_PROB_BASE)
458 prob = REG_BR_PROB_BASE - 1;
459 predict_edge (desc.in_edge, PRED_LOOP_ITERATIONS,
460 prob);
461 }
462
463 bbs = get_loop_body (loop);
464 for (j = 0; j < loop->num_nodes; j++)
465 {
466 int header_found = 0;
467 edge e;
468
469 bb = bbs[j];
470
471 /* Bypass loop heuristics on continue statement. These
472 statements construct loops via "non-loop" constructs
473 in the source language and are better to be handled
474 separately. */
475 if (!can_predict_insn_p (bb->end)
476 || predicted_by_p (bb, PRED_CONTINUE))
477 continue;
478
479 /* Loop branch heuristics - predict an edge back to a
480 loop's head as taken. */
481 for (e = bb->succ; e; e = e->succ_next)
482 if (e->dest == loop->header
483 && e->src == loop->latch)
484 {
485 header_found = 1;
486 predict_edge_def (e, PRED_LOOP_BRANCH, TAKEN);
487 }
488
489 /* Loop exit heuristics - predict an edge exiting the loop if the
490 conditional has no loop header successors as not taken. */
491 if (!header_found)
492 for (e = bb->succ; e; e = e->succ_next)
493 if (e->dest->index < 0
494 || !flow_bb_inside_loop_p (loop, e->dest))
495 predict_edge
496 (e, PRED_LOOP_EXIT,
497 (REG_BR_PROB_BASE
498 - predictor_info [(int) PRED_LOOP_EXIT].hitrate)
499 / exits);
500 }
501 }
502
503 /* Attempt to predict conditional jumps using a number of heuristics. */
504 FOR_EACH_BB (bb)
505 {
506 rtx last_insn = bb->end;
507 rtx cond, earliest;
508 edge e;
509
510 if (! can_predict_insn_p (last_insn))
511 continue;
512
513 for (e = bb->succ; e; e = e->succ_next)
514 {
515 /* Predict early returns to be probable, as we've already taken
516 care for error returns and other are often used for fast paths
517 trought function. */
518 if ((e->dest == EXIT_BLOCK_PTR
519 || (e->dest->succ && !e->dest->succ->succ_next
520 && e->dest->succ->dest == EXIT_BLOCK_PTR))
521 && !predicted_by_p (bb, PRED_NULL_RETURN)
522 && !predicted_by_p (bb, PRED_CONST_RETURN)
523 && !predicted_by_p (bb, PRED_NEGATIVE_RETURN)
524 && !last_basic_block_p (e->dest))
525 predict_edge_def (e, PRED_EARLY_RETURN, TAKEN);
526
527 /* Look for block we are guarding (ie we dominate it,
528 but it doesn't postdominate us). */
529 if (e->dest != EXIT_BLOCK_PTR && e->dest != bb
530 && dominated_by_p (dominators, e->dest, e->src)
531 && !dominated_by_p (post_dominators, e->src, e->dest))
532 {
533 rtx insn;
534
535 /* The call heuristic claims that a guarded function call
536 is improbable. This is because such calls are often used
537 to signal exceptional situations such as printing error
538 messages. */
539 for (insn = e->dest->head; insn != NEXT_INSN (e->dest->end);
540 insn = NEXT_INSN (insn))
541 if (GET_CODE (insn) == CALL_INSN
542 /* Constant and pure calls are hardly used to signalize
543 something exceptional. */
544 && ! CONST_OR_PURE_CALL_P (insn))
545 {
546 predict_edge_def (e, PRED_CALL, NOT_TAKEN);
547 break;
548 }
549 }
550 }
551
552 cond = get_condition (last_insn, &earliest);
553 if (! cond)
554 continue;
555
556 /* Try "pointer heuristic."
557 A comparison ptr == 0 is predicted as false.
558 Similarly, a comparison ptr1 == ptr2 is predicted as false. */
559 if (GET_RTX_CLASS (GET_CODE (cond)) == '<'
560 && ((REG_P (XEXP (cond, 0)) && REG_POINTER (XEXP (cond, 0)))
561 || (REG_P (XEXP (cond, 1)) && REG_POINTER (XEXP (cond, 1)))))
562 {
563 if (GET_CODE (cond) == EQ)
564 predict_insn_def (last_insn, PRED_POINTER, NOT_TAKEN);
565 else if (GET_CODE (cond) == NE)
566 predict_insn_def (last_insn, PRED_POINTER, TAKEN);
567 }
568 else
569
570 /* Try "opcode heuristic."
571 EQ tests are usually false and NE tests are usually true. Also,
572 most quantities are positive, so we can make the appropriate guesses
573 about signed comparisons against zero. */
574 switch (GET_CODE (cond))
575 {
576 case CONST_INT:
577 /* Unconditional branch. */
578 predict_insn_def (last_insn, PRED_UNCONDITIONAL,
579 cond == const0_rtx ? NOT_TAKEN : TAKEN);
580 break;
581
582 case EQ:
583 case UNEQ:
584 /* Floating point comparisons appears to behave in a very
585 unpredictable way because of special role of = tests in
586 FP code. */
587 if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
588 ;
589 /* Comparisons with 0 are often used for booleans and there is
590 nothing useful to predict about them. */
591 else if (XEXP (cond, 1) == const0_rtx
592 || XEXP (cond, 0) == const0_rtx)
593 ;
594 else
595 predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, NOT_TAKEN);
596 break;
597
598 case NE:
599 case LTGT:
600 /* Floating point comparisons appears to behave in a very
601 unpredictable way because of special role of = tests in
602 FP code. */
603 if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
604 ;
605 /* Comparisons with 0 are often used for booleans and there is
606 nothing useful to predict about them. */
607 else if (XEXP (cond, 1) == const0_rtx
608 || XEXP (cond, 0) == const0_rtx)
609 ;
610 else
611 predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, TAKEN);
612 break;
613
614 case ORDERED:
615 predict_insn_def (last_insn, PRED_FPOPCODE, TAKEN);
616 break;
617
618 case UNORDERED:
619 predict_insn_def (last_insn, PRED_FPOPCODE, NOT_TAKEN);
620 break;
621
622 case LE:
623 case LT:
624 if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
625 || XEXP (cond, 1) == constm1_rtx)
626 predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, NOT_TAKEN);
627 break;
628
629 case GE:
630 case GT:
631 if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
632 || XEXP (cond, 1) == constm1_rtx)
633 predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, TAKEN);
634 break;
635
636 default:
637 break;
638 }
639 }
640
641 /* Attach the combined probability to each conditional jump. */
642 FOR_EACH_BB (bb)
643 if (GET_CODE (bb->end) == JUMP_INSN
644 && any_condjump_p (bb->end)
645 && bb->succ->succ_next != NULL)
646 combine_predictions_for_insn (bb->end, bb);
647
648 free_dominance_info (post_dominators);
649 free_dominance_info (dominators);
650
651 remove_fake_edges ();
652 estimate_bb_frequencies (loops_info);
653 }
654 \f
655 /* __builtin_expect dropped tokens into the insn stream describing expected
656 values of registers. Generate branch probabilities based off these
657 values. */
658
659 void
660 expected_value_to_br_prob ()
661 {
662 rtx insn, cond, ev = NULL_RTX, ev_reg = NULL_RTX;
663
664 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
665 {
666 switch (GET_CODE (insn))
667 {
668 case NOTE:
669 /* Look for expected value notes. */
670 if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_EXPECTED_VALUE)
671 {
672 ev = NOTE_EXPECTED_VALUE (insn);
673 ev_reg = XEXP (ev, 0);
674 delete_insn (insn);
675 }
676 continue;
677
678 case CODE_LABEL:
679 /* Never propagate across labels. */
680 ev = NULL_RTX;
681 continue;
682
683 case JUMP_INSN:
684 /* Look for simple conditional branches. If we haven't got an
685 expected value yet, no point going further. */
686 if (GET_CODE (insn) != JUMP_INSN || ev == NULL_RTX
687 || ! any_condjump_p (insn))
688 continue;
689 break;
690
691 default:
692 /* Look for insns that clobber the EV register. */
693 if (ev && reg_set_p (ev_reg, insn))
694 ev = NULL_RTX;
695 continue;
696 }
697
698 /* Collect the branch condition, hopefully relative to EV_REG. */
699 /* ??? At present we'll miss things like
700 (expected_value (eq r70 0))
701 (set r71 -1)
702 (set r80 (lt r70 r71))
703 (set pc (if_then_else (ne r80 0) ...))
704 as canonicalize_condition will render this to us as
705 (lt r70, r71)
706 Could use cselib to try and reduce this further. */
707 cond = XEXP (SET_SRC (pc_set (insn)), 0);
708 cond = canonicalize_condition (insn, cond, 0, NULL, ev_reg);
709 if (! cond || XEXP (cond, 0) != ev_reg
710 || GET_CODE (XEXP (cond, 1)) != CONST_INT)
711 continue;
712
713 /* Substitute and simplify. Given that the expression we're
714 building involves two constants, we should wind up with either
715 true or false. */
716 cond = gen_rtx_fmt_ee (GET_CODE (cond), VOIDmode,
717 XEXP (ev, 1), XEXP (cond, 1));
718 cond = simplify_rtx (cond);
719
720 /* Turn the condition into a scaled branch probability. */
721 if (cond != const_true_rtx && cond != const0_rtx)
722 abort ();
723 predict_insn_def (insn, PRED_BUILTIN_EXPECT,
724 cond == const_true_rtx ? TAKEN : NOT_TAKEN);
725 }
726 }
727 \f
728 /* Check whether this is the last basic block of function. Commonly tehre
729 is one extra common cleanup block. */
730 static bool
731 last_basic_block_p (bb)
732 basic_block bb;
733 {
734 if (bb == EXIT_BLOCK_PTR)
735 return false;
736
737 return (bb->next_bb == EXIT_BLOCK_PTR
738 || (bb->next_bb->next_bb == EXIT_BLOCK_PTR
739 && bb->succ && !bb->succ->succ_next
740 && bb->succ->dest->next_bb == EXIT_BLOCK_PTR));
741 }
742
743 /* Sets branch probabilities according to PREDiction and FLAGS. HEADS[bb->index]
744 should be index of basic block in that we need to alter branch predictions
745 (i.e. the first of our dominators such that we do not post-dominate it)
746 (but we fill this information on demand, so -1 may be there in case this
747 was not needed yet). */
748
749 static void
750 process_note_prediction (bb, heads, dominators, post_dominators, pred, flags)
751 basic_block bb;
752 int *heads;
753 dominance_info dominators;
754 dominance_info post_dominators;
755 int pred;
756 int flags;
757 {
758 edge e;
759 int y;
760 bool taken;
761
762 taken = flags & IS_TAKEN;
763
764 if (heads[bb->index] < 0)
765 {
766 /* This is first time we need this field in heads array; so
767 find first dominator that we do not post-dominate (we are
768 using already known members of heads array). */
769 basic_block ai = bb;
770 basic_block next_ai = get_immediate_dominator (dominators, bb);
771 int head;
772
773 while (heads[next_ai->index] < 0)
774 {
775 if (!dominated_by_p (post_dominators, next_ai, bb))
776 break;
777 heads[next_ai->index] = ai->index;
778 ai = next_ai;
779 next_ai = get_immediate_dominator (dominators, next_ai);
780 }
781 if (!dominated_by_p (post_dominators, next_ai, bb))
782 head = next_ai->index;
783 else
784 head = heads[next_ai->index];
785 while (next_ai != bb)
786 {
787 next_ai = ai;
788 if (heads[ai->index] == ENTRY_BLOCK)
789 ai = ENTRY_BLOCK_PTR;
790 else
791 ai = BASIC_BLOCK (heads[ai->index]);
792 heads[next_ai->index] = head;
793 }
794 }
795 y = heads[bb->index];
796
797 /* Now find the edge that leads to our branch and aply the prediction. */
798
799 if (y == last_basic_block || !can_predict_insn_p (BASIC_BLOCK (y)->end))
800 return;
801 for (e = BASIC_BLOCK (y)->succ; e; e = e->succ_next)
802 if (e->dest->index >= 0
803 && dominated_by_p (post_dominators, e->dest, bb))
804 predict_edge_def (e, pred, taken);
805 }
806
807 /* Gathers NOTE_INSN_PREDICTIONs in given basic block and turns them
808 into branch probabilities. For description of heads array, see
809 process_note_prediction. */
810
811 static void
812 process_note_predictions (bb, heads, dominators, post_dominators)
813 basic_block bb;
814 int *heads;
815 dominance_info dominators;
816 dominance_info post_dominators;
817 {
818 rtx insn;
819 edge e;
820
821 /* Additionally, we check here for blocks with no successors. */
822 int contained_noreturn_call = 0;
823 int was_bb_head = 0;
824 int noreturn_block = 1;
825
826 for (insn = bb->end; insn;
827 was_bb_head |= (insn == bb->head), insn = PREV_INSN (insn))
828 {
829 if (GET_CODE (insn) != NOTE)
830 {
831 if (was_bb_head)
832 break;
833 else
834 {
835 /* Noreturn calls cause program to exit, therefore they are
836 always predicted as not taken. */
837 if (GET_CODE (insn) == CALL_INSN
838 && find_reg_note (insn, REG_NORETURN, NULL))
839 contained_noreturn_call = 1;
840 continue;
841 }
842 }
843 if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_PREDICTION)
844 {
845 int alg = (int) NOTE_PREDICTION_ALG (insn);
846 /* Process single prediction note. */
847 process_note_prediction (bb,
848 heads,
849 dominators,
850 post_dominators,
851 alg, (int) NOTE_PREDICTION_FLAGS (insn));
852 delete_insn (insn);
853 }
854 }
855 for (e = bb->succ; e; e = e->succ_next)
856 if (!(e->flags & EDGE_FAKE))
857 noreturn_block = 0;
858 if (contained_noreturn_call)
859 {
860 /* This block ended from other reasons than because of return.
861 If it is because of noreturn call, this should certainly not
862 be taken. Otherwise it is probably some error recovery. */
863 process_note_prediction (bb,
864 heads,
865 dominators,
866 post_dominators, PRED_NORETURN, NOT_TAKEN);
867 }
868 }
869
870 /* Gathers NOTE_INSN_PREDICTIONs and turns them into
871 branch probabilities. */
872
873 void
874 note_prediction_to_br_prob ()
875 {
876 basic_block bb;
877 dominance_info post_dominators, dominators;
878 int *heads;
879
880 /* To enable handling of noreturn blocks. */
881 add_noreturn_fake_exit_edges ();
882 connect_infinite_loops_to_exit ();
883
884 post_dominators = calculate_dominance_info (CDI_POST_DOMINATORS);
885 dominators = calculate_dominance_info (CDI_DOMINATORS);
886
887 heads = xmalloc (sizeof (int) * last_basic_block);
888 memset (heads, -1, sizeof (int) * last_basic_block);
889 heads[ENTRY_BLOCK_PTR->next_bb->index] = last_basic_block;
890
891 /* Process all prediction notes. */
892
893 FOR_EACH_BB (bb)
894 process_note_predictions (bb, heads, dominators, post_dominators);
895
896 free_dominance_info (post_dominators);
897 free_dominance_info (dominators);
898 free (heads);
899
900 remove_fake_edges ();
901 }
902 \f
903 /* This is used to carry information about basic blocks. It is
904 attached to the AUX field of the standard CFG block. */
905
906 typedef struct block_info_def
907 {
908 /* Estimated frequency of execution of basic_block. */
909 sreal frequency;
910
911 /* To keep queue of basic blocks to process. */
912 basic_block next;
913
914 /* True if block needs to be visited in prop_freqency. */
915 int tovisit:1;
916
917 /* Number of predecessors we need to visit first. */
918 int npredecessors;
919 } *block_info;
920
921 /* Similar information for edges. */
922 typedef struct edge_info_def
923 {
924 /* In case edge is an loopback edge, the probability edge will be reached
925 in case header is. Estimated number of iterations of the loop can be
926 then computed as 1 / (1 - back_edge_prob). */
927 sreal back_edge_prob;
928 /* True if the edge is an loopback edge in the natural loop. */
929 int back_edge:1;
930 } *edge_info;
931
932 #define BLOCK_INFO(B) ((block_info) (B)->aux)
933 #define EDGE_INFO(E) ((edge_info) (E)->aux)
934
935 /* Helper function for estimate_bb_frequencies.
936 Propagate the frequencies for LOOP. */
937
938 static void
939 propagate_freq (loop)
940 struct loop *loop;
941 {
942 basic_block head = loop->header;
943 basic_block bb;
944 basic_block last;
945 edge e;
946 basic_block nextbb;
947
948 /* For each basic block we need to visit count number of his predecessors
949 we need to visit first. */
950 FOR_EACH_BB (bb)
951 {
952 if (BLOCK_INFO (bb)->tovisit)
953 {
954 int count = 0;
955
956 for (e = bb->pred; e; e = e->pred_next)
957 if (BLOCK_INFO (e->src)->tovisit && !(e->flags & EDGE_DFS_BACK))
958 count++;
959 else if (BLOCK_INFO (e->src)->tovisit
960 && rtl_dump_file && !EDGE_INFO (e)->back_edge)
961 fprintf (rtl_dump_file,
962 "Irreducible region hit, ignoring edge to %i->%i\n",
963 e->src->index, bb->index);
964 BLOCK_INFO (bb)->npredecessors = count;
965 }
966 }
967
968 memcpy (&BLOCK_INFO (head)->frequency, &real_one, sizeof (real_one));
969 last = head;
970 for (bb = head; bb; bb = nextbb)
971 {
972 sreal cyclic_probability, frequency;
973
974 memcpy (&cyclic_probability, &real_zero, sizeof (real_zero));
975 memcpy (&frequency, &real_zero, sizeof (real_zero));
976
977 nextbb = BLOCK_INFO (bb)->next;
978 BLOCK_INFO (bb)->next = NULL;
979
980 /* Compute frequency of basic block. */
981 if (bb != head)
982 {
983 #ifdef ENABLE_CHECKING
984 for (e = bb->pred; e; e = e->pred_next)
985 if (BLOCK_INFO (e->src)->tovisit && !(e->flags & EDGE_DFS_BACK))
986 abort ();
987 #endif
988
989 for (e = bb->pred; e; e = e->pred_next)
990 if (EDGE_INFO (e)->back_edge)
991 {
992 sreal_add (&cyclic_probability, &cyclic_probability,
993 &EDGE_INFO (e)->back_edge_prob);
994 }
995 else if (!(e->flags & EDGE_DFS_BACK))
996 {
997 sreal tmp;
998
999 /* frequency += (e->probability
1000 * BLOCK_INFO (e->src)->frequency /
1001 REG_BR_PROB_BASE); */
1002
1003 sreal_init (&tmp, e->probability, 0);
1004 sreal_mul (&tmp, &tmp, &BLOCK_INFO (e->src)->frequency);
1005 sreal_mul (&tmp, &tmp, &real_inv_br_prob_base);
1006 sreal_add (&frequency, &frequency, &tmp);
1007 }
1008
1009 if (sreal_compare (&cyclic_probability, &real_zero) == 0)
1010 {
1011 memcpy (&BLOCK_INFO (bb)->frequency, &frequency,
1012 sizeof (frequency));
1013 }
1014 else
1015 {
1016 if (sreal_compare (&cyclic_probability, &real_almost_one) > 0)
1017 {
1018 memcpy (&cyclic_probability, &real_almost_one,
1019 sizeof (real_almost_one));
1020 }
1021
1022 /* BLOCK_INFO (bb)->frequency = frequency
1023 / (1 - cyclic_probability) */
1024
1025 sreal_sub (&cyclic_probability, &real_one, &cyclic_probability);
1026 sreal_div (&BLOCK_INFO (bb)->frequency,
1027 &frequency, &cyclic_probability);
1028 }
1029 }
1030
1031 BLOCK_INFO (bb)->tovisit = 0;
1032
1033 /* Compute back edge frequencies. */
1034 for (e = bb->succ; e; e = e->succ_next)
1035 if (e->dest == head)
1036 {
1037 sreal tmp;
1038
1039 /* EDGE_INFO (e)->back_edge_prob
1040 = ((e->probability * BLOCK_INFO (bb)->frequency)
1041 / REG_BR_PROB_BASE); */
1042
1043 sreal_init (&tmp, e->probability, 0);
1044 sreal_mul (&tmp, &tmp, &BLOCK_INFO (bb)->frequency);
1045 sreal_mul (&EDGE_INFO (e)->back_edge_prob,
1046 &tmp, &real_inv_br_prob_base);
1047 }
1048
1049 /* Propagate to successor blocks. */
1050 for (e = bb->succ; e; e = e->succ_next)
1051 if (!(e->flags & EDGE_DFS_BACK)
1052 && BLOCK_INFO (e->dest)->npredecessors)
1053 {
1054 BLOCK_INFO (e->dest)->npredecessors--;
1055 if (!BLOCK_INFO (e->dest)->npredecessors)
1056 {
1057 if (!nextbb)
1058 nextbb = e->dest;
1059 else
1060 BLOCK_INFO (last)->next = e->dest;
1061
1062 last = e->dest;
1063 }
1064 }
1065 }
1066 }
1067
1068 /* Estimate probabilities of loopback edges in loops at same nest level. */
1069
1070 static void
1071 estimate_loops_at_level (first_loop)
1072 struct loop *first_loop;
1073 {
1074 struct loop *loop;
1075
1076 for (loop = first_loop; loop; loop = loop->next)
1077 {
1078 edge e;
1079 basic_block *bbs;
1080 unsigned i;
1081
1082 estimate_loops_at_level (loop->inner);
1083
1084 if (loop->latch->succ) /* Do not do this for dummy function loop. */
1085 {
1086 /* Find current loop back edge and mark it. */
1087 e = loop_latch_edge (loop);
1088 EDGE_INFO (e)->back_edge = 1;
1089 }
1090
1091 bbs = get_loop_body (loop);
1092 for (i = 0; i < loop->num_nodes; i++)
1093 BLOCK_INFO (bbs[i])->tovisit = 1;
1094 free (bbs);
1095 propagate_freq (loop);
1096 }
1097 }
1098
1099 /* Convert counts measured by profile driven feedback to frequencies. */
1100
1101 static void
1102 counts_to_freqs ()
1103 {
1104 gcov_type count_max = 1;
1105 basic_block bb;
1106
1107 FOR_EACH_BB (bb)
1108 count_max = MAX (bb->count, count_max);
1109
1110 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
1111 bb->frequency = (bb->count * BB_FREQ_MAX + count_max / 2) / count_max;
1112 }
1113
1114 /* Return true if function is likely to be expensive, so there is no point to
1115 optimize performance of prologue, epilogue or do inlining at the expense
1116 of code size growth. THRESHOLD is the limit of number of instructions
1117 function can execute at average to be still considered not expensive. */
1118
1119 bool
1120 expensive_function_p (threshold)
1121 int threshold;
1122 {
1123 unsigned int sum = 0;
1124 basic_block bb;
1125 unsigned int limit;
1126
1127 /* We can not compute accurately for large thresholds due to scaled
1128 frequencies. */
1129 if (threshold > BB_FREQ_MAX)
1130 abort ();
1131
1132 /* Frequencies are out of range. This either means that function contains
1133 internal loop executing more than BB_FREQ_MAX times or profile feedback
1134 is available and function has not been executed at all. */
1135 if (ENTRY_BLOCK_PTR->frequency == 0)
1136 return true;
1137
1138 /* Maximally BB_FREQ_MAX^2 so overflow won't happen. */
1139 limit = ENTRY_BLOCK_PTR->frequency * threshold;
1140 FOR_EACH_BB (bb)
1141 {
1142 rtx insn;
1143
1144 for (insn = bb->head; insn != NEXT_INSN (bb->end);
1145 insn = NEXT_INSN (insn))
1146 if (active_insn_p (insn))
1147 {
1148 sum += bb->frequency;
1149 if (sum > limit)
1150 return true;
1151 }
1152 }
1153
1154 return false;
1155 }
1156
1157 /* Estimate basic blocks frequency by given branch probabilities. */
1158
1159 static void
1160 estimate_bb_frequencies (loops)
1161 struct loops *loops;
1162 {
1163 basic_block bb;
1164 sreal freq_max;
1165
1166 if (flag_branch_probabilities)
1167 counts_to_freqs ();
1168 else
1169 {
1170 static int real_values_initialized = 0;
1171
1172 if (!real_values_initialized)
1173 {
1174 real_values_initialized = 1;
1175 sreal_init (&real_zero, 0, 0);
1176 sreal_init (&real_one, 1, 0);
1177 sreal_init (&real_br_prob_base, REG_BR_PROB_BASE, 0);
1178 sreal_init (&real_bb_freq_max, BB_FREQ_MAX, 0);
1179 sreal_init (&real_one_half, 1, -1);
1180 sreal_div (&real_inv_br_prob_base, &real_one, &real_br_prob_base);
1181 sreal_sub (&real_almost_one, &real_one, &real_inv_br_prob_base);
1182 }
1183
1184 mark_dfs_back_edges ();
1185 /* Fill in the probability values in flowgraph based on the REG_BR_PROB
1186 notes. */
1187 FOR_EACH_BB (bb)
1188 {
1189 rtx last_insn = bb->end;
1190
1191 if (!can_predict_insn_p (last_insn))
1192 {
1193 /* We can predict only conditional jumps at the moment.
1194 Expect each edge to be equally probable.
1195 ?? In the future we want to make abnormal edges improbable. */
1196 int nedges = 0;
1197 edge e;
1198
1199 for (e = bb->succ; e; e = e->succ_next)
1200 {
1201 nedges++;
1202 if (e->probability != 0)
1203 break;
1204 }
1205 if (!e)
1206 for (e = bb->succ; e; e = e->succ_next)
1207 e->probability = (REG_BR_PROB_BASE + nedges / 2) / nedges;
1208 }
1209 }
1210
1211 ENTRY_BLOCK_PTR->succ->probability = REG_BR_PROB_BASE;
1212
1213 /* Set up block info for each basic block. */
1214 alloc_aux_for_blocks (sizeof (struct block_info_def));
1215 alloc_aux_for_edges (sizeof (struct edge_info_def));
1216 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
1217 {
1218 edge e;
1219
1220 BLOCK_INFO (bb)->tovisit = 0;
1221 for (e = bb->succ; e; e = e->succ_next)
1222 {
1223 sreal_init (&EDGE_INFO (e)->back_edge_prob, e->probability, 0);
1224 sreal_mul (&EDGE_INFO (e)->back_edge_prob,
1225 &EDGE_INFO (e)->back_edge_prob,
1226 &real_inv_br_prob_base);
1227 }
1228 }
1229
1230 /* First compute probabilities locally for each loop from innermost
1231 to outermost to examine probabilities for back edges. */
1232 estimate_loops_at_level (loops->tree_root);
1233
1234 memcpy (&freq_max, &real_zero, sizeof (real_zero));
1235 FOR_EACH_BB (bb)
1236 if (sreal_compare (&freq_max, &BLOCK_INFO (bb)->frequency) < 0)
1237 memcpy (&freq_max, &BLOCK_INFO (bb)->frequency, sizeof (freq_max));
1238
1239 sreal_div (&freq_max, &real_bb_freq_max, &freq_max);
1240 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
1241 {
1242 sreal tmp;
1243
1244 sreal_mul (&tmp, &BLOCK_INFO (bb)->frequency, &freq_max);
1245 sreal_add (&tmp, &tmp, &real_one_half);
1246 bb->frequency = sreal_to_int (&tmp);
1247 }
1248
1249 free_aux_for_blocks ();
1250 free_aux_for_edges ();
1251 }
1252 compute_function_frequency ();
1253 if (flag_reorder_functions)
1254 choose_function_section ();
1255 }
1256
1257 /* Decide whether function is hot, cold or unlikely executed. */
1258 static void
1259 compute_function_frequency ()
1260 {
1261 basic_block bb;
1262
1263 if (!profile_info || !flag_branch_probabilities)
1264 return;
1265 cfun->function_frequency = FUNCTION_FREQUENCY_UNLIKELY_EXECUTED;
1266 FOR_EACH_BB (bb)
1267 {
1268 if (maybe_hot_bb_p (bb))
1269 {
1270 cfun->function_frequency = FUNCTION_FREQUENCY_HOT;
1271 return;
1272 }
1273 if (!probably_never_executed_bb_p (bb))
1274 cfun->function_frequency = FUNCTION_FREQUENCY_NORMAL;
1275 }
1276 }
1277
1278 /* Choose appropriate section for the function. */
1279 static void
1280 choose_function_section ()
1281 {
1282 if (DECL_SECTION_NAME (current_function_decl)
1283 || !targetm.have_named_sections
1284 /* Theoretically we can split the gnu.linkonce text section too,
1285 but this requires more work as the frequency needs to match
1286 for all generated objects so we need to merge the frequency
1287 of all instances. For now just never set frequency for these. */
1288 || DECL_ONE_ONLY (current_function_decl))
1289 return;
1290 if (cfun->function_frequency == FUNCTION_FREQUENCY_HOT)
1291 DECL_SECTION_NAME (current_function_decl) =
1292 build_string (strlen (HOT_TEXT_SECTION_NAME), HOT_TEXT_SECTION_NAME);
1293 if (cfun->function_frequency == FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
1294 DECL_SECTION_NAME (current_function_decl) =
1295 build_string (strlen (UNLIKELY_EXECUTED_TEXT_SECTION_NAME),
1296 UNLIKELY_EXECUTED_TEXT_SECTION_NAME);
1297 }