/* Branch prediction routines for the GNU compiler.
   Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2007, 2008
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
23 [1] "Branch Prediction for Free"
24 Ball and Larus; PLDI '93.
25 [2] "Static Branch Frequency and Program Profile Analysis"
26 Wu and Larus; MICRO-27.
27 [3] "Corpus-based Static Branch Prediction"
28 Calder, Grunwald, Lindsay, Martin, Mozer, and Zorn; PLDI '95. */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "rtl.h"
#include "tm_p.h"
#include "hard-reg-set.h"
#include "basic-block.h"
#include "insn-config.h"
#include "regs.h"
#include "flags.h"
#include "output.h"
#include "function.h"
#include "except.h"
#include "toplev.h"
#include "recog.h"
#include "expr.h"
#include "predict.h"
#include "coverage.h"
#include "sreal.h"
#include "params.h"
#include "target.h"
#include "cfgloop.h"
#include "tree-flow.h"
#include "ggc.h"
#include "tree-dump.h"
#include "tree-pass.h"
#include "timevar.h"
#include "tree-scalar-evolution.h"
#include "cfgloop.h"
#include "pointer-set.h"
/* real constants: 0, 1, 1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE,
   1/REG_BR_PROB_BASE, 0.5, BB_FREQ_MAX.  */
static sreal real_zero, real_one, real_almost_one, real_br_prob_base,
             real_inv_br_prob_base, real_one_half, real_bb_freq_max;
/* Random guesstimation given names.
   PROB_VERY_UNLIKELY should be small enough so a basic block predicted
   by it gets below HOT_BB_FREQUENCY_FRACTION.  */
#define PROB_VERY_UNLIKELY	(REG_BR_PROB_BASE / 2000 - 1)
#define PROB_EVEN		(REG_BR_PROB_BASE / 2)
#define PROB_VERY_LIKELY	(REG_BR_PROB_BASE - PROB_VERY_UNLIKELY)
#define PROB_ALWAYS		(REG_BR_PROB_BASE)
static void combine_predictions_for_insn (rtx, basic_block);
static void dump_prediction (FILE *, enum br_predictor, int, basic_block, int);
static void predict_paths_leading_to (basic_block, enum br_predictor,
				      enum prediction);
static void compute_function_frequency (void);
static void choose_function_section (void);
static bool can_predict_insn_p (const_rtx);
/* Information we hold about each branch predictor.
   Filled using information from predict.def.  */

struct predictor_info
{
  const char *const name;	/* Name used in the debugging dumps.  */
  const int hitrate;		/* Expected hitrate used by
				   predict_insn_def call.  */
  const int flags;
};
/* Use given predictor without Dempster-Shafer theory if it matches
   using first_match heuristics.  */
#define PRED_FLAG_FIRST_MATCH 1

/* Recompute hitrate in percent to our representation.  */

#define HITRATE(VAL) ((int) ((VAL) * REG_BR_PROB_BASE + 50) / 100)
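
/* For example, with the usual REG_BR_PROB_BASE of 10000, HITRATE (79)
   evaluates to 7900; the "+ 50" term makes the scaled value round to
   nearest instead of truncating.  */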

#define DEF_PREDICTOR(ENUM, NAME, HITRATE, FLAGS) {NAME, HITRATE, FLAGS},
static const struct predictor_info predictor_info[] = {
#include "predict.def"

  /* Upper bound on predictors.  */
  {NULL, 0, 0}
};
#undef DEF_PREDICTOR
/* Return TRUE if frequency FREQ is considered to be hot.  */

static inline bool
maybe_hot_frequency_p (int freq)
{
  if (!profile_info || !flag_branch_probabilities)
    {
      if (cfun->function_frequency == FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
        return false;
      if (cfun->function_frequency == FUNCTION_FREQUENCY_HOT)
        return true;
    }
  if (profile_status == PROFILE_ABSENT)
    return true;
  if (freq < BB_FREQ_MAX / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION))
    return false;
  return true;
}
/* Return TRUE if execution count COUNT is considered to be hot.  */

static inline bool
maybe_hot_count_p (gcov_type count)
{
  if (profile_status != PROFILE_READ)
    return true;
  /* Code executed at most once is not hot.  */
  if (profile_info->runs >= count)
    return false;
  return (count
	  > profile_info->sum_max / PARAM_VALUE (HOT_BB_COUNT_FRACTION));
}
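
/* As an illustration with made-up numbers: if the training run recorded
   sum_max of 1000000 and HOT_BB_COUNT_FRACTION is 10000, only counts
   above 100 (and above the number of training runs) qualify as hot.  */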

/* Return true in case BB can be CPU intensive and should be optimized
   for maximal performance.  */

bool
maybe_hot_bb_p (const_basic_block bb)
{
  return maybe_hot_count_p (bb->count) || maybe_hot_frequency_p (bb->frequency);
}
/* Return true if the call can be hot.  */

bool
cgraph_maybe_hot_edge_p (struct cgraph_edge *edge)
{
  if (profile_info && flag_branch_probabilities
      && (edge->count
	  <= profile_info->sum_max / PARAM_VALUE (HOT_BB_COUNT_FRACTION)))
    return false;
  if (lookup_attribute ("cold", DECL_ATTRIBUTES (edge->callee->decl))
      || lookup_attribute ("cold", DECL_ATTRIBUTES (edge->caller->decl)))
    return false;
  if (lookup_attribute ("hot", DECL_ATTRIBUTES (edge->caller->decl)))
    return true;
  if (flag_guess_branch_prob
      && edge->frequency < (CGRAPH_FREQ_MAX
			    / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION)))
    return false;
  return true;
}
/* Return true in case edge E can be CPU intensive and should be optimized
   for maximal performance.  */

bool
maybe_hot_edge_p (edge e)
{
  return maybe_hot_count_p (e->count) || maybe_hot_frequency_p (EDGE_FREQUENCY (e));
}
/* Return true in case BB is probably never executed.  */
bool
probably_never_executed_bb_p (const_basic_block bb)
{
  if (profile_info && flag_branch_probabilities)
    return ((bb->count + profile_info->runs / 2) / profile_info->runs) == 0;
  if ((!profile_info || !flag_branch_probabilities)
      && cfun->function_frequency == FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
    return true;
  return false;
}
/* Return true when current function should always be optimized for size.  */

bool
optimize_function_for_size_p (struct function *fun)
{
  return (optimize_size
	  || fun->function_frequency == FUNCTION_FREQUENCY_UNLIKELY_EXECUTED);
}

/* Return true when current function should always be optimized for speed.  */

bool
optimize_function_for_speed_p (struct function *fun)
{
  return !optimize_function_for_size_p (fun);
}
/* Return TRUE when BB should be optimized for size.  */

bool
optimize_bb_for_size_p (const_basic_block bb)
{
  return optimize_function_for_size_p (cfun) || !maybe_hot_bb_p (bb);
}

/* Return TRUE when BB should be optimized for speed.  */

bool
optimize_bb_for_speed_p (const_basic_block bb)
{
  return !optimize_bb_for_size_p (bb);
}

/* Return TRUE when edge E should be optimized for size.  */

bool
optimize_edge_for_size_p (edge e)
{
  return optimize_function_for_size_p (cfun) || !maybe_hot_edge_p (e);
}

/* Return TRUE when edge E should be optimized for speed.  */

bool
optimize_edge_for_speed_p (edge e)
{
  return !optimize_edge_for_size_p (e);
}
/* Return TRUE when the currently expanded insn should be optimized
   for size.  */

bool
optimize_insn_for_size_p (void)
{
  return optimize_function_for_size_p (cfun) || !crtl->maybe_hot_insn_p;
}

/* Return TRUE when the currently expanded insn should be optimized
   for speed.  */

bool
optimize_insn_for_speed_p (void)
{
  return !optimize_insn_for_size_p ();
}
/* Return TRUE when LOOP should be optimized for size.  */

bool
optimize_loop_for_size_p (struct loop *loop)
{
  return optimize_bb_for_size_p (loop->header);
}

/* Return TRUE when LOOP should be optimized for speed.  */

bool
optimize_loop_for_speed_p (struct loop *loop)
{
  return optimize_bb_for_speed_p (loop->header);
}
/* Return TRUE when LOOP nest should be optimized for speed.  */

bool
optimize_loop_nest_for_speed_p (struct loop *loop)
{
  struct loop *l = loop;
  if (optimize_loop_for_speed_p (loop))
    return true;
  l = loop->inner;
  while (l && l != loop)
    {
      if (optimize_loop_for_speed_p (l))
        return true;
      /* Walk subloops of the current loop; once a subtree is exhausted,
	 climb back up until a sibling is found.  */
      if (l->inner)
        l = l->inner;
      else if (l->next)
        l = l->next;
      else
        {
	  while (l != loop && !l->next)
	    l = loop_outer (l);
	  if (l != loop)
	    l = l->next;
	}
    }
  return false;
}
/* Return TRUE when LOOP nest should be optimized for size.  */

bool
optimize_loop_nest_for_size_p (struct loop *loop)
{
  return !optimize_loop_nest_for_speed_p (loop);
}
/* Return true when edge E is likely to be well predictable by branch
   predictor.  */

bool
predictable_edge_p (edge e)
{
  if (profile_status == PROFILE_ABSENT)
    return false;
  if ((e->probability
       <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100)
      || (REG_BR_PROB_BASE - e->probability
	  <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100))
    return true;
  return false;
}
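
/* In other words, with PARAM_PREDICTABLE_BRANCH_OUTCOME set to e.g. 2,
   only edges whose probability is at most 2% or at least 98% are
   reported as well predictable.  */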

/* Set RTL expansion for BB profile.  */

void
rtl_profile_for_bb (basic_block bb)
{
  crtl->maybe_hot_insn_p = maybe_hot_bb_p (bb);
}

/* Set RTL expansion for edge profile.  */

void
rtl_profile_for_edge (edge e)
{
  crtl->maybe_hot_insn_p = maybe_hot_edge_p (e);
}

/* Set RTL expansion to default mode (i.e. when profile info is not known).  */

void
default_rtl_profile (void)
{
  crtl->maybe_hot_insn_p = true;
}
/* Return true if one of the outgoing edges is already predicted by
   PREDICTOR.  */

bool
rtl_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
{
  rtx note;
  if (!INSN_P (BB_END (bb)))
    return false;
  for (note = REG_NOTES (BB_END (bb)); note; note = XEXP (note, 1))
    if (REG_NOTE_KIND (note) == REG_BR_PRED
	&& INTVAL (XEXP (XEXP (note, 0), 0)) == (int) predictor)
      return true;
  return false;
}
/* This map contains for a basic block the list of predictions for the
   outgoing edges.  */

static struct pointer_map_t *bb_predictions;

/* Return true if one of the outgoing edges is already predicted by
   PREDICTOR.  */

bool
gimple_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
{
  struct edge_prediction *i;
  void **preds = pointer_map_contains (bb_predictions, bb);

  if (!preds)
    return false;

  for (i = (struct edge_prediction *) *preds; i; i = i->ep_next)
    if (i->ep_predictor == predictor)
      return true;
  return false;
}
/* Return true when the probability of edge is reliable.

   The profile guessing code is good at predicting branch outcome (i.e.
   taken/not taken), which is predicted right slightly over 75% of the time.
   It is however notoriously poor at predicting the probability itself.
   In general the profile appears a lot flatter (with probabilities closer
   to 50%) than the reality, so it is a bad idea to use it to drive
   optimizations such as those disabling dynamic branch prediction for well
   predictable branches.

   There are two exceptions - edges leading to noreturn calls and edges
   predicted by the number-of-iterations heuristic are predicted well.
   This macro should be able to distinguish those, but at the moment it
   simply checks for the noreturn heuristic, which is the only one giving
   probability over 99% or below 1%.  In the future we might want to
   propagate reliability information across the CFG if we find this
   information useful in multiple places.  */

static bool
probability_reliable_p (int prob)
{
  return (profile_status == PROFILE_READ
	  || (profile_status == PROFILE_GUESSED
	      && (prob <= HITRATE (1) || prob >= HITRATE (99))));
}
/* Same predicate as above, working on edges.  */
bool
edge_probability_reliable_p (const_edge e)
{
  return probability_reliable_p (e->probability);
}

/* Same predicate as edge_probability_reliable_p, working on notes.  */
bool
br_prob_note_reliable_p (const_rtx note)
{
  gcc_assert (REG_NOTE_KIND (note) == REG_BR_PROB);
  return probability_reliable_p (INTVAL (XEXP (note, 0)));
}
static void
predict_insn (rtx insn, enum br_predictor predictor, int probability)
{
  gcc_assert (any_condjump_p (insn));
  if (!flag_guess_branch_prob)
    return;

  add_reg_note (insn, REG_BR_PRED,
		gen_rtx_CONCAT (VOIDmode,
				GEN_INT ((int) predictor),
				GEN_INT ((int) probability)));
}

/* Predict insn by given predictor.  */

void
predict_insn_def (rtx insn, enum br_predictor predictor,
		  enum prediction taken)
{
  int probability = predictor_info[(int) predictor].hitrate;

  if (taken != TAKEN)
    probability = REG_BR_PROB_BASE - probability;

  predict_insn (insn, predictor, probability);
}

/* Predict edge E with given probability if possible.  */

void
rtl_predict_edge (edge e, enum br_predictor predictor, int probability)
{
  rtx last_insn;
  last_insn = BB_END (e->src);

  /* We can store the branch prediction information only about
     conditional jumps.  */
  if (!any_condjump_p (last_insn))
    return;

  /* We always store probability of branching.  */
  if (e->flags & EDGE_FALLTHRU)
    probability = REG_BR_PROB_BASE - probability;

  predict_insn (last_insn, predictor, probability);
}

/* Predict edge E with the given PROBABILITY.  */
void
gimple_predict_edge (edge e, enum br_predictor predictor, int probability)
{
  gcc_assert (profile_status != PROFILE_GUESSED);
  if ((e->src != ENTRY_BLOCK_PTR && EDGE_COUNT (e->src->succs) > 1)
      && flag_guess_branch_prob && optimize)
    {
      struct edge_prediction *i = XNEW (struct edge_prediction);
      void **preds = pointer_map_insert (bb_predictions, e->src);

      i->ep_next = (struct edge_prediction *) *preds;
      *preds = i;
      i->ep_probability = probability;
      i->ep_predictor = predictor;
      i->ep_edge = e;
    }
}

/* Remove all predictions on given basic block that are attached
   to edge E.  */
void
remove_predictions_associated_with_edge (edge e)
{
  void **preds;

  if (!bb_predictions)
    return;

  preds = pointer_map_contains (bb_predictions, e->src);

  if (preds)
    {
      struct edge_prediction **prediction = (struct edge_prediction **) preds;
      struct edge_prediction *next;

      while (*prediction)
	{
	  if ((*prediction)->ep_edge == e)
	    {
	      next = (*prediction)->ep_next;
	      free (*prediction);
	      *prediction = next;
	    }
	  else
	    prediction = &((*prediction)->ep_next);
	}
    }
}

/* Clears the list of predictions stored for BB.  */

static void
clear_bb_predictions (basic_block bb)
{
  void **preds = pointer_map_contains (bb_predictions, bb);
  struct edge_prediction *pred, *next;

  if (!preds)
    return;

  for (pred = (struct edge_prediction *) *preds; pred; pred = next)
    {
      next = pred->ep_next;
      free (pred);
    }
  *preds = NULL;
}

/* Return true when we can store prediction on insn INSN.
   At the moment we represent predictions only on conditional
   jumps, not at computed jump or other complicated cases.  */
static bool
can_predict_insn_p (const_rtx insn)
{
  return (JUMP_P (insn)
	  && any_condjump_p (insn)
	  && EDGE_COUNT (BLOCK_FOR_INSN (insn)->succs) >= 2);
}

/* Predict edge E by given predictor if possible.  */

void
predict_edge_def (edge e, enum br_predictor predictor,
		  enum prediction taken)
{
  int probability = predictor_info[(int) predictor].hitrate;

  if (taken != TAKEN)
    probability = REG_BR_PROB_BASE - probability;

  predict_edge (e, predictor, probability);
}

/* Invert all branch predictions or probability notes in the INSN.  This needs
   to be done each time we invert the condition used by the jump.  */

void
invert_br_probabilities (rtx insn)
{
  rtx note;

  for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
    if (REG_NOTE_KIND (note) == REG_BR_PROB)
      XEXP (note, 0) = GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (note, 0)));
    else if (REG_NOTE_KIND (note) == REG_BR_PRED)
      XEXP (XEXP (note, 0), 1)
	= GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (XEXP (note, 0), 1)));
}

/* Dump information about the branch prediction to the output file.  */

static void
dump_prediction (FILE *file, enum br_predictor predictor, int probability,
		 basic_block bb, int used)
{
  edge e;
  edge_iterator ei;

  if (!file)
    return;

  FOR_EACH_EDGE (e, ei, bb->succs)
    if (! (e->flags & EDGE_FALLTHRU))
      break;

  fprintf (file, " %s heuristics%s: %.1f%%",
	   predictor_info[predictor].name,
	   used ? "" : " (ignored)", probability * 100.0 / REG_BR_PROB_BASE);

  if (bb->count)
    {
      fprintf (file, " exec ");
      fprintf (file, HOST_WIDEST_INT_PRINT_DEC, bb->count);
      if (e)
	{
	  fprintf (file, " hit ");
	  fprintf (file, HOST_WIDEST_INT_PRINT_DEC, e->count);
	  fprintf (file, " (%.1f%%)", e->count * 100.0 / bb->count);
	}
    }

  fprintf (file, "\n");
}

/* We cannot predict the probabilities of outgoing edges of bb.  Set them
   evenly and hope for the best.  */
static void
set_even_probabilities (basic_block bb)
{
  int nedges = 0;
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, bb->succs)
    if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
      nedges++;
  FOR_EACH_EDGE (e, ei, bb->succs)
    if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
      e->probability = (REG_BR_PROB_BASE + nedges / 2) / nedges;
    else
      e->probability = 0;
}

/* Combine all REG_BR_PRED notes into single probability and attach REG_BR_PROB
   note if not already present.  Remove now useless REG_BR_PRED notes.  */

static void
combine_predictions_for_insn (rtx insn, basic_block bb)
{
  rtx prob_note;
  rtx *pnote;
  rtx note;
  int best_probability = PROB_EVEN;
  int best_predictor = END_PREDICTORS;
  int combined_probability = REG_BR_PROB_BASE / 2;
  int d;
  bool first_match = false;
  bool found = false;

  if (!can_predict_insn_p (insn))
    {
      set_even_probabilities (bb);
      return;
    }

  prob_note = find_reg_note (insn, REG_BR_PROB, 0);
  pnote = &REG_NOTES (insn);
  if (dump_file)
    fprintf (dump_file, "Predictions for insn %i bb %i\n", INSN_UID (insn),
	     bb->index);

  /* We implement "first match" heuristics and use probability guessed
     by predictor with smallest index.  */
  for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
    if (REG_NOTE_KIND (note) == REG_BR_PRED)
      {
	int predictor = INTVAL (XEXP (XEXP (note, 0), 0));
	int probability = INTVAL (XEXP (XEXP (note, 0), 1));

	found = true;
	if (best_predictor > predictor)
	  best_probability = probability, best_predictor = predictor;

	d = (combined_probability * probability
	     + (REG_BR_PROB_BASE - combined_probability)
	     * (REG_BR_PROB_BASE - probability));

	/* Use FP math to avoid overflows of 32bit integers.  */
	if (d == 0)
	  /* If one probability is 0% and one 100%, avoid division by zero.  */
	  combined_probability = REG_BR_PROB_BASE / 2;
	else
	  combined_probability = (((double) combined_probability) * probability
				  * REG_BR_PROB_BASE / d + 0.5);
      }

  /* Decide which heuristic to use.  In case we didn't match anything,
     use no_prediction heuristic, in case we did match, use either
     first match or Dempster-Shafer theory depending on the flags.  */
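
  /* For two predictions p1 and p2, expressed as fractions of
     REG_BR_PROB_BASE, the Dempster-Shafer combination computed above is

	p = p1 * p2 / (p1 * p2 + (1 - p1) * (1 - p2))

     so, for instance, two independent 75% predictions combine to
     0.5625 / (0.5625 + 0.0625), i.e. a 90% probability.  */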

  if (predictor_info[best_predictor].flags & PRED_FLAG_FIRST_MATCH)
    first_match = true;

  if (!found)
    dump_prediction (dump_file, PRED_NO_PREDICTION,
		     combined_probability, bb, true);
  else
    {
      dump_prediction (dump_file, PRED_DS_THEORY, combined_probability,
		       bb, !first_match);
      dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability,
		       bb, first_match);
    }

  if (first_match)
    combined_probability = best_probability;
  dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);

  while (*pnote)
    {
      if (REG_NOTE_KIND (*pnote) == REG_BR_PRED)
	{
	  int predictor = INTVAL (XEXP (XEXP (*pnote, 0), 0));
	  int probability = INTVAL (XEXP (XEXP (*pnote, 0), 1));

	  dump_prediction (dump_file, predictor, probability, bb,
			   !first_match || best_predictor == predictor);
	  *pnote = XEXP (*pnote, 1);
	}
      else
	pnote = &XEXP (*pnote, 1);
    }

  if (!prob_note)
    {
      add_reg_note (insn, REG_BR_PROB, GEN_INT (combined_probability));

      /* Save the prediction into CFG in case we are seeing non-degenerated
	 conditional jump.  */
      if (!single_succ_p (bb))
	{
	  BRANCH_EDGE (bb)->probability = combined_probability;
	  FALLTHRU_EDGE (bb)->probability
	    = REG_BR_PROB_BASE - combined_probability;
	}
    }
  else if (!single_succ_p (bb))
    {
      int prob = INTVAL (XEXP (prob_note, 0));

      BRANCH_EDGE (bb)->probability = prob;
      FALLTHRU_EDGE (bb)->probability = REG_BR_PROB_BASE - prob;
    }
  else
    single_succ_edge (bb)->probability = REG_BR_PROB_BASE;
}

/* Combine predictions into single probability and store them into CFG.
   Remove now useless prediction entries.  */

static void
combine_predictions_for_bb (basic_block bb)
{
  int best_probability = PROB_EVEN;
  int best_predictor = END_PREDICTORS;
  int combined_probability = REG_BR_PROB_BASE / 2;
  int d;
  bool first_match = false;
  bool found = false;
  struct edge_prediction *pred;
  int nedges = 0;
  edge e, first = NULL, second = NULL;
  edge_iterator ei;
  void **preds;

  FOR_EACH_EDGE (e, ei, bb->succs)
    if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
      {
	nedges++;
	if (first && !second)
	  second = e;
	if (!first)
	  first = e;
      }

  /* When there is no successor or only one choice, prediction is easy.

     We are lazy for now and predict only basic blocks with two outgoing
     edges.  It is possible to predict the generic case too, but we have to
     ignore first match heuristics and do more involved combining.  Implement
     this later.  */
  if (nedges != 2)
    {
      if (!bb->count)
	set_even_probabilities (bb);
      clear_bb_predictions (bb);
      if (dump_file)
	fprintf (dump_file, "%i edges in bb %i predicted to even probabilities\n",
		 nedges, bb->index);
      return;
    }

  if (dump_file)
    fprintf (dump_file, "Predictions for bb %i\n", bb->index);

  preds = pointer_map_contains (bb_predictions, bb);
  if (preds)
    {
      /* We implement "first match" heuristics and use probability guessed
	 by predictor with smallest index.  */
      for (pred = (struct edge_prediction *) *preds; pred; pred = pred->ep_next)
	{
	  int predictor = pred->ep_predictor;
	  int probability = pred->ep_probability;

	  if (pred->ep_edge != first)
	    probability = REG_BR_PROB_BASE - probability;

	  found = true;
	  if (best_predictor > predictor)
	    best_probability = probability, best_predictor = predictor;

	  d = (combined_probability * probability
	       + (REG_BR_PROB_BASE - combined_probability)
	       * (REG_BR_PROB_BASE - probability));

	  /* Use FP math to avoid overflows of 32bit integers.  */
	  if (d == 0)
	    /* If one probability is 0% and one 100%, avoid division by zero.  */
	    combined_probability = REG_BR_PROB_BASE / 2;
	  else
	    combined_probability = (((double) combined_probability)
				    * probability
				    * REG_BR_PROB_BASE / d + 0.5);
	}
    }

  /* Decide which heuristic to use.  In case we didn't match anything,
     use no_prediction heuristic, in case we did match, use either
     first match or Dempster-Shafer theory depending on the flags.  */

  if (predictor_info[best_predictor].flags & PRED_FLAG_FIRST_MATCH)
    first_match = true;

  if (!found)
    dump_prediction (dump_file, PRED_NO_PREDICTION, combined_probability, bb, true);
  else
    {
      dump_prediction (dump_file, PRED_DS_THEORY, combined_probability, bb,
		       !first_match);
      dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability, bb,
		       first_match);
    }

  if (first_match)
    combined_probability = best_probability;
  dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);

  if (preds)
    {
      for (pred = (struct edge_prediction *) *preds; pred; pred = pred->ep_next)
	{
	  int predictor = pred->ep_predictor;
	  int probability = pred->ep_probability;

	  if (pred->ep_edge != EDGE_SUCC (bb, 0))
	    probability = REG_BR_PROB_BASE - probability;
	  dump_prediction (dump_file, predictor, probability, bb,
			   !first_match || best_predictor == predictor);
	}
    }
  clear_bb_predictions (bb);

  if (!bb->count)
    {
      first->probability = combined_probability;
      second->probability = REG_BR_PROB_BASE - combined_probability;
    }
}

/* Predict edge probabilities by exploiting loop structure.  */

static void
predict_loops (void)
{
  loop_iterator li;
  struct loop *loop;

  scev_initialize ();

  /* Try to predict out blocks in a loop that are not part of a
     natural loop.  */
  FOR_EACH_LOOP (li, loop, 0)
    {
      basic_block bb, *bbs;
      unsigned j, n_exits;
      VEC (edge, heap) *exits;
      struct tree_niter_desc niter_desc;
      edge ex;

      exits = get_loop_exit_edges (loop);
      n_exits = VEC_length (edge, exits);

      for (j = 0; VEC_iterate (edge, exits, j, ex); j++)
	{
	  tree niter = NULL;
	  HOST_WIDE_INT nitercst;
	  int max = PARAM_VALUE (PARAM_MAX_PREDICTED_ITERATIONS);
	  int probability;
	  enum br_predictor predictor;

	  if (number_of_iterations_exit (loop, ex, &niter_desc, false))
	    niter = niter_desc.niter;
	  if (!niter || TREE_CODE (niter_desc.niter) != INTEGER_CST)
	    niter = loop_niter_by_eval (loop, ex);

	  if (TREE_CODE (niter) == INTEGER_CST)
	    {
	      if (host_integerp (niter, 1)
		  && compare_tree_int (niter, max - 1) == -1)
		nitercst = tree_low_cst (niter, 1) + 1;
	      else
		nitercst = max;
	      predictor = PRED_LOOP_ITERATIONS;
	    }
	  /* If we have just one exit and we can derive some information about
	     the number of iterations of the loop from the statements inside
	     the loop, use it to predict this exit.  */
	  else if (n_exits == 1)
	    {
	      nitercst = estimated_loop_iterations_int (loop, false);
	      if (nitercst < 0)
		continue;
	      if (nitercst > max)
		nitercst = max;

	      predictor = PRED_LOOP_ITERATIONS_GUESSED;
	    }
	  else
	    continue;

	  probability = ((REG_BR_PROB_BASE + nitercst / 2) / nitercst);
	  predict_edge (ex, predictor, probability);
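
	  /* The probability just attached says that the exit is taken
	     about once per NITERCST iterations: e.g. a loop expected to
	     roll 10 times gets a 10% exit edge, i.e. a 90% chance of
	     looping back.  */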
	}
      VEC_free (edge, heap, exits);

      bbs = get_loop_body (loop);

      for (j = 0; j < loop->num_nodes; j++)
	{
	  int header_found = 0;
	  edge e;
	  edge_iterator ei;

	  bb = bbs[j];

	  /* Bypass loop heuristics on continue statement.  These
	     statements construct loops via "non-loop" constructs
	     in the source language and are better to be handled
	     separately.  */
	  if (predicted_by_p (bb, PRED_CONTINUE))
	    continue;

	  /* Loop branch heuristics - predict an edge back to a
	     loop's head as taken.  */
	  if (bb == loop->latch)
	    {
	      e = find_edge (loop->latch, loop->header);
	      if (e)
		{
		  header_found = 1;
		  predict_edge_def (e, PRED_LOOP_BRANCH, TAKEN);
		}
	    }

	  /* Loop exit heuristics - predict an edge exiting the loop if the
	     conditional has no loop header successors as not taken.  */
	  if (!header_found
	      /* If we already used more reliable loop exit predictors, do not
		 bother with PRED_LOOP_EXIT.  */
	      && !predicted_by_p (bb, PRED_LOOP_ITERATIONS_GUESSED)
	      && !predicted_by_p (bb, PRED_LOOP_ITERATIONS))
	    {
	      /* For loops with many exits we don't want to predict all exits
		 with a fairly large probability, because if all exits are
		 considered in a row, the loop would be predicted to iterate
		 almost never.  The code to divide the probability by the
		 number of exits is very rough.  It should compute the number
		 of exits taken on each path through the function (not the
		 overall number of exits, which might be a lot higher for
		 loops with wide switch statements in them) and take the
		 n-th root of the probability.

		 We limit the minimal probability to 2% to prevent
		 EDGE_PROBABILITY_RELIABLE from trusting the branch
		 prediction, as this was causing a regression in the perl
		 benchmark containing such a wide loop.  */

	      int probability = ((REG_BR_PROB_BASE
				  - predictor_info[(int) PRED_LOOP_EXIT].hitrate)
				 / n_exits);
	      if (probability < HITRATE (2))
		probability = HITRATE (2);
	      FOR_EACH_EDGE (e, ei, bb->succs)
		if (e->dest->index < NUM_FIXED_BLOCKS
		    || !flow_bb_inside_loop_p (loop, e->dest))
		  predict_edge (e, PRED_LOOP_EXIT, probability);
	    }
	}

      /* Free basic blocks from get_loop_body.  */
      free (bbs);
    }

  scev_finalize ();
}

/* Attempt to predict probabilities of BB outgoing edges using local
   properties.  */
static void
bb_estimate_probability_locally (basic_block bb)
{
  rtx last_insn = BB_END (bb);
  rtx cond;

  if (! can_predict_insn_p (last_insn))
    return;
  cond = get_condition (last_insn, NULL, false, false);
  if (! cond)
    return;

  /* Try "pointer heuristic."
     A comparison ptr == 0 is predicted as false.
     Similarly, a comparison ptr1 == ptr2 is predicted as false.  */
  if (COMPARISON_P (cond)
      && ((REG_P (XEXP (cond, 0)) && REG_POINTER (XEXP (cond, 0)))
	  || (REG_P (XEXP (cond, 1)) && REG_POINTER (XEXP (cond, 1)))))
    {
      if (GET_CODE (cond) == EQ)
	predict_insn_def (last_insn, PRED_POINTER, NOT_TAKEN);
      else if (GET_CODE (cond) == NE)
	predict_insn_def (last_insn, PRED_POINTER, TAKEN);
    }
  else

  /* Try "opcode heuristic."
     EQ tests are usually false and NE tests are usually true.  Also,
     most quantities are positive, so we can make the appropriate guesses
     about signed comparisons against zero.  */
  switch (GET_CODE (cond))
    {
    case CONST_INT:
      /* Unconditional branch.  */
      predict_insn_def (last_insn, PRED_UNCONDITIONAL,
			cond == const0_rtx ? NOT_TAKEN : TAKEN);
      break;

    case EQ:
    case UNEQ:
      /* Floating point comparisons appear to behave in a very
	 unpredictable way because of the special role of = tests in
	 FP code.  */
      if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
	;
      /* Comparisons with 0 are often used for booleans and there is
	 nothing useful to predict about them.  */
      else if (XEXP (cond, 1) == const0_rtx
	       || XEXP (cond, 0) == const0_rtx)
	;
      else
	predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, NOT_TAKEN);
      break;

    case NE:
    case LTGT:
      /* Floating point comparisons appear to behave in a very
	 unpredictable way because of the special role of = tests in
	 FP code.  */
      if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
	;
      /* Comparisons with 0 are often used for booleans and there is
	 nothing useful to predict about them.  */
      else if (XEXP (cond, 1) == const0_rtx
	       || XEXP (cond, 0) == const0_rtx)
	;
      else
	predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, TAKEN);
      break;

    case ORDERED:
      predict_insn_def (last_insn, PRED_FPOPCODE, TAKEN);
      break;

    case UNORDERED:
      predict_insn_def (last_insn, PRED_FPOPCODE, NOT_TAKEN);
      break;

    case LE:
    case LT:
      if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
	  || XEXP (cond, 1) == constm1_rtx)
	predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, NOT_TAKEN);
      break;

    case GE:
    case GT:
      if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
	  || XEXP (cond, 1) == constm1_rtx)
	predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, TAKEN);
      break;

    default:
      break;
    }
}

/* Set edge->probability for each successor edge of BB.  */
void
guess_outgoing_edge_probabilities (basic_block bb)
{
  bb_estimate_probability_locally (bb);
  combine_predictions_for_insn (BB_END (bb), bb);
}

static tree expr_expected_value (tree, bitmap);

/* Helper function for expr_expected_value.  */

static tree
expr_expected_value_1 (tree type, tree op0, enum tree_code code, tree op1,
		       bitmap visited)
{
  gimple def;

  if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
    {
      if (TREE_CONSTANT (op0))
	return op0;

      if (code != SSA_NAME)
	return NULL_TREE;

      def = SSA_NAME_DEF_STMT (op0);

      /* If we were already here, break the infinite cycle.  */
      if (bitmap_bit_p (visited, SSA_NAME_VERSION (op0)))
	return NULL;
      bitmap_set_bit (visited, SSA_NAME_VERSION (op0));

      if (gimple_code (def) == GIMPLE_PHI)
	{
	  /* All the arguments of the PHI node must have the same constant
	     expected value.  */
	  int i, n = gimple_phi_num_args (def);
	  tree val = NULL, new_val;

	  for (i = 0; i < n; i++)
	    {
	      tree arg = PHI_ARG_DEF (def, i);

	      /* If this PHI has itself as an argument, we cannot
		 determine the expected value of this argument.  However,
		 if we can find an expected constant value for the other
		 PHI args then we can still be sure that this is
		 likely a constant.  So be optimistic and just
		 continue with the next argument.  */
	      if (arg == PHI_RESULT (def))
		continue;

	      new_val = expr_expected_value (arg, visited);
	      if (!new_val)
		return NULL;
	      if (!val)
		val = new_val;
	      else if (!operand_equal_p (val, new_val, false))
		return NULL;
	    }
	  return val;
	}
      if (is_gimple_assign (def))
	{
	  if (gimple_assign_lhs (def) != op0)
	    return NULL;

	  return expr_expected_value_1 (TREE_TYPE (gimple_assign_lhs (def)),
					gimple_assign_rhs1 (def),
					gimple_assign_rhs_code (def),
					gimple_assign_rhs2 (def),
					visited);
	}

      if (is_gimple_call (def))
	{
	  tree decl = gimple_call_fndecl (def);
	  if (!decl)
	    return NULL;
	  if (DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL
	      && DECL_FUNCTION_CODE (decl) == BUILT_IN_EXPECT)
	    {
	      tree val;

	      if (gimple_call_num_args (def) != 2)
		return NULL;
	      val = gimple_call_arg (def, 0);
	      if (TREE_CONSTANT (val))
		return val;
	      return gimple_call_arg (def, 1);
	    }
	}

      return NULL;
    }

  if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS)
    {
      tree res;
      op0 = expr_expected_value (op0, visited);
      if (!op0)
	return NULL;
      op1 = expr_expected_value (op1, visited);
      if (!op1)
	return NULL;
      res = fold_build2 (code, type, op0, op1);
      if (TREE_CONSTANT (res))
	return res;
      return NULL;
    }
  if (get_gimple_rhs_class (code) == GIMPLE_UNARY_RHS)
    {
      tree res;
      op0 = expr_expected_value (op0, visited);
      if (!op0)
	return NULL;
      res = fold_build1 (code, type, op0);
      if (TREE_CONSTANT (res))
	return res;
      return NULL;
    }
  return NULL;
}

/* Return the constant EXPR is likely to have at execution time, NULL if
   unknown.  The function is used by the __builtin_expect branch predictor,
   so the evidence must come from this construct plus possible additional
   constant folding.

   We may want to implement a more involved value guess (such as value range
   propagation based prediction), but such tricks should go into a new
   implementation.  */

static tree
expr_expected_value (tree expr, bitmap visited)
{
  enum tree_code code;
  tree op0, op1;

  if (TREE_CONSTANT (expr))
    return expr;

  extract_ops_from_tree (expr, &code, &op0, &op1);
  return expr_expected_value_1 (TREE_TYPE (expr),
				op0, code, op1, visited);
}
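
/* As an example of the above: for

     if (__builtin_expect (ptr != NULL, 1))
       ...

   the walk follows the SSA definition chain of the condition back to the
   BUILT_IN_EXPECT call and returns its second argument, so the comparison
   is expected to be true and the branch is predicted taken by
   PRED_BUILTIN_EXPECT.  */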

/* Get rid of all builtin_expect calls and GIMPLE_PREDICT statements
   we no longer need.  */
static unsigned int
strip_predict_hints (void)
{
  basic_block bb;
  gimple ass_stmt;
  tree var;

  FOR_EACH_BB (bb)
    {
      gimple_stmt_iterator bi;
      for (bi = gsi_start_bb (bb); !gsi_end_p (bi);)
	{
	  gimple stmt = gsi_stmt (bi);

	  if (gimple_code (stmt) == GIMPLE_PREDICT)
	    {
	      gsi_remove (&bi, true);
	      continue;
	    }
	  else if (gimple_code (stmt) == GIMPLE_CALL)
	    {
	      tree fndecl = gimple_call_fndecl (stmt);

	      if (fndecl
		  && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
		  && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_EXPECT
		  && gimple_call_num_args (stmt) == 2)
		{
		  var = gimple_call_lhs (stmt);
		  ass_stmt = gimple_build_assign (var, gimple_call_arg (stmt, 0));

		  gsi_replace (&bi, ass_stmt, true);
		}
	    }
	  gsi_next (&bi);
	}
    }
  return 0;
}
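
/* After this pass, a statement such as X = __builtin_expect (Y, 1) has
   been turned into the plain copy X = Y; the hint was already consumed
   by branch prediction above.  */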

/* Predict using opcode of the last statement in basic block.  */
static void
tree_predict_by_opcode (basic_block bb)
{
  gimple stmt = last_stmt (bb);
  edge then_edge;
  tree op0, op1;
  tree type;
  tree val;
  enum tree_code cmp;
  bitmap visited;
  edge_iterator ei;

  if (!stmt || gimple_code (stmt) != GIMPLE_COND)
    return;
  FOR_EACH_EDGE (then_edge, ei, bb->succs)
    if (then_edge->flags & EDGE_TRUE_VALUE)
      break;
  op0 = gimple_cond_lhs (stmt);
  op1 = gimple_cond_rhs (stmt);
  cmp = gimple_cond_code (stmt);
  type = TREE_TYPE (op0);
  visited = BITMAP_ALLOC (NULL);
  val = expr_expected_value_1 (boolean_type_node, op0, cmp, op1, visited);
  BITMAP_FREE (visited);
  if (val)
    {
      if (integer_zerop (val))
	predict_edge_def (then_edge, PRED_BUILTIN_EXPECT, NOT_TAKEN);
      else
	predict_edge_def (then_edge, PRED_BUILTIN_EXPECT, TAKEN);
      return;
    }
  /* Try "pointer heuristic."
     A comparison ptr == 0 is predicted as false.
     Similarly, a comparison ptr1 == ptr2 is predicted as false.  */
  if (POINTER_TYPE_P (type))
    {
      if (cmp == EQ_EXPR)
	predict_edge_def (then_edge, PRED_TREE_POINTER, NOT_TAKEN);
      else if (cmp == NE_EXPR)
	predict_edge_def (then_edge, PRED_TREE_POINTER, TAKEN);
    }
  else

  /* Try "opcode heuristic."
     EQ tests are usually false and NE tests are usually true.  Also,
     most quantities are positive, so we can make the appropriate guesses
     about signed comparisons against zero.  */
  switch (cmp)
    {
    case EQ_EXPR:
    case UNEQ_EXPR:
      /* Floating point comparisons appear to behave in a very
	 unpredictable way because of the special role of = tests in
	 FP code.  */
      if (FLOAT_TYPE_P (type))
	;
      /* Comparisons with 0 are often used for booleans and there is
	 nothing useful to predict about them.  */
      else if (integer_zerop (op0) || integer_zerop (op1))
	;
      else
	predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, NOT_TAKEN);
      break;

    case NE_EXPR:
    case LTGT_EXPR:
      /* Floating point comparisons appear to behave in a very
	 unpredictable way because of the special role of = tests in
	 FP code.  */
      if (FLOAT_TYPE_P (type))
	;
      /* Comparisons with 0 are often used for booleans and there is
	 nothing useful to predict about them.  */
      else if (integer_zerop (op0)
	       || integer_zerop (op1))
	;
      else
	predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, TAKEN);
      break;

    case ORDERED_EXPR:
      predict_edge_def (then_edge, PRED_TREE_FPOPCODE, TAKEN);
      break;

    case UNORDERED_EXPR:
      predict_edge_def (then_edge, PRED_TREE_FPOPCODE, NOT_TAKEN);
      break;

    case LE_EXPR:
    case LT_EXPR:
      if (integer_zerop (op1)
	  || integer_onep (op1)
	  || integer_all_onesp (op1)
	  || real_zerop (op1)
	  || real_onep (op1)
	  || real_minus_onep (op1))
	predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, NOT_TAKEN);
      break;

    case GE_EXPR:
    case GT_EXPR:
      if (integer_zerop (op1)
	  || integer_onep (op1)
	  || integer_all_onesp (op1)
	  || real_zerop (op1)
	  || real_onep (op1)
	  || real_minus_onep (op1))
	predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, TAKEN);
      break;

    default:
      break;
    }
}

/* Try to guess whether the returned value means an error code.  */

static enum br_predictor
return_prediction (tree val, enum prediction *prediction)
{
  /* VOID.  */
  if (!val)
    return PRED_NO_PREDICTION;
  /* Different heuristics for pointers and scalars.  */
  if (POINTER_TYPE_P (TREE_TYPE (val)))
    {
      /* NULL is usually not returned.  */
      if (integer_zerop (val))
	{
	  *prediction = NOT_TAKEN;
	  return PRED_NULL_RETURN;
	}
    }
  else if (INTEGRAL_TYPE_P (TREE_TYPE (val)))
    {
      /* Negative return values are often used to indicate
	 errors.  */
      if (TREE_CODE (val) == INTEGER_CST
	  && tree_int_cst_sgn (val) < 0)
	{
	  *prediction = NOT_TAKEN;
	  return PRED_NEGATIVE_RETURN;
	}
      /* Constant return values seem to be commonly taken.
	 Zero/one often represent booleans, so exclude them from the
	 heuristics.  */
      if (TREE_CONSTANT (val)
	  && (!integer_zerop (val) && !integer_onep (val)))
	{
	  *prediction = TAKEN;
	  return PRED_CONST_RETURN;
	}
    }
  return PRED_NO_PREDICTION;
}

/* Find the basic block with the return expression and look up for possible
   return value, trying to apply RETURN_PREDICTION heuristics.  */
static void
apply_return_prediction (void)
{
  gimple return_stmt = NULL;
  tree return_val;
  edge e;
  gimple phi;
  int phi_num_args, i;
  enum br_predictor pred;
  enum prediction direction;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
    {
      return_stmt = last_stmt (e->src);
      if (return_stmt
	  && gimple_code (return_stmt) == GIMPLE_RETURN)
	break;
    }
  if (!e)
    return;
  return_val = gimple_return_retval (return_stmt);
  if (!return_val)
    return;
  if (TREE_CODE (return_val) != SSA_NAME
      || !SSA_NAME_DEF_STMT (return_val)
      || gimple_code (SSA_NAME_DEF_STMT (return_val)) != GIMPLE_PHI)
    return;
  phi = SSA_NAME_DEF_STMT (return_val);
  phi_num_args = gimple_phi_num_args (phi);
  pred = return_prediction (PHI_ARG_DEF (phi, 0), &direction);

  /* Avoid the degenerate case where all return values from the function
     belong to the same category (i.e. they are all positive constants),
     so we can hardly say anything about them.  */
  for (i = 1; i < phi_num_args; i++)
    if (pred != return_prediction (PHI_ARG_DEF (phi, i), &direction))
      break;
  if (i != phi_num_args)
    for (i = 0; i < phi_num_args; i++)
      {
	pred = return_prediction (PHI_ARG_DEF (phi, i), &direction);
	if (pred != PRED_NO_PREDICTION)
	  predict_paths_leading_to (gimple_phi_arg_edge (phi, i)->src, pred,
				    direction);
      }
}

/* Look for basic blocks that contain unlikely-to-happen events
   (such as noreturn calls) and mark all paths leading to execution
   of such blocks as unlikely.  */

static void
tree_bb_level_predictions (void)
{
  basic_block bb;

  apply_return_prediction ();

  FOR_EACH_BB (bb)
    {
      gimple_stmt_iterator gsi;

      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple stmt = gsi_stmt (gsi);
	  tree decl;

	  if (is_gimple_call (stmt))
	    {
	      if (gimple_call_flags (stmt) & ECF_NORETURN)
		predict_paths_leading_to (bb, PRED_NORETURN,
					  NOT_TAKEN);
	      decl = gimple_call_fndecl (stmt);
	      if (decl
		  && lookup_attribute ("cold",
				       DECL_ATTRIBUTES (decl)))
		predict_paths_leading_to (bb, PRED_COLD_FUNCTION,
					  NOT_TAKEN);
	    }
	  else if (gimple_code (stmt) == GIMPLE_PREDICT)
	    {
	      predict_paths_leading_to (bb, gimple_predict_predictor (stmt),
					gimple_predict_outcome (stmt));
	      /* Keep GIMPLE_PREDICT around so early inlining will propagate
		 hints to callers.  */
	    }
	}
    }
}

#ifdef ENABLE_CHECKING

/* Callback for pointer_map_traverse, asserts that the pointer map is
   empty.  */

static bool
assert_is_empty (const void *key ATTRIBUTE_UNUSED, void **value,
		 void *data ATTRIBUTE_UNUSED)
{
  gcc_assert (!*value);
  return false;
}
#endif

/* Predict branch probabilities and estimate profile of the tree CFG.  */
static unsigned int
tree_estimate_probability (void)
{
  basic_block bb;

  loop_optimizer_init (0);
  if (dump_file && (dump_flags & TDF_DETAILS))
    flow_loops_dump (dump_file, NULL, 0);

  add_noreturn_fake_exit_edges ();
  connect_infinite_loops_to_exit ();
  /* We use loop_niter_by_eval, which requires that the loops have
     preheaders.  */
  create_preheaders (CP_SIMPLE_PREHEADERS);
  calculate_dominance_info (CDI_POST_DOMINATORS);

  bb_predictions = pointer_map_create ();
  tree_bb_level_predictions ();

  mark_irreducible_loops ();
  record_loop_exits ();
  if (number_of_loops () > 1)
    predict_loops ();

  FOR_EACH_BB (bb)
    {
      edge e;
      edge_iterator ei;

      FOR_EACH_EDGE (e, ei, bb->succs)
	{
	  /* Predict early returns to be probable, as we've already taken
	     care of error returns and other cases are often used for
	     fast paths through the function.

	     Since we've already removed the return statements, we are
	     looking for a CFG like:

	       if (cond)
		 goto return_block;
	       ...
	       return_block:
		 return;

	     where several predecessors jump to a shared return block.  */
	  if (e->dest != bb->next_bb
	      && e->dest != EXIT_BLOCK_PTR
	      && single_succ_p (e->dest)
	      && single_succ_edge (e->dest)->dest == EXIT_BLOCK_PTR
	      && gimple_code (last_stmt (e->dest)) == GIMPLE_RETURN)
	    {
	      edge e1;
	      edge_iterator ei1;

	      if (single_succ_p (bb))
		{
		  FOR_EACH_EDGE (e1, ei1, bb->preds)
		    if (!predicted_by_p (e1->src, PRED_NULL_RETURN)
			&& !predicted_by_p (e1->src, PRED_CONST_RETURN)
			&& !predicted_by_p (e1->src, PRED_NEGATIVE_RETURN))
		      predict_edge_def (e1, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
		}
	      else
		if (!predicted_by_p (e->src, PRED_NULL_RETURN)
		    && !predicted_by_p (e->src, PRED_CONST_RETURN)
		    && !predicted_by_p (e->src, PRED_NEGATIVE_RETURN))
		  predict_edge_def (e, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
	    }

	  /* Look for the block we are guarding (i.e. we dominate it,
	     but it doesn't postdominate us).  */
	  if (e->dest != EXIT_BLOCK_PTR && e->dest != bb
	      && dominated_by_p (CDI_DOMINATORS, e->dest, e->src)
	      && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e->dest))
	    {
	      gimple_stmt_iterator bi;

	      /* The call heuristic claims that a guarded function call
		 is improbable.  This is because such calls are often used
		 to signal exceptional situations such as printing error
		 messages.  */
	      for (bi = gsi_start_bb (e->dest); !gsi_end_p (bi);
		   gsi_next (&bi))
		{
		  gimple stmt = gsi_stmt (bi);
		  if (is_gimple_call (stmt)
		      /* Constant and pure calls are hardly used to signal
			 something exceptional.  */
		      && gimple_has_side_effects (stmt))
		    {
		      predict_edge_def (e, PRED_CALL, NOT_TAKEN);
		      break;
		    }
		}
	    }
	}
      tree_predict_by_opcode (bb);
    }
  FOR_EACH_BB (bb)
    combine_predictions_for_bb (bb);

#ifdef ENABLE_CHECKING
  pointer_map_traverse (bb_predictions, assert_is_empty, NULL);
#endif
  pointer_map_destroy (bb_predictions);
  bb_predictions = NULL;

  estimate_bb_frequencies ();
  free_dominance_info (CDI_POST_DOMINATORS);
  remove_fake_exit_edges ();
  loop_optimizer_finalize ();
  if (dump_file && (dump_flags & TDF_DETAILS))
    gimple_dump_cfg (dump_file, dump_flags);
  if (profile_status == PROFILE_ABSENT)
    profile_status = PROFILE_GUESSED;
  return 0;
}

/* Predict edges to successors of CUR whose sources are not postdominated by
   BB by PRED and recurse to all postdominators.  */

static void
predict_paths_for_bb (basic_block cur, basic_block bb,
		      enum br_predictor pred,
		      enum prediction taken)
{
  edge e;
  edge_iterator ei;
  basic_block son;

  /* We are looking for all edges forming the edge cut induced by the
     set of all blocks postdominated by BB.  */
  FOR_EACH_EDGE (e, ei, cur->preds)
    if (e->src->index >= NUM_FIXED_BLOCKS
	&& !dominated_by_p (CDI_POST_DOMINATORS, e->src, bb))
      {
	gcc_assert (bb == cur || dominated_by_p (CDI_POST_DOMINATORS, cur, bb));
	predict_edge_def (e, pred, taken);
      }
  for (son = first_dom_son (CDI_POST_DOMINATORS, cur);
       son;
       son = next_dom_son (CDI_POST_DOMINATORS, son))
    predict_paths_for_bb (son, bb, pred, taken);
}

/* Set branch probabilities of all edges on paths leading to BB
   according to PRED and TAKEN.  */

static void
predict_paths_leading_to (basic_block bb, enum br_predictor pred,
			  enum prediction taken)
{
  predict_paths_for_bb (bb, bb, pred, taken);
}

/* This is used to carry information about basic blocks.  It is
   attached to the AUX field of the standard CFG block.  */

typedef struct block_info_def
{
  /* Estimated frequency of execution of basic_block.  */
  sreal frequency;

  /* To keep queue of basic blocks to process.  */
  basic_block next;

  /* Number of predecessors we need to visit first.  */
  int npredecessors;
} *block_info;

/* Similar information for edges.  */
typedef struct edge_info_def
{
  /* In case edge is a loopback edge, the probability edge will be reached
     in case header is.  Estimated number of iterations of the loop can be
     then computed as 1 / (1 - back_edge_prob).  */
  sreal back_edge_prob;
  /* True if the edge is a loopback edge in the natural loop.  */
  unsigned int back_edge:1;
} *edge_info;

#define BLOCK_INFO(B)	((block_info) (B)->aux)
#define EDGE_INFO(E)	((edge_info) (E)->aux)
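
/* For example, a back edge reached with probability 0.9 per entry into
   the loop gives back_edge_prob of 0.9, so propagate_freq below scales
   the header frequency by 1 / (1 - 0.9), i.e. an estimated 10 iterations
   per entry.  */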

/* Helper function for estimate_bb_frequencies.
   Propagate the frequencies in blocks marked in
   TOVISIT, starting in HEAD.  */

static void
propagate_freq (basic_block head, bitmap tovisit)
{
  basic_block bb;
  basic_block last;
  unsigned i;
  edge e;
  basic_block nextbb;
  bitmap_iterator bi;

  /* For each basic block we need to visit, count the number of its
     predecessors we need to visit first.  */
  EXECUTE_IF_SET_IN_BITMAP (tovisit, 0, i, bi)
    {
      edge_iterator ei;
      int count = 0;

      /* The outermost "loop" includes the exit block, which we cannot
	 look up via BASIC_BLOCK.  Detect this and use EXIT_BLOCK_PTR
	 directly.  Do the same for the entry block.  */
      bb = BASIC_BLOCK (i);

      FOR_EACH_EDGE (e, ei, bb->preds)
	{
	  bool visit = bitmap_bit_p (tovisit, e->src->index);

	  if (visit && !(e->flags & EDGE_DFS_BACK))
	    count++;
	  else if (visit && dump_file && !EDGE_INFO (e)->back_edge)
	    fprintf (dump_file,
		     "Irreducible region hit, ignoring edge to %i->%i\n",
		     e->src->index, bb->index);
	}
      BLOCK_INFO (bb)->npredecessors = count;
    }

  memcpy (&BLOCK_INFO (head)->frequency, &real_one, sizeof (real_one));
  last = head;
  for (bb = head; bb; bb = nextbb)
    {
      edge_iterator ei;
      sreal cyclic_probability, frequency;

      memcpy (&cyclic_probability, &real_zero, sizeof (real_zero));
      memcpy (&frequency, &real_zero, sizeof (real_zero));

      nextbb = BLOCK_INFO (bb)->next;
      BLOCK_INFO (bb)->next = NULL;

      /* Compute frequency of basic block.  */
      if (bb != head)
	{
#ifdef ENABLE_CHECKING
	  FOR_EACH_EDGE (e, ei, bb->preds)
	    gcc_assert (!bitmap_bit_p (tovisit, e->src->index)
			|| (e->flags & EDGE_DFS_BACK));
#endif

	  FOR_EACH_EDGE (e, ei, bb->preds)
	    if (EDGE_INFO (e)->back_edge)
	      {
		sreal_add (&cyclic_probability, &cyclic_probability,
			   &EDGE_INFO (e)->back_edge_prob);
	      }
	    else if (!(e->flags & EDGE_DFS_BACK))
	      {
		sreal tmp;

		/* frequency += (e->probability
				 * BLOCK_INFO (e->src)->frequency /
				 REG_BR_PROB_BASE);  */

		sreal_init (&tmp, e->probability, 0);
		sreal_mul (&tmp, &tmp, &BLOCK_INFO (e->src)->frequency);
		sreal_mul (&tmp, &tmp, &real_inv_br_prob_base);
		sreal_add (&frequency, &frequency, &tmp);
	      }

	  if (sreal_compare (&cyclic_probability, &real_zero) == 0)
	    {
	      memcpy (&BLOCK_INFO (bb)->frequency, &frequency,
		      sizeof (frequency));
	    }
	  else
	    {
	      if (sreal_compare (&cyclic_probability, &real_almost_one) > 0)
		{
		  memcpy (&cyclic_probability, &real_almost_one,
			  sizeof (real_almost_one));
		}

	      /* BLOCK_INFO (bb)->frequency = frequency
					      / (1 - cyclic_probability) */

	      sreal_sub (&cyclic_probability, &real_one, &cyclic_probability);
	      sreal_div (&BLOCK_INFO (bb)->frequency,
			 &frequency, &cyclic_probability);
	    }
	}

      bitmap_clear_bit (tovisit, bb->index);

      e = find_edge (bb, head);
      if (e)
	{
	  sreal tmp;

	  /* EDGE_INFO (e)->back_edge_prob
	     = ((e->probability * BLOCK_INFO (bb)->frequency)
	     / REG_BR_PROB_BASE); */

	  sreal_init (&tmp, e->probability, 0);
	  sreal_mul (&tmp, &tmp, &BLOCK_INFO (bb)->frequency);
	  sreal_mul (&EDGE_INFO (e)->back_edge_prob,
		     &tmp, &real_inv_br_prob_base);
	}

      /* Propagate to successor blocks.  */
      FOR_EACH_EDGE (e, ei, bb->succs)
	if (!(e->flags & EDGE_DFS_BACK)
	    && BLOCK_INFO (e->dest)->npredecessors)
	  {
	    BLOCK_INFO (e->dest)->npredecessors--;
	    if (!BLOCK_INFO (e->dest)->npredecessors)
	      {
		if (!nextbb)
		  nextbb = e->dest;
		else
		  BLOCK_INFO (last)->next = e->dest;

		last = e->dest;
	      }
	  }
    }
}

/* Estimate probabilities of loopback edges in loops at same nest level.  */

static void
estimate_loops_at_level (struct loop *first_loop)
{
  struct loop *loop;

  for (loop = first_loop; loop; loop = loop->next)
    {
      edge e;
      basic_block *bbs;
      unsigned i;
      bitmap tovisit = BITMAP_ALLOC (NULL);

      estimate_loops_at_level (loop->inner);

      /* Find current loop back edge and mark it.  */
      e = loop_latch_edge (loop);
      EDGE_INFO (e)->back_edge = 1;

      bbs = get_loop_body (loop);
      for (i = 0; i < loop->num_nodes; i++)
	bitmap_set_bit (tovisit, bbs[i]->index);
      free (bbs);
      propagate_freq (loop->header, tovisit);
      BITMAP_FREE (tovisit);
    }
}

/* Propagates frequencies through structure of loops.  */

static void
estimate_loops (void)
{
  bitmap tovisit = BITMAP_ALLOC (NULL);
  basic_block bb;

  /* Start by estimating the frequencies in the loops.  */
  if (number_of_loops () > 1)
    estimate_loops_at_level (current_loops->tree_root->inner);

  /* Now propagate the frequencies through all the blocks.  */
  FOR_ALL_BB (bb)
    {
      bitmap_set_bit (tovisit, bb->index);
    }
  propagate_freq (ENTRY_BLOCK_PTR, tovisit);
  BITMAP_FREE (tovisit);
}

/* Convert counts measured by profile driven feedback to frequencies.
   Return nonzero iff there was any nonzero execution count.  */

int
counts_to_freqs (void)
{
  gcov_type count_max, true_count_max = 0;
  basic_block bb;

  FOR_EACH_BB (bb)
    true_count_max = MAX (bb->count, true_count_max);

  count_max = MAX (true_count_max, 1);
  FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
    bb->frequency = (bb->count * BB_FREQ_MAX + count_max / 2) / count_max;

  return true_count_max;
}
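
/* For instance, a block executed half as many times as the most frequent
   block in the function ends up with frequency BB_FREQ_MAX / 2; the
   count_max / 2 term makes the scaling round to nearest.  */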

/* Return true if function is likely to be expensive, so there is no point
   to optimize performance of prologue, epilogue or do inlining at the
   expense of code size growth.  THRESHOLD is the limit of number of
   instructions function can execute on average to still be considered
   not expensive.  */

bool
expensive_function_p (int threshold)
{
  unsigned int sum = 0;
  basic_block bb;
  unsigned int limit;

  /* We cannot compute accurately for large thresholds due to scaled
     frequencies.  */
  gcc_assert (threshold <= BB_FREQ_MAX);

  /* Frequencies are out of range.  This either means that the function
     contains an internal loop executing more than BB_FREQ_MAX times or
     profile feedback is available and the function has not been executed
     at all.  */
  if (ENTRY_BLOCK_PTR->frequency == 0)
    return true;

  /* Maximally BB_FREQ_MAX^2 so overflow won't happen.  */
  limit = ENTRY_BLOCK_PTR->frequency * threshold;
  FOR_EACH_BB (bb)
    {
      rtx insn;

      for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb));
	   insn = NEXT_INSN (insn))
	if (active_insn_p (insn))
	  {
	    sum += bb->frequency;
	    if (sum > limit)
	      return true;
	  }
    }

  return false;
}

/* Estimate basic blocks frequency by given branch probabilities.  */

void
estimate_bb_frequencies (void)
{
  basic_block bb;
  sreal freq_max;

  if (!flag_branch_probabilities || !counts_to_freqs ())
    {
      static int real_values_initialized = 0;

      if (!real_values_initialized)
	{
	  real_values_initialized = 1;
	  sreal_init (&real_zero, 0, 0);
	  sreal_init (&real_one, 1, 0);
	  sreal_init (&real_br_prob_base, REG_BR_PROB_BASE, 0);
	  sreal_init (&real_bb_freq_max, BB_FREQ_MAX, 0);
	  sreal_init (&real_one_half, 1, -1);
	  sreal_div (&real_inv_br_prob_base, &real_one, &real_br_prob_base);
	  sreal_sub (&real_almost_one, &real_one, &real_inv_br_prob_base);
	}

      mark_dfs_back_edges ();

      single_succ_edge (ENTRY_BLOCK_PTR)->probability = REG_BR_PROB_BASE;

      /* Set up block info for each basic block.  */
      alloc_aux_for_blocks (sizeof (struct block_info_def));
      alloc_aux_for_edges (sizeof (struct edge_info_def));
      FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
	{
	  edge e;
	  edge_iterator ei;

	  FOR_EACH_EDGE (e, ei, bb->succs)
	    {
	      sreal_init (&EDGE_INFO (e)->back_edge_prob, e->probability, 0);
	      sreal_mul (&EDGE_INFO (e)->back_edge_prob,
			 &EDGE_INFO (e)->back_edge_prob,
			 &real_inv_br_prob_base);
	    }
	}

      /* First compute probabilities locally for each loop from innermost
	 to outermost to examine probabilities for back edges.  */
      estimate_loops ();

      memcpy (&freq_max, &real_zero, sizeof (real_zero));
      FOR_EACH_BB (bb)
	if (sreal_compare (&freq_max, &BLOCK_INFO (bb)->frequency) < 0)
	  memcpy (&freq_max, &BLOCK_INFO (bb)->frequency, sizeof (freq_max));

      sreal_div (&freq_max, &real_bb_freq_max, &freq_max);
      FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
	{
	  sreal tmp;

	  sreal_mul (&tmp, &BLOCK_INFO (bb)->frequency, &freq_max);
	  sreal_add (&tmp, &tmp, &real_one_half);
	  bb->frequency = sreal_to_int (&tmp);
	}

      free_aux_for_blocks ();
      free_aux_for_edges ();
    }
  compute_function_frequency ();
  if (flag_reorder_functions)
    choose_function_section ();
}

/* Decide whether function is hot, cold or unlikely executed.  */
static void
compute_function_frequency (void)
{
  basic_block bb;

  if (!profile_info || !flag_branch_probabilities)
    {
      if (lookup_attribute ("cold", DECL_ATTRIBUTES (current_function_decl))
	  != NULL)
	cfun->function_frequency = FUNCTION_FREQUENCY_UNLIKELY_EXECUTED;
      else if (lookup_attribute ("hot", DECL_ATTRIBUTES (current_function_decl))
	       != NULL)
	cfun->function_frequency = FUNCTION_FREQUENCY_HOT;
      return;
    }
  cfun->function_frequency = FUNCTION_FREQUENCY_UNLIKELY_EXECUTED;
  FOR_EACH_BB (bb)
    {
      if (maybe_hot_bb_p (bb))
	{
	  cfun->function_frequency = FUNCTION_FREQUENCY_HOT;
	  return;
	}
      if (!probably_never_executed_bb_p (bb))
	cfun->function_frequency = FUNCTION_FREQUENCY_NORMAL;
    }
}

/* Choose appropriate section for the function.  */
static void
choose_function_section (void)
{
  if (DECL_SECTION_NAME (current_function_decl)
      || !targetm.have_named_sections
      /* Theoretically we can split the gnu.linkonce text section too,
	 but this requires more work as the frequency needs to match
	 for all generated objects so we need to merge the frequency
	 of all instances.  For now just never set frequency for these.  */
      || DECL_ONE_ONLY (current_function_decl))
    return;

  /* If we are doing the partitioning optimization, let the optimization
     choose the correct section into which to put things.  */

  if (flag_reorder_blocks_and_partition)
    return;

  if (cfun->function_frequency == FUNCTION_FREQUENCY_HOT)
    DECL_SECTION_NAME (current_function_decl) =
      build_string (strlen (HOT_TEXT_SECTION_NAME), HOT_TEXT_SECTION_NAME);
  if (cfun->function_frequency == FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
    DECL_SECTION_NAME (current_function_decl) =
      build_string (strlen (UNLIKELY_EXECUTED_TEXT_SECTION_NAME),
		    UNLIKELY_EXECUTED_TEXT_SECTION_NAME);
}

static bool
gate_estimate_probability (void)
{
  return flag_guess_branch_prob;
}

/* Build PREDICT_EXPR.  */
tree
build_predict_expr (enum br_predictor predictor, enum prediction taken)
{
  tree t = build1 (PREDICT_EXPR, void_type_node,
		   build_int_cst (NULL, predictor));
  PREDICT_EXPR_OUTCOME (t) = taken;
  return t;
}
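
/* For instance, build_predict_expr (PRED_NORETURN, NOT_TAKEN) creates the
   PREDICT_EXPR that the gimplifier later lowers to a GIMPLE_PREDICT
   statement, which tree_bb_level_predictions above consumes.  */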

const char *
predictor_name (enum br_predictor predictor)
{
  return predictor_info[predictor].name;
}

struct gimple_opt_pass pass_profile =
{
 {
  GIMPLE_PASS,
  "profile",				/* name */
  gate_estimate_probability,		/* gate */
  tree_estimate_probability,		/* execute */
  NULL,					/* sub */
  NULL,					/* next */
  0,					/* static_pass_number */
  TV_BRANCH_PROB,			/* tv_id */
  PROP_cfg,				/* properties_required */
  0,					/* properties_provided */
  0,					/* properties_destroyed */
  0,					/* todo_flags_start */
  TODO_ggc_collect | TODO_verify_ssa	/* todo_flags_finish */
 }
};

struct gimple_opt_pass pass_strip_predict_hints =
{
 {
  GIMPLE_PASS,
  NULL,					/* name */
  NULL,					/* gate */
  strip_predict_hints,			/* execute */
  NULL,					/* sub */
  NULL,					/* next */
  0,					/* static_pass_number */
  TV_BRANCH_PROB,			/* tv_id */
  PROP_cfg,				/* properties_required */
  0,					/* properties_provided */
  0,					/* properties_destroyed */
  0,					/* todo_flags_start */
  TODO_ggc_collect | TODO_verify_ssa	/* todo_flags_finish */
 }
};