/* Branch prediction routines for the GNU compiler.
   Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2007, 2008
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
23 [1] "Branch Prediction for Free"
24 Ball and Larus; PLDI '93.
25 [2] "Static Branch Frequency and Program Profile Analysis"
26 Wu and Larus; MICRO-27.
27 [3] "Corpus-based Static Branch Prediction"
28 Calder, Grunwald, Lindsay, Martin, Mozer, and Zorn; PLDI '95. */
#include "coretypes.h"
#include "hard-reg-set.h"
#include "basic-block.h"
#include "insn-config.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "tree-pass.h"
#include "tree-scalar-evolution.h"
#include "pointer-set.h"
/* real constants: 0, 1, 1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE,
   1/REG_BR_PROB_BASE, 0.5, BB_FREQ_MAX.  */
static sreal real_zero, real_one, real_almost_one, real_br_prob_base,
             real_inv_br_prob_base, real_one_half, real_bb_freq_max;
/* Random guesstimates, given names.  */
#define PROB_VERY_UNLIKELY  (REG_BR_PROB_BASE / 100 - 1)
#define PROB_EVEN           (REG_BR_PROB_BASE / 2)
#define PROB_VERY_LIKELY    (REG_BR_PROB_BASE - PROB_VERY_UNLIKELY)
#define PROB_ALWAYS         (REG_BR_PROB_BASE)
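
/* For illustration: assuming REG_BR_PROB_BASE is 10000 (its usual value),
   the macros above expand to

     PROB_VERY_UNLIKELY = 99      (just under 1%)
     PROB_EVEN          = 5000    (50%)
     PROB_VERY_LIKELY   = 9901    (just over 99%)
     PROB_ALWAYS        = 10000   (100%)  */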
static void combine_predictions_for_insn (rtx, basic_block);
static void dump_prediction (FILE *, enum br_predictor, int, basic_block, int);
static void predict_paths_leading_to (basic_block, enum br_predictor,
                                      enum prediction);
static void compute_function_frequency (void);
static void choose_function_section (void);
static bool can_predict_insn_p (const_rtx);
/* Information we hold about each branch predictor.
   Filled using information from predict.def.  */

struct predictor_info
{
  const char *const name;   /* Name used in the debugging dumps.  */
  const int hitrate;        /* Expected hitrate used by
                               predict_insn_def call.  */
  const int flags;
};
/* Use given predictor without Dempster-Shafer theory if it matches
   using first_match heuristics.  */
#define PRED_FLAG_FIRST_MATCH 1
/* Convert a hitrate given in percent to our fixed-point representation.  */

#define HITRATE(VAL) ((int) ((VAL) * REG_BR_PROB_BASE + 50) / 100)
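
/* For illustration: assuming REG_BR_PROB_BASE is 10000 (its usual value),
   HITRATE (75) expands to ((int) (75 * 10000 + 50) / 100) == 7500, i.e. a
   75% hitrate maps to 7500 on the fixed-point probability scale.  */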
#define DEF_PREDICTOR(ENUM, NAME, HITRATE, FLAGS) {NAME, HITRATE, FLAGS},
static const struct predictor_info predictor_info[] = {
#include "predict.def"

  /* Upper bound on predictors.  */
  {NULL, 0, 0}
};
#undef DEF_PREDICTOR
/* Return TRUE if frequency FREQ is considered to be hot.  */

static inline bool
maybe_hot_frequency_p (int freq)
{
  if (!profile_info || !flag_branch_probabilities)
    {
      if (cfun->function_frequency == FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
        return false;
      if (cfun->function_frequency == FUNCTION_FREQUENCY_HOT)
        return true;
    }
  if (profile_status == PROFILE_ABSENT)
    return true;
  if (freq < BB_FREQ_MAX / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION))
    return false;
  return true;
}
/* Return true in case BB can be CPU intensive and should be optimized
   for maximal performance.  */

bool
maybe_hot_bb_p (const_basic_block bb)
{
  if (profile_info && flag_branch_probabilities
      && (bb->count
          < profile_info->sum_max / PARAM_VALUE (HOT_BB_COUNT_FRACTION)))
    return false;
  return maybe_hot_frequency_p (bb->frequency);
}
/* Return true if the call can be hot.  */

bool
cgraph_maybe_hot_edge_p (struct cgraph_edge *edge)
{
  if (profile_info && flag_branch_probabilities
      && (edge->count
          <= profile_info->sum_max / PARAM_VALUE (HOT_BB_COUNT_FRACTION)))
    return false;
  if (lookup_attribute ("cold", DECL_ATTRIBUTES (edge->callee->decl))
      || lookup_attribute ("cold", DECL_ATTRIBUTES (edge->caller->decl)))
    return false;
  if (lookup_attribute ("hot", DECL_ATTRIBUTES (edge->caller->decl)))
    return true;
  if (flag_guess_branch_prob
      && edge->frequency < (CGRAPH_FREQ_MAX
                            / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION)))
    return false;
  return true;
}
/* Return true in case edge E can be CPU intensive and should be
   optimized for maximal performance.  */

bool
maybe_hot_edge_p (edge e)
{
  if (profile_info && flag_branch_probabilities
      && (e->count
          < profile_info->sum_max / PARAM_VALUE (HOT_BB_COUNT_FRACTION)))
    return false;
  return maybe_hot_frequency_p (EDGE_FREQUENCY (e));
}
/* Return true in case BB is cold and should be optimized for size.  */

bool
probably_cold_bb_p (const_basic_block bb)
{
  if (profile_info && flag_branch_probabilities
      && (bb->count
          < profile_info->sum_max / PARAM_VALUE (HOT_BB_COUNT_FRACTION)))
    return true;
  if ((!profile_info || !flag_branch_probabilities)
      && cfun->function_frequency == FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
    return true;
  if (bb->frequency < BB_FREQ_MAX / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION))
    return true;
  return false;
}
/* Return true in case BB is probably never executed.  */

bool
probably_never_executed_bb_p (const_basic_block bb)
{
  if (profile_info && flag_branch_probabilities)
    return ((bb->count + profile_info->runs / 2) / profile_info->runs) == 0;
  if ((!profile_info || !flag_branch_probabilities)
      && cfun->function_frequency == FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
    return true;
  return false;
}
/* Return true when current function should always be optimized for size.  */

bool
optimize_function_for_size_p (struct function *fun)
{
  return (optimize_size
          || fun->function_frequency == FUNCTION_FREQUENCY_UNLIKELY_EXECUTED);
}
/* Return true when current function should always be optimized for speed.  */

bool
optimize_function_for_speed_p (struct function *fun)
{
  return !optimize_function_for_size_p (fun);
}
/* Return TRUE when BB should be optimized for size.  */

bool
optimize_bb_for_size_p (const_basic_block bb)
{
  return optimize_function_for_size_p (cfun) || !maybe_hot_bb_p (bb);
}
/* Return TRUE when BB should be optimized for speed.  */

bool
optimize_bb_for_speed_p (const_basic_block bb)
{
  return !optimize_bb_for_size_p (bb);
}
/* Return TRUE when edge E should be optimized for size.  */

bool
optimize_edge_for_size_p (edge e)
{
  return optimize_function_for_size_p (cfun) || !maybe_hot_edge_p (e);
}
/* Return TRUE when edge E should be optimized for speed.  */

bool
optimize_edge_for_speed_p (edge e)
{
  return !optimize_edge_for_size_p (e);
}
/* Return TRUE when the current instruction should be optimized for size.  */

bool
optimize_insn_for_size_p (void)
{
  return optimize_function_for_size_p (cfun) || !crtl->maybe_hot_insn_p;
}
/* Return TRUE when the current instruction should be optimized for speed.  */

bool
optimize_insn_for_speed_p (void)
{
  return !optimize_insn_for_size_p ();
}
/* Return TRUE when LOOP should be optimized for size.  */

bool
optimize_loop_for_size_p (struct loop *loop)
{
  return optimize_bb_for_size_p (loop->header);
}
/* Return TRUE when LOOP should be optimized for speed.  */

bool
optimize_loop_for_speed_p (struct loop *loop)
{
  return optimize_bb_for_speed_p (loop->header);
}
/* Return TRUE when LOOP nest should be optimized for speed.  */

bool
optimize_loop_nest_for_speed_p (struct loop *loop)
{
  struct loop *l = loop;
  if (optimize_loop_for_speed_p (loop))
    return true;
  l = loop->inner;
  while (l && l != loop)
    {
      if (optimize_loop_for_speed_p (l))
        return true;
      /* Walk into inner loops first; otherwise move to the next sibling,
         climbing back up until a sibling or LOOP itself is found.  */
      if (l->inner)
        l = l->inner;
      else if (l->next)
        l = l->next;
      else
        {
          while (l != loop && !l->next)
            l = loop_outer (l);
          if (l != loop)
            l = l->next;
        }
    }
  return false;
}
/* Return TRUE when LOOP nest should be optimized for size.  */

bool
optimize_loop_nest_for_size_p (struct loop *loop)
{
  return !optimize_loop_nest_for_speed_p (loop);
}
/* Set RTL expansion for BB profile.  */

void
rtl_profile_for_bb (basic_block bb)
{
  crtl->maybe_hot_insn_p = maybe_hot_bb_p (bb);
}
/* Set RTL expansion for edge profile.  */

void
rtl_profile_for_edge (edge e)
{
  crtl->maybe_hot_insn_p = maybe_hot_edge_p (e);
}
/* Set RTL expansion to default mode (i.e. when profile info is not known).  */

void
default_rtl_profile (void)
{
  crtl->maybe_hot_insn_p = true;
}
/* Return true if one of the outgoing edges is already predicted by
   PREDICTOR.  */

bool
rtl_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
{
  rtx note;
  if (!INSN_P (BB_END (bb)))
    return false;
  for (note = REG_NOTES (BB_END (bb)); note; note = XEXP (note, 1))
    if (REG_NOTE_KIND (note) == REG_BR_PRED
        && INTVAL (XEXP (XEXP (note, 0), 0)) == (int)predictor)
      return true;
  return false;
}
/* This map contains for a basic block the list of predictions for the
   outgoing edges.  */

static struct pointer_map_t *bb_predictions;
/* Return true if one of the outgoing edges is already predicted by
   PREDICTOR.  */

bool
gimple_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
{
  struct edge_prediction *i;
  void **preds = pointer_map_contains (bb_predictions, bb);

  if (!preds)
    return false;

  for (i = (struct edge_prediction *) *preds; i; i = i->ep_next)
    if (i->ep_predictor == predictor)
      return true;
  return false;
}
/* Return true when the probability of edge is reliable.

   The profile guessing code is good at predicting branch outcome (i.e.
   taken/not taken), which it gets right slightly over 75% of the time.
   It is however notoriously poor at predicting the probability itself.
   In general the guessed profile appears a lot flatter (with probabilities
   closer to 50%) than reality, so it is a bad idea to use it to drive
   optimizations such as those disabling dynamic branch prediction for
   well-predictable branches.

   There are two exceptions - edges leading to noreturn calls and edges
   predicted by the number-of-iterations heuristics are predicted well.
   This predicate should be able to distinguish those, but at the moment
   it simply checks for the noreturn heuristic, which is the only one
   giving a probability over 99% or below 1%.  In the future we might want
   to propagate reliability information across the CFG if we find it
   useful in multiple places.  */

static bool
probability_reliable_p (int prob)
{
  return (profile_status == PROFILE_READ
          || (profile_status == PROFILE_GUESSED
              && (prob <= HITRATE (1) || prob >= HITRATE (99))));
}
/* Same predicate as above, working on edges.  */

bool
edge_probability_reliable_p (const_edge e)
{
  return probability_reliable_p (e->probability);
}
/* Same predicate as edge_probability_reliable_p, working on notes.  */

bool
br_prob_note_reliable_p (const_rtx note)
{
  gcc_assert (REG_NOTE_KIND (note) == REG_BR_PROB);
  return probability_reliable_p (INTVAL (XEXP (note, 0)));
}
static void
predict_insn (rtx insn, enum br_predictor predictor, int probability)
{
  gcc_assert (any_condjump_p (insn));
  if (!flag_guess_branch_prob)
    return;

  add_reg_note (insn, REG_BR_PRED,
                gen_rtx_CONCAT (VOIDmode,
                                GEN_INT ((int) predictor),
                                GEN_INT ((int) probability)));
}
/* Predict insn by given predictor.  */

void
predict_insn_def (rtx insn, enum br_predictor predictor,
                  enum prediction taken)
{
  int probability = predictor_info[(int) predictor].hitrate;

  if (taken != TAKEN)
    probability = REG_BR_PROB_BASE - probability;

  predict_insn (insn, predictor, probability);
}
/* Predict edge E with given probability if possible.  */

void
rtl_predict_edge (edge e, enum br_predictor predictor, int probability)
{
  rtx last_insn;
  last_insn = BB_END (e->src);

  /* We can store the branch prediction information only about
     conditional jumps.  */
  if (!any_condjump_p (last_insn))
    return;

  /* We always store probability of branching.  */
  if (e->flags & EDGE_FALLTHRU)
    probability = REG_BR_PROB_BASE - probability;

  predict_insn (last_insn, predictor, probability);
}
/* Predict edge E with the given PROBABILITY.  */

void
gimple_predict_edge (edge e, enum br_predictor predictor, int probability)
{
  gcc_assert (profile_status != PROFILE_GUESSED);
  if ((e->src != ENTRY_BLOCK_PTR && EDGE_COUNT (e->src->succs) > 1)
      && flag_guess_branch_prob && optimize)
    {
      struct edge_prediction *i = XNEW (struct edge_prediction);
      void **preds = pointer_map_insert (bb_predictions, e->src);

      i->ep_next = (struct edge_prediction *) *preds;
      *preds = i;
      i->ep_probability = probability;
      i->ep_predictor = predictor;
      i->ep_edge = e;
    }
}
/* Remove all predictions on given basic block that are attached
   to edge E.  */

void
remove_predictions_associated_with_edge (edge e)
{
  void **preds;

  if (!bb_predictions)
    return;

  preds = pointer_map_contains (bb_predictions, e->src);

  if (preds)
    {
      struct edge_prediction **prediction = (struct edge_prediction **) preds;
      struct edge_prediction *next;

      while (*prediction)
        {
          if ((*prediction)->ep_edge == e)
            {
              next = (*prediction)->ep_next;
              free (*prediction);
              *prediction = next;
            }
          else
            prediction = &((*prediction)->ep_next);
        }
    }
}
/* Clears the list of predictions stored for BB.  */

static void
clear_bb_predictions (basic_block bb)
{
  void **preds = pointer_map_contains (bb_predictions, bb);
  struct edge_prediction *pred, *next;

  if (!preds)
    return;

  for (pred = (struct edge_prediction *) *preds; pred; pred = next)
    {
      next = pred->ep_next;
      free (pred);
    }
  *preds = NULL;
}
/* Return true when we can store prediction on insn INSN.
   At the moment we represent predictions only on conditional
   jumps, not on computed jumps or other complicated cases.  */

static bool
can_predict_insn_p (const_rtx insn)
{
  return (JUMP_P (insn)
          && any_condjump_p (insn)
          && EDGE_COUNT (BLOCK_FOR_INSN (insn)->succs) >= 2);
}
/* Predict edge E by given predictor if possible.  */

void
predict_edge_def (edge e, enum br_predictor predictor,
                  enum prediction taken)
{
  int probability = predictor_info[(int) predictor].hitrate;

  if (taken != TAKEN)
    probability = REG_BR_PROB_BASE - probability;

  predict_edge (e, predictor, probability);
}
/* Invert all branch predictions or probability notes in the INSN.  This needs
   to be done each time we invert the condition used by the jump.  */

void
invert_br_probabilities (rtx insn)
{
  rtx note;

  for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
    if (REG_NOTE_KIND (note) == REG_BR_PROB)
      XEXP (note, 0) = GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (note, 0)));
    else if (REG_NOTE_KIND (note) == REG_BR_PRED)
      XEXP (XEXP (note, 0), 1)
        = GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (XEXP (note, 0), 1)));
}
/* Dump information about the branch prediction to the output file.  */

static void
dump_prediction (FILE *file, enum br_predictor predictor, int probability,
                 basic_block bb, int used)
{
  edge e;
  edge_iterator ei;

  if (!file)
    return;

  FOR_EACH_EDGE (e, ei, bb->succs)
    if (! (e->flags & EDGE_FALLTHRU))
      break;

  fprintf (file, "  %s heuristics%s: %.1f%%",
           predictor_info[predictor].name,
           used ? "" : " (ignored)", probability * 100.0 / REG_BR_PROB_BASE);

  if (bb->count)
    {
      fprintf (file, "  exec ");
      fprintf (file, HOST_WIDEST_INT_PRINT_DEC, bb->count);
      if (e)
        {
          fprintf (file, " hit ");
          fprintf (file, HOST_WIDEST_INT_PRINT_DEC, e->count);
          fprintf (file, " (%.1f%%)", e->count * 100.0 / bb->count);
        }
    }

  fprintf (file, "\n");
}
/* We cannot predict the probabilities of outgoing edges of bb.  Set them
   evenly and hope for the best.  */

static void
set_even_probabilities (basic_block bb)
{
  int nedges = 0;
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, bb->succs)
    if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
      nedges++;
  FOR_EACH_EDGE (e, ei, bb->succs)
    if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
      e->probability = (REG_BR_PROB_BASE + nedges / 2) / nedges;
    else
      e->probability = 0;
}
/* Combine all REG_BR_PRED notes into single probability and attach REG_BR_PROB
   note if not already present.  Remove now useless REG_BR_PRED notes.  */

static void
combine_predictions_for_insn (rtx insn, basic_block bb)
{
  rtx prob_note;
  rtx *pnote;
  rtx note;
  int best_probability = PROB_EVEN;
  int best_predictor = END_PREDICTORS;
  int combined_probability = REG_BR_PROB_BASE / 2;
  int d;
  bool first_match = false;
  bool found = false;

  if (!can_predict_insn_p (insn))
    {
      set_even_probabilities (bb);
      return;
    }

  prob_note = find_reg_note (insn, REG_BR_PROB, 0);
  pnote = &REG_NOTES (insn);
  if (dump_file)
    fprintf (dump_file, "Predictions for insn %i bb %i\n", INSN_UID (insn),
             bb->index);

  /* We implement "first match" heuristics and use probability guessed
     by predictor with smallest index.  */
  for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
    if (REG_NOTE_KIND (note) == REG_BR_PRED)
      {
        int predictor = INTVAL (XEXP (XEXP (note, 0), 0));
        int probability = INTVAL (XEXP (XEXP (note, 0), 1));

        found = true;
        if (best_predictor > predictor)
          best_probability = probability, best_predictor = predictor;

        d = (combined_probability * probability
             + (REG_BR_PROB_BASE - combined_probability)
             * (REG_BR_PROB_BASE - probability));

        /* Use FP math to avoid overflows of 32bit integers.  */
        if (d == 0)
          /* If one probability is 0% and one 100%, avoid division by zero.  */
          combined_probability = REG_BR_PROB_BASE / 2;
        else
          combined_probability = (((double) combined_probability) * probability
                                  * REG_BR_PROB_BASE / d + 0.5);
      }

  /* Decide which heuristic to use.  In case we didn't match anything,
     use no_prediction heuristic, in case we did match, use either
     first match or Dempster-Shafer theory depending on the flags.  */

  if (predictor_info[best_predictor].flags & PRED_FLAG_FIRST_MATCH)
    first_match = true;

  if (!found)
    dump_prediction (dump_file, PRED_NO_PREDICTION,
                     combined_probability, bb, true);
  else
    {
      dump_prediction (dump_file, PRED_DS_THEORY, combined_probability,
                       bb, !first_match);
      dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability,
                       bb, first_match);
    }

  if (first_match)
    combined_probability = best_probability;
  dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);

  while (*pnote)
    {
      if (REG_NOTE_KIND (*pnote) == REG_BR_PRED)
        {
          int predictor = INTVAL (XEXP (XEXP (*pnote, 0), 0));
          int probability = INTVAL (XEXP (XEXP (*pnote, 0), 1));

          dump_prediction (dump_file, predictor, probability, bb,
                           !first_match || best_predictor == predictor);
          *pnote = XEXP (*pnote, 1);
        }
      else
        pnote = &XEXP (*pnote, 1);
    }

  if (!prob_note)
    {
      add_reg_note (insn, REG_BR_PROB, GEN_INT (combined_probability));

      /* Save the prediction into CFG in case we are seeing non-degenerated
         conditional jump.  */
      if (!single_succ_p (bb))
        {
          BRANCH_EDGE (bb)->probability = combined_probability;
          FALLTHRU_EDGE (bb)->probability
            = REG_BR_PROB_BASE - combined_probability;
        }
    }
  else if (!single_succ_p (bb))
    {
      int prob = INTVAL (XEXP (prob_note, 0));

      BRANCH_EDGE (bb)->probability = prob;
      FALLTHRU_EDGE (bb)->probability = REG_BR_PROB_BASE - prob;
    }
  else
    single_succ_edge (bb)->probability = REG_BR_PROB_BASE;
}
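
/* For illustration, the Dempster-Shafer combination used above in
   isolation.  A hypothetical sketch, not used by the compiler itself;
   it assumes REG_BR_PROB_BASE == 10000 (its usual value).  Combining an
   even prior (5000) with a 70% prediction yields 7000; combining that
   with an agreeing 80% prediction yields
   (7000*8000*10000) / (7000*8000 + 3000*2000) + 0.5 = 9032, i.e.
   agreeing predictors reinforce each other.  */
#if 0
static int
ds_combine (int p1, int p2)
{
  /* Use FP math, as above, to avoid overflowing 32bit integers.  */
  double d = ((double) p1 * p2
              + (double) (REG_BR_PROB_BASE - p1) * (REG_BR_PROB_BASE - p2));
  if (d == 0)
    return REG_BR_PROB_BASE / 2;
  return (int) ((double) p1 * p2 * REG_BR_PROB_BASE / d + 0.5);
}
#endif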
/* Combine predictions into single probability and store them into CFG.
   Remove now useless prediction entries.  */

static void
combine_predictions_for_bb (basic_block bb)
{
  int best_probability = PROB_EVEN;
  int best_predictor = END_PREDICTORS;
  int combined_probability = REG_BR_PROB_BASE / 2;
  int d;
  bool first_match = false;
  bool found = false;
  struct edge_prediction *pred;
  int nedges = 0;
  edge e, first = NULL, second = NULL;
  edge_iterator ei;
  void **preds;

  FOR_EACH_EDGE (e, ei, bb->succs)
    if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
      {
        nedges++;
        if (first && !second)
          second = e;
        if (!first)
          first = e;
      }

  /* When there is no successor or only one choice, prediction is easy.

     We are lazy for now and predict only basic blocks with two outgoing
     edges.  It is possible to predict the generic case too, but we have to
     ignore first match heuristics and do more involved combining.  Implement
     this later.  */
  if (nedges != 2)
    {
      if (!bb->count)
        set_even_probabilities (bb);
      clear_bb_predictions (bb);
      if (dump_file)
        fprintf (dump_file, "%i edges in bb %i predicted to even probabilities\n",
                 nedges, bb->index);
      return;
    }

  if (dump_file)
    fprintf (dump_file, "Predictions for bb %i\n", bb->index);

  preds = pointer_map_contains (bb_predictions, bb);
  if (preds)
    {
      /* We implement "first match" heuristics and use probability guessed
         by predictor with smallest index.  */
      for (pred = (struct edge_prediction *) *preds; pred; pred = pred->ep_next)
        {
          int predictor = pred->ep_predictor;
          int probability = pred->ep_probability;

          if (pred->ep_edge != first)
            probability = REG_BR_PROB_BASE - probability;

          found = true;
          if (best_predictor > predictor)
            best_probability = probability, best_predictor = predictor;

          d = (combined_probability * probability
               + (REG_BR_PROB_BASE - combined_probability)
               * (REG_BR_PROB_BASE - probability));

          /* Use FP math to avoid overflows of 32bit integers.  */
          if (d == 0)
            /* If one probability is 0% and one 100%, avoid division by zero.  */
            combined_probability = REG_BR_PROB_BASE / 2;
          else
            combined_probability = (((double) combined_probability)
                                    * probability
                                    * REG_BR_PROB_BASE / d + 0.5);
        }
    }

  /* Decide which heuristic to use.  In case we didn't match anything,
     use no_prediction heuristic, in case we did match, use either
     first match or Dempster-Shafer theory depending on the flags.  */

  if (predictor_info[best_predictor].flags & PRED_FLAG_FIRST_MATCH)
    first_match = true;

  if (!found)
    dump_prediction (dump_file, PRED_NO_PREDICTION, combined_probability, bb, true);
  else
    {
      dump_prediction (dump_file, PRED_DS_THEORY, combined_probability, bb,
                       !first_match);
      dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability, bb,
                       first_match);
    }

  if (first_match)
    combined_probability = best_probability;
  dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);

  if (preds)
    for (pred = (struct edge_prediction *) *preds; pred; pred = pred->ep_next)
      {
        int predictor = pred->ep_predictor;
        int probability = pred->ep_probability;

        if (pred->ep_edge != EDGE_SUCC (bb, 0))
          probability = REG_BR_PROB_BASE - probability;
        dump_prediction (dump_file, predictor, probability, bb,
                         !first_match || best_predictor == predictor);
      }
  clear_bb_predictions (bb);

  if (!bb->count)
    {
      first->probability = combined_probability;
      second->probability = REG_BR_PROB_BASE - combined_probability;
    }
}
/* Predict edge probabilities by exploiting loop structure.  */

static void
predict_loops (void)
{
  loop_iterator li;
  struct loop *loop;

  /* Try to predict out blocks in a loop that are not part of a
     natural loop.  */
  FOR_EACH_LOOP (li, loop, 0)
    {
      basic_block bb, *bbs;
      unsigned j, n_exits;
      VEC (edge, heap) *exits;
      struct tree_niter_desc niter_desc;
      edge ex;

      exits = get_loop_exit_edges (loop);
      n_exits = VEC_length (edge, exits);

      for (j = 0; VEC_iterate (edge, exits, j, ex); j++)
        {
          tree niter = NULL;
          HOST_WIDE_INT nitercst;
          int max = PARAM_VALUE (PARAM_MAX_PREDICTED_ITERATIONS);
          int probability;
          enum br_predictor predictor;

          if (number_of_iterations_exit (loop, ex, &niter_desc, false))
            niter = niter_desc.niter;
          if (!niter || TREE_CODE (niter) != INTEGER_CST)
            niter = loop_niter_by_eval (loop, ex);

          if (TREE_CODE (niter) == INTEGER_CST)
            {
              if (host_integerp (niter, 1)
                  && compare_tree_int (niter, max - 1) == -1)
                nitercst = tree_low_cst (niter, 1) + 1;
              else
                nitercst = max;
              predictor = PRED_LOOP_ITERATIONS;
            }
          /* If we have just one exit and we can derive some information about
             the number of iterations of the loop from the statements inside
             the loop, use it to predict this exit.  */
          else if (n_exits == 1)
            {
              nitercst = estimated_loop_iterations_int (loop, false);
              if (nitercst < 0)
                continue;
              if (nitercst > max)
                nitercst = max;

              predictor = PRED_LOOP_ITERATIONS_GUESSED;
            }
          else
            continue;

          probability = ((REG_BR_PROB_BASE + nitercst / 2) / nitercst);
          predict_edge (ex, predictor, probability);
        }
      VEC_free (edge, heap, exits);

      bbs = get_loop_body (loop);

      for (j = 0; j < loop->num_nodes; j++)
        {
          int header_found = 0;
          edge e;
          edge_iterator ei;

          bb = bbs[j];

          /* Bypass loop heuristics on continue statement.  These
             statements construct loops via "non-loop" constructs
             in the source language and are better to be handled
             separately.  */
          if (predicted_by_p (bb, PRED_CONTINUE))
            continue;

          /* Loop branch heuristics - predict an edge back to a
             loop's head as taken.  */
          if (bb == loop->latch)
            {
              e = find_edge (loop->latch, loop->header);
              if (e)
                {
                  header_found = 1;
                  predict_edge_def (e, PRED_LOOP_BRANCH, TAKEN);
                }
            }

          /* Loop exit heuristics - predict an edge exiting the loop if the
             conditional has no loop header successors as not taken.  */
          if (!header_found
              /* If we already used more reliable loop exit predictors, do not
                 bother with PRED_LOOP_EXIT.  */
              && !predicted_by_p (bb, PRED_LOOP_ITERATIONS_GUESSED)
              && !predicted_by_p (bb, PRED_LOOP_ITERATIONS))
            {
              /* For loops with many exits we don't want to predict all exits
                 with a pretty large probability, because if all exits are
                 considered in a row, the loop would be predicted to iterate
                 almost never.  The code to divide probability by the number
                 of exits is very rough.  It should compute the number of
                 exits taken on each path through the function (not the
                 overall number of exits, which might be a lot higher for
                 loops with wide switch statements in them) and compute the
                 n-th root.

                 We limit the minimal probability to 2% to avoid
                 EDGE_PROBABILITY_RELIABLE from trusting the branch prediction,
                 as this was causing a regression in the perl benchmark
                 containing such a wide loop.  */

              int probability = ((REG_BR_PROB_BASE
                                  - predictor_info[(int) PRED_LOOP_EXIT].hitrate)
                                 / n_exits);
              if (probability < HITRATE (2))
                probability = HITRATE (2);
              FOR_EACH_EDGE (e, ei, bb->succs)
                if (e->dest->index < NUM_FIXED_BLOCKS
                    || !flow_bb_inside_loop_p (loop, e->dest))
                  predict_edge (e, PRED_LOOP_EXIT, probability);
            }
        }

      /* Free basic blocks from get_loop_body.  */
      free (bbs);
    }
}
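
/* For illustration: an exit edge expected after nitercst iterations gets
   probability REG_BR_PROB_BASE / nitercst, rounded to nearest.  Assuming
   REG_BR_PROB_BASE == 10000 (its usual value), a loop iterating 10 times
   gets an exit probability of (10000 + 5) / 10 = 1000, i.e. 10% per
   iteration.  */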
/* Attempt to predict probabilities of BB outgoing edges using local
   properties.  */

static void
bb_estimate_probability_locally (basic_block bb)
{
  rtx last_insn = BB_END (bb);
  rtx cond;

  if (! can_predict_insn_p (last_insn))
    return;
  cond = get_condition (last_insn, NULL, false, false);
  if (! cond)
    return;

  /* Try "pointer heuristic."
     A comparison ptr == 0 is predicted as false.
     Similarly, a comparison ptr1 == ptr2 is predicted as false.  */
  if (COMPARISON_P (cond)
      && ((REG_P (XEXP (cond, 0)) && REG_POINTER (XEXP (cond, 0)))
          || (REG_P (XEXP (cond, 1)) && REG_POINTER (XEXP (cond, 1)))))
    {
      if (GET_CODE (cond) == EQ)
        predict_insn_def (last_insn, PRED_POINTER, NOT_TAKEN);
      else if (GET_CODE (cond) == NE)
        predict_insn_def (last_insn, PRED_POINTER, TAKEN);
    }
  else
    /* Try "opcode heuristic."
       EQ tests are usually false and NE tests are usually true.  Also,
       most quantities are positive, so we can make the appropriate guesses
       about signed comparisons against zero.  */
    switch (GET_CODE (cond))
      {
      case CONST_INT:
        /* Unconditional branch.  */
        predict_insn_def (last_insn, PRED_UNCONDITIONAL,
                          cond == const0_rtx ? NOT_TAKEN : TAKEN);
        break;

      case EQ:
      case UNEQ:
        /* Floating point comparisons appear to behave in a very
           unpredictable way because of the special role of = tests in
           FP code.  */
        if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
          ;
        /* Comparisons with 0 are often used for booleans and there is
           nothing useful to predict about them.  */
        else if (XEXP (cond, 1) == const0_rtx
                 || XEXP (cond, 0) == const0_rtx)
          ;
        else
          predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, NOT_TAKEN);
        break;

      case NE:
      case LTGT:
        /* Floating point comparisons appear to behave in a very
           unpredictable way because of the special role of = tests in
           FP code.  */
        if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
          ;
        /* Comparisons with 0 are often used for booleans and there is
           nothing useful to predict about them.  */
        else if (XEXP (cond, 1) == const0_rtx
                 || XEXP (cond, 0) == const0_rtx)
          ;
        else
          predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, TAKEN);
        break;

      case ORDERED:
        predict_insn_def (last_insn, PRED_FPOPCODE, TAKEN);
        break;

      case UNORDERED:
        predict_insn_def (last_insn, PRED_FPOPCODE, NOT_TAKEN);
        break;

      case LE:
      case LT:
        if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
            || XEXP (cond, 1) == constm1_rtx)
          predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, NOT_TAKEN);
        break;

      case GE:
      case GT:
        if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
            || XEXP (cond, 1) == constm1_rtx)
          predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, TAKEN);
        break;

      default:
        break;
      }
}

/* Set edge->probability for each successor edge of BB.  */

void
guess_outgoing_edge_probabilities (basic_block bb)
{
  bb_estimate_probability_locally (bb);
  combine_predictions_for_insn (BB_END (bb), bb);
}
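
/* For illustration, the source-level patterns the heuristics above
   target (a sketch, not code from this file):

     if (p == NULL)   -- pointer heuristic: predicted not taken
     if (a != b)      -- opcode heuristic: NE on non-zero operands,
                         predicted taken
     if (x < 0)       -- opcode heuristic: most quantities are positive,
                         predicted not taken  */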
static tree expr_expected_value (tree, bitmap);
/* Helper function for expr_expected_value.  */

static tree
expr_expected_value_1 (tree type, tree op0, enum tree_code code, tree op1,
                       bitmap visited)
{
  gimple def;

  if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
    {
      if (TREE_CONSTANT (op0))
        return op0;

      if (code != SSA_NAME)
        return NULL_TREE;

      def = SSA_NAME_DEF_STMT (op0);

      /* If we were already here, break the infinite cycle.  */
      if (bitmap_bit_p (visited, SSA_NAME_VERSION (op0)))
        return NULL;
      bitmap_set_bit (visited, SSA_NAME_VERSION (op0));

      if (gimple_code (def) == GIMPLE_PHI)
        {
          /* All the arguments of the PHI node must have the same constant
             expected value.  */
          int i, n = gimple_phi_num_args (def);
          tree val = NULL, new_val;

          for (i = 0; i < n; i++)
            {
              tree arg = PHI_ARG_DEF (def, i);

              /* If this PHI has itself as an argument, we cannot
                 determine the expected value of this argument.  However,
                 if we can find an expected constant value for the other
                 PHI args then we can still be sure that this is
                 likely a constant.  So be optimistic and just
                 continue with the next argument.  */
              if (arg == PHI_RESULT (def))
                continue;

              new_val = expr_expected_value (arg, visited);
              if (!new_val)
                return NULL;
              if (!val)
                val = new_val;
              else if (!operand_equal_p (val, new_val, false))
                return NULL;
            }
          return val;
        }
      if (is_gimple_assign (def))
        {
          if (gimple_assign_lhs (def) != op0)
            return NULL;

          return expr_expected_value_1 (TREE_TYPE (gimple_assign_lhs (def)),
                                        gimple_assign_rhs1 (def),
                                        gimple_assign_rhs_code (def),
                                        gimple_assign_rhs2 (def),
                                        visited);
        }

      if (is_gimple_call (def))
        {
          tree decl = gimple_call_fndecl (def);
          if (!decl)
            return NULL;
          if (DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL
              && DECL_FUNCTION_CODE (decl) == BUILT_IN_EXPECT)
            {
              tree val;

              if (gimple_call_num_args (def) != 2)
                return NULL;
              val = gimple_call_arg (def, 0);
              if (TREE_CONSTANT (val))
                return val;
              return gimple_call_arg (def, 1);
            }
        }

      return NULL;
    }

  if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS)
    {
      tree res;
      op0 = expr_expected_value (op0, visited);
      if (!op0)
        return NULL;
      op1 = expr_expected_value (op1, visited);
      if (!op1)
        return NULL;
      res = fold_build2 (code, type, op0, op1);
      if (TREE_CONSTANT (res))
        return res;
      return NULL;
    }
  if (get_gimple_rhs_class (code) == GIMPLE_UNARY_RHS)
    {
      tree res;
      op0 = expr_expected_value (op0, visited);
      if (!op0)
        return NULL;
      res = fold_build1 (code, type, op0);
      if (TREE_CONSTANT (res))
        return res;
      return NULL;
    }
  return NULL;
}
/* Return the constant EXPR will likely have at execution time, or NULL
   if unknown.  The function is used by the builtin_expect branch predictor,
   so the evidence must come from this construct plus additional possible
   constant folding.

   We may want to implement a more involved value guess (such as value range
   propagation based prediction), but such tricks shall go to a new
   implementation.  */

static tree
expr_expected_value (tree expr, bitmap visited)
{
  enum tree_code code;
  tree op0, op1;

  if (TREE_CONSTANT (expr))
    return expr;

  extract_ops_from_tree (expr, &code, &op0, &op1);
  return expr_expected_value_1 (TREE_TYPE (expr),
                                op0, code, op1, visited);
}
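
/* For illustration: given source such as

     if (__builtin_expect (x > 0, 1))
       ...

   expr_expected_value follows the SSA definition chain of the condition,
   reaches the call to __builtin_expect (BUILT_IN_EXPECT above) and returns
   its second argument, so the branch gets predicted by
   PRED_BUILTIN_EXPECT in tree_predict_by_opcode below.  */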
/* Get rid of all builtin_expect calls and GIMPLE_PREDICT statements
   we no longer need.  */

static unsigned int
strip_predict_hints (void)
{
  basic_block bb;
  gimple ass_stmt;
  tree var;

  FOR_EACH_BB (bb)
    {
      gimple_stmt_iterator bi;
      for (bi = gsi_start_bb (bb); !gsi_end_p (bi);)
        {
          gimple stmt = gsi_stmt (bi);

          if (gimple_code (stmt) == GIMPLE_PREDICT)
            {
              gsi_remove (&bi, true);
              continue;
            }
          else if (gimple_code (stmt) == GIMPLE_CALL)
            {
              tree fndecl = gimple_call_fndecl (stmt);

              if (fndecl
                  && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
                  && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_EXPECT
                  && gimple_call_num_args (stmt) == 2)
                {
                  var = gimple_call_lhs (stmt);
                  ass_stmt = gimple_build_assign (var, gimple_call_arg (stmt, 0));

                  gsi_replace (&bi, ass_stmt, true);
                }
            }
          gsi_next (&bi);
        }
    }
  return 0;
}
/* Predict using opcode of the last statement in basic block.  */

static void
tree_predict_by_opcode (basic_block bb)
{
  gimple stmt = last_stmt (bb);
  edge then_edge;
  tree op0, op1;
  tree type;
  tree val;
  enum tree_code cmp;
  bitmap visited;
  edge_iterator ei;

  if (!stmt || gimple_code (stmt) != GIMPLE_COND)
    return;
  FOR_EACH_EDGE (then_edge, ei, bb->succs)
    if (then_edge->flags & EDGE_TRUE_VALUE)
      break;
  op0 = gimple_cond_lhs (stmt);
  op1 = gimple_cond_rhs (stmt);
  cmp = gimple_cond_code (stmt);
  type = TREE_TYPE (op0);
  visited = BITMAP_ALLOC (NULL);
  val = expr_expected_value_1 (boolean_type_node, op0, cmp, op1, visited);
  BITMAP_FREE (visited);
  if (val)
    {
      if (integer_zerop (val))
        predict_edge_def (then_edge, PRED_BUILTIN_EXPECT, NOT_TAKEN);
      else
        predict_edge_def (then_edge, PRED_BUILTIN_EXPECT, TAKEN);
      return;
    }
  /* Try "pointer heuristic."
     A comparison ptr == 0 is predicted as false.
     Similarly, a comparison ptr1 == ptr2 is predicted as false.  */
  if (POINTER_TYPE_P (type))
    {
      if (cmp == EQ_EXPR)
        predict_edge_def (then_edge, PRED_TREE_POINTER, NOT_TAKEN);
      else if (cmp == NE_EXPR)
        predict_edge_def (then_edge, PRED_TREE_POINTER, TAKEN);
    }
  else
    /* Try "opcode heuristic."
       EQ tests are usually false and NE tests are usually true.  Also,
       most quantities are positive, so we can make the appropriate guesses
       about signed comparisons against zero.  */
    switch (cmp)
      {
      case EQ_EXPR:
      case UNEQ_EXPR:
        /* Floating point comparisons appear to behave in a very
           unpredictable way because of the special role of = tests in
           FP code.  */
        if (FLOAT_TYPE_P (type))
          ;
        /* Comparisons with 0 are often used for booleans and there is
           nothing useful to predict about them.  */
        else if (integer_zerop (op0) || integer_zerop (op1))
          ;
        else
          predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, NOT_TAKEN);
        break;

      case NE_EXPR:
      case LTGT_EXPR:
        /* Floating point comparisons appear to behave in a very
           unpredictable way because of the special role of = tests in
           FP code.  */
        if (FLOAT_TYPE_P (type))
          ;
        /* Comparisons with 0 are often used for booleans and there is
           nothing useful to predict about them.  */
        else if (integer_zerop (op0)
                 || integer_zerop (op1))
          ;
        else
          predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, TAKEN);
        break;

      case ORDERED_EXPR:
        predict_edge_def (then_edge, PRED_TREE_FPOPCODE, TAKEN);
        break;

      case UNORDERED_EXPR:
        predict_edge_def (then_edge, PRED_TREE_FPOPCODE, NOT_TAKEN);
        break;

      case LE_EXPR:
      case LT_EXPR:
        if (integer_zerop (op1)
            || integer_onep (op1)
            || integer_all_onesp (op1)
            || real_zerop (op1)
            || real_onep (op1)
            || real_minus_onep (op1))
          predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, NOT_TAKEN);
        break;

      case GE_EXPR:
      case GT_EXPR:
        if (integer_zerop (op1)
            || integer_onep (op1)
            || integer_all_onesp (op1)
            || real_zerop (op1)
            || real_onep (op1)
            || real_minus_onep (op1))
          predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, TAKEN);
        break;

      default:
        break;
      }
}
/* Try to guess whether the return value means an error code.  */

static enum br_predictor
return_prediction (tree val, enum prediction *prediction)
{
  /* VOID.  */
  if (!val)
    return PRED_NO_PREDICTION;
  /* Different heuristics for pointers and scalars.  */
  if (POINTER_TYPE_P (TREE_TYPE (val)))
    {
      /* NULL is usually not returned.  */
      if (integer_zerop (val))
        {
          *prediction = NOT_TAKEN;
          return PRED_NULL_RETURN;
        }
    }
  else if (INTEGRAL_TYPE_P (TREE_TYPE (val)))
    {
      /* Negative return values are often used to indicate
         errors.  */
      if (TREE_CODE (val) == INTEGER_CST
          && tree_int_cst_sgn (val) < 0)
        {
          *prediction = NOT_TAKEN;
          return PRED_NEGATIVE_RETURN;
        }
      /* Constant return values seem to be commonly taken.
         Zero/one often represent booleans so exclude them from the
         heuristics.  */
      if (TREE_CONSTANT (val)
          && (!integer_zerop (val) && !integer_onep (val)))
        {
          *prediction = TAKEN;
          return PRED_CONST_RETURN;
        }
    }
  return PRED_NO_PREDICTION;
}
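
/* For illustration, the returns the heuristics above classify (a sketch,
   not code from this file):

     return NULL;   -- PRED_NULL_RETURN, predicted unlikely
     return -1;     -- PRED_NEGATIVE_RETURN, predicted unlikely
     return 42;     -- PRED_CONST_RETURN, predicted likely
     return 0;      -- no prediction; zero/one look like booleans  */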
/* Find the basic block with the return expression and look up for possible
   return value trying to apply RETURN_PREDICTION heuristics.  */

static void
apply_return_prediction (void)
{
  gimple return_stmt = NULL;
  tree return_val;
  edge e;
  gimple phi;
  int phi_num_args, i;
  enum br_predictor pred;
  enum prediction direction;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
    {
      return_stmt = last_stmt (e->src);
      if (return_stmt
          && gimple_code (return_stmt) == GIMPLE_RETURN)
        break;
    }
  if (!e)
    return;
  return_val = gimple_return_retval (return_stmt);
  if (!return_val)
    return;
  if (TREE_CODE (return_val) != SSA_NAME
      || !SSA_NAME_DEF_STMT (return_val)
      || gimple_code (SSA_NAME_DEF_STMT (return_val)) != GIMPLE_PHI)
    return;
  phi = SSA_NAME_DEF_STMT (return_val);
  phi_num_args = gimple_phi_num_args (phi);
  pred = return_prediction (PHI_ARG_DEF (phi, 0), &direction);

  /* Avoid the degenerate case where all return values from the function
     belong to the same category (i.e. they are all positive constants),
     so we can hardly say anything about them.  */
  for (i = 1; i < phi_num_args; i++)
    if (pred != return_prediction (PHI_ARG_DEF (phi, i), &direction))
      break;
  if (i != phi_num_args)
    for (i = 0; i < phi_num_args; i++)
      {
        pred = return_prediction (PHI_ARG_DEF (phi, i), &direction);
        if (pred != PRED_NO_PREDICTION)
          predict_paths_leading_to (gimple_phi_arg_edge (phi, i)->src, pred,
                                    direction);
      }
}
/* Look for basic blocks that contain unlikely-to-happen events
   (such as noreturn calls) and mark all paths leading to execution
   of such basic blocks as unlikely.  */

static void
tree_bb_level_predictions (void)
{
  basic_block bb;

  apply_return_prediction ();

  FOR_EACH_BB (bb)
    {
      gimple_stmt_iterator gsi;

      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
        {
          gimple stmt = gsi_stmt (gsi);
          tree decl;

          if (is_gimple_call (stmt))
            {
              if (gimple_call_flags (stmt) & ECF_NORETURN)
                predict_paths_leading_to (bb, PRED_NORETURN,
                                          NOT_TAKEN);
              decl = gimple_call_fndecl (stmt);
              if (decl
                  && lookup_attribute ("cold",
                                       DECL_ATTRIBUTES (decl)))
                predict_paths_leading_to (bb, PRED_COLD_FUNCTION,
                                          NOT_TAKEN);
            }
          else if (gimple_code (stmt) == GIMPLE_PREDICT)
            {
              predict_paths_leading_to (bb, gimple_predict_predictor (stmt),
                                        gimple_predict_outcome (stmt));
              /* Keep GIMPLE_PREDICT around so early inlining will propagate
                 hints to callers.  */
            }
        }
    }
}
#ifdef ENABLE_CHECKING

/* Callback for pointer_map_traverse, asserts that the pointer map is
   empty.  */

static bool
assert_is_empty (const void *key ATTRIBUTE_UNUSED, void **value,
                 void *data ATTRIBUTE_UNUSED)
{
  gcc_assert (!*value);
  return false;
}
#endif
/* Predict branch probabilities and estimate profile of the tree CFG.  */

static unsigned int
tree_estimate_probability (void)
{
  basic_block bb;

  loop_optimizer_init (0);
  if (dump_file && (dump_flags & TDF_DETAILS))
    flow_loops_dump (dump_file, NULL, 0);

  add_noreturn_fake_exit_edges ();
  connect_infinite_loops_to_exit ();
  /* We use loop_niter_by_eval, which requires that the loops have
     preheaders.  */
  create_preheaders (CP_SIMPLE_PREHEADERS);
  calculate_dominance_info (CDI_POST_DOMINATORS);

  bb_predictions = pointer_map_create ();
  tree_bb_level_predictions ();

  mark_irreducible_loops ();
  record_loop_exits ();
  if (number_of_loops () > 1)
    predict_loops ();

  FOR_EACH_BB (bb)
    {
      edge e;
      edge_iterator ei;

      FOR_EACH_EDGE (e, ei, bb->succs)
        {
          /* Predict early returns to be probable, as we've already taken
             care of error returns and other cases are often used for
             fast paths through the function.

             Since we've already removed the return statements, we are
             looking for a side block that branches straight into a return
             block: E->dest has a single successor edge leading to
             EXIT_BLOCK_PTR and ends in a GIMPLE_RETURN.  */
          if (e->dest != bb->next_bb
              && e->dest != EXIT_BLOCK_PTR
              && single_succ_p (e->dest)
              && single_succ_edge (e->dest)->dest == EXIT_BLOCK_PTR
              && gimple_code (last_stmt (e->dest)) == GIMPLE_RETURN)
            {
              edge e1;
              edge_iterator ei1;

              if (single_succ_p (bb))
                {
                  FOR_EACH_EDGE (e1, ei1, bb->preds)
                    if (!predicted_by_p (e1->src, PRED_NULL_RETURN)
                        && !predicted_by_p (e1->src, PRED_CONST_RETURN)
                        && !predicted_by_p (e1->src, PRED_NEGATIVE_RETURN))
                      predict_edge_def (e1, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
                }
              else if (!predicted_by_p (e->src, PRED_NULL_RETURN)
                       && !predicted_by_p (e->src, PRED_CONST_RETURN)
                       && !predicted_by_p (e->src, PRED_NEGATIVE_RETURN))
                predict_edge_def (e, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
            }

          /* Look for the block we are guarding (i.e. we dominate it,
             but it doesn't postdominate us).  */
          if (e->dest != EXIT_BLOCK_PTR && e->dest != bb
              && dominated_by_p (CDI_DOMINATORS, e->dest, e->src)
              && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e->dest))
            {
              gimple_stmt_iterator bi;

              /* The call heuristic claims that a guarded function call
                 is improbable.  This is because such calls are often used
                 to signal exceptional situations such as printing error
                 messages.  */
              for (bi = gsi_start_bb (e->dest); !gsi_end_p (bi);
                   gsi_next (&bi))
                {
                  gimple stmt = gsi_stmt (bi);
                  if (is_gimple_call (stmt)
                      /* Constant and pure calls are hardly used to signal
                         something exceptional.  */
                      && gimple_has_side_effects (stmt))
                    {
                      predict_edge_def (e, PRED_CALL, NOT_TAKEN);
                      break;
                    }
                }
            }
        }
      tree_predict_by_opcode (bb);
    }
  FOR_EACH_BB (bb)
    combine_predictions_for_bb (bb);

#ifdef ENABLE_CHECKING
  pointer_map_traverse (bb_predictions, assert_is_empty, NULL);
#endif
  pointer_map_destroy (bb_predictions);
  bb_predictions = NULL;

  estimate_bb_frequencies ();
  free_dominance_info (CDI_POST_DOMINATORS);
  remove_fake_exit_edges ();
  loop_optimizer_finalize ();
  if (dump_file && (dump_flags & TDF_DETAILS))
    gimple_dump_cfg (dump_file, dump_flags);
  if (profile_status == PROFILE_ABSENT)
    profile_status = PROFILE_GUESSED;
  return 0;
}
/* Predict edges entering CUR whose sources are not postdominated by BB
   as PRED, and recurse to all postdominators of CUR.  */

static void
predict_paths_for_bb (basic_block cur, basic_block bb,
                      enum br_predictor pred,
                      enum prediction taken)
{
  edge e;
  edge_iterator ei;
  basic_block son;

  /* We are looking for all edges forming the edge cut induced by the
     set of all blocks postdominated by BB.  */
  FOR_EACH_EDGE (e, ei, cur->preds)
    if (e->src->index >= NUM_FIXED_BLOCKS
        && !dominated_by_p (CDI_POST_DOMINATORS, e->src, bb))
      {
        gcc_assert (bb == cur || dominated_by_p (CDI_POST_DOMINATORS, cur, bb));
        predict_edge_def (e, pred, taken);
      }
  for (son = first_dom_son (CDI_POST_DOMINATORS, cur);
       son;
       son = next_dom_son (CDI_POST_DOMINATORS, son))
    predict_paths_for_bb (son, bb, pred, taken);
}

/* Set branch probabilities on all paths leading to BB according to PRED
   and TAKEN.  */

static void
predict_paths_leading_to (basic_block bb, enum br_predictor pred,
                          enum prediction taken)
{
  predict_paths_for_bb (bb, bb, pred, taken);
}
/* This is used to carry information about basic blocks.  It is
   attached to the AUX field of the standard CFG block.  */

typedef struct block_info_def
{
  /* Estimated frequency of execution of basic_block.  */
  sreal frequency;

  /* To keep queue of basic blocks to process.  */
  basic_block next;

  /* Number of predecessors we need to visit first.  */
  int npredecessors;
} *block_info;

/* Similar information for edges.  */
typedef struct edge_info_def
{
  /* In case edge is a loopback edge, the probability edge will be reached
     in case header is.  Estimated number of iterations of the loop can
     then be computed as 1 / (1 - back_edge_prob).  */
  sreal back_edge_prob;
  /* True if the edge is a loopback edge in the natural loop.  */
  unsigned int back_edge:1;
} *edge_info;

#define BLOCK_INFO(B) ((block_info) (B)->aux)
#define EDGE_INFO(E)  ((edge_info) (E)->aux)
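
/* For illustration: if the back edge of a loop is taken with probability
   0.9, the expected number of iterations is 1 / (1 - 0.9) = 10.  The
   propagation below relies on exactly this identity when summing the
   geometric series of loop frequencies.  */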
/* Helper function for estimate_bb_frequencies.
   Propagate the frequencies in blocks marked in
   TOVISIT, starting in HEAD.  */

static void
propagate_freq (basic_block head, bitmap tovisit)
{
  basic_block bb;
  basic_block last;
  unsigned i;
  edge e;
  basic_block nextbb;
  bitmap_iterator bi;

  /* For each basic block we need to visit, count the number of its
     predecessors we need to visit first.  */
  EXECUTE_IF_SET_IN_BITMAP (tovisit, 0, i, bi)
    {
      edge_iterator ei;
      int count = 0;

      /* The outermost "loop" includes the exit block, which we can not
         look up via BASIC_BLOCK.  Detect this and use EXIT_BLOCK_PTR
         directly.  Do the same for the entry block.  */
      bb = BASIC_BLOCK (i);

      FOR_EACH_EDGE (e, ei, bb->preds)
        {
          bool visit = bitmap_bit_p (tovisit, e->src->index);

          if (visit && !(e->flags & EDGE_DFS_BACK))
            count++;
          else if (visit && dump_file && !EDGE_INFO (e)->back_edge)
            fprintf (dump_file,
                     "Irreducible region hit, ignoring edge to %i->%i\n",
                     e->src->index, bb->index);
        }
      BLOCK_INFO (bb)->npredecessors = count;
    }

  memcpy (&BLOCK_INFO (head)->frequency, &real_one, sizeof (real_one));
  last = head;
  for (bb = head; bb; bb = nextbb)
    {
      edge_iterator ei;
      sreal cyclic_probability, frequency;

      memcpy (&cyclic_probability, &real_zero, sizeof (real_zero));
      memcpy (&frequency, &real_zero, sizeof (real_zero));

      nextbb = BLOCK_INFO (bb)->next;
      BLOCK_INFO (bb)->next = NULL;

      /* Compute frequency of basic block.  */
      if (bb != head)
        {
#ifdef ENABLE_CHECKING
          FOR_EACH_EDGE (e, ei, bb->preds)
            gcc_assert (!bitmap_bit_p (tovisit, e->src->index)
                        || (e->flags & EDGE_DFS_BACK));
#endif

          FOR_EACH_EDGE (e, ei, bb->preds)
            if (EDGE_INFO (e)->back_edge)
              {
                sreal_add (&cyclic_probability, &cyclic_probability,
                           &EDGE_INFO (e)->back_edge_prob);
              }
            else if (!(e->flags & EDGE_DFS_BACK))
              {
                sreal tmp;

                /* frequency += (e->probability
                                 * BLOCK_INFO (e->src)->frequency /
                                 REG_BR_PROB_BASE);  */

                sreal_init (&tmp, e->probability, 0);
                sreal_mul (&tmp, &tmp, &BLOCK_INFO (e->src)->frequency);
                sreal_mul (&tmp, &tmp, &real_inv_br_prob_base);
                sreal_add (&frequency, &frequency, &tmp);
              }

          if (sreal_compare (&cyclic_probability, &real_zero) == 0)
            {
              memcpy (&BLOCK_INFO (bb)->frequency, &frequency,
                      sizeof (frequency));
            }
          else
            {
              if (sreal_compare (&cyclic_probability, &real_almost_one) > 0)
                {
                  memcpy (&cyclic_probability, &real_almost_one,
                          sizeof (real_almost_one));
                }

              /* BLOCK_INFO (bb)->frequency = frequency
                                              / (1 - cyclic_probability) */

              sreal_sub (&cyclic_probability, &real_one, &cyclic_probability);
              sreal_div (&BLOCK_INFO (bb)->frequency,
                         &frequency, &cyclic_probability);
            }
        }

      bitmap_clear_bit (tovisit, bb->index);

      e = find_edge (bb, head);
      if (e)
        {
          sreal tmp;

          /* EDGE_INFO (e)->back_edge_prob
             = ((e->probability * BLOCK_INFO (bb)->frequency)
                / REG_BR_PROB_BASE); */

          sreal_init (&tmp, e->probability, 0);
          sreal_mul (&tmp, &tmp, &BLOCK_INFO (bb)->frequency);
          sreal_mul (&EDGE_INFO (e)->back_edge_prob,
                     &tmp, &real_inv_br_prob_base);
        }

      /* Propagate to successor blocks.  */
      FOR_EACH_EDGE (e, ei, bb->succs)
        if (!(e->flags & EDGE_DFS_BACK)
            && BLOCK_INFO (e->dest)->npredecessors)
          {
            BLOCK_INFO (e->dest)->npredecessors--;
            if (!BLOCK_INFO (e->dest)->npredecessors)
              {
                if (!nextbb)
                  nextbb = e->dest;
                else
                  BLOCK_INFO (last)->next = e->dest;

                last = e->dest;
              }
          }
    }
}
/* Estimate probabilities of loopback edges in loops at same nest level.  */

static void
estimate_loops_at_level (struct loop *first_loop)
{
  struct loop *loop;

  for (loop = first_loop; loop; loop = loop->next)
    {
      edge e;
      basic_block *bbs;
      unsigned i;
      bitmap tovisit = BITMAP_ALLOC (NULL);

      estimate_loops_at_level (loop->inner);

      /* Find current loop back edge and mark it.  */
      e = loop_latch_edge (loop);
      EDGE_INFO (e)->back_edge = 1;

      bbs = get_loop_body (loop);
      for (i = 0; i < loop->num_nodes; i++)
        bitmap_set_bit (tovisit, bbs[i]->index);
      free (bbs);
      propagate_freq (loop->header, tovisit);
      BITMAP_FREE (tovisit);
    }
}
/* Propagates frequencies through the structure of loops.  */

static void
estimate_loops (void)
{
  bitmap tovisit = BITMAP_ALLOC (NULL);
  basic_block bb;

  /* Start by estimating the frequencies in the loops.  */
  if (number_of_loops () > 1)
    estimate_loops_at_level (current_loops->tree_root->inner);

  /* Now propagate the frequencies through all the blocks.  */
  FOR_ALL_BB (bb)
    {
      bitmap_set_bit (tovisit, bb->index);
    }
  propagate_freq (ENTRY_BLOCK_PTR, tovisit);
  BITMAP_FREE (tovisit);
}
/* Convert counts measured by profile driven feedback to frequencies.
   Return nonzero iff there was any nonzero execution count.  */

int
counts_to_freqs (void)
{
  gcov_type count_max, true_count_max = 0;
  basic_block bb;

  FOR_EACH_BB (bb)
    true_count_max = MAX (bb->count, true_count_max);

  count_max = MAX (true_count_max, 1);
  FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
    bb->frequency = (bb->count * BB_FREQ_MAX + count_max / 2) / count_max;

  return true_count_max;
}
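
/* For illustration: assuming BB_FREQ_MAX == 10000 (its usual value) and a
   hottest block executed count_max = 1000 times, a block executed 250
   times gets frequency (250 * 10000 + 500) / 1000 = 2500; counts are
   rescaled so the hottest block sits at BB_FREQ_MAX.  */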
/* Return true if function is likely to be expensive, so there is no point
   in optimizing the performance of the prologue or epilogue, or in doing
   inlining, at the expense of code size growth.  THRESHOLD is the limit
   on the number of instructions the function may execute on average while
   still being considered not expensive.  */

bool
expensive_function_p (int threshold)
{
  unsigned int sum = 0;
  basic_block bb;
  unsigned int limit;

  /* We can not compute accurately for large thresholds due to scaled
     frequencies.  */
  gcc_assert (threshold <= BB_FREQ_MAX);

  /* Frequencies are out of range.  This either means that the function
     contains an internal loop executing more than BB_FREQ_MAX times or
     profile feedback is available and the function has not been executed
     at all.  */
  if (ENTRY_BLOCK_PTR->frequency == 0)
    return true;

  /* Maximally BB_FREQ_MAX^2 so overflow won't happen.  */
  limit = ENTRY_BLOCK_PTR->frequency * threshold;
  FOR_EACH_BB (bb)
    {
      rtx insn;

      for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb));
           insn = NEXT_INSN (insn))
        if (active_insn_p (insn))
          {
            sum += bb->frequency;
            if (sum > limit)
              return true;
          }
    }

  return false;
}
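
/* For illustration: with an entry frequency of 1000 and THRESHOLD == 50,
   the limit above is 50000; a function whose active insns' block
   frequencies sum beyond that executes more than ~50 instructions per
   call on average and is reported as expensive.  */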
/* Estimate basic blocks frequency by given branch probabilities.  */

void
estimate_bb_frequencies (void)
{
  basic_block bb;
  sreal freq_max;

  if (!flag_branch_probabilities || !counts_to_freqs ())
    {
      static int real_values_initialized = 0;

      if (!real_values_initialized)
        {
          real_values_initialized = 1;
          sreal_init (&real_zero, 0, 0);
          sreal_init (&real_one, 1, 0);
          sreal_init (&real_br_prob_base, REG_BR_PROB_BASE, 0);
          sreal_init (&real_bb_freq_max, BB_FREQ_MAX, 0);
          sreal_init (&real_one_half, 1, -1);
          sreal_div (&real_inv_br_prob_base, &real_one, &real_br_prob_base);
          sreal_sub (&real_almost_one, &real_one, &real_inv_br_prob_base);
        }

      mark_dfs_back_edges ();

      single_succ_edge (ENTRY_BLOCK_PTR)->probability = REG_BR_PROB_BASE;

      /* Set up block info for each basic block.  */
      alloc_aux_for_blocks (sizeof (struct block_info_def));
      alloc_aux_for_edges (sizeof (struct edge_info_def));
      FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
        {
          edge e;
          edge_iterator ei;

          FOR_EACH_EDGE (e, ei, bb->succs)
            {
              sreal_init (&EDGE_INFO (e)->back_edge_prob, e->probability, 0);
              sreal_mul (&EDGE_INFO (e)->back_edge_prob,
                         &EDGE_INFO (e)->back_edge_prob,
                         &real_inv_br_prob_base);
            }
        }

      /* First compute probabilities locally for each loop from innermost
         to outermost to examine probabilities for back edges.  */
      estimate_loops ();

      memcpy (&freq_max, &real_zero, sizeof (real_zero));
      FOR_EACH_BB (bb)
        if (sreal_compare (&freq_max, &BLOCK_INFO (bb)->frequency) < 0)
          memcpy (&freq_max, &BLOCK_INFO (bb)->frequency, sizeof (freq_max));

      sreal_div (&freq_max, &real_bb_freq_max, &freq_max);
      FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
        {
          sreal tmp;

          sreal_mul (&tmp, &BLOCK_INFO (bb)->frequency, &freq_max);
          sreal_add (&tmp, &tmp, &real_one_half);
          bb->frequency = sreal_to_int (&tmp);
        }

      free_aux_for_blocks ();
      free_aux_for_edges ();
    }
  compute_function_frequency ();
  if (flag_reorder_functions)
    choose_function_section ();
}
/* Decide whether function is hot, cold or unlikely executed.  */

static void
compute_function_frequency (void)
{
  basic_block bb;

  if (!profile_info || !flag_branch_probabilities)
    {
      if (lookup_attribute ("cold", DECL_ATTRIBUTES (current_function_decl))
          != NULL)
        cfun->function_frequency = FUNCTION_FREQUENCY_UNLIKELY_EXECUTED;
      else if (lookup_attribute ("hot", DECL_ATTRIBUTES (current_function_decl))
               != NULL)
        cfun->function_frequency = FUNCTION_FREQUENCY_HOT;
      return;
    }
  cfun->function_frequency = FUNCTION_FREQUENCY_UNLIKELY_EXECUTED;
  FOR_EACH_BB (bb)
    {
      if (maybe_hot_bb_p (bb))
        {
          cfun->function_frequency = FUNCTION_FREQUENCY_HOT;
          return;
        }
      if (!probably_never_executed_bb_p (bb))
        cfun->function_frequency = FUNCTION_FREQUENCY_NORMAL;
    }
}
/* Choose appropriate section for the function.  */

static void
choose_function_section (void)
{
  if (DECL_SECTION_NAME (current_function_decl)
      || !targetm.have_named_sections
      /* Theoretically we can split the gnu.linkonce text section too,
         but this requires more work as the frequency needs to match
         for all generated objects so we need to merge the frequency
         of all instances.  For now just never set frequency for these.  */
      || DECL_ONE_ONLY (current_function_decl))
    return;

  /* If we are doing the partitioning optimization, let the optimization
     choose the correct section into which to put things.  */

  if (flag_reorder_blocks_and_partition)
    return;

  if (cfun->function_frequency == FUNCTION_FREQUENCY_HOT)
    DECL_SECTION_NAME (current_function_decl) =
      build_string (strlen (HOT_TEXT_SECTION_NAME), HOT_TEXT_SECTION_NAME);
  if (cfun->function_frequency == FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
    DECL_SECTION_NAME (current_function_decl) =
      build_string (strlen (UNLIKELY_EXECUTED_TEXT_SECTION_NAME),
                    UNLIKELY_EXECUTED_TEXT_SECTION_NAME);
}
static bool
gate_estimate_probability (void)
{
  return flag_guess_branch_prob;
}
/* Build PREDICT_EXPR.  */

tree
build_predict_expr (enum br_predictor predictor, enum prediction taken)
{
  tree t = build1 (PREDICT_EXPR, void_type_node,
                   build_int_cst (NULL, predictor));
  PREDICT_EXPR_OUTCOME (t) = taken;
  return t;
}
const char *
predictor_name (enum br_predictor predictor)
{
  return predictor_info[predictor].name;
}
struct gimple_opt_pass pass_profile =
{
 {
  GIMPLE_PASS,
  "profile",				/* name */
  gate_estimate_probability,		/* gate */
  tree_estimate_probability,		/* execute */
  NULL,					/* sub */
  NULL,					/* next */
  0,					/* static_pass_number */
  TV_BRANCH_PROB,			/* tv_id */
  PROP_cfg,				/* properties_required */
  0,					/* properties_provided */
  0,					/* properties_destroyed */
  0,					/* todo_flags_start */
  TODO_ggc_collect | TODO_verify_ssa	/* todo_flags_finish */
 }
};
struct gimple_opt_pass pass_strip_predict_hints =
{
 {
  GIMPLE_PASS,
  NULL,					/* name */
  NULL,					/* gate */
  strip_predict_hints,			/* execute */
  NULL,					/* sub */
  NULL,					/* next */
  0,					/* static_pass_number */
  TV_BRANCH_PROB,			/* tv_id */
  PROP_cfg,				/* properties_required */
  0,					/* properties_provided */
  0,					/* properties_destroyed */
  0,					/* todo_flags_start */
  TODO_ggc_collect | TODO_verify_ssa	/* todo_flags_finish */
 }
};