/* Branch prediction routines for the GNU compiler.
   Copyright (C) 2000-2016 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* References:

   [1] "Branch Prediction for Free"
       Ball and Larus; PLDI '93.
   [2] "Static Branch Frequency and Program Profile Analysis"
       Wu and Larus; MICRO-27.
   [3] "Corpus-based Static Branch Prediction"
       Calder, Grunwald, Lindsay, Martin, Mozer, and Zorn; PLDI '95.  */
#include "coretypes.h"
#include "tree-pass.h"
#include "diagnostic-core.h"
#include "gimple-predict.h"
#include "fold-const.h"
#include "gimple-iterator.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "tree-scalar-evolution.h"
/* Enum with reasons why a predictor is ignored.  */

enum predictor_reason
{
  REASON_NONE,
  REASON_IGNORED,
  REASON_SINGLE_EDGE_DUPLICATE,
  REASON_EDGE_PAIR_DUPLICATE
};

/* String messages for the aforementioned enum.  */

static const char *reason_messages[] = {"", " (ignored)",
    " (single edge duplicate)", " (edge pair duplicate)"};
/* real constants: 0, 1, 1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE,
   1/REG_BR_PROB_BASE, 0.5, BB_FREQ_MAX.  */
static sreal real_almost_one, real_br_prob_base,
	     real_inv_br_prob_base, real_one_half, real_bb_freq_max;
static void combine_predictions_for_insn (rtx_insn *, basic_block);
static void dump_prediction (FILE *, enum br_predictor, int, basic_block,
			     enum predictor_reason, edge);
static void predict_paths_leading_to (basic_block, enum br_predictor, enum prediction);
static void predict_paths_leading_to_edge (edge, enum br_predictor, enum prediction);
static bool can_predict_insn_p (const rtx_insn *);
/* Information we hold about each branch predictor.
   Filled using information from predict.def.  */

struct predictor_info
{
  const char *const name;	/* Name used in the debugging dumps.  */
  const int hitrate;		/* Expected hitrate used by
				   predict_insn_def call.  */
  const int flags;
};

/* Use given predictor without Dempster-Shafer theory if it matches
   using first_match heuristics.  */
#define PRED_FLAG_FIRST_MATCH 1

/* Recompute hitrate in percent to our representation.  */

#define HITRATE(VAL) ((int) ((VAL) * REG_BR_PROB_BASE + 50) / 100)

#define DEF_PREDICTOR(ENUM, NAME, HITRATE, FLAGS) {NAME, HITRATE, FLAGS},
static const struct predictor_info predictor_info[]= {
#include "predict.def"

  /* Upper bound on predictors.  */
  {NULL, 0, 0}
};
#undef DEF_PREDICTOR
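
/* An illustrative example (not itself from predict.def): with the usual
   REG_BR_PROB_BASE of 10000, HITRATE (90) evaluates to
   (90 * 10000 + 50) / 100 == 9000, i.e. a 90% hitrate expressed in the
   internal probability scale used throughout this file.  */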
/* Return TRUE if frequency FREQ is considered to be hot.  */

static bool
maybe_hot_frequency_p (struct function *fun, int freq)
{
  struct cgraph_node *node = cgraph_node::get (fun->decl);
  if (!profile_info
      || !opt_for_fn (fun->decl, flag_branch_probabilities))
    {
      if (node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED)
	return false;
      if (node->frequency == NODE_FREQUENCY_HOT)
	return true;
    }
  if (profile_status_for_fn (fun) == PROFILE_ABSENT)
    return true;
  if (node->frequency == NODE_FREQUENCY_EXECUTED_ONCE
      && freq < (ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency * 2 / 3))
    return false;
  if (PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION) == 0)
    return false;
  if (freq * PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION)
      < ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency)
    return false;
  return true;
}
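
/* A worked example of the test above, assuming the default value of 1000 for
   the hot-bb-frequency-fraction parameter: a block is considered cold once
   its frequency drops below ENTRY_BLOCK frequency / 1000, and a function
   known to execute only once additionally needs at least 2/3 of the entry
   block frequency for its blocks to stay hot.  */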
static gcov_type min_count = -1;

/* Determine the threshold for hot BB counts.  */

gcov_type
get_hot_bb_threshold ()
{
  gcov_working_set_t *ws;
  if (min_count == -1)
    {
      ws = find_working_set (PARAM_VALUE (HOT_BB_COUNT_WS_PERMILLE));
      gcc_assert (ws);
      min_count = ws->min_counter;
    }
  return min_count;
}

/* Set the threshold for hot BB counts.  */

void
set_hot_bb_threshold (gcov_type min)
{
  min_count = min;
}
/* Return TRUE if profile count COUNT is considered to be hot.  */

bool
maybe_hot_count_p (struct function *fun, gcov_type count)
{
  if (fun && profile_status_for_fn (fun) != PROFILE_READ)
    return true;
  /* Code executed at most once is not hot.  */
  if (profile_info->runs >= count)
    return false;
  return (count >= get_hot_bb_threshold ());
}
/* Return true in case BB can be CPU intensive and should be optimized
   for maximal performance.  */

bool
maybe_hot_bb_p (struct function *fun, const_basic_block bb)
{
  gcc_checking_assert (fun);
  if (profile_status_for_fn (fun) == PROFILE_READ)
    return maybe_hot_count_p (fun, bb->count);
  return maybe_hot_frequency_p (fun, bb->frequency);
}
/* Return true in case edge E can be CPU intensive and should be optimized
   for maximal performance.  */

bool
maybe_hot_edge_p (edge e)
{
  if (profile_status_for_fn (cfun) == PROFILE_READ)
    return maybe_hot_count_p (cfun, e->count);
  return maybe_hot_frequency_p (cfun, EDGE_FREQUENCY (e));
}
/* Return true if profile COUNT and FREQUENCY, or function FUN static
   node frequency reflects never being executed.  */

static bool
probably_never_executed (struct function *fun,
			 gcov_type count, int frequency)
{
  gcc_checking_assert (fun);
  if (profile_status_for_fn (fun) == PROFILE_READ)
    {
      int unlikely_count_fraction = PARAM_VALUE (UNLIKELY_BB_COUNT_FRACTION);
      if (count * unlikely_count_fraction >= profile_info->runs)
	return false;
      if (!frequency)
	return true;
      if (!ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency)
	return false;
      if (ENTRY_BLOCK_PTR_FOR_FN (fun)->count)
	{
	  gcov_type computed_count;
	  /* Check for possibility of overflow, in which case entry bb count
	     is large enough to do the division first without losing much
	     precision.  */
	  if (ENTRY_BLOCK_PTR_FOR_FN (fun)->count < REG_BR_PROB_BASE *
	      REG_BR_PROB_BASE)
	    {
	      gcov_type scaled_count
		= frequency * ENTRY_BLOCK_PTR_FOR_FN (fun)->count *
		  unlikely_count_fraction;
	      computed_count = RDIV (scaled_count,
				     ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency);
	    }
	  else
	    {
	      computed_count = RDIV (ENTRY_BLOCK_PTR_FOR_FN (fun)->count,
				     ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency);
	      computed_count *= frequency * unlikely_count_fraction;
	    }
	  if (computed_count >= profile_info->runs)
	    return false;
	}
      return true;
    }
  if ((!profile_info || !(opt_for_fn (fun->decl, flag_branch_probabilities)))
      && (cgraph_node::get (fun->decl)->frequency
	  == NODE_FREQUENCY_UNLIKELY_EXECUTED))
    return true;
  return false;
}
/* Return true in case BB is probably never executed.  */

bool
probably_never_executed_bb_p (struct function *fun, const_basic_block bb)
{
  return probably_never_executed (fun, bb->count, bb->frequency);
}
/* Return true in case edge E is probably never executed.  */

bool
probably_never_executed_edge_p (struct function *fun, edge e)
{
  return probably_never_executed (fun, e->count, EDGE_FREQUENCY (e));
}
/* Return true when current function should always be optimized for size.  */

bool
optimize_function_for_size_p (struct function *fun)
{
  if (!fun || !fun->decl)
    return optimize_size;
  cgraph_node *n = cgraph_node::get (fun->decl);
  return n && n->optimize_for_size_p ();
}
/* Return true when current function should always be optimized for speed.  */

bool
optimize_function_for_speed_p (struct function *fun)
{
  return !optimize_function_for_size_p (fun);
}
/* Return the optimization type that should be used for the function FUN.  */

optimization_type
function_optimization_type (struct function *fun)
{
  return (optimize_function_for_speed_p (fun)
	  ? OPTIMIZE_FOR_SPEED
	  : OPTIMIZE_FOR_SIZE);
}
/* Return TRUE when BB should be optimized for size.  */

bool
optimize_bb_for_size_p (const_basic_block bb)
{
  return (optimize_function_for_size_p (cfun)
	  || (bb && !maybe_hot_bb_p (cfun, bb)));
}
/* Return TRUE when BB should be optimized for speed.  */

bool
optimize_bb_for_speed_p (const_basic_block bb)
{
  return !optimize_bb_for_size_p (bb);
}
/* Return the optimization type that should be used for block BB.  */

optimization_type
bb_optimization_type (const_basic_block bb)
{
  return (optimize_bb_for_speed_p (bb)
	  ? OPTIMIZE_FOR_SPEED
	  : OPTIMIZE_FOR_SIZE);
}
/* Return TRUE when edge E should be optimized for size.  */

bool
optimize_edge_for_size_p (edge e)
{
  return optimize_function_for_size_p (cfun) || !maybe_hot_edge_p (e);
}
/* Return TRUE when edge E should be optimized for speed.  */

bool
optimize_edge_for_speed_p (edge e)
{
  return !optimize_edge_for_size_p (e);
}
/* Return TRUE when the current insn should be optimized for size.  */

bool
optimize_insn_for_size_p (void)
{
  return optimize_function_for_size_p (cfun) || !crtl->maybe_hot_insn_p;
}
/* Return TRUE when the current insn should be optimized for speed.  */

bool
optimize_insn_for_speed_p (void)
{
  return !optimize_insn_for_size_p ();
}
/* Return TRUE when LOOP should be optimized for size.  */

bool
optimize_loop_for_size_p (struct loop *loop)
{
  return optimize_bb_for_size_p (loop->header);
}
/* Return TRUE when LOOP should be optimized for speed.  */

bool
optimize_loop_for_speed_p (struct loop *loop)
{
  return optimize_bb_for_speed_p (loop->header);
}
/* Return TRUE when LOOP nest should be optimized for speed.  */

bool
optimize_loop_nest_for_speed_p (struct loop *loop)
{
  struct loop *l = loop;
  if (optimize_loop_for_speed_p (loop))
    return true;
  l = loop->inner;
  while (l && l != loop)
    {
      if (optimize_loop_for_speed_p (l))
	return true;
      if (l->inner)
	l = l->inner;
      else if (l->next)
	l = l->next;
      else
	{
	  while (l != loop && !l->next)
	    l = loop_outer (l);
	  if (l != loop)
	    l = l->next;
	}
    }
  return false;
}
/* Return TRUE when LOOP nest should be optimized for size.  */

bool
optimize_loop_nest_for_size_p (struct loop *loop)
{
  return !optimize_loop_nest_for_speed_p (loop);
}
/* Return true when edge E is likely to be well predictable by branch
   predictor.  */

bool
predictable_edge_p (edge e)
{
  if (profile_status_for_fn (cfun) == PROFILE_ABSENT)
    return false;
  if ((e->probability
       <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100)
      || (REG_BR_PROB_BASE - e->probability
	  <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100))
    return true;
  return false;
}
/* Set RTL expansion for BB profile.  */

void
rtl_profile_for_bb (basic_block bb)
{
  crtl->maybe_hot_insn_p = maybe_hot_bb_p (cfun, bb);
}

/* Set RTL expansion for edge profile.  */

void
rtl_profile_for_edge (edge e)
{
  crtl->maybe_hot_insn_p = maybe_hot_edge_p (e);
}

/* Set RTL expansion to default mode (i.e. when profile info is not known).  */
void
default_rtl_profile (void)
{
  crtl->maybe_hot_insn_p = true;
}
/* Return true if one of the outgoing edges is already predicted by
   PREDICTOR.  */

bool
rtl_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
{
  rtx note;
  if (!INSN_P (BB_END (bb)))
    return false;
  for (note = REG_NOTES (BB_END (bb)); note; note = XEXP (note, 1))
    if (REG_NOTE_KIND (note) == REG_BR_PRED
	&& INTVAL (XEXP (XEXP (note, 0), 0)) == (int)predictor)
      return true;
  return false;
}
/* Structure representing predictions in tree level.  */

struct edge_prediction {
    struct edge_prediction *ep_next;
    edge ep_edge;
    enum br_predictor ep_predictor;
    int ep_probability;
};

/* This map contains for a basic block the list of predictions for the
   outgoing edges.  */

static hash_map<const_basic_block, edge_prediction *> *bb_predictions;
/* Return true if one of the outgoing edges is already predicted by
   PREDICTOR.  */

bool
gimple_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
{
  struct edge_prediction *i;
  edge_prediction **preds = bb_predictions->get (bb);

  if (!preds)
    return false;

  for (i = *preds; i; i = i->ep_next)
    if (i->ep_predictor == predictor)
      return true;
  return false;
}
/* Return true if edge E is already predicted by PREDICTOR
   with outcome TAKEN.  */

bool
edge_predicted_by_p (edge e, enum br_predictor predictor, bool taken)
{
  struct edge_prediction *i;
  basic_block bb = e->src;
  edge_prediction **preds = bb_predictions->get (bb);
  if (!preds)
    return false;

  int probability = predictor_info[(int) predictor].hitrate;

  if (!taken)
    probability = REG_BR_PROB_BASE - probability;

  for (i = *preds; i; i = i->ep_next)
    if (i->ep_predictor == predictor
	&& i->ep_edge == e
	&& i->ep_probability == probability)
      return true;
  return false;
}
/* Return true when the probability of edge is reliable.

   The profile guessing code is good at predicting branch outcome (i.e.
   taken/not taken), which is predicted right slightly over 75% of the time.
   It is however notoriously poor on predicting the probability itself.
   In general the profile appears a lot flatter (with probabilities closer
   to 50%) than the reality, so it is a bad idea to use it to drive
   optimizations such as those disabling dynamic branch prediction for well
   predictable branches.

   There are two exceptions - edges leading to noreturn edges and edges
   predicted by number of iterations heuristics are predicted well.  This
   predicate should be able to distinguish those, but at the moment it simply
   checks for the noreturn heuristic, which is the only one giving probability
   over 99% or below 1%.  In the future we might want to propagate reliability
   information across the CFG if we find it useful in multiple places.  */

static bool
probability_reliable_p (int prob)
{
  return (profile_status_for_fn (cfun) == PROFILE_READ
	  || (profile_status_for_fn (cfun) == PROFILE_GUESSED
	      && (prob <= HITRATE (1) || prob >= HITRATE (99))));
}
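
/* For instance, with REG_BR_PROB_BASE of 10000, HITRATE (1) == 100 and
   HITRATE (99) == 9900, so a guessed profile is treated as reliable only for
   branches predicted with at most 1% or at least 99% probability (in
   practice the noreturn heuristic).  */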
/* Same predicate as above, working on edges.  */

bool
edge_probability_reliable_p (const_edge e)
{
  return probability_reliable_p (e->probability);
}
/* Same predicate as edge_probability_reliable_p, working on notes.  */

bool
br_prob_note_reliable_p (const_rtx note)
{
  gcc_assert (REG_NOTE_KIND (note) == REG_BR_PROB);
  return probability_reliable_p (XINT (note, 0));
}
static void
predict_insn (rtx_insn *insn, enum br_predictor predictor, int probability)
{
  gcc_assert (any_condjump_p (insn));
  if (!flag_guess_branch_prob)
    return;

  add_reg_note (insn, REG_BR_PRED,
		gen_rtx_CONCAT (VOIDmode,
				GEN_INT ((int) predictor),
				GEN_INT ((int) probability)));
}
/* Predict insn by given predictor.  */

void
predict_insn_def (rtx_insn *insn, enum br_predictor predictor,
		  enum prediction taken)
{
  int probability = predictor_info[(int) predictor].hitrate;

  if (taken != TAKEN)
    probability = REG_BR_PROB_BASE - probability;

  predict_insn (insn, predictor, probability);
}
/* Predict edge E with given probability if possible.  */

void
rtl_predict_edge (edge e, enum br_predictor predictor, int probability)
{
  rtx_insn *last_insn;
  last_insn = BB_END (e->src);

  /* We can store the branch prediction information only about
     conditional jumps.  */
  if (!any_condjump_p (last_insn))
    return;

  /* We always store probability of branching.  */
  if (e->flags & EDGE_FALLTHRU)
    probability = REG_BR_PROB_BASE - probability;

  predict_insn (last_insn, predictor, probability);
}
/* Predict edge E with the given PROBABILITY.  */
void
gimple_predict_edge (edge e, enum br_predictor predictor, int probability)
{
  if (e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun)
      && EDGE_COUNT (e->src->succs) > 1
      && flag_guess_branch_prob
      && optimize)
    {
      struct edge_prediction *i = XNEW (struct edge_prediction);
      edge_prediction *&preds = bb_predictions->get_or_insert (e->src);

      i->ep_next = preds;
      preds = i;
      i->ep_probability = probability;
      i->ep_predictor = predictor;
      i->ep_edge = e;
    }
}
/* Filter edge predictions PREDS by a function FILTER.  DATA are passed
   to the filter function.  */

static void
filter_predictions (edge_prediction **preds,
		    bool (*filter) (edge_prediction *, void *), void *data)
{
  if (!bb_predictions)
    return;

  if (preds)
    {
      struct edge_prediction **prediction = preds;
      struct edge_prediction *next;

      while (*prediction)
	{
	  if ((*filter) (*prediction, data))
	    prediction = &((*prediction)->ep_next);
	  else
	    {
	      next = (*prediction)->ep_next;
	      free (*prediction);
	      *prediction = next;
	    }
	}
    }
}
/* Filter function predicate that returns true for an edge prediction P
   if its edge is equal to DATA.  */

static bool
equal_edge_p (edge_prediction *p, void *data)
{
  return p->ep_edge == (edge)data;
}
/* Remove all predictions on given basic block that are attached
   to edge E.  */
void
remove_predictions_associated_with_edge (edge e)
{
  if (!bb_predictions)
    return;

  edge_prediction **preds = bb_predictions->get (e->src);
  filter_predictions (preds, equal_edge_p, e);
}
/* Clears the list of predictions stored for BB.  */

static void
clear_bb_predictions (basic_block bb)
{
  edge_prediction **preds = bb_predictions->get (bb);
  struct edge_prediction *pred, *next;

  if (!preds)
    return;

  for (pred = *preds; pred; pred = next)
    {
      next = pred->ep_next;
      free (pred);
    }
  *preds = NULL;
}
/* Return true when we can store prediction on insn INSN.
   At the moment we represent predictions only on conditional
   jumps, not at computed jump or other complicated cases.  */
static bool
can_predict_insn_p (const rtx_insn *insn)
{
  return (JUMP_P (insn)
	  && any_condjump_p (insn)
	  && EDGE_COUNT (BLOCK_FOR_INSN (insn)->succs) >= 2);
}
/* Predict edge E by given predictor if possible.  */

void
predict_edge_def (edge e, enum br_predictor predictor,
		  enum prediction taken)
{
  int probability = predictor_info[(int) predictor].hitrate;

  if (taken != TAKEN)
    probability = REG_BR_PROB_BASE - probability;

  predict_edge (e, predictor, probability);
}
/* Invert all branch predictions or probability notes in the INSN.  This needs
   to be done each time we invert the condition used by the jump.  */

void
invert_br_probabilities (rtx insn)
{
  rtx note;

  for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
    if (REG_NOTE_KIND (note) == REG_BR_PROB)
      XINT (note, 0) = REG_BR_PROB_BASE - XINT (note, 0);
    else if (REG_NOTE_KIND (note) == REG_BR_PRED)
      XEXP (XEXP (note, 0), 1)
	= GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (XEXP (note, 0), 1)));
}
/* Dump information about the branch prediction to the output file.  */

static void
dump_prediction (FILE *file, enum br_predictor predictor, int probability,
		 basic_block bb, enum predictor_reason reason = REASON_NONE,
		 edge ep_edge = NULL)
{
  edge e = ep_edge;
  edge_iterator ei;

  if (!file)
    return;

  if (e == NULL)
    FOR_EACH_EDGE (e, ei, bb->succs)
      if (! (e->flags & EDGE_FALLTHRU))
	break;

  char edge_info_str[128];
  if (ep_edge)
    sprintf (edge_info_str, " of edge %d->%d", ep_edge->src->index,
	     ep_edge->dest->index);
  else
    edge_info_str[0] = '\0';

  fprintf (file, "  %s heuristics%s%s: %.1f%%",
	   predictor_info[predictor].name,
	   edge_info_str, reason_messages[reason],
	   probability * 100.0 / REG_BR_PROB_BASE);

  if (bb->count)
    {
      fprintf (file, "  exec %" PRId64, bb->count);
      if (e)
	{
	  fprintf (file, " hit %" PRId64, e->count);
	  fprintf (file, " (%.1f%%)", e->count * 100.0 / bb->count);
	}
    }

  fprintf (file, "\n");
}
/* We can not predict the probabilities of outgoing edges of bb.  Set them
   evenly and hope for the best.  */
static void
set_even_probabilities (basic_block bb)
{
  int nedges = 0;
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, bb->succs)
    if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
      nedges ++;
  FOR_EACH_EDGE (e, ei, bb->succs)
    if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
      e->probability = (REG_BR_PROB_BASE + nedges / 2) / nedges;
    else
      e->probability = 0;
}
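
/* For example, a block with three eligible successors gets
   (10000 + 3 / 2) / 3 == (10000 + 1) / 3 == 3333 on each of them (assuming
   REG_BR_PROB_BASE of 10000), so the probabilities sum up to roughly
   REG_BR_PROB_BASE.  */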
/* Combine all REG_BR_PRED notes into single probability and attach REG_BR_PROB
   note if not already present.  Remove now useless REG_BR_PRED notes.  */

static void
combine_predictions_for_insn (rtx_insn *insn, basic_block bb)
{
  rtx prob_note;
  rtx *pnote;
  rtx note;
  int best_probability = PROB_EVEN;
  enum br_predictor best_predictor = END_PREDICTORS;
  int combined_probability = REG_BR_PROB_BASE / 2;
  int d;
  bool first_match = false;
  bool found = false;

  if (!can_predict_insn_p (insn))
    {
      set_even_probabilities (bb);
      return;
    }

  prob_note = find_reg_note (insn, REG_BR_PROB, 0);
  pnote = &REG_NOTES (insn);
  if (dump_file)
    fprintf (dump_file, "Predictions for insn %i bb %i\n", INSN_UID (insn),
	     bb->index);

  /* We implement "first match" heuristics and use probability guessed
     by predictor with smallest index.  */
  for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
    if (REG_NOTE_KIND (note) == REG_BR_PRED)
      {
	enum br_predictor predictor = ((enum br_predictor)
				       INTVAL (XEXP (XEXP (note, 0), 0)));
	int probability = INTVAL (XEXP (XEXP (note, 0), 1));

	found = true;
	if (best_predictor > predictor)
	  best_probability = probability, best_predictor = predictor;

	d = (combined_probability * probability
	     + (REG_BR_PROB_BASE - combined_probability)
	     * (REG_BR_PROB_BASE - probability));

	/* Use FP math to avoid overflows of 32bit integers.  */
	if (d == 0)
	  /* If one probability is 0% and one 100%, avoid division by zero.  */
	  combined_probability = REG_BR_PROB_BASE / 2;
	else
	  combined_probability = (((double) combined_probability) * probability
				  * REG_BR_PROB_BASE / d + 0.5);
      }

  /* Decide which heuristic to use.  In case we didn't match anything,
     use no_prediction heuristic, in case we did match, use either
     first match or Dempster-Shafer theory depending on the flags.  */

  if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
    first_match = true;

  if (!found)
    dump_prediction (dump_file, PRED_NO_PREDICTION,
		     combined_probability, bb);
  else
    {
      dump_prediction (dump_file, PRED_DS_THEORY, combined_probability,
		       bb, !first_match ? REASON_NONE : REASON_IGNORED);
      dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability,
		       bb, first_match ? REASON_NONE : REASON_IGNORED);
    }

  if (first_match)
    combined_probability = best_probability;
  dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb);

  while (*pnote)
    {
      if (REG_NOTE_KIND (*pnote) == REG_BR_PRED)
	{
	  enum br_predictor predictor = ((enum br_predictor)
					 INTVAL (XEXP (XEXP (*pnote, 0), 0)));
	  int probability = INTVAL (XEXP (XEXP (*pnote, 0), 1));

	  dump_prediction (dump_file, predictor, probability, bb,
			   (!first_match || best_predictor == predictor)
			   ? REASON_NONE : REASON_IGNORED);
	  *pnote = XEXP (*pnote, 1);
	}
      else
	pnote = &XEXP (*pnote, 1);
    }

  if (!prob_note)
    {
      add_int_reg_note (insn, REG_BR_PROB, combined_probability);

      /* Save the prediction into CFG in case we are seeing non-degenerated
	 conditional jump.  */
      if (!single_succ_p (bb))
	{
	  BRANCH_EDGE (bb)->probability = combined_probability;
	  FALLTHRU_EDGE (bb)->probability
	    = REG_BR_PROB_BASE - combined_probability;
	}
    }
  else if (!single_succ_p (bb))
    {
      int prob = XINT (prob_note, 0);

      BRANCH_EDGE (bb)->probability = prob;
      FALLTHRU_EDGE (bb)->probability = REG_BR_PROB_BASE - prob;
    }
  else
    single_succ_edge (bb)->probability = REG_BR_PROB_BASE;
}
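
/* A worked example of the Dempster-Shafer combination above (assuming
   REG_BR_PROB_BASE of 10000): starting from the neutral 5000, a first note
   predicting taken with probability 7000 moves the combined value to 7000;
   a second note with probability 6000 then gives
   d = 7000 * 6000 + 3000 * 4000 = 54000000 and
   combined = 7000 * 6000 * 10000 / 54000000 + 0.5, which rounds to 7778,
   i.e. roughly 77.8% taken.  */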
/* Edge prediction hash traits.  */

struct predictor_hash: pointer_hash <edge_prediction>
{

  static inline hashval_t hash (const edge_prediction *);
  static inline bool equal (const edge_prediction *, const edge_prediction *);
};
/* Calculate hash value of an edge prediction P based on predictor and
   normalized probability.  */

inline hashval_t
predictor_hash::hash (const edge_prediction *p)
{
  inchash::hash hstate;
  hstate.add_int (p->ep_predictor);

  int prob = p->ep_probability;
  if (prob > REG_BR_PROB_BASE / 2)
    prob = REG_BR_PROB_BASE - prob;

  hstate.add_int (prob);

  return hstate.end ();
}
/* Return true if edge predictions P1 and P2 use the same predictor and
   have equal (or opposite) probability.  */

inline bool
predictor_hash::equal (const edge_prediction *p1, const edge_prediction *p2)
{
  return (p1->ep_predictor == p2->ep_predictor
	  && (p1->ep_probability == p2->ep_probability
	      || p1->ep_probability == REG_BR_PROB_BASE - p2->ep_probability));
}

struct predictor_hash_traits: predictor_hash,
  typed_noop_remove <edge_prediction *> {};
/* Return true if edge prediction P is not in DATA hash set.  */

static bool
not_removed_prediction_p (edge_prediction *p, void *data)
{
  hash_set<edge_prediction *> *remove = (hash_set<edge_prediction *> *) data;
  return !remove->contains (p);
}
/* Prune predictions for a basic block BB.  Currently we do the following
   clean-up steps:

   1) remove a duplicate prediction that guesses the same probability
      (different from 1/2) for both outgoing edges
   2) remove duplicates of a prediction attached with the same probability
      to a single edge

  */

static void
prune_predictions_for_bb (basic_block bb)
{
  edge_prediction **preds = bb_predictions->get (bb);

  if (preds)
    {
      hash_table <predictor_hash_traits> s (13);
      hash_set <edge_prediction *> remove;

      /* Step 1: identify predictors that should be removed.  */
      for (edge_prediction *pred = *preds; pred; pred = pred->ep_next)
	{
	  edge_prediction *existing = s.find (pred);
	  if (existing)
	    {
	      if (pred->ep_edge == existing->ep_edge
		  && pred->ep_probability == existing->ep_probability)
		{
		  /* Remove a duplicate predictor.  */
		  dump_prediction (dump_file, pred->ep_predictor,
				   pred->ep_probability, bb,
				   REASON_SINGLE_EDGE_DUPLICATE, pred->ep_edge);

		  remove.add (pred);
		}
	      else if (pred->ep_edge != existing->ep_edge
		       && pred->ep_probability == existing->ep_probability
		       && pred->ep_probability != REG_BR_PROB_BASE / 2)
		{
		  /* Remove both predictors as they predict the same
		     for both edges.  */
		  dump_prediction (dump_file, existing->ep_predictor,
				   pred->ep_probability, bb,
				   REASON_EDGE_PAIR_DUPLICATE,
				   existing->ep_edge);
		  dump_prediction (dump_file, pred->ep_predictor,
				   pred->ep_probability, bb,
				   REASON_EDGE_PAIR_DUPLICATE,
				   pred->ep_edge);

		  remove.add (existing);
		  remove.add (pred);
		}
	    }

	  edge_prediction **slot2 = s.find_slot (pred, INSERT);
	  *slot2 = pred;
	}

      /* Step 2: Remove predictors.  */
      filter_predictions (preds, not_removed_prediction_p, &remove);
    }
}
/* Combine predictions into single probability and store them into CFG.
   Remove now useless prediction entries.
   If DRY_RUN is set, only produce dumps and do not modify profile.  */

static void
combine_predictions_for_bb (basic_block bb, bool dry_run)
{
  int best_probability = PROB_EVEN;
  enum br_predictor best_predictor = END_PREDICTORS;
  int combined_probability = REG_BR_PROB_BASE / 2;
  int d;
  bool first_match = false;
  bool found = false;
  struct edge_prediction *pred;
  int nedges = 0;
  edge e, first = NULL, second = NULL;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, bb->succs)
    if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
      {
	nedges ++;
	if (first && !second)
	  second = e;
	if (!first)
	  first = e;
      }

  /* When there is no successor or only one choice, prediction is easy.

     We are lazy for now and predict only basic blocks with two outgoing
     edges.  It is possible to predict generic case too, but we have to
     ignore first match heuristics and do more involved combining.  Implement
     this later.  */
  if (nedges != 2)
    {
      if (!bb->count && !dry_run)
	set_even_probabilities (bb);
      clear_bb_predictions (bb);
      if (dump_file)
	fprintf (dump_file, "%i edges in bb %i predicted to even probabilities\n",
		 nedges, bb->index);
      return;
    }

  if (dump_file)
    fprintf (dump_file, "Predictions for bb %i\n", bb->index);

  prune_predictions_for_bb (bb);

  edge_prediction **preds = bb_predictions->get (bb);

  if (preds)
    {
      /* We implement "first match" heuristics and use probability guessed
	 by predictor with smallest index.  */
      for (pred = *preds; pred; pred = pred->ep_next)
	{
	  enum br_predictor predictor = pred->ep_predictor;
	  int probability = pred->ep_probability;

	  if (pred->ep_edge != first)
	    probability = REG_BR_PROB_BASE - probability;

	  found = true;
	  /* First match heuristics would be wildly confused if we predicted
	     both directions.  */
	  if (best_predictor > predictor)
	    {
	      struct edge_prediction *pred2;
	      int prob = probability;

	      for (pred2 = (struct edge_prediction *) *preds;
		   pred2; pred2 = pred2->ep_next)
		if (pred2 != pred && pred2->ep_predictor == pred->ep_predictor)
		  {
		    int probability2 = pred2->ep_probability;

		    if (pred2->ep_edge != first)
		      probability2 = REG_BR_PROB_BASE - probability2;

		    if ((probability < REG_BR_PROB_BASE / 2) !=
			(probability2 < REG_BR_PROB_BASE / 2))
		      break;

		    /* If the same predictor later gave better result, go for it! */
		    if ((probability >= REG_BR_PROB_BASE / 2 && (probability2 > probability))
			|| (probability <= REG_BR_PROB_BASE / 2 && (probability2 < probability)))
		      prob = probability2;
		  }
	      if (!pred2)
		best_probability = prob, best_predictor = predictor;
	    }

	  d = (combined_probability * probability
	       + (REG_BR_PROB_BASE - combined_probability)
	       * (REG_BR_PROB_BASE - probability));

	  /* Use FP math to avoid overflows of 32bit integers.  */
	  if (d == 0)
	    /* If one probability is 0% and one 100%, avoid division by zero.  */
	    combined_probability = REG_BR_PROB_BASE / 2;
	  else
	    combined_probability = (((double) combined_probability)
				    * probability
				    * REG_BR_PROB_BASE / d + 0.5);
	}
    }

  /* Decide which heuristic to use.  In case we didn't match anything,
     use no_prediction heuristic, in case we did match, use either
     first match or Dempster-Shafer theory depending on the flags.  */

  if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
    first_match = true;

  if (!found)
    dump_prediction (dump_file, PRED_NO_PREDICTION, combined_probability, bb);
  else
    {
      dump_prediction (dump_file, PRED_DS_THEORY, combined_probability, bb,
		       !first_match ? REASON_NONE : REASON_IGNORED);
      dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability, bb,
		       first_match ? REASON_NONE : REASON_IGNORED);
    }

  if (first_match)
    combined_probability = best_probability;
  dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb);

  if (preds)
    {
      for (pred = (struct edge_prediction *) *preds; pred; pred = pred->ep_next)
	{
	  enum br_predictor predictor = pred->ep_predictor;
	  int probability = pred->ep_probability;

	  dump_prediction (dump_file, predictor, probability, bb,
			   (!first_match || best_predictor == predictor)
			   ? REASON_NONE : REASON_IGNORED, pred->ep_edge);
	}
    }
  clear_bb_predictions (bb);

  if (!bb->count && !dry_run)
    {
      first->probability = combined_probability;
      second->probability = REG_BR_PROB_BASE - combined_probability;
    }
}
/* Check if T1 and T2 satisfy the IV_COMPARE condition.
   Return the SSA_NAME if the condition satisfies, NULL otherwise.

   T1 and T2 should be one of the following cases:
     1. T1 is SSA_NAME, T2 is NULL
     2. T1 is SSA_NAME, T2 is INTEGER_CST between [-4, 4]
     3. T2 is SSA_NAME, T1 is INTEGER_CST between [-4, 4]  */

static tree
strips_small_constant (tree t1, tree t2)
{
  tree ret = NULL;
  int value = 0;

  if (!t1)
    return NULL;
  else if (TREE_CODE (t1) == SSA_NAME)
    ret = t1;
  else if (tree_fits_shwi_p (t1))
    value = tree_to_shwi (t1);
  else
    return NULL;

  if (!t2)
    return ret;
  else if (tree_fits_shwi_p (t2))
    value = tree_to_shwi (t2);
  else if (TREE_CODE (t2) == SSA_NAME)
    {
      if (ret)
	return NULL;
      else
	ret = t2;
    }

  if (value <= 4 && value >= -4)
    return ret;
  else
    return NULL;
}
/* Return the SSA_NAME in T or T's operands.
   Return NULL if SSA_NAME cannot be found.  */

static tree
get_base_value (tree t)
{
  if (TREE_CODE (t) == SSA_NAME)
    return t;

  if (!BINARY_CLASS_P (t))
    return NULL;

  switch (TREE_OPERAND_LENGTH (t))
    {
    case 1:
      return strips_small_constant (TREE_OPERAND (t, 0), NULL);
    case 2:
      return strips_small_constant (TREE_OPERAND (t, 0),
				    TREE_OPERAND (t, 1));
    default:
      return NULL;
    }
}
/* Check the compare STMT in LOOP.  If it compares an induction
   variable to a loop invariant, return true, and save
   LOOP_INVARIANT, COMPARE_CODE and LOOP_STEP.
   Otherwise return false and set LOOP_INVARIANT to NULL.  */

static bool
is_comparison_with_loop_invariant_p (gcond *stmt, struct loop *loop,
				     tree *loop_invariant,
				     enum tree_code *compare_code,
				     tree *loop_step,
				     tree *loop_iv_base)
{
  tree op0, op1, bound, base;
  affine_iv iv0, iv1;
  enum tree_code code;
  tree step;

  code = gimple_cond_code (stmt);
  *loop_invariant = NULL;

  switch (code)
    {
    case GT_EXPR:
    case GE_EXPR:
    case NE_EXPR:
    case LT_EXPR:
    case LE_EXPR:
    case EQ_EXPR:
      break;

    default:
      return false;
    }

  op0 = gimple_cond_lhs (stmt);
  op1 = gimple_cond_rhs (stmt);

  if ((TREE_CODE (op0) != SSA_NAME && TREE_CODE (op0) != INTEGER_CST)
      || (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op1) != INTEGER_CST))
    return false;
  if (!simple_iv (loop, loop_containing_stmt (stmt), op0, &iv0, true))
    return false;
  if (!simple_iv (loop, loop_containing_stmt (stmt), op1, &iv1, true))
    return false;
  if (TREE_CODE (iv0.step) != INTEGER_CST
      || TREE_CODE (iv1.step) != INTEGER_CST)
    return false;
  if ((integer_zerop (iv0.step) && integer_zerop (iv1.step))
      || (!integer_zerop (iv0.step) && !integer_zerop (iv1.step)))
    return false;

  if (integer_zerop (iv0.step))
    {
      if (code != NE_EXPR && code != EQ_EXPR)
	code = invert_tree_comparison (code, false);
      bound = iv0.base;
      base = iv1.base;
      if (tree_fits_shwi_p (iv1.step))
	step = iv1.step;
      else
	return false;
    }
  else
    {
      bound = iv1.base;
      base = iv0.base;
      if (tree_fits_shwi_p (iv0.step))
	step = iv0.step;
      else
	return false;
    }

  if (TREE_CODE (bound) != INTEGER_CST)
    bound = get_base_value (bound);
  if (!bound)
    return false;
  if (TREE_CODE (base) != INTEGER_CST)
    base = get_base_value (base);
  if (!base)
    return false;

  *loop_invariant = bound;
  *compare_code = code;
  *loop_step = step;
  *loop_iv_base = base;
  return true;
}
/* Compare two SSA_NAMEs: returns TRUE if T1 and T2 are value coherent.  */

static bool
expr_coherent_p (tree t1, tree t2)
{
  gimple *stmt;
  tree ssa_name_1 = NULL;
  tree ssa_name_2 = NULL;

  gcc_assert (TREE_CODE (t1) == SSA_NAME || TREE_CODE (t1) == INTEGER_CST);
  gcc_assert (TREE_CODE (t2) == SSA_NAME || TREE_CODE (t2) == INTEGER_CST);

  if (t1 == t2)
    return true;

  if (TREE_CODE (t1) == INTEGER_CST && TREE_CODE (t2) == INTEGER_CST)
    return true;
  if (TREE_CODE (t1) == INTEGER_CST || TREE_CODE (t2) == INTEGER_CST)
    return false;

  /* Check to see if t1 is expressed/defined with t2.  */
  stmt = SSA_NAME_DEF_STMT (t1);
  gcc_assert (stmt != NULL);
  if (is_gimple_assign (stmt))
    {
      ssa_name_1 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
      if (ssa_name_1 && ssa_name_1 == t2)
	return true;
    }

  /* Check to see if t2 is expressed/defined with t1.  */
  stmt = SSA_NAME_DEF_STMT (t2);
  gcc_assert (stmt != NULL);
  if (is_gimple_assign (stmt))
    {
      ssa_name_2 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
      if (ssa_name_2 && ssa_name_2 == t1)
	return true;
    }

  /* Compare if t1 and t2's def_stmts are identical.  */
  if (ssa_name_2 != NULL && ssa_name_1 == ssa_name_2)
    return true;
  else
    return false;
}
/* Return true if BB is predicted by one of the loop heuristics.  */

static bool
predicted_by_loop_heuristics_p (basic_block bb)
{
  struct edge_prediction *i;
  edge_prediction **preds = bb_predictions->get (bb);

  if (!preds)
    return false;

  for (i = *preds; i; i = i->ep_next)
    if (i->ep_predictor == PRED_LOOP_ITERATIONS_GUESSED
	|| i->ep_predictor == PRED_LOOP_ITERATIONS_MAX
	|| i->ep_predictor == PRED_LOOP_ITERATIONS
	|| i->ep_predictor == PRED_LOOP_EXIT
	|| i->ep_predictor == PRED_LOOP_EXTRA_EXIT)
      return true;
  return false;
}
/* Predict branch probability of BB when BB contains a branch that compares
   an induction variable in LOOP with LOOP_IV_BASE_VAR to LOOP_BOUND_VAR.  The
   loop exit is compared using LOOP_BOUND_CODE, with step of LOOP_BOUND_STEP.

   E.g.
     for (int i = 0; i < bound; i++) {
       if (i < bound - 2)
	 computation_1();
       else
	 computation_2();
     }

  In this loop, we will predict the branch inside the loop to be taken.  */

static void
predict_iv_comparison (struct loop *loop, basic_block bb,
		       tree loop_bound_var,
		       tree loop_iv_base_var,
		       enum tree_code loop_bound_code,
		       int loop_bound_step)
{
  gimple *stmt;
  tree compare_var, compare_base;
  enum tree_code compare_code;
  tree compare_step_var;
  edge then_edge;
  edge_iterator ei;

  if (predicted_by_loop_heuristics_p (bb))
    return;

  stmt = last_stmt (bb);
  if (!stmt || gimple_code (stmt) != GIMPLE_COND)
    return;
  if (!is_comparison_with_loop_invariant_p (as_a <gcond *> (stmt),
					    loop, &compare_var,
					    &compare_code,
					    &compare_step_var,
					    &compare_base))
    return;

  /* Find the taken edge.  */
  FOR_EACH_EDGE (then_edge, ei, bb->succs)
    if (then_edge->flags & EDGE_TRUE_VALUE)
      break;

  /* When comparing an IV to a loop invariant, NE is more likely to be
     taken while EQ is more likely to be not-taken.  */
  if (compare_code == NE_EXPR)
    {
      predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
      return;
    }
  else if (compare_code == EQ_EXPR)
    {
      predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
      return;
    }

  if (!expr_coherent_p (loop_iv_base_var, compare_base))
    return;

  /* If loop bound, base and compare bound are all constants, we can
     calculate the probability directly.  */
  if (tree_fits_shwi_p (loop_bound_var)
      && tree_fits_shwi_p (compare_var)
      && tree_fits_shwi_p (compare_base))
    {
      int probability;
      bool overflow, overall_overflow = false;
      widest_int compare_count, tem;

      /* (loop_bound - base) / compare_step */
      tem = wi::sub (wi::to_widest (loop_bound_var),
		     wi::to_widest (compare_base), SIGNED, &overflow);
      overall_overflow |= overflow;
      widest_int loop_count = wi::div_trunc (tem,
					     wi::to_widest (compare_step_var),
					     SIGNED, &overflow);
      overall_overflow |= overflow;

      if (!wi::neg_p (wi::to_widest (compare_step_var))
	  ^ (compare_code == LT_EXPR || compare_code == LE_EXPR))
	{
	  /* (loop_bound - compare_bound) / compare_step */
	  tem = wi::sub (wi::to_widest (loop_bound_var),
			 wi::to_widest (compare_var), SIGNED, &overflow);
	  overall_overflow |= overflow;
	  compare_count = wi::div_trunc (tem, wi::to_widest (compare_step_var),
					 SIGNED, &overflow);
	  overall_overflow |= overflow;
	}
      else
	{
	  /* (compare_bound - base) / compare_step */
	  tem = wi::sub (wi::to_widest (compare_var),
			 wi::to_widest (compare_base), SIGNED, &overflow);
	  overall_overflow |= overflow;
	  compare_count = wi::div_trunc (tem, wi::to_widest (compare_step_var),
					 SIGNED, &overflow);
	  overall_overflow |= overflow;
	}
      if (compare_code == LE_EXPR || compare_code == GE_EXPR)
	++compare_count;
      if (loop_bound_code == LE_EXPR || loop_bound_code == GE_EXPR)
	++loop_count;
      if (wi::neg_p (compare_count))
	compare_count = 0;
      if (wi::neg_p (loop_count))
	loop_count = 0;
      if (loop_count == 0)
	probability = 0;
      else if (wi::cmps (compare_count, loop_count) == 1)
	probability = REG_BR_PROB_BASE;
      else
	{
	  tem = compare_count * REG_BR_PROB_BASE;
	  tem = wi::udiv_trunc (tem, loop_count);
	  probability = tem.to_uhwi ();
	}

      /* FIXME: The branch prediction seems broken. It has only 20% hitrate.  */
      if (!overall_overflow)
	predict_edge (then_edge, PRED_LOOP_IV_COMPARE, probability);

      return;
    }

  if (expr_coherent_p (loop_bound_var, compare_var))
    {
      if ((loop_bound_code == LT_EXPR || loop_bound_code == LE_EXPR)
	  && (compare_code == LT_EXPR || compare_code == LE_EXPR))
	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
      else if ((loop_bound_code == GT_EXPR || loop_bound_code == GE_EXPR)
	       && (compare_code == GT_EXPR || compare_code == GE_EXPR))
	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
      else if (loop_bound_code == NE_EXPR)
	{
	  /* If the loop backedge condition is "(i != bound)", we do
	     the comparison based on the step of IV:
	     * step < 0 : backedge condition is like (i > bound)
	     * step > 0 : backedge condition is like (i < bound)  */
	  gcc_assert (loop_bound_step != 0);
	  if (loop_bound_step > 0
	      && (compare_code == LT_EXPR
		  || compare_code == LE_EXPR))
	    predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
	  else if (loop_bound_step < 0
		   && (compare_code == GT_EXPR
		       || compare_code == GE_EXPR))
	    predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
	  else
	    predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
	}
      else
	/* The branch is predicted not-taken if loop_bound_code is
	   opposite with compare_code.  */
	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
    }
  else if (expr_coherent_p (loop_iv_base_var, compare_var))
    {
      /* For cases like:
	   for (i = s; i < h; i++)
	     if (i > s + 2) ....
	 The branch should be predicted taken.  */
      if (loop_bound_step > 0
	  && (compare_code == GT_EXPR || compare_code == GE_EXPR))
	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
      else if (loop_bound_step < 0
	       && (compare_code == LT_EXPR || compare_code == LE_EXPR))
	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
      else
	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
    }
}
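
/* An illustrative run of the constant case above: for

     for (i = 0; i < 10; i++)
       if (i < 3)
	 ...

   loop_count is (10 - 0) / 1 == 10 and compare_count is (3 - 0) / 1 == 3,
   so the then edge gets probability 3 * REG_BR_PROB_BASE / 10 (about 30%)
   under PRED_LOOP_IV_COMPARE.  */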
/* Predict for extra loop exits that will lead to EXIT_EDGE.  The extra loop
   exits result from short-circuit conditions that will generate an
   if_tmp.  E.g. :

   if (foo() || global > 10)
     break;

   This will be translated into:

   BB3:
     loop header...
   BB4:
     if foo() goto BB6 else goto BB5
   BB5:
     if global > 10 goto BB6 else goto BB7
   BB6:
     goto BB7
   BB7:
     iftmp = (PHI 0(BB5), 1(BB6))
     if iftmp == 1 goto BB8 else goto BB3
   BB8:
     outside of the loop...

   The edge BB7->BB8 is loop exit because BB8 is outside of the loop.
   From the dataflow, we can infer that BB4->BB6 and BB5->BB6 are also loop
   exits.  This function takes BB7->BB8 as input, and finds out the extra loop
   exits to predict them using PRED_LOOP_EXTRA_EXIT.  */

static void
predict_extra_loop_exits (edge exit_edge)
{
  unsigned i;
  bool check_value_one;
  gimple *lhs_def_stmt;
  gphi *phi_stmt;
  tree cmp_rhs, cmp_lhs;
  gimple *last;
  gcond *cmp_stmt;

  last = last_stmt (exit_edge->src);
  if (!last)
    return;
  cmp_stmt = dyn_cast <gcond *> (last);
  if (!cmp_stmt)
    return;

  cmp_rhs = gimple_cond_rhs (cmp_stmt);
  cmp_lhs = gimple_cond_lhs (cmp_stmt);
  if (!TREE_CONSTANT (cmp_rhs)
      || !(integer_zerop (cmp_rhs) || integer_onep (cmp_rhs)))
    return;
  if (TREE_CODE (cmp_lhs) != SSA_NAME)
    return;

  /* If check_value_one is true, only the phi_args with value '1' will lead
     to loop exit.  Otherwise, only the phi_args with value '0' will lead to
     loop exit.  */
  check_value_one = (((integer_onep (cmp_rhs))
		    ^ (gimple_cond_code (cmp_stmt) == EQ_EXPR))
		    ^ ((exit_edge->flags & EDGE_TRUE_VALUE) != 0));

  lhs_def_stmt = SSA_NAME_DEF_STMT (cmp_lhs);
  if (!lhs_def_stmt)
    return;

  phi_stmt = dyn_cast <gphi *> (lhs_def_stmt);
  if (!phi_stmt)
    return;

  for (i = 0; i < gimple_phi_num_args (phi_stmt); i++)
    {
      edge e1;
      edge_iterator ei;
      tree val = gimple_phi_arg_def (phi_stmt, i);
      edge e = gimple_phi_arg_edge (phi_stmt, i);

      if (!TREE_CONSTANT (val) || !(integer_zerop (val) || integer_onep (val)))
	continue;
      if ((check_value_one ^ integer_onep (val)) == 1)
	continue;
      if (EDGE_COUNT (e->src->succs) != 1)
	{
	  predict_paths_leading_to_edge (e, PRED_LOOP_EXTRA_EXIT, NOT_TAKEN);
	  continue;
	}

      FOR_EACH_EDGE (e1, ei, e->src->preds)
	predict_paths_leading_to_edge (e1, PRED_LOOP_EXTRA_EXIT, NOT_TAKEN);
    }
}
/* Predict edge probabilities by exploiting loop structure.  */

static void
predict_loops (void)
{
  struct loop *loop;

  /* Try to predict out blocks in a loop that are not part of a
     natural loop.  */
  FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
    {
      basic_block bb, *bbs;
      unsigned j, n_exits = 0;
      vec<edge> exits;
      struct tree_niter_desc niter_desc;
      edge ex;
      struct nb_iter_bound *nb_iter;
      enum tree_code loop_bound_code = ERROR_MARK;
      tree loop_bound_step = NULL;
      tree loop_bound_var = NULL;
      tree loop_iv_base = NULL;
      gcond *stmt = NULL;

      exits = get_loop_exit_edges (loop);
      FOR_EACH_VEC_ELT (exits, j, ex)
	if (!(ex->flags & (EDGE_EH | EDGE_ABNORMAL_CALL | EDGE_FAKE)))
	  n_exits ++;
      if (!n_exits)
	{
	  exits.release ();
	  continue;
	}

      FOR_EACH_VEC_ELT (exits, j, ex)
	{
	  tree niter = NULL;
	  HOST_WIDE_INT nitercst;
	  int max = PARAM_VALUE (PARAM_MAX_PREDICTED_ITERATIONS);
	  int probability;
	  enum br_predictor predictor;
	  widest_int nit;

	  if (ex->flags & (EDGE_EH | EDGE_ABNORMAL_CALL | EDGE_FAKE))
	    continue;
	  /* Loop heuristics do not expect exit conditional to be inside
	     inner loop.  We predict from innermost to outermost loop.  */
	  if (predicted_by_loop_heuristics_p (ex->src))
	    continue;
	  predict_extra_loop_exits (ex);

	  if (number_of_iterations_exit (loop, ex, &niter_desc, false, false))
	    niter = niter_desc.niter;
	  if (!niter || TREE_CODE (niter_desc.niter) != INTEGER_CST)
	    niter = loop_niter_by_eval (loop, ex);

	  if (TREE_CODE (niter) == INTEGER_CST)
	    {
	      if (tree_fits_uhwi_p (niter)
		  && max
		  && compare_tree_int (niter, max - 1) == -1)
		nitercst = tree_to_uhwi (niter) + 1;
	      else
		nitercst = max;
	      predictor = PRED_LOOP_ITERATIONS;
	    }
	  /* If we have just one exit and we can derive some information about
	     the number of iterations of the loop from the statements inside
	     the loop, use it to predict this exit.  */
	  else if (n_exits == 1
		   && estimated_stmt_executions (loop, &nit))
	    {
	      if (wi::gtu_p (nit, max))
		nitercst = max;
	      else
		nitercst = nit.to_shwi ();
	      predictor = PRED_LOOP_ITERATIONS_GUESSED;
	    }
	  /* If we have likely upper bound, trust it for very small iteration
	     counts.  Such loops would otherwise get mispredicted by standard
	     LOOP_EXIT heuristics.  */
	  else if (n_exits == 1
		   && likely_max_stmt_executions (loop, &nit)
		   && wi::ltu_p (nit,
				 RDIV (REG_BR_PROB_BASE,
				       REG_BR_PROB_BASE
					 - predictor_info
						[PRED_LOOP_EXIT].hitrate)))
	    {
	      nitercst = nit.to_shwi ();
	      predictor = PRED_LOOP_ITERATIONS_MAX;
	    }
	  else
	    continue;

	  gcc_checking_assert (nitercst);
	  probability = RDIV (REG_BR_PROB_BASE, nitercst);
	  predict_edge (ex, predictor, probability);
	}
      exits.release ();

      /* Find information about loop bound variables.  */
      for (nb_iter = loop->bounds; nb_iter;
	   nb_iter = nb_iter->next)
	if (nb_iter->stmt
	    && gimple_code (nb_iter->stmt) == GIMPLE_COND)
	  {
	    stmt = as_a <gcond *> (nb_iter->stmt);
	    break;
	  }
      if (!stmt && last_stmt (loop->header)
	  && gimple_code (last_stmt (loop->header)) == GIMPLE_COND)
	stmt = as_a <gcond *> (last_stmt (loop->header));
      if (stmt)
	is_comparison_with_loop_invariant_p (stmt, loop,
					     &loop_bound_var,
					     &loop_bound_code,
					     &loop_bound_step,
					     &loop_iv_base);

      bbs = get_loop_body (loop);

      for (j = 0; j < loop->num_nodes; j++)
	{
	  int header_found = 0;
	  edge e;
	  edge_iterator ei;

	  bb = bbs[j];

	  /* Bypass loop heuristics on continue statement.  These
	     statements construct loops via "non-loop" constructs
	     in the source language and are better to be handled
	     separately.  */
	  if (predicted_by_p (bb, PRED_CONTINUE))
	    continue;

	  /* Loop exit heuristics - predict an edge exiting the loop if the
	     conditional has no loop header successors as not taken.  */
	  if (!header_found
	      /* If we already used more reliable loop exit predictors, do not
		 bother with PRED_LOOP_EXIT.  */
	      && !predicted_by_loop_heuristics_p (bb))
	    {
	      /* For loop with many exits we don't want to predict all exits
		 with the pretty large probability, because if all exits are
		 considered in row, the loop would be predicted to iterate
		 almost never.  The code to divide probability by number of
		 exits is very rough.  It should compute the number of exits
		 taken in each path through function (not the overall number
		 of exits that might be a lot higher for loops with wide switch
		 statements in them) and compute n-th square root.

		 We limit the minimal probability by 2% to avoid
		 EDGE_PROBABILITY_RELIABLE from trusting the branch prediction
		 as this was causing regression in perl benchmark containing such
		 a wide loop.  */

	      int probability = ((REG_BR_PROB_BASE
				  - predictor_info [(int) PRED_LOOP_EXIT].hitrate)
				 / n_exits);
	      if (probability < HITRATE (2))
		probability = HITRATE (2);
	      FOR_EACH_EDGE (e, ei, bb->succs)
		if (e->dest->index < NUM_FIXED_BLOCKS
		    || !flow_bb_inside_loop_p (loop, e->dest))
		  predict_edge (e, PRED_LOOP_EXIT, probability);
	    }
	  if (loop_bound_var)
	    predict_iv_comparison (loop, bb, loop_bound_var, loop_iv_base,
				   loop_bound_code,
				   tree_to_shwi (loop_bound_step));
	}

      /* Free basic blocks from get_loop_body.  */
      free (bbs);
    }
}
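
/* As an example of the iteration heuristics above: if niter analysis proves
   a single-exit loop rolls 10 times (nitercst == 10), the exit edge is
   predicted with probability REG_BR_PROB_BASE / 10, i.e. the loop is
   expected to continue about 90% of the time.  */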
/* Attempt to predict probabilities of BB outgoing edges using local
   properties.  */
static void
bb_estimate_probability_locally (basic_block bb)
{
  rtx_insn *last_insn = BB_END (bb);
  rtx cond;

  if (! can_predict_insn_p (last_insn))
    return;
  cond = get_condition (last_insn, NULL, false, false);
  if (! cond)
    return;

  /* Try "pointer heuristic."
     A comparison ptr == 0 is predicted as false.
     Similarly, a comparison ptr1 == ptr2 is predicted as false.  */
  if (COMPARISON_P (cond)
      && ((REG_P (XEXP (cond, 0)) && REG_POINTER (XEXP (cond, 0)))
	  || (REG_P (XEXP (cond, 1)) && REG_POINTER (XEXP (cond, 1)))))
    {
      if (GET_CODE (cond) == EQ)
	predict_insn_def (last_insn, PRED_POINTER, NOT_TAKEN);
      else if (GET_CODE (cond) == NE)
	predict_insn_def (last_insn, PRED_POINTER, TAKEN);
    }
  else

  /* Try "opcode heuristic."
     EQ tests are usually false and NE tests are usually true. Also,
     most quantities are positive, so we can make the appropriate guesses
     about signed comparisons against zero.  */
    switch (GET_CODE (cond))
      {
      case CONST_INT:
	/* Unconditional branch.  */
	predict_insn_def (last_insn, PRED_UNCONDITIONAL,
			  cond == const0_rtx ? NOT_TAKEN : TAKEN);
	break;

      case EQ:
      case UNEQ:
	/* Floating point comparisons appear to behave in a very
	   unpredictable way because of special role of = tests in
	   FP code.  */
	if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
	  ;
	/* Comparisons with 0 are often used for booleans and there is
	   nothing useful to predict about them.  */
	else if (XEXP (cond, 1) == const0_rtx
		 || XEXP (cond, 0) == const0_rtx)
	  ;
	else
	  predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, NOT_TAKEN);
	break;

      case NE:
      case LTGT:
	/* Floating point comparisons appear to behave in a very
	   unpredictable way because of special role of = tests in
	   FP code.  */
	if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
	  ;
	/* Comparisons with 0 are often used for booleans and there is
	   nothing useful to predict about them.  */
	else if (XEXP (cond, 1) == const0_rtx
		 || XEXP (cond, 0) == const0_rtx)
	  ;
	else
	  predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, TAKEN);
	break;

      case ORDERED:
	predict_insn_def (last_insn, PRED_FPOPCODE, TAKEN);
	break;

      case UNORDERED:
	predict_insn_def (last_insn, PRED_FPOPCODE, NOT_TAKEN);
	break;

      case LE:
      case LT:
	if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
	    || XEXP (cond, 1) == constm1_rtx)
	  predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, NOT_TAKEN);
	break;

      case GE:
      case GT:
	if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
	    || XEXP (cond, 1) == constm1_rtx)
	  predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, TAKEN);
	break;

      default:
	break;
      }
}
/* Set edge->probability for each successor edge of BB.  */
void
guess_outgoing_edge_probabilities (basic_block bb)
{
  bb_estimate_probability_locally (bb);
  combine_predictions_for_insn (BB_END (bb), bb);
}
static tree expr_expected_value (tree, bitmap, enum br_predictor *predictor);
/* Helper function for expr_expected_value.  */

static tree
expr_expected_value_1 (tree type, tree op0, enum tree_code code,
		       tree op1, bitmap visited, enum br_predictor *predictor)
{
  gimple *def;

  if (predictor)
    *predictor = PRED_UNCONDITIONAL;

  if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
    {
      if (TREE_CONSTANT (op0))
	return op0;

      if (code != SSA_NAME)
	return NULL_TREE;

      def = SSA_NAME_DEF_STMT (op0);

      /* If we were already here, break the infinite cycle.  */
      if (!bitmap_set_bit (visited, SSA_NAME_VERSION (op0)))
	return NULL;

      if (gimple_code (def) == GIMPLE_PHI)
	{
	  /* All the arguments of the PHI node must have the same constant
	     length.  */
	  int i, n = gimple_phi_num_args (def);
	  tree val = NULL, new_val;

	  for (i = 0; i < n; i++)
	    {
	      tree arg = PHI_ARG_DEF (def, i);
	      enum br_predictor predictor2;

	      /* If this PHI has itself as an argument, we cannot
		 determine the string length of this argument.  However,
		 if we can find an expected constant value for the other
		 PHI args then we can still be sure that this is
		 likely a constant.  So be optimistic and just
		 continue with the next argument.  */
	      if (arg == PHI_RESULT (def))
		continue;

	      new_val = expr_expected_value (arg, visited, &predictor2);

	      /* It is difficult to combine value predictors.  Simply assume
		 that later predictor is weaker and take its prediction.  */
	      if (predictor && *predictor < predictor2)
		*predictor = predictor2;
	      if (!new_val)
		return NULL;
	      if (!val)
		val = new_val;
	      else if (!operand_equal_p (val, new_val, false))
		return NULL;
	    }
	  return val;
	}
      if (is_gimple_assign (def))
	{
	  if (gimple_assign_lhs (def) != op0)
	    return NULL;

	  return expr_expected_value_1 (TREE_TYPE (gimple_assign_lhs (def)),
					gimple_assign_rhs1 (def),
					gimple_assign_rhs_code (def),
					gimple_assign_rhs2 (def),
					visited, predictor);
	}

      if (is_gimple_call (def))
	{
	  tree decl = gimple_call_fndecl (def);
	  if (!decl)
	    {
	      if (gimple_call_internal_p (def)
		  && gimple_call_internal_fn (def) == IFN_BUILTIN_EXPECT)
		{
		  gcc_assert (gimple_call_num_args (def) == 3);
		  tree val = gimple_call_arg (def, 0);
		  if (TREE_CONSTANT (val))
		    return val;
		  if (predictor)
		    {
		      tree val2 = gimple_call_arg (def, 2);
		      gcc_assert (TREE_CODE (val2) == INTEGER_CST
				  && tree_fits_uhwi_p (val2)
				  && tree_to_uhwi (val2) < END_PREDICTORS);
		      *predictor = (enum br_predictor) tree_to_uhwi (val2);
		    }
		  return gimple_call_arg (def, 1);
		}
	      return NULL;
	    }
	  if (DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
	    switch (DECL_FUNCTION_CODE (decl))
	      {
	      case BUILT_IN_EXPECT:
		{
		  tree val;
		  if (gimple_call_num_args (def) != 2)
		    return NULL;
		  val = gimple_call_arg (def, 0);
		  if (TREE_CONSTANT (val))
		    return val;
		  if (predictor)
		    *predictor = PRED_BUILTIN_EXPECT;
		  return gimple_call_arg (def, 1);
		}

	      case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_N:
	      case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_1:
	      case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_2:
	      case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_4:
	      case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_8:
	      case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_16:
	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE:
	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_N:
	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_1:
	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_2:
	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_4:
	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_8:
	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_16:
		/* Assume that any given atomic operation has low contention,
		   and thus the compare-and-swap operation succeeds.  */
		if (predictor)
		  *predictor = PRED_COMPARE_AND_SWAP;
		return boolean_true_node;
	      default:
		break;
	      }
	}

      return NULL;
    }

  if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS)
    {
      tree res;
      enum br_predictor predictor2;
      op0 = expr_expected_value (op0, visited, predictor);
      if (!op0)
	return NULL;
      op1 = expr_expected_value (op1, visited, &predictor2);
      if (predictor && *predictor < predictor2)
	*predictor = predictor2;
      if (!op1)
	return NULL;
      res = fold_build2 (code, type, op0, op1);
      if (TREE_CONSTANT (res))
	return res;
      return NULL;
    }
  if (get_gimple_rhs_class (code) == GIMPLE_UNARY_RHS)
    {
      tree res;
      op0 = expr_expected_value (op0, visited, predictor);
      if (!op0)
	return NULL;
      res = fold_build1 (code, type, op0);
      if (TREE_CONSTANT (res))
	return res;
      return NULL;
    }
  return NULL;
}
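
/* For example (an illustrative use, not code from this file):

     if (__builtin_expect (x, 0))
       foo ();

   the SSA name feeding the condition is defined by a BUILT_IN_EXPECT (or
   IFN_BUILTIN_EXPECT) call, so the walk above returns the expected value 0
   and sets *predictor to PRED_BUILTIN_EXPECT; the branch is then predicted
   not taken.  */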
/* Return the constant EXPR is likely to have at execution time, NULL if
   unknown.  The function is used by the builtin_expect branch predictor so
   the evidence must come from this construct and additional possible constant
   folding.

   We may want to implement more involved value guessing (such as value range
   propagation based prediction), but such tricks shall go to a new
   implementation.  */

static tree
expr_expected_value (tree expr, bitmap visited,
		     enum br_predictor *predictor)
{
  enum tree_code code;
  tree op0, op1;

  if (TREE_CONSTANT (expr))
    {
      if (predictor)
	*predictor = PRED_UNCONDITIONAL;
      return expr;
    }

  extract_ops_from_tree (expr, &code, &op0, &op1);
  return expr_expected_value_1 (TREE_TYPE (expr),
				op0, code, op1, visited, predictor);
}
/* Predict using opcode of the last statement in basic block.  */
static void
tree_predict_by_opcode (basic_block bb)
{
  gimple *stmt = last_stmt (bb);
  edge then_edge;
  tree op0, op1;
  tree type;
  tree val;
  enum tree_code cmp;
  bitmap visited;
  edge_iterator ei;
  enum br_predictor predictor;

  if (!stmt || gimple_code (stmt) != GIMPLE_COND)
    return;
  FOR_EACH_EDGE (then_edge, ei, bb->succs)
    if (then_edge->flags & EDGE_TRUE_VALUE)
      break;
  op0 = gimple_cond_lhs (stmt);
  op1 = gimple_cond_rhs (stmt);
  cmp = gimple_cond_code (stmt);
  type = TREE_TYPE (op0);
  visited = BITMAP_ALLOC (NULL);
  val = expr_expected_value_1 (boolean_type_node, op0, cmp, op1, visited,
			       &predictor);
  BITMAP_FREE (visited);
  if (val && TREE_CODE (val) == INTEGER_CST)
    {
      if (predictor == PRED_BUILTIN_EXPECT)
	{
	  int percent = PARAM_VALUE (BUILTIN_EXPECT_PROBABILITY);

	  gcc_assert (percent >= 0 && percent <= 100);
	  if (integer_zerop (val))
	    percent = 100 - percent;
	  predict_edge (then_edge, PRED_BUILTIN_EXPECT, HITRATE (percent));
	}
      else
	predict_edge_def (then_edge, predictor,
			  integer_zerop (val) ? NOT_TAKEN : TAKEN);
    }
  /* Try "pointer heuristic."
     A comparison ptr == 0 is predicted as false.
     Similarly, a comparison ptr1 == ptr2 is predicted as false.  */
  if (POINTER_TYPE_P (type))
    {
      if (cmp == EQ_EXPR)
	predict_edge_def (then_edge, PRED_TREE_POINTER, NOT_TAKEN);
      else if (cmp == NE_EXPR)
	predict_edge_def (then_edge, PRED_TREE_POINTER, TAKEN);
    }
  else
    /* Try "opcode heuristic."
       EQ tests are usually false and NE tests are usually true. Also,
       most quantities are positive, so we can make the appropriate guesses
       about signed comparisons against zero.  */
    switch (cmp)
      {
      case EQ_EXPR:
      case UNEQ_EXPR:
	/* Floating point comparisons appear to behave in a very
	   unpredictable way because of special role of = tests in
	   FP code.  */
	if (FLOAT_TYPE_P (type))
	  ;
	/* Comparisons with 0 are often used for booleans and there is
	   nothing useful to predict about them.  */
	else if (integer_zerop (op0) || integer_zerop (op1))
	  ;
	else
	  predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, NOT_TAKEN);
	break;

      case NE_EXPR:
      case LTGT_EXPR:
	/* Floating point comparisons appear to behave in a very
	   unpredictable way because of special role of = tests in
	   FP code.  */
	if (FLOAT_TYPE_P (type))
	  ;
	/* Comparisons with 0 are often used for booleans and there is
	   nothing useful to predict about them.  */
	else if (integer_zerop (op0)
		 || integer_zerop (op1))
	  ;
	else
	  predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, TAKEN);
	break;

      case ORDERED_EXPR:
	predict_edge_def (then_edge, PRED_TREE_FPOPCODE, TAKEN);
	break;

      case UNORDERED_EXPR:
	predict_edge_def (then_edge, PRED_TREE_FPOPCODE, NOT_TAKEN);
	break;

      case LE_EXPR:
      case LT_EXPR:
	if (integer_zerop (op1)
	    || integer_onep (op1)
	    || integer_all_onesp (op1)
	    || real_zerop (op1)
	    || real_onep (op1)
	    || real_minus_onep (op1))
	  predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, NOT_TAKEN);
	break;

      case GE_EXPR:
      case GT_EXPR:
	if (integer_zerop (op1)
	    || integer_onep (op1)
	    || integer_all_onesp (op1)
	    || real_zerop (op1)
	    || real_onep (op1)
	    || real_minus_onep (op1))
	  predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, TAKEN);
	break;

      default:
	break;
      }
}
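
/* Continuing the __builtin_expect example: assuming the
   builtin-expect-probability parameter is at its usual default of 90, a
   condition expected to be nonzero is predicted taken with
   HITRATE (90) == 9000, and one expected to be zero is predicted taken with
   only HITRATE (100 - 90) == 1000.  */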
/* Try to guess whether the return value means an error code.  */

static enum br_predictor
return_prediction (tree val, enum prediction *prediction)
{
  /* VOID.  */
  if (!val)
    return PRED_NO_PREDICTION;
  /* Different heuristics for pointers and scalars.  */
  if (POINTER_TYPE_P (TREE_TYPE (val)))
    {
      /* NULL is usually not returned.  */
      if (integer_zerop (val))
        {
          *prediction = NOT_TAKEN;
          return PRED_NULL_RETURN;
        }
    }
  else if (INTEGRAL_TYPE_P (TREE_TYPE (val)))
    {
      /* Negative return values are often used to indicate
         errors.  */
      if (TREE_CODE (val) == INTEGER_CST
          && tree_int_cst_sgn (val) < 0)
        {
          *prediction = NOT_TAKEN;
          return PRED_NEGATIVE_RETURN;
        }
      /* Constant return values seem to be commonly taken.
         Zero/one often represent booleans, so exclude them from the
         heuristics.  */
      if (TREE_CONSTANT (val)
          && (!integer_zerop (val) && !integer_onep (val)))
        {
          *prediction = NOT_TAKEN;
          return PRED_CONST_RETURN;
        }
    }
  return PRED_NO_PREDICTION;
}
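/* Illustrative note (not part of the original source): for a function shaped
   like

     struct entry *lookup (int key)
     {
       if (!table)
         return NULL;
       if (key < 0)
         return NULL;
       return &table[key];
     }

   apply_return_prediction below feeds each value merged at the return
   statement into this routine, so the paths reaching the NULL returns get
   PRED_NULL_RETURN and are predicted not taken.  The function and names here
   are hypothetical.  */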
/* Find the basic block with the return expression and look up the possible
   return value, trying to apply the RETURN_PREDICTION heuristics.  */
static void
apply_return_prediction (void)
{
  greturn *return_stmt = NULL;
  tree return_val;
  edge e;
  gphi *phi;
  int phi_num_args, i;
  enum br_predictor pred;
  enum prediction direction;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    {
      gimple *last = last_stmt (e->src);
      if (last
          && gimple_code (last) == GIMPLE_RETURN)
        {
          return_stmt = as_a <greturn *> (last);
          break;
        }
    }
  if (!return_stmt)
    return;
  return_val = gimple_return_retval (return_stmt);
  if (!return_val)
    return;
  if (TREE_CODE (return_val) != SSA_NAME
      || !SSA_NAME_DEF_STMT (return_val)
      || gimple_code (SSA_NAME_DEF_STMT (return_val)) != GIMPLE_PHI)
    return;
  phi = as_a <gphi *> (SSA_NAME_DEF_STMT (return_val));
  phi_num_args = gimple_phi_num_args (phi);
  pred = return_prediction (PHI_ARG_DEF (phi, 0), &direction);

  /* Avoid the degenerate case where all return values from the function
     belong to the same category (i.e. they are all positive constants),
     so we can hardly say anything about them.  */
  for (i = 1; i < phi_num_args; i++)
    if (pred != return_prediction (PHI_ARG_DEF (phi, i), &direction))
      break;
  if (i != phi_num_args)
    for (i = 0; i < phi_num_args; i++)
      {
        pred = return_prediction (PHI_ARG_DEF (phi, i), &direction);
        if (pred != PRED_NO_PREDICTION)
          predict_paths_leading_to_edge (gimple_phi_arg_edge (phi, i), pred,
                                         direction);
      }
}
/* Look for basic blocks that contain unlikely-to-happen events
   (such as noreturn calls) and mark all paths leading to execution
   of these basic blocks as unlikely.  */

static void
tree_bb_level_predictions (void)
{
  basic_block bb;
  bool has_return_edges = false;
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    if (!(e->flags & (EDGE_ABNORMAL | EDGE_FAKE | EDGE_EH)))
      {
        has_return_edges = true;
        break;
      }

  apply_return_prediction ();

  FOR_EACH_BB_FN (bb, cfun)
    {
      gimple_stmt_iterator gsi;

      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
        {
          gimple *stmt = gsi_stmt (gsi);
          tree decl;

          if (is_gimple_call (stmt))
            {
              if ((gimple_call_flags (stmt) & ECF_NORETURN)
                  && has_return_edges)
                predict_paths_leading_to (bb, PRED_NORETURN,
                                          NOT_TAKEN);
              decl = gimple_call_fndecl (stmt);
              if (decl
                  && lookup_attribute ("cold",
                                       DECL_ATTRIBUTES (decl)))
                predict_paths_leading_to (bb, PRED_COLD_FUNCTION,
                                          NOT_TAKEN);
            }
          else if (gimple_code (stmt) == GIMPLE_PREDICT)
            {
              predict_paths_leading_to (bb, gimple_predict_predictor (stmt),
                                        gimple_predict_outcome (stmt));
              /* Keep GIMPLE_PREDICT around so early inlining will propagate
                 hints to callers.  */
            }
        }
    }
}
/* Callback for hash_map::traverse; asserts that the pointer map is
   empty.  */

bool
assert_is_empty (const_basic_block const &, edge_prediction *const &value,
                 void *)
{
  gcc_assert (!value);
  return false;
}
/* Predict branch probabilities and estimate profile for basic block BB.  */

static void
tree_estimate_probability_bb (basic_block bb)
{
  edge e;
  edge_iterator ei;
  gimple *last;

  FOR_EACH_EDGE (e, ei, bb->succs)
    {
      /* Predict edges to user labels with attributes.  */
      if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
        {
          gimple_stmt_iterator gi;
          for (gi = gsi_start_bb (e->dest); !gsi_end_p (gi); gsi_next (&gi))
            {
              glabel *label_stmt = dyn_cast <glabel *> (gsi_stmt (gi));
              tree decl;

              if (!label_stmt)
                break;
              decl = gimple_label_label (label_stmt);
              if (DECL_ARTIFICIAL (decl))
                continue;

              /* Finally, we have a user-defined label.  */
              if (lookup_attribute ("cold", DECL_ATTRIBUTES (decl)))
                predict_edge_def (e, PRED_COLD_LABEL, NOT_TAKEN);
              else if (lookup_attribute ("hot", DECL_ATTRIBUTES (decl)))
                predict_edge_def (e, PRED_HOT_LABEL, TAKEN);
            }
        }
      /* Predict early returns to be probable, as we've already taken
         care of error returns and other cases are often used for
         fast paths through the function.

         Since we've already removed the return statements, we are
         looking for a CFG like:

           if (a)
             goto return_block;
           some other blocks;
           return_block:
           return_stmt.  */
      if (e->dest != bb->next_bb
          && e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
          && single_succ_p (e->dest)
          && single_succ_edge (e->dest)->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)
          && (last = last_stmt (e->dest)) != NULL
          && gimple_code (last) == GIMPLE_RETURN)
        {
          edge e1;
          edge_iterator ei1;

          if (single_succ_p (bb))
            {
              FOR_EACH_EDGE (e1, ei1, bb->preds)
                if (!predicted_by_p (e1->src, PRED_NULL_RETURN)
                    && !predicted_by_p (e1->src, PRED_CONST_RETURN)
                    && !predicted_by_p (e1->src, PRED_NEGATIVE_RETURN))
                  predict_edge_def (e1, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
            }
          else
            if (!predicted_by_p (e->src, PRED_NULL_RETURN)
                && !predicted_by_p (e->src, PRED_CONST_RETURN)
                && !predicted_by_p (e->src, PRED_NEGATIVE_RETURN))
              predict_edge_def (e, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
        }
      /* Look for a block we are guarding (i.e. we dominate it,
         but it doesn't postdominate us).  */
      if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun) && e->dest != bb
          && dominated_by_p (CDI_DOMINATORS, e->dest, e->src)
          && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e->dest))
        {
          gimple_stmt_iterator bi;

          /* The call heuristic claims that a guarded function call
             is improbable.  This is because such calls are often used
             to signal exceptional situations such as printing error
             messages.  */
          for (bi = gsi_start_bb (e->dest); !gsi_end_p (bi);
               gsi_next (&bi))
            {
              gimple *stmt = gsi_stmt (bi);
              if (is_gimple_call (stmt)
                  /* Constant and pure calls are hardly used to signal
                     something exceptional.  */
                  && gimple_has_side_effects (stmt))
                {
                  predict_edge_def (e, PRED_CALL, NOT_TAKEN);
                  break;
                }
            }
        }
    }
  tree_predict_by_opcode (bb);
}
/* Predict branch probabilities and estimate profile of the tree CFG.
   This function can be called from the loop optimizers to recompute
   the profile information.
   If DRY_RUN is set, do not modify the CFG and only produce dump files.  */

void
tree_estimate_probability (bool dry_run)
{
  basic_block bb;

  add_noreturn_fake_exit_edges ();
  connect_infinite_loops_to_exit ();
  /* We use loop_niter_by_eval, which requires that the loops have
     preheaders.  */
  create_preheaders (CP_SIMPLE_PREHEADERS);
  calculate_dominance_info (CDI_POST_DOMINATORS);

  bb_predictions = new hash_map<const_basic_block, edge_prediction *>;
  tree_bb_level_predictions ();
  record_loop_exits ();

  if (number_of_loops (cfun) > 1)
    predict_loops ();

  FOR_EACH_BB_FN (bb, cfun)
    tree_estimate_probability_bb (bb);

  FOR_EACH_BB_FN (bb, cfun)
    combine_predictions_for_bb (bb, dry_run);

  if (flag_checking)
    bb_predictions->traverse <void *, assert_is_empty> (NULL);

  delete bb_predictions;
  bb_predictions = NULL;

  if (!dry_run)
    estimate_bb_frequencies (false);
  free_dominance_info (CDI_POST_DOMINATORS);
  remove_fake_exit_edges ();
}
/* Predict edges to successors of CUR whose sources are not postdominated by
   BB by PRED and recurse to all postdominators.  */

static void
predict_paths_for_bb (basic_block cur, basic_block bb,
                      enum br_predictor pred,
                      enum prediction taken,
                      bitmap visited)
{
  edge e;
  edge_iterator ei;
  basic_block son;

  /* We are looking for all edges forming the edge cut induced by the
     set of all blocks postdominated by BB.  */
  FOR_EACH_EDGE (e, ei, cur->preds)
    if (e->src->index >= NUM_FIXED_BLOCKS
        && !dominated_by_p (CDI_POST_DOMINATORS, e->src, bb))
      {
        edge e2;
        edge_iterator ei2;
        bool found = false;

        /* Ignore fake edges and eh, we predict them as not taken anyway.  */
        if (e->flags & (EDGE_EH | EDGE_FAKE))
          continue;
        gcc_assert (bb == cur || dominated_by_p (CDI_POST_DOMINATORS, cur, bb));

        /* See if there is an edge from e->src that is not abnormal
           and does not lead to BB.  */
        FOR_EACH_EDGE (e2, ei2, e->src->succs)
          if (e2 != e
              && !(e2->flags & (EDGE_EH | EDGE_FAKE))
              && !dominated_by_p (CDI_POST_DOMINATORS, e2->dest, bb))
            {
              found = true;
              break;
            }

        /* If there is a non-abnormal path leaving e->src, predict the edge
           using the predictor.  Otherwise we need to look for paths
           leading to e->src.

           The second may lead to an infinite loop in the case we are
           predicting regions that are only reachable by abnormal edges.
           We simply prevent visiting a given BB twice.  */
        if (found)
          {
            if (!edge_predicted_by_p (e, pred, taken))
              predict_edge_def (e, pred, taken);
          }
        else if (bitmap_set_bit (visited, e->src->index))
          predict_paths_for_bb (e->src, e->src, pred, taken, visited);
      }
  for (son = first_dom_son (CDI_POST_DOMINATORS, cur);
       son;
       son = next_dom_son (CDI_POST_DOMINATORS, son))
    predict_paths_for_bb (son, bb, pred, taken, visited);
}
/* Set branch probabilities of all paths leading to BB according to
   PREDiction and TAKEN.  */

static void
predict_paths_leading_to (basic_block bb, enum br_predictor pred,
                          enum prediction taken)
{
  bitmap visited = BITMAP_ALLOC (NULL);
  predict_paths_for_bb (bb, bb, pred, taken, visited);
  BITMAP_FREE (visited);
}
/* Like predict_paths_leading_to but takes an edge instead of a basic block.  */

static void
predict_paths_leading_to_edge (edge e, enum br_predictor pred,
                               enum prediction taken)
{
  bool has_nonloop_edge = false;
  edge_iterator ei;
  edge e2;

  basic_block bb = e->src;
  FOR_EACH_EDGE (e2, ei, bb->succs)
    if (e2->dest != e->src && e2->dest != e->dest
        && !(e->flags & (EDGE_EH | EDGE_FAKE))
        && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e2->dest))
      {
        has_nonloop_edge = true;
        break;
      }
  if (!has_nonloop_edge)
    {
      bitmap visited = BITMAP_ALLOC (NULL);
      predict_paths_for_bb (bb, bb, pred, taken, visited);
      BITMAP_FREE (visited);
    }
  else
    predict_edge_def (e, pred, taken);
}
/* This is used to carry information about basic blocks.  It is
   attached to the AUX field of the standard CFG block.  */

struct block_info
{
  /* Estimated frequency of execution of basic_block.  */
  sreal frequency;

  /* To keep queue of basic blocks to process.  */
  basic_block next;

  /* Number of predecessors we need to visit first.  */
  int npredecessors;
};

/* Similar information for edges.  */
struct edge_prob_info
{
  /* In case the edge is a loopback edge, the probability that the edge will
     be reached provided the header is.  The estimated number of iterations
     of the loop can then be computed as 1 / (1 - back_edge_prob).  */
  sreal back_edge_prob;
  /* True if the edge is a loopback edge in the natural loop.  */
  unsigned int back_edge:1;
};
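/* Illustrative note (not part of the original source): a latch edge with
   back_edge_prob of 0.9 corresponds, by the formula above, to an estimated
   1 / (1 - 0.9) = 10 iterations, while back_edge_prob of 0.5 corresponds to
   a loop expected to iterate about twice.  */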
#define BLOCK_INFO(B)  ((block_info *) (B)->aux)
#define EDGE_INFO(E)   ((edge_prob_info *) (E)->aux)

/* Helper function for estimate_bb_frequencies.
   Propagate the frequencies in blocks marked in
   TOVISIT, starting in HEAD.  */

static void
propagate_freq (basic_block head, bitmap tovisit)
{
  basic_block bb;
  basic_block last;
  unsigned i;
  edge e;
  basic_block nextbb;
  bitmap_iterator bi;

  /* For each basic block we need to visit, count the number of its
     predecessors that we need to visit first.  */
  EXECUTE_IF_SET_IN_BITMAP (tovisit, 0, i, bi)
    {
      edge_iterator ei;
      int count = 0;

      bb = BASIC_BLOCK_FOR_FN (cfun, i);

      FOR_EACH_EDGE (e, ei, bb->preds)
        {
          bool visit = bitmap_bit_p (tovisit, e->src->index);

          if (visit && !(e->flags & EDGE_DFS_BACK))
            count++;
          else if (visit && dump_file && !EDGE_INFO (e)->back_edge)
            fprintf (dump_file,
                     "Irreducible region hit, ignoring edge to %i->%i\n",
                     e->src->index, bb->index);
        }
      BLOCK_INFO (bb)->npredecessors = count;
      /* When the function never returns, we will never process the exit
         block.  */
      if (!count && bb == EXIT_BLOCK_PTR_FOR_FN (cfun))
        bb->count = bb->frequency = 0;
    }

  BLOCK_INFO (head)->frequency = 1;
  last = head;
  for (bb = head; bb; bb = nextbb)
    {
      edge_iterator ei;
      sreal cyclic_probability = 0;
      sreal frequency = 0;

      nextbb = BLOCK_INFO (bb)->next;
      BLOCK_INFO (bb)->next = NULL;

      /* Compute frequency of basic block.  */
      if (bb != head)
        {
          if (flag_checking)
            FOR_EACH_EDGE (e, ei, bb->preds)
              gcc_assert (!bitmap_bit_p (tovisit, e->src->index)
                          || (e->flags & EDGE_DFS_BACK));

          FOR_EACH_EDGE (e, ei, bb->preds)
            if (EDGE_INFO (e)->back_edge)
              {
                cyclic_probability += EDGE_INFO (e)->back_edge_prob;
              }
            else if (!(e->flags & EDGE_DFS_BACK))
              {
                /* frequency += (e->probability
                                 * BLOCK_INFO (e->src)->frequency /
                                 REG_BR_PROB_BASE);  */

                sreal tmp = e->probability;
                tmp *= BLOCK_INFO (e->src)->frequency;
                tmp *= real_inv_br_prob_base;
                frequency += tmp;
              }

          if (cyclic_probability == 0)
            {
              BLOCK_INFO (bb)->frequency = frequency;
            }
          else
            {
              if (cyclic_probability > real_almost_one)
                cyclic_probability = real_almost_one;

              /* BLOCK_INFO (bb)->frequency = frequency
                                              / (1 - cyclic_probability) */

              cyclic_probability = sreal (1) - cyclic_probability;
              BLOCK_INFO (bb)->frequency = frequency / cyclic_probability;
            }
        }
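      /* Illustrative note (not part of the original source): for a loop
         header whose incoming forward edges contribute frequency 1 and whose
         back edge carries back_edge_prob 0.75, the formula above yields
         1 / (1 - 0.75) = 4, i.e. the header is expected to run about four
         times per entry into the loop.  */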
      bitmap_clear_bit (tovisit, bb->index);

      e = find_edge (bb, head);
      if (e)
        {
          /* EDGE_INFO (e)->back_edge_prob
             = ((e->probability * BLOCK_INFO (bb)->frequency)
                / REG_BR_PROB_BASE); */

          sreal tmp = e->probability;
          tmp *= BLOCK_INFO (bb)->frequency;
          EDGE_INFO (e)->back_edge_prob = tmp * real_inv_br_prob_base;
        }

      /* Propagate to successor blocks.  */
      FOR_EACH_EDGE (e, ei, bb->succs)
        if (!(e->flags & EDGE_DFS_BACK)
            && BLOCK_INFO (e->dest)->npredecessors)
          {
            BLOCK_INFO (e->dest)->npredecessors--;
            if (!BLOCK_INFO (e->dest)->npredecessors)
              {
                if (!nextbb)
                  nextbb = e->dest;
                else
                  BLOCK_INFO (last)->next = e->dest;

                last = e->dest;
              }
          }
    }
}
/* Estimate frequencies in loops at the same nest level.  */

static void
estimate_loops_at_level (struct loop *first_loop)
{
  struct loop *loop;

  for (loop = first_loop; loop; loop = loop->next)
    {
      edge e;
      basic_block *bbs;
      unsigned i;
      bitmap tovisit = BITMAP_ALLOC (NULL);

      estimate_loops_at_level (loop->inner);

      /* Find current loop back edge and mark it.  */
      e = loop_latch_edge (loop);
      EDGE_INFO (e)->back_edge = 1;

      bbs = get_loop_body (loop);
      for (i = 0; i < loop->num_nodes; i++)
        bitmap_set_bit (tovisit, bbs[i]->index);
      free (bbs);
      propagate_freq (loop->header, tovisit);
      BITMAP_FREE (tovisit);
    }
}
/* Propagate frequencies through the structure of loops.  */

static void
estimate_loops (void)
{
  bitmap tovisit = BITMAP_ALLOC (NULL);
  basic_block bb;

  /* Start by estimating the frequencies in the loops.  */
  if (number_of_loops (cfun) > 1)
    estimate_loops_at_level (current_loops->tree_root->inner);

  /* Now propagate the frequencies through all the blocks.  */
  FOR_ALL_BB_FN (bb, cfun)
    {
      bitmap_set_bit (tovisit, bb->index);
    }
  propagate_freq (ENTRY_BLOCK_PTR_FOR_FN (cfun), tovisit);
  BITMAP_FREE (tovisit);
}
/* Drop the profile for NODE to guessed, and update its frequency based on
   whether it is expected to be hot given the CALL_COUNT.  */

static void
drop_profile (struct cgraph_node *node, gcov_type call_count)
{
  struct function *fn = DECL_STRUCT_FUNCTION (node->decl);
  /* In the case where this was called by another function with a
     dropped profile, call_count will be 0.  Since there are no
     non-zero call counts to this function, we don't know for sure
     whether it is hot, and therefore it will be marked normal below.  */
  bool hot = maybe_hot_count_p (NULL, call_count);

  if (dump_file)
    fprintf (dump_file,
             "Dropping 0 profile for %s/%i. %s based on calls.\n",
             node->name (), node->order,
             hot ? "Function is hot" : "Function is normal");
  /* We only expect to miss profiles for functions that are reached
     via non-zero call edges in cases where the function may have
     been linked from another module or library (COMDATs and extern
     templates).  See the comments below for handle_missing_profiles.
     Also, only warn in cases where the missing counts exceed the
     number of training runs.  In certain cases with an execv followed
     by a no-return call the profile for the no-return call is not
     dumped and there can be a mismatch.  */
  if (!DECL_COMDAT (node->decl) && !DECL_EXTERNAL (node->decl)
      && call_count > profile_info->runs)
    {
      if (flag_profile_correction)
        {
          if (dump_file)
            fprintf (dump_file,
                     "Missing counts for called function %s/%i\n",
                     node->name (), node->order);
        }
      else
        warning (0, "Missing counts for called function %s/%i",
                 node->name (), node->order);
    }

  profile_status_for_fn (fn)
    = (flag_guess_branch_prob ? PROFILE_GUESSED : PROFILE_ABSENT);
  node->frequency
    = hot ? NODE_FREQUENCY_HOT : NODE_FREQUENCY_NORMAL;
}
/* In the case of COMDAT routines, multiple object files will contain the same
   function and the linker will select one for the binary.  In that case
   all the other copies from the profile-instrumented binary will be missing
   profile counts.  Look for cases where this happened, due to non-zero
   call counts going to 0-count functions, and drop the profile to guessed
   so that we can use the estimated probabilities and avoid optimizing only
   for size.

   The other case where the profile may be missing is when the routine
   is not going to be emitted to the object file, e.g. for "extern template"
   class methods.  Those will be marked DECL_EXTERNAL.  Emit a warning in
   all other cases of non-zero calls to 0-count functions.  */

void
handle_missing_profiles (void)
{
  struct cgraph_node *node;
  int unlikely_count_fraction = PARAM_VALUE (UNLIKELY_BB_COUNT_FRACTION);
  vec<struct cgraph_node *> worklist;
  worklist.create (64);

  /* See if a 0-count function has non-0 count callers.  In this case we
     lost some profile.  Drop its function profile to PROFILE_GUESSED.  */
  FOR_EACH_DEFINED_FUNCTION (node)
    {
      struct cgraph_edge *e;
      gcov_type call_count = 0;
      gcov_type max_tp_first_run = 0;
      struct function *fn = DECL_STRUCT_FUNCTION (node->decl);

      if (node->count)
        continue;
      for (e = node->callers; e; e = e->next_caller)
        {
          call_count += e->count;

          if (e->caller->tp_first_run > max_tp_first_run)
            max_tp_first_run = e->caller->tp_first_run;
        }

      /* If the time profile is missing, let us assign the maximum that comes
         from caller functions.  */
      if (!node->tp_first_run && max_tp_first_run)
        node->tp_first_run = max_tp_first_run + 1;

      if (call_count
          && fn && fn->cfg
          && (call_count * unlikely_count_fraction >= profile_info->runs))
        {
          drop_profile (node, call_count);
          worklist.safe_push (node);
        }
    }

  /* Propagate the profile dropping to other 0-count COMDATs that are
     potentially called by COMDATs we already dropped the profile on.  */
  while (worklist.length () > 0)
    {
      struct cgraph_edge *e;

      node = worklist.pop ();
      for (e = node->callees; e; e = e->next_callee)
        {
          struct cgraph_node *callee = e->callee;
          struct function *fn = DECL_STRUCT_FUNCTION (callee->decl);

          if (callee->count > 0)
            continue;
          if (DECL_COMDAT (callee->decl) && fn && fn->cfg
              && profile_status_for_fn (fn) == PROFILE_READ)
            {
              drop_profile (node, 0);
              worklist.safe_push (callee);
            }
        }
    }
  worklist.release ();
}
/* Convert counts measured by profile-driven feedback to frequencies.
   Return nonzero iff there was any nonzero execution count.  */

bool
counts_to_freqs (void)
{
  gcov_type count_max, true_count_max = 0;
  basic_block bb;

  /* Don't overwrite the estimated frequencies when the profile for
     the function is missing.  We may drop this function to PROFILE_GUESSED
     later in drop_profile ().  */
  if (!flag_auto_profile && !ENTRY_BLOCK_PTR_FOR_FN (cfun)->count)
    return false;

  FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
    true_count_max = MAX (bb->count, true_count_max);

  count_max = MAX (true_count_max, 1);
  FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
    bb->frequency = (bb->count * BB_FREQ_MAX + count_max / 2) / count_max;

  return true_count_max;
}
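/* Illustrative note (not part of the original source): assuming BB_FREQ_MAX
   of 10000, a block executed 250 times in a function whose most frequent
   block ran 1000 times gets frequency (250 * 10000 + 500) / 1000 == 2500,
   i.e. a quarter of BB_FREQ_MAX; the "+ count_max / 2" term rounds the
   division to the nearest integer.  */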
/* Return true if the function is likely to be expensive, so there is no point
   in optimizing performance of the prologue, epilogue or doing inlining at
   the expense of code size growth.  THRESHOLD is the limit on the number of
   instructions the function can execute on average and still be considered
   not expensive.  */

bool
expensive_function_p (int threshold)
{
  unsigned int sum = 0;
  basic_block bb;
  unsigned int limit;

  /* We cannot compute this accurately for large thresholds due to the scaled
     frequencies.  */
  gcc_assert (threshold <= BB_FREQ_MAX);

  /* Frequencies are out of range.  This either means that the function
     contains an internal loop executing more than BB_FREQ_MAX times or
     profile feedback is available and the function has not been executed
     at all.  */
  if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency == 0)
    return true;

  /* Maximally BB_FREQ_MAX^2 so overflow won't happen.  */
  limit = ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency * threshold;
  FOR_EACH_BB_FN (bb, cfun)
    {
      rtx_insn *insn;

      FOR_BB_INSNS (bb, insn)
        if (active_insn_p (insn))
          {
            sum += bb->frequency;
            if (sum > limit)
              return true;
          }
    }

  return false;
}
/* Estimate and propagate basic block frequencies using the given branch
   probabilities.  If FORCE is true, the frequencies are used to estimate
   the counts even when there are already non-zero profile counts.  */

void
estimate_bb_frequencies (bool force)
{
  basic_block bb;
  sreal freq_max;

  if (force || profile_status_for_fn (cfun) != PROFILE_READ
      || !counts_to_freqs ())
    {
      static int real_values_initialized = 0;

      if (!real_values_initialized)
        {
          real_values_initialized = 1;
          real_br_prob_base = REG_BR_PROB_BASE;
          real_bb_freq_max = BB_FREQ_MAX;
          real_one_half = sreal (1, -1);
          real_inv_br_prob_base = sreal (1) / real_br_prob_base;
          real_almost_one = sreal (1) - real_inv_br_prob_base;
        }
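      /* Illustrative note (not part of the original source): with
         REG_BR_PROB_BASE of 10000, real_inv_br_prob_base is 1/10000 and
         real_almost_one is 0.9999; sreal (1, -1) denotes 1 * 2^-1 == 0.5,
         which is the rounding constant added before converting the scaled
         frequencies back to integers below.  */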
      mark_dfs_back_edges ();

      single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun))->probability =
         REG_BR_PROB_BASE;

      /* Set up block info for each basic block.  */
      alloc_aux_for_blocks (sizeof (block_info));
      alloc_aux_for_edges (sizeof (edge_prob_info));
      FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
        {
          edge e;
          edge_iterator ei;

          FOR_EACH_EDGE (e, ei, bb->succs)
            {
              EDGE_INFO (e)->back_edge_prob = e->probability;
              EDGE_INFO (e)->back_edge_prob *= real_inv_br_prob_base;
            }
        }

      /* First compute frequencies locally for each loop from innermost
         to outermost to examine frequencies for back edges.  */
      estimate_loops ();

      freq_max = 0;
      FOR_EACH_BB_FN (bb, cfun)
        if (freq_max < BLOCK_INFO (bb)->frequency)
          freq_max = BLOCK_INFO (bb)->frequency;

      freq_max = real_bb_freq_max / freq_max;
      FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
        {
          sreal tmp = BLOCK_INFO (bb)->frequency * freq_max + real_one_half;
          bb->frequency = tmp.to_int ();
        }

      free_aux_for_blocks ();
      free_aux_for_edges ();
    }
  compute_function_frequency ();
}
/* Decide whether the function is hot, cold or unlikely executed.  */

void
compute_function_frequency (void)
{
  basic_block bb;
  struct cgraph_node *node = cgraph_node::get (current_function_decl);

  if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
      || MAIN_NAME_P (DECL_NAME (current_function_decl)))
    node->only_called_at_startup = true;
  if (DECL_STATIC_DESTRUCTOR (current_function_decl))
    node->only_called_at_exit = true;

  if (profile_status_for_fn (cfun) != PROFILE_READ)
    {
      int flags = flags_from_decl_or_type (current_function_decl);
      if (lookup_attribute ("cold", DECL_ATTRIBUTES (current_function_decl))
          != NULL)
        node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
      else if (lookup_attribute ("hot", DECL_ATTRIBUTES (current_function_decl))
               != NULL)
        node->frequency = NODE_FREQUENCY_HOT;
      else if (flags & ECF_NORETURN)
        node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
      else if (MAIN_NAME_P (DECL_NAME (current_function_decl)))
        node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
      else if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
               || DECL_STATIC_DESTRUCTOR (current_function_decl))
        node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
      return;
    }

  /* Only the first time do we try to drop the function into unlikely
     executed; after inlining the roundoff errors may confuse us.
     The ipa-profile pass will drop functions only called from unlikely
     functions to unlikely, and that is most of what we care about.  */
  if (!cfun->after_inlining)
    node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
  FOR_EACH_BB_FN (bb, cfun)
    {
      if (maybe_hot_bb_p (cfun, bb))
        {
          node->frequency = NODE_FREQUENCY_HOT;
          return;
        }
      if (!probably_never_executed_bb_p (cfun, bb))
        node->frequency = NODE_FREQUENCY_NORMAL;
    }
}
/* Build a PREDICT_EXPR for PREDICTOR with outcome TAKEN.  */

tree
build_predict_expr (enum br_predictor predictor, enum prediction taken)
{
  tree t = build1 (PREDICT_EXPR, void_type_node,
                   build_int_cst (integer_type_node, predictor));
  SET_PREDICT_EXPR_OUTCOME (t, taken);
  return t;
}

/* Return the name of PREDICTOR as used in the debugging dumps.  */

const char *
predictor_name (enum br_predictor predictor)
{
  return predictor_info[predictor].name;
}
/* Predict branch probabilities and estimate profile of the tree CFG.  */

namespace {

const pass_data pass_data_profile =
{
  GIMPLE_PASS, /* type */
  "profile_estimate", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_BRANCH_PROB, /* tv_id */
  PROP_cfg, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_profile : public gimple_opt_pass
{
public:
  pass_profile (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_profile, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *) { return flag_guess_branch_prob; }
  virtual unsigned int execute (function *);

}; // class pass_profile

unsigned int
pass_profile::execute (function *fun)
{
  unsigned nb_loops;

  if (profile_status_for_fn (cfun) == PROFILE_GUESSED)
    return 0;

  loop_optimizer_init (LOOPS_NORMAL);
  if (dump_file && (dump_flags & TDF_DETAILS))
    flow_loops_dump (dump_file, NULL, 0);

  mark_irreducible_loops ();

  nb_loops = number_of_loops (fun);
  if (nb_loops > 1)
    scev_initialize ();

  tree_estimate_probability (false);

  if (nb_loops > 1)
    scev_finalize ();

  loop_optimizer_finalize ();
  if (dump_file && (dump_flags & TDF_DETAILS))
    gimple_dump_cfg (dump_file, dump_flags);
  if (profile_status_for_fn (fun) == PROFILE_ABSENT)
    profile_status_for_fn (fun) = PROFILE_GUESSED;
  return 0;
}

} // anon namespace

gimple_opt_pass *
make_pass_profile (gcc::context *ctxt)
{
  return new pass_profile (ctxt);
}
namespace {

const pass_data pass_data_strip_predict_hints =
{
  GIMPLE_PASS, /* type */
  "*strip_predict_hints", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_BRANCH_PROB, /* tv_id */
  PROP_cfg, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_strip_predict_hints : public gimple_opt_pass
{
public:
  pass_strip_predict_hints (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_strip_predict_hints, ctxt)
  {}

  /* opt_pass methods: */
  opt_pass * clone () { return new pass_strip_predict_hints (m_ctxt); }
  virtual unsigned int execute (function *);

}; // class pass_strip_predict_hints

/* Get rid of all builtin_expect calls and GIMPLE_PREDICT statements
   we no longer need.  */

unsigned int
pass_strip_predict_hints::execute (function *fun)
{
  basic_block bb;
  gimple *ass_stmt;
  tree var;
  bool changed = false;

  FOR_EACH_BB_FN (bb, fun)
    {
      gimple_stmt_iterator bi;
      for (bi = gsi_start_bb (bb); !gsi_end_p (bi);)
        {
          gimple *stmt = gsi_stmt (bi);

          if (gimple_code (stmt) == GIMPLE_PREDICT)
            {
              gsi_remove (&bi, true);
              changed = true;
              continue;
            }
          else if (is_gimple_call (stmt))
            {
              tree fndecl = gimple_call_fndecl (stmt);

              if ((fndecl
                   && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
                   && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_EXPECT
                   && gimple_call_num_args (stmt) == 2)
                  || (gimple_call_internal_p (stmt)
                      && gimple_call_internal_fn (stmt) == IFN_BUILTIN_EXPECT))
                {
                  var = gimple_call_lhs (stmt);
                  if (var)
                    {
                      ass_stmt
                        = gimple_build_assign (var, gimple_call_arg (stmt, 0));
                      gsi_replace (&bi, ass_stmt, true);
                    }
                  else
                    {
                      gsi_remove (&bi, true);
                      continue;
                    }
                }
            }
          gsi_next (&bi);
        }
    }
  return changed ? TODO_cleanup_cfg : 0;
}

} // anon namespace

gimple_opt_pass *
make_pass_strip_predict_hints (gcc::context *ctxt)
{
  return new pass_strip_predict_hints (ctxt);
}
/* Rebuild function frequencies.  Passes are in general expected to
   maintain profile by hand; however, in some cases this is not possible:
   for example, when inlining several functions with loops, frequencies might
   run out of scale and thus need to be recomputed.  */

void
rebuild_frequencies (void)
{
  timevar_push (TV_REBUILD_FREQUENCIES);

  /* When the max bb count in the function is small, there is a higher
     chance that there were truncation errors in the integer scaling
     of counts by inlining and other optimizations.  This could lead
     to incorrect classification of code as being cold when it isn't.
     In that case, force the estimation of bb counts/frequencies from the
     branch probabilities, rather than computing frequencies from counts,
     which may also lead to frequencies incorrectly reduced to 0.  There
     is less precision in the probabilities, so we only do this for small
     max counts.  */
  gcov_type count_max = 0;
  basic_block bb;
  FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
    count_max = MAX (bb->count, count_max);

  if (profile_status_for_fn (cfun) == PROFILE_GUESSED
      || (!flag_auto_profile && profile_status_for_fn (cfun) == PROFILE_READ
          && count_max < REG_BR_PROB_BASE / 10))
    {
      loop_optimizer_init (0);
      add_noreturn_fake_exit_edges ();
      mark_irreducible_loops ();
      connect_infinite_loops_to_exit ();
      estimate_bb_frequencies (true);
      remove_fake_exit_edges ();
      loop_optimizer_finalize ();
    }
  else if (profile_status_for_fn (cfun) == PROFILE_READ)
    counts_to_freqs ();
  else
    gcc_unreachable ();
  timevar_pop (TV_REBUILD_FREQUENCIES);
}
/* Perform a dry run of the branch prediction pass and report a comparison of
   the predicted and real profile into the dump file.  */

void
report_predictor_hitrates (void)
{
  unsigned nb_loops;

  loop_optimizer_init (LOOPS_NORMAL);
  if (dump_file && (dump_flags & TDF_DETAILS))
    flow_loops_dump (dump_file, NULL, 0);

  mark_irreducible_loops ();

  nb_loops = number_of_loops (cfun);
  if (nb_loops > 1)
    scev_initialize ();

  tree_estimate_probability (true);

  if (nb_loops > 1)
    scev_finalize ();

  loop_optimizer_finalize ();
}
/* Force edge E to be cold.
   If IMPOSSIBLE is true, force the edge to have count and probability 0;
   otherwise keep a low probability to represent a possible error in a guess.
   This is used e.g. in case we predict a loop to likely iterate a given
   number of times but we are not 100% sure.

   This function locally updates the profile without attempting to keep global
   consistency, which cannot be reached in full generality without a full
   profile rebuild from probabilities alone.  Doing so is not necessarily a
   good idea because frequencies and counts may be more realistic than
   probabilities.

   In some cases (such as for elimination of early exits during full loop
   unrolling) the caller can ensure that the profile will get consistent
   afterwards.  */

void
force_edge_cold (edge e, bool impossible)
{
  gcov_type count_sum = 0;
  int prob_sum = 0;
  edge_iterator ei;
  edge e2;
  gcov_type old_count = e->count;
  int old_probability = e->probability;
  gcov_type gcov_scale = REG_BR_PROB_BASE;
  int prob_scale = REG_BR_PROB_BASE;

  /* If the edge is already improbable or cold, just return.  */
  if (e->probability <= (impossible ? PROB_VERY_UNLIKELY : 0)
      && (!impossible || !e->count))
    return;
  FOR_EACH_EDGE (e2, ei, e->src->succs)
    if (e2 != e)
      {
        count_sum += e2->count;
        prob_sum += e2->probability;
      }

  /* If there are other edges out of e->src, redistribute the probability
     there.  */
  if (prob_sum)
    {
      e->probability
        = MIN (e->probability, impossible ? 0 : PROB_VERY_UNLIKELY);
      if (old_probability)
        e->count = RDIV (e->count * e->probability, old_probability);
      else
        e->count = MIN (e->count, impossible ? 0 : 1);

      if (count_sum)
        gcov_scale = RDIV ((count_sum + old_count - e->count) * REG_BR_PROB_BASE,
                           count_sum);
      prob_scale = RDIV ((REG_BR_PROB_BASE - e->probability) * REG_BR_PROB_BASE,
                         prob_sum);
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Making edge %i->%i %s by redistributing "
                 "probability to other edges.\n",
                 e->src->index, e->dest->index,
                 impossible ? "impossible" : "cold");
      FOR_EACH_EDGE (e2, ei, e->src->succs)
        if (e2 != e)
          {
            e2->count = RDIV (e2->count * gcov_scale, REG_BR_PROB_BASE);
            e2->probability = RDIV (e2->probability * prob_scale,
                                    REG_BR_PROB_BASE);
          }
    }
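  /* Illustrative note (not part of the original source): if e->src has two
     successors with probabilities 9000 and 1000 out of REG_BR_PROB_BASE ==
     10000 and the 9000 edge is forced impossible, its probability becomes 0,
     prob_scale becomes (10000 - 0) * 10000 / 1000 == 100000, and the
     remaining edge is rescaled to 1000 * 100000 / 10000 == 10000, i.e. it
     absorbs the whole probability mass.  */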
  /* If all edges out of e->src are unlikely, the basic block itself
     is unlikely.  */
  else
    {
      e->probability = REG_BR_PROB_BASE;

      /* If we did not adjust anything, the source basic block has no likely
         edges leaving in the other direction.  In that case force that bb
         cold, too.  This is in general a difficult task to do, but handle the
         special case when BB has only one predecessor.  This is the common
         case when we are updating after loop transforms.  */
      if (!prob_sum && !count_sum && single_pred_p (e->src)
          && e->src->frequency > (impossible ? 0 : 1))
        {
          int old_frequency = e->src->frequency;
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, "Making bb %i %s.\n", e->src->index,
                     impossible ? "impossible" : "cold");
          e->src->frequency = MIN (e->src->frequency, impossible ? 0 : 1);
          e->src->count = e->count = RDIV (e->src->count * e->src->frequency,
                                           old_frequency);
          force_edge_cold (single_pred_edge (e->src), impossible);
        }
      else if (dump_file && (dump_flags & TDF_DETAILS)
               && maybe_hot_bb_p (cfun, e->src))
        fprintf (dump_file, "Giving up on making bb %i %s.\n", e->src->index,
                 impossible ? "impossible" : "cold");
    }
}