[gcc.git] / gcc / predict.c
1 /* Branch prediction routines for the GNU compiler.
2 Copyright (C) 2000-2013 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 /* References:
21
22 [1] "Branch Prediction for Free"
23 Ball and Larus; PLDI '93.
24 [2] "Static Branch Frequency and Program Profile Analysis"
25 Wu and Larus; MICRO-27.
26 [3] "Corpus-based Static Branch Prediction"
27 Calder, Grunwald, Lindsay, Martin, Mozer, and Zorn; PLDI '95. */
28
29
30 #include "config.h"
31 #include "system.h"
32 #include "coretypes.h"
33 #include "tm.h"
34 #include "tree.h"
35 #include "rtl.h"
36 #include "tm_p.h"
37 #include "hard-reg-set.h"
38 #include "basic-block.h"
39 #include "insn-config.h"
40 #include "regs.h"
41 #include "flags.h"
42 #include "function.h"
43 #include "except.h"
44 #include "diagnostic-core.h"
45 #include "recog.h"
46 #include "expr.h"
47 #include "predict.h"
48 #include "coverage.h"
49 #include "sreal.h"
50 #include "params.h"
51 #include "target.h"
52 #include "cfgloop.h"
53 #include "tree-flow.h"
54 #include "ggc.h"
55 #include "tree-pass.h"
56 #include "tree-scalar-evolution.h"
57 #include "cfgloop.h"
58 #include "pointer-set.h"
59
60 /* real constants: 0, 1, 1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE,
61 1/REG_BR_PROB_BASE, 0.5, BB_FREQ_MAX. */
62 static sreal real_zero, real_one, real_almost_one, real_br_prob_base,
63 real_inv_br_prob_base, real_one_half, real_bb_freq_max;
64
 65 /* Random guesstimations, given names.
 66    PROB_VERY_UNLIKELY should be small enough so that a basic block predicted
 67    by it gets below HOT_BB_FREQUENCY_FRACTION.  */
68 #define PROB_VERY_UNLIKELY (REG_BR_PROB_BASE / 2000 - 1)
69 #define PROB_EVEN (REG_BR_PROB_BASE / 2)
70 #define PROB_VERY_LIKELY (REG_BR_PROB_BASE - PROB_VERY_UNLIKELY)
71 #define PROB_ALWAYS (REG_BR_PROB_BASE)
72
73 static void combine_predictions_for_insn (rtx, basic_block);
74 static void dump_prediction (FILE *, enum br_predictor, int, basic_block, int);
75 static void predict_paths_leading_to (basic_block, enum br_predictor, enum prediction);
76 static void predict_paths_leading_to_edge (edge, enum br_predictor, enum prediction);
77 static bool can_predict_insn_p (const_rtx);
78
79 /* Information we hold about each branch predictor.
80 Filled using information from predict.def. */
81
82 struct predictor_info
83 {
84 const char *const name; /* Name used in the debugging dumps. */
85 const int hitrate; /* Expected hitrate used by
86 predict_insn_def call. */
87 const int flags;
88 };
89
 90 /* Use the given predictor without Dempster-Shafer theory if it matches
 91    using first_match heuristics.  */
92 #define PRED_FLAG_FIRST_MATCH 1
93
 94 /* Convert a hitrate given in percent into our fixed-point representation.  */
95
96 #define HITRATE(VAL) ((int) ((VAL) * REG_BR_PROB_BASE + 50) / 100)
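/* For example, with the usual REG_BR_PROB_BASE of 10000, HITRATE (75)
   yields 7500; the "+ 50" term rounds to the nearest unit instead of
   truncating.  */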
97
98 #define DEF_PREDICTOR(ENUM, NAME, HITRATE, FLAGS) {NAME, HITRATE, FLAGS},
99 static const struct predictor_info predictor_info[]= {
100 #include "predict.def"
101
102 /* Upper bound on predictors. */
103 {NULL, 0, 0}
104 };
105 #undef DEF_PREDICTOR
106
107 /* Return TRUE if frequency FREQ is considered to be hot. */
108
109 static inline bool
110 maybe_hot_frequency_p (struct function *fun, int freq)
111 {
112 struct cgraph_node *node = cgraph_get_node (fun->decl);
113 if (!profile_info || !flag_branch_probabilities)
114 {
115 if (node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED)
116 return false;
117 if (node->frequency == NODE_FREQUENCY_HOT)
118 return true;
119 }
120 if (profile_status_for_function (fun) == PROFILE_ABSENT)
121 return true;
122 if (node->frequency == NODE_FREQUENCY_EXECUTED_ONCE
123 && freq < (ENTRY_BLOCK_PTR_FOR_FUNCTION (fun)->frequency * 2 / 3))
124 return false;
125 if (PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION) == 0)
126 return false;
127 if (freq < (ENTRY_BLOCK_PTR_FOR_FUNCTION (fun)->frequency
128 / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION)))
129 return false;
130 return true;
131 }
132
133 static gcov_type min_count = -1;
134
135 /* Determine the threshold for hot BB counts. */
136
137 gcov_type
138 get_hot_bb_threshold ()
139 {
140 gcov_working_set_t *ws;
141 if (min_count == -1)
142 {
143 ws = find_working_set (PARAM_VALUE (HOT_BB_COUNT_WS_PERMILLE));
144 gcc_assert (ws);
145 min_count = ws->min_counter;
146 }
147 return min_count;
148 }
149
150 /* Set the threshold for hot BB counts. */
151
152 void
153 set_hot_bb_threshold (gcov_type min)
154 {
155 min_count = min;
156 }
157
 158 /* Return TRUE if profile count COUNT is considered to be hot.  */
159
160 static inline bool
161 maybe_hot_count_p (struct function *fun, gcov_type count)
162 {
163 if (fun && profile_status_for_function (fun) != PROFILE_READ)
164 return true;
 165   /* Code executed at most once per run is not hot.  */
166 if (profile_info->runs >= count)
167 return false;
168 return (count >= get_hot_bb_threshold ());
169 }
170
171 /* Return true in case BB can be CPU intensive and should be optimized
172 for maximal performance. */
173
174 bool
175 maybe_hot_bb_p (struct function *fun, const_basic_block bb)
176 {
177 gcc_checking_assert (fun);
178 if (profile_status_for_function (fun) == PROFILE_READ)
179 return maybe_hot_count_p (fun, bb->count);
180 return maybe_hot_frequency_p (fun, bb->frequency);
181 }
182
183 /* Return true if the call can be hot. */
184
185 bool
186 cgraph_maybe_hot_edge_p (struct cgraph_edge *edge)
187 {
188 if (profile_info && flag_branch_probabilities
189 && !maybe_hot_count_p (NULL,
190 edge->count))
191 return false;
192 if (edge->caller->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED
193 || (edge->callee
194 && edge->callee->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED))
195 return false;
196 if (edge->caller->frequency > NODE_FREQUENCY_UNLIKELY_EXECUTED
197 && (edge->callee
198 && edge->callee->frequency <= NODE_FREQUENCY_EXECUTED_ONCE))
199 return false;
200 if (optimize_size)
201 return false;
202 if (edge->caller->frequency == NODE_FREQUENCY_HOT)
203 return true;
204 if (edge->caller->frequency == NODE_FREQUENCY_EXECUTED_ONCE
205 && edge->frequency < CGRAPH_FREQ_BASE * 3 / 2)
206 return false;
207 if (flag_guess_branch_prob)
208 {
209 if (PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION) == 0
210 || edge->frequency <= (CGRAPH_FREQ_BASE
211 / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION)))
212 return false;
213 }
214 return true;
215 }
216
 217 /* Return true in case edge E can be CPU intensive and should be optimized
 218    for maximal performance.  */
219
220 bool
221 maybe_hot_edge_p (edge e)
222 {
223 if (profile_status == PROFILE_READ)
224 return maybe_hot_count_p (cfun, e->count);
225 return maybe_hot_frequency_p (cfun, EDGE_FREQUENCY (e));
226 }
227
228
229 /* Return true in case BB is probably never executed. */
230
231 bool
232 probably_never_executed_bb_p (struct function *fun, const_basic_block bb)
233 {
234 gcc_checking_assert (fun);
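  /* With a read profile, consider BB never executed when its count averages
     to less than half an execution per program run, i.e. when count / runs
     rounds to zero below.  */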
235 if (profile_info && flag_branch_probabilities)
236 return ((bb->count + profile_info->runs / 2) / profile_info->runs) == 0;
237 if ((!profile_info || !flag_branch_probabilities)
238 && (cgraph_get_node (fun->decl)->frequency
239 == NODE_FREQUENCY_UNLIKELY_EXECUTED))
240 return true;
241 return false;
242 }
243
244
245 /* Return true in case edge E is probably never executed. */
246
247 bool
248 probably_never_executed_edge_p (struct function *fun, edge e)
249 {
250 gcc_checking_assert (fun);
251 if (profile_info && flag_branch_probabilities)
252 return ((e->count + profile_info->runs / 2) / profile_info->runs) == 0;
253 if ((!profile_info || !flag_branch_probabilities)
254 && (cgraph_get_node (fun->decl)->frequency
255 == NODE_FREQUENCY_UNLIKELY_EXECUTED))
256 return true;
257 return false;
258 }
259
260 /* Return true if NODE should be optimized for size. */
261
262 bool
263 cgraph_optimize_for_size_p (struct cgraph_node *node)
264 {
265 if (optimize_size)
266 return true;
267 if (node && (node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED))
268 return true;
269 else
270 return false;
271 }
272
273 /* Return true when current function should always be optimized for size. */
274
275 bool
276 optimize_function_for_size_p (struct function *fun)
277 {
278 if (optimize_size)
279 return true;
280 if (!fun || !fun->decl)
281 return false;
282 return cgraph_optimize_for_size_p (cgraph_get_node (fun->decl));
283 }
284
285 /* Return true when current function should always be optimized for speed. */
286
287 bool
288 optimize_function_for_speed_p (struct function *fun)
289 {
290 return !optimize_function_for_size_p (fun);
291 }
292
293 /* Return TRUE when BB should be optimized for size. */
294
295 bool
296 optimize_bb_for_size_p (const_basic_block bb)
297 {
298 return optimize_function_for_size_p (cfun) || !maybe_hot_bb_p (cfun, bb);
299 }
300
301 /* Return TRUE when BB should be optimized for speed. */
302
303 bool
304 optimize_bb_for_speed_p (const_basic_block bb)
305 {
306 return !optimize_bb_for_size_p (bb);
307 }
308
 309 /* Return TRUE when edge E should be optimized for size.  */
310
311 bool
312 optimize_edge_for_size_p (edge e)
313 {
314 return optimize_function_for_size_p (cfun) || !maybe_hot_edge_p (e);
315 }
316
 317 /* Return TRUE when edge E should be optimized for speed.  */
318
319 bool
320 optimize_edge_for_speed_p (edge e)
321 {
322 return !optimize_edge_for_size_p (e);
323 }
324
 325 /* Return TRUE when the current instruction should be optimized for size.  */
326
327 bool
328 optimize_insn_for_size_p (void)
329 {
330 return optimize_function_for_size_p (cfun) || !crtl->maybe_hot_insn_p;
331 }
332
 333 /* Return TRUE when the current instruction should be optimized for speed.  */
334
335 bool
336 optimize_insn_for_speed_p (void)
337 {
338 return !optimize_insn_for_size_p ();
339 }
340
341 /* Return TRUE when LOOP should be optimized for size. */
342
343 bool
344 optimize_loop_for_size_p (struct loop *loop)
345 {
346 return optimize_bb_for_size_p (loop->header);
347 }
348
349 /* Return TRUE when LOOP should be optimized for speed. */
350
351 bool
352 optimize_loop_for_speed_p (struct loop *loop)
353 {
354 return optimize_bb_for_speed_p (loop->header);
355 }
356
357 /* Return TRUE when LOOP nest should be optimized for speed. */
358
359 bool
360 optimize_loop_nest_for_speed_p (struct loop *loop)
361 {
362 struct loop *l = loop;
363 if (optimize_loop_for_speed_p (loop))
364 return true;
365 l = loop->inner;
366 while (l && l != loop)
367 {
368 if (optimize_loop_for_speed_p (l))
369 return true;
370 if (l->inner)
371 l = l->inner;
372 else if (l->next)
373 l = l->next;
374 else
375 {
376 while (l != loop && !l->next)
377 l = loop_outer (l);
378 if (l != loop)
379 l = l->next;
380 }
381 }
382 return false;
383 }
384
385 /* Return TRUE when LOOP nest should be optimized for size. */
386
387 bool
388 optimize_loop_nest_for_size_p (struct loop *loop)
389 {
390 return !optimize_loop_nest_for_speed_p (loop);
391 }
392
393 /* Return true when edge E is likely to be well predictable by branch
394 predictor. */
395
396 bool
397 predictable_edge_p (edge e)
398 {
399 if (profile_status == PROFILE_ABSENT)
400 return false;
401 if ((e->probability
402 <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100)
403 || (REG_BR_PROB_BASE - e->probability
404 <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100))
405 return true;
406 return false;
407 }
408
409
410 /* Set RTL expansion for BB profile. */
411
412 void
413 rtl_profile_for_bb (basic_block bb)
414 {
415 crtl->maybe_hot_insn_p = maybe_hot_bb_p (cfun, bb);
416 }
417
418 /* Set RTL expansion for edge profile. */
419
420 void
421 rtl_profile_for_edge (edge e)
422 {
423 crtl->maybe_hot_insn_p = maybe_hot_edge_p (e);
424 }
425
426 /* Set RTL expansion to default mode (i.e. when profile info is not known). */
427 void
428 default_rtl_profile (void)
429 {
430 crtl->maybe_hot_insn_p = true;
431 }
432
 433 /* Return true if one of the outgoing edges is already predicted by
 434    PREDICTOR.  */
435
436 bool
437 rtl_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
438 {
439 rtx note;
440 if (!INSN_P (BB_END (bb)))
441 return false;
442 for (note = REG_NOTES (BB_END (bb)); note; note = XEXP (note, 1))
443 if (REG_NOTE_KIND (note) == REG_BR_PRED
444 && INTVAL (XEXP (XEXP (note, 0), 0)) == (int)predictor)
445 return true;
446 return false;
447 }
448
449 /* This map contains for a basic block the list of predictions for the
450 outgoing edges. */
451
452 static struct pointer_map_t *bb_predictions;
453
454 /* Structure representing predictions in tree level. */
455
456 struct edge_prediction {
457 struct edge_prediction *ep_next;
458 edge ep_edge;
459 enum br_predictor ep_predictor;
460 int ep_probability;
461 };
462
 463 /* Return true if one of the outgoing edges is already predicted by
 464    PREDICTOR.  */
465
466 bool
467 gimple_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
468 {
469 struct edge_prediction *i;
470 void **preds = pointer_map_contains (bb_predictions, bb);
471
472 if (!preds)
473 return false;
474
475 for (i = (struct edge_prediction *) *preds; i; i = i->ep_next)
476 if (i->ep_predictor == predictor)
477 return true;
478 return false;
479 }
480
 481 /* Return true when the probability of an edge is reliable.
 482 
 483    The profile guessing code is good at predicting the branch outcome (i.e.
 484    taken/not taken), which it gets right slightly over 75% of the time.
 485    It is, however, notoriously poor at predicting the probability itself.
 486    In general the guessed profile appears a lot flatter (with probabilities closer
 487    to 50%) than reality, so it is a bad idea to use it to drive optimizations
 488    such as those disabling dynamic branch prediction for well predictable
 489    branches.
 490 
 491    There are two exceptions - edges leading to noreturn edges and edges
 492    predicted by the number of iterations heuristics are predicted well.  This
 493    predicate should be able to distinguish those, but at the moment it simply
 494    checks for the noreturn heuristic, which is the only one giving a probability
 495    over 99% or below 1%.  In the future we might want to propagate reliability
 496    information across the CFG if we find this information useful in multiple places.  */
497 static bool
498 probability_reliable_p (int prob)
499 {
500 return (profile_status == PROFILE_READ
501 || (profile_status == PROFILE_GUESSED
502 && (prob <= HITRATE (1) || prob >= HITRATE (99))));
503 }
504
505 /* Same predicate as above, working on edges. */
506 bool
507 edge_probability_reliable_p (const_edge e)
508 {
509 return probability_reliable_p (e->probability);
510 }
511
512 /* Same predicate as edge_probability_reliable_p, working on notes. */
513 bool
514 br_prob_note_reliable_p (const_rtx note)
515 {
516 gcc_assert (REG_NOTE_KIND (note) == REG_BR_PROB);
517 return probability_reliable_p (INTVAL (XEXP (note, 0)));
518 }
519
520 static void
521 predict_insn (rtx insn, enum br_predictor predictor, int probability)
522 {
523 gcc_assert (any_condjump_p (insn));
524 if (!flag_guess_branch_prob)
525 return;
526
527 add_reg_note (insn, REG_BR_PRED,
528 gen_rtx_CONCAT (VOIDmode,
529 GEN_INT ((int) predictor),
530 GEN_INT ((int) probability)));
531 }
532
533 /* Predict insn by given predictor. */
534
535 void
536 predict_insn_def (rtx insn, enum br_predictor predictor,
537 enum prediction taken)
538 {
539 int probability = predictor_info[(int) predictor].hitrate;
540
541 if (taken != TAKEN)
542 probability = REG_BR_PROB_BASE - probability;
543
544 predict_insn (insn, predictor, probability);
545 }
546
547 /* Predict edge E with given probability if possible. */
548
549 void
550 rtl_predict_edge (edge e, enum br_predictor predictor, int probability)
551 {
552 rtx last_insn;
553 last_insn = BB_END (e->src);
554
555 /* We can store the branch prediction information only about
556 conditional jumps. */
557 if (!any_condjump_p (last_insn))
558 return;
559
560 /* We always store probability of branching. */
561 if (e->flags & EDGE_FALLTHRU)
562 probability = REG_BR_PROB_BASE - probability;
563
564 predict_insn (last_insn, predictor, probability);
565 }
566
567 /* Predict edge E with the given PROBABILITY. */
568 void
569 gimple_predict_edge (edge e, enum br_predictor predictor, int probability)
570 {
571 gcc_assert (profile_status != PROFILE_GUESSED);
572 if ((e->src != ENTRY_BLOCK_PTR && EDGE_COUNT (e->src->succs) > 1)
573 && flag_guess_branch_prob && optimize)
574 {
575 struct edge_prediction *i = XNEW (struct edge_prediction);
576 void **preds = pointer_map_insert (bb_predictions, e->src);
577
578 i->ep_next = (struct edge_prediction *) *preds;
579 *preds = i;
580 i->ep_probability = probability;
581 i->ep_predictor = predictor;
582 i->ep_edge = e;
583 }
584 }
585
586 /* Remove all predictions on given basic block that are attached
587 to edge E. */
588 void
589 remove_predictions_associated_with_edge (edge e)
590 {
591 void **preds;
592
593 if (!bb_predictions)
594 return;
595
596 preds = pointer_map_contains (bb_predictions, e->src);
597
598 if (preds)
599 {
600 struct edge_prediction **prediction = (struct edge_prediction **) preds;
601 struct edge_prediction *next;
602
603 while (*prediction)
604 {
605 if ((*prediction)->ep_edge == e)
606 {
607 next = (*prediction)->ep_next;
608 free (*prediction);
609 *prediction = next;
610 }
611 else
612 prediction = &((*prediction)->ep_next);
613 }
614 }
615 }
616
617 /* Clears the list of predictions stored for BB. */
618
619 static void
620 clear_bb_predictions (basic_block bb)
621 {
622 void **preds = pointer_map_contains (bb_predictions, bb);
623 struct edge_prediction *pred, *next;
624
625 if (!preds)
626 return;
627
628 for (pred = (struct edge_prediction *) *preds; pred; pred = next)
629 {
630 next = pred->ep_next;
631 free (pred);
632 }
633 *preds = NULL;
634 }
635
 636 /* Return true when we can store a prediction on insn INSN.
 637    At the moment we represent predictions only on conditional
 638    jumps, not on computed jumps or other complicated cases.  */
639 static bool
640 can_predict_insn_p (const_rtx insn)
641 {
642 return (JUMP_P (insn)
643 && any_condjump_p (insn)
644 && EDGE_COUNT (BLOCK_FOR_INSN (insn)->succs) >= 2);
645 }
646
647 /* Predict edge E by given predictor if possible. */
648
649 void
650 predict_edge_def (edge e, enum br_predictor predictor,
651 enum prediction taken)
652 {
653 int probability = predictor_info[(int) predictor].hitrate;
654
655 if (taken != TAKEN)
656 probability = REG_BR_PROB_BASE - probability;
657
658 predict_edge (e, predictor, probability);
659 }
660
661 /* Invert all branch predictions or probability notes in the INSN. This needs
662 to be done each time we invert the condition used by the jump. */
663
664 void
665 invert_br_probabilities (rtx insn)
666 {
667 rtx note;
668
669 for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
670 if (REG_NOTE_KIND (note) == REG_BR_PROB)
671 XEXP (note, 0) = GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (note, 0)));
672 else if (REG_NOTE_KIND (note) == REG_BR_PRED)
673 XEXP (XEXP (note, 0), 1)
674 = GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (XEXP (note, 0), 1)));
675 }
676
677 /* Dump information about the branch prediction to the output file. */
678
679 static void
680 dump_prediction (FILE *file, enum br_predictor predictor, int probability,
681 basic_block bb, int used)
682 {
683 edge e;
684 edge_iterator ei;
685
686 if (!file)
687 return;
688
689 FOR_EACH_EDGE (e, ei, bb->succs)
690 if (! (e->flags & EDGE_FALLTHRU))
691 break;
692
693 fprintf (file, " %s heuristics%s: %.1f%%",
694 predictor_info[predictor].name,
695 used ? "" : " (ignored)", probability * 100.0 / REG_BR_PROB_BASE);
696
697 if (bb->count)
698 {
699 fprintf (file, " exec ");
700 fprintf (file, HOST_WIDEST_INT_PRINT_DEC, bb->count);
701 if (e)
702 {
703 fprintf (file, " hit ");
704 fprintf (file, HOST_WIDEST_INT_PRINT_DEC, e->count);
705 fprintf (file, " (%.1f%%)", e->count * 100.0 / bb->count);
706 }
707 }
708
709 fprintf (file, "\n");
710 }
711
 712 /* We cannot predict the probabilities of the outgoing edges of BB.  Set them
 713    evenly and hope for the best.  */
714 static void
715 set_even_probabilities (basic_block bb)
716 {
717 int nedges = 0;
718 edge e;
719 edge_iterator ei;
720
721 FOR_EACH_EDGE (e, ei, bb->succs)
722 if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
723 nedges ++;
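  /* Split REG_BR_PROB_BASE evenly among the ordinary successors, rounding to
     the nearest unit; EH and fake edges get probability zero below.  */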
724 FOR_EACH_EDGE (e, ei, bb->succs)
725 if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
726 e->probability = (REG_BR_PROB_BASE + nedges / 2) / nedges;
727 else
728 e->probability = 0;
729 }
730
731 /* Combine all REG_BR_PRED notes into single probability and attach REG_BR_PROB
732 note if not already present. Remove now useless REG_BR_PRED notes. */
733
734 static void
735 combine_predictions_for_insn (rtx insn, basic_block bb)
736 {
737 rtx prob_note;
738 rtx *pnote;
739 rtx note;
740 int best_probability = PROB_EVEN;
741 enum br_predictor best_predictor = END_PREDICTORS;
742 int combined_probability = REG_BR_PROB_BASE / 2;
743 int d;
744 bool first_match = false;
745 bool found = false;
746
747 if (!can_predict_insn_p (insn))
748 {
749 set_even_probabilities (bb);
750 return;
751 }
752
753 prob_note = find_reg_note (insn, REG_BR_PROB, 0);
754 pnote = &REG_NOTES (insn);
755 if (dump_file)
756 fprintf (dump_file, "Predictions for insn %i bb %i\n", INSN_UID (insn),
757 bb->index);
758
 759   /* We implement the "first match" heuristics and use the probability guessed
 760      by the predictor with the smallest index.  */
761 for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
762 if (REG_NOTE_KIND (note) == REG_BR_PRED)
763 {
764 enum br_predictor predictor = ((enum br_predictor)
765 INTVAL (XEXP (XEXP (note, 0), 0)));
766 int probability = INTVAL (XEXP (XEXP (note, 0), 1));
767
768 found = true;
769 if (best_predictor > predictor)
770 best_probability = probability, best_predictor = predictor;
771
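	/* This implements the Dempster-Shafer combination of two predictions:
	   p' = (p_c * p) / (p_c * p + (1 - p_c) * (1 - p)),
	   with probabilities expressed as fractions of REG_BR_PROB_BASE.  D below
	   is that denominator scaled by REG_BR_PROB_BASE squared, which is why
	   the update multiplies by REG_BR_PROB_BASE again.  */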
772 d = (combined_probability * probability
773 + (REG_BR_PROB_BASE - combined_probability)
774 * (REG_BR_PROB_BASE - probability));
775
776 /* Use FP math to avoid overflows of 32bit integers. */
777 if (d == 0)
778 /* If one probability is 0% and one 100%, avoid division by zero. */
779 combined_probability = REG_BR_PROB_BASE / 2;
780 else
781 combined_probability = (((double) combined_probability) * probability
782 * REG_BR_PROB_BASE / d + 0.5);
783 }
784
 785   /* Decide which heuristic to use.  If we didn't match anything,
 786      use the no_prediction heuristic; if we did match, use either
 787      first match or Dempster-Shafer theory depending on the flags.  */
788
789 if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
790 first_match = true;
791
792 if (!found)
793 dump_prediction (dump_file, PRED_NO_PREDICTION,
794 combined_probability, bb, true);
795 else
796 {
797 dump_prediction (dump_file, PRED_DS_THEORY, combined_probability,
798 bb, !first_match);
799 dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability,
800 bb, first_match);
801 }
802
803 if (first_match)
804 combined_probability = best_probability;
805 dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);
806
807 while (*pnote)
808 {
809 if (REG_NOTE_KIND (*pnote) == REG_BR_PRED)
810 {
811 enum br_predictor predictor = ((enum br_predictor)
812 INTVAL (XEXP (XEXP (*pnote, 0), 0)));
813 int probability = INTVAL (XEXP (XEXP (*pnote, 0), 1));
814
815 dump_prediction (dump_file, predictor, probability, bb,
816 !first_match || best_predictor == predictor);
817 *pnote = XEXP (*pnote, 1);
818 }
819 else
820 pnote = &XEXP (*pnote, 1);
821 }
822
823 if (!prob_note)
824 {
825 add_reg_note (insn, REG_BR_PROB, GEN_INT (combined_probability));
826
 827       /* Save the prediction into the CFG in case we are seeing a non-degenerate
 828 	 conditional jump.  */
829 if (!single_succ_p (bb))
830 {
831 BRANCH_EDGE (bb)->probability = combined_probability;
832 FALLTHRU_EDGE (bb)->probability
833 = REG_BR_PROB_BASE - combined_probability;
834 }
835 }
836 else if (!single_succ_p (bb))
837 {
838 int prob = INTVAL (XEXP (prob_note, 0));
839
840 BRANCH_EDGE (bb)->probability = prob;
841 FALLTHRU_EDGE (bb)->probability = REG_BR_PROB_BASE - prob;
842 }
843 else
844 single_succ_edge (bb)->probability = REG_BR_PROB_BASE;
845 }
846
847 /* Combine predictions into single probability and store them into CFG.
848 Remove now useless prediction entries. */
849
850 static void
851 combine_predictions_for_bb (basic_block bb)
852 {
853 int best_probability = PROB_EVEN;
854 enum br_predictor best_predictor = END_PREDICTORS;
855 int combined_probability = REG_BR_PROB_BASE / 2;
856 int d;
857 bool first_match = false;
858 bool found = false;
859 struct edge_prediction *pred;
860 int nedges = 0;
861 edge e, first = NULL, second = NULL;
862 edge_iterator ei;
863 void **preds;
864
865 FOR_EACH_EDGE (e, ei, bb->succs)
866 if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
867 {
868 nedges ++;
869 if (first && !second)
870 second = e;
871 if (!first)
872 first = e;
873 }
874
875 /* When there is no successor or only one choice, prediction is easy.
876
877 We are lazy for now and predict only basic blocks with two outgoing
 878      edges.  It is possible to predict the generic case too, but we would have to
 879      ignore the first match heuristics and do more involved combining.  Implement
880 this later. */
881 if (nedges != 2)
882 {
883 if (!bb->count)
884 set_even_probabilities (bb);
885 clear_bb_predictions (bb);
886 if (dump_file)
887 fprintf (dump_file, "%i edges in bb %i predicted to even probabilities\n",
888 nedges, bb->index);
889 return;
890 }
891
892 if (dump_file)
893 fprintf (dump_file, "Predictions for bb %i\n", bb->index);
894
895 preds = pointer_map_contains (bb_predictions, bb);
896 if (preds)
897 {
 898       /* We implement the "first match" heuristics and use the probability guessed
 899 	 by the predictor with the smallest index.  */
900 for (pred = (struct edge_prediction *) *preds; pred; pred = pred->ep_next)
901 {
902 enum br_predictor predictor = pred->ep_predictor;
903 int probability = pred->ep_probability;
904
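	  /* Each prediction is recorded for the edge it applies to; flip it when
	     necessary so that PROBABILITY always refers to taking FIRST.  */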
905 if (pred->ep_edge != first)
906 probability = REG_BR_PROB_BASE - probability;
907
908 found = true;
 909 	  /* First match heuristics would be wildly confused if we predicted
 910 	     both directions.  */
911 if (best_predictor > predictor)
912 {
913 struct edge_prediction *pred2;
914 int prob = probability;
915
916 for (pred2 = (struct edge_prediction *) *preds; pred2; pred2 = pred2->ep_next)
917 if (pred2 != pred && pred2->ep_predictor == pred->ep_predictor)
918 {
 919 		  int probability2 = pred2->ep_probability;
920
921 if (pred2->ep_edge != first)
922 probability2 = REG_BR_PROB_BASE - probability2;
923
924 if ((probability < REG_BR_PROB_BASE / 2) !=
925 (probability2 < REG_BR_PROB_BASE / 2))
926 break;
927
928 /* If the same predictor later gave better result, go for it! */
929 if ((probability >= REG_BR_PROB_BASE / 2 && (probability2 > probability))
930 || (probability <= REG_BR_PROB_BASE / 2 && (probability2 < probability)))
931 prob = probability2;
932 }
933 if (!pred2)
934 best_probability = prob, best_predictor = predictor;
935 }
936
937 d = (combined_probability * probability
938 + (REG_BR_PROB_BASE - combined_probability)
939 * (REG_BR_PROB_BASE - probability));
940
941 /* Use FP math to avoid overflows of 32bit integers. */
942 if (d == 0)
943 /* If one probability is 0% and one 100%, avoid division by zero. */
944 combined_probability = REG_BR_PROB_BASE / 2;
945 else
946 combined_probability = (((double) combined_probability)
947 * probability
948 * REG_BR_PROB_BASE / d + 0.5);
949 }
950 }
951
 952   /* Decide which heuristic to use.  If we didn't match anything,
 953      use the no_prediction heuristic; if we did match, use either
 954      first match or Dempster-Shafer theory depending on the flags.  */
955
956 if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
957 first_match = true;
958
959 if (!found)
960 dump_prediction (dump_file, PRED_NO_PREDICTION, combined_probability, bb, true);
961 else
962 {
963 dump_prediction (dump_file, PRED_DS_THEORY, combined_probability, bb,
964 !first_match);
965 dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability, bb,
966 first_match);
967 }
968
969 if (first_match)
970 combined_probability = best_probability;
971 dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);
972
973 if (preds)
974 {
975 for (pred = (struct edge_prediction *) *preds; pred; pred = pred->ep_next)
976 {
977 enum br_predictor predictor = pred->ep_predictor;
978 int probability = pred->ep_probability;
979
980 if (pred->ep_edge != EDGE_SUCC (bb, 0))
981 probability = REG_BR_PROB_BASE - probability;
982 dump_prediction (dump_file, predictor, probability, bb,
983 !first_match || best_predictor == predictor);
984 }
985 }
986 clear_bb_predictions (bb);
987
988 if (!bb->count)
989 {
990 first->probability = combined_probability;
991 second->probability = REG_BR_PROB_BASE - combined_probability;
992 }
993 }
994
995 /* Check if T1 and T2 satisfy the IV_COMPARE condition.
 996    Return the SSA_NAME if the condition is satisfied, NULL otherwise.
997
998 T1 and T2 should be one of the following cases:
999 1. T1 is SSA_NAME, T2 is NULL
1000 2. T1 is SSA_NAME, T2 is INTEGER_CST between [-4, 4]
1001 3. T2 is SSA_NAME, T1 is INTEGER_CST between [-4, 4] */
1002
1003 static tree
1004 strips_small_constant (tree t1, tree t2)
1005 {
1006 tree ret = NULL;
1007 int value = 0;
1008
1009 if (!t1)
1010 return NULL;
1011 else if (TREE_CODE (t1) == SSA_NAME)
1012 ret = t1;
1013 else if (host_integerp (t1, 0))
1014 value = tree_low_cst (t1, 0);
1015 else
1016 return NULL;
1017
1018 if (!t2)
1019 return ret;
1020 else if (host_integerp (t2, 0))
1021 value = tree_low_cst (t2, 0);
1022 else if (TREE_CODE (t2) == SSA_NAME)
1023 {
1024 if (ret)
1025 return NULL;
1026 else
1027 ret = t2;
1028 }
1029
1030 if (value <= 4 && value >= -4)
1031 return ret;
1032 else
1033 return NULL;
1034 }
1035
1036 /* Return the SSA_NAME in T or T's operands.
1037 Return NULL if SSA_NAME cannot be found. */
1038
1039 static tree
1040 get_base_value (tree t)
1041 {
1042 if (TREE_CODE (t) == SSA_NAME)
1043 return t;
1044
1045 if (!BINARY_CLASS_P (t))
1046 return NULL;
1047
1048 switch (TREE_OPERAND_LENGTH (t))
1049 {
1050 case 1:
1051 return strips_small_constant (TREE_OPERAND (t, 0), NULL);
1052 case 2:
1053 return strips_small_constant (TREE_OPERAND (t, 0),
1054 TREE_OPERAND (t, 1));
1055 default:
1056 return NULL;
1057 }
1058 }
1059
1060 /* Check the compare STMT in LOOP. If it compares an induction
1061 variable to a loop invariant, return true, and save
1062 LOOP_INVARIANT, COMPARE_CODE and LOOP_STEP.
 1063    Otherwise return false and set LOOP_INVARIANT to NULL.  */
1064
1065 static bool
1066 is_comparison_with_loop_invariant_p (gimple stmt, struct loop *loop,
1067 tree *loop_invariant,
1068 enum tree_code *compare_code,
1069 tree *loop_step,
1070 tree *loop_iv_base)
1071 {
1072 tree op0, op1, bound, base;
1073 affine_iv iv0, iv1;
1074 enum tree_code code;
1075 tree step;
1076
1077 code = gimple_cond_code (stmt);
1078 *loop_invariant = NULL;
1079
1080 switch (code)
1081 {
1082 case GT_EXPR:
1083 case GE_EXPR:
1084 case NE_EXPR:
1085 case LT_EXPR:
1086 case LE_EXPR:
1087 case EQ_EXPR:
1088 break;
1089
1090 default:
1091 return false;
1092 }
1093
1094 op0 = gimple_cond_lhs (stmt);
1095 op1 = gimple_cond_rhs (stmt);
1096
1097 if ((TREE_CODE (op0) != SSA_NAME && TREE_CODE (op0) != INTEGER_CST)
1098 || (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op1) != INTEGER_CST))
1099 return false;
1100 if (!simple_iv (loop, loop_containing_stmt (stmt), op0, &iv0, true))
1101 return false;
1102 if (!simple_iv (loop, loop_containing_stmt (stmt), op1, &iv1, true))
1103 return false;
1104 if (TREE_CODE (iv0.step) != INTEGER_CST
1105 || TREE_CODE (iv1.step) != INTEGER_CST)
1106 return false;
1107 if ((integer_zerop (iv0.step) && integer_zerop (iv1.step))
1108 || (!integer_zerop (iv0.step) && !integer_zerop (iv1.step)))
1109 return false;
1110
1111 if (integer_zerop (iv0.step))
1112 {
1113 if (code != NE_EXPR && code != EQ_EXPR)
1114 code = invert_tree_comparison (code, false);
1115 bound = iv0.base;
1116 base = iv1.base;
1117 if (host_integerp (iv1.step, 0))
1118 step = iv1.step;
1119 else
1120 return false;
1121 }
1122 else
1123 {
1124 bound = iv1.base;
1125 base = iv0.base;
1126 if (host_integerp (iv0.step, 0))
1127 step = iv0.step;
1128 else
1129 return false;
1130 }
1131
1132 if (TREE_CODE (bound) != INTEGER_CST)
1133 bound = get_base_value (bound);
1134 if (!bound)
1135 return false;
1136 if (TREE_CODE (base) != INTEGER_CST)
1137 base = get_base_value (base);
1138 if (!base)
1139 return false;
1140
1141 *loop_invariant = bound;
1142 *compare_code = code;
1143 *loop_step = step;
1144 *loop_iv_base = base;
1145 return true;
1146 }
1147
1148 /* Compare two SSA_NAMEs: returns TRUE if T1 and T2 are value coherent. */
1149
1150 static bool
1151 expr_coherent_p (tree t1, tree t2)
1152 {
1153 gimple stmt;
1154 tree ssa_name_1 = NULL;
1155 tree ssa_name_2 = NULL;
1156
1157 gcc_assert (TREE_CODE (t1) == SSA_NAME || TREE_CODE (t1) == INTEGER_CST);
1158 gcc_assert (TREE_CODE (t2) == SSA_NAME || TREE_CODE (t2) == INTEGER_CST);
1159
1160 if (t1 == t2)
1161 return true;
1162
1163 if (TREE_CODE (t1) == INTEGER_CST && TREE_CODE (t2) == INTEGER_CST)
1164 return true;
1165 if (TREE_CODE (t1) == INTEGER_CST || TREE_CODE (t2) == INTEGER_CST)
1166 return false;
1167
1168 /* Check to see if t1 is expressed/defined with t2. */
1169 stmt = SSA_NAME_DEF_STMT (t1);
1170 gcc_assert (stmt != NULL);
1171 if (is_gimple_assign (stmt))
1172 {
1173 ssa_name_1 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
1174 if (ssa_name_1 && ssa_name_1 == t2)
1175 return true;
1176 }
1177
1178 /* Check to see if t2 is expressed/defined with t1. */
1179 stmt = SSA_NAME_DEF_STMT (t2);
1180 gcc_assert (stmt != NULL);
1181 if (is_gimple_assign (stmt))
1182 {
1183 ssa_name_2 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
1184 if (ssa_name_2 && ssa_name_2 == t1)
1185 return true;
1186 }
1187
1188 /* Compare if t1 and t2's def_stmts are identical. */
1189 if (ssa_name_2 != NULL && ssa_name_1 == ssa_name_2)
1190 return true;
1191 else
1192 return false;
1193 }
1194
1195 /* Predict branch probability of BB when BB contains a branch that compares
1196 an induction variable in LOOP with LOOP_IV_BASE_VAR to LOOP_BOUND_VAR. The
1197 loop exit is compared using LOOP_BOUND_CODE, with step of LOOP_BOUND_STEP.
1198
1199 E.g.
1200 for (int i = 0; i < bound; i++) {
1201 if (i < bound - 2)
1202 computation_1();
1203 else
1204 computation_2();
1205 }
1206
1207 In this loop, we will predict the branch inside the loop to be taken. */
1208
1209 static void
1210 predict_iv_comparison (struct loop *loop, basic_block bb,
1211 tree loop_bound_var,
1212 tree loop_iv_base_var,
1213 enum tree_code loop_bound_code,
1214 int loop_bound_step)
1215 {
1216 gimple stmt;
1217 tree compare_var, compare_base;
1218 enum tree_code compare_code;
1219 tree compare_step_var;
1220 edge then_edge;
1221 edge_iterator ei;
1222
1223 if (predicted_by_p (bb, PRED_LOOP_ITERATIONS_GUESSED)
1224 || predicted_by_p (bb, PRED_LOOP_ITERATIONS)
1225 || predicted_by_p (bb, PRED_LOOP_EXIT))
1226 return;
1227
1228 stmt = last_stmt (bb);
1229 if (!stmt || gimple_code (stmt) != GIMPLE_COND)
1230 return;
1231 if (!is_comparison_with_loop_invariant_p (stmt, loop, &compare_var,
1232 &compare_code,
1233 &compare_step_var,
1234 &compare_base))
1235 return;
1236
1237 /* Find the taken edge. */
1238 FOR_EACH_EDGE (then_edge, ei, bb->succs)
1239 if (then_edge->flags & EDGE_TRUE_VALUE)
1240 break;
1241
1242 /* When comparing an IV to a loop invariant, NE is more likely to be
1243 taken while EQ is more likely to be not-taken. */
1244 if (compare_code == NE_EXPR)
1245 {
1246 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1247 return;
1248 }
1249 else if (compare_code == EQ_EXPR)
1250 {
1251 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1252 return;
1253 }
1254
1255 if (!expr_coherent_p (loop_iv_base_var, compare_base))
1256 return;
1257
1258 /* If loop bound, base and compare bound are all constants, we can
1259 calculate the probability directly. */
1260 if (host_integerp (loop_bound_var, 0)
1261 && host_integerp (compare_var, 0)
1262 && host_integerp (compare_base, 0))
1263 {
1264 int probability;
1265 bool of, overflow = false;
1266 double_int mod, compare_count, tem, loop_count;
1267
1268 double_int loop_bound = tree_to_double_int (loop_bound_var);
1269 double_int compare_bound = tree_to_double_int (compare_var);
1270 double_int base = tree_to_double_int (compare_base);
1271 double_int compare_step = tree_to_double_int (compare_step_var);
1272
1273 /* (loop_bound - base) / compare_step */
1274 tem = loop_bound.sub_with_overflow (base, &of);
1275 overflow |= of;
1276 loop_count = tem.divmod_with_overflow (compare_step,
1277 0, TRUNC_DIV_EXPR,
1278 &mod, &of);
1279 overflow |= of;
1280
1281 if ((!compare_step.is_negative ())
1282 ^ (compare_code == LT_EXPR || compare_code == LE_EXPR))
1283 {
1284 /* (loop_bound - compare_bound) / compare_step */
1285 tem = loop_bound.sub_with_overflow (compare_bound, &of);
1286 overflow |= of;
1287 compare_count = tem.divmod_with_overflow (compare_step,
1288 0, TRUNC_DIV_EXPR,
1289 &mod, &of);
1290 overflow |= of;
1291 }
1292 else
1293 {
1294 /* (compare_bound - base) / compare_step */
1295 tem = compare_bound.sub_with_overflow (base, &of);
1296 overflow |= of;
1297 compare_count = tem.divmod_with_overflow (compare_step,
1298 0, TRUNC_DIV_EXPR,
1299 &mod, &of);
1300 overflow |= of;
1301 }
1302 if (compare_code == LE_EXPR || compare_code == GE_EXPR)
1303 ++compare_count;
1304 if (loop_bound_code == LE_EXPR || loop_bound_code == GE_EXPR)
1305 ++loop_count;
1306 if (compare_count.is_negative ())
1307 compare_count = double_int_zero;
1308 if (loop_count.is_negative ())
1309 loop_count = double_int_zero;
1310 if (loop_count.is_zero ())
1311 probability = 0;
1312 else if (compare_count.scmp (loop_count) == 1)
1313 probability = REG_BR_PROB_BASE;
1314 else
1315 {
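	  /* Otherwise the tested condition holds in roughly COMPARE_COUNT out of
	     LOOP_COUNT iterations, so scale that ratio to REG_BR_PROB_BASE.  */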
1316 /* If loop_count is too big, such that REG_BR_PROB_BASE * loop_count
1317 could overflow, shift both loop_count and compare_count right
1318 a bit so that it doesn't overflow. Note both counts are known not
1319 to be negative at this point. */
1320 int clz_bits = clz_hwi (loop_count.high);
1321 gcc_assert (REG_BR_PROB_BASE < 32768);
1322 if (clz_bits < 16)
1323 {
 1324 	      loop_count = loop_count.arshift (16 - clz_bits, HOST_BITS_PER_DOUBLE_INT);
 1325 	      compare_count = compare_count.arshift (16 - clz_bits, HOST_BITS_PER_DOUBLE_INT);
1326 }
1327 tem = compare_count.mul_with_sign (double_int::from_shwi
1328 (REG_BR_PROB_BASE), true, &of);
1329 gcc_assert (!of);
1330 tem = tem.divmod (loop_count, true, TRUNC_DIV_EXPR, &mod);
1331 probability = tem.to_uhwi ();
1332 }
1333
1334 if (!overflow)
1335 predict_edge (then_edge, PRED_LOOP_IV_COMPARE, probability);
1336
1337 return;
1338 }
1339
1340 if (expr_coherent_p (loop_bound_var, compare_var))
1341 {
1342 if ((loop_bound_code == LT_EXPR || loop_bound_code == LE_EXPR)
1343 && (compare_code == LT_EXPR || compare_code == LE_EXPR))
1344 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1345 else if ((loop_bound_code == GT_EXPR || loop_bound_code == GE_EXPR)
1346 && (compare_code == GT_EXPR || compare_code == GE_EXPR))
1347 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1348 else if (loop_bound_code == NE_EXPR)
1349 {
1350 /* If the loop backedge condition is "(i != bound)", we do
1351 the comparison based on the step of IV:
1352 * step < 0 : backedge condition is like (i > bound)
1353 * step > 0 : backedge condition is like (i < bound) */
1354 gcc_assert (loop_bound_step != 0);
1355 if (loop_bound_step > 0
1356 && (compare_code == LT_EXPR
1357 || compare_code == LE_EXPR))
1358 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1359 else if (loop_bound_step < 0
1360 && (compare_code == GT_EXPR
1361 || compare_code == GE_EXPR))
1362 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1363 else
1364 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1365 }
1366 else
1367 /* The branch is predicted not-taken if loop_bound_code is
1368 opposite with compare_code. */
1369 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1370 }
1371 else if (expr_coherent_p (loop_iv_base_var, compare_var))
1372 {
1373 /* For cases like:
1374 for (i = s; i < h; i++)
1375 if (i > s + 2) ....
1376 The branch should be predicted taken. */
1377 if (loop_bound_step > 0
1378 && (compare_code == GT_EXPR || compare_code == GE_EXPR))
1379 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1380 else if (loop_bound_step < 0
1381 && (compare_code == LT_EXPR || compare_code == LE_EXPR))
1382 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1383 else
1384 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1385 }
1386 }
1387
1388 /* Predict for extra loop exits that will lead to EXIT_EDGE. The extra loop
 1389    exits result from short-circuit conditions that will generate an
1390 if_tmp. E.g.:
1391
1392 if (foo() || global > 10)
1393 break;
1394
1395 This will be translated into:
1396
1397 BB3:
1398 loop header...
1399 BB4:
1400 if foo() goto BB6 else goto BB5
1401 BB5:
1402 if global > 10 goto BB6 else goto BB7
1403 BB6:
1404 goto BB7
1405 BB7:
1406 iftmp = (PHI 0(BB5), 1(BB6))
1407 if iftmp == 1 goto BB8 else goto BB3
1408 BB8:
1409 outside of the loop...
1410
1411 The edge BB7->BB8 is loop exit because BB8 is outside of the loop.
1412 From the dataflow, we can infer that BB4->BB6 and BB5->BB6 are also loop
1413 exits. This function takes BB7->BB8 as input, and finds out the extra loop
1414 exits to predict them using PRED_LOOP_EXIT. */
1415
1416 static void
1417 predict_extra_loop_exits (edge exit_edge)
1418 {
1419 unsigned i;
1420 bool check_value_one;
1421 gimple phi_stmt;
1422 tree cmp_rhs, cmp_lhs;
1423 gimple cmp_stmt = last_stmt (exit_edge->src);
1424
1425 if (!cmp_stmt || gimple_code (cmp_stmt) != GIMPLE_COND)
1426 return;
1427 cmp_rhs = gimple_cond_rhs (cmp_stmt);
1428 cmp_lhs = gimple_cond_lhs (cmp_stmt);
1429 if (!TREE_CONSTANT (cmp_rhs)
1430 || !(integer_zerop (cmp_rhs) || integer_onep (cmp_rhs)))
1431 return;
1432 if (TREE_CODE (cmp_lhs) != SSA_NAME)
1433 return;
1434
1435 /* If check_value_one is true, only the phi_args with value '1' will lead
1436 to loop exit. Otherwise, only the phi_args with value '0' will lead to
1437 loop exit. */
1438 check_value_one = (((integer_onep (cmp_rhs))
1439 ^ (gimple_cond_code (cmp_stmt) == EQ_EXPR))
1440 ^ ((exit_edge->flags & EDGE_TRUE_VALUE) != 0));
1441
1442 phi_stmt = SSA_NAME_DEF_STMT (cmp_lhs);
1443 if (!phi_stmt || gimple_code (phi_stmt) != GIMPLE_PHI)
1444 return;
1445
1446 for (i = 0; i < gimple_phi_num_args (phi_stmt); i++)
1447 {
1448 edge e1;
1449 edge_iterator ei;
1450 tree val = gimple_phi_arg_def (phi_stmt, i);
1451 edge e = gimple_phi_arg_edge (phi_stmt, i);
1452
1453 if (!TREE_CONSTANT (val) || !(integer_zerop (val) || integer_onep (val)))
1454 continue;
1455 if ((check_value_one ^ integer_onep (val)) == 1)
1456 continue;
1457 if (EDGE_COUNT (e->src->succs) != 1)
1458 {
1459 predict_paths_leading_to_edge (e, PRED_LOOP_EXIT, NOT_TAKEN);
1460 continue;
1461 }
1462
1463 FOR_EACH_EDGE (e1, ei, e->src->preds)
1464 predict_paths_leading_to_edge (e1, PRED_LOOP_EXIT, NOT_TAKEN);
1465 }
1466 }
1467
1468 /* Predict edge probabilities by exploiting loop structure. */
1469
1470 static void
1471 predict_loops (void)
1472 {
1473 loop_iterator li;
1474 struct loop *loop;
1475
1476 /* Try to predict out blocks in a loop that are not part of a
1477 natural loop. */
1478 FOR_EACH_LOOP (li, loop, 0)
1479 {
1480 basic_block bb, *bbs;
1481 unsigned j, n_exits;
1482 vec<edge> exits;
1483 struct tree_niter_desc niter_desc;
1484 edge ex;
1485 struct nb_iter_bound *nb_iter;
1486 enum tree_code loop_bound_code = ERROR_MARK;
1487 tree loop_bound_step = NULL;
1488 tree loop_bound_var = NULL;
1489 tree loop_iv_base = NULL;
1490 gimple stmt = NULL;
1491
1492 exits = get_loop_exit_edges (loop);
1493 n_exits = exits.length ();
1494 if (!n_exits)
1495 {
1496 exits.release ();
1497 continue;
1498 }
1499
1500 FOR_EACH_VEC_ELT (exits, j, ex)
1501 {
1502 tree niter = NULL;
1503 HOST_WIDE_INT nitercst;
1504 int max = PARAM_VALUE (PARAM_MAX_PREDICTED_ITERATIONS);
1505 int probability;
1506 enum br_predictor predictor;
1507
1508 predict_extra_loop_exits (ex);
1509
1510 if (number_of_iterations_exit (loop, ex, &niter_desc, false, false))
1511 niter = niter_desc.niter;
1512 if (!niter || TREE_CODE (niter_desc.niter) != INTEGER_CST)
1513 niter = loop_niter_by_eval (loop, ex);
1514
1515 if (TREE_CODE (niter) == INTEGER_CST)
1516 {
1517 if (host_integerp (niter, 1)
1518 && max
1519 && compare_tree_int (niter, max - 1) == -1)
1520 nitercst = tree_low_cst (niter, 1) + 1;
1521 else
1522 nitercst = max;
1523 predictor = PRED_LOOP_ITERATIONS;
1524 }
1525 /* If we have just one exit and we can derive some information about
1526 the number of iterations of the loop from the statements inside
1527 the loop, use it to predict this exit. */
1528 else if (n_exits == 1)
1529 {
1530 nitercst = estimated_stmt_executions_int (loop);
1531 if (nitercst < 0)
1532 continue;
1533 if (nitercst > max)
1534 nitercst = max;
1535
1536 predictor = PRED_LOOP_ITERATIONS_GUESSED;
1537 }
1538 else
1539 continue;
1540
1541 /* If the prediction for number of iterations is zero, do not
1542 predict the exit edges. */
1543 if (nitercst == 0)
1544 continue;
1545
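	  /* The exit is expected to be taken about once per NITERCST iterations,
	     so predict it with probability ~1/NITERCST, rounded to the nearest
	     unit of REG_BR_PROB_BASE.  */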
1546 probability = ((REG_BR_PROB_BASE + nitercst / 2) / nitercst);
1547 predict_edge (ex, predictor, probability);
1548 }
1549 exits.release ();
1550
1551 /* Find information about loop bound variables. */
1552 for (nb_iter = loop->bounds; nb_iter;
1553 nb_iter = nb_iter->next)
1554 if (nb_iter->stmt
1555 && gimple_code (nb_iter->stmt) == GIMPLE_COND)
1556 {
1557 stmt = nb_iter->stmt;
1558 break;
1559 }
1560 if (!stmt && last_stmt (loop->header)
1561 && gimple_code (last_stmt (loop->header)) == GIMPLE_COND)
1562 stmt = last_stmt (loop->header);
1563 if (stmt)
1564 is_comparison_with_loop_invariant_p (stmt, loop,
1565 &loop_bound_var,
1566 &loop_bound_code,
1567 &loop_bound_step,
1568 &loop_iv_base);
1569
1570 bbs = get_loop_body (loop);
1571
1572 for (j = 0; j < loop->num_nodes; j++)
1573 {
1574 int header_found = 0;
1575 edge e;
1576 edge_iterator ei;
1577
1578 bb = bbs[j];
1579
 1580 	  /* Bypass loop heuristics on continue statements.  These
 1581 	     statements construct loops via "non-loop" constructs
 1582 	     in the source language and are better handled
 1583 	     separately.  */
1584 if (predicted_by_p (bb, PRED_CONTINUE))
1585 continue;
1586
1587 /* Loop branch heuristics - predict an edge back to a
1588 loop's head as taken. */
1589 if (bb == loop->latch)
1590 {
1591 e = find_edge (loop->latch, loop->header);
1592 if (e)
1593 {
1594 header_found = 1;
1595 predict_edge_def (e, PRED_LOOP_BRANCH, TAKEN);
1596 }
1597 }
1598
 1599 	  /* Loop exit heuristics - predict as not taken an edge exiting the loop
 1600 	     if the conditional has no loop header successor.  */
1601 if (!header_found
1602 /* If we already used more reliable loop exit predictors, do not
1603 bother with PRED_LOOP_EXIT. */
1604 && !predicted_by_p (bb, PRED_LOOP_ITERATIONS_GUESSED)
1605 && !predicted_by_p (bb, PRED_LOOP_ITERATIONS))
1606 {
 1607 	      /* For a loop with many exits we don't want to predict all exits
 1608 	         with a fairly large probability, because if all exits are
 1609 	         considered in a row, the loop would be predicted to iterate
 1610 	         almost never.  The code that divides the probability by the number
 1611 	         of exits is very rough.  It should compute the number of exits
 1612 	         taken in each path through the function (not the overall number
 1613 	         of exits, which might be a lot higher for loops with wide switch
 1614 	         statements in them) and compute the n-th square root.
 1615 
 1616 	         We limit the minimal probability to 2% to keep
 1617 	         EDGE_PROBABILITY_RELIABLE from trusting the branch prediction,
 1618 	         as this was causing a regression in the perl benchmark containing such
 1619 	         a wide loop.  */
1620
1621 int probability = ((REG_BR_PROB_BASE
1622 - predictor_info [(int) PRED_LOOP_EXIT].hitrate)
1623 / n_exits);
1624 if (probability < HITRATE (2))
1625 probability = HITRATE (2);
1626 FOR_EACH_EDGE (e, ei, bb->succs)
1627 if (e->dest->index < NUM_FIXED_BLOCKS
1628 || !flow_bb_inside_loop_p (loop, e->dest))
1629 predict_edge (e, PRED_LOOP_EXIT, probability);
1630 }
1631 if (loop_bound_var)
1632 predict_iv_comparison (loop, bb, loop_bound_var, loop_iv_base,
1633 loop_bound_code,
1634 tree_low_cst (loop_bound_step, 0));
1635 }
1636
1637 /* Free basic blocks from get_loop_body. */
1638 free (bbs);
1639 }
1640 }
1641
1642 /* Attempt to predict probabilities of BB outgoing edges using local
1643 properties. */
1644 static void
1645 bb_estimate_probability_locally (basic_block bb)
1646 {
1647 rtx last_insn = BB_END (bb);
1648 rtx cond;
1649
1650 if (! can_predict_insn_p (last_insn))
1651 return;
1652 cond = get_condition (last_insn, NULL, false, false);
1653 if (! cond)
1654 return;
1655
1656 /* Try "pointer heuristic."
1657 A comparison ptr == 0 is predicted as false.
1658 Similarly, a comparison ptr1 == ptr2 is predicted as false. */
1659 if (COMPARISON_P (cond)
1660 && ((REG_P (XEXP (cond, 0)) && REG_POINTER (XEXP (cond, 0)))
1661 || (REG_P (XEXP (cond, 1)) && REG_POINTER (XEXP (cond, 1)))))
1662 {
1663 if (GET_CODE (cond) == EQ)
1664 predict_insn_def (last_insn, PRED_POINTER, NOT_TAKEN);
1665 else if (GET_CODE (cond) == NE)
1666 predict_insn_def (last_insn, PRED_POINTER, TAKEN);
1667 }
1668 else
1669
1670 /* Try "opcode heuristic."
1671 EQ tests are usually false and NE tests are usually true. Also,
1672 most quantities are positive, so we can make the appropriate guesses
1673 about signed comparisons against zero. */
1674 switch (GET_CODE (cond))
1675 {
1676 case CONST_INT:
1677 /* Unconditional branch. */
1678 predict_insn_def (last_insn, PRED_UNCONDITIONAL,
1679 cond == const0_rtx ? NOT_TAKEN : TAKEN);
1680 break;
1681
1682 case EQ:
1683 case UNEQ:
 1684       /* Floating point comparisons appear to behave in a very
 1685 	 unpredictable way because of the special role of = tests in
 1686 	 FP code.  */
1687 if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
1688 ;
1689 /* Comparisons with 0 are often used for booleans and there is
1690 nothing useful to predict about them. */
1691 else if (XEXP (cond, 1) == const0_rtx
1692 || XEXP (cond, 0) == const0_rtx)
1693 ;
1694 else
1695 predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, NOT_TAKEN);
1696 break;
1697
1698 case NE:
1699 case LTGT:
 1700       /* Floating point comparisons appear to behave in a very
 1701 	 unpredictable way because of the special role of = tests in
 1702 	 FP code.  */
1703 if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
1704 ;
1705 /* Comparisons with 0 are often used for booleans and there is
1706 nothing useful to predict about them. */
1707 else if (XEXP (cond, 1) == const0_rtx
1708 || XEXP (cond, 0) == const0_rtx)
1709 ;
1710 else
1711 predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, TAKEN);
1712 break;
1713
1714 case ORDERED:
1715 predict_insn_def (last_insn, PRED_FPOPCODE, TAKEN);
1716 break;
1717
1718 case UNORDERED:
1719 predict_insn_def (last_insn, PRED_FPOPCODE, NOT_TAKEN);
1720 break;
1721
1722 case LE:
1723 case LT:
1724 if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
1725 || XEXP (cond, 1) == constm1_rtx)
1726 predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, NOT_TAKEN);
1727 break;
1728
1729 case GE:
1730 case GT:
1731 if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
1732 || XEXP (cond, 1) == constm1_rtx)
1733 predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, TAKEN);
1734 break;
1735
1736 default:
1737 break;
1738 }
1739 }
1740
1741 /* Set edge->probability for each successor edge of BB. */
1742 void
1743 guess_outgoing_edge_probabilities (basic_block bb)
1744 {
1745 bb_estimate_probability_locally (bb);
1746 combine_predictions_for_insn (BB_END (bb), bb);
1747 }
1748 \f
1749 static tree expr_expected_value (tree, bitmap);
1750
1751 /* Helper function for expr_expected_value. */
1752
1753 static tree
1754 expr_expected_value_1 (tree type, tree op0, enum tree_code code,
1755 tree op1, bitmap visited)
1756 {
1757 gimple def;
1758
1759 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1760 {
1761 if (TREE_CONSTANT (op0))
1762 return op0;
1763
1764 if (code != SSA_NAME)
1765 return NULL_TREE;
1766
1767 def = SSA_NAME_DEF_STMT (op0);
1768
1769 /* If we were already here, break the infinite cycle. */
1770 if (!bitmap_set_bit (visited, SSA_NAME_VERSION (op0)))
1771 return NULL;
1772
1773 if (gimple_code (def) == GIMPLE_PHI)
1774 {
 1775 	  /* All the arguments of the PHI node must have the same expected
 1776 	     constant value.  */
1777 int i, n = gimple_phi_num_args (def);
1778 tree val = NULL, new_val;
1779
1780 for (i = 0; i < n; i++)
1781 {
1782 tree arg = PHI_ARG_DEF (def, i);
1783
1784 /* If this PHI has itself as an argument, we cannot
 1785 		 determine the expected value of this argument.  However,
1786 if we can find an expected constant value for the other
1787 PHI args then we can still be sure that this is
1788 likely a constant. So be optimistic and just
1789 continue with the next argument. */
1790 if (arg == PHI_RESULT (def))
1791 continue;
1792
1793 new_val = expr_expected_value (arg, visited);
1794 if (!new_val)
1795 return NULL;
1796 if (!val)
1797 val = new_val;
1798 else if (!operand_equal_p (val, new_val, false))
1799 return NULL;
1800 }
1801 return val;
1802 }
1803 if (is_gimple_assign (def))
1804 {
1805 if (gimple_assign_lhs (def) != op0)
1806 return NULL;
1807
1808 return expr_expected_value_1 (TREE_TYPE (gimple_assign_lhs (def)),
1809 gimple_assign_rhs1 (def),
1810 gimple_assign_rhs_code (def),
1811 gimple_assign_rhs2 (def),
1812 visited);
1813 }
1814
1815 if (is_gimple_call (def))
1816 {
1817 tree decl = gimple_call_fndecl (def);
1818 if (!decl)
1819 return NULL;
1820 if (DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
1821 switch (DECL_FUNCTION_CODE (decl))
1822 {
1823 case BUILT_IN_EXPECT:
1824 {
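	      /* __builtin_expect (e, c) evaluates to E; its expected value is E
		 itself when E is already constant, and the hint C otherwise.  */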
1825 tree val;
1826 if (gimple_call_num_args (def) != 2)
1827 return NULL;
1828 val = gimple_call_arg (def, 0);
1829 if (TREE_CONSTANT (val))
1830 return val;
1831 return gimple_call_arg (def, 1);
1832 }
1833
1834 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_N:
1835 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_1:
1836 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_2:
1837 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_4:
1838 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_8:
1839 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_16:
1840 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE:
1841 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_N:
1842 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_1:
1843 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_2:
1844 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_4:
1845 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_8:
1846 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_16:
1847 /* Assume that any given atomic operation has low contention,
1848 and thus the compare-and-swap operation succeeds. */
1849 return boolean_true_node;
1850 }
1851 }
1852
1853 return NULL;
1854 }
1855
1856 if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS)
1857 {
1858 tree res;
1859 op0 = expr_expected_value (op0, visited);
1860 if (!op0)
1861 return NULL;
1862 op1 = expr_expected_value (op1, visited);
1863 if (!op1)
1864 return NULL;
1865 res = fold_build2 (code, type, op0, op1);
1866 if (TREE_CONSTANT (res))
1867 return res;
1868 return NULL;
1869 }
1870 if (get_gimple_rhs_class (code) == GIMPLE_UNARY_RHS)
1871 {
1872 tree res;
1873 op0 = expr_expected_value (op0, visited);
1874 if (!op0)
1875 return NULL;
1876 res = fold_build1 (code, type, op0);
1877 if (TREE_CONSTANT (res))
1878 return res;
1879 return NULL;
1880 }
1881 return NULL;
1882 }
1883
 1884 /* Return the constant that EXPR will likely have at execution time, or NULL
 1885    if unknown.  The function is used by the builtin_expect branch predictor,
 1886    so the evidence must come from this construct and possible additional
 1887    constant folding.
 1888 
 1889    We may want to implement a more involved value guess (such as value range
 1890    propagation based prediction), but such tricks shall go into a new
 1891    implementation.  */
1891
1892 static tree
1893 expr_expected_value (tree expr, bitmap visited)
1894 {
1895 enum tree_code code;
1896 tree op0, op1;
1897
1898 if (TREE_CONSTANT (expr))
1899 return expr;
1900
1901 extract_ops_from_tree (expr, &code, &op0, &op1);
1902 return expr_expected_value_1 (TREE_TYPE (expr),
1903 op0, code, op1, visited);
1904 }
1905
1906 \f
1907 /* Get rid of all builtin_expect calls and GIMPLE_PREDICT statements
1908 we no longer need. */
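/* A call LHS = __builtin_expect (VAL, EXPECTED) is replaced by the plain
   assignment LHS = VAL; an expect call whose result is unused is simply
   removed, as are all GIMPLE_PREDICT statements.  */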
1909 static unsigned int
1910 strip_predict_hints (void)
1911 {
1912 basic_block bb;
1913 gimple ass_stmt;
1914 tree var;
1915
1916 FOR_EACH_BB (bb)
1917 {
1918 gimple_stmt_iterator bi;
1919 for (bi = gsi_start_bb (bb); !gsi_end_p (bi);)
1920 {
1921 gimple stmt = gsi_stmt (bi);
1922
1923 if (gimple_code (stmt) == GIMPLE_PREDICT)
1924 {
1925 gsi_remove (&bi, true);
1926 continue;
1927 }
1928 else if (gimple_code (stmt) == GIMPLE_CALL)
1929 {
1930 tree fndecl = gimple_call_fndecl (stmt);
1931
1932 if (fndecl
1933 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
1934 && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_EXPECT
1935 && gimple_call_num_args (stmt) == 2)
1936 {
1937 var = gimple_call_lhs (stmt);
1938 if (var)
1939 {
1940 ass_stmt
1941 = gimple_build_assign (var, gimple_call_arg (stmt, 0));
1942 gsi_replace (&bi, ass_stmt, true);
1943 }
1944 else
1945 {
1946 gsi_remove (&bi, true);
1947 continue;
1948 }
1949 }
1950 }
1951 gsi_next (&bi);
1952 }
1953 }
1954 return 0;
1955 }
1956 \f
1957 /* Predict using opcode of the last statement in basic block. */
1958 static void
1959 tree_predict_by_opcode (basic_block bb)
1960 {
1961 gimple stmt = last_stmt (bb);
1962 edge then_edge;
1963 tree op0, op1;
1964 tree type;
1965 tree val;
1966 enum tree_code cmp;
1967 bitmap visited;
1968 edge_iterator ei;
1969
1970 if (!stmt || gimple_code (stmt) != GIMPLE_COND)
1971 return;
1972 FOR_EACH_EDGE (then_edge, ei, bb->succs)
1973 if (then_edge->flags & EDGE_TRUE_VALUE)
1974 break;
1975 op0 = gimple_cond_lhs (stmt);
1976 op1 = gimple_cond_rhs (stmt);
1977 cmp = gimple_cond_code (stmt);
1978 type = TREE_TYPE (op0);
1979 visited = BITMAP_ALLOC (NULL);
1980 val = expr_expected_value_1 (boolean_type_node, op0, cmp, op1, visited);
1981 BITMAP_FREE (visited);
1982 if (val)
1983 {
1984 if (integer_zerop (val))
1985 predict_edge_def (then_edge, PRED_BUILTIN_EXPECT, NOT_TAKEN);
1986 else
1987 predict_edge_def (then_edge, PRED_BUILTIN_EXPECT, TAKEN);
1988 return;
1989 }
1990 /* Try "pointer heuristic."
1991 A comparison ptr == 0 is predicted as false.
1992 Similarly, a comparison ptr1 == ptr2 is predicted as false. */
1993 if (POINTER_TYPE_P (type))
1994 {
1995 if (cmp == EQ_EXPR)
1996 predict_edge_def (then_edge, PRED_TREE_POINTER, NOT_TAKEN);
1997 else if (cmp == NE_EXPR)
1998 predict_edge_def (then_edge, PRED_TREE_POINTER, TAKEN);
1999 }
2000 else
2001
2002 /* Try "opcode heuristic."
2003 EQ tests are usually false and NE tests are usually true. Also,
2004 most quantities are positive, so we can make the appropriate guesses
2005 about signed comparisons against zero. */
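/* For instance, "if (i == 5)" is predicted not taken, while
   "if (n > 0)" is predicted taken.  */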
2006 switch (cmp)
2007 {
2008 case EQ_EXPR:
2009 case UNEQ_EXPR:
2010 /* Floating point comparisons appear to behave in a very
2011 unpredictable way because of the special role of equality
2012 tests in FP code. */
2013 if (FLOAT_TYPE_P (type))
2014 ;
2015 /* Comparisons with 0 are often used for booleans and there is
2016 nothing useful to predict about them. */
2017 else if (integer_zerop (op0) || integer_zerop (op1))
2018 ;
2019 else
2020 predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, NOT_TAKEN);
2021 break;
2022
2023 case NE_EXPR:
2024 case LTGT_EXPR:
2025 /* Floating point comparisons appear to behave in a very
2026 unpredictable way because of the special role of equality
2027 tests in FP code. */
2028 if (FLOAT_TYPE_P (type))
2029 ;
2030 /* Comparisons with 0 are often used for booleans and there is
2031 nothing useful to predict about them. */
2032 else if (integer_zerop (op0)
2033 || integer_zerop (op1))
2034 ;
2035 else
2036 predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, TAKEN);
2037 break;
2038
2039 case ORDERED_EXPR:
2040 predict_edge_def (then_edge, PRED_TREE_FPOPCODE, TAKEN);
2041 break;
2042
2043 case UNORDERED_EXPR:
2044 predict_edge_def (then_edge, PRED_TREE_FPOPCODE, NOT_TAKEN);
2045 break;
2046
2047 case LE_EXPR:
2048 case LT_EXPR:
2049 if (integer_zerop (op1)
2050 || integer_onep (op1)
2051 || integer_all_onesp (op1)
2052 || real_zerop (op1)
2053 || real_onep (op1)
2054 || real_minus_onep (op1))
2055 predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, NOT_TAKEN);
2056 break;
2057
2058 case GE_EXPR:
2059 case GT_EXPR:
2060 if (integer_zerop (op1)
2061 || integer_onep (op1)
2062 || integer_all_onesp (op1)
2063 || real_zerop (op1)
2064 || real_onep (op1)
2065 || real_minus_onep (op1))
2066 predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, TAKEN);
2067 break;
2068
2069 default:
2070 break;
2071 }
2072 }
2073
2074 /* Try to guess whether the return value represents an error code. */
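/* For instance, "return NULL;" in a pointer-returning function and
   "return -1;" in an integer-returning one are both taken as signs of
   a rarely executed error path.  */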
2075
2076 static enum br_predictor
2077 return_prediction (tree val, enum prediction *prediction)
2078 {
2079 /* VOID. */
2080 if (!val)
2081 return PRED_NO_PREDICTION;
2082 /* Different heuristics for pointers and scalars. */
2083 if (POINTER_TYPE_P (TREE_TYPE (val)))
2084 {
2085 /* NULL is usually not returned. */
2086 if (integer_zerop (val))
2087 {
2088 *prediction = NOT_TAKEN;
2089 return PRED_NULL_RETURN;
2090 }
2091 }
2092 else if (INTEGRAL_TYPE_P (TREE_TYPE (val)))
2093 {
2094 /* Negative return values are often used to indicate
2095 errors. */
2096 if (TREE_CODE (val) == INTEGER_CST
2097 && tree_int_cst_sgn (val) < 0)
2098 {
2099 *prediction = NOT_TAKEN;
2100 return PRED_NEGATIVE_RETURN;
2101 }
2102 /* Constant return values seem to be commonly taken.
2103 Zero/one often represent booleans, so exclude them from the
2104 heuristics. */
2105 if (TREE_CONSTANT (val)
2106 && (!integer_zerop (val) && !integer_onep (val)))
2107 {
2108 *prediction = TAKEN;
2109 return PRED_CONST_RETURN;
2110 }
2111 }
2112 return PRED_NO_PREDICTION;
2113 }
2114
2115 /* Find the basic block with the return expression and look for a possible
2116 return value, trying to apply the RETURN_PREDICTION heuristics. */
2117 static void
2118 apply_return_prediction (void)
2119 {
2120 gimple return_stmt = NULL;
2121 tree return_val;
2122 edge e;
2123 gimple phi;
2124 int phi_num_args, i;
2125 enum br_predictor pred;
2126 enum prediction direction;
2127 edge_iterator ei;
2128
2129 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
2130 {
2131 return_stmt = last_stmt (e->src);
2132 if (return_stmt
2133 && gimple_code (return_stmt) == GIMPLE_RETURN)
2134 break;
2135 }
2136 if (!e)
2137 return;
2138 return_val = gimple_return_retval (return_stmt);
2139 if (!return_val)
2140 return;
2141 if (TREE_CODE (return_val) != SSA_NAME
2142 || !SSA_NAME_DEF_STMT (return_val)
2143 || gimple_code (SSA_NAME_DEF_STMT (return_val)) != GIMPLE_PHI)
2144 return;
2145 phi = SSA_NAME_DEF_STMT (return_val);
2146 phi_num_args = gimple_phi_num_args (phi);
2147 pred = return_prediction (PHI_ARG_DEF (phi, 0), &direction);
2148
2149 /* Avoid the degenerate case where all return values from the function
2150 belong to the same category (i.e. they are all positive constants),
2151 since then we can hardly say anything useful about them. */
2152 for (i = 1; i < phi_num_args; i++)
2153 if (pred != return_prediction (PHI_ARG_DEF (phi, i), &direction))
2154 break;
2155 if (i != phi_num_args)
2156 for (i = 0; i < phi_num_args; i++)
2157 {
2158 pred = return_prediction (PHI_ARG_DEF (phi, i), &direction);
2159 if (pred != PRED_NO_PREDICTION)
2160 predict_paths_leading_to_edge (gimple_phi_arg_edge (phi, i), pred,
2161 direction);
2162 }
2163 }
2164
2165 /* Look for basic blocks that contain unlikely-to-happen events
2166 (such as noreturn calls) and mark all paths leading to execution
2167 of such basic blocks as unlikely. */
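/* Typical examples are paths that end in a call to abort () or in a
   call to a function declared with attribute ((cold)).  */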
2168
2169 static void
2170 tree_bb_level_predictions (void)
2171 {
2172 basic_block bb;
2173 bool has_return_edges = false;
2174 edge e;
2175 edge_iterator ei;
2176
2177 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
2178 if (!(e->flags & (EDGE_ABNORMAL | EDGE_FAKE | EDGE_EH)))
2179 {
2180 has_return_edges = true;
2181 break;
2182 }
2183
2184 apply_return_prediction ();
2185
2186 FOR_EACH_BB (bb)
2187 {
2188 gimple_stmt_iterator gsi;
2189
2190 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2191 {
2192 gimple stmt = gsi_stmt (gsi);
2193 tree decl;
2194
2195 if (is_gimple_call (stmt))
2196 {
2197 if ((gimple_call_flags (stmt) & ECF_NORETURN)
2198 && has_return_edges)
2199 predict_paths_leading_to (bb, PRED_NORETURN,
2200 NOT_TAKEN);
2201 decl = gimple_call_fndecl (stmt);
2202 if (decl
2203 && lookup_attribute ("cold",
2204 DECL_ATTRIBUTES (decl)))
2205 predict_paths_leading_to (bb, PRED_COLD_FUNCTION,
2206 NOT_TAKEN);
2207 }
2208 else if (gimple_code (stmt) == GIMPLE_PREDICT)
2209 {
2210 predict_paths_leading_to (bb, gimple_predict_predictor (stmt),
2211 gimple_predict_outcome (stmt));
2212 /* Keep GIMPLE_PREDICT around so early inlining will propagate
2213 hints to callers. */
2214 }
2215 }
2216 }
2217 }
2218
2219 #ifdef ENABLE_CHECKING
2220
2221 /* Callback for pointer_map_traverse, asserts that the pointer map is
2222 empty. */
2223
2224 static bool
2225 assert_is_empty (const void *key ATTRIBUTE_UNUSED, void **value,
2226 void *data ATTRIBUTE_UNUSED)
2227 {
2228 gcc_assert (!*value);
2229 return false;
2230 }
2231 #endif
2232
2233 /* Predict branch probabilities and estimate profile for basic block BB. */
2234
2235 static void
2236 tree_estimate_probability_bb (basic_block bb)
2237 {
2238 edge e;
2239 edge_iterator ei;
2240 gimple last;
2241
2242 FOR_EACH_EDGE (e, ei, bb->succs)
2243 {
2244 /* Predict edges to user labels with attributes. */
2245 if (e->dest != EXIT_BLOCK_PTR)
2246 {
2247 gimple_stmt_iterator gi;
2248 for (gi = gsi_start_bb (e->dest); !gsi_end_p (gi); gsi_next (&gi))
2249 {
2250 gimple stmt = gsi_stmt (gi);
2251 tree decl;
2252
2253 if (gimple_code (stmt) != GIMPLE_LABEL)
2254 break;
2255 decl = gimple_label_label (stmt);
2256 if (DECL_ARTIFICIAL (decl))
2257 continue;
2258
2259 /* Finally, we have a user-defined label. */
2260 if (lookup_attribute ("cold", DECL_ATTRIBUTES (decl)))
2261 predict_edge_def (e, PRED_COLD_LABEL, NOT_TAKEN);
2262 else if (lookup_attribute ("hot", DECL_ATTRIBUTES (decl)))
2263 predict_edge_def (e, PRED_HOT_LABEL, TAKEN);
2264 }
2265 }
2266
2267 /* Predict early returns to be probable, as we've already taken
2268 care of error returns, and the other cases are often used for
2269 fast paths through the function.
2270
2271 Since we've already removed the return statements, we are
2272 looking for CFG like:
2273
2274 if (conditional)
2275 {
2276 ..
2277 goto return_block
2278 }
2279 some other blocks
2280 return_block:
2281 return_stmt. */
2282 if (e->dest != bb->next_bb
2283 && e->dest != EXIT_BLOCK_PTR
2284 && single_succ_p (e->dest)
2285 && single_succ_edge (e->dest)->dest == EXIT_BLOCK_PTR
2286 && (last = last_stmt (e->dest)) != NULL
2287 && gimple_code (last) == GIMPLE_RETURN)
2288 {
2289 edge e1;
2290 edge_iterator ei1;
2291
2292 if (single_succ_p (bb))
2293 {
2294 FOR_EACH_EDGE (e1, ei1, bb->preds)
2295 if (!predicted_by_p (e1->src, PRED_NULL_RETURN)
2296 && !predicted_by_p (e1->src, PRED_CONST_RETURN)
2297 && !predicted_by_p (e1->src, PRED_NEGATIVE_RETURN))
2298 predict_edge_def (e1, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
2299 }
2300 else
2301 if (!predicted_by_p (e->src, PRED_NULL_RETURN)
2302 && !predicted_by_p (e->src, PRED_CONST_RETURN)
2303 && !predicted_by_p (e->src, PRED_NEGATIVE_RETURN))
2304 predict_edge_def (e, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
2305 }
2306
2307 /* Look for a block we are guarding (i.e. we dominate it,
2308 but it doesn't postdominate us). */
2309 if (e->dest != EXIT_BLOCK_PTR && e->dest != bb
2310 && dominated_by_p (CDI_DOMINATORS, e->dest, e->src)
2311 && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e->dest))
2312 {
2313 gimple_stmt_iterator bi;
2314
2315 /* The call heuristic claims that a guarded function call
2316 is improbable. This is because such calls are often used
2317 to signal exceptional situations such as printing error
2318 messages. */
2319 for (bi = gsi_start_bb (e->dest); !gsi_end_p (bi);
2320 gsi_next (&bi))
2321 {
2322 gimple stmt = gsi_stmt (bi);
2323 if (is_gimple_call (stmt)
2324 /* Constant and pure calls are hardly ever used to signal
2325 something exceptional. */
2326 && gimple_has_side_effects (stmt))
2327 {
2328 predict_edge_def (e, PRED_CALL, NOT_TAKEN);
2329 break;
2330 }
2331 }
2332 }
2333 }
2334 tree_predict_by_opcode (bb);
2335 }
2336
2337 /* Predict branch probabilities and estimate profile of the tree CFG.
2338 This function can be called from the loop optimizers to recompute
2339 the profile information. */
2340
2341 void
2342 tree_estimate_probability (void)
2343 {
2344 basic_block bb;
2345
2346 add_noreturn_fake_exit_edges ();
2347 connect_infinite_loops_to_exit ();
2348 /* We use loop_niter_by_eval, which requires that the loops have
2349 preheaders. */
2350 create_preheaders (CP_SIMPLE_PREHEADERS);
2351 calculate_dominance_info (CDI_POST_DOMINATORS);
2352
2353 bb_predictions = pointer_map_create ();
2354 tree_bb_level_predictions ();
2355 record_loop_exits ();
2356
2357 if (number_of_loops (cfun) > 1)
2358 predict_loops ();
2359
2360 FOR_EACH_BB (bb)
2361 tree_estimate_probability_bb (bb);
2362
2363 FOR_EACH_BB (bb)
2364 combine_predictions_for_bb (bb);
2365
2366 #ifdef ENABLE_CHECKING
2367 pointer_map_traverse (bb_predictions, assert_is_empty, NULL);
2368 #endif
2369 pointer_map_destroy (bb_predictions);
2370 bb_predictions = NULL;
2371
2372 estimate_bb_frequencies ();
2373 free_dominance_info (CDI_POST_DOMINATORS);
2374 remove_fake_exit_edges ();
2375 }
2376
2377 /* Predict branch probabilities and estimate profile of the tree CFG.
2378 This is the driver function for PASS_PROFILE. */
2379
2380 static unsigned int
2381 tree_estimate_probability_driver (void)
2382 {
2383 unsigned nb_loops;
2384
2385 loop_optimizer_init (LOOPS_NORMAL);
2386 if (dump_file && (dump_flags & TDF_DETAILS))
2387 flow_loops_dump (dump_file, NULL, 0);
2388
2389 mark_irreducible_loops ();
2390
2391 nb_loops = number_of_loops (cfun);
2392 if (nb_loops > 1)
2393 scev_initialize ();
2394
2395 tree_estimate_probability ();
2396
2397 if (nb_loops > 1)
2398 scev_finalize ();
2399
2400 loop_optimizer_finalize ();
2401 if (dump_file && (dump_flags & TDF_DETAILS))
2402 gimple_dump_cfg (dump_file, dump_flags);
2403 if (profile_status == PROFILE_ABSENT)
2404 profile_status = PROFILE_GUESSED;
2405 return 0;
2406 }
2407 \f
2408 /* Predict, using predictor PRED, the edges to successors of CUR whose sources
2409 are not postdominated by BB, and recurse into all postdominators. */
2410
2411 static void
2412 predict_paths_for_bb (basic_block cur, basic_block bb,
2413 enum br_predictor pred,
2414 enum prediction taken,
2415 bitmap visited)
2416 {
2417 edge e;
2418 edge_iterator ei;
2419 basic_block son;
2420
2421 /* We are looking for all edges forming the edge cut induced by
2422 the set of all blocks postdominated by BB. */
2423 FOR_EACH_EDGE (e, ei, cur->preds)
2424 if (e->src->index >= NUM_FIXED_BLOCKS
2425 && !dominated_by_p (CDI_POST_DOMINATORS, e->src, bb))
2426 {
2427 edge e2;
2428 edge_iterator ei2;
2429 bool found = false;
2430
2431 /* Ignore fake and EH edges; we predict them as not taken anyway. */
2432 if (e->flags & (EDGE_EH | EDGE_FAKE))
2433 continue;
2434 gcc_assert (bb == cur || dominated_by_p (CDI_POST_DOMINATORS, cur, bb));
2435
2436 /* See if there is an edge from e->src that is not abnormal
2437 and does not lead to BB. */
2438 FOR_EACH_EDGE (e2, ei2, e->src->succs)
2439 if (e2 != e
2440 && !(e2->flags & (EDGE_EH | EDGE_FAKE))
2441 && !dominated_by_p (CDI_POST_DOMINATORS, e2->dest, bb))
2442 {
2443 found = true;
2444 break;
2445 }
2446
2447 /* If there is a non-abnormal path leaving e->src, predict the edge
2448 using the predictor.  Otherwise we need to look for paths
2449 leading to e->src.
2450 
2451 The second case may lead to an infinite loop when we are predicting
2452 regions that are only reachable by abnormal edges.  We simply
2453 prevent visiting a given BB twice. */
2454 if (found)
2455 predict_edge_def (e, pred, taken);
2456 else if (bitmap_set_bit (visited, e->src->index))
2457 predict_paths_for_bb (e->src, e->src, pred, taken, visited);
2458 }
2459 for (son = first_dom_son (CDI_POST_DOMINATORS, cur);
2460 son;
2461 son = next_dom_son (CDI_POST_DOMINATORS, son))
2462 predict_paths_for_bb (son, bb, pred, taken, visited);
2463 }
2464
2465 /* Predict all paths leading to BB according to predictor PRED
2466 and prediction TAKEN. */
2467
2468 static void
2469 predict_paths_leading_to (basic_block bb, enum br_predictor pred,
2470 enum prediction taken)
2471 {
2472 bitmap visited = BITMAP_ALLOC (NULL);
2473 predict_paths_for_bb (bb, bb, pred, taken, visited);
2474 BITMAP_FREE (visited);
2475 }
2476
2477 /* Like predict_paths_leading_to but take edge instead of basic block. */
2478
2479 static void
2480 predict_paths_leading_to_edge (edge e, enum br_predictor pred,
2481 enum prediction taken)
2482 {
2483 bool has_nonloop_edge = false;
2484 edge_iterator ei;
2485 edge e2;
2486
2487 basic_block bb = e->src;
2488 FOR_EACH_EDGE (e2, ei, bb->succs)
2489 if (e2->dest != e->src && e2->dest != e->dest
2490 && !(e->flags & (EDGE_EH | EDGE_FAKE))
2491 && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e2->dest))
2492 {
2493 has_nonloop_edge = true;
2494 break;
2495 }
2496 if (!has_nonloop_edge)
2497 {
2498 bitmap visited = BITMAP_ALLOC (NULL);
2499 predict_paths_for_bb (bb, bb, pred, taken, visited);
2500 BITMAP_FREE (visited);
2501 }
2502 else
2503 predict_edge_def (e, pred, taken);
2504 }
2505 \f
2506 /* This is used to carry information about basic blocks. It is
2507 attached to the AUX field of the standard CFG block. */
2508
2509 typedef struct block_info_def
2510 {
2511 /* Estimated frequency of execution of basic_block. */
2512 sreal frequency;
2513
2514 /* To keep queue of basic blocks to process. */
2515 basic_block next;
2516
2517 /* Number of predecessors we need to visit first. */
2518 int npredecessors;
2519 } *block_info;
2520
2521 /* Similar information for edges. */
2522 typedef struct edge_info_def
2523 {
2524 /* If the edge is a loopback edge, the probability that the edge will be
2525 reached provided the header is.  The estimated number of iterations of
2526 the loop can then be computed as 1 / (1 - back_edge_prob). */
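/* For example, a latch edge taken with probability 0.9 yields an
   estimated 1 / (1 - 0.9) = 10 iterations of the loop.  */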
2527 sreal back_edge_prob;
2528 /* True if the edge is a loopback edge in the natural loop. */
2529 unsigned int back_edge:1;
2530 } *edge_info;
2531
2532 #define BLOCK_INFO(B) ((block_info) (B)->aux)
2533 #define EDGE_INFO(E) ((edge_info) (E)->aux)
2534
2535 /* Helper function for estimate_bb_frequencies.
2536 Propagate the frequencies in blocks marked in
2537 TOVISIT, starting in HEAD. */
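/* Blocks are processed in an order in which all non-back-edge
   predecessors of a block come first.  A block's frequency is the sum,
   over its non-back incoming edges, of the predecessor frequency times
   the edge probability; when back edges enter the block, the result is
   further scaled by 1 / (1 - cyclic_probability) to account for the
   expected number of iterations.  */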
2538
2539 static void
2540 propagate_freq (basic_block head, bitmap tovisit)
2541 {
2542 basic_block bb;
2543 basic_block last;
2544 unsigned i;
2545 edge e;
2546 basic_block nextbb;
2547 bitmap_iterator bi;
2548
2549 /* For each basic block we need to visit, count the number of its
2550 predecessors we need to visit first. */
2551 EXECUTE_IF_SET_IN_BITMAP (tovisit, 0, i, bi)
2552 {
2553 edge_iterator ei;
2554 int count = 0;
2555
2556 bb = BASIC_BLOCK (i);
2557
2558 FOR_EACH_EDGE (e, ei, bb->preds)
2559 {
2560 bool visit = bitmap_bit_p (tovisit, e->src->index);
2561
2562 if (visit && !(e->flags & EDGE_DFS_BACK))
2563 count++;
2564 else if (visit && dump_file && !EDGE_INFO (e)->back_edge)
2565 fprintf (dump_file,
2566 "Irreducible region hit, ignoring edge to %i->%i\n",
2567 e->src->index, bb->index);
2568 }
2569 BLOCK_INFO (bb)->npredecessors = count;
2570 /* When the function never returns, we will never process the exit block. */
2571 if (!count && bb == EXIT_BLOCK_PTR)
2572 bb->count = bb->frequency = 0;
2573 }
2574
2575 memcpy (&BLOCK_INFO (head)->frequency, &real_one, sizeof (real_one));
2576 last = head;
2577 for (bb = head; bb; bb = nextbb)
2578 {
2579 edge_iterator ei;
2580 sreal cyclic_probability, frequency;
2581
2582 memcpy (&cyclic_probability, &real_zero, sizeof (real_zero));
2583 memcpy (&frequency, &real_zero, sizeof (real_zero));
2584
2585 nextbb = BLOCK_INFO (bb)->next;
2586 BLOCK_INFO (bb)->next = NULL;
2587
2588 /* Compute frequency of basic block. */
2589 if (bb != head)
2590 {
2591 #ifdef ENABLE_CHECKING
2592 FOR_EACH_EDGE (e, ei, bb->preds)
2593 gcc_assert (!bitmap_bit_p (tovisit, e->src->index)
2594 || (e->flags & EDGE_DFS_BACK));
2595 #endif
2596
2597 FOR_EACH_EDGE (e, ei, bb->preds)
2598 if (EDGE_INFO (e)->back_edge)
2599 {
2600 sreal_add (&cyclic_probability, &cyclic_probability,
2601 &EDGE_INFO (e)->back_edge_prob);
2602 }
2603 else if (!(e->flags & EDGE_DFS_BACK))
2604 {
2605 sreal tmp;
2606
2607 /* frequency += (e->probability
2608 * BLOCK_INFO (e->src)->frequency /
2609 REG_BR_PROB_BASE); */
2610
2611 sreal_init (&tmp, e->probability, 0);
2612 sreal_mul (&tmp, &tmp, &BLOCK_INFO (e->src)->frequency);
2613 sreal_mul (&tmp, &tmp, &real_inv_br_prob_base);
2614 sreal_add (&frequency, &frequency, &tmp);
2615 }
2616
2617 if (sreal_compare (&cyclic_probability, &real_zero) == 0)
2618 {
2619 memcpy (&BLOCK_INFO (bb)->frequency, &frequency,
2620 sizeof (frequency));
2621 }
2622 else
2623 {
2624 if (sreal_compare (&cyclic_probability, &real_almost_one) > 0)
2625 {
2626 memcpy (&cyclic_probability, &real_almost_one,
2627 sizeof (real_almost_one));
2628 }
2629
2630 /* BLOCK_INFO (bb)->frequency = frequency
2631 / (1 - cyclic_probability) */
2632
2633 sreal_sub (&cyclic_probability, &real_one, &cyclic_probability);
2634 sreal_div (&BLOCK_INFO (bb)->frequency,
2635 &frequency, &cyclic_probability);
2636 }
2637 }
2638
2639 bitmap_clear_bit (tovisit, bb->index);
2640
2641 e = find_edge (bb, head);
2642 if (e)
2643 {
2644 sreal tmp;
2645
2646 /* EDGE_INFO (e)->back_edge_prob
2647 = ((e->probability * BLOCK_INFO (bb)->frequency)
2648 / REG_BR_PROB_BASE); */
2649
2650 sreal_init (&tmp, e->probability, 0);
2651 sreal_mul (&tmp, &tmp, &BLOCK_INFO (bb)->frequency);
2652 sreal_mul (&EDGE_INFO (e)->back_edge_prob,
2653 &tmp, &real_inv_br_prob_base);
2654 }
2655
2656 /* Propagate to successor blocks. */
2657 FOR_EACH_EDGE (e, ei, bb->succs)
2658 if (!(e->flags & EDGE_DFS_BACK)
2659 && BLOCK_INFO (e->dest)->npredecessors)
2660 {
2661 BLOCK_INFO (e->dest)->npredecessors--;
2662 if (!BLOCK_INFO (e->dest)->npredecessors)
2663 {
2664 if (!nextbb)
2665 nextbb = e->dest;
2666 else
2667 BLOCK_INFO (last)->next = e->dest;
2668
2669 last = e->dest;
2670 }
2671 }
2672 }
2673 }
2674
2675 /* Estimate probabilities of loopback edges in loops at the same nest level. */
2676
2677 static void
2678 estimate_loops_at_level (struct loop *first_loop)
2679 {
2680 struct loop *loop;
2681
2682 for (loop = first_loop; loop; loop = loop->next)
2683 {
2684 edge e;
2685 basic_block *bbs;
2686 unsigned i;
2687 bitmap tovisit = BITMAP_ALLOC (NULL);
2688
2689 estimate_loops_at_level (loop->inner);
2690
2691 /* Find current loop back edge and mark it. */
2692 e = loop_latch_edge (loop);
2693 EDGE_INFO (e)->back_edge = 1;
2694
2695 bbs = get_loop_body (loop);
2696 for (i = 0; i < loop->num_nodes; i++)
2697 bitmap_set_bit (tovisit, bbs[i]->index);
2698 free (bbs);
2699 propagate_freq (loop->header, tovisit);
2700 BITMAP_FREE (tovisit);
2701 }
2702 }
2703
2704 /* Propagate frequencies through the structure of loops. */
2705
2706 static void
2707 estimate_loops (void)
2708 {
2709 bitmap tovisit = BITMAP_ALLOC (NULL);
2710 basic_block bb;
2711
2712 /* Start by estimating the frequencies in the loops. */
2713 if (number_of_loops (cfun) > 1)
2714 estimate_loops_at_level (current_loops->tree_root->inner);
2715
2716 /* Now propagate the frequencies through all the blocks. */
2717 FOR_ALL_BB (bb)
2718 {
2719 bitmap_set_bit (tovisit, bb->index);
2720 }
2721 propagate_freq (ENTRY_BLOCK_PTR, tovisit);
2722 BITMAP_FREE (tovisit);
2723 }
2724
2725 /* Convert counts measured by profile driven feedback to frequencies.
2726 Return nonzero iff there was any nonzero execution count. */
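/* For example, assuming BB_FREQ_MAX is 10000, a block executed 30 times
   in a function whose most frequent block executed 300 times gets
   frequency (30 * 10000 + 150) / 300 == 1000.  */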
2727
2728 int
2729 counts_to_freqs (void)
2730 {
2731 gcov_type count_max, true_count_max = 0;
2732 basic_block bb;
2733
2734 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
2735 true_count_max = MAX (bb->count, true_count_max);
2736
2737 count_max = MAX (true_count_max, 1);
2738 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
2739 bb->frequency = (bb->count * BB_FREQ_MAX + count_max / 2) / count_max;
2740
2741 return true_count_max;
2742 }
2743
2744 /* Return true if the function is likely to be expensive, so there is no point
2745 in optimizing the prologue or epilogue or doing inlining at the expense
2746 of code size growth.  THRESHOLD is the limit on the number of instructions
2747 the function may execute on average and still be considered not expensive. */
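/* For instance, with THRESHOLD == 50 the function is reported as expensive
   as soon as the frequency-weighted count of active insns exceeds 50 times
   the entry block frequency, i.e. roughly 50 executed insns per invocation
   on average.  */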
2748
2749 bool
2750 expensive_function_p (int threshold)
2751 {
2752 unsigned int sum = 0;
2753 basic_block bb;
2754 unsigned int limit;
2755
2756 /* We cannot compute this accurately for large thresholds due to the
2757 scaled frequencies. */
2758 gcc_assert (threshold <= BB_FREQ_MAX);
2759
2760 /* Frequencies are out of range.  This either means that the function contains
2761 an internal loop executing more than BB_FREQ_MAX times or that profile
2762 feedback is available but the function has not been executed at all. */
2763 if (ENTRY_BLOCK_PTR->frequency == 0)
2764 return true;
2765
2766 /* This is at most BB_FREQ_MAX^2, so overflow won't happen. */
2767 limit = ENTRY_BLOCK_PTR->frequency * threshold;
2768 FOR_EACH_BB (bb)
2769 {
2770 rtx insn;
2771
2772 FOR_BB_INSNS (bb, insn)
2773 if (active_insn_p (insn))
2774 {
2775 sum += bb->frequency;
2776 if (sum > limit)
2777 return true;
2778 }
2779 }
2780
2781 return false;
2782 }
2783
2784 /* Estimate basic block frequencies from the given branch probabilities. */
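/* The frequencies obtained by propagation are relative; they are finally
   rescaled so that the most frequent block gets a frequency close to
   BB_FREQ_MAX.  When real profile counts are available (PROFILE_READ),
   counts_to_freqs is used instead.  */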
2785
2786 void
2787 estimate_bb_frequencies (void)
2788 {
2789 basic_block bb;
2790 sreal freq_max;
2791
2792 if (profile_status != PROFILE_READ || !counts_to_freqs ())
2793 {
2794 static int real_values_initialized = 0;
2795
2796 if (!real_values_initialized)
2797 {
2798 real_values_initialized = 1;
2799 sreal_init (&real_zero, 0, 0);
2800 sreal_init (&real_one, 1, 0);
2801 sreal_init (&real_br_prob_base, REG_BR_PROB_BASE, 0);
2802 sreal_init (&real_bb_freq_max, BB_FREQ_MAX, 0);
2803 sreal_init (&real_one_half, 1, -1);
2804 sreal_div (&real_inv_br_prob_base, &real_one, &real_br_prob_base);
2805 sreal_sub (&real_almost_one, &real_one, &real_inv_br_prob_base);
2806 }
2807
2808 mark_dfs_back_edges ();
2809
2810 single_succ_edge (ENTRY_BLOCK_PTR)->probability = REG_BR_PROB_BASE;
2811
2812 /* Set up block info for each basic block. */
2813 alloc_aux_for_blocks (sizeof (struct block_info_def));
2814 alloc_aux_for_edges (sizeof (struct edge_info_def));
2815 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
2816 {
2817 edge e;
2818 edge_iterator ei;
2819
2820 FOR_EACH_EDGE (e, ei, bb->succs)
2821 {
2822 sreal_init (&EDGE_INFO (e)->back_edge_prob, e->probability, 0);
2823 sreal_mul (&EDGE_INFO (e)->back_edge_prob,
2824 &EDGE_INFO (e)->back_edge_prob,
2825 &real_inv_br_prob_base);
2826 }
2827 }
2828
2829 /* First compute probabilities locally for each loop from innermost
2830 to outermost to examine probabilities for back edges. */
2831 estimate_loops ();
2832
2833 memcpy (&freq_max, &real_zero, sizeof (real_zero));
2834 FOR_EACH_BB (bb)
2835 if (sreal_compare (&freq_max, &BLOCK_INFO (bb)->frequency) < 0)
2836 memcpy (&freq_max, &BLOCK_INFO (bb)->frequency, sizeof (freq_max));
2837
2838 sreal_div (&freq_max, &real_bb_freq_max, &freq_max);
2839 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
2840 {
2841 sreal tmp;
2842
2843 sreal_mul (&tmp, &BLOCK_INFO (bb)->frequency, &freq_max);
2844 sreal_add (&tmp, &tmp, &real_one_half);
2845 bb->frequency = sreal_to_int (&tmp);
2846 }
2847
2848 free_aux_for_blocks ();
2849 free_aux_for_edges ();
2850 }
2851 compute_function_frequency ();
2852 }
2853
2854 /* Decide whether the function is hot, cold or unlikely executed. */
2855 void
2856 compute_function_frequency (void)
2857 {
2858 basic_block bb;
2859 struct cgraph_node *node = cgraph_get_node (current_function_decl);
2860 if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
2861 || MAIN_NAME_P (DECL_NAME (current_function_decl)))
2862 node->only_called_at_startup = true;
2863 if (DECL_STATIC_DESTRUCTOR (current_function_decl))
2864 node->only_called_at_exit = true;
2865
2866 if (!profile_info || !flag_branch_probabilities)
2867 {
2868 int flags = flags_from_decl_or_type (current_function_decl);
2869 if (lookup_attribute ("cold", DECL_ATTRIBUTES (current_function_decl))
2870 != NULL)
2871 node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
2872 else if (lookup_attribute ("hot", DECL_ATTRIBUTES (current_function_decl))
2873 != NULL)
2874 node->frequency = NODE_FREQUENCY_HOT;
2875 else if (flags & ECF_NORETURN)
2876 node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
2877 else if (MAIN_NAME_P (DECL_NAME (current_function_decl)))
2878 node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
2879 else if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
2880 || DECL_STATIC_DESTRUCTOR (current_function_decl))
2881 node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
2882 return;
2883 }
2884 node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
2885 FOR_EACH_BB (bb)
2886 {
2887 if (maybe_hot_bb_p (cfun, bb))
2888 {
2889 node->frequency = NODE_FREQUENCY_HOT;
2890 return;
2891 }
2892 if (!probably_never_executed_bb_p (cfun, bb))
2893 node->frequency = NODE_FREQUENCY_NORMAL;
2894 }
2895 }
2896
2897 static bool
2898 gate_estimate_probability (void)
2899 {
2900 return flag_guess_branch_prob;
2901 }
2902
2903 /* Build PREDICT_EXPR. */
2904 tree
2905 build_predict_expr (enum br_predictor predictor, enum prediction taken)
2906 {
2907 tree t = build1 (PREDICT_EXPR, void_type_node,
2908 build_int_cst (integer_type_node, predictor));
2909 SET_PREDICT_EXPR_OUTCOME (t, taken);
2910 return t;
2911 }
2912
2913 const char *
2914 predictor_name (enum br_predictor predictor)
2915 {
2916 return predictor_info[predictor].name;
2917 }
2918
2919 namespace {
2920
2921 const pass_data pass_data_profile =
2922 {
2923 GIMPLE_PASS, /* type */
2924 "profile_estimate", /* name */
2925 OPTGROUP_NONE, /* optinfo_flags */
2926 true, /* has_gate */
2927 true, /* has_execute */
2928 TV_BRANCH_PROB, /* tv_id */
2929 PROP_cfg, /* properties_required */
2930 0, /* properties_provided */
2931 0, /* properties_destroyed */
2932 0, /* todo_flags_start */
2933 TODO_verify_ssa, /* todo_flags_finish */
2934 };
2935
2936 class pass_profile : public gimple_opt_pass
2937 {
2938 public:
2939 pass_profile(gcc::context *ctxt)
2940 : gimple_opt_pass(pass_data_profile, ctxt)
2941 {}
2942
2943 /* opt_pass methods: */
2944 bool gate () { return gate_estimate_probability (); }
2945 unsigned int execute () { return tree_estimate_probability_driver (); }
2946
2947 }; // class pass_profile
2948
2949 } // anon namespace
2950
2951 gimple_opt_pass *
2952 make_pass_profile (gcc::context *ctxt)
2953 {
2954 return new pass_profile (ctxt);
2955 }
2956
2957 namespace {
2958
2959 const pass_data pass_data_strip_predict_hints =
2960 {
2961 GIMPLE_PASS, /* type */
2962 "*strip_predict_hints", /* name */
2963 OPTGROUP_NONE, /* optinfo_flags */
2964 false, /* has_gate */
2965 true, /* has_execute */
2966 TV_BRANCH_PROB, /* tv_id */
2967 PROP_cfg, /* properties_required */
2968 0, /* properties_provided */
2969 0, /* properties_destroyed */
2970 0, /* todo_flags_start */
2971 TODO_verify_ssa, /* todo_flags_finish */
2972 };
2973
2974 class pass_strip_predict_hints : public gimple_opt_pass
2975 {
2976 public:
2977 pass_strip_predict_hints(gcc::context *ctxt)
2978 : gimple_opt_pass(pass_data_strip_predict_hints, ctxt)
2979 {}
2980
2981 /* opt_pass methods: */
2982 opt_pass * clone () { return new pass_strip_predict_hints (ctxt_); }
2983 unsigned int execute () { return strip_predict_hints (); }
2984
2985 }; // class pass_strip_predict_hints
2986
2987 } // anon namespace
2988
2989 gimple_opt_pass *
2990 make_pass_strip_predict_hints (gcc::context *ctxt)
2991 {
2992 return new pass_strip_predict_hints (ctxt);
2993 }
2994
2995 /* Rebuild function frequencies.  Passes are in general expected to
2996 maintain the profile by hand; however, in some cases this is not possible:
2997 for example, when inlining several functions with loops, frequencies might run
2998 out of scale and thus need to be recomputed. */
2999
3000 void
3001 rebuild_frequencies (void)
3002 {
3003 timevar_push (TV_REBUILD_FREQUENCIES);
3004 if (profile_status == PROFILE_GUESSED)
3005 {
3006 loop_optimizer_init (0);
3007 add_noreturn_fake_exit_edges ();
3008 mark_irreducible_loops ();
3009 connect_infinite_loops_to_exit ();
3010 estimate_bb_frequencies ();
3011 remove_fake_exit_edges ();
3012 loop_optimizer_finalize ();
3013 }
3014 else if (profile_status == PROFILE_READ)
3015 counts_to_freqs ();
3016 else
3017 gcc_unreachable ();
3018 timevar_pop (TV_REBUILD_FREQUENCIES);
3019 }