predict.c (probably_never_executed): New function.
1 /* Branch prediction routines for the GNU compiler.
2 Copyright (C) 2000-2013 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 /* References:
21
22 [1] "Branch Prediction for Free"
23 Ball and Larus; PLDI '93.
24 [2] "Static Branch Frequency and Program Profile Analysis"
25 Wu and Larus; MICRO-27.
26 [3] "Corpus-based Static Branch Prediction"
27 Calder, Grunwald, Lindsay, Martin, Mozer, and Zorn; PLDI '95. */
28
29
30 #include "config.h"
31 #include "system.h"
32 #include "coretypes.h"
33 #include "tm.h"
34 #include "tree.h"
35 #include "rtl.h"
36 #include "tm_p.h"
37 #include "hard-reg-set.h"
38 #include "basic-block.h"
39 #include "insn-config.h"
40 #include "regs.h"
41 #include "flags.h"
42 #include "function.h"
43 #include "except.h"
44 #include "diagnostic-core.h"
45 #include "recog.h"
46 #include "expr.h"
47 #include "predict.h"
48 #include "coverage.h"
49 #include "sreal.h"
50 #include "params.h"
51 #include "target.h"
52 #include "cfgloop.h"
53 #include "tree-ssa.h"
54 #include "ggc.h"
55 #include "tree-pass.h"
56 #include "tree-scalar-evolution.h"
57 #include "cfgloop.h"
58 #include "pointer-set.h"
59
60 /* real constants: 0, 1, 1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE,
61 1/REG_BR_PROB_BASE, 0.5, BB_FREQ_MAX. */
62 static sreal real_zero, real_one, real_almost_one, real_br_prob_base,
63 real_inv_br_prob_base, real_one_half, real_bb_freq_max;
64
65 /* Rough probability guesses, given names.
66 PROB_VERY_UNLIKELY should be small enough so that a basic block
67 predicted by it stays below HOT_BB_FREQUENCY_FRACTION. */
68 #define PROB_VERY_UNLIKELY (REG_BR_PROB_BASE / 2000 - 1)
69 #define PROB_EVEN (REG_BR_PROB_BASE / 2)
70 #define PROB_VERY_LIKELY (REG_BR_PROB_BASE - PROB_VERY_UNLIKELY)
71 #define PROB_ALWAYS (REG_BR_PROB_BASE)
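/* For illustration, assuming REG_BR_PROB_BASE has its usual value of 10000:
   PROB_VERY_UNLIKELY is then 4 (0.04%), PROB_EVEN 5000 (50%),
   PROB_VERY_LIKELY 9996 (99.96%) and PROB_ALWAYS 10000 (100%).  */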
72
73 static void combine_predictions_for_insn (rtx, basic_block);
74 static void dump_prediction (FILE *, enum br_predictor, int, basic_block, int);
75 static void predict_paths_leading_to (basic_block, enum br_predictor, enum prediction);
76 static void predict_paths_leading_to_edge (edge, enum br_predictor, enum prediction);
77 static bool can_predict_insn_p (const_rtx);
78
79 /* Information we hold about each branch predictor.
80 Filled using information from predict.def. */
81
82 struct predictor_info
83 {
84 const char *const name; /* Name used in the debugging dumps. */
85 const int hitrate; /* Expected hitrate used by
86 predict_insn_def call. */
87 const int flags;
88 };
89
90 /* Use the given predictor without Dempster-Shafer theory if it matches
91 using the first_match heuristic. */
92 #define PRED_FLAG_FIRST_MATCH 1
93
94 /* Convert a hit rate given in percent to our internal representation. */
95
96 #define HITRATE(VAL) ((int) ((VAL) * REG_BR_PROB_BASE + 50) / 100)
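/* For example, with REG_BR_PROB_BASE assumed to be 10000, HITRATE (50)
   evaluates to 5000 and HITRATE (90) to 9000, i.e. a hit rate of N percent
   maps to N * 100 in the internal scale.  */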
97
98 #define DEF_PREDICTOR(ENUM, NAME, HITRATE, FLAGS) {NAME, HITRATE, FLAGS},
99 static const struct predictor_info predictor_info[]= {
100 #include "predict.def"
101
102 /* Upper bound on predictors. */
103 {NULL, 0, 0}
104 };
105 #undef DEF_PREDICTOR
106
107 /* Return TRUE if frequency FREQ is considered to be hot. */
108
109 static inline bool
110 maybe_hot_frequency_p (struct function *fun, int freq)
111 {
112 struct cgraph_node *node = cgraph_get_node (fun->decl);
113 if (!profile_info || !flag_branch_probabilities)
114 {
115 if (node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED)
116 return false;
117 if (node->frequency == NODE_FREQUENCY_HOT)
118 return true;
119 }
120 if (profile_status_for_function (fun) == PROFILE_ABSENT)
121 return true;
122 if (node->frequency == NODE_FREQUENCY_EXECUTED_ONCE
123 && freq < (ENTRY_BLOCK_PTR_FOR_FUNCTION (fun)->frequency * 2 / 3))
124 return false;
125 if (PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION) == 0)
126 return false;
127 if (freq < (ENTRY_BLOCK_PTR_FOR_FUNCTION (fun)->frequency
128 / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION)))
129 return false;
130 return true;
131 }
132
133 static gcov_type min_count = -1;
134
135 /* Determine the threshold for hot BB counts. */
136
137 gcov_type
138 get_hot_bb_threshold ()
139 {
140 gcov_working_set_t *ws;
141 if (min_count == -1)
142 {
143 ws = find_working_set (PARAM_VALUE (HOT_BB_COUNT_WS_PERMILLE));
144 gcc_assert (ws);
145 min_count = ws->min_counter;
146 }
147 return min_count;
148 }
149
150 /* Set the threshold for hot BB counts. */
151
152 void
153 set_hot_bb_threshold (gcov_type min)
154 {
155 min_count = min;
156 }
157
158 /* Return TRUE if profile count COUNT is considered to be hot. */
159
160 static inline bool
161 maybe_hot_count_p (struct function *fun, gcov_type count)
162 {
163 if (fun && profile_status_for_function (fun) != PROFILE_READ)
164 return true;
165 /* Code executed at most once is not hot. */
166 if (profile_info->runs >= count)
167 return false;
168 return (count >= get_hot_bb_threshold ());
169 }
170
171 /* Return true in case BB can be CPU intensive and should be optimized
172 for maximal performance. */
173
174 bool
175 maybe_hot_bb_p (struct function *fun, const_basic_block bb)
176 {
177 gcc_checking_assert (fun);
178 if (profile_status_for_function (fun) == PROFILE_READ)
179 return maybe_hot_count_p (fun, bb->count);
180 return maybe_hot_frequency_p (fun, bb->frequency);
181 }
182
183 /* Return true if the call can be hot. */
184
185 bool
186 cgraph_maybe_hot_edge_p (struct cgraph_edge *edge)
187 {
188 if (profile_info && flag_branch_probabilities
189 && !maybe_hot_count_p (NULL,
190 edge->count))
191 return false;
192 if (edge->caller->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED
193 || (edge->callee
194 && edge->callee->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED))
195 return false;
196 if (edge->caller->frequency > NODE_FREQUENCY_UNLIKELY_EXECUTED
197 && (edge->callee
198 && edge->callee->frequency <= NODE_FREQUENCY_EXECUTED_ONCE))
199 return false;
200 if (optimize_size)
201 return false;
202 if (edge->caller->frequency == NODE_FREQUENCY_HOT)
203 return true;
204 if (edge->caller->frequency == NODE_FREQUENCY_EXECUTED_ONCE
205 && edge->frequency < CGRAPH_FREQ_BASE * 3 / 2)
206 return false;
207 if (flag_guess_branch_prob)
208 {
209 if (PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION) == 0
210 || edge->frequency <= (CGRAPH_FREQ_BASE
211 / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION)))
212 return false;
213 }
214 return true;
215 }
216
217 /* Return true in case edge E can be CPU intensive and should be optimized
218 for maximal performance. */
219
220 bool
221 maybe_hot_edge_p (edge e)
222 {
223 if (profile_status == PROFILE_READ)
224 return maybe_hot_count_p (cfun, e->count);
225 return maybe_hot_frequency_p (cfun, EDGE_FREQUENCY (e));
226 }
227
228
229
230 /* Return true if profile COUNT and FREQUENCY, or function FUN's static
231 node frequency, reflect never being executed. */
232
233 static bool
234 probably_never_executed (struct function *fun,
235 gcov_type count, int frequency)
236 {
237 gcc_checking_assert (fun);
238 if (profile_status_for_function (fun) == PROFILE_READ)
239 {
240 if ((count * 4 + profile_info->runs / 2) / profile_info->runs > 0)
241 return false;
242 if (!frequency)
243 return true;
244 if (!ENTRY_BLOCK_PTR->frequency)
245 return false;
246 if (ENTRY_BLOCK_PTR->count && ENTRY_BLOCK_PTR->count < REG_BR_PROB_BASE)
247 {
248 return (RDIV (frequency * ENTRY_BLOCK_PTR->count,
249 ENTRY_BLOCK_PTR->frequency)
250 < REG_BR_PROB_BASE / 4);
251 }
252 return true;
253 }
254 if ((!profile_info || !flag_branch_probabilities)
255 && (cgraph_get_node (fun->decl)->frequency
256 == NODE_FREQUENCY_UNLIKELY_EXECUTED))
257 return true;
258 return false;
259 }
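/* A small worked example of the counter test above: with
   profile_info->runs == 100, a count of 13 gives (13 * 4 + 50) / 100 == 1 and
   the block is not treated as never executed, while a count of 12 gives 0 and
   the decision falls through to the frequency-based checks.  */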
260
261
262 /* Return true in case BB is probably never executed. */
263
264 bool
265 probably_never_executed_bb_p (struct function *fun, const_basic_block bb)
266 {
267 return probably_never_executed (fun, bb->count, bb->frequency);
268 }
269
270
271 /* Return true in case edge E is probably never executed. */
272
273 bool
274 probably_never_executed_edge_p (struct function *fun, edge e)
275 {
276 return probably_never_executed (fun, e->count, EDGE_FREQUENCY (e));
277 }
278
279 /* Return true if NODE should be optimized for size. */
280
281 bool
282 cgraph_optimize_for_size_p (struct cgraph_node *node)
283 {
284 if (optimize_size)
285 return true;
286 if (node && (node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED))
287 return true;
288 else
289 return false;
290 }
291
292 /* Return true when current function should always be optimized for size. */
293
294 bool
295 optimize_function_for_size_p (struct function *fun)
296 {
297 if (optimize_size)
298 return true;
299 if (!fun || !fun->decl)
300 return false;
301 return cgraph_optimize_for_size_p (cgraph_get_node (fun->decl));
302 }
303
304 /* Return true when current function should always be optimized for speed. */
305
306 bool
307 optimize_function_for_speed_p (struct function *fun)
308 {
309 return !optimize_function_for_size_p (fun);
310 }
311
312 /* Return TRUE when BB should be optimized for size. */
313
314 bool
315 optimize_bb_for_size_p (const_basic_block bb)
316 {
317 return optimize_function_for_size_p (cfun) || !maybe_hot_bb_p (cfun, bb);
318 }
319
320 /* Return TRUE when BB should be optimized for speed. */
321
322 bool
323 optimize_bb_for_speed_p (const_basic_block bb)
324 {
325 return !optimize_bb_for_size_p (bb);
326 }
327
328 /* Return TRUE when edge E should be optimized for size. */
329
330 bool
331 optimize_edge_for_size_p (edge e)
332 {
333 return optimize_function_for_size_p (cfun) || !maybe_hot_edge_p (e);
334 }
335
336 /* Return TRUE when edge E should be optimized for speed. */
337
338 bool
339 optimize_edge_for_speed_p (edge e)
340 {
341 return !optimize_edge_for_size_p (e);
342 }
343
344 /* Return TRUE when the current instruction should be optimized for size. */
345
346 bool
347 optimize_insn_for_size_p (void)
348 {
349 return optimize_function_for_size_p (cfun) || !crtl->maybe_hot_insn_p;
350 }
351
352 /* Return TRUE when the current instruction should be optimized for speed. */
353
354 bool
355 optimize_insn_for_speed_p (void)
356 {
357 return !optimize_insn_for_size_p ();
358 }
359
360 /* Return TRUE when LOOP should be optimized for size. */
361
362 bool
363 optimize_loop_for_size_p (struct loop *loop)
364 {
365 return optimize_bb_for_size_p (loop->header);
366 }
367
368 /* Return TRUE when LOOP should be optimized for speed. */
369
370 bool
371 optimize_loop_for_speed_p (struct loop *loop)
372 {
373 return optimize_bb_for_speed_p (loop->header);
374 }
375
376 /* Return TRUE when LOOP nest should be optimized for speed. */
377
378 bool
379 optimize_loop_nest_for_speed_p (struct loop *loop)
380 {
381 struct loop *l = loop;
382 if (optimize_loop_for_speed_p (loop))
383 return true;
384 l = loop->inner;
385 while (l && l != loop)
386 {
387 if (optimize_loop_for_speed_p (l))
388 return true;
389 if (l->inner)
390 l = l->inner;
391 else if (l->next)
392 l = l->next;
393 else
394 {
395 while (l != loop && !l->next)
396 l = loop_outer (l);
397 if (l != loop)
398 l = l->next;
399 }
400 }
401 return false;
402 }
403
404 /* Return TRUE when LOOP nest should be optimized for size. */
405
406 bool
407 optimize_loop_nest_for_size_p (struct loop *loop)
408 {
409 return !optimize_loop_nest_for_speed_p (loop);
410 }
411
412 /* Return true when edge E is likely to be well predictable by branch
413 predictor. */
414
415 bool
416 predictable_edge_p (edge e)
417 {
418 if (profile_status == PROFILE_ABSENT)
419 return false;
420 if ((e->probability
421 <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100)
422 || (REG_BR_PROB_BASE - e->probability
423 <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100))
424 return true;
425 return false;
426 }
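/* For illustration: with PARAM_PREDICTABLE_BRANCH_OUTCOME at its assumed
   default of 2, an edge counts as predictable when its probability is at most
   2% or at least 98% of REG_BR_PROB_BASE.  */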
427
428
429 /* Set RTL expansion for BB profile. */
430
431 void
432 rtl_profile_for_bb (basic_block bb)
433 {
434 crtl->maybe_hot_insn_p = maybe_hot_bb_p (cfun, bb);
435 }
436
437 /* Set RTL expansion for edge profile. */
438
439 void
440 rtl_profile_for_edge (edge e)
441 {
442 crtl->maybe_hot_insn_p = maybe_hot_edge_p (e);
443 }
444
445 /* Set RTL expansion to default mode (i.e. when profile info is not known). */
446 void
447 default_rtl_profile (void)
448 {
449 crtl->maybe_hot_insn_p = true;
450 }
451
452 /* Return true if one of the outgoing edges of BB is already predicted by
453 PREDICTOR. */
454
455 bool
456 rtl_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
457 {
458 rtx note;
459 if (!INSN_P (BB_END (bb)))
460 return false;
461 for (note = REG_NOTES (BB_END (bb)); note; note = XEXP (note, 1))
462 if (REG_NOTE_KIND (note) == REG_BR_PRED
463 && INTVAL (XEXP (XEXP (note, 0), 0)) == (int)predictor)
464 return true;
465 return false;
466 }
467
468 /* This map contains for a basic block the list of predictions for the
469 outgoing edges. */
470
471 static struct pointer_map_t *bb_predictions;
472
473 /* Structure representing predictions at the tree level. */
474
475 struct edge_prediction {
476 struct edge_prediction *ep_next;
477 edge ep_edge;
478 enum br_predictor ep_predictor;
479 int ep_probability;
480 };
481
482 /* Return true if one of the outgoing edges of BB is already predicted by
483 PREDICTOR. */
484
485 bool
486 gimple_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
487 {
488 struct edge_prediction *i;
489 void **preds = pointer_map_contains (bb_predictions, bb);
490
491 if (!preds)
492 return false;
493
494 for (i = (struct edge_prediction *) *preds; i; i = i->ep_next)
495 if (i->ep_predictor == predictor)
496 return true;
497 return false;
498 }
499
500 /* Return true when the probability of the edge is reliable.
501 
502 The profile guessing code is good at predicting the branch outcome (i.e.
503 taken/not taken), which it gets right slightly over 75% of the time.
504 It is, however, notoriously poor at predicting the probability itself.
505 In general the guessed profile appears a lot flatter (with probabilities
506 closer to 50%) than reality, so it is a bad idea to use it to drive
507 optimizations such as those disabling dynamic branch prediction for well
508 predictable branches.
509 
510 There are two exceptions - edges leading to noreturn calls and edges
511 predicted by the number of iterations heuristics are predicted well. This
512 predicate should be able to distinguish those, but at the moment it simply
513 checks for the noreturn heuristic, which is the only one giving probabilities
514 over 99% or below 1%. In the future we might want to propagate reliability
515 information across the CFG if we find it useful in multiple places. */
516 static bool
517 probability_reliable_p (int prob)
518 {
519 return (profile_status == PROFILE_READ
520 || (profile_status == PROFILE_GUESSED
521 && (prob <= HITRATE (1) || prob >= HITRATE (99))));
522 }
523
524 /* Same predicate as above, working on edges. */
525 bool
526 edge_probability_reliable_p (const_edge e)
527 {
528 return probability_reliable_p (e->probability);
529 }
530
531 /* Same predicate as edge_probability_reliable_p, working on notes. */
532 bool
533 br_prob_note_reliable_p (const_rtx note)
534 {
535 gcc_assert (REG_NOTE_KIND (note) == REG_BR_PROB);
536 return probability_reliable_p (XINT (note, 0));
537 }
538
539 static void
540 predict_insn (rtx insn, enum br_predictor predictor, int probability)
541 {
542 gcc_assert (any_condjump_p (insn));
543 if (!flag_guess_branch_prob)
544 return;
545
546 add_reg_note (insn, REG_BR_PRED,
547 gen_rtx_CONCAT (VOIDmode,
548 GEN_INT ((int) predictor),
549 GEN_INT ((int) probability)));
550 }
551
552 /* Predict insn by given predictor. */
553
554 void
555 predict_insn_def (rtx insn, enum br_predictor predictor,
556 enum prediction taken)
557 {
558 int probability = predictor_info[(int) predictor].hitrate;
559
560 if (taken != TAKEN)
561 probability = REG_BR_PROB_BASE - probability;
562
563 predict_insn (insn, predictor, probability);
564 }
565
566 /* Predict edge E with given probability if possible. */
567
568 void
569 rtl_predict_edge (edge e, enum br_predictor predictor, int probability)
570 {
571 rtx last_insn;
572 last_insn = BB_END (e->src);
573
574 /* We can store the branch prediction information only about
575 conditional jumps. */
576 if (!any_condjump_p (last_insn))
577 return;
578
579 /* We always store probability of branching. */
580 if (e->flags & EDGE_FALLTHRU)
581 probability = REG_BR_PROB_BASE - probability;
582
583 predict_insn (last_insn, predictor, probability);
584 }
585
586 /* Predict edge E with the given PROBABILITY. */
587 void
588 gimple_predict_edge (edge e, enum br_predictor predictor, int probability)
589 {
590 gcc_assert (profile_status != PROFILE_GUESSED);
591 if ((e->src != ENTRY_BLOCK_PTR && EDGE_COUNT (e->src->succs) > 1)
592 && flag_guess_branch_prob && optimize)
593 {
594 struct edge_prediction *i = XNEW (struct edge_prediction);
595 void **preds = pointer_map_insert (bb_predictions, e->src);
596
597 i->ep_next = (struct edge_prediction *) *preds;
598 *preds = i;
599 i->ep_probability = probability;
600 i->ep_predictor = predictor;
601 i->ep_edge = e;
602 }
603 }
604
605 /* Remove all predictions on given basic block that are attached
606 to edge E. */
607 void
608 remove_predictions_associated_with_edge (edge e)
609 {
610 void **preds;
611
612 if (!bb_predictions)
613 return;
614
615 preds = pointer_map_contains (bb_predictions, e->src);
616
617 if (preds)
618 {
619 struct edge_prediction **prediction = (struct edge_prediction **) preds;
620 struct edge_prediction *next;
621
622 while (*prediction)
623 {
624 if ((*prediction)->ep_edge == e)
625 {
626 next = (*prediction)->ep_next;
627 free (*prediction);
628 *prediction = next;
629 }
630 else
631 prediction = &((*prediction)->ep_next);
632 }
633 }
634 }
635
636 /* Clears the list of predictions stored for BB. */
637
638 static void
639 clear_bb_predictions (basic_block bb)
640 {
641 void **preds = pointer_map_contains (bb_predictions, bb);
642 struct edge_prediction *pred, *next;
643
644 if (!preds)
645 return;
646
647 for (pred = (struct edge_prediction *) *preds; pred; pred = next)
648 {
649 next = pred->ep_next;
650 free (pred);
651 }
652 *preds = NULL;
653 }
654
655 /* Return true when we can store a prediction on insn INSN.
656 At the moment we represent predictions only on conditional
657 jumps, not on computed jumps or other complicated cases. */
658 static bool
659 can_predict_insn_p (const_rtx insn)
660 {
661 return (JUMP_P (insn)
662 && any_condjump_p (insn)
663 && EDGE_COUNT (BLOCK_FOR_INSN (insn)->succs) >= 2);
664 }
665
666 /* Predict edge E by given predictor if possible. */
667
668 void
669 predict_edge_def (edge e, enum br_predictor predictor,
670 enum prediction taken)
671 {
672 int probability = predictor_info[(int) predictor].hitrate;
673
674 if (taken != TAKEN)
675 probability = REG_BR_PROB_BASE - probability;
676
677 predict_edge (e, predictor, probability);
678 }
679
680 /* Invert all branch predictions or probability notes in the INSN. This needs
681 to be done each time we invert the condition used by the jump. */
682
683 void
684 invert_br_probabilities (rtx insn)
685 {
686 rtx note;
687
688 for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
689 if (REG_NOTE_KIND (note) == REG_BR_PROB)
690 XINT (note, 0) = REG_BR_PROB_BASE - XINT (note, 0);
691 else if (REG_NOTE_KIND (note) == REG_BR_PRED)
692 XEXP (XEXP (note, 0), 1)
693 = GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (XEXP (note, 0), 1)));
694 }
695
696 /* Dump information about the branch prediction to the output file. */
697
698 static void
699 dump_prediction (FILE *file, enum br_predictor predictor, int probability,
700 basic_block bb, int used)
701 {
702 edge e;
703 edge_iterator ei;
704
705 if (!file)
706 return;
707
708 FOR_EACH_EDGE (e, ei, bb->succs)
709 if (! (e->flags & EDGE_FALLTHRU))
710 break;
711
712 fprintf (file, " %s heuristics%s: %.1f%%",
713 predictor_info[predictor].name,
714 used ? "" : " (ignored)", probability * 100.0 / REG_BR_PROB_BASE);
715
716 if (bb->count)
717 {
718 fprintf (file, " exec ");
719 fprintf (file, HOST_WIDEST_INT_PRINT_DEC, bb->count);
720 if (e)
721 {
722 fprintf (file, " hit ");
723 fprintf (file, HOST_WIDEST_INT_PRINT_DEC, e->count);
724 fprintf (file, " (%.1f%%)", e->count * 100.0 / bb->count);
725 }
726 }
727
728 fprintf (file, "\n");
729 }
730
731 /* We cannot predict the probabilities of the outgoing edges of BB. Set them
732 evenly and hope for the best. */
733 static void
734 set_even_probabilities (basic_block bb)
735 {
736 int nedges = 0;
737 edge e;
738 edge_iterator ei;
739
740 FOR_EACH_EDGE (e, ei, bb->succs)
741 if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
742 nedges ++;
743 FOR_EACH_EDGE (e, ei, bb->succs)
744 if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
745 e->probability = (REG_BR_PROB_BASE + nedges / 2) / nedges;
746 else
747 e->probability = 0;
748 }
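/* For example, a basic block with three non-EH, non-fake successors and
   REG_BR_PROB_BASE of 10000 gives each of them (10000 + 1) / 3 == 3333.  */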
749
750 /* Combine all REG_BR_PRED notes into a single probability and attach a
751 REG_BR_PROB note if not already present. Remove the now useless REG_BR_PRED notes. */
752
753 static void
754 combine_predictions_for_insn (rtx insn, basic_block bb)
755 {
756 rtx prob_note;
757 rtx *pnote;
758 rtx note;
759 int best_probability = PROB_EVEN;
760 enum br_predictor best_predictor = END_PREDICTORS;
761 int combined_probability = REG_BR_PROB_BASE / 2;
762 int d;
763 bool first_match = false;
764 bool found = false;
765
766 if (!can_predict_insn_p (insn))
767 {
768 set_even_probabilities (bb);
769 return;
770 }
771
772 prob_note = find_reg_note (insn, REG_BR_PROB, 0);
773 pnote = &REG_NOTES (insn);
774 if (dump_file)
775 fprintf (dump_file, "Predictions for insn %i bb %i\n", INSN_UID (insn),
776 bb->index);
777
778 /* We implement the "first match" heuristic and use the probability guessed
779 by the predictor with the smallest index. */
780 for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
781 if (REG_NOTE_KIND (note) == REG_BR_PRED)
782 {
783 enum br_predictor predictor = ((enum br_predictor)
784 INTVAL (XEXP (XEXP (note, 0), 0)));
785 int probability = INTVAL (XEXP (XEXP (note, 0), 1));
786
787 found = true;
788 if (best_predictor > predictor)
789 best_probability = probability, best_predictor = predictor;
790
791 d = (combined_probability * probability
792 + (REG_BR_PROB_BASE - combined_probability)
793 * (REG_BR_PROB_BASE - probability));
794
795 /* Use FP math to avoid overflows of 32bit integers. */
796 if (d == 0)
797 /* If one probability is 0% and one 100%, avoid division by zero. */
798 combined_probability = REG_BR_PROB_BASE / 2;
799 else
800 combined_probability = (((double) combined_probability) * probability
801 * REG_BR_PROB_BASE / d + 0.5);
802 }
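/* A worked example of the combination above, assuming REG_BR_PROB_BASE is
   10000: starting from the neutral 5000 and combining with a note of 7000
   gives d = 5000*7000 + 5000*3000 == 50000000 and a combined probability of
   7000; combining that with a further agreeing note of 8000 gives
   d = 7000*8000 + 3000*2000 == 62000000 and about 9032, i.e. two agreeing
   70% and 80% predictions reinforce each other to roughly 90%.  */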
803
804 /* Decide which heuristic to use. In case we didn't match anything,
805 use the no_prediction heuristic; in case we did match, use either
806 first match or Dempster-Shafer theory depending on the flags. */
807
808 if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
809 first_match = true;
810
811 if (!found)
812 dump_prediction (dump_file, PRED_NO_PREDICTION,
813 combined_probability, bb, true);
814 else
815 {
816 dump_prediction (dump_file, PRED_DS_THEORY, combined_probability,
817 bb, !first_match);
818 dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability,
819 bb, first_match);
820 }
821
822 if (first_match)
823 combined_probability = best_probability;
824 dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);
825
826 while (*pnote)
827 {
828 if (REG_NOTE_KIND (*pnote) == REG_BR_PRED)
829 {
830 enum br_predictor predictor = ((enum br_predictor)
831 INTVAL (XEXP (XEXP (*pnote, 0), 0)));
832 int probability = INTVAL (XEXP (XEXP (*pnote, 0), 1));
833
834 dump_prediction (dump_file, predictor, probability, bb,
835 !first_match || best_predictor == predictor);
836 *pnote = XEXP (*pnote, 1);
837 }
838 else
839 pnote = &XEXP (*pnote, 1);
840 }
841
842 if (!prob_note)
843 {
844 add_int_reg_note (insn, REG_BR_PROB, combined_probability);
845
846 /* Save the prediction into the CFG in case we are looking at a
847 non-degenerate conditional jump. */
848 if (!single_succ_p (bb))
849 {
850 BRANCH_EDGE (bb)->probability = combined_probability;
851 FALLTHRU_EDGE (bb)->probability
852 = REG_BR_PROB_BASE - combined_probability;
853 }
854 }
855 else if (!single_succ_p (bb))
856 {
857 int prob = XINT (prob_note, 0);
858
859 BRANCH_EDGE (bb)->probability = prob;
860 FALLTHRU_EDGE (bb)->probability = REG_BR_PROB_BASE - prob;
861 }
862 else
863 single_succ_edge (bb)->probability = REG_BR_PROB_BASE;
864 }
865
866 /* Combine predictions into a single probability and store it in the CFG.
867 Remove the now useless prediction entries. */
868
869 static void
870 combine_predictions_for_bb (basic_block bb)
871 {
872 int best_probability = PROB_EVEN;
873 enum br_predictor best_predictor = END_PREDICTORS;
874 int combined_probability = REG_BR_PROB_BASE / 2;
875 int d;
876 bool first_match = false;
877 bool found = false;
878 struct edge_prediction *pred;
879 int nedges = 0;
880 edge e, first = NULL, second = NULL;
881 edge_iterator ei;
882 void **preds;
883
884 FOR_EACH_EDGE (e, ei, bb->succs)
885 if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
886 {
887 nedges ++;
888 if (first && !second)
889 second = e;
890 if (!first)
891 first = e;
892 }
893
894 /* When there is no successor or only one choice, prediction is easy.
895 
896 We are lazy for now and predict only basic blocks with two outgoing
897 edges. It is possible to predict the generic case too, but we would have to
898 ignore the first match heuristic and do more involved combining. Implement
899 this later. */
900 if (nedges != 2)
901 {
902 if (!bb->count)
903 set_even_probabilities (bb);
904 clear_bb_predictions (bb);
905 if (dump_file)
906 fprintf (dump_file, "%i edges in bb %i predicted to even probabilities\n",
907 nedges, bb->index);
908 return;
909 }
910
911 if (dump_file)
912 fprintf (dump_file, "Predictions for bb %i\n", bb->index);
913
914 preds = pointer_map_contains (bb_predictions, bb);
915 if (preds)
916 {
917 /* We implement the "first match" heuristic and use the probability guessed
918 by the predictor with the smallest index. */
919 for (pred = (struct edge_prediction *) *preds; pred; pred = pred->ep_next)
920 {
921 enum br_predictor predictor = pred->ep_predictor;
922 int probability = pred->ep_probability;
923
924 if (pred->ep_edge != first)
925 probability = REG_BR_PROB_BASE - probability;
926
927 found = true;
928 /* The first match heuristic would be wildly confused if we predicted
929 both directions. */
930 if (best_predictor > predictor)
931 {
932 struct edge_prediction *pred2;
933 int prob = probability;
934
935 for (pred2 = (struct edge_prediction *) *preds; pred2; pred2 = pred2->ep_next)
936 if (pred2 != pred && pred2->ep_predictor == pred->ep_predictor)
937 {
938 int probability2 = pred2->ep_probability;
939
940 if (pred2->ep_edge != first)
941 probability2 = REG_BR_PROB_BASE - probability2;
942
943 if ((probability < REG_BR_PROB_BASE / 2) !=
944 (probability2 < REG_BR_PROB_BASE / 2))
945 break;
946
947 /* If the same predictor later gave a better result, go for it! */
948 if ((probability >= REG_BR_PROB_BASE / 2 && (probability2 > probability))
949 || (probability <= REG_BR_PROB_BASE / 2 && (probability2 < probability)))
950 prob = probability2;
951 }
952 if (!pred2)
953 best_probability = prob, best_predictor = predictor;
954 }
955
956 d = (combined_probability * probability
957 + (REG_BR_PROB_BASE - combined_probability)
958 * (REG_BR_PROB_BASE - probability));
959
960 /* Use FP math to avoid overflows of 32bit integers. */
961 if (d == 0)
962 /* If one probability is 0% and one 100%, avoid division by zero. */
963 combined_probability = REG_BR_PROB_BASE / 2;
964 else
965 combined_probability = (((double) combined_probability)
966 * probability
967 * REG_BR_PROB_BASE / d + 0.5);
968 }
969 }
970
971 /* Decide which heuristic to use. In case we didn't match anything,
972 use the no_prediction heuristic; in case we did match, use either
973 first match or Dempster-Shafer theory depending on the flags. */
974
975 if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
976 first_match = true;
977
978 if (!found)
979 dump_prediction (dump_file, PRED_NO_PREDICTION, combined_probability, bb, true);
980 else
981 {
982 dump_prediction (dump_file, PRED_DS_THEORY, combined_probability, bb,
983 !first_match);
984 dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability, bb,
985 first_match);
986 }
987
988 if (first_match)
989 combined_probability = best_probability;
990 dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);
991
992 if (preds)
993 {
994 for (pred = (struct edge_prediction *) *preds; pred; pred = pred->ep_next)
995 {
996 enum br_predictor predictor = pred->ep_predictor;
997 int probability = pred->ep_probability;
998
999 if (pred->ep_edge != EDGE_SUCC (bb, 0))
1000 probability = REG_BR_PROB_BASE - probability;
1001 dump_prediction (dump_file, predictor, probability, bb,
1002 !first_match || best_predictor == predictor);
1003 }
1004 }
1005 clear_bb_predictions (bb);
1006
1007 if (!bb->count)
1008 {
1009 first->probability = combined_probability;
1010 second->probability = REG_BR_PROB_BASE - combined_probability;
1011 }
1012 }
1013
1014 /* Check if T1 and T2 satisfy the IV_COMPARE condition.
1015 Return the SSA_NAME if the condition is satisfied, NULL otherwise.
1016
1017 T1 and T2 should be one of the following cases:
1018 1. T1 is SSA_NAME, T2 is NULL
1019 2. T1 is SSA_NAME, T2 is INTEGER_CST between [-4, 4]
1020 3. T2 is SSA_NAME, T1 is INTEGER_CST between [-4, 4] */
1021
1022 static tree
1023 strips_small_constant (tree t1, tree t2)
1024 {
1025 tree ret = NULL;
1026 int value = 0;
1027
1028 if (!t1)
1029 return NULL;
1030 else if (TREE_CODE (t1) == SSA_NAME)
1031 ret = t1;
1032 else if (host_integerp (t1, 0))
1033 value = tree_low_cst (t1, 0);
1034 else
1035 return NULL;
1036
1037 if (!t2)
1038 return ret;
1039 else if (host_integerp (t2, 0))
1040 value = tree_low_cst (t2, 0);
1041 else if (TREE_CODE (t2) == SSA_NAME)
1042 {
1043 if (ret)
1044 return NULL;
1045 else
1046 ret = t2;
1047 }
1048
1049 if (value <= 4 && value >= -4)
1050 return ret;
1051 else
1052 return NULL;
1053 }
1054
1055 /* Return the SSA_NAME in T or in T's operands.
1056 Return NULL if no SSA_NAME can be found. */
1057
1058 static tree
1059 get_base_value (tree t)
1060 {
1061 if (TREE_CODE (t) == SSA_NAME)
1062 return t;
1063
1064 if (!BINARY_CLASS_P (t))
1065 return NULL;
1066
1067 switch (TREE_OPERAND_LENGTH (t))
1068 {
1069 case 1:
1070 return strips_small_constant (TREE_OPERAND (t, 0), NULL);
1071 case 2:
1072 return strips_small_constant (TREE_OPERAND (t, 0),
1073 TREE_OPERAND (t, 1));
1074 default:
1075 return NULL;
1076 }
1077 }
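/* For example, applied to the expression i_4 + 3 this returns i_4 (the
   constant 3 lies within [-4, 4]), while for i_4 + 8 it returns NULL.  */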
1078
1079 /* Check the compare STMT in LOOP. If it compares an induction
1080 variable to a loop invariant, return true, and save
1081 LOOP_INVARIANT, COMPARE_CODE and LOOP_STEP.
1082 Otherwise return false and set LOOP_INVARIANT to NULL. */
1083
1084 static bool
1085 is_comparison_with_loop_invariant_p (gimple stmt, struct loop *loop,
1086 tree *loop_invariant,
1087 enum tree_code *compare_code,
1088 tree *loop_step,
1089 tree *loop_iv_base)
1090 {
1091 tree op0, op1, bound, base;
1092 affine_iv iv0, iv1;
1093 enum tree_code code;
1094 tree step;
1095
1096 code = gimple_cond_code (stmt);
1097 *loop_invariant = NULL;
1098
1099 switch (code)
1100 {
1101 case GT_EXPR:
1102 case GE_EXPR:
1103 case NE_EXPR:
1104 case LT_EXPR:
1105 case LE_EXPR:
1106 case EQ_EXPR:
1107 break;
1108
1109 default:
1110 return false;
1111 }
1112
1113 op0 = gimple_cond_lhs (stmt);
1114 op1 = gimple_cond_rhs (stmt);
1115
1116 if ((TREE_CODE (op0) != SSA_NAME && TREE_CODE (op0) != INTEGER_CST)
1117 || (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op1) != INTEGER_CST))
1118 return false;
1119 if (!simple_iv (loop, loop_containing_stmt (stmt), op0, &iv0, true))
1120 return false;
1121 if (!simple_iv (loop, loop_containing_stmt (stmt), op1, &iv1, true))
1122 return false;
1123 if (TREE_CODE (iv0.step) != INTEGER_CST
1124 || TREE_CODE (iv1.step) != INTEGER_CST)
1125 return false;
1126 if ((integer_zerop (iv0.step) && integer_zerop (iv1.step))
1127 || (!integer_zerop (iv0.step) && !integer_zerop (iv1.step)))
1128 return false;
1129
1130 if (integer_zerop (iv0.step))
1131 {
1132 if (code != NE_EXPR && code != EQ_EXPR)
1133 code = invert_tree_comparison (code, false);
1134 bound = iv0.base;
1135 base = iv1.base;
1136 if (host_integerp (iv1.step, 0))
1137 step = iv1.step;
1138 else
1139 return false;
1140 }
1141 else
1142 {
1143 bound = iv1.base;
1144 base = iv0.base;
1145 if (host_integerp (iv0.step, 0))
1146 step = iv0.step;
1147 else
1148 return false;
1149 }
1150
1151 if (TREE_CODE (bound) != INTEGER_CST)
1152 bound = get_base_value (bound);
1153 if (!bound)
1154 return false;
1155 if (TREE_CODE (base) != INTEGER_CST)
1156 base = get_base_value (base);
1157 if (!base)
1158 return false;
1159
1160 *loop_invariant = bound;
1161 *compare_code = code;
1162 *loop_step = step;
1163 *loop_iv_base = base;
1164 return true;
1165 }
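/* As an illustration, for a GIMPLE_COND "i_1 < n_2" where i_1 is an
   induction variable with base 0 and step 4 and n_2 is loop invariant, this
   returns true with *LOOP_INVARIANT = n_2, *COMPARE_CODE = LT_EXPR,
   *LOOP_STEP = 4 and *LOOP_IV_BASE = 0.  */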
1166
1167 /* Compare two SSA_NAMEs: returns TRUE if T1 and T2 are value coherent. */
1168
1169 static bool
1170 expr_coherent_p (tree t1, tree t2)
1171 {
1172 gimple stmt;
1173 tree ssa_name_1 = NULL;
1174 tree ssa_name_2 = NULL;
1175
1176 gcc_assert (TREE_CODE (t1) == SSA_NAME || TREE_CODE (t1) == INTEGER_CST);
1177 gcc_assert (TREE_CODE (t2) == SSA_NAME || TREE_CODE (t2) == INTEGER_CST);
1178
1179 if (t1 == t2)
1180 return true;
1181
1182 if (TREE_CODE (t1) == INTEGER_CST && TREE_CODE (t2) == INTEGER_CST)
1183 return true;
1184 if (TREE_CODE (t1) == INTEGER_CST || TREE_CODE (t2) == INTEGER_CST)
1185 return false;
1186
1187 /* Check to see if t1 is expressed/defined with t2. */
1188 stmt = SSA_NAME_DEF_STMT (t1);
1189 gcc_assert (stmt != NULL);
1190 if (is_gimple_assign (stmt))
1191 {
1192 ssa_name_1 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
1193 if (ssa_name_1 && ssa_name_1 == t2)
1194 return true;
1195 }
1196
1197 /* Check to see if t2 is expressed/defined with t1. */
1198 stmt = SSA_NAME_DEF_STMT (t2);
1199 gcc_assert (stmt != NULL);
1200 if (is_gimple_assign (stmt))
1201 {
1202 ssa_name_2 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
1203 if (ssa_name_2 && ssa_name_2 == t1)
1204 return true;
1205 }
1206
1207 /* Compare if t1 and t2's def_stmts are identical. */
1208 if (ssa_name_2 != NULL && ssa_name_1 == ssa_name_2)
1209 return true;
1210 else
1211 return false;
1212 }
1213
1214 /* Predict the branch probability of BB when BB contains a branch that compares
1215 an induction variable in LOOP, whose base is LOOP_IV_BASE_VAR, to LOOP_BOUND_VAR.
1216 The loop exit condition uses LOOP_BOUND_CODE with a step of LOOP_BOUND_STEP.
1217
1218 E.g.
1219 for (int i = 0; i < bound; i++) {
1220 if (i < bound - 2)
1221 computation_1();
1222 else
1223 computation_2();
1224 }
1225
1226 In this loop, we will predict the branch inside the loop to be taken. */
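/* A worked example of the all-constant case handled below: with bound == 100
   and the inner test "i < 98" (base 0, step 1), loop_count is 100,
   compare_count is 98 and the inner branch gets a probability of
   98 * REG_BR_PROB_BASE / 100, i.e. 9800 assuming the usual
   REG_BR_PROB_BASE of 10000.  */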
1227
1228 static void
1229 predict_iv_comparison (struct loop *loop, basic_block bb,
1230 tree loop_bound_var,
1231 tree loop_iv_base_var,
1232 enum tree_code loop_bound_code,
1233 int loop_bound_step)
1234 {
1235 gimple stmt;
1236 tree compare_var, compare_base;
1237 enum tree_code compare_code;
1238 tree compare_step_var;
1239 edge then_edge;
1240 edge_iterator ei;
1241
1242 if (predicted_by_p (bb, PRED_LOOP_ITERATIONS_GUESSED)
1243 || predicted_by_p (bb, PRED_LOOP_ITERATIONS)
1244 || predicted_by_p (bb, PRED_LOOP_EXIT))
1245 return;
1246
1247 stmt = last_stmt (bb);
1248 if (!stmt || gimple_code (stmt) != GIMPLE_COND)
1249 return;
1250 if (!is_comparison_with_loop_invariant_p (stmt, loop, &compare_var,
1251 &compare_code,
1252 &compare_step_var,
1253 &compare_base))
1254 return;
1255
1256 /* Find the taken edge. */
1257 FOR_EACH_EDGE (then_edge, ei, bb->succs)
1258 if (then_edge->flags & EDGE_TRUE_VALUE)
1259 break;
1260
1261 /* When comparing an IV to a loop invariant, NE is more likely to be
1262 taken while EQ is more likely to be not-taken. */
1263 if (compare_code == NE_EXPR)
1264 {
1265 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1266 return;
1267 }
1268 else if (compare_code == EQ_EXPR)
1269 {
1270 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1271 return;
1272 }
1273
1274 if (!expr_coherent_p (loop_iv_base_var, compare_base))
1275 return;
1276
1277 /* If loop bound, base and compare bound are all constants, we can
1278 calculate the probability directly. */
1279 if (host_integerp (loop_bound_var, 0)
1280 && host_integerp (compare_var, 0)
1281 && host_integerp (compare_base, 0))
1282 {
1283 int probability;
1284 bool of, overflow = false;
1285 double_int mod, compare_count, tem, loop_count;
1286
1287 double_int loop_bound = tree_to_double_int (loop_bound_var);
1288 double_int compare_bound = tree_to_double_int (compare_var);
1289 double_int base = tree_to_double_int (compare_base);
1290 double_int compare_step = tree_to_double_int (compare_step_var);
1291
1292 /* (loop_bound - base) / compare_step */
1293 tem = loop_bound.sub_with_overflow (base, &of);
1294 overflow |= of;
1295 loop_count = tem.divmod_with_overflow (compare_step,
1296 0, TRUNC_DIV_EXPR,
1297 &mod, &of);
1298 overflow |= of;
1299
1300 if ((!compare_step.is_negative ())
1301 ^ (compare_code == LT_EXPR || compare_code == LE_EXPR))
1302 {
1303 /* (loop_bound - compare_bound) / compare_step */
1304 tem = loop_bound.sub_with_overflow (compare_bound, &of);
1305 overflow |= of;
1306 compare_count = tem.divmod_with_overflow (compare_step,
1307 0, TRUNC_DIV_EXPR,
1308 &mod, &of);
1309 overflow |= of;
1310 }
1311 else
1312 {
1313 /* (compare_bound - base) / compare_step */
1314 tem = compare_bound.sub_with_overflow (base, &of);
1315 overflow |= of;
1316 compare_count = tem.divmod_with_overflow (compare_step,
1317 0, TRUNC_DIV_EXPR,
1318 &mod, &of);
1319 overflow |= of;
1320 }
1321 if (compare_code == LE_EXPR || compare_code == GE_EXPR)
1322 ++compare_count;
1323 if (loop_bound_code == LE_EXPR || loop_bound_code == GE_EXPR)
1324 ++loop_count;
1325 if (compare_count.is_negative ())
1326 compare_count = double_int_zero;
1327 if (loop_count.is_negative ())
1328 loop_count = double_int_zero;
1329 if (loop_count.is_zero ())
1330 probability = 0;
1331 else if (compare_count.scmp (loop_count) == 1)
1332 probability = REG_BR_PROB_BASE;
1333 else
1334 {
1335 /* If loop_count is too big, such that REG_BR_PROB_BASE * loop_count
1336 could overflow, shift both loop_count and compare_count right
1337 a bit so that it doesn't overflow. Note both counts are known not
1338 to be negative at this point. */
1339 int clz_bits = clz_hwi (loop_count.high);
1340 gcc_assert (REG_BR_PROB_BASE < 32768);
1341 if (clz_bits < 16)
1342 {
1343 loop_count = loop_count.arshift (16 - clz_bits, HOST_BITS_PER_DOUBLE_INT);
1344 compare_count = compare_count.arshift (16 - clz_bits, HOST_BITS_PER_DOUBLE_INT);
1345 }
1346 tem = compare_count.mul_with_sign (double_int::from_shwi
1347 (REG_BR_PROB_BASE), true, &of);
1348 gcc_assert (!of);
1349 tem = tem.divmod (loop_count, true, TRUNC_DIV_EXPR, &mod);
1350 probability = tem.to_uhwi ();
1351 }
1352
1353 if (!overflow)
1354 predict_edge (then_edge, PRED_LOOP_IV_COMPARE, probability);
1355
1356 return;
1357 }
1358
1359 if (expr_coherent_p (loop_bound_var, compare_var))
1360 {
1361 if ((loop_bound_code == LT_EXPR || loop_bound_code == LE_EXPR)
1362 && (compare_code == LT_EXPR || compare_code == LE_EXPR))
1363 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1364 else if ((loop_bound_code == GT_EXPR || loop_bound_code == GE_EXPR)
1365 && (compare_code == GT_EXPR || compare_code == GE_EXPR))
1366 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1367 else if (loop_bound_code == NE_EXPR)
1368 {
1369 /* If the loop backedge condition is "(i != bound)", we do
1370 the comparison based on the step of IV:
1371 * step < 0 : backedge condition is like (i > bound)
1372 * step > 0 : backedge condition is like (i < bound) */
1373 gcc_assert (loop_bound_step != 0);
1374 if (loop_bound_step > 0
1375 && (compare_code == LT_EXPR
1376 || compare_code == LE_EXPR))
1377 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1378 else if (loop_bound_step < 0
1379 && (compare_code == GT_EXPR
1380 || compare_code == GE_EXPR))
1381 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1382 else
1383 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1384 }
1385 else
1386 /* The branch is predicted not-taken if loop_bound_code is
1387 the opposite of compare_code. */
1388 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1389 }
1390 else if (expr_coherent_p (loop_iv_base_var, compare_var))
1391 {
1392 /* For cases like:
1393 for (i = s; i < h; i++)
1394 if (i > s + 2) ....
1395 The branch should be predicted taken. */
1396 if (loop_bound_step > 0
1397 && (compare_code == GT_EXPR || compare_code == GE_EXPR))
1398 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1399 else if (loop_bound_step < 0
1400 && (compare_code == LT_EXPR || compare_code == LE_EXPR))
1401 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1402 else
1403 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1404 }
1405 }
1406
1407 /* Predict for extra loop exits that will lead to EXIT_EDGE. The extra loop
1408 exits result from short-circuit conditions that will generate an
1409 if_tmp. E.g.:
1410
1411 if (foo() || global > 10)
1412 break;
1413
1414 This will be translated into:
1415
1416 BB3:
1417 loop header...
1418 BB4:
1419 if foo() goto BB6 else goto BB5
1420 BB5:
1421 if global > 10 goto BB6 else goto BB7
1422 BB6:
1423 goto BB7
1424 BB7:
1425 iftmp = (PHI 0(BB5), 1(BB6))
1426 if iftmp == 1 goto BB8 else goto BB3
1427 BB8:
1428 outside of the loop...
1429
1430 The edge BB7->BB8 is a loop exit because BB8 is outside of the loop.
1431 From the dataflow, we can infer that BB4->BB6 and BB5->BB6 are also loop
1432 exits. This function takes BB7->BB8 as input, and finds out the extra loop
1433 exits to predict them using PRED_LOOP_EXIT. */
1434
1435 static void
1436 predict_extra_loop_exits (edge exit_edge)
1437 {
1438 unsigned i;
1439 bool check_value_one;
1440 gimple phi_stmt;
1441 tree cmp_rhs, cmp_lhs;
1442 gimple cmp_stmt = last_stmt (exit_edge->src);
1443
1444 if (!cmp_stmt || gimple_code (cmp_stmt) != GIMPLE_COND)
1445 return;
1446 cmp_rhs = gimple_cond_rhs (cmp_stmt);
1447 cmp_lhs = gimple_cond_lhs (cmp_stmt);
1448 if (!TREE_CONSTANT (cmp_rhs)
1449 || !(integer_zerop (cmp_rhs) || integer_onep (cmp_rhs)))
1450 return;
1451 if (TREE_CODE (cmp_lhs) != SSA_NAME)
1452 return;
1453
1454 /* If check_value_one is true, only the phi_args with value '1' will lead
1455 to loop exit. Otherwise, only the phi_args with value '0' will lead to
1456 loop exit. */
1457 check_value_one = (((integer_onep (cmp_rhs))
1458 ^ (gimple_cond_code (cmp_stmt) == EQ_EXPR))
1459 ^ ((exit_edge->flags & EDGE_TRUE_VALUE) != 0));
1460
1461 phi_stmt = SSA_NAME_DEF_STMT (cmp_lhs);
1462 if (!phi_stmt || gimple_code (phi_stmt) != GIMPLE_PHI)
1463 return;
1464
1465 for (i = 0; i < gimple_phi_num_args (phi_stmt); i++)
1466 {
1467 edge e1;
1468 edge_iterator ei;
1469 tree val = gimple_phi_arg_def (phi_stmt, i);
1470 edge e = gimple_phi_arg_edge (phi_stmt, i);
1471
1472 if (!TREE_CONSTANT (val) || !(integer_zerop (val) || integer_onep (val)))
1473 continue;
1474 if ((check_value_one ^ integer_onep (val)) == 1)
1475 continue;
1476 if (EDGE_COUNT (e->src->succs) != 1)
1477 {
1478 predict_paths_leading_to_edge (e, PRED_LOOP_EXIT, NOT_TAKEN);
1479 continue;
1480 }
1481
1482 FOR_EACH_EDGE (e1, ei, e->src->preds)
1483 predict_paths_leading_to_edge (e1, PRED_LOOP_EXIT, NOT_TAKEN);
1484 }
1485 }
1486
1487 /* Predict edge probabilities by exploiting loop structure. */
1488
1489 static void
1490 predict_loops (void)
1491 {
1492 loop_iterator li;
1493 struct loop *loop;
1494
1495 /* Try to predict out blocks in a loop that are not part of a
1496 natural loop. */
1497 FOR_EACH_LOOP (li, loop, 0)
1498 {
1499 basic_block bb, *bbs;
1500 unsigned j, n_exits;
1501 vec<edge> exits;
1502 struct tree_niter_desc niter_desc;
1503 edge ex;
1504 struct nb_iter_bound *nb_iter;
1505 enum tree_code loop_bound_code = ERROR_MARK;
1506 tree loop_bound_step = NULL;
1507 tree loop_bound_var = NULL;
1508 tree loop_iv_base = NULL;
1509 gimple stmt = NULL;
1510
1511 exits = get_loop_exit_edges (loop);
1512 n_exits = exits.length ();
1513 if (!n_exits)
1514 {
1515 exits.release ();
1516 continue;
1517 }
1518
1519 FOR_EACH_VEC_ELT (exits, j, ex)
1520 {
1521 tree niter = NULL;
1522 HOST_WIDE_INT nitercst;
1523 int max = PARAM_VALUE (PARAM_MAX_PREDICTED_ITERATIONS);
1524 int probability;
1525 enum br_predictor predictor;
1526
1527 predict_extra_loop_exits (ex);
1528
1529 if (number_of_iterations_exit (loop, ex, &niter_desc, false, false))
1530 niter = niter_desc.niter;
1531 if (!niter || TREE_CODE (niter_desc.niter) != INTEGER_CST)
1532 niter = loop_niter_by_eval (loop, ex);
1533
1534 if (TREE_CODE (niter) == INTEGER_CST)
1535 {
1536 if (host_integerp (niter, 1)
1537 && max
1538 && compare_tree_int (niter, max - 1) == -1)
1539 nitercst = tree_low_cst (niter, 1) + 1;
1540 else
1541 nitercst = max;
1542 predictor = PRED_LOOP_ITERATIONS;
1543 }
1544 /* If we have just one exit and we can derive some information about
1545 the number of iterations of the loop from the statements inside
1546 the loop, use it to predict this exit. */
1547 else if (n_exits == 1)
1548 {
1549 nitercst = estimated_stmt_executions_int (loop);
1550 if (nitercst < 0)
1551 continue;
1552 if (nitercst > max)
1553 nitercst = max;
1554
1555 predictor = PRED_LOOP_ITERATIONS_GUESSED;
1556 }
1557 else
1558 continue;
1559
1560 /* If the prediction for number of iterations is zero, do not
1561 predict the exit edges. */
1562 if (nitercst == 0)
1563 continue;
1564
1565 probability = ((REG_BR_PROB_BASE + nitercst / 2) / nitercst);
1566 predict_edge (ex, predictor, probability);
1567 }
1568 exits.release ();
1569
1570 /* Find information about loop bound variables. */
1571 for (nb_iter = loop->bounds; nb_iter;
1572 nb_iter = nb_iter->next)
1573 if (nb_iter->stmt
1574 && gimple_code (nb_iter->stmt) == GIMPLE_COND)
1575 {
1576 stmt = nb_iter->stmt;
1577 break;
1578 }
1579 if (!stmt && last_stmt (loop->header)
1580 && gimple_code (last_stmt (loop->header)) == GIMPLE_COND)
1581 stmt = last_stmt (loop->header);
1582 if (stmt)
1583 is_comparison_with_loop_invariant_p (stmt, loop,
1584 &loop_bound_var,
1585 &loop_bound_code,
1586 &loop_bound_step,
1587 &loop_iv_base);
1588
1589 bbs = get_loop_body (loop);
1590
1591 for (j = 0; j < loop->num_nodes; j++)
1592 {
1593 int header_found = 0;
1594 edge e;
1595 edge_iterator ei;
1596
1597 bb = bbs[j];
1598
1599 /* Bypass loop heuristics on continue statements. These
1600 statements construct loops via "non-loop" constructs
1601 in the source language and are better handled
1602 separately. */
1603 if (predicted_by_p (bb, PRED_CONTINUE))
1604 continue;
1605
1606 /* Loop branch heuristics - predict an edge back to a
1607 loop's head as taken. */
1608 if (bb == loop->latch)
1609 {
1610 e = find_edge (loop->latch, loop->header);
1611 if (e)
1612 {
1613 header_found = 1;
1614 predict_edge_def (e, PRED_LOOP_BRANCH, TAKEN);
1615 }
1616 }
1617
1618 /* Loop exit heuristics - predict an edge exiting the loop as not taken
1619 if the conditional has no successor that is the loop header. */
1620 if (!header_found
1621 /* If we already used more reliable loop exit predictors, do not
1622 bother with PRED_LOOP_EXIT. */
1623 && !predicted_by_p (bb, PRED_LOOP_ITERATIONS_GUESSED)
1624 && !predicted_by_p (bb, PRED_LOOP_ITERATIONS))
1625 {
1626 /* For loops with many exits we don't want to predict all exits
1627 with a rather large probability, because if all exits are
1628 considered in a row, the loop would be predicted to iterate
1629 almost never. The code dividing the probability by the number of
1630 exits is very rough. It should compute the number of exits
1631 taken on each path through the function (not the overall number
1632 of exits, which might be a lot higher for loops with wide switch
1633 statements in them) and take the n-th root.
1634 
1635 We limit the minimal probability to 2% to prevent
1636 EDGE_PROBABILITY_RELIABLE from trusting the branch prediction,
1637 as this was causing a regression in the perl benchmark containing such
1638 a wide loop. */
1639
1640 int probability = ((REG_BR_PROB_BASE
1641 - predictor_info [(int) PRED_LOOP_EXIT].hitrate)
1642 / n_exits);
1643 if (probability < HITRATE (2))
1644 probability = HITRATE (2);
1645 FOR_EACH_EDGE (e, ei, bb->succs)
1646 if (e->dest->index < NUM_FIXED_BLOCKS
1647 || !flow_bb_inside_loop_p (loop, e->dest))
1648 predict_edge (e, PRED_LOOP_EXIT, probability);
1649 }
1650 if (loop_bound_var)
1651 predict_iv_comparison (loop, bb, loop_bound_var, loop_iv_base,
1652 loop_bound_code,
1653 tree_low_cst (loop_bound_step, 0));
1654 }
1655
1656 /* Free basic blocks from get_loop_body. */
1657 free (bbs);
1658 }
1659 }
1660
1661 /* Attempt to predict probabilities of BB outgoing edges using local
1662 properties. */
1663 static void
1664 bb_estimate_probability_locally (basic_block bb)
1665 {
1666 rtx last_insn = BB_END (bb);
1667 rtx cond;
1668
1669 if (! can_predict_insn_p (last_insn))
1670 return;
1671 cond = get_condition (last_insn, NULL, false, false);
1672 if (! cond)
1673 return;
1674
1675 /* Try "pointer heuristic."
1676 A comparison ptr == 0 is predicted as false.
1677 Similarly, a comparison ptr1 == ptr2 is predicted as false. */
1678 if (COMPARISON_P (cond)
1679 && ((REG_P (XEXP (cond, 0)) && REG_POINTER (XEXP (cond, 0)))
1680 || (REG_P (XEXP (cond, 1)) && REG_POINTER (XEXP (cond, 1)))))
1681 {
1682 if (GET_CODE (cond) == EQ)
1683 predict_insn_def (last_insn, PRED_POINTER, NOT_TAKEN);
1684 else if (GET_CODE (cond) == NE)
1685 predict_insn_def (last_insn, PRED_POINTER, TAKEN);
1686 }
1687 else
1688
1689 /* Try "opcode heuristic."
1690 EQ tests are usually false and NE tests are usually true. Also,
1691 most quantities are positive, so we can make the appropriate guesses
1692 about signed comparisons against zero. */
1693 switch (GET_CODE (cond))
1694 {
1695 case CONST_INT:
1696 /* Unconditional branch. */
1697 predict_insn_def (last_insn, PRED_UNCONDITIONAL,
1698 cond == const0_rtx ? NOT_TAKEN : TAKEN);
1699 break;
1700
1701 case EQ:
1702 case UNEQ:
1703 /* Floating point comparisons appear to behave in a very
1704 unpredictable way because of the special role of equality tests in
1705 FP code. */
1706 if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
1707 ;
1708 /* Comparisons with 0 are often used for booleans and there is
1709 nothing useful to predict about them. */
1710 else if (XEXP (cond, 1) == const0_rtx
1711 || XEXP (cond, 0) == const0_rtx)
1712 ;
1713 else
1714 predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, NOT_TAKEN);
1715 break;
1716
1717 case NE:
1718 case LTGT:
1719 /* Floating point comparisons appear to behave in a very
1720 unpredictable way because of the special role of equality tests in
1721 FP code. */
1722 if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
1723 ;
1724 /* Comparisons with 0 are often used for booleans and there is
1725 nothing useful to predict about them. */
1726 else if (XEXP (cond, 1) == const0_rtx
1727 || XEXP (cond, 0) == const0_rtx)
1728 ;
1729 else
1730 predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, TAKEN);
1731 break;
1732
1733 case ORDERED:
1734 predict_insn_def (last_insn, PRED_FPOPCODE, TAKEN);
1735 break;
1736
1737 case UNORDERED:
1738 predict_insn_def (last_insn, PRED_FPOPCODE, NOT_TAKEN);
1739 break;
1740
1741 case LE:
1742 case LT:
1743 if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
1744 || XEXP (cond, 1) == constm1_rtx)
1745 predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, NOT_TAKEN);
1746 break;
1747
1748 case GE:
1749 case GT:
1750 if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
1751 || XEXP (cond, 1) == constm1_rtx)
1752 predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, TAKEN);
1753 break;
1754
1755 default:
1756 break;
1757 }
1758 }
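/* For instance, for a conditional jump generated from "if (p == NULL)" the
   pointer heuristic above predicts the branch not taken, while for
   "if (n > 0)" the opcode heuristic predicts it taken.  */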
1759
1760 /* Set edge->probability for each successor edge of BB. */
1761 void
1762 guess_outgoing_edge_probabilities (basic_block bb)
1763 {
1764 bb_estimate_probability_locally (bb);
1765 combine_predictions_for_insn (BB_END (bb), bb);
1766 }
1767 \f
1768 static tree expr_expected_value (tree, bitmap);
1769
1770 /* Helper function for expr_expected_value. */
1771
1772 static tree
1773 expr_expected_value_1 (tree type, tree op0, enum tree_code code,
1774 tree op1, bitmap visited)
1775 {
1776 gimple def;
1777
1778 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1779 {
1780 if (TREE_CONSTANT (op0))
1781 return op0;
1782
1783 if (code != SSA_NAME)
1784 return NULL_TREE;
1785
1786 def = SSA_NAME_DEF_STMT (op0);
1787
1788 /* If we were already here, break the infinite cycle. */
1789 if (!bitmap_set_bit (visited, SSA_NAME_VERSION (op0)))
1790 return NULL;
1791
1792 if (gimple_code (def) == GIMPLE_PHI)
1793 {
1794 /* All the arguments of the PHI node must have the same expected
1795 constant value. */
1796 int i, n = gimple_phi_num_args (def);
1797 tree val = NULL, new_val;
1798
1799 for (i = 0; i < n; i++)
1800 {
1801 tree arg = PHI_ARG_DEF (def, i);
1802
1803 /* If this PHI has itself as an argument, we cannot
1804 determine the expected value of this argument. However,
1805 if we can find an expected constant value for the other
1806 PHI args then we can still be sure that this is
1807 likely a constant. So be optimistic and just
1808 continue with the next argument. */
1809 if (arg == PHI_RESULT (def))
1810 continue;
1811
1812 new_val = expr_expected_value (arg, visited);
1813 if (!new_val)
1814 return NULL;
1815 if (!val)
1816 val = new_val;
1817 else if (!operand_equal_p (val, new_val, false))
1818 return NULL;
1819 }
1820 return val;
1821 }
1822 if (is_gimple_assign (def))
1823 {
1824 if (gimple_assign_lhs (def) != op0)
1825 return NULL;
1826
1827 return expr_expected_value_1 (TREE_TYPE (gimple_assign_lhs (def)),
1828 gimple_assign_rhs1 (def),
1829 gimple_assign_rhs_code (def),
1830 gimple_assign_rhs2 (def),
1831 visited);
1832 }
1833
1834 if (is_gimple_call (def))
1835 {
1836 tree decl = gimple_call_fndecl (def);
1837 if (!decl)
1838 return NULL;
1839 if (DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
1840 switch (DECL_FUNCTION_CODE (decl))
1841 {
1842 case BUILT_IN_EXPECT:
1843 {
1844 tree val;
1845 if (gimple_call_num_args (def) != 2)
1846 return NULL;
1847 val = gimple_call_arg (def, 0);
1848 if (TREE_CONSTANT (val))
1849 return val;
1850 return gimple_call_arg (def, 1);
1851 }
1852
1853 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_N:
1854 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_1:
1855 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_2:
1856 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_4:
1857 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_8:
1858 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_16:
1859 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE:
1860 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_N:
1861 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_1:
1862 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_2:
1863 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_4:
1864 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_8:
1865 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_16:
1866 /* Assume that any given atomic operation has low contention,
1867 and thus the compare-and-swap operation succeeds. */
1868 return boolean_true_node;
1869 }
1870 }
1871
1872 return NULL;
1873 }
1874
1875 if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS)
1876 {
1877 tree res;
1878 op0 = expr_expected_value (op0, visited);
1879 if (!op0)
1880 return NULL;
1881 op1 = expr_expected_value (op1, visited);
1882 if (!op1)
1883 return NULL;
1884 res = fold_build2 (code, type, op0, op1);
1885 if (TREE_CONSTANT (res))
1886 return res;
1887 return NULL;
1888 }
1889 if (get_gimple_rhs_class (code) == GIMPLE_UNARY_RHS)
1890 {
1891 tree res;
1892 op0 = expr_expected_value (op0, visited);
1893 if (!op0)
1894 return NULL;
1895 res = fold_build1 (code, type, op0);
1896 if (TREE_CONSTANT (res))
1897 return res;
1898 return NULL;
1899 }
1900 return NULL;
1901 }
1902
1903 /* Return the constant value EXPR is likely to have at execution time,
1904 or NULL if unknown. The function is used by the builtin_expect branch
1905 predictor, so the evidence must come from that construct and any
1906 additional constant folding it enables.
1907
1908 We may want to implement a more involved value guess (such as value range
1909 propagation based prediction), but such tricks shall go to a new implementation. */
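 /* Illustrative example (hypothetical user code, not from the sources):
    given

	n = __builtin_expect (count, 0);
	if (n > 10)
	  ...

    the first argument of the BUILT_IN_EXPECT call is not constant, so the
    expected value of N is the second argument, 0, and folding the
    comparison 0 > 10 gives an expected value of false for the condition.  */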
1910
1911 static tree
1912 expr_expected_value (tree expr, bitmap visited)
1913 {
1914 enum tree_code code;
1915 tree op0, op1;
1916
1917 if (TREE_CONSTANT (expr))
1918 return expr;
1919
1920 extract_ops_from_tree (expr, &code, &op0, &op1);
1921 return expr_expected_value_1 (TREE_TYPE (expr),
1922 op0, code, op1, visited);
1923 }
1924
1925 \f
1926 /* Get rid of all builtin_expect calls and GIMPLE_PREDICT statements
1927 we no longer need. */
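 /* For example (hypothetical GIMPLE), a statement

	tmp_1 = __builtin_expect (cond_2, 1);

    is replaced by the plain copy

	tmp_1 = cond_2;

    while a __builtin_expect call whose result is unused is removed
    outright.  */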
1928 static unsigned int
1929 strip_predict_hints (void)
1930 {
1931 basic_block bb;
1932 gimple ass_stmt;
1933 tree var;
1934
1935 FOR_EACH_BB (bb)
1936 {
1937 gimple_stmt_iterator bi;
1938 for (bi = gsi_start_bb (bb); !gsi_end_p (bi);)
1939 {
1940 gimple stmt = gsi_stmt (bi);
1941
1942 if (gimple_code (stmt) == GIMPLE_PREDICT)
1943 {
1944 gsi_remove (&bi, true);
1945 continue;
1946 }
1947 else if (gimple_code (stmt) == GIMPLE_CALL)
1948 {
1949 tree fndecl = gimple_call_fndecl (stmt);
1950
1951 if (fndecl
1952 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
1953 && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_EXPECT
1954 && gimple_call_num_args (stmt) == 2)
1955 {
1956 var = gimple_call_lhs (stmt);
1957 if (var)
1958 {
1959 ass_stmt
1960 = gimple_build_assign (var, gimple_call_arg (stmt, 0));
1961 gsi_replace (&bi, ass_stmt, true);
1962 }
1963 else
1964 {
1965 gsi_remove (&bi, true);
1966 continue;
1967 }
1968 }
1969 }
1970 gsi_next (&bi);
1971 }
1972 }
1973 return 0;
1974 }
1975 \f
1976 /* Predict using opcode of the last statement in basic block. */
1977 static void
1978 tree_predict_by_opcode (basic_block bb)
1979 {
1980 gimple stmt = last_stmt (bb);
1981 edge then_edge;
1982 tree op0, op1;
1983 tree type;
1984 tree val;
1985 enum tree_code cmp;
1986 bitmap visited;
1987 edge_iterator ei;
1988
1989 if (!stmt || gimple_code (stmt) != GIMPLE_COND)
1990 return;
1991 FOR_EACH_EDGE (then_edge, ei, bb->succs)
1992 if (then_edge->flags & EDGE_TRUE_VALUE)
1993 break;
1994 op0 = gimple_cond_lhs (stmt);
1995 op1 = gimple_cond_rhs (stmt);
1996 cmp = gimple_cond_code (stmt);
1997 type = TREE_TYPE (op0);
1998 visited = BITMAP_ALLOC (NULL);
1999 val = expr_expected_value_1 (boolean_type_node, op0, cmp, op1, visited);
2000 BITMAP_FREE (visited);
2001 if (val)
2002 {
2003 if (integer_zerop (val))
2004 predict_edge_def (then_edge, PRED_BUILTIN_EXPECT, NOT_TAKEN);
2005 else
2006 predict_edge_def (then_edge, PRED_BUILTIN_EXPECT, TAKEN);
2007 return;
2008 }
2009 /* Try "pointer heuristic."
2010 A comparison ptr == 0 is predicted as false.
2011 Similarly, a comparison ptr1 == ptr2 is predicted as false. */
2012 if (POINTER_TYPE_P (type))
2013 {
2014 if (cmp == EQ_EXPR)
2015 predict_edge_def (then_edge, PRED_TREE_POINTER, NOT_TAKEN);
2016 else if (cmp == NE_EXPR)
2017 predict_edge_def (then_edge, PRED_TREE_POINTER, TAKEN);
2018 }
2019 else
2020
2021 /* Try "opcode heuristic."
2022 EQ tests are usually false and NE tests are usually true. Also,
2023 most quantities are positive, so we can make the appropriate guesses
2024 about signed comparisons against zero. */
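     /* For example (illustrative): "if (n < 0)" is predicted as not taken
	by PRED_TREE_OPCODE_POSITIVE below, while a pointer comparison
	"if (p != q)" is predicted as taken by PRED_TREE_POINTER above.  */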
2025 switch (cmp)
2026 {
2027 case EQ_EXPR:
2028 case UNEQ_EXPR:
2029 /* Floating point comparisons appear to behave in a very
2030 unpredictable way because of the special role of equality
2031 tests in FP code. */
2032 if (FLOAT_TYPE_P (type))
2033 ;
2034 /* Comparisons with 0 are often used for booleans and there is
2035 nothing useful to predict about them. */
2036 else if (integer_zerop (op0) || integer_zerop (op1))
2037 ;
2038 else
2039 predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, NOT_TAKEN);
2040 break;
2041
2042 case NE_EXPR:
2043 case LTGT_EXPR:
2044 /* Floating point comparisons appear to behave in a very
2045 unpredictable way because of the special role of equality
2046 tests in FP code. */
2047 if (FLOAT_TYPE_P (type))
2048 ;
2049 /* Comparisons with 0 are often used for booleans and there is
2050 nothing useful to predict about them. */
2051 else if (integer_zerop (op0)
2052 || integer_zerop (op1))
2053 ;
2054 else
2055 predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, TAKEN);
2056 break;
2057
2058 case ORDERED_EXPR:
2059 predict_edge_def (then_edge, PRED_TREE_FPOPCODE, TAKEN);
2060 break;
2061
2062 case UNORDERED_EXPR:
2063 predict_edge_def (then_edge, PRED_TREE_FPOPCODE, NOT_TAKEN);
2064 break;
2065
2066 case LE_EXPR:
2067 case LT_EXPR:
2068 if (integer_zerop (op1)
2069 || integer_onep (op1)
2070 || integer_all_onesp (op1)
2071 || real_zerop (op1)
2072 || real_onep (op1)
2073 || real_minus_onep (op1))
2074 predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, NOT_TAKEN);
2075 break;
2076
2077 case GE_EXPR:
2078 case GT_EXPR:
2079 if (integer_zerop (op1)
2080 || integer_onep (op1)
2081 || integer_all_onesp (op1)
2082 || real_zerop (op1)
2083 || real_onep (op1)
2084 || real_minus_onep (op1))
2085 predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, TAKEN);
2086 break;
2087
2088 default:
2089 break;
2090 }
2091 }
2092
2093 /* Try to guess whether the return value indicates an error code. */
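 /* For example (illustrative): "return NULL;" yields PRED_NULL_RETURN with
    prediction NOT_TAKEN, "return -1;" yields PRED_NEGATIVE_RETURN with
    prediction NOT_TAKEN, and "return 42;" yields PRED_CONST_RETURN with
    prediction TAKEN.  */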
2094
2095 static enum br_predictor
2096 return_prediction (tree val, enum prediction *prediction)
2097 {
2098 /* VOID. */
2099 if (!val)
2100 return PRED_NO_PREDICTION;
2101 /* Different heuristics for pointers and scalars. */
2102 if (POINTER_TYPE_P (TREE_TYPE (val)))
2103 {
2104 /* NULL is usually not returned. */
2105 if (integer_zerop (val))
2106 {
2107 *prediction = NOT_TAKEN;
2108 return PRED_NULL_RETURN;
2109 }
2110 }
2111 else if (INTEGRAL_TYPE_P (TREE_TYPE (val)))
2112 {
2113 /* Negative return values are often used to indicate
2114 errors. */
2115 if (TREE_CODE (val) == INTEGER_CST
2116 && tree_int_cst_sgn (val) < 0)
2117 {
2118 *prediction = NOT_TAKEN;
2119 return PRED_NEGATIVE_RETURN;
2120 }
2121 /* Constant return values seem to be commonly taken.
2122 Zero/one often represent booleans, so exclude them from the
2123 heuristics. */
2124 if (TREE_CONSTANT (val)
2125 && (!integer_zerop (val) && !integer_onep (val)))
2126 {
2127 *prediction = TAKEN;
2128 return PRED_CONST_RETURN;
2129 }
2130 }
2131 return PRED_NO_PREDICTION;
2132 }
2133
2134 /* Find the basic block with the return expression and look at the possible
2135 return values, trying to apply the RETURN_PREDICTION heuristics. */
2136 static void
2137 apply_return_prediction (void)
2138 {
2139 gimple return_stmt = NULL;
2140 tree return_val;
2141 edge e;
2142 gimple phi;
2143 int phi_num_args, i;
2144 enum br_predictor pred;
2145 enum prediction direction;
2146 edge_iterator ei;
2147
2148 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
2149 {
2150 return_stmt = last_stmt (e->src);
2151 if (return_stmt
2152 && gimple_code (return_stmt) == GIMPLE_RETURN)
2153 break;
2154 }
2155 if (!e)
2156 return;
2157 return_val = gimple_return_retval (return_stmt);
2158 if (!return_val)
2159 return;
2160 if (TREE_CODE (return_val) != SSA_NAME
2161 || !SSA_NAME_DEF_STMT (return_val)
2162 || gimple_code (SSA_NAME_DEF_STMT (return_val)) != GIMPLE_PHI)
2163 return;
2164 phi = SSA_NAME_DEF_STMT (return_val);
2165 phi_num_args = gimple_phi_num_args (phi);
2166 pred = return_prediction (PHI_ARG_DEF (phi, 0), &direction);
2167
2168 /* Avoid the degenerate case where all return values from the function
2169 belong to the same category (i.e. they are all positive constants),
2170 in which case we can hardly say anything useful about them. */
2171 for (i = 1; i < phi_num_args; i++)
2172 if (pred != return_prediction (PHI_ARG_DEF (phi, i), &direction))
2173 break;
2174 if (i != phi_num_args)
2175 for (i = 0; i < phi_num_args; i++)
2176 {
2177 pred = return_prediction (PHI_ARG_DEF (phi, i), &direction);
2178 if (pred != PRED_NO_PREDICTION)
2179 predict_paths_leading_to_edge (gimple_phi_arg_edge (phi, i), pred,
2180 direction);
2181 }
2182 }
2183
2184 /* Look for basic blocks that contain unlikely-to-happen events
2185 (such as noreturn calls) and mark all paths leading to execution
2186 of these basic blocks as unlikely. */
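 /* For example (illustrative): a block containing a call to a noreturn
    function such as abort () has all paths leading to it predicted as not
    taken (provided the function has normal return edges at all), and calls
    to functions carrying the "cold" attribute are treated the same way.  */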
2187
2188 static void
2189 tree_bb_level_predictions (void)
2190 {
2191 basic_block bb;
2192 bool has_return_edges = false;
2193 edge e;
2194 edge_iterator ei;
2195
2196 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
2197 if (!(e->flags & (EDGE_ABNORMAL | EDGE_FAKE | EDGE_EH)))
2198 {
2199 has_return_edges = true;
2200 break;
2201 }
2202
2203 apply_return_prediction ();
2204
2205 FOR_EACH_BB (bb)
2206 {
2207 gimple_stmt_iterator gsi;
2208
2209 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2210 {
2211 gimple stmt = gsi_stmt (gsi);
2212 tree decl;
2213
2214 if (is_gimple_call (stmt))
2215 {
2216 if ((gimple_call_flags (stmt) & ECF_NORETURN)
2217 && has_return_edges)
2218 predict_paths_leading_to (bb, PRED_NORETURN,
2219 NOT_TAKEN);
2220 decl = gimple_call_fndecl (stmt);
2221 if (decl
2222 && lookup_attribute ("cold",
2223 DECL_ATTRIBUTES (decl)))
2224 predict_paths_leading_to (bb, PRED_COLD_FUNCTION,
2225 NOT_TAKEN);
2226 }
2227 else if (gimple_code (stmt) == GIMPLE_PREDICT)
2228 {
2229 predict_paths_leading_to (bb, gimple_predict_predictor (stmt),
2230 gimple_predict_outcome (stmt));
2231 /* Keep GIMPLE_PREDICT around so early inlining will propagate
2232 hints to callers. */
2233 }
2234 }
2235 }
2236 }
2237
2238 #ifdef ENABLE_CHECKING
2239
2240 /* Callback for pointer_map_traverse, asserts that the pointer map is
2241 empty. */
2242
2243 static bool
2244 assert_is_empty (const void *key ATTRIBUTE_UNUSED, void **value,
2245 void *data ATTRIBUTE_UNUSED)
2246 {
2247 gcc_assert (!*value);
2248 return false;
2249 }
2250 #endif
2251
2252 /* Predict branch probabilities and estimate profile for basic block BB. */
2253
2254 static void
2255 tree_estimate_probability_bb (basic_block bb)
2256 {
2257 edge e;
2258 edge_iterator ei;
2259 gimple last;
2260
2261 FOR_EACH_EDGE (e, ei, bb->succs)
2262 {
2263 /* Predict edges to user labels with attributes. */
2264 if (e->dest != EXIT_BLOCK_PTR)
2265 {
2266 gimple_stmt_iterator gi;
2267 for (gi = gsi_start_bb (e->dest); !gsi_end_p (gi); gsi_next (&gi))
2268 {
2269 gimple stmt = gsi_stmt (gi);
2270 tree decl;
2271
2272 if (gimple_code (stmt) != GIMPLE_LABEL)
2273 break;
2274 decl = gimple_label_label (stmt);
2275 if (DECL_ARTIFICIAL (decl))
2276 continue;
2277
2278 /* Finally, we have a user-defined label. */
2279 if (lookup_attribute ("cold", DECL_ATTRIBUTES (decl)))
2280 predict_edge_def (e, PRED_COLD_LABEL, NOT_TAKEN);
2281 else if (lookup_attribute ("hot", DECL_ATTRIBUTES (decl)))
2282 predict_edge_def (e, PRED_HOT_LABEL, TAKEN);
2283 }
2284 }
2285
2286 /* Predict early returns to be probable, as we've already taken
2287 care of error returns and the remaining cases are often used for
2288 fast paths through the function.
2289
2290 Since we've already removed the return statements, we are
2291 looking for CFG like:
2292
2293 if (conditional)
2294 {
2295 ..
2296 goto return_block
2297 }
2298 some other blocks
2299 return_block:
2300 return_stmt. */
2301 if (e->dest != bb->next_bb
2302 && e->dest != EXIT_BLOCK_PTR
2303 && single_succ_p (e->dest)
2304 && single_succ_edge (e->dest)->dest == EXIT_BLOCK_PTR
2305 && (last = last_stmt (e->dest)) != NULL
2306 && gimple_code (last) == GIMPLE_RETURN)
2307 {
2308 edge e1;
2309 edge_iterator ei1;
2310
2311 if (single_succ_p (bb))
2312 {
2313 FOR_EACH_EDGE (e1, ei1, bb->preds)
2314 if (!predicted_by_p (e1->src, PRED_NULL_RETURN)
2315 && !predicted_by_p (e1->src, PRED_CONST_RETURN)
2316 && !predicted_by_p (e1->src, PRED_NEGATIVE_RETURN))
2317 predict_edge_def (e1, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
2318 }
2319 else
2320 if (!predicted_by_p (e->src, PRED_NULL_RETURN)
2321 && !predicted_by_p (e->src, PRED_CONST_RETURN)
2322 && !predicted_by_p (e->src, PRED_NEGATIVE_RETURN))
2323 predict_edge_def (e, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
2324 }
2325
2326 /* Look for a block we are guarding (i.e. we dominate it,
2327 but it doesn't postdominate us). */
2328 if (e->dest != EXIT_BLOCK_PTR && e->dest != bb
2329 && dominated_by_p (CDI_DOMINATORS, e->dest, e->src)
2330 && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e->dest))
2331 {
2332 gimple_stmt_iterator bi;
2333
2334 /* The call heuristic claims that a guarded function call
2335 is improbable. This is because such calls are often used
2336 to signal exceptional situations such as printing error
2337 messages. */
2338 for (bi = gsi_start_bb (e->dest); !gsi_end_p (bi);
2339 gsi_next (&bi))
2340 {
2341 gimple stmt = gsi_stmt (bi);
2342 if (is_gimple_call (stmt)
2343 /* Constant and pure calls are hardly ever used to signal
2344 something exceptional. */
2345 && gimple_has_side_effects (stmt))
2346 {
2347 predict_edge_def (e, PRED_CALL, NOT_TAKEN);
2348 break;
2349 }
2350 }
2351 }
2352 }
2353 tree_predict_by_opcode (bb);
2354 }
2355
2356 /* Predict branch probabilities and estimate profile of the tree CFG.
2357 This function can be called from the loop optimizers to recompute
2358 the profile information. */
2359
2360 void
2361 tree_estimate_probability (void)
2362 {
2363 basic_block bb;
2364
2365 add_noreturn_fake_exit_edges ();
2366 connect_infinite_loops_to_exit ();
2367 /* We use loop_niter_by_eval, which requires that the loops have
2368 preheaders. */
2369 create_preheaders (CP_SIMPLE_PREHEADERS);
2370 calculate_dominance_info (CDI_POST_DOMINATORS);
2371
2372 bb_predictions = pointer_map_create ();
2373 tree_bb_level_predictions ();
2374 record_loop_exits ();
2375
2376 if (number_of_loops (cfun) > 1)
2377 predict_loops ();
2378
2379 FOR_EACH_BB (bb)
2380 tree_estimate_probability_bb (bb);
2381
2382 FOR_EACH_BB (bb)
2383 combine_predictions_for_bb (bb);
2384
2385 #ifdef ENABLE_CHECKING
2386 pointer_map_traverse (bb_predictions, assert_is_empty, NULL);
2387 #endif
2388 pointer_map_destroy (bb_predictions);
2389 bb_predictions = NULL;
2390
2391 estimate_bb_frequencies ();
2392 free_dominance_info (CDI_POST_DOMINATORS);
2393 remove_fake_exit_edges ();
2394 }
2395
2396 /* Predict branch probabilities and estimate profile of the tree CFG.
2397 This is the driver function for PASS_PROFILE. */
2398
2399 static unsigned int
2400 tree_estimate_probability_driver (void)
2401 {
2402 unsigned nb_loops;
2403
2404 loop_optimizer_init (LOOPS_NORMAL);
2405 if (dump_file && (dump_flags & TDF_DETAILS))
2406 flow_loops_dump (dump_file, NULL, 0);
2407
2408 mark_irreducible_loops ();
2409
2410 nb_loops = number_of_loops (cfun);
2411 if (nb_loops > 1)
2412 scev_initialize ();
2413
2414 tree_estimate_probability ();
2415
2416 if (nb_loops > 1)
2417 scev_finalize ();
2418
2419 loop_optimizer_finalize ();
2420 if (dump_file && (dump_flags & TDF_DETAILS))
2421 gimple_dump_cfg (dump_file, dump_flags);
2422 if (profile_status == PROFILE_ABSENT)
2423 profile_status = PROFILE_GUESSED;
2424 return 0;
2425 }
2426 \f
2427 /* Predict, using PRED, the edges into CUR whose sources are not postdominated
2428 by BB, and recurse into the blocks postdominated by CUR. */
2429
2430 static void
2431 predict_paths_for_bb (basic_block cur, basic_block bb,
2432 enum br_predictor pred,
2433 enum prediction taken,
2434 bitmap visited)
2435 {
2436 edge e;
2437 edge_iterator ei;
2438 basic_block son;
2439
2440 /* We are looking for all edges forming the edge cut induced by
2441 the set of all blocks postdominated by BB. */
2442 FOR_EACH_EDGE (e, ei, cur->preds)
2443 if (e->src->index >= NUM_FIXED_BLOCKS
2444 && !dominated_by_p (CDI_POST_DOMINATORS, e->src, bb))
2445 {
2446 edge e2;
2447 edge_iterator ei2;
2448 bool found = false;
2449
2450 /* Ignore fake and EH edges; we predict them as not taken anyway. */
2451 if (e->flags & (EDGE_EH | EDGE_FAKE))
2452 continue;
2453 gcc_assert (bb == cur || dominated_by_p (CDI_POST_DOMINATORS, cur, bb));
2454
2455 /* See if there is an edge from e->src that is not abnormal
2456 and does not lead to BB. */
2457 FOR_EACH_EDGE (e2, ei2, e->src->succs)
2458 if (e2 != e
2459 && !(e2->flags & (EDGE_EH | EDGE_FAKE))
2460 && !dominated_by_p (CDI_POST_DOMINATORS, e2->dest, bb))
2461 {
2462 found = true;
2463 break;
2464 }
2465
2466 /* If there is a non-abnormal path leaving e->src, predict the edge
2467 using the predictor. Otherwise we need to look for paths
2468 leading to e->src.
2469
2470 The second case may lead to an infinite loop when we are predicting
2471 regions that are only reachable by abnormal edges. We simply
2472 prevent visiting a given BB twice. */
2473 if (found)
2474 predict_edge_def (e, pred, taken);
2475 else if (bitmap_set_bit (visited, e->src->index))
2476 predict_paths_for_bb (e->src, e->src, pred, taken, visited);
2477 }
2478 for (son = first_dom_son (CDI_POST_DOMINATORS, cur);
2479 son;
2480 son = next_dom_son (CDI_POST_DOMINATORS, son))
2481 predict_paths_for_bb (son, bb, pred, taken, visited);
2482 }
2483
2484 /* Set branch probabilities on all paths leading to BB according to
2485 predictor PRED and prediction TAKEN. */
2486
2487 static void
2488 predict_paths_leading_to (basic_block bb, enum br_predictor pred,
2489 enum prediction taken)
2490 {
2491 bitmap visited = BITMAP_ALLOC (NULL);
2492 predict_paths_for_bb (bb, bb, pred, taken, visited);
2493 BITMAP_FREE (visited);
2494 }
2495
2496 /* Like predict_paths_leading_to but take edge instead of basic block. */
2497
2498 static void
2499 predict_paths_leading_to_edge (edge e, enum br_predictor pred,
2500 enum prediction taken)
2501 {
2502 bool has_nonloop_edge = false;
2503 edge_iterator ei;
2504 edge e2;
2505
2506 basic_block bb = e->src;
2507 FOR_EACH_EDGE (e2, ei, bb->succs)
2508 if (e2->dest != e->src && e2->dest != e->dest
2509 && !(e->flags & (EDGE_EH | EDGE_FAKE))
2510 && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e2->dest))
2511 {
2512 has_nonloop_edge = true;
2513 break;
2514 }
2515 if (!has_nonloop_edge)
2516 {
2517 bitmap visited = BITMAP_ALLOC (NULL);
2518 predict_paths_for_bb (bb, bb, pred, taken, visited);
2519 BITMAP_FREE (visited);
2520 }
2521 else
2522 predict_edge_def (e, pred, taken);
2523 }
2524 \f
2525 /* This is used to carry information about basic blocks. It is
2526 attached to the AUX field of the standard CFG block. */
2527
2528 typedef struct block_info_def
2529 {
2530 /* Estimated frequency of execution of basic_block. */
2531 sreal frequency;
2532
2533 /* To keep queue of basic blocks to process. */
2534 basic_block next;
2535
2536 /* Number of predecessors we need to visit first. */
2537 int npredecessors;
2538 } *block_info;
2539
2540 /* Similar information for edges. */
2541 typedef struct edge_info_def
2542 {
2543 /* In case the edge is a loopback edge, the probability that the edge is
2544 reached given that the header is. The estimated number of iterations of
2545 the loop is then 1 / (1 - back_edge_prob); see the example below. */
2546 sreal back_edge_prob;
2547 /* True if the edge is a loopback edge in the natural loop. */
2548 unsigned int back_edge:1;
2549 } *edge_info;
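 /* Worked example (illustrative numbers): if back_edge_prob is 0.9, the
    estimated number of iterations is 1 / (1 - 0.9) = 10, i.e. the loop
    body is expected to execute about ten times per entry into the loop.  */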
2550
2551 #define BLOCK_INFO(B) ((block_info) (B)->aux)
2552 #define EDGE_INFO(E) ((edge_info) (E)->aux)
2553
2554 /* Helper function for estimate_bb_frequencies.
2555 Propagate the frequencies in blocks marked in
2556 TOVISIT, starting in HEAD. */
2557
2558 static void
2559 propagate_freq (basic_block head, bitmap tovisit)
2560 {
2561 basic_block bb;
2562 basic_block last;
2563 unsigned i;
2564 edge e;
2565 basic_block nextbb;
2566 bitmap_iterator bi;
2567
2568 /* For each basic block we need to visit, count the number of its
2569 predecessors that we need to visit first. */
2570 EXECUTE_IF_SET_IN_BITMAP (tovisit, 0, i, bi)
2571 {
2572 edge_iterator ei;
2573 int count = 0;
2574
2575 bb = BASIC_BLOCK (i);
2576
2577 FOR_EACH_EDGE (e, ei, bb->preds)
2578 {
2579 bool visit = bitmap_bit_p (tovisit, e->src->index);
2580
2581 if (visit && !(e->flags & EDGE_DFS_BACK))
2582 count++;
2583 else if (visit && dump_file && !EDGE_INFO (e)->back_edge)
2584 fprintf (dump_file,
2585 "Irreducible region hit, ignoring edge to %i->%i\n",
2586 e->src->index, bb->index);
2587 }
2588 BLOCK_INFO (bb)->npredecessors = count;
2589 /* When function never returns, we will never process exit block. */
2590 if (!count && bb == EXIT_BLOCK_PTR)
2591 bb->count = bb->frequency = 0;
2592 }
2593
2594 memcpy (&BLOCK_INFO (head)->frequency, &real_one, sizeof (real_one));
2595 last = head;
2596 for (bb = head; bb; bb = nextbb)
2597 {
2598 edge_iterator ei;
2599 sreal cyclic_probability, frequency;
2600
2601 memcpy (&cyclic_probability, &real_zero, sizeof (real_zero));
2602 memcpy (&frequency, &real_zero, sizeof (real_zero));
2603
2604 nextbb = BLOCK_INFO (bb)->next;
2605 BLOCK_INFO (bb)->next = NULL;
2606
2607 /* Compute frequency of basic block. */
2608 if (bb != head)
2609 {
2610 #ifdef ENABLE_CHECKING
2611 FOR_EACH_EDGE (e, ei, bb->preds)
2612 gcc_assert (!bitmap_bit_p (tovisit, e->src->index)
2613 || (e->flags & EDGE_DFS_BACK));
2614 #endif
2615
2616 FOR_EACH_EDGE (e, ei, bb->preds)
2617 if (EDGE_INFO (e)->back_edge)
2618 {
2619 sreal_add (&cyclic_probability, &cyclic_probability,
2620 &EDGE_INFO (e)->back_edge_prob);
2621 }
2622 else if (!(e->flags & EDGE_DFS_BACK))
2623 {
2624 sreal tmp;
2625
2626 /* frequency += (e->probability
2627 * BLOCK_INFO (e->src)->frequency /
2628 REG_BR_PROB_BASE); */
2629
2630 sreal_init (&tmp, e->probability, 0);
2631 sreal_mul (&tmp, &tmp, &BLOCK_INFO (e->src)->frequency);
2632 sreal_mul (&tmp, &tmp, &real_inv_br_prob_base);
2633 sreal_add (&frequency, &frequency, &tmp);
2634 }
2635
2636 if (sreal_compare (&cyclic_probability, &real_zero) == 0)
2637 {
2638 memcpy (&BLOCK_INFO (bb)->frequency, &frequency,
2639 sizeof (frequency));
2640 }
2641 else
2642 {
2643 if (sreal_compare (&cyclic_probability, &real_almost_one) > 0)
2644 {
2645 memcpy (&cyclic_probability, &real_almost_one,
2646 sizeof (real_almost_one));
2647 }
2648
2649 /* BLOCK_INFO (bb)->frequency = frequency
2650 / (1 - cyclic_probability) */
2651
2652 sreal_sub (&cyclic_probability, &real_one, &cyclic_probability);
2653 sreal_div (&BLOCK_INFO (bb)->frequency,
2654 &frequency, &cyclic_probability);
2655 }
2656 }
2657
2658 bitmap_clear_bit (tovisit, bb->index);
2659
2660 e = find_edge (bb, head);
2661 if (e)
2662 {
2663 sreal tmp;
2664
2665 /* EDGE_INFO (e)->back_edge_prob
2666 = ((e->probability * BLOCK_INFO (bb)->frequency)
2667 / REG_BR_PROB_BASE); */
2668
2669 sreal_init (&tmp, e->probability, 0);
2670 sreal_mul (&tmp, &tmp, &BLOCK_INFO (bb)->frequency);
2671 sreal_mul (&EDGE_INFO (e)->back_edge_prob,
2672 &tmp, &real_inv_br_prob_base);
2673 }
2674
2675 /* Propagate to successor blocks. */
2676 FOR_EACH_EDGE (e, ei, bb->succs)
2677 if (!(e->flags & EDGE_DFS_BACK)
2678 && BLOCK_INFO (e->dest)->npredecessors)
2679 {
2680 BLOCK_INFO (e->dest)->npredecessors--;
2681 if (!BLOCK_INFO (e->dest)->npredecessors)
2682 {
2683 if (!nextbb)
2684 nextbb = e->dest;
2685 else
2686 BLOCK_INFO (last)->next = e->dest;
2687
2688 last = e->dest;
2689 }
2690 }
2691 }
2692 }
2693
2694 /* Estimate probabilities of loopback edges in loops at the same nesting level. */
2695
2696 static void
2697 estimate_loops_at_level (struct loop *first_loop)
2698 {
2699 struct loop *loop;
2700
2701 for (loop = first_loop; loop; loop = loop->next)
2702 {
2703 edge e;
2704 basic_block *bbs;
2705 unsigned i;
2706 bitmap tovisit = BITMAP_ALLOC (NULL);
2707
2708 estimate_loops_at_level (loop->inner);
2709
2710 /* Find current loop back edge and mark it. */
2711 e = loop_latch_edge (loop);
2712 EDGE_INFO (e)->back_edge = 1;
2713
2714 bbs = get_loop_body (loop);
2715 for (i = 0; i < loop->num_nodes; i++)
2716 bitmap_set_bit (tovisit, bbs[i]->index);
2717 free (bbs);
2718 propagate_freq (loop->header, tovisit);
2719 BITMAP_FREE (tovisit);
2720 }
2721 }
2722
2723 /* Propagate frequencies through the structure of loops. */
2724
2725 static void
2726 estimate_loops (void)
2727 {
2728 bitmap tovisit = BITMAP_ALLOC (NULL);
2729 basic_block bb;
2730
2731 /* Start by estimating the frequencies in the loops. */
2732 if (number_of_loops (cfun) > 1)
2733 estimate_loops_at_level (current_loops->tree_root->inner);
2734
2735 /* Now propagate the frequencies through all the blocks. */
2736 FOR_ALL_BB (bb)
2737 {
2738 bitmap_set_bit (tovisit, bb->index);
2739 }
2740 propagate_freq (ENTRY_BLOCK_PTR, tovisit);
2741 BITMAP_FREE (tovisit);
2742 }
2743
2744 /* Convert counts measured by profile driven feedback to frequencies.
2745 Return nonzero iff there was any nonzero execution count. */
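 /* Worked example (illustrative numbers): with count_max = 1000 and
    bb->count = 250, the block gets frequency
    (250 * BB_FREQ_MAX + 500) / 1000, i.e. roughly BB_FREQ_MAX / 4.  */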
2746
2747 int
2748 counts_to_freqs (void)
2749 {
2750 gcov_type count_max, true_count_max = 0;
2751 basic_block bb;
2752
2753 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
2754 true_count_max = MAX (bb->count, true_count_max);
2755
2756 count_max = MAX (true_count_max, 1);
2757 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
2758 bb->frequency = (bb->count * BB_FREQ_MAX + count_max / 2) / count_max;
2759
2760 return true_count_max;
2761 }
2762
2763 /* Return true if the function is likely to be expensive, so there is no
2764 point in optimizing the prologue or epilogue for speed, or inlining, at the
2765 expense of code size growth. THRESHOLD is the limit on the average number
2766 of instructions the function may execute and still be considered cheap. */
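 /* Worked example (illustrative numbers): with an entry block frequency of
    1000 and THRESHOLD of 100, LIMIT is 100000; the function is considered
    expensive once the accumulated bb->frequency over its active insns
    exceeds that, i.e. once it executes more than about 100 insns per
    invocation on average.  */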
2767
2768 bool
2769 expensive_function_p (int threshold)
2770 {
2771 unsigned int sum = 0;
2772 basic_block bb;
2773 unsigned int limit;
2774
2775 /* We cannot compute this accurately for large thresholds due to the
2776 scaled frequencies. */
2777 gcc_assert (threshold <= BB_FREQ_MAX);
2778
2779 /* Frequencies are out of range. This either means that the function contains
2780 an internal loop executing more than BB_FREQ_MAX times, or profile feedback
2781 is available and the function has not been executed at all. */
2782 if (ENTRY_BLOCK_PTR->frequency == 0)
2783 return true;
2784
2785 /* At most BB_FREQ_MAX^2, so overflow won't happen. */
2786 limit = ENTRY_BLOCK_PTR->frequency * threshold;
2787 FOR_EACH_BB (bb)
2788 {
2789 rtx insn;
2790
2791 FOR_BB_INSNS (bb, insn)
2792 if (active_insn_p (insn))
2793 {
2794 sum += bb->frequency;
2795 if (sum > limit)
2796 return true;
2797 }
2798 }
2799
2800 return false;
2801 }
2802
2803 /* Estimate basic block frequencies from the given branch probabilities. */
2804
2805 void
2806 estimate_bb_frequencies (void)
2807 {
2808 basic_block bb;
2809 sreal freq_max;
2810
2811 if (profile_status != PROFILE_READ || !counts_to_freqs ())
2812 {
2813 static int real_values_initialized = 0;
2814
2815 if (!real_values_initialized)
2816 {
2817 real_values_initialized = 1;
2818 sreal_init (&real_zero, 0, 0);
2819 sreal_init (&real_one, 1, 0);
2820 sreal_init (&real_br_prob_base, REG_BR_PROB_BASE, 0);
2821 sreal_init (&real_bb_freq_max, BB_FREQ_MAX, 0);
2822 sreal_init (&real_one_half, 1, -1);
2823 sreal_div (&real_inv_br_prob_base, &real_one, &real_br_prob_base);
2824 sreal_sub (&real_almost_one, &real_one, &real_inv_br_prob_base);
2825 }
2826
2827 mark_dfs_back_edges ();
2828
2829 single_succ_edge (ENTRY_BLOCK_PTR)->probability = REG_BR_PROB_BASE;
2830
2831 /* Set up block info for each basic block. */
2832 alloc_aux_for_blocks (sizeof (struct block_info_def));
2833 alloc_aux_for_edges (sizeof (struct edge_info_def));
2834 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
2835 {
2836 edge e;
2837 edge_iterator ei;
2838
2839 FOR_EACH_EDGE (e, ei, bb->succs)
2840 {
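	      /* EDGE_INFO (e)->back_edge_prob
		 = e->probability / REG_BR_PROB_BASE;  */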
2841 sreal_init (&EDGE_INFO (e)->back_edge_prob, e->probability, 0);
2842 sreal_mul (&EDGE_INFO (e)->back_edge_prob,
2843 &EDGE_INFO (e)->back_edge_prob,
2844 &real_inv_br_prob_base);
2845 }
2846 }
2847
2848 /* First compute probabilities locally for each loop from innermost
2849 to outermost to examine probabilities for back edges. */
2850 estimate_loops ();
2851
2852 memcpy (&freq_max, &real_zero, sizeof (real_zero));
2853 FOR_EACH_BB (bb)
2854 if (sreal_compare (&freq_max, &BLOCK_INFO (bb)->frequency) < 0)
2855 memcpy (&freq_max, &BLOCK_INFO (bb)->frequency, sizeof (freq_max));
2856
2857 sreal_div (&freq_max, &real_bb_freq_max, &freq_max);
2858 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
2859 {
2860 sreal tmp;
2861
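	  /* bb->frequency = BLOCK_INFO (bb)->frequency * freq_max + 1/2,
	     where FREQ_MAX now holds BB_FREQ_MAX divided by the largest
	     block frequency, so the hottest block ends up with a frequency
	     close to BB_FREQ_MAX.  */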
2862 sreal_mul (&tmp, &BLOCK_INFO (bb)->frequency, &freq_max);
2863 sreal_add (&tmp, &tmp, &real_one_half);
2864 bb->frequency = sreal_to_int (&tmp);
2865 }
2866
2867 free_aux_for_blocks ();
2868 free_aux_for_edges ();
2869 }
2870 compute_function_frequency ();
2871 }
2872
2873 /* Decide whether function is hot, cold or unlikely executed. */
2874 void
2875 compute_function_frequency (void)
2876 {
2877 basic_block bb;
2878 struct cgraph_node *node = cgraph_get_node (current_function_decl);
2879
2880 if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
2881 || MAIN_NAME_P (DECL_NAME (current_function_decl)))
2882 node->only_called_at_startup = true;
2883 if (DECL_STATIC_DESTRUCTOR (current_function_decl))
2884 node->only_called_at_exit = true;
2885
2886 if (profile_status != PROFILE_READ)
2887 {
2888 int flags = flags_from_decl_or_type (current_function_decl);
2889 if (lookup_attribute ("cold", DECL_ATTRIBUTES (current_function_decl))
2890 != NULL)
2891 node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
2892 else if (lookup_attribute ("hot", DECL_ATTRIBUTES (current_function_decl))
2893 != NULL)
2894 node->frequency = NODE_FREQUENCY_HOT;
2895 else if (flags & ECF_NORETURN)
2896 node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
2897 else if (MAIN_NAME_P (DECL_NAME (current_function_decl)))
2898 node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
2899 else if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
2900 || DECL_STATIC_DESTRUCTOR (current_function_decl))
2901 node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
2902 return;
2903 }
2904
2905 /* Only try to drop the function into the unlikely-executed category the
2906 first time around; after inlining, roundoff errors may confuse us.
2907 The ipa-profile pass will later drop functions that are only called from
2908 unlikely functions to unlikely, and that is most of what we care about. */
2909 if (!cfun->after_inlining)
2910 node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
2911 FOR_EACH_BB (bb)
2912 {
2913 if (maybe_hot_bb_p (cfun, bb))
2914 {
2915 node->frequency = NODE_FREQUENCY_HOT;
2916 return;
2917 }
2918 if (!probably_never_executed_bb_p (cfun, bb))
2919 node->frequency = NODE_FREQUENCY_NORMAL;
2920 }
2921 }
2922
2923 static bool
2924 gate_estimate_probability (void)
2925 {
2926 return flag_guess_branch_prob;
2927 }
2928
2929 /* Build PREDICT_EXPR. */
2930 tree
2931 build_predict_expr (enum br_predictor predictor, enum prediction taken)
2932 {
2933 tree t = build1 (PREDICT_EXPR, void_type_node,
2934 build_int_cst (integer_type_node, predictor));
2935 SET_PREDICT_EXPR_OUTCOME (t, taken);
2936 return t;
2937 }
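 /* Illustrative use (hypothetical): build_predict_expr (PRED_NORETURN,
    NOT_TAKEN) builds a PREDICT_EXPR hinting that the paths leading to the
    statement containing it are not expected to be executed.  */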
2938
2939 const char *
2940 predictor_name (enum br_predictor predictor)
2941 {
2942 return predictor_info[predictor].name;
2943 }
2944
2945 namespace {
2946
2947 const pass_data pass_data_profile =
2948 {
2949 GIMPLE_PASS, /* type */
2950 "profile_estimate", /* name */
2951 OPTGROUP_NONE, /* optinfo_flags */
2952 true, /* has_gate */
2953 true, /* has_execute */
2954 TV_BRANCH_PROB, /* tv_id */
2955 PROP_cfg, /* properties_required */
2956 0, /* properties_provided */
2957 0, /* properties_destroyed */
2958 0, /* todo_flags_start */
2959 TODO_verify_ssa, /* todo_flags_finish */
2960 };
2961
2962 class pass_profile : public gimple_opt_pass
2963 {
2964 public:
2965 pass_profile (gcc::context *ctxt)
2966 : gimple_opt_pass (pass_data_profile, ctxt)
2967 {}
2968
2969 /* opt_pass methods: */
2970 bool gate () { return gate_estimate_probability (); }
2971 unsigned int execute () { return tree_estimate_probability_driver (); }
2972
2973 }; // class pass_profile
2974
2975 } // anon namespace
2976
2977 gimple_opt_pass *
2978 make_pass_profile (gcc::context *ctxt)
2979 {
2980 return new pass_profile (ctxt);
2981 }
2982
2983 namespace {
2984
2985 const pass_data pass_data_strip_predict_hints =
2986 {
2987 GIMPLE_PASS, /* type */
2988 "*strip_predict_hints", /* name */
2989 OPTGROUP_NONE, /* optinfo_flags */
2990 false, /* has_gate */
2991 true, /* has_execute */
2992 TV_BRANCH_PROB, /* tv_id */
2993 PROP_cfg, /* properties_required */
2994 0, /* properties_provided */
2995 0, /* properties_destroyed */
2996 0, /* todo_flags_start */
2997 TODO_verify_ssa, /* todo_flags_finish */
2998 };
2999
3000 class pass_strip_predict_hints : public gimple_opt_pass
3001 {
3002 public:
3003 pass_strip_predict_hints (gcc::context *ctxt)
3004 : gimple_opt_pass (pass_data_strip_predict_hints, ctxt)
3005 {}
3006
3007 /* opt_pass methods: */
3008 opt_pass * clone () { return new pass_strip_predict_hints (m_ctxt); }
3009 unsigned int execute () { return strip_predict_hints (); }
3010
3011 }; // class pass_strip_predict_hints
3012
3013 } // anon namespace
3014
3015 gimple_opt_pass *
3016 make_pass_strip_predict_hints (gcc::context *ctxt)
3017 {
3018 return new pass_strip_predict_hints (ctxt);
3019 }
3020
3021 /* Rebuild function frequencies. Passes are in general expected to
3022 maintain the profile by hand; however, in some cases this is not possible:
3023 for example, when inlining several functions with loops, frequencies might
3024 run out of scale and thus need to be recomputed. */
3025
3026 void
3027 rebuild_frequencies (void)
3028 {
3029 timevar_push (TV_REBUILD_FREQUENCIES);
3030 if (profile_status == PROFILE_GUESSED)
3031 {
3032 loop_optimizer_init (0);
3033 add_noreturn_fake_exit_edges ();
3034 mark_irreducible_loops ();
3035 connect_infinite_loops_to_exit ();
3036 estimate_bb_frequencies ();
3037 remove_fake_exit_edges ();
3038 loop_optimizer_finalize ();
3039 }
3040 else if (profile_status == PROFILE_READ)
3041 counts_to_freqs ();
3042 else
3043 gcc_unreachable ();
3044 timevar_pop (TV_REBUILD_FREQUENCIES);
3045 }