predict.c (predict_loops): Remove PRED_LOOP_BRANCH.
1 /* Branch prediction routines for the GNU compiler.
2 Copyright (C) 2000-2016 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 /* References:
21
22 [1] "Branch Prediction for Free"
23 Ball and Larus; PLDI '93.
24 [2] "Static Branch Frequency and Program Profile Analysis"
25 Wu and Larus; MICRO-27.
26 [3] "Corpus-based Static Branch Prediction"
27 Calder, Grunwald, Lindsay, Martin, Mozer, and Zorn; PLDI '95. */
28
29
30 #include "config.h"
31 #include "system.h"
32 #include "coretypes.h"
33 #include "backend.h"
34 #include "rtl.h"
35 #include "tree.h"
36 #include "gimple.h"
37 #include "cfghooks.h"
38 #include "tree-pass.h"
39 #include "ssa.h"
40 #include "emit-rtl.h"
41 #include "cgraph.h"
42 #include "coverage.h"
43 #include "diagnostic-core.h"
44 #include "gimple-predict.h"
45 #include "fold-const.h"
46 #include "calls.h"
47 #include "cfganal.h"
48 #include "profile.h"
49 #include "sreal.h"
50 #include "params.h"
51 #include "cfgloop.h"
52 #include "gimple-iterator.h"
53 #include "tree-cfg.h"
54 #include "tree-ssa-loop-niter.h"
55 #include "tree-ssa-loop.h"
56 #include "tree-scalar-evolution.h"
57
58 /* Enum with reasons why a predictor is ignored. */
59
60 enum predictor_reason
61 {
62 NONE,
63 IGNORED,
64 SINGLE_EDGE_DUPLICATE,
65 EDGE_PAIR_DUPLICATE
66 };
67
68 /* String messages for the aforementioned enum. */
69
70 static const char *reason_messages[] = {"", " (ignored)",
71 " (single edge duplicate)", " (edge pair duplicate)"};
72
73 /* real constants: 1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE,
74 1/REG_BR_PROB_BASE, 0.5, BB_FREQ_MAX. */
75 static sreal real_almost_one, real_br_prob_base,
76 real_inv_br_prob_base, real_one_half, real_bb_freq_max;
77
78 static void combine_predictions_for_insn (rtx_insn *, basic_block);
79 static void dump_prediction (FILE *, enum br_predictor, int, basic_block,
80 enum predictor_reason, edge);
81 static void predict_paths_leading_to (basic_block, enum br_predictor, enum prediction);
82 static void predict_paths_leading_to_edge (edge, enum br_predictor, enum prediction);
83 static bool can_predict_insn_p (const rtx_insn *);
84
85 /* Information we hold about each branch predictor.
86 Filled using information from predict.def. */
87
88 struct predictor_info
89 {
90 const char *const name; /* Name used in the debugging dumps. */
91 const int hitrate; /* Expected hitrate used by
92 predict_insn_def call. */
93 const int flags;
94 };
95
96 /* Use the given predictor without Dempster-Shafer theory if it matches
97 using first_match heuristics. */
98 #define PRED_FLAG_FIRST_MATCH 1
99
100 /* Convert a hitrate given in percent to our fixed-point representation. */
101
102 #define HITRATE(VAL) ((int) ((VAL) * REG_BR_PROB_BASE + 50) / 100)
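/* For instance, with REG_BR_PROB_BASE of 10000 (its usual value),
   HITRATE (90) evaluates to 9000, i.e. a 90% hitrate in this
   fixed-point scale.  */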
103
104 #define DEF_PREDICTOR(ENUM, NAME, HITRATE, FLAGS) {NAME, HITRATE, FLAGS},
105 static const struct predictor_info predictor_info[]= {
106 #include "predict.def"
107
108 /* Upper bound on predictors. */
109 {NULL, 0, 0}
110 };
111 #undef DEF_PREDICTOR
112
113 /* Return TRUE if frequency FREQ is considered to be hot. */
114
115 static inline bool
116 maybe_hot_frequency_p (struct function *fun, int freq)
117 {
118 struct cgraph_node *node = cgraph_node::get (fun->decl);
119 if (!profile_info
120 || !opt_for_fn (fun->decl, flag_branch_probabilities))
121 {
122 if (node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED)
123 return false;
124 if (node->frequency == NODE_FREQUENCY_HOT)
125 return true;
126 }
127 if (profile_status_for_fn (fun) == PROFILE_ABSENT)
128 return true;
129 if (node->frequency == NODE_FREQUENCY_EXECUTED_ONCE
130 && freq < (ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency * 2 / 3))
131 return false;
132 if (PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION) == 0)
133 return false;
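/* As an illustration: if --param hot-bb-frequency-fraction is 1000, a block
   is considered hot only when it executes at least once per thousand
   executions of the function entry block.  */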
134 if (freq * PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION)
135 < ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency)
136 return false;
137 return true;
138 }
139
140 static gcov_type min_count = -1;
141
142 /* Determine the threshold for hot BB counts. */
143
144 gcov_type
145 get_hot_bb_threshold ()
146 {
147 gcov_working_set_t *ws;
148 if (min_count == -1)
149 {
150 ws = find_working_set (PARAM_VALUE (HOT_BB_COUNT_WS_PERMILLE));
151 gcc_assert (ws);
152 min_count = ws->min_counter;
153 }
154 return min_count;
155 }
156
157 /* Set the threshold for hot BB counts. */
158
159 void
160 set_hot_bb_threshold (gcov_type min)
161 {
162 min_count = min;
163 }
164
165 /* Return TRUE if profile count COUNT is considered to be hot. */
166
167 bool
168 maybe_hot_count_p (struct function *fun, gcov_type count)
169 {
170 if (fun && profile_status_for_fn (fun) != PROFILE_READ)
171 return true;
172 /* Code executed at most once is not hot. */
173 if (profile_info->runs >= count)
174 return false;
175 return (count >= get_hot_bb_threshold ());
176 }
177
178 /* Return true in case BB can be CPU intensive and should be optimized
179 for maximal performance. */
180
181 bool
182 maybe_hot_bb_p (struct function *fun, const_basic_block bb)
183 {
184 gcc_checking_assert (fun);
185 if (profile_status_for_fn (fun) == PROFILE_READ)
186 return maybe_hot_count_p (fun, bb->count);
187 return maybe_hot_frequency_p (fun, bb->frequency);
188 }
189
190 /* Return true in case edge E can be CPU intensive and should be optimized
191 for maximal performance. */
192
193 bool
194 maybe_hot_edge_p (edge e)
195 {
196 if (profile_status_for_fn (cfun) == PROFILE_READ)
197 return maybe_hot_count_p (cfun, e->count);
198 return maybe_hot_frequency_p (cfun, EDGE_FREQUENCY (e));
199 }
200
201 /* Return true if profile COUNT and FREQUENCY, or the static node frequency
202 of function FUN, indicate that the code is never executed. */
203
204 static bool
205 probably_never_executed (struct function *fun,
206 gcov_type count, int frequency)
207 {
208 gcc_checking_assert (fun);
209 if (profile_status_for_fn (fun) == PROFILE_READ)
210 {
211 int unlikely_count_fraction = PARAM_VALUE (UNLIKELY_BB_COUNT_FRACTION);
212 if (count * unlikely_count_fraction >= profile_info->runs)
213 return false;
214 if (!frequency)
215 return true;
216 if (!ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency)
217 return false;
218 if (ENTRY_BLOCK_PTR_FOR_FN (fun)->count)
219 {
220 gcov_type computed_count;
221 /* Check for possibility of overflow, in which case entry bb count
222 is large enough to do the division first without losing much
223 precision. */
224 if (ENTRY_BLOCK_PTR_FOR_FN (fun)->count < REG_BR_PROB_BASE *
225 REG_BR_PROB_BASE)
226 {
227 gcov_type scaled_count
228 = frequency * ENTRY_BLOCK_PTR_FOR_FN (fun)->count *
229 unlikely_count_fraction;
230 computed_count = RDIV (scaled_count,
231 ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency);
232 }
233 else
234 {
235 computed_count = RDIV (ENTRY_BLOCK_PTR_FOR_FN (fun)->count,
236 ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency);
237 computed_count *= frequency * unlikely_count_fraction;
238 }
239 if (computed_count >= profile_info->runs)
240 return false;
241 }
242 return true;
243 }
244 if ((!profile_info || !(opt_for_fn (fun->decl, flag_branch_probabilities)))
245 && (cgraph_node::get (fun->decl)->frequency
246 == NODE_FREQUENCY_UNLIKELY_EXECUTED))
247 return true;
248 return false;
249 }
250
251
252 /* Return true in case BB is probably never executed. */
253
254 bool
255 probably_never_executed_bb_p (struct function *fun, const_basic_block bb)
256 {
257 return probably_never_executed (fun, bb->count, bb->frequency);
258 }
259
260
261 /* Return true in case edge E is probably never executed. */
262
263 bool
264 probably_never_executed_edge_p (struct function *fun, edge e)
265 {
266 return probably_never_executed (fun, e->count, EDGE_FREQUENCY (e));
267 }
268
269 /* Return true when function FUN should always be optimized for size. */
270
271 bool
272 optimize_function_for_size_p (struct function *fun)
273 {
274 if (!fun || !fun->decl)
275 return optimize_size;
276 cgraph_node *n = cgraph_node::get (fun->decl);
277 return n && n->optimize_for_size_p ();
278 }
279
280 /* Return true when function FUN should always be optimized for speed. */
281
282 bool
283 optimize_function_for_speed_p (struct function *fun)
284 {
285 return !optimize_function_for_size_p (fun);
286 }
287
288 /* Return the optimization type that should be used for the function FUN. */
289
290 optimization_type
291 function_optimization_type (struct function *fun)
292 {
293 return (optimize_function_for_speed_p (fun)
294 ? OPTIMIZE_FOR_SPEED
295 : OPTIMIZE_FOR_SIZE);
296 }
297
298 /* Return TRUE when BB should be optimized for size. */
299
300 bool
301 optimize_bb_for_size_p (const_basic_block bb)
302 {
303 return (optimize_function_for_size_p (cfun)
304 || (bb && !maybe_hot_bb_p (cfun, bb)));
305 }
306
307 /* Return TRUE when BB should be optimized for speed. */
308
309 bool
310 optimize_bb_for_speed_p (const_basic_block bb)
311 {
312 return !optimize_bb_for_size_p (bb);
313 }
314
315 /* Return the optimization type that should be used for block BB. */
316
317 optimization_type
318 bb_optimization_type (const_basic_block bb)
319 {
320 return (optimize_bb_for_speed_p (bb)
321 ? OPTIMIZE_FOR_SPEED
322 : OPTIMIZE_FOR_SIZE);
323 }
324
325 /* Return TRUE when edge E should be optimized for size. */
326
327 bool
328 optimize_edge_for_size_p (edge e)
329 {
330 return optimize_function_for_size_p (cfun) || !maybe_hot_edge_p (e);
331 }
332
333 /* Return TRUE when edge E should be optimized for speed. */
334
335 bool
336 optimize_edge_for_speed_p (edge e)
337 {
338 return !optimize_edge_for_size_p (e);
339 }
340
341 /* Return TRUE when the current instruction should be optimized for size. */
342
343 bool
344 optimize_insn_for_size_p (void)
345 {
346 return optimize_function_for_size_p (cfun) || !crtl->maybe_hot_insn_p;
347 }
348
349 /* Return TRUE when the current instruction should be optimized for speed. */
350
351 bool
352 optimize_insn_for_speed_p (void)
353 {
354 return !optimize_insn_for_size_p ();
355 }
356
357 /* Return TRUE when LOOP should be optimized for size. */
358
359 bool
360 optimize_loop_for_size_p (struct loop *loop)
361 {
362 return optimize_bb_for_size_p (loop->header);
363 }
364
365 /* Return TRUE when LOOP should be optimized for speed. */
366
367 bool
368 optimize_loop_for_speed_p (struct loop *loop)
369 {
370 return optimize_bb_for_speed_p (loop->header);
371 }
372
373 /* Return TRUE when LOOP nest should be optimized for speed. */
374
375 bool
376 optimize_loop_nest_for_speed_p (struct loop *loop)
377 {
378 struct loop *l = loop;
379 if (optimize_loop_for_speed_p (loop))
380 return true;
381 l = loop->inner;
382 while (l && l != loop)
383 {
384 if (optimize_loop_for_speed_p (l))
385 return true;
386 if (l->inner)
387 l = l->inner;
388 else if (l->next)
389 l = l->next;
390 else
391 {
392 while (l != loop && !l->next)
393 l = loop_outer (l);
394 if (l != loop)
395 l = l->next;
396 }
397 }
398 return false;
399 }
400
401 /* Return TRUE when LOOP nest should be optimized for size. */
402
403 bool
404 optimize_loop_nest_for_size_p (struct loop *loop)
405 {
406 return !optimize_loop_nest_for_speed_p (loop);
407 }
408
409 /* Return true when edge E is likely to be well predictable by branch
410 predictor. */
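/* As an example, with --param predictable-branch-outcome=2 an edge whose
   probability lies within 2% of either 0% or 100% counts as predictable.  */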
411
412 bool
413 predictable_edge_p (edge e)
414 {
415 if (profile_status_for_fn (cfun) == PROFILE_ABSENT)
416 return false;
417 if ((e->probability
418 <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100)
419 || (REG_BR_PROB_BASE - e->probability
420 <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100))
421 return true;
422 return false;
423 }
424
425
426 /* Set RTL expansion for BB profile. */
427
428 void
429 rtl_profile_for_bb (basic_block bb)
430 {
431 crtl->maybe_hot_insn_p = maybe_hot_bb_p (cfun, bb);
432 }
433
434 /* Set RTL expansion for edge profile. */
435
436 void
437 rtl_profile_for_edge (edge e)
438 {
439 crtl->maybe_hot_insn_p = maybe_hot_edge_p (e);
440 }
441
442 /* Set RTL expansion to default mode (i.e. when profile info is not known). */
443 void
444 default_rtl_profile (void)
445 {
446 crtl->maybe_hot_insn_p = true;
447 }
448
449 /* Return true if one of the outgoing edges is already predicted by
450 PREDICTOR. */
451
452 bool
453 rtl_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
454 {
455 rtx note;
456 if (!INSN_P (BB_END (bb)))
457 return false;
458 for (note = REG_NOTES (BB_END (bb)); note; note = XEXP (note, 1))
459 if (REG_NOTE_KIND (note) == REG_BR_PRED
460 && INTVAL (XEXP (XEXP (note, 0), 0)) == (int)predictor)
461 return true;
462 return false;
463 }
464
465 /* Structure representing predictions at the tree level. */
466
467 struct edge_prediction {
468 struct edge_prediction *ep_next;
469 edge ep_edge;
470 enum br_predictor ep_predictor;
471 int ep_probability;
472 };
473
474 /* This map contains, for each basic block, the list of predictions for
475 its outgoing edges. */
476
477 static hash_map<const_basic_block, edge_prediction *> *bb_predictions;
478
479 /* Return true if one of the outgoing edges is already predicted by
480 PREDICTOR. */
481
482 bool
483 gimple_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
484 {
485 struct edge_prediction *i;
486 edge_prediction **preds = bb_predictions->get (bb);
487
488 if (!preds)
489 return false;
490
491 for (i = *preds; i; i = i->ep_next)
492 if (i->ep_predictor == predictor)
493 return true;
494 return false;
495 }
496
497 /* Return true if edge E is already predicted by PREDICTOR with the
498 probability that corresponds to TAKEN. */
499
500 bool
501 edge_predicted_by_p (edge e, enum br_predictor predictor, bool taken)
502 {
503 struct edge_prediction *i;
504 basic_block bb = e->src;
505 edge_prediction **preds = bb_predictions->get (bb);
506 if (!preds)
507 return false;
508
509 int probability = predictor_info[(int) predictor].hitrate;
510
511 if (taken != TAKEN)
512 probability = REG_BR_PROB_BASE - probability;
513
514 for (i = *preds; i; i = i->ep_next)
515 if (i->ep_predictor == predictor
516 && i->ep_edge == e
517 && i->ep_probability == probability)
518 return true;
519 return false;
520 }
521
522 /* Return true when the probability of an edge is reliable.
523
524 The profile guessing code is good at predicting the branch outcome (i.e.
525 taken/not taken); it is right slightly over 75% of the time.
526 It is, however, notoriously poor at predicting the probability itself.
527 In general the guessed profile appears a lot flatter (with probabilities
528 closer to 50%) than reality, so it is a bad idea to use it to drive
529 optimizations such as those disabling dynamic branch prediction for well
530 predictable branches.
531
532 There are two exceptions - edges leading to noreturn calls and edges
533 predicted by the number-of-iterations heuristics are predicted well. This
534 predicate should be able to distinguish those, but at the moment it simply
535 checks for the noreturn heuristic, which is the only one giving a probability
536 over 99% or below 1%. In the future we might want to propagate reliability
537 information across the CFG if we find it useful in multiple places. */
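/* For instance, a guessed probability of 99.5% (as emitted by the noreturn
   heuristic) is treated as reliable below, while a guessed 85% is not.  */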
538 static bool
539 probability_reliable_p (int prob)
540 {
541 return (profile_status_for_fn (cfun) == PROFILE_READ
542 || (profile_status_for_fn (cfun) == PROFILE_GUESSED
543 && (prob <= HITRATE (1) || prob >= HITRATE (99))));
544 }
545
546 /* Same predicate as above, working on edges. */
547 bool
548 edge_probability_reliable_p (const_edge e)
549 {
550 return probability_reliable_p (e->probability);
551 }
552
553 /* Same predicate as edge_probability_reliable_p, working on notes. */
554 bool
555 br_prob_note_reliable_p (const_rtx note)
556 {
557 gcc_assert (REG_NOTE_KIND (note) == REG_BR_PROB);
558 return probability_reliable_p (XINT (note, 0));
559 }
560
561 static void
562 predict_insn (rtx_insn *insn, enum br_predictor predictor, int probability)
563 {
564 gcc_assert (any_condjump_p (insn));
565 if (!flag_guess_branch_prob)
566 return;
567
568 add_reg_note (insn, REG_BR_PRED,
569 gen_rtx_CONCAT (VOIDmode,
570 GEN_INT ((int) predictor),
571 GEN_INT ((int) probability)));
572 }
573
574 /* Predict insn by given predictor. */
575
576 void
577 predict_insn_def (rtx_insn *insn, enum br_predictor predictor,
578 enum prediction taken)
579 {
580 int probability = predictor_info[(int) predictor].hitrate;
581
582 if (taken != TAKEN)
583 probability = REG_BR_PROB_BASE - probability;
584
585 predict_insn (insn, predictor, probability);
586 }
587
588 /* Predict edge E with given probability if possible. */
589
590 void
591 rtl_predict_edge (edge e, enum br_predictor predictor, int probability)
592 {
593 rtx_insn *last_insn;
594 last_insn = BB_END (e->src);
595
596 /* We can store the branch prediction information only about
597 conditional jumps. */
598 if (!any_condjump_p (last_insn))
599 return;
600
601 /* We always store probability of branching. */
602 if (e->flags & EDGE_FALLTHRU)
603 probability = REG_BR_PROB_BASE - probability;
604
605 predict_insn (last_insn, predictor, probability);
606 }
607
608 /* Predict edge E with the given PROBABILITY. */
609 void
610 gimple_predict_edge (edge e, enum br_predictor predictor, int probability)
611 {
612 if (e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun)
613 && EDGE_COUNT (e->src->succs) > 1
614 && flag_guess_branch_prob
615 && optimize)
616 {
617 struct edge_prediction *i = XNEW (struct edge_prediction);
618 edge_prediction *&preds = bb_predictions->get_or_insert (e->src);
619
620 i->ep_next = preds;
621 preds = i;
622 i->ep_probability = probability;
623 i->ep_predictor = predictor;
624 i->ep_edge = e;
625 }
626 }
627
628 /* Filter edge predictions PREDS by a function FILTER. DATA is passed
629 to the filter function. */
630
631 void
632 filter_predictions (edge_prediction **preds,
633 bool (*filter) (edge_prediction *, void *), void *data)
634 {
635 if (!bb_predictions)
636 return;
637
638 if (preds)
639 {
640 struct edge_prediction **prediction = preds;
641 struct edge_prediction *next;
642
643 while (*prediction)
644 {
645 if ((*filter) (*prediction, data))
646 prediction = &((*prediction)->ep_next);
647 else
648 {
649 next = (*prediction)->ep_next;
650 free (*prediction);
651 *prediction = next;
652 }
653 }
654 }
655 }
656
657 /* Filter function predicate that returns true for an edge prediction P
658 if its edge is equal to DATA. */
659
660 bool
661 equal_edge_p (edge_prediction *p, void *data)
662 {
663 return p->ep_edge == (edge)data;
664 }
665
666 /* Remove all predictions on given basic block that are attached
667 to edge E. */
668 void
669 remove_predictions_associated_with_edge (edge e)
670 {
671 if (!bb_predictions)
672 return;
673
674 edge_prediction **preds = bb_predictions->get (e->src);
675 filter_predictions (preds, equal_edge_p, e);
676 }
677
678 /* Clears the list of predictions stored for BB. */
679
680 static void
681 clear_bb_predictions (basic_block bb)
682 {
683 edge_prediction **preds = bb_predictions->get (bb);
684 struct edge_prediction *pred, *next;
685
686 if (!preds)
687 return;
688
689 for (pred = *preds; pred; pred = next)
690 {
691 next = pred->ep_next;
692 free (pred);
693 }
694 *preds = NULL;
695 }
696
697 /* Return true when we can store a prediction on insn INSN.
698 At the moment we represent predictions only on conditional
699 jumps, not on computed jumps or other complicated cases. */
700 static bool
701 can_predict_insn_p (const rtx_insn *insn)
702 {
703 return (JUMP_P (insn)
704 && any_condjump_p (insn)
705 && EDGE_COUNT (BLOCK_FOR_INSN (insn)->succs) >= 2);
706 }
707
708 /* Predict edge E by given predictor if possible. */
709
710 void
711 predict_edge_def (edge e, enum br_predictor predictor,
712 enum prediction taken)
713 {
714 int probability = predictor_info[(int) predictor].hitrate;
715
716 if (taken != TAKEN)
717 probability = REG_BR_PROB_BASE - probability;
718
719 predict_edge (e, predictor, probability);
720 }
721
722 /* Invert all branch predictions or probability notes in the INSN. This needs
723 to be done each time we invert the condition used by the jump. */
724
725 void
726 invert_br_probabilities (rtx insn)
727 {
728 rtx note;
729
730 for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
731 if (REG_NOTE_KIND (note) == REG_BR_PROB)
732 XINT (note, 0) = REG_BR_PROB_BASE - XINT (note, 0);
733 else if (REG_NOTE_KIND (note) == REG_BR_PRED)
734 XEXP (XEXP (note, 0), 1)
735 = GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (XEXP (note, 0), 1)));
736 }
737
738 /* Dump information about the branch prediction to the output file. */
739
740 static void
741 dump_prediction (FILE *file, enum br_predictor predictor, int probability,
742 basic_block bb, enum predictor_reason reason = NONE,
743 edge ep_edge = NULL)
744 {
745 edge e = ep_edge;
746 edge_iterator ei;
747
748 if (!file)
749 return;
750
751 if (e == NULL)
752 FOR_EACH_EDGE (e, ei, bb->succs)
753 if (! (e->flags & EDGE_FALLTHRU))
754 break;
755
756 char edge_info_str[128];
757 if (ep_edge)
758 sprintf (edge_info_str, " of edge %d->%d", ep_edge->src->index,
759 ep_edge->dest->index);
760 else
761 edge_info_str[0] = '\0';
762
763 fprintf (file, " %s heuristics%s%s: %.1f%%",
764 predictor_info[predictor].name,
765 edge_info_str, reason_messages[reason],
766 probability * 100.0 / REG_BR_PROB_BASE);
767
768 if (bb->count)
769 {
770 fprintf (file, " exec %" PRId64, bb->count);
771 if (e)
772 {
773 fprintf (file, " hit %" PRId64, e->count);
774 fprintf (file, " (%.1f%%)", e->count * 100.0 / bb->count);
775 }
776 }
777
778 fprintf (file, "\n");
779 }
780
781 /* We cannot predict the probabilities of the outgoing edges of BB. Set them
782 evenly and hope for the best. */
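/* For example, a block with three ordinary successors gets roughly
   REG_BR_PROB_BASE / 3 on each of them, while EH and fake edges get
   probability 0.  */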
783 static void
784 set_even_probabilities (basic_block bb)
785 {
786 int nedges = 0;
787 edge e;
788 edge_iterator ei;
789
790 FOR_EACH_EDGE (e, ei, bb->succs)
791 if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
792 nedges ++;
793 FOR_EACH_EDGE (e, ei, bb->succs)
794 if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
795 e->probability = (REG_BR_PROB_BASE + nedges / 2) / nedges;
796 else
797 e->probability = 0;
798 }
799
800 /* Combine all REG_BR_PRED notes into a single probability and attach a
801 REG_BR_PROB note if not already present. Remove now useless REG_BR_PRED notes. */
802
803 static void
804 combine_predictions_for_insn (rtx_insn *insn, basic_block bb)
805 {
806 rtx prob_note;
807 rtx *pnote;
808 rtx note;
809 int best_probability = PROB_EVEN;
810 enum br_predictor best_predictor = END_PREDICTORS;
811 int combined_probability = REG_BR_PROB_BASE / 2;
812 int d;
813 bool first_match = false;
814 bool found = false;
815
816 if (!can_predict_insn_p (insn))
817 {
818 set_even_probabilities (bb);
819 return;
820 }
821
822 prob_note = find_reg_note (insn, REG_BR_PROB, 0);
823 pnote = &REG_NOTES (insn);
824 if (dump_file)
825 fprintf (dump_file, "Predictions for insn %i bb %i\n", INSN_UID (insn),
826 bb->index);
827
828 /* We implement the "first match" heuristic and use the probability guessed
829 by the predictor with the smallest index. */
830 for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
831 if (REG_NOTE_KIND (note) == REG_BR_PRED)
832 {
833 enum br_predictor predictor = ((enum br_predictor)
834 INTVAL (XEXP (XEXP (note, 0), 0)));
835 int probability = INTVAL (XEXP (XEXP (note, 0), 1));
836
837 found = true;
838 if (best_predictor > predictor)
839 best_probability = probability, best_predictor = predictor;
840
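/* Dempster-Shafer combination of the running estimate P (combined_probability)
   with the new predictor's probability Q: combined = P*Q / (P*Q + (1-P)*(1-Q)),
   scaled by REG_BR_PROB_BASE.  For instance, combining 80% with 70% gives
   0.56 / (0.56 + 0.06), roughly 90%.  */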
841 d = (combined_probability * probability
842 + (REG_BR_PROB_BASE - combined_probability)
843 * (REG_BR_PROB_BASE - probability));
844
845 /* Use FP math to avoid overflows of 32bit integers. */
846 if (d == 0)
847 /* If one probability is 0% and one 100%, avoid division by zero. */
848 combined_probability = REG_BR_PROB_BASE / 2;
849 else
850 combined_probability = (((double) combined_probability) * probability
851 * REG_BR_PROB_BASE / d + 0.5);
852 }
853
854 /* Decide which heuristic to use. In case we didn't match anything,
855 use the no_prediction heuristic; in case we did match, use either
856 first match or Dempster-Shafer theory depending on the flags. */
857
858 if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
859 first_match = true;
860
861 if (!found)
862 dump_prediction (dump_file, PRED_NO_PREDICTION,
863 combined_probability, bb);
864 else
865 {
866 dump_prediction (dump_file, PRED_DS_THEORY, combined_probability,
867 bb, !first_match ? NONE : IGNORED);
868 dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability,
869 bb, first_match ? NONE: IGNORED);
870 }
871
872 if (first_match)
873 combined_probability = best_probability;
874 dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb);
875
876 while (*pnote)
877 {
878 if (REG_NOTE_KIND (*pnote) == REG_BR_PRED)
879 {
880 enum br_predictor predictor = ((enum br_predictor)
881 INTVAL (XEXP (XEXP (*pnote, 0), 0)));
882 int probability = INTVAL (XEXP (XEXP (*pnote, 0), 1));
883
884 dump_prediction (dump_file, predictor, probability, bb,
885 (!first_match || best_predictor == predictor)
886 ? NONE : IGNORED);
887 *pnote = XEXP (*pnote, 1);
888 }
889 else
890 pnote = &XEXP (*pnote, 1);
891 }
892
893 if (!prob_note)
894 {
895 add_int_reg_note (insn, REG_BR_PROB, combined_probability);
896
897 /* Save the prediction into the CFG in case we are looking at a
898 non-degenerate conditional jump. */
899 if (!single_succ_p (bb))
900 {
901 BRANCH_EDGE (bb)->probability = combined_probability;
902 FALLTHRU_EDGE (bb)->probability
903 = REG_BR_PROB_BASE - combined_probability;
904 }
905 }
906 else if (!single_succ_p (bb))
907 {
908 int prob = XINT (prob_note, 0);
909
910 BRANCH_EDGE (bb)->probability = prob;
911 FALLTHRU_EDGE (bb)->probability = REG_BR_PROB_BASE - prob;
912 }
913 else
914 single_succ_edge (bb)->probability = REG_BR_PROB_BASE;
915 }
916
917 /* Edge prediction hash traits. */
918
919 struct predictor_hash: pointer_hash <edge_prediction>
920 {
921
922 static inline hashval_t hash (const edge_prediction *);
923 static inline bool equal (const edge_prediction *, const edge_prediction *);
924 };
925
926 /* Calculate hash value of an edge prediction P based on predictor and
927 normalized probability. */
928
929 inline hashval_t
930 predictor_hash::hash (const edge_prediction *p)
931 {
932 inchash::hash hstate;
933 hstate.add_int (p->ep_predictor);
934
935 int prob = p->ep_probability;
936 if (prob > REG_BR_PROB_BASE / 2)
937 prob = REG_BR_PROB_BASE - prob;
938
939 hstate.add_int (prob);
940
941 return hstate.end ();
942 }
943
944 /* Return true if edge predictions P1 and P2 use the same predictor and
945 have equal (or opposite) probabilities. */
946
947 inline bool
948 predictor_hash::equal (const edge_prediction *p1, const edge_prediction *p2)
949 {
950 return (p1->ep_predictor == p2->ep_predictor
951 && (p1->ep_probability == p2->ep_probability
952 || p1->ep_probability == REG_BR_PROB_BASE - p2->ep_probability));
953 }
954
955 struct predictor_hash_traits: predictor_hash,
956 typed_noop_remove <edge_prediction *> {};
957
958 /* Return true if edge prediction P is not in DATA hash set. */
959
960 static bool
961 not_removed_prediction_p (edge_prediction *p, void *data)
962 {
963 hash_set<edge_prediction *> *remove = (hash_set<edge_prediction *> *) data;
964 return !remove->contains (p);
965 }
966
967 /* Prune predictions for a basic block BB. Currently we do the following
968 clean-up steps:
969
970 1) remove a pair of predictions that guess the same probability
971 (other than 1/2) for both outgoing edges
972 2) remove duplicates of a prediction that attaches the same probability
973 to a single edge
974
975 */
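/* For instance, if some predictor records, say, a 90% probability on both
   outgoing edges of BB, the two guesses contradict each other and both are
   dropped (case 1); if it records the same 90% twice on one edge, only a
   single copy is kept (case 2).  */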
976
977 static void
978 prune_predictions_for_bb (basic_block bb)
979 {
980 edge_prediction **preds = bb_predictions->get (bb);
981
982 if (preds)
983 {
984 hash_table <predictor_hash_traits> s (13);
985 hash_set <edge_prediction *> remove;
986
987 /* Step 1: identify predictors that should be removed. */
988 for (edge_prediction *pred = *preds; pred; pred = pred->ep_next)
989 {
990 edge_prediction *existing = s.find (pred);
991 if (existing)
992 {
993 if (pred->ep_edge == existing->ep_edge
994 && pred->ep_probability == existing->ep_probability)
995 {
996 /* Remove a duplicate predictor. */
997 dump_prediction (dump_file, pred->ep_predictor,
998 pred->ep_probability, bb,
999 SINGLE_EDGE_DUPLICATE, pred->ep_edge);
1000
1001 remove.add (pred);
1002 }
1003 else if (pred->ep_edge != existing->ep_edge
1004 && pred->ep_probability == existing->ep_probability
1005 && pred->ep_probability != REG_BR_PROB_BASE / 2)
1006 {
1007 /* Remove both predictors as they predict the same
1008 for both edges. */
1009 dump_prediction (dump_file, existing->ep_predictor,
1010 pred->ep_probability, bb,
1011 EDGE_PAIR_DUPLICATE,
1012 existing->ep_edge);
1013 dump_prediction (dump_file, pred->ep_predictor,
1014 pred->ep_probability, bb,
1015 EDGE_PAIR_DUPLICATE,
1016 pred->ep_edge);
1017
1018 remove.add (existing);
1019 remove.add (pred);
1020 }
1021 }
1022
1023 edge_prediction **slot2 = s.find_slot (pred, INSERT);
1024 *slot2 = pred;
1025 }
1026
1027 /* Step 2: Remove predictors. */
1028 filter_predictions (preds, not_removed_prediction_p, &remove);
1029 }
1030 }
1031
1032 /* Combine predictions into a single probability and store it into the CFG.
1033 Remove the now useless prediction entries.
1034 If DRY_RUN is set, only produce dumps and do not modify profile. */
1035
1036 static void
1037 combine_predictions_for_bb (basic_block bb, bool dry_run)
1038 {
1039 int best_probability = PROB_EVEN;
1040 enum br_predictor best_predictor = END_PREDICTORS;
1041 int combined_probability = REG_BR_PROB_BASE / 2;
1042 int d;
1043 bool first_match = false;
1044 bool found = false;
1045 struct edge_prediction *pred;
1046 int nedges = 0;
1047 edge e, first = NULL, second = NULL;
1048 edge_iterator ei;
1049
1050 FOR_EACH_EDGE (e, ei, bb->succs)
1051 if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
1052 {
1053 nedges ++;
1054 if (first && !second)
1055 second = e;
1056 if (!first)
1057 first = e;
1058 }
1059
1060 /* When there is no successor or only one choice, prediction is easy.
1061
1062 We are lazy for now and predict only basic blocks with two outgoing
1063 edges. It is possible to predict the generic case too, but we would have to
1064 ignore the first match heuristic and do more involved combining. Implement
1065 this later. */
1066 if (nedges != 2)
1067 {
1068 if (!bb->count && !dry_run)
1069 set_even_probabilities (bb);
1070 clear_bb_predictions (bb);
1071 if (dump_file)
1072 fprintf (dump_file, "%i edges in bb %i predicted to even probabilities\n",
1073 nedges, bb->index);
1074 return;
1075 }
1076
1077 if (dump_file)
1078 fprintf (dump_file, "Predictions for bb %i\n", bb->index);
1079
1080 prune_predictions_for_bb (bb);
1081
1082 edge_prediction **preds = bb_predictions->get (bb);
1083
1084 if (preds)
1085 {
1086 /* We implement the "first match" heuristic and use the probability guessed
1087 by the predictor with the smallest index. */
1088 for (pred = *preds; pred; pred = pred->ep_next)
1089 {
1090 enum br_predictor predictor = pred->ep_predictor;
1091 int probability = pred->ep_probability;
1092
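/* Normalize so that PROBABILITY always refers to the FIRST edge;
   a prediction recorded on the other edge is simply inverted.  */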
1093 if (pred->ep_edge != first)
1094 probability = REG_BR_PROB_BASE - probability;
1095
1096 found = true;
1097 /* The first match heuristic would be wildly confused if we predicted
1098 both directions. */
1099 if (best_predictor > predictor)
1100 {
1101 struct edge_prediction *pred2;
1102 int prob = probability;
1103
1104 for (pred2 = (struct edge_prediction *) *preds;
1105 pred2; pred2 = pred2->ep_next)
1106 if (pred2 != pred && pred2->ep_predictor == pred->ep_predictor)
1107 {
1108 int probability2 = pred2->ep_probability;
1109
1110 if (pred2->ep_edge != first)
1111 probability2 = REG_BR_PROB_BASE - probability2;
1112
1113 if ((probability < REG_BR_PROB_BASE / 2) !=
1114 (probability2 < REG_BR_PROB_BASE / 2))
1115 break;
1116
1117 /* If the same predictor later gave a better result, go for it! */
1118 if ((probability >= REG_BR_PROB_BASE / 2 && (probability2 > probability))
1119 || (probability <= REG_BR_PROB_BASE / 2 && (probability2 < probability)))
1120 prob = probability2;
1121 }
1122 if (!pred2)
1123 best_probability = prob, best_predictor = predictor;
1124 }
1125
1126 d = (combined_probability * probability
1127 + (REG_BR_PROB_BASE - combined_probability)
1128 * (REG_BR_PROB_BASE - probability));
1129
1130 /* Use FP math to avoid overflows of 32bit integers. */
1131 if (d == 0)
1132 /* If one probability is 0% and one 100%, avoid division by zero. */
1133 combined_probability = REG_BR_PROB_BASE / 2;
1134 else
1135 combined_probability = (((double) combined_probability)
1136 * probability
1137 * REG_BR_PROB_BASE / d + 0.5);
1138 }
1139 }
1140
1141 /* Decide which heuristic to use. In case we didn't match anything,
1142 use the no_prediction heuristic; in case we did match, use either
1143 first match or Dempster-Shafer theory depending on the flags. */
1144
1145 if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
1146 first_match = true;
1147
1148 if (!found)
1149 dump_prediction (dump_file, PRED_NO_PREDICTION, combined_probability, bb);
1150 else
1151 {
1152 dump_prediction (dump_file, PRED_DS_THEORY, combined_probability, bb,
1153 !first_match ? NONE : IGNORED);
1154 dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability, bb,
1155 first_match ? NONE : IGNORED);
1156 }
1157
1158 if (first_match)
1159 combined_probability = best_probability;
1160 dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb);
1161
1162 if (preds)
1163 {
1164 for (pred = (struct edge_prediction *) *preds; pred; pred = pred->ep_next)
1165 {
1166 enum br_predictor predictor = pred->ep_predictor;
1167 int probability = pred->ep_probability;
1168
1169 dump_prediction (dump_file, predictor, probability, bb,
1170 (!first_match || best_predictor == predictor)
1171 ? NONE : IGNORED, pred->ep_edge);
1172 }
1173 }
1174 clear_bb_predictions (bb);
1175
1176 if (!bb->count && !dry_run)
1177 {
1178 first->probability = combined_probability;
1179 second->probability = REG_BR_PROB_BASE - combined_probability;
1180 }
1181 }
1182
1183 /* Check if T1 and T2 satisfy the IV_COMPARE condition.
1184 Return the SSA_NAME if the condition is satisfied, NULL otherwise.
1185
1186 T1 and T2 should be one of the following cases:
1187 1. T1 is SSA_NAME, T2 is NULL
1188 2. T1 is SSA_NAME, T2 is INTEGER_CST between [-4, 4]
1189 3. T2 is SSA_NAME, T1 is INTEGER_CST between [-4, 4] */
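/* For example, given T1 = i_3 and T2 = 2 this returns i_3, whereas
   T1 = i_3 and T2 = 16 yields NULL because 16 is outside [-4, 4].  */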
1190
1191 static tree
1192 strips_small_constant (tree t1, tree t2)
1193 {
1194 tree ret = NULL;
1195 int value = 0;
1196
1197 if (!t1)
1198 return NULL;
1199 else if (TREE_CODE (t1) == SSA_NAME)
1200 ret = t1;
1201 else if (tree_fits_shwi_p (t1))
1202 value = tree_to_shwi (t1);
1203 else
1204 return NULL;
1205
1206 if (!t2)
1207 return ret;
1208 else if (tree_fits_shwi_p (t2))
1209 value = tree_to_shwi (t2);
1210 else if (TREE_CODE (t2) == SSA_NAME)
1211 {
1212 if (ret)
1213 return NULL;
1214 else
1215 ret = t2;
1216 }
1217
1218 if (value <= 4 && value >= -4)
1219 return ret;
1220 else
1221 return NULL;
1222 }
1223
1224 /* Return the SSA_NAME in T or T's operands.
1225 Return NULL if SSA_NAME cannot be found. */
1226
1227 static tree
1228 get_base_value (tree t)
1229 {
1230 if (TREE_CODE (t) == SSA_NAME)
1231 return t;
1232
1233 if (!BINARY_CLASS_P (t))
1234 return NULL;
1235
1236 switch (TREE_OPERAND_LENGTH (t))
1237 {
1238 case 1:
1239 return strips_small_constant (TREE_OPERAND (t, 0), NULL);
1240 case 2:
1241 return strips_small_constant (TREE_OPERAND (t, 0),
1242 TREE_OPERAND (t, 1));
1243 default:
1244 return NULL;
1245 }
1246 }
1247
1248 /* Check the compare STMT in LOOP. If it compares an induction
1249 variable to a loop invariant, return true, and save
1250 LOOP_INVARIANT, COMPARE_CODE, LOOP_STEP and LOOP_IV_BASE.
1251 Otherwise return false and set LOOP_INVARIANT to NULL. */
1252
1253 static bool
1254 is_comparison_with_loop_invariant_p (gcond *stmt, struct loop *loop,
1255 tree *loop_invariant,
1256 enum tree_code *compare_code,
1257 tree *loop_step,
1258 tree *loop_iv_base)
1259 {
1260 tree op0, op1, bound, base;
1261 affine_iv iv0, iv1;
1262 enum tree_code code;
1263 tree step;
1264
1265 code = gimple_cond_code (stmt);
1266 *loop_invariant = NULL;
1267
1268 switch (code)
1269 {
1270 case GT_EXPR:
1271 case GE_EXPR:
1272 case NE_EXPR:
1273 case LT_EXPR:
1274 case LE_EXPR:
1275 case EQ_EXPR:
1276 break;
1277
1278 default:
1279 return false;
1280 }
1281
1282 op0 = gimple_cond_lhs (stmt);
1283 op1 = gimple_cond_rhs (stmt);
1284
1285 if ((TREE_CODE (op0) != SSA_NAME && TREE_CODE (op0) != INTEGER_CST)
1286 || (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op1) != INTEGER_CST))
1287 return false;
1288 if (!simple_iv (loop, loop_containing_stmt (stmt), op0, &iv0, true))
1289 return false;
1290 if (!simple_iv (loop, loop_containing_stmt (stmt), op1, &iv1, true))
1291 return false;
1292 if (TREE_CODE (iv0.step) != INTEGER_CST
1293 || TREE_CODE (iv1.step) != INTEGER_CST)
1294 return false;
1295 if ((integer_zerop (iv0.step) && integer_zerop (iv1.step))
1296 || (!integer_zerop (iv0.step) && !integer_zerop (iv1.step)))
1297 return false;
1298
1299 if (integer_zerop (iv0.step))
1300 {
1301 if (code != NE_EXPR && code != EQ_EXPR)
1302 code = invert_tree_comparison (code, false);
1303 bound = iv0.base;
1304 base = iv1.base;
1305 if (tree_fits_shwi_p (iv1.step))
1306 step = iv1.step;
1307 else
1308 return false;
1309 }
1310 else
1311 {
1312 bound = iv1.base;
1313 base = iv0.base;
1314 if (tree_fits_shwi_p (iv0.step))
1315 step = iv0.step;
1316 else
1317 return false;
1318 }
1319
1320 if (TREE_CODE (bound) != INTEGER_CST)
1321 bound = get_base_value (bound);
1322 if (!bound)
1323 return false;
1324 if (TREE_CODE (base) != INTEGER_CST)
1325 base = get_base_value (base);
1326 if (!base)
1327 return false;
1328
1329 *loop_invariant = bound;
1330 *compare_code = code;
1331 *loop_step = step;
1332 *loop_iv_base = base;
1333 return true;
1334 }
1335
1336 /* Compare two SSA_NAMEs: returns TRUE if T1 and T2 are value coherent. */
1337
1338 static bool
1339 expr_coherent_p (tree t1, tree t2)
1340 {
1341 gimple *stmt;
1342 tree ssa_name_1 = NULL;
1343 tree ssa_name_2 = NULL;
1344
1345 gcc_assert (TREE_CODE (t1) == SSA_NAME || TREE_CODE (t1) == INTEGER_CST);
1346 gcc_assert (TREE_CODE (t2) == SSA_NAME || TREE_CODE (t2) == INTEGER_CST);
1347
1348 if (t1 == t2)
1349 return true;
1350
1351 if (TREE_CODE (t1) == INTEGER_CST && TREE_CODE (t2) == INTEGER_CST)
1352 return true;
1353 if (TREE_CODE (t1) == INTEGER_CST || TREE_CODE (t2) == INTEGER_CST)
1354 return false;
1355
1356 /* Check to see if t1 is expressed/defined with t2. */
1357 stmt = SSA_NAME_DEF_STMT (t1);
1358 gcc_assert (stmt != NULL);
1359 if (is_gimple_assign (stmt))
1360 {
1361 ssa_name_1 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
1362 if (ssa_name_1 && ssa_name_1 == t2)
1363 return true;
1364 }
1365
1366 /* Check to see if t2 is expressed/defined with t1. */
1367 stmt = SSA_NAME_DEF_STMT (t2);
1368 gcc_assert (stmt != NULL);
1369 if (is_gimple_assign (stmt))
1370 {
1371 ssa_name_2 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
1372 if (ssa_name_2 && ssa_name_2 == t1)
1373 return true;
1374 }
1375
1376 /* Compare if t1 and t2's def_stmts are identical. */
1377 if (ssa_name_2 != NULL && ssa_name_1 == ssa_name_2)
1378 return true;
1379 else
1380 return false;
1381 }
1382
1383 /* Return true if BB is predicted by one of the loop heuristics. */
1384
1385 static bool
1386 predicted_by_loop_heuristics_p (basic_block bb)
1387 {
1388 struct edge_prediction *i;
1389 edge_prediction **preds = bb_predictions->get (bb);
1390
1391 if (!preds)
1392 return false;
1393
1394 for (i = *preds; i; i = i->ep_next)
1395 if (i->ep_predictor == PRED_LOOP_ITERATIONS_GUESSED
1396 || i->ep_predictor == PRED_LOOP_ITERATIONS_MAX
1397 || i->ep_predictor == PRED_LOOP_ITERATIONS
1398 || i->ep_predictor == PRED_LOOP_EXIT
1399 || i->ep_predictor == PRED_LOOP_EXTRA_EXIT)
1400 return true;
1401 return false;
1402 }
1403
1404 /* Predict branch probability of BB when BB contains a branch that compares
1405 an induction variable in LOOP with LOOP_IV_BASE_VAR to LOOP_BOUND_VAR. The
1406 loop exit is compared using LOOP_BOUND_CODE, with step of LOOP_BOUND_STEP.
1407
1408 E.g.
1409 for (int i = 0; i < bound; i++) {
1410 if (i < bound - 2)
1411 computation_1();
1412 else
1413 computation_2();
1414 }
1415
1416 In this loop, we will predict the branch inside the loop to be taken. */
1417
1418 static void
1419 predict_iv_comparison (struct loop *loop, basic_block bb,
1420 tree loop_bound_var,
1421 tree loop_iv_base_var,
1422 enum tree_code loop_bound_code,
1423 int loop_bound_step)
1424 {
1425 gimple *stmt;
1426 tree compare_var, compare_base;
1427 enum tree_code compare_code;
1428 tree compare_step_var;
1429 edge then_edge;
1430 edge_iterator ei;
1431
1432 if (predicted_by_loop_heuristics_p (bb))
1433 return;
1434
1435 stmt = last_stmt (bb);
1436 if (!stmt || gimple_code (stmt) != GIMPLE_COND)
1437 return;
1438 if (!is_comparison_with_loop_invariant_p (as_a <gcond *> (stmt),
1439 loop, &compare_var,
1440 &compare_code,
1441 &compare_step_var,
1442 &compare_base))
1443 return;
1444
1445 /* Find the taken edge. */
1446 FOR_EACH_EDGE (then_edge, ei, bb->succs)
1447 if (then_edge->flags & EDGE_TRUE_VALUE)
1448 break;
1449
1450 /* When comparing an IV to a loop invariant, NE is more likely to be
1451 taken while EQ is more likely to be not-taken. */
1452 if (compare_code == NE_EXPR)
1453 {
1454 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1455 return;
1456 }
1457 else if (compare_code == EQ_EXPR)
1458 {
1459 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1460 return;
1461 }
1462
1463 if (!expr_coherent_p (loop_iv_base_var, compare_base))
1464 return;
1465
1466 /* If loop bound, base and compare bound are all constants, we can
1467 calculate the probability directly. */
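/* For instance, for a loop like the one in the comment above with bound 100,
   base 0, step 1 and a compare bound of 90, loop_count is 100, compare_count
   is 90, and the then edge is predicted with a probability of about 90%.  */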
1468 if (tree_fits_shwi_p (loop_bound_var)
1469 && tree_fits_shwi_p (compare_var)
1470 && tree_fits_shwi_p (compare_base))
1471 {
1472 int probability;
1473 bool overflow, overall_overflow = false;
1474 widest_int compare_count, tem;
1475
1476 /* (loop_bound - base) / compare_step */
1477 tem = wi::sub (wi::to_widest (loop_bound_var),
1478 wi::to_widest (compare_base), SIGNED, &overflow);
1479 overall_overflow |= overflow;
1480 widest_int loop_count = wi::div_trunc (tem,
1481 wi::to_widest (compare_step_var),
1482 SIGNED, &overflow);
1483 overall_overflow |= overflow;
1484
1485 if (!wi::neg_p (wi::to_widest (compare_step_var))
1486 ^ (compare_code == LT_EXPR || compare_code == LE_EXPR))
1487 {
1488 /* (loop_bound - compare_bound) / compare_step */
1489 tem = wi::sub (wi::to_widest (loop_bound_var),
1490 wi::to_widest (compare_var), SIGNED, &overflow);
1491 overall_overflow |= overflow;
1492 compare_count = wi::div_trunc (tem, wi::to_widest (compare_step_var),
1493 SIGNED, &overflow);
1494 overall_overflow |= overflow;
1495 }
1496 else
1497 {
1498 /* (compare_bound - base) / compare_step */
1499 tem = wi::sub (wi::to_widest (compare_var),
1500 wi::to_widest (compare_base), SIGNED, &overflow);
1501 overall_overflow |= overflow;
1502 compare_count = wi::div_trunc (tem, wi::to_widest (compare_step_var),
1503 SIGNED, &overflow);
1504 overall_overflow |= overflow;
1505 }
1506 if (compare_code == LE_EXPR || compare_code == GE_EXPR)
1507 ++compare_count;
1508 if (loop_bound_code == LE_EXPR || loop_bound_code == GE_EXPR)
1509 ++loop_count;
1510 if (wi::neg_p (compare_count))
1511 compare_count = 0;
1512 if (wi::neg_p (loop_count))
1513 loop_count = 0;
1514 if (loop_count == 0)
1515 probability = 0;
1516 else if (wi::cmps (compare_count, loop_count) == 1)
1517 probability = REG_BR_PROB_BASE;
1518 else
1519 {
1520 tem = compare_count * REG_BR_PROB_BASE;
1521 tem = wi::udiv_trunc (tem, loop_count);
1522 probability = tem.to_uhwi ();
1523 }
1524
1525 /* FIXME: The branch prediction seems broken. It has only 20% hitrate. */
1526 if (!overall_overflow)
1527 predict_edge (then_edge, PRED_LOOP_IV_COMPARE, probability);
1528
1529 return;
1530 }
1531
1532 if (expr_coherent_p (loop_bound_var, compare_var))
1533 {
1534 if ((loop_bound_code == LT_EXPR || loop_bound_code == LE_EXPR)
1535 && (compare_code == LT_EXPR || compare_code == LE_EXPR))
1536 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1537 else if ((loop_bound_code == GT_EXPR || loop_bound_code == GE_EXPR)
1538 && (compare_code == GT_EXPR || compare_code == GE_EXPR))
1539 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1540 else if (loop_bound_code == NE_EXPR)
1541 {
1542 /* If the loop backedge condition is "(i != bound)", we do
1543 the comparison based on the step of IV:
1544 * step < 0 : backedge condition is like (i > bound)
1545 * step > 0 : backedge condition is like (i < bound) */
1546 gcc_assert (loop_bound_step != 0);
1547 if (loop_bound_step > 0
1548 && (compare_code == LT_EXPR
1549 || compare_code == LE_EXPR))
1550 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1551 else if (loop_bound_step < 0
1552 && (compare_code == GT_EXPR
1553 || compare_code == GE_EXPR))
1554 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1555 else
1556 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1557 }
1558 else
1559 /* The branch is predicted not-taken if loop_bound_code is
1560 the opposite of compare_code. */
1561 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1562 }
1563 else if (expr_coherent_p (loop_iv_base_var, compare_var))
1564 {
1565 /* For cases like:
1566 for (i = s; i < h; i++)
1567 if (i > s + 2) ....
1568 The branch should be predicted taken. */
1569 if (loop_bound_step > 0
1570 && (compare_code == GT_EXPR || compare_code == GE_EXPR))
1571 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1572 else if (loop_bound_step < 0
1573 && (compare_code == LT_EXPR || compare_code == LE_EXPR))
1574 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1575 else
1576 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1577 }
1578 }
1579
1580 /* Predict for extra loop exits that will lead to EXIT_EDGE. The extra loop
1581 exits result from short-circuit conditions that will generate an
1582 if_tmp. E.g.:
1583
1584 if (foo() || global > 10)
1585 break;
1586
1587 This will be translated into:
1588
1589 BB3:
1590 loop header...
1591 BB4:
1592 if foo() goto BB6 else goto BB5
1593 BB5:
1594 if global > 10 goto BB6 else goto BB7
1595 BB6:
1596 goto BB7
1597 BB7:
1598 iftmp = (PHI 0(BB5), 1(BB6))
1599 if iftmp == 1 goto BB8 else goto BB3
1600 BB8:
1601 outside of the loop...
1602
1603 The edge BB7->BB8 is a loop exit because BB8 is outside of the loop.
1604 From the dataflow, we can infer that BB4->BB6 and BB5->BB6 are also loop
1605 exits. This function takes BB7->BB8 as input, and finds out the extra loop
1606 exits to predict them using PRED_LOOP_EXTRA_EXIT. */
1607
1608 static void
1609 predict_extra_loop_exits (edge exit_edge)
1610 {
1611 unsigned i;
1612 bool check_value_one;
1613 gimple *lhs_def_stmt;
1614 gphi *phi_stmt;
1615 tree cmp_rhs, cmp_lhs;
1616 gimple *last;
1617 gcond *cmp_stmt;
1618
1619 last = last_stmt (exit_edge->src);
1620 if (!last)
1621 return;
1622 cmp_stmt = dyn_cast <gcond *> (last);
1623 if (!cmp_stmt)
1624 return;
1625
1626 cmp_rhs = gimple_cond_rhs (cmp_stmt);
1627 cmp_lhs = gimple_cond_lhs (cmp_stmt);
1628 if (!TREE_CONSTANT (cmp_rhs)
1629 || !(integer_zerop (cmp_rhs) || integer_onep (cmp_rhs)))
1630 return;
1631 if (TREE_CODE (cmp_lhs) != SSA_NAME)
1632 return;
1633
1634 /* If check_value_one is true, only the phi_args with value '1' will lead
1635 to loop exit. Otherwise, only the phi_args with value '0' will lead to
1636 loop exit. */
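/* E.g. for an exit condition 'if (iftmp == 1)' taken on its true edge,
   check_value_one is true and only PHI arguments equal to 1 are treated
   as leading to the exit.  */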
1637 check_value_one = (((integer_onep (cmp_rhs))
1638 ^ (gimple_cond_code (cmp_stmt) == EQ_EXPR))
1639 ^ ((exit_edge->flags & EDGE_TRUE_VALUE) != 0));
1640
1641 lhs_def_stmt = SSA_NAME_DEF_STMT (cmp_lhs);
1642 if (!lhs_def_stmt)
1643 return;
1644
1645 phi_stmt = dyn_cast <gphi *> (lhs_def_stmt);
1646 if (!phi_stmt)
1647 return;
1648
1649 for (i = 0; i < gimple_phi_num_args (phi_stmt); i++)
1650 {
1651 edge e1;
1652 edge_iterator ei;
1653 tree val = gimple_phi_arg_def (phi_stmt, i);
1654 edge e = gimple_phi_arg_edge (phi_stmt, i);
1655
1656 if (!TREE_CONSTANT (val) || !(integer_zerop (val) || integer_onep (val)))
1657 continue;
1658 if ((check_value_one ^ integer_onep (val)) == 1)
1659 continue;
1660 if (EDGE_COUNT (e->src->succs) != 1)
1661 {
1662 predict_paths_leading_to_edge (e, PRED_LOOP_EXTRA_EXIT, NOT_TAKEN);
1663 continue;
1664 }
1665
1666 FOR_EACH_EDGE (e1, ei, e->src->preds)
1667 predict_paths_leading_to_edge (e1, PRED_LOOP_EXTRA_EXIT, NOT_TAKEN);
1668 }
1669 }
1670
1671
1672 /* Predict edge probabilities by exploiting loop structure. */
1673
1674 static void
1675 predict_loops (void)
1676 {
1677 struct loop *loop;
1678
1679 /* Try to predict out blocks in a loop that are not part of a
1680 natural loop. */
1681 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
1682 {
1683 basic_block bb, *bbs;
1684 unsigned j, n_exits = 0;
1685 vec<edge> exits;
1686 struct tree_niter_desc niter_desc;
1687 edge ex;
1688 struct nb_iter_bound *nb_iter;
1689 enum tree_code loop_bound_code = ERROR_MARK;
1690 tree loop_bound_step = NULL;
1691 tree loop_bound_var = NULL;
1692 tree loop_iv_base = NULL;
1693 gcond *stmt = NULL;
1694
1695 exits = get_loop_exit_edges (loop);
1696 FOR_EACH_VEC_ELT (exits, j, ex)
1697 if (!(ex->flags & (EDGE_EH | EDGE_ABNORMAL_CALL | EDGE_FAKE)))
1698 n_exits ++;
1699 if (!n_exits)
1700 {
1701 exits.release ();
1702 continue;
1703 }
1704
1705 FOR_EACH_VEC_ELT (exits, j, ex)
1706 {
1707 tree niter = NULL;
1708 HOST_WIDE_INT nitercst;
1709 int max = PARAM_VALUE (PARAM_MAX_PREDICTED_ITERATIONS);
1710 int probability;
1711 enum br_predictor predictor;
1712 widest_int nit;
1713
1714 if (ex->flags & (EDGE_EH | EDGE_ABNORMAL_CALL | EDGE_FAKE))
1715 continue;
1716 /* Loop heuristics do not expect the exit conditional to be inside an
1717 inner loop. We predict from the innermost to the outermost loop. */
1718 if (predicted_by_loop_heuristics_p (ex->src))
1719 continue;
1720 predict_extra_loop_exits (ex);
1721
1722 if (number_of_iterations_exit (loop, ex, &niter_desc, false, false))
1723 niter = niter_desc.niter;
1724 if (!niter || TREE_CODE (niter_desc.niter) != INTEGER_CST)
1725 niter = loop_niter_by_eval (loop, ex);
1726
1727 if (TREE_CODE (niter) == INTEGER_CST)
1728 {
1729 if (tree_fits_uhwi_p (niter)
1730 && max
1731 && compare_tree_int (niter, max - 1) == -1)
1732 nitercst = tree_to_uhwi (niter) + 1;
1733 else
1734 nitercst = max;
1735 predictor = PRED_LOOP_ITERATIONS;
1736 }
1737 /* If we have just one exit and we can derive some information about
1738 the number of iterations of the loop from the statements inside
1739 the loop, use it to predict this exit. */
1740 else if (n_exits == 1
1741 && estimated_stmt_executions (loop, &nit))
1742 {
1743 if (wi::gtu_p (nit, max))
1744 nitercst = max;
1745 else
1746 nitercst = nit.to_shwi ();
1747 predictor = PRED_LOOP_ITERATIONS_GUESSED;
1748 }
1749 /* If we have a likely upper bound, trust it for very small iteration
1750 counts. Such loops would otherwise get mispredicted by the standard
1751 LOOP_EXIT heuristics. */
1752 else if (n_exits == 1
1753 && likely_max_stmt_executions (loop, &nit)
1754 && wi::ltu_p (nit,
1755 RDIV (REG_BR_PROB_BASE,
1756 REG_BR_PROB_BASE
1757 - predictor_info
1758 [PRED_LOOP_EXIT].hitrate)))
1759 {
1760 nitercst = nit.to_shwi ();
1761 predictor = PRED_LOOP_ITERATIONS_MAX;
1762 }
1763 else
1764 continue;
1765
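/* Convert the expected iteration count into an exit probability: a loop
   expected to iterate NITERCST times exits with probability roughly
   1/NITERCST, e.g. about 10% for a loop running 10 times.  */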
1766 gcc_checking_assert (nitercst);
1767 probability = RDIV (REG_BR_PROB_BASE, nitercst);
1768 predict_edge (ex, predictor, probability);
1769 }
1770 exits.release ();
1771
1772 /* Find information about loop bound variables. */
1773 for (nb_iter = loop->bounds; nb_iter;
1774 nb_iter = nb_iter->next)
1775 if (nb_iter->stmt
1776 && gimple_code (nb_iter->stmt) == GIMPLE_COND)
1777 {
1778 stmt = as_a <gcond *> (nb_iter->stmt);
1779 break;
1780 }
1781 if (!stmt && last_stmt (loop->header)
1782 && gimple_code (last_stmt (loop->header)) == GIMPLE_COND)
1783 stmt = as_a <gcond *> (last_stmt (loop->header));
1784 if (stmt)
1785 is_comparison_with_loop_invariant_p (stmt, loop,
1786 &loop_bound_var,
1787 &loop_bound_code,
1788 &loop_bound_step,
1789 &loop_iv_base);
1790
1791 bbs = get_loop_body (loop);
1792
1793 for (j = 0; j < loop->num_nodes; j++)
1794 {
1795 int header_found = 0;
1796 edge e;
1797 edge_iterator ei;
1798
1799 bb = bbs[j];
1800
1801 /* Bypass loop heuristics on continue statements. These
1802 statements construct loops via "non-loop" constructs
1803 in the source language and are better handled
1804 separately. */
1805 if (predicted_by_p (bb, PRED_CONTINUE))
1806 continue;
1807
1808 /* Loop exit heuristics - predict as not taken an edge exiting the loop
1809 if the conditional has no successor that is the loop header. */
1810 if (!header_found
1811 /* If we already used more reliable loop exit predictors, do not
1812 bother with PRED_LOOP_EXIT. */
1813 && !predicted_by_loop_heuristics_p (bb))
1814 {
1815 /* For a loop with many exits we don't want to predict all exits
1816 with a fairly large probability, because if all exits are
1817 considered in a row, the loop would be predicted to iterate
1818 almost never. The code dividing the probability by the number of
1819 exits is very rough. It should compute the number of exits
1820 taken on each path through the function (not the overall number
1821 of exits, which might be a lot higher for loops with wide switch
1822 statements in them) and take the n-th root.
1823
1824 We limit the minimal probability to 2% to keep
1825 EDGE_PROBABILITY_RELIABLE from trusting the branch prediction,
1826 as this was causing a regression in a perl benchmark containing such
1827 a wide loop. */
1828
1829 int probability = ((REG_BR_PROB_BASE
1830 - predictor_info [(int) PRED_LOOP_EXIT].hitrate)
1831 / n_exits);
1832 if (probability < HITRATE (2))
1833 probability = HITRATE (2);
1834 FOR_EACH_EDGE (e, ei, bb->succs)
1835 if (e->dest->index < NUM_FIXED_BLOCKS
1836 || !flow_bb_inside_loop_p (loop, e->dest))
1837 predict_edge (e, PRED_LOOP_EXIT, probability);
1838 }
1839 if (loop_bound_var)
1840 predict_iv_comparison (loop, bb, loop_bound_var, loop_iv_base,
1841 loop_bound_code,
1842 tree_to_shwi (loop_bound_step));
1843 }
1844
1845 /* Free basic blocks from get_loop_body. */
1846 free (bbs);
1847 }
1848 }
1849
1850 /* Attempt to predict probabilities of BB outgoing edges using local
1851 properties. */
1852 static void
1853 bb_estimate_probability_locally (basic_block bb)
1854 {
1855 rtx_insn *last_insn = BB_END (bb);
1856 rtx cond;
1857
1858 if (! can_predict_insn_p (last_insn))
1859 return;
1860 cond = get_condition (last_insn, NULL, false, false);
1861 if (! cond)
1862 return;
1863
1864 /* Try "pointer heuristic."
1865 A comparison ptr == 0 is predicted as false.
1866 Similarly, a comparison ptr1 == ptr2 is predicted as false. */
1867 if (COMPARISON_P (cond)
1868 && ((REG_P (XEXP (cond, 0)) && REG_POINTER (XEXP (cond, 0)))
1869 || (REG_P (XEXP (cond, 1)) && REG_POINTER (XEXP (cond, 1)))))
1870 {
1871 if (GET_CODE (cond) == EQ)
1872 predict_insn_def (last_insn, PRED_POINTER, NOT_TAKEN);
1873 else if (GET_CODE (cond) == NE)
1874 predict_insn_def (last_insn, PRED_POINTER, TAKEN);
1875 }
1876 else
1877
1878 /* Try "opcode heuristic."
1879 EQ tests are usually false and NE tests are usually true. Also,
1880 most quantities are positive, so we can make the appropriate guesses
1881 about signed comparisons against zero. */
1882 switch (GET_CODE (cond))
1883 {
1884 case CONST_INT:
1885 /* Unconditional branch. */
1886 predict_insn_def (last_insn, PRED_UNCONDITIONAL,
1887 cond == const0_rtx ? NOT_TAKEN : TAKEN);
1888 break;
1889
1890 case EQ:
1891 case UNEQ:
1892 /* Floating point comparisons appear to behave in a very
1893 unpredictable way because of the special role of equality tests in
1894 FP code. */
1895 if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
1896 ;
1897 /* Comparisons with 0 are often used for booleans and there is
1898 nothing useful to predict about them. */
1899 else if (XEXP (cond, 1) == const0_rtx
1900 || XEXP (cond, 0) == const0_rtx)
1901 ;
1902 else
1903 predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, NOT_TAKEN);
1904 break;
1905
1906 case NE:
1907 case LTGT:
1908 /* Floating point comparisons appear to behave in a very
1909 unpredictable way because of the special role of equality tests in
1910 FP code. */
1911 if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
1912 ;
1913 /* Comparisons with 0 are often used for booleans and there is
1914 nothing useful to predict about them. */
1915 else if (XEXP (cond, 1) == const0_rtx
1916 || XEXP (cond, 0) == const0_rtx)
1917 ;
1918 else
1919 predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, TAKEN);
1920 break;
1921
1922 case ORDERED:
1923 predict_insn_def (last_insn, PRED_FPOPCODE, TAKEN);
1924 break;
1925
1926 case UNORDERED:
1927 predict_insn_def (last_insn, PRED_FPOPCODE, NOT_TAKEN);
1928 break;
1929
1930 case LE:
1931 case LT:
1932 if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
1933 || XEXP (cond, 1) == constm1_rtx)
1934 predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, NOT_TAKEN);
1935 break;
1936
1937 case GE:
1938 case GT:
1939 if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
1940 || XEXP (cond, 1) == constm1_rtx)
1941 predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, TAKEN);
1942 break;
1943
1944 default:
1945 break;
1946 }
1947 }
1948
1949 /* Set edge->probability for each successor edge of BB. */
1950 void
1951 guess_outgoing_edge_probabilities (basic_block bb)
1952 {
1953 bb_estimate_probability_locally (bb);
1954 combine_predictions_for_insn (BB_END (bb), bb);
1955 }
1956 \f
1957 static tree expr_expected_value (tree, bitmap, enum br_predictor *predictor);
1958
1959 /* Helper function for expr_expected_value. */
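/* TYPE is the type of the expression whose value is being guessed.  OP0, CODE
   and OP1 describe the expression the same way a GIMPLE assignment right-hand
   side does.  VISITED guards against cycles when following SSA definitions.
   If non-NULL, *PREDICTOR is set to the predictor that supplied the value.  */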
1960
1961 static tree
1962 expr_expected_value_1 (tree type, tree op0, enum tree_code code,
1963 tree op1, bitmap visited, enum br_predictor *predictor)
1964 {
1965 gimple *def;
1966
1967 if (predictor)
1968 *predictor = PRED_UNCONDITIONAL;
1969
1970 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1971 {
1972 if (TREE_CONSTANT (op0))
1973 return op0;
1974
1975 if (code != SSA_NAME)
1976 return NULL_TREE;
1977
1978 def = SSA_NAME_DEF_STMT (op0);
1979
1980 /* If we were already here, break the infinite cycle. */
1981 if (!bitmap_set_bit (visited, SSA_NAME_VERSION (op0)))
1982 return NULL;
1983
1984 if (gimple_code (def) == GIMPLE_PHI)
1985 {
1986 /* All the arguments of the PHI node must have the same expected
1987 constant value. */
1988 int i, n = gimple_phi_num_args (def);
1989 tree val = NULL, new_val;
1990
1991 for (i = 0; i < n; i++)
1992 {
1993 tree arg = PHI_ARG_DEF (def, i);
1994 enum br_predictor predictor2;
1995
1996 /* If this PHI has itself as an argument, we cannot
1997 determine the expected value of this argument. However,
1998 if we can find an expected constant value for the other
1999 PHI args then we can still be sure that this is
2000 likely a constant. So be optimistic and just
2001 continue with the next argument. */
2002 if (arg == PHI_RESULT (def))
2003 continue;
2004
2005 new_val = expr_expected_value (arg, visited, &predictor2);
2006
2007 /* It is difficult to combine value predictors. Simply assume
2008 that the later predictor is weaker and take its prediction. */
2009 if (predictor && *predictor < predictor2)
2010 *predictor = predictor2;
2011 if (!new_val)
2012 return NULL;
2013 if (!val)
2014 val = new_val;
2015 else if (!operand_equal_p (val, new_val, false))
2016 return NULL;
2017 }
2018 return val;
2019 }
2020 if (is_gimple_assign (def))
2021 {
2022 if (gimple_assign_lhs (def) != op0)
2023 return NULL;
2024
2025 return expr_expected_value_1 (TREE_TYPE (gimple_assign_lhs (def)),
2026 gimple_assign_rhs1 (def),
2027 gimple_assign_rhs_code (def),
2028 gimple_assign_rhs2 (def),
2029 visited, predictor);
2030 }
2031
2032 if (is_gimple_call (def))
2033 {
2034 tree decl = gimple_call_fndecl (def);
2035 if (!decl)
2036 {
2037 if (gimple_call_internal_p (def)
2038 && gimple_call_internal_fn (def) == IFN_BUILTIN_EXPECT)
2039 {
2040 gcc_assert (gimple_call_num_args (def) == 3);
2041 tree val = gimple_call_arg (def, 0);
2042 if (TREE_CONSTANT (val))
2043 return val;
2044 if (predictor)
2045 {
2046 tree val2 = gimple_call_arg (def, 2);
2047 gcc_assert (TREE_CODE (val2) == INTEGER_CST
2048 && tree_fits_uhwi_p (val2)
2049 && tree_to_uhwi (val2) < END_PREDICTORS);
2050 *predictor = (enum br_predictor) tree_to_uhwi (val2);
2051 }
2052 return gimple_call_arg (def, 1);
2053 }
2054 return NULL;
2055 }
2056 if (DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
2057 switch (DECL_FUNCTION_CODE (decl))
2058 {
2059 case BUILT_IN_EXPECT:
2060 {
2061 tree val;
2062 if (gimple_call_num_args (def) != 2)
2063 return NULL;
2064 val = gimple_call_arg (def, 0);
2065 if (TREE_CONSTANT (val))
2066 return val;
2067 if (predictor)
2068 *predictor = PRED_BUILTIN_EXPECT;
2069 return gimple_call_arg (def, 1);
2070 }
2071
2072 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_N:
2073 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_1:
2074 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_2:
2075 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_4:
2076 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_8:
2077 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_16:
2078 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE:
2079 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_N:
2080 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_1:
2081 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_2:
2082 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_4:
2083 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_8:
2084 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_16:
2085 /* Assume that any given atomic operation has low contention,
2086 and thus the compare-and-swap operation succeeds. */
2087 if (predictor)
2088 *predictor = PRED_COMPARE_AND_SWAP;
2089 return boolean_true_node;
2090 default:
2091 break;
2092 }
2093 }
2094
2095 return NULL;
2096 }
2097
2098 if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS)
2099 {
2100 tree res;
2101 enum br_predictor predictor2;
2102 op0 = expr_expected_value (op0, visited, predictor);
2103 if (!op0)
2104 return NULL;
2105 op1 = expr_expected_value (op1, visited, &predictor2);
2106 if (predictor && *predictor < predictor2)
2107 *predictor = predictor2;
2108 if (!op1)
2109 return NULL;
2110 res = fold_build2 (code, type, op0, op1);
2111 if (TREE_CONSTANT (res))
2112 return res;
2113 return NULL;
2114 }
2115 if (get_gimple_rhs_class (code) == GIMPLE_UNARY_RHS)
2116 {
2117 tree res;
2118 op0 = expr_expected_value (op0, visited, predictor);
2119 if (!op0)
2120 return NULL;
2121 res = fold_build1 (code, type, op0);
2122 if (TREE_CONSTANT (res))
2123 return res;
2124 return NULL;
2125 }
2126 return NULL;
2127 }
2128
2129 /* Return the constant that EXPR will likely have at execution time, or NULL
2130 if unknown. The function is used by the builtin_expect branch predictor, so
2131 the evidence must come from this construct and possibly from constant folding.
2132
2133 We may want to implement a more involved value guess (such as value range
2134 propagation based prediction), but such tricks shall go into a new
2135 implementation. */
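
/* Illustrative example (not from the sources): given

     long x = __builtin_expect (y, 42);
     if (x == 42)
       ...

   following the SSA definition of x leads back to the __builtin_expect call,
   42 is returned as the expected value, and the comparison folded by
   tree_predict_by_opcode becomes a nonzero constant with PRED_BUILTIN_EXPECT
   recorded as the predictor.  */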
2136
2137 static tree
2138 expr_expected_value (tree expr, bitmap visited,
2139 enum br_predictor *predictor)
2140 {
2141 enum tree_code code;
2142 tree op0, op1;
2143
2144 if (TREE_CONSTANT (expr))
2145 {
2146 if (predictor)
2147 *predictor = PRED_UNCONDITIONAL;
2148 return expr;
2149 }
2150
2151 extract_ops_from_tree (expr, &code, &op0, &op1);
2152 return expr_expected_value_1 (TREE_TYPE (expr),
2153 op0, code, op1, visited, predictor);
2154 }
2155 \f
2156 /* Predict using opcode of the last statement in basic block. */
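/* When the condition value was supplied by __builtin_expect, the probability
   of the then edge is taken from --param builtin-expect-probability (90% by
   default); if the expected value is zero, the complement 100 - percent is
   used instead.  */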
2157 static void
2158 tree_predict_by_opcode (basic_block bb)
2159 {
2160 gimple *stmt = last_stmt (bb);
2161 edge then_edge;
2162 tree op0, op1;
2163 tree type;
2164 tree val;
2165 enum tree_code cmp;
2166 bitmap visited;
2167 edge_iterator ei;
2168 enum br_predictor predictor;
2169
2170 if (!stmt || gimple_code (stmt) != GIMPLE_COND)
2171 return;
2172 FOR_EACH_EDGE (then_edge, ei, bb->succs)
2173 if (then_edge->flags & EDGE_TRUE_VALUE)
2174 break;
2175 op0 = gimple_cond_lhs (stmt);
2176 op1 = gimple_cond_rhs (stmt);
2177 cmp = gimple_cond_code (stmt);
2178 type = TREE_TYPE (op0);
2179 visited = BITMAP_ALLOC (NULL);
2180 val = expr_expected_value_1 (boolean_type_node, op0, cmp, op1, visited,
2181 &predictor);
2182 BITMAP_FREE (visited);
2183 if (val && TREE_CODE (val) == INTEGER_CST)
2184 {
2185 if (predictor == PRED_BUILTIN_EXPECT)
2186 {
2187 int percent = PARAM_VALUE (BUILTIN_EXPECT_PROBABILITY);
2188
2189 gcc_assert (percent >= 0 && percent <= 100);
2190 if (integer_zerop (val))
2191 percent = 100 - percent;
2192 predict_edge (then_edge, PRED_BUILTIN_EXPECT, HITRATE (percent));
2193 }
2194 else
2195 predict_edge (then_edge, predictor,
2196 integer_zerop (val) ? NOT_TAKEN : TAKEN);
2197 }
2198 /* Try "pointer heuristic."
2199 A comparison ptr == 0 is predicted as false.
2200 Similarly, a comparison ptr1 == ptr2 is predicted as false. */
2201 if (POINTER_TYPE_P (type))
2202 {
2203 if (cmp == EQ_EXPR)
2204 predict_edge_def (then_edge, PRED_TREE_POINTER, NOT_TAKEN);
2205 else if (cmp == NE_EXPR)
2206 predict_edge_def (then_edge, PRED_TREE_POINTER, TAKEN);
2207 }
2208 else
2209
2210 /* Try "opcode heuristic."
2211 EQ tests are usually false and NE tests are usually true. Also,
2212 most quantities are positive, so we can make the appropriate guesses
2213 about signed comparisons against zero. */
2214 switch (cmp)
2215 {
2216 case EQ_EXPR:
2217 case UNEQ_EXPR:
2218 /* Floating point comparisons appear to behave in a very
2219 unpredictable way because of the special role of = tests in
2220 FP code. */
2221 if (FLOAT_TYPE_P (type))
2222 ;
2223 /* Comparisons with 0 are often used for booleans and there is
2224 nothing useful to predict about them. */
2225 else if (integer_zerop (op0) || integer_zerop (op1))
2226 ;
2227 else
2228 predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, NOT_TAKEN);
2229 break;
2230
2231 case NE_EXPR:
2232 case LTGT_EXPR:
2233 /* Floating point comparisons appear to behave in a very
2234 unpredictable way because of the special role of = tests in
2235 FP code. */
2236 if (FLOAT_TYPE_P (type))
2237 ;
2238 /* Comparisons with 0 are often used for booleans and there is
2239 nothing useful to predict about them. */
2240 else if (integer_zerop (op0)
2241 || integer_zerop (op1))
2242 ;
2243 else
2244 predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, TAKEN);
2245 break;
2246
2247 case ORDERED_EXPR:
2248 predict_edge_def (then_edge, PRED_TREE_FPOPCODE, TAKEN);
2249 break;
2250
2251 case UNORDERED_EXPR:
2252 predict_edge_def (then_edge, PRED_TREE_FPOPCODE, NOT_TAKEN);
2253 break;
2254
2255 case LE_EXPR:
2256 case LT_EXPR:
2257 if (integer_zerop (op1)
2258 || integer_onep (op1)
2259 || integer_all_onesp (op1)
2260 || real_zerop (op1)
2261 || real_onep (op1)
2262 || real_minus_onep (op1))
2263 predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, NOT_TAKEN);
2264 break;
2265
2266 case GE_EXPR:
2267 case GT_EXPR:
2268 if (integer_zerop (op1)
2269 || integer_onep (op1)
2270 || integer_all_onesp (op1)
2271 || real_zerop (op1)
2272 || real_onep (op1)
2273 || real_minus_onep (op1))
2274 predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, TAKEN);
2275 break;
2276
2277 default:
2278 break;
2279 }
2280 }
2281
2282 /* Try to guess whether the return value means an error code. */
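/* Illustrative examples (not from the sources): "return NULL;" is matched by
   PRED_NULL_RETURN, "return -1;" by PRED_NEGATIVE_RETURN, and "return 42;" by
   PRED_CONST_RETURN; in each case *PREDICTION is set to NOT_TAKEN, so paths
   reaching such returns are considered unlikely.  */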
2283
2284 static enum br_predictor
2285 return_prediction (tree val, enum prediction *prediction)
2286 {
2287 /* VOID. */
2288 if (!val)
2289 return PRED_NO_PREDICTION;
2290 /* Different heuristics for pointers and scalars. */
2291 if (POINTER_TYPE_P (TREE_TYPE (val)))
2292 {
2293 /* NULL is usually not returned. */
2294 if (integer_zerop (val))
2295 {
2296 *prediction = NOT_TAKEN;
2297 return PRED_NULL_RETURN;
2298 }
2299 }
2300 else if (INTEGRAL_TYPE_P (TREE_TYPE (val)))
2301 {
2302 /* Negative return values are often used to indicate
2303 errors. */
2304 if (TREE_CODE (val) == INTEGER_CST
2305 && tree_int_cst_sgn (val) < 0)
2306 {
2307 *prediction = NOT_TAKEN;
2308 return PRED_NEGATIVE_RETURN;
2309 }
2310 /* Constant return values seem to be commonly taken.
2311 Zero/one often represent booleans, so exclude them from the
2312 heuristics. */
2313 if (TREE_CONSTANT (val)
2314 && (!integer_zerop (val) && !integer_onep (val)))
2315 {
2316 *prediction = NOT_TAKEN;
2317 return PRED_CONST_RETURN;
2318 }
2319 }
2320 return PRED_NO_PREDICTION;
2321 }
2322
2323 /* Find the basic block with the return expression and look for a possible
2324 return value, trying to apply the RETURN_PREDICTION heuristics. */
2325 static void
2326 apply_return_prediction (void)
2327 {
2328 greturn *return_stmt = NULL;
2329 tree return_val;
2330 edge e;
2331 gphi *phi;
2332 int phi_num_args, i;
2333 enum br_predictor pred;
2334 enum prediction direction;
2335 edge_iterator ei;
2336
2337 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
2338 {
2339 gimple *last = last_stmt (e->src);
2340 if (last
2341 && gimple_code (last) == GIMPLE_RETURN)
2342 {
2343 return_stmt = as_a <greturn *> (last);
2344 break;
2345 }
2346 }
2347 if (!e)
2348 return;
2349 return_val = gimple_return_retval (return_stmt);
2350 if (!return_val)
2351 return;
2352 if (TREE_CODE (return_val) != SSA_NAME
2353 || !SSA_NAME_DEF_STMT (return_val)
2354 || gimple_code (SSA_NAME_DEF_STMT (return_val)) != GIMPLE_PHI)
2355 return;
2356 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (return_val));
2357 phi_num_args = gimple_phi_num_args (phi);
2358 pred = return_prediction (PHI_ARG_DEF (phi, 0), &direction);
2359
2360 /* Avoid the degenerate case where all return values from the function
2361 belong to the same category (i.e. they are all positive constants),
2362 so we can hardly say anything about them. */
2363 for (i = 1; i < phi_num_args; i++)
2364 if (pred != return_prediction (PHI_ARG_DEF (phi, i), &direction))
2365 break;
2366 if (i != phi_num_args)
2367 for (i = 0; i < phi_num_args; i++)
2368 {
2369 pred = return_prediction (PHI_ARG_DEF (phi, i), &direction);
2370 if (pred != PRED_NO_PREDICTION)
2371 predict_paths_leading_to_edge (gimple_phi_arg_edge (phi, i), pred,
2372 direction);
2373 }
2374 }
2375
2376 /* Look for basic blocks that contain unlikely-to-happen events
2377 (such as noreturn calls) and mark all paths leading to execution
2378 of these basic blocks as unlikely. */
2379
2380 static void
2381 tree_bb_level_predictions (void)
2382 {
2383 basic_block bb;
2384 bool has_return_edges = false;
2385 edge e;
2386 edge_iterator ei;
2387
2388 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
2389 if (!(e->flags & (EDGE_ABNORMAL | EDGE_FAKE | EDGE_EH)))
2390 {
2391 has_return_edges = true;
2392 break;
2393 }
2394
2395 apply_return_prediction ();
2396
2397 FOR_EACH_BB_FN (bb, cfun)
2398 {
2399 gimple_stmt_iterator gsi;
2400
2401 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2402 {
2403 gimple *stmt = gsi_stmt (gsi);
2404 tree decl;
2405
2406 if (is_gimple_call (stmt))
2407 {
2408 if ((gimple_call_flags (stmt) & ECF_NORETURN)
2409 && has_return_edges)
2410 predict_paths_leading_to (bb, PRED_NORETURN,
2411 NOT_TAKEN);
2412 decl = gimple_call_fndecl (stmt);
2413 if (decl
2414 && lookup_attribute ("cold",
2415 DECL_ATTRIBUTES (decl)))
2416 predict_paths_leading_to (bb, PRED_COLD_FUNCTION,
2417 NOT_TAKEN);
2418 }
2419 else if (gimple_code (stmt) == GIMPLE_PREDICT)
2420 {
2421 predict_paths_leading_to (bb, gimple_predict_predictor (stmt),
2422 gimple_predict_outcome (stmt));
2423 /* Keep GIMPLE_PREDICT around so early inlining will propagate
2424 hints to callers. */
2425 }
2426 }
2427 }
2428 }
2429
2430 /* Callback for hash_map::traverse, asserts that the pointer map is
2431 empty. */
2432
2433 bool
2434 assert_is_empty (const_basic_block const &, edge_prediction *const &value,
2435 void *)
2436 {
2437 gcc_assert (!value);
2438 return false;
2439 }
2440
2441 /* Predict branch probabilities and estimate profile for basic block BB. */
2442
2443 static void
2444 tree_estimate_probability_bb (basic_block bb)
2445 {
2446 edge e;
2447 edge_iterator ei;
2448 gimple *last;
2449
2450 FOR_EACH_EDGE (e, ei, bb->succs)
2451 {
2452 /* Predict edges to user labels with attributes. */
2453 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
2454 {
2455 gimple_stmt_iterator gi;
2456 for (gi = gsi_start_bb (e->dest); !gsi_end_p (gi); gsi_next (&gi))
2457 {
2458 glabel *label_stmt = dyn_cast <glabel *> (gsi_stmt (gi));
2459 tree decl;
2460
2461 if (!label_stmt)
2462 break;
2463 decl = gimple_label_label (label_stmt);
2464 if (DECL_ARTIFICIAL (decl))
2465 continue;
2466
2467 /* Finally, we have a user-defined label. */
2468 if (lookup_attribute ("cold", DECL_ATTRIBUTES (decl)))
2469 predict_edge_def (e, PRED_COLD_LABEL, NOT_TAKEN);
2470 else if (lookup_attribute ("hot", DECL_ATTRIBUTES (decl)))
2471 predict_edge_def (e, PRED_HOT_LABEL, TAKEN);
2472 }
2473 }
2474
2475 /* Predict early returns to be probable, as we've already taken
2476 care of error returns; other cases are often used for
2477 fast paths through the function.
2478
2479 Since we've already removed the return statements, we are
2480 looking for CFG like:
2481
2482 if (conditional)
2483 {
2484 ..
2485 goto return_block
2486 }
2487 some other blocks
2488 return_block:
2489 return_stmt. */
2490 if (e->dest != bb->next_bb
2491 && e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2492 && single_succ_p (e->dest)
2493 && single_succ_edge (e->dest)->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)
2494 && (last = last_stmt (e->dest)) != NULL
2495 && gimple_code (last) == GIMPLE_RETURN)
2496 {
2497 edge e1;
2498 edge_iterator ei1;
2499
2500 if (single_succ_p (bb))
2501 {
2502 FOR_EACH_EDGE (e1, ei1, bb->preds)
2503 if (!predicted_by_p (e1->src, PRED_NULL_RETURN)
2504 && !predicted_by_p (e1->src, PRED_CONST_RETURN)
2505 && !predicted_by_p (e1->src, PRED_NEGATIVE_RETURN))
2506 predict_edge_def (e1, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
2507 }
2508 else
2509 if (!predicted_by_p (e->src, PRED_NULL_RETURN)
2510 && !predicted_by_p (e->src, PRED_CONST_RETURN)
2511 && !predicted_by_p (e->src, PRED_NEGATIVE_RETURN))
2512 predict_edge_def (e, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
2513 }
2514
2515 /* Look for a block we are guarding (i.e. we dominate it,
2516 but it doesn't postdominate us). */
2517 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun) && e->dest != bb
2518 && dominated_by_p (CDI_DOMINATORS, e->dest, e->src)
2519 && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e->dest))
2520 {
2521 gimple_stmt_iterator bi;
2522
2523 /* The call heuristic claims that a guarded function call
2524 is improbable. This is because such calls are often used
2525 to signal exceptional situations such as printing error
2526 messages. */
2527 for (bi = gsi_start_bb (e->dest); !gsi_end_p (bi);
2528 gsi_next (&bi))
2529 {
2530 gimple *stmt = gsi_stmt (bi);
2531 if (is_gimple_call (stmt)
2532 /* Constant and pure calls are hardly used to signal
2533 something exceptional. */
2534 && gimple_has_side_effects (stmt))
2535 {
2536 predict_edge_def (e, PRED_CALL, NOT_TAKEN);
2537 break;
2538 }
2539 }
2540 }
2541 }
2542 tree_predict_by_opcode (bb);
2543 }
2544
2545 /* Predict branch probabilities and estimate profile of the tree CFG.
2546 This function can be called from the loop optimizers to recompute
2547 the profile information.
2548 If DRY_RUN is set, do not modify CFG and only produce dump files. */
2549
2550 void
2551 tree_estimate_probability (bool dry_run)
2552 {
2553 basic_block bb;
2554
2555 add_noreturn_fake_exit_edges ();
2556 connect_infinite_loops_to_exit ();
2557 /* We use loop_niter_by_eval, which requires that the loops have
2558 preheaders. */
2559 create_preheaders (CP_SIMPLE_PREHEADERS);
2560 calculate_dominance_info (CDI_POST_DOMINATORS);
2561
2562 bb_predictions = new hash_map<const_basic_block, edge_prediction *>;
2563 tree_bb_level_predictions ();
2564 record_loop_exits ();
2565
2566 if (number_of_loops (cfun) > 1)
2567 predict_loops ();
2568
2569 FOR_EACH_BB_FN (bb, cfun)
2570 tree_estimate_probability_bb (bb);
2571
2572 FOR_EACH_BB_FN (bb, cfun)
2573 combine_predictions_for_bb (bb, dry_run);
2574
2575 if (flag_checking)
2576 bb_predictions->traverse<void *, assert_is_empty> (NULL);
2577
2578 delete bb_predictions;
2579 bb_predictions = NULL;
2580
2581 if (!dry_run)
2582 estimate_bb_frequencies (false);
2583 free_dominance_info (CDI_POST_DOMINATORS);
2584 remove_fake_exit_edges ();
2585 }
2586 \f
2587 /* Predict edges entering CUR whose sources are not postdominated by BB
2588 with PRED and TAKEN, and recurse to all postdominators of CUR. */
2589
2590 static void
2591 predict_paths_for_bb (basic_block cur, basic_block bb,
2592 enum br_predictor pred,
2593 enum prediction taken,
2594 bitmap visited)
2595 {
2596 edge e;
2597 edge_iterator ei;
2598 basic_block son;
2599
2600 /* We are looking for all edges forming an edge cut induced by the
2601 set of all blocks postdominated by BB. */
2602 FOR_EACH_EDGE (e, ei, cur->preds)
2603 if (e->src->index >= NUM_FIXED_BLOCKS
2604 && !dominated_by_p (CDI_POST_DOMINATORS, e->src, bb))
2605 {
2606 edge e2;
2607 edge_iterator ei2;
2608 bool found = false;
2609
2610 /* Ignore fake edges and EH edges; we predict them as not taken anyway. */
2611 if (e->flags & (EDGE_EH | EDGE_FAKE))
2612 continue;
2613 gcc_assert (bb == cur || dominated_by_p (CDI_POST_DOMINATORS, cur, bb));
2614
2615 /* See if there is an edge from e->src that is not abnormal
2616 and does not lead to BB. */
2617 FOR_EACH_EDGE (e2, ei2, e->src->succs)
2618 if (e2 != e
2619 && !(e2->flags & (EDGE_EH | EDGE_FAKE))
2620 && !dominated_by_p (CDI_POST_DOMINATORS, e2->dest, bb))
2621 {
2622 found = true;
2623 break;
2624 }
2625
2626 /* If there is a non-abnormal path leaving e->src, predict the edge
2627 using the predictor. Otherwise we need to look for paths
2628 leading to e->src.
2629
2630 The second case may lead to an infinite loop when we are predicting
2631 regions that are only reachable by abnormal edges. We simply
2632 prevent visiting a given BB twice. */
2633 if (found)
2634 {
2635 if (!edge_predicted_by_p (e, pred, taken))
2636 predict_edge_def (e, pred, taken);
2637 }
2638 else if (bitmap_set_bit (visited, e->src->index))
2639 predict_paths_for_bb (e->src, e->src, pred, taken, visited);
2640 }
2641 for (son = first_dom_son (CDI_POST_DOMINATORS, cur);
2642 son;
2643 son = next_dom_son (CDI_POST_DOMINATORS, son))
2644 predict_paths_for_bb (son, bb, pred, taken, visited);
2645 }
2646
2647 /* Set branch probabilities on all paths leading to BB according to
2648 PRED and TAKEN. */
2649
2650 static void
2651 predict_paths_leading_to (basic_block bb, enum br_predictor pred,
2652 enum prediction taken)
2653 {
2654 bitmap visited = BITMAP_ALLOC (NULL);
2655 predict_paths_for_bb (bb, bb, pred, taken, visited);
2656 BITMAP_FREE (visited);
2657 }
2658
2659 /* Like predict_paths_leading_to but take edge instead of basic block. */
2660
2661 static void
2662 predict_paths_leading_to_edge (edge e, enum br_predictor pred,
2663 enum prediction taken)
2664 {
2665 bool has_nonloop_edge = false;
2666 edge_iterator ei;
2667 edge e2;
2668
2669 basic_block bb = e->src;
2670 FOR_EACH_EDGE (e2, ei, bb->succs)
2671 if (e2->dest != e->src && e2->dest != e->dest
2672 && !(e2->flags & (EDGE_EH | EDGE_FAKE))
2673 && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e2->dest))
2674 {
2675 has_nonloop_edge = true;
2676 break;
2677 }
2678 if (!has_nonloop_edge)
2679 {
2680 bitmap visited = BITMAP_ALLOC (NULL);
2681 predict_paths_for_bb (bb, bb, pred, taken, visited);
2682 BITMAP_FREE (visited);
2683 }
2684 else
2685 predict_edge_def (e, pred, taken);
2686 }
2687 \f
2688 /* This is used to carry information about basic blocks. It is
2689 attached to the AUX field of the standard CFG block. */
2690
2691 struct block_info
2692 {
2693 /* Estimated frequency of execution of basic_block. */
2694 sreal frequency;
2695
2696 /* To keep queue of basic blocks to process. */
2697 basic_block next;
2698
2699 /* Number of predecessors we need to visit first. */
2700 int npredecessors;
2701 };
2702
2703 /* Similar information for edges. */
2704 struct edge_prob_info
2705 {
2706 /* In case the edge is a loopback edge, the probability that the edge will
2707 be reached provided the header is. The estimated number of iterations of
2708 the loop can then be computed as 1 / (1 - back_edge_prob). */
2709 sreal back_edge_prob;
2710 /* True if the edge is a loopback edge in the natural loop. */
2711 unsigned int back_edge:1;
2712 };
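
/* For example, a back_edge_prob of 0.9 corresponds to an expected
   1 / (1 - 0.9) = 10 iterations of the loop.  */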
2713
2714 #define BLOCK_INFO(B) ((block_info *) (B)->aux)
2715 #undef EDGE_INFO
2716 #define EDGE_INFO(E) ((edge_prob_info *) (E)->aux)
2717
2718 /* Helper function for estimate_bb_frequencies.
2719 Propagate the frequencies in blocks marked in
2720 TOVISIT, starting in HEAD. */
2721
2722 static void
2723 propagate_freq (basic_block head, bitmap tovisit)
2724 {
2725 basic_block bb;
2726 basic_block last;
2727 unsigned i;
2728 edge e;
2729 basic_block nextbb;
2730 bitmap_iterator bi;
2731
2732 /* For each basic block we need to visit, count the number of its
2733 predecessors that we need to visit first. */
2734 EXECUTE_IF_SET_IN_BITMAP (tovisit, 0, i, bi)
2735 {
2736 edge_iterator ei;
2737 int count = 0;
2738
2739 bb = BASIC_BLOCK_FOR_FN (cfun, i);
2740
2741 FOR_EACH_EDGE (e, ei, bb->preds)
2742 {
2743 bool visit = bitmap_bit_p (tovisit, e->src->index);
2744
2745 if (visit && !(e->flags & EDGE_DFS_BACK))
2746 count++;
2747 else if (visit && dump_file && !EDGE_INFO (e)->back_edge)
2748 fprintf (dump_file,
2749 "Irreducible region hit, ignoring edge to %i->%i\n",
2750 e->src->index, bb->index);
2751 }
2752 BLOCK_INFO (bb)->npredecessors = count;
2753 /* When the function never returns, we will never process the exit block. */
2754 if (!count && bb == EXIT_BLOCK_PTR_FOR_FN (cfun))
2755 bb->count = bb->frequency = 0;
2756 }
2757
2758 BLOCK_INFO (head)->frequency = 1;
2759 last = head;
2760 for (bb = head; bb; bb = nextbb)
2761 {
2762 edge_iterator ei;
2763 sreal cyclic_probability = 0;
2764 sreal frequency = 0;
2765
2766 nextbb = BLOCK_INFO (bb)->next;
2767 BLOCK_INFO (bb)->next = NULL;
2768
2769 /* Compute frequency of basic block. */
2770 if (bb != head)
2771 {
2772 if (flag_checking)
2773 FOR_EACH_EDGE (e, ei, bb->preds)
2774 gcc_assert (!bitmap_bit_p (tovisit, e->src->index)
2775 || (e->flags & EDGE_DFS_BACK));
2776
2777 FOR_EACH_EDGE (e, ei, bb->preds)
2778 if (EDGE_INFO (e)->back_edge)
2779 {
2780 cyclic_probability += EDGE_INFO (e)->back_edge_prob;
2781 }
2782 else if (!(e->flags & EDGE_DFS_BACK))
2783 {
2784 /* frequency += (e->probability
2785 * BLOCK_INFO (e->src)->frequency /
2786 REG_BR_PROB_BASE); */
2787
2788 sreal tmp = e->probability;
2789 tmp *= BLOCK_INFO (e->src)->frequency;
2790 tmp *= real_inv_br_prob_base;
2791 frequency += tmp;
2792 }
2793
2794 if (cyclic_probability == 0)
2795 {
2796 BLOCK_INFO (bb)->frequency = frequency;
2797 }
2798 else
2799 {
2800 if (cyclic_probability > real_almost_one)
2801 cyclic_probability = real_almost_one;
2802
2803 /* BLOCK_INFO (bb)->frequency = frequency
2804 / (1 - cyclic_probability) */
2805
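/* With cyclic_probability capped at 1 - 1/REG_BR_PROB_BASE above, the scale
   factor 1 / (1 - cyclic_probability) stays finite and is at most
   REG_BR_PROB_BASE.  */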
2806 cyclic_probability = sreal (1) - cyclic_probability;
2807 BLOCK_INFO (bb)->frequency = frequency / cyclic_probability;
2808 }
2809 }
2810
2811 bitmap_clear_bit (tovisit, bb->index);
2812
2813 e = find_edge (bb, head);
2814 if (e)
2815 {
2816 /* EDGE_INFO (e)->back_edge_prob
2817 = ((e->probability * BLOCK_INFO (bb)->frequency)
2818 / REG_BR_PROB_BASE); */
2819
2820 sreal tmp = e->probability;
2821 tmp *= BLOCK_INFO (bb)->frequency;
2822 EDGE_INFO (e)->back_edge_prob = tmp * real_inv_br_prob_base;
2823 }
2824
2825 /* Propagate to successor blocks. */
2826 FOR_EACH_EDGE (e, ei, bb->succs)
2827 if (!(e->flags & EDGE_DFS_BACK)
2828 && BLOCK_INFO (e->dest)->npredecessors)
2829 {
2830 BLOCK_INFO (e->dest)->npredecessors--;
2831 if (!BLOCK_INFO (e->dest)->npredecessors)
2832 {
2833 if (!nextbb)
2834 nextbb = e->dest;
2835 else
2836 BLOCK_INFO (last)->next = e->dest;
2837
2838 last = e->dest;
2839 }
2840 }
2841 }
2842 }
2843
2844 /* Estimate frequencies in loops at the same nest level. */
2845
2846 static void
2847 estimate_loops_at_level (struct loop *first_loop)
2848 {
2849 struct loop *loop;
2850
2851 for (loop = first_loop; loop; loop = loop->next)
2852 {
2853 edge e;
2854 basic_block *bbs;
2855 unsigned i;
2856 bitmap tovisit = BITMAP_ALLOC (NULL);
2857
2858 estimate_loops_at_level (loop->inner);
2859
2860 /* Find current loop back edge and mark it. */
2861 e = loop_latch_edge (loop);
2862 EDGE_INFO (e)->back_edge = 1;
2863
2864 bbs = get_loop_body (loop);
2865 for (i = 0; i < loop->num_nodes; i++)
2866 bitmap_set_bit (tovisit, bbs[i]->index);
2867 free (bbs);
2868 propagate_freq (loop->header, tovisit);
2869 BITMAP_FREE (tovisit);
2870 }
2871 }
2872
2873 /* Propagate frequencies through the structure of loops. */
2874
2875 static void
2876 estimate_loops (void)
2877 {
2878 bitmap tovisit = BITMAP_ALLOC (NULL);
2879 basic_block bb;
2880
2881 /* Start by estimating the frequencies in the loops. */
2882 if (number_of_loops (cfun) > 1)
2883 estimate_loops_at_level (current_loops->tree_root->inner);
2884
2885 /* Now propagate the frequencies through all the blocks. */
2886 FOR_ALL_BB_FN (bb, cfun)
2887 {
2888 bitmap_set_bit (tovisit, bb->index);
2889 }
2890 propagate_freq (ENTRY_BLOCK_PTR_FOR_FN (cfun), tovisit);
2891 BITMAP_FREE (tovisit);
2892 }
2893
2894 /* Drop the profile for NODE to guessed, and update its frequency based on
2895 whether it is expected to be hot given the CALL_COUNT. */
2896
2897 static void
2898 drop_profile (struct cgraph_node *node, gcov_type call_count)
2899 {
2900 struct function *fn = DECL_STRUCT_FUNCTION (node->decl);
2901 /* In the case where this was called by another function with a
2902 dropped profile, call_count will be 0. Since there are no
2903 non-zero call counts to this function, we don't know for sure
2904 whether it is hot, and therefore it will be marked normal below. */
2905 bool hot = maybe_hot_count_p (NULL, call_count);
2906
2907 if (dump_file)
2908 fprintf (dump_file,
2909 "Dropping 0 profile for %s/%i. %s based on calls.\n",
2910 node->name (), node->order,
2911 hot ? "Function is hot" : "Function is normal");
2912 /* We only expect to miss profiles for functions that are reached
2913 via non-zero call edges in cases where the function may have
2914 been linked from another module or library (COMDATs and extern
2915 templates). See the comments below for handle_missing_profiles.
2916 Also, only warn in cases where the missing counts exceed the
2917 number of training runs. In certain cases with an execv followed
2918 by a no-return call the profile for the no-return call is not
2919 dumped and there can be a mismatch. */
2920 if (!DECL_COMDAT (node->decl) && !DECL_EXTERNAL (node->decl)
2921 && call_count > profile_info->runs)
2922 {
2923 if (flag_profile_correction)
2924 {
2925 if (dump_file)
2926 fprintf (dump_file,
2927 "Missing counts for called function %s/%i\n",
2928 node->name (), node->order);
2929 }
2930 else
2931 warning (0, "Missing counts for called function %s/%i",
2932 node->name (), node->order);
2933 }
2934
2935 profile_status_for_fn (fn)
2936 = (flag_guess_branch_prob ? PROFILE_GUESSED : PROFILE_ABSENT);
2937 node->frequency
2938 = hot ? NODE_FREQUENCY_HOT : NODE_FREQUENCY_NORMAL;
2939 }
2940
2941 /* In the case of COMDAT routines, multiple object files will contain the same
2942 function and the linker will select one for the binary. In that case
2943 all the other copies from the profile-instrumented binary will be missing
2944 profile counts. Look for cases where this happened, due to non-zero
2945 call counts going to 0-count functions, and drop the profile to guessed
2946 so that we can use the estimated probabilities and avoid optimizing only
2947 for size.
2948
2949 The other case where the profile may be missing is when the routine
2950 is not going to be emitted to the object file, e.g. for "extern template"
2951 class methods. Those will be marked DECL_EXTERNAL. Emit a warning in
2952 all other cases of non-zero calls to 0-count functions. */
2953
2954 void
2955 handle_missing_profiles (void)
2956 {
2957 struct cgraph_node *node;
2958 int unlikely_count_fraction = PARAM_VALUE (UNLIKELY_BB_COUNT_FRACTION);
2959 vec<struct cgraph_node *> worklist;
2960 worklist.create (64);
2961
2962 /* See if a 0-count function has non-0-count callers. In this case we
2963 lost some profile. Drop its function profile to PROFILE_GUESSED. */
2964 FOR_EACH_DEFINED_FUNCTION (node)
2965 {
2966 struct cgraph_edge *e;
2967 gcov_type call_count = 0;
2968 gcov_type max_tp_first_run = 0;
2969 struct function *fn = DECL_STRUCT_FUNCTION (node->decl);
2970
2971 if (node->count)
2972 continue;
2973 for (e = node->callers; e; e = e->next_caller)
2974 {
2975 call_count += e->count;
2976
2977 if (e->caller->tp_first_run > max_tp_first_run)
2978 max_tp_first_run = e->caller->tp_first_run;
2979 }
2980
2981 /* If the time profile is missing, assign the maximum that comes from
2982 the caller functions. */
2983 if (!node->tp_first_run && max_tp_first_run)
2984 node->tp_first_run = max_tp_first_run + 1;
2985
2986 if (call_count
2987 && fn && fn->cfg
2988 && (call_count * unlikely_count_fraction >= profile_info->runs))
2989 {
2990 drop_profile (node, call_count);
2991 worklist.safe_push (node);
2992 }
2993 }
2994
2995 /* Propagate the profile dropping to other 0-count COMDATs that are
2996 potentially called by COMDATs we already dropped the profile on. */
2997 while (worklist.length () > 0)
2998 {
2999 struct cgraph_edge *e;
3000
3001 node = worklist.pop ();
3002 for (e = node->callees; e; e = e->next_callee)
3003 {
3004 struct cgraph_node *callee = e->callee;
3005 struct function *fn = DECL_STRUCT_FUNCTION (callee->decl);
3006
3007 if (callee->count > 0)
3008 continue;
3009 if (DECL_COMDAT (callee->decl) && fn && fn->cfg
3010 && profile_status_for_fn (fn) == PROFILE_READ)
3011 {
3012 drop_profile (callee, 0);
3013 worklist.safe_push (callee);
3014 }
3015 }
3016 }
3017 worklist.release ();
3018 }
3019
3020 /* Convert counts measured by profile driven feedback to frequencies.
3021 Return nonzero iff there was any nonzero execution count. */
3022
3023 int
3024 counts_to_freqs (void)
3025 {
3026 gcov_type count_max, true_count_max = 0;
3027 basic_block bb;
3028
3029 /* Don't overwrite the estimated frequencies when the profile for
3030 the function is missing. We may drop this function to PROFILE_GUESSED
3031 later in drop_profile (). */
3032 if (!flag_auto_profile && !ENTRY_BLOCK_PTR_FOR_FN (cfun)->count)
3033 return 0;
3034
3035 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
3036 true_count_max = MAX (bb->count, true_count_max);
3037
3038 count_max = MAX (true_count_max, 1);
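
/* Scale the counts linearly so that the most frequently executed block gets
   frequency BB_FREQ_MAX; adding count_max / 2 before dividing rounds to the
   nearest integer.  */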
3039 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
3040 bb->frequency = (bb->count * BB_FREQ_MAX + count_max / 2) / count_max;
3041
3042 return true_count_max;
3043 }
3044
3045 /* Return true if the function is likely to be expensive, so there is no point
3046 in optimizing performance of the prologue, epilogue or doing inlining at the
3047 expense of code size growth. THRESHOLD is the limit on the number of
3048 instructions the function can execute on average to still be considered not expensive. */
3049
3050 bool
3051 expensive_function_p (int threshold)
3052 {
3053 unsigned int sum = 0;
3054 basic_block bb;
3055 unsigned int limit;
3056
3057 /* We cannot compute this accurately for large thresholds due to the scaled
3058 frequencies. */
3059 gcc_assert (threshold <= BB_FREQ_MAX);
3060
3061 /* Frequencies are out of range. This either means that the function contains
3062 an internal loop executing more than BB_FREQ_MAX times or that profile
3063 feedback is available and the function has not been executed at all. */
3064 if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency == 0)
3065 return true;
3066
3067 /* Maximally BB_FREQ_MAX^2 so overflow won't happen. */
3068 limit = ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency * threshold;
3069 FOR_EACH_BB_FN (bb, cfun)
3070 {
3071 rtx_insn *insn;
3072
3073 FOR_BB_INSNS (bb, insn)
3074 if (active_insn_p (insn))
3075 {
3076 sum += bb->frequency;
3077 if (sum > limit)
3078 return true;
3079 }
3080 }
3081
3082 return false;
3083 }
3084
3085 /* Estimate and propagate basic block frequencies using the given branch
3086 probabilities. If FORCE is true, the frequencies are used to estimate
3087 the counts even when there are already non-zero profile counts. */
3088
3089 void
3090 estimate_bb_frequencies (bool force)
3091 {
3092 basic_block bb;
3093 sreal freq_max;
3094
3095 if (force || profile_status_for_fn (cfun) != PROFILE_READ || !counts_to_freqs ())
3096 {
3097 static int real_values_initialized = 0;
3098
3099 if (!real_values_initialized)
3100 {
3101 real_values_initialized = 1;
3102 real_br_prob_base = REG_BR_PROB_BASE;
3103 real_bb_freq_max = BB_FREQ_MAX;
3104 real_one_half = sreal (1, -1);
3105 real_inv_br_prob_base = sreal (1) / real_br_prob_base;
3106 real_almost_one = sreal (1) - real_inv_br_prob_base;
3107 }
3108
3109 mark_dfs_back_edges ();
3110
3111 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun))->probability =
3112 REG_BR_PROB_BASE;
3113
3114 /* Set up block info for each basic block. */
3115 alloc_aux_for_blocks (sizeof (block_info));
3116 alloc_aux_for_edges (sizeof (edge_prob_info));
3117 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
3118 {
3119 edge e;
3120 edge_iterator ei;
3121
3122 FOR_EACH_EDGE (e, ei, bb->succs)
3123 {
3124 EDGE_INFO (e)->back_edge_prob = e->probability;
3125 EDGE_INFO (e)->back_edge_prob *= real_inv_br_prob_base;
3126 }
3127 }
3128
3129 /* First compute frequencies locally for each loop from innermost
3130 to outermost to examine frequencies for back edges. */
3131 estimate_loops ();
3132
3133 freq_max = 0;
3134 FOR_EACH_BB_FN (bb, cfun)
3135 if (freq_max < BLOCK_INFO (bb)->frequency)
3136 freq_max = BLOCK_INFO (bb)->frequency;
3137
3138 freq_max = real_bb_freq_max / freq_max;
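/* Scale all block frequencies so that the most frequent block gets
   BB_FREQ_MAX; real_one_half is added so that the conversion to int rounds
   to the nearest value rather than truncating.  */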
3139 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
3140 {
3141 sreal tmp = BLOCK_INFO (bb)->frequency * freq_max + real_one_half;
3142 bb->frequency = tmp.to_int ();
3143 }
3144
3145 free_aux_for_blocks ();
3146 free_aux_for_edges ();
3147 }
3148 compute_function_frequency ();
3149 }
3150
3151 /* Decide whether function is hot, cold or unlikely executed. */
3152 void
3153 compute_function_frequency (void)
3154 {
3155 basic_block bb;
3156 struct cgraph_node *node = cgraph_node::get (current_function_decl);
3157
3158 if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
3159 || MAIN_NAME_P (DECL_NAME (current_function_decl)))
3160 node->only_called_at_startup = true;
3161 if (DECL_STATIC_DESTRUCTOR (current_function_decl))
3162 node->only_called_at_exit = true;
3163
3164 if (profile_status_for_fn (cfun) != PROFILE_READ)
3165 {
3166 int flags = flags_from_decl_or_type (current_function_decl);
3167 if (lookup_attribute ("cold", DECL_ATTRIBUTES (current_function_decl))
3168 != NULL)
3169 node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
3170 else if (lookup_attribute ("hot", DECL_ATTRIBUTES (current_function_decl))
3171 != NULL)
3172 node->frequency = NODE_FREQUENCY_HOT;
3173 else if (flags & ECF_NORETURN)
3174 node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
3175 else if (MAIN_NAME_P (DECL_NAME (current_function_decl)))
3176 node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
3177 else if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
3178 || DECL_STATIC_DESTRUCTOR (current_function_decl))
3179 node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
3180 return;
3181 }
3182
3183 /* Only the first time do we try to drop the function into unlikely executed.
3184 After inlining, roundoff errors may confuse us.
3185 The ipa-profile pass will drop functions only called from unlikely
3186 functions to unlikely, and that is most of what we care about. */
3187 if (!cfun->after_inlining)
3188 node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
3189 FOR_EACH_BB_FN (bb, cfun)
3190 {
3191 if (maybe_hot_bb_p (cfun, bb))
3192 {
3193 node->frequency = NODE_FREQUENCY_HOT;
3194 return;
3195 }
3196 if (!probably_never_executed_bb_p (cfun, bb))
3197 node->frequency = NODE_FREQUENCY_NORMAL;
3198 }
3199 }
3200
3201 /* Build PREDICT_EXPR. */
3202 tree
3203 build_predict_expr (enum br_predictor predictor, enum prediction taken)
3204 {
3205 tree t = build1 (PREDICT_EXPR, void_type_node,
3206 build_int_cst (integer_type_node, predictor));
3207 SET_PREDICT_EXPR_OUTCOME (t, taken);
3208 return t;
3209 }
3210
3211 const char *
3212 predictor_name (enum br_predictor predictor)
3213 {
3214 return predictor_info[predictor].name;
3215 }
3216
3217 /* Predict branch probabilities and estimate profile of the tree CFG. */
3218
3219 namespace {
3220
3221 const pass_data pass_data_profile =
3222 {
3223 GIMPLE_PASS, /* type */
3224 "profile_estimate", /* name */
3225 OPTGROUP_NONE, /* optinfo_flags */
3226 TV_BRANCH_PROB, /* tv_id */
3227 PROP_cfg, /* properties_required */
3228 0, /* properties_provided */
3229 0, /* properties_destroyed */
3230 0, /* todo_flags_start */
3231 0, /* todo_flags_finish */
3232 };
3233
3234 class pass_profile : public gimple_opt_pass
3235 {
3236 public:
3237 pass_profile (gcc::context *ctxt)
3238 : gimple_opt_pass (pass_data_profile, ctxt)
3239 {}
3240
3241 /* opt_pass methods: */
3242 virtual bool gate (function *) { return flag_guess_branch_prob; }
3243 virtual unsigned int execute (function *);
3244
3245 }; // class pass_profile
3246
3247 unsigned int
3248 pass_profile::execute (function *fun)
3249 {
3250 unsigned nb_loops;
3251
3252 if (profile_status_for_fn (cfun) == PROFILE_GUESSED)
3253 return 0;
3254
3255 loop_optimizer_init (LOOPS_NORMAL);
3256 if (dump_file && (dump_flags & TDF_DETAILS))
3257 flow_loops_dump (dump_file, NULL, 0);
3258
3259 mark_irreducible_loops ();
3260
3261 nb_loops = number_of_loops (fun);
3262 if (nb_loops > 1)
3263 scev_initialize ();
3264
3265 tree_estimate_probability (false);
3266
3267 if (nb_loops > 1)
3268 scev_finalize ();
3269
3270 loop_optimizer_finalize ();
3271 if (dump_file && (dump_flags & TDF_DETAILS))
3272 gimple_dump_cfg (dump_file, dump_flags);
3273 if (profile_status_for_fn (fun) == PROFILE_ABSENT)
3274 profile_status_for_fn (fun) = PROFILE_GUESSED;
3275 return 0;
3276 }
3277
3278 } // anon namespace
3279
3280 gimple_opt_pass *
3281 make_pass_profile (gcc::context *ctxt)
3282 {
3283 return new pass_profile (ctxt);
3284 }
3285
3286 namespace {
3287
3288 const pass_data pass_data_strip_predict_hints =
3289 {
3290 GIMPLE_PASS, /* type */
3291 "*strip_predict_hints", /* name */
3292 OPTGROUP_NONE, /* optinfo_flags */
3293 TV_BRANCH_PROB, /* tv_id */
3294 PROP_cfg, /* properties_required */
3295 0, /* properties_provided */
3296 0, /* properties_destroyed */
3297 0, /* todo_flags_start */
3298 0, /* todo_flags_finish */
3299 };
3300
3301 class pass_strip_predict_hints : public gimple_opt_pass
3302 {
3303 public:
3304 pass_strip_predict_hints (gcc::context *ctxt)
3305 : gimple_opt_pass (pass_data_strip_predict_hints, ctxt)
3306 {}
3307
3308 /* opt_pass methods: */
3309 opt_pass * clone () { return new pass_strip_predict_hints (m_ctxt); }
3310 virtual unsigned int execute (function *);
3311
3312 }; // class pass_strip_predict_hints
3313
3314 /* Get rid of all builtin_expect calls and GIMPLE_PREDICT statements
3315 we no longer need. */
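/* Illustrative example (not from the sources): an assignment
   "x = __builtin_expect (e, 1);" is replaced by "x = e;", and a call whose
   result is unused is simply removed.  */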
3316 unsigned int
3317 pass_strip_predict_hints::execute (function *fun)
3318 {
3319 basic_block bb;
3320 gimple *ass_stmt;
3321 tree var;
3322 bool changed = false;
3323
3324 FOR_EACH_BB_FN (bb, fun)
3325 {
3326 gimple_stmt_iterator bi;
3327 for (bi = gsi_start_bb (bb); !gsi_end_p (bi);)
3328 {
3329 gimple *stmt = gsi_stmt (bi);
3330
3331 if (gimple_code (stmt) == GIMPLE_PREDICT)
3332 {
3333 gsi_remove (&bi, true);
3334 changed = true;
3335 continue;
3336 }
3337 else if (is_gimple_call (stmt))
3338 {
3339 tree fndecl = gimple_call_fndecl (stmt);
3340
3341 if ((fndecl
3342 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
3343 && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_EXPECT
3344 && gimple_call_num_args (stmt) == 2)
3345 || (gimple_call_internal_p (stmt)
3346 && gimple_call_internal_fn (stmt) == IFN_BUILTIN_EXPECT))
3347 {
3348 var = gimple_call_lhs (stmt);
3349 changed = true;
3350 if (var)
3351 {
3352 ass_stmt
3353 = gimple_build_assign (var, gimple_call_arg (stmt, 0));
3354 gsi_replace (&bi, ass_stmt, true);
3355 }
3356 else
3357 {
3358 gsi_remove (&bi, true);
3359 continue;
3360 }
3361 }
3362 }
3363 gsi_next (&bi);
3364 }
3365 }
3366 return changed ? TODO_cleanup_cfg : 0;
3367 }
3368
3369 } // anon namespace
3370
3371 gimple_opt_pass *
3372 make_pass_strip_predict_hints (gcc::context *ctxt)
3373 {
3374 return new pass_strip_predict_hints (ctxt);
3375 }
3376
3377 /* Rebuild function frequencies. Passes are in general expected to
3378 maintain the profile by hand; however, in some cases this is not possible:
3379 for example, when inlining several functions with loops the frequencies
3380 might run out of scale and thus need to be recomputed. */
3381
3382 void
3383 rebuild_frequencies (void)
3384 {
3385 timevar_push (TV_REBUILD_FREQUENCIES);
3386
3387 /* When the max bb count in the function is small, there is a higher
3388 chance that there were truncation errors in the integer scaling
3389 of counts by inlining and other optimizations. This could lead
3390 to incorrect classification of code as being cold when it isn't.
3391 In that case, force the estimation of bb counts/frequencies from the
3392 branch probabilities, rather than computing frequencies from counts,
3393 which may also lead to frequencies incorrectly reduced to 0. There
3394 is less precision in the probabilities, so we only do this for small
3395 max counts. */
3396 gcov_type count_max = 0;
3397 basic_block bb;
3398 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
3399 count_max = MAX (bb->count, count_max);
3400
3401 if (profile_status_for_fn (cfun) == PROFILE_GUESSED
3402 || (!flag_auto_profile && profile_status_for_fn (cfun) == PROFILE_READ
3403 && count_max < REG_BR_PROB_BASE/10))
3404 {
3405 loop_optimizer_init (0);
3406 add_noreturn_fake_exit_edges ();
3407 mark_irreducible_loops ();
3408 connect_infinite_loops_to_exit ();
3409 estimate_bb_frequencies (true);
3410 remove_fake_exit_edges ();
3411 loop_optimizer_finalize ();
3412 }
3413 else if (profile_status_for_fn (cfun) == PROFILE_READ)
3414 counts_to_freqs ();
3415 else
3416 gcc_unreachable ();
3417 timevar_pop (TV_REBUILD_FREQUENCIES);
3418 }
3419
3420 /* Perform a dry run of the branch prediction pass and report a comparison of
3421 the predicted and real profile into the dump file. */
3422
3423 void
3424 report_predictor_hitrates (void)
3425 {
3426 unsigned nb_loops;
3427
3428 loop_optimizer_init (LOOPS_NORMAL);
3429 if (dump_file && (dump_flags & TDF_DETAILS))
3430 flow_loops_dump (dump_file, NULL, 0);
3431
3432 mark_irreducible_loops ();
3433
3434 nb_loops = number_of_loops (cfun);
3435 if (nb_loops > 1)
3436 scev_initialize ();
3437
3438 tree_estimate_probability (true);
3439
3440 if (nb_loops > 1)
3441 scev_finalize ();
3442
3443 loop_optimizer_finalize ();
3444 }
3445
3446 /* Force edge E to be cold.
3447 If IMPOSSIBLE is true, force the edge to have count and probability 0;
3448 otherwise keep a low probability to represent a possible error in the guess.
3449 This is used e.g. when we predict a loop to likely iterate a given number
3450 of times but we are not 100% sure.
3451
3452 This function locally updates the profile without attempting to keep global
3453 consistency, which cannot be reached in full generality without a full
3454 profile rebuild from probabilities alone. Doing so is not necessarily a
3455 good idea because frequencies and counts may be more realistic than probabilities.
3456
3457 In some cases (such as the elimination of early exits during full loop
3458 unrolling) the caller can ensure that the profile will become consistent
3459 afterwards. */
3460
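/* Worked example with illustrative numbers: suppose E carries probability 30%
   and its two sibling edges carry 40% and 30%.  Making E impossible sets its
   probability to 0 and computes prob_scale = 10000 * 10000 / 7000, so the
   siblings are rescaled to roughly 57% and 43%, keeping the outgoing
   probabilities summing to REG_BR_PROB_BASE.  */
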
3461 void
3462 force_edge_cold (edge e, bool impossible)
3463 {
3464 gcov_type count_sum = 0;
3465 int prob_sum = 0;
3466 edge_iterator ei;
3467 edge e2;
3468 gcov_type old_count = e->count;
3469 int old_probability = e->probability;
3470 gcov_type gcov_scale = REG_BR_PROB_BASE;
3471 int prob_scale = REG_BR_PROB_BASE;
3472
3473 /* If the edge is already improbable or cold, just return. */
3474 if (e->probability <= (impossible ? PROB_VERY_UNLIKELY : 0)
3475 && (!impossible || !e->count))
3476 return;
3477 FOR_EACH_EDGE (e2, ei, e->src->succs)
3478 if (e2 != e)
3479 {
3480 count_sum += e2->count;
3481 prob_sum += e2->probability;
3482 }
3483
3484 /* If there are other edges out of e->src, redistribute probability
3485 there. */
3486 if (prob_sum)
3487 {
3488 e->probability
3489 = MIN (e->probability, impossible ? 0 : PROB_VERY_UNLIKELY);
3490 if (old_probability)
3491 e->count = RDIV (e->count * e->probability, old_probability);
3492 else
3493 e->count = MIN (e->count, impossible ? 0 : 1);
3494
3495 if (count_sum)
3496 gcov_scale = RDIV ((count_sum + old_count - e->count) * REG_BR_PROB_BASE,
3497 count_sum);
3498 prob_scale = RDIV ((REG_BR_PROB_BASE - e->probability) * REG_BR_PROB_BASE,
3499 prob_sum);
3500 if (dump_file && (dump_flags & TDF_DETAILS))
3501 fprintf (dump_file, "Making edge %i->%i %s by redistributing "
3502 "probability to other edges.\n",
3503 e->src->index, e->dest->index,
3504 impossible ? "imposisble" : "cold");
3505 FOR_EACH_EDGE (e2, ei, e->src->succs)
3506 if (e2 != e)
3507 {
3508 e2->count = RDIV (e2->count * gcov_scale, REG_BR_PROB_BASE);
3509 e2->probability = RDIV (e2->probability * prob_scale,
3510 REG_BR_PROB_BASE);
3511 }
3512 }
3513 /* If all edges out of e->src are unlikely, the basic block itself
3514 is unlikely. */
3515 else
3516 {
3517 e->probability = REG_BR_PROB_BASE;
3518
3519 /* If we did not adjust anything, the source basic block has no likely edges
3520 leaving in the other direction. In that case force that bb to be cold, too.
3521 In general this is a difficult task to do, but handle the special case when
3522 BB has only one predecessor. This is the common case when we are updating
3523 after loop transforms. */
3524 if (!prob_sum && !count_sum && single_pred_p (e->src)
3525 && e->src->frequency > (impossible ? 0 : 1))
3526 {
3527 int old_frequency = e->src->frequency;
3528 if (dump_file && (dump_flags & TDF_DETAILS))
3529 fprintf (dump_file, "Making bb %i %s.\n", e->src->index,
3530 impossible ? "imposisble" : "cold");
3531 e->src->frequency = MIN (e->src->frequency, impossible ? 0 : 1);
3532 e->src->count = e->count = RDIV (e->src->count * e->src->frequency,
3533 old_frequency);
3534 force_edge_cold (single_pred_edge (e->src), impossible);
3535 }
3536 else if (dump_file && (dump_flags & TDF_DETAILS)
3537 && maybe_hot_bb_p (cfun, e->src))
3538 fprintf (dump_file, "Giving up on making bb %i %s.\n", e->src->index,
3539 impossible ? "imposisble" : "cold");
3540 }
3541 }