[ARM/AArch64][testsuite] Add vmull tests.
[gcc.git] / gcc / predict.c
1 /* Branch prediction routines for the GNU compiler.
2 Copyright (C) 2000-2015 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 /* References:
21
22 [1] "Branch Prediction for Free"
23 Ball and Larus; PLDI '93.
24 [2] "Static Branch Frequency and Program Profile Analysis"
25 Wu and Larus; MICRO-27.
26 [3] "Corpus-based Static Branch Prediction"
27 Calder, Grunwald, Lindsay, Martin, Mozer, and Zorn; PLDI '95. */
28
29
30 #include "config.h"
31 #include "system.h"
32 #include "coretypes.h"
33 #include "tm.h"
34 #include "hash-set.h"
35 #include "machmode.h"
36 #include "vec.h"
37 #include "double-int.h"
38 #include "input.h"
39 #include "alias.h"
40 #include "symtab.h"
41 #include "wide-int.h"
42 #include "inchash.h"
43 #include "tree.h"
44 #include "fold-const.h"
45 #include "calls.h"
46 #include "rtl.h"
47 #include "tm_p.h"
48 #include "hard-reg-set.h"
49 #include "predict.h"
50 #include "function.h"
51 #include "dominance.h"
52 #include "cfg.h"
53 #include "cfganal.h"
54 #include "basic-block.h"
55 #include "insn-config.h"
56 #include "regs.h"
57 #include "flags.h"
58 #include "profile.h"
59 #include "except.h"
60 #include "diagnostic-core.h"
61 #include "recog.h"
62 #include "hashtab.h"
63 #include "statistics.h"
64 #include "real.h"
65 #include "fixed-value.h"
66 #include "expmed.h"
67 #include "dojump.h"
68 #include "explow.h"
69 #include "emit-rtl.h"
70 #include "varasm.h"
71 #include "stmt.h"
72 #include "expr.h"
73 #include "coverage.h"
74 #include "sreal.h"
75 #include "params.h"
76 #include "target.h"
77 #include "cfgloop.h"
78 #include "hash-map.h"
79 #include "tree-ssa-alias.h"
80 #include "internal-fn.h"
81 #include "gimple-expr.h"
82 #include "is-a.h"
83 #include "gimple.h"
84 #include "gimple-iterator.h"
85 #include "gimple-ssa.h"
86 #include "plugin-api.h"
87 #include "ipa-ref.h"
88 #include "cgraph.h"
89 #include "tree-cfg.h"
90 #include "tree-phinodes.h"
91 #include "ssa-iterators.h"
92 #include "tree-ssa-loop-niter.h"
93 #include "tree-ssa-loop.h"
94 #include "tree-pass.h"
95 #include "tree-scalar-evolution.h"
96
/* Real constants used by the probability estimation code:
   1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE, 1/REG_BR_PROB_BASE, 0.5 and
   BB_FREQ_MAX.  Initialized lazily elsewhere in this file.  */
static sreal real_almost_one, real_br_prob_base,
	     real_inv_br_prob_base, real_one_half, real_bb_freq_max;

/* Forward declarations for helpers defined later in this file.  */
static void combine_predictions_for_insn (rtx_insn *, basic_block);
static void dump_prediction (FILE *, enum br_predictor, int, basic_block, int);
static void predict_paths_leading_to (basic_block, enum br_predictor, enum prediction);
static void predict_paths_leading_to_edge (edge, enum br_predictor, enum prediction);
static bool can_predict_insn_p (const rtx_insn *);
107
/* Information we hold about each branch predictor.
   Filled using information from predict.def.  */

struct predictor_info
{
  const char *const name;	/* Name used in the debugging dumps.  */
  const int hitrate;		/* Expected hitrate used by
				   predict_insn_def call.  */
  const int flags;		/* Bitmask of the PRED_FLAG_* values below.  */
};

/* Use given predictor without Dempster-Shaffer theory if it matches
   using first_match heuristics.  */
#define PRED_FLAG_FIRST_MATCH 1

/* Recompute hitrate in percent to our representation.  */

#define HITRATE(VAL) ((int) ((VAL) * REG_BR_PROB_BASE + 50) / 100)

/* Expand each DEF_PREDICTOR entry of predict.def into one table row;
   the table is indexed by enum br_predictor.  */
#define DEF_PREDICTOR(ENUM, NAME, HITRATE, FLAGS) {NAME, HITRATE, FLAGS},
static const struct predictor_info predictor_info[]= {
#include "predict.def"

  /* Upper bound on predictors.  */
  {NULL, 0, 0}
};
#undef DEF_PREDICTOR
135
/* Return TRUE if frequency FREQ is considered to be hot in function FUN.
   Consults the cgraph node's static frequency attribute when no profile
   feedback is available, then falls back to comparing FREQ against a
   fraction of the entry block frequency.  */

static inline bool
maybe_hot_frequency_p (struct function *fun, int freq)
{
  struct cgraph_node *node = cgraph_node::get (fun->decl);
  /* Without measured profile data, trust the statically declared
     node frequency when it gives a definite answer.  */
  if (!profile_info
      || !opt_for_fn (fun->decl, flag_branch_probabilities))
    {
      if (node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED)
	return false;
      if (node->frequency == NODE_FREQUENCY_HOT)
	return true;
    }
  /* With no profile at all, optimistically treat everything as hot.  */
  if (profile_status_for_fn (fun) == PROFILE_ABSENT)
    return true;
  /* In a function executed once, blocks noticeably colder than the entry
     (less than 2/3 of its frequency) are not hot.  */
  if (node->frequency == NODE_FREQUENCY_EXECUTED_ONCE
      && freq < (ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency * 2 / 3))
    return false;
  /* A fraction parameter of 0 means nothing qualifies as hot.  */
  if (PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION) == 0)
    return false;
  if (freq < (ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency
	      / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION)))
    return false;
  return true;
}
162
163 static gcov_type min_count = -1;
164
165 /* Determine the threshold for hot BB counts. */
166
167 gcov_type
168 get_hot_bb_threshold ()
169 {
170 gcov_working_set_t *ws;
171 if (min_count == -1)
172 {
173 ws = find_working_set (PARAM_VALUE (HOT_BB_COUNT_WS_PERMILLE));
174 gcc_assert (ws);
175 min_count = ws->min_counter;
176 }
177 return min_count;
178 }
179
/* Set the threshold for hot BB counts to MIN, overriding any lazily
   computed value (see get_hot_bb_threshold).  */

void
set_hot_bb_threshold (gcov_type min)
{
  min_count = min;
}
187
/* Return TRUE if profile count COUNT is considered to be hot in
   function FUN.  Without read profile data, everything is assumed hot.  */

bool
maybe_hot_count_p (struct function *fun, gcov_type count)
{
  if (fun && profile_status_for_fn (fun) != PROFILE_READ)
    return true;
  /* Code executed at most once (COUNT no larger than the number of
     training runs) is not hot.  */
  if (profile_info->runs >= count)
    return false;
  return (count >= get_hot_bb_threshold ());
}
200
201 /* Return true in case BB can be CPU intensive and should be optimized
202 for maximal performance. */
203
204 bool
205 maybe_hot_bb_p (struct function *fun, const_basic_block bb)
206 {
207 gcc_checking_assert (fun);
208 if (profile_status_for_fn (fun) == PROFILE_READ)
209 return maybe_hot_count_p (fun, bb->count);
210 return maybe_hot_frequency_p (fun, bb->frequency);
211 }
212
213 /* Return true in case BB can be CPU intensive and should be optimized
214 for maximal performance. */
215
216 bool
217 maybe_hot_edge_p (edge e)
218 {
219 if (profile_status_for_fn (cfun) == PROFILE_READ)
220 return maybe_hot_count_p (cfun, e->count);
221 return maybe_hot_frequency_p (cfun, EDGE_FREQUENCY (e));
222 }
223
/* Return true if profile COUNT and FREQUENCY, or function FUN static
   node frequency reflects never being executed.  */

static bool
probably_never_executed (struct function *fun,
			 gcov_type count, int frequency)
{
  gcc_checking_assert (fun);
  if (profile_status_for_fn (fun) == PROFILE_READ)
    {
      int unlikely_count_fraction = PARAM_VALUE (UNLIKELY_BB_COUNT_FRACTION);
      /* A count that is at least runs/fraction means real execution.  */
      if (count * unlikely_count_fraction >= profile_info->runs)
	return false;
      if (!frequency)
	return true;
      if (!ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency)
	return false;
      if (ENTRY_BLOCK_PTR_FOR_FN (fun)->count)
	{
	  gcov_type computed_count;
	  /* Check for possibility of overflow, in which case entry bb count
	     is large enough to do the division first without losing much
	     precision.  */
	  if (ENTRY_BLOCK_PTR_FOR_FN (fun)->count < REG_BR_PROB_BASE *
	      REG_BR_PROB_BASE)
	    {
	      /* Scale FREQUENCY to a count, multiplying before dividing.  */
	      gcov_type scaled_count
		= frequency * ENTRY_BLOCK_PTR_FOR_FN (fun)->count *
	     unlikely_count_fraction;
	      computed_count = RDIV (scaled_count,
				     ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency);
	    }
	  else
	    {
	      /* Divide first to keep the intermediate in range.  */
	      computed_count = RDIV (ENTRY_BLOCK_PTR_FOR_FN (fun)->count,
				     ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency);
	      computed_count *= frequency * unlikely_count_fraction;
	    }
	  if (computed_count >= profile_info->runs)
	    return false;
	}
      return true;
    }
  /* Without profile feedback, trust the static node frequency.  */
  if ((!profile_info || !(opt_for_fn (fun->decl, flag_branch_probabilities)))
      && (cgraph_node::get (fun->decl)->frequency
	  == NODE_FREQUENCY_UNLIKELY_EXECUTED))
    return true;
  return false;
}
273
274
275 /* Return true in case BB is probably never executed. */
276
277 bool
278 probably_never_executed_bb_p (struct function *fun, const_basic_block bb)
279 {
280 return probably_never_executed (fun, bb->count, bb->frequency);
281 }
282
283
284 /* Return true in case edge E is probably never executed. */
285
286 bool
287 probably_never_executed_edge_p (struct function *fun, edge e)
288 {
289 return probably_never_executed (fun, e->count, EDGE_FREQUENCY (e));
290 }
291
292 /* Return true when current function should always be optimized for size. */
293
294 bool
295 optimize_function_for_size_p (struct function *fun)
296 {
297 if (!fun || !fun->decl)
298 return optimize_size;
299 cgraph_node *n = cgraph_node::get (fun->decl);
300 return n && n->optimize_for_size_p ();
301 }
302
303 /* Return true when current function should always be optimized for speed. */
304
305 bool
306 optimize_function_for_speed_p (struct function *fun)
307 {
308 return !optimize_function_for_size_p (fun);
309 }
310
311 /* Return TRUE when BB should be optimized for size. */
312
313 bool
314 optimize_bb_for_size_p (const_basic_block bb)
315 {
316 return (optimize_function_for_size_p (cfun)
317 || (bb && !maybe_hot_bb_p (cfun, bb)));
318 }
319
320 /* Return TRUE when BB should be optimized for speed. */
321
322 bool
323 optimize_bb_for_speed_p (const_basic_block bb)
324 {
325 return !optimize_bb_for_size_p (bb);
326 }
327
328 /* Return TRUE when BB should be optimized for size. */
329
330 bool
331 optimize_edge_for_size_p (edge e)
332 {
333 return optimize_function_for_size_p (cfun) || !maybe_hot_edge_p (e);
334 }
335
336 /* Return TRUE when BB should be optimized for speed. */
337
338 bool
339 optimize_edge_for_speed_p (edge e)
340 {
341 return !optimize_edge_for_size_p (e);
342 }
343
344 /* Return TRUE when BB should be optimized for size. */
345
346 bool
347 optimize_insn_for_size_p (void)
348 {
349 return optimize_function_for_size_p (cfun) || !crtl->maybe_hot_insn_p;
350 }
351
352 /* Return TRUE when BB should be optimized for speed. */
353
354 bool
355 optimize_insn_for_speed_p (void)
356 {
357 return !optimize_insn_for_size_p ();
358 }
359
360 /* Return TRUE when LOOP should be optimized for size. */
361
362 bool
363 optimize_loop_for_size_p (struct loop *loop)
364 {
365 return optimize_bb_for_size_p (loop->header);
366 }
367
368 /* Return TRUE when LOOP should be optimized for speed. */
369
370 bool
371 optimize_loop_for_speed_p (struct loop *loop)
372 {
373 return optimize_bb_for_speed_p (loop->header);
374 }
375
376 /* Return TRUE when LOOP nest should be optimized for speed. */
377
378 bool
379 optimize_loop_nest_for_speed_p (struct loop *loop)
380 {
381 struct loop *l = loop;
382 if (optimize_loop_for_speed_p (loop))
383 return true;
384 l = loop->inner;
385 while (l && l != loop)
386 {
387 if (optimize_loop_for_speed_p (l))
388 return true;
389 if (l->inner)
390 l = l->inner;
391 else if (l->next)
392 l = l->next;
393 else
394 {
395 while (l != loop && !l->next)
396 l = loop_outer (l);
397 if (l != loop)
398 l = l->next;
399 }
400 }
401 return false;
402 }
403
404 /* Return TRUE when LOOP nest should be optimized for size. */
405
406 bool
407 optimize_loop_nest_for_size_p (struct loop *loop)
408 {
409 return !optimize_loop_nest_for_speed_p (loop);
410 }
411
412 /* Return true when edge E is likely to be well predictable by branch
413 predictor. */
414
415 bool
416 predictable_edge_p (edge e)
417 {
418 if (profile_status_for_fn (cfun) == PROFILE_ABSENT)
419 return false;
420 if ((e->probability
421 <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100)
422 || (REG_BR_PROB_BASE - e->probability
423 <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100))
424 return true;
425 return false;
426 }
427
428
/* Set RTL expansion for BB profile: record whether BB is hot so
   optimize_insn_for_size_p/speed_p reflect it during expansion.  */

void
rtl_profile_for_bb (basic_block bb)
{
  crtl->maybe_hot_insn_p = maybe_hot_bb_p (cfun, bb);
}
436
/* Set RTL expansion for edge profile: record whether E is hot so
   optimize_insn_for_size_p/speed_p reflect it during expansion.  */

void
rtl_profile_for_edge (edge e)
{
  crtl->maybe_hot_insn_p = maybe_hot_edge_p (e);
}
444
/* Set RTL expansion to default mode (i.e. when profile info is not known);
   optimistically assume hot.  */
void
default_rtl_profile (void)
{
  crtl->maybe_hot_insn_p = true;
}
451
452 /* Return true if the one of outgoing edges is already predicted by
453 PREDICTOR. */
454
455 bool
456 rtl_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
457 {
458 rtx note;
459 if (!INSN_P (BB_END (bb)))
460 return false;
461 for (note = REG_NOTES (BB_END (bb)); note; note = XEXP (note, 1))
462 if (REG_NOTE_KIND (note) == REG_BR_PRED
463 && INTVAL (XEXP (XEXP (note, 0), 0)) == (int)predictor)
464 return true;
465 return false;
466 }
467
/* Structure representing predictions in tree level.  One node of a
   singly linked list of predictions made for edges out of one block.  */

struct edge_prediction {
  struct edge_prediction *ep_next;	/* Next prediction for same block.  */
  edge ep_edge;				/* Edge the prediction refers to.  */
  enum br_predictor ep_predictor;	/* Heuristic that produced it.  */
  int ep_probability;			/* Probability of EP_EDGE being taken.  */
};

/* This map contains for a basic block the list of predictions for the
   outgoing edges.  */

static hash_map<const_basic_block, edge_prediction *> *bb_predictions;
481
482 /* Return true if the one of outgoing edges is already predicted by
483 PREDICTOR. */
484
485 bool
486 gimple_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
487 {
488 struct edge_prediction *i;
489 edge_prediction **preds = bb_predictions->get (bb);
490
491 if (!preds)
492 return false;
493
494 for (i = *preds; i; i = i->ep_next)
495 if (i->ep_predictor == predictor)
496 return true;
497 return false;
498 }
499
500 /* Return true when the probability of edge is reliable.
501
502 The profile guessing code is good at predicting branch outcome (ie.
503 taken/not taken), that is predicted right slightly over 75% of time.
504 It is however notoriously poor on predicting the probability itself.
505 In general the profile appear a lot flatter (with probabilities closer
506 to 50%) than the reality so it is bad idea to use it to drive optimization
507 such as those disabling dynamic branch prediction for well predictable
508 branches.
509
510 There are two exceptions - edges leading to noreturn edges and edges
511 predicted by number of iterations heuristics are predicted well. This macro
512 should be able to distinguish those, but at the moment it simply check for
513 noreturn heuristic that is only one giving probability over 99% or bellow
514 1%. In future we might want to propagate reliability information across the
515 CFG if we find this information useful on multiple places. */
516 static bool
517 probability_reliable_p (int prob)
518 {
519 return (profile_status_for_fn (cfun) == PROFILE_READ
520 || (profile_status_for_fn (cfun) == PROFILE_GUESSED
521 && (prob <= HITRATE (1) || prob >= HITRATE (99))));
522 }
523
/* Same predicate as probability_reliable_p, working on edges.  */
bool
edge_probability_reliable_p (const_edge e)
{
  return probability_reliable_p (e->probability);
}
530
/* Same predicate as edge_probability_reliable_p, working on REG_BR_PROB
   notes; NOTE must be of that kind.  */
bool
br_prob_note_reliable_p (const_rtx note)
{
  gcc_assert (REG_NOTE_KIND (note) == REG_BR_PROB);
  return probability_reliable_p (XINT (note, 0));
}
538
539 static void
540 predict_insn (rtx_insn *insn, enum br_predictor predictor, int probability)
541 {
542 gcc_assert (any_condjump_p (insn));
543 if (!flag_guess_branch_prob)
544 return;
545
546 add_reg_note (insn, REG_BR_PRED,
547 gen_rtx_CONCAT (VOIDmode,
548 GEN_INT ((int) predictor),
549 GEN_INT ((int) probability)));
550 }
551
552 /* Predict insn by given predictor. */
553
554 void
555 predict_insn_def (rtx_insn *insn, enum br_predictor predictor,
556 enum prediction taken)
557 {
558 int probability = predictor_info[(int) predictor].hitrate;
559
560 if (taken != TAKEN)
561 probability = REG_BR_PROB_BASE - probability;
562
563 predict_insn (insn, predictor, probability);
564 }
565
566 /* Predict edge E with given probability if possible. */
567
568 void
569 rtl_predict_edge (edge e, enum br_predictor predictor, int probability)
570 {
571 rtx_insn *last_insn;
572 last_insn = BB_END (e->src);
573
574 /* We can store the branch prediction information only about
575 conditional jumps. */
576 if (!any_condjump_p (last_insn))
577 return;
578
579 /* We always store probability of branching. */
580 if (e->flags & EDGE_FALLTHRU)
581 probability = REG_BR_PROB_BASE - probability;
582
583 predict_insn (last_insn, predictor, probability);
584 }
585
586 /* Predict edge E with the given PROBABILITY. */
587 void
588 gimple_predict_edge (edge e, enum br_predictor predictor, int probability)
589 {
590 gcc_assert (profile_status_for_fn (cfun) != PROFILE_GUESSED);
591 if ((e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun) && EDGE_COUNT (e->src->succs) >
592 1)
593 && flag_guess_branch_prob && optimize)
594 {
595 struct edge_prediction *i = XNEW (struct edge_prediction);
596 edge_prediction *&preds = bb_predictions->get_or_insert (e->src);
597
598 i->ep_next = preds;
599 preds = i;
600 i->ep_probability = probability;
601 i->ep_predictor = predictor;
602 i->ep_edge = e;
603 }
604 }
605
/* Remove all predictions on given basic block that are attached
   to edge E.  */
void
remove_predictions_associated_with_edge (edge e)
{
  if (!bb_predictions)
    return;

  edge_prediction **preds = bb_predictions->get (e->src);

  if (preds)
    {
      /* Walk with a pointer-to-pointer so matching nodes can be unlinked
	 in place without tracking a separate previous node.  */
      struct edge_prediction **prediction = preds;
      struct edge_prediction *next;

      while (*prediction)
	{
	  if ((*prediction)->ep_edge == e)
	    {
	      next = (*prediction)->ep_next;
	      free (*prediction);
	      *prediction = next;
	    }
	  else
	    prediction = &((*prediction)->ep_next);
	}
    }
}
634
635 /* Clears the list of predictions stored for BB. */
636
637 static void
638 clear_bb_predictions (basic_block bb)
639 {
640 edge_prediction **preds = bb_predictions->get (bb);
641 struct edge_prediction *pred, *next;
642
643 if (!preds)
644 return;
645
646 for (pred = *preds; pred; pred = next)
647 {
648 next = pred->ep_next;
649 free (pred);
650 }
651 *preds = NULL;
652 }
653
654 /* Return true when we can store prediction on insn INSN.
655 At the moment we represent predictions only on conditional
656 jumps, not at computed jump or other complicated cases. */
657 static bool
658 can_predict_insn_p (const rtx_insn *insn)
659 {
660 return (JUMP_P (insn)
661 && any_condjump_p (insn)
662 && EDGE_COUNT (BLOCK_FOR_INSN (insn)->succs) >= 2);
663 }
664
665 /* Predict edge E by given predictor if possible. */
666
667 void
668 predict_edge_def (edge e, enum br_predictor predictor,
669 enum prediction taken)
670 {
671 int probability = predictor_info[(int) predictor].hitrate;
672
673 if (taken != TAKEN)
674 probability = REG_BR_PROB_BASE - probability;
675
676 predict_edge (e, predictor, probability);
677 }
678
/* Invert all branch predictions or probability notes in the INSN.  This needs
   to be done each time we invert the condition used by the jump.  */

void
invert_br_probabilities (rtx insn)
{
  rtx note;

  for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
    /* A REG_BR_PROB note stores the probability directly ...  */
    if (REG_NOTE_KIND (note) == REG_BR_PROB)
      XINT (note, 0) = REG_BR_PROB_BASE - XINT (note, 0);
    /* ... while a REG_BR_PRED note stores it in the second operand of a
       (predictor . probability) CONCAT.  */
    else if (REG_NOTE_KIND (note) == REG_BR_PRED)
      XEXP (XEXP (note, 0), 1)
	= GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (XEXP (note, 0), 1)));
}
694
695 /* Dump information about the branch prediction to the output file. */
696
697 static void
698 dump_prediction (FILE *file, enum br_predictor predictor, int probability,
699 basic_block bb, int used)
700 {
701 edge e;
702 edge_iterator ei;
703
704 if (!file)
705 return;
706
707 FOR_EACH_EDGE (e, ei, bb->succs)
708 if (! (e->flags & EDGE_FALLTHRU))
709 break;
710
711 fprintf (file, " %s heuristics%s: %.1f%%",
712 predictor_info[predictor].name,
713 used ? "" : " (ignored)", probability * 100.0 / REG_BR_PROB_BASE);
714
715 if (bb->count)
716 {
717 fprintf (file, " exec %"PRId64, bb->count);
718 if (e)
719 {
720 fprintf (file, " hit %"PRId64, e->count);
721 fprintf (file, " (%.1f%%)", e->count * 100.0 / bb->count);
722 }
723 }
724
725 fprintf (file, "\n");
726 }
727
728 /* We can not predict the probabilities of outgoing edges of bb. Set them
729 evenly and hope for the best. */
730 static void
731 set_even_probabilities (basic_block bb)
732 {
733 int nedges = 0;
734 edge e;
735 edge_iterator ei;
736
737 FOR_EACH_EDGE (e, ei, bb->succs)
738 if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
739 nedges ++;
740 FOR_EACH_EDGE (e, ei, bb->succs)
741 if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
742 e->probability = (REG_BR_PROB_BASE + nedges / 2) / nedges;
743 else
744 e->probability = 0;
745 }
746
/* Combine all REG_BR_PRED notes into single probability and attach REG_BR_PROB
   note if not already present.  Remove now useless REG_BR_PRED notes.  */

static void
combine_predictions_for_insn (rtx_insn *insn, basic_block bb)
{
  rtx prob_note;
  rtx *pnote;
  rtx note;
  int best_probability = PROB_EVEN;
  enum br_predictor best_predictor = END_PREDICTORS;
  int combined_probability = REG_BR_PROB_BASE / 2;
  int d;
  bool first_match = false;
  bool found = false;

  if (!can_predict_insn_p (insn))
    {
      set_even_probabilities (bb);
      return;
    }

  prob_note = find_reg_note (insn, REG_BR_PROB, 0);
  pnote = &REG_NOTES (insn);
  if (dump_file)
    fprintf (dump_file, "Predictions for insn %i bb %i\n", INSN_UID (insn),
	     bb->index);

  /* We implement "first match" heuristics and use probability guessed
     by predictor with smallest index.  */
  for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
    if (REG_NOTE_KIND (note) == REG_BR_PRED)
      {
	enum br_predictor predictor = ((enum br_predictor)
				       INTVAL (XEXP (XEXP (note, 0), 0)));
	int probability = INTVAL (XEXP (XEXP (note, 0), 1));

	found = true;
	/* Smaller predictor index wins the first-match slot.  */
	if (best_predictor > predictor)
	  best_probability = probability, best_predictor = predictor;

	/* Dempster-Shaffer combination of the probability with the
	   running combined probability.  */
	d = (combined_probability * probability
	     + (REG_BR_PROB_BASE - combined_probability)
	     * (REG_BR_PROB_BASE - probability));

	/* Use FP math to avoid overflows of 32bit integers.  */
	if (d == 0)
	  /* If one probability is 0% and one 100%, avoid division by zero.  */
	  combined_probability = REG_BR_PROB_BASE / 2;
	else
	  combined_probability = (((double) combined_probability) * probability
				  * REG_BR_PROB_BASE / d + 0.5);
      }

  /* Decide which heuristic to use.  In case we didn't match anything,
     use no_prediction heuristic, in case we did match, use either
     first match or Dempster-Shaffer theory depending on the flags.  */

  if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
    first_match = true;

  if (!found)
    dump_prediction (dump_file, PRED_NO_PREDICTION,
		     combined_probability, bb, true);
  else
    {
      dump_prediction (dump_file, PRED_DS_THEORY, combined_probability,
		       bb, !first_match);
      dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability,
		       bb, first_match);
    }

  if (first_match)
    combined_probability = best_probability;
  dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);

  /* Dump and strip the now-useless REG_BR_PRED notes.  */
  while (*pnote)
    {
      if (REG_NOTE_KIND (*pnote) == REG_BR_PRED)
	{
	  enum br_predictor predictor = ((enum br_predictor)
					 INTVAL (XEXP (XEXP (*pnote, 0), 0)));
	  int probability = INTVAL (XEXP (XEXP (*pnote, 0), 1));

	  dump_prediction (dump_file, predictor, probability, bb,
			   !first_match || best_predictor == predictor);
	  *pnote = XEXP (*pnote, 1);
	}
      else
	pnote = &XEXP (*pnote, 1);
    }

  if (!prob_note)
    {
      add_int_reg_note (insn, REG_BR_PROB, combined_probability);

      /* Save the prediction into CFG in case we are seeing non-degenerated
	 conditional jump.  */
      if (!single_succ_p (bb))
	{
	  BRANCH_EDGE (bb)->probability = combined_probability;
	  FALLTHRU_EDGE (bb)->probability
	    = REG_BR_PROB_BASE - combined_probability;
	}
    }
  else if (!single_succ_p (bb))
    {
      /* A REG_BR_PROB note already exists; propagate its value instead.  */
      int prob = XINT (prob_note, 0);

      BRANCH_EDGE (bb)->probability = prob;
      FALLTHRU_EDGE (bb)->probability = REG_BR_PROB_BASE - prob;
    }
  else
    single_succ_edge (bb)->probability = REG_BR_PROB_BASE;
}
862
863 /* Combine predictions into single probability and store them into CFG.
864 Remove now useless prediction entries. */
865
866 static void
867 combine_predictions_for_bb (basic_block bb)
868 {
869 int best_probability = PROB_EVEN;
870 enum br_predictor best_predictor = END_PREDICTORS;
871 int combined_probability = REG_BR_PROB_BASE / 2;
872 int d;
873 bool first_match = false;
874 bool found = false;
875 struct edge_prediction *pred;
876 int nedges = 0;
877 edge e, first = NULL, second = NULL;
878 edge_iterator ei;
879
880 FOR_EACH_EDGE (e, ei, bb->succs)
881 if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
882 {
883 nedges ++;
884 if (first && !second)
885 second = e;
886 if (!first)
887 first = e;
888 }
889
890 /* When there is no successor or only one choice, prediction is easy.
891
892 We are lazy for now and predict only basic blocks with two outgoing
893 edges. It is possible to predict generic case too, but we have to
894 ignore first match heuristics and do more involved combining. Implement
895 this later. */
896 if (nedges != 2)
897 {
898 if (!bb->count)
899 set_even_probabilities (bb);
900 clear_bb_predictions (bb);
901 if (dump_file)
902 fprintf (dump_file, "%i edges in bb %i predicted to even probabilities\n",
903 nedges, bb->index);
904 return;
905 }
906
907 if (dump_file)
908 fprintf (dump_file, "Predictions for bb %i\n", bb->index);
909
910 edge_prediction **preds = bb_predictions->get (bb);
911 if (preds)
912 {
913 /* We implement "first match" heuristics and use probability guessed
914 by predictor with smallest index. */
915 for (pred = *preds; pred; pred = pred->ep_next)
916 {
917 enum br_predictor predictor = pred->ep_predictor;
918 int probability = pred->ep_probability;
919
920 if (pred->ep_edge != first)
921 probability = REG_BR_PROB_BASE - probability;
922
923 found = true;
924 /* First match heuristics would be widly confused if we predicted
925 both directions. */
926 if (best_predictor > predictor)
927 {
928 struct edge_prediction *pred2;
929 int prob = probability;
930
931 for (pred2 = (struct edge_prediction *) *preds;
932 pred2; pred2 = pred2->ep_next)
933 if (pred2 != pred && pred2->ep_predictor == pred->ep_predictor)
934 {
935 int probability2 = pred->ep_probability;
936
937 if (pred2->ep_edge != first)
938 probability2 = REG_BR_PROB_BASE - probability2;
939
940 if ((probability < REG_BR_PROB_BASE / 2) !=
941 (probability2 < REG_BR_PROB_BASE / 2))
942 break;
943
944 /* If the same predictor later gave better result, go for it! */
945 if ((probability >= REG_BR_PROB_BASE / 2 && (probability2 > probability))
946 || (probability <= REG_BR_PROB_BASE / 2 && (probability2 < probability)))
947 prob = probability2;
948 }
949 if (!pred2)
950 best_probability = prob, best_predictor = predictor;
951 }
952
953 d = (combined_probability * probability
954 + (REG_BR_PROB_BASE - combined_probability)
955 * (REG_BR_PROB_BASE - probability));
956
957 /* Use FP math to avoid overflows of 32bit integers. */
958 if (d == 0)
959 /* If one probability is 0% and one 100%, avoid division by zero. */
960 combined_probability = REG_BR_PROB_BASE / 2;
961 else
962 combined_probability = (((double) combined_probability)
963 * probability
964 * REG_BR_PROB_BASE / d + 0.5);
965 }
966 }
967
968 /* Decide which heuristic to use. In case we didn't match anything,
969 use no_prediction heuristic, in case we did match, use either
970 first match or Dempster-Shaffer theory depending on the flags. */
971
972 if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
973 first_match = true;
974
975 if (!found)
976 dump_prediction (dump_file, PRED_NO_PREDICTION, combined_probability, bb, true);
977 else
978 {
979 dump_prediction (dump_file, PRED_DS_THEORY, combined_probability, bb,
980 !first_match);
981 dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability, bb,
982 first_match);
983 }
984
985 if (first_match)
986 combined_probability = best_probability;
987 dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);
988
989 if (preds)
990 {
991 for (pred = (struct edge_prediction *) *preds; pred; pred = pred->ep_next)
992 {
993 enum br_predictor predictor = pred->ep_predictor;
994 int probability = pred->ep_probability;
995
996 if (pred->ep_edge != EDGE_SUCC (bb, 0))
997 probability = REG_BR_PROB_BASE - probability;
998 dump_prediction (dump_file, predictor, probability, bb,
999 !first_match || best_predictor == predictor);
1000 }
1001 }
1002 clear_bb_predictions (bb);
1003
1004 if (!bb->count)
1005 {
1006 first->probability = combined_probability;
1007 second->probability = REG_BR_PROB_BASE - combined_probability;
1008 }
1009 }
1010
/* Check if T1 and T2 satisfy the IV_COMPARE condition.
   Return the SSA_NAME if the condition satisfies, NULL otherwise.

   T1 and T2 should be one of the following cases:
     1. T1 is SSA_NAME, T2 is NULL
     2. T1 is SSA_NAME, T2 is INTEGER_CST between [-4, 4]
     3. T2 is SSA_NAME, T1 is INTEGER_CST between [-4, 4]  */

static tree
strips_small_constant (tree t1, tree t2)
{
  tree ret = NULL;
  /* Value of whichever operand is the constant; stays 0 when neither
     operand supplies one, which trivially passes the [-4, 4] check.  */
  int value = 0;

  if (!t1)
    return NULL;
  else if (TREE_CODE (t1) == SSA_NAME)
    ret = t1;
  else if (tree_fits_shwi_p (t1))
    value = tree_to_shwi (t1);
  else
    return NULL;

  if (!t2)
    return ret;
  else if (tree_fits_shwi_p (t2))
    value = tree_to_shwi (t2);
  else if (TREE_CODE (t2) == SSA_NAME)
    {
      /* Both operands SSA_NAMEs: no single candidate to return.  */
      if (ret)
	return NULL;
      else
	ret = t2;
    }

  if (value <= 4 && value >= -4)
    return ret;
  else
    return NULL;
}
1051
/* Return the SSA_NAME in T or T's operands.
   Return NULL if SSA_NAME cannot be found.  */

static tree
get_base_value (tree t)
{
  if (TREE_CODE (t) == SSA_NAME)
    return t;

  if (!BINARY_CLASS_P (t))
    return NULL;

  switch (TREE_OPERAND_LENGTH (t))
    {
    case 1:
      /* NOTE(review): a binary-class tree should have two operands, so
	 this case looks unreachable after the BINARY_CLASS_P guard —
	 confirm before relying on it.  */
      return strips_small_constant (TREE_OPERAND (t, 0), NULL);
    case 2:
      return strips_small_constant (TREE_OPERAND (t, 0),
				    TREE_OPERAND (t, 1));
    default:
      return NULL;
    }
}
1075
/* Check the compare STMT in LOOP.  If it compares an induction
   variable to a loop invariant, return true, and save
   LOOP_INVARIANT, COMPARE_CODE and LOOP_STEP.
   Otherwise return false and set LOOP_INVARIANT to NULL.  */

static bool
is_comparison_with_loop_invariant_p (gcond *stmt, struct loop *loop,
				     tree *loop_invariant,
				     enum tree_code *compare_code,
				     tree *loop_step,
				     tree *loop_iv_base)
{
  tree op0, op1, bound, base;
  affine_iv iv0, iv1;
  enum tree_code code;
  tree step;

  code = gimple_cond_code (stmt);
  *loop_invariant = NULL;

  /* Only ordinary comparisons are handled.  */
  switch (code)
    {
    case GT_EXPR:
    case GE_EXPR:
    case NE_EXPR:
    case LT_EXPR:
    case LE_EXPR:
    case EQ_EXPR:
      break;

    default:
      return false;
    }

  op0 = gimple_cond_lhs (stmt);
  op1 = gimple_cond_rhs (stmt);

  if ((TREE_CODE (op0) != SSA_NAME && TREE_CODE (op0) != INTEGER_CST)
      || (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op1) != INTEGER_CST))
    return false;
  if (!simple_iv (loop, loop_containing_stmt (stmt), op0, &iv0, true))
    return false;
  if (!simple_iv (loop, loop_containing_stmt (stmt), op1, &iv1, true))
    return false;
  if (TREE_CODE (iv0.step) != INTEGER_CST
      || TREE_CODE (iv1.step) != INTEGER_CST)
    return false;
  /* Exactly one side must evolve (the IV); the other must be invariant
     (zero step).  */
  if ((integer_zerop (iv0.step) && integer_zerop (iv1.step))
      || (!integer_zerop (iv0.step) && !integer_zerop (iv1.step)))
    return false;

  if (integer_zerop (iv0.step))
    {
      /* OP0 is the invariant; canonicalize so the IV is on the left by
	 inverting the comparison (except for EQ/NE, which are symmetric).  */
      if (code != NE_EXPR && code != EQ_EXPR)
	code = invert_tree_comparison (code, false);
      bound = iv0.base;
      base = iv1.base;
      if (tree_fits_shwi_p (iv1.step))
	step = iv1.step;
      else
	return false;
    }
  else
    {
      bound = iv1.base;
      base = iv0.base;
      if (tree_fits_shwi_p (iv0.step))
	step = iv0.step;
      else
	return false;
    }

  /* Reduce non-constant bound/base to an underlying SSA_NAME if any.  */
  if (TREE_CODE (bound) != INTEGER_CST)
    bound = get_base_value (bound);
  if (!bound)
    return false;
  if (TREE_CODE (base) != INTEGER_CST)
    base = get_base_value (base);
  if (!base)
    return false;

  *loop_invariant = bound;
  *compare_code = code;
  *loop_step = step;
  *loop_iv_base = base;
  return true;
}
1163
1164 /* Compare two SSA_NAMEs: returns TRUE if T1 and T2 are value coherent. */
1165
1166 static bool
1167 expr_coherent_p (tree t1, tree t2)
1168 {
1169 gimple stmt;
1170 tree ssa_name_1 = NULL;
1171 tree ssa_name_2 = NULL;
1172
1173 gcc_assert (TREE_CODE (t1) == SSA_NAME || TREE_CODE (t1) == INTEGER_CST);
1174 gcc_assert (TREE_CODE (t2) == SSA_NAME || TREE_CODE (t2) == INTEGER_CST);
1175
1176 if (t1 == t2)
1177 return true;
1178
1179 if (TREE_CODE (t1) == INTEGER_CST && TREE_CODE (t2) == INTEGER_CST)
1180 return true;
1181 if (TREE_CODE (t1) == INTEGER_CST || TREE_CODE (t2) == INTEGER_CST)
1182 return false;
1183
1184 /* Check to see if t1 is expressed/defined with t2. */
1185 stmt = SSA_NAME_DEF_STMT (t1);
1186 gcc_assert (stmt != NULL);
1187 if (is_gimple_assign (stmt))
1188 {
1189 ssa_name_1 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
1190 if (ssa_name_1 && ssa_name_1 == t2)
1191 return true;
1192 }
1193
1194 /* Check to see if t2 is expressed/defined with t1. */
1195 stmt = SSA_NAME_DEF_STMT (t2);
1196 gcc_assert (stmt != NULL);
1197 if (is_gimple_assign (stmt))
1198 {
1199 ssa_name_2 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
1200 if (ssa_name_2 && ssa_name_2 == t1)
1201 return true;
1202 }
1203
1204 /* Compare if t1 and t2's def_stmts are identical. */
1205 if (ssa_name_2 != NULL && ssa_name_1 == ssa_name_2)
1206 return true;
1207 else
1208 return false;
1209 }
1210
/* Predict branch probability of BB when BB contains a branch that compares
   an induction variable in LOOP with LOOP_IV_BASE_VAR to LOOP_BOUND_VAR.  The
   loop exit is compared using LOOP_BOUND_CODE, with step of LOOP_BOUND_STEP.

   E.g.
     for (int i = 0; i < bound; i++) {
       if (i < bound - 2)
	 computation_1();
       else
	 computation_2();
     }

  In this loop, we will predict the branch inside the loop to be taken.  */

static void
predict_iv_comparison (struct loop *loop, basic_block bb,
		       tree loop_bound_var,
		       tree loop_iv_base_var,
		       enum tree_code loop_bound_code,
		       int loop_bound_step)
{
  gimple stmt;
  tree compare_var, compare_base;
  enum tree_code compare_code;
  tree compare_step_var;
  edge then_edge;
  edge_iterator ei;

  /* Do not override predictions made by more reliable heuristics.  */
  if (predicted_by_p (bb, PRED_LOOP_ITERATIONS_GUESSED)
      || predicted_by_p (bb, PRED_LOOP_ITERATIONS)
      || predicted_by_p (bb, PRED_LOOP_EXIT))
    return;

  /* BB must end in a conditional comparing an IV with a loop invariant.  */
  stmt = last_stmt (bb);
  if (!stmt || gimple_code (stmt) != GIMPLE_COND)
    return;
  if (!is_comparison_with_loop_invariant_p (as_a <gcond *> (stmt),
					    loop, &compare_var,
					    &compare_code,
					    &compare_step_var,
					    &compare_base))
    return;

  /* Find the taken edge.  */
  FOR_EACH_EDGE (then_edge, ei, bb->succs)
    if (then_edge->flags & EDGE_TRUE_VALUE)
      break;

  /* When comparing an IV to a loop invariant, NE is more likely to be
     taken while EQ is more likely to be not-taken.  */
  if (compare_code == NE_EXPR)
    {
      predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
      return;
    }
  else if (compare_code == EQ_EXPR)
    {
      predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
      return;
    }

  /* The remaining heuristics only apply when the branch iterates over
     the same base IV as the loop bound condition.  */
  if (!expr_coherent_p (loop_iv_base_var, compare_base))
    return;

  /* If loop bound, base and compare bound are all constants, we can
     calculate the probability directly.  */
  if (tree_fits_shwi_p (loop_bound_var)
      && tree_fits_shwi_p (compare_var)
      && tree_fits_shwi_p (compare_base))
    {
      int probability;
      bool overflow, overall_overflow = false;
      widest_int compare_count, tem;

      /* (loop_bound - base) / compare_step */
      tem = wi::sub (wi::to_widest (loop_bound_var),
		     wi::to_widest (compare_base), SIGNED, &overflow);
      overall_overflow |= overflow;
      widest_int loop_count = wi::div_trunc (tem,
					     wi::to_widest (compare_step_var),
					     SIGNED, &overflow);
      overall_overflow |= overflow;

      /* Pick the sub-range counted by the branch depending on whether
	 the IV steps toward or away from the compare bound.  */
      if (!wi::neg_p (wi::to_widest (compare_step_var))
          ^ (compare_code == LT_EXPR || compare_code == LE_EXPR))
	{
	  /* (loop_bound - compare_bound) / compare_step */
	  tem = wi::sub (wi::to_widest (loop_bound_var),
			 wi::to_widest (compare_var), SIGNED, &overflow);
	  overall_overflow |= overflow;
	  compare_count = wi::div_trunc (tem, wi::to_widest (compare_step_var),
					 SIGNED, &overflow);
	  overall_overflow |= overflow;
	}
      else
	{
	  /* (compare_bound - base) / compare_step */
	  tem = wi::sub (wi::to_widest (compare_var),
			 wi::to_widest (compare_base), SIGNED, &overflow);
	  overall_overflow |= overflow;
	  compare_count = wi::div_trunc (tem, wi::to_widest (compare_step_var),
					 SIGNED, &overflow);
	  overall_overflow |= overflow;
	}
      /* Inclusive bounds cover one extra iteration.  */
      if (compare_code == LE_EXPR || compare_code == GE_EXPR)
	++compare_count;
      if (loop_bound_code == LE_EXPR || loop_bound_code == GE_EXPR)
	++loop_count;
      /* Clamp negative counts to zero before forming the ratio.  */
      if (wi::neg_p (compare_count))
        compare_count = 0;
      if (wi::neg_p (loop_count))
        loop_count = 0;
      if (loop_count == 0)
	probability = 0;
      else if (wi::cmps (compare_count, loop_count) == 1)
	probability = REG_BR_PROB_BASE;
      else
	{
	  /* Probability = compare_count / loop_count, scaled to
	     REG_BR_PROB_BASE.  */
	  tem = compare_count * REG_BR_PROB_BASE;
	  tem = wi::udiv_trunc (tem, loop_count);
	  probability = tem.to_uhwi ();
	}

      /* Only record the prediction if no intermediate step overflowed.  */
      if (!overall_overflow)
        predict_edge (then_edge, PRED_LOOP_IV_COMPARE, probability);

      return;
    }

  if (expr_coherent_p (loop_bound_var, compare_var))
    {
      /* The branch compares against (roughly) the same bound as the
	 loop condition: predict taken when the directions agree.  */
      if ((loop_bound_code == LT_EXPR || loop_bound_code == LE_EXPR)
	  && (compare_code == LT_EXPR || compare_code == LE_EXPR))
	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
      else if ((loop_bound_code == GT_EXPR || loop_bound_code == GE_EXPR)
	       && (compare_code == GT_EXPR || compare_code == GE_EXPR))
	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
      else if (loop_bound_code == NE_EXPR)
	{
	  /* If the loop backedge condition is "(i != bound)", we do
	     the comparison based on the step of IV:
	     * step < 0 : backedge condition is like (i > bound)
	     * step > 0 : backedge condition is like (i < bound)  */
	  gcc_assert (loop_bound_step != 0);
	  if (loop_bound_step > 0
	      && (compare_code == LT_EXPR
		  || compare_code == LE_EXPR))
	    predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
	  else if (loop_bound_step < 0
		   && (compare_code == GT_EXPR
		       || compare_code == GE_EXPR))
	    predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
	  else
	    predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
	}
      else
	/* The branch is predicted not-taken if loop_bound_code is
	   opposite with compare_code.  */
	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
    }
  else if (expr_coherent_p (loop_iv_base_var, compare_var))
    {
      /* For cases like:
	   for (i = s; i < h; i++)
	     if (i > s + 2) ....
	 The branch should be predicted taken.  */
      if (loop_bound_step > 0
	  && (compare_code == GT_EXPR || compare_code == GE_EXPR))
	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
      else if (loop_bound_step < 0
	       && (compare_code == LT_EXPR || compare_code == LE_EXPR))
	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
      else
	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
    }
}
1387
/* Predict for extra loop exits that will lead to EXIT_EDGE.  The extra loop
   exits result from short-circuit conditions that will generate an
   if_tmp.  E.g.:

   if (foo() || global > 10)
     break;

   This will be translated into:

   BB3:
     loop header...
   BB4:
     if foo() goto BB6 else goto BB5
   BB5:
     if global > 10 goto BB6 else goto BB7
   BB6:
     goto BB7
   BB7:
     iftmp = (PHI 0(BB5), 1(BB6))
     if iftmp == 1 goto BB8 else goto BB3
   BB8:
     outside of the loop...

   The edge BB7->BB8 is loop exit because BB8 is outside of the loop.
   From the dataflow, we can infer that BB4->BB6 and BB5->BB6 are also loop
   exits.  This function takes BB7->BB8 as input, and finds out the extra loop
   exits to predict them using PRED_LOOP_EXIT.  */

static void
predict_extra_loop_exits (edge exit_edge)
{
  unsigned i;
  bool check_value_one;
  gimple lhs_def_stmt;
  gphi *phi_stmt;
  tree cmp_rhs, cmp_lhs;
  gimple last;
  gcond *cmp_stmt;

  /* The exit block must end in "if (iftmp ==/!= 0/1)".  */
  last = last_stmt (exit_edge->src);
  if (!last)
    return;
  cmp_stmt = dyn_cast <gcond *> (last);
  if (!cmp_stmt)
    return;

  cmp_rhs = gimple_cond_rhs (cmp_stmt);
  cmp_lhs = gimple_cond_lhs (cmp_stmt);
  if (!TREE_CONSTANT (cmp_rhs)
      || !(integer_zerop (cmp_rhs) || integer_onep (cmp_rhs)))
    return;
  if (TREE_CODE (cmp_lhs) != SSA_NAME)
    return;

  /* If check_value_one is true, only the phi_args with value '1' will lead
     to loop exit.  Otherwise, only the phi_args with value '0' will lead to
     loop exit.  The XOR chain folds together the compared constant, the
     comparison code, and which of the two edges actually exits.  */
  check_value_one = (((integer_onep (cmp_rhs))
		    ^ (gimple_cond_code (cmp_stmt) == EQ_EXPR))
		    ^ ((exit_edge->flags & EDGE_TRUE_VALUE) != 0));

  /* The iftmp must be merged by a PHI from the short-circuit blocks.  */
  lhs_def_stmt = SSA_NAME_DEF_STMT (cmp_lhs);
  if (!lhs_def_stmt)
    return;

  phi_stmt = dyn_cast <gphi *> (lhs_def_stmt);
  if (!phi_stmt)
    return;

  for (i = 0; i < gimple_phi_num_args (phi_stmt); i++)
    {
      edge e1;
      edge_iterator ei;
      tree val = gimple_phi_arg_def (phi_stmt, i);
      edge e = gimple_phi_arg_edge (phi_stmt, i);

      /* Skip PHI arguments that do not lead to the loop exit.  */
      if (!TREE_CONSTANT (val) || !(integer_zerop (val) || integer_onep (val)))
	continue;
      if ((check_value_one ^ integer_onep (val)) == 1)
	continue;
      /* If the predecessor branches, predict the edge itself; otherwise
	 it is a forwarder block, so predict the paths feeding into it.  */
      if (EDGE_COUNT (e->src->succs) != 1)
	{
	  predict_paths_leading_to_edge (e, PRED_LOOP_EXIT, NOT_TAKEN);
	  continue;
	}

      FOR_EACH_EDGE (e1, ei, e->src->preds)
	predict_paths_leading_to_edge (e1, PRED_LOOP_EXIT, NOT_TAKEN);
    }
}
1478
/* Predict edge probabilities by exploiting loop structure.  */

static void
predict_loops (void)
{
  struct loop *loop;

  /* Try to predict out blocks in a loop that are not part of a
     natural loop.  */
  FOR_EACH_LOOP (loop, 0)
    {
      basic_block bb, *bbs;
      unsigned j, n_exits;
      vec<edge> exits;
      struct tree_niter_desc niter_desc;
      edge ex;
      struct nb_iter_bound *nb_iter;
      enum tree_code loop_bound_code = ERROR_MARK;
      tree loop_bound_step = NULL;
      tree loop_bound_var = NULL;
      tree loop_iv_base = NULL;
      gcond *stmt = NULL;

      exits = get_loop_exit_edges (loop);
      n_exits = exits.length ();
      if (!n_exits)
	{
	  exits.release ();
	  continue;
	}

      /* Predict each exit edge from an iteration-count estimate.  */
      FOR_EACH_VEC_ELT (exits, j, ex)
	{
	  tree niter = NULL;
	  HOST_WIDE_INT nitercst;
	  int max = PARAM_VALUE (PARAM_MAX_PREDICTED_ITERATIONS);
	  int probability;
	  enum br_predictor predictor;

	  predict_extra_loop_exits (ex);

	  /* Prefer an exact analysis; fall back to brute-force
	     evaluation of the first few iterations.  */
	  if (number_of_iterations_exit (loop, ex, &niter_desc, false, false))
	    niter = niter_desc.niter;
	  if (!niter || TREE_CODE (niter_desc.niter) != INTEGER_CST)
	    niter = loop_niter_by_eval (loop, ex);

	  if (TREE_CODE (niter) == INTEGER_CST)
	    {
	      /* Cap the predicted iteration count at MAX.  */
	      if (tree_fits_uhwi_p (niter)
		  && max
		  && compare_tree_int (niter, max - 1) == -1)
		nitercst = tree_to_uhwi (niter) + 1;
	      else
		nitercst = max;
	      predictor = PRED_LOOP_ITERATIONS;
	    }
	  /* If we have just one exit and we can derive some information about
	     the number of iterations of the loop from the statements inside
	     the loop, use it to predict this exit.  */
	  else if (n_exits == 1)
	    {
	      nitercst = estimated_stmt_executions_int (loop);
	      if (nitercst < 0)
		continue;
	      if (nitercst > max)
		nitercst = max;

	      predictor = PRED_LOOP_ITERATIONS_GUESSED;
	    }
	  else
	    continue;

	  /* If the prediction for number of iterations is zero, do not
	     predict the exit edges.  */
	  if (nitercst == 0)
	    continue;

	  /* Exit probability is 1/nitercst, rounded to nearest.  */
	  probability = ((REG_BR_PROB_BASE + nitercst / 2) / nitercst);
	  predict_edge (ex, predictor, probability);
	}
      exits.release ();

      /* Find information about loop bound variables.  */
      for (nb_iter = loop->bounds; nb_iter;
	   nb_iter = nb_iter->next)
	if (nb_iter->stmt
	    && gimple_code (nb_iter->stmt) == GIMPLE_COND)
	  {
	    stmt = as_a <gcond *> (nb_iter->stmt);
	    break;
	  }
      if (!stmt && last_stmt (loop->header)
	  && gimple_code (last_stmt (loop->header)) == GIMPLE_COND)
	stmt = as_a <gcond *> (last_stmt (loop->header));
      if (stmt)
	is_comparison_with_loop_invariant_p (stmt, loop,
					     &loop_bound_var,
					     &loop_bound_code,
					     &loop_bound_step,
					     &loop_iv_base);

      bbs = get_loop_body (loop);

      for (j = 0; j < loop->num_nodes; j++)
	{
	  int header_found = 0;
	  edge e;
	  edge_iterator ei;

	  bb = bbs[j];

	  /* Bypass loop heuristics on continue statement.  These
	     statements construct loops via "non-loop" constructs
	     in the source language and are better to be handled
	     separately.  */
	  if (predicted_by_p (bb, PRED_CONTINUE))
	    continue;

	  /* Loop branch heuristics - predict an edge back to a
	     loop's head as taken.  */
	  if (bb == loop->latch)
	    {
	      e = find_edge (loop->latch, loop->header);
	      if (e)
		{
		  header_found = 1;
		  predict_edge_def (e, PRED_LOOP_BRANCH, TAKEN);
		}
	    }

	  /* Loop exit heuristics - predict an edge exiting the loop if the
	     conditional has no loop header successors as not taken.  */
	  if (!header_found
	      /* If we already used more reliable loop exit predictors, do not
		 bother with PRED_LOOP_EXIT.  */
	      && !predicted_by_p (bb, PRED_LOOP_ITERATIONS_GUESSED)
	      && !predicted_by_p (bb, PRED_LOOP_ITERATIONS))
	    {
	      /* For loop with many exits we don't want to predict all exits
		 with the pretty large probability, because if all exits are
		 considered in row, the loop would be predicted to iterate
		 almost never.  The code to divide probability by number of
		 exits is very rough.  It should compute the number of exits
		 taken in each path through function (not the overall number
		 of exits that might be a lot higher for loops with wide switch
		 statements in them) and compute n-th square root.

		 We limit the minimal probability by 2% to avoid
		 EDGE_PROBABILITY_RELIABLE from trusting the branch prediction
		 as this was causing regression in perl benchmark containing such
		 a wide loop.  */

	      int probability = ((REG_BR_PROB_BASE
				  - predictor_info [(int) PRED_LOOP_EXIT].hitrate)
				 / n_exits);
	      if (probability < HITRATE (2))
		probability = HITRATE (2);
	      FOR_EACH_EDGE (e, ei, bb->succs)
		if (e->dest->index < NUM_FIXED_BLOCKS
		    || !flow_bb_inside_loop_p (loop, e->dest))
		  predict_edge (e, PRED_LOOP_EXIT, probability);
	    }
	  /* Apply the IV-vs-invariant heuristic inside the loop body.  */
	  if (loop_bound_var)
	    predict_iv_comparison (loop, bb, loop_bound_var, loop_iv_base,
				   loop_bound_code,
				   tree_to_shwi (loop_bound_step));
	}

      /* Free basic blocks from get_loop_body.  */
      free (bbs);
    }
}
1651
/* Attempt to predict probabilities of BB outgoing edges using local
   properties.  */
static void
bb_estimate_probability_locally (basic_block bb)
{
  rtx_insn *last_insn = BB_END (bb);
  rtx cond;

  if (! can_predict_insn_p (last_insn))
    return;
  cond = get_condition (last_insn, NULL, false, false);
  if (! cond)
    return;

  /* Try "pointer heuristic."
     A comparison ptr == 0 is predicted as false.
     Similarly, a comparison ptr1 == ptr2 is predicted as false.  */
  if (COMPARISON_P (cond)
      && ((REG_P (XEXP (cond, 0)) && REG_POINTER (XEXP (cond, 0)))
	  || (REG_P (XEXP (cond, 1)) && REG_POINTER (XEXP (cond, 1)))))
    {
      if (GET_CODE (cond) == EQ)
	predict_insn_def (last_insn, PRED_POINTER, NOT_TAKEN);
      else if (GET_CODE (cond) == NE)
	predict_insn_def (last_insn, PRED_POINTER, TAKEN);
    }
  else

  /* Try "opcode heuristic."
     EQ tests are usually false and NE tests are usually true.  Also,
     most quantities are positive, so we can make the appropriate guesses
     about signed comparisons against zero.  */
    switch (GET_CODE (cond))
      {
      case CONST_INT:
	/* Unconditional branch.  */
	predict_insn_def (last_insn, PRED_UNCONDITIONAL,
			  cond == const0_rtx ? NOT_TAKEN : TAKEN);
	break;

      case EQ:
      case UNEQ:
	/* Floating point comparisons appears to behave in a very
	   unpredictable way because of special role of = tests in
	   FP code.  */
	if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
	  ;
	/* Comparisons with 0 are often used for booleans and there is
	   nothing useful to predict about them.  */
	else if (XEXP (cond, 1) == const0_rtx
		 || XEXP (cond, 0) == const0_rtx)
	  ;
	else
	  predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, NOT_TAKEN);
	break;

      case NE:
      case LTGT:
	/* Floating point comparisons appears to behave in a very
	   unpredictable way because of special role of = tests in
	   FP code.  */
	if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
	  ;
	/* Comparisons with 0 are often used for booleans and there is
	   nothing useful to predict about them.  */
	else if (XEXP (cond, 1) == const0_rtx
		 || XEXP (cond, 0) == const0_rtx)
	  ;
	else
	  predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, TAKEN);
	break;

      case ORDERED:
	predict_insn_def (last_insn, PRED_FPOPCODE, TAKEN);
	break;

      case UNORDERED:
	predict_insn_def (last_insn, PRED_FPOPCODE, NOT_TAKEN);
	break;

      case LE:
      case LT:
	/* x <= 0, x < 1, x <= -1: likely false — most values are
	   positive.  */
	if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
	    || XEXP (cond, 1) == constm1_rtx)
	  predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, NOT_TAKEN);
	break;

      case GE:
      case GT:
	/* x >= 0, x > 1, x >= -1: likely true for the same reason.  */
	if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
	    || XEXP (cond, 1) == constm1_rtx)
	  predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, TAKEN);
	break;

      default:
	break;
      }
}
1750
/* Set edge->probability for each successor edge of BB, by first applying
   the local RTL heuristics and then combining the collected predictions
   into a single probability per edge.  */
void
guess_outgoing_edge_probabilities (basic_block bb)
{
  bb_estimate_probability_locally (bb);
  combine_predictions_for_insn (BB_END (bb), bb);
}
1758 \f
1759 static tree expr_expected_value (tree, bitmap, enum br_predictor *predictor);
1760
/* Helper function for expr_expected_value.  Compute the expected value of
   the rhs "OP0 CODE OP1" of type TYPE; for a single-operand rhs OP1 is
   unused.  VISITED guards against SSA_NAME cycles.  If PREDICTOR is
   non-NULL, store there the predictor the result is based on.  Return the
   expected constant value, or NULL if none can be determined.  */

static tree
expr_expected_value_1 (tree type, tree op0, enum tree_code code,
		       tree op1, bitmap visited, enum br_predictor *predictor)
{
  gimple def;

  if (predictor)
    *predictor = PRED_UNCONDITIONAL;

  if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
    {
      if (TREE_CONSTANT (op0))
	return op0;

      if (code != SSA_NAME)
	return NULL_TREE;

      def = SSA_NAME_DEF_STMT (op0);

      /* If we were already here, break the infinite cycle.  */
      if (!bitmap_set_bit (visited, SSA_NAME_VERSION (op0)))
	return NULL;

      if (gimple_code (def) == GIMPLE_PHI)
	{
	  /* All the arguments of the PHI node must have the same constant
	     length.  */
	  int i, n = gimple_phi_num_args (def);
	  tree val = NULL, new_val;

	  for (i = 0; i < n; i++)
	    {
	      tree arg = PHI_ARG_DEF (def, i);
	      enum br_predictor predictor2;

	      /* If this PHI has itself as an argument, we cannot
		 determine the string length of this argument.  However,
		 if we can find an expected constant value for the other
		 PHI args then we can still be sure that this is
		 likely a constant.  So be optimistic and just
		 continue with the next argument.  */
	      if (arg == PHI_RESULT (def))
		continue;

	      new_val = expr_expected_value (arg, visited, &predictor2);

	      /* It is difficult to combine value predictors.  Simply assume
		 that later predictor is weaker and take its prediction.  */
	      if (predictor && *predictor < predictor2)
		*predictor = predictor2;
	      if (!new_val)
		return NULL;
	      if (!val)
		val = new_val;
	      else if (!operand_equal_p (val, new_val, false))
		return NULL;
	    }
	  return val;
	}
      if (is_gimple_assign (def))
	{
	  if (gimple_assign_lhs (def) != op0)
	    return NULL;

	  /* Recurse into the defining assignment's rhs.  */
	  return expr_expected_value_1 (TREE_TYPE (gimple_assign_lhs (def)),
					gimple_assign_rhs1 (def),
					gimple_assign_rhs_code (def),
					gimple_assign_rhs2 (def),
					visited, predictor);
	}

      if (is_gimple_call (def))
	{
	  tree decl = gimple_call_fndecl (def);
	  if (!decl)
	    {
	      /* Internal IFN_BUILTIN_EXPECT carries the expected value as
		 arg 1 and the predictor enum as arg 2.  */
	      if (gimple_call_internal_p (def)
		  && gimple_call_internal_fn (def) == IFN_BUILTIN_EXPECT)
		{
		  gcc_assert (gimple_call_num_args (def) == 3);
		  tree val = gimple_call_arg (def, 0);
		  if (TREE_CONSTANT (val))
		    return val;
		  if (predictor)
		    {
		      tree val2 = gimple_call_arg (def, 2);
		      gcc_assert (TREE_CODE (val2) == INTEGER_CST
				  && tree_fits_uhwi_p (val2)
				  && tree_to_uhwi (val2) < END_PREDICTORS);
		      *predictor = (enum br_predictor) tree_to_uhwi (val2);
		    }
		  return gimple_call_arg (def, 1);
		}
	      return NULL;
	    }
	  if (DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
	    switch (DECL_FUNCTION_CODE (decl))
	      {
	      case BUILT_IN_EXPECT:
		{
		  tree val;
		  if (gimple_call_num_args (def) != 2)
		    return NULL;
		  val = gimple_call_arg (def, 0);
		  if (TREE_CONSTANT (val))
		    return val;
		  if (predictor)
		    *predictor = PRED_BUILTIN_EXPECT;
		  return gimple_call_arg (def, 1);
		}

	      case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_N:
	      case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_1:
	      case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_2:
	      case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_4:
	      case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_8:
	      case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_16:
	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE:
	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_N:
	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_1:
	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_2:
	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_4:
	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_8:
	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_16:
		/* Assume that any given atomic operation has low contention,
		   and thus the compare-and-swap operation succeeds.  */
		if (predictor)
		  *predictor = PRED_COMPARE_AND_SWAP;
		return boolean_true_node;
	      default:
		break;
	      }
	}

      return NULL;
    }

  if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS)
    {
      tree res;
      enum br_predictor predictor2;
      /* Fold the operation if both operands have expected values.  */
      op0 = expr_expected_value (op0, visited, predictor);
      if (!op0)
	return NULL;
      op1 = expr_expected_value (op1, visited, &predictor2);
      if (predictor && *predictor < predictor2)
	*predictor = predictor2;
      if (!op1)
	return NULL;
      res = fold_build2 (code, type, op0, op1);
      if (TREE_CONSTANT (res))
	return res;
      return NULL;
    }
  if (get_gimple_rhs_class (code) == GIMPLE_UNARY_RHS)
    {
      tree res;
      op0 = expr_expected_value (op0, visited, predictor);
      if (!op0)
	return NULL;
      res = fold_build1 (code, type, op0);
      if (TREE_CONSTANT (res))
	return res;
      return NULL;
    }
  return NULL;
}
1930
1931 /* Return constant EXPR will likely have at execution time, NULL if unknown.
1932 The function is used by builtin_expect branch predictor so the evidence
1933 must come from this construct and additional possible constant folding.
1934
1935 We may want to implement more involved value guess (such as value range
1936 propagation based prediction), but such tricks shall go to new
1937 implementation. */
1938
1939 static tree
1940 expr_expected_value (tree expr, bitmap visited,
1941 enum br_predictor *predictor)
1942 {
1943 enum tree_code code;
1944 tree op0, op1;
1945
1946 if (TREE_CONSTANT (expr))
1947 {
1948 if (predictor)
1949 *predictor = PRED_UNCONDITIONAL;
1950 return expr;
1951 }
1952
1953 extract_ops_from_tree (expr, &code, &op0, &op1);
1954 return expr_expected_value_1 (TREE_TYPE (expr),
1955 op0, code, op1, visited, predictor);
1956 }
1957 \f
/* Predict using opcode of the last statement in basic block.  */
static void
tree_predict_by_opcode (basic_block bb)
{
  gimple stmt = last_stmt (bb);
  edge then_edge;
  tree op0, op1;
  tree type;
  tree val;
  enum tree_code cmp;
  bitmap visited;
  edge_iterator ei;
  enum br_predictor predictor;

  if (!stmt || gimple_code (stmt) != GIMPLE_COND)
    return;
  /* Find the edge taken when the condition is true.  */
  FOR_EACH_EDGE (then_edge, ei, bb->succs)
    if (then_edge->flags & EDGE_TRUE_VALUE)
      break;
  op0 = gimple_cond_lhs (stmt);
  op1 = gimple_cond_rhs (stmt);
  cmp = gimple_cond_code (stmt);
  type = TREE_TYPE (op0);
  /* First try the __builtin_expect-style expected-value machinery.  */
  visited = BITMAP_ALLOC (NULL);
  val = expr_expected_value_1 (boolean_type_node, op0, cmp, op1, visited,
			       &predictor);
  BITMAP_FREE (visited);
  if (val && TREE_CODE (val) == INTEGER_CST)
    {
      if (predictor == PRED_BUILTIN_EXPECT)
	{
	  int percent = PARAM_VALUE (BUILTIN_EXPECT_PROBABILITY);

	  gcc_assert (percent >= 0 && percent <= 100);
	  if (integer_zerop (val))
	    percent = 100 - percent;
	  predict_edge (then_edge, PRED_BUILTIN_EXPECT, HITRATE (percent));
	}
      else
	predict_edge (then_edge, predictor,
		      integer_zerop (val) ? NOT_TAKEN : TAKEN);
    }
  /* Try "pointer heuristic."
     A comparison ptr == 0 is predicted as false.
     Similarly, a comparison ptr1 == ptr2 is predicted as false.  */
  if (POINTER_TYPE_P (type))
    {
      if (cmp == EQ_EXPR)
	predict_edge_def (then_edge, PRED_TREE_POINTER, NOT_TAKEN);
      else if (cmp == NE_EXPR)
	predict_edge_def (then_edge, PRED_TREE_POINTER, TAKEN);
    }
  else

  /* Try "opcode heuristic."
     EQ tests are usually false and NE tests are usually true.  Also,
     most quantities are positive, so we can make the appropriate guesses
     about signed comparisons against zero.  */
    switch (cmp)
      {
      case EQ_EXPR:
      case UNEQ_EXPR:
	/* Floating point comparisons appears to behave in a very
	   unpredictable way because of special role of = tests in
	   FP code.  */
	if (FLOAT_TYPE_P (type))
	  ;
	/* Comparisons with 0 are often used for booleans and there is
	   nothing useful to predict about them.  */
	else if (integer_zerop (op0) || integer_zerop (op1))
	  ;
	else
	  predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, NOT_TAKEN);
	break;

      case NE_EXPR:
      case LTGT_EXPR:
	/* Floating point comparisons appears to behave in a very
	   unpredictable way because of special role of = tests in
	   FP code.  */
	if (FLOAT_TYPE_P (type))
	  ;
	/* Comparisons with 0 are often used for booleans and there is
	   nothing useful to predict about them.  */
	else if (integer_zerop (op0)
		 || integer_zerop (op1))
	  ;
	else
	  predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, TAKEN);
	break;

      case ORDERED_EXPR:
	predict_edge_def (then_edge, PRED_TREE_FPOPCODE, TAKEN);
	break;

      case UNORDERED_EXPR:
	predict_edge_def (then_edge, PRED_TREE_FPOPCODE, NOT_TAKEN);
	break;

      case LE_EXPR:
      case LT_EXPR:
	/* x <= 0, x < 1, x <= -1: likely false — most values are
	   positive.  */
	if (integer_zerop (op1)
	    || integer_onep (op1)
	    || integer_all_onesp (op1)
	    || real_zerop (op1)
	    || real_onep (op1)
	    || real_minus_onep (op1))
	  predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, NOT_TAKEN);
	break;

      case GE_EXPR:
      case GT_EXPR:
	/* x >= 0, x > 1, x >= -1: likely true for the same reason.  */
	if (integer_zerop (op1)
	    || integer_onep (op1)
	    || integer_all_onesp (op1)
	    || real_zerop (op1)
	    || real_onep (op1)
	    || real_minus_onep (op1))
	  predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, TAKEN);
	break;

      default:
	break;
      }
}
2083
2084 /* Try to guess whether the value of return means error code. */
2085
2086 static enum br_predictor
2087 return_prediction (tree val, enum prediction *prediction)
2088 {
2089 /* VOID. */
2090 if (!val)
2091 return PRED_NO_PREDICTION;
2092 /* Different heuristics for pointers and scalars. */
2093 if (POINTER_TYPE_P (TREE_TYPE (val)))
2094 {
2095 /* NULL is usually not returned. */
2096 if (integer_zerop (val))
2097 {
2098 *prediction = NOT_TAKEN;
2099 return PRED_NULL_RETURN;
2100 }
2101 }
2102 else if (INTEGRAL_TYPE_P (TREE_TYPE (val)))
2103 {
2104 /* Negative return values are often used to indicate
2105 errors. */
2106 if (TREE_CODE (val) == INTEGER_CST
2107 && tree_int_cst_sgn (val) < 0)
2108 {
2109 *prediction = NOT_TAKEN;
2110 return PRED_NEGATIVE_RETURN;
2111 }
2112 /* Constant return values seems to be commonly taken.
2113 Zero/one often represent booleans so exclude them from the
2114 heuristics. */
2115 if (TREE_CONSTANT (val)
2116 && (!integer_zerop (val) && !integer_onep (val)))
2117 {
2118 *prediction = TAKEN;
2119 return PRED_CONST_RETURN;
2120 }
2121 }
2122 return PRED_NO_PREDICTION;
2123 }
2124
/* Find the basic block with return expression and look up for possible
   return value trying to apply RETURN_PREDICTION heuristics.  */
static void
apply_return_prediction (void)
{
  greturn *return_stmt = NULL;
  tree return_val;
  edge e;
  gphi *phi;
  int phi_num_args, i;
  enum br_predictor pred;
  enum prediction direction;
  edge_iterator ei;

  /* Locate a predecessor of the exit block that ends in a return.  */
  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    {
      gimple last = last_stmt (e->src);
      if (last
	  && gimple_code (last) == GIMPLE_RETURN)
	{
	  return_stmt = as_a <greturn *> (last);
	  break;
	}
    }
  if (!e)
    return;
  return_val = gimple_return_retval (return_stmt);
  if (!return_val)
    return;
  /* Only a return value merged by a PHI gives us per-path information.  */
  if (TREE_CODE (return_val) != SSA_NAME
      || !SSA_NAME_DEF_STMT (return_val)
      || gimple_code (SSA_NAME_DEF_STMT (return_val)) != GIMPLE_PHI)
    return;
  phi = as_a <gphi *> (SSA_NAME_DEF_STMT (return_val));
  phi_num_args = gimple_phi_num_args (phi);
  pred = return_prediction (PHI_ARG_DEF (phi, 0), &direction);

  /* Avoid the degenerate case where all return values from the function
     belong to same category (ie they are all positive constants)
     so we can hardly say something about them.  */
  for (i = 1; i < phi_num_args; i++)
    if (pred != return_prediction (PHI_ARG_DEF (phi, i), &direction))
      break;
  if (i != phi_num_args)
    for (i = 0; i < phi_num_args; i++)
      {
	pred = return_prediction (PHI_ARG_DEF (phi, i), &direction);
	if (pred != PRED_NO_PREDICTION)
	  predict_paths_leading_to_edge (gimple_phi_arg_edge (phi, i), pred,
					 direction);
      }
}
2177
/* Look for basic block that contains unlikely to happen events
   (such as noreturn calls) and mark all paths leading to execution
   of this basic blocks as unlikely.  */

static void
tree_bb_level_predictions (void)
{
  basic_block bb;
  bool has_return_edges = false;
  edge e;
  edge_iterator ei;

  /* Check whether the function has at least one normal (non-abnormal,
     non-fake, non-EH) return edge; the noreturn heuristic below only
     makes sense in that case.  */
  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    if (!(e->flags & (EDGE_ABNORMAL | EDGE_FAKE | EDGE_EH)))
      {
	has_return_edges = true;
	break;
      }

  apply_return_prediction ();

  FOR_EACH_BB_FN (bb, cfun)
    {
      gimple_stmt_iterator gsi;

      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple stmt = gsi_stmt (gsi);
	  tree decl;

	  if (is_gimple_call (stmt))
	    {
	      /* Paths reaching a noreturn call are unlikely, provided
		 the function can return at all.  */
	      if ((gimple_call_flags (stmt) & ECF_NORETURN)
		  && has_return_edges)
		predict_paths_leading_to (bb, PRED_NORETURN,
					  NOT_TAKEN);
	      /* Likewise for calls to functions the user marked with
		 __attribute__((cold)).  */
	      decl = gimple_call_fndecl (stmt);
	      if (decl
		  && lookup_attribute ("cold",
				       DECL_ATTRIBUTES (decl)))
		predict_paths_leading_to (bb, PRED_COLD_FUNCTION,
					  NOT_TAKEN);
	    }
	  else if (gimple_code (stmt) == GIMPLE_PREDICT)
	    {
	      predict_paths_leading_to (bb, gimple_predict_predictor (stmt),
					gimple_predict_outcome (stmt));
	      /* Keep GIMPLE_PREDICT around so early inlining will propagate
	         hints to callers.  */
	    }
	}
    }
}
2231
2232 #ifdef ENABLE_CHECKING
2233
/* Callback for hash_map::traverse, asserts that the pointer map is
   empty, i.e. that every recorded prediction was consumed by
   combine_predictions_for_bb.  Always returns false so traversal
   continues over all entries.  */

bool
assert_is_empty (const_basic_block const &, edge_prediction *const &value,
		 void *)
{
  gcc_assert (!value);
  return false;
}
2244 #endif
2245
/* Predict branch probabilities and estimate profile for basic block BB.
   Applies, per outgoing edge: hot/cold user-label heuristics, the early
   return heuristic and the guarded-call heuristic, then opcode-based
   heuristics for BB's own condition.  */

static void
tree_estimate_probability_bb (basic_block bb)
{
  edge e;
  edge_iterator ei;
  gimple last;

  FOR_EACH_EDGE (e, ei, bb->succs)
    {
      /* Predict edges to user labels with attributes.  */
      if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
	{
	  gimple_stmt_iterator gi;
	  for (gi = gsi_start_bb (e->dest); !gsi_end_p (gi); gsi_next (&gi))
	    {
	      glabel *label_stmt = dyn_cast <glabel *> (gsi_stmt (gi));
	      tree decl;

	      /* Labels can only appear at the start of a block; stop at
		 the first non-label statement.  */
	      if (!label_stmt)
		break;
	      decl = gimple_label_label (label_stmt);
	      if (DECL_ARTIFICIAL (decl))
		continue;

	      /* Finally, we have a user-defined label.  */
	      if (lookup_attribute ("cold", DECL_ATTRIBUTES (decl)))
		predict_edge_def (e, PRED_COLD_LABEL, NOT_TAKEN);
	      else if (lookup_attribute ("hot", DECL_ATTRIBUTES (decl)))
		predict_edge_def (e, PRED_HOT_LABEL, TAKEN);
	    }
	}

      /* Predict early returns to be probable, as we've already taken
         care for error returns and other cases are often used for
         fast paths through function.

	 Since we've already removed the return statements, we are
	 looking for CFG like:

	   if (conditional)
	     {
	       ..
	       goto return_block
	     }
	   some other blocks
	 return_block:
	   return_stmt.  */
      if (e->dest != bb->next_bb
	  && e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
	  && single_succ_p (e->dest)
	  && single_succ_edge (e->dest)->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)
	  && (last = last_stmt (e->dest)) != NULL
	  && gimple_code (last) == GIMPLE_RETURN)
	{
	  edge e1;
	  edge_iterator ei1;

	  /* Do not override the more specific return-value heuristics
	     (null/const/negative return) applied earlier.  When BB has
	     a single successor, predict its predecessors instead.  */
	  if (single_succ_p (bb))
	    {
	      FOR_EACH_EDGE (e1, ei1, bb->preds)
		if (!predicted_by_p (e1->src, PRED_NULL_RETURN)
		    && !predicted_by_p (e1->src, PRED_CONST_RETURN)
		    && !predicted_by_p (e1->src, PRED_NEGATIVE_RETURN))
		  predict_edge_def (e1, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
	    }
	  else
	    if (!predicted_by_p (e->src, PRED_NULL_RETURN)
		&& !predicted_by_p (e->src, PRED_CONST_RETURN)
		&& !predicted_by_p (e->src, PRED_NEGATIVE_RETURN))
	      predict_edge_def (e, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
	}

      /* Look for block we are guarding (ie we dominate it,
	 but it doesn't postdominate us).  */
      if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun) && e->dest != bb
	  && dominated_by_p (CDI_DOMINATORS, e->dest, e->src)
	  && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e->dest))
	{
	  gimple_stmt_iterator bi;

	  /* The call heuristic claims that a guarded function call
	     is improbable.  This is because such calls are often used
	     to signal exceptional situations such as printing error
	     messages.  */
	  for (bi = gsi_start_bb (e->dest); !gsi_end_p (bi);
	       gsi_next (&bi))
	    {
	      gimple stmt = gsi_stmt (bi);
	      if (is_gimple_call (stmt)
		  /* Constant and pure calls are hardly used to signalize
		     something exceptional.  */
		  && gimple_has_side_effects (stmt))
		{
		  predict_edge_def (e, PRED_CALL, NOT_TAKEN);
		  break;
		}
	    }
	}
    }
  tree_predict_by_opcode (bb);
}
2349
/* Predict branch probabilities and estimate profile of the tree CFG.
   This function can be called from the loop optimizers to recompute
   the profile information.  */

void
tree_estimate_probability (void)
{
  basic_block bb;

  /* Fake edges keep the postdominator info well-defined in the presence
     of noreturn calls and infinite loops.  */
  add_noreturn_fake_exit_edges ();
  connect_infinite_loops_to_exit ();
  /* We use loop_niter_by_eval, which requires that the loops have
     preheaders.  */
  create_preheaders (CP_SIMPLE_PREHEADERS);
  calculate_dominance_info (CDI_POST_DOMINATORS);

  /* BB_PREDICTIONS collects per-edge predictions; it is consumed (and
     expected to be emptied) by combine_predictions_for_bb below.  */
  bb_predictions = new hash_map<const_basic_block, edge_prediction *>;
  tree_bb_level_predictions ();
  record_loop_exits ();

  if (number_of_loops (cfun) > 1)
    predict_loops ();

  FOR_EACH_BB_FN (bb, cfun)
    tree_estimate_probability_bb (bb);

  FOR_EACH_BB_FN (bb, cfun)
    combine_predictions_for_bb (bb);

#ifdef ENABLE_CHECKING
  bb_predictions->traverse<void *, assert_is_empty> (NULL);
#endif
  delete bb_predictions;
  bb_predictions = NULL;

  /* Turn the combined edge probabilities into basic block frequencies.  */
  estimate_bb_frequencies (false);
  free_dominance_info (CDI_POST_DOMINATORS);
  remove_fake_exit_edges ();
}
2389 \f
/* Predict edges to successors of CUR whose sources are not postdominated by
   BB by PRED and recurse to all postdominators.  VISITED guards against
   revisiting the same block (see comment below).  */

static void
predict_paths_for_bb (basic_block cur, basic_block bb,
		      enum br_predictor pred,
		      enum prediction taken,
		      bitmap visited)
{
  edge e;
  edge_iterator ei;
  basic_block son;

  /* We are looking for all edges forming edge cut induced by
     set of all blocks postdominated by BB.  */
  FOR_EACH_EDGE (e, ei, cur->preds)
    if (e->src->index >= NUM_FIXED_BLOCKS
	&& !dominated_by_p (CDI_POST_DOMINATORS, e->src, bb))
      {
	edge e2;
	edge_iterator ei2;
	bool found = false;

	/* Ignore fake edges and eh, we predict them as not taken anyway.  */
	if (e->flags & (EDGE_EH | EDGE_FAKE))
	  continue;
	gcc_assert (bb == cur || dominated_by_p (CDI_POST_DOMINATORS, cur, bb));

	/* See if there is an edge from e->src that is not abnormal
	   and does not lead to BB.  */
	FOR_EACH_EDGE (e2, ei2, e->src->succs)
	  if (e2 != e
	      && !(e2->flags & (EDGE_EH | EDGE_FAKE))
	      && !dominated_by_p (CDI_POST_DOMINATORS, e2->dest, bb))
	    {
	      found = true;
	      break;
	    }

	/* If there is non-abnormal path leaving e->src, predict edge
	   using predictor.  Otherwise we need to look for paths
	   leading to e->src.

	   The second may lead to infinite loop in the case we are predicting
	   regions that are only reachable by abnormal edges.  We simply
	   prevent visiting given BB twice.  */
	if (found)
	  predict_edge_def (e, pred, taken);
	else if (bitmap_set_bit (visited, e->src->index))
	  predict_paths_for_bb (e->src, e->src, pred, taken, visited);
      }
  /* Recurse into the blocks this one postdominates; their incoming edges
     are part of the same edge cut.  */
  for (son = first_dom_son (CDI_POST_DOMINATORS, cur);
       son;
       son = next_dom_son (CDI_POST_DOMINATORS, son))
    predict_paths_for_bb (son, bb, pred, taken, visited);
}
2446
2447 /* Sets branch probabilities according to PREDiction and
2448 FLAGS. */
2449
2450 static void
2451 predict_paths_leading_to (basic_block bb, enum br_predictor pred,
2452 enum prediction taken)
2453 {
2454 bitmap visited = BITMAP_ALLOC (NULL);
2455 predict_paths_for_bb (bb, bb, pred, taken, visited);
2456 BITMAP_FREE (visited);
2457 }
2458
2459 /* Like predict_paths_leading_to but take edge instead of basic block. */
2460
2461 static void
2462 predict_paths_leading_to_edge (edge e, enum br_predictor pred,
2463 enum prediction taken)
2464 {
2465 bool has_nonloop_edge = false;
2466 edge_iterator ei;
2467 edge e2;
2468
2469 basic_block bb = e->src;
2470 FOR_EACH_EDGE (e2, ei, bb->succs)
2471 if (e2->dest != e->src && e2->dest != e->dest
2472 && !(e->flags & (EDGE_EH | EDGE_FAKE))
2473 && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e2->dest))
2474 {
2475 has_nonloop_edge = true;
2476 break;
2477 }
2478 if (!has_nonloop_edge)
2479 {
2480 bitmap visited = BITMAP_ALLOC (NULL);
2481 predict_paths_for_bb (bb, bb, pred, taken, visited);
2482 BITMAP_FREE (visited);
2483 }
2484 else
2485 predict_edge_def (e, pred, taken);
2486 }
2487 \f
/* This is used to carry information about basic blocks.  It is
   attached to the AUX field of the standard CFG block while
   estimate_bb_frequencies runs.  */

struct block_info
{
  /* Estimated frequency of execution of basic_block.  */
  sreal frequency;

  /* To keep queue of basic blocks to process.  Linked by propagate_freq
     when all of a block's predecessors have been handled.  */
  basic_block next;

  /* Number of predecessors we need to visit first.  */
  int npredecessors;
};
2502
/* Similar information for edges, attached to each edge's AUX field.  */
struct edge_prob_info
{
  /* In case edge is a loopback edge, the probability edge will be reached
     in case header is.  Estimated number of iterations of the loop can be
     then computed as 1 / (1 - back_edge_prob).  */
  sreal back_edge_prob;
  /* True if the edge is a loopback edge in the natural loop.  */
  unsigned int back_edge:1;
};

/* Accessors for the scratch data hung off the AUX pointers above.
   EDGE_INFO is #undef'd first since a different definition may already
   be in scope from an included header.  */
#define BLOCK_INFO(B) ((block_info *) (B)->aux)
#undef EDGE_INFO
#define EDGE_INFO(E) ((edge_prob_info *) (E)->aux)
2517
/* Helper function for estimate_bb_frequencies.
   Propagate the frequencies in blocks marked in
   TOVISIT, starting in HEAD.  Blocks are processed in an order where
   every non-back-edge predecessor is handled first; back edges feed the
   cyclic_probability term that models loop iteration counts.  */

static void
propagate_freq (basic_block head, bitmap tovisit)
{
  basic_block bb;
  basic_block last;
  unsigned i;
  edge e;
  basic_block nextbb;
  bitmap_iterator bi;

  /* For each basic block we need to visit, count the number of its
     predecessors that must be visited first.  */
  EXECUTE_IF_SET_IN_BITMAP (tovisit, 0, i, bi)
    {
      edge_iterator ei;
      int count = 0;

      bb = BASIC_BLOCK_FOR_FN (cfun, i);

      FOR_EACH_EDGE (e, ei, bb->preds)
	{
	  bool visit = bitmap_bit_p (tovisit, e->src->index);

	  if (visit && !(e->flags & EDGE_DFS_BACK))
	    count++;
	  /* A DFS back edge that is not a natural-loop latch indicates
	     an irreducible region; it is ignored (dumped for
	     diagnosis).  */
	  else if (visit && dump_file && !EDGE_INFO (e)->back_edge)
	    fprintf (dump_file,
		     "Irreducible region hit, ignoring edge to %i->%i\n",
		     e->src->index, bb->index);
	}
      BLOCK_INFO (bb)->npredecessors = count;
      /* When function never returns, we will never process exit block.  */
      if (!count && bb == EXIT_BLOCK_PTR_FOR_FN (cfun))
	bb->count = bb->frequency = 0;
    }

  BLOCK_INFO (head)->frequency = 1;
  last = head;
  /* Worklist walk: NEXTBB/LAST implement a FIFO threaded through
     BLOCK_INFO (...)->next.  */
  for (bb = head; bb; bb = nextbb)
    {
      edge_iterator ei;
      sreal cyclic_probability = 0;
      sreal frequency = 0;

      nextbb = BLOCK_INFO (bb)->next;
      BLOCK_INFO (bb)->next = NULL;

      /* Compute frequency of basic block.  */
      if (bb != head)
	{
#ifdef ENABLE_CHECKING
	  /* All forward-edge predecessors must already be processed.  */
	  FOR_EACH_EDGE (e, ei, bb->preds)
	    gcc_assert (!bitmap_bit_p (tovisit, e->src->index)
			|| (e->flags & EDGE_DFS_BACK));
#endif

	  FOR_EACH_EDGE (e, ei, bb->preds)
	    if (EDGE_INFO (e)->back_edge)
	      {
		cyclic_probability += EDGE_INFO (e)->back_edge_prob;
	      }
	    else if (!(e->flags & EDGE_DFS_BACK))
	      {
		/* frequency += (e->probability
		  	        * BLOCK_INFO (e->src)->frequency /
			        REG_BR_PROB_BASE);  */

		sreal tmp = e->probability;
		tmp *= BLOCK_INFO (e->src)->frequency;
		tmp *= real_inv_br_prob_base;
		frequency += tmp;
	      }

	  if (cyclic_probability == 0)
	    {
	      BLOCK_INFO (bb)->frequency = frequency;
	    }
	  else
	    {
	      /* Clamp so division below stays finite even for (nearly)
		 infinite loops.  */
	      if (cyclic_probability > real_almost_one)
		cyclic_probability = real_almost_one;

	      /* BLOCK_INFO (bb)->frequency = frequency
					      / (1 - cyclic_probability) */

	      cyclic_probability = sreal (1) - cyclic_probability;
	      BLOCK_INFO (bb)->frequency = frequency / cyclic_probability;
	    }
	}

      bitmap_clear_bit (tovisit, bb->index);

      /* If BB loops back to HEAD, record the probability that the back
	 edge is reached given HEAD is, for the enclosing level.  */
      e = find_edge (bb, head);
      if (e)
	{
	  /* EDGE_INFO (e)->back_edge_prob
	     = ((e->probability * BLOCK_INFO (bb)->frequency)
	     / REG_BR_PROB_BASE); */

	  sreal tmp = e->probability;
	  tmp *= BLOCK_INFO (bb)->frequency;
	  EDGE_INFO (e)->back_edge_prob = tmp * real_inv_br_prob_base;
	}

      /* Propagate to successor blocks.  */
      FOR_EACH_EDGE (e, ei, bb->succs)
	if (!(e->flags & EDGE_DFS_BACK)
	    && BLOCK_INFO (e->dest)->npredecessors)
	  {
	    BLOCK_INFO (e->dest)->npredecessors--;
	    if (!BLOCK_INFO (e->dest)->npredecessors)
	      {
		if (!nextbb)
		  nextbb = e->dest;
		else
		  BLOCK_INFO (last)->next = e->dest;

		last = e->dest;
	      }
	  }
    }
}
2644
/* Estimate frequencies in loops at same nest level.  Inner loops are
   processed first so that their back-edge probabilities are available
   when the enclosing loop body is propagated.  */

static void
estimate_loops_at_level (struct loop *first_loop)
{
  struct loop *loop;

  for (loop = first_loop; loop; loop = loop->next)
    {
      edge e;
      basic_block *bbs;
      unsigned i;
      bitmap tovisit = BITMAP_ALLOC (NULL);

      estimate_loops_at_level (loop->inner);

      /* Find current loop back edge and mark it.  */
      e = loop_latch_edge (loop);
      EDGE_INFO (e)->back_edge = 1;

      /* Propagate frequencies within the body of this loop only.  */
      bbs = get_loop_body (loop);
      for (i = 0; i < loop->num_nodes; i++)
	bitmap_set_bit (tovisit, bbs[i]->index);
      free (bbs);
      propagate_freq (loop->header, tovisit);
      BITMAP_FREE (tovisit);
    }
}
2673
2674 /* Propagates frequencies through structure of loops. */
2675
2676 static void
2677 estimate_loops (void)
2678 {
2679 bitmap tovisit = BITMAP_ALLOC (NULL);
2680 basic_block bb;
2681
2682 /* Start by estimating the frequencies in the loops. */
2683 if (number_of_loops (cfun) > 1)
2684 estimate_loops_at_level (current_loops->tree_root->inner);
2685
2686 /* Now propagate the frequencies through all the blocks. */
2687 FOR_ALL_BB_FN (bb, cfun)
2688 {
2689 bitmap_set_bit (tovisit, bb->index);
2690 }
2691 propagate_freq (ENTRY_BLOCK_PTR_FOR_FN (cfun), tovisit);
2692 BITMAP_FREE (tovisit);
2693 }
2694
/* Drop the profile for NODE to guessed, and update its frequency based on
   whether it is expected to be hot given the CALL_COUNT.  */

static void
drop_profile (struct cgraph_node *node, gcov_type call_count)
{
  struct function *fn = DECL_STRUCT_FUNCTION (node->decl);
  /* In the case where this was called by another function with a
     dropped profile, call_count will be 0.  Since there are no
     non-zero call counts to this function, we don't know for sure
     whether it is hot, and therefore it will be marked normal below.  */
  bool hot = maybe_hot_count_p (NULL, call_count);

  if (dump_file)
    fprintf (dump_file,
             "Dropping 0 profile for %s/%i. %s based on calls.\n",
             node->name (), node->order,
             hot ? "Function is hot" : "Function is normal");
  /* We only expect to miss profiles for functions that are reached
     via non-zero call edges in cases where the function may have
     been linked from another module or library (COMDATs and extern
     templates). See the comments below for handle_missing_profiles.
     Also, only warn in cases where the missing counts exceed the
     number of training runs. In certain cases with an execv followed
     by a no-return call the profile for the no-return call is not
     dumped and there can be a mismatch.  */
  if (!DECL_COMDAT (node->decl) && !DECL_EXTERNAL (node->decl)
      && call_count > profile_info->runs)
    {
      if (flag_profile_correction)
        {
          if (dump_file)
            fprintf (dump_file,
                     "Missing counts for called function %s/%i\n",
                     node->name (), node->order);
        }
      else
        warning (0, "Missing counts for called function %s/%i",
                 node->name (), node->order);
    }

  /* Fall back to the guessed profile (or none at all when guessing is
     disabled) and reclassify the node's hotness.  */
  profile_status_for_fn (fn)
      = (flag_guess_branch_prob ? PROFILE_GUESSED : PROFILE_ABSENT);
  node->frequency
      = hot ? NODE_FREQUENCY_HOT : NODE_FREQUENCY_NORMAL;
}
2741
2742 /* In the case of COMDAT routines, multiple object files will contain the same
2743 function and the linker will select one for the binary. In that case
2744 all the other copies from the profile instrument binary will be missing
2745 profile counts. Look for cases where this happened, due to non-zero
2746 call counts going to 0-count functions, and drop the profile to guessed
2747 so that we can use the estimated probabilities and avoid optimizing only
2748 for size.
2749
2750 The other case where the profile may be missing is when the routine
2751 is not going to be emitted to the object file, e.g. for "extern template"
2752 class methods. Those will be marked DECL_EXTERNAL. Emit a warning in
2753 all other cases of non-zero calls to 0-count functions. */
2754
2755 void
2756 handle_missing_profiles (void)
2757 {
2758 struct cgraph_node *node;
2759 int unlikely_count_fraction = PARAM_VALUE (UNLIKELY_BB_COUNT_FRACTION);
2760 vec<struct cgraph_node *> worklist;
2761 worklist.create (64);
2762
2763 /* See if 0 count function has non-0 count callers. In this case we
2764 lost some profile. Drop its function profile to PROFILE_GUESSED. */
2765 FOR_EACH_DEFINED_FUNCTION (node)
2766 {
2767 struct cgraph_edge *e;
2768 gcov_type call_count = 0;
2769 gcov_type max_tp_first_run = 0;
2770 struct function *fn = DECL_STRUCT_FUNCTION (node->decl);
2771
2772 if (node->count)
2773 continue;
2774 for (e = node->callers; e; e = e->next_caller)
2775 {
2776 call_count += e->count;
2777
2778 if (e->caller->tp_first_run > max_tp_first_run)
2779 max_tp_first_run = e->caller->tp_first_run;
2780 }
2781
2782 /* If time profile is missing, let assign the maximum that comes from
2783 caller functions. */
2784 if (!node->tp_first_run && max_tp_first_run)
2785 node->tp_first_run = max_tp_first_run + 1;
2786
2787 if (call_count
2788 && fn && fn->cfg
2789 && (call_count * unlikely_count_fraction >= profile_info->runs))
2790 {
2791 drop_profile (node, call_count);
2792 worklist.safe_push (node);
2793 }
2794 }
2795
2796 /* Propagate the profile dropping to other 0-count COMDATs that are
2797 potentially called by COMDATs we already dropped the profile on. */
2798 while (worklist.length () > 0)
2799 {
2800 struct cgraph_edge *e;
2801
2802 node = worklist.pop ();
2803 for (e = node->callees; e; e = e->next_caller)
2804 {
2805 struct cgraph_node *callee = e->callee;
2806 struct function *fn = DECL_STRUCT_FUNCTION (callee->decl);
2807
2808 if (callee->count > 0)
2809 continue;
2810 if (DECL_COMDAT (callee->decl) && fn && fn->cfg
2811 && profile_status_for_fn (fn) == PROFILE_READ)
2812 {
2813 drop_profile (node, 0);
2814 worklist.safe_push (callee);
2815 }
2816 }
2817 }
2818 worklist.release ();
2819 }
2820
/* Convert counts measured by profile driven feedback to frequencies.
   Return nonzero iff there was any nonzero execution count.  */

int
counts_to_freqs (void)
{
  gcov_type count_max, true_count_max = 0;
  basic_block bb;

  /* Don't overwrite the estimated frequencies when the profile for
     the function is missing.  We may drop this function PROFILE_GUESSED
     later in drop_profile ().  */
  if (!flag_auto_profile && !ENTRY_BLOCK_PTR_FOR_FN (cfun)->count)
    return 0;

  FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
    true_count_max = MAX (bb->count, true_count_max);

  /* Scale counts into the [0, BB_FREQ_MAX] frequency range; the
     "+ count_max / 2" rounds to nearest.  COUNT_MAX is at least 1 to
     avoid division by zero when all counts are zero.  */
  count_max = MAX (true_count_max, 1);
  FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
    bb->frequency = (bb->count * BB_FREQ_MAX + count_max / 2) / count_max;

  return true_count_max;
}
2845
/* Return true if function is likely to be expensive, so there is no point to
   optimize performance of prologue, epilogue or do inlining at the expense
   of code size growth.  THRESHOLD is the limit of number of instructions
   function can execute at average to be still considered not expensive.  */

bool
expensive_function_p (int threshold)
{
  unsigned int sum = 0;
  basic_block bb;
  unsigned int limit;

  /* We can not compute accurately for large thresholds due to scaled
     frequencies.  */
  gcc_assert (threshold <= BB_FREQ_MAX);

  /* Frequencies are out of range.  This either means that function contains
     internal loop executing more than BB_FREQ_MAX times or profile feedback
     is available and function has not been executed at all.  */
  if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency == 0)
    return true;

  /* Maximally BB_FREQ_MAX^2 so overflow won't happen.  */
  limit = ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency * threshold;
  FOR_EACH_BB_FN (bb, cfun)
    {
      rtx_insn *insn;

      /* Sum the frequency of each active insn's block; bail out as soon
	 as the running total exceeds the scaled threshold.  */
      FOR_BB_INSNS (bb, insn)
	if (active_insn_p (insn))
	  {
	    sum += bb->frequency;
	    if (sum > limit)
	      return true;
	  }
    }

  return false;
}
2885
/* Estimate and propagate basic block frequencies using the given branch
   probabilities.  If FORCE is true, the frequencies are used to estimate
   the counts even when there are already non-zero profile counts.  */

void
estimate_bb_frequencies (bool force)
{
  basic_block bb;
  sreal freq_max;

  if (force || profile_status_for_fn (cfun) != PROFILE_READ || !counts_to_freqs ())
    {
      static int real_values_initialized = 0;

      /* Lazily initialize the sreal constants shared with
	 propagate_freq; they never change afterwards.  */
      if (!real_values_initialized)
        {
	  real_values_initialized = 1;
	  real_br_prob_base = REG_BR_PROB_BASE;
	  real_bb_freq_max = BB_FREQ_MAX;
	  real_one_half = sreal (1, -1);
	  real_inv_br_prob_base = sreal (1) / real_br_prob_base;
	  real_almost_one = sreal (1) - real_inv_br_prob_base;
	}

      mark_dfs_back_edges ();

      single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun))->probability =
	 REG_BR_PROB_BASE;

      /* Set up block info for each basic block.  */
      alloc_aux_for_blocks (sizeof (block_info));
      alloc_aux_for_edges (sizeof (edge_prob_info));
      FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
	{
	  edge e;
	  edge_iterator ei;

	  /* Seed each edge with its branch probability scaled to [0,1].  */
	  FOR_EACH_EDGE (e, ei, bb->succs)
	    {
	      EDGE_INFO (e)->back_edge_prob = e->probability;
	      EDGE_INFO (e)->back_edge_prob *= real_inv_br_prob_base;
	    }
	}

      /* First compute frequencies locally for each loop from innermost
         to outermost to examine frequencies for back edges.  */
      estimate_loops ();

      freq_max = 0;
      FOR_EACH_BB_FN (bb, cfun)
	if (freq_max < BLOCK_INFO (bb)->frequency)
	  freq_max = BLOCK_INFO (bb)->frequency;

      /* Normalize so the hottest block gets frequency BB_FREQ_MAX;
	 real_one_half rounds to nearest on conversion.  */
      freq_max = real_bb_freq_max / freq_max;
      FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
	{
	  sreal tmp = BLOCK_INFO (bb)->frequency * freq_max + real_one_half;
	  bb->frequency = tmp.to_int ();
	}

      free_aux_for_blocks ();
      free_aux_for_edges ();
    }
  compute_function_frequency ();
}
2951
/* Decide whether function is hot, cold or unlikely executed, and set the
   corresponding NODE_FREQUENCY_* classification on its cgraph node.  */
void
compute_function_frequency (void)
{
  basic_block bb;
  struct cgraph_node *node = cgraph_node::get (current_function_decl);

  if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
      || MAIN_NAME_P (DECL_NAME (current_function_decl)))
    node->only_called_at_startup = true;
  if (DECL_STATIC_DESTRUCTOR (current_function_decl))
    node->only_called_at_exit = true;

  /* Without real profile data, classify from declaration attributes and
     flags only.  */
  if (profile_status_for_fn (cfun) != PROFILE_READ)
    {
      int flags = flags_from_decl_or_type (current_function_decl);
      if (lookup_attribute ("cold", DECL_ATTRIBUTES (current_function_decl))
	  != NULL)
        node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
      else if (lookup_attribute ("hot", DECL_ATTRIBUTES (current_function_decl))
	       != NULL)
        node->frequency = NODE_FREQUENCY_HOT;
      else if (flags & ECF_NORETURN)
        node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
      else if (MAIN_NAME_P (DECL_NAME (current_function_decl)))
        node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
      else if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
	       || DECL_STATIC_DESTRUCTOR (current_function_decl))
        node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
      return;
    }

  /* Only first time try to drop function into unlikely executed.
     After inlining the roundoff errors may confuse us.
     Ipa-profile pass will drop functions only called from unlikely
     functions to unlikely and that is most of what we care about.  */
  if (!cfun->after_inlining)
    node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
  /* Upgrade the classification based on the hottest basic block found.  */
  FOR_EACH_BB_FN (bb, cfun)
    {
      if (maybe_hot_bb_p (cfun, bb))
	{
	  node->frequency = NODE_FREQUENCY_HOT;
	  return;
	}
      if (!probably_never_executed_bb_p (cfun, bb))
	node->frequency = NODE_FREQUENCY_NORMAL;
    }
}
3001
3002 /* Build PREDICT_EXPR. */
3003 tree
3004 build_predict_expr (enum br_predictor predictor, enum prediction taken)
3005 {
3006 tree t = build1 (PREDICT_EXPR, void_type_node,
3007 build_int_cst (integer_type_node, predictor));
3008 SET_PREDICT_EXPR_OUTCOME (t, taken);
3009 return t;
3010 }
3011
/* Return the human-readable name of PREDICTOR, taken from the
   predictor_info table.  */
const char *
predictor_name (enum br_predictor predictor)
{
  return predictor_info[predictor].name;
}
3017
3018 /* Predict branch probabilities and estimate profile of the tree CFG. */
3019
3020 namespace {
3021
/* Pass descriptor for the branch-probability estimation pass.  */
const pass_data pass_data_profile =
{
  GIMPLE_PASS, /* type */
  "profile_estimate", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_BRANCH_PROB, /* tv_id */
  PROP_cfg, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

/* GIMPLE pass wrapper around tree_estimate_probability; only runs when
   guessing branch probabilities is enabled.  */
class pass_profile : public gimple_opt_pass
{
public:
  pass_profile (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_profile, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *) { return flag_guess_branch_prob; }
  virtual unsigned int execute (function *);

}; // class pass_profile
3047
/* Main entry of the profile_estimate pass: set up loops and scalar
   evolutions, run the tree-level probability estimation, and record that
   the profile is now guessed.  */
unsigned int
pass_profile::execute (function *fun)
{
  unsigned nb_loops;

  loop_optimizer_init (LOOPS_NORMAL);
  if (dump_file && (dump_flags & TDF_DETAILS))
    flow_loops_dump (dump_file, NULL, 0);

  mark_irreducible_loops ();

  /* SCEV is only needed by the loop heuristics, which run when there is
     more than the trivial outermost "loop".  */
  nb_loops = number_of_loops (fun);
  if (nb_loops > 1)
    scev_initialize ();

  tree_estimate_probability ();

  if (nb_loops > 1)
    scev_finalize ();

  loop_optimizer_finalize ();
  if (dump_file && (dump_flags & TDF_DETAILS))
    gimple_dump_cfg (dump_file, dump_flags);
 if (profile_status_for_fn (fun) == PROFILE_ABSENT)
    profile_status_for_fn (fun) = PROFILE_GUESSED;
 return 0;
}
3075
3076 } // anon namespace
3077
/* Factory for the profile_estimate pass; ownership of the returned pass
   object transfers to the pass manager.  */
gimple_opt_pass *
make_pass_profile (gcc::context *ctxt)
{
  return new pass_profile (ctxt);
}
3083
3084 namespace {
3085
/* Pass descriptor for stripping prediction hints once consumed.  */
const pass_data pass_data_strip_predict_hints =
{
  GIMPLE_PASS, /* type */
  "*strip_predict_hints", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_BRANCH_PROB, /* tv_id */
  PROP_cfg, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

/* GIMPLE pass that removes GIMPLE_PREDICT statements and
   __builtin_expect calls.  Cloneable: it is scheduled more than once
   in the pass pipeline.  */
class pass_strip_predict_hints : public gimple_opt_pass
{
public:
  pass_strip_predict_hints (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_strip_predict_hints, ctxt)
  {}

  /* opt_pass methods: */
  opt_pass * clone () { return new pass_strip_predict_hints (m_ctxt); }
  virtual unsigned int execute (function *);

}; // class pass_strip_predict_hints
3111
/* Get rid of all builtin_expect calls and GIMPLE_PREDICT statements
   we no longer need.  __builtin_expect (and the internal-fn variant)
   collapses to a plain copy of its first argument, or disappears when
   its result is unused.  */
unsigned int
pass_strip_predict_hints::execute (function *fun)
{
  basic_block bb;
  gimple ass_stmt;
  tree var;

  FOR_EACH_BB_FN (bb, fun)
    {
      gimple_stmt_iterator bi;
      /* NOTE: gsi_next is only called when the current statement was
	 not removed/replaced; gsi_remove already advances the
	 iterator.  */
      for (bi = gsi_start_bb (bb); !gsi_end_p (bi);)
	{
	  gimple stmt = gsi_stmt (bi);

	  if (gimple_code (stmt) == GIMPLE_PREDICT)
	    {
	      gsi_remove (&bi, true);
	      continue;
	    }
	  else if (is_gimple_call (stmt))
	    {
	      tree fndecl = gimple_call_fndecl (stmt);

	      if ((fndecl
		   && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
		   && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_EXPECT
		   && gimple_call_num_args (stmt) == 2)
		  || (gimple_call_internal_p (stmt)
		      && gimple_call_internal_fn (stmt) == IFN_BUILTIN_EXPECT))
		{
		  var = gimple_call_lhs (stmt);
		  if (var)
		    {
		      /* Replace LHS = __builtin_expect (arg, hint) with a
			 plain LHS = arg assignment.  */
		      ass_stmt
			= gimple_build_assign (var, gimple_call_arg (stmt, 0));
		      gsi_replace (&bi, ass_stmt, true);
		    }
		  else
		    {
		      /* Result unused; just drop the call.  */
		      gsi_remove (&bi, true);
		      continue;
		    }
		}
	    }
	  gsi_next (&bi);
	}
    }
  return 0;
}
3163
3164 } // anon namespace
3165
/* Factory for the strip_predict_hints pass; ownership of the returned
   pass object transfers to the pass manager.  */
gimple_opt_pass *
make_pass_strip_predict_hints (gcc::context *ctxt)
{
  return new pass_strip_predict_hints (ctxt);
}
3171
/* Rebuild function frequencies.  Passes are in general expected to
   maintain profile by hand, however in some cases this is not possible:
   for example when inlining several functions with loops frequencies might run
   out of scale and thus needs to be recomputed.  */

void
rebuild_frequencies (void)
{
  timevar_push (TV_REBUILD_FREQUENCIES);

  /* When the max bb count in the function is small, there is a higher
     chance that there were truncation errors in the integer scaling
     of counts by inlining and other optimizations. This could lead
     to incorrect classification of code as being cold when it isn't.
     In that case, force the estimation of bb counts/frequencies from the
     branch probabilities, rather than computing frequencies from counts,
     which may also lead to frequencies incorrectly reduced to 0. There
     is less precision in the probabilities, so we only do this for small
     max counts.  */
  gcov_type count_max = 0;
  basic_block bb;
  FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
    count_max = MAX (bb->count, count_max);

  if (profile_status_for_fn (cfun) == PROFILE_GUESSED
      || (!flag_auto_profile && profile_status_for_fn (cfun) == PROFILE_READ
	  && count_max < REG_BR_PROB_BASE/10))
    {
      /* Re-estimate from branch probabilities; needs loops and fake
	 exit edges just like the initial estimation.  */
      loop_optimizer_init (0);
      add_noreturn_fake_exit_edges ();
      mark_irreducible_loops ();
      connect_infinite_loops_to_exit ();
      estimate_bb_frequencies (true);
      remove_fake_exit_edges ();
      loop_optimizer_finalize ();
    }
  else if (profile_status_for_fn (cfun) == PROFILE_READ)
    counts_to_freqs ();
  else
    gcc_unreachable ();
  timevar_pop (TV_REBUILD_FREQUENCIES);
}