predict.c: Include pointer-set.h.
/* Branch prediction routines for the GNU compiler.
   Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.  */

/* References:

   [1] "Branch Prediction for Free"
       Ball and Larus; PLDI '93.
   [2] "Static Branch Frequency and Program Profile Analysis"
       Wu and Larus; MICRO-27.
   [3] "Corpus-based Static Branch Prediction"
       Calder, Grunwald, Lindsay, Martin, Mozer, and Zorn; PLDI '95.  */


#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "rtl.h"
#include "tm_p.h"
#include "hard-reg-set.h"
#include "basic-block.h"
#include "insn-config.h"
#include "regs.h"
#include "flags.h"
#include "output.h"
#include "function.h"
#include "except.h"
#include "toplev.h"
#include "recog.h"
#include "expr.h"
#include "predict.h"
#include "coverage.h"
#include "sreal.h"
#include "params.h"
#include "target.h"
#include "cfgloop.h"
#include "tree-flow.h"
#include "ggc.h"
#include "tree-dump.h"
#include "tree-pass.h"
#include "timevar.h"
#include "tree-scalar-evolution.h"
#include "pointer-set.h"

/* Real constants: 0, 1, 1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE,
   1/REG_BR_PROB_BASE, 0.5, BB_FREQ_MAX.  */
static sreal real_zero, real_one, real_almost_one, real_br_prob_base,
	     real_inv_br_prob_base, real_one_half, real_bb_freq_max;

/* Commonly used probability values, given names.  */
#define PROB_VERY_UNLIKELY	(REG_BR_PROB_BASE / 100 - 1)
#define PROB_EVEN		(REG_BR_PROB_BASE / 2)
#define PROB_VERY_LIKELY	(REG_BR_PROB_BASE - PROB_VERY_UNLIKELY)
#define PROB_ALWAYS		(REG_BR_PROB_BASE)

static void combine_predictions_for_insn (rtx, basic_block);
static void dump_prediction (FILE *, enum br_predictor, int, basic_block, int);
static void predict_paths_leading_to (basic_block, int *, enum br_predictor,
				      enum prediction);
static void compute_function_frequency (void);
static void choose_function_section (void);
static bool can_predict_insn_p (rtx);

/* Information we hold about each branch predictor.
   Filled using information from predict.def.  */

struct predictor_info
{
  const char *const name;	/* Name used in the debugging dumps.  */
  const int hitrate;		/* Expected hitrate used by
				   predict_insn_def call.  */
  const int flags;
};

/* Use the given predictor without Dempster-Shafer theory if it matches
   using first_match heuristics.  */
#define PRED_FLAG_FIRST_MATCH 1

/* Convert a hit rate in percent to our fixed-point representation.  */

#define HITRATE(VAL) ((int) ((VAL) * REG_BR_PROB_BASE + 50) / 100)
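
/* For example, with REG_BR_PROB_BASE of 10000 (its usual value),
   HITRATE (79) is 7900 and HITRATE (99) is 9900; the "+ 50" term rounds
   to the nearest representable value instead of truncating.  */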

#define DEF_PREDICTOR(ENUM, NAME, HITRATE, FLAGS) {NAME, HITRATE, FLAGS},
static const struct predictor_info predictor_info[] = {
#include "predict.def"

  /* Upper bound on predictors.  */
  {NULL, 0, 0}
};
#undef DEF_PREDICTOR

/* Return true if BB can be CPU intensive and should be optimized
   for maximal performance.  */

bool
maybe_hot_bb_p (basic_block bb)
{
  if (profile_info && flag_branch_probabilities
      && (bb->count
	  < profile_info->sum_max / PARAM_VALUE (HOT_BB_COUNT_FRACTION)))
    return false;
  if (!profile_info || !flag_branch_probabilities)
    {
      if (cfun->function_frequency == FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
	return false;
      if (cfun->function_frequency == FUNCTION_FREQUENCY_HOT)
	return true;
    }
  if (bb->frequency < BB_FREQ_MAX / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION))
    return false;
  return true;
}
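
/* A worked example (the parameter defaults are tunable, so treat the
   numbers as illustrative only): with HOT_BB_COUNT_FRACTION of 10000, a
   profiled block executed fewer than sum_max / 10000 times is rejected
   as hot; without feedback, a block whose static frequency falls below
   BB_FREQ_MAX / HOT_BB_FREQUENCY_FRACTION is likewise rejected.  */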

/* Return true if BB is cold and should be optimized for size.  */

bool
probably_cold_bb_p (basic_block bb)
{
  if (profile_info && flag_branch_probabilities
      && (bb->count
	  < profile_info->sum_max / PARAM_VALUE (HOT_BB_COUNT_FRACTION)))
    return true;
  if ((!profile_info || !flag_branch_probabilities)
      && cfun->function_frequency == FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
    return true;
  if (bb->frequency < BB_FREQ_MAX / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION))
    return true;
  return false;
}

/* Return true if BB is probably never executed.  */
bool
probably_never_executed_bb_p (basic_block bb)
{
  if (profile_info && flag_branch_probabilities)
    return ((bb->count + profile_info->runs / 2) / profile_info->runs) == 0;
  if ((!profile_info || !flag_branch_probabilities)
      && cfun->function_frequency == FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
    return true;
  return false;
}

/* Return true if one of the outgoing edges of BB is already predicted
   by PREDICTOR.  */

bool
rtl_predicted_by_p (basic_block bb, enum br_predictor predictor)
{
  rtx note;
  if (!INSN_P (BB_END (bb)))
    return false;
  for (note = REG_NOTES (BB_END (bb)); note; note = XEXP (note, 1))
    if (REG_NOTE_KIND (note) == REG_BR_PRED
	&& INTVAL (XEXP (XEXP (note, 0), 0)) == (int) predictor)
      return true;
  return false;
}

/* For each basic block, this map holds the list of predictions for its
   outgoing edges.  */

static struct pointer_map_t *bb_predictions;

/* Return true if one of the outgoing edges of BB is already predicted
   by PREDICTOR.  */

bool
tree_predicted_by_p (basic_block bb, enum br_predictor predictor)
{
  struct edge_prediction *i;
  void **preds = pointer_map_contains (bb_predictions, bb);

  if (!preds)
    return false;

  for (i = *preds; i; i = i->ep_next)
    if (i->ep_predictor == predictor)
      return true;
  return false;
}

/* Return true when the probability of an edge is reliable.

   The profile guessing code is good at predicting branch outcome (i.e.
   taken/not taken), which it gets right slightly over 75% of the time.
   It is, however, notoriously poor at predicting the probability itself.
   In general the guessed profile appears a lot flatter (with probabilities
   closer to 50%) than reality, so it is a bad idea to use it to drive
   optimizations such as disabling dynamic branch prediction for well
   predictable branches.

   There are two exceptions: edges leading to noreturn calls and edges
   predicted by the number-of-iterations heuristics are predicted well.
   This predicate should be able to distinguish those, but at the moment
   it simply checks for the noreturn heuristic, which is the only one
   giving a probability over 99% or below 1%.  In the future we might
   want to propagate reliability information across the CFG if we find
   this information useful in multiple places.  */
static bool
probability_reliable_p (int prob)
{
  return (profile_status == PROFILE_READ
	  || (profile_status == PROFILE_GUESSED
	      && (prob <= HITRATE (1) || prob >= HITRATE (99))));
}
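
/* Concretely, with REG_BR_PROB_BASE of 10000, HITRATE (1) is 100 and
   HITRATE (99) is 9900, so a guessed probability is treated as reliable
   only when it is at most 1% or at least 99%.  */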

/* Same predicate as above, working on edges.  */
bool
edge_probability_reliable_p (edge e)
{
  return probability_reliable_p (e->probability);
}

/* Same predicate as edge_probability_reliable_p, working on notes.  */
bool
br_prob_note_reliable_p (rtx note)
{
  gcc_assert (REG_NOTE_KIND (note) == REG_BR_PROB);
  return probability_reliable_p (INTVAL (XEXP (note, 0)));
}

static void
predict_insn (rtx insn, enum br_predictor predictor, int probability)
{
  gcc_assert (any_condjump_p (insn));
  if (!flag_guess_branch_prob)
    return;

  REG_NOTES (insn)
    = gen_rtx_EXPR_LIST (REG_BR_PRED,
			 gen_rtx_CONCAT (VOIDmode,
					 GEN_INT ((int) predictor),
					 GEN_INT ((int) probability)),
			 REG_NOTES (insn));
}

/* Predict INSN with the given PREDICTOR.  */

void
predict_insn_def (rtx insn, enum br_predictor predictor,
		  enum prediction taken)
{
  int probability = predictor_info[(int) predictor].hitrate;

  if (taken != TAKEN)
    probability = REG_BR_PROB_BASE - probability;

  predict_insn (insn, predictor, probability);
}

/* Predict edge E with the given probability if possible.  */

void
rtl_predict_edge (edge e, enum br_predictor predictor, int probability)
{
  rtx last_insn;
  last_insn = BB_END (e->src);

  /* We can store the branch prediction information only about
     conditional jumps.  */
  if (!any_condjump_p (last_insn))
    return;

  /* We always store the probability of branching.  */
  if (e->flags & EDGE_FALLTHRU)
    probability = REG_BR_PROB_BASE - probability;

  predict_insn (last_insn, predictor, probability);
}

/* Predict edge E with the given PROBABILITY.  */
void
tree_predict_edge (edge e, enum br_predictor predictor, int probability)
{
  gcc_assert (profile_status != PROFILE_GUESSED);
  if ((e->src != ENTRY_BLOCK_PTR && EDGE_COUNT (e->src->succs) > 1)
      && flag_guess_branch_prob && optimize)
    {
      struct edge_prediction *i = XNEW (struct edge_prediction);
      void **preds = pointer_map_insert (bb_predictions, e->src);

      i->ep_next = *preds;
      *preds = i;
      i->ep_probability = probability;
      i->ep_predictor = predictor;
      i->ep_edge = e;
    }
}

/* Remove all predictions on the given basic block that are attached
   to edge E.  */
void
remove_predictions_associated_with_edge (edge e)
{
  void **preds;

  if (!bb_predictions)
    return;

  preds = pointer_map_contains (bb_predictions, e->src);

  if (preds)
    {
      struct edge_prediction **prediction = (struct edge_prediction **) preds;
      struct edge_prediction *next;

      while (*prediction)
	{
	  if ((*prediction)->ep_edge == e)
	    {
	      next = (*prediction)->ep_next;
	      free (*prediction);
	      *prediction = next;
	    }
	  else
	    prediction = &((*prediction)->ep_next);
	}
    }
}

/* Clear the list of predictions stored for BB.  */

static void
clear_bb_predictions (basic_block bb)
{
  void **preds = pointer_map_contains (bb_predictions, bb);
  struct edge_prediction *pred, *next;

  if (!preds)
    return;

  for (pred = *preds; pred; pred = next)
    {
      next = pred->ep_next;
      free (pred);
    }
  *preds = NULL;
}

/* Return true when we can store a prediction on insn INSN.
   At the moment we represent predictions only on conditional
   jumps, not on computed jumps or other complicated cases.  */
static bool
can_predict_insn_p (rtx insn)
{
  return (JUMP_P (insn)
	  && any_condjump_p (insn)
	  && EDGE_COUNT (BLOCK_FOR_INSN (insn)->succs) >= 2);
}

/* Predict edge E by the given predictor if possible.  */

void
predict_edge_def (edge e, enum br_predictor predictor,
		  enum prediction taken)
{
  int probability = predictor_info[(int) predictor].hitrate;

  if (taken != TAKEN)
    probability = REG_BR_PROB_BASE - probability;

  predict_edge (e, predictor, probability);
}

/* Invert all branch predictions or probability notes in INSN.  This needs
   to be done each time we invert the condition used by the jump.  */

void
invert_br_probabilities (rtx insn)
{
  rtx note;

  for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
    if (REG_NOTE_KIND (note) == REG_BR_PROB)
      XEXP (note, 0) = GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (note, 0)));
    else if (REG_NOTE_KIND (note) == REG_BR_PRED)
      XEXP (XEXP (note, 0), 1)
	= GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (XEXP (note, 0), 1)));
}

/* Dump information about the branch prediction to the output file.  */

static void
dump_prediction (FILE *file, enum br_predictor predictor, int probability,
		 basic_block bb, int used)
{
  edge e;
  edge_iterator ei;

  if (!file)
    return;

  FOR_EACH_EDGE (e, ei, bb->succs)
    if (! (e->flags & EDGE_FALLTHRU))
      break;

  fprintf (file, "  %s heuristics%s: %.1f%%",
	   predictor_info[predictor].name,
	   used ? "" : " (ignored)", probability * 100.0 / REG_BR_PROB_BASE);

  if (bb->count)
    {
      fprintf (file, "  exec ");
      fprintf (file, HOST_WIDEST_INT_PRINT_DEC, bb->count);
      if (e)
	{
	  fprintf (file, " hit ");
	  fprintf (file, HOST_WIDEST_INT_PRINT_DEC, e->count);
	  fprintf (file, " (%.1f%%)", e->count * 100.0 / bb->count);
	}
    }

  fprintf (file, "\n");
}

/* We cannot predict the probabilities of the outgoing edges of BB.  Set them
   evenly and hope for the best.  */
static void
set_even_probabilities (basic_block bb)
{
  int nedges = 0;
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, bb->succs)
    if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
      nedges++;
  FOR_EACH_EDGE (e, ei, bb->succs)
    if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
      e->probability = (REG_BR_PROB_BASE + nedges / 2) / nedges;
    else
      e->probability = 0;
}
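
/* For instance, with REG_BR_PROB_BASE of 10000, a block with three
   eligible successors gets (10000 + 1) / 3 = 3333 on each of them; the
   "+ nedges / 2" term rounds the share to the nearest integer.  */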

/* Combine all REG_BR_PRED notes into a single probability and attach a
   REG_BR_PROB note if not already present.  Remove the now useless
   REG_BR_PRED notes.  */

static void
combine_predictions_for_insn (rtx insn, basic_block bb)
{
  rtx prob_note;
  rtx *pnote;
  rtx note;
  int best_probability = PROB_EVEN;
  int best_predictor = END_PREDICTORS;
  int combined_probability = REG_BR_PROB_BASE / 2;
  int d;
  bool first_match = false;
  bool found = false;

  if (!can_predict_insn_p (insn))
    {
      set_even_probabilities (bb);
      return;
    }

  prob_note = find_reg_note (insn, REG_BR_PROB, 0);
  pnote = &REG_NOTES (insn);
  if (dump_file)
    fprintf (dump_file, "Predictions for insn %i bb %i\n", INSN_UID (insn),
	     bb->index);

  /* We implement "first match" heuristics and use the probability guessed
     by the predictor with the smallest index.  */
  for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
    if (REG_NOTE_KIND (note) == REG_BR_PRED)
      {
	int predictor = INTVAL (XEXP (XEXP (note, 0), 0));
	int probability = INTVAL (XEXP (XEXP (note, 0), 1));

	found = true;
	if (best_predictor > predictor)
	  best_probability = probability, best_predictor = predictor;

	d = (combined_probability * probability
	     + (REG_BR_PROB_BASE - combined_probability)
	       * (REG_BR_PROB_BASE - probability));

	/* Use FP math to avoid overflows of 32-bit integers.  */
	if (d == 0)
	  /* If one probability is 0% and one 100%, avoid division by zero.  */
	  combined_probability = REG_BR_PROB_BASE / 2;
	else
	  combined_probability = (((double) combined_probability) * probability
				  * REG_BR_PROB_BASE / d + 0.5);
      }

  /* Decide which heuristic to use.  If we didn't match anything, use the
     no_prediction heuristic; if we did, use either first match or
     Dempster-Shafer theory depending on the flags.  */

  if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
    first_match = true;

  if (!found)
    dump_prediction (dump_file, PRED_NO_PREDICTION,
		     combined_probability, bb, true);
  else
    {
      dump_prediction (dump_file, PRED_DS_THEORY, combined_probability,
		       bb, !first_match);
      dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability,
		       bb, first_match);
    }

  if (first_match)
    combined_probability = best_probability;
  dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);

  while (*pnote)
    {
      if (REG_NOTE_KIND (*pnote) == REG_BR_PRED)
	{
	  int predictor = INTVAL (XEXP (XEXP (*pnote, 0), 0));
	  int probability = INTVAL (XEXP (XEXP (*pnote, 0), 1));

	  dump_prediction (dump_file, predictor, probability, bb,
			   !first_match || best_predictor == predictor);
	  *pnote = XEXP (*pnote, 1);
	}
      else
	pnote = &XEXP (*pnote, 1);
    }

  if (!prob_note)
    {
      REG_NOTES (insn)
	= gen_rtx_EXPR_LIST (REG_BR_PROB,
			     GEN_INT (combined_probability), REG_NOTES (insn));

      /* Save the prediction into the CFG in case we are seeing a
	 non-degenerate conditional jump.  */
      if (!single_succ_p (bb))
	{
	  BRANCH_EDGE (bb)->probability = combined_probability;
	  FALLTHRU_EDGE (bb)->probability
	    = REG_BR_PROB_BASE - combined_probability;
	}
    }
  else if (!single_succ_p (bb))
    {
      int prob = INTVAL (XEXP (prob_note, 0));

      BRANCH_EDGE (bb)->probability = prob;
      FALLTHRU_EDGE (bb)->probability = REG_BR_PROB_BASE - prob;
    }
  else
    single_succ_edge (bb)->probability = REG_BR_PROB_BASE;
}
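
/* A minimal standalone sketch (an editor's illustration, not part of the
   pass) of the Dempster-Shafer combination used above: two predictions
   p1 and p2 merge into p1*p2 / (p1*p2 + (1-p1)*(1-p2)), expressed in
   REG_BR_PROB_BASE fixed point.  Kept under #if 0 so it does not
   participate in the build.  */
#if 0
#include <stdio.h>

#define BASE 10000	/* Stand-in for REG_BR_PROB_BASE.  */

static int
ds_combine (int p1, int p2)
{
  double d = (double) p1 * p2 + (double) (BASE - p1) * (BASE - p2);
  if (d == 0)
    return BASE / 2;	/* 0% combined with 100%: stay even.  */
  return (int) ((double) p1 * p2 * BASE / d + 0.5);
}

int
main (void)
{
  /* Two mildly confident predictors reinforce each other:
     7000 and 6000 combine to roughly 7778.  */
  printf ("%d\n", ds_combine (7000, 6000));
  return 0;
}
#endif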

/* Combine predictions into a single probability and store it in the CFG.
   Remove the now useless prediction entries.  */

static void
combine_predictions_for_bb (basic_block bb)
{
  int best_probability = PROB_EVEN;
  int best_predictor = END_PREDICTORS;
  int combined_probability = REG_BR_PROB_BASE / 2;
  int d;
  bool first_match = false;
  bool found = false;
  struct edge_prediction *pred;
  int nedges = 0;
  edge e, first = NULL, second = NULL;
  edge_iterator ei;
  void **preds;

  FOR_EACH_EDGE (e, ei, bb->succs)
    if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
      {
	nedges++;
	if (first && !second)
	  second = e;
	if (!first)
	  first = e;
      }

  /* When there is no successor or only one choice, prediction is easy.

     We are lazy for now and predict only basic blocks with two outgoing
     edges.  It is possible to predict the generic case too, but we have to
     ignore first match heuristics and do more involved combining.  Implement
     this later.  */
  if (nedges != 2)
    {
      if (!bb->count)
	set_even_probabilities (bb);
      clear_bb_predictions (bb);
      if (dump_file)
	fprintf (dump_file, "%i edges in bb %i predicted to even probabilities\n",
		 nedges, bb->index);
      return;
    }

  if (dump_file)
    fprintf (dump_file, "Predictions for bb %i\n", bb->index);

  preds = pointer_map_contains (bb_predictions, bb);
  if (preds)
    {
      /* We implement "first match" heuristics and use the probability guessed
	 by the predictor with the smallest index.  */
      for (pred = *preds; pred; pred = pred->ep_next)
	{
	  int predictor = pred->ep_predictor;
	  int probability = pred->ep_probability;

	  if (pred->ep_edge != first)
	    probability = REG_BR_PROB_BASE - probability;

	  found = true;
	  if (best_predictor > predictor)
	    best_probability = probability, best_predictor = predictor;

	  d = (combined_probability * probability
	       + (REG_BR_PROB_BASE - combined_probability)
		 * (REG_BR_PROB_BASE - probability));

	  /* Use FP math to avoid overflows of 32-bit integers.  */
	  if (d == 0)
	    /* If one probability is 0% and one 100%, avoid division by
	       zero.  */
	    combined_probability = REG_BR_PROB_BASE / 2;
	  else
	    combined_probability = (((double) combined_probability)
				    * probability
				    * REG_BR_PROB_BASE / d + 0.5);
	}
    }

  /* Decide which heuristic to use.  If we didn't match anything, use the
     no_prediction heuristic; if we did, use either first match or
     Dempster-Shafer theory depending on the flags.  */

  if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
    first_match = true;

  if (!found)
    dump_prediction (dump_file, PRED_NO_PREDICTION, combined_probability,
		     bb, true);
  else
    {
      dump_prediction (dump_file, PRED_DS_THEORY, combined_probability, bb,
		       !first_match);
      dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability, bb,
		       first_match);
    }

  if (first_match)
    combined_probability = best_probability;
  dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);

  if (preds)
    {
      for (pred = *preds; pred; pred = pred->ep_next)
	{
	  int predictor = pred->ep_predictor;
	  int probability = pred->ep_probability;

	  if (pred->ep_edge != EDGE_SUCC (bb, 0))
	    probability = REG_BR_PROB_BASE - probability;
	  dump_prediction (dump_file, predictor, probability, bb,
			   !first_match || best_predictor == predictor);
	}
    }
  clear_bb_predictions (bb);

  if (!bb->count)
    {
      first->probability = combined_probability;
      second->probability = REG_BR_PROB_BASE - combined_probability;
    }
}

/* Predict edge probabilities by exploiting loop structure.  */

static void
predict_loops (void)
{
  loop_iterator li;
  struct loop *loop;

  scev_initialize ();

  /* Estimate the number of iterations and predict the exit edges of
     each natural loop.  */
  FOR_EACH_LOOP (li, loop, 0)
    {
      basic_block bb, *bbs;
      unsigned j, n_exits;
      VEC (edge, heap) *exits;
      struct tree_niter_desc niter_desc;
      edge ex;

      exits = get_loop_exit_edges (loop);
      n_exits = VEC_length (edge, exits);

      for (j = 0; VEC_iterate (edge, exits, j, ex); j++)
	{
	  tree niter = NULL;
	  HOST_WIDE_INT nitercst;
	  int max = PARAM_VALUE (PARAM_MAX_PREDICTED_ITERATIONS);
	  int probability;
	  enum br_predictor predictor;

	  if (number_of_iterations_exit (loop, ex, &niter_desc, false))
	    niter = niter_desc.niter;
	  if (!niter || TREE_CODE (niter_desc.niter) != INTEGER_CST)
	    niter = loop_niter_by_eval (loop, ex);

	  if (TREE_CODE (niter) == INTEGER_CST)
	    {
	      if (host_integerp (niter, 1)
		  && compare_tree_int (niter, max - 1) == -1)
		nitercst = tree_low_cst (niter, 1) + 1;
	      else
		nitercst = max;
	      predictor = PRED_LOOP_ITERATIONS;
	    }
	  /* If we have just one exit and we can derive some information about
	     the number of iterations of the loop from the statements inside
	     the loop, use it to predict this exit.  */
	  else if (n_exits == 1)
	    {
	      nitercst = estimated_loop_iterations_int (loop, false);
	      if (nitercst < 0)
		continue;
	      if (nitercst > max)
		nitercst = max;

	      predictor = PRED_LOOP_ITERATIONS_GUESSED;
	    }
	  else
	    continue;

	  probability = ((REG_BR_PROB_BASE + nitercst / 2) / nitercst);
	  predict_edge (ex, predictor, probability);
	}
      VEC_free (edge, heap, exits);

      bbs = get_loop_body (loop);

      for (j = 0; j < loop->num_nodes; j++)
	{
	  int header_found = 0;
	  edge e;
	  edge_iterator ei;

	  bb = bbs[j];

	  /* Bypass loop heuristics on continue statement.  These
	     statements construct loops via "non-loop" constructs
	     in the source language and are better to be handled
	     separately.  */
	  if (predicted_by_p (bb, PRED_CONTINUE))
	    continue;

	  /* Loop branch heuristics - predict an edge back to a
	     loop's head as taken.  */
	  if (bb == loop->latch)
	    {
	      e = find_edge (loop->latch, loop->header);
	      if (e)
		{
		  header_found = 1;
		  predict_edge_def (e, PRED_LOOP_BRANCH, TAKEN);
		}
	    }

	  /* Loop exit heuristics - predict an edge exiting the loop if the
	     conditional has no loop header successors as not taken.  */
	  if (!header_found
	      /* If we already used more reliable loop exit predictors, do not
		 bother with PRED_LOOP_EXIT.  */
	      && !predicted_by_p (bb, PRED_LOOP_ITERATIONS_GUESSED)
	      && !predicted_by_p (bb, PRED_LOOP_ITERATIONS))
	    {
	      /* For loops with many exits we don't want to predict all exits
		 with a fairly large probability, because if all exits are
		 considered in a row, the loop would be predicted to iterate
		 almost never.  The code to divide the probability by the
		 number of exits is very rough.  It should compute the number
		 of exits taken in each path through the function (not the
		 overall number of exits, which might be a lot higher for
		 loops with wide switch statements in them) and compute the
		 n-th root of the probability.

		 We limit the minimal probability to 2% to keep
		 EDGE_PROBABILITY_RELIABLE from trusting the branch
		 prediction, as this was causing a regression in the perl
		 benchmark containing such a wide loop.  */

	      int probability = ((REG_BR_PROB_BASE
				  - predictor_info [(int) PRED_LOOP_EXIT].hitrate)
				 / n_exits);
	      if (probability < HITRATE (2))
		probability = HITRATE (2);
	      FOR_EACH_EDGE (e, ei, bb->succs)
		if (e->dest->index < NUM_FIXED_BLOCKS
		    || !flow_bb_inside_loop_p (loop, e->dest))
		  predict_edge (e, PRED_LOOP_EXIT, probability);
	    }
	}

      /* Free basic blocks from get_loop_body.  */
      free (bbs);
    }

  scev_finalize ();
}
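
/* A worked example of the iteration heuristic above (editor's note): a
   loop known to iterate 100 times has nitercst == 100, so with
   REG_BR_PROB_BASE of 10000 each exit test takes the exit edge with
   probability (10000 + 50) / 100 = 100, i.e. 1%, and the loop is
   predicted to continue with probability 99%.  */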

/* Attempt to predict probabilities of BB outgoing edges using local
   properties.  */
static void
bb_estimate_probability_locally (basic_block bb)
{
  rtx last_insn = BB_END (bb);
  rtx cond;

  if (! can_predict_insn_p (last_insn))
    return;
  cond = get_condition (last_insn, NULL, false, false);
  if (! cond)
    return;

  /* Try "pointer heuristic."
     A comparison ptr == 0 is predicted as false.
     Similarly, a comparison ptr1 == ptr2 is predicted as false.  */
  if (COMPARISON_P (cond)
      && ((REG_P (XEXP (cond, 0)) && REG_POINTER (XEXP (cond, 0)))
	  || (REG_P (XEXP (cond, 1)) && REG_POINTER (XEXP (cond, 1)))))
    {
      if (GET_CODE (cond) == EQ)
	predict_insn_def (last_insn, PRED_POINTER, NOT_TAKEN);
      else if (GET_CODE (cond) == NE)
	predict_insn_def (last_insn, PRED_POINTER, TAKEN);
    }
  else

  /* Try "opcode heuristic."
     EQ tests are usually false and NE tests are usually true.  Also,
     most quantities are positive, so we can make the appropriate guesses
     about signed comparisons against zero.  */
    switch (GET_CODE (cond))
      {
      case CONST_INT:
	/* Unconditional branch.  */
	predict_insn_def (last_insn, PRED_UNCONDITIONAL,
			  cond == const0_rtx ? NOT_TAKEN : TAKEN);
	break;

      case EQ:
      case UNEQ:
	/* Floating point comparisons appear to behave in a very
	   unpredictable way because of the special role of equality
	   tests in FP code.  */
	if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
	  ;
	/* Comparisons with 0 are often used for booleans and there is
	   nothing useful to predict about them.  */
	else if (XEXP (cond, 1) == const0_rtx
		 || XEXP (cond, 0) == const0_rtx)
	  ;
	else
	  predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, NOT_TAKEN);
	break;

      case NE:
      case LTGT:
	/* Floating point comparisons appear to behave in a very
	   unpredictable way because of the special role of equality
	   tests in FP code.  */
	if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
	  ;
	/* Comparisons with 0 are often used for booleans and there is
	   nothing useful to predict about them.  */
	else if (XEXP (cond, 1) == const0_rtx
		 || XEXP (cond, 0) == const0_rtx)
	  ;
	else
	  predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, TAKEN);
	break;

      case ORDERED:
	predict_insn_def (last_insn, PRED_FPOPCODE, TAKEN);
	break;

      case UNORDERED:
	predict_insn_def (last_insn, PRED_FPOPCODE, NOT_TAKEN);
	break;

      case LE:
      case LT:
	if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
	    || XEXP (cond, 1) == constm1_rtx)
	  predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, NOT_TAKEN);
	break;

      case GE:
      case GT:
	if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
	    || XEXP (cond, 1) == constm1_rtx)
	  predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, TAKEN);
	break;

      default:
	break;
      }
}

/* Set edge->probability for each successor edge of BB.  */
void
guess_outgoing_edge_probabilities (basic_block bb)
{
  bb_estimate_probability_locally (bb);
  combine_predictions_for_insn (BB_END (bb), bb);
}
\f
/* Return the constant EXPR will likely have at execution time, or NULL
   if unknown.  The function is used by the builtin_expect branch
   predictor, so the evidence must come from this construct plus
   additional possible constant folding.

   We may want to implement a more involved value guess (such as value
   range propagation based prediction), but such tricks should go into a
   new implementation.  */

static tree
expr_expected_value (tree expr, bitmap visited)
{
  if (TREE_CONSTANT (expr))
    return expr;
  else if (TREE_CODE (expr) == SSA_NAME)
    {
      tree def = SSA_NAME_DEF_STMT (expr);

      /* If we were already here, break the infinite cycle.  */
      if (bitmap_bit_p (visited, SSA_NAME_VERSION (expr)))
	return NULL;
      bitmap_set_bit (visited, SSA_NAME_VERSION (expr));

      if (TREE_CODE (def) == PHI_NODE)
	{
	  /* All the arguments of the PHI node must have the same expected
	     constant value.  */
	  int i;
	  tree val = NULL, new_val;

	  for (i = 0; i < PHI_NUM_ARGS (def); i++)
	    {
	      tree arg = PHI_ARG_DEF (def, i);

	      /* If this PHI has itself as an argument, we cannot
		 determine the expected value of this argument.  However,
		 if we can find an expected constant value for the other
		 PHI args then we can still be sure that this is
		 likely a constant.  So be optimistic and just
		 continue with the next argument.  */
	      if (arg == PHI_RESULT (def))
		continue;

	      new_val = expr_expected_value (arg, visited);
	      if (!new_val)
		return NULL;
	      if (!val)
		val = new_val;
	      else if (!operand_equal_p (val, new_val, false))
		return NULL;
	    }
	  return val;
	}
      if (TREE_CODE (def) != GIMPLE_MODIFY_STMT
	  || GIMPLE_STMT_OPERAND (def, 0) != expr)
	return NULL;
      return expr_expected_value (GIMPLE_STMT_OPERAND (def, 1), visited);
    }
  else if (TREE_CODE (expr) == CALL_EXPR)
    {
      tree decl = get_callee_fndecl (expr);
      if (!decl)
	return NULL;
      if (DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL
	  && DECL_FUNCTION_CODE (decl) == BUILT_IN_EXPECT)
	{
	  tree val;

	  if (call_expr_nargs (expr) != 2)
	    return NULL;
	  val = CALL_EXPR_ARG (expr, 0);
	  if (TREE_CONSTANT (val))
	    return val;
	  return CALL_EXPR_ARG (expr, 1);
	}
    }
  if (BINARY_CLASS_P (expr) || COMPARISON_CLASS_P (expr))
    {
      tree op0, op1, res;
      op0 = expr_expected_value (TREE_OPERAND (expr, 0), visited);
      if (!op0)
	return NULL;
      op1 = expr_expected_value (TREE_OPERAND (expr, 1), visited);
      if (!op1)
	return NULL;
      res = fold_build2 (TREE_CODE (expr), TREE_TYPE (expr), op0, op1);
      if (TREE_CONSTANT (res))
	return res;
      return NULL;
    }
  if (UNARY_CLASS_P (expr))
    {
      tree op0, res;
      op0 = expr_expected_value (TREE_OPERAND (expr, 0), visited);
      if (!op0)
	return NULL;
      res = fold_build1 (TREE_CODE (expr), TREE_TYPE (expr), op0);
      if (TREE_CONSTANT (res))
	return res;
      return NULL;
    }
  return NULL;
}
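
/* A minimal standalone sketch (editor's illustration, not GCC internals)
   of the source construct this predictor consumes: __builtin_expect
   returns its first argument while declaring its expected value, so the
   guarded branch below is predicted as almost never taken.  Kept under
   #if 0 so it does not participate in the build.  */
#if 0
#include <stdio.h>
#include <stdlib.h>

#define unlikely(x) __builtin_expect (!!(x), 0)

static int
first_byte (const char *s)
{
  if (unlikely (s == NULL))	/* Error path; predicted not taken.  */
    {
      fprintf (stderr, "null input\n");
      exit (EXIT_FAILURE);
    }
  return s[0];
}
#endif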
\f
/* Get rid of all builtin_expect calls we no longer need.  */
static void
strip_builtin_expect (void)
{
  basic_block bb;
  FOR_EACH_BB (bb)
    {
      block_stmt_iterator bi;
      for (bi = bsi_start (bb); !bsi_end_p (bi); bsi_next (&bi))
	{
	  tree stmt = bsi_stmt (bi);
	  tree fndecl;
	  tree call;

	  if (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT
	      && (call = GIMPLE_STMT_OPERAND (stmt, 1))
	      && TREE_CODE (call) == CALL_EXPR
	      && (fndecl = get_callee_fndecl (call))
	      && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
	      && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_EXPECT
	      && call_expr_nargs (call) == 2)
	    {
	      GIMPLE_STMT_OPERAND (stmt, 1) = CALL_EXPR_ARG (call, 0);
	      update_stmt (stmt);
	    }
	}
    }
}
\f
/* Predict using the opcode of the last statement in basic block BB.  */
static void
tree_predict_by_opcode (basic_block bb)
{
  tree stmt = last_stmt (bb);
  edge then_edge;
  tree cond;
  tree op0;
  tree type;
  tree val;
  bitmap visited;
  edge_iterator ei;

  if (!stmt || TREE_CODE (stmt) != COND_EXPR)
    return;
  FOR_EACH_EDGE (then_edge, ei, bb->succs)
    if (then_edge->flags & EDGE_TRUE_VALUE)
      break;
  cond = TREE_OPERAND (stmt, 0);
  if (!COMPARISON_CLASS_P (cond))
    return;
  op0 = TREE_OPERAND (cond, 0);
  type = TREE_TYPE (op0);
  visited = BITMAP_ALLOC (NULL);
  val = expr_expected_value (cond, visited);
  BITMAP_FREE (visited);
  if (val)
    {
      if (integer_zerop (val))
	predict_edge_def (then_edge, PRED_BUILTIN_EXPECT, NOT_TAKEN);
      else
	predict_edge_def (then_edge, PRED_BUILTIN_EXPECT, TAKEN);
      return;
    }
  /* Try "pointer heuristic."
     A comparison ptr == 0 is predicted as false.
     Similarly, a comparison ptr1 == ptr2 is predicted as false.  */
  if (POINTER_TYPE_P (type))
    {
      if (TREE_CODE (cond) == EQ_EXPR)
	predict_edge_def (then_edge, PRED_TREE_POINTER, NOT_TAKEN);
      else if (TREE_CODE (cond) == NE_EXPR)
	predict_edge_def (then_edge, PRED_TREE_POINTER, TAKEN);
    }
  else

  /* Try "opcode heuristic."
     EQ tests are usually false and NE tests are usually true.  Also,
     most quantities are positive, so we can make the appropriate guesses
     about signed comparisons against zero.  */
    switch (TREE_CODE (cond))
      {
      case EQ_EXPR:
      case UNEQ_EXPR:
	/* Floating point comparisons appear to behave in a very
	   unpredictable way because of the special role of equality
	   tests in FP code.  */
	if (FLOAT_TYPE_P (type))
	  ;
	/* Comparisons with 0 are often used for booleans and there is
	   nothing useful to predict about them.  */
	else if (integer_zerop (op0)
		 || integer_zerop (TREE_OPERAND (cond, 1)))
	  ;
	else
	  predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, NOT_TAKEN);
	break;

      case NE_EXPR:
      case LTGT_EXPR:
	/* Floating point comparisons appear to behave in a very
	   unpredictable way because of the special role of equality
	   tests in FP code.  */
	if (FLOAT_TYPE_P (type))
	  ;
	/* Comparisons with 0 are often used for booleans and there is
	   nothing useful to predict about them.  */
	else if (integer_zerop (op0)
		 || integer_zerop (TREE_OPERAND (cond, 1)))
	  ;
	else
	  predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, TAKEN);
	break;

      case ORDERED_EXPR:
	predict_edge_def (then_edge, PRED_TREE_FPOPCODE, TAKEN);
	break;

      case UNORDERED_EXPR:
	predict_edge_def (then_edge, PRED_TREE_FPOPCODE, NOT_TAKEN);
	break;

      case LE_EXPR:
      case LT_EXPR:
	if (integer_zerop (TREE_OPERAND (cond, 1))
	    || integer_onep (TREE_OPERAND (cond, 1))
	    || integer_all_onesp (TREE_OPERAND (cond, 1))
	    || real_zerop (TREE_OPERAND (cond, 1))
	    || real_onep (TREE_OPERAND (cond, 1))
	    || real_minus_onep (TREE_OPERAND (cond, 1)))
	  predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, NOT_TAKEN);
	break;

      case GE_EXPR:
      case GT_EXPR:
	if (integer_zerop (TREE_OPERAND (cond, 1))
	    || integer_onep (TREE_OPERAND (cond, 1))
	    || integer_all_onesp (TREE_OPERAND (cond, 1))
	    || real_zerop (TREE_OPERAND (cond, 1))
	    || real_onep (TREE_OPERAND (cond, 1))
	    || real_minus_onep (TREE_OPERAND (cond, 1)))
	  predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, TAKEN);
	break;

      default:
	break;
      }
}

/* Try to guess whether the return value VAL indicates an error code.  */
static enum br_predictor
return_prediction (tree val, enum prediction *prediction)
{
  /* VOID.  */
  if (!val)
    return PRED_NO_PREDICTION;
  /* Different heuristics for pointers and scalars.  */
  if (POINTER_TYPE_P (TREE_TYPE (val)))
    {
      /* NULL is usually not returned.  */
      if (integer_zerop (val))
	{
	  *prediction = NOT_TAKEN;
	  return PRED_NULL_RETURN;
	}
    }
  else if (INTEGRAL_TYPE_P (TREE_TYPE (val)))
    {
      /* Negative return values are often used to indicate
	 errors.  */
      if (TREE_CODE (val) == INTEGER_CST
	  && tree_int_cst_sgn (val) < 0)
	{
	  *prediction = NOT_TAKEN;
	  return PRED_NEGATIVE_RETURN;
	}
      /* Constant return values seem to be commonly taken.
	 Zero/one often represent booleans, so exclude them from this
	 heuristic.  */
      if (TREE_CONSTANT (val)
	  && (!integer_zerop (val) && !integer_onep (val)))
	{
	  *prediction = TAKEN;
	  return PRED_CONST_RETURN;
	}
    }
  return PRED_NO_PREDICTION;
}
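
/* An illustrative sketch (editor's example, not GCC code) of the C
   idioms these heuristics fire on; kept under #if 0.  */
#if 0
#include <stddef.h>

/* PRED_NULL_RETURN: a path returning NULL is predicted unlikely.  */
static void *
lookup (int found)
{
  static int table[16];
  return found ? table : NULL;
}

/* PRED_NEGATIVE_RETURN: negative values usually signal errors, so the
   paths that return them are predicted unlikely.  */
static int
try_open (int ok)
{
  return ok ? 3 : -1;
}
#endif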

/* Find the basic block containing the return expression, look up its
   return value, and try to apply the return-prediction heuristics.  */
static void
apply_return_prediction (int *heads)
{
  tree return_stmt = NULL;
  tree return_val;
  edge e;
  tree phi;
  int phi_num_args, i;
  enum br_predictor pred;
  enum prediction direction;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
    {
      return_stmt = last_stmt (e->src);
      if (TREE_CODE (return_stmt) == RETURN_EXPR)
	break;
    }
  if (!e)
    return;
  return_val = TREE_OPERAND (return_stmt, 0);
  if (!return_val)
    return;
  if (TREE_CODE (return_val) == GIMPLE_MODIFY_STMT)
    return_val = GIMPLE_STMT_OPERAND (return_val, 1);
  if (TREE_CODE (return_val) != SSA_NAME
      || !SSA_NAME_DEF_STMT (return_val)
      || TREE_CODE (SSA_NAME_DEF_STMT (return_val)) != PHI_NODE)
    return;
  for (phi = SSA_NAME_DEF_STMT (return_val); phi; phi = PHI_CHAIN (phi))
    if (PHI_RESULT (phi) == return_val)
      break;
  if (!phi)
    return;
  phi_num_args = PHI_NUM_ARGS (phi);
  pred = return_prediction (PHI_ARG_DEF (phi, 0), &direction);

  /* Avoid the degenerate case where all return values from the function
     belong to the same category (i.e. they are all positive constants),
     in which case we can hardly say anything about them.  */
  for (i = 1; i < phi_num_args; i++)
    if (pred != return_prediction (PHI_ARG_DEF (phi, i), &direction))
      break;
  if (i != phi_num_args)
    for (i = 0; i < phi_num_args; i++)
      {
	pred = return_prediction (PHI_ARG_DEF (phi, i), &direction);
	if (pred != PRED_NO_PREDICTION)
	  predict_paths_leading_to (PHI_ARG_EDGE (phi, i)->src, heads, pred,
				    direction);
      }
}

/* Look for basic blocks that contain unlikely-to-happen events
   (such as noreturn calls) and mark all paths leading to execution
   of these basic blocks as unlikely.  */

static void
tree_bb_level_predictions (void)
{
  basic_block bb;
  int *heads;

  heads = XCNEWVEC (int, last_basic_block);
  heads[ENTRY_BLOCK_PTR->next_bb->index] = last_basic_block;

  apply_return_prediction (heads);

  FOR_EACH_BB (bb)
    {
      block_stmt_iterator bsi;

      for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
	{
	  tree stmt = bsi_stmt (bsi);
	  tree decl;
	  switch (TREE_CODE (stmt))
	    {
	    case GIMPLE_MODIFY_STMT:
	      if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 1)) == CALL_EXPR)
		{
		  stmt = GIMPLE_STMT_OPERAND (stmt, 1);
		  goto call_expr;
		}
	      break;
	    case CALL_EXPR:
	    call_expr:;
	      if (call_expr_flags (stmt) & ECF_NORETURN)
		predict_paths_leading_to (bb, heads, PRED_NORETURN,
					  NOT_TAKEN);
	      decl = get_callee_fndecl (stmt);
	      if (decl
		  && lookup_attribute ("cold",
				       DECL_ATTRIBUTES (decl)))
		predict_paths_leading_to (bb, heads, PRED_COLD_FUNCTION,
					  NOT_TAKEN);
	      break;
	    default:
	      break;
	    }
	}
    }

  free (heads);
}

#ifdef ENABLE_CHECKING

/* Callback for pointer_map_traverse, asserting that the pointer map is
   empty.  */

static bool
assert_is_empty (void *key ATTRIBUTE_UNUSED, void **value,
		 void *data ATTRIBUTE_UNUSED)
{
  gcc_assert (!*value);
  return false;
}
#endif

/* Predict branch probabilities and estimate the profile of the tree CFG.  */
static unsigned int
tree_estimate_probability (void)
{
  basic_block bb;

  loop_optimizer_init (0);
  if (current_loops && dump_file && (dump_flags & TDF_DETAILS))
    flow_loops_dump (dump_file, NULL, 0);

  add_noreturn_fake_exit_edges ();
  connect_infinite_loops_to_exit ();
  /* We use loop_niter_by_eval, which requires that the loops have
     preheaders.  */
  create_preheaders (CP_SIMPLE_PREHEADERS);
  calculate_dominance_info (CDI_POST_DOMINATORS);

  bb_predictions = pointer_map_create ();
  tree_bb_level_predictions ();

  mark_irreducible_loops ();
  record_loop_exits ();
  if (current_loops)
    predict_loops ();

  FOR_EACH_BB (bb)
    {
      edge e;
      edge_iterator ei;

      FOR_EACH_EDGE (e, ei, bb->succs)
	{
	  /* Predict early returns to be probable, as we've already taken
	     care of error returns, and other cases are often fast paths
	     through the function.

	     Since we've already removed the return statements, we are
	     looking for a CFG like:

	       if (conditional)
		 {
		   ..
		   goto return_block
		 }
	       some other blocks
	     return_block:
	       return_stmt.  */
	  if (e->dest != bb->next_bb
	      && e->dest != EXIT_BLOCK_PTR
	      && single_succ_p (e->dest)
	      && single_succ_edge (e->dest)->dest == EXIT_BLOCK_PTR
	      && TREE_CODE (last_stmt (e->dest)) == RETURN_EXPR)
	    {
	      edge e1;
	      edge_iterator ei1;

	      if (single_succ_p (bb))
		{
		  FOR_EACH_EDGE (e1, ei1, bb->preds)
		    if (!predicted_by_p (e1->src, PRED_NULL_RETURN)
			&& !predicted_by_p (e1->src, PRED_CONST_RETURN)
			&& !predicted_by_p (e1->src, PRED_NEGATIVE_RETURN))
		      predict_edge_def (e1, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
		}
	      else
		if (!predicted_by_p (e->src, PRED_NULL_RETURN)
		    && !predicted_by_p (e->src, PRED_CONST_RETURN)
		    && !predicted_by_p (e->src, PRED_NEGATIVE_RETURN))
		  predict_edge_def (e, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
	    }

	  /* Look for the block we are guarding (i.e. we dominate it,
	     but it doesn't postdominate us).  */
	  if (e->dest != EXIT_BLOCK_PTR && e->dest != bb
	      && dominated_by_p (CDI_DOMINATORS, e->dest, e->src)
	      && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e->dest))
	    {
	      block_stmt_iterator bi;

	      /* The call heuristic claims that a guarded function call
		 is improbable.  This is because such calls are often used
		 to signal exceptional situations such as printing error
		 messages.  */
	      for (bi = bsi_start (e->dest); !bsi_end_p (bi);
		   bsi_next (&bi))
		{
		  tree stmt = bsi_stmt (bi);
		  if ((TREE_CODE (stmt) == CALL_EXPR
		       || (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT
			   && TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 1))
			      == CALL_EXPR))
		      /* Constant and pure calls are hardly used to signal
			 something exceptional.  */
		      && TREE_SIDE_EFFECTS (stmt))
		    {
		      predict_edge_def (e, PRED_CALL, NOT_TAKEN);
		      break;
		    }
		}
	    }
	}
      tree_predict_by_opcode (bb);
    }
  FOR_EACH_BB (bb)
    combine_predictions_for_bb (bb);

#ifdef ENABLE_CHECKING
  pointer_map_traverse (bb_predictions, assert_is_empty, NULL);
#endif
  pointer_map_destroy (bb_predictions);
  bb_predictions = NULL;

  strip_builtin_expect ();
  estimate_bb_frequencies ();
  free_dominance_info (CDI_POST_DOMINATORS);
  remove_fake_exit_edges ();
  loop_optimizer_finalize ();
  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_tree_cfg (dump_file, dump_flags);
  if (profile_status == PROFILE_ABSENT)
    profile_status = PROFILE_GUESSED;
  return 0;
}
\f
/* Set branch probabilities according to PRED and TAKEN.
   HEADS[bb->index] should be the index of the basic block in which we
   need to alter the branch predictions (i.e. the first of our dominators
   that we do not post-dominate); this information is filled in on
   demand, so entries that were not needed yet still hold ENTRY_BLOCK.  */

static void
predict_paths_leading_to (basic_block bb, int *heads, enum br_predictor pred,
			  enum prediction taken)
{
  edge e;
  edge_iterator ei;
  int y;

  if (heads[bb->index] == ENTRY_BLOCK)
    {
      /* This is the first time we need this field in the heads array, so
	 find the first dominator that we do not post-dominate (using the
	 already-known members of the heads array).  */
      basic_block ai = bb;
      basic_block next_ai = get_immediate_dominator (CDI_DOMINATORS, bb);
      int head;

      while (heads[next_ai->index] == ENTRY_BLOCK)
	{
	  if (!dominated_by_p (CDI_POST_DOMINATORS, next_ai, bb))
	    break;
	  heads[next_ai->index] = ai->index;
	  ai = next_ai;
	  next_ai = get_immediate_dominator (CDI_DOMINATORS, next_ai);
	}
      if (!dominated_by_p (CDI_POST_DOMINATORS, next_ai, bb))
	head = next_ai->index;
      else
	head = heads[next_ai->index];
      while (next_ai != bb)
	{
	  next_ai = ai;
	  ai = BASIC_BLOCK (heads[ai->index]);
	  heads[next_ai->index] = head;
	}
    }
  y = heads[bb->index];

  /* Now find the edge that leads to our branch and apply the prediction.  */

  if (y == last_basic_block)
    return;
  FOR_EACH_EDGE (e, ei, BASIC_BLOCK (y)->succs)
    if (e->dest->index >= NUM_FIXED_BLOCKS
	&& dominated_by_p (CDI_POST_DOMINATORS, e->dest, bb))
      predict_edge_def (e, pred, taken);
}
\f
/* This is used to carry information about basic blocks.  It is
   attached to the AUX field of the standard CFG block.  */

typedef struct block_info_def
{
  /* Estimated frequency of execution of basic_block.  */
  sreal frequency;

  /* Queue of basic blocks to process.  */
  basic_block next;

  /* Number of predecessors we need to visit first.  */
  int npredecessors;
} *block_info;

/* Similar information for edges.  */
typedef struct edge_info_def
{
  /* If the edge is a loopback edge, the probability that the edge will
     be reached provided that the header is.  The estimated number of
     iterations of the loop can then be computed as
     1 / (1 - back_edge_prob).  */
  sreal back_edge_prob;
  /* True if the edge is a loopback edge in the natural loop.  */
  unsigned int back_edge:1;
} *edge_info;
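
/* A worked example (editor's note): if a loop's latch edge is taken with
   probability 0.9 each time the header executes, back_edge_prob is 0.9
   and the loop is estimated to iterate 1 / (1 - 0.9) = 10 times per
   entry.  */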

#define BLOCK_INFO(B)	((block_info) (B)->aux)
#define EDGE_INFO(E)	((edge_info) (E)->aux)

/* Helper function for estimate_bb_frequencies.
   Propagate the frequencies in blocks marked in
   TOVISIT, starting in HEAD.  */

static void
propagate_freq (basic_block head, bitmap tovisit)
{
  basic_block bb;
  basic_block last;
  unsigned i;
  edge e;
  basic_block nextbb;
  bitmap_iterator bi;

  /* For each basic block we need to visit, count the number of its
     predecessors we need to visit first.  */
  EXECUTE_IF_SET_IN_BITMAP (tovisit, 0, i, bi)
    {
      edge_iterator ei;
      int count = 0;

      /* The outermost "loop" includes the exit block, which we cannot
	 look up via BASIC_BLOCK.  Detect this and use EXIT_BLOCK_PTR
	 directly.  Do the same for the entry block.  */
      bb = BASIC_BLOCK (i);

      FOR_EACH_EDGE (e, ei, bb->preds)
	{
	  bool visit = bitmap_bit_p (tovisit, e->src->index);

	  if (visit && !(e->flags & EDGE_DFS_BACK))
	    count++;
	  else if (visit && dump_file && !EDGE_INFO (e)->back_edge)
	    fprintf (dump_file,
		     "Irreducible region hit, ignoring edge %i->%i\n",
		     e->src->index, bb->index);
	}
      BLOCK_INFO (bb)->npredecessors = count;
    }

  memcpy (&BLOCK_INFO (head)->frequency, &real_one, sizeof (real_one));
  last = head;
  for (bb = head; bb; bb = nextbb)
    {
      edge_iterator ei;
      sreal cyclic_probability, frequency;

      memcpy (&cyclic_probability, &real_zero, sizeof (real_zero));
      memcpy (&frequency, &real_zero, sizeof (real_zero));

      nextbb = BLOCK_INFO (bb)->next;
      BLOCK_INFO (bb)->next = NULL;

      /* Compute frequency of basic block.  */
      if (bb != head)
	{
#ifdef ENABLE_CHECKING
	  FOR_EACH_EDGE (e, ei, bb->preds)
	    gcc_assert (!bitmap_bit_p (tovisit, e->src->index)
			|| (e->flags & EDGE_DFS_BACK));
#endif

	  FOR_EACH_EDGE (e, ei, bb->preds)
	    if (EDGE_INFO (e)->back_edge)
	      {
		sreal_add (&cyclic_probability, &cyclic_probability,
			   &EDGE_INFO (e)->back_edge_prob);
	      }
	    else if (!(e->flags & EDGE_DFS_BACK))
	      {
		sreal tmp;

		/* frequency += (e->probability
				 * BLOCK_INFO (e->src)->frequency
				 / REG_BR_PROB_BASE);  */

		sreal_init (&tmp, e->probability, 0);
		sreal_mul (&tmp, &tmp, &BLOCK_INFO (e->src)->frequency);
		sreal_mul (&tmp, &tmp, &real_inv_br_prob_base);
		sreal_add (&frequency, &frequency, &tmp);
	      }

	  if (sreal_compare (&cyclic_probability, &real_zero) == 0)
	    {
	      memcpy (&BLOCK_INFO (bb)->frequency, &frequency,
		      sizeof (frequency));
	    }
	  else
	    {
	      if (sreal_compare (&cyclic_probability, &real_almost_one) > 0)
		{
		  memcpy (&cyclic_probability, &real_almost_one,
			  sizeof (real_almost_one));
		}

	      /* BLOCK_INFO (bb)->frequency = frequency
					      / (1 - cyclic_probability)  */

	      sreal_sub (&cyclic_probability, &real_one, &cyclic_probability);
	      sreal_div (&BLOCK_INFO (bb)->frequency,
			 &frequency, &cyclic_probability);
	    }
	}

      bitmap_clear_bit (tovisit, bb->index);

      e = find_edge (bb, head);
      if (e)
	{
	  sreal tmp;

	  /* EDGE_INFO (e)->back_edge_prob
	     = ((e->probability * BLOCK_INFO (bb)->frequency)
		/ REG_BR_PROB_BASE);  */

	  sreal_init (&tmp, e->probability, 0);
	  sreal_mul (&tmp, &tmp, &BLOCK_INFO (bb)->frequency);
	  sreal_mul (&EDGE_INFO (e)->back_edge_prob,
		     &tmp, &real_inv_br_prob_base);
	}

      /* Propagate to successor blocks.  */
      FOR_EACH_EDGE (e, ei, bb->succs)
	if (!(e->flags & EDGE_DFS_BACK)
	    && BLOCK_INFO (e->dest)->npredecessors)
	  {
	    BLOCK_INFO (e->dest)->npredecessors--;
	    if (!BLOCK_INFO (e->dest)->npredecessors)
	      {
		if (!nextbb)
		  nextbb = e->dest;
		else
		  BLOCK_INFO (last)->next = e->dest;

		last = e->dest;
	      }
	  }
    }
}

/* Estimate probabilities of loopback edges in loops at the same nest
   level.  */

static void
estimate_loops_at_level (struct loop *first_loop)
{
  struct loop *loop;

  for (loop = first_loop; loop; loop = loop->next)
    {
      edge e;
      basic_block *bbs;
      unsigned i;
      bitmap tovisit = BITMAP_ALLOC (NULL);

      estimate_loops_at_level (loop->inner);

      /* Find current loop back edge and mark it.  */
      e = loop_latch_edge (loop);
      EDGE_INFO (e)->back_edge = 1;

      bbs = get_loop_body (loop);
      for (i = 0; i < loop->num_nodes; i++)
	bitmap_set_bit (tovisit, bbs[i]->index);
      free (bbs);
      propagate_freq (loop->header, tovisit);
      BITMAP_FREE (tovisit);
    }
}

/* Propagate frequencies through the structure of loops.  */

static void
estimate_loops (void)
{
  bitmap tovisit = BITMAP_ALLOC (NULL);
  basic_block bb;

  /* Start by estimating the frequencies in the loops.  */
  if (current_loops)
    estimate_loops_at_level (current_loops->tree_root->inner);

  /* Now propagate the frequencies through all the blocks.  */
  FOR_ALL_BB (bb)
    {
      bitmap_set_bit (tovisit, bb->index);
    }
  propagate_freq (ENTRY_BLOCK_PTR, tovisit);
  BITMAP_FREE (tovisit);
}

/* Convert counts measured by profile-driven feedback to frequencies.
   Return nonzero iff there was any nonzero execution count.  */

int
counts_to_freqs (void)
{
  gcov_type count_max, true_count_max = 0;
  basic_block bb;

  FOR_EACH_BB (bb)
    true_count_max = MAX (bb->count, true_count_max);

  count_max = MAX (true_count_max, 1);
  FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
    bb->frequency = (bb->count * BB_FREQ_MAX + count_max / 2) / count_max;

  return true_count_max;
}
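
/* A worked example (editor's note): with BB_FREQ_MAX of 10000 and a
   hottest block counted 2000000 times, a block with count 500000 gets
   frequency (500000 * 10000 + 1000000) / 2000000 = 2500, i.e. a quarter
   of the maximum.  */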

/* Return true if the function is likely to be expensive, so there is no
   point in optimizing the performance of the prologue or epilogue, or in
   doing inlining, at the expense of code size growth.  THRESHOLD is the
   limit on the number of instructions the function can execute on
   average and still be considered not expensive.  */

bool
expensive_function_p (int threshold)
{
  unsigned int sum = 0;
  basic_block bb;
  unsigned int limit;

  /* We cannot compute this accurately for large thresholds due to the
     scaled frequencies.  */
  gcc_assert (threshold <= BB_FREQ_MAX);

  /* Frequencies are out of range.  This either means that the function
     contains an internal loop executing more than BB_FREQ_MAX times, or
     that profile feedback is available and the function has not been
     executed at all.  */
  if (ENTRY_BLOCK_PTR->frequency == 0)
    return true;

  /* At most BB_FREQ_MAX^2, so overflow won't happen.  */
  limit = ENTRY_BLOCK_PTR->frequency * threshold;
  FOR_EACH_BB (bb)
    {
      rtx insn;

      for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb));
	   insn = NEXT_INSN (insn))
	if (active_insn_p (insn))
	  {
	    sum += bb->frequency;
	    if (sum > limit)
	      return true;
	  }
    }

  return false;
}
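
/* Put differently (editor's note): every active insn contributes its
   block's frequency to SUM, so SUM approximates instructions executed
   per invocation scaled by the entry frequency.  With an entry frequency
   of 1000 and THRESHOLD of 50, the function counts as expensive once the
   weighted insn total exceeds 50000.  */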

/* Estimate basic block frequencies from the given branch probabilities.  */

void
estimate_bb_frequencies (void)
{
  basic_block bb;
  sreal freq_max;

  if (!flag_branch_probabilities || !counts_to_freqs ())
    {
      static int real_values_initialized = 0;

      if (!real_values_initialized)
	{
	  real_values_initialized = 1;
	  sreal_init (&real_zero, 0, 0);
	  sreal_init (&real_one, 1, 0);
	  sreal_init (&real_br_prob_base, REG_BR_PROB_BASE, 0);
	  sreal_init (&real_bb_freq_max, BB_FREQ_MAX, 0);
	  sreal_init (&real_one_half, 1, -1);
	  sreal_div (&real_inv_br_prob_base, &real_one, &real_br_prob_base);
	  sreal_sub (&real_almost_one, &real_one, &real_inv_br_prob_base);
	}

      mark_dfs_back_edges ();

      single_succ_edge (ENTRY_BLOCK_PTR)->probability = REG_BR_PROB_BASE;

      /* Set up block info for each basic block.  */
      alloc_aux_for_blocks (sizeof (struct block_info_def));
      alloc_aux_for_edges (sizeof (struct edge_info_def));
      FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
	{
	  edge e;
	  edge_iterator ei;

	  FOR_EACH_EDGE (e, ei, bb->succs)
	    {
	      sreal_init (&EDGE_INFO (e)->back_edge_prob, e->probability, 0);
	      sreal_mul (&EDGE_INFO (e)->back_edge_prob,
			 &EDGE_INFO (e)->back_edge_prob,
			 &real_inv_br_prob_base);
	    }
	}

      /* First compute probabilities locally for each loop from innermost
	 to outermost to examine probabilities for back edges.  */
      estimate_loops ();

      memcpy (&freq_max, &real_zero, sizeof (real_zero));
      FOR_EACH_BB (bb)
	if (sreal_compare (&freq_max, &BLOCK_INFO (bb)->frequency) < 0)
	  memcpy (&freq_max, &BLOCK_INFO (bb)->frequency, sizeof (freq_max));

      sreal_div (&freq_max, &real_bb_freq_max, &freq_max);
      FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
	{
	  sreal tmp;

	  sreal_mul (&tmp, &BLOCK_INFO (bb)->frequency, &freq_max);
	  sreal_add (&tmp, &tmp, &real_one_half);
	  bb->frequency = sreal_to_int (&tmp);
	}

      free_aux_for_blocks ();
      free_aux_for_edges ();
    }
  compute_function_frequency ();
  if (flag_reorder_functions)
    choose_function_section ();
}

/* Decide whether the function is hot, normal or unlikely executed.  */
static void
compute_function_frequency (void)
{
  basic_block bb;

  if (!profile_info || !flag_branch_probabilities)
    {
      if (lookup_attribute ("cold", DECL_ATTRIBUTES (current_function_decl))
	  != NULL)
	cfun->function_frequency = FUNCTION_FREQUENCY_UNLIKELY_EXECUTED;
      else if (lookup_attribute ("hot",
				 DECL_ATTRIBUTES (current_function_decl))
	       != NULL)
	cfun->function_frequency = FUNCTION_FREQUENCY_HOT;
      return;
    }
  cfun->function_frequency = FUNCTION_FREQUENCY_UNLIKELY_EXECUTED;
  FOR_EACH_BB (bb)
    {
      if (maybe_hot_bb_p (bb))
	{
	  cfun->function_frequency = FUNCTION_FREQUENCY_HOT;
	  return;
	}
      if (!probably_never_executed_bb_p (bb))
	cfun->function_frequency = FUNCTION_FREQUENCY_NORMAL;
    }
}

/* Choose the appropriate section for the function.  */
static void
choose_function_section (void)
{
  if (DECL_SECTION_NAME (current_function_decl)
      || !targetm.have_named_sections
      /* Theoretically we can split the gnu.linkonce text section too,
	 but this requires more work as the frequency needs to match
	 for all generated objects, so we would need to merge the
	 frequencies of all instances.  For now just never set the
	 frequency for these.  */
      || DECL_ONE_ONLY (current_function_decl))
    return;

  /* If we are doing the partitioning optimization, let the optimization
     choose the correct section into which to put things.  */

  if (flag_reorder_blocks_and_partition)
    return;

  if (cfun->function_frequency == FUNCTION_FREQUENCY_HOT)
    DECL_SECTION_NAME (current_function_decl) =
      build_string (strlen (HOT_TEXT_SECTION_NAME), HOT_TEXT_SECTION_NAME);
  if (cfun->function_frequency == FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
    DECL_SECTION_NAME (current_function_decl) =
      build_string (strlen (UNLIKELY_EXECUTED_TEXT_SECTION_NAME),
		    UNLIKELY_EXECUTED_TEXT_SECTION_NAME);
}

static bool
gate_estimate_probability (void)
{
  return flag_guess_branch_prob;
}

struct tree_opt_pass pass_profile =
{
  "profile",				/* name */
  gate_estimate_probability,		/* gate */
  tree_estimate_probability,		/* execute */
  NULL,					/* sub */
  NULL,					/* next */
  0,					/* static_pass_number */
  TV_BRANCH_PROB,			/* tv_id */
  PROP_cfg,				/* properties_required */
  0,					/* properties_provided */
  0,					/* properties_destroyed */
  0,					/* todo_flags_start */
  TODO_ggc_collect | TODO_verify_ssa,	/* todo_flags_finish */
  0					/* letter */
};