re PR gcov-profile/24487 (Basic block frequencies inaccurate)
1 /* Branch prediction routines for the GNU compiler.
2 Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005
3 Free Software Foundation, Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
20 02110-1301, USA. */
21
22 /* References:
23
24 [1] "Branch Prediction for Free"
25 Ball and Larus; PLDI '93.
26 [2] "Static Branch Frequency and Program Profile Analysis"
27 Wu and Larus; MICRO-27.
28 [3] "Corpus-based Static Branch Prediction"
29 Calder, Grunwald, Lindsay, Martin, Mozer, and Zorn; PLDI '95. */
30
31
32 #include "config.h"
33 #include "system.h"
34 #include "coretypes.h"
35 #include "tm.h"
36 #include "tree.h"
37 #include "rtl.h"
38 #include "tm_p.h"
39 #include "hard-reg-set.h"
40 #include "basic-block.h"
41 #include "insn-config.h"
42 #include "regs.h"
43 #include "flags.h"
44 #include "output.h"
45 #include "function.h"
46 #include "except.h"
47 #include "toplev.h"
48 #include "recog.h"
49 #include "expr.h"
50 #include "predict.h"
51 #include "coverage.h"
52 #include "sreal.h"
53 #include "params.h"
54 #include "target.h"
55 #include "cfgloop.h"
56 #include "tree-flow.h"
57 #include "ggc.h"
58 #include "tree-dump.h"
59 #include "tree-pass.h"
60 #include "timevar.h"
61 #include "tree-scalar-evolution.h"
62 #include "cfgloop.h"
63
64 /* real constants: 0, 1, 1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE,
65 1/REG_BR_PROB_BASE, 0.5, BB_FREQ_MAX. */
66 static sreal real_zero, real_one, real_almost_one, real_br_prob_base,
67 real_inv_br_prob_base, real_one_half, real_bb_freq_max;
68
69 /* Random guesstimation given names. */
70 #define PROB_VERY_UNLIKELY (REG_BR_PROB_BASE / 100 - 1)
71 #define PROB_EVEN (REG_BR_PROB_BASE / 2)
72 #define PROB_VERY_LIKELY (REG_BR_PROB_BASE - PROB_VERY_UNLIKELY)
73 #define PROB_ALWAYS (REG_BR_PROB_BASE)
74
75 static void combine_predictions_for_insn (rtx, basic_block);
76 static void dump_prediction (FILE *, enum br_predictor, int, basic_block, int);
77 static void estimate_loops_at_level (struct loop *, bitmap);
78 static void propagate_freq (struct loop *, bitmap);
79 static void estimate_bb_frequencies (struct loops *);
80 static void predict_paths_leading_to (basic_block, int *, enum br_predictor, enum prediction);
81 static bool last_basic_block_p (basic_block);
82 static void compute_function_frequency (void);
83 static void choose_function_section (void);
84 static bool can_predict_insn_p (rtx);
85
86 /* Information we hold about each branch predictor.
87 Filled using information from predict.def. */
88
89 struct predictor_info
90 {
91 const char *const name; /* Name used in the debugging dumps. */
92 const int hitrate; /* Expected hitrate used by
93 predict_insn_def call. */
94 const int flags;
95 };
96
97 /* Use given predictor without Dempster-Shafer theory if it matches
98 using first_match heuristics. */
99 #define PRED_FLAG_FIRST_MATCH 1
100
101 /* Recompute hitrate in percent to our representation. */
102
103 #define HITRATE(VAL) ((int) ((VAL) * REG_BR_PROB_BASE + 50) / 100)
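/* A worked example of the scaling, assuming REG_BR_PROB_BASE is 10000 (its
   usual value): a predictor listed with a 90% hitrate in predict.def maps
   to HITRATE (90) == (90 * 10000 + 50) / 100 == 9000, and PROB_VERY_UNLIKELY
   above works out to 10000 / 100 - 1 == 99, i.e. just under 1%. */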
104
105 #define DEF_PREDICTOR(ENUM, NAME, HITRATE, FLAGS) {NAME, HITRATE, FLAGS},
106 static const struct predictor_info predictor_info[]= {
107 #include "predict.def"
108
109 /* Upper bound on predictors. */
110 {NULL, 0, 0}
111 };
112 #undef DEF_PREDICTOR
113
114 /* Return true in case BB can be CPU intensive and should be optimized
115 for maximal performance. */
116
117 bool
118 maybe_hot_bb_p (basic_block bb)
119 {
120 if (profile_info && flag_branch_probabilities
121 && (bb->count
122 < profile_info->sum_max / PARAM_VALUE (HOT_BB_COUNT_FRACTION)))
123 return false;
124 if (bb->frequency < BB_FREQ_MAX / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION))
125 return false;
126 return true;
127 }
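/* An illustrative reading of the thresholds above, assuming the default
   parameter values hot-bb-count-fraction == 10000 and
   hot-bb-frequency-fraction == 1000 (defaults taken from params.def; check
   your tree): a profiled block stops being hot once its count falls below
   sum_max / 10000, and a guessed block once its frequency falls below
   BB_FREQ_MAX / 1000, i.e. 0.1% of the hottest possible frequency. */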
128
129 /* Return true in case BB is cold and should be optimized for size. */
130
131 bool
132 probably_cold_bb_p (basic_block bb)
133 {
134 if (profile_info && flag_branch_probabilities
135 && (bb->count
136 < profile_info->sum_max / PARAM_VALUE (HOT_BB_COUNT_FRACTION)))
137 return true;
138 if (bb->frequency < BB_FREQ_MAX / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION))
139 return true;
140 return false;
141 }
142
143 /* Return true in case BB is probably never executed. */
144 bool
145 probably_never_executed_bb_p (basic_block bb)
146 {
147 if (profile_info && flag_branch_probabilities)
148 return ((bb->count + profile_info->runs / 2) / profile_info->runs) == 0;
149 return false;
150 }
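/* The rounding above implements "executed less than half a time per run on
   average": e.g. with profile_info->runs == 100, a block with count 49
   gives (49 + 50) / 100 == 0 and is considered never executed, while
   count 50 gives (50 + 50) / 100 == 1 and is not. */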
151
152 /* Return true if one of the outgoing edges is already predicted by
153 PREDICTOR. */
154
155 bool
156 rtl_predicted_by_p (basic_block bb, enum br_predictor predictor)
157 {
158 rtx note;
159 if (!INSN_P (BB_END (bb)))
160 return false;
161 for (note = REG_NOTES (BB_END (bb)); note; note = XEXP (note, 1))
162 if (REG_NOTE_KIND (note) == REG_BR_PRED
163 && INTVAL (XEXP (XEXP (note, 0), 0)) == (int)predictor)
164 return true;
165 return false;
166 }
167
168 /* Return true if one of the outgoing edges is already predicted by
169 PREDICTOR. */
170
171 bool
172 tree_predicted_by_p (basic_block bb, enum br_predictor predictor)
173 {
174 struct edge_prediction *i;
175 for (i = bb->predictions; i; i = i->next)
176 if (i->predictor == predictor)
177 return true;
178 return false;
179 }
180
181 static void
182 predict_insn (rtx insn, enum br_predictor predictor, int probability)
183 {
184 gcc_assert (any_condjump_p (insn));
185 if (!flag_guess_branch_prob)
186 return;
187
188 REG_NOTES (insn)
189 = gen_rtx_EXPR_LIST (REG_BR_PRED,
190 gen_rtx_CONCAT (VOIDmode,
191 GEN_INT ((int) predictor),
192 GEN_INT ((int) probability)),
193 REG_NOTES (insn));
194 }
195
196 /* Predict insn by given predictor. */
197
198 void
199 predict_insn_def (rtx insn, enum br_predictor predictor,
200 enum prediction taken)
201 {
202 int probability = predictor_info[(int) predictor].hitrate;
203
204 if (taken != TAKEN)
205 probability = REG_BR_PROB_BASE - probability;
206
207 predict_insn (insn, predictor, probability);
208 }
209
210 /* Predict edge E with given probability if possible. */
211
212 void
213 rtl_predict_edge (edge e, enum br_predictor predictor, int probability)
214 {
215 rtx last_insn;
216 last_insn = BB_END (e->src);
217
218 /* We can store the branch prediction information only about
219 conditional jumps. */
220 if (!any_condjump_p (last_insn))
221 return;
222
223 /* We always store probability of branching. */
224 if (e->flags & EDGE_FALLTHRU)
225 probability = REG_BR_PROB_BASE - probability;
226
227 predict_insn (last_insn, predictor, probability);
228 }
229
230 /* Predict edge E with the given PROBABILITY. */
231 void
232 tree_predict_edge (edge e, enum br_predictor predictor, int probability)
233 {
234 gcc_assert (profile_status != PROFILE_GUESSED);
235 if ((e->src != ENTRY_BLOCK_PTR && EDGE_COUNT (e->src->succs) > 1)
236 && flag_guess_branch_prob && optimize)
237 {
238 struct edge_prediction *i = ggc_alloc (sizeof (struct edge_prediction));
239
240 i->next = e->src->predictions;
241 e->src->predictions = i;
242 i->probability = probability;
243 i->predictor = predictor;
244 i->edge = e;
245 }
246 }
247
248 /* Remove all predictions on given basic block that are attached
249 to edge E. */
250 void
251 remove_predictions_associated_with_edge (edge e)
252 {
253 if (e->src->predictions)
254 {
255 struct edge_prediction **prediction = &e->src->predictions;
256 while (*prediction)
257 {
258 if ((*prediction)->edge == e)
259 *prediction = (*prediction)->next;
260 else
261 prediction = &((*prediction)->next);
262 }
263 }
264 }
265
266 /* Return true when we can store a prediction on insn INSN.
267 At the moment we represent predictions only on conditional
268 jumps, not on computed jumps or other complicated cases. */
269 static bool
270 can_predict_insn_p (rtx insn)
271 {
272 return (JUMP_P (insn)
273 && any_condjump_p (insn)
274 && EDGE_COUNT (BLOCK_FOR_INSN (insn)->succs) >= 2);
275 }
276
277 /* Predict edge E by given predictor if possible. */
278
279 void
280 predict_edge_def (edge e, enum br_predictor predictor,
281 enum prediction taken)
282 {
283 int probability = predictor_info[(int) predictor].hitrate;
284
285 if (taken != TAKEN)
286 probability = REG_BR_PROB_BASE - probability;
287
288 predict_edge (e, predictor, probability);
289 }
290
291 /* Invert all branch predictions or probability notes in the INSN. This needs
292 to be done each time we invert the condition used by the jump. */
293
294 void
295 invert_br_probabilities (rtx insn)
296 {
297 rtx note;
298
299 for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
300 if (REG_NOTE_KIND (note) == REG_BR_PROB)
301 XEXP (note, 0) = GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (note, 0)));
302 else if (REG_NOTE_KIND (note) == REG_BR_PRED)
303 XEXP (XEXP (note, 0), 1)
304 = GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (XEXP (note, 0), 1)));
305 }
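/* For example, assuming REG_BR_PROB_BASE is 10000, a REG_BR_PROB note
   holding 9000 (branch taken 90% of the time) becomes 1000 (10%) once the
   jump condition is inverted. */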
306
307 /* Dump information about the branch prediction to the output file. */
308
309 static void
310 dump_prediction (FILE *file, enum br_predictor predictor, int probability,
311 basic_block bb, int used)
312 {
313 edge e;
314 edge_iterator ei;
315
316 if (!file)
317 return;
318
319 FOR_EACH_EDGE (e, ei, bb->succs)
320 if (! (e->flags & EDGE_FALLTHRU))
321 break;
322
323 fprintf (file, " %s heuristics%s: %.1f%%",
324 predictor_info[predictor].name,
325 used ? "" : " (ignored)", probability * 100.0 / REG_BR_PROB_BASE);
326
327 if (bb->count)
328 {
329 fprintf (file, " exec ");
330 fprintf (file, HOST_WIDEST_INT_PRINT_DEC, bb->count);
331 if (e)
332 {
333 fprintf (file, " hit ");
334 fprintf (file, HOST_WIDEST_INT_PRINT_DEC, e->count);
335 fprintf (file, " (%.1f%%)", e->count * 100.0 / bb->count);
336 }
337 }
338
339 fprintf (file, "\n");
340 }
341
342 /* We cannot predict the probabilities of the outgoing edges of BB.
343 Set them evenly and hope for the best. */
344 static void
345 set_even_probabilities (basic_block bb)
346 {
347 int nedges = 0;
348 edge e;
349 edge_iterator ei;
350
351 FOR_EACH_EDGE (e, ei, bb->succs)
352 if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
353 nedges ++;
354 FOR_EACH_EDGE (e, ei, bb->succs)
355 if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
356 e->probability = (REG_BR_PROB_BASE + nedges / 2) / nedges;
357 else
358 e->probability = 0;
359 }
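/* The nedges / 2 term rounds to the nearest value: with three non-EH,
   non-fake successors and REG_BR_PROB_BASE of 10000, each edge gets
   (10000 + 1) / 3 == 3333. */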
360
361 /* Combine all REG_BR_PRED notes into a single probability and attach a
362 REG_BR_PROB note if not already present. Remove now useless REG_BR_PRED notes. */
363
364 static void
365 combine_predictions_for_insn (rtx insn, basic_block bb)
366 {
367 rtx prob_note;
368 rtx *pnote;
369 rtx note;
370 int best_probability = PROB_EVEN;
371 int best_predictor = END_PREDICTORS;
372 int combined_probability = REG_BR_PROB_BASE / 2;
373 int d;
374 bool first_match = false;
375 bool found = false;
376
377 if (!can_predict_insn_p (insn))
378 {
379 set_even_probabilities (bb);
380 return;
381 }
382
383 prob_note = find_reg_note (insn, REG_BR_PROB, 0);
384 pnote = &REG_NOTES (insn);
385 if (dump_file)
386 fprintf (dump_file, "Predictions for insn %i bb %i\n", INSN_UID (insn),
387 bb->index);
388
389 /* We implement the "first match" heuristic and use the probability guessed
390 by the predictor with the smallest index. */
391 for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
392 if (REG_NOTE_KIND (note) == REG_BR_PRED)
393 {
394 int predictor = INTVAL (XEXP (XEXP (note, 0), 0));
395 int probability = INTVAL (XEXP (XEXP (note, 0), 1));
396
397 found = true;
398 if (best_predictor > predictor)
399 best_probability = probability, best_predictor = predictor;
400
401 d = (combined_probability * probability
402 + (REG_BR_PROB_BASE - combined_probability)
403 * (REG_BR_PROB_BASE - probability));
404
405 /* Use FP math to avoid overflows of 32-bit integers. */
406 if (d == 0)
407 /* If one probability is 0% and one 100%, avoid division by zero. */
408 combined_probability = REG_BR_PROB_BASE / 2;
409 else
410 combined_probability = (((double) combined_probability) * probability
411 * REG_BR_PROB_BASE / d + 0.5);
412 }
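/* A sketch of how the Dempster-Shafer combination above evolves, assuming
   REG_BR_PROB_BASE is 10000: starting from the even prior of 5000, a first
   predictor of 8000 gives d == 5000*8000 + 5000*2000 == 50000000 and
   combined == 5000*8000*10000 / d == 8000; folding in a second predictor
   of 7000 gives d == 8000*7000 + 2000*3000 == 62000000 and
   combined == 8000*7000*10000 / d == 9032, so two agreeing predictors
   reinforce each other. */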
413
414 /* Decide which heuristic to use. If we didn't match anything, use the
415 no_prediction heuristic; if we did match, use either first match or
416 Dempster-Shafer theory depending on the flags. */
417
418 if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
419 first_match = true;
420
421 if (!found)
422 dump_prediction (dump_file, PRED_NO_PREDICTION,
423 combined_probability, bb, true);
424 else
425 {
426 dump_prediction (dump_file, PRED_DS_THEORY, combined_probability,
427 bb, !first_match);
428 dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability,
429 bb, first_match);
430 }
431
432 if (first_match)
433 combined_probability = best_probability;
434 dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);
435
436 while (*pnote)
437 {
438 if (REG_NOTE_KIND (*pnote) == REG_BR_PRED)
439 {
440 int predictor = INTVAL (XEXP (XEXP (*pnote, 0), 0));
441 int probability = INTVAL (XEXP (XEXP (*pnote, 0), 1));
442
443 dump_prediction (dump_file, predictor, probability, bb,
444 !first_match || best_predictor == predictor);
445 *pnote = XEXP (*pnote, 1);
446 }
447 else
448 pnote = &XEXP (*pnote, 1);
449 }
450
451 if (!prob_note)
452 {
453 REG_NOTES (insn)
454 = gen_rtx_EXPR_LIST (REG_BR_PROB,
455 GEN_INT (combined_probability), REG_NOTES (insn));
456
457 /* Save the prediction into the CFG in case we are seeing a non-degenerate
458 conditional jump. */
459 if (!single_succ_p (bb))
460 {
461 BRANCH_EDGE (bb)->probability = combined_probability;
462 FALLTHRU_EDGE (bb)->probability
463 = REG_BR_PROB_BASE - combined_probability;
464 }
465 }
466 else if (!single_succ_p (bb))
467 {
468 int prob = INTVAL (XEXP (prob_note, 0));
469
470 BRANCH_EDGE (bb)->probability = prob;
471 FALLTHRU_EDGE (bb)->probability = REG_BR_PROB_BASE - prob;
472 }
473 else
474 single_succ_edge (bb)->probability = REG_BR_PROB_BASE;
475 }
476
477 /* Combine the predictions into a single probability and store it into the
478 CFG. Remove the now useless prediction entries. */
479
480 static void
481 combine_predictions_for_bb (FILE *file, basic_block bb)
482 {
483 int best_probability = PROB_EVEN;
484 int best_predictor = END_PREDICTORS;
485 int combined_probability = REG_BR_PROB_BASE / 2;
486 int d;
487 bool first_match = false;
488 bool found = false;
489 struct edge_prediction *pred;
490 int nedges = 0;
491 edge e, first = NULL, second = NULL;
492 edge_iterator ei;
493
494 FOR_EACH_EDGE (e, ei, bb->succs)
495 if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
496 {
497 nedges ++;
498 if (first && !second)
499 second = e;
500 if (!first)
501 first = e;
502 }
503
504 /* When there is no successor or only one choice, prediction is easy.
505
506 We are lazy for now and predict only basic blocks with two outgoing
507 edges. It is possible to predict the generic case too, but we would have
508 to ignore the first match heuristics and do more involved combining.
509 Implement this later. */
510 if (nedges != 2)
511 {
512 if (!bb->count)
513 set_even_probabilities (bb);
514 bb->predictions = NULL;
515 if (file)
516 fprintf (file, "%i edges in bb %i predicted to even probabilities\n",
517 nedges, bb->index);
518 return;
519 }
520
521 if (file)
522 fprintf (file, "Predictions for bb %i\n", bb->index);
523
524 /* We implement the "first match" heuristic and use the probability guessed
525 by the predictor with the smallest index. */
526 for (pred = bb->predictions; pred; pred = pred->next)
527 {
528 int predictor = pred->predictor;
529 int probability = pred->probability;
530
531 if (pred->edge != first)
532 probability = REG_BR_PROB_BASE - probability;
533
534 found = true;
535 if (best_predictor > predictor)
536 best_probability = probability, best_predictor = predictor;
537
538 d = (combined_probability * probability
539 + (REG_BR_PROB_BASE - combined_probability)
540 * (REG_BR_PROB_BASE - probability));
541
542 /* Use FP math to avoid overflows of 32-bit integers. */
543 if (d == 0)
544 /* If one probability is 0% and one 100%, avoid division by zero. */
545 combined_probability = REG_BR_PROB_BASE / 2;
546 else
547 combined_probability = (((double) combined_probability) * probability
548 * REG_BR_PROB_BASE / d + 0.5);
549 }
550
551 /* Decide which heuristic to use. If we didn't match anything, use the
552 no_prediction heuristic; if we did match, use either first match or
553 Dempster-Shafer theory depending on the flags. */
554
555 if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
556 first_match = true;
557
558 if (!found)
559 dump_prediction (file, PRED_NO_PREDICTION, combined_probability, bb, true);
560 else
561 {
562 dump_prediction (file, PRED_DS_THEORY, combined_probability, bb,
563 !first_match);
564 dump_prediction (file, PRED_FIRST_MATCH, best_probability, bb,
565 first_match);
566 }
567
568 if (first_match)
569 combined_probability = best_probability;
570 dump_prediction (file, PRED_COMBINED, combined_probability, bb, true);
571
572 for (pred = bb->predictions; pred; pred = pred->next)
573 {
574 int predictor = pred->predictor;
575 int probability = pred->probability;
576
577 if (pred->edge != EDGE_SUCC (bb, 0))
578 probability = REG_BR_PROB_BASE - probability;
579 dump_prediction (file, predictor, probability, bb,
580 !first_match || best_predictor == predictor);
581 }
582 bb->predictions = NULL;
583
584 if (!bb->count)
585 {
586 first->probability = combined_probability;
587 second->probability = REG_BR_PROB_BASE - combined_probability;
588 }
589 }
590
591 /* Predict edge probabilities by exploiting loop structure.
592 When RTLSIMPLELOOPS is set, attempt to count the number of iterations by
593 analyzing the RTL; otherwise use a tree based approach. */
594 static void
595 predict_loops (struct loops *loops_info, bool rtlsimpleloops)
596 {
597 unsigned i;
598
599 if (!rtlsimpleloops)
600 scev_initialize (loops_info);
601
602 /* Try to predict out blocks in a loop that are not part of a
603 natural loop. */
604 for (i = 1; i < loops_info->num; i++)
605 {
606 basic_block bb, *bbs;
607 unsigned j;
608 unsigned n_exits;
609 struct loop *loop = loops_info->parray[i];
610 struct niter_desc desc;
611 unsigned HOST_WIDE_INT niter;
612 edge *exits;
613
614 exits = get_loop_exit_edges (loop, &n_exits);
615
616 if (rtlsimpleloops)
617 {
618 iv_analysis_loop_init (loop);
619 find_simple_exit (loop, &desc);
620
621 if (desc.simple_p && desc.const_iter)
622 {
623 int prob;
624 niter = desc.niter + 1;
625 if (niter == 0) /* We might overflow here. */
626 niter = desc.niter;
627 if (niter > MAX_PRED_LOOP_ITERATIONS)
628 niter = MAX_PRED_LOOP_ITERATIONS;
629
630 prob = (REG_BR_PROB_BASE
631 - (REG_BR_PROB_BASE + niter /2) / niter);
632 /* The branch prediction algorithm gives 0 frequency for everything
633 after the end of a loop that has 0 probability of finishing. */
634 if (prob == REG_BR_PROB_BASE)
635 prob = REG_BR_PROB_BASE - 1;
636 predict_edge (desc.in_edge, PRED_LOOP_ITERATIONS,
637 prob);
638 }
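/* For example, a loop found to iterate 10 times gets, with REG_BR_PROB_BASE
   of 10000, prob == 10000 - (10000 + 5) / 10 == 9000, i.e. desc.in_edge is
   predicted taken 90% of the time, matching one loop exit per ten
   iterations. */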
639 }
640 else
641 {
642 struct tree_niter_desc niter_desc;
643
644 for (j = 0; j < n_exits; j++)
645 {
646 tree niter = NULL;
647
648 if (number_of_iterations_exit (loop, exits[j], &niter_desc, false))
649 niter = niter_desc.niter;
650 if (!niter || TREE_CODE (niter_desc.niter) != INTEGER_CST)
651 niter = loop_niter_by_eval (loop, exits[j]);
652
653 if (TREE_CODE (niter) == INTEGER_CST)
654 {
655 int probability;
656 if (host_integerp (niter, 1)
657 && tree_int_cst_lt (niter,
658 build_int_cstu (NULL_TREE,
659 MAX_PRED_LOOP_ITERATIONS - 1)))
660 {
661 HOST_WIDE_INT nitercst = tree_low_cst (niter, 1) + 1;
662 probability = ((REG_BR_PROB_BASE + nitercst / 2)
663 / nitercst);
664 }
665 else
666 probability = ((REG_BR_PROB_BASE
667 + MAX_PRED_LOOP_ITERATIONS / 2)
668 / MAX_PRED_LOOP_ITERATIONS);
669
670 predict_edge (exits[j], PRED_LOOP_ITERATIONS, probability);
671 }
672 }
673
674 }
675 free (exits);
676
677 bbs = get_loop_body (loop);
678
679 for (j = 0; j < loop->num_nodes; j++)
680 {
681 int header_found = 0;
682 edge e;
683 edge_iterator ei;
684
685 bb = bbs[j];
686
687 /* Bypass loop heuristics on continue statements. These
688 statements construct loops via "non-loop" constructs
689 in the source language and are better handled
690 separately. */
691 if ((rtlsimpleloops && !can_predict_insn_p (BB_END (bb)))
692 || predicted_by_p (bb, PRED_CONTINUE))
693 continue;
694
695 /* Loop branch heuristics - predict an edge back to a
696 loop's head as taken. */
697 if (bb == loop->latch)
698 {
699 e = find_edge (loop->latch, loop->header);
700 if (e)
701 {
702 header_found = 1;
703 predict_edge_def (e, PRED_LOOP_BRANCH, TAKEN);
704 }
705 }
706
707 /* Loop exit heuristics - predict as not taken an edge exiting the loop
708 if the conditional has no loop header successor. */
709 if (!header_found)
710 FOR_EACH_EDGE (e, ei, bb->succs)
711 if (e->dest->index < 0
712 || !flow_bb_inside_loop_p (loop, e->dest))
713 predict_edge
714 (e, PRED_LOOP_EXIT,
715 (REG_BR_PROB_BASE
716 - predictor_info [(int) PRED_LOOP_EXIT].hitrate)
717 / n_exits);
718 }
719
720 /* Free basic blocks from get_loop_body. */
721 free (bbs);
722 }
723
724 if (!rtlsimpleloops)
725 {
726 scev_finalize ();
727 current_loops = NULL;
728 }
729 }
730
731 /* Attempt to predict probabilities of BB outgoing edges using local
732 properties. */
733 static void
734 bb_estimate_probability_locally (basic_block bb)
735 {
736 rtx last_insn = BB_END (bb);
737 rtx cond;
738
739 if (! can_predict_insn_p (last_insn))
740 return;
741 cond = get_condition (last_insn, NULL, false, false);
742 if (! cond)
743 return;
744
745 /* Try "pointer heuristic."
746 A comparison ptr == 0 is predicted as false.
747 Similarly, a comparison ptr1 == ptr2 is predicted as false. */
748 if (COMPARISON_P (cond)
749 && ((REG_P (XEXP (cond, 0)) && REG_POINTER (XEXP (cond, 0)))
750 || (REG_P (XEXP (cond, 1)) && REG_POINTER (XEXP (cond, 1)))))
751 {
752 if (GET_CODE (cond) == EQ)
753 predict_insn_def (last_insn, PRED_POINTER, NOT_TAKEN);
754 else if (GET_CODE (cond) == NE)
755 predict_insn_def (last_insn, PRED_POINTER, TAKEN);
756 }
757 else
758
759 /* Try "opcode heuristic."
760 EQ tests are usually false and NE tests are usually true. Also,
761 most quantities are positive, so we can make the appropriate guesses
762 about signed comparisons against zero. */
763 switch (GET_CODE (cond))
764 {
765 case CONST_INT:
766 /* Unconditional branch. */
767 predict_insn_def (last_insn, PRED_UNCONDITIONAL,
768 cond == const0_rtx ? NOT_TAKEN : TAKEN);
769 break;
770
771 case EQ:
772 case UNEQ:
773 /* Floating point comparisons appear to behave in a very
774 unpredictable way because of the special role of equality
775 tests in FP code. */
776 if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
777 ;
778 /* Comparisons with 0 are often used for booleans and there is
779 nothing useful to predict about them. */
780 else if (XEXP (cond, 1) == const0_rtx
781 || XEXP (cond, 0) == const0_rtx)
782 ;
783 else
784 predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, NOT_TAKEN);
785 break;
786
787 case NE:
788 case LTGT:
789 /* Floating point comparisons appear to behave in a very
790 unpredictable way because of the special role of equality
791 tests in FP code. */
792 if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
793 ;
794 /* Comparisons with 0 are often used for booleans and there is
795 nothing useful to predict about them. */
796 else if (XEXP (cond, 1) == const0_rtx
797 || XEXP (cond, 0) == const0_rtx)
798 ;
799 else
800 predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, TAKEN);
801 break;
802
803 case ORDERED:
804 predict_insn_def (last_insn, PRED_FPOPCODE, TAKEN);
805 break;
806
807 case UNORDERED:
808 predict_insn_def (last_insn, PRED_FPOPCODE, NOT_TAKEN);
809 break;
810
811 case LE:
812 case LT:
813 if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
814 || XEXP (cond, 1) == constm1_rtx)
815 predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, NOT_TAKEN);
816 break;
817
818 case GE:
819 case GT:
820 if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
821 || XEXP (cond, 1) == constm1_rtx)
822 predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, TAKEN);
823 break;
824
825 default:
826 break;
827 }
828 }
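/* Usage sketch (hypothetical conditions): for a jump ending in
   "if (x < 0)" the LT case above predicts the comparison false, since most
   signed quantities are positive; for "if (p == 0)" with p held in a
   pointer register, the pointer heuristic predicts the comparison false as
   well. */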
829
830 /* Statically estimate the probability that a branch will be taken and
831 produce an estimated profile. When profile feedback is present, the
832 never executed portions of the function still get estimated. */
833
834 void
835 estimate_probability (struct loops *loops_info)
836 {
837 basic_block bb;
838
839 connect_infinite_loops_to_exit ();
840 calculate_dominance_info (CDI_DOMINATORS);
841 calculate_dominance_info (CDI_POST_DOMINATORS);
842
843 predict_loops (loops_info, true);
844
845 iv_analysis_done ();
846
847 /* Attempt to predict conditional jumps using a number of heuristics. */
848 FOR_EACH_BB (bb)
849 {
850 rtx last_insn = BB_END (bb);
851 edge e;
852 edge_iterator ei;
853
854 if (! can_predict_insn_p (last_insn))
855 continue;
856
857 FOR_EACH_EDGE (e, ei, bb->succs)
858 {
859 /* Predict early returns to be probable, as we've already taken
860 care of error returns and others are often used for fast paths
861 through the function. */
862 if ((e->dest == EXIT_BLOCK_PTR
863 || (single_succ_p (e->dest)
864 && single_succ (e->dest) == EXIT_BLOCK_PTR))
865 && !predicted_by_p (bb, PRED_NULL_RETURN)
866 && !predicted_by_p (bb, PRED_CONST_RETURN)
867 && !predicted_by_p (bb, PRED_NEGATIVE_RETURN)
868 && !last_basic_block_p (e->dest))
869 predict_edge_def (e, PRED_EARLY_RETURN, TAKEN);
870
871 /* Look for the block we are guarding (i.e. we dominate it,
872 but it doesn't postdominate us). */
873 if (e->dest != EXIT_BLOCK_PTR && e->dest != bb
874 && dominated_by_p (CDI_DOMINATORS, e->dest, e->src)
875 && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e->dest))
876 {
877 rtx insn;
878
879 /* The call heuristic claims that a guarded function call
880 is improbable. This is because such calls are often used
881 to signal exceptional situations such as printing error
882 messages. */
883 for (insn = BB_HEAD (e->dest); insn != NEXT_INSN (BB_END (e->dest));
884 insn = NEXT_INSN (insn))
885 if (CALL_P (insn)
886 /* Constant and pure calls are hardly ever used to signal
887 something exceptional. */
888 && ! CONST_OR_PURE_CALL_P (insn))
889 {
890 predict_edge_def (e, PRED_CALL, NOT_TAKEN);
891 break;
892 }
893 }
894 }
895 bb_estimate_probability_locally (bb);
896 }
897
898 /* Attach the combined probability to each conditional jump. */
899 FOR_EACH_BB (bb)
900 combine_predictions_for_insn (BB_END (bb), bb);
901
902 remove_fake_edges ();
903 estimate_bb_frequencies (loops_info);
904 free_dominance_info (CDI_POST_DOMINATORS);
905 if (profile_status == PROFILE_ABSENT)
906 profile_status = PROFILE_GUESSED;
907 }
908
909 /* Set edge->probability for each successor edge of BB. */
910 void
911 guess_outgoing_edge_probabilities (basic_block bb)
912 {
913 bb_estimate_probability_locally (bb);
914 combine_predictions_for_insn (BB_END (bb), bb);
915 }
916 \f
917 /* Return the constant EXPR will likely have at execution time, or NULL if
918 unknown. The function is used by the builtin_expect branch predictor, so
919 the evidence must come from this construct and possible additional constant folding.
920
921 We may want to implement a more involved value guess (such as value range
922 propagation based prediction), but such tricks shall go into a new
923 implementation. */
924
925 static tree
926 expr_expected_value (tree expr, bitmap visited)
927 {
928 if (TREE_CONSTANT (expr))
929 return expr;
930 else if (TREE_CODE (expr) == SSA_NAME)
931 {
932 tree def = SSA_NAME_DEF_STMT (expr);
933
934 /* If we were already here, break the infinite cycle. */
935 if (bitmap_bit_p (visited, SSA_NAME_VERSION (expr)))
936 return NULL;
937 bitmap_set_bit (visited, SSA_NAME_VERSION (expr));
938
939 if (TREE_CODE (def) == PHI_NODE)
940 {
941 /* All the arguments of the PHI node must have the same expected
942 constant value. */
943 int i;
944 tree val = NULL, new_val;
945
946 for (i = 0; i < PHI_NUM_ARGS (def); i++)
947 {
948 tree arg = PHI_ARG_DEF (def, i);
949
950 /* If this PHI has itself as an argument, we cannot
951 determine the expected value of this argument. However,
952 if we can find an expected constant value for the other
953 PHI args then we can still be sure that this is
954 likely a constant. So be optimistic and just
955 continue with the next argument. */
956 if (arg == PHI_RESULT (def))
957 continue;
958
959 new_val = expr_expected_value (arg, visited);
960 if (!new_val)
961 return NULL;
962 if (!val)
963 val = new_val;
964 else if (!operand_equal_p (val, new_val, false))
965 return NULL;
966 }
967 return val;
968 }
969 if (TREE_CODE (def) != MODIFY_EXPR || TREE_OPERAND (def, 0) != expr)
970 return NULL;
971 return expr_expected_value (TREE_OPERAND (def, 1), visited);
972 }
973 else if (TREE_CODE (expr) == CALL_EXPR)
974 {
975 tree decl = get_callee_fndecl (expr);
976 if (!decl)
977 return NULL;
978 if (DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL
979 && DECL_FUNCTION_CODE (decl) == BUILT_IN_EXPECT)
980 {
981 tree arglist = TREE_OPERAND (expr, 1);
982 tree val;
983
984 if (arglist == NULL_TREE
985 || TREE_CHAIN (arglist) == NULL_TREE)
986 return NULL;
987 val = TREE_VALUE (TREE_CHAIN (TREE_OPERAND (expr, 1)));
988 if (TREE_CONSTANT (val))
989 return val;
990 return TREE_VALUE (TREE_CHAIN (TREE_OPERAND (expr, 1)));
991 }
992 }
993 if (BINARY_CLASS_P (expr) || COMPARISON_CLASS_P (expr))
994 {
995 tree op0, op1, res;
996 op0 = expr_expected_value (TREE_OPERAND (expr, 0), visited);
997 if (!op0)
998 return NULL;
999 op1 = expr_expected_value (TREE_OPERAND (expr, 1), visited);
1000 if (!op1)
1001 return NULL;
1002 res = fold_build2 (TREE_CODE (expr), TREE_TYPE (expr), op0, op1);
1003 if (TREE_CONSTANT (res))
1004 return res;
1005 return NULL;
1006 }
1007 if (UNARY_CLASS_P (expr))
1008 {
1009 tree op0, res;
1010 op0 = expr_expected_value (TREE_OPERAND (expr, 0), visited);
1011 if (!op0)
1012 return NULL;
1013 res = fold_build1 (TREE_CODE (expr), TREE_TYPE (expr), op0);
1014 if (TREE_CONSTANT (res))
1015 return res;
1016 return NULL;
1017 }
1018 return NULL;
1019 }
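/* Usage sketch (hypothetical source): given

     if (__builtin_expect (x > 0, 1)) ...

   the condition gimplifies into a comparison against the result of the
   BUILT_IN_EXPECT call; the walk above follows the SSA definition to that
   call, returns its second argument (the constant 1), and the enclosing
   comparison folds to a nonzero constant, so the branch can be predicted
   taken. */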
1020 \f
1021 /* Get rid of all builtin_expect calls we no longer need. */
1022 static void
1023 strip_builtin_expect (void)
1024 {
1025 basic_block bb;
1026 FOR_EACH_BB (bb)
1027 {
1028 block_stmt_iterator bi;
1029 for (bi = bsi_start (bb); !bsi_end_p (bi); bsi_next (&bi))
1030 {
1031 tree stmt = bsi_stmt (bi);
1032 tree fndecl;
1033 tree arglist;
1034
1035 if (TREE_CODE (stmt) == MODIFY_EXPR
1036 && TREE_CODE (TREE_OPERAND (stmt, 1)) == CALL_EXPR
1037 && (fndecl = get_callee_fndecl (TREE_OPERAND (stmt, 1)))
1038 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
1039 && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_EXPECT
1040 && (arglist = TREE_OPERAND (TREE_OPERAND (stmt, 1), 1))
1041 && TREE_CHAIN (arglist))
1042 {
1043 TREE_OPERAND (stmt, 1) = TREE_VALUE (arglist);
1044 update_stmt (stmt);
1045 }
1046 }
1047 }
1048 }
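/* E.g. a statement of the form "x = __builtin_expect (y, 1)" is rewritten
   into plain "x = y" once its hint has been consumed: TREE_VALUE (arglist)
   is the first argument of the call. */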
1049 \f
1050 /* Predict using opcode of the last statement in basic block. */
1051 static void
1052 tree_predict_by_opcode (basic_block bb)
1053 {
1054 tree stmt = last_stmt (bb);
1055 edge then_edge;
1056 tree cond;
1057 tree op0;
1058 tree type;
1059 tree val;
1060 bitmap visited;
1061 edge_iterator ei;
1062
1063 if (!stmt || TREE_CODE (stmt) != COND_EXPR)
1064 return;
1065 FOR_EACH_EDGE (then_edge, ei, bb->succs)
1066 if (then_edge->flags & EDGE_TRUE_VALUE)
1067 break;
1068 cond = TREE_OPERAND (stmt, 0);
1069 if (!COMPARISON_CLASS_P (cond))
1070 return;
1071 op0 = TREE_OPERAND (cond, 0);
1072 type = TREE_TYPE (op0);
1073 visited = BITMAP_ALLOC (NULL);
1074 val = expr_expected_value (cond, visited);
1075 BITMAP_FREE (visited);
1076 if (val)
1077 {
1078 if (integer_zerop (val))
1079 predict_edge_def (then_edge, PRED_BUILTIN_EXPECT, NOT_TAKEN);
1080 else
1081 predict_edge_def (then_edge, PRED_BUILTIN_EXPECT, TAKEN);
1082 return;
1083 }
1084 /* Try "pointer heuristic."
1085 A comparison ptr == 0 is predicted as false.
1086 Similarly, a comparison ptr1 == ptr2 is predicted as false. */
1087 if (POINTER_TYPE_P (type))
1088 {
1089 if (TREE_CODE (cond) == EQ_EXPR)
1090 predict_edge_def (then_edge, PRED_TREE_POINTER, NOT_TAKEN);
1091 else if (TREE_CODE (cond) == NE_EXPR)
1092 predict_edge_def (then_edge, PRED_TREE_POINTER, TAKEN);
1093 }
1094 else
1095
1096 /* Try "opcode heuristic."
1097 EQ tests are usually false and NE tests are usually true. Also,
1098 most quantities are positive, so we can make the appropriate guesses
1099 about signed comparisons against zero. */
1100 switch (TREE_CODE (cond))
1101 {
1102 case EQ_EXPR:
1103 case UNEQ_EXPR:
1104 /* Floating point comparisons appear to behave in a very
1105 unpredictable way because of the special role of equality
1106 tests in FP code. */
1107 if (FLOAT_TYPE_P (type))
1108 ;
1109 /* Comparisons with 0 are often used for booleans and there is
1110 nothing useful to predict about them. */
1111 else if (integer_zerop (op0)
1112 || integer_zerop (TREE_OPERAND (cond, 1)))
1113 ;
1114 else
1115 predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, NOT_TAKEN);
1116 break;
1117
1118 case NE_EXPR:
1119 case LTGT_EXPR:
1120 /* Floating point comparisons appear to behave in a very
1121 unpredictable way because of the special role of equality
1122 tests in FP code. */
1123 if (FLOAT_TYPE_P (type))
1124 ;
1125 /* Comparisons with 0 are often used for booleans and there is
1126 nothing useful to predict about them. */
1127 else if (integer_zerop (op0)
1128 || integer_zerop (TREE_OPERAND (cond, 1)))
1129 ;
1130 else
1131 predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, TAKEN);
1132 break;
1133
1134 case ORDERED_EXPR:
1135 predict_edge_def (then_edge, PRED_TREE_FPOPCODE, TAKEN);
1136 break;
1137
1138 case UNORDERED_EXPR:
1139 predict_edge_def (then_edge, PRED_TREE_FPOPCODE, NOT_TAKEN);
1140 break;
1141
1142 case LE_EXPR:
1143 case LT_EXPR:
1144 if (integer_zerop (TREE_OPERAND (cond, 1))
1145 || integer_onep (TREE_OPERAND (cond, 1))
1146 || integer_all_onesp (TREE_OPERAND (cond, 1))
1147 || real_zerop (TREE_OPERAND (cond, 1))
1148 || real_onep (TREE_OPERAND (cond, 1))
1149 || real_minus_onep (TREE_OPERAND (cond, 1)))
1150 predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, NOT_TAKEN);
1151 break;
1152
1153 case GE_EXPR:
1154 case GT_EXPR:
1155 if (integer_zerop (TREE_OPERAND (cond, 1))
1156 || integer_onep (TREE_OPERAND (cond, 1))
1157 || integer_all_onesp (TREE_OPERAND (cond, 1))
1158 || real_zerop (TREE_OPERAND (cond, 1))
1159 || real_onep (TREE_OPERAND (cond, 1))
1160 || real_minus_onep (TREE_OPERAND (cond, 1)))
1161 predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, TAKEN);
1162 break;
1163
1164 default:
1165 break;
1166 }
1167 }
1168
1169 /* Try to guess whether the return value means an error code. */
1170 static enum br_predictor
1171 return_prediction (tree val, enum prediction *prediction)
1172 {
1173 /* VOID. */
1174 if (!val)
1175 return PRED_NO_PREDICTION;
1176 /* Different heuristics for pointers and scalars. */
1177 if (POINTER_TYPE_P (TREE_TYPE (val)))
1178 {
1179 /* NULL is usually not returned. */
1180 if (integer_zerop (val))
1181 {
1182 *prediction = NOT_TAKEN;
1183 return PRED_NULL_RETURN;
1184 }
1185 }
1186 else if (INTEGRAL_TYPE_P (TREE_TYPE (val)))
1187 {
1188 /* Negative return values are often used to indicate
1189 errors. */
1190 if (TREE_CODE (val) == INTEGER_CST
1191 && tree_int_cst_sgn (val) < 0)
1192 {
1193 *prediction = NOT_TAKEN;
1194 return PRED_NEGATIVE_RETURN;
1195 }
1196 /* Constant return values seem to be commonly taken.
1197 Zero/one often represent booleans so exclude them from the
1198 heuristics. */
1199 if (TREE_CONSTANT (val)
1200 && (!integer_zerop (val) && !integer_onep (val)))
1201 {
1202 *prediction = TAKEN;
1203 return PRED_CONST_RETURN;
1204 }
1205 }
1206 return PRED_NO_PREDICTION;
1207 }
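/* Examples: "return NULL;" yields PRED_NULL_RETURN / NOT_TAKEN,
   "return -1;" yields PRED_NEGATIVE_RETURN / NOT_TAKEN and "return 42;"
   yields PRED_CONST_RETURN / TAKEN, while "return 0;" from an integer
   function is deliberately left unpredicted, since zero and one commonly
   represent booleans. */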
1208
1209 /* Find the basic block with the return expression and look up a possible
1210 return value, trying to apply the RETURN_PREDICTION heuristics. */
1211 static void
1212 apply_return_prediction (int *heads)
1213 {
1214 tree return_stmt = NULL;
1215 tree return_val;
1216 edge e;
1217 tree phi;
1218 int phi_num_args, i;
1219 enum br_predictor pred;
1220 enum prediction direction;
1221 edge_iterator ei;
1222
1223 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
1224 {
1225 return_stmt = last_stmt (e->src);
1226 if (TREE_CODE (return_stmt) == RETURN_EXPR)
1227 break;
1228 }
1229 if (!e)
1230 return;
1231 return_val = TREE_OPERAND (return_stmt, 0);
1232 if (!return_val)
1233 return;
1234 if (TREE_CODE (return_val) == MODIFY_EXPR)
1235 return_val = TREE_OPERAND (return_val, 1);
1236 if (TREE_CODE (return_val) != SSA_NAME
1237 || !SSA_NAME_DEF_STMT (return_val)
1238 || TREE_CODE (SSA_NAME_DEF_STMT (return_val)) != PHI_NODE)
1239 return;
1240 for (phi = SSA_NAME_DEF_STMT (return_val); phi; phi = PHI_CHAIN (phi))
1241 if (PHI_RESULT (phi) == return_val)
1242 break;
1243 if (!phi)
1244 return;
1245 phi_num_args = PHI_NUM_ARGS (phi);
1246 pred = return_prediction (PHI_ARG_DEF (phi, 0), &direction);
1247
1248 /* Avoid the degenerate case where all return values from the function
1249 belong to the same category (i.e. they are all positive constants)
1250 so we can hardly say anything about them. */
1251 for (i = 1; i < phi_num_args; i++)
1252 if (pred != return_prediction (PHI_ARG_DEF (phi, i), &direction))
1253 break;
1254 if (i != phi_num_args)
1255 for (i = 0; i < phi_num_args; i++)
1256 {
1257 pred = return_prediction (PHI_ARG_DEF (phi, i), &direction);
1258 if (pred != PRED_NO_PREDICTION)
1259 predict_paths_leading_to (PHI_ARG_EDGE (phi, i)->src, heads, pred,
1260 direction);
1261 }
1262 }
1263
1264 /* Look for basic blocks that contain unlikely to happen events
1265 (such as noreturn calls) and mark all paths leading to the execution
1266 of these basic blocks as unlikely. */
1267
1268 static void
1269 tree_bb_level_predictions (void)
1270 {
1271 basic_block bb;
1272 int *heads;
1273
1274 heads = xmalloc (sizeof (int) * last_basic_block);
1275 memset (heads, -1, sizeof (int) * last_basic_block);
1276 heads[ENTRY_BLOCK_PTR->next_bb->index] = last_basic_block;
1277
1278 apply_return_prediction (heads);
1279
1280 FOR_EACH_BB (bb)
1281 {
1282 block_stmt_iterator bsi = bsi_last (bb);
1283
1284 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1285 {
1286 tree stmt = bsi_stmt (bsi);
1287 switch (TREE_CODE (stmt))
1288 {
1289 case MODIFY_EXPR:
1290 if (TREE_CODE (TREE_OPERAND (stmt, 1)) == CALL_EXPR)
1291 {
1292 stmt = TREE_OPERAND (stmt, 1);
1293 goto call_expr;
1294 }
1295 break;
1296 case CALL_EXPR:
1297 call_expr:;
1298 if (call_expr_flags (stmt) & ECF_NORETURN)
1299 predict_paths_leading_to (bb, heads, PRED_NORETURN,
1300 NOT_TAKEN);
1301 break;
1302 default:
1303 break;
1304 }
1305 }
1306 }
1307
1308 free (heads);
1309 }
1310
1311 /* Predict branch probabilities and estimate the profile of the tree CFG. */
1312 static void
1313 tree_estimate_probability (void)
1314 {
1315 basic_block bb;
1316 struct loops loops_info;
1317
1318 flow_loops_find (&loops_info);
1319 if (dump_file && (dump_flags & TDF_DETAILS))
1320 flow_loops_dump (&loops_info, dump_file, NULL, 0);
1321
1322 add_noreturn_fake_exit_edges ();
1323 connect_infinite_loops_to_exit ();
1324 calculate_dominance_info (CDI_DOMINATORS);
1325 calculate_dominance_info (CDI_POST_DOMINATORS);
1326
1327 tree_bb_level_predictions ();
1328
1329 mark_irreducible_loops (&loops_info);
1330 predict_loops (&loops_info, false);
1331
1332 FOR_EACH_BB (bb)
1333 {
1334 edge e;
1335 edge_iterator ei;
1336
1337 FOR_EACH_EDGE (e, ei, bb->succs)
1338 {
1339 /* Predict early returns to be probable, as we've already taken
1340 care of error returns and other cases are often used for
1341 fast paths through the function. */
1342 if (e->dest == EXIT_BLOCK_PTR
1343 && TREE_CODE (last_stmt (bb)) == RETURN_EXPR
1344 && !single_pred_p (bb))
1345 {
1346 edge e1;
1347 edge_iterator ei1;
1348
1349 FOR_EACH_EDGE (e1, ei1, bb->preds)
1350 if (!predicted_by_p (e1->src, PRED_NULL_RETURN)
1351 && !predicted_by_p (e1->src, PRED_CONST_RETURN)
1352 && !predicted_by_p (e1->src, PRED_NEGATIVE_RETURN)
1353 && !last_basic_block_p (e1->src))
1354 predict_edge_def (e1, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
1355 }
1356
1357 /* Look for the block we are guarding (i.e. we dominate it,
1358 but it doesn't postdominate us). */
1359 if (e->dest != EXIT_BLOCK_PTR && e->dest != bb
1360 && dominated_by_p (CDI_DOMINATORS, e->dest, e->src)
1361 && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e->dest))
1362 {
1363 block_stmt_iterator bi;
1364
1365 /* The call heuristic claims that a guarded function call
1366 is improbable. This is because such calls are often used
1367 to signal exceptional situations such as printing error
1368 messages. */
1369 for (bi = bsi_start (e->dest); !bsi_end_p (bi);
1370 bsi_next (&bi))
1371 {
1372 tree stmt = bsi_stmt (bi);
1373 if ((TREE_CODE (stmt) == CALL_EXPR
1374 || (TREE_CODE (stmt) == MODIFY_EXPR
1375 && TREE_CODE (TREE_OPERAND (stmt, 1)) == CALL_EXPR))
1376 /* Constant and pure calls are hardly ever used to signal
1377 something exceptional. */
1378 && TREE_SIDE_EFFECTS (stmt))
1379 {
1380 predict_edge_def (e, PRED_CALL, NOT_TAKEN);
1381 break;
1382 }
1383 }
1384 }
1385 }
1386 tree_predict_by_opcode (bb);
1387 }
1388 FOR_EACH_BB (bb)
1389 combine_predictions_for_bb (dump_file, bb);
1390
1391 if (!flag_loop_optimize)
1392 strip_builtin_expect ();
1393 estimate_bb_frequencies (&loops_info);
1394 free_dominance_info (CDI_POST_DOMINATORS);
1395 remove_fake_exit_edges ();
1396 flow_loops_free (&loops_info);
1397 if (dump_file && (dump_flags & TDF_DETAILS))
1398 dump_tree_cfg (dump_file, dump_flags);
1399 if (profile_status == PROFILE_ABSENT)
1400 profile_status = PROFILE_GUESSED;
1401 }
1402 \f
1403 /* __builtin_expect dropped tokens into the insn stream describing expected
1404 values of registers. Generate branch probabilities based on these
1405 values. */
1406
1407 void
1408 expected_value_to_br_prob (void)
1409 {
1410 rtx insn, cond, ev = NULL_RTX, ev_reg = NULL_RTX;
1411
1412 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
1413 {
1414 switch (GET_CODE (insn))
1415 {
1416 case NOTE:
1417 /* Look for expected value notes. */
1418 if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_EXPECTED_VALUE)
1419 {
1420 ev = NOTE_EXPECTED_VALUE (insn);
1421 ev_reg = XEXP (ev, 0);
1422 delete_insn (insn);
1423 }
1424 continue;
1425
1426 case CODE_LABEL:
1427 /* Never propagate across labels. */
1428 ev = NULL_RTX;
1429 continue;
1430
1431 case JUMP_INSN:
1432 /* Look for simple conditional branches. If we haven't got an
1433 expected value yet, no point going further. */
1434 if (!JUMP_P (insn) || ev == NULL_RTX
1435 || ! any_condjump_p (insn))
1436 continue;
1437 break;
1438
1439 default:
1440 /* Look for insns that clobber the EV register. */
1441 if (ev && reg_set_p (ev_reg, insn))
1442 ev = NULL_RTX;
1443 continue;
1444 }
1445
1446 /* Collect the branch condition, hopefully relative to EV_REG. */
1447 /* ??? At present we'll miss things like
1448 (expected_value (eq r70 0))
1449 (set r71 -1)
1450 (set r80 (lt r70 r71))
1451 (set pc (if_then_else (ne r80 0) ...))
1452 as canonicalize_condition will render this to us as
1453 (lt r70, r71)
1454 Could use cselib to try and reduce this further. */
1455 cond = XEXP (SET_SRC (pc_set (insn)), 0);
1456 cond = canonicalize_condition (insn, cond, 0, NULL, ev_reg,
1457 false, false);
1458 if (! cond || XEXP (cond, 0) != ev_reg
1459 || GET_CODE (XEXP (cond, 1)) != CONST_INT)
1460 continue;
1461
1462 /* Substitute and simplify. Given that the expression we're
1463 building involves two constants, we should wind up with either
1464 true or false. */
1465 cond = gen_rtx_fmt_ee (GET_CODE (cond), VOIDmode,
1466 XEXP (ev, 1), XEXP (cond, 1));
1467 cond = simplify_rtx (cond);
1468
1469 /* Turn the condition into a scaled branch probability. */
1470 gcc_assert (cond == const_true_rtx || cond == const0_rtx);
1471 predict_insn_def (insn, PRED_BUILTIN_EXPECT,
1472 cond == const_true_rtx ? TAKEN : NOT_TAKEN);
1473 }
1474 }
1475 \f
1476 /* Check whether this is the last basic block of the function. Commonly
1477 there is one extra common cleanup block. */
1478 static bool
1479 last_basic_block_p (basic_block bb)
1480 {
1481 if (bb == EXIT_BLOCK_PTR)
1482 return false;
1483
1484 return (bb->next_bb == EXIT_BLOCK_PTR
1485 || (bb->next_bb->next_bb == EXIT_BLOCK_PTR
1486 && single_succ_p (bb)
1487 && single_succ (bb)->next_bb == EXIT_BLOCK_PTR));
1488 }
1489
1490 /* Set branch probabilities according to PRED and
1491 TAKEN. HEADS[bb->index] should be the index of the basic block in which
1492 we need to alter branch predictions (i.e. the first of our dominators
1493 such that we do not post-dominate it) (but we fill this information
1494 on demand, so -1 may be there in case this was not needed yet). */
1495
1496 static void
1497 predict_paths_leading_to (basic_block bb, int *heads, enum br_predictor pred,
1498 enum prediction taken)
1499 {
1500 edge e;
1501 edge_iterator ei;
1502 int y;
1503
1504 if (heads[bb->index] < 0)
1505 {
1506 /* This is first time we need this field in heads array; so
1507 find first dominator that we do not post-dominate (we are
1508 using already known members of heads array). */
1509 basic_block ai = bb;
1510 basic_block next_ai = get_immediate_dominator (CDI_DOMINATORS, bb);
1511 int head;
1512
1513 while (heads[next_ai->index] < 0)
1514 {
1515 if (!dominated_by_p (CDI_POST_DOMINATORS, next_ai, bb))
1516 break;
1517 heads[next_ai->index] = ai->index;
1518 ai = next_ai;
1519 next_ai = get_immediate_dominator (CDI_DOMINATORS, next_ai);
1520 }
1521 if (!dominated_by_p (CDI_POST_DOMINATORS, next_ai, bb))
1522 head = next_ai->index;
1523 else
1524 head = heads[next_ai->index];
1525 while (next_ai != bb)
1526 {
1527 next_ai = ai;
1528 if (heads[ai->index] == ENTRY_BLOCK)
1529 ai = ENTRY_BLOCK_PTR;
1530 else
1531 ai = BASIC_BLOCK (heads[ai->index]);
1532 heads[next_ai->index] = head;
1533 }
1534 }
1535 y = heads[bb->index];
1536
1537 /* Now find the edge that leads to our branch and apply the prediction. */
1538
1539 if (y == last_basic_block)
1540 return;
1541 FOR_EACH_EDGE (e, ei, BASIC_BLOCK (y)->succs)
1542 if (e->dest->index >= 0
1543 && dominated_by_p (CDI_POST_DOMINATORS, e->dest, bb))
1544 predict_edge_def (e, pred, taken);
1545 }
1546 \f
1547 /* This is used to carry information about basic blocks. It is
1548 attached to the AUX field of the standard CFG block. */
1549
1550 typedef struct block_info_def
1551 {
1552 /* Estimated frequency of execution of basic_block. */
1553 sreal frequency;
1554
1555 /* To keep queue of basic blocks to process. */
1556 basic_block next;
1557
1558 /* Number of predecessors we need to visit first. */
1559 int npredecessors;
1560 } *block_info;
1561
1562 /* Similar information for edges. */
1563 typedef struct edge_info_def
1564 {
1565 /* In case the edge is a loopback edge, the probability that the edge will
1566 be reached provided that the header is. The estimated number of
1567 iterations of the loop can then be computed as 1 / (1 - back_edge_prob). */
1568 sreal back_edge_prob;
1569 /* True if the edge is a loopback edge in the natural loop. */
1570 unsigned int back_edge:1;
1571 } *edge_info;
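/* E.g. a back_edge_prob of 0.9 (the edge probability divided by
   REG_BR_PROB_BASE) corresponds to an expected 1 / (1 - 0.9) == 10
   iterations of the loop. */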
1572
1573 #define BLOCK_INFO(B) ((block_info) (B)->aux)
1574 #define EDGE_INFO(E) ((edge_info) (E)->aux)
1575
1576 /* Helper function for estimate_bb_frequencies.
1577 Propagate the frequencies for LOOP. */
1578
1579 static void
1580 propagate_freq (struct loop *loop, bitmap tovisit)
1581 {
1582 basic_block head = loop->header;
1583 basic_block bb;
1584 basic_block last;
1585 unsigned i;
1586 edge e;
1587 basic_block nextbb;
1588 bitmap_iterator bi;
1589
1590 /* For each basic block we need to visit, count the number of its
1591 predecessors that we need to visit first. */
1592 EXECUTE_IF_SET_IN_BITMAP (tovisit, 0, i, bi)
1593 {
1594 edge_iterator ei;
1595 int count = 0;
1596
1597 /* The outermost "loop" includes the exit block, which we cannot
1598 look up via BASIC_BLOCK. Detect this and use EXIT_BLOCK_PTR
1599 directly. Do the same for the entry block. */
1600 if (i == (unsigned)ENTRY_BLOCK)
1601 bb = ENTRY_BLOCK_PTR;
1602 else if (i == (unsigned)EXIT_BLOCK)
1603 bb = EXIT_BLOCK_PTR;
1604 else
1605 bb = BASIC_BLOCK (i);
1606
1607 FOR_EACH_EDGE (e, ei, bb->preds)
1608 {
1609 bool visit = bitmap_bit_p (tovisit, e->src->index);
1610
1611 if (visit && !(e->flags & EDGE_DFS_BACK))
1612 count++;
1613 else if (visit && dump_file && !EDGE_INFO (e)->back_edge)
1614 fprintf (dump_file,
1615 "Irreducible region hit, ignoring edge to %i->%i\n",
1616 e->src->index, bb->index);
1617 }
1618 BLOCK_INFO (bb)->npredecessors = count;
1619 }
1620
1621 memcpy (&BLOCK_INFO (head)->frequency, &real_one, sizeof (real_one));
1622 last = head;
1623 for (bb = head; bb; bb = nextbb)
1624 {
1625 edge_iterator ei;
1626 sreal cyclic_probability, frequency;
1627
1628 memcpy (&cyclic_probability, &real_zero, sizeof (real_zero));
1629 memcpy (&frequency, &real_zero, sizeof (real_zero));
1630
1631 nextbb = BLOCK_INFO (bb)->next;
1632 BLOCK_INFO (bb)->next = NULL;
1633
1634 /* Compute frequency of basic block. */
1635 if (bb != head)
1636 {
1637 #ifdef ENABLE_CHECKING
1638 FOR_EACH_EDGE (e, ei, bb->preds)
1639 gcc_assert (!bitmap_bit_p (tovisit, e->src->index)
1640 || (e->flags & EDGE_DFS_BACK));
1641 #endif
1642
1643 FOR_EACH_EDGE (e, ei, bb->preds)
1644 if (EDGE_INFO (e)->back_edge)
1645 {
1646 sreal_add (&cyclic_probability, &cyclic_probability,
1647 &EDGE_INFO (e)->back_edge_prob);
1648 }
1649 else if (!(e->flags & EDGE_DFS_BACK))
1650 {
1651 sreal tmp;
1652
1653 /* frequency += (e->probability
1654 * BLOCK_INFO (e->src)->frequency /
1655 REG_BR_PROB_BASE); */
1656
1657 sreal_init (&tmp, e->probability, 0);
1658 sreal_mul (&tmp, &tmp, &BLOCK_INFO (e->src)->frequency);
1659 sreal_mul (&tmp, &tmp, &real_inv_br_prob_base);
1660 sreal_add (&frequency, &frequency, &tmp);
1661 }
1662
1663 if (sreal_compare (&cyclic_probability, &real_zero) == 0)
1664 {
1665 memcpy (&BLOCK_INFO (bb)->frequency, &frequency,
1666 sizeof (frequency));
1667 }
1668 else
1669 {
1670 if (sreal_compare (&cyclic_probability, &real_almost_one) > 0)
1671 {
1672 memcpy (&cyclic_probability, &real_almost_one,
1673 sizeof (real_almost_one));
1674 }
1675
1676 /* BLOCK_INFO (bb)->frequency = frequency
1677 / (1 - cyclic_probability) */
1678
1679 sreal_sub (&cyclic_probability, &real_one, &cyclic_probability);
1680 sreal_div (&BLOCK_INFO (bb)->frequency,
1681 &frequency, &cyclic_probability);
1682 }
1683 }
1684
1685 bitmap_clear_bit (tovisit, bb->index);
1686
1687 e = find_edge (bb, head);
1688 if (e)
1689 {
1690 sreal tmp;
1691
1692 /* EDGE_INFO (e)->back_edge_prob
1693 = ((e->probability * BLOCK_INFO (bb)->frequency)
1694 / REG_BR_PROB_BASE); */
1695
1696 sreal_init (&tmp, e->probability, 0);
1697 sreal_mul (&tmp, &tmp, &BLOCK_INFO (bb)->frequency);
1698 sreal_mul (&EDGE_INFO (e)->back_edge_prob,
1699 &tmp, &real_inv_br_prob_base);
1700 }
1701
1702 /* Propagate to successor blocks. */
1703 FOR_EACH_EDGE (e, ei, bb->succs)
1704 if (!(e->flags & EDGE_DFS_BACK)
1705 && BLOCK_INFO (e->dest)->npredecessors)
1706 {
1707 BLOCK_INFO (e->dest)->npredecessors--;
1708 if (!BLOCK_INFO (e->dest)->npredecessors)
1709 {
1710 if (!nextbb)
1711 nextbb = e->dest;
1712 else
1713 BLOCK_INFO (last)->next = e->dest;
1714
1715 last = e->dest;
1716 }
1717 }
1718 }
1719 }
1720
1721 /* Estimate probabilities of loopback edges in loops at the same nest level. */
1722
1723 static void
1724 estimate_loops_at_level (struct loop *first_loop, bitmap tovisit)
1725 {
1726 struct loop *loop;
1727
1728 for (loop = first_loop; loop; loop = loop->next)
1729 {
1730 edge e;
1731 basic_block *bbs;
1732 unsigned i;
1733
1734 estimate_loops_at_level (loop->inner, tovisit);
1735
1736 /* Do not do this for dummy function loop. */
1737 if (EDGE_COUNT (loop->latch->succs) > 0)
1738 {
1739 /* Find current loop back edge and mark it. */
1740 e = loop_latch_edge (loop);
1741 EDGE_INFO (e)->back_edge = 1;
1742 }
1743
1744 bbs = get_loop_body (loop);
1745 for (i = 0; i < loop->num_nodes; i++)
1746 bitmap_set_bit (tovisit, bbs[i]->index);
1747 free (bbs);
1748 propagate_freq (loop, tovisit);
1749 }
1750 }
1751
1752 /* Convert counts measured by profile driven feedback to frequencies.
1753 Return nonzero iff there was any nonzero execution count. */
1754
1755 int
1756 counts_to_freqs (void)
1757 {
1758 gcov_type count_max, true_count_max = 0;
1759 basic_block bb;
1760
1761 FOR_EACH_BB (bb)
1762 true_count_max = MAX (bb->count, true_count_max);
1763
1764 count_max = MAX (true_count_max, 1);
1765 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
1766 bb->frequency = (bb->count * BB_FREQ_MAX + count_max / 2) / count_max;
1767 return true_count_max;
1768 }
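/* A worked example of the scaling above, assuming BB_FREQ_MAX is 10000
   (its usual value): with count_max == 1000, a block with count 250 gets
   frequency (250 * 10000 + 500) / 1000 == 2500, so the most frequently
   executed block maps to BB_FREQ_MAX and the rest scale linearly with
   rounding to the nearest value. */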
1769
1770 /* Return true if the function is likely to be expensive, so there is no
1771 point in optimizing the prologue or epilogue or doing inlining at the
1772 expense of code size growth. THRESHOLD is the limit on the number of
1773 instructions the function can execute on average to still be considered not expensive. */
1774
1775 bool
1776 expensive_function_p (int threshold)
1777 {
1778 unsigned int sum = 0;
1779 basic_block bb;
1780 unsigned int limit;
1781
1782 /* We cannot compute accurately for large thresholds due to scaled
1783 frequencies. */
1784 gcc_assert (threshold <= BB_FREQ_MAX);
1785
1786 /* Frequencies are out of range. This either means that the function
1787 contains an internal loop executing more than BB_FREQ_MAX times or that
1788 profile feedback is available and the function has not been executed at all. */
1789 if (ENTRY_BLOCK_PTR->frequency == 0)
1790 return true;
1791
1792 /* At most BB_FREQ_MAX^2, so overflow won't happen. */
1793 limit = ENTRY_BLOCK_PTR->frequency * threshold;
1794 FOR_EACH_BB (bb)
1795 {
1796 rtx insn;
1797
1798 for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb));
1799 insn = NEXT_INSN (insn))
1800 if (active_insn_p (insn))
1801 {
1802 sum += bb->frequency;
1803 if (sum > limit)
1804 return true;
1805 }
1806 }
1807
1808 return false;
1809 }
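/* E.g. with an entry block frequency of 1000 and THRESHOLD of 20 the limit
   is 20000, so a function whose active insns' block frequencies sum past
   that, say 30 active insns in a block with frequency 700 (21000 > 20000),
   is reported expensive. */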
1810
1811 /* Estimate basic block frequencies from the given branch probabilities. */
1812
1813 static void
1814 estimate_bb_frequencies (struct loops *loops)
1815 {
1816 basic_block bb;
1817 sreal freq_max;
1818
1819 if (!flag_branch_probabilities || !counts_to_freqs ())
1820 {
1821 static int real_values_initialized = 0;
1822 bitmap tovisit;
1823
1824 if (!real_values_initialized)
1825 {
1826 real_values_initialized = 1;
1827 sreal_init (&real_zero, 0, 0);
1828 sreal_init (&real_one, 1, 0);
1829 sreal_init (&real_br_prob_base, REG_BR_PROB_BASE, 0);
1830 sreal_init (&real_bb_freq_max, BB_FREQ_MAX, 0);
1831 sreal_init (&real_one_half, 1, -1);
1832 sreal_div (&real_inv_br_prob_base, &real_one, &real_br_prob_base);
1833 sreal_sub (&real_almost_one, &real_one, &real_inv_br_prob_base);
1834 }
1835
1836 mark_dfs_back_edges ();
1837
1838 single_succ_edge (ENTRY_BLOCK_PTR)->probability = REG_BR_PROB_BASE;
1839
1840 /* Set up block info for each basic block. */
1841 tovisit = BITMAP_ALLOC (NULL);
1842 alloc_aux_for_blocks (sizeof (struct block_info_def));
1843 alloc_aux_for_edges (sizeof (struct edge_info_def));
1844 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
1845 {
1846 edge e;
1847 edge_iterator ei;
1848
1849 FOR_EACH_EDGE (e, ei, bb->succs)
1850 {
1851 sreal_init (&EDGE_INFO (e)->back_edge_prob, e->probability, 0);
1852 sreal_mul (&EDGE_INFO (e)->back_edge_prob,
1853 &EDGE_INFO (e)->back_edge_prob,
1854 &real_inv_br_prob_base);
1855 }
1856 }
1857
1858 /* First compute probabilities locally for each loop from innermost
1859 to outermost to examine probabilities for back edges. */
1860 estimate_loops_at_level (loops->tree_root, tovisit);
1861
1862 memcpy (&freq_max, &real_zero, sizeof (real_zero));
1863 FOR_EACH_BB (bb)
1864 if (sreal_compare (&freq_max, &BLOCK_INFO (bb)->frequency) < 0)
1865 memcpy (&freq_max, &BLOCK_INFO (bb)->frequency, sizeof (freq_max));
1866
1867 sreal_div (&freq_max, &real_bb_freq_max, &freq_max);
1868 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
1869 {
1870 sreal tmp;
1871
1872 sreal_mul (&tmp, &BLOCK_INFO (bb)->frequency, &freq_max);
1873 sreal_add (&tmp, &tmp, &real_one_half);
1874 bb->frequency = sreal_to_int (&tmp);
1875 }
1876
1877 free_aux_for_blocks ();
1878 free_aux_for_edges ();
1879 BITMAP_FREE (tovisit);
1880 }
1881 compute_function_frequency ();
1882 if (flag_reorder_functions)
1883 choose_function_section ();
1884 }
1885
1886 /* Decide whether the function is hot, cold or unlikely executed. */
1887 static void
1888 compute_function_frequency (void)
1889 {
1890 basic_block bb;
1891
1892 if (!profile_info || !flag_branch_probabilities)
1893 return;
1894 cfun->function_frequency = FUNCTION_FREQUENCY_UNLIKELY_EXECUTED;
1895 FOR_EACH_BB (bb)
1896 {
1897 if (maybe_hot_bb_p (bb))
1898 {
1899 cfun->function_frequency = FUNCTION_FREQUENCY_HOT;
1900 return;
1901 }
1902 if (!probably_never_executed_bb_p (bb))
1903 cfun->function_frequency = FUNCTION_FREQUENCY_NORMAL;
1904 }
1905 }
1906
1907 /* Choose appropriate section for the function. */
1908 static void
1909 choose_function_section (void)
1910 {
1911 if (DECL_SECTION_NAME (current_function_decl)
1912 || !targetm.have_named_sections
1913 /* Theoretically we can split the gnu.linkonce text section too,
1914 but this requires more work as the frequency needs to match
1915 for all generated objects so we need to merge the frequency
1916 of all instances. For now just never set frequency for these. */
1917 || DECL_ONE_ONLY (current_function_decl))
1918 return;
1919
1920 /* If we are doing the partitioning optimization, let the optimization
1921 choose the correct section into which to put things. */
1922
1923 if (flag_reorder_blocks_and_partition)
1924 return;
1925
1926 if (cfun->function_frequency == FUNCTION_FREQUENCY_HOT)
1927 DECL_SECTION_NAME (current_function_decl) =
1928 build_string (strlen (HOT_TEXT_SECTION_NAME), HOT_TEXT_SECTION_NAME);
1929 if (cfun->function_frequency == FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
1930 DECL_SECTION_NAME (current_function_decl) =
1931 build_string (strlen (UNLIKELY_EXECUTED_TEXT_SECTION_NAME),
1932 UNLIKELY_EXECUTED_TEXT_SECTION_NAME);
1933 }
1934
1935 static bool
1936 gate_estimate_probability (void)
1937 {
1938 return flag_guess_branch_prob;
1939 }
1940
1941 struct tree_opt_pass pass_profile =
1942 {
1943 "profile", /* name */
1944 gate_estimate_probability, /* gate */
1945 tree_estimate_probability, /* execute */
1946 NULL, /* sub */
1947 NULL, /* next */
1948 0, /* static_pass_number */
1949 TV_BRANCH_PROB, /* tv_id */
1950 PROP_cfg, /* properties_required */
1951 0, /* properties_provided */
1952 0, /* properties_destroyed */
1953 0, /* todo_flags_start */
1954 TODO_ggc_collect | TODO_verify_ssa, /* todo_flags_finish */
1955 0 /* letter */
1956 };