1 /* Branch prediction routines for the GNU compiler.
2 Copyright (C) 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 2, or (at your option) any later
9 version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
19 02111-1307, USA. */
20
21 /* References:
22
23 [1] "Branch Prediction for Free"
24 Ball and Larus; PLDI '93.
25 [2] "Static Branch Frequency and Program Profile Analysis"
26 Wu and Larus; MICRO-27.
27 [3] "Corpus-based Static Branch Prediction"
28 Calder, Grunwald, Lindsay, Martin, Mozer, and Zorn; PLDI '95. */
29
30
31 #include "config.h"
32 #include "system.h"
33 #include "coretypes.h"
34 #include "tm.h"
35 #include "tree.h"
36 #include "rtl.h"
37 #include "tm_p.h"
38 #include "hard-reg-set.h"
39 #include "basic-block.h"
40 #include "insn-config.h"
41 #include "regs.h"
42 #include "flags.h"
43 #include "output.h"
44 #include "function.h"
45 #include "except.h"
46 #include "toplev.h"
47 #include "recog.h"
48 #include "expr.h"
49 #include "predict.h"
50 #include "coverage.h"
51 #include "sreal.h"
52 #include "params.h"
53 #include "target.h"
54 #include "cfgloop.h"
55 #include "tree-flow.h"
56 #include "ggc.h"
57 #include "tree-dump.h"
58 #include "tree-pass.h"
59 #include "timevar.h"
60 #include "tree-scalar-evolution.h"
61 #include "cfgloop.h"
62
63 /* real constants: 0, 1, 1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE,
64 1/REG_BR_PROB_BASE, 0.5, BB_FREQ_MAX. */
65 static sreal real_zero, real_one, real_almost_one, real_br_prob_base,
66 real_inv_br_prob_base, real_one_half, real_bb_freq_max;
67
68 /* Rough guesses at probabilities, given names. */
69 #define PROB_VERY_UNLIKELY (REG_BR_PROB_BASE / 10 - 1)
70 #define PROB_EVEN (REG_BR_PROB_BASE / 2)
71 #define PROB_VERY_LIKELY (REG_BR_PROB_BASE - PROB_VERY_UNLIKELY)
72 #define PROB_ALWAYS (REG_BR_PROB_BASE)
73
74 static void combine_predictions_for_insn (rtx, basic_block);
75 static void dump_prediction (FILE *, enum br_predictor, int, basic_block, int);
76 static void estimate_loops_at_level (struct loop *loop);
77 static void propagate_freq (struct loop *);
78 static void estimate_bb_frequencies (struct loops *);
79 static int counts_to_freqs (void);
80 static bool last_basic_block_p (basic_block);
81 static void compute_function_frequency (void);
82 static void choose_function_section (void);
83 static bool can_predict_insn_p (rtx);
84
85 /* Information we hold about each branch predictor.
86 Filled using information from predict.def. */
87
88 struct predictor_info
89 {
90 const char *const name; /* Name used in the debugging dumps. */
91 const int hitrate; /* Expected hitrate used by
92 predict_insn_def call. */
93 const int flags;
94 };
95
96 /* Use given predictor without Dempster-Shafer theory if it matches
97 using first_match heuristics. */
98 #define PRED_FLAG_FIRST_MATCH 1
99
100 /* Convert a hitrate given in percent to our representation. */
101
102 #define HITRATE(VAL) ((int) ((VAL) * REG_BR_PROB_BASE + 50) / 100)
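/* A worked example (assuming REG_BR_PROB_BASE is 10000, as defined in
   rtl.h): HITRATE (50) = (50 * 10000 + 50) / 100 = 5000, an even chance,
   and HITRATE (99) = 9900.  The +50 term rounds to the nearest unit when
   REG_BR_PROB_BASE is not a multiple of 100.  */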
103
104 #define DEF_PREDICTOR(ENUM, NAME, HITRATE, FLAGS) {NAME, HITRATE, FLAGS},
105 static const struct predictor_info predictor_info[]= {
106 #include "predict.def"
107
108 /* Upper bound on predictors. */
109 {NULL, 0, 0}
110 };
111 #undef DEF_PREDICTOR
112
113 /* Return true in case BB can be CPU intensive and should be optimized
114 for maximal performance. */
115
116 bool
117 maybe_hot_bb_p (basic_block bb)
118 {
119 if (profile_info && flag_branch_probabilities
120 && (bb->count
121 < profile_info->sum_max / PARAM_VALUE (HOT_BB_COUNT_FRACTION)))
122 return false;
123 if (bb->frequency < BB_FREQ_MAX / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION))
124 return false;
125 return true;
126 }
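/* For example, with the default parameter values of this era
   (HOT_BB_COUNT_FRACTION 10000 and HOT_BB_FREQUENCY_FRACTION 1000; see
   params.def), a profiled block counts as maybe-hot only when its count
   reaches sum_max / 10000, and a statically estimated block only when
   its frequency reaches BB_FREQ_MAX / 1000.  */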
127
128 /* Return true in case BB is cold and should be optimized for size. */
129
130 bool
131 probably_cold_bb_p (basic_block bb)
132 {
133 if (profile_info && flag_branch_probabilities
134 && (bb->count
135 < profile_info->sum_max / PARAM_VALUE (HOT_BB_COUNT_FRACTION)))
136 return true;
137 if (bb->frequency < BB_FREQ_MAX / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION))
138 return true;
139 return false;
140 }
141
142 /* Return true in case BB is probably never executed. */
143 bool
144 probably_never_executed_bb_p (basic_block bb)
145 {
146 if (profile_info && flag_branch_probabilities)
147 return ((bb->count + profile_info->runs / 2) / profile_info->runs) == 0;
148 return false;
149 }
150
151 /* Return true if one of the outgoing edges is already predicted by
152 PREDICTOR. */
153
154 bool
155 rtl_predicted_by_p (basic_block bb, enum br_predictor predictor)
156 {
157 rtx note;
158 if (!INSN_P (BB_END (bb)))
159 return false;
160 for (note = REG_NOTES (BB_END (bb)); note; note = XEXP (note, 1))
161 if (REG_NOTE_KIND (note) == REG_BR_PRED
162 && INTVAL (XEXP (XEXP (note, 0), 0)) == (int)predictor)
163 return true;
164 return false;
165 }
166
167 /* Return true if one of the outgoing edges is already predicted by
168 PREDICTOR. */
169
170 bool
171 tree_predicted_by_p (basic_block bb, enum br_predictor predictor)
172 {
173 struct edge_prediction *i;
174 for (i = bb_ann (bb)->predictions; i; i = i->next)
175 if (i->predictor == predictor)
176 return true;
177 return false;
178 }
179
180 void
181 predict_insn (rtx insn, enum br_predictor predictor, int probability)
182 {
183 if (!any_condjump_p (insn))
184 abort ();
185 if (!flag_guess_branch_prob)
186 return;
187
188 REG_NOTES (insn)
189 = gen_rtx_EXPR_LIST (REG_BR_PRED,
190 gen_rtx_CONCAT (VOIDmode,
191 GEN_INT ((int) predictor),
192 GEN_INT ((int) probability)),
193 REG_NOTES (insn));
194 }
195
196 /* Predict insn by given predictor. */
197
198 void
199 predict_insn_def (rtx insn, enum br_predictor predictor,
200 enum prediction taken)
201 {
202 int probability = predictor_info[(int) predictor].hitrate;
203
204 if (taken != TAKEN)
205 probability = REG_BR_PROB_BASE - probability;
206
207 predict_insn (insn, predictor, probability);
208 }
209
210 /* Predict edge E with given probability if possible. */
211
212 void
213 rtl_predict_edge (edge e, enum br_predictor predictor, int probability)
214 {
215 rtx last_insn;
216 last_insn = BB_END (e->src);
217
218 /* We can store the branch prediction information only about
219 conditional jumps. */
220 if (!any_condjump_p (last_insn))
221 return;
222
223 /* We always store probability of branching. */
224 if (e->flags & EDGE_FALLTHRU)
225 probability = REG_BR_PROB_BASE - probability;
226
227 predict_insn (last_insn, predictor, probability);
228 }
229
230 /* Predict edge E with the given PROBABILITY. */
231 void
232 tree_predict_edge (edge e, enum br_predictor predictor, int probability)
233 {
234 struct edge_prediction *i = ggc_alloc (sizeof (struct edge_prediction));
235
236 i->next = bb_ann (e->src)->predictions;
237 bb_ann (e->src)->predictions = i;
238 i->probability = probability;
239 i->predictor = predictor;
240 i->edge = e;
241 }
242
243 /* Return true when we can store a prediction on insn INSN.
244 At the moment we represent predictions only on conditional
245 jumps, not on computed jumps or other complicated cases. */
246 static bool
247 can_predict_insn_p (rtx insn)
248 {
249 return (JUMP_P (insn)
250 && any_condjump_p (insn)
251 && BLOCK_FOR_INSN (insn)->succ->succ_next);
252 }
253
254 /* Predict edge E by given predictor if possible. */
255
256 void
257 predict_edge_def (edge e, enum br_predictor predictor,
258 enum prediction taken)
259 {
260 int probability = predictor_info[(int) predictor].hitrate;
261
262 if (taken != TAKEN)
263 probability = REG_BR_PROB_BASE - probability;
264
265 predict_edge (e, predictor, probability);
266 }
267
268 /* Invert all branch predictions or probability notes in the INSN. This needs
269 to be done each time we invert the condition used by the jump. */
270
271 void
272 invert_br_probabilities (rtx insn)
273 {
274 rtx note;
275
276 for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
277 if (REG_NOTE_KIND (note) == REG_BR_PROB)
278 XEXP (note, 0) = GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (note, 0)));
279 else if (REG_NOTE_KIND (note) == REG_BR_PRED)
280 XEXP (XEXP (note, 0), 1)
281 = GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (XEXP (note, 0), 1)));
282 }
283
284 /* Dump information about the branch prediction to the output file. */
285
286 static void
287 dump_prediction (FILE *file, enum br_predictor predictor, int probability,
288 basic_block bb, int used)
289 {
290 edge e = bb->succ;
291
292 if (!file)
293 return;
294
295 while (e && (e->flags & EDGE_FALLTHRU))
296 e = e->succ_next;
297
298 fprintf (file, " %s heuristics%s: %.1f%%",
299 predictor_info[predictor].name,
300 used ? "" : " (ignored)", probability * 100.0 / REG_BR_PROB_BASE);
301
302 if (bb->count)
303 {
304 fprintf (file, " exec ");
305 fprintf (file, HOST_WIDEST_INT_PRINT_DEC, bb->count);
306 if (e)
307 {
308 fprintf (file, " hit ");
309 fprintf (file, HOST_WIDEST_INT_PRINT_DEC, e->count);
310 fprintf (file, " (%.1f%%)", e->count * 100.0 / bb->count);
311 }
312 }
313
314 fprintf (file, "\n");
315 }
316
317 /* We cannot predict the probabilities of the outgoing edges of BB. Set them
318 evenly and hope for the best. */
319 static void
320 set_even_probabilities (basic_block bb)
321 {
322 int nedges = 0;
323 edge e;
324
325 for (e = bb->succ; e; e = e->succ_next)
326 if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
327 nedges ++;
328 for (e = bb->succ; e; e = e->succ_next)
329 if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
330 e->probability = (REG_BR_PROB_BASE + nedges / 2) / nedges;
331 else
332 e->probability = 0;
333 }
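/* For example, a three-way computed jump with no EH or fake edges gets
   (REG_BR_PROB_BASE + 1) / 3 = 3333 on each edge (with a base of 10000);
   because of the rounding, the three probabilities sum to 9999 rather
   than exactly REG_BR_PROB_BASE.  */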
334
335 /* Combine all REG_BR_PRED notes into a single probability and attach a REG_BR_PROB
336 note if not already present. Remove the now useless REG_BR_PRED notes. */
337
338 static void
339 combine_predictions_for_insn (rtx insn, basic_block bb)
340 {
341 rtx prob_note;
342 rtx *pnote;
343 rtx note;
344 int best_probability = PROB_EVEN;
345 int best_predictor = END_PREDICTORS;
346 int combined_probability = REG_BR_PROB_BASE / 2;
347 int d;
348 bool first_match = false;
349 bool found = false;
350
351 if (!can_predict_insn_p (insn))
352 {
353 set_even_probabilities (bb);
354 return;
355 }
356
357 prob_note = find_reg_note (insn, REG_BR_PROB, 0);
358 pnote = &REG_NOTES (insn);
359 if (dump_file)
360 fprintf (dump_file, "Predictions for insn %i bb %i\n", INSN_UID (insn),
361 bb->index);
362
363 /* We implement "first match" heuristics and use the probability guessed
364 by the predictor with the smallest index. */
365 for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
366 if (REG_NOTE_KIND (note) == REG_BR_PRED)
367 {
368 int predictor = INTVAL (XEXP (XEXP (note, 0), 0));
369 int probability = INTVAL (XEXP (XEXP (note, 0), 1));
370
371 found = true;
372 if (best_predictor > predictor)
373 best_probability = probability, best_predictor = predictor;
374
375 d = (combined_probability * probability
376 + (REG_BR_PROB_BASE - combined_probability)
377 * (REG_BR_PROB_BASE - probability));
378
379 /* Use FP math to avoid overflows of 32-bit integers. */
380 if (d == 0)
381 /* If one probability is 0% and one 100%, avoid division by zero. */
382 combined_probability = REG_BR_PROB_BASE / 2;
383 else
384 combined_probability = (((double) combined_probability) * probability
385 * REG_BR_PROB_BASE / d + 0.5);
386 }
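/* A worked example of the Dempster-Shafer combination above, assuming
   REG_BR_PROB_BASE of 10000: combining predictions of 8000 and 7000
   gives d = 8000 * 7000 + 2000 * 3000 = 62000000, so the combined
   probability becomes (int) (8000 * 7000 * 10000.0 / 62000000 + 0.5)
   = 9032 -- two agreeing predictors reinforce each other.  */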
387
388 /* Decide which heuristic to use. If we didn't match anything,
389 use the no_prediction heuristic; if we did match, use either
390 first match or Dempster-Shafer theory depending on the flags. */
391
392 if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
393 first_match = true;
394
395 if (!found)
396 dump_prediction (dump_file, PRED_NO_PREDICTION,
397 combined_probability, bb, true);
398 else
399 {
400 dump_prediction (dump_file, PRED_DS_THEORY, combined_probability,
401 bb, !first_match);
402 dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability,
403 bb, first_match);
404 }
405
406 if (first_match)
407 combined_probability = best_probability;
408 dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);
409
410 while (*pnote)
411 {
412 if (REG_NOTE_KIND (*pnote) == REG_BR_PRED)
413 {
414 int predictor = INTVAL (XEXP (XEXP (*pnote, 0), 0));
415 int probability = INTVAL (XEXP (XEXP (*pnote, 0), 1));
416
417 dump_prediction (dump_file, predictor, probability, bb,
418 !first_match || best_predictor == predictor);
419 *pnote = XEXP (*pnote, 1);
420 }
421 else
422 pnote = &XEXP (*pnote, 1);
423 }
424
425 if (!prob_note)
426 {
427 REG_NOTES (insn)
428 = gen_rtx_EXPR_LIST (REG_BR_PROB,
429 GEN_INT (combined_probability), REG_NOTES (insn));
430
431 /* Save the prediction into the CFG in case we are looking at a
432 non-degenerate conditional jump. */
433 if (bb->succ->succ_next)
434 {
435 BRANCH_EDGE (bb)->probability = combined_probability;
436 FALLTHRU_EDGE (bb)->probability
437 = REG_BR_PROB_BASE - combined_probability;
438 }
439 }
440 }
441
442 /* Combine the predictions into a single probability and store it in the CFG.
443 Remove the now useless prediction entries. */
444
445 static void
446 combine_predictions_for_bb (FILE *file, basic_block bb)
447 {
448 int best_probability = PROB_EVEN;
449 int best_predictor = END_PREDICTORS;
450 int combined_probability = REG_BR_PROB_BASE / 2;
451 int d;
452 bool first_match = false;
453 bool found = false;
454 struct edge_prediction *pred;
455 int nedges = 0;
456 edge e, first = NULL, second = NULL;
457
458 for (e = bb->succ; e; e = e->succ_next)
459 if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
460 {
461 nedges ++;
462 if (first && !second)
463 second = e;
464 if (!first)
465 first = e;
466 }
467
468 /* When there is no successor or only one choice, prediction is easy.
469
470 We are lazy for now and predict only basic blocks with two outgoing
471 edges. It is possible to predict the generic case too, but we have to
472 ignore first match heuristics and do more involved combining. Implement
473 this later. */
474 if (nedges != 2)
475 {
476 if (!bb->count)
477 set_even_probabilities (bb);
478 bb_ann (bb)->predictions = NULL;
479 if (file)
480 fprintf (file, "%i edges in bb %i predicted to even probabilities\n",
481 nedges, bb->index);
482 return;
483 }
484
485 if (file)
486 fprintf (file, "Predictions for bb %i\n", bb->index);
487
488 /* We implement "first match" heuristics and use the probability guessed
489 by the predictor with the smallest index. */
490 for (pred = bb_ann (bb)->predictions; pred; pred = pred->next)
491 {
492 int predictor = pred->predictor;
493 int probability = pred->probability;
494
495 if (pred->edge != first)
496 probability = REG_BR_PROB_BASE - probability;
497
498 found = true;
499 if (best_predictor > predictor)
500 best_probability = probability, best_predictor = predictor;
501
502 d = (combined_probability * probability
503 + (REG_BR_PROB_BASE - combined_probability)
504 * (REG_BR_PROB_BASE - probability));
505
506 /* Use FP math to avoid overflows of 32-bit integers. */
507 if (d == 0)
508 /* If one probability is 0% and one 100%, avoid division by zero. */
509 combined_probability = REG_BR_PROB_BASE / 2;
510 else
511 combined_probability = (((double) combined_probability) * probability
512 * REG_BR_PROB_BASE / d + 0.5);
513 }
514
515 /* Decide which heuristic to use. If we didn't match anything,
516 use the no_prediction heuristic; if we did match, use either
517 first match or Dempster-Shafer theory depending on the flags. */
518
519 if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
520 first_match = true;
521
522 if (!found)
523 dump_prediction (file, PRED_NO_PREDICTION, combined_probability, bb, true);
524 else
525 {
526 dump_prediction (file, PRED_DS_THEORY, combined_probability, bb,
527 !first_match);
528 dump_prediction (file, PRED_FIRST_MATCH, best_probability, bb,
529 first_match);
530 }
531
532 if (first_match)
533 combined_probability = best_probability;
534 dump_prediction (file, PRED_COMBINED, combined_probability, bb, true);
535
536 for (pred = bb_ann (bb)->predictions; pred; pred = pred->next)
537 {
538 int predictor = pred->predictor;
539 int probability = pred->probability;
540
541 if (pred->edge != bb->succ)
542 probability = REG_BR_PROB_BASE - probability;
543 dump_prediction (file, predictor, probability, bb,
544 !first_match || best_predictor == predictor);
545 }
546 bb_ann (bb)->predictions = NULL;
547
548 if (!bb->count)
549 {
550 first->probability = combined_probability;
551 second->probability = REG_BR_PROB_BASE - combined_probability;
552 }
553 }
554
555 /* Predict edge probabilities by exploiting loop structure.
556 When RTLSIMPLELOOPS is set, attempt to count the number of iterations
557 by analyzing the RTL; otherwise use the tree-based approach. */
558 static void
559 predict_loops (struct loops *loops_info, bool rtlsimpleloops)
560 {
561 unsigned i;
562
563 if (!rtlsimpleloops)
564 scev_initialize (loops_info);
565
566 /* Try to predict the branches of blocks that belong to each
567 natural loop. */
568 for (i = 1; i < loops_info->num; i++)
569 {
570 basic_block bb, *bbs;
571 unsigned j;
572 int exits;
573 struct loop *loop = loops_info->parray[i];
574 struct niter_desc desc;
575 unsigned HOST_WIDE_INT niter;
576
577 flow_loop_scan (loop, LOOP_EXIT_EDGES);
578 exits = loop->num_exits;
579
580 if (rtlsimpleloops)
581 {
582 iv_analysis_loop_init (loop);
583 find_simple_exit (loop, &desc);
584
585 if (desc.simple_p && desc.const_iter)
586 {
587 int prob;
588 niter = desc.niter + 1;
589 if (niter == 0) /* We might overflow here. */
590 niter = desc.niter;
591
592 prob = (REG_BR_PROB_BASE
593 - (REG_BR_PROB_BASE + niter / 2) / niter);
594 /* The branch prediction algorithm gives 0 frequency for everything
595 after the end of a loop having 0 probability of finishing. */
596 if (prob == REG_BR_PROB_BASE)
597 prob = REG_BR_PROB_BASE - 1;
598 predict_edge (desc.in_edge, PRED_LOOP_ITERATIONS,
599 prob);
600 }
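/* For example, a loop with a constant trip count of 100 gets
   prob = 10000 - (10000 + 50) / 100 = 9900 (with a base of 10000),
   i.e. the loop branch is predicted taken 99 times out of 100.  */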
601 }
602 else
603 {
604 edge *exits;
605 unsigned j, n_exits;
606 struct tree_niter_desc niter_desc;
607
608 exits = get_loop_exit_edges (loop, &n_exits);
609 for (j = 0; j < n_exits; j++)
610 {
611 tree niter = NULL;
612
613 if (number_of_iterations_exit (loop, exits[j], &niter_desc))
614 niter = niter_desc.niter;
615 if (!niter || TREE_CODE (niter) != INTEGER_CST)
616 niter = loop_niter_by_eval (loop, exits[j]);
617
618 if (TREE_CODE (niter) == INTEGER_CST)
619 {
620 int probability;
621 if (host_integerp (niter, 1)
622 && tree_int_cst_lt (niter,
623 build_int_cstu (NULL_TREE,
624 REG_BR_PROB_BASE - 1)))
625 {
626 HOST_WIDE_INT nitercst = tree_low_cst (niter, 1) + 1;
627 probability = (REG_BR_PROB_BASE + nitercst / 2) / nitercst;
628 }
629 else
630 probability = 1;
631
632 predict_edge (exits[j], PRED_LOOP_ITERATIONS, probability);
633 }
634 }
635
636 free (exits);
637 }
638
639 bbs = get_loop_body (loop);
640
641 for (j = 0; j < loop->num_nodes; j++)
642 {
643 int header_found = 0;
644 edge e;
645
646 bb = bbs[j];
647
648 /* Bypass loop heuristics on continue statements. These
649 statements construct loops via "non-loop" constructs
650 in the source language and are better handled
651 separately. */
652 if ((rtlsimpleloops && !can_predict_insn_p (BB_END (bb)))
653 || predicted_by_p (bb, PRED_CONTINUE))
654 continue;
655
656 /* Loop branch heuristics - predict an edge back to a
657 loop's head as taken. */
658 for (e = bb->succ; e; e = e->succ_next)
659 if (e->dest == loop->header
660 && e->src == loop->latch)
661 {
662 header_found = 1;
663 predict_edge_def (e, PRED_LOOP_BRANCH, TAKEN);
664 }
665
666 /* Loop exit heuristics - predict as not taken an edge exiting the loop
667 if the conditional has no loop header successors. */
668 if (!header_found)
669 for (e = bb->succ; e; e = e->succ_next)
670 if (e->dest->index < 0
671 || !flow_bb_inside_loop_p (loop, e->dest))
672 predict_edge
673 (e, PRED_LOOP_EXIT,
674 (REG_BR_PROB_BASE
675 - predictor_info [(int) PRED_LOOP_EXIT].hitrate)
676 / exits);
677 }
678
679 /* Free basic blocks from get_loop_body. */
680 free (bbs);
681 }
682
683 if (!rtlsimpleloops)
684 scev_reset ();
685 }
686
687 /* Attempt to predict probabilities of BB outgoing edges using local
688 properties. */
689 static void
690 bb_estimate_probability_locally (basic_block bb)
691 {
692 rtx last_insn = BB_END (bb);
693 rtx cond;
694
695 if (! can_predict_insn_p (last_insn))
696 return;
697 cond = get_condition (last_insn, NULL, false, false);
698 if (! cond)
699 return;
700
701 /* Try "pointer heuristic."
702 A comparison ptr == 0 is predicted as false.
703 Similarly, a comparison ptr1 == ptr2 is predicted as false. */
704 if (COMPARISON_P (cond)
705 && ((REG_P (XEXP (cond, 0)) && REG_POINTER (XEXP (cond, 0)))
706 || (REG_P (XEXP (cond, 1)) && REG_POINTER (XEXP (cond, 1)))))
707 {
708 if (GET_CODE (cond) == EQ)
709 predict_insn_def (last_insn, PRED_POINTER, NOT_TAKEN);
710 else if (GET_CODE (cond) == NE)
711 predict_insn_def (last_insn, PRED_POINTER, TAKEN);
712 }
713 else
714
715 /* Try "opcode heuristic."
716 EQ tests are usually false and NE tests are usually true. Also,
717 most quantities are positive, so we can make the appropriate guesses
718 about signed comparisons against zero. */
719 switch (GET_CODE (cond))
720 {
721 case CONST_INT:
722 /* Unconditional branch. */
723 predict_insn_def (last_insn, PRED_UNCONDITIONAL,
724 cond == const0_rtx ? NOT_TAKEN : TAKEN);
725 break;
726
727 case EQ:
728 case UNEQ:
729 /* Floating point comparisons appear to behave in a very
730 unpredictable way because of the special role of = tests in
731 FP code. */
732 if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
733 ;
734 /* Comparisons with 0 are often used for booleans and there is
735 nothing useful to predict about them. */
736 else if (XEXP (cond, 1) == const0_rtx
737 || XEXP (cond, 0) == const0_rtx)
738 ;
739 else
740 predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, NOT_TAKEN);
741 break;
742
743 case NE:
744 case LTGT:
745 /* Floating point comparisons appear to behave in a very
746 unpredictable way because of the special role of = tests in
747 FP code. */
748 if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
749 ;
750 /* Comparisons with 0 are often used for booleans and there is
751 nothing useful to predict about them. */
752 else if (XEXP (cond, 1) == const0_rtx
753 || XEXP (cond, 0) == const0_rtx)
754 ;
755 else
756 predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, TAKEN);
757 break;
758
759 case ORDERED:
760 predict_insn_def (last_insn, PRED_FPOPCODE, TAKEN);
761 break;
762
763 case UNORDERED:
764 predict_insn_def (last_insn, PRED_FPOPCODE, NOT_TAKEN);
765 break;
766
767 case LE:
768 case LT:
769 if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
770 || XEXP (cond, 1) == constm1_rtx)
771 predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, NOT_TAKEN);
772 break;
773
774 case GE:
775 case GT:
776 if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
777 || XEXP (cond, 1) == constm1_rtx)
778 predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, TAKEN);
779 break;
780
781 default:
782 break;
783 }
784 }
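/* Source-level sketches of what the heuristics above fire on
   (illustrative examples, not code from this file):

     if (p == NULL) ...      pointer heuristic: predicted not taken
     if (x != y) ...         opcode heuristic: NE predicted taken
     if (n < 0) ...          opcode heuristic: signed compare against
                             zero, predicted not taken since most
                             quantities are positive.  */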
785
786 /* Statically estimate the probability that a branch will be taken and produce
787 an estimated profile. When profile feedback is present, the never-executed
788 portions of the function get estimated. */
789
790 void
791 estimate_probability (struct loops *loops_info)
792 {
793 basic_block bb;
794
795 connect_infinite_loops_to_exit ();
796 calculate_dominance_info (CDI_DOMINATORS);
797 calculate_dominance_info (CDI_POST_DOMINATORS);
798
799 predict_loops (loops_info, true);
800
801 iv_analysis_done ();
802
803 /* Attempt to predict conditional jumps using a number of heuristics. */
804 FOR_EACH_BB (bb)
805 {
806 rtx last_insn = BB_END (bb);
807 edge e;
808
809 if (! can_predict_insn_p (last_insn))
810 continue;
811
812 for (e = bb->succ; e; e = e->succ_next)
813 {
814 /* Predict early returns to be probable, as we've already taken
815 care of error returns, and the others are often used for fast paths
816 through the function. */
817 if ((e->dest == EXIT_BLOCK_PTR
818 || (e->dest->succ && !e->dest->succ->succ_next
819 && e->dest->succ->dest == EXIT_BLOCK_PTR))
820 && !predicted_by_p (bb, PRED_NULL_RETURN)
821 && !predicted_by_p (bb, PRED_CONST_RETURN)
822 && !predicted_by_p (bb, PRED_NEGATIVE_RETURN)
823 && !last_basic_block_p (e->dest))
824 predict_edge_def (e, PRED_EARLY_RETURN, TAKEN);
825
826 /* Look for the block we are guarding (i.e. we dominate it,
827 but it doesn't postdominate us). */
828 if (e->dest != EXIT_BLOCK_PTR && e->dest != bb
829 && dominated_by_p (CDI_DOMINATORS, e->dest, e->src)
830 && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e->dest))
831 {
832 rtx insn;
833
834 /* The call heuristic claims that a guarded function call
835 is improbable. This is because such calls are often used
836 to signal exceptional situations such as printing error
837 messages. */
838 for (insn = BB_HEAD (e->dest); insn != NEXT_INSN (BB_END (e->dest));
839 insn = NEXT_INSN (insn))
840 if (CALL_P (insn)
841 /* Constant and pure calls are hardly used to signal
842 something exceptional. */
843 && ! CONST_OR_PURE_CALL_P (insn))
844 {
845 predict_edge_def (e, PRED_CALL, NOT_TAKEN);
846 break;
847 }
848 }
849 }
850 bb_estimate_probability_locally (bb);
851 }
852
853 /* Attach the combined probability to each conditional jump. */
854 FOR_EACH_BB (bb)
855 if (JUMP_P (BB_END (bb))
856 && any_condjump_p (BB_END (bb))
857 && bb->succ->succ_next != NULL)
858 combine_predictions_for_insn (BB_END (bb), bb);
859
860 remove_fake_exit_edges ();
861 /* Fill in the probability values in flowgraph based on the REG_BR_PROB
862 notes. */
863 FOR_EACH_BB (bb)
864 {
865 rtx last_insn = BB_END (bb);
866
867 if (!can_predict_insn_p (last_insn))
868 {
869 /* We can predict only conditional jumps at the moment.
870 Expect each edge to be equally probable.
871 ?? In the future we want to make abnormal edges improbable. */
872 int nedges = 0;
873 edge e;
874
875 for (e = bb->succ; e; e = e->succ_next)
876 {
877 nedges++;
878 if (e->probability != 0)
879 break;
880 }
881 if (!e)
882 for (e = bb->succ; e; e = e->succ_next)
883 e->probability = (REG_BR_PROB_BASE + nedges / 2) / nedges;
884 }
885 }
886 estimate_bb_frequencies (loops_info);
887 free_dominance_info (CDI_POST_DOMINATORS);
888 if (profile_status == PROFILE_ABSENT)
889 profile_status = PROFILE_GUESSED;
890 }
891
892 /* Set edge->probability for each successor edge of BB. */
893 void
894 guess_outgoing_edge_probabilities (basic_block bb)
895 {
896 bb_estimate_probability_locally (bb);
897 combine_predictions_for_insn (BB_END (bb), bb);
898 }
899 \f
900 /* Return the constant EXPR will likely have at execution time, or NULL if
901 unknown. The function is used by the builtin_expect branch predictor, so
902 the evidence must come from this construct and possible additional
903 constant folding.
904
905 We may want to implement a more involved value guess (such as value range
906 propagation based prediction), but such tricks shall go into a new implementation. */
907
908 static tree
909 expr_expected_value (tree expr, bitmap visited)
910 {
911 if (TREE_CONSTANT (expr))
912 return expr;
913 else if (TREE_CODE (expr) == SSA_NAME)
914 {
915 tree def = SSA_NAME_DEF_STMT (expr);
916
917 /* If we were already here, break the infinite cycle. */
918 if (bitmap_bit_p (visited, SSA_NAME_VERSION (expr)))
919 return NULL;
920 bitmap_set_bit (visited, SSA_NAME_VERSION (expr));
921
922 if (TREE_CODE (def) == PHI_NODE)
923 {
924 /* All the arguments of the PHI node must have the same expected
925 constant value. */
926 int i;
927 tree val = NULL, new_val;
928
929 for (i = 0; i < PHI_NUM_ARGS (def); i++)
930 {
931 tree arg = PHI_ARG_DEF (def, i);
932
933 /* If this PHI has itself as an argument, we cannot
934 determine the expected value of this argument. However,
935 if we can find an expected constant value for the other
936 PHI args then we can still be sure that this is
937 likely a constant. So be optimistic and just
938 continue with the next argument. */
939 if (arg == PHI_RESULT (def))
940 continue;
941
942 new_val = expr_expected_value (arg, visited);
943 if (!new_val)
944 return NULL;
945 if (!val)
946 val = new_val;
947 else if (!operand_equal_p (val, new_val, false))
948 return NULL;
949 }
950 return val;
951 }
952 if (TREE_CODE (def) != MODIFY_EXPR || TREE_OPERAND (def, 0) != expr)
953 return NULL;
954 return expr_expected_value (TREE_OPERAND (def, 1), visited);
955 }
956 else if (TREE_CODE (expr) == CALL_EXPR)
957 {
958 tree decl = get_callee_fndecl (expr);
959 if (!decl)
960 return NULL;
961 if (DECL_BUILT_IN (decl) && DECL_FUNCTION_CODE (decl) == BUILT_IN_EXPECT)
962 {
963 tree arglist = TREE_OPERAND (expr, 1);
964 tree val;
965
966 if (arglist == NULL_TREE
967 || TREE_CHAIN (arglist) == NULL_TREE)
968 return NULL;
969 val = TREE_VALUE (TREE_CHAIN (TREE_OPERAND (expr, 1)));
970 /* Return the expected value whether or not it folded to
971 a constant. */
972 return val;
973 }
974 }
975 if (TREE_CODE_CLASS (TREE_CODE (expr)) == '2'
976 || TREE_CODE_CLASS (TREE_CODE (expr)) == '<')
977 {
978 tree op0, op1, res;
979 op0 = expr_expected_value (TREE_OPERAND (expr, 0), visited);
980 if (!op0)
981 return NULL;
982 op1 = expr_expected_value (TREE_OPERAND (expr, 1), visited);
983 if (!op1)
984 return NULL;
985 res = fold (build (TREE_CODE (expr), TREE_TYPE (expr), op0, op1));
986 if (TREE_CONSTANT (res))
987 return res;
988 return NULL;
989 }
990 if (TREE_CODE_CLASS (TREE_CODE (expr)) == '1')
991 {
992 tree op0, res;
993 op0 = expr_expected_value (TREE_OPERAND (expr, 0), visited);
994 if (!op0)
995 return NULL;
996 res = fold (build1 (TREE_CODE (expr), TREE_TYPE (expr), op0));
997 if (TREE_CONSTANT (res))
998 return res;
999 return NULL;
1000 }
1001 return NULL;
1002 }
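/* For example (a sketch of the intended use), given source such as

     if (__builtin_expect (x > 0, 1))
       ...

   the COND_EXPR tests an SSA name whose definition chain leads back to
   the builtin_expect call, so expr_expected_value returns the constant 1
   and the branch will be predicted as taken.  */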
1003 \f
1004 /* Get rid of all builtin_expect calls we no longer need. */
1005 static void
1006 strip_builtin_expect (void)
1007 {
1008 basic_block bb;
1009 FOR_EACH_BB (bb)
1010 {
1011 block_stmt_iterator bi;
1012 for (bi = bsi_start (bb); !bsi_end_p (bi); bsi_next (&bi))
1013 {
1014 tree stmt = bsi_stmt (bi);
1015 tree fndecl;
1016 tree arglist;
1017
1018 if (TREE_CODE (stmt) == MODIFY_EXPR
1019 && TREE_CODE (TREE_OPERAND (stmt, 1)) == CALL_EXPR
1020 && (fndecl = get_callee_fndecl (TREE_OPERAND (stmt, 1)))
1021 && DECL_BUILT_IN (fndecl)
1022 && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_EXPECT
1023 && (arglist = TREE_OPERAND (TREE_OPERAND (stmt, 1), 1))
1024 && TREE_CHAIN (arglist))
1025 {
1026 TREE_OPERAND (stmt, 1) = TREE_VALUE (arglist);
1027 modify_stmt (stmt);
1028 }
1029 }
1030 }
1031 }
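/* For example, once the prediction has been consumed, a statement of
   the form

     tmp = __builtin_expect (cond, 1);

   is rewritten above into simply

     tmp = cond;  */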
1032 \f
1033 /* Predict using opcode of the last statement in basic block. */
1034 static void
1035 tree_predict_by_opcode (basic_block bb)
1036 {
1037 tree stmt = last_stmt (bb);
1038 edge then_edge;
1039 tree cond;
1040 tree op0;
1041 tree type;
1042 tree val;
1043 bitmap visited;
1044
1045 if (!stmt || TREE_CODE (stmt) != COND_EXPR)
1046 return;
1047 for (then_edge = bb->succ; then_edge; then_edge = then_edge->succ_next)
1048 if (then_edge->flags & EDGE_TRUE_VALUE)
1049 break;
1050 cond = TREE_OPERAND (stmt, 0);
1051 if (TREE_CODE_CLASS (TREE_CODE (cond)) != '<')
1052 return;
1053 op0 = TREE_OPERAND (cond, 0);
1054 type = TREE_TYPE (op0);
1055 visited = BITMAP_XMALLOC ();
1056 val = expr_expected_value (cond, visited);
1057 BITMAP_XFREE (visited);
1058 if (val)
1059 {
1060 if (integer_zerop (val))
1061 predict_edge_def (then_edge, PRED_BUILTIN_EXPECT, NOT_TAKEN);
1062 else
1063 predict_edge_def (then_edge, PRED_BUILTIN_EXPECT, TAKEN);
1064 return;
1065 }
1066 /* Try "pointer heuristic."
1067 A comparison ptr == 0 is predicted as false.
1068 Similarly, a comparison ptr1 == ptr2 is predicted as false. */
1069 if (POINTER_TYPE_P (type))
1070 {
1071 if (TREE_CODE (cond) == EQ_EXPR)
1072 predict_edge_def (then_edge, PRED_TREE_POINTER, NOT_TAKEN);
1073 else if (TREE_CODE (cond) == NE_EXPR)
1074 predict_edge_def (then_edge, PRED_TREE_POINTER, TAKEN);
1075 }
1076 else
1077
1078 /* Try "opcode heuristic."
1079 EQ tests are usually false and NE tests are usually true. Also,
1080 most quantities are positive, so we can make the appropriate guesses
1081 about signed comparisons against zero. */
1082 switch (TREE_CODE (cond))
1083 {
1084 case EQ_EXPR:
1085 case UNEQ_EXPR:
1086 /* Floating point comparisons appear to behave in a very
1087 unpredictable way because of the special role of = tests in
1088 FP code. */
1089 if (FLOAT_TYPE_P (type))
1090 ;
1091 /* Comparisons with 0 are often used for booleans and there is
1092 nothing useful to predict about them. */
1093 else if (integer_zerop (op0)
1094 || integer_zerop (TREE_OPERAND (cond, 1)))
1095 ;
1096 else
1097 predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, NOT_TAKEN);
1098 break;
1099
1100 case NE_EXPR:
1101 case LTGT_EXPR:
1102 /* Floating point comparisons appear to behave in a very
1103 unpredictable way because of the special role of = tests in
1104 FP code. */
1105 if (FLOAT_TYPE_P (type))
1106 ;
1107 /* Comparisons with 0 are often used for booleans and there is
1108 nothing useful to predict about them. */
1109 else if (integer_zerop (op0)
1110 || integer_zerop (TREE_OPERAND (cond, 1)))
1111 ;
1112 else
1113 predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, TAKEN);
1114 break;
1115
1116 case ORDERED_EXPR:
1117 predict_edge_def (then_edge, PRED_TREE_FPOPCODE, TAKEN);
1118 break;
1119
1120 case UNORDERED_EXPR:
1121 predict_edge_def (then_edge, PRED_TREE_FPOPCODE, NOT_TAKEN);
1122 break;
1123
1124 case LE_EXPR:
1125 case LT_EXPR:
1126 if (integer_zerop (TREE_OPERAND (cond, 1))
1127 || integer_onep (TREE_OPERAND (cond, 1))
1128 || integer_all_onesp (TREE_OPERAND (cond, 1))
1129 || real_zerop (TREE_OPERAND (cond, 1))
1130 || real_onep (TREE_OPERAND (cond, 1))
1131 || real_minus_onep (TREE_OPERAND (cond, 1)))
1132 predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, NOT_TAKEN);
1133 break;
1134
1135 case GE_EXPR:
1136 case GT_EXPR:
1137 if (integer_zerop (TREE_OPERAND (cond, 1))
1138 || integer_onep (TREE_OPERAND (cond, 1))
1139 || integer_all_onesp (TREE_OPERAND (cond, 1))
1140 || real_zerop (TREE_OPERAND (cond, 1))
1141 || real_onep (TREE_OPERAND (cond, 1))
1142 || real_minus_onep (TREE_OPERAND (cond, 1)))
1143 predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, TAKEN);
1144 break;
1145
1146 default:
1147 break;
1148 }
1149 }
1150
1151 /* Predict branch probabilities and estimate profile of the tree CFG. */
1152 static void
1153 tree_estimate_probability (void)
1154 {
1155 basic_block bb;
1156 struct loops loops_info;
1157
1158 flow_loops_find (&loops_info, LOOP_TREE);
1159 if (dump_file && (dump_flags & TDF_DETAILS))
1160 flow_loops_dump (&loops_info, dump_file, NULL, 0);
1161
1162 connect_infinite_loops_to_exit ();
1163 calculate_dominance_info (CDI_DOMINATORS);
1164 calculate_dominance_info (CDI_POST_DOMINATORS);
1165
1166 predict_loops (&loops_info, false);
1167
1168 FOR_EACH_BB (bb)
1169 {
1170 edge e;
1171
1172 for (e = bb->succ; e; e = e->succ_next)
1173 {
1174 /* Predict early returns to be probable, as we've already taken
1175 care of error returns, and the others are often used for fast paths
1176 through the function. */
1177 if ((e->dest == EXIT_BLOCK_PTR
1178 || (e->dest->succ && !e->dest->succ->succ_next
1179 && e->dest->succ->dest == EXIT_BLOCK_PTR))
1180 && !predicted_by_p (bb, PRED_NULL_RETURN)
1181 && !predicted_by_p (bb, PRED_CONST_RETURN)
1182 && !predicted_by_p (bb, PRED_NEGATIVE_RETURN)
1183 && !last_basic_block_p (e->dest))
1184 predict_edge_def (e, PRED_EARLY_RETURN, TAKEN);
1185
1186 /* Look for the block we are guarding (i.e. we dominate it,
1187 but it doesn't postdominate us). */
1188 if (e->dest != EXIT_BLOCK_PTR && e->dest != bb
1189 && dominated_by_p (CDI_DOMINATORS, e->dest, e->src)
1190 && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e->dest))
1191 {
1192 block_stmt_iterator bi;
1193
1194 /* The call heuristic claims that a guarded function call
1195 is improbable. This is because such calls are often used
1196 to signal exceptional situations such as printing error
1197 messages. */
1198 for (bi = bsi_start (e->dest); !bsi_end_p (bi);
1199 bsi_next (&bi))
1200 {
1201 tree stmt = bsi_stmt (bi);
1202 if ((TREE_CODE (stmt) == CALL_EXPR
1203 || (TREE_CODE (stmt) == MODIFY_EXPR
1204 && TREE_CODE (TREE_OPERAND (stmt, 1)) == CALL_EXPR))
1205 /* Constant and pure calls are hardly used to signal
1206 something exceptional. */
1207 && TREE_SIDE_EFFECTS (stmt))
1208 {
1209 predict_edge_def (e, PRED_CALL, NOT_TAKEN);
1210 break;
1211 }
1212 }
1213 }
1214 }
1215 tree_predict_by_opcode (bb);
1216 }
1217 FOR_EACH_BB (bb)
1218 combine_predictions_for_bb (dump_file, bb);
1219
1220 if (0) /* FIXME: Enable once we pass the profile down to the RTL level. */
1221 strip_builtin_expect ();
1222 estimate_bb_frequencies (&loops_info);
1223 free_dominance_info (CDI_POST_DOMINATORS);
1224 remove_fake_exit_edges ();
1225 flow_loops_free (&loops_info);
1226 if (dump_file && (dump_flags & TDF_DETAILS))
1227 dump_tree_cfg (dump_file, dump_flags);
1228 if (profile_status == PROFILE_ABSENT)
1229 profile_status = PROFILE_GUESSED;
1230 }
1231 \f
1232 /* __builtin_expect dropped tokens into the insn stream describing expected
1233 values of registers. Generate branch probabilities based on these
1234 values. */
1235
1236 void
1237 expected_value_to_br_prob (void)
1238 {
1239 rtx insn, cond, ev = NULL_RTX, ev_reg = NULL_RTX;
1240
1241 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
1242 {
1243 switch (GET_CODE (insn))
1244 {
1245 case NOTE:
1246 /* Look for expected value notes. */
1247 if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_EXPECTED_VALUE)
1248 {
1249 ev = NOTE_EXPECTED_VALUE (insn);
1250 ev_reg = XEXP (ev, 0);
1251 delete_insn (insn);
1252 }
1253 continue;
1254
1255 case CODE_LABEL:
1256 /* Never propagate across labels. */
1257 ev = NULL_RTX;
1258 continue;
1259
1260 case JUMP_INSN:
1261 /* Look for simple conditional branches. If we haven't got an
1262 expected value yet, no point going further. */
1263 if (!JUMP_P (insn) || ev == NULL_RTX
1264 || ! any_condjump_p (insn))
1265 continue;
1266 break;
1267
1268 default:
1269 /* Look for insns that clobber the EV register. */
1270 if (ev && reg_set_p (ev_reg, insn))
1271 ev = NULL_RTX;
1272 continue;
1273 }
1274
1275 /* Collect the branch condition, hopefully relative to EV_REG. */
1276 /* ??? At present we'll miss things like
1277 (expected_value (eq r70 0))
1278 (set r71 -1)
1279 (set r80 (lt r70 r71))
1280 (set pc (if_then_else (ne r80 0) ...))
1281 as canonicalize_condition will render this to us as
1282 (lt r70, r71)
1283 Could use cselib to try and reduce this further. */
1284 cond = XEXP (SET_SRC (pc_set (insn)), 0);
1285 cond = canonicalize_condition (insn, cond, 0, NULL, ev_reg,
1286 false, false);
1287 if (! cond || XEXP (cond, 0) != ev_reg
1288 || GET_CODE (XEXP (cond, 1)) != CONST_INT)
1289 continue;
1290
1291 /* Substitute and simplify. Given that the expression we're
1292 building involves two constants, we should wind up with either
1293 true or false. */
1294 cond = gen_rtx_fmt_ee (GET_CODE (cond), VOIDmode,
1295 XEXP (ev, 1), XEXP (cond, 1));
1296 cond = simplify_rtx (cond);
1297
1298 /* Turn the condition into a scaled branch probability. */
1299 if (cond != const_true_rtx && cond != const0_rtx)
1300 abort ();
1301 predict_insn_def (insn, PRED_BUILTIN_EXPECT,
1302 cond == const_true_rtx ? TAKEN : NOT_TAKEN);
1303 }
1304 }
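/* Following the notation of the comment above, a typical case this
   handles looks like

     (expected_value (eq r70 (const_int 1)))
     (set pc (if_then_else (ne r70 (const_int 0)) ...))

   where substituting the expected value 1 for r70 folds the condition
   to const_true_rtx, so the branch is predicted taken by the
   builtin_expect predictor.  */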
1305 \f
1306 /* Check whether this is the last basic block of the function. Commonly
1307 there is one extra common cleanup block. */
1308 static bool
1309 last_basic_block_p (basic_block bb)
1310 {
1311 if (bb == EXIT_BLOCK_PTR)
1312 return false;
1313
1314 return (bb->next_bb == EXIT_BLOCK_PTR
1315 || (bb->next_bb->next_bb == EXIT_BLOCK_PTR
1316 && bb->succ && !bb->succ->succ_next
1317 && bb->succ->dest->next_bb == EXIT_BLOCK_PTR));
1318 }
1319 \f
1320 /* This is used to carry information about basic blocks. It is
1321 attached to the AUX field of the standard CFG block. */
1322
1323 typedef struct block_info_def
1324 {
1325 /* Estimated frequency of execution of basic_block. */
1326 sreal frequency;
1327
1328 /* To keep queue of basic blocks to process. */
1329 basic_block next;
1330
1331 /* True if block needs to be visited in propagate_freq. */
1332 unsigned int tovisit:1;
1333
1334 /* Number of predecessors we need to visit first. */
1335 int npredecessors;
1336 } *block_info;
1337
1338 /* Similar information for edges. */
1339 typedef struct edge_info_def
1340 {
1341 /* In case the edge is a loopback edge, the probability that the edge
1342 will be reached given that the header is. The estimated number of
1343 iterations of the loop can then be computed as 1 / (1 - back_edge_prob). */
1344 sreal back_edge_prob;
1345 /* True if the edge is a loopback edge in the natural loop. */
1346 unsigned int back_edge:1;
1347 } *edge_info;
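/* For example, a back edge with back_edge_prob 0.9 corresponds to a
   loop expected to iterate 1 / (1 - 0.9) = 10 times.  */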
1348
1349 #define BLOCK_INFO(B) ((block_info) (B)->aux)
1350 #define EDGE_INFO(E) ((edge_info) (E)->aux)
1351
1352 /* Helper function for estimate_bb_frequencies.
1353 Propagate the frequencies for LOOP. */
1354
1355 static void
1356 propagate_freq (struct loop *loop)
1357 {
1358 basic_block head = loop->header;
1359 basic_block bb;
1360 basic_block last;
1361 edge e;
1362 basic_block nextbb;
1363
1364 /* For each basic block we need to visit, count the number of its
1365 predecessors that we need to visit first. */
1366 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
1367 {
1368 if (BLOCK_INFO (bb)->tovisit)
1369 {
1370 int count = 0;
1371
1372 for (e = bb->pred; e; e = e->pred_next)
1373 if (BLOCK_INFO (e->src)->tovisit && !(e->flags & EDGE_DFS_BACK))
1374 count++;
1375 else if (BLOCK_INFO (e->src)->tovisit
1376 && dump_file && !EDGE_INFO (e)->back_edge)
1377 fprintf (dump_file,
1378 "Irreducible region hit, ignoring edge to %i->%i\n",
1379 e->src->index, bb->index);
1380 BLOCK_INFO (bb)->npredecessors = count;
1381 }
1382 }
1383
1384 memcpy (&BLOCK_INFO (head)->frequency, &real_one, sizeof (real_one));
1385 last = head;
1386 for (bb = head; bb; bb = nextbb)
1387 {
1388 sreal cyclic_probability, frequency;
1389
1390 memcpy (&cyclic_probability, &real_zero, sizeof (real_zero));
1391 memcpy (&frequency, &real_zero, sizeof (real_zero));
1392
1393 nextbb = BLOCK_INFO (bb)->next;
1394 BLOCK_INFO (bb)->next = NULL;
1395
1396 /* Compute frequency of basic block. */
1397 if (bb != head)
1398 {
1399 #ifdef ENABLE_CHECKING
1400 for (e = bb->pred; e; e = e->pred_next)
1401 if (BLOCK_INFO (e->src)->tovisit && !(e->flags & EDGE_DFS_BACK))
1402 abort ();
1403 #endif
1404
1405 for (e = bb->pred; e; e = e->pred_next)
1406 if (EDGE_INFO (e)->back_edge)
1407 {
1408 sreal_add (&cyclic_probability, &cyclic_probability,
1409 &EDGE_INFO (e)->back_edge_prob);
1410 }
1411 else if (!(e->flags & EDGE_DFS_BACK))
1412 {
1413 sreal tmp;
1414
1415 /* frequency += (e->probability
1416 * BLOCK_INFO (e->src)->frequency /
1417 REG_BR_PROB_BASE); */
1418
1419 sreal_init (&tmp, e->probability, 0);
1420 sreal_mul (&tmp, &tmp, &BLOCK_INFO (e->src)->frequency);
1421 sreal_mul (&tmp, &tmp, &real_inv_br_prob_base);
1422 sreal_add (&frequency, &frequency, &tmp);
1423 }
1424
1425 if (sreal_compare (&cyclic_probability, &real_zero) == 0)
1426 {
1427 memcpy (&BLOCK_INFO (bb)->frequency, &frequency,
1428 sizeof (frequency));
1429 }
1430 else
1431 {
1432 if (sreal_compare (&cyclic_probability, &real_almost_one) > 0)
1433 {
1434 memcpy (&cyclic_probability, &real_almost_one,
1435 sizeof (real_almost_one));
1436 }
1437
1438 /* BLOCK_INFO (bb)->frequency = frequency
1439 / (1 - cyclic_probability) */
1440
1441 sreal_sub (&cyclic_probability, &real_one, &cyclic_probability);
1442 sreal_div (&BLOCK_INFO (bb)->frequency,
1443 &frequency, &cyclic_probability);
1444 }
1445 }
1446
1447 BLOCK_INFO (bb)->tovisit = 0;
1448
1449 /* Compute back edge frequencies. */
1450 for (e = bb->succ; e; e = e->succ_next)
1451 if (e->dest == head)
1452 {
1453 sreal tmp;
1454
1455 /* EDGE_INFO (e)->back_edge_prob
1456 = ((e->probability * BLOCK_INFO (bb)->frequency)
1457 / REG_BR_PROB_BASE); */
1458
1459 sreal_init (&tmp, e->probability, 0);
1460 sreal_mul (&tmp, &tmp, &BLOCK_INFO (bb)->frequency);
1461 sreal_mul (&EDGE_INFO (e)->back_edge_prob,
1462 &tmp, &real_inv_br_prob_base);
1463 }
1464
1465 /* Propagate to successor blocks. */
1466 for (e = bb->succ; e; e = e->succ_next)
1467 if (!(e->flags & EDGE_DFS_BACK)
1468 && BLOCK_INFO (e->dest)->npredecessors)
1469 {
1470 BLOCK_INFO (e->dest)->npredecessors--;
1471 if (!BLOCK_INFO (e->dest)->npredecessors)
1472 {
1473 if (!nextbb)
1474 nextbb = e->dest;
1475 else
1476 BLOCK_INFO (last)->next = e->dest;
1477
1478 last = e->dest;
1479 }
1480 }
1481 }
1482 }
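/* A worked instance of the propagation above: if the back edge
   probabilities into a loop header sum to cyclic_probability 0.9, the
   header frequency is divided by 1 - 0.9 = 0.1, i.e. multiplied by the
   expected iteration count of 10.  */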
1483
1484 /* Estimate probabilities of loopback edges in loops at the same nest level. */
1485
1486 static void
1487 estimate_loops_at_level (struct loop *first_loop)
1488 {
1489 struct loop *loop;
1490
1491 for (loop = first_loop; loop; loop = loop->next)
1492 {
1493 edge e;
1494 basic_block *bbs;
1495 unsigned i;
1496
1497 estimate_loops_at_level (loop->inner);
1498
1499 if (loop->latch->succ) /* Do not do this for dummy function loop. */
1500 {
1501 /* Find current loop back edge and mark it. */
1502 e = loop_latch_edge (loop);
1503 EDGE_INFO (e)->back_edge = 1;
1504 }
1505
1506 bbs = get_loop_body (loop);
1507 for (i = 0; i < loop->num_nodes; i++)
1508 BLOCK_INFO (bbs[i])->tovisit = 1;
1509 free (bbs);
1510 propagate_freq (loop);
1511 }
1512 }
1513
1514 /* Convert counts measured by profile driven feedback to frequencies.
1515 Return nonzero iff there was any nonzero execution count. */
1516
1517 static int
1518 counts_to_freqs (void)
1519 {
1520 gcov_type count_max, true_count_max = 0;
1521 basic_block bb;
1522
1523 FOR_EACH_BB (bb)
1524 true_count_max = MAX (bb->count, true_count_max);
1525
1526 count_max = MAX (true_count_max, 1);
1527 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
1528 bb->frequency = (bb->count * BB_FREQ_MAX + count_max / 2) / count_max;
1529 return true_count_max;
1530 }
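/* For example, with BB_FREQ_MAX 10000, a block executed 500 times when
   the hottest block runs 1000 times gets frequency
   (500 * 10000 + 500) / 1000 = 5000.  */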
1531
1532 /* Return true if the function is likely to be expensive, so there is no
1533 point in optimizing the performance of the prologue, epilogue, or doing
1534 inlining at the expense of code size growth. THRESHOLD is the limit on the
1535 number of instructions the function can execute on average and still be considered not expensive. */
1536
1537 bool
1538 expensive_function_p (int threshold)
1539 {
1540 unsigned int sum = 0;
1541 basic_block bb;
1542 unsigned int limit;
1543
1544 /* We cannot compute this accurately for large thresholds due to scaled
1545 frequencies. */
1546 if (threshold > BB_FREQ_MAX)
1547 abort ();
1548
1549 /* Frequencies are out of range. This either means that the function
1550 contains an internal loop executing more than BB_FREQ_MAX times or that
1551 profile feedback is available and the function has never been executed. */
1552 if (ENTRY_BLOCK_PTR->frequency == 0)
1553 return true;
1554
1555 /* At most BB_FREQ_MAX^2, so overflow won't happen. */
1556 limit = ENTRY_BLOCK_PTR->frequency * threshold;
1557 FOR_EACH_BB (bb)
1558 {
1559 rtx insn;
1560
1561 for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb));
1562 insn = NEXT_INSN (insn))
1563 if (active_insn_p (insn))
1564 {
1565 sum += bb->frequency;
1566 if (sum > limit)
1567 return true;
1568 }
1569 }
1570
1571 return false;
1572 }
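/* For example, with THRESHOLD 100 the limit is 100 times the entry
   block frequency, so a function whose active insns' scaled frequencies
   sum beyond that -- roughly, one expected to execute more than 100
   insns per invocation -- is reported as expensive.  */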
1573
1574 /* Estimate basic block frequencies from the given branch probabilities. */
1575
1576 static void
1577 estimate_bb_frequencies (struct loops *loops)
1578 {
1579 basic_block bb;
1580 sreal freq_max;
1581
1582 if (!flag_branch_probabilities || !counts_to_freqs ())
1583 {
1584 static int real_values_initialized = 0;
1585
1586 if (!real_values_initialized)
1587 {
1588 real_values_initialized = 1;
1589 sreal_init (&real_zero, 0, 0);
1590 sreal_init (&real_one, 1, 0);
1591 sreal_init (&real_br_prob_base, REG_BR_PROB_BASE, 0);
1592 sreal_init (&real_bb_freq_max, BB_FREQ_MAX, 0);
1593 sreal_init (&real_one_half, 1, -1);
1594 sreal_div (&real_inv_br_prob_base, &real_one, &real_br_prob_base);
1595 sreal_sub (&real_almost_one, &real_one, &real_inv_br_prob_base);
1596 }
1597
1598 mark_dfs_back_edges ();
1599
1600 ENTRY_BLOCK_PTR->succ->probability = REG_BR_PROB_BASE;
1601
1602 /* Set up block info for each basic block. */
1603 alloc_aux_for_blocks (sizeof (struct block_info_def));
1604 alloc_aux_for_edges (sizeof (struct edge_info_def));
1605 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
1606 {
1607 edge e;
1608
1609 BLOCK_INFO (bb)->tovisit = 0;
1610 for (e = bb->succ; e; e = e->succ_next)
1611 {
1612 sreal_init (&EDGE_INFO (e)->back_edge_prob, e->probability, 0);
1613 sreal_mul (&EDGE_INFO (e)->back_edge_prob,
1614 &EDGE_INFO (e)->back_edge_prob,
1615 &real_inv_br_prob_base);
1616 }
1617 }
1618
1619 /* First compute probabilities locally for each loop from innermost
1620 to outermost to examine probabilities for back edges. */
1621 estimate_loops_at_level (loops->tree_root);
1622
1623 memcpy (&freq_max, &real_zero, sizeof (real_zero));
1624 FOR_EACH_BB (bb)
1625 if (sreal_compare (&freq_max, &BLOCK_INFO (bb)->frequency) < 0)
1626 memcpy (&freq_max, &BLOCK_INFO (bb)->frequency, sizeof (freq_max));
1627
1628 sreal_div (&freq_max, &real_bb_freq_max, &freq_max);
1629 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
1630 {
1631 sreal tmp;
1632
1633 sreal_mul (&tmp, &BLOCK_INFO (bb)->frequency, &freq_max);
1634 sreal_add (&tmp, &tmp, &real_one_half);
1635 bb->frequency = sreal_to_int (&tmp);
1636 }
1637
1638 free_aux_for_blocks ();
1639 free_aux_for_edges ();
1640 }
1641 compute_function_frequency ();
1642 if (flag_reorder_functions)
1643 choose_function_section ();
1644 }
1645
1646 /* Decide whether the function is hot, cold or unlikely executed. */
1647 static void
1648 compute_function_frequency (void)
1649 {
1650 basic_block bb;
1651
1652 if (!profile_info || !flag_branch_probabilities)
1653 return;
1654 cfun->function_frequency = FUNCTION_FREQUENCY_UNLIKELY_EXECUTED;
1655 FOR_EACH_BB (bb)
1656 {
1657 if (maybe_hot_bb_p (bb))
1658 {
1659 cfun->function_frequency = FUNCTION_FREQUENCY_HOT;
1660 return;
1661 }
1662 if (!probably_never_executed_bb_p (bb))
1663 cfun->function_frequency = FUNCTION_FREQUENCY_NORMAL;
1664 }
1665 }
1666
1667 /* Choose appropriate section for the function. */
1668 static void
1669 choose_function_section (void)
1670 {
1671 if (DECL_SECTION_NAME (current_function_decl)
1672 || !targetm.have_named_sections
1673 /* Theoretically we can split the gnu.linkonce text section too, but
1674 this requires more work as the frequency needs to match for all
1675 generated objects, so we would need to merge the frequencies of all
1676 instances. For now just never set the frequency for these. */
1677 || DECL_ONE_ONLY (current_function_decl))
1678 return;
1679
1680 /* If we are doing the partitioning optimization, let the optimization
1681 choose the correct section into which to put things. */
1682
1683 if (flag_reorder_blocks_and_partition)
1684 return;
1685
1686 if (cfun->function_frequency == FUNCTION_FREQUENCY_HOT)
1687 DECL_SECTION_NAME (current_function_decl) =
1688 build_string (strlen (HOT_TEXT_SECTION_NAME), HOT_TEXT_SECTION_NAME);
1689 if (cfun->function_frequency == FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
1690 DECL_SECTION_NAME (current_function_decl) =
1691 build_string (strlen (UNLIKELY_EXECUTED_TEXT_SECTION_NAME),
1692 UNLIKELY_EXECUTED_TEXT_SECTION_NAME);
1693 }
1694
1695
1696 struct tree_opt_pass pass_profile =
1697 {
1698 "profile", /* name */
1699 NULL, /* gate */
1700 tree_estimate_probability, /* execute */
1701 NULL, /* sub */
1702 NULL, /* next */
1703 0, /* static_pass_number */
1704 TV_BRANCH_PROB, /* tv_id */
1705 PROP_cfg, /* properties_required */
1706 0, /* properties_provided */
1707 0, /* properties_destroyed */
1708 0, /* todo_flags_start */
1709 TODO_ggc_collect | TODO_verify_ssa, /* todo_flags_finish */
1710 0 /* letter */
1711 };