gcc/predict.c
/* Branch prediction routines for the GNU compiler.
   Copyright (C) 2000, 2001, 2002 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 59 Temple Place - Suite 330, Boston, MA
02111-1307, USA.  */

/* References:

   [1] "Branch Prediction for Free"
       Ball and Larus; PLDI '93.
   [2] "Static Branch Frequency and Program Profile Analysis"
       Wu and Larus; MICRO-27.
   [3] "Corpus-based Static Branch Prediction"
       Calder, Grunwald, Lindsay, Martin, Mozer, and Zorn; PLDI '95.  */


#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "rtl.h"
#include "tm_p.h"
#include "hard-reg-set.h"
#include "basic-block.h"
#include "insn-config.h"
#include "regs.h"
#include "flags.h"
#include "output.h"
#include "function.h"
#include "except.h"
#include "toplev.h"
#include "recog.h"
#include "expr.h"
#include "predict.h"
#include "profile.h"
#include "sreal.h"
#include "params.h"
#include "target.h"
#include "loop.h"
#include "cfgloop.h"

/* real constants: 0, 1, 1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE,
   1/REG_BR_PROB_BASE, 0.5, BB_FREQ_MAX.  */
static sreal real_zero, real_one, real_almost_one, real_br_prob_base,
             real_inv_br_prob_base, real_one_half, real_bb_freq_max;

/* Random guesstimation given names.  */
#define PROB_VERY_UNLIKELY	(REG_BR_PROB_BASE / 10 - 1)
#define PROB_EVEN		(REG_BR_PROB_BASE / 2)
#define PROB_VERY_LIKELY	(REG_BR_PROB_BASE - PROB_VERY_UNLIKELY)
#define PROB_ALWAYS		(REG_BR_PROB_BASE)

static bool predicted_by_p               PARAMS ((basic_block,
                                                  enum br_predictor));
static void combine_predictions_for_insn PARAMS ((rtx, basic_block));
static void dump_prediction              PARAMS ((enum br_predictor, int,
                                                  basic_block, int));
static void estimate_loops_at_level      PARAMS ((struct loop *));
static void propagate_freq               PARAMS ((struct loop *));
static void estimate_bb_frequencies      PARAMS ((struct loops *));
static void counts_to_freqs              PARAMS ((void));
static void process_note_predictions    PARAMS ((basic_block, int *,
                                                 dominance_info,
                                                 dominance_info));
static void process_note_prediction     PARAMS ((basic_block, int *,
                                                 dominance_info,
                                                 dominance_info, int, int));
static bool last_basic_block_p           PARAMS ((basic_block));
static void compute_function_frequency   PARAMS ((void));
static void choose_function_section      PARAMS ((void));
static bool can_predict_insn_p           PARAMS ((rtx));

/* Information we hold about each branch predictor.
   Filled using information from predict.def.  */

struct predictor_info
{
  const char *const name;	/* Name used in the debugging dumps.  */
  const int hitrate;		/* Expected hitrate used by
				   predict_insn_def call.  */
  const int flags;
};

/* Use the given predictor without Dempster-Shafer combination if it
   matches under the first_match heuristic.  */
#define PRED_FLAG_FIRST_MATCH 1

/* Convert a hitrate given in percent to our internal representation.  */

#define HITRATE(VAL) ((int) ((VAL) * REG_BR_PROB_BASE + 50) / 100)

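/* For instance, assuming the usual REG_BR_PROB_BASE of 10000 (defined
   in rtl.h), a predictor with a 79% hitrate is represented as
   HITRATE (79) == (79 * 10000 + 50) / 100 == 7900; the "+ 50" rounds
   to the nearest unit instead of truncating.  */
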
#define DEF_PREDICTOR(ENUM, NAME, HITRATE, FLAGS) {NAME, HITRATE, FLAGS},
static const struct predictor_info predictor_info[] = {
#include "predict.def"

  /* Upper bound on predictors.  */
  {NULL, 0, 0}
};
#undef DEF_PREDICTOR
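
/* For illustration, a predict.def entry has the shape (hypothetical
   values shown here - see predict.def for the real table):

     DEF_PREDICTOR (PRED_LOOP_BRANCH, "loop branch",
                    HITRATE (89), PRED_FLAG_FIRST_MATCH)

   which the macro above expands into {"loop branch", 8900, 1}.  */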

/* Return true if BB can be CPU intensive and should be optimized
   for maximal performance.  */

bool
maybe_hot_bb_p (bb)
     basic_block bb;
{
  if (profile_info.count_profiles_merged
      && flag_branch_probabilities
      && (bb->count
	  < profile_info.max_counter_in_program
	  / PARAM_VALUE (HOT_BB_COUNT_FRACTION)))
    return false;
  if (bb->frequency < BB_FREQ_MAX / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION))
    return false;
  return true;
}
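
/* A sketch of the thresholds above, with illustrative parameter
   values: if HOT_BB_FREQUENCY_FRACTION is 1000 and BB_FREQ_MAX is
   10000, a block needs a frequency of at least 10 - one thousandth of
   the hottest block - to still count as maybe hot.  */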

/* Return true if BB is cold and should be optimized for size.  */

bool
probably_cold_bb_p (bb)
     basic_block bb;
{
  if (profile_info.count_profiles_merged
      && flag_branch_probabilities
      && (bb->count
	  < profile_info.max_counter_in_program
	  / PARAM_VALUE (HOT_BB_COUNT_FRACTION)))
    return true;
  if (bb->frequency < BB_FREQ_MAX / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION))
    return true;
  return false;
}

/* Return true if BB is probably never executed.  */

bool
probably_never_executed_bb_p (bb)
     basic_block bb;
{
  if (profile_info.count_profiles_merged
      && flag_branch_probabilities)
    return ((bb->count + profile_info.count_profiles_merged / 2)
	    / profile_info.count_profiles_merged) == 0;
  return false;
}
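
/* The rounding above means a block is reported as never executed when
   its count averages out to less than half an execution per merged
   profile run; e.g. a count of 4 over 10 merged profiles rounds to
   (4 + 5) / 10 == 0.  */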

/* Return true if one of the outgoing edges of BB is already predicted
   by PREDICTOR.  */

static bool
predicted_by_p (bb, predictor)
     basic_block bb;
     enum br_predictor predictor;
{
  rtx note;
  if (!INSN_P (bb->end))
    return false;
  for (note = REG_NOTES (bb->end); note; note = XEXP (note, 1))
    if (REG_NOTE_KIND (note) == REG_BR_PRED
	&& INTVAL (XEXP (XEXP (note, 0), 0)) == (int) predictor)
      return true;
  return false;
}

/* Attach a REG_BR_PRED note with PREDICTOR and PROBABILITY to the
   conditional jump INSN.  */

void
predict_insn (insn, predictor, probability)
     rtx insn;
     int probability;
     enum br_predictor predictor;
{
  if (!any_condjump_p (insn))
    abort ();

  REG_NOTES (insn)
    = gen_rtx_EXPR_LIST (REG_BR_PRED,
			 gen_rtx_CONCAT (VOIDmode,
					 GEN_INT ((int) predictor),
					 GEN_INT ((int) probability)),
			 REG_NOTES (insn));
}

/* Predict insn by given predictor.  */

void
predict_insn_def (insn, predictor, taken)
     rtx insn;
     enum br_predictor predictor;
     enum prediction taken;
{
  int probability = predictor_info[(int) predictor].hitrate;

  if (taken != TAKEN)
    probability = REG_BR_PROB_BASE - probability;

  predict_insn (insn, predictor, probability);
}

/* Predict edge E with given probability if possible.  */

void
predict_edge (e, predictor, probability)
     edge e;
     int probability;
     enum br_predictor predictor;
{
  rtx last_insn;
  last_insn = e->src->end;

  /* We can store the branch prediction information only for
     conditional jumps.  */
  if (!any_condjump_p (last_insn))
    return;

  /* We always store the probability of branching.  */
  if (e->flags & EDGE_FALLTHRU)
    probability = REG_BR_PROB_BASE - probability;

  predict_insn (last_insn, predictor, probability);
}

/* Return true when we can store a prediction on insn INSN.
   At the moment we represent predictions only on conditional
   jumps, not on computed jumps or other complicated cases.  */

static bool
can_predict_insn_p (insn)
     rtx insn;
{
  return (GET_CODE (insn) == JUMP_INSN
	  && any_condjump_p (insn)
	  && BLOCK_FOR_INSN (insn)->succ->succ_next);
}

/* Predict edge E by given predictor if possible.  */

void
predict_edge_def (e, predictor, taken)
     edge e;
     enum br_predictor predictor;
     enum prediction taken;
{
  int probability = predictor_info[(int) predictor].hitrate;

  if (taken != TAKEN)
    probability = REG_BR_PROB_BASE - probability;

  predict_edge (e, predictor, probability);
}

/* Invert all branch predictions or probability notes in the INSN.  This needs
   to be done each time we invert the condition used by the jump.  */

void
invert_br_probabilities (insn)
     rtx insn;
{
  rtx note;

  for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
    if (REG_NOTE_KIND (note) == REG_BR_PROB)
      XEXP (note, 0) = GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (note, 0)));
    else if (REG_NOTE_KIND (note) == REG_BR_PRED)
      XEXP (XEXP (note, 0), 1)
	= GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (XEXP (note, 0), 1)));
}

/* Dump information about the branch prediction to the output file.  */

static void
dump_prediction (predictor, probability, bb, used)
     enum br_predictor predictor;
     int probability;
     basic_block bb;
     int used;
{
  edge e = bb->succ;

  if (!rtl_dump_file)
    return;

  while (e && (e->flags & EDGE_FALLTHRU))
    e = e->succ_next;

  fprintf (rtl_dump_file, " %s heuristics%s: %.1f%%",
	   predictor_info[predictor].name,
	   used ? "" : " (ignored)", probability * 100.0 / REG_BR_PROB_BASE);

  if (bb->count)
    {
      fprintf (rtl_dump_file, " exec ");
      fprintf (rtl_dump_file, HOST_WIDEST_INT_PRINT_DEC, bb->count);
      if (e)
	{
	  fprintf (rtl_dump_file, " hit ");
	  fprintf (rtl_dump_file, HOST_WIDEST_INT_PRINT_DEC, e->count);
	  fprintf (rtl_dump_file, " (%.1f%%)", e->count * 100.0 / bb->count);
	}
    }

  fprintf (rtl_dump_file, "\n");
}

/* Combine all REG_BR_PRED notes into a single probability and attach a
   REG_BR_PROB note if not already present.  Remove now useless
   REG_BR_PRED notes.  */

static void
combine_predictions_for_insn (insn, bb)
     rtx insn;
     basic_block bb;
{
  rtx prob_note = find_reg_note (insn, REG_BR_PROB, 0);
  rtx *pnote = &REG_NOTES (insn);
  rtx note;
  int best_probability = PROB_EVEN;
  int best_predictor = END_PREDICTORS;
  int combined_probability = REG_BR_PROB_BASE / 2;
  int d;
  bool first_match = false;
  bool found = false;

  if (rtl_dump_file)
    fprintf (rtl_dump_file, "Predictions for insn %i bb %i\n", INSN_UID (insn),
	     bb->index);

  /* We implement the "first match" heuristic and use the probability
     guessed by the predictor with the smallest index.  In the future we
     will use better probability combination techniques.  */
  for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
    if (REG_NOTE_KIND (note) == REG_BR_PRED)
      {
	int predictor = INTVAL (XEXP (XEXP (note, 0), 0));
	int probability = INTVAL (XEXP (XEXP (note, 0), 1));

	found = true;
	if (best_predictor > predictor)
	  best_probability = probability, best_predictor = predictor;

	d = (combined_probability * probability
	     + (REG_BR_PROB_BASE - combined_probability)
	     * (REG_BR_PROB_BASE - probability));

	/* Use FP math to avoid overflows of 32-bit integers.  */
	if (d == 0)
	  /* If one probability is 0% and one 100%, avoid division by zero.  */
	  combined_probability = REG_BR_PROB_BASE / 2;
	else
	  combined_probability = (((double) combined_probability) * probability
				  * REG_BR_PROB_BASE / d + 0.5);
      }

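  /* A worked example of the combination rule above, assuming the usual
     REG_BR_PROB_BASE of 10000: starting from the 50% prior, folding in
     a single 70% prediction gives d = 5000 * 7000 + 5000 * 3000 and
     5000 * 7000 * 10000 / d + 0.5 == 7000, so a lone prediction passes
     through unchanged.  Folding in a second independent 70% prediction
     yields 7000 * 7000 * 10000 / (7000 * 7000 + 3000 * 3000) ~= 8448,
     i.e. agreeing predictors reinforce each other.  */
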
  /* Decide which heuristic to use.  If we didn't match anything, use the
     no_prediction heuristic; if we did match, use either the first match
     or Dempster-Shafer combination, depending on the flags.  */

  if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
    first_match = true;

  if (!found)
    dump_prediction (PRED_NO_PREDICTION, combined_probability, bb, true);
  else
    {
      dump_prediction (PRED_DS_THEORY, combined_probability, bb, !first_match);
      dump_prediction (PRED_FIRST_MATCH, best_probability, bb, first_match);
    }

  if (first_match)
    combined_probability = best_probability;
  dump_prediction (PRED_COMBINED, combined_probability, bb, true);

  while (*pnote)
    {
      if (REG_NOTE_KIND (*pnote) == REG_BR_PRED)
	{
	  int predictor = INTVAL (XEXP (XEXP (*pnote, 0), 0));
	  int probability = INTVAL (XEXP (XEXP (*pnote, 0), 1));

	  dump_prediction (predictor, probability, bb,
			   !first_match || best_predictor == predictor);
	  *pnote = XEXP (*pnote, 1);
	}
      else
	pnote = &XEXP (*pnote, 1);
    }

  if (!prob_note)
    {
      REG_NOTES (insn)
	= gen_rtx_EXPR_LIST (REG_BR_PROB,
			     GEN_INT (combined_probability), REG_NOTES (insn));

      /* Save the prediction into the CFG in case we are looking at a
	 non-degenerate conditional jump.  */
      if (bb->succ->succ_next)
	{
	  BRANCH_EDGE (bb)->probability = combined_probability;
	  FALLTHRU_EDGE (bb)->probability
	    = REG_BR_PROB_BASE - combined_probability;
	}
    }
}

/* Statically estimate the probability that a branch will be taken.
   ??? In the next revision there will be a number of other predictors added
   from the above references.  Further, each heuristic will be factored out
   into its own function for clarity (and to facilitate the combination of
   predictions).  */

void
estimate_probability (loops_info)
     struct loops *loops_info;
{
  dominance_info dominators, post_dominators;
  basic_block bb;
  unsigned i;

  connect_infinite_loops_to_exit ();
  dominators = calculate_dominance_info (CDI_DOMINATORS);
  post_dominators = calculate_dominance_info (CDI_POST_DOMINATORS);

  /* Apply the loop heuristics - expected iteration count, loop branch
     and loop exit - to each natural loop.  */
  for (i = 1; i < loops_info->num; i++)
    {
      basic_block bb, *bbs;
      unsigned j;
      int exits;
      struct loop *loop = loops_info->parray[i];
      struct loop_desc desc;
      unsigned HOST_WIDE_INT niter;

      flow_loop_scan (loops_info, loop, LOOP_EXIT_EDGES);
      exits = loop->num_exits;

      if (simple_loop_p (loops_info, loop, &desc)
	  && desc.const_iter)
	{
	  int prob;
	  niter = desc.niter + 1;
	  if (niter == 0)	/* We might overflow here.  */
	    niter = desc.niter;

	  prob = (REG_BR_PROB_BASE
		  - (REG_BR_PROB_BASE + niter / 2) / niter);
	  /* The branch prediction algorithm would give 0 frequency to
	     everything after the end of a loop that has 0 probability
	     of finishing.  */
	  if (prob == REG_BR_PROB_BASE)
	    prob = REG_BR_PROB_BASE - 1;
	  predict_edge (desc.in_edge, PRED_LOOP_ITERATIONS,
			prob);
	}
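
      /* An illustrative instance of the computation above: a loop with
	 a known constant iteration count of 100 (so niter == 101 after
	 the adjustment above) gets the prediction
	 10000 - (10000 + 50) / 101 == 9901, assuming the usual
	 REG_BR_PROB_BASE of 10000 - roughly 99%.  */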

      bbs = get_loop_body (loop);
      for (j = 0; j < loop->num_nodes; j++)
	{
	  int header_found = 0;
	  edge e;

	  bb = bbs[j];

	  /* Bypass loop heuristics on continue statement.  These
	     statements construct loops via "non-loop" constructs
	     in the source language and are better handled
	     separately.  */
	  if (!can_predict_insn_p (bb->end)
	      || predicted_by_p (bb, PRED_CONTINUE))
	    continue;

	  /* Loop branch heuristics - predict an edge back to a
	     loop's head as taken.  */
	  for (e = bb->succ; e; e = e->succ_next)
	    if (e->dest == loop->header
		&& e->src == loop->latch)
	      {
		header_found = 1;
		predict_edge_def (e, PRED_LOOP_BRANCH, TAKEN);
	      }

	  /* Loop exit heuristics - predict as not taken an edge
	     exiting the loop if the conditional has no loop header
	     successor.  */
	  if (!header_found)
	    for (e = bb->succ; e; e = e->succ_next)
	      if (e->dest->index < 0
		  || !flow_bb_inside_loop_p (loop, e->dest))
		predict_edge
		  (e, PRED_LOOP_EXIT,
		   (REG_BR_PROB_BASE
		    - predictor_info [(int) PRED_LOOP_EXIT].hitrate)
		   / exits);
	}
    }

  /* Attempt to predict conditional jumps using a number of heuristics.  */
  FOR_EACH_BB (bb)
    {
      rtx last_insn = bb->end;
      rtx cond, earliest;
      edge e;

      if (! can_predict_insn_p (last_insn))
	continue;

      for (e = bb->succ; e; e = e->succ_next)
	{
	  /* Predict early returns to be probable, as we've already taken
	     care of error returns and the others are often used for fast
	     paths through the function.  */
	  if ((e->dest == EXIT_BLOCK_PTR
	       || (e->dest->succ && !e->dest->succ->succ_next
		   && e->dest->succ->dest == EXIT_BLOCK_PTR))
	      && !predicted_by_p (bb, PRED_NULL_RETURN)
	      && !predicted_by_p (bb, PRED_CONST_RETURN)
	      && !predicted_by_p (bb, PRED_NEGATIVE_RETURN)
	      && !last_basic_block_p (e->dest))
	    predict_edge_def (e, PRED_EARLY_RETURN, TAKEN);

	  /* Look for the block we are guarding (i.e. we dominate it,
	     but it doesn't postdominate us).  */
	  if (e->dest != EXIT_BLOCK_PTR && e->dest != bb
	      && dominated_by_p (dominators, e->dest, e->src)
	      && !dominated_by_p (post_dominators, e->src, e->dest))
	    {
	      rtx insn;

	      /* The call heuristic claims that a guarded function call
		 is improbable.  This is because such calls are often used
		 to signal exceptional situations such as printing error
		 messages.  */
	      for (insn = e->dest->head; insn != NEXT_INSN (e->dest->end);
		   insn = NEXT_INSN (insn))
		if (GET_CODE (insn) == CALL_INSN
		    /* Constant and pure calls are hardly ever used to
		       signal something exceptional.  */
		    && ! CONST_OR_PURE_CALL_P (insn))
		  {
		    predict_edge_def (e, PRED_CALL, NOT_TAKEN);
		    break;
		  }
	    }
	}

      cond = get_condition (last_insn, &earliest);
      if (! cond)
	continue;

      /* Try "pointer heuristic."
	 A comparison ptr == 0 is predicted as false.
	 Similarly, a comparison ptr1 == ptr2 is predicted as false.  */
      if (GET_RTX_CLASS (GET_CODE (cond)) == '<'
	  && ((REG_P (XEXP (cond, 0)) && REG_POINTER (XEXP (cond, 0)))
	      || (REG_P (XEXP (cond, 1)) && REG_POINTER (XEXP (cond, 1)))))
	{
	  if (GET_CODE (cond) == EQ)
	    predict_insn_def (last_insn, PRED_POINTER, NOT_TAKEN);
	  else if (GET_CODE (cond) == NE)
	    predict_insn_def (last_insn, PRED_POINTER, TAKEN);
	}
      else

      /* Try "opcode heuristic."
	 EQ tests are usually false and NE tests are usually true.  Also,
	 most quantities are positive, so we can make the appropriate guesses
	 about signed comparisons against zero.  */
      switch (GET_CODE (cond))
	{
	case CONST_INT:
	  /* Unconditional branch.  */
	  predict_insn_def (last_insn, PRED_UNCONDITIONAL,
			    cond == const0_rtx ? NOT_TAKEN : TAKEN);
	  break;

	case EQ:
	case UNEQ:
	  /* Floating point comparisons appear to behave in a very
	     unpredictable way because of the special role of equality
	     tests in FP code.  */
	  if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
	    ;
	  /* Comparisons with 0 are often used for booleans and there is
	     nothing useful to predict about them.  */
	  else if (XEXP (cond, 1) == const0_rtx
		   || XEXP (cond, 0) == const0_rtx)
	    ;
	  else
	    predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, NOT_TAKEN);
	  break;

	case NE:
	case LTGT:
	  /* Floating point comparisons appear to behave in a very
	     unpredictable way because of the special role of equality
	     tests in FP code.  */
	  if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
	    ;
	  /* Comparisons with 0 are often used for booleans and there is
	     nothing useful to predict about them.  */
	  else if (XEXP (cond, 1) == const0_rtx
		   || XEXP (cond, 0) == const0_rtx)
	    ;
	  else
	    predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, TAKEN);
	  break;

	case ORDERED:
	  predict_insn_def (last_insn, PRED_FPOPCODE, TAKEN);
	  break;

	case UNORDERED:
	  predict_insn_def (last_insn, PRED_FPOPCODE, NOT_TAKEN);
	  break;

	case LE:
	case LT:
	  if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
	      || XEXP (cond, 1) == constm1_rtx)
	    predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, NOT_TAKEN);
	  break;

	case GE:
	case GT:
	  if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
	      || XEXP (cond, 1) == constm1_rtx)
	    predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, TAKEN);
	  break;

	default:
	  break;
	}
    }

  /* Attach the combined probability to each conditional jump.  */
  FOR_EACH_BB (bb)
    if (GET_CODE (bb->end) == JUMP_INSN
	&& any_condjump_p (bb->end)
	&& bb->succ->succ_next != NULL)
      combine_predictions_for_insn (bb->end, bb);

  free_dominance_info (post_dominators);
  free_dominance_info (dominators);

  remove_fake_edges ();
  estimate_bb_frequencies (loops_info);
}
\f
/* __builtin_expect dropped tokens into the insn stream describing expected
   values of registers.  Generate branch probabilities based on these
   values.  */
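
/* For example, a source-level annotation such as

     if (__builtin_expect (ptr != NULL, 1))
       ...

   is expanded into a NOTE_INSN_EXPECTED_VALUE note recording that the
   register feeding the comparison is expected to be nonzero; the loop
   below matches such notes against the following conditional jump and
   turns them into PRED_BUILTIN_EXPECT predictions.  */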

void
expected_value_to_br_prob ()
{
  rtx insn, cond, ev = NULL_RTX, ev_reg = NULL_RTX;

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      switch (GET_CODE (insn))
	{
	case NOTE:
	  /* Look for expected value notes.  */
	  if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_EXPECTED_VALUE)
	    {
	      ev = NOTE_EXPECTED_VALUE (insn);
	      ev_reg = XEXP (ev, 0);
	      delete_insn (insn);
	    }
	  continue;

	case CODE_LABEL:
	  /* Never propagate across labels.  */
	  ev = NULL_RTX;
	  continue;

	case JUMP_INSN:
	  /* Look for simple conditional branches.  If we haven't got an
	     expected value yet, no point going further.  */
	  if (GET_CODE (insn) != JUMP_INSN || ev == NULL_RTX
	      || ! any_condjump_p (insn))
	    continue;
	  break;

	default:
	  /* Look for insns that clobber the EV register.  */
	  if (ev && reg_set_p (ev_reg, insn))
	    ev = NULL_RTX;
	  continue;
	}

      /* Collect the branch condition, hopefully relative to EV_REG.  */
      /* ??? At present we'll miss things like
		(expected_value (eq r70 0))
		(set r71 -1)
		(set r80 (lt r70 r71))
		(set pc (if_then_else (ne r80 0) ...))
	 as canonicalize_condition will render this to us as
		(lt r70, r71)
	 Could use cselib to try and reduce this further.  */
      cond = XEXP (SET_SRC (pc_set (insn)), 0);
      cond = canonicalize_condition (insn, cond, 0, NULL, ev_reg);
      if (! cond || XEXP (cond, 0) != ev_reg
	  || GET_CODE (XEXP (cond, 1)) != CONST_INT)
	continue;

      /* Substitute and simplify.  Given that the expression we're
	 building involves two constants, we should wind up with either
	 true or false.  */
      cond = gen_rtx_fmt_ee (GET_CODE (cond), VOIDmode,
			     XEXP (ev, 1), XEXP (cond, 1));
      cond = simplify_rtx (cond);

      /* Turn the condition into a scaled branch probability.  */
      if (cond != const_true_rtx && cond != const0_rtx)
	abort ();
      predict_insn_def (insn, PRED_BUILTIN_EXPECT,
			cond == const_true_rtx ? TAKEN : NOT_TAKEN);
    }
}
\f
/* Check whether this is the last basic block of the function.  Commonly
   there is one extra cleanup block at the end.  */

static bool
last_basic_block_p (bb)
     basic_block bb;
{
  if (bb == EXIT_BLOCK_PTR)
    return false;

  return (bb->next_bb == EXIT_BLOCK_PTR
	  || (bb->next_bb->next_bb == EXIT_BLOCK_PTR
	      && bb->succ && !bb->succ->succ_next
	      && bb->succ->dest->next_bb == EXIT_BLOCK_PTR));
}

/* Set branch probabilities according to PREDiction and FLAGS.
   HEADS[bb->index] should be the index of the basic block in which we
   need to alter the branch predictions (i.e. the first of our dominators
   that we do not post-dominate).  We fill this information in on demand,
   so -1 may be there when it has not been computed yet.  */

static void
process_note_prediction (bb, heads, dominators, post_dominators, pred, flags)
     basic_block bb;
     int *heads;
     dominance_info dominators;
     dominance_info post_dominators;
     int pred;
     int flags;
{
  edge e;
  int y;
  bool taken;

  taken = flags & IS_TAKEN;

  if (heads[bb->index] < 0)
    {
      /* This is the first time we need this field in the heads array, so
	 find the first dominator that we do not post-dominate (we are
	 using already-known members of the heads array).  */
      basic_block ai = bb;
      basic_block next_ai = get_immediate_dominator (dominators, bb);
      int head;

      while (heads[next_ai->index] < 0)
	{
	  if (!dominated_by_p (post_dominators, next_ai, bb))
	    break;
	  heads[next_ai->index] = ai->index;
	  ai = next_ai;
	  next_ai = get_immediate_dominator (dominators, next_ai);
	}
      if (!dominated_by_p (post_dominators, next_ai, bb))
	head = next_ai->index;
      else
	head = heads[next_ai->index];
      while (next_ai != bb)
	{
	  next_ai = ai;
	  if (heads[ai->index] == ENTRY_BLOCK)
	    ai = ENTRY_BLOCK_PTR;
	  else
	    ai = BASIC_BLOCK (heads[ai->index]);
	  heads[next_ai->index] = head;
	}
    }
  y = heads[bb->index];

  /* Now find the edge that leads to our branch and apply the prediction.  */

  if (y == last_basic_block || !can_predict_insn_p (BASIC_BLOCK (y)->end))
    return;
  for (e = BASIC_BLOCK (y)->succ; e; e = e->succ_next)
    if (e->dest->index >= 0
	&& dominated_by_p (post_dominators, e->dest, bb))
      predict_edge_def (e, pred, taken);
}
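
/* A small sketch of the propagation above, with hypothetical block
   numbers: if bb 7 is immediately dominated by bb 5 and bb 5 by bb 3,
   and bb 7 post-dominates bb 5 but not bb 3, the walk up from bb 7
   stops at bb 3 and records heads[5] == heads[7] == 3, so a prediction
   attached to bb 7 is applied to the branch ending bb 3.  */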

/* Gathers NOTE_INSN_PREDICTIONs in the given basic block and turns them
   into branch probabilities.  For a description of the heads array, see
   process_note_prediction.  */

static void
process_note_predictions (bb, heads, dominators, post_dominators)
     basic_block bb;
     int *heads;
     dominance_info dominators;
     dominance_info post_dominators;
{
  rtx insn;
  edge e;

  /* Additionally, we check here for blocks with no successors.  */
  int contained_noreturn_call = 0;
  int was_bb_head = 0;
  int noreturn_block = 1;

  for (insn = bb->end; insn;
       was_bb_head |= (insn == bb->head), insn = PREV_INSN (insn))
    {
      if (GET_CODE (insn) != NOTE)
	{
	  if (was_bb_head)
	    break;
	  else
	    {
	      /* Noreturn calls cause the program to exit, therefore they
		 are always predicted as not taken.  */
	      if (GET_CODE (insn) == CALL_INSN
		  && find_reg_note (insn, REG_NORETURN, NULL))
		contained_noreturn_call = 1;
	      continue;
	    }
	}
      if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_PREDICTION)
	{
	  int alg = (int) NOTE_PREDICTION_ALG (insn);
	  /* Process single prediction note.  */
	  process_note_prediction (bb,
				   heads,
				   dominators,
				   post_dominators,
				   alg, (int) NOTE_PREDICTION_FLAGS (insn));
	  delete_insn (insn);
	}
    }
  for (e = bb->succ; e; e = e->succ_next)
    if (!(e->flags & EDGE_FAKE))
      noreturn_block = 0;
  if (contained_noreturn_call)
    {
      /* This block ended for reasons other than a return.  If that was
	 because of a noreturn call, the branch to it should certainly
	 not be taken.  Otherwise it is probably some error recovery.  */
      process_note_prediction (bb,
			       heads,
			       dominators,
			       post_dominators, PRED_NORETURN, NOT_TAKEN);
    }
}

/* Gathers NOTE_INSN_PREDICTIONs and turns them into
   branch probabilities.  */

void
note_prediction_to_br_prob ()
{
  basic_block bb;
  dominance_info post_dominators, dominators;
  int *heads;

  /* To enable handling of noreturn blocks.  */
  add_noreturn_fake_exit_edges ();
  connect_infinite_loops_to_exit ();

  post_dominators = calculate_dominance_info (CDI_POST_DOMINATORS);
  dominators = calculate_dominance_info (CDI_DOMINATORS);

  heads = xmalloc (sizeof (int) * last_basic_block);
  memset (heads, -1, sizeof (int) * last_basic_block);
  heads[ENTRY_BLOCK_PTR->next_bb->index] = last_basic_block;

  /* Process all prediction notes.  */

  FOR_EACH_BB (bb)
    process_note_predictions (bb, heads, dominators, post_dominators);

  free_dominance_info (post_dominators);
  free_dominance_info (dominators);
  free (heads);

  remove_fake_edges ();
}
\f
/* This is used to carry information about basic blocks.  It is
   attached to the AUX field of the standard CFG block.  */

typedef struct block_info_def
{
  /* Estimated frequency of execution of basic_block.  */
  sreal frequency;

  /* To keep queue of basic blocks to process.  */
  basic_block next;

  /* True if block needs to be visited in propagate_freq.  */
  int tovisit:1;

  /* Number of predecessors we need to visit first.  */
  int npredecessors;
} *block_info;

/* Similar information for edges.  */
typedef struct edge_info_def
{
  /* If the edge is a loopback edge, this is the probability that the
     edge will be reached given that the header is.  The estimated
     number of iterations of the loop can then be computed as
     1 / (1 - back_edge_prob).  */
  sreal back_edge_prob;
  /* True if the edge is a loopback edge in the natural loop.  */
  int back_edge:1;
} *edge_info;
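
/* For example, a back edge whose back_edge_prob works out to 0.9
   implies an estimated 1 / (1 - 0.9) == 10 iterations of the loop per
   entry (illustrative numbers).  */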

#define BLOCK_INFO(B)	((block_info) (B)->aux)
#define EDGE_INFO(E)	((edge_info) (E)->aux)

/* Helper function for estimate_bb_frequencies.
   Propagate the frequencies for LOOP.  */

static void
propagate_freq (loop)
     struct loop *loop;
{
  basic_block head = loop->header;
  basic_block bb;
  basic_block last;
  edge e;
  basic_block nextbb;

  /* For each basic block we need to visit, count the number of its
     predecessors that we need to visit first.  */
  FOR_EACH_BB (bb)
    {
      if (BLOCK_INFO (bb)->tovisit)
	{
	  int count = 0;

	  for (e = bb->pred; e; e = e->pred_next)
	    if (BLOCK_INFO (e->src)->tovisit && !(e->flags & EDGE_DFS_BACK))
	      count++;
	    else if (BLOCK_INFO (e->src)->tovisit
		     && rtl_dump_file && !EDGE_INFO (e)->back_edge)
	      fprintf (rtl_dump_file,
		       "Irreducible region hit, ignoring edge to %i->%i\n",
		       e->src->index, bb->index);
	  BLOCK_INFO (bb)->npredecessors = count;
	}
    }

  memcpy (&BLOCK_INFO (head)->frequency, &real_one, sizeof (real_one));
  last = head;
  for (bb = head; bb; bb = nextbb)
    {
      sreal cyclic_probability, frequency;

      memcpy (&cyclic_probability, &real_zero, sizeof (real_zero));
      memcpy (&frequency, &real_zero, sizeof (real_zero));

      nextbb = BLOCK_INFO (bb)->next;
      BLOCK_INFO (bb)->next = NULL;

      /* Compute frequency of basic block.  */
      if (bb != head)
	{
#ifdef ENABLE_CHECKING
	  for (e = bb->pred; e; e = e->pred_next)
	    if (BLOCK_INFO (e->src)->tovisit && !(e->flags & EDGE_DFS_BACK))
	      abort ();
#endif

	  for (e = bb->pred; e; e = e->pred_next)
	    if (EDGE_INFO (e)->back_edge)
	      {
		sreal_add (&cyclic_probability, &cyclic_probability,
			   &EDGE_INFO (e)->back_edge_prob);
	      }
	    else if (!(e->flags & EDGE_DFS_BACK))
	      {
		sreal tmp;

		/* frequency += (e->probability
				 * BLOCK_INFO (e->src)->frequency /
				 REG_BR_PROB_BASE);  */

		sreal_init (&tmp, e->probability, 0);
		sreal_mul (&tmp, &tmp, &BLOCK_INFO (e->src)->frequency);
		sreal_mul (&tmp, &tmp, &real_inv_br_prob_base);
		sreal_add (&frequency, &frequency, &tmp);
	      }

	  if (sreal_compare (&cyclic_probability, &real_zero) == 0)
	    {
	      memcpy (&BLOCK_INFO (bb)->frequency, &frequency,
		      sizeof (frequency));
	    }
	  else
	    {
	      if (sreal_compare (&cyclic_probability, &real_almost_one) > 0)
		{
		  memcpy (&cyclic_probability, &real_almost_one,
			  sizeof (real_almost_one));
		}

	      /* BLOCK_INFO (bb)->frequency = frequency
					      / (1 - cyclic_probability) */

	      sreal_sub (&cyclic_probability, &real_one, &cyclic_probability);
	      sreal_div (&BLOCK_INFO (bb)->frequency,
			 &frequency, &cyclic_probability);
	    }
	}

      BLOCK_INFO (bb)->tovisit = 0;

      /* Compute back edge frequencies.  */
      for (e = bb->succ; e; e = e->succ_next)
	if (e->dest == head)
	  {
	    sreal tmp;

	    /* EDGE_INFO (e)->back_edge_prob
		  = ((e->probability * BLOCK_INFO (bb)->frequency)
		     / REG_BR_PROB_BASE); */

	    sreal_init (&tmp, e->probability, 0);
	    sreal_mul (&tmp, &tmp, &BLOCK_INFO (bb)->frequency);
	    sreal_mul (&EDGE_INFO (e)->back_edge_prob,
		       &tmp, &real_inv_br_prob_base);
	  }

      /* Propagate to successor blocks.  */
      for (e = bb->succ; e; e = e->succ_next)
	if (!(e->flags & EDGE_DFS_BACK)
	    && BLOCK_INFO (e->dest)->npredecessors)
	  {
	    BLOCK_INFO (e->dest)->npredecessors--;
	    if (!BLOCK_INFO (e->dest)->npredecessors)
	      {
		if (!nextbb)
		  nextbb = e->dest;
		else
		  BLOCK_INFO (last)->next = e->dest;

		last = e->dest;
	      }
	  }
    }
}
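
/* A small worked example of the propagation above: a join block with
   two predecessors of frequency 1.0, entered from each with edge
   probability 0.5, accumulates frequency 0.5 + 0.5 == 1.0; if a back
   edge into it carries back_edge_prob 0.75 (as happens for an inner
   loop's header during the outer loop's propagation), the division by
   (1 - 0.75) raises its frequency to 4.0.  */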

/* Estimate probabilities of loopback edges in loops at the same nest
   level.  */

static void
estimate_loops_at_level (first_loop)
     struct loop *first_loop;
{
  struct loop *loop;

  for (loop = first_loop; loop; loop = loop->next)
    {
      edge e;
      basic_block *bbs;
      unsigned i;

      estimate_loops_at_level (loop->inner);

      if (loop->latch->succ)  /* Do not do this for the dummy function loop.  */
	{
	  /* Find current loop back edge and mark it.  */
	  e = loop_latch_edge (loop);
	  EDGE_INFO (e)->back_edge = 1;
	}

      bbs = get_loop_body (loop);
      for (i = 0; i < loop->num_nodes; i++)
	BLOCK_INFO (bbs[i])->tovisit = 1;
      free (bbs);
      propagate_freq (loop);
    }
}

/* Convert counts measured by profile-driven feedback to frequencies.  */

static void
counts_to_freqs ()
{
  gcov_type count_max = 1;
  basic_block bb;

  FOR_EACH_BB (bb)
    count_max = MAX (bb->count, count_max);

  FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
    bb->frequency = (bb->count * BB_FREQ_MAX + count_max / 2) / count_max;
}
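
/* For instance, with BB_FREQ_MAX of 10000 (see basic-block.h) and a
   hottest count of 2000000, a block executed 500000 times is assigned
   frequency (500000 * 10000 + 1000000) / 2000000 == 2500, a quarter of
   the maximum (illustrative numbers).  */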

/* Return true if the function is likely to be expensive, so there is
   no point in optimizing the performance of the prologue and epilogue
   or doing inlining at the expense of code size growth.  THRESHOLD is
   the maximum number of instructions the function can execute on
   average and still be considered not expensive.  */

bool
expensive_function_p (threshold)
     int threshold;
{
  unsigned int sum = 0;
  basic_block bb;
  unsigned int limit;

  /* We cannot compute this accurately for large thresholds due to the
     scaled frequencies.  */
  if (threshold > BB_FREQ_MAX)
    abort ();

  /* Frequencies are out of range.  This either means that the function
     contains an internal loop executing more than BB_FREQ_MAX times, or
     that profile feedback is available and the function has not been
     executed at all.  */
  if (ENTRY_BLOCK_PTR->frequency == 0)
    return true;

  /* The limit is at most BB_FREQ_MAX * BB_FREQ_MAX, so the
     multiplication won't overflow.  */
  limit = ENTRY_BLOCK_PTR->frequency * threshold;
  FOR_EACH_BB (bb)
    {
      rtx insn;

      for (insn = bb->head; insn != NEXT_INSN (bb->end);
	   insn = NEXT_INSN (insn))
	if (active_insn_p (insn))
	  {
	    sum += bb->frequency;
	    if (sum > limit)
	      return true;
	  }
    }

  return false;
}
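
/* For example, with a THRESHOLD of 100 and an entry block frequency of
   1000, the limit is 100000, so the function is reported expensive as
   soon as the frequencies of its active insns sum past that - roughly
   more than 100 instructions executed per invocation on average.  */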

/* Estimate basic block frequencies from the given branch probabilities.  */

static void
estimate_bb_frequencies (loops)
     struct loops *loops;
{
  basic_block bb;
  sreal freq_max;

  if (flag_branch_probabilities)
    counts_to_freqs ();
  else
    {
      sreal_init (&real_zero, 0, 0);
      sreal_init (&real_one, 1, 0);
      sreal_init (&real_br_prob_base, REG_BR_PROB_BASE, 0);
      sreal_init (&real_bb_freq_max, BB_FREQ_MAX, 0);
      sreal_init (&real_one_half, 1, -1);
      sreal_div (&real_inv_br_prob_base, &real_one, &real_br_prob_base);
      sreal_sub (&real_almost_one, &real_one, &real_inv_br_prob_base);

      mark_dfs_back_edges ();
      /* Fill in the probability values in the flowgraph based on the
	 REG_BR_PROB notes.  */
      FOR_EACH_BB (bb)
	{
	  rtx last_insn = bb->end;

	  if (!can_predict_insn_p (last_insn))
	    {
	      /* We can predict only conditional jumps at the moment.
		 Expect each edge to be equally probable.
		 ??? In the future we want to make abnormal edges
		 improbable.  */
	      int nedges = 0;
	      edge e;

	      for (e = bb->succ; e; e = e->succ_next)
		{
		  nedges++;
		  if (e->probability != 0)
		    break;
		}
	      if (!e)
		for (e = bb->succ; e; e = e->succ_next)
		  e->probability = (REG_BR_PROB_BASE + nedges / 2) / nedges;
	    }
	}

      ENTRY_BLOCK_PTR->succ->probability = REG_BR_PROB_BASE;

      /* Set up block info for each basic block.  */
      alloc_aux_for_blocks (sizeof (struct block_info_def));
      alloc_aux_for_edges (sizeof (struct edge_info_def));
      FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
	{
	  edge e;

	  BLOCK_INFO (bb)->tovisit = 0;
	  for (e = bb->succ; e; e = e->succ_next)
	    {
	      sreal_init (&EDGE_INFO (e)->back_edge_prob, e->probability, 0);
	      sreal_mul (&EDGE_INFO (e)->back_edge_prob,
			 &EDGE_INFO (e)->back_edge_prob,
			 &real_inv_br_prob_base);
	    }
	}

      /* First compute probabilities locally for each loop from innermost
	 to outermost to examine probabilities for back edges.  */
      estimate_loops_at_level (loops->tree_root);

      memcpy (&freq_max, &real_zero, sizeof (real_zero));
      FOR_EACH_BB (bb)
	if (sreal_compare (&freq_max, &BLOCK_INFO (bb)->frequency) < 0)
	  memcpy (&freq_max, &BLOCK_INFO (bb)->frequency, sizeof (freq_max));

      sreal_div (&freq_max, &real_bb_freq_max, &freq_max);
      FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
	{
	  sreal tmp;

	  sreal_mul (&tmp, &BLOCK_INFO (bb)->frequency, &freq_max);
	  sreal_add (&tmp, &tmp, &real_one_half);
	  bb->frequency = sreal_to_int (&tmp);
	}

      free_aux_for_blocks ();
      free_aux_for_edges ();
    }
  compute_function_frequency ();
  if (flag_reorder_functions)
    choose_function_section ();
}

/* Decide whether the function is hot, cold, or unlikely executed.  */
static void
compute_function_frequency ()
{
  basic_block bb;

  if (!profile_info.count_profiles_merged
      || !flag_branch_probabilities)
    return;
  cfun->function_frequency = FUNCTION_FREQUENCY_UNLIKELY_EXECUTED;
  FOR_EACH_BB (bb)
    {
      if (maybe_hot_bb_p (bb))
	{
	  cfun->function_frequency = FUNCTION_FREQUENCY_HOT;
	  return;
	}
      if (!probably_never_executed_bb_p (bb))
	cfun->function_frequency = FUNCTION_FREQUENCY_NORMAL;
    }
}

/* Choose an appropriate section for the function.  */
static void
choose_function_section ()
{
  if (DECL_SECTION_NAME (current_function_decl)
      || !targetm.have_named_sections
      /* Theoretically we can split the gnu.linkonce text section too,
	 but this requires more work as the frequency needs to match
	 for all generated objects so we need to merge the frequency
	 of all instances.  For now just never set frequency for these.  */
      || DECL_ONE_ONLY (current_function_decl))
    return;
  if (cfun->function_frequency == FUNCTION_FREQUENCY_HOT)
    DECL_SECTION_NAME (current_function_decl) =
      build_string (strlen (HOT_TEXT_SECTION_NAME), HOT_TEXT_SECTION_NAME);
  if (cfun->function_frequency == FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
    DECL_SECTION_NAME (current_function_decl) =
      build_string (strlen (UNLIKELY_EXECUTED_TEXT_SECTION_NAME),
		    UNLIKELY_EXECUTED_TEXT_SECTION_NAME);
}