/* Branch prediction routines for the GNU compiler.
   Copyright (C) 2000, 2001, 2002 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 59 Temple Place - Suite 330, Boston, MA
02111-1307, USA.  */

/* References:

   [1] "Branch Prediction for Free"
       Ball and Larus; PLDI '93.
   [2] "Static Branch Frequency and Program Profile Analysis"
       Wu and Larus; MICRO-27.
   [3] "Corpus-based Static Branch Prediction"
       Calder, Grunwald, Lindsay, Martin, Mozer, and Zorn; PLDI '95.  */


#include "config.h"
#include "system.h"
#include "tree.h"
#include "rtl.h"
#include "tm_p.h"
#include "hard-reg-set.h"
#include "basic-block.h"
#include "insn-config.h"
#include "regs.h"
#include "flags.h"
#include "output.h"
#include "function.h"
#include "except.h"
#include "toplev.h"
#include "recog.h"
#include "expr.h"
#include "predict.h"
#include "profile.h"
#include "real.h"
#include "params.h"
#include "target.h"

/* real constants: 0, 1, 1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE, 0.5,
   REAL_BB_FREQ_MAX.  */
static REAL_VALUE_TYPE real_zero, real_one, real_almost_one, real_br_prob_base,
                       real_one_half, real_bb_freq_max;

/* Random guesstimation given names.  */
#define PROB_NEVER              (0)
#define PROB_VERY_UNLIKELY      (REG_BR_PROB_BASE / 10 - 1)
#define PROB_UNLIKELY           (REG_BR_PROB_BASE * 4 / 10 - 1)
#define PROB_EVEN               (REG_BR_PROB_BASE / 2)
#define PROB_LIKELY             (REG_BR_PROB_BASE - PROB_UNLIKELY)
#define PROB_VERY_LIKELY        (REG_BR_PROB_BASE - PROB_VERY_UNLIKELY)
#define PROB_ALWAYS             (REG_BR_PROB_BASE)
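
/* For concreteness: with the usual REG_BR_PROB_BASE of 10000, these work
   out to PROB_VERY_UNLIKELY == 999 (~10%), PROB_UNLIKELY == 3999 (~40%),
   PROB_EVEN == 5000, PROB_LIKELY == 6001, PROB_VERY_LIKELY == 9001 and
   PROB_ALWAYS == 10000.  */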

static bool predicted_by_p PARAMS ((basic_block,
				    enum br_predictor));
static void combine_predictions_for_insn PARAMS ((rtx, basic_block));
static void dump_prediction PARAMS ((enum br_predictor, int,
				     basic_block, int));
static void estimate_loops_at_level PARAMS ((struct loop *loop));
static void propagate_freq PARAMS ((basic_block));
static void estimate_bb_frequencies PARAMS ((struct loops *));
static void counts_to_freqs PARAMS ((void));
static void process_note_predictions PARAMS ((basic_block, int *, int *,
					      sbitmap *));
static void process_note_prediction PARAMS ((basic_block, int *, int *,
					     sbitmap *, int, int));
static bool last_basic_block_p PARAMS ((basic_block));
static void compute_function_frequency PARAMS ((void));
static void choose_function_section PARAMS ((void));

/* Information we hold about each branch predictor.
   Filled using information from predict.def.  */

struct predictor_info
{
  const char *const name;	/* Name used in the debugging dumps.  */
  const int hitrate;		/* Expected hitrate used by
				   predict_insn_def call.  */
  const int flags;
};

/* Use the given predictor without Dempster-Shafer theory if it matches
   using first_match heuristics.  */
#define PRED_FLAG_FIRST_MATCH 1

/* Convert a hitrate given in percent into our fixed-point representation,
   rounding to the nearest unit.  */

#define HITRATE(VAL) ((int) ((VAL) * REG_BR_PROB_BASE + 50) / 100)
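
/* For example, HITRATE (79) is (79 * 10000 + 50) / 100 == 7900 when
   REG_BR_PROB_BASE is 10000, i.e. a 79% expected hitrate; the + 50 term
   rounds to the nearest unit instead of truncating.  */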

#define DEF_PREDICTOR(ENUM, NAME, HITRATE, FLAGS) {NAME, HITRATE, FLAGS},
static const struct predictor_info predictor_info[] = {
#include "predict.def"

  /* Upper bound on predictors.  */
  {NULL, 0, 0}
};
#undef DEF_PREDICTOR

/* Return true in case BB can be CPU intensive and should be optimized
   for maximal performance.  */

bool
maybe_hot_bb_p (bb)
     basic_block bb;
{
  if (profile_info.count_profiles_merged
      && flag_branch_probabilities
      && (bb->count
	  < profile_info.max_counter_in_program
	  / PARAM_VALUE (HOT_BB_COUNT_FRACTION)))
    return false;
  if (bb->frequency < BB_FREQ_MAX / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION))
    return false;
  return true;
}

/* Return true in case BB is cold and should be optimized for size.  */

bool
probably_cold_bb_p (bb)
     basic_block bb;
{
  if (profile_info.count_profiles_merged
      && flag_branch_probabilities
      && (bb->count
	  < profile_info.max_counter_in_program
	  / PARAM_VALUE (HOT_BB_COUNT_FRACTION)))
    return true;
  if (bb->frequency < BB_FREQ_MAX / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION))
    return true;
  return false;
}

/* Return true in case BB is probably never executed.  */
bool
probably_never_executed_bb_p (bb)
     basic_block bb;
{
  if (profile_info.count_profiles_merged
      && flag_branch_probabilities)
    return ((bb->count + profile_info.count_profiles_merged / 2)
	    / profile_info.count_profiles_merged) == 0;
  return false;
}
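
/* Note the rounding in the expression above: with profile feedback the
   result is true exactly when bb->count, averaged over the merged profile
   runs, rounds to zero, i.e. the block ran on average less than roughly
   half a time per profiled run.  */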

/* Return true if one of the outgoing edges of BB is already predicted by
   PREDICTOR.  */

static bool
predicted_by_p (bb, predictor)
     basic_block bb;
     enum br_predictor predictor;
{
  rtx note;
  if (!INSN_P (bb->end))
    return false;
  for (note = REG_NOTES (bb->end); note; note = XEXP (note, 1))
    if (REG_NOTE_KIND (note) == REG_BR_PRED
	&& INTVAL (XEXP (XEXP (note, 0), 0)) == (int) predictor)
      return true;
  return false;
}

void
predict_insn (insn, predictor, probability)
     rtx insn;
     int probability;
     enum br_predictor predictor;
{
  if (!any_condjump_p (insn))
    abort ();

  REG_NOTES (insn)
    = gen_rtx_EXPR_LIST (REG_BR_PRED,
			 gen_rtx_CONCAT (VOIDmode,
					 GEN_INT ((int) predictor),
					 GEN_INT ((int) probability)),
			 REG_NOTES (insn));
}

/* Predict insn by the given predictor.  */

void
predict_insn_def (insn, predictor, taken)
     rtx insn;
     enum br_predictor predictor;
     enum prediction taken;
{
  int probability = predictor_info[(int) predictor].hitrate;

  if (taken != TAKEN)
    probability = REG_BR_PROB_BASE - probability;

  predict_insn (insn, predictor, probability);
}

/* Predict edge E with the given probability if possible.  */

void
predict_edge (e, predictor, probability)
     edge e;
     int probability;
     enum br_predictor predictor;
{
  rtx last_insn;
  last_insn = e->src->end;

  /* We can store the branch prediction information only about
     conditional jumps.  */
  if (!any_condjump_p (last_insn))
    return;

  /* We always store the probability of branching.  */
  if (e->flags & EDGE_FALLTHRU)
    probability = REG_BR_PROB_BASE - probability;

  predict_insn (last_insn, predictor, probability);
}

/* Predict edge E by the given predictor if possible.  */

void
predict_edge_def (e, predictor, taken)
     edge e;
     enum br_predictor predictor;
     enum prediction taken;
{
  int probability = predictor_info[(int) predictor].hitrate;

  if (taken != TAKEN)
    probability = REG_BR_PROB_BASE - probability;

  predict_edge (e, predictor, probability);
}

/* Invert all branch predictions or probability notes in the INSN.  This needs
   to be done each time we invert the condition used by the jump.  */

void
invert_br_probabilities (insn)
     rtx insn;
{
  rtx note;

  for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
    if (REG_NOTE_KIND (note) == REG_BR_PROB)
      XEXP (note, 0) = GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (note, 0)));
    else if (REG_NOTE_KIND (note) == REG_BR_PRED)
      XEXP (XEXP (note, 0), 1)
	= GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (XEXP (note, 0), 1)));
}

/* Dump information about the branch prediction to the output file.  */

static void
dump_prediction (predictor, probability, bb, used)
     enum br_predictor predictor;
     int probability;
     basic_block bb;
     int used;
{
  edge e = bb->succ;

  if (!rtl_dump_file)
    return;

  while (e && (e->flags & EDGE_FALLTHRU))
    e = e->succ_next;

  fprintf (rtl_dump_file, " %s heuristics%s: %.1f%%",
	   predictor_info[predictor].name,
	   used ? "" : " (ignored)", probability * 100.0 / REG_BR_PROB_BASE);

  if (bb->count)
    {
      fprintf (rtl_dump_file, " exec ");
      fprintf (rtl_dump_file, HOST_WIDEST_INT_PRINT_DEC, bb->count);
      if (e)
	{
	  fprintf (rtl_dump_file, " hit ");
	  fprintf (rtl_dump_file, HOST_WIDEST_INT_PRINT_DEC, e->count);
	  fprintf (rtl_dump_file, " (%.1f%%)", e->count * 100.0 / bb->count);
	}
    }

  fprintf (rtl_dump_file, "\n");
}

/* Combine all REG_BR_PRED notes into a single probability and attach a
   REG_BR_PROB note if not already present.  Remove the now useless
   REG_BR_PRED notes.  */

static void
combine_predictions_for_insn (insn, bb)
     rtx insn;
     basic_block bb;
{
  rtx prob_note = find_reg_note (insn, REG_BR_PROB, 0);
  rtx *pnote = &REG_NOTES (insn);
  rtx note;
  int best_probability = PROB_EVEN;
  int best_predictor = END_PREDICTORS;
  int combined_probability = REG_BR_PROB_BASE / 2;
  int d;
  bool first_match = false;
  bool found = false;

  if (rtl_dump_file)
    fprintf (rtl_dump_file, "Predictions for insn %i bb %i\n", INSN_UID (insn),
	     bb->index);

  /* We implement the "first match" heuristics and use the probability
     guessed by the predictor with the smallest index.  In the future we
     will use better probability combination techniques.  */
  for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
    if (REG_NOTE_KIND (note) == REG_BR_PRED)
      {
	int predictor = INTVAL (XEXP (XEXP (note, 0), 0));
	int probability = INTVAL (XEXP (XEXP (note, 0), 1));

	found = true;
	if (best_predictor > predictor)
	  best_probability = probability, best_predictor = predictor;

	d = (combined_probability * probability
	     + (REG_BR_PROB_BASE - combined_probability)
	     * (REG_BR_PROB_BASE - probability));

	/* Use FP math to avoid overflows of 32-bit integers.  */
	if (d == 0)
	  /* If one probability is 0% and one 100%, avoid division by zero.  */
	  combined_probability = REG_BR_PROB_BASE / 2;
	else
	  combined_probability = (((double) combined_probability) * probability
				  * REG_BR_PROB_BASE / d + 0.5);
      }
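
  /* The update above is the two-hypothesis Dempster-Shafer combination
     rule: writing p for the probability from the current note and c for
     the combined probability so far (both as fractions of
     REG_BR_PROB_BASE), the new combined value is
     p*c / (p*c + (1-p)*(1-c)).  For instance, combining two independent
     70% predictions gives 0.49 / (0.49 + 0.09) ~= 84.5%, i.e. agreeing
     heuristics reinforce each other.  */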

  /* Decide which heuristic to use.  In case we didn't match anything,
     use the no_prediction heuristic; in case we did match, use either the
     first match or Dempster-Shafer theory depending on the flags.  */

  if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
    first_match = true;

  if (!found)
    dump_prediction (PRED_NO_PREDICTION, combined_probability, bb, true);
  else
    {
      dump_prediction (PRED_DS_THEORY, combined_probability, bb, !first_match);
      dump_prediction (PRED_FIRST_MATCH, best_probability, bb, first_match);
    }

  if (first_match)
    combined_probability = best_probability;
  dump_prediction (PRED_COMBINED, combined_probability, bb, true);

  while (*pnote)
    {
      if (REG_NOTE_KIND (*pnote) == REG_BR_PRED)
	{
	  int predictor = INTVAL (XEXP (XEXP (*pnote, 0), 0));
	  int probability = INTVAL (XEXP (XEXP (*pnote, 0), 1));

	  dump_prediction (predictor, probability, bb,
			   !first_match || best_predictor == predictor);
	  *pnote = XEXP (*pnote, 1);
	}
      else
	pnote = &XEXP (*pnote, 1);
    }

  if (!prob_note)
    {
      REG_NOTES (insn)
	= gen_rtx_EXPR_LIST (REG_BR_PROB,
			     GEN_INT (combined_probability), REG_NOTES (insn));

      /* Save the prediction into the CFG in case we are looking at a
	 non-degenerated conditional jump.  */
      if (bb->succ->succ_next)
	{
	  BRANCH_EDGE (bb)->probability = combined_probability;
	  FALLTHRU_EDGE (bb)->probability
	    = REG_BR_PROB_BASE - combined_probability;
	}
    }
}

/* Statically estimate the probability that a branch will be taken.
   ??? In the next revision there will be a number of other predictors added
   from the above references.  Further, each heuristic will be factored out
   into its own function for clarity (and to facilitate the combination of
   predictions).  */

void
estimate_probability (loops_info)
     struct loops *loops_info;
{
  sbitmap *dominators, *post_dominators;
  int i;

  dominators = sbitmap_vector_alloc (n_basic_blocks, n_basic_blocks);
  post_dominators = sbitmap_vector_alloc (n_basic_blocks, n_basic_blocks);
  calculate_dominance_info (NULL, dominators, CDI_DOMINATORS);
  calculate_dominance_info (NULL, post_dominators, CDI_POST_DOMINATORS);

  /* Try to predict out blocks in a loop that are not part of a
     natural loop.  */
  for (i = 0; i < loops_info->num; i++)
    {
      int j;
      int exits;
      struct loop *loop = &loops_info->array[i];

      flow_loop_scan (loops_info, loop, LOOP_EXIT_EDGES);
      exits = loop->num_exits;

      for (j = loop->first->index; j <= loop->last->index; ++j)
	if (TEST_BIT (loop->nodes, j))
	  {
	    int header_found = 0;
	    edge e;

	    /* Bypass loop heuristics on continue statement.  These
	       statements construct loops via "non-loop" constructs
	       in the source language and are better handled
	       separately.  */
	    if (predicted_by_p (BASIC_BLOCK (j), PRED_CONTINUE))
	      continue;

	    /* Loop branch heuristics - predict an edge back to a
	       loop's head as taken.  */
	    for (e = BASIC_BLOCK (j)->succ; e; e = e->succ_next)
	      if (e->dest == loop->header
		  && e->src == loop->latch)
		{
		  header_found = 1;
		  predict_edge_def (e, PRED_LOOP_BRANCH, TAKEN);
		}

	    /* Loop exit heuristics - predict an edge exiting the loop if the
	       conditional has no loop header successors as not taken.  */
	    if (!header_found)
	      for (e = BASIC_BLOCK (j)->succ; e; e = e->succ_next)
		if (e->dest->index < 0
		    || !TEST_BIT (loop->nodes, e->dest->index))
		  predict_edge
		    (e, PRED_LOOP_EXIT,
		     (REG_BR_PROB_BASE
		      - predictor_info [(int) PRED_LOOP_EXIT].hitrate)
		     / exits);
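	    /* The total probability of leaving the loop (one minus the
	       PRED_LOOP_EXIT hitrate) is split evenly among the exit
	       edges: e.g. assuming a 90% hitrate and two exits, each exit
	       edge would be predicted taken with probability
	       (10000 - 9000) / 2 == 500, i.e. 5%.  The 90% figure is
	       purely illustrative; the real value comes from
	       predict.def.  */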
	  }
    }

  /* Attempt to predict conditional jumps using a number of heuristics.  */
  for (i = 0; i < n_basic_blocks; i++)
    {
      basic_block bb = BASIC_BLOCK (i);
      rtx last_insn = bb->end;
      rtx cond, earliest;
      edge e;

      if (GET_CODE (last_insn) != JUMP_INSN || ! any_condjump_p (last_insn))
	continue;

      for (e = bb->succ; e; e = e->succ_next)
	{
	  /* Predict early returns to be probable, as we've already taken
	     care of error returns and the others are often used for fast
	     paths through the function.  */
	  if ((e->dest == EXIT_BLOCK_PTR
	       || (e->dest->succ && !e->dest->succ->succ_next
		   && e->dest->succ->dest == EXIT_BLOCK_PTR))
	      && !predicted_by_p (bb, PRED_NULL_RETURN)
	      && !predicted_by_p (bb, PRED_CONST_RETURN)
	      && !predicted_by_p (bb, PRED_NEGATIVE_RETURN)
	      && !last_basic_block_p (e->dest))
	    predict_edge_def (e, PRED_EARLY_RETURN, TAKEN);

	  /* Look for block we are guarding (i.e. we dominate it,
	     but it doesn't postdominate us).  */
	  if (e->dest != EXIT_BLOCK_PTR && e->dest != bb
	      && TEST_BIT (dominators[e->dest->index], e->src->index)
	      && !TEST_BIT (post_dominators[e->src->index], e->dest->index))
	    {
	      rtx insn;

	      /* The call heuristic claims that a guarded function call
		 is improbable.  This is because such calls are often used
		 to signal exceptional situations such as printing error
		 messages.  */
	      for (insn = e->dest->head; insn != NEXT_INSN (e->dest->end);
		   insn = NEXT_INSN (insn))
		if (GET_CODE (insn) == CALL_INSN
		    /* Constant and pure calls are rarely used to signal
		       something exceptional.  */
		    && ! CONST_OR_PURE_CALL_P (insn))
		  {
		    predict_edge_def (e, PRED_CALL, NOT_TAKEN);
		    break;
		  }
	    }
	}

      cond = get_condition (last_insn, &earliest);
      if (! cond)
	continue;

      /* Try "pointer heuristic."
	 A comparison ptr == 0 is predicted as false.
	 Similarly, a comparison ptr1 == ptr2 is predicted as false.  */
      if (GET_RTX_CLASS (GET_CODE (cond)) == '<'
	  && ((REG_P (XEXP (cond, 0)) && REG_POINTER (XEXP (cond, 0)))
	      || (REG_P (XEXP (cond, 1)) && REG_POINTER (XEXP (cond, 1)))))
	{
	  if (GET_CODE (cond) == EQ)
	    predict_insn_def (last_insn, PRED_POINTER, NOT_TAKEN);
	  else if (GET_CODE (cond) == NE)
	    predict_insn_def (last_insn, PRED_POINTER, TAKEN);
	}
      else

      /* Try "opcode heuristic."
	 EQ tests are usually false and NE tests are usually true.  Also,
	 most quantities are positive, so we can make the appropriate guesses
	 about signed comparisons against zero.  */
	switch (GET_CODE (cond))
	  {
	  case CONST_INT:
	    /* Unconditional branch.  */
	    predict_insn_def (last_insn, PRED_UNCONDITIONAL,
			      cond == const0_rtx ? NOT_TAKEN : TAKEN);
	    break;

	  case EQ:
	  case UNEQ:
	    /* Floating point comparisons appear to behave in a very
	       unpredictable way because of the special role of equality
	       tests in FP code.  */
	    if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
	      ;
	    /* Comparisons with 0 are often used for booleans and there is
	       nothing useful to predict about them.  */
	    else if (XEXP (cond, 1) == const0_rtx
		     || XEXP (cond, 0) == const0_rtx)
	      ;
	    else
	      predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, NOT_TAKEN);
	    break;

	  case NE:
	  case LTGT:
	    /* Floating point comparisons appear to behave in a very
	       unpredictable way because of the special role of equality
	       tests in FP code.  */
	    if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
	      ;
	    /* Comparisons with 0 are often used for booleans and there is
	       nothing useful to predict about them.  */
	    else if (XEXP (cond, 1) == const0_rtx
		     || XEXP (cond, 0) == const0_rtx)
	      ;
	    else
	      predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, TAKEN);
	    break;

	  case ORDERED:
	    predict_insn_def (last_insn, PRED_FPOPCODE, TAKEN);
	    break;

	  case UNORDERED:
	    predict_insn_def (last_insn, PRED_FPOPCODE, NOT_TAKEN);
	    break;

	  case LE:
	  case LT:
	    if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
		|| XEXP (cond, 1) == constm1_rtx)
	      predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, NOT_TAKEN);
	    break;

	  case GE:
	  case GT:
	    if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
		|| XEXP (cond, 1) == constm1_rtx)
	      predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, TAKEN);
	    break;

	  default:
	    break;
	  }
    }

  /* Attach the combined probability to each conditional jump.  */
  for (i = 0; i < n_basic_blocks; i++)
    if (GET_CODE (BLOCK_END (i)) == JUMP_INSN
	&& any_condjump_p (BLOCK_END (i))
	&& BASIC_BLOCK (i)->succ->succ_next != NULL)
      combine_predictions_for_insn (BLOCK_END (i), BASIC_BLOCK (i));

  sbitmap_vector_free (post_dominators);
  sbitmap_vector_free (dominators);

  estimate_bb_frequencies (loops_info);
}
\f
/* __builtin_expect dropped tokens into the insn stream describing expected
   values of registers.  Generate branch probabilities based on these
   values.  */

void
expected_value_to_br_prob ()
{
  rtx insn, cond, ev = NULL_RTX, ev_reg = NULL_RTX;

  for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
    {
      switch (GET_CODE (insn))
	{
	case NOTE:
	  /* Look for expected value notes.  */
	  if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_EXPECTED_VALUE)
	    {
	      ev = NOTE_EXPECTED_VALUE (insn);
	      ev_reg = XEXP (ev, 0);
	      delete_insn (insn);
	    }
	  continue;

	case CODE_LABEL:
	  /* Never propagate across labels.  */
	  ev = NULL_RTX;
	  continue;

	case JUMP_INSN:
	  /* Look for simple conditional branches.  If we haven't got an
	     expected value yet, no point going further.  */
	  if (GET_CODE (insn) != JUMP_INSN || ev == NULL_RTX
	      || ! any_condjump_p (insn))
	    continue;
	  break;

	default:
	  /* Look for insns that clobber the EV register.  */
	  if (ev && reg_set_p (ev_reg, insn))
	    ev = NULL_RTX;
	  continue;
	}

      /* Collect the branch condition, hopefully relative to EV_REG.  */
      /* ???  At present we'll miss things like
		(expected_value (eq r70 0))
		(set r71 -1)
		(set r80 (lt r70 r71))
		(set pc (if_then_else (ne r80 0) ...))
	 as canonicalize_condition will render this to us as
		(lt r70, r71)
	 Could use cselib to try and reduce this further.  */
      cond = XEXP (SET_SRC (pc_set (insn)), 0);
      cond = canonicalize_condition (insn, cond, 0, NULL, ev_reg);
      if (! cond || XEXP (cond, 0) != ev_reg
	  || GET_CODE (XEXP (cond, 1)) != CONST_INT)
	continue;

      /* Substitute and simplify.  Given that the expression we're
	 building involves two constants, we should wind up with either
	 true or false.  */
      cond = gen_rtx_fmt_ee (GET_CODE (cond), VOIDmode,
			     XEXP (ev, 1), XEXP (cond, 1));
      cond = simplify_rtx (cond);

      /* Turn the condition into a scaled branch probability.  */
      if (cond != const_true_rtx && cond != const0_rtx)
	abort ();
      predict_insn_def (insn, PRED_BUILTIN_EXPECT,
			cond == const_true_rtx ? TAKEN : NOT_TAKEN);
    }
}
\f
/* Check whether this is the last basic block of the function.  Commonly
   there is one extra common cleanup block.  */
static bool
last_basic_block_p (bb)
     basic_block bb;
{
  return (bb->index == n_basic_blocks - 1
	  || (bb->index == n_basic_blocks - 2
	      && bb->succ && !bb->succ->succ_next
	      && bb->succ->dest->index == n_basic_blocks - 1));
}

/* Set the branch probabilities according to PRED and FLAGS.
   HEADS[bb->index] should be the index of the basic block in which we need
   to alter the branch predictions (i.e. the first of our dominators that we
   do not post-dominate); we fill this information in on demand, so -1 may
   be stored there in case it was not needed yet.  */

static void
process_note_prediction (bb, heads, dominators, post_dominators, pred, flags)
     basic_block bb;
     int *heads;
     int *dominators;
     sbitmap *post_dominators;
     int pred;
     int flags;
{
  edge e;
  int y;
  bool taken;

  taken = flags & IS_TAKEN;

  if (heads[bb->index] < 0)
    {
      /* This is the first time we need this field in the heads array, so
	 find the first dominator that we do not post-dominate (we are
	 using already known members of the heads array).  */
      int ai = bb->index;
      int next_ai = dominators[bb->index];
      int head;

      while (heads[next_ai] < 0)
	{
	  if (!TEST_BIT (post_dominators[next_ai], bb->index))
	    break;
	  heads[next_ai] = ai;
	  ai = next_ai;
	  next_ai = dominators[next_ai];
	}
      if (!TEST_BIT (post_dominators[next_ai], bb->index))
	head = next_ai;
      else
	head = heads[next_ai];
      while (next_ai != bb->index)
	{
	  next_ai = ai;
	  ai = heads[ai];
	  heads[next_ai] = head;
	}
    }
  y = heads[bb->index];

  /* Now find the edge that leads to our branch and apply the prediction.  */

  if (y == n_basic_blocks)
    return;
  for (e = BASIC_BLOCK (y)->succ; e; e = e->succ_next)
    if (e->dest->index >= 0
	&& TEST_BIT (post_dominators[e->dest->index], bb->index))
      predict_edge_def (e, pred, taken);
}

/* Gathers NOTE_INSN_PREDICTIONs in the given basic block and turns them
   into branch probabilities.  For a description of the heads array, see
   process_note_prediction.  */

static void
process_note_predictions (bb, heads, dominators, post_dominators)
     basic_block bb;
     int *heads;
     int *dominators;
     sbitmap *post_dominators;
{
  rtx insn;
  edge e;

  /* Additionally, we check here for blocks with no successors.  */
  int contained_noreturn_call = 0;
  int was_bb_head = 0;
  int noreturn_block = 1;

  for (insn = bb->end; insn;
       was_bb_head |= (insn == bb->head), insn = PREV_INSN (insn))
    {
      if (GET_CODE (insn) != NOTE)
	{
	  if (was_bb_head)
	    break;
	  else
	    {
	      /* Noreturn calls cause the program to exit, therefore they are
		 always predicted as not taken.  */
	      if (GET_CODE (insn) == CALL_INSN
		  && find_reg_note (insn, REG_NORETURN, NULL))
		contained_noreturn_call = 1;
	      continue;
	    }
	}
      if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_PREDICTION)
	{
	  int alg = (int) NOTE_PREDICTION_ALG (insn);
	  /* Process single prediction note.  */
	  process_note_prediction (bb,
				   heads,
				   dominators,
				   post_dominators,
				   alg, (int) NOTE_PREDICTION_FLAGS (insn));
	  delete_insn (insn);
	}
    }
  for (e = bb->succ; e; e = e->succ_next)
    if (!(e->flags & EDGE_FAKE))
      noreturn_block = 0;
  if (contained_noreturn_call)
    {
      /* This block ended for reasons other than a return.  If it is
	 because of a noreturn call, this branch should certainly not
	 be taken.  Otherwise it is probably some error recovery.  */
      process_note_prediction (bb,
			       heads,
			       dominators,
			       post_dominators, PRED_NORETURN, NOT_TAKEN);
    }
}

/* Gathers NOTE_INSN_PREDICTIONs and turns them into
   branch probabilities.  */

void
note_prediction_to_br_prob ()
{
  int i;
  sbitmap *post_dominators;
  int *dominators, *heads;

  /* To enable handling of noreturn blocks.  */
  add_noreturn_fake_exit_edges ();
  connect_infinite_loops_to_exit ();

  dominators = xmalloc (sizeof (int) * n_basic_blocks);
  memset (dominators, -1, sizeof (int) * n_basic_blocks);
  post_dominators = sbitmap_vector_alloc (n_basic_blocks, n_basic_blocks);
  calculate_dominance_info (NULL, post_dominators, CDI_POST_DOMINATORS);
  calculate_dominance_info (dominators, NULL, CDI_DOMINATORS);

  heads = xmalloc (sizeof (int) * n_basic_blocks);
  memset (heads, -1, sizeof (int) * n_basic_blocks);
  heads[0] = n_basic_blocks;

  /* Process all prediction notes.  */

  for (i = 0; i < n_basic_blocks; ++i)
    {
      basic_block bb = BASIC_BLOCK (i);
      process_note_predictions (bb, heads, dominators, post_dominators);
    }

  sbitmap_vector_free (post_dominators);
  free (dominators);
  free (heads);

  remove_fake_edges ();
}
\f
/* This is used to carry information about basic blocks.  It is
   attached to the AUX field of the standard CFG block.  */

typedef struct block_info_def
{
  /* Estimated frequency of execution of basic_block.  */
  REAL_VALUE_TYPE frequency;

  /* To keep queue of basic blocks to process.  */
  basic_block next;

  /* True if block needs to be visited in propagate_freq.  */
  int tovisit:1;

  /* Number of predecessors we need to visit first.  */
  int npredecessors;
} *block_info;

/* Similar information for edges.  */
typedef struct edge_info_def
{
  /* In case the edge is a loopback edge, the probability that the edge
     will be reached in case the header is.  The estimated number of
     iterations of the loop can then be computed as
     1 / (1 - back_edge_prob).  */
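  /* (For instance, a back_edge_prob of 0.9 corresponds to an estimated
     1 / (1 - 0.9) == 10 loop iterations.)  */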
  REAL_VALUE_TYPE back_edge_prob;
  /* True if the edge is a loopback edge in the natural loop.  */
  int back_edge:1;
} *edge_info;

#define BLOCK_INFO(B)	((block_info) (B)->aux)
#define EDGE_INFO(E)	((edge_info) (E)->aux)

/* Helper function for estimate_bb_frequencies.
   Propagate the frequencies for loops headed by HEAD.  */

static void
propagate_freq (head)
     basic_block head;
{
  basic_block bb = head;
  basic_block last = bb;
  edge e;
  basic_block nextbb;
  int n;

  /* For each basic block we need to visit, count the number of its
     predecessors that we need to visit first.  */
  for (n = 0; n < n_basic_blocks; n++)
    {
      basic_block bb = BASIC_BLOCK (n);
      if (BLOCK_INFO (bb)->tovisit)
	{
	  int count = 0;

	  for (e = bb->pred; e; e = e->pred_next)
	    if (BLOCK_INFO (e->src)->tovisit && !(e->flags & EDGE_DFS_BACK))
	      count++;
	    else if (BLOCK_INFO (e->src)->tovisit
		     && rtl_dump_file && !EDGE_INFO (e)->back_edge)
	      fprintf (rtl_dump_file,
		       "Irreducible region hit, ignoring edge to %i->%i\n",
		       e->src->index, bb->index);
	  BLOCK_INFO (bb)->npredecessors = count;
	}
    }

  memcpy (&BLOCK_INFO (head)->frequency, &real_one, sizeof (real_one));
  for (; bb; bb = nextbb)
    {
      REAL_VALUE_TYPE cyclic_probability, frequency;

      memcpy (&cyclic_probability, &real_zero, sizeof (real_zero));
      memcpy (&frequency, &real_zero, sizeof (real_zero));

      nextbb = BLOCK_INFO (bb)->next;
      BLOCK_INFO (bb)->next = NULL;

      /* Compute frequency of basic block.  */
      if (bb != head)
	{
#ifdef ENABLE_CHECKING
	  for (e = bb->pred; e; e = e->pred_next)
	    if (BLOCK_INFO (e->src)->tovisit && !(e->flags & EDGE_DFS_BACK))
	      abort ();
#endif

	  for (e = bb->pred; e; e = e->pred_next)
	    if (EDGE_INFO (e)->back_edge)
	      {
		REAL_ARITHMETIC (cyclic_probability, PLUS_EXPR,
				 cyclic_probability,
				 EDGE_INFO (e)->back_edge_prob);
	      }
	    else if (!(e->flags & EDGE_DFS_BACK))
	      {
		REAL_VALUE_TYPE tmp;

		/* frequency += (e->probability
				 * BLOCK_INFO (e->src)->frequency
				 / REG_BR_PROB_BASE);  */

		REAL_VALUE_FROM_INT (tmp, e->probability, 0,
				     TYPE_MODE (double_type_node));
		REAL_ARITHMETIC (tmp, MULT_EXPR, tmp,
				 BLOCK_INFO (e->src)->frequency);
		REAL_ARITHMETIC (tmp, RDIV_EXPR, tmp, real_br_prob_base);
		REAL_ARITHMETIC (frequency, PLUS_EXPR, frequency, tmp);
	      }

	  if (REAL_VALUES_LESS (real_almost_one, cyclic_probability))
	    memcpy (&cyclic_probability, &real_almost_one, sizeof (real_zero));

	  /* BLOCK_INFO (bb)->frequency
	       = frequency / (1 - cyclic_probability)  */

	  REAL_ARITHMETIC (cyclic_probability, MINUS_EXPR, real_one,
			   cyclic_probability);
	  REAL_ARITHMETIC (BLOCK_INFO (bb)->frequency,
			   RDIV_EXPR, frequency, cyclic_probability);
	}
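
      /* The division above is the usual closed form for a loop: if the
	 incoming (acyclic) frequency is f and the total probability of
	 the back edges is c, the block executes f * (1 + c + c^2 + ...)
	 = f / (1 - c) times.  For instance, a body entered with frequency
	 1 whose back edge is taken with probability 0.75 gets frequency
	 1 / (1 - 0.75) == 4.  Clamping c to real_almost_one above keeps
	 the result finite when predictions claim the loop never exits.  */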

      BLOCK_INFO (bb)->tovisit = 0;

      /* Compute back edge frequencies.  */
      for (e = bb->succ; e; e = e->succ_next)
	if (e->dest == head)
	  {
	    REAL_VALUE_TYPE tmp;

	    /* EDGE_INFO (e)->back_edge_prob
		 = ((e->probability * BLOCK_INFO (bb)->frequency)
		    / REG_BR_PROB_BASE);  */
	    REAL_VALUE_FROM_INT (tmp, e->probability, 0,
				 TYPE_MODE (double_type_node));
	    REAL_ARITHMETIC (tmp, MULT_EXPR, tmp,
			     BLOCK_INFO (bb)->frequency);
	    REAL_ARITHMETIC (EDGE_INFO (e)->back_edge_prob,
			     RDIV_EXPR, tmp, real_br_prob_base);
	  }

      /* Propagate to successor blocks.  */
      for (e = bb->succ; e; e = e->succ_next)
	if (!(e->flags & EDGE_DFS_BACK)
	    && BLOCK_INFO (e->dest)->npredecessors)
	  {
	    BLOCK_INFO (e->dest)->npredecessors--;
	    if (!BLOCK_INFO (e->dest)->npredecessors)
	      {
		if (!nextbb)
		  nextbb = e->dest;
		else
		  BLOCK_INFO (last)->next = e->dest;

		last = e->dest;
	      }
	  }
    }
}

/* Estimate probabilities of loopback edges in loops at the same nest
   level.  */

static void
estimate_loops_at_level (first_loop)
     struct loop *first_loop;
{
  struct loop *l, *loop = first_loop;

  for (loop = first_loop; loop; loop = loop->next)
    {
      int n;
      edge e;

      estimate_loops_at_level (loop->inner);

      /* Find current loop back edge and mark it.  */
      for (e = loop->latch->succ; e->dest != loop->header; e = e->succ_next)
	;

      EDGE_INFO (e)->back_edge = 1;

      /* In case the loop header is shared, ensure that we process only the
	 last loop sharing the same header, so we avoid redundant work.  */
      if (loop->shared)
	{
	  for (l = loop->next; l; l = l->next)
	    if (l->header == loop->header)
	      break;

	  if (l)
	    continue;
	}

      /* Now mark all nodes of all loops with the given header as to be
	 visited.  */
      for (l = loop->shared ? first_loop : loop; l != loop->next; l = l->next)
	if (loop->header == l->header)
	  EXECUTE_IF_SET_IN_SBITMAP (l->nodes, 0, n,
				     BLOCK_INFO (BASIC_BLOCK (n))->tovisit = 1
				     );

      propagate_freq (loop->header);
    }
}

/* Convert counts measured by profile driven feedback to frequencies.  */

static void
counts_to_freqs ()
{
  HOST_WIDEST_INT count_max = 1;
  int i;

  for (i = 0; i < n_basic_blocks; i++)
    count_max = MAX (BASIC_BLOCK (i)->count, count_max);

  for (i = -2; i < n_basic_blocks; i++)
    {
      basic_block bb;

      if (i == -2)
	bb = ENTRY_BLOCK_PTR;
      else if (i == -1)
	bb = EXIT_BLOCK_PTR;
      else
	bb = BASIC_BLOCK (i);

      bb->frequency = (bb->count * BB_FREQ_MAX + count_max / 2) / count_max;
    }
}
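
/* The scaling above maps the hottest block to BB_FREQ_MAX with rounding:
   e.g. with count_max == 1000 and BB_FREQ_MAX == 10000, a block executed
   250 times gets frequency (250 * 10000 + 500) / 1000 == 2500.  */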

/* Return true if the function is likely to be expensive, so there is no
   point in optimizing performance of the prologue or epilogue, or in doing
   inlining, at the expense of code size growth.  THRESHOLD is the limit of
   the number of instructions the function can execute on average to still
   be considered not expensive.  */

bool
expensive_function_p (threshold)
     int threshold;
{
  unsigned int sum = 0;
  int i;
  unsigned int limit;

  /* We cannot compute this accurately for large thresholds due to the
     scaled frequencies.  */
  if (threshold > BB_FREQ_MAX)
    abort ();

  /* Frequencies are out of range.  This either means that the function
     contains an internal loop executing more than BB_FREQ_MAX times, or
     that profile feedback is available and the function has not been
     executed at all.  */
  if (ENTRY_BLOCK_PTR->frequency == 0)
    return true;

  /* Maximally BB_FREQ_MAX^2 so overflow won't happen.  */
  limit = ENTRY_BLOCK_PTR->frequency * threshold;
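
  /* Each active insn contributes its block's frequency to SUM below, so
     SUM approximates (insns executed per invocation) times (entry
     frequency).  Comparing against ENTRY_BLOCK_PTR->frequency * THRESHOLD
     therefore asks whether the function executes more than THRESHOLD
     insns per invocation on average.  */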
  for (i = 0; i < n_basic_blocks; i++)
    {
      basic_block bb = BASIC_BLOCK (i);
      rtx insn;

      for (insn = bb->head; insn != NEXT_INSN (bb->end);
	   insn = NEXT_INSN (insn))
	if (active_insn_p (insn))
	  {
	    sum += bb->frequency;
	    if (sum > limit)
	      return true;
	  }
    }

  return false;
}

/* Estimate basic block frequencies from the given branch probabilities.  */

static void
estimate_bb_frequencies (loops)
     struct loops *loops;
{
  int i;
  REAL_VALUE_TYPE freq_max;
  enum machine_mode double_mode = TYPE_MODE (double_type_node);

  if (flag_branch_probabilities)
    counts_to_freqs ();
  else
    {
      REAL_VALUE_FROM_INT (real_zero, 0, 0, double_mode);
      REAL_VALUE_FROM_INT (real_one, 1, 0, double_mode);
      REAL_VALUE_FROM_INT (real_br_prob_base, REG_BR_PROB_BASE, 0, double_mode);
      REAL_VALUE_FROM_INT (real_bb_freq_max, BB_FREQ_MAX, 0, double_mode);
      REAL_VALUE_FROM_INT (real_one_half, 2, 0, double_mode);

      REAL_ARITHMETIC (real_one_half, RDIV_EXPR, real_one, real_one_half);

      REAL_ARITHMETIC (real_almost_one, RDIV_EXPR, real_one, real_br_prob_base);
      REAL_ARITHMETIC (real_almost_one, MINUS_EXPR, real_one, real_almost_one);

      mark_dfs_back_edges ();
      /* Fill in the probability values in the flowgraph based on the
	 REG_BR_PROB notes.  */
      for (i = 0; i < n_basic_blocks; i++)
	{
	  rtx last_insn = BLOCK_END (i);

	  if (GET_CODE (last_insn) != JUMP_INSN || !any_condjump_p (last_insn)
	      /* Avoid handling of conditional jumps jumping to the fallthru
		 edge.  */
	      || BASIC_BLOCK (i)->succ->succ_next == NULL)
	    {
	      /* We can predict only conditional jumps at the moment.
		 Expect each edge to be equally probable.
		 ??? In the future we want to make abnormal edges
		 improbable.  */
	      int nedges = 0;
	      edge e;

	      for (e = BASIC_BLOCK (i)->succ; e; e = e->succ_next)
		{
		  nedges++;
		  if (e->probability != 0)
		    break;
		}
	      if (!e)
		for (e = BASIC_BLOCK (i)->succ; e; e = e->succ_next)
		  e->probability = (REG_BR_PROB_BASE + nedges / 2) / nedges;
	    }
	}

      ENTRY_BLOCK_PTR->succ->probability = REG_BR_PROB_BASE;

      /* Set up block info for each basic block.  */
      alloc_aux_for_blocks (sizeof (struct block_info_def));
      alloc_aux_for_edges (sizeof (struct edge_info_def));
      for (i = -2; i < n_basic_blocks; i++)
	{
	  edge e;
	  basic_block bb;

	  if (i == -2)
	    bb = ENTRY_BLOCK_PTR;
	  else if (i == -1)
	    bb = EXIT_BLOCK_PTR;
	  else
	    bb = BASIC_BLOCK (i);

	  BLOCK_INFO (bb)->tovisit = 0;
	  for (e = bb->succ; e; e = e->succ_next)
	    {
	      REAL_VALUE_FROM_INT (EDGE_INFO (e)->back_edge_prob,
				   e->probability, 0, double_mode);
	      REAL_ARITHMETIC (EDGE_INFO (e)->back_edge_prob,
			       RDIV_EXPR, EDGE_INFO (e)->back_edge_prob,
			       real_br_prob_base);
	    }
	}

      /* First compute probabilities locally for each loop from innermost
	 to outermost to examine probabilities for back edges.  */
      estimate_loops_at_level (loops->tree_root);

      /* Now fake a loop around the whole function to finalize
	 probabilities.  */
      for (i = 0; i < n_basic_blocks; i++)
	BLOCK_INFO (BASIC_BLOCK (i))->tovisit = 1;

      BLOCK_INFO (ENTRY_BLOCK_PTR)->tovisit = 1;
      BLOCK_INFO (EXIT_BLOCK_PTR)->tovisit = 1;
      propagate_freq (ENTRY_BLOCK_PTR);

      memcpy (&freq_max, &real_zero, sizeof (real_zero));
      for (i = 0; i < n_basic_blocks; i++)
	if (REAL_VALUES_LESS
	    (freq_max, BLOCK_INFO (BASIC_BLOCK (i))->frequency))
	  memcpy (&freq_max, &BLOCK_INFO (BASIC_BLOCK (i))->frequency,
		  sizeof (freq_max));

      for (i = -2; i < n_basic_blocks; i++)
	{
	  basic_block bb;
	  REAL_VALUE_TYPE tmp;

	  if (i == -2)
	    bb = ENTRY_BLOCK_PTR;
	  else if (i == -1)
	    bb = EXIT_BLOCK_PTR;
	  else
	    bb = BASIC_BLOCK (i);

	  REAL_ARITHMETIC (tmp, MULT_EXPR, BLOCK_INFO (bb)->frequency,
			   real_bb_freq_max);
	  REAL_ARITHMETIC (tmp, RDIV_EXPR, tmp, freq_max);
	  REAL_ARITHMETIC (tmp, PLUS_EXPR, tmp, real_one_half);
	  bb->frequency = REAL_VALUE_UNSIGNED_FIX (tmp);
	}

      free_aux_for_blocks ();
      free_aux_for_edges ();
    }
  compute_function_frequency ();
  if (flag_reorder_functions)
    choose_function_section ();
}

/* Decide whether the function is hot, cold or unlikely to be executed.  */
static void
compute_function_frequency ()
{
  int i;
  if (!profile_info.count_profiles_merged
      || !flag_branch_probabilities)
    return;
  cfun->function_frequency = FUNCTION_FREQUENCY_UNLIKELY_EXECUTED;
  for (i = 0; i < n_basic_blocks; i++)
    {
      basic_block bb = BASIC_BLOCK (i);
      if (maybe_hot_bb_p (bb))
	{
	  cfun->function_frequency = FUNCTION_FREQUENCY_HOT;
	  return;
	}
      if (!probably_never_executed_bb_p (bb))
	cfun->function_frequency = FUNCTION_FREQUENCY_NORMAL;
    }
}

/* Choose an appropriate section for the function.  */
static void
choose_function_section ()
{
  if (DECL_SECTION_NAME (current_function_decl)
      || !targetm.have_named_sections)
    return;
  if (cfun->function_frequency == FUNCTION_FREQUENCY_HOT)
    DECL_SECTION_NAME (current_function_decl) =
      build_string (strlen (HOT_TEXT_SECTION_NAME), HOT_TEXT_SECTION_NAME);
  if (cfun->function_frequency == FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
    DECL_SECTION_NAME (current_function_decl) =
      build_string (strlen (UNLIKELY_EXECUTED_TEXT_SECTION_NAME),
		    UNLIKELY_EXECUTED_TEXT_SECTION_NAME);
}