predict.def: Set outcomes according to more recent results.
[gcc.git] / gcc / predict.c
1 /* Branch prediction routines for the GNU compiler.
2 Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005
3 Free Software Foundation, Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
20 02110-1301, USA. */
21
22 /* References:
23
24 [1] "Branch Prediction for Free"
25 Ball and Larus; PLDI '93.
26 [2] "Static Branch Frequency and Program Profile Analysis"
27 Wu and Larus; MICRO-27.
28 [3] "Corpus-based Static Branch Prediction"
29 Calder, Grunwald, Lindsay, Martin, Mozer, and Zorn; PLDI '95. */
30
31
32 #include "config.h"
33 #include "system.h"
34 #include "coretypes.h"
35 #include "tm.h"
36 #include "tree.h"
37 #include "rtl.h"
38 #include "tm_p.h"
39 #include "hard-reg-set.h"
40 #include "basic-block.h"
41 #include "insn-config.h"
42 #include "regs.h"
43 #include "flags.h"
44 #include "output.h"
45 #include "function.h"
46 #include "except.h"
47 #include "toplev.h"
48 #include "recog.h"
49 #include "expr.h"
50 #include "predict.h"
51 #include "coverage.h"
52 #include "sreal.h"
53 #include "params.h"
54 #include "target.h"
55 #include "cfgloop.h"
56 #include "tree-flow.h"
57 #include "ggc.h"
58 #include "tree-dump.h"
59 #include "tree-pass.h"
60 #include "timevar.h"
61 #include "tree-scalar-evolution.h"
62 #include "cfgloop.h"
63
64 /* real constants: 0, 1, 1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE,
65 1/REG_BR_PROB_BASE, 0.5, BB_FREQ_MAX. */
66 static sreal real_zero, real_one, real_almost_one, real_br_prob_base,
67 real_inv_br_prob_base, real_one_half, real_bb_freq_max;
68
69 /* Random probability guesstimates, given names. */
70 #define PROB_VERY_UNLIKELY (REG_BR_PROB_BASE / 100 - 1)
71 #define PROB_EVEN (REG_BR_PROB_BASE / 2)
72 #define PROB_VERY_LIKELY (REG_BR_PROB_BASE - PROB_VERY_UNLIKELY)
73 #define PROB_ALWAYS (REG_BR_PROB_BASE)
74
75 static void combine_predictions_for_insn (rtx, basic_block);
76 static void dump_prediction (FILE *, enum br_predictor, int, basic_block, int);
77 static void predict_paths_leading_to (basic_block, int *, enum br_predictor, enum prediction);
78 static bool last_basic_block_p (basic_block);
79 static void compute_function_frequency (void);
80 static void choose_function_section (void);
81 static bool can_predict_insn_p (rtx);
82
83 /* Information we hold about each branch predictor.
84 Filled using information from predict.def. */
85
86 struct predictor_info
87 {
88 const char *const name; /* Name used in the debugging dumps. */
89 const int hitrate; /* Expected hitrate used by
90 predict_insn_def call. */
91 const int flags;
92 };
93
94 /* Use the given predictor without Dempster-Shafer theory if it matches
95 using the first_match heuristics. */
96 #define PRED_FLAG_FIRST_MATCH 1
97
98 /* Translate a hit rate given in percent into our fixed-point representation. */
99
100 #define HITRATE(VAL) ((int) ((VAL) * REG_BR_PROB_BASE + 50) / 100)
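/* For illustration (not part of the compiler proper): with
   REG_BR_PROB_BASE of 10000, HITRATE (99) expands to
   ((int) (99 * 10000 + 50) / 100) == 9900 and HITRATE (1) to 100,
   so hit rates map onto a 0..10000 fixed-point scale.  */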
101
102 #define DEF_PREDICTOR(ENUM, NAME, HITRATE, FLAGS) {NAME, HITRATE, FLAGS},
103 static const struct predictor_info predictor_info[]= {
104 #include "predict.def"
105
106 /* Upper bound on predictors. */
107 {NULL, 0, 0}
108 };
109 #undef DEF_PREDICTOR
110
111 /* Return true if BB can be CPU intensive and should be optimized
112 for maximal performance. */
113
114 bool
115 maybe_hot_bb_p (basic_block bb)
116 {
117 if (profile_info && flag_branch_probabilities
118 && (bb->count
119 < profile_info->sum_max / PARAM_VALUE (HOT_BB_COUNT_FRACTION)))
120 return false;
121 if (bb->frequency < BB_FREQ_MAX / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION))
122 return false;
123 return true;
124 }
125
126 /* Return true if BB is cold and should be optimized for size. */
127
128 bool
129 probably_cold_bb_p (basic_block bb)
130 {
131 if (profile_info && flag_branch_probabilities
132 && (bb->count
133 < profile_info->sum_max / PARAM_VALUE (HOT_BB_COUNT_FRACTION)))
134 return true;
135 if (bb->frequency < BB_FREQ_MAX / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION))
136 return true;
137 return false;
138 }
139
140 /* Return true if BB is probably never executed. */
141 bool
142 probably_never_executed_bb_p (basic_block bb)
143 {
144 if (profile_info && flag_branch_probabilities)
145 return ((bb->count + profile_info->runs / 2) / profile_info->runs) == 0;
146 return false;
147 }
148
149 /* Return true if one of the outgoing edges is already predicted by
150 PREDICTOR. */
151
152 bool
153 rtl_predicted_by_p (basic_block bb, enum br_predictor predictor)
154 {
155 rtx note;
156 if (!INSN_P (BB_END (bb)))
157 return false;
158 for (note = REG_NOTES (BB_END (bb)); note; note = XEXP (note, 1))
159 if (REG_NOTE_KIND (note) == REG_BR_PRED
160 && INTVAL (XEXP (XEXP (note, 0), 0)) == (int)predictor)
161 return true;
162 return false;
163 }
164
165 /* Return true if one of the outgoing edges is already predicted by
166 PREDICTOR. */
167
168 bool
169 tree_predicted_by_p (basic_block bb, enum br_predictor predictor)
170 {
171 struct edge_prediction *i;
172 for (i = bb->predictions; i; i = i->ep_next)
173 if (i->ep_predictor == predictor)
174 return true;
175 return false;
176 }
177
178 /* Return true when the probability of edge is reliable.
179
180 The profile guessing code is good at predicting branch outcome (i.e.
181 taken/not taken), which it gets right slightly over 75% of the time.
182 It is however notoriously poor at predicting the probability itself.
183 In general the guessed profile appears a lot flatter (with probabilities closer
184 to 50%) than reality, so it is a bad idea to use it to drive optimizations
185 such as those disabling dynamic branch prediction for well predictable
186 branches.
187
188 There are two exceptions - edges leading to noreturn blocks and edges
189 predicted by the number of iterations heuristics are predicted well. This
190 predicate should be able to distinguish those, but at the moment it simply
191 checks for the noreturn heuristic, which is the only one giving a probability
192 over 99% or below 1%. In the future we might want to propagate reliability
193 information across the CFG if we find it useful in multiple places. */
194 static bool
195 probability_reliable_p (int prob)
196 {
197 return (profile_status == PROFILE_READ
198 || (profile_status == PROFILE_GUESSED
199 && (prob <= HITRATE (1) || prob >= HITRATE (99))));
200 }
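/* For illustration (not part of the compiler proper): with
   REG_BR_PROB_BASE of 10000, HITRATE (1) == 100 and HITRATE (99) == 9900,
   so a guessed probability is considered reliable only when it is at
   most 1% or at least 99% - e.g. 9950/10000 passes, 7500/10000 does not.  */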
201
202 /* Same predicate as above, working on edges. */
203 bool
204 edge_probability_reliable_p (edge e)
205 {
206 return probability_reliable_p (e->probability);
207 }
208
209 /* Same predicate as edge_probability_reliable_p, working on notes. */
210 bool
211 br_prob_note_reliable_p (rtx note)
212 {
213 gcc_assert (REG_NOTE_KIND (note) == REG_BR_PROB);
214 return probability_reliable_p (INTVAL (XEXP (note, 0)));
215 }
216
217 static void
218 predict_insn (rtx insn, enum br_predictor predictor, int probability)
219 {
220 gcc_assert (any_condjump_p (insn));
221 if (!flag_guess_branch_prob)
222 return;
223
224 REG_NOTES (insn)
225 = gen_rtx_EXPR_LIST (REG_BR_PRED,
226 gen_rtx_CONCAT (VOIDmode,
227 GEN_INT ((int) predictor),
228 GEN_INT ((int) probability)),
229 REG_NOTES (insn));
230 }
231
232 /* Predict INSN by the given predictor. */
233
234 void
235 predict_insn_def (rtx insn, enum br_predictor predictor,
236 enum prediction taken)
237 {
238 int probability = predictor_info[(int) predictor].hitrate;
239
240 if (taken != TAKEN)
241 probability = REG_BR_PROB_BASE - probability;
242
243 predict_insn (insn, predictor, probability);
244 }
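/* For illustration (not part of the compiler proper): a predictor whose
   predict.def hitrate is HITRATE (90) == 9000 yields probability 9000
   for TAKEN and 10000 - 9000 == 1000 for NOT_TAKEN.  */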
245
246 /* Predict edge E with the given probability if possible. */
247
248 void
249 rtl_predict_edge (edge e, enum br_predictor predictor, int probability)
250 {
251 rtx last_insn;
252 last_insn = BB_END (e->src);
253
254 /* We can store branch prediction information only for
255 conditional jumps. */
256 if (!any_condjump_p (last_insn))
257 return;
258
259 /* We always store probability of branching. */
260 if (e->flags & EDGE_FALLTHRU)
261 probability = REG_BR_PROB_BASE - probability;
262
263 predict_insn (last_insn, predictor, probability);
264 }
265
266 /* Predict edge E with the given PROBABILITY. */
267 void
268 tree_predict_edge (edge e, enum br_predictor predictor, int probability)
269 {
270 gcc_assert (profile_status != PROFILE_GUESSED);
271 if ((e->src != ENTRY_BLOCK_PTR && EDGE_COUNT (e->src->succs) > 1)
272 && flag_guess_branch_prob && optimize)
273 {
274 struct edge_prediction *i = ggc_alloc (sizeof (struct edge_prediction));
275
276 i->ep_next = e->src->predictions;
277 e->src->predictions = i;
278 i->ep_probability = probability;
279 i->ep_predictor = predictor;
280 i->ep_edge = e;
281 }
282 }
283
284 /* Remove all predictions on given basic block that are attached
285 to edge E. */
286 void
287 remove_predictions_associated_with_edge (edge e)
288 {
289 if (e->src->predictions)
290 {
291 struct edge_prediction **prediction = &e->src->predictions;
292 while (*prediction)
293 {
294 if ((*prediction)->ep_edge == e)
295 *prediction = (*prediction)->ep_next;
296 else
297 prediction = &((*prediction)->ep_next);
298 }
299 }
300 }
301
302 /* Return true when we can store a prediction on insn INSN.
303 At the moment we represent predictions only on conditional
304 jumps, not on computed jumps or other complicated cases. */
305 static bool
306 can_predict_insn_p (rtx insn)
307 {
308 return (JUMP_P (insn)
309 && any_condjump_p (insn)
310 && EDGE_COUNT (BLOCK_FOR_INSN (insn)->succs) >= 2);
311 }
312
313 /* Predict edge E by the given predictor if possible. */
314
315 void
316 predict_edge_def (edge e, enum br_predictor predictor,
317 enum prediction taken)
318 {
319 int probability = predictor_info[(int) predictor].hitrate;
320
321 if (taken != TAKEN)
322 probability = REG_BR_PROB_BASE - probability;
323
324 predict_edge (e, predictor, probability);
325 }
326
327 /* Invert all branch predictions or probability notes in the INSN. This needs
328 to be done each time we invert the condition used by the jump. */
329
330 void
331 invert_br_probabilities (rtx insn)
332 {
333 rtx note;
334
335 for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
336 if (REG_NOTE_KIND (note) == REG_BR_PROB)
337 XEXP (note, 0) = GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (note, 0)));
338 else if (REG_NOTE_KIND (note) == REG_BR_PRED)
339 XEXP (XEXP (note, 0), 1)
340 = GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (XEXP (note, 0), 1)));
341 }
342
343 /* Dump information about the branch prediction to the output file. */
344
345 static void
346 dump_prediction (FILE *file, enum br_predictor predictor, int probability,
347 basic_block bb, int used)
348 {
349 edge e;
350 edge_iterator ei;
351
352 if (!file)
353 return;
354
355 FOR_EACH_EDGE (e, ei, bb->succs)
356 if (! (e->flags & EDGE_FALLTHRU))
357 break;
358
359 fprintf (file, " %s heuristics%s: %.1f%%",
360 predictor_info[predictor].name,
361 used ? "" : " (ignored)", probability * 100.0 / REG_BR_PROB_BASE);
362
363 if (bb->count)
364 {
365 fprintf (file, " exec ");
366 fprintf (file, HOST_WIDEST_INT_PRINT_DEC, bb->count);
367 if (e)
368 {
369 fprintf (file, " hit ");
370 fprintf (file, HOST_WIDEST_INT_PRINT_DEC, e->count);
371 fprintf (file, " (%.1f%%)", e->count * 100.0 / bb->count);
372 }
373 }
374
375 fprintf (file, "\n");
376 }
377
378 /* We cannot predict the probabilities of the outgoing edges of BB. Set them
379 evenly and hope for the best. */
380 static void
381 set_even_probabilities (basic_block bb)
382 {
383 int nedges = 0;
384 edge e;
385 edge_iterator ei;
386
387 FOR_EACH_EDGE (e, ei, bb->succs)
388 if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
389 nedges ++;
390 FOR_EACH_EDGE (e, ei, bb->succs)
391 if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
392 e->probability = (REG_BR_PROB_BASE + nedges / 2) / nedges;
393 else
394 e->probability = 0;
395 }
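/* For illustration (not part of the compiler proper): with three
   non-EH, non-fake successors each edge gets
   (10000 + 3/2) / 3 == 3333 out of REG_BR_PROB_BASE; the nedges / 2
   term rounds the division to nearest.  */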
396
397 /* Combine all REG_BR_PRED notes into a single probability and attach a REG_BR_PROB
398 note if not already present. Remove the now useless REG_BR_PRED notes. */
399
400 static void
401 combine_predictions_for_insn (rtx insn, basic_block bb)
402 {
403 rtx prob_note;
404 rtx *pnote;
405 rtx note;
406 int best_probability = PROB_EVEN;
407 int best_predictor = END_PREDICTORS;
408 int combined_probability = REG_BR_PROB_BASE / 2;
409 int d;
410 bool first_match = false;
411 bool found = false;
412
413 if (!can_predict_insn_p (insn))
414 {
415 set_even_probabilities (bb);
416 return;
417 }
418
419 prob_note = find_reg_note (insn, REG_BR_PROB, 0);
420 pnote = &REG_NOTES (insn);
421 if (dump_file)
422 fprintf (dump_file, "Predictions for insn %i bb %i\n", INSN_UID (insn),
423 bb->index);
424
425 /* We implement the "first match" heuristic and use the probability guessed
426 by the predictor with the smallest index. */
427 for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
428 if (REG_NOTE_KIND (note) == REG_BR_PRED)
429 {
430 int predictor = INTVAL (XEXP (XEXP (note, 0), 0));
431 int probability = INTVAL (XEXP (XEXP (note, 0), 1));
432
433 found = true;
434 if (best_predictor > predictor)
435 best_probability = probability, best_predictor = predictor;
436
437 d = (combined_probability * probability
438 + (REG_BR_PROB_BASE - combined_probability)
439 * (REG_BR_PROB_BASE - probability));
440
441 /* Use FP math to avoid overflows of 32-bit integers. */
442 if (d == 0)
443 /* If one probability is 0% and one 100%, avoid division by zero. */
444 combined_probability = REG_BR_PROB_BASE / 2;
445 else
446 combined_probability = (((double) combined_probability) * probability
447 * REG_BR_PROB_BASE / d + 0.5);
448 }
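/* For illustration (not part of the compiler proper): combining a running
   probability of 0.7 with a new prediction of 0.8 by the rule above gives
   d = 0.7*0.8 + 0.3*0.2 = 0.62 and 0.7*0.8 / 0.62 ~= 0.90, so agreeing
   predictors reinforce each other, while a 0.5 prediction leaves the
   combined probability unchanged.  */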
449
450 /* Decide which heuristic to use. If we didn't match anything, use the
451 no_prediction heuristic; if we did, use either first match or
452 Dempster-Shafer theory depending on the flags. */
453
454 if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
455 first_match = true;
456
457 if (!found)
458 dump_prediction (dump_file, PRED_NO_PREDICTION,
459 combined_probability, bb, true);
460 else
461 {
462 dump_prediction (dump_file, PRED_DS_THEORY, combined_probability,
463 bb, !first_match);
464 dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability,
465 bb, first_match);
466 }
467
468 if (first_match)
469 combined_probability = best_probability;
470 dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);
471
472 while (*pnote)
473 {
474 if (REG_NOTE_KIND (*pnote) == REG_BR_PRED)
475 {
476 int predictor = INTVAL (XEXP (XEXP (*pnote, 0), 0));
477 int probability = INTVAL (XEXP (XEXP (*pnote, 0), 1));
478
479 dump_prediction (dump_file, predictor, probability, bb,
480 !first_match || best_predictor == predictor);
481 *pnote = XEXP (*pnote, 1);
482 }
483 else
484 pnote = &XEXP (*pnote, 1);
485 }
486
487 if (!prob_note)
488 {
489 REG_NOTES (insn)
490 = gen_rtx_EXPR_LIST (REG_BR_PROB,
491 GEN_INT (combined_probability), REG_NOTES (insn));
492
493 /* Save the prediction into the CFG when we are looking at a non-degenerate
494 conditional jump. */
495 if (!single_succ_p (bb))
496 {
497 BRANCH_EDGE (bb)->probability = combined_probability;
498 FALLTHRU_EDGE (bb)->probability
499 = REG_BR_PROB_BASE - combined_probability;
500 }
501 }
502 else if (!single_succ_p (bb))
503 {
504 int prob = INTVAL (XEXP (prob_note, 0));
505
506 BRANCH_EDGE (bb)->probability = prob;
507 FALLTHRU_EDGE (bb)->probability = REG_BR_PROB_BASE - prob;
508 }
509 else
510 single_succ_edge (bb)->probability = REG_BR_PROB_BASE;
511 }
512
513 /* Combine the predictions into a single probability, store it in the CFG,
514 and remove the now useless prediction entries. */
515
516 static void
517 combine_predictions_for_bb (basic_block bb)
518 {
519 int best_probability = PROB_EVEN;
520 int best_predictor = END_PREDICTORS;
521 int combined_probability = REG_BR_PROB_BASE / 2;
522 int d;
523 bool first_match = false;
524 bool found = false;
525 struct edge_prediction *pred;
526 int nedges = 0;
527 edge e, first = NULL, second = NULL;
528 edge_iterator ei;
529
530 FOR_EACH_EDGE (e, ei, bb->succs)
531 if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
532 {
533 nedges ++;
534 if (first && !second)
535 second = e;
536 if (!first)
537 first = e;
538 }
539
540 /* When there is no successor or only one choice, prediction is easy.
541
542 We are lazy for now and predict only basic blocks with two outgoing
543 edges. It is possible to predict the generic case too, but we would have to
544 ignore the first match heuristic and do more involved combining. Implement
545 this later. */
546 if (nedges != 2)
547 {
548 if (!bb->count)
549 set_even_probabilities (bb);
550 bb->predictions = NULL;
551 if (dump_file)
552 fprintf (dump_file, "%i edges in bb %i predicted to even probabilities\n",
553 nedges, bb->index);
554 return;
555 }
556
557 if (dump_file)
558 fprintf (dump_file, "Predictions for bb %i\n", bb->index);
559
560 /* We implement the "first match" heuristic and use the probability guessed
561 by the predictor with the smallest index. */
562 for (pred = bb->predictions; pred; pred = pred->ep_next)
563 {
564 int predictor = pred->ep_predictor;
565 int probability = pred->ep_probability;
566
567 if (pred->ep_edge != first)
568 probability = REG_BR_PROB_BASE - probability;
569
570 found = true;
571 if (best_predictor > predictor)
572 best_probability = probability, best_predictor = predictor;
573
574 d = (combined_probability * probability
575 + (REG_BR_PROB_BASE - combined_probability)
576 * (REG_BR_PROB_BASE - probability));
577
578 /* Use FP math to avoid overflows of 32-bit integers. */
579 if (d == 0)
580 /* If one probability is 0% and one 100%, avoid division by zero. */
581 combined_probability = REG_BR_PROB_BASE / 2;
582 else
583 combined_probability = (((double) combined_probability) * probability
584 * REG_BR_PROB_BASE / d + 0.5);
585 }
586
587 /* Decide which heuristic to use. If we didn't match anything, use the
588 no_prediction heuristic; if we did, use either first match or
589 Dempster-Shafer theory depending on the flags. */
590
591 if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
592 first_match = true;
593
594 if (!found)
595 dump_prediction (dump_file, PRED_NO_PREDICTION, combined_probability, bb, true);
596 else
597 {
598 dump_prediction (dump_file, PRED_DS_THEORY, combined_probability, bb,
599 !first_match);
600 dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability, bb,
601 first_match);
602 }
603
604 if (first_match)
605 combined_probability = best_probability;
606 dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);
607
608 for (pred = bb->predictions; pred; pred = pred->ep_next)
609 {
610 int predictor = pred->ep_predictor;
611 int probability = pred->ep_probability;
612
613 if (pred->ep_edge != EDGE_SUCC (bb, 0))
614 probability = REG_BR_PROB_BASE - probability;
615 dump_prediction (dump_file, predictor, probability, bb,
616 !first_match || best_predictor == predictor);
617 }
618 bb->predictions = NULL;
619
620 if (!bb->count)
621 {
622 first->probability = combined_probability;
623 second->probability = REG_BR_PROB_BASE - combined_probability;
624 }
625 }
626
627 /* Predict edge probabilities by exploiting loop structure. */
628
629 static void
630 predict_loops (void)
631 {
632 loop_iterator li;
633 struct loop *loop;
634
635 scev_initialize ();
636
637 /* Predict the exit edges and loopback edges of each natural loop,
638 using the number of iterations when it can be determined. */
639 FOR_EACH_LOOP (li, loop, 0)
640 {
641 basic_block bb, *bbs;
642 unsigned j, n_exits;
643 VEC (edge, heap) *exits;
644 struct tree_niter_desc niter_desc;
645 edge ex;
646
647 exits = get_loop_exit_edges (loop);
648 n_exits = VEC_length (edge, exits);
649
650 for (j = 0; VEC_iterate (edge, exits, j, ex); j++)
651 {
652 tree niter = NULL;
653 HOST_WIDE_INT nitercst;
654 int max = PARAM_VALUE (PARAM_MAX_PREDICTED_ITERATIONS);
655 int probability;
656 enum br_predictor predictor;
657
658 if (number_of_iterations_exit (loop, ex, &niter_desc, false))
659 niter = niter_desc.niter;
660 if (!niter || TREE_CODE (niter_desc.niter) != INTEGER_CST)
661 niter = loop_niter_by_eval (loop, ex);
662
663 if (TREE_CODE (niter) == INTEGER_CST)
664 {
665 if (host_integerp (niter, 1)
666 && compare_tree_int (niter, max-1) == -1)
667 nitercst = tree_low_cst (niter, 1) + 1;
668 else
669 nitercst = max;
670 predictor = PRED_LOOP_ITERATIONS;
671 }
672 /* If we have just one exit and we can derive some information about
673 the number of iterations of the loop from the statements inside
674 the loop, use it to predict this exit. */
675 else if (n_exits == 1)
676 {
677 nitercst = estimated_loop_iterations_int (loop, false);
678 if (nitercst < 0)
679 continue;
680 if (nitercst > max)
681 nitercst = max;
682
683 predictor = PRED_LOOP_ITERATIONS_GUESSED;
684 }
685 else
686 continue;
687
688 probability = ((REG_BR_PROB_BASE + nitercst / 2) / nitercst);
689 predict_edge (ex, predictor, probability);
690 }
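/* For illustration (not part of the compiler proper): an exit from a
   loop predicted to iterate 10 times (nitercst == 10) gets probability
   (10000 + 5) / 10 == 1000, i.e. a 10% chance of leaving the loop each
   time the exit test executes.  */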
691 VEC_free (edge, heap, exits);
692
693 bbs = get_loop_body (loop);
694
695 for (j = 0; j < loop->num_nodes; j++)
696 {
697 int header_found = 0;
698 edge e;
699 edge_iterator ei;
700
701 bb = bbs[j];
702
703 /* Bypass loop heuristics on continue statements. These
704 statements construct loops via "non-loop" constructs
705 in the source language and are better handled
706 separately. */
707 if (predicted_by_p (bb, PRED_CONTINUE))
708 continue;
709
710 /* Loop branch heuristics - predict an edge back to a
711 loop's head as taken. */
712 if (bb == loop->latch)
713 {
714 e = find_edge (loop->latch, loop->header);
715 if (e)
716 {
717 header_found = 1;
718 predict_edge_def (e, PRED_LOOP_BRANCH, TAKEN);
719 }
720 }
721
722 /* Loop exit heuristics - predict an edge exiting the loop as not taken
723 if the conditional has no successor that is the loop header. */
724 if (!header_found
725 /* If we already used more reliable loop exit predictors, do not
726 bother with PRED_LOOP_EXIT. */
727 && !predicted_by_p (bb, PRED_LOOP_ITERATIONS_GUESSED)
728 && !predicted_by_p (bb, PRED_LOOP_ITERATIONS))
729 {
730 /* For loops with many exits we don't want to predict all exits
731 with a fairly large probability, because if all exits are
732 considered in a row, the loop would be predicted to iterate
733 almost never. The code dividing the probability by the number
734 of exits is very rough. It should compute the number of exits
735 taken on each path through the function (not the overall number
736 of exits, which might be a lot higher for loops with wide switch
737 statements in them) and take the n-th root.
738
739 We bound the minimal probability at 2% to keep
740 edge_probability_reliable_p from trusting the branch prediction,
741 as this was causing a regression in the perl benchmark containing
742 such a wide loop. */
743
744 int probability = ((REG_BR_PROB_BASE
745 - predictor_info [(int) PRED_LOOP_EXIT].hitrate)
746 / n_exits);
747 if (probability < HITRATE (2))
748 probability = HITRATE (2);
749 FOR_EACH_EDGE (e, ei, bb->succs)
750 if (e->dest->index < NUM_FIXED_BLOCKS
751 || !flow_bb_inside_loop_p (loop, e->dest))
752 predict_edge (e, PRED_LOOP_EXIT, probability);
753 }
754 }
755
756 /* Free basic blocks from get_loop_body. */
757 free (bbs);
758 }
759
760 scev_finalize ();
761 }
762
763 /* Attempt to predict the probabilities of BB's outgoing edges using local
764 properties. */
765 static void
766 bb_estimate_probability_locally (basic_block bb)
767 {
768 rtx last_insn = BB_END (bb);
769 rtx cond;
770
771 if (! can_predict_insn_p (last_insn))
772 return;
773 cond = get_condition (last_insn, NULL, false, false);
774 if (! cond)
775 return;
776
777 /* Try "pointer heuristic."
778 A comparison ptr == 0 is predicted as false.
779 Similarly, a comparison ptr1 == ptr2 is predicted as false. */
780 if (COMPARISON_P (cond)
781 && ((REG_P (XEXP (cond, 0)) && REG_POINTER (XEXP (cond, 0)))
782 || (REG_P (XEXP (cond, 1)) && REG_POINTER (XEXP (cond, 1)))))
783 {
784 if (GET_CODE (cond) == EQ)
785 predict_insn_def (last_insn, PRED_POINTER, NOT_TAKEN);
786 else if (GET_CODE (cond) == NE)
787 predict_insn_def (last_insn, PRED_POINTER, TAKEN);
788 }
789 else
790
791 /* Try "opcode heuristic."
792 EQ tests are usually false and NE tests are usually true. Also,
793 most quantities are positive, so we can make the appropriate guesses
794 about signed comparisons against zero. */
795 switch (GET_CODE (cond))
796 {
797 case CONST_INT:
798 /* Unconditional branch. */
799 predict_insn_def (last_insn, PRED_UNCONDITIONAL,
800 cond == const0_rtx ? NOT_TAKEN : TAKEN);
801 break;
802
803 case EQ:
804 case UNEQ:
805 /* Floating point comparisons appear to behave in a very
806 unpredictable way because of the special role of equality
807 tests in FP code. */
808 if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
809 ;
810 /* Comparisons with 0 are often used for booleans and there is
811 nothing useful to predict about them. */
812 else if (XEXP (cond, 1) == const0_rtx
813 || XEXP (cond, 0) == const0_rtx)
814 ;
815 else
816 predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, NOT_TAKEN);
817 break;
818
819 case NE:
820 case LTGT:
821 /* Floating point comparisons appear to behave in a very
822 unpredictable way because of the special role of equality
823 tests in FP code. */
824 if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
825 ;
826 /* Comparisons with 0 are often used for booleans and there is
827 nothing useful to predict about them. */
828 else if (XEXP (cond, 1) == const0_rtx
829 || XEXP (cond, 0) == const0_rtx)
830 ;
831 else
832 predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, TAKEN);
833 break;
834
835 case ORDERED:
836 predict_insn_def (last_insn, PRED_FPOPCODE, TAKEN);
837 break;
838
839 case UNORDERED:
840 predict_insn_def (last_insn, PRED_FPOPCODE, NOT_TAKEN);
841 break;
842
843 case LE:
844 case LT:
845 if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
846 || XEXP (cond, 1) == constm1_rtx)
847 predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, NOT_TAKEN);
848 break;
849
850 case GE:
851 case GT:
852 if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
853 || XEXP (cond, 1) == constm1_rtx)
854 predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, TAKEN);
855 break;
856
857 default:
858 break;
859 }
860 }
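/* For illustration (not part of the compiler proper), in source terms the
   heuristics above predict

     if (ptr == NULL) ...   as not taken (pointer heuristic)
     if (x != y) ...        as taken     (opcode heuristic, NE)
     if (n > 0) ...         as taken     (opcode heuristic, positive)

   assuming the conditions survive in this form at the RTL level.  */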
861
862 /* Set edge->probability for each successor edge of BB. */
863 void
864 guess_outgoing_edge_probabilities (basic_block bb)
865 {
866 bb_estimate_probability_locally (bb);
867 combine_predictions_for_insn (BB_END (bb), bb);
868 }
869 \f
870 /* Return the constant EXPR will likely have at execution time, or NULL
871 if unknown.  The function is used by the builtin_expect branch predictor,
872 so the evidence must come from that construct plus possible constant folding.
873
874 We may want to implement a more involved value guess (such as prediction
875 based on value range propagation), but such tricks should go into a new
876 implementation. */
877
878 static tree
879 expr_expected_value (tree expr, bitmap visited)
880 {
881 if (TREE_CONSTANT (expr))
882 return expr;
883 else if (TREE_CODE (expr) == SSA_NAME)
884 {
885 tree def = SSA_NAME_DEF_STMT (expr);
886
887 /* If we were already here, break the infinite cycle. */
888 if (bitmap_bit_p (visited, SSA_NAME_VERSION (expr)))
889 return NULL;
890 bitmap_set_bit (visited, SSA_NAME_VERSION (expr));
891
892 if (TREE_CODE (def) == PHI_NODE)
893 {
894 /* All the arguments of the PHI node must have the same expected
895 constant value. */
896 int i;
897 tree val = NULL, new_val;
898
899 for (i = 0; i < PHI_NUM_ARGS (def); i++)
900 {
901 tree arg = PHI_ARG_DEF (def, i);
902
903 /* If this PHI has itself as an argument, we cannot
904 determine the expected value of this argument. However,
905 if we can find an expected constant value for the other
906 PHI args then we can still be sure that this is
907 likely a constant. So be optimistic and just
908 continue with the next argument. */
909 if (arg == PHI_RESULT (def))
910 continue;
911
912 new_val = expr_expected_value (arg, visited);
913 if (!new_val)
914 return NULL;
915 if (!val)
916 val = new_val;
917 else if (!operand_equal_p (val, new_val, false))
918 return NULL;
919 }
920 return val;
921 }
922 if (TREE_CODE (def) != GIMPLE_MODIFY_STMT
923 || GIMPLE_STMT_OPERAND (def, 0) != expr)
924 return NULL;
925 return expr_expected_value (GIMPLE_STMT_OPERAND (def, 1), visited);
926 }
927 else if (TREE_CODE (expr) == CALL_EXPR)
928 {
929 tree decl = get_callee_fndecl (expr);
930 if (!decl)
931 return NULL;
932 if (DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL
933 && DECL_FUNCTION_CODE (decl) == BUILT_IN_EXPECT)
934 {
935 tree val;
936
937 if (call_expr_nargs (expr) != 2)
938 return NULL;
939 val = CALL_EXPR_ARG (expr, 0);
940 if (TREE_CONSTANT (val))
941 return val;
942 return CALL_EXPR_ARG (expr, 1);
943 }
944 }
945 if (BINARY_CLASS_P (expr) || COMPARISON_CLASS_P (expr))
946 {
947 tree op0, op1, res;
948 op0 = expr_expected_value (TREE_OPERAND (expr, 0), visited);
949 if (!op0)
950 return NULL;
951 op1 = expr_expected_value (TREE_OPERAND (expr, 1), visited);
952 if (!op1)
953 return NULL;
954 res = fold_build2 (TREE_CODE (expr), TREE_TYPE (expr), op0, op1);
955 if (TREE_CONSTANT (res))
956 return res;
957 return NULL;
958 }
959 if (UNARY_CLASS_P (expr))
960 {
961 tree op0, res;
962 op0 = expr_expected_value (TREE_OPERAND (expr, 0), visited);
963 if (!op0)
964 return NULL;
965 res = fold_build1 (TREE_CODE (expr), TREE_TYPE (expr), op0);
966 if (TREE_CONSTANT (res))
967 return res;
968 return NULL;
969 }
970 return NULL;
971 }
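/* For illustration (not part of the compiler proper): for source such as

     if (__builtin_expect (ptr != NULL, 1))
       ...

   expr_expected_value resolves the condition to the constant 1, so
   tree_predict_by_opcode below predicts the then edge as taken with
   PRED_BUILTIN_EXPECT.  */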
972 \f
973 /* Get rid of all builtin_expect calls we no longer need. */
974 static void
975 strip_builtin_expect (void)
976 {
977 basic_block bb;
978 FOR_EACH_BB (bb)
979 {
980 block_stmt_iterator bi;
981 for (bi = bsi_start (bb); !bsi_end_p (bi); bsi_next (&bi))
982 {
983 tree stmt = bsi_stmt (bi);
984 tree fndecl;
985 tree call;
986
987 if (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT
988 && (call = GIMPLE_STMT_OPERAND (stmt, 1))
989 && TREE_CODE (call) == CALL_EXPR
990 && (fndecl = get_callee_fndecl (call))
991 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
992 && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_EXPECT
993 && call_expr_nargs (call) == 2)
994 {
995 GIMPLE_STMT_OPERAND (stmt, 1) = CALL_EXPR_ARG (call, 0);
996 update_stmt (stmt);
997 }
998 }
999 }
1000 }
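/* For illustration (not part of the compiler proper): once predictions
   have been made, a statement such as

     tmp = __builtin_expect (cond, 1);

   is rewritten above to

     tmp = cond;

   so no call survives into the later optimizers and code generation.  */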
1001 \f
1002 /* Predict using the opcode of the last statement in the basic block. */
1003 static void
1004 tree_predict_by_opcode (basic_block bb)
1005 {
1006 tree stmt = last_stmt (bb);
1007 edge then_edge;
1008 tree cond;
1009 tree op0;
1010 tree type;
1011 tree val;
1012 bitmap visited;
1013 edge_iterator ei;
1014
1015 if (!stmt || TREE_CODE (stmt) != COND_EXPR)
1016 return;
1017 FOR_EACH_EDGE (then_edge, ei, bb->succs)
1018 if (then_edge->flags & EDGE_TRUE_VALUE)
1019 break;
1020 cond = TREE_OPERAND (stmt, 0);
1021 if (!COMPARISON_CLASS_P (cond))
1022 return;
1023 op0 = TREE_OPERAND (cond, 0);
1024 type = TREE_TYPE (op0);
1025 visited = BITMAP_ALLOC (NULL);
1026 val = expr_expected_value (cond, visited);
1027 BITMAP_FREE (visited);
1028 if (val)
1029 {
1030 if (integer_zerop (val))
1031 predict_edge_def (then_edge, PRED_BUILTIN_EXPECT, NOT_TAKEN);
1032 else
1033 predict_edge_def (then_edge, PRED_BUILTIN_EXPECT, TAKEN);
1034 return;
1035 }
1036 /* Try "pointer heuristic."
1037 A comparison ptr == 0 is predicted as false.
1038 Similarly, a comparison ptr1 == ptr2 is predicted as false. */
1039 if (POINTER_TYPE_P (type))
1040 {
1041 if (TREE_CODE (cond) == EQ_EXPR)
1042 predict_edge_def (then_edge, PRED_TREE_POINTER, NOT_TAKEN);
1043 else if (TREE_CODE (cond) == NE_EXPR)
1044 predict_edge_def (then_edge, PRED_TREE_POINTER, TAKEN);
1045 }
1046 else
1047
1048 /* Try "opcode heuristic."
1049 EQ tests are usually false and NE tests are usually true. Also,
1050 most quantities are positive, so we can make the appropriate guesses
1051 about signed comparisons against zero. */
1052 switch (TREE_CODE (cond))
1053 {
1054 case EQ_EXPR:
1055 case UNEQ_EXPR:
1056 /* Floating point comparisons appear to behave in a very
1057 unpredictable way because of the special role of equality
1058 tests in FP code. */
1059 if (FLOAT_TYPE_P (type))
1060 ;
1061 /* Comparisons with 0 are often used for booleans and there is
1062 nothing useful to predict about them. */
1063 else if (integer_zerop (op0)
1064 || integer_zerop (TREE_OPERAND (cond, 1)))
1065 ;
1066 else
1067 predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, NOT_TAKEN);
1068 break;
1069
1070 case NE_EXPR:
1071 case LTGT_EXPR:
1072 /* Floating point comparisons appear to behave in a very
1073 unpredictable way because of the special role of equality
1074 tests in FP code. */
1075 if (FLOAT_TYPE_P (type))
1076 ;
1077 /* Comparisons with 0 are often used for booleans and there is
1078 nothing useful to predict about them. */
1079 else if (integer_zerop (op0)
1080 || integer_zerop (TREE_OPERAND (cond, 1)))
1081 ;
1082 else
1083 predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, TAKEN);
1084 break;
1085
1086 case ORDERED_EXPR:
1087 predict_edge_def (then_edge, PRED_TREE_FPOPCODE, TAKEN);
1088 break;
1089
1090 case UNORDERED_EXPR:
1091 predict_edge_def (then_edge, PRED_TREE_FPOPCODE, NOT_TAKEN);
1092 break;
1093
1094 case LE_EXPR:
1095 case LT_EXPR:
1096 if (integer_zerop (TREE_OPERAND (cond, 1))
1097 || integer_onep (TREE_OPERAND (cond, 1))
1098 || integer_all_onesp (TREE_OPERAND (cond, 1))
1099 || real_zerop (TREE_OPERAND (cond, 1))
1100 || real_onep (TREE_OPERAND (cond, 1))
1101 || real_minus_onep (TREE_OPERAND (cond, 1)))
1102 predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, NOT_TAKEN);
1103 break;
1104
1105 case GE_EXPR:
1106 case GT_EXPR:
1107 if (integer_zerop (TREE_OPERAND (cond, 1))
1108 || integer_onep (TREE_OPERAND (cond, 1))
1109 || integer_all_onesp (TREE_OPERAND (cond, 1))
1110 || real_zerop (TREE_OPERAND (cond, 1))
1111 || real_onep (TREE_OPERAND (cond, 1))
1112 || real_minus_onep (TREE_OPERAND (cond, 1)))
1113 predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, TAKEN);
1114 break;
1115
1116 default:
1117 break;
1118 }
1119 }
1120
1121 /* Try to guess whether the return value VAL means an error code. */
1122 static enum br_predictor
1123 return_prediction (tree val, enum prediction *prediction)
1124 {
1125 /* VOID. */
1126 if (!val)
1127 return PRED_NO_PREDICTION;
1128 /* Different heuristics for pointers and scalars. */
1129 if (POINTER_TYPE_P (TREE_TYPE (val)))
1130 {
1131 /* NULL is usually not returned. */
1132 if (integer_zerop (val))
1133 {
1134 *prediction = NOT_TAKEN;
1135 return PRED_NULL_RETURN;
1136 }
1137 }
1138 else if (INTEGRAL_TYPE_P (TREE_TYPE (val)))
1139 {
1140 /* Negative return values are often used to indicate
1141 errors. */
1142 if (TREE_CODE (val) == INTEGER_CST
1143 && tree_int_cst_sgn (val) < 0)
1144 {
1145 *prediction = NOT_TAKEN;
1146 return PRED_NEGATIVE_RETURN;
1147 }
1148 /* Constant return values seem to be commonly taken.
1149 Zero/one often represent booleans so exclude them from the
1150 heuristics. */
1151 if (TREE_CONSTANT (val)
1152 && (!integer_zerop (val) && !integer_onep (val)))
1153 {
1154 *prediction = TAKEN;
1155 return PRED_CONST_RETURN;
1156 }
1157 }
1158 return PRED_NO_PREDICTION;
1159 }
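/* For illustration (not part of the compiler proper), in source terms:

     return NULL;   - path predicted not taken (PRED_NULL_RETURN)
     return -1;     - path predicted not taken (PRED_NEGATIVE_RETURN)
     return 42;     - path predicted taken     (PRED_CONST_RETURN)

   while return 0; and return 1; are left alone, as they usually
   represent booleans.  */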
1160
1161 /* Find the basic block with the return expression and look for a possible
1162 return value, trying to apply the return_prediction heuristics. */
1163 static void
1164 apply_return_prediction (int *heads)
1165 {
1166 tree return_stmt = NULL;
1167 tree return_val;
1168 edge e;
1169 tree phi;
1170 int phi_num_args, i;
1171 enum br_predictor pred;
1172 enum prediction direction;
1173 edge_iterator ei;
1174
1175 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
1176 {
1177 return_stmt = last_stmt (e->src);
1178 if (TREE_CODE (return_stmt) == RETURN_EXPR)
1179 break;
1180 }
1181 if (!e)
1182 return;
1183 return_val = TREE_OPERAND (return_stmt, 0);
1184 if (!return_val)
1185 return;
1186 if (TREE_CODE (return_val) == GIMPLE_MODIFY_STMT)
1187 return_val = GIMPLE_STMT_OPERAND (return_val, 1);
1188 if (TREE_CODE (return_val) != SSA_NAME
1189 || !SSA_NAME_DEF_STMT (return_val)
1190 || TREE_CODE (SSA_NAME_DEF_STMT (return_val)) != PHI_NODE)
1191 return;
1192 for (phi = SSA_NAME_DEF_STMT (return_val); phi; phi = PHI_CHAIN (phi))
1193 if (PHI_RESULT (phi) == return_val)
1194 break;
1195 if (!phi)
1196 return;
1197 phi_num_args = PHI_NUM_ARGS (phi);
1198 pred = return_prediction (PHI_ARG_DEF (phi, 0), &direction);
1199
1200 /* Avoid the degenerate case where all return values from the function
1201 belong to the same category (i.e. they are all positive constants)
1202 so we can hardly say anything about them. */
1203 for (i = 1; i < phi_num_args; i++)
1204 if (pred != return_prediction (PHI_ARG_DEF (phi, i), &direction))
1205 break;
1206 if (i != phi_num_args)
1207 for (i = 0; i < phi_num_args; i++)
1208 {
1209 pred = return_prediction (PHI_ARG_DEF (phi, i), &direction);
1210 if (pred != PRED_NO_PREDICTION)
1211 predict_paths_leading_to (PHI_ARG_EDGE (phi, i)->src, heads, pred,
1212 direction);
1213 }
1214 }
1215
1216 /* Look for basic blocks that contain unlikely-to-happen events
1217 (such as noreturn calls) and mark all paths leading to execution
1218 of these basic blocks as unlikely. */
1219
1220 static void
1221 tree_bb_level_predictions (void)
1222 {
1223 basic_block bb;
1224 int *heads;
1225
1226 heads = XCNEWVEC (int, last_basic_block);
1227 heads[ENTRY_BLOCK_PTR->next_bb->index] = last_basic_block;
1228
1229 apply_return_prediction (heads);
1230
1231 FOR_EACH_BB (bb)
1232 {
1233 block_stmt_iterator bsi = bsi_last (bb);
1234
1235 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1236 {
1237 tree stmt = bsi_stmt (bsi);
1238 switch (TREE_CODE (stmt))
1239 {
1240 case GIMPLE_MODIFY_STMT:
1241 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 1)) == CALL_EXPR)
1242 {
1243 stmt = GIMPLE_STMT_OPERAND (stmt, 1);
1244 goto call_expr;
1245 }
1246 break;
1247 case CALL_EXPR:
1248 call_expr:;
1249 if (call_expr_flags (stmt) & ECF_NORETURN)
1250 predict_paths_leading_to (bb, heads, PRED_NORETURN,
1251 NOT_TAKEN);
1252 break;
1253 default:
1254 break;
1255 }
1256 }
1257 }
1258
1259 free (heads);
1260 }
1261
1262 /* Predict branch probabilities and estimate profile of the tree CFG. */
1263 static unsigned int
1264 tree_estimate_probability (void)
1265 {
1266 basic_block bb;
1267
1268 loop_optimizer_init (0);
1269 if (current_loops && dump_file && (dump_flags & TDF_DETAILS))
1270 flow_loops_dump (dump_file, NULL, 0);
1271
1272 add_noreturn_fake_exit_edges ();
1273 connect_infinite_loops_to_exit ();
1274 calculate_dominance_info (CDI_DOMINATORS);
1275 calculate_dominance_info (CDI_POST_DOMINATORS);
1276
1277 tree_bb_level_predictions ();
1278
1279 mark_irreducible_loops ();
1280 record_loop_exits ();
1281 if (current_loops)
1282 predict_loops ();
1283
1284 FOR_EACH_BB (bb)
1285 {
1286 edge e;
1287 edge_iterator ei;
1288
1289 FOR_EACH_EDGE (e, ei, bb->succs)
1290 {
1291 /* Predict early returns to be probable, as we've already taken
1292 care of error returns, and the other cases are often used for
1293 fast paths through the function. */
1294 if (e->dest == EXIT_BLOCK_PTR
1295 && TREE_CODE (last_stmt (bb)) == RETURN_EXPR
1296 && !single_pred_p (bb))
1297 {
1298 edge e1;
1299 edge_iterator ei1;
1300
1301 FOR_EACH_EDGE (e1, ei1, bb->preds)
1302 if (!predicted_by_p (e1->src, PRED_NULL_RETURN)
1303 && !predicted_by_p (e1->src, PRED_CONST_RETURN)
1304 && !predicted_by_p (e1->src, PRED_NEGATIVE_RETURN)
1305 && !last_basic_block_p (e1->src))
1306 predict_edge_def (e1, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
1307 }
1308
1309 /* Look for a block we are guarding (i.e. we dominate it,
1310 but it doesn't postdominate us). */
1311 if (e->dest != EXIT_BLOCK_PTR && e->dest != bb
1312 && dominated_by_p (CDI_DOMINATORS, e->dest, e->src)
1313 && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e->dest))
1314 {
1315 block_stmt_iterator bi;
1316
1317 /* The call heuristic claims that a guarded function call
1318 is improbable. This is because such calls are often used
1319 to signal exceptional situations such as printing error
1320 messages. */
1321 for (bi = bsi_start (e->dest); !bsi_end_p (bi);
1322 bsi_next (&bi))
1323 {
1324 tree stmt = bsi_stmt (bi);
1325 if ((TREE_CODE (stmt) == CALL_EXPR
1326 || (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT
1327 && TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 1))
1328 == CALL_EXPR))
1329 /* Constant and pure calls are hardly ever used to signal
1330 something exceptional. */
1331 && TREE_SIDE_EFFECTS (stmt))
1332 {
1333 predict_edge_def (e, PRED_CALL, NOT_TAKEN);
1334 break;
1335 }
1336 }
1337 }
1338 }
1339 tree_predict_by_opcode (bb);
1340 }
1341 FOR_EACH_BB (bb)
1342 combine_predictions_for_bb (bb);
1343
1344 strip_builtin_expect ();
1345 estimate_bb_frequencies ();
1346 free_dominance_info (CDI_POST_DOMINATORS);
1347 remove_fake_exit_edges ();
1348 loop_optimizer_finalize ();
1349 if (dump_file && (dump_flags & TDF_DETAILS))
1350 dump_tree_cfg (dump_file, dump_flags);
1351 if (profile_status == PROFILE_ABSENT)
1352 profile_status = PROFILE_GUESSED;
1353 return 0;
1354 }
1355 \f
1356 /* Check whether this is the last basic block of the function. Commonly
1357 there is one extra cleanup block. */
1358 static bool
1359 last_basic_block_p (basic_block bb)
1360 {
1361 if (bb == EXIT_BLOCK_PTR)
1362 return false;
1363
1364 return (bb->next_bb == EXIT_BLOCK_PTR
1365 || (bb->next_bb->next_bb == EXIT_BLOCK_PTR
1366 && single_succ_p (bb)
1367 && single_succ (bb)->next_bb == EXIT_BLOCK_PTR));
1368 }
1369
1370 /* Set branch probabilities according to PRED and TAKEN.
1371 HEADS[bb->index] should be the index of the basic block in which we
1372 need to alter the branch predictions (i.e. the first of our dominators
1373 that we do not post-dominate); we fill this information in on demand,
1374 so ENTRY_BLOCK may be there when it has not been computed yet. */
1375
1376 static void
1377 predict_paths_leading_to (basic_block bb, int *heads, enum br_predictor pred,
1378 enum prediction taken)
1379 {
1380 edge e;
1381 edge_iterator ei;
1382 int y;
1383
1384 if (heads[bb->index] == ENTRY_BLOCK)
1385 {
1386 /* This is first time we need this field in heads array; so
1387 find first dominator that we do not post-dominate (we are
1388 using already known members of heads array). */
1389 basic_block ai = bb;
1390 basic_block next_ai = get_immediate_dominator (CDI_DOMINATORS, bb);
1391 int head;
1392
1393 while (heads[next_ai->index] == ENTRY_BLOCK)
1394 {
1395 if (!dominated_by_p (CDI_POST_DOMINATORS, next_ai, bb))
1396 break;
1397 heads[next_ai->index] = ai->index;
1398 ai = next_ai;
1399 next_ai = get_immediate_dominator (CDI_DOMINATORS, next_ai);
1400 }
1401 if (!dominated_by_p (CDI_POST_DOMINATORS, next_ai, bb))
1402 head = next_ai->index;
1403 else
1404 head = heads[next_ai->index];
1405 while (next_ai != bb)
1406 {
1407 next_ai = ai;
1408 ai = BASIC_BLOCK (heads[ai->index]);
1409 heads[next_ai->index] = head;
1410 }
1411 }
1412 y = heads[bb->index];
1413
1414 /* Now find the edge that leads to our branch and apply the prediction. */
1415
1416 if (y == last_basic_block)
1417 return;
1418 FOR_EACH_EDGE (e, ei, BASIC_BLOCK (y)->succs)
1419 if (e->dest->index >= NUM_FIXED_BLOCKS
1420 && dominated_by_p (CDI_POST_DOMINATORS, e->dest, bb))
1421 predict_edge_def (e, pred, taken);
1422 }
1423 \f
1424 /* This is used to carry information about basic blocks. It is
1425 attached to the AUX field of the standard CFG block. */
1426
1427 typedef struct block_info_def
1428 {
1429 /* Estimated frequency of execution of basic_block. */
1430 sreal frequency;
1431
1432 /* To keep queue of basic blocks to process. */
1433 basic_block next;
1434
1435 /* Number of predecessors we need to visit first. */
1436 int npredecessors;
1437 } *block_info;
1438
1439 /* Similar information for edges. */
1440 typedef struct edge_info_def
1441 {
1442 /* If the edge is a loopback edge, the probability that the edge will be
1443 reached provided the header is. The estimated number of iterations of
1444 the loop can then be computed as 1 / (1 - back_edge_prob). */
1445 sreal back_edge_prob;
1446 /* True if the edge is a loopback edge in the natural loop. */
1447 unsigned int back_edge:1;
1448 } *edge_info;
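/* For illustration (not part of the compiler proper): a back edge with
   back_edge_prob of 0.9 implies about 1 / (1 - 0.9) == 10 expected
   iterations, which is how propagate_freq below scales the frequency
   of a loop header.  */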
1449
1450 #define BLOCK_INFO(B) ((block_info) (B)->aux)
1451 #define EDGE_INFO(E) ((edge_info) (E)->aux)
1452
1453 /* Helper function for estimate_bb_frequencies.
1454 Propagate the frequencies in blocks marked in
1455 TOVISIT, starting in HEAD. */
1456
1457 static void
1458 propagate_freq (basic_block head, bitmap tovisit)
1459 {
1460 basic_block bb;
1461 basic_block last;
1462 unsigned i;
1463 edge e;
1464 basic_block nextbb;
1465 bitmap_iterator bi;
1466
1467 /* For each basic block we need to visit, count the number of its
1468 predecessors we need to visit first. */
1469 EXECUTE_IF_SET_IN_BITMAP (tovisit, 0, i, bi)
1470 {
1471 edge_iterator ei;
1472 int count = 0;
1473
1474 /* The outermost "loop" includes the exit block, which we cannot
1475 look up via BASIC_BLOCK. Detect this and use EXIT_BLOCK_PTR
1476 directly. Do the same for the entry block. */
1477 bb = BASIC_BLOCK (i);
1478
1479 FOR_EACH_EDGE (e, ei, bb->preds)
1480 {
1481 bool visit = bitmap_bit_p (tovisit, e->src->index);
1482
1483 if (visit && !(e->flags & EDGE_DFS_BACK))
1484 count++;
1485 else if (visit && dump_file && !EDGE_INFO (e)->back_edge)
1486 fprintf (dump_file,
1487 "Irreducible region hit, ignoring edge to %i->%i\n",
1488 e->src->index, bb->index);
1489 }
1490 BLOCK_INFO (bb)->npredecessors = count;
1491 }
1492
1493 memcpy (&BLOCK_INFO (head)->frequency, &real_one, sizeof (real_one));
1494 last = head;
1495 for (bb = head; bb; bb = nextbb)
1496 {
1497 edge_iterator ei;
1498 sreal cyclic_probability, frequency;
1499
1500 memcpy (&cyclic_probability, &real_zero, sizeof (real_zero));
1501 memcpy (&frequency, &real_zero, sizeof (real_zero));
1502
1503 nextbb = BLOCK_INFO (bb)->next;
1504 BLOCK_INFO (bb)->next = NULL;
1505
1506 /* Compute frequency of basic block. */
1507 if (bb != head)
1508 {
1509 #ifdef ENABLE_CHECKING
1510 FOR_EACH_EDGE (e, ei, bb->preds)
1511 gcc_assert (!bitmap_bit_p (tovisit, e->src->index)
1512 || (e->flags & EDGE_DFS_BACK));
1513 #endif
1514
1515 FOR_EACH_EDGE (e, ei, bb->preds)
1516 if (EDGE_INFO (e)->back_edge)
1517 {
1518 sreal_add (&cyclic_probability, &cyclic_probability,
1519 &EDGE_INFO (e)->back_edge_prob);
1520 }
1521 else if (!(e->flags & EDGE_DFS_BACK))
1522 {
1523 sreal tmp;
1524
1525 /* frequency += (e->probability
1526 * BLOCK_INFO (e->src)->frequency /
1527 REG_BR_PROB_BASE); */
1528
1529 sreal_init (&tmp, e->probability, 0);
1530 sreal_mul (&tmp, &tmp, &BLOCK_INFO (e->src)->frequency);
1531 sreal_mul (&tmp, &tmp, &real_inv_br_prob_base);
1532 sreal_add (&frequency, &frequency, &tmp);
1533 }
1534
1535 if (sreal_compare (&cyclic_probability, &real_zero) == 0)
1536 {
1537 memcpy (&BLOCK_INFO (bb)->frequency, &frequency,
1538 sizeof (frequency));
1539 }
1540 else
1541 {
1542 if (sreal_compare (&cyclic_probability, &real_almost_one) > 0)
1543 {
1544 memcpy (&cyclic_probability, &real_almost_one,
1545 sizeof (real_almost_one));
1546 }
1547
1548 /* BLOCK_INFO (bb)->frequency = frequency
1549 / (1 - cyclic_probability) */
1550
1551 sreal_sub (&cyclic_probability, &real_one, &cyclic_probability);
1552 sreal_div (&BLOCK_INFO (bb)->frequency,
1553 &frequency, &cyclic_probability);
1554 }
1555 }
1556
1557 bitmap_clear_bit (tovisit, bb->index);
1558
1559 e = find_edge (bb, head);
1560 if (e)
1561 {
1562 sreal tmp;
1563
1564 /* EDGE_INFO (e)->back_edge_prob
1565 = ((e->probability * BLOCK_INFO (bb)->frequency)
1566 / REG_BR_PROB_BASE); */
1567
1568 sreal_init (&tmp, e->probability, 0);
1569 sreal_mul (&tmp, &tmp, &BLOCK_INFO (bb)->frequency);
1570 sreal_mul (&EDGE_INFO (e)->back_edge_prob,
1571 &tmp, &real_inv_br_prob_base);
1572 }
1573
1574 /* Propagate to successor blocks. */
1575 FOR_EACH_EDGE (e, ei, bb->succs)
1576 if (!(e->flags & EDGE_DFS_BACK)
1577 && BLOCK_INFO (e->dest)->npredecessors)
1578 {
1579 BLOCK_INFO (e->dest)->npredecessors--;
1580 if (!BLOCK_INFO (e->dest)->npredecessors)
1581 {
1582 if (!nextbb)
1583 nextbb = e->dest;
1584 else
1585 BLOCK_INFO (last)->next = e->dest;
1586
1587 last = e->dest;
1588 }
1589 }
1590 }
1591 }
1592
1593 /* Estimate probabilities of loopback edges in loops at the same nest level. */
1594
1595 static void
1596 estimate_loops_at_level (struct loop *first_loop)
1597 {
1598 struct loop *loop;
1599
1600 for (loop = first_loop; loop; loop = loop->next)
1601 {
1602 edge e;
1603 basic_block *bbs;
1604 unsigned i;
1605 bitmap tovisit = BITMAP_ALLOC (NULL);
1606
1607 estimate_loops_at_level (loop->inner);
1608
1609 /* Find current loop back edge and mark it. */
1610 e = loop_latch_edge (loop);
1611 EDGE_INFO (e)->back_edge = 1;
1612
1613 bbs = get_loop_body (loop);
1614 for (i = 0; i < loop->num_nodes; i++)
1615 bitmap_set_bit (tovisit, bbs[i]->index);
1616 free (bbs);
1617 propagate_freq (loop->header, tovisit);
1618 BITMAP_FREE (tovisit);
1619 }
1620 }
1621
1622 /* Propagate frequencies through the structure of loops. */
1623
1624 static void
1625 estimate_loops (void)
1626 {
1627 bitmap tovisit = BITMAP_ALLOC (NULL);
1628 basic_block bb;
1629
1630 /* Start by estimating the frequencies in the loops. */
1631 if (current_loops)
1632 estimate_loops_at_level (current_loops->tree_root->inner);
1633
1634 /* Now propagate the frequencies through all the blocks. */
1635 FOR_ALL_BB (bb)
1636 {
1637 bitmap_set_bit (tovisit, bb->index);
1638 }
1639 propagate_freq (ENTRY_BLOCK_PTR, tovisit);
1640 BITMAP_FREE (tovisit);
1641 }
1642
1643 /* Convert counts measured by profile-driven feedback to frequencies.
1644 Return nonzero iff there was any nonzero execution count. */
1645
1646 int
1647 counts_to_freqs (void)
1648 {
1649 gcov_type count_max, true_count_max = 0;
1650 basic_block bb;
1651
1652 FOR_EACH_BB (bb)
1653 true_count_max = MAX (bb->count, true_count_max);
1654
1655 count_max = MAX (true_count_max, 1);
1656 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
1657 bb->frequency = (bb->count * BB_FREQ_MAX + count_max / 2) / count_max;
1658
1659 return true_count_max;
1660 }
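/* For illustration (not part of the compiler proper): with BB_FREQ_MAX
   of 10000 and a hottest count of 1000, a block executed 250 times gets
   frequency (250 * 10000 + 500) / 1000 == 2500, i.e. one quarter of
   BB_FREQ_MAX.  */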
1661
1662 /* Return true if the function is likely to be expensive, so there is no
1663 point in optimizing the prologue/epilogue or doing inlining at the expense
1664 of code size growth. THRESHOLD is the limit on the number of instructions
1665 the function can execute on average to still be considered not expensive. */
1666
1667 bool
1668 expensive_function_p (int threshold)
1669 {
1670 unsigned int sum = 0;
1671 basic_block bb;
1672 unsigned int limit;
1673
1674 /* We cannot compute this accurately for large thresholds due to the scaled
1675 frequencies. */
1676 gcc_assert (threshold <= BB_FREQ_MAX);
1677
1678 /* Frequencies are out of range. This either means that the function contains
1679 an internal loop executing more than BB_FREQ_MAX times, or profile feedback
1680 is available and the function has never been executed. */
1681 if (ENTRY_BLOCK_PTR->frequency == 0)
1682 return true;
1683
1684 /* At most BB_FREQ_MAX^2, so overflow won't happen. */
1685 limit = ENTRY_BLOCK_PTR->frequency * threshold;
1686 FOR_EACH_BB (bb)
1687 {
1688 rtx insn;
1689
1690 for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb));
1691 insn = NEXT_INSN (insn))
1692 if (active_insn_p (insn))
1693 {
1694 sum += bb->frequency;
1695 if (sum > limit)
1696 return true;
1697 }
1698 }
1699
1700 return false;
1701 }
1702
1703 /* Estimate basic block frequencies from the given branch probabilities. */
1704
1705 void
1706 estimate_bb_frequencies (void)
1707 {
1708 basic_block bb;
1709 sreal freq_max;
1710
1711 if (!flag_branch_probabilities || !counts_to_freqs ())
1712 {
1713 static int real_values_initialized = 0;
1714
1715 if (!real_values_initialized)
1716 {
1717 real_values_initialized = 1;
1718 sreal_init (&real_zero, 0, 0);
1719 sreal_init (&real_one, 1, 0);
1720 sreal_init (&real_br_prob_base, REG_BR_PROB_BASE, 0);
1721 sreal_init (&real_bb_freq_max, BB_FREQ_MAX, 0);
1722 sreal_init (&real_one_half, 1, -1);
1723 sreal_div (&real_inv_br_prob_base, &real_one, &real_br_prob_base);
1724 sreal_sub (&real_almost_one, &real_one, &real_inv_br_prob_base);
1725 }
1726
1727 mark_dfs_back_edges ();
1728
1729 single_succ_edge (ENTRY_BLOCK_PTR)->probability = REG_BR_PROB_BASE;
1730
1731 /* Set up block info for each basic block. */
1732 alloc_aux_for_blocks (sizeof (struct block_info_def));
1733 alloc_aux_for_edges (sizeof (struct edge_info_def));
1734 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
1735 {
1736 edge e;
1737 edge_iterator ei;
1738
1739 FOR_EACH_EDGE (e, ei, bb->succs)
1740 {
1741 sreal_init (&EDGE_INFO (e)->back_edge_prob, e->probability, 0);
1742 sreal_mul (&EDGE_INFO (e)->back_edge_prob,
1743 &EDGE_INFO (e)->back_edge_prob,
1744 &real_inv_br_prob_base);
1745 }
1746 }
1747
1748 /* First compute probabilities locally for each loop from innermost
1749 to outermost to examine probabilities for back edges. */
1750 estimate_loops ();
1751
1752 memcpy (&freq_max, &real_zero, sizeof (real_zero));
1753 FOR_EACH_BB (bb)
1754 if (sreal_compare (&freq_max, &BLOCK_INFO (bb)->frequency) < 0)
1755 memcpy (&freq_max, &BLOCK_INFO (bb)->frequency, sizeof (freq_max));
1756
1757 sreal_div (&freq_max, &real_bb_freq_max, &freq_max);
1758 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
1759 {
1760 sreal tmp;
1761
1762 sreal_mul (&tmp, &BLOCK_INFO (bb)->frequency, &freq_max);
1763 sreal_add (&tmp, &tmp, &real_one_half);
1764 bb->frequency = sreal_to_int (&tmp);
1765 }
1766
1767 free_aux_for_blocks ();
1768 free_aux_for_edges ();
1769 }
1770 compute_function_frequency ();
1771 if (flag_reorder_functions)
1772 choose_function_section ();
1773 }
1774
1775 /* Decide whether the function is hot, cold or unlikely executed. */
1776 static void
1777 compute_function_frequency (void)
1778 {
1779 basic_block bb;
1780
1781 if (!profile_info || !flag_branch_probabilities)
1782 return;
1783 cfun->function_frequency = FUNCTION_FREQUENCY_UNLIKELY_EXECUTED;
1784 FOR_EACH_BB (bb)
1785 {
1786 if (maybe_hot_bb_p (bb))
1787 {
1788 cfun->function_frequency = FUNCTION_FREQUENCY_HOT;
1789 return;
1790 }
1791 if (!probably_never_executed_bb_p (bb))
1792 cfun->function_frequency = FUNCTION_FREQUENCY_NORMAL;
1793 }
1794 }
1795
1796 /* Choose appropriate section for the function. */
1797 static void
1798 choose_function_section (void)
1799 {
1800 if (DECL_SECTION_NAME (current_function_decl)
1801 || !targetm.have_named_sections
1802 /* Theoretically we could split the gnu.linkonce text section too,
1803 but this requires more work, as the frequency needs to match
1804 for all generated objects, so we would need to merge the frequencies
1805 of all instances. For now, just never set the frequency for these. */
1806 || DECL_ONE_ONLY (current_function_decl))
1807 return;
1808
1809 /* If we are doing the partitioning optimization, let the optimization
1810 choose the correct section into which to put things. */
1811
1812 if (flag_reorder_blocks_and_partition)
1813 return;
1814
1815 if (cfun->function_frequency == FUNCTION_FREQUENCY_HOT)
1816 DECL_SECTION_NAME (current_function_decl) =
1817 build_string (strlen (HOT_TEXT_SECTION_NAME), HOT_TEXT_SECTION_NAME);
1818 if (cfun->function_frequency == FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
1819 DECL_SECTION_NAME (current_function_decl) =
1820 build_string (strlen (UNLIKELY_EXECUTED_TEXT_SECTION_NAME),
1821 UNLIKELY_EXECUTED_TEXT_SECTION_NAME);
1822 }
1823
1824 static bool
1825 gate_estimate_probability (void)
1826 {
1827 return flag_guess_branch_prob;
1828 }
1829
1830 struct tree_opt_pass pass_profile =
1831 {
1832 "profile", /* name */
1833 gate_estimate_probability, /* gate */
1834 tree_estimate_probability, /* execute */
1835 NULL, /* sub */
1836 NULL, /* next */
1837 0, /* static_pass_number */
1838 TV_BRANCH_PROB, /* tv_id */
1839 PROP_cfg, /* properties_required */
1840 0, /* properties_provided */
1841 0, /* properties_destroyed */
1842 0, /* todo_flags_start */
1843 TODO_ggc_collect | TODO_verify_ssa, /* todo_flags_finish */
1844 0 /* letter */
1845 };