gcc/predict.c

   1 /* Branch prediction routines for the GNU compiler.
   2    Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005
   3    Free Software Foundation, Inc.
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 2, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING.  If not, write to the Free
  19 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
  20 02110-1301, USA.  */
  21
  22 /* References:
  23
  24    [1] "Branch Prediction for Free"
  25        Ball and Larus; PLDI '93.
  26    [2] "Static Branch Frequency and Program Profile Analysis"
  27        Wu and Larus; MICRO-27.
  28    [3] "Corpus-based Static Branch Prediction"
  29        Calder, Grunwald, Lindsay, Martin, Mozer, and Zorn; PLDI '95.  */
  30
  31
  32 #include "config.h"
  33 #include "system.h"
  34 #include "coretypes.h"
  35 #include "tm.h"
  36 #include "tree.h"
  37 #include "rtl.h"
  38 #include "tm_p.h"
  39 #include "hard-reg-set.h"
  40 #include "basic-block.h"
  41 #include "insn-config.h"
  42 #include "regs.h"
  43 #include "flags.h"
  44 #include "output.h"
  45 #include "function.h"
  46 #include "except.h"
  47 #include "toplev.h"
  48 #include "recog.h"
  49 #include "expr.h"
  50 #include "predict.h"
  51 #include "coverage.h"
  52 #include "sreal.h"
  53 #include "params.h"
  54 #include "target.h"
  55 #include "cfgloop.h"
  56 #include "tree-flow.h"
  57 #include "ggc.h"
  58 #include "tree-dump.h"
  59 #include "tree-pass.h"
  60 #include "timevar.h"
  61 #include "tree-scalar-evolution.h"
  62 #include "cfgloop.h"
  63
  64 /* real constants: 0, 1, 1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE,
  65                    1/REG_BR_PROB_BASE, 0.5, BB_FREQ_MAX.  */
  66 static sreal real_zero, real_one, real_almost_one, real_br_prob_base,
  67              real_inv_br_prob_base, real_one_half, real_bb_freq_max;
  68
  69 /* Random guesstimation given names.  */
  70 #define PROB_VERY_UNLIKELY      (REG_BR_PROB_BASE / 100 - 1)
  71 #define PROB_EVEN               (REG_BR_PROB_BASE / 2)
  72 #define PROB_VERY_LIKELY        (REG_BR_PROB_BASE - PROB_VERY_UNLIKELY)
  73 #define PROB_ALWAYS             (REG_BR_PROB_BASE)
  74
  75 static void combine_predictions_for_insn (rtx, basic_block);
  76 static void dump_prediction (FILE *, enum br_predictor, int, basic_block, int);
  77 static void estimate_bb_frequencies (struct loops *);
  78 static void predict_paths_leading_to (basic_block, int *, enum br_predictor, enum prediction);
  79 static bool last_basic_block_p (basic_block);
  80 static void compute_function_frequency (void);
  81 static void choose_function_section (void);
  82 static bool can_predict_insn_p (rtx);
  83
  84 /* Information we hold about each branch predictor.
  85    Filled using information from predict.def.  */
  86
  87 struct predictor_info
  88 {
  89   const char *const name;       /* Name used in the debugging dumps.  */
  90   const int hitrate;            /* Expected hitrate used by
  91                                    predict_insn_def call.  */
  92   const int flags;
  93 };
  94
  95 /* Use given predictor without Dempster-Shaffer theory if it matches
  96    using first_match heuristics.  */
  97 #define PRED_FLAG_FIRST_MATCH 1
  98
  99 /* Recompute hitrate in percent to our representation.  */
 100
 101 #define HITRATE(VAL) ((int) ((VAL) * REG_BR_PROB_BASE + 50) / 100)
 102
 103 #define DEF_PREDICTOR(ENUM, NAME, HITRATE, FLAGS) {NAME, HITRATE, FLAGS},
 104 static const struct predictor_info predictor_info[]= {
 105 #include "predict.def"
 106
 107   /* Upper bound on predictors.  */
 108   {NULL, 0, 0}
 109 };
 110 #undef DEF_PREDICTOR
 111
 112 /* Return true in case BB can be CPU intensive and should be optimized
 113    for maximal performance.  */
 114
 115 bool
 116 maybe_hot_bb_p (basic_block bb)
 117 {
 118   if (profile_info && flag_branch_probabilities
 119       && (bb->count
 120           < profile_info->sum_max / PARAM_VALUE (HOT_BB_COUNT_FRACTION)))
 121     return false;
 122   if (bb->frequency < BB_FREQ_MAX / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION))
 123     return false;
 124   return true;
 125 }
 126
 127 /* Return true in case BB is cold and should be optimized for size.  */
 128
 129 bool
 130 probably_cold_bb_p (basic_block bb)
 131 {
 132   if (profile_info && flag_branch_probabilities
 133       && (bb->count
 134           < profile_info->sum_max / PARAM_VALUE (HOT_BB_COUNT_FRACTION)))
 135     return true;
 136   if (bb->frequency < BB_FREQ_MAX / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION))
 137     return true;
 138   return false;
 139 }
 140
 141 /* Return true in case BB is probably never executed.  */
 142 bool
 143 probably_never_executed_bb_p (basic_block bb)
 144 {
 145   if (profile_info && flag_branch_probabilities)
 146     return ((bb->count + profile_info->runs / 2) / profile_info->runs) == 0;
 147   return false;
 148 }
 149
 150 /* Return true if the one of outgoing edges is already predicted by
 151    PREDICTOR.  */
 152
 153 bool
 154 rtl_predicted_by_p (basic_block bb, enum br_predictor predictor)
 155 {
 156   rtx note;
 157   if (!INSN_P (BB_END (bb)))
 158     return false;
 159   for (note = REG_NOTES (BB_END (bb)); note; note = XEXP (note, 1))
 160     if (REG_NOTE_KIND (note) == REG_BR_PRED
 161         && INTVAL (XEXP (XEXP (note, 0), 0)) == (int)predictor)
 162       return true;
 163   return false;
 164 }
 165
 166 /* Return true if the one of outgoing edges is already predicted by
 167    PREDICTOR.  */
 168
 169 bool
 170 tree_predicted_by_p (basic_block bb, enum br_predictor predictor)
 171 {
 172   struct edge_prediction *i;
 173   for (i = bb->predictions; i; i = i->ep_next)
 174     if (i->ep_predictor == predictor)
 175       return true;
 176   return false;
 177 }
 178
 179 /* Return true when the probability of edge is reliable.
 180
 181    The profile guessing code is good at predicting branch outcome (ie.
 182    taken/not taken), that is predicted right slightly over 75% of time.
 183    It is however notoriously poor on predicting the probability itself.
 184    In general the profile appear a lot flatter (with probabilities closer
 185    to 50%) than the reality so it is bad idea to use it to drive optimization
 186    such as those disabling dynamic branch prediction for well predictable
 187    branches.
 188
 189    There are two exceptions - edges leading to noreturn edges and edges
 190    predicted by number of iterations heuristics are predicted well.  This macro
 191    should be able to distinguish those, but at the moment it simply check for
 192    noreturn heuristic that is only one giving probability over 99% or bellow
 193    1%.  In future we might want to propagate reliability information across the
 194    CFG if we find this information useful on multiple places.   */
 195 static bool
 196 probability_reliable_p (int prob)
 197 {
 198   return (profile_status == PROFILE_READ
 199           || (profile_status == PROFILE_GUESSED
 200               && (prob <= HITRATE (1) || prob >= HITRATE (99))));
 201 }
 202
 203 /* Same predicate as above, working on edges.  */
 204 bool
 205 edge_probability_reliable_p (edge e)
 206 {
 207   return probability_reliable_p (e->probability);
 208 }
 209
 210 /* Same predicate as edge_probability_reliable_p, working on notes.  */
 211 bool
 212 br_prob_note_reliable_p (rtx note)
 213 {
 214   gcc_assert (REG_NOTE_KIND (note) == REG_BR_PROB);
 215   return probability_reliable_p (INTVAL (XEXP (note, 0)));
 216 }
 217
 218 static void
 219 predict_insn (rtx insn, enum br_predictor predictor, int probability)
 220 {
 221   gcc_assert (any_condjump_p (insn));
 222   if (!flag_guess_branch_prob)
 223     return;
 224
 225   REG_NOTES (insn)
 226     = gen_rtx_EXPR_LIST (REG_BR_PRED,
 227                          gen_rtx_CONCAT (VOIDmode,
 228                                          GEN_INT ((int) predictor),
 229                                          GEN_INT ((int) probability)),
 230                          REG_NOTES (insn));
 231 }
 232
 233 /* Predict insn by given predictor.  */
 234
 235 void
 236 predict_insn_def (rtx insn, enum br_predictor predictor,
 237                   enum prediction taken)
 238 {
 239    int probability = predictor_info[(int) predictor].hitrate;
 240
 241    if (taken != TAKEN)
 242      probability = REG_BR_PROB_BASE - probability;
 243
 244    predict_insn (insn, predictor, probability);
 245 }
 246
 247 /* Predict edge E with given probability if possible.  */
 248
 249 void
 250 rtl_predict_edge (edge e, enum br_predictor predictor, int probability)
 251 {
 252   rtx last_insn;
 253   last_insn = BB_END (e->src);
 254
 255   /* We can store the branch prediction information only about
 256      conditional jumps.  */
 257   if (!any_condjump_p (last_insn))
 258     return;
 259
 260   /* We always store probability of branching.  */
 261   if (e->flags & EDGE_FALLTHRU)
 262     probability = REG_BR_PROB_BASE - probability;
 263
 264   predict_insn (last_insn, predictor, probability);
 265 }
 266
 267 /* Predict edge E with the given PROBABILITY.  */
 268 void
 269 tree_predict_edge (edge e, enum br_predictor predictor, int probability)
 270 {
 271   gcc_assert (profile_status != PROFILE_GUESSED);
 272   if ((e->src != ENTRY_BLOCK_PTR && EDGE_COUNT (e->src->succs) > 1)
 273       && flag_guess_branch_prob && optimize)
 274     {
 275       struct edge_prediction *i = ggc_alloc (sizeof (struct edge_prediction));
 276
 277       i->ep_next = e->src->predictions;
 278       e->src->predictions = i;
 279       i->ep_probability = probability;
 280       i->ep_predictor = predictor;
 281       i->ep_edge = e;
 282     }
 283 }
 284
 285 /* Remove all predictions on given basic block that are attached
 286    to edge E.  */
 287 void
 288 remove_predictions_associated_with_edge (edge e)
 289 {
 290   if (e->src->predictions)
 291     {
 292       struct edge_prediction **prediction = &e->src->predictions;
 293       while (*prediction)
 294         {
 295           if ((*prediction)->ep_edge == e)
 296             *prediction = (*prediction)->ep_next;
 297           else
 298             prediction = &((*prediction)->ep_next);
 299         }
 300     }
 301 }
 302
 303 /* Return true when we can store prediction on insn INSN.
 304    At the moment we represent predictions only on conditional
 305    jumps, not at computed jump or other complicated cases.  */
 306 static bool
 307 can_predict_insn_p (rtx insn)
 308 {
 309   return (JUMP_P (insn)
 310           && any_condjump_p (insn)
 311           && EDGE_COUNT (BLOCK_FOR_INSN (insn)->succs) >= 2);
 312 }
 313
 314 /* Predict edge E by given predictor if possible.  */
 315
 316 void
 317 predict_edge_def (edge e, enum br_predictor predictor,
 318                   enum prediction taken)
 319 {
 320    int probability = predictor_info[(int) predictor].hitrate;
 321
 322    if (taken != TAKEN)
 323      probability = REG_BR_PROB_BASE - probability;
 324
 325    predict_edge (e, predictor, probability);
 326 }
 327
 328 /* Invert all branch predictions or probability notes in the INSN.  This needs
 329    to be done each time we invert the condition used by the jump.  */
 330
 331 void
 332 invert_br_probabilities (rtx insn)
 333 {
 334   rtx note;
 335
 336   for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
 337     if (REG_NOTE_KIND (note) == REG_BR_PROB)
 338       XEXP (note, 0) = GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (note, 0)));
 339     else if (REG_NOTE_KIND (note) == REG_BR_PRED)
 340       XEXP (XEXP (note, 0), 1)
 341         = GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (XEXP (note, 0), 1)));
 342 }
 343
 344 /* Dump information about the branch prediction to the output file.  */
 345
 346 static void
 347 dump_prediction (FILE *file, enum br_predictor predictor, int probability,
 348                  basic_block bb, int used)
 349 {
 350   edge e;
 351   edge_iterator ei;
 352
 353   if (!file)
 354     return;
 355
 356   FOR_EACH_EDGE (e, ei, bb->succs)
 357     if (! (e->flags & EDGE_FALLTHRU))
 358       break;
 359
 360   fprintf (file, "  %s heuristics%s: %.1f%%",
 361            predictor_info[predictor].name,
 362            used ? "" : " (ignored)", probability * 100.0 / REG_BR_PROB_BASE);
 363
 364   if (bb->count)
 365     {
 366       fprintf (file, "  exec ");
 367       fprintf (file, HOST_WIDEST_INT_PRINT_DEC, bb->count);
 368       if (e)
 369         {
 370           fprintf (file, " hit ");
 371           fprintf (file, HOST_WIDEST_INT_PRINT_DEC, e->count);
 372           fprintf (file, " (%.1f%%)", e->count * 100.0 / bb->count);
 373         }
 374     }
 375
 376   fprintf (file, "\n");
 377 }
 378
 379 /* We can not predict the probabilities of outgoing edges of bb.  Set them
 380    evenly and hope for the best.  */
 381 static void
 382 set_even_probabilities (basic_block bb)
 383 {
 384   int nedges = 0;
 385   edge e;
 386   edge_iterator ei;
 387
 388   FOR_EACH_EDGE (e, ei, bb->succs)
 389     if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
 390       nedges ++;
 391   FOR_EACH_EDGE (e, ei, bb->succs)
 392     if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
 393       e->probability = (REG_BR_PROB_BASE + nedges / 2) / nedges;
 394     else
 395       e->probability = 0;
 396 }
 397
 398 /* Combine all REG_BR_PRED notes into single probability and attach REG_BR_PROB
 399    note if not already present.  Remove now useless REG_BR_PRED notes.  */
 400
 401 static void
 402 combine_predictions_for_insn (rtx insn, basic_block bb)
 403 {
 404   rtx prob_note;
 405   rtx *pnote;
 406   rtx note;
 407   int best_probability = PROB_EVEN;
 408   int best_predictor = END_PREDICTORS;
 409   int combined_probability = REG_BR_PROB_BASE / 2;
 410   int d;
 411   bool first_match = false;
 412   bool found = false;
 413
 414   if (!can_predict_insn_p (insn))
 415     {
 416       set_even_probabilities (bb);
 417       return;
 418     }
 419
 420   prob_note = find_reg_note (insn, REG_BR_PROB, 0);
 421   pnote = &REG_NOTES (insn);
 422   if (dump_file)
 423     fprintf (dump_file, "Predictions for insn %i bb %i\n", INSN_UID (insn),
 424              bb->index);
 425
 426   /* We implement "first match" heuristics and use probability guessed
 427      by predictor with smallest index.  */
 428   for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
 429     if (REG_NOTE_KIND (note) == REG_BR_PRED)
 430       {
 431         int predictor = INTVAL (XEXP (XEXP (note, 0), 0));
 432         int probability = INTVAL (XEXP (XEXP (note, 0), 1));
 433
 434         found = true;
 435         if (best_predictor > predictor)
 436           best_probability = probability, best_predictor = predictor;
 437
 438         d = (combined_probability * probability
 439              + (REG_BR_PROB_BASE - combined_probability)
 440              * (REG_BR_PROB_BASE - probability));
 441
 442         /* Use FP math to avoid overflows of 32bit integers.  */
 443         if (d == 0)
 444           /* If one probability is 0% and one 100%, avoid division by zero.  */
 445           combined_probability = REG_BR_PROB_BASE / 2;
 446         else
 447           combined_probability = (((double) combined_probability) * probability
 448                                   * REG_BR_PROB_BASE / d + 0.5);
 449       }
 450
 451   /* Decide which heuristic to use.  In case we didn't match anything,
 452      use no_prediction heuristic, in case we did match, use either
 453      first match or Dempster-Shaffer theory depending on the flags.  */
 454
 455   if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
 456     first_match = true;
 457
 458   if (!found)
 459     dump_prediction (dump_file, PRED_NO_PREDICTION,
 460                      combined_probability, bb, true);
 461   else
 462     {
 463       dump_prediction (dump_file, PRED_DS_THEORY, combined_probability,
 464                        bb, !first_match);
 465       dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability,
 466                        bb, first_match);
 467     }
 468
 469   if (first_match)
 470     combined_probability = best_probability;
 471   dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);
 472
 473   while (*pnote)
 474     {
 475       if (REG_NOTE_KIND (*pnote) == REG_BR_PRED)
 476         {
 477           int predictor = INTVAL (XEXP (XEXP (*pnote, 0), 0));
 478           int probability = INTVAL (XEXP (XEXP (*pnote, 0), 1));
 479
 480           dump_prediction (dump_file, predictor, probability, bb,
 481                            !first_match || best_predictor == predictor);
 482           *pnote = XEXP (*pnote, 1);
 483         }
 484       else
 485         pnote = &XEXP (*pnote, 1);
 486     }
 487
 488   if (!prob_note)
 489     {
 490       REG_NOTES (insn)
 491         = gen_rtx_EXPR_LIST (REG_BR_PROB,
 492                              GEN_INT (combined_probability), REG_NOTES (insn));
 493
 494       /* Save the prediction into CFG in case we are seeing non-degenerated
 495          conditional jump.  */
 496       if (!single_succ_p (bb))
 497         {
 498           BRANCH_EDGE (bb)->probability = combined_probability;
 499           FALLTHRU_EDGE (bb)->probability
 500             = REG_BR_PROB_BASE - combined_probability;
 501         }
 502     }
 503   else if (!single_succ_p (bb))
 504     {
 505       int prob = INTVAL (XEXP (prob_note, 0));
 506
 507       BRANCH_EDGE (bb)->probability = prob;
 508       FALLTHRU_EDGE (bb)->probability = REG_BR_PROB_BASE - prob;
 509     }
 510   else
 511     single_succ_edge (bb)->probability = REG_BR_PROB_BASE;
 512 }
 513
 514 /* Combine predictions into single probability and store them into CFG.
 515    Remove now useless prediction entries.  */
 516
 517 static void
 518 combine_predictions_for_bb (basic_block bb)
 519 {
 520   int best_probability = PROB_EVEN;
 521   int best_predictor = END_PREDICTORS;
 522   int combined_probability = REG_BR_PROB_BASE / 2;
 523   int d;
 524   bool first_match = false;
 525   bool found = false;
 526   struct edge_prediction *pred;
 527   int nedges = 0;
 528   edge e, first = NULL, second = NULL;
 529   edge_iterator ei;
 530
 531   FOR_EACH_EDGE (e, ei, bb->succs)
 532     if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
 533       {
 534         nedges ++;
 535         if (first && !second)
 536           second = e;
 537         if (!first)
 538           first = e;
 539       }
 540
 541   /* When there is no successor or only one choice, prediction is easy.
 542
 543      We are lazy for now and predict only basic blocks with two outgoing
 544      edges.  It is possible to predict generic case too, but we have to
 545      ignore first match heuristics and do more involved combining.  Implement
 546      this later.  */
 547   if (nedges != 2)
 548     {
 549       if (!bb->count)
 550         set_even_probabilities (bb);
 551       bb->predictions = NULL;
 552       if (dump_file)
 553         fprintf (dump_file, "%i edges in bb %i predicted to even probabilities\n",
 554                  nedges, bb->index);
 555       return;
 556     }
 557
 558   if (dump_file)
 559     fprintf (dump_file, "Predictions for bb %i\n", bb->index);
 560
 561   /* We implement "first match" heuristics and use probability guessed
 562      by predictor with smallest index.  */
 563   for (pred = bb->predictions; pred; pred = pred->ep_next)
 564     {
 565       int predictor = pred->ep_predictor;
 566       int probability = pred->ep_probability;
 567
 568       if (pred->ep_edge != first)
 569         probability = REG_BR_PROB_BASE - probability;
 570
 571       found = true;
 572       if (best_predictor > predictor)
 573         best_probability = probability, best_predictor = predictor;
 574
 575       d = (combined_probability * probability
 576            + (REG_BR_PROB_BASE - combined_probability)
 577            * (REG_BR_PROB_BASE - probability));
 578
 579       /* Use FP math to avoid overflows of 32bit integers.  */
 580       if (d == 0)
 581         /* If one probability is 0% and one 100%, avoid division by zero.  */
 582         combined_probability = REG_BR_PROB_BASE / 2;
 583       else
 584         combined_probability = (((double) combined_probability) * probability
 585                                 * REG_BR_PROB_BASE / d + 0.5);
 586     }
 587
 588   /* Decide which heuristic to use.  In case we didn't match anything,
 589      use no_prediction heuristic, in case we did match, use either
 590      first match or Dempster-Shaffer theory depending on the flags.  */
 591
 592   if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
 593     first_match = true;
 594
 595   if (!found)
 596     dump_prediction (dump_file, PRED_NO_PREDICTION, combined_probability, bb, true);
 597   else
 598     {
 599       dump_prediction (dump_file, PRED_DS_THEORY, combined_probability, bb,
 600                        !first_match);
 601       dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability, bb,
 602                        first_match);
 603     }
 604
 605   if (first_match)
 606     combined_probability = best_probability;
 607   dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);
 608
 609   for (pred = bb->predictions; pred; pred = pred->ep_next)
 610     {
 611       int predictor = pred->ep_predictor;
 612       int probability = pred->ep_probability;
 613
 614       if (pred->ep_edge != EDGE_SUCC (bb, 0))
 615         probability = REG_BR_PROB_BASE - probability;
 616       dump_prediction (dump_file, predictor, probability, bb,
 617                        !first_match || best_predictor == predictor);
 618     }
 619   bb->predictions = NULL;
 620
 621   if (!bb->count)
 622     {
 623       first->probability = combined_probability;
 624       second->probability = REG_BR_PROB_BASE - combined_probability;
 625     }
 626 }
 627
 628 /* Predict edge probabilities by exploiting loop structure.
 629    When RTLSIMPLELOOPS is set, attempt to count number of iterations by analyzing
 630    RTL otherwise use tree based approach.  */
 631 static void
 632 predict_loops (struct loops *loops_info)
 633 {
 634   unsigned i;
 635
 636   scev_initialize (loops_info);
 637
 638   /* Try to predict out blocks in a loop that are not part of a
 639      natural loop.  */
 640   for (i = 1; i < loops_info->num; i++)
 641     {
 642       basic_block bb, *bbs;
 643       unsigned j, n_exits;
 644       struct loop *loop = loops_info->parray[i];
 645       VEC (edge, heap) *exits;
 646       struct tree_niter_desc niter_desc;
 647       edge ex;
 648
 649       exits = get_loop_exit_edges (loop);
 650       n_exits = VEC_length (edge, exits);
 651
 652       for (j = 0; VEC_iterate (edge, exits, j, ex); j++)
 653         {
 654           tree niter = NULL;
 655
 656           if (number_of_iterations_exit (loop, ex, &niter_desc, false))
 657             niter = niter_desc.niter;
 658           if (!niter || TREE_CODE (niter_desc.niter) != INTEGER_CST)
 659             niter = loop_niter_by_eval (loop, ex);
 660
 661           if (TREE_CODE (niter) == INTEGER_CST)
 662             {
 663               int probability;
 664               int max = PARAM_VALUE (PARAM_MAX_PREDICTED_ITERATIONS);
 665               if (host_integerp (niter, 1)
 666                   && tree_int_cst_lt (niter,
 667                                       build_int_cstu (NULL_TREE, max - 1)))
 668                 {
 669                   HOST_WIDE_INT nitercst = tree_low_cst (niter, 1) + 1;
 670                   probability = ((REG_BR_PROB_BASE + nitercst / 2)
 671                                  / nitercst);
 672                 }
 673               else
 674                 probability = ((REG_BR_PROB_BASE + max / 2) / max);
 675
 676               predict_edge (ex, PRED_LOOP_ITERATIONS, probability);
 677             }
 678         }
 679       VEC_free (edge, heap, exits);
 680
 681       bbs = get_loop_body (loop);
 682
 683       for (j = 0; j < loop->num_nodes; j++)
 684         {
 685           int header_found = 0;
 686           edge e;
 687           edge_iterator ei;
 688
 689           bb = bbs[j];
 690
 691           /* Bypass loop heuristics on continue statement.  These
 692              statements construct loops via "non-loop" constructs
 693              in the source language and are better to be handled
 694              separately.  */
 695           if (predicted_by_p (bb, PRED_CONTINUE))
 696             continue;
 697
 698           /* Loop branch heuristics - predict an edge back to a
 699              loop's head as taken.  */
 700           if (bb == loop->latch)
 701             {
 702               e = find_edge (loop->latch, loop->header);
 703               if (e)
 704                 {
 705                   header_found = 1;
 706                   predict_edge_def (e, PRED_LOOP_BRANCH, TAKEN);
 707                 }
 708             }
 709
 710           /* Loop exit heuristics - predict an edge exiting the loop if the
 711              conditional has no loop header successors as not taken.  */
 712           if (!header_found)
 713             {
 714               /* For loop with many exits we don't want to predict all exits
 715                  with the pretty large probability, because if all exits are
 716                  considered in row, the loop would be predicted to iterate
 717                  almost never.  The code to divide probability by number of
 718                  exits is very rough.  It should compute the number of exits
 719                  taken in each patch through function (not the overall number
 720                  of exits that might be a lot higher for loops with wide switch
 721                  statements in them) and compute n-th square root.
 722
 723                  We limit the minimal probability by 2% to avoid
 724                  EDGE_PROBABILITY_RELIABLE from trusting the branch prediction
 725                  as this was causing regression in perl benchmark containing such
 726                  a wide loop.  */
 727
 728               int probability = ((REG_BR_PROB_BASE
 729                                   - predictor_info [(int) PRED_LOOP_EXIT].hitrate)
 730                                  / n_exits);
 731               if (probability < HITRATE (2))
 732                 probability = HITRATE (2);
 733               FOR_EACH_EDGE (e, ei, bb->succs)
 734                 if (e->dest->index < NUM_FIXED_BLOCKS
 735                     || !flow_bb_inside_loop_p (loop, e->dest))
 736                   predict_edge (e, PRED_LOOP_EXIT, probability);
 737             }
 738         }
 739
 740       /* Free basic blocks from get_loop_body.  */
 741       free (bbs);
 742     }
 743
 744   scev_finalize ();
 745 }
 746
 747 /* Attempt to predict probabilities of BB outgoing edges using local
 748    properties.  */
 749 static void
 750 bb_estimate_probability_locally (basic_block bb)
 751 {
 752   rtx last_insn = BB_END (bb);
 753   rtx cond;
 754
 755   if (! can_predict_insn_p (last_insn))
 756     return;
 757   cond = get_condition (last_insn, NULL, false, false);
 758   if (! cond)
 759     return;
 760
 761   /* Try "pointer heuristic."
 762      A comparison ptr == 0 is predicted as false.
 763      Similarly, a comparison ptr1 == ptr2 is predicted as false.  */
 764   if (COMPARISON_P (cond)
 765       && ((REG_P (XEXP (cond, 0)) && REG_POINTER (XEXP (cond, 0)))
 766           || (REG_P (XEXP (cond, 1)) && REG_POINTER (XEXP (cond, 1)))))
 767     {
 768       if (GET_CODE (cond) == EQ)
 769         predict_insn_def (last_insn, PRED_POINTER, NOT_TAKEN);
 770       else if (GET_CODE (cond) == NE)
 771         predict_insn_def (last_insn, PRED_POINTER, TAKEN);
 772     }
 773   else
 774
 775   /* Try "opcode heuristic."
 776      EQ tests are usually false and NE tests are usually true. Also,
 777      most quantities are positive, so we can make the appropriate guesses
 778      about signed comparisons against zero.  */
 779     switch (GET_CODE (cond))
 780       {
 781       case CONST_INT:
 782         /* Unconditional branch.  */
 783         predict_insn_def (last_insn, PRED_UNCONDITIONAL,
 784                           cond == const0_rtx ? NOT_TAKEN : TAKEN);
 785         break;
 786
 787       case EQ:
 788       case UNEQ:
 789         /* Floating point comparisons appears to behave in a very
 790            unpredictable way because of special role of = tests in
 791            FP code.  */
 792         if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
 793           ;
 794         /* Comparisons with 0 are often used for booleans and there is
 795            nothing useful to predict about them.  */
 796         else if (XEXP (cond, 1) == const0_rtx
 797                  || XEXP (cond, 0) == const0_rtx)
 798           ;
 799         else
 800           predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, NOT_TAKEN);
 801         break;
 802
 803       case NE:
 804       case LTGT:
 805         /* Floating point comparisons appears to behave in a very
 806            unpredictable way because of special role of = tests in
 807            FP code.  */
 808         if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
 809           ;
 810         /* Comparisons with 0 are often used for booleans and there is
 811            nothing useful to predict about them.  */
 812         else if (XEXP (cond, 1) == const0_rtx
 813                  || XEXP (cond, 0) == const0_rtx)
 814           ;
 815         else
 816           predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, TAKEN);
 817         break;
 818
 819       case ORDERED:
 820         predict_insn_def (last_insn, PRED_FPOPCODE, TAKEN);
 821         break;
 822
 823       case UNORDERED:
 824         predict_insn_def (last_insn, PRED_FPOPCODE, NOT_TAKEN);
 825         break;
 826
 827       case LE:
 828       case LT:
 829         if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
 830             || XEXP (cond, 1) == constm1_rtx)
 831           predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, NOT_TAKEN);
 832         break;
 833
 834       case GE:
 835       case GT:
 836         if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
 837             || XEXP (cond, 1) == constm1_rtx)
 838           predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, TAKEN);
 839         break;
 840
 841       default:
 842         break;
 843       }
 844 }
 845
 846 /* Set edge->probability for each successor edge of BB.  */
 847 void
 848 guess_outgoing_edge_probabilities (basic_block bb)
 849 {
 850   bb_estimate_probability_locally (bb);
 851   combine_predictions_for_insn (BB_END (bb), bb);
 852 }
 853 \f
 854 /* Return constant EXPR will likely have at execution time, NULL if unknown.
 855    The function is used by builtin_expect branch predictor so the evidence
 856    must come from this construct and additional possible constant folding.
 857
 858    We may want to implement more involved value guess (such as value range
 859    propagation based prediction), but such tricks shall go to new
 860    implementation.  */
 861
 862 static tree
 863 expr_expected_value (tree expr, bitmap visited)
 864 {
 865   if (TREE_CONSTANT (expr))
 866     return expr;
 867   else if (TREE_CODE (expr) == SSA_NAME)
 868     {
 869       tree def = SSA_NAME_DEF_STMT (expr);
 870
 871       /* If we were already here, break the infinite cycle.  */
 872       if (bitmap_bit_p (visited, SSA_NAME_VERSION (expr)))
 873         return NULL;
 874       bitmap_set_bit (visited, SSA_NAME_VERSION (expr));
 875
 876       if (TREE_CODE (def) == PHI_NODE)
 877         {
 878           /* All the arguments of the PHI node must have the same constant
 879              length.  */
 880           int i;
 881           tree val = NULL, new_val;
 882
 883           for (i = 0; i < PHI_NUM_ARGS (def); i++)
 884             {
 885               tree arg = PHI_ARG_DEF (def, i);
 886
 887               /* If this PHI has itself as an argument, we cannot
 888                  determine the string length of this argument.  However,
 889                  if we can find an expected constant value for the other
 890                  PHI args then we can still be sure that this is
 891                  likely a constant.  So be optimistic and just
 892                  continue with the next argument.  */
 893               if (arg == PHI_RESULT (def))
 894                 continue;
 895
 896               new_val = expr_expected_value (arg, visited);
 897               if (!new_val)
 898                 return NULL;
 899               if (!val)
 900                 val = new_val;
 901               else if (!operand_equal_p (val, new_val, false))
 902                 return NULL;
 903             }
 904           return val;
 905         }
 906       if (TREE_CODE (def) != MODIFY_EXPR || TREE_OPERAND (def, 0) != expr)
 907         return NULL;
 908       return expr_expected_value (TREE_OPERAND (def, 1), visited);
 909     }
 910   else if (TREE_CODE (expr) == CALL_EXPR)
 911     {
 912       tree decl = get_callee_fndecl (expr);
 913       if (!decl)
 914         return NULL;
 915       if (DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL
 916           && DECL_FUNCTION_CODE (decl) == BUILT_IN_EXPECT)
 917         {
 918           tree arglist = TREE_OPERAND (expr, 1);
 919           tree val;
 920
 921           if (arglist == NULL_TREE
 922               || TREE_CHAIN (arglist) == NULL_TREE)
 923             return NULL;
 924           val = TREE_VALUE (TREE_CHAIN (TREE_OPERAND (expr, 1)));
 925           if (TREE_CONSTANT (val))
 926             return val;
 927           return TREE_VALUE (TREE_CHAIN (TREE_OPERAND (expr, 1)));
 928         }
 929     }
 930   if (BINARY_CLASS_P (expr) || COMPARISON_CLASS_P (expr))
 931     {
 932       tree op0, op1, res;
 933       op0 = expr_expected_value (TREE_OPERAND (expr, 0), visited);
 934       if (!op0)
 935         return NULL;
 936       op1 = expr_expected_value (TREE_OPERAND (expr, 1), visited);
 937       if (!op1)
 938         return NULL;
 939       res = fold_build2 (TREE_CODE (expr), TREE_TYPE (expr), op0, op1);
 940       if (TREE_CONSTANT (res))
 941         return res;
 942       return NULL;
 943     }
 944   if (UNARY_CLASS_P (expr))
 945     {
 946       tree op0, res;
 947       op0 = expr_expected_value (TREE_OPERAND (expr, 0), visited);
 948       if (!op0)
 949         return NULL;
 950       res = fold_build1 (TREE_CODE (expr), TREE_TYPE (expr), op0);
 951       if (TREE_CONSTANT (res))
 952         return res;
 953       return NULL;
 954     }
 955   return NULL;
 956 }
 957 \f
 958 /* Get rid of all builtin_expect calls we no longer need.  */
 959 static void
 960 strip_builtin_expect (void)
 961 {
 962   basic_block bb;
 963   FOR_EACH_BB (bb)
 964     {
 965       block_stmt_iterator bi;
 966       for (bi = bsi_start (bb); !bsi_end_p (bi); bsi_next (&bi))
 967         {
 968           tree stmt = bsi_stmt (bi);
 969           tree fndecl;
 970           tree arglist;
 971
 972           if (TREE_CODE (stmt) == MODIFY_EXPR
 973               && TREE_CODE (TREE_OPERAND (stmt, 1)) == CALL_EXPR
 974               && (fndecl = get_callee_fndecl (TREE_OPERAND (stmt, 1)))
 975               && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
 976               && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_EXPECT
 977               && (arglist = TREE_OPERAND (TREE_OPERAND (stmt, 1), 1))
 978               && TREE_CHAIN (arglist))
 979             {
 980               TREE_OPERAND (stmt, 1) = TREE_VALUE (arglist);
 981               update_stmt (stmt);
 982             }
 983         }
 984     }
 985 }
 986 \f
 987 /* Predict using opcode of the last statement in basic block.  */
 988 static void
 989 tree_predict_by_opcode (basic_block bb)
 990 {
 991   tree stmt = last_stmt (bb);
 992   edge then_edge;
 993   tree cond;
 994   tree op0;
 995   tree type;
 996   tree val;
 997   bitmap visited;
 998   edge_iterator ei;
 999
1000   if (!stmt || TREE_CODE (stmt) != COND_EXPR)
1001     return;
1002   FOR_EACH_EDGE (then_edge, ei, bb->succs)
1003     if (then_edge->flags & EDGE_TRUE_VALUE)
1004       break;
1005   cond = TREE_OPERAND (stmt, 0);
1006   if (!COMPARISON_CLASS_P (cond))
1007     return;
1008   op0 = TREE_OPERAND (cond, 0);
1009   type = TREE_TYPE (op0);
1010   visited = BITMAP_ALLOC (NULL);
1011   val = expr_expected_value (cond, visited);
1012   BITMAP_FREE (visited);
1013   if (val)
1014     {
1015       if (integer_zerop (val))
1016         predict_edge_def (then_edge, PRED_BUILTIN_EXPECT, NOT_TAKEN);
1017       else
1018         predict_edge_def (then_edge, PRED_BUILTIN_EXPECT, TAKEN);
1019       return;
1020     }
1021   /* Try "pointer heuristic."
1022      A comparison ptr == 0 is predicted as false.
1023      Similarly, a comparison ptr1 == ptr2 is predicted as false.  */
1024   if (POINTER_TYPE_P (type))
1025     {
1026       if (TREE_CODE (cond) == EQ_EXPR)
1027         predict_edge_def (then_edge, PRED_TREE_POINTER, NOT_TAKEN);
1028       else if (TREE_CODE (cond) == NE_EXPR)
1029         predict_edge_def (then_edge, PRED_TREE_POINTER, TAKEN);
1030     }
1031   else
1032
1033   /* Try "opcode heuristic."
1034      EQ tests are usually false and NE tests are usually true. Also,
1035      most quantities are positive, so we can make the appropriate guesses
1036      about signed comparisons against zero.  */
1037     switch (TREE_CODE (cond))
1038       {
1039       case EQ_EXPR:
1040       case UNEQ_EXPR:
1041         /* Floating point comparisons appears to behave in a very
1042            unpredictable way because of special role of = tests in
1043            FP code.  */
1044         if (FLOAT_TYPE_P (type))
1045           ;
1046         /* Comparisons with 0 are often used for booleans and there is
1047            nothing useful to predict about them.  */
1048         else if (integer_zerop (op0)
1049                  || integer_zerop (TREE_OPERAND (cond, 1)))
1050           ;
1051         else
1052           predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, NOT_TAKEN);
1053         break;
1054
1055       case NE_EXPR:
1056       case LTGT_EXPR:
1057         /* Floating point comparisons appears to behave in a very
1058            unpredictable way because of special role of = tests in
1059            FP code.  */
1060         if (FLOAT_TYPE_P (type))
1061           ;
1062         /* Comparisons with 0 are often used for booleans and there is
1063            nothing useful to predict about them.  */
1064         else if (integer_zerop (op0)
1065                  || integer_zerop (TREE_OPERAND (cond, 1)))
1066           ;
1067         else
1068           predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, TAKEN);
1069         break;
1070
1071       case ORDERED_EXPR:
1072         predict_edge_def (then_edge, PRED_TREE_FPOPCODE, TAKEN);
1073         break;
1074
1075       case UNORDERED_EXPR:
1076         predict_edge_def (then_edge, PRED_TREE_FPOPCODE, NOT_TAKEN);
1077         break;
1078
1079       case LE_EXPR:
1080       case LT_EXPR:
1081         if (integer_zerop (TREE_OPERAND (cond, 1))
1082             || integer_onep (TREE_OPERAND (cond, 1))
1083             || integer_all_onesp (TREE_OPERAND (cond, 1))
1084             || real_zerop (TREE_OPERAND (cond, 1))
1085             || real_onep (TREE_OPERAND (cond, 1))
1086             || real_minus_onep (TREE_OPERAND (cond, 1)))
1087           predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, NOT_TAKEN);
1088         break;
1089
1090       case GE_EXPR:
1091       case GT_EXPR:
1092         if (integer_zerop (TREE_OPERAND (cond, 1))
1093             || integer_onep (TREE_OPERAND (cond, 1))
1094             || integer_all_onesp (TREE_OPERAND (cond, 1))
1095             || real_zerop (TREE_OPERAND (cond, 1))
1096             || real_onep (TREE_OPERAND (cond, 1))
1097             || real_minus_onep (TREE_OPERAND (cond, 1)))
1098           predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, TAKEN);
1099         break;
1100
1101       default:
1102         break;
1103       }
1104 }
1105
1106 /* Try to guess whether the value of return means error code.  */
1107 static enum br_predictor
1108 return_prediction (tree val, enum prediction *prediction)
1109 {
1110   /* VOID.  */
1111   if (!val)
1112     return PRED_NO_PREDICTION;
1113   /* Different heuristics for pointers and scalars.  */
1114   if (POINTER_TYPE_P (TREE_TYPE (val)))
1115     {
1116       /* NULL is usually not returned.  */
1117       if (integer_zerop (val))
1118         {
1119           *prediction = NOT_TAKEN;
1120           return PRED_NULL_RETURN;
1121         }
1122     }
1123   else if (INTEGRAL_TYPE_P (TREE_TYPE (val)))
1124     {
1125       /* Negative return values are often used to indicate
1126          errors.  */
1127       if (TREE_CODE (val) == INTEGER_CST
1128           && tree_int_cst_sgn (val) < 0)
1129         {
1130           *prediction = NOT_TAKEN;
1131           return PRED_NEGATIVE_RETURN;
1132         }
1133       /* Constant return values seems to be commonly taken.
1134          Zero/one often represent booleans so exclude them from the
1135          heuristics.  */
1136       if (TREE_CONSTANT (val)
1137           && (!integer_zerop (val) && !integer_onep (val)))
1138         {
1139           *prediction = TAKEN;
1140           return PRED_NEGATIVE_RETURN;
1141         }
1142     }
1143   return PRED_NO_PREDICTION;
1144 }
1145
1146 /* Find the basic block with return expression and look up for possible
1147    return value trying to apply RETURN_PREDICTION heuristics.  */
1148 static void
1149 apply_return_prediction (int *heads)
1150 {
1151   tree return_stmt = NULL;
1152   tree return_val;
1153   edge e;
1154   tree phi;
1155   int phi_num_args, i;
1156   enum br_predictor pred;
1157   enum prediction direction;
1158   edge_iterator ei;
1159
1160   FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
1161     {
1162       return_stmt = last_stmt (e->src);
1163       if (TREE_CODE (return_stmt) == RETURN_EXPR)
1164         break;
1165     }
1166   if (!e)
1167     return;
1168   return_val = TREE_OPERAND (return_stmt, 0);
1169   if (!return_val)
1170     return;
1171   if (TREE_CODE (return_val) == MODIFY_EXPR)
1172     return_val = TREE_OPERAND (return_val, 1);
1173   if (TREE_CODE (return_val) != SSA_NAME
1174       || !SSA_NAME_DEF_STMT (return_val)
1175       || TREE_CODE (SSA_NAME_DEF_STMT (return_val)) != PHI_NODE)
1176     return;
1177   for (phi = SSA_NAME_DEF_STMT (return_val); phi; phi = PHI_CHAIN (phi))
1178     if (PHI_RESULT (phi) == return_val)
1179       break;
1180   if (!phi)
1181     return;
1182   phi_num_args = PHI_NUM_ARGS (phi);
1183   pred = return_prediction (PHI_ARG_DEF (phi, 0), &direction);
1184
1185   /* Avoid the degenerate case where all return values form the function
1186      belongs to same category (ie they are all positive constants)
1187      so we can hardly say something about them.  */
1188   for (i = 1; i < phi_num_args; i++)
1189     if (pred != return_prediction (PHI_ARG_DEF (phi, i), &direction))
1190       break;
1191   if (i != phi_num_args)
1192     for (i = 0; i < phi_num_args; i++)
1193       {
1194         pred = return_prediction (PHI_ARG_DEF (phi, i), &direction);
1195         if (pred != PRED_NO_PREDICTION)
1196           predict_paths_leading_to (PHI_ARG_EDGE (phi, i)->src, heads, pred,
1197                                     direction);
1198       }
1199 }
1200
1201 /* Look for basic block that contains unlikely to happen events
1202    (such as noreturn calls) and mark all paths leading to execution
1203    of this basic blocks as unlikely.  */
1204
1205 static void
1206 tree_bb_level_predictions (void)
1207 {
1208   basic_block bb;
1209   int *heads;
1210
1211   heads = XCNEWVEC (int, last_basic_block);
1212   heads[ENTRY_BLOCK_PTR->next_bb->index] = last_basic_block;
1213
1214   apply_return_prediction (heads);
1215
1216   FOR_EACH_BB (bb)
1217     {
1218       block_stmt_iterator bsi = bsi_last (bb);
1219
1220       for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1221         {
1222           tree stmt = bsi_stmt (bsi);
1223           switch (TREE_CODE (stmt))
1224             {
1225               case MODIFY_EXPR:
1226                 if (TREE_CODE (TREE_OPERAND (stmt, 1)) == CALL_EXPR)
1227                   {
1228                     stmt = TREE_OPERAND (stmt, 1);
1229                     goto call_expr;
1230                   }
1231                 break;
1232               case CALL_EXPR:
1233 call_expr:;
1234                 if (call_expr_flags (stmt) & ECF_NORETURN)
1235                   predict_paths_leading_to (bb, heads, PRED_NORETURN,
1236                                             NOT_TAKEN);
1237                 break;
1238               default:
1239                 break;
1240             }
1241         }
1242     }
1243
1244   free (heads);
1245 }
1246
1247 /* Predict branch probabilities and estimate profile of the tree CFG.  */
1248 static unsigned int
1249 tree_estimate_probability (void)
1250 {
1251   basic_block bb;
1252
1253   loop_optimizer_init (0);
1254   if (current_loops && dump_file && (dump_flags & TDF_DETAILS))
1255     flow_loops_dump (current_loops, dump_file, NULL, 0);
1256
1257   add_noreturn_fake_exit_edges ();
1258   connect_infinite_loops_to_exit ();
1259   calculate_dominance_info (CDI_DOMINATORS);
1260   calculate_dominance_info (CDI_POST_DOMINATORS);
1261
1262   tree_bb_level_predictions ();
1263
1264   mark_irreducible_loops (current_loops);
1265   if (current_loops)
1266     predict_loops (current_loops);
1267
1268   FOR_EACH_BB (bb)
1269     {
1270       edge e;
1271       edge_iterator ei;
1272
1273       FOR_EACH_EDGE (e, ei, bb->succs)
1274         {
1275           /* Predict early returns to be probable, as we've already taken
1276              care for error returns and other cases are often used for
1277              fast paths through function.  */
1278           if (e->dest == EXIT_BLOCK_PTR
1279               && TREE_CODE (last_stmt (bb)) == RETURN_EXPR
1280               && !single_pred_p (bb))
1281             {
1282               edge e1;
1283               edge_iterator ei1;
1284
1285               FOR_EACH_EDGE (e1, ei1, bb->preds)
1286                 if (!predicted_by_p (e1->src, PRED_NULL_RETURN)
1287                     && !predicted_by_p (e1->src, PRED_CONST_RETURN)
1288                     && !predicted_by_p (e1->src, PRED_NEGATIVE_RETURN)
1289                     && !last_basic_block_p (e1->src))
1290                   predict_edge_def (e1, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
1291             }
1292
1293           /* Look for block we are guarding (ie we dominate it,
1294              but it doesn't postdominate us).  */
1295           if (e->dest != EXIT_BLOCK_PTR && e->dest != bb
1296               && dominated_by_p (CDI_DOMINATORS, e->dest, e->src)
1297               && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e->dest))
1298             {
1299               block_stmt_iterator bi;
1300
1301               /* The call heuristic claims that a guarded function call
1302                  is improbable.  This is because such calls are often used
1303                  to signal exceptional situations such as printing error
1304                  messages.  */
1305               for (bi = bsi_start (e->dest); !bsi_end_p (bi);
1306                    bsi_next (&bi))
1307                 {
1308                   tree stmt = bsi_stmt (bi);
1309                   if ((TREE_CODE (stmt) == CALL_EXPR
1310                        || (TREE_CODE (stmt) == MODIFY_EXPR
1311                            && TREE_CODE (TREE_OPERAND (stmt, 1)) == CALL_EXPR))
1312                       /* Constant and pure calls are hardly used to signalize
1313                          something exceptional.  */
1314                       && TREE_SIDE_EFFECTS (stmt))
1315                     {
1316                       predict_edge_def (e, PRED_CALL, NOT_TAKEN);
1317                       break;
1318                     }
1319                 }
1320             }
1321         }
1322       tree_predict_by_opcode (bb);
1323     }
1324   FOR_EACH_BB (bb)
1325     combine_predictions_for_bb (bb);
1326
1327   strip_builtin_expect ();
1328   estimate_bb_frequencies (current_loops);
1329   free_dominance_info (CDI_POST_DOMINATORS);
1330   remove_fake_exit_edges ();
1331   loop_optimizer_finalize ();
1332   if (dump_file && (dump_flags & TDF_DETAILS))
1333     dump_tree_cfg (dump_file, dump_flags);
1334   if (profile_status == PROFILE_ABSENT)
1335     profile_status = PROFILE_GUESSED;
1336   return 0;
1337 }
1338 \f
1339 /* Check whether this is the last basic block of function.  Commonly
1340    there is one extra common cleanup block.  */
1341 static bool
1342 last_basic_block_p (basic_block bb)
1343 {
1344   if (bb == EXIT_BLOCK_PTR)
1345     return false;
1346
1347   return (bb->next_bb == EXIT_BLOCK_PTR
1348           || (bb->next_bb->next_bb == EXIT_BLOCK_PTR
1349               && single_succ_p (bb)
1350               && single_succ (bb)->next_bb == EXIT_BLOCK_PTR));
1351 }
1352
1353 /* Sets branch probabilities according to PREDiction and
1354    FLAGS. HEADS[bb->index] should be index of basic block in that we
1355    need to alter branch predictions (i.e. the first of our dominators
1356    such that we do not post-dominate it) (but we fill this information
1357    on demand, so -1 may be there in case this was not needed yet).  */
1358
1359 static void
1360 predict_paths_leading_to (basic_block bb, int *heads, enum br_predictor pred,
1361                           enum prediction taken)
1362 {
1363   edge e;
1364   edge_iterator ei;
1365   int y;
1366
1367   if (heads[bb->index] == ENTRY_BLOCK)
1368     {
1369       /* This is first time we need this field in heads array; so
1370          find first dominator that we do not post-dominate (we are
1371          using already known members of heads array).  */
1372       basic_block ai = bb;
1373       basic_block next_ai = get_immediate_dominator (CDI_DOMINATORS, bb);
1374       int head;
1375
1376       while (heads[next_ai->index] == ENTRY_BLOCK)
1377         {
1378           if (!dominated_by_p (CDI_POST_DOMINATORS, next_ai, bb))
1379             break;
1380           heads[next_ai->index] = ai->index;
1381           ai = next_ai;
1382           next_ai = get_immediate_dominator (CDI_DOMINATORS, next_ai);
1383         }
1384       if (!dominated_by_p (CDI_POST_DOMINATORS, next_ai, bb))
1385         head = next_ai->index;
1386       else
1387         head = heads[next_ai->index];
1388       while (next_ai != bb)
1389         {
1390           next_ai = ai;
1391           ai = BASIC_BLOCK (heads[ai->index]);
1392           heads[next_ai->index] = head;
1393         }
1394     }
1395   y = heads[bb->index];
1396
1397   /* Now find the edge that leads to our branch and aply the prediction.  */
1398
1399   if (y == last_basic_block)
1400     return;
1401   FOR_EACH_EDGE (e, ei, BASIC_BLOCK (y)->succs)
1402     if (e->dest->index >= NUM_FIXED_BLOCKS
1403         && dominated_by_p (CDI_POST_DOMINATORS, e->dest, bb))
1404       predict_edge_def (e, pred, taken);
1405 }
1406 \f
1407 /* This is used to carry information about basic blocks.  It is
1408    attached to the AUX field of the standard CFG block.  */
1409
1410 typedef struct block_info_def
1411 {
1412   /* Estimated frequency of execution of basic_block.  */
1413   sreal frequency;
1414
1415   /* To keep queue of basic blocks to process.  */
1416   basic_block next;
1417
1418   /* Number of predecessors we need to visit first.  */
1419   int npredecessors;
1420 } *block_info;
1421
1422 /* Similar information for edges.  */
1423 typedef struct edge_info_def
1424 {
1425   /* In case edge is a loopback edge, the probability edge will be reached
1426      in case header is.  Estimated number of iterations of the loop can be
1427      then computed as 1 / (1 - back_edge_prob).  */
1428   sreal back_edge_prob;
1429   /* True if the edge is a loopback edge in the natural loop.  */
1430   unsigned int back_edge:1;
1431 } *edge_info;
1432
1433 #define BLOCK_INFO(B)   ((block_info) (B)->aux)
1434 #define EDGE_INFO(E)    ((edge_info) (E)->aux)
1435
1436 /* Helper function for estimate_bb_frequencies.
1437    Propagate the frequencies in blocks marked in
1438    TOVISIT, starting in HEAD.  */
1439
1440 static void
1441 propagate_freq (basic_block head, bitmap tovisit)
1442 {
1443   basic_block bb;
1444   basic_block last;
1445   unsigned i;
1446   edge e;
1447   basic_block nextbb;
1448   bitmap_iterator bi;
1449
1450   /* For each basic block we need to visit count number of his predecessors
1451      we need to visit first.  */
1452   EXECUTE_IF_SET_IN_BITMAP (tovisit, 0, i, bi)
1453     {
1454       edge_iterator ei;
1455       int count = 0;
1456
1457        /* The outermost "loop" includes the exit block, which we can not
1458           look up via BASIC_BLOCK.  Detect this and use EXIT_BLOCK_PTR
1459           directly.  Do the same for the entry block.  */
1460       bb = BASIC_BLOCK (i);
1461
1462       FOR_EACH_EDGE (e, ei, bb->preds)
1463         {
1464           bool visit = bitmap_bit_p (tovisit, e->src->index);
1465
1466           if (visit && !(e->flags & EDGE_DFS_BACK))
1467             count++;
1468           else if (visit && dump_file && !EDGE_INFO (e)->back_edge)
1469             fprintf (dump_file,
1470                      "Irreducible region hit, ignoring edge to %i->%i\n",
1471                      e->src->index, bb->index);
1472         }
1473       BLOCK_INFO (bb)->npredecessors = count;
1474     }
1475
1476   memcpy (&BLOCK_INFO (head)->frequency, &real_one, sizeof (real_one));
1477   last = head;
1478   for (bb = head; bb; bb = nextbb)
1479     {
1480       edge_iterator ei;
1481       sreal cyclic_probability, frequency;
1482
1483       memcpy (&cyclic_probability, &real_zero, sizeof (real_zero));
1484       memcpy (&frequency, &real_zero, sizeof (real_zero));
1485
1486       nextbb = BLOCK_INFO (bb)->next;
1487       BLOCK_INFO (bb)->next = NULL;
1488
1489       /* Compute frequency of basic block.  */
1490       if (bb != head)
1491         {
1492 #ifdef ENABLE_CHECKING
1493           FOR_EACH_EDGE (e, ei, bb->preds)
1494             gcc_assert (!bitmap_bit_p (tovisit, e->src->index)
1495                         || (e->flags & EDGE_DFS_BACK));
1496 #endif
1497
1498           FOR_EACH_EDGE (e, ei, bb->preds)
1499             if (EDGE_INFO (e)->back_edge)
1500               {
1501                 sreal_add (&cyclic_probability, &cyclic_probability,
1502                            &EDGE_INFO (e)->back_edge_prob);
1503               }
1504             else if (!(e->flags & EDGE_DFS_BACK))
1505               {
1506                 sreal tmp;
1507
1508                 /*  frequency += (e->probability
1509                                   * BLOCK_INFO (e->src)->frequency /
1510                                   REG_BR_PROB_BASE);  */
1511
1512                 sreal_init (&tmp, e->probability, 0);
1513                 sreal_mul (&tmp, &tmp, &BLOCK_INFO (e->src)->frequency);
1514                 sreal_mul (&tmp, &tmp, &real_inv_br_prob_base);
1515                 sreal_add (&frequency, &frequency, &tmp);
1516               }
1517
1518           if (sreal_compare (&cyclic_probability, &real_zero) == 0)
1519             {
1520               memcpy (&BLOCK_INFO (bb)->frequency, &frequency,
1521                       sizeof (frequency));
1522             }
1523           else
1524             {
1525               if (sreal_compare (&cyclic_probability, &real_almost_one) > 0)
1526                 {
1527                   memcpy (&cyclic_probability, &real_almost_one,
1528                           sizeof (real_almost_one));
1529                 }
1530
1531               /* BLOCK_INFO (bb)->frequency = frequency
1532                                               / (1 - cyclic_probability) */
1533
1534               sreal_sub (&cyclic_probability, &real_one, &cyclic_probability);
1535               sreal_div (&BLOCK_INFO (bb)->frequency,
1536                          &frequency, &cyclic_probability);
1537             }
1538         }
1539
1540       bitmap_clear_bit (tovisit, bb->index);
1541
1542       e = find_edge (bb, head);
1543       if (e)
1544         {
1545           sreal tmp;
1546
1547           /* EDGE_INFO (e)->back_edge_prob
1548              = ((e->probability * BLOCK_INFO (bb)->frequency)
1549              / REG_BR_PROB_BASE); */
1550
1551           sreal_init (&tmp, e->probability, 0);
1552           sreal_mul (&tmp, &tmp, &BLOCK_INFO (bb)->frequency);
1553           sreal_mul (&EDGE_INFO (e)->back_edge_prob,
1554                      &tmp, &real_inv_br_prob_base);
1555         }
1556
1557       /* Propagate to successor blocks.  */
1558       FOR_EACH_EDGE (e, ei, bb->succs)
1559         if (!(e->flags & EDGE_DFS_BACK)
1560             && BLOCK_INFO (e->dest)->npredecessors)
1561           {
1562             BLOCK_INFO (e->dest)->npredecessors--;
1563             if (!BLOCK_INFO (e->dest)->npredecessors)
1564               {
1565                 if (!nextbb)
1566                   nextbb = e->dest;
1567                 else
1568                   BLOCK_INFO (last)->next = e->dest;
1569
1570                 last = e->dest;
1571               }
1572           }
1573     }
1574 }
1575
1576 /* Estimate probabilities of loopback edges in loops at same nest level.  */
1577
1578 static void
1579 estimate_loops_at_level (struct loop *first_loop)
1580 {
1581   struct loop *loop;
1582
1583   for (loop = first_loop; loop; loop = loop->next)
1584     {
1585       edge e;
1586       basic_block *bbs;
1587       unsigned i;
1588       bitmap tovisit = BITMAP_ALLOC (NULL);
1589
1590       estimate_loops_at_level (loop->inner);
1591
1592       /* Find current loop back edge and mark it.  */
1593       e = loop_latch_edge (loop);
1594       EDGE_INFO (e)->back_edge = 1;
1595
1596       bbs = get_loop_body (loop);
1597       for (i = 0; i < loop->num_nodes; i++)
1598         bitmap_set_bit (tovisit, bbs[i]->index);
1599       free (bbs);
1600       propagate_freq (loop->header, tovisit);
1601       BITMAP_FREE (tovisit);
1602     }
1603 }
1604
1605 /* Propates frequencies through structure of LOOPS.  */
1606
1607 static void
1608 estimate_loops (struct loops *loops)
1609 {
1610   bitmap tovisit = BITMAP_ALLOC (NULL);
1611   basic_block bb;
1612
1613   /* Start by estimating the frequencies in the loops.  */
1614   if (loops)
1615     estimate_loops_at_level (loops->tree_root->inner);
1616
1617   /* Now propagate the frequencies through all the blocks.  */
1618   FOR_ALL_BB (bb)
1619     {
1620       bitmap_set_bit (tovisit, bb->index);
1621     }
1622   propagate_freq (ENTRY_BLOCK_PTR, tovisit);
1623   BITMAP_FREE (tovisit);
1624 }
1625
1626 /* Convert counts measured by profile driven feedback to frequencies.
1627    Return nonzero iff there was any nonzero execution count.  */
1628
1629 int
1630 counts_to_freqs (void)
1631 {
1632   gcov_type count_max, true_count_max = 0;
1633   basic_block bb;
1634
1635   FOR_EACH_BB (bb)
1636     true_count_max = MAX (bb->count, true_count_max);
1637
1638   count_max = MAX (true_count_max, 1);
1639   FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
1640     bb->frequency = (bb->count * BB_FREQ_MAX + count_max / 2) / count_max;
1641   return true_count_max;
1642 }
1643
1644 /* Return true if function is likely to be expensive, so there is no point to
1645    optimize performance of prologue, epilogue or do inlining at the expense
1646    of code size growth.  THRESHOLD is the limit of number of instructions
1647    function can execute at average to be still considered not expensive.  */
1648
1649 bool
1650 expensive_function_p (int threshold)
1651 {
1652   unsigned int sum = 0;
1653   basic_block bb;
1654   unsigned int limit;
1655
1656   /* We can not compute accurately for large thresholds due to scaled
1657      frequencies.  */
1658   gcc_assert (threshold <= BB_FREQ_MAX);
1659
1660   /* Frequencies are out of range.  This either means that function contains
1661      internal loop executing more than BB_FREQ_MAX times or profile feedback
1662      is available and function has not been executed at all.  */
1663   if (ENTRY_BLOCK_PTR->frequency == 0)
1664     return true;
1665
1666   /* Maximally BB_FREQ_MAX^2 so overflow won't happen.  */
1667   limit = ENTRY_BLOCK_PTR->frequency * threshold;
1668   FOR_EACH_BB (bb)
1669     {
1670       rtx insn;
1671
1672       for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb));
1673            insn = NEXT_INSN (insn))
1674         if (active_insn_p (insn))
1675           {
1676             sum += bb->frequency;
1677             if (sum > limit)
1678               return true;
1679         }
1680     }
1681
1682   return false;
1683 }
1684
1685 /* Estimate basic blocks frequency by given branch probabilities.  */
1686
1687 static void
1688 estimate_bb_frequencies (struct loops *loops)
1689 {
1690   basic_block bb;
1691   sreal freq_max;
1692
1693   if (!flag_branch_probabilities || !counts_to_freqs ())
1694     {
1695       static int real_values_initialized = 0;
1696
1697       if (!real_values_initialized)
1698         {
1699           real_values_initialized = 1;
1700           sreal_init (&real_zero, 0, 0);
1701           sreal_init (&real_one, 1, 0);
1702           sreal_init (&real_br_prob_base, REG_BR_PROB_BASE, 0);
1703           sreal_init (&real_bb_freq_max, BB_FREQ_MAX, 0);
1704           sreal_init (&real_one_half, 1, -1);
1705           sreal_div (&real_inv_br_prob_base, &real_one, &real_br_prob_base);
1706           sreal_sub (&real_almost_one, &real_one, &real_inv_br_prob_base);
1707         }
1708
1709       mark_dfs_back_edges ();
1710
1711       single_succ_edge (ENTRY_BLOCK_PTR)->probability = REG_BR_PROB_BASE;
1712
1713       /* Set up block info for each basic block.  */
1714       alloc_aux_for_blocks (sizeof (struct block_info_def));
1715       alloc_aux_for_edges (sizeof (struct edge_info_def));
1716       FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
1717         {
1718           edge e;
1719           edge_iterator ei;
1720
1721           FOR_EACH_EDGE (e, ei, bb->succs)
1722             {
1723               sreal_init (&EDGE_INFO (e)->back_edge_prob, e->probability, 0);
1724               sreal_mul (&EDGE_INFO (e)->back_edge_prob,
1725                          &EDGE_INFO (e)->back_edge_prob,
1726                          &real_inv_br_prob_base);
1727             }
1728         }
1729
1730       /* First compute probabilities locally for each loop from innermost
1731          to outermost to examine probabilities for back edges.  */
1732       estimate_loops (loops);
1733
1734       memcpy (&freq_max, &real_zero, sizeof (real_zero));
1735       FOR_EACH_BB (bb)
1736         if (sreal_compare (&freq_max, &BLOCK_INFO (bb)->frequency) < 0)
1737           memcpy (&freq_max, &BLOCK_INFO (bb)->frequency, sizeof (freq_max));
1738
1739       sreal_div (&freq_max, &real_bb_freq_max, &freq_max);
1740       FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
1741         {
1742           sreal tmp;
1743
1744           sreal_mul (&tmp, &BLOCK_INFO (bb)->frequency, &freq_max);
1745           sreal_add (&tmp, &tmp, &real_one_half);
1746           bb->frequency = sreal_to_int (&tmp);
1747         }
1748
1749       free_aux_for_blocks ();
1750       free_aux_for_edges ();
1751     }
1752   compute_function_frequency ();
1753   if (flag_reorder_functions)
1754     choose_function_section ();
1755 }
1756
1757 /* Decide whether function is hot, cold or unlikely executed.  */
1758 static void
1759 compute_function_frequency (void)
1760 {
1761   basic_block bb;
1762
1763   if (!profile_info || !flag_branch_probabilities)
1764     return;
1765   cfun->function_frequency = FUNCTION_FREQUENCY_UNLIKELY_EXECUTED;
1766   FOR_EACH_BB (bb)
1767     {
1768       if (maybe_hot_bb_p (bb))
1769         {
1770           cfun->function_frequency = FUNCTION_FREQUENCY_HOT;
1771           return;
1772         }
1773       if (!probably_never_executed_bb_p (bb))
1774         cfun->function_frequency = FUNCTION_FREQUENCY_NORMAL;
1775     }
1776 }
1777
1778 /* Choose appropriate section for the function.  */
1779 static void
1780 choose_function_section (void)
1781 {
1782   if (DECL_SECTION_NAME (current_function_decl)
1783       || !targetm.have_named_sections
1784       /* Theoretically we can split the gnu.linkonce text section too,
1785          but this requires more work as the frequency needs to match
1786          for all generated objects so we need to merge the frequency
1787          of all instances.  For now just never set frequency for these.  */
1788       || DECL_ONE_ONLY (current_function_decl))
1789     return;
1790
1791   /* If we are doing the partitioning optimization, let the optimization
1792      choose the correct section into which to put things.  */
1793
1794   if (flag_reorder_blocks_and_partition)
1795     return;
1796
1797   if (cfun->function_frequency == FUNCTION_FREQUENCY_HOT)
1798     DECL_SECTION_NAME (current_function_decl) =
1799       build_string (strlen (HOT_TEXT_SECTION_NAME), HOT_TEXT_SECTION_NAME);
1800   if (cfun->function_frequency == FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
1801     DECL_SECTION_NAME (current_function_decl) =
1802       build_string (strlen (UNLIKELY_EXECUTED_TEXT_SECTION_NAME),
1803                     UNLIKELY_EXECUTED_TEXT_SECTION_NAME);
1804 }
1805
1806 static bool
1807 gate_estimate_probability (void)
1808 {
1809   return flag_guess_branch_prob;
1810 }
1811
1812 struct tree_opt_pass pass_profile =
1813 {
1814   "profile",                            /* name */
1815   gate_estimate_probability,            /* gate */
1816   tree_estimate_probability,            /* execute */
1817   NULL,                                 /* sub */
1818   NULL,                                 /* next */
1819   0,                                    /* static_pass_number */
1820   TV_BRANCH_PROB,                       /* tv_id */
1821   PROP_cfg,                             /* properties_required */
1822   0,                                    /* properties_provided */
1823   0,                                    /* properties_destroyed */
1824   0,                                    /* todo_flags_start */
1825   TODO_ggc_collect | TODO_verify_ssa,                   /* todo_flags_finish */
1826   0                                     /* letter */
1827 };