gcc/predict.c

   1 /* Branch prediction routines for the GNU compiler.
   2    Copyright (C) 2000-2014 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it under
   7 the terms of the GNU General Public License as published by the Free
   8 Software Foundation; either version 3, or (at your option) any later
   9 version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 /* References:
  21
  22    [1] "Branch Prediction for Free"
  23        Ball and Larus; PLDI '93.
  24    [2] "Static Branch Frequency and Program Profile Analysis"
  25        Wu and Larus; MICRO-27.
  26    [3] "Corpus-based Static Branch Prediction"
  27        Calder, Grunwald, Lindsay, Martin, Mozer, and Zorn; PLDI '95.  */
  28
  29
  30 #include "config.h"
  31 #include "system.h"
  32 #include "coretypes.h"
  33 #include "tm.h"
  34 #include "tree.h"
  35 #include "calls.h"
  36 #include "rtl.h"
  37 #include "tm_p.h"
  38 #include "hard-reg-set.h"
  39 #include "predict.h"
  40 #include "vec.h"
  41 #include "hashtab.h"
  42 #include "hash-set.h"
  43 #include "machmode.h"
  44 #include "input.h"
  45 #include "function.h"
  46 #include "dominance.h"
  47 #include "cfg.h"
  48 #include "cfganal.h"
  49 #include "basic-block.h"
  50 #include "insn-config.h"
  51 #include "regs.h"
  52 #include "flags.h"
  53 #include "profile.h"
  54 #include "except.h"
  55 #include "diagnostic-core.h"
  56 #include "recog.h"
  57 #include "expr.h"
  58 #include "coverage.h"
  59 #include "sreal.h"
  60 #include "params.h"
  61 #include "target.h"
  62 #include "cfgloop.h"
  63 #include "hash-map.h"
  64 #include "tree-ssa-alias.h"
  65 #include "internal-fn.h"
  66 #include "gimple-expr.h"
  67 #include "is-a.h"
  68 #include "gimple.h"
  69 #include "gimple-iterator.h"
  70 #include "gimple-ssa.h"
  71 #include "plugin-api.h"
  72 #include "ipa-ref.h"
  73 #include "cgraph.h"
  74 #include "tree-cfg.h"
  75 #include "tree-phinodes.h"
  76 #include "ssa-iterators.h"
  77 #include "tree-ssa-loop-niter.h"
  78 #include "tree-ssa-loop.h"
  79 #include "tree-pass.h"
  80 #include "tree-scalar-evolution.h"
  81 #include "cfgloop.h"
  82
  83 /* real constants: 0, 1, 1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE,
  84                    1/REG_BR_PROB_BASE, 0.5, BB_FREQ_MAX.  */
  85 static sreal real_zero, real_one, real_almost_one, real_br_prob_base,
  86              real_inv_br_prob_base, real_one_half, real_bb_freq_max;
  87
  88 static void combine_predictions_for_insn (rtx_insn *, basic_block);
  89 static void dump_prediction (FILE *, enum br_predictor, int, basic_block, int);
  90 static void predict_paths_leading_to (basic_block, enum br_predictor, enum prediction);
  91 static void predict_paths_leading_to_edge (edge, enum br_predictor, enum prediction);
  92 static bool can_predict_insn_p (const rtx_insn *);
  93
  94 /* Information we hold about each branch predictor.
  95    Filled using information from predict.def.  */
  96
struct predictor_info
{
  const char *const name;       /* Name used in the debugging dumps.  */
  const int hitrate;            /* Expected hitrate used by
                                   predict_insn_def call.  */
  const int flags;              /* Bitmask of PRED_FLAG_* values; the
                                   combine routines test it against
                                   PRED_FLAG_FIRST_MATCH.  */
};
 104
/* Use given predictor without Dempster-Shafer theory if it matches
   using first_match heuristics.  */
 107 #define PRED_FLAG_FIRST_MATCH 1
 108
 109 /* Recompute hitrate in percent to our representation.  */
 110
 111 #define HITRATE(VAL) ((int) ((VAL) * REG_BR_PROB_BASE + 50) / 100)
 112
/* Each DEF_PREDICTOR line pulled in from predict.def expands to one
   {name, hitrate, flags} initializer of the table below; the ENUM
   argument is not used by this expansion.  */
#define DEF_PREDICTOR(ENUM, NAME, HITRATE, FLAGS) {NAME, HITRATE, FLAGS},
static const struct predictor_info predictor_info[]= {
#include "predict.def"

  /* Upper bound on predictors.  */
  {NULL, 0, 0}
};
#undef DEF_PREDICTOR
 121
 122 /* Return TRUE if frequency FREQ is considered to be hot.  */
 123
 124 static inline bool
 125 maybe_hot_frequency_p (struct function *fun, int freq)
 126 {
 127   struct cgraph_node *node = cgraph_node::get (fun->decl);
 128   if (!profile_info
 129       || !opt_for_fn (fun->decl, flag_branch_probabilities))
 130     {
 131       if (node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED)
 132         return false;
 133       if (node->frequency == NODE_FREQUENCY_HOT)
 134         return true;
 135     }
 136   if (profile_status_for_fn (fun) == PROFILE_ABSENT)
 137     return true;
 138   if (node->frequency == NODE_FREQUENCY_EXECUTED_ONCE
 139       && freq < (ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency * 2 / 3))
 140     return false;
 141   if (PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION) == 0)
 142     return false;
 143   if (freq < (ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency
 144               / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION)))
 145     return false;
 146   return true;
 147 }
 148
 149 static gcov_type min_count = -1;
 150
 151 /* Determine the threshold for hot BB counts.  */
 152
 153 gcov_type
 154 get_hot_bb_threshold ()
 155 {
 156   gcov_working_set_t *ws;
 157   if (min_count == -1)
 158     {
 159       ws = find_working_set (PARAM_VALUE (HOT_BB_COUNT_WS_PERMILLE));
 160       gcc_assert (ws);
 161       min_count = ws->min_counter;
 162     }
 163   return min_count;
 164 }
 165
 166 /* Set the threshold for hot BB counts.  */
 167
 168 void
 169 set_hot_bb_threshold (gcov_type min)
 170 {
 171   min_count = min;
 172 }
 173
 174 /* Return TRUE if frequency FREQ is considered to be hot.  */
 175
 176 bool
 177 maybe_hot_count_p (struct function *fun, gcov_type count)
 178 {
 179   if (fun && profile_status_for_fn (fun) != PROFILE_READ)
 180     return true;
 181   /* Code executed at most once is not hot.  */
 182   if (profile_info->runs >= count)
 183     return false;
 184   return (count >= get_hot_bb_threshold ());
 185 }
 186
 187 /* Return true in case BB can be CPU intensive and should be optimized
 188    for maximal performance.  */
 189
 190 bool
 191 maybe_hot_bb_p (struct function *fun, const_basic_block bb)
 192 {
 193   gcc_checking_assert (fun);
 194   if (profile_status_for_fn (fun) == PROFILE_READ)
 195     return maybe_hot_count_p (fun, bb->count);
 196   return maybe_hot_frequency_p (fun, bb->frequency);
 197 }
 198
 199 /* Return true in case BB can be CPU intensive and should be optimized
 200    for maximal performance.  */
 201
 202 bool
 203 maybe_hot_edge_p (edge e)
 204 {
 205   if (profile_status_for_fn (cfun) == PROFILE_READ)
 206     return maybe_hot_count_p (cfun, e->count);
 207   return maybe_hot_frequency_p (cfun, EDGE_FREQUENCY (e));
 208 }
 209
 210 /* Return true if profile COUNT and FREQUENCY, or function FUN static
 211    node frequency reflects never being executed.  */
 212
 213 static bool
 214 probably_never_executed (struct function *fun,
 215                          gcov_type count, int frequency)
 216 {
 217   gcc_checking_assert (fun);
 218   if (profile_status_for_fn (fun) == PROFILE_READ)
 219     {
 220       int unlikely_count_fraction = PARAM_VALUE (UNLIKELY_BB_COUNT_FRACTION);
 221       if (count * unlikely_count_fraction >= profile_info->runs)
 222         return false;
 223       if (!frequency)
 224         return true;
 225       if (!ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency)
 226         return false;
 227       if (ENTRY_BLOCK_PTR_FOR_FN (fun)->count)
 228         {
 229           gcov_type computed_count;
 230           /* Check for possibility of overflow, in which case entry bb count
 231              is large enough to do the division first without losing much
 232              precision.  */
 233           if (ENTRY_BLOCK_PTR_FOR_FN (fun)->count < REG_BR_PROB_BASE *
 234               REG_BR_PROB_BASE)
 235             {
 236               gcov_type scaled_count
 237                   = frequency * ENTRY_BLOCK_PTR_FOR_FN (fun)->count *
 238              unlikely_count_fraction;
 239               computed_count = RDIV (scaled_count,
 240                                      ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency);
 241             }
 242           else
 243             {
 244               computed_count = RDIV (ENTRY_BLOCK_PTR_FOR_FN (fun)->count,
 245                                      ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency);
 246               computed_count *= frequency * unlikely_count_fraction;
 247             }
 248           if (computed_count >= profile_info->runs)
 249             return false;
 250         }
 251       return true;
 252     }
 253   if ((!profile_info || !(opt_for_fn (fun->decl, flag_branch_probabilities)))
 254       && (cgraph_node::get (fun->decl)->frequency
 255           == NODE_FREQUENCY_UNLIKELY_EXECUTED))
 256     return true;
 257   return false;
 258 }
 259
 260
 261 /* Return true in case BB is probably never executed.  */
 262
 263 bool
 264 probably_never_executed_bb_p (struct function *fun, const_basic_block bb)
 265 {
 266   return probably_never_executed (fun, bb->count, bb->frequency);
 267 }
 268
 269
 270 /* Return true in case edge E is probably never executed.  */
 271
 272 bool
 273 probably_never_executed_edge_p (struct function *fun, edge e)
 274 {
 275   return probably_never_executed (fun, e->count, EDGE_FREQUENCY (e));
 276 }
 277
 278 /* Return true when current function should always be optimized for size.  */
 279
 280 bool
 281 optimize_function_for_size_p (struct function *fun)
 282 {
 283   if (!fun || !fun->decl)
 284     return optimize_size;
 285   cgraph_node *n = cgraph_node::get (fun->decl);
 286   return n && n->optimize_for_size_p ();
 287 }
 288
 289 /* Return true when current function should always be optimized for speed.  */
 290
 291 bool
 292 optimize_function_for_speed_p (struct function *fun)
 293 {
 294   return !optimize_function_for_size_p (fun);
 295 }
 296
 297 /* Return TRUE when BB should be optimized for size.  */
 298
 299 bool
 300 optimize_bb_for_size_p (const_basic_block bb)
 301 {
 302   return (optimize_function_for_size_p (cfun)
 303           || (bb && !maybe_hot_bb_p (cfun, bb)));
 304 }
 305
 306 /* Return TRUE when BB should be optimized for speed.  */
 307
 308 bool
 309 optimize_bb_for_speed_p (const_basic_block bb)
 310 {
 311   return !optimize_bb_for_size_p (bb);
 312 }
 313
 314 /* Return TRUE when BB should be optimized for size.  */
 315
 316 bool
 317 optimize_edge_for_size_p (edge e)
 318 {
 319   return optimize_function_for_size_p (cfun) || !maybe_hot_edge_p (e);
 320 }
 321
 322 /* Return TRUE when BB should be optimized for speed.  */
 323
 324 bool
 325 optimize_edge_for_speed_p (edge e)
 326 {
 327   return !optimize_edge_for_size_p (e);
 328 }
 329
 330 /* Return TRUE when BB should be optimized for size.  */
 331
 332 bool
 333 optimize_insn_for_size_p (void)
 334 {
 335   return optimize_function_for_size_p (cfun) || !crtl->maybe_hot_insn_p;
 336 }
 337
 338 /* Return TRUE when BB should be optimized for speed.  */
 339
 340 bool
 341 optimize_insn_for_speed_p (void)
 342 {
 343   return !optimize_insn_for_size_p ();
 344 }
 345
 346 /* Return TRUE when LOOP should be optimized for size.  */
 347
 348 bool
 349 optimize_loop_for_size_p (struct loop *loop)
 350 {
 351   return optimize_bb_for_size_p (loop->header);
 352 }
 353
 354 /* Return TRUE when LOOP should be optimized for speed.  */
 355
 356 bool
 357 optimize_loop_for_speed_p (struct loop *loop)
 358 {
 359   return optimize_bb_for_speed_p (loop->header);
 360 }
 361
 362 /* Return TRUE when LOOP nest should be optimized for speed.  */
 363
 364 bool
 365 optimize_loop_nest_for_speed_p (struct loop *loop)
 366 {
 367   struct loop *l = loop;
 368   if (optimize_loop_for_speed_p (loop))
 369     return true;
 370   l = loop->inner;
 371   while (l && l != loop)
 372     {
 373       if (optimize_loop_for_speed_p (l))
 374         return true;
 375       if (l->inner)
 376         l = l->inner;
 377       else if (l->next)
 378         l = l->next;
 379       else
 380         {
 381           while (l != loop && !l->next)
 382             l = loop_outer (l);
 383           if (l != loop)
 384             l = l->next;
 385         }
 386     }
 387   return false;
 388 }
 389
 390 /* Return TRUE when LOOP nest should be optimized for size.  */
 391
 392 bool
 393 optimize_loop_nest_for_size_p (struct loop *loop)
 394 {
 395   return !optimize_loop_nest_for_speed_p (loop);
 396 }
 397
 398 /* Return true when edge E is likely to be well predictable by branch
 399    predictor.  */
 400
 401 bool
 402 predictable_edge_p (edge e)
 403 {
 404   if (profile_status_for_fn (cfun) == PROFILE_ABSENT)
 405     return false;
 406   if ((e->probability
 407        <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100)
 408       || (REG_BR_PROB_BASE - e->probability
 409           <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100))
 410     return true;
 411   return false;
 412 }
 413
 414
 415 /* Set RTL expansion for BB profile.  */
 416
 417 void
 418 rtl_profile_for_bb (basic_block bb)
 419 {
 420   crtl->maybe_hot_insn_p = maybe_hot_bb_p (cfun, bb);
 421 }
 422
 423 /* Set RTL expansion for edge profile.  */
 424
 425 void
 426 rtl_profile_for_edge (edge e)
 427 {
 428   crtl->maybe_hot_insn_p = maybe_hot_edge_p (e);
 429 }
 430
 431 /* Set RTL expansion to default mode (i.e. when profile info is not known).  */
 432 void
 433 default_rtl_profile (void)
 434 {
 435   crtl->maybe_hot_insn_p = true;
 436 }
 437
 438 /* Return true if the one of outgoing edges is already predicted by
 439    PREDICTOR.  */
 440
 441 bool
 442 rtl_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
 443 {
 444   rtx note;
 445   if (!INSN_P (BB_END (bb)))
 446     return false;
 447   for (note = REG_NOTES (BB_END (bb)); note; note = XEXP (note, 1))
 448     if (REG_NOTE_KIND (note) == REG_BR_PRED
 449         && INTVAL (XEXP (XEXP (note, 0), 0)) == (int)predictor)
 450       return true;
 451   return false;
 452 }
 453
 454 /*  Structure representing predictions in tree level. */
 455
struct edge_prediction {
    /* Next prediction in the singly linked list kept per basic block.  */
    struct edge_prediction *ep_next;
    /* Outgoing edge this prediction talks about.  */
    edge ep_edge;
    /* Heuristic that produced the prediction.  */
    enum br_predictor ep_predictor;
    /* Predicted probability of EP_EDGE, on the REG_BR_PROB_BASE scale.  */
    int ep_probability;
};
 462
 463 /* This map contains for a basic block the list of predictions for the
 464    outgoing edges.  */
 465
 466 static hash_map<const_basic_block, edge_prediction *> *bb_predictions;
 467
 468 /* Return true if the one of outgoing edges is already predicted by
 469    PREDICTOR.  */
 470
 471 bool
 472 gimple_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
 473 {
 474   struct edge_prediction *i;
 475   edge_prediction **preds = bb_predictions->get (bb);
 476
 477   if (!preds)
 478     return false;
 479
 480   for (i = *preds; i; i = i->ep_next)
 481     if (i->ep_predictor == predictor)
 482       return true;
 483   return false;
 484 }
 485
 486 /* Return true when the probability of edge is reliable.
 487
 488    The profile guessing code is good at predicting branch outcome (ie.
 489    taken/not taken), that is predicted right slightly over 75% of time.
 490    It is however notoriously poor on predicting the probability itself.
 491    In general the profile appear a lot flatter (with probabilities closer
 492    to 50%) than the reality so it is bad idea to use it to drive optimization
 493    such as those disabling dynamic branch prediction for well predictable
 494    branches.
 495
 496    There are two exceptions - edges leading to noreturn edges and edges
 497    predicted by number of iterations heuristics are predicted well.  This macro
 498    should be able to distinguish those, but at the moment it simply check for
 499    noreturn heuristic that is only one giving probability over 99% or bellow
 500    1%.  In future we might want to propagate reliability information across the
 501    CFG if we find this information useful on multiple places.   */
 502 static bool
 503 probability_reliable_p (int prob)
 504 {
 505   return (profile_status_for_fn (cfun) == PROFILE_READ
 506           || (profile_status_for_fn (cfun) == PROFILE_GUESSED
 507               && (prob <= HITRATE (1) || prob >= HITRATE (99))));
 508 }
 509
 510 /* Same predicate as above, working on edges.  */
 511 bool
 512 edge_probability_reliable_p (const_edge e)
 513 {
 514   return probability_reliable_p (e->probability);
 515 }
 516
 517 /* Same predicate as edge_probability_reliable_p, working on notes.  */
 518 bool
 519 br_prob_note_reliable_p (const_rtx note)
 520 {
 521   gcc_assert (REG_NOTE_KIND (note) == REG_BR_PROB);
 522   return probability_reliable_p (XINT (note, 0));
 523 }
 524
 525 static void
 526 predict_insn (rtx_insn *insn, enum br_predictor predictor, int probability)
 527 {
 528   gcc_assert (any_condjump_p (insn));
 529   if (!flag_guess_branch_prob)
 530     return;
 531
 532   add_reg_note (insn, REG_BR_PRED,
 533                 gen_rtx_CONCAT (VOIDmode,
 534                                 GEN_INT ((int) predictor),
 535                                 GEN_INT ((int) probability)));
 536 }
 537
 538 /* Predict insn by given predictor.  */
 539
 540 void
 541 predict_insn_def (rtx_insn *insn, enum br_predictor predictor,
 542                   enum prediction taken)
 543 {
 544    int probability = predictor_info[(int) predictor].hitrate;
 545
 546    if (taken != TAKEN)
 547      probability = REG_BR_PROB_BASE - probability;
 548
 549    predict_insn (insn, predictor, probability);
 550 }
 551
 552 /* Predict edge E with given probability if possible.  */
 553
 554 void
 555 rtl_predict_edge (edge e, enum br_predictor predictor, int probability)
 556 {
 557   rtx_insn *last_insn;
 558   last_insn = BB_END (e->src);
 559
 560   /* We can store the branch prediction information only about
 561      conditional jumps.  */
 562   if (!any_condjump_p (last_insn))
 563     return;
 564
 565   /* We always store probability of branching.  */
 566   if (e->flags & EDGE_FALLTHRU)
 567     probability = REG_BR_PROB_BASE - probability;
 568
 569   predict_insn (last_insn, predictor, probability);
 570 }
 571
 572 /* Predict edge E with the given PROBABILITY.  */
 573 void
 574 gimple_predict_edge (edge e, enum br_predictor predictor, int probability)
 575 {
 576   gcc_assert (profile_status_for_fn (cfun) != PROFILE_GUESSED);
 577   if ((e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun) && EDGE_COUNT (e->src->succs) >
 578        1)
 579       && flag_guess_branch_prob && optimize)
 580     {
 581       struct edge_prediction *i = XNEW (struct edge_prediction);
 582       edge_prediction *&preds = bb_predictions->get_or_insert (e->src);
 583
 584       i->ep_next = preds;
 585       preds = i;
 586       i->ep_probability = probability;
 587       i->ep_predictor = predictor;
 588       i->ep_edge = e;
 589     }
 590 }
 591
 592 /* Remove all predictions on given basic block that are attached
 593    to edge E.  */
 594 void
 595 remove_predictions_associated_with_edge (edge e)
 596 {
 597   if (!bb_predictions)
 598     return;
 599
 600   edge_prediction **preds = bb_predictions->get (e->src);
 601
 602   if (preds)
 603     {
 604       struct edge_prediction **prediction = preds;
 605       struct edge_prediction *next;
 606
 607       while (*prediction)
 608         {
 609           if ((*prediction)->ep_edge == e)
 610             {
 611               next = (*prediction)->ep_next;
 612               free (*prediction);
 613               *prediction = next;
 614             }
 615           else
 616             prediction = &((*prediction)->ep_next);
 617         }
 618     }
 619 }
 620
 621 /* Clears the list of predictions stored for BB.  */
 622
 623 static void
 624 clear_bb_predictions (basic_block bb)
 625 {
 626   edge_prediction **preds = bb_predictions->get (bb);
 627   struct edge_prediction *pred, *next;
 628
 629   if (!preds)
 630     return;
 631
 632   for (pred = *preds; pred; pred = next)
 633     {
 634       next = pred->ep_next;
 635       free (pred);
 636     }
 637   *preds = NULL;
 638 }
 639
 640 /* Return true when we can store prediction on insn INSN.
 641    At the moment we represent predictions only on conditional
 642    jumps, not at computed jump or other complicated cases.  */
 643 static bool
 644 can_predict_insn_p (const rtx_insn *insn)
 645 {
 646   return (JUMP_P (insn)
 647           && any_condjump_p (insn)
 648           && EDGE_COUNT (BLOCK_FOR_INSN (insn)->succs) >= 2);
 649 }
 650
 651 /* Predict edge E by given predictor if possible.  */
 652
 653 void
 654 predict_edge_def (edge e, enum br_predictor predictor,
 655                   enum prediction taken)
 656 {
 657    int probability = predictor_info[(int) predictor].hitrate;
 658
 659    if (taken != TAKEN)
 660      probability = REG_BR_PROB_BASE - probability;
 661
 662    predict_edge (e, predictor, probability);
 663 }
 664
 665 /* Invert all branch predictions or probability notes in the INSN.  This needs
 666    to be done each time we invert the condition used by the jump.  */
 667
 668 void
 669 invert_br_probabilities (rtx insn)
 670 {
 671   rtx note;
 672
 673   for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
 674     if (REG_NOTE_KIND (note) == REG_BR_PROB)
 675       XINT (note, 0) = REG_BR_PROB_BASE - XINT (note, 0);
 676     else if (REG_NOTE_KIND (note) == REG_BR_PRED)
 677       XEXP (XEXP (note, 0), 1)
 678         = GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (XEXP (note, 0), 1)));
 679 }
 680
 681 /* Dump information about the branch prediction to the output file.  */
 682
 683 static void
 684 dump_prediction (FILE *file, enum br_predictor predictor, int probability,
 685                  basic_block bb, int used)
 686 {
 687   edge e;
 688   edge_iterator ei;
 689
 690   if (!file)
 691     return;
 692
 693   FOR_EACH_EDGE (e, ei, bb->succs)
 694     if (! (e->flags & EDGE_FALLTHRU))
 695       break;
 696
 697   fprintf (file, "  %s heuristics%s: %.1f%%",
 698            predictor_info[predictor].name,
 699            used ? "" : " (ignored)", probability * 100.0 / REG_BR_PROB_BASE);
 700
 701   if (bb->count)
 702     {
 703       fprintf (file, "  exec %"PRId64, bb->count);
 704       if (e)
 705         {
 706           fprintf (file, " hit %"PRId64, e->count);
 707           fprintf (file, " (%.1f%%)", e->count * 100.0 / bb->count);
 708         }
 709     }
 710
 711   fprintf (file, "\n");
 712 }
 713
 714 /* We can not predict the probabilities of outgoing edges of bb.  Set them
 715    evenly and hope for the best.  */
 716 static void
 717 set_even_probabilities (basic_block bb)
 718 {
 719   int nedges = 0;
 720   edge e;
 721   edge_iterator ei;
 722
 723   FOR_EACH_EDGE (e, ei, bb->succs)
 724     if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
 725       nedges ++;
 726   FOR_EACH_EDGE (e, ei, bb->succs)
 727     if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
 728       e->probability = (REG_BR_PROB_BASE + nedges / 2) / nedges;
 729     else
 730       e->probability = 0;
 731 }
 732
/* Combine all REG_BR_PRED notes on INSN, the last insn of BB, into a
   single probability and attach a REG_BR_PROB note if not already
   present.  Remove the now useless REG_BR_PRED notes and store the
   resulting probabilities on the outgoing edges of BB.  When INSN cannot
   carry a prediction, the edges of BB get even probabilities instead.  */

static void
combine_predictions_for_insn (rtx_insn *insn, basic_block bb)
{
  rtx prob_note;
  rtx *pnote;
  rtx note;
  int best_probability = PROB_EVEN;
  enum br_predictor best_predictor = END_PREDICTORS;
  int combined_probability = REG_BR_PROB_BASE / 2;
  int d;
  bool first_match = false;
  bool found = false;

  if (!can_predict_insn_p (insn))
    {
      set_even_probabilities (bb);
      return;
    }

  /* Remember a pre-existing REG_BR_PROB note; it takes precedence over
     the combined value at the end of the function.  */
  prob_note = find_reg_note (insn, REG_BR_PROB, 0);
  pnote = &REG_NOTES (insn);
  if (dump_file)
    fprintf (dump_file, "Predictions for insn %i bb %i\n", INSN_UID (insn),
             bb->index);

  /* We implement "first match" heuristics and use probability guessed
     by predictor with smallest index.  */
  for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
    if (REG_NOTE_KIND (note) == REG_BR_PRED)
      {
        enum br_predictor predictor = ((enum br_predictor)
                                       INTVAL (XEXP (XEXP (note, 0), 0)));
        int probability = INTVAL (XEXP (XEXP (note, 0), 1));

        found = true;
        if (best_predictor > predictor)
          best_probability = probability, best_predictor = predictor;

        /* D is the denominator of the combination formula: the running
           combined probability times this one, plus the product of
           their complements.  */
        d = (combined_probability * probability
             + (REG_BR_PROB_BASE - combined_probability)
             * (REG_BR_PROB_BASE - probability));

        /* Use FP math to avoid overflows of 32bit integers.  */
        if (d == 0)
          /* If one probability is 0% and one 100%, avoid division by zero.  */
          combined_probability = REG_BR_PROB_BASE / 2;
        else
          combined_probability = (((double) combined_probability) * probability
                                  * REG_BR_PROB_BASE / d + 0.5);
      }

  /* Decide which heuristic to use.  In case we didn't match anything,
     use no_prediction heuristic, in case we did match, use either
     first match or Dempster-Shafer theory depending on the flags.  */

  if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
    first_match = true;

  if (!found)
    dump_prediction (dump_file, PRED_NO_PREDICTION,
                     combined_probability, bb, true);
  else
    {
      dump_prediction (dump_file, PRED_DS_THEORY, combined_probability,
                       bb, !first_match);
      dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability,
                       bb, first_match);
    }

  if (first_match)
    combined_probability = best_probability;
  dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);

  /* Dump every individual prediction and unlink its REG_BR_PRED note
     from the note list; notes of other kinds are kept.  */
  while (*pnote)
    {
      if (REG_NOTE_KIND (*pnote) == REG_BR_PRED)
        {
          enum br_predictor predictor = ((enum br_predictor)
                                         INTVAL (XEXP (XEXP (*pnote, 0), 0)));
          int probability = INTVAL (XEXP (XEXP (*pnote, 0), 1));

          dump_prediction (dump_file, predictor, probability, bb,
                           !first_match || best_predictor == predictor);
          *pnote = XEXP (*pnote, 1);
        }
      else
        pnote = &XEXP (*pnote, 1);
    }

  if (!prob_note)
    {
      add_int_reg_note (insn, REG_BR_PROB, combined_probability);

      /* Save the prediction into CFG in case we are seeing non-degenerated
         conditional jump.  */
      if (!single_succ_p (bb))
        {
          BRANCH_EDGE (bb)->probability = combined_probability;
          FALLTHRU_EDGE (bb)->probability
            = REG_BR_PROB_BASE - combined_probability;
        }
    }
  else if (!single_succ_p (bb))
    {
      /* A REG_BR_PROB note was already there: propagate its value to the
         edges rather than the freshly combined one.  */
      int prob = XINT (prob_note, 0);

      BRANCH_EDGE (bb)->probability = prob;
      FALLTHRU_EDGE (bb)->probability = REG_BR_PROB_BASE - prob;
    }
  else
    single_succ_edge (bb)->probability = REG_BR_PROB_BASE;
}
 848
 849 /* Combine predictions into single probability and store them into CFG.
 850    Remove now useless prediction entries.  */
 851
 852 static void
 853 combine_predictions_for_bb (basic_block bb)
 854 {
 855   int best_probability = PROB_EVEN;
 856   enum br_predictor best_predictor = END_PREDICTORS;
 857   int combined_probability = REG_BR_PROB_BASE / 2;
 858   int d;
 859   bool first_match = false;
 860   bool found = false;
 861   struct edge_prediction *pred;
 862   int nedges = 0;
 863   edge e, first = NULL, second = NULL;
 864   edge_iterator ei;
 865
 866   FOR_EACH_EDGE (e, ei, bb->succs)
 867     if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
 868       {
 869         nedges ++;
 870         if (first && !second)
 871           second = e;
 872         if (!first)
 873           first = e;
 874       }
 875
 876   /* When there is no successor or only one choice, prediction is easy.
 877
 878      We are lazy for now and predict only basic blocks with two outgoing
 879      edges.  It is possible to predict generic case too, but we have to
 880      ignore first match heuristics and do more involved combining.  Implement
 881      this later.  */
 882   if (nedges != 2)
 883     {
 884       if (!bb->count)
 885         set_even_probabilities (bb);
 886       clear_bb_predictions (bb);
 887       if (dump_file)
 888         fprintf (dump_file, "%i edges in bb %i predicted to even probabilities\n",
 889                  nedges, bb->index);
 890       return;
 891     }
 892
 893   if (dump_file)
 894     fprintf (dump_file, "Predictions for bb %i\n", bb->index);
 895
 896   edge_prediction **preds = bb_predictions->get (bb);
 897   if (preds)
 898     {
 899       /* We implement "first match" heuristics and use probability guessed
 900          by predictor with smallest index.  */
 901       for (pred = *preds; pred; pred = pred->ep_next)
 902         {
 903           enum br_predictor predictor = pred->ep_predictor;
 904           int probability = pred->ep_probability;
 905
 906           if (pred->ep_edge != first)
 907             probability = REG_BR_PROB_BASE - probability;
 908
 909           found = true;
 910           /* First match heuristics would be widly confused if we predicted
 911              both directions.  */
 912           if (best_predictor > predictor)
 913             {
 914               struct edge_prediction *pred2;
 915               int prob = probability;
 916
 917               for (pred2 = (struct edge_prediction *) *preds;
 918                    pred2; pred2 = pred2->ep_next)
 919                if (pred2 != pred && pred2->ep_predictor == pred->ep_predictor)
 920                  {
 921                    int probability2 = pred->ep_probability;
 922
 923                    if (pred2->ep_edge != first)
 924                      probability2 = REG_BR_PROB_BASE - probability2;
 925
 926                    if ((probability < REG_BR_PROB_BASE / 2) !=
 927                        (probability2 < REG_BR_PROB_BASE / 2))
 928                      break;
 929
 930                    /* If the same predictor later gave better result, go for it! */
 931                    if ((probability >= REG_BR_PROB_BASE / 2 && (probability2 > probability))
 932                        || (probability <= REG_BR_PROB_BASE / 2 && (probability2 < probability)))
 933                      prob = probability2;
 934                  }
 935               if (!pred2)
 936                 best_probability = prob, best_predictor = predictor;
 937             }
 938
 939           d = (combined_probability * probability
 940                + (REG_BR_PROB_BASE - combined_probability)
 941                * (REG_BR_PROB_BASE - probability));
 942
 943           /* Use FP math to avoid overflows of 32bit integers.  */
 944           if (d == 0)
 945             /* If one probability is 0% and one 100%, avoid division by zero.  */
 946             combined_probability = REG_BR_PROB_BASE / 2;
 947           else
 948             combined_probability = (((double) combined_probability)
 949                                     * probability
 950                                     * REG_BR_PROB_BASE / d + 0.5);
 951         }
 952     }
 953
 954   /* Decide which heuristic to use.  In case we didn't match anything,
 955      use no_prediction heuristic, in case we did match, use either
 956      first match or Dempster-Shafer theory depending on the flags.  */
 957
 958   if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
 959     first_match = true;
 960
 961   if (!found)
 962     dump_prediction (dump_file, PRED_NO_PREDICTION, combined_probability, bb, true);
 963   else
 964     {
 965       dump_prediction (dump_file, PRED_DS_THEORY, combined_probability, bb,
 966                        !first_match);
 967       dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability, bb,
 968                        first_match);
 969     }
 970
 971   if (first_match)
 972     combined_probability = best_probability;
 973   dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);
 974
 975   if (preds)
 976     {
 977       for (pred = (struct edge_prediction *) *preds; pred; pred = pred->ep_next)
 978         {
 979           enum br_predictor predictor = pred->ep_predictor;
 980           int probability = pred->ep_probability;
 981
 982           if (pred->ep_edge != EDGE_SUCC (bb, 0))
 983             probability = REG_BR_PROB_BASE - probability;
 984           dump_prediction (dump_file, predictor, probability, bb,
 985                            !first_match || best_predictor == predictor);
 986         }
 987     }
 988   clear_bb_predictions (bb);
 989
 990   if (!bb->count)
 991     {
 992       first->probability = combined_probability;
 993       second->probability = REG_BR_PROB_BASE - combined_probability;
 994     }
 995 }
 996
 997 /* Check if T1 and T2 satisfy the IV_COMPARE condition.
 998    Return the SSA_NAME if the condition satisfies, NULL otherwise.
 999
1000    T1 and T2 should be one of the following cases:
1001      1. T1 is SSA_NAME, T2 is NULL
1002      2. T1 is SSA_NAME, T2 is INTEGER_CST between [-4, 4]
1003      3. T2 is SSA_NAME, T1 is INTEGER_CST between [-4, 4]  */
1004
1005 static tree
1006 strips_small_constant (tree t1, tree t2)
1007 {
1008   tree ret = NULL;
1009   int value = 0;
1010
1011   if (!t1)
1012     return NULL;
1013   else if (TREE_CODE (t1) == SSA_NAME)
1014     ret = t1;
1015   else if (tree_fits_shwi_p (t1))
1016     value = tree_to_shwi (t1);
1017   else
1018     return NULL;
1019
1020   if (!t2)
1021     return ret;
1022   else if (tree_fits_shwi_p (t2))
1023     value = tree_to_shwi (t2);
1024   else if (TREE_CODE (t2) == SSA_NAME)
1025     {
1026       if (ret)
1027         return NULL;
1028       else
1029         ret = t2;
1030     }
1031
1032   if (value <= 4 && value >= -4)
1033     return ret;
1034   else
1035     return NULL;
1036 }
1037
1038 /* Return the SSA_NAME in T or T's operands.
1039    Return NULL if SSA_NAME cannot be found.  */
1040
1041 static tree
1042 get_base_value (tree t)
1043 {
1044   if (TREE_CODE (t) == SSA_NAME)
1045     return t;
1046
1047   if (!BINARY_CLASS_P (t))
1048     return NULL;
1049
1050   switch (TREE_OPERAND_LENGTH (t))
1051     {
1052     case 1:
1053       return strips_small_constant (TREE_OPERAND (t, 0), NULL);
1054     case 2:
1055       return strips_small_constant (TREE_OPERAND (t, 0),
1056                                     TREE_OPERAND (t, 1));
1057     default:
1058       return NULL;
1059     }
1060 }
1061
1062 /* Check the compare STMT in LOOP. If it compares an induction
1063    variable to a loop invariant, return true, and save
1064    LOOP_INVARIANT, COMPARE_CODE and LOOP_STEP.
1065    Otherwise return false and set LOOP_INVAIANT to NULL.  */
1066
1067 static bool
1068 is_comparison_with_loop_invariant_p (gimple stmt, struct loop *loop,
1069                                      tree *loop_invariant,
1070                                      enum tree_code *compare_code,
1071                                      tree *loop_step,
1072                                      tree *loop_iv_base)
1073 {
1074   tree op0, op1, bound, base;
1075   affine_iv iv0, iv1;
1076   enum tree_code code;
1077   tree step;
1078
1079   code = gimple_cond_code (stmt);
1080   *loop_invariant = NULL;
1081
1082   switch (code)
1083     {
1084     case GT_EXPR:
1085     case GE_EXPR:
1086     case NE_EXPR:
1087     case LT_EXPR:
1088     case LE_EXPR:
1089     case EQ_EXPR:
1090       break;
1091
1092     default:
1093       return false;
1094     }
1095
1096   op0 = gimple_cond_lhs (stmt);
1097   op1 = gimple_cond_rhs (stmt);
1098
1099   if ((TREE_CODE (op0) != SSA_NAME && TREE_CODE (op0) != INTEGER_CST)
1100        || (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op1) != INTEGER_CST))
1101     return false;
1102   if (!simple_iv (loop, loop_containing_stmt (stmt), op0, &iv0, true))
1103     return false;
1104   if (!simple_iv (loop, loop_containing_stmt (stmt), op1, &iv1, true))
1105     return false;
1106   if (TREE_CODE (iv0.step) != INTEGER_CST
1107       || TREE_CODE (iv1.step) != INTEGER_CST)
1108     return false;
1109   if ((integer_zerop (iv0.step) && integer_zerop (iv1.step))
1110       || (!integer_zerop (iv0.step) && !integer_zerop (iv1.step)))
1111     return false;
1112
1113   if (integer_zerop (iv0.step))
1114     {
1115       if (code != NE_EXPR && code != EQ_EXPR)
1116         code = invert_tree_comparison (code, false);
1117       bound = iv0.base;
1118       base = iv1.base;
1119       if (tree_fits_shwi_p (iv1.step))
1120         step = iv1.step;
1121       else
1122         return false;
1123     }
1124   else
1125     {
1126       bound = iv1.base;
1127       base = iv0.base;
1128       if (tree_fits_shwi_p (iv0.step))
1129         step = iv0.step;
1130       else
1131         return false;
1132     }
1133
1134   if (TREE_CODE (bound) != INTEGER_CST)
1135     bound = get_base_value (bound);
1136   if (!bound)
1137     return false;
1138   if (TREE_CODE (base) != INTEGER_CST)
1139     base = get_base_value (base);
1140   if (!base)
1141     return false;
1142
1143   *loop_invariant = bound;
1144   *compare_code = code;
1145   *loop_step = step;
1146   *loop_iv_base = base;
1147   return true;
1148 }
1149
1150 /* Compare two SSA_NAMEs: returns TRUE if T1 and T2 are value coherent.  */
1151
1152 static bool
1153 expr_coherent_p (tree t1, tree t2)
1154 {
1155   gimple stmt;
1156   tree ssa_name_1 = NULL;
1157   tree ssa_name_2 = NULL;
1158
1159   gcc_assert (TREE_CODE (t1) == SSA_NAME || TREE_CODE (t1) == INTEGER_CST);
1160   gcc_assert (TREE_CODE (t2) == SSA_NAME || TREE_CODE (t2) == INTEGER_CST);
1161
1162   if (t1 == t2)
1163     return true;
1164
1165   if (TREE_CODE (t1) == INTEGER_CST && TREE_CODE (t2) == INTEGER_CST)
1166     return true;
1167   if (TREE_CODE (t1) == INTEGER_CST || TREE_CODE (t2) == INTEGER_CST)
1168     return false;
1169
1170   /* Check to see if t1 is expressed/defined with t2.  */
1171   stmt = SSA_NAME_DEF_STMT (t1);
1172   gcc_assert (stmt != NULL);
1173   if (is_gimple_assign (stmt))
1174     {
1175       ssa_name_1 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
1176       if (ssa_name_1 && ssa_name_1 == t2)
1177         return true;
1178     }
1179
1180   /* Check to see if t2 is expressed/defined with t1.  */
1181   stmt = SSA_NAME_DEF_STMT (t2);
1182   gcc_assert (stmt != NULL);
1183   if (is_gimple_assign (stmt))
1184     {
1185       ssa_name_2 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
1186       if (ssa_name_2 && ssa_name_2 == t1)
1187         return true;
1188     }
1189
1190   /* Compare if t1 and t2's def_stmts are identical.  */
1191   if (ssa_name_2 != NULL && ssa_name_1 == ssa_name_2)
1192     return true;
1193   else
1194     return false;
1195 }
1196
1197 /* Predict branch probability of BB when BB contains a branch that compares
1198    an induction variable in LOOP with LOOP_IV_BASE_VAR to LOOP_BOUND_VAR. The
1199    loop exit is compared using LOOP_BOUND_CODE, with step of LOOP_BOUND_STEP.
1200
1201    E.g.
1202      for (int i = 0; i < bound; i++) {
1203        if (i < bound - 2)
1204          computation_1();
1205        else
1206          computation_2();
1207      }
1208
1209   In this loop, we will predict the branch inside the loop to be taken.  */
1210
1211 static void
1212 predict_iv_comparison (struct loop *loop, basic_block bb,
1213                        tree loop_bound_var,
1214                        tree loop_iv_base_var,
1215                        enum tree_code loop_bound_code,
1216                        int loop_bound_step)
1217 {
1218   gimple stmt;
1219   tree compare_var, compare_base;
1220   enum tree_code compare_code;
1221   tree compare_step_var;
1222   edge then_edge;
1223   edge_iterator ei;
1224
1225   if (predicted_by_p (bb, PRED_LOOP_ITERATIONS_GUESSED)
1226       || predicted_by_p (bb, PRED_LOOP_ITERATIONS)
1227       || predicted_by_p (bb, PRED_LOOP_EXIT))
1228     return;
1229
1230   stmt = last_stmt (bb);
1231   if (!stmt || gimple_code (stmt) != GIMPLE_COND)
1232     return;
1233   if (!is_comparison_with_loop_invariant_p (stmt, loop, &compare_var,
1234                                             &compare_code,
1235                                             &compare_step_var,
1236                                             &compare_base))
1237     return;
1238
1239   /* Find the taken edge.  */
1240   FOR_EACH_EDGE (then_edge, ei, bb->succs)
1241     if (then_edge->flags & EDGE_TRUE_VALUE)
1242       break;
1243
1244   /* When comparing an IV to a loop invariant, NE is more likely to be
1245      taken while EQ is more likely to be not-taken.  */
1246   if (compare_code == NE_EXPR)
1247     {
1248       predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1249       return;
1250     }
1251   else if (compare_code == EQ_EXPR)
1252     {
1253       predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1254       return;
1255     }
1256
1257   if (!expr_coherent_p (loop_iv_base_var, compare_base))
1258     return;
1259
1260   /* If loop bound, base and compare bound are all constants, we can
1261      calculate the probability directly.  */
1262   if (tree_fits_shwi_p (loop_bound_var)
1263       && tree_fits_shwi_p (compare_var)
1264       && tree_fits_shwi_p (compare_base))
1265     {
1266       int probability;
1267       bool overflow, overall_overflow = false;
1268       widest_int compare_count, tem;
1269
1270       /* (loop_bound - base) / compare_step */
1271       tem = wi::sub (wi::to_widest (loop_bound_var),
1272                      wi::to_widest (compare_base), SIGNED, &overflow);
1273       overall_overflow |= overflow;
1274       widest_int loop_count = wi::div_trunc (tem,
1275                                              wi::to_widest (compare_step_var),
1276                                              SIGNED, &overflow);
1277       overall_overflow |= overflow;
1278
1279       if (!wi::neg_p (wi::to_widest (compare_step_var))
1280           ^ (compare_code == LT_EXPR || compare_code == LE_EXPR))
1281         {
1282           /* (loop_bound - compare_bound) / compare_step */
1283           tem = wi::sub (wi::to_widest (loop_bound_var),
1284                          wi::to_widest (compare_var), SIGNED, &overflow);
1285           overall_overflow |= overflow;
1286           compare_count = wi::div_trunc (tem, wi::to_widest (compare_step_var),
1287                                          SIGNED, &overflow);
1288           overall_overflow |= overflow;
1289         }
1290       else
1291         {
1292           /* (compare_bound - base) / compare_step */
1293           tem = wi::sub (wi::to_widest (compare_var),
1294                          wi::to_widest (compare_base), SIGNED, &overflow);
1295           overall_overflow |= overflow;
1296           compare_count = wi::div_trunc (tem, wi::to_widest (compare_step_var),
1297                                          SIGNED, &overflow);
1298           overall_overflow |= overflow;
1299         }
1300       if (compare_code == LE_EXPR || compare_code == GE_EXPR)
1301         ++compare_count;
1302       if (loop_bound_code == LE_EXPR || loop_bound_code == GE_EXPR)
1303         ++loop_count;
1304       if (wi::neg_p (compare_count))
1305         compare_count = 0;
1306       if (wi::neg_p (loop_count))
1307         loop_count = 0;
1308       if (loop_count == 0)
1309         probability = 0;
1310       else if (wi::cmps (compare_count, loop_count) == 1)
1311         probability = REG_BR_PROB_BASE;
1312       else
1313         {
1314           tem = compare_count * REG_BR_PROB_BASE;
1315           tem = wi::udiv_trunc (tem, loop_count);
1316           probability = tem.to_uhwi ();
1317         }
1318
1319       if (!overall_overflow)
1320         predict_edge (then_edge, PRED_LOOP_IV_COMPARE, probability);
1321
1322       return;
1323     }
1324
1325   if (expr_coherent_p (loop_bound_var, compare_var))
1326     {
1327       if ((loop_bound_code == LT_EXPR || loop_bound_code == LE_EXPR)
1328           && (compare_code == LT_EXPR || compare_code == LE_EXPR))
1329         predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1330       else if ((loop_bound_code == GT_EXPR || loop_bound_code == GE_EXPR)
1331                && (compare_code == GT_EXPR || compare_code == GE_EXPR))
1332         predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1333       else if (loop_bound_code == NE_EXPR)
1334         {
1335           /* If the loop backedge condition is "(i != bound)", we do
1336              the comparison based on the step of IV:
1337              * step < 0 : backedge condition is like (i > bound)
1338              * step > 0 : backedge condition is like (i < bound)  */
1339           gcc_assert (loop_bound_step != 0);
1340           if (loop_bound_step > 0
1341               && (compare_code == LT_EXPR
1342                   || compare_code == LE_EXPR))
1343             predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1344           else if (loop_bound_step < 0
1345                    && (compare_code == GT_EXPR
1346                        || compare_code == GE_EXPR))
1347             predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1348           else
1349             predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1350         }
1351       else
1352         /* The branch is predicted not-taken if loop_bound_code is
1353            opposite with compare_code.  */
1354         predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1355     }
1356   else if (expr_coherent_p (loop_iv_base_var, compare_var))
1357     {
1358       /* For cases like:
1359            for (i = s; i < h; i++)
1360              if (i > s + 2) ....
1361          The branch should be predicted taken.  */
1362       if (loop_bound_step > 0
1363           && (compare_code == GT_EXPR || compare_code == GE_EXPR))
1364         predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1365       else if (loop_bound_step < 0
1366                && (compare_code == LT_EXPR || compare_code == LE_EXPR))
1367         predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1368       else
1369         predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1370     }
1371 }
1372
1373 /* Predict for extra loop exits that will lead to EXIT_EDGE. The extra loop
1374    exits are resulted from short-circuit conditions that will generate an
1375    if_tmp. E.g.:
1376
1377    if (foo() || global > 10)
1378      break;
1379
1380    This will be translated into:
1381
1382    BB3:
1383      loop header...
1384    BB4:
1385      if foo() goto BB6 else goto BB5
1386    BB5:
1387      if global > 10 goto BB6 else goto BB7
1388    BB6:
1389      goto BB7
1390    BB7:
1391      iftmp = (PHI 0(BB5), 1(BB6))
1392      if iftmp == 1 goto BB8 else goto BB3
1393    BB8:
1394      outside of the loop...
1395
1396    The edge BB7->BB8 is loop exit because BB8 is outside of the loop.
1397    From the dataflow, we can infer that BB4->BB6 and BB5->BB6 are also loop
1398    exits. This function takes BB7->BB8 as input, and finds out the extra loop
1399    exits to predict them using PRED_LOOP_EXIT.  */
1400
1401 static void
1402 predict_extra_loop_exits (edge exit_edge)
1403 {
1404   unsigned i;
1405   bool check_value_one;
1406   gimple phi_stmt;
1407   tree cmp_rhs, cmp_lhs;
1408   gimple cmp_stmt = last_stmt (exit_edge->src);
1409
1410   if (!cmp_stmt || gimple_code (cmp_stmt) != GIMPLE_COND)
1411     return;
1412   cmp_rhs = gimple_cond_rhs (cmp_stmt);
1413   cmp_lhs = gimple_cond_lhs (cmp_stmt);
1414   if (!TREE_CONSTANT (cmp_rhs)
1415       || !(integer_zerop (cmp_rhs) || integer_onep (cmp_rhs)))
1416     return;
1417   if (TREE_CODE (cmp_lhs) != SSA_NAME)
1418     return;
1419
1420   /* If check_value_one is true, only the phi_args with value '1' will lead
1421      to loop exit. Otherwise, only the phi_args with value '0' will lead to
1422      loop exit.  */
1423   check_value_one = (((integer_onep (cmp_rhs))
1424                     ^ (gimple_cond_code (cmp_stmt) == EQ_EXPR))
1425                     ^ ((exit_edge->flags & EDGE_TRUE_VALUE) != 0));
1426
1427   phi_stmt = SSA_NAME_DEF_STMT (cmp_lhs);
1428   if (!phi_stmt || gimple_code (phi_stmt) != GIMPLE_PHI)
1429     return;
1430
1431   for (i = 0; i < gimple_phi_num_args (phi_stmt); i++)
1432     {
1433       edge e1;
1434       edge_iterator ei;
1435       tree val = gimple_phi_arg_def (phi_stmt, i);
1436       edge e = gimple_phi_arg_edge (phi_stmt, i);
1437
1438       if (!TREE_CONSTANT (val) || !(integer_zerop (val) || integer_onep (val)))
1439         continue;
1440       if ((check_value_one ^ integer_onep (val)) == 1)
1441         continue;
1442       if (EDGE_COUNT (e->src->succs) != 1)
1443         {
1444           predict_paths_leading_to_edge (e, PRED_LOOP_EXIT, NOT_TAKEN);
1445           continue;
1446         }
1447
1448       FOR_EACH_EDGE (e1, ei, e->src->preds)
1449         predict_paths_leading_to_edge (e1, PRED_LOOP_EXIT, NOT_TAKEN);
1450     }
1451 }
1452
/* Predict edge probabilities by exploiting loop structure.

   For every loop visited by FOR_EACH_LOOP this function
     1. predicts each exit edge from the number of iterations, when that
        number is a known constant (PRED_LOOP_ITERATIONS) or, for
        single-exit loops, can be estimated
        (PRED_LOOP_ITERATIONS_GUESSED);
     2. looks for a GIMPLE_COND describing the loop bound and analyses it
        with is_comparison_with_loop_invariant_p;
     3. walks the blocks of the loop body, predicting the latch->header
        edge (PRED_LOOP_BRANCH), the edges leaving the loop
        (PRED_LOOP_EXIT) and, when step 2 succeeded, comparisons of an
        induction variable against an invariant (predict_iv_comparison).  */

static void
predict_loops (void)
{
  struct loop *loop;

  /* Try to predict out blocks in a loop that are not part of a
     natural loop.  */
  FOR_EACH_LOOP (loop, 0)
    {
      basic_block bb, *bbs;
      unsigned j, n_exits;
      vec<edge> exits;
      struct tree_niter_desc niter_desc;
      edge ex;
      struct nb_iter_bound *nb_iter;
      enum tree_code loop_bound_code = ERROR_MARK;
      tree loop_bound_step = NULL;
      tree loop_bound_var = NULL;
      tree loop_iv_base = NULL;
      gimple stmt = NULL;

      /* A loop without exit edges gets no loop predictions at all.  */
      exits = get_loop_exit_edges (loop);
      n_exits = exits.length ();
      if (!n_exits)
        {
          exits.release ();
          continue;
        }

      FOR_EACH_VEC_ELT (exits, j, ex)
        {
          tree niter = NULL;
          HOST_WIDE_INT nitercst;
          int max = PARAM_VALUE (PARAM_MAX_PREDICTED_ITERATIONS);
          int probability;
          enum br_predictor predictor;

          predict_extra_loop_exits (ex);

          /* Prefer the analytically derived iteration count; when it is
             missing or not a constant, fall back to loop_niter_by_eval.  */
          if (number_of_iterations_exit (loop, ex, &niter_desc, false, false))
            niter = niter_desc.niter;
          if (!niter || TREE_CODE (niter_desc.niter) != INTEGER_CST)
            niter = loop_niter_by_eval (loop, ex);

          if (TREE_CODE (niter) == INTEGER_CST)
            {
              /* Use NITER + 1 when NITER is below MAX - 1, otherwise cap
                 the count at MAX.  */
              if (tree_fits_uhwi_p (niter)
                  && max
                  && compare_tree_int (niter, max - 1) == -1)
                nitercst = tree_to_uhwi (niter) + 1;
              else
                nitercst = max;
              predictor = PRED_LOOP_ITERATIONS;
            }
          /* If we have just one exit and we can derive some information about
             the number of iterations of the loop from the statements inside
             the loop, use it to predict this exit.  */
          else if (n_exits == 1)
            {
              nitercst = estimated_stmt_executions_int (loop);
              /* A negative estimate is treated as "no estimate".  */
              if (nitercst < 0)
                continue;
              if (nitercst > max)
                nitercst = max;

              predictor = PRED_LOOP_ITERATIONS_GUESSED;
            }
          else
            continue;

          /* If the prediction for number of iterations is zero, do not
             predict the exit edges.  */
          if (nitercst == 0)
            continue;

          /* REG_BR_PROB_BASE / NITERCST, rounded to nearest.  */
          probability = ((REG_BR_PROB_BASE + nitercst / 2) / nitercst);
          predict_edge (ex, predictor, probability);
        }
      exits.release ();

      /* Find information about loop bound variables.  The first recorded
         bound whose statement is a GIMPLE_COND is used; failing that, the
         last statement of the loop header if it is a GIMPLE_COND.  */
      for (nb_iter = loop->bounds; nb_iter;
           nb_iter = nb_iter->next)
        if (nb_iter->stmt
            && gimple_code (nb_iter->stmt) == GIMPLE_COND)
          {
            stmt = nb_iter->stmt;
            break;
          }
      if (!stmt && last_stmt (loop->header)
          && gimple_code (last_stmt (loop->header)) == GIMPLE_COND)
        stmt = last_stmt (loop->header);
      /* The return value is not needed: on failure LOOP_BOUND_VAR is left
         NULL, which disables the IV comparison heuristic below.  */
      if (stmt)
        is_comparison_with_loop_invariant_p (stmt, loop,
                                             &loop_bound_var,
                                             &loop_bound_code,
                                             &loop_bound_step,
                                             &loop_iv_base);

      bbs = get_loop_body (loop);

      for (j = 0; j < loop->num_nodes; j++)
        {
          int header_found = 0;
          edge e;
          edge_iterator ei;

          bb = bbs[j];

          /* Bypass loop heuristics on continue statement.  These
             statements construct loops via "non-loop" constructs
             in the source language and are better to be handled
             separately.  */
          if (predicted_by_p (bb, PRED_CONTINUE))
            continue;

          /* Loop branch heuristics - predict an edge back to a
             loop's head as taken.  */
          if (bb == loop->latch)
            {
              e = find_edge (loop->latch, loop->header);
              if (e)
                {
                  header_found = 1;
                  predict_edge_def (e, PRED_LOOP_BRANCH, TAKEN);
                }
            }

          /* Loop exit heuristics - predict an edge exiting the loop if the
             conditional has no loop header successors as not taken.  */
          if (!header_found
              /* If we already used more reliable loop exit predictors, do not
                 bother with PRED_LOOP_EXIT.  */
              && !predicted_by_p (bb, PRED_LOOP_ITERATIONS_GUESSED)
              && !predicted_by_p (bb, PRED_LOOP_ITERATIONS))
            {
              /* For loop with many exits we don't want to predict all exits
                 with the pretty large probability, because if all exits are
                 considered in row, the loop would be predicted to iterate
                 almost never.  The code to divide probability by number of
                 exits is very rough.  It should compute the number of exits
                 taken in each path through function (not the overall number
                 of exits that might be a lot higher for loops with wide switch
                 statements in them) and compute n-th square root.

                 We limit the minimal probability by 2% to avoid
                 EDGE_PROBABILITY_RELIABLE from trusting the branch prediction
                 as this was causing regression in perl benchmark containing such
                 a wide loop.  */

              int probability = ((REG_BR_PROB_BASE
                                  - predictor_info [(int) PRED_LOOP_EXIT].hitrate)
                                 / n_exits);
              if (probability < HITRATE (2))
                probability = HITRATE (2);
              /* An edge leaves the loop when its destination is one of the
                 fixed blocks or lies outside LOOP.  */
              FOR_EACH_EDGE (e, ei, bb->succs)
                if (e->dest->index < NUM_FIXED_BLOCKS
                    || !flow_bb_inside_loop_p (loop, e->dest))
                  predict_edge (e, PRED_LOOP_EXIT, probability);
            }
          /* NOTE(review): tree_to_shwi yields a HOST_WIDE_INT that is
             narrowed to the int parameter of predict_iv_comparison, which
             only inspects its sign -- confirm that a very wide step cannot
             be truncated to zero or change sign here.  */
          if (loop_bound_var)
            predict_iv_comparison (loop, bb, loop_bound_var, loop_iv_base,
                                   loop_bound_code,
                                   tree_to_shwi (loop_bound_step));
        }

      /* Free basic blocks from get_loop_body.  */
      free (bbs);
    }
}
1625
1626 /* Attempt to predict probabilities of BB outgoing edges using local
1627    properties.  */
1628 static void
1629 bb_estimate_probability_locally (basic_block bb)
1630 {
1631   rtx_insn *last_insn = BB_END (bb);
1632   rtx cond;
1633
1634   if (! can_predict_insn_p (last_insn))
1635     return;
1636   cond = get_condition (last_insn, NULL, false, false);
1637   if (! cond)
1638     return;
1639
1640   /* Try "pointer heuristic."
1641      A comparison ptr == 0 is predicted as false.
1642      Similarly, a comparison ptr1 == ptr2 is predicted as false.  */
1643   if (COMPARISON_P (cond)
1644       && ((REG_P (XEXP (cond, 0)) && REG_POINTER (XEXP (cond, 0)))
1645           || (REG_P (XEXP (cond, 1)) && REG_POINTER (XEXP (cond, 1)))))
1646     {
1647       if (GET_CODE (cond) == EQ)
1648         predict_insn_def (last_insn, PRED_POINTER, NOT_TAKEN);
1649       else if (GET_CODE (cond) == NE)
1650         predict_insn_def (last_insn, PRED_POINTER, TAKEN);
1651     }
1652   else
1653
1654   /* Try "opcode heuristic."
1655      EQ tests are usually false and NE tests are usually true. Also,
1656      most quantities are positive, so we can make the appropriate guesses
1657      about signed comparisons against zero.  */
1658     switch (GET_CODE (cond))
1659       {
1660       case CONST_INT:
1661         /* Unconditional branch.  */
1662         predict_insn_def (last_insn, PRED_UNCONDITIONAL,
1663                           cond == const0_rtx ? NOT_TAKEN : TAKEN);
1664         break;
1665
1666       case EQ:
1667       case UNEQ:
1668         /* Floating point comparisons appears to behave in a very
1669            unpredictable way because of special role of = tests in
1670            FP code.  */
1671         if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
1672           ;
1673         /* Comparisons with 0 are often used for booleans and there is
1674            nothing useful to predict about them.  */
1675         else if (XEXP (cond, 1) == const0_rtx
1676                  || XEXP (cond, 0) == const0_rtx)
1677           ;
1678         else
1679           predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, NOT_TAKEN);
1680         break;
1681
1682       case NE:
1683       case LTGT:
1684         /* Floating point comparisons appears to behave in a very
1685            unpredictable way because of special role of = tests in
1686            FP code.  */
1687         if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
1688           ;
1689         /* Comparisons with 0 are often used for booleans and there is
1690            nothing useful to predict about them.  */
1691         else if (XEXP (cond, 1) == const0_rtx
1692                  || XEXP (cond, 0) == const0_rtx)
1693           ;
1694         else
1695           predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, TAKEN);
1696         break;
1697
1698       case ORDERED:
1699         predict_insn_def (last_insn, PRED_FPOPCODE, TAKEN);
1700         break;
1701
1702       case UNORDERED:
1703         predict_insn_def (last_insn, PRED_FPOPCODE, NOT_TAKEN);
1704         break;
1705
1706       case LE:
1707       case LT:
1708         if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
1709             || XEXP (cond, 1) == constm1_rtx)
1710           predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, NOT_TAKEN);
1711         break;
1712
1713       case GE:
1714       case GT:
1715         if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
1716             || XEXP (cond, 1) == constm1_rtx)
1717           predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, TAKEN);
1718         break;
1719
1720       default:
1721         break;
1722       }
1723 }
1724
1725 /* Set edge->probability for each successor edge of BB.  */
1726 void
1727 guess_outgoing_edge_probabilities (basic_block bb)
1728 {
1729   bb_estimate_probability_locally (bb);
1730   combine_predictions_for_insn (BB_END (bb), bb);
1731 }
1732 \f
/* Forward declaration: expr_expected_value_1 below calls
   expr_expected_value when it walks the arguments of a PHI node.  */
static tree expr_expected_value (tree, bitmap, enum br_predictor *predictor);
1734
1735 /* Helper function for expr_expected_value.  */
1736
1737 static tree
1738 expr_expected_value_1 (tree type, tree op0, enum tree_code code,
1739                        tree op1, bitmap visited, enum br_predictor *predictor)
1740 {
1741   gimple def;
1742
1743   if (predictor)
1744     *predictor = PRED_UNCONDITIONAL;
1745
1746   if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1747     {
1748       if (TREE_CONSTANT (op0))
1749         return op0;
1750
1751       if (code != SSA_NAME)
1752         return NULL_TREE;
1753
1754       def = SSA_NAME_DEF_STMT (op0);
1755
1756       /* If we were already here, break the infinite cycle.  */
1757       if (!bitmap_set_bit (visited, SSA_NAME_VERSION (op0)))
1758         return NULL;
1759
1760       if (gimple_code (def) == GIMPLE_PHI)
1761         {
1762           /* All the arguments of the PHI node must have the same constant
1763              length.  */
1764           int i, n = gimple_phi_num_args (def);
1765           tree val = NULL, new_val;
1766
1767           for (i = 0; i < n; i++)
1768             {
1769               tree arg = PHI_ARG_DEF (def, i);
1770               enum br_predictor predictor2;
1771
1772               /* If this PHI has itself as an argument, we cannot
1773                  determine the string length of this argument.  However,
1774                  if we can find an expected constant value for the other
1775                  PHI args then we can still be sure that this is
1776                  likely a constant.  So be optimistic and just
1777                  continue with the next argument.  */
1778               if (arg == PHI_RESULT (def))
1779                 continue;
1780
1781               new_val = expr_expected_value (arg, visited, &predictor2);
1782
1783               /* It is difficult to combine value predictors.  Simply assume
1784                  that later predictor is weaker and take its prediction.  */
1785               if (predictor && *predictor < predictor2)
1786                 *predictor = predictor2;
1787               if (!new_val)
1788                 return NULL;
1789               if (!val)
1790                 val = new_val;
1791               else if (!operand_equal_p (val, new_val, false))
1792                 return NULL;
1793             }
1794           return val;
1795         }
1796       if (is_gimple_assign (def))
1797         {
1798           if (gimple_assign_lhs (def) != op0)
1799             return NULL;
1800
1801           return expr_expected_value_1 (TREE_TYPE (gimple_assign_lhs (def)),
1802                                         gimple_assign_rhs1 (def),
1803                                         gimple_assign_rhs_code (def),
1804                                         gimple_assign_rhs2 (def),
1805                                         visited, predictor);
1806         }
1807
1808       if (is_gimple_call (def))
1809         {
1810           tree decl = gimple_call_fndecl (def);
1811           if (!decl)
1812             {
1813               if (gimple_call_internal_p (def)
1814                   && gimple_call_internal_fn (def) == IFN_BUILTIN_EXPECT)
1815                 {
1816                   gcc_assert (gimple_call_num_args (def) == 3);
1817                   tree val = gimple_call_arg (def, 0);
1818                   if (TREE_CONSTANT (val))
1819                     return val;
1820                   if (predictor)
1821                     {
1822                       tree val2 = gimple_call_arg (def, 2);
1823                       gcc_assert (TREE_CODE (val2) == INTEGER_CST
1824                                   && tree_fits_uhwi_p (val2)
1825                                   && tree_to_uhwi (val2) < END_PREDICTORS);
1826                       *predictor = (enum br_predictor) tree_to_uhwi (val2);
1827                     }
1828                   return gimple_call_arg (def, 1);
1829                 }
1830               return NULL;
1831             }
1832           if (DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
1833             switch (DECL_FUNCTION_CODE (decl))
1834               {
1835               case BUILT_IN_EXPECT:
1836                 {
1837                   tree val;
1838                   if (gimple_call_num_args (def) != 2)
1839                     return NULL;
1840                   val = gimple_call_arg (def, 0);
1841                   if (TREE_CONSTANT (val))
1842                     return val;
1843                   if (predictor)
1844                     *predictor = PRED_BUILTIN_EXPECT;
1845                   return gimple_call_arg (def, 1);
1846                 }
1847
1848               case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_N:
1849               case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_1:
1850               case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_2:
1851               case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_4:
1852               case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_8:
1853               case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_16:
1854               case BUILT_IN_ATOMIC_COMPARE_EXCHANGE:
1855               case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_N:
1856               case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_1:
1857               case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_2:
1858               case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_4:
1859               case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_8:
1860               case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_16:
1861                 /* Assume that any given atomic operation has low contention,
1862                    and thus the compare-and-swap operation succeeds.  */
1863                 if (predictor)
1864                   *predictor = PRED_COMPARE_AND_SWAP;
1865                 return boolean_true_node;
1866               default:
1867                 break;
1868             }
1869         }
1870
1871       return NULL;
1872     }
1873
1874   if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS)
1875     {
1876       tree res;
1877       enum br_predictor predictor2;
1878       op0 = expr_expected_value (op0, visited, predictor);
1879       if (!op0)
1880         return NULL;
1881       op1 = expr_expected_value (op1, visited, &predictor2);
1882       if (predictor && *predictor < predictor2)
1883         *predictor = predictor2;
1884       if (!op1)
1885         return NULL;
1886       res = fold_build2 (code, type, op0, op1);
1887       if (TREE_CONSTANT (res))
1888         return res;
1889       return NULL;
1890     }
1891   if (get_gimple_rhs_class (code) == GIMPLE_UNARY_RHS)
1892     {
1893       tree res;
1894       op0 = expr_expected_value (op0, visited, predictor);
1895       if (!op0)
1896         return NULL;
1897       res = fold_build1 (code, type, op0);
1898       if (TREE_CONSTANT (res))
1899         return res;
1900       return NULL;
1901     }
1902   return NULL;
1903 }
1904
1905 /* Return constant EXPR will likely have at execution time, NULL if unknown.
1906    The function is used by builtin_expect branch predictor so the evidence
1907    must come from this construct and additional possible constant folding.
1908
1909    We may want to implement more involved value guess (such as value range
1910    propagation based prediction), but such tricks shall go to new
1911    implementation.  */
1912
1913 static tree
1914 expr_expected_value (tree expr, bitmap visited,
1915                      enum br_predictor *predictor)
1916 {
1917   enum tree_code code;
1918   tree op0, op1;
1919
1920   if (TREE_CONSTANT (expr))
1921     {
1922       if (predictor)
1923         *predictor = PRED_UNCONDITIONAL;
1924       return expr;
1925     }
1926
1927   extract_ops_from_tree (expr, &code, &op0, &op1);
1928   return expr_expected_value_1 (TREE_TYPE (expr),
1929                                 op0, code, op1, visited, predictor);
1930 }
1931 \f
1932 /* Predict using opcode of the last statement in basic block.  */
1933 static void
1934 tree_predict_by_opcode (basic_block bb)
1935 {
1936   gimple stmt = last_stmt (bb);
1937   edge then_edge;
1938   tree op0, op1;
1939   tree type;
1940   tree val;
1941   enum tree_code cmp;
1942   bitmap visited;
1943   edge_iterator ei;
1944   enum br_predictor predictor;
1945
1946   if (!stmt || gimple_code (stmt) != GIMPLE_COND)
1947     return;
1948   FOR_EACH_EDGE (then_edge, ei, bb->succs)
1949     if (then_edge->flags & EDGE_TRUE_VALUE)
1950       break;
1951   op0 = gimple_cond_lhs (stmt);
1952   op1 = gimple_cond_rhs (stmt);
1953   cmp = gimple_cond_code (stmt);
1954   type = TREE_TYPE (op0);
1955   visited = BITMAP_ALLOC (NULL);
1956   val = expr_expected_value_1 (boolean_type_node, op0, cmp, op1, visited,
1957                                &predictor);
1958   BITMAP_FREE (visited);
1959   if (val && TREE_CODE (val) == INTEGER_CST)
1960     {
1961       if (predictor == PRED_BUILTIN_EXPECT)
1962         {
1963           int percent = PARAM_VALUE (BUILTIN_EXPECT_PROBABILITY);
1964
1965           gcc_assert (percent >= 0 && percent <= 100);
1966           if (integer_zerop (val))
1967             percent = 100 - percent;
1968           predict_edge (then_edge, PRED_BUILTIN_EXPECT, HITRATE (percent));
1969         }
1970       else
1971         predict_edge (then_edge, predictor,
1972                       integer_zerop (val) ? NOT_TAKEN : TAKEN);
1973     }
1974   /* Try "pointer heuristic."
1975      A comparison ptr == 0 is predicted as false.
1976      Similarly, a comparison ptr1 == ptr2 is predicted as false.  */
1977   if (POINTER_TYPE_P (type))
1978     {
1979       if (cmp == EQ_EXPR)
1980         predict_edge_def (then_edge, PRED_TREE_POINTER, NOT_TAKEN);
1981       else if (cmp == NE_EXPR)
1982         predict_edge_def (then_edge, PRED_TREE_POINTER, TAKEN);
1983     }
1984   else
1985
1986   /* Try "opcode heuristic."
1987      EQ tests are usually false and NE tests are usually true. Also,
1988      most quantities are positive, so we can make the appropriate guesses
1989      about signed comparisons against zero.  */
1990     switch (cmp)
1991       {
1992       case EQ_EXPR:
1993       case UNEQ_EXPR:
1994         /* Floating point comparisons appears to behave in a very
1995            unpredictable way because of special role of = tests in
1996            FP code.  */
1997         if (FLOAT_TYPE_P (type))
1998           ;
1999         /* Comparisons with 0 are often used for booleans and there is
2000            nothing useful to predict about them.  */
2001         else if (integer_zerop (op0) || integer_zerop (op1))
2002           ;
2003         else
2004           predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, NOT_TAKEN);
2005         break;
2006
2007       case NE_EXPR:
2008       case LTGT_EXPR:
2009         /* Floating point comparisons appears to behave in a very
2010            unpredictable way because of special role of = tests in
2011            FP code.  */
2012         if (FLOAT_TYPE_P (type))
2013           ;
2014         /* Comparisons with 0 are often used for booleans and there is
2015            nothing useful to predict about them.  */
2016         else if (integer_zerop (op0)
2017                  || integer_zerop (op1))
2018           ;
2019         else
2020           predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, TAKEN);
2021         break;
2022
2023       case ORDERED_EXPR:
2024         predict_edge_def (then_edge, PRED_TREE_FPOPCODE, TAKEN);
2025         break;
2026
2027       case UNORDERED_EXPR:
2028         predict_edge_def (then_edge, PRED_TREE_FPOPCODE, NOT_TAKEN);
2029         break;
2030
2031       case LE_EXPR:
2032       case LT_EXPR:
2033         if (integer_zerop (op1)
2034             || integer_onep (op1)
2035             || integer_all_onesp (op1)
2036             || real_zerop (op1)
2037             || real_onep (op1)
2038             || real_minus_onep (op1))
2039           predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, NOT_TAKEN);
2040         break;
2041
2042       case GE_EXPR:
2043       case GT_EXPR:
2044         if (integer_zerop (op1)
2045             || integer_onep (op1)
2046             || integer_all_onesp (op1)
2047             || real_zerop (op1)
2048             || real_onep (op1)
2049             || real_minus_onep (op1))
2050           predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, TAKEN);
2051         break;
2052
2053       default:
2054         break;
2055       }
2056 }
2057
2058 /* Try to guess whether the value of return means error code.  */
2059
2060 static enum br_predictor
2061 return_prediction (tree val, enum prediction *prediction)
2062 {
2063   /* VOID.  */
2064   if (!val)
2065     return PRED_NO_PREDICTION;
2066   /* Different heuristics for pointers and scalars.  */
2067   if (POINTER_TYPE_P (TREE_TYPE (val)))
2068     {
2069       /* NULL is usually not returned.  */
2070       if (integer_zerop (val))
2071         {
2072           *prediction = NOT_TAKEN;
2073           return PRED_NULL_RETURN;
2074         }
2075     }
2076   else if (INTEGRAL_TYPE_P (TREE_TYPE (val)))
2077     {
2078       /* Negative return values are often used to indicate
2079          errors.  */
2080       if (TREE_CODE (val) == INTEGER_CST
2081           && tree_int_cst_sgn (val) < 0)
2082         {
2083           *prediction = NOT_TAKEN;
2084           return PRED_NEGATIVE_RETURN;
2085         }
2086       /* Constant return values seems to be commonly taken.
2087          Zero/one often represent booleans so exclude them from the
2088          heuristics.  */
2089       if (TREE_CONSTANT (val)
2090           && (!integer_zerop (val) && !integer_onep (val)))
2091         {
2092           *prediction = TAKEN;
2093           return PRED_CONST_RETURN;
2094         }
2095     }
2096   return PRED_NO_PREDICTION;
2097 }
2098
2099 /* Find the basic block with return expression and look up for possible
2100    return value trying to apply RETURN_PREDICTION heuristics.  */
2101 static void
2102 apply_return_prediction (void)
2103 {
2104   gimple return_stmt = NULL;
2105   tree return_val;
2106   edge e;
2107   gimple phi;
2108   int phi_num_args, i;
2109   enum br_predictor pred;
2110   enum prediction direction;
2111   edge_iterator ei;
2112
2113   FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
2114     {
2115       return_stmt = last_stmt (e->src);
2116       if (return_stmt
2117           && gimple_code (return_stmt) == GIMPLE_RETURN)
2118         break;
2119     }
2120   if (!e)
2121     return;
2122   return_val = gimple_return_retval (return_stmt);
2123   if (!return_val)
2124     return;
2125   if (TREE_CODE (return_val) != SSA_NAME
2126       || !SSA_NAME_DEF_STMT (return_val)
2127       || gimple_code (SSA_NAME_DEF_STMT (return_val)) != GIMPLE_PHI)
2128     return;
2129   phi = SSA_NAME_DEF_STMT (return_val);
2130   phi_num_args = gimple_phi_num_args (phi);
2131   pred = return_prediction (PHI_ARG_DEF (phi, 0), &direction);
2132
2133   /* Avoid the degenerate case where all return values form the function
2134      belongs to same category (ie they are all positive constants)
2135      so we can hardly say something about them.  */
2136   for (i = 1; i < phi_num_args; i++)
2137     if (pred != return_prediction (PHI_ARG_DEF (phi, i), &direction))
2138       break;
2139   if (i != phi_num_args)
2140     for (i = 0; i < phi_num_args; i++)
2141       {
2142         pred = return_prediction (PHI_ARG_DEF (phi, i), &direction);
2143         if (pred != PRED_NO_PREDICTION)
2144           predict_paths_leading_to_edge (gimple_phi_arg_edge (phi, i), pred,
2145                                          direction);
2146       }
2147 }
2148
2149 /* Look for basic block that contains unlikely to happen events
2150    (such as noreturn calls) and mark all paths leading to execution
2151    of this basic blocks as unlikely.  */
2152
2153 static void
2154 tree_bb_level_predictions (void)
2155 {
2156   basic_block bb;
2157   bool has_return_edges = false;
2158   edge e;
2159   edge_iterator ei;
2160
2161   FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
2162     if (!(e->flags & (EDGE_ABNORMAL | EDGE_FAKE | EDGE_EH)))
2163       {
2164         has_return_edges = true;
2165         break;
2166       }
2167
2168   apply_return_prediction ();
2169
2170   FOR_EACH_BB_FN (bb, cfun)
2171     {
2172       gimple_stmt_iterator gsi;
2173
2174       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2175         {
2176           gimple stmt = gsi_stmt (gsi);
2177           tree decl;
2178
2179           if (is_gimple_call (stmt))
2180             {
2181               if ((gimple_call_flags (stmt) & ECF_NORETURN)
2182                   && has_return_edges)
2183                 predict_paths_leading_to (bb, PRED_NORETURN,
2184                                           NOT_TAKEN);
2185               decl = gimple_call_fndecl (stmt);
2186               if (decl
2187                   && lookup_attribute ("cold",
2188                                        DECL_ATTRIBUTES (decl)))
2189                 predict_paths_leading_to (bb, PRED_COLD_FUNCTION,
2190                                           NOT_TAKEN);
2191             }
2192           else if (gimple_code (stmt) == GIMPLE_PREDICT)
2193             {
2194               predict_paths_leading_to (bb, gimple_predict_predictor (stmt),
2195                                         gimple_predict_outcome (stmt));
2196               /* Keep GIMPLE_PREDICT around so early inlining will propagate
2197                  hints to callers.  */
2198             }
2199         }
2200     }
2201 }
2202
2203 #ifdef ENABLE_CHECKING
2204
2205 /* Callback for hash_map::traverse, asserts that the pointer map is
2206    empty.  */
2207
2208 bool
2209 assert_is_empty (const_basic_block const &, edge_prediction *const &value,
2210                  void *)
2211 {
2212   gcc_assert (!value);
2213   return false;
2214 }
2215 #endif
2216
2217 /* Predict branch probabilities and estimate profile for basic block BB.  */
2218
2219 static void
2220 tree_estimate_probability_bb (basic_block bb)
2221 {
2222   edge e;
2223   edge_iterator ei;
2224   gimple last;
2225
2226   FOR_EACH_EDGE (e, ei, bb->succs)
2227     {
2228       /* Predict edges to user labels with attributes.  */
2229       if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
2230         {
2231           gimple_stmt_iterator gi;
2232           for (gi = gsi_start_bb (e->dest); !gsi_end_p (gi); gsi_next (&gi))
2233             {
2234               gimple stmt = gsi_stmt (gi);
2235               tree decl;
2236
2237               if (gimple_code (stmt) != GIMPLE_LABEL)
2238                 break;
2239               decl = gimple_label_label (stmt);
2240               if (DECL_ARTIFICIAL (decl))
2241                 continue;
2242
2243               /* Finally, we have a user-defined label.  */
2244               if (lookup_attribute ("cold", DECL_ATTRIBUTES (decl)))
2245                 predict_edge_def (e, PRED_COLD_LABEL, NOT_TAKEN);
2246               else if (lookup_attribute ("hot", DECL_ATTRIBUTES (decl)))
2247                 predict_edge_def (e, PRED_HOT_LABEL, TAKEN);
2248             }
2249         }
2250
2251       /* Predict early returns to be probable, as we've already taken
2252          care for error returns and other cases are often used for
2253          fast paths through function.
2254
2255          Since we've already removed the return statements, we are
2256          looking for CFG like:
2257
2258          if (conditional)
2259          {
2260          ..
2261          goto return_block
2262          }
2263          some other blocks
2264          return_block:
2265          return_stmt.  */
2266       if (e->dest != bb->next_bb
2267           && e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2268           && single_succ_p (e->dest)
2269           && single_succ_edge (e->dest)->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)
2270           && (last = last_stmt (e->dest)) != NULL
2271           && gimple_code (last) == GIMPLE_RETURN)
2272         {
2273           edge e1;
2274           edge_iterator ei1;
2275
2276           if (single_succ_p (bb))
2277             {
2278               FOR_EACH_EDGE (e1, ei1, bb->preds)
2279                 if (!predicted_by_p (e1->src, PRED_NULL_RETURN)
2280                     && !predicted_by_p (e1->src, PRED_CONST_RETURN)
2281                     && !predicted_by_p (e1->src, PRED_NEGATIVE_RETURN))
2282                   predict_edge_def (e1, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
2283             }
2284           else
2285             if (!predicted_by_p (e->src, PRED_NULL_RETURN)
2286                 && !predicted_by_p (e->src, PRED_CONST_RETURN)
2287                 && !predicted_by_p (e->src, PRED_NEGATIVE_RETURN))
2288               predict_edge_def (e, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
2289         }
2290
2291       /* Look for block we are guarding (ie we dominate it,
2292          but it doesn't postdominate us).  */
2293       if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun) && e->dest != bb
2294           && dominated_by_p (CDI_DOMINATORS, e->dest, e->src)
2295           && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e->dest))
2296         {
2297           gimple_stmt_iterator bi;
2298
2299           /* The call heuristic claims that a guarded function call
2300              is improbable.  This is because such calls are often used
2301              to signal exceptional situations such as printing error
2302              messages.  */
2303           for (bi = gsi_start_bb (e->dest); !gsi_end_p (bi);
2304                gsi_next (&bi))
2305             {
2306               gimple stmt = gsi_stmt (bi);
2307               if (is_gimple_call (stmt)
2308                   /* Constant and pure calls are hardly used to signalize
2309                      something exceptional.  */
2310                   && gimple_has_side_effects (stmt))
2311                 {
2312                   predict_edge_def (e, PRED_CALL, NOT_TAKEN);
2313                   break;
2314                 }
2315             }
2316         }
2317     }
2318   tree_predict_by_opcode (bb);
2319 }
2320
2321 /* Predict branch probabilities and estimate profile of the tree CFG.
2322    This function can be called from the loop optimizers to recompute
2323    the profile information.  */
2324
2325 void
2326 tree_estimate_probability (void)
2327 {
2328   basic_block bb;
2329
2330   add_noreturn_fake_exit_edges ();
2331   connect_infinite_loops_to_exit ();
2332   /* We use loop_niter_by_eval, which requires that the loops have
2333      preheaders.  */
2334   create_preheaders (CP_SIMPLE_PREHEADERS);
2335   calculate_dominance_info (CDI_POST_DOMINATORS);
2336
2337   bb_predictions = new hash_map<const_basic_block, edge_prediction *>;
2338   tree_bb_level_predictions ();
2339   record_loop_exits ();
2340
2341   if (number_of_loops (cfun) > 1)
2342     predict_loops ();
2343
2344   FOR_EACH_BB_FN (bb, cfun)
2345     tree_estimate_probability_bb (bb);
2346
2347   FOR_EACH_BB_FN (bb, cfun)
2348     combine_predictions_for_bb (bb);
2349
2350 #ifdef ENABLE_CHECKING
2351   bb_predictions->traverse<void *, assert_is_empty> (NULL);
2352 #endif
2353   delete bb_predictions;
2354   bb_predictions = NULL;
2355
2356   estimate_bb_frequencies (false);
2357   free_dominance_info (CDI_POST_DOMINATORS);
2358   remove_fake_exit_edges ();
2359 }
2360 \f
2361 /* Predict edges to successors of CUR whose sources are not postdominated by
2362    BB by PRED and recurse to all postdominators.  */
2363
2364 static void
2365 predict_paths_for_bb (basic_block cur, basic_block bb,
2366                       enum br_predictor pred,
2367                       enum prediction taken,
2368                       bitmap visited)
2369 {
2370   edge e;
2371   edge_iterator ei;
2372   basic_block son;
2373
2374   /* We are looking for all edges forming edge cut induced by
2375      set of all blocks postdominated by BB.  */
2376   FOR_EACH_EDGE (e, ei, cur->preds)
2377     if (e->src->index >= NUM_FIXED_BLOCKS
2378         && !dominated_by_p (CDI_POST_DOMINATORS, e->src, bb))
2379     {
2380       edge e2;
2381       edge_iterator ei2;
2382       bool found = false;
2383
2384       /* Ignore fake edges and eh, we predict them as not taken anyway.  */
2385       if (e->flags & (EDGE_EH | EDGE_FAKE))
2386         continue;
2387       gcc_assert (bb == cur || dominated_by_p (CDI_POST_DOMINATORS, cur, bb));
2388
2389       /* See if there is an edge from e->src that is not abnormal
2390          and does not lead to BB.  */
2391       FOR_EACH_EDGE (e2, ei2, e->src->succs)
2392         if (e2 != e
2393             && !(e2->flags & (EDGE_EH | EDGE_FAKE))
2394             && !dominated_by_p (CDI_POST_DOMINATORS, e2->dest, bb))
2395           {
2396             found = true;
2397             break;
2398           }
2399
2400       /* If there is non-abnormal path leaving e->src, predict edge
2401          using predictor.  Otherwise we need to look for paths
2402          leading to e->src.
2403
2404          The second may lead to infinite loop in the case we are predicitng
2405          regions that are only reachable by abnormal edges.  We simply
2406          prevent visiting given BB twice.  */
2407       if (found)
2408         predict_edge_def (e, pred, taken);
2409       else if (bitmap_set_bit (visited, e->src->index))
2410         predict_paths_for_bb (e->src, e->src, pred, taken, visited);
2411     }
2412   for (son = first_dom_son (CDI_POST_DOMINATORS, cur);
2413        son;
2414        son = next_dom_son (CDI_POST_DOMINATORS, son))
2415     predict_paths_for_bb (son, bb, pred, taken, visited);
2416 }
2417
2418 /* Sets branch probabilities according to PREDiction and
2419    FLAGS.  */
2420
2421 static void
2422 predict_paths_leading_to (basic_block bb, enum br_predictor pred,
2423                           enum prediction taken)
2424 {
2425   bitmap visited = BITMAP_ALLOC (NULL);
2426   predict_paths_for_bb (bb, bb, pred, taken, visited);
2427   BITMAP_FREE (visited);
2428 }
2429
2430 /* Like predict_paths_leading_to but take edge instead of basic block.  */
2431
2432 static void
2433 predict_paths_leading_to_edge (edge e, enum br_predictor pred,
2434                                enum prediction taken)
2435 {
2436   bool has_nonloop_edge = false;
2437   edge_iterator ei;
2438   edge e2;
2439
2440   basic_block bb = e->src;
2441   FOR_EACH_EDGE (e2, ei, bb->succs)
2442     if (e2->dest != e->src && e2->dest != e->dest
2443         && !(e->flags & (EDGE_EH | EDGE_FAKE))
2444         && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e2->dest))
2445       {
2446         has_nonloop_edge = true;
2447         break;
2448       }
2449   if (!has_nonloop_edge)
2450     {
2451       bitmap visited = BITMAP_ALLOC (NULL);
2452       predict_paths_for_bb (bb, bb, pred, taken, visited);
2453       BITMAP_FREE (visited);
2454     }
2455   else
2456     predict_edge_def (e, pred, taken);
2457 }
2458 \f
2459 /* This is used to carry information about basic blocks.  It is
2460    attached to the AUX field of the standard CFG block.  */
2461
2462 struct block_info
2463 {
2464   /* Estimated frequency of execution of basic_block.  */
2465   sreal frequency;
2466
2467   /* To keep queue of basic blocks to process.  */
2468   basic_block next;
2469
2470   /* Number of predecessors we need to visit first.  */
2471   int npredecessors;
2472 };
2473
2474 /* Similar information for edges.  */
2475 struct edge_prob_info
2476 {
2477   /* In case edge is a loopback edge, the probability edge will be reached
2478      in case header is.  Estimated number of iterations of the loop can be
2479      then computed as 1 / (1 - back_edge_prob).  */
2480   sreal back_edge_prob;
2481   /* True if the edge is a loopback edge in the natural loop.  */
2482   unsigned int back_edge:1;
2483 };
2484
/* Accessors for the per-block and per-edge records stored in the AUX
   pointer.  EDGE_INFO is undefined first so that this definition
   replaces any earlier one.  */
#define BLOCK_INFO(BB)  ((block_info *) (BB)->aux)
#undef EDGE_INFO
#define EDGE_INFO(EDGE) ((edge_prob_info *) (EDGE)->aux)
2488
2489 /* Helper function for estimate_bb_frequencies.
2490    Propagate the frequencies in blocks marked in
2491    TOVISIT, starting in HEAD.  */
2492
2493 static void
2494 propagate_freq (basic_block head, bitmap tovisit)
2495 {
2496   basic_block bb;
2497   basic_block last;
2498   unsigned i;
2499   edge e;
2500   basic_block nextbb;
2501   bitmap_iterator bi;
2502
2503   /* For each basic block we need to visit count number of his predecessors
2504      we need to visit first.  */
2505   EXECUTE_IF_SET_IN_BITMAP (tovisit, 0, i, bi)
2506     {
2507       edge_iterator ei;
2508       int count = 0;
2509
2510       bb = BASIC_BLOCK_FOR_FN (cfun, i);
2511
2512       FOR_EACH_EDGE (e, ei, bb->preds)
2513         {
2514           bool visit = bitmap_bit_p (tovisit, e->src->index);
2515
2516           if (visit && !(e->flags & EDGE_DFS_BACK))
2517             count++;
2518           else if (visit && dump_file && !EDGE_INFO (e)->back_edge)
2519             fprintf (dump_file,
2520                      "Irreducible region hit, ignoring edge to %i->%i\n",
2521                      e->src->index, bb->index);
2522         }
2523       BLOCK_INFO (bb)->npredecessors = count;
2524       /* When function never returns, we will never process exit block.  */
2525       if (!count && bb == EXIT_BLOCK_PTR_FOR_FN (cfun))
2526         bb->count = bb->frequency = 0;
2527     }
2528
2529   BLOCK_INFO (head)->frequency = real_one;
2530   last = head;
2531   for (bb = head; bb; bb = nextbb)
2532     {
2533       edge_iterator ei;
2534       sreal cyclic_probability = real_zero;
2535       sreal frequency = real_zero;
2536
2537       nextbb = BLOCK_INFO (bb)->next;
2538       BLOCK_INFO (bb)->next = NULL;
2539
2540       /* Compute frequency of basic block.  */
2541       if (bb != head)
2542         {
2543 #ifdef ENABLE_CHECKING
2544           FOR_EACH_EDGE (e, ei, bb->preds)
2545             gcc_assert (!bitmap_bit_p (tovisit, e->src->index)
2546                         || (e->flags & EDGE_DFS_BACK));
2547 #endif
2548
2549           FOR_EACH_EDGE (e, ei, bb->preds)
2550             if (EDGE_INFO (e)->back_edge)
2551               {
2552                 cyclic_probability += EDGE_INFO (e)->back_edge_prob;
2553               }
2554             else if (!(e->flags & EDGE_DFS_BACK))
2555               {
2556                 /*  frequency += (e->probability
2557                                   * BLOCK_INFO (e->src)->frequency /
2558                                   REG_BR_PROB_BASE);  */
2559
2560                 sreal tmp (e->probability, 0);
2561                 tmp *= BLOCK_INFO (e->src)->frequency;
2562                 tmp *= real_inv_br_prob_base;
2563                 frequency += tmp;
2564               }
2565
2566           if (cyclic_probability == real_zero)
2567             {
2568               BLOCK_INFO (bb)->frequency = frequency;
2569             }
2570           else
2571             {
2572               if (cyclic_probability > real_almost_one)
2573                 cyclic_probability = real_almost_one;
2574
2575               /* BLOCK_INFO (bb)->frequency = frequency
2576                                               / (1 - cyclic_probability) */
2577
2578               cyclic_probability = real_one - cyclic_probability;
2579               BLOCK_INFO (bb)->frequency = frequency / cyclic_probability;
2580             }
2581         }
2582
2583       bitmap_clear_bit (tovisit, bb->index);
2584
2585       e = find_edge (bb, head);
2586       if (e)
2587         {
2588           /* EDGE_INFO (e)->back_edge_prob
2589              = ((e->probability * BLOCK_INFO (bb)->frequency)
2590              / REG_BR_PROB_BASE); */
2591
2592           sreal tmp (e->probability, 0);
2593           tmp *= BLOCK_INFO (bb)->frequency;
2594           EDGE_INFO (e)->back_edge_prob = tmp * real_inv_br_prob_base;
2595         }
2596
2597       /* Propagate to successor blocks.  */
2598       FOR_EACH_EDGE (e, ei, bb->succs)
2599         if (!(e->flags & EDGE_DFS_BACK)
2600             && BLOCK_INFO (e->dest)->npredecessors)
2601           {
2602             BLOCK_INFO (e->dest)->npredecessors--;
2603             if (!BLOCK_INFO (e->dest)->npredecessors)
2604               {
2605                 if (!nextbb)
2606                   nextbb = e->dest;
2607                 else
2608                   BLOCK_INFO (last)->next = e->dest;
2609
2610                 last = e->dest;
2611               }
2612           }
2613     }
2614 }
2615
2616 /* Estimate frequencies in loops at same nest level.  */
2617
2618 static void
2619 estimate_loops_at_level (struct loop *first_loop)
2620 {
2621   struct loop *loop;
2622
2623   for (loop = first_loop; loop; loop = loop->next)
2624     {
2625       edge e;
2626       basic_block *bbs;
2627       unsigned i;
2628       bitmap tovisit = BITMAP_ALLOC (NULL);
2629
2630       estimate_loops_at_level (loop->inner);
2631
2632       /* Find current loop back edge and mark it.  */
2633       e = loop_latch_edge (loop);
2634       EDGE_INFO (e)->back_edge = 1;
2635
2636       bbs = get_loop_body (loop);
2637       for (i = 0; i < loop->num_nodes; i++)
2638         bitmap_set_bit (tovisit, bbs[i]->index);
2639       free (bbs);
2640       propagate_freq (loop->header, tovisit);
2641       BITMAP_FREE (tovisit);
2642     }
2643 }
2644
2645 /* Propagates frequencies through structure of loops.  */
2646
2647 static void
2648 estimate_loops (void)
2649 {
2650   bitmap tovisit = BITMAP_ALLOC (NULL);
2651   basic_block bb;
2652
2653   /* Start by estimating the frequencies in the loops.  */
2654   if (number_of_loops (cfun) > 1)
2655     estimate_loops_at_level (current_loops->tree_root->inner);
2656
2657   /* Now propagate the frequencies through all the blocks.  */
2658   FOR_ALL_BB_FN (bb, cfun)
2659     {
2660       bitmap_set_bit (tovisit, bb->index);
2661     }
2662   propagate_freq (ENTRY_BLOCK_PTR_FOR_FN (cfun), tovisit);
2663   BITMAP_FREE (tovisit);
2664 }
2665
2666 /* Drop the profile for NODE to guessed, and update its frequency based on
2667    whether it is expected to be hot given the CALL_COUNT.  */
2668
2669 static void
2670 drop_profile (struct cgraph_node *node, gcov_type call_count)
2671 {
2672   struct function *fn = DECL_STRUCT_FUNCTION (node->decl);
2673   /* In the case where this was called by another function with a
2674      dropped profile, call_count will be 0. Since there are no
2675      non-zero call counts to this function, we don't know for sure
2676      whether it is hot, and therefore it will be marked normal below.  */
2677   bool hot = maybe_hot_count_p (NULL, call_count);
2678
2679   if (dump_file)
2680     fprintf (dump_file,
2681              "Dropping 0 profile for %s/%i. %s based on calls.\n",
2682              node->name (), node->order,
2683              hot ? "Function is hot" : "Function is normal");
2684   /* We only expect to miss profiles for functions that are reached
2685      via non-zero call edges in cases where the function may have
2686      been linked from another module or library (COMDATs and extern
2687      templates). See the comments below for handle_missing_profiles.
2688      Also, only warn in cases where the missing counts exceed the
2689      number of training runs. In certain cases with an execv followed
2690      by a no-return call the profile for the no-return call is not
2691      dumped and there can be a mismatch.  */
2692   if (!DECL_COMDAT (node->decl) && !DECL_EXTERNAL (node->decl)
2693       && call_count > profile_info->runs)
2694     {
2695       if (flag_profile_correction)
2696         {
2697           if (dump_file)
2698             fprintf (dump_file,
2699                      "Missing counts for called function %s/%i\n",
2700                      node->name (), node->order);
2701         }
2702       else
2703         warning (0, "Missing counts for called function %s/%i",
2704                  node->name (), node->order);
2705     }
2706
2707   profile_status_for_fn (fn)
2708       = (flag_guess_branch_prob ? PROFILE_GUESSED : PROFILE_ABSENT);
2709   node->frequency
2710       = hot ? NODE_FREQUENCY_HOT : NODE_FREQUENCY_NORMAL;
2711 }
2712
2713 /* In the case of COMDAT routines, multiple object files will contain the same
2714    function and the linker will select one for the binary. In that case
2715    all the other copies from the profile instrument binary will be missing
2716    profile counts. Look for cases where this happened, due to non-zero
2717    call counts going to 0-count functions, and drop the profile to guessed
2718    so that we can use the estimated probabilities and avoid optimizing only
2719    for size.
2720
2721    The other case where the profile may be missing is when the routine
2722    is not going to be emitted to the object file, e.g. for "extern template"
2723    class methods. Those will be marked DECL_EXTERNAL. Emit a warning in
2724    all other cases of non-zero calls to 0-count functions.  */
2725
2726 void
2727 handle_missing_profiles (void)
2728 {
2729   struct cgraph_node *node;
2730   int unlikely_count_fraction = PARAM_VALUE (UNLIKELY_BB_COUNT_FRACTION);
2731   vec<struct cgraph_node *> worklist;
2732   worklist.create (64);
2733
2734   /* See if 0 count function has non-0 count callers.  In this case we
2735      lost some profile.  Drop its function profile to PROFILE_GUESSED.  */
2736   FOR_EACH_DEFINED_FUNCTION (node)
2737     {
2738       struct cgraph_edge *e;
2739       gcov_type call_count = 0;
2740       gcov_type max_tp_first_run = 0;
2741       struct function *fn = DECL_STRUCT_FUNCTION (node->decl);
2742
2743       if (node->count)
2744         continue;
2745       for (e = node->callers; e; e = e->next_caller)
2746       {
2747         call_count += e->count;
2748
2749         if (e->caller->tp_first_run > max_tp_first_run)
2750           max_tp_first_run = e->caller->tp_first_run;
2751       }
2752
2753       /* If time profile is missing, let assign the maximum that comes from
2754          caller functions.  */
2755       if (!node->tp_first_run && max_tp_first_run)
2756         node->tp_first_run = max_tp_first_run + 1;
2757
2758       if (call_count
2759           && fn && fn->cfg
2760           && (call_count * unlikely_count_fraction >= profile_info->runs))
2761         {
2762           drop_profile (node, call_count);
2763           worklist.safe_push (node);
2764         }
2765     }
2766
2767   /* Propagate the profile dropping to other 0-count COMDATs that are
2768      potentially called by COMDATs we already dropped the profile on.  */
2769   while (worklist.length () > 0)
2770     {
2771       struct cgraph_edge *e;
2772
2773       node = worklist.pop ();
2774       for (e = node->callees; e; e = e->next_caller)
2775         {
2776           struct cgraph_node *callee = e->callee;
2777           struct function *fn = DECL_STRUCT_FUNCTION (callee->decl);
2778
2779           if (callee->count > 0)
2780             continue;
2781           if (DECL_COMDAT (callee->decl) && fn && fn->cfg
2782               && profile_status_for_fn (fn) == PROFILE_READ)
2783             {
2784               drop_profile (node, 0);
2785               worklist.safe_push (callee);
2786             }
2787         }
2788     }
2789   worklist.release ();
2790 }
2791
2792 /* Convert counts measured by profile driven feedback to frequencies.
2793    Return nonzero iff there was any nonzero execution count.  */
2794
2795 int
2796 counts_to_freqs (void)
2797 {
2798   gcov_type count_max, true_count_max = 0;
2799   basic_block bb;
2800
2801   /* Don't overwrite the estimated frequencies when the profile for
2802      the function is missing.  We may drop this function PROFILE_GUESSED
2803      later in drop_profile ().  */
2804   if (!flag_auto_profile && !ENTRY_BLOCK_PTR_FOR_FN (cfun)->count)
2805     return 0;
2806
2807   FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
2808     true_count_max = MAX (bb->count, true_count_max);
2809
2810   count_max = MAX (true_count_max, 1);
2811   FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
2812     bb->frequency = (bb->count * BB_FREQ_MAX + count_max / 2) / count_max;
2813
2814   return true_count_max;
2815 }
2816
2817 /* Return true if function is likely to be expensive, so there is no point to
2818    optimize performance of prologue, epilogue or do inlining at the expense
2819    of code size growth.  THRESHOLD is the limit of number of instructions
2820    function can execute at average to be still considered not expensive.  */
2821
2822 bool
2823 expensive_function_p (int threshold)
2824 {
2825   unsigned int sum = 0;
2826   basic_block bb;
2827   unsigned int limit;
2828
2829   /* We can not compute accurately for large thresholds due to scaled
2830      frequencies.  */
2831   gcc_assert (threshold <= BB_FREQ_MAX);
2832
2833   /* Frequencies are out of range.  This either means that function contains
2834      internal loop executing more than BB_FREQ_MAX times or profile feedback
2835      is available and function has not been executed at all.  */
2836   if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency == 0)
2837     return true;
2838
2839   /* Maximally BB_FREQ_MAX^2 so overflow won't happen.  */
2840   limit = ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency * threshold;
2841   FOR_EACH_BB_FN (bb, cfun)
2842     {
2843       rtx_insn *insn;
2844
2845       FOR_BB_INSNS (bb, insn)
2846         if (active_insn_p (insn))
2847           {
2848             sum += bb->frequency;
2849             if (sum > limit)
2850               return true;
2851         }
2852     }
2853
2854   return false;
2855 }
2856
2857 /* Estimate and propagate basic block frequencies using the given branch
2858    probabilities.  If FORCE is true, the frequencies are used to estimate
2859    the counts even when there are already non-zero profile counts.  */
2860
2861 void
2862 estimate_bb_frequencies (bool force)
2863 {
2864   basic_block bb;
2865   sreal freq_max;
2866
2867   if (force || profile_status_for_fn (cfun) != PROFILE_READ || !counts_to_freqs ())
2868     {
2869       static int real_values_initialized = 0;
2870
2871       if (!real_values_initialized)
2872         {
2873           real_values_initialized = 1;
2874           real_zero = sreal (0, 0);
2875           real_one = sreal (1, 0);
2876           real_br_prob_base = sreal (REG_BR_PROB_BASE, 0);
2877           real_bb_freq_max = sreal (BB_FREQ_MAX, 0);
2878           real_one_half = sreal (1, -1);
2879           real_inv_br_prob_base = real_one / real_br_prob_base;
2880           real_almost_one = real_one - real_inv_br_prob_base;
2881         }
2882
2883       mark_dfs_back_edges ();
2884
2885       single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun))->probability =
2886          REG_BR_PROB_BASE;
2887
2888       /* Set up block info for each basic block.  */
2889       alloc_aux_for_blocks (sizeof (block_info));
2890       alloc_aux_for_edges (sizeof (edge_prob_info));
2891       FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
2892         {
2893           edge e;
2894           edge_iterator ei;
2895
2896           FOR_EACH_EDGE (e, ei, bb->succs)
2897             {
2898               EDGE_INFO (e)->back_edge_prob = sreal (e->probability, 0);
2899               EDGE_INFO (e)->back_edge_prob *= real_inv_br_prob_base;
2900             }
2901         }
2902
2903       /* First compute frequencies locally for each loop from innermost
2904          to outermost to examine frequencies for back edges.  */
2905       estimate_loops ();
2906
2907       freq_max = real_zero;
2908       FOR_EACH_BB_FN (bb, cfun)
2909         if (freq_max < BLOCK_INFO (bb)->frequency)
2910           freq_max = BLOCK_INFO (bb)->frequency;
2911
2912       freq_max = real_bb_freq_max / freq_max;
2913       FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
2914         {
2915           sreal tmp = BLOCK_INFO (bb)->frequency * freq_max + real_one_half;
2916           bb->frequency = tmp.to_int ();
2917         }
2918
2919       free_aux_for_blocks ();
2920       free_aux_for_edges ();
2921     }
2922   compute_function_frequency ();
2923 }
2924
2925 /* Decide whether function is hot, cold or unlikely executed.  */
2926 void
2927 compute_function_frequency (void)
2928 {
2929   basic_block bb;
2930   struct cgraph_node *node = cgraph_node::get (current_function_decl);
2931
2932   if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
2933       || MAIN_NAME_P (DECL_NAME (current_function_decl)))
2934     node->only_called_at_startup = true;
2935   if (DECL_STATIC_DESTRUCTOR (current_function_decl))
2936     node->only_called_at_exit = true;
2937
2938   if (profile_status_for_fn (cfun) != PROFILE_READ)
2939     {
2940       int flags = flags_from_decl_or_type (current_function_decl);
2941       if (lookup_attribute ("cold", DECL_ATTRIBUTES (current_function_decl))
2942           != NULL)
2943         node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
2944       else if (lookup_attribute ("hot", DECL_ATTRIBUTES (current_function_decl))
2945                != NULL)
2946         node->frequency = NODE_FREQUENCY_HOT;
2947       else if (flags & ECF_NORETURN)
2948         node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
2949       else if (MAIN_NAME_P (DECL_NAME (current_function_decl)))
2950         node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
2951       else if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
2952                || DECL_STATIC_DESTRUCTOR (current_function_decl))
2953         node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
2954       return;
2955     }
2956
2957   /* Only first time try to drop function into unlikely executed.
2958      After inlining the roundoff errors may confuse us.
2959      Ipa-profile pass will drop functions only called from unlikely
2960      functions to unlikely and that is most of what we care about.  */
2961   if (!cfun->after_inlining)
2962     node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
2963   FOR_EACH_BB_FN (bb, cfun)
2964     {
2965       if (maybe_hot_bb_p (cfun, bb))
2966         {
2967           node->frequency = NODE_FREQUENCY_HOT;
2968           return;
2969         }
2970       if (!probably_never_executed_bb_p (cfun, bb))
2971         node->frequency = NODE_FREQUENCY_NORMAL;
2972     }
2973 }
2974
2975 /* Build PREDICT_EXPR.  */
2976 tree
2977 build_predict_expr (enum br_predictor predictor, enum prediction taken)
2978 {
2979   tree t = build1 (PREDICT_EXPR, void_type_node,
2980                    build_int_cst (integer_type_node, predictor));
2981   SET_PREDICT_EXPR_OUTCOME (t, taken);
2982   return t;
2983 }
2984
2985 const char *
2986 predictor_name (enum br_predictor predictor)
2987 {
2988   return predictor_info[predictor].name;
2989 }
2990
2991 /* Predict branch probabilities and estimate profile of the tree CFG. */
2992
2993 namespace {
2994
2995 const pass_data pass_data_profile =
2996 {
2997   GIMPLE_PASS, /* type */
2998   "profile_estimate", /* name */
2999   OPTGROUP_NONE, /* optinfo_flags */
3000   TV_BRANCH_PROB, /* tv_id */
3001   PROP_cfg, /* properties_required */
3002   0, /* properties_provided */
3003   0, /* properties_destroyed */
3004   0, /* todo_flags_start */
3005   0, /* todo_flags_finish */
3006 };
3007
3008 class pass_profile : public gimple_opt_pass
3009 {
3010 public:
3011   pass_profile (gcc::context *ctxt)
3012     : gimple_opt_pass (pass_data_profile, ctxt)
3013   {}
3014
3015   /* opt_pass methods: */
3016   virtual bool gate (function *) { return flag_guess_branch_prob; }
3017   virtual unsigned int execute (function *);
3018
3019 }; // class pass_profile
3020
3021 unsigned int
3022 pass_profile::execute (function *fun)
3023 {
3024   unsigned nb_loops;
3025
3026   loop_optimizer_init (LOOPS_NORMAL);
3027   if (dump_file && (dump_flags & TDF_DETAILS))
3028     flow_loops_dump (dump_file, NULL, 0);
3029
3030   mark_irreducible_loops ();
3031
3032   nb_loops = number_of_loops (fun);
3033   if (nb_loops > 1)
3034     scev_initialize ();
3035
3036   tree_estimate_probability ();
3037
3038   if (nb_loops > 1)
3039     scev_finalize ();
3040
3041   loop_optimizer_finalize ();
3042   if (dump_file && (dump_flags & TDF_DETAILS))
3043     gimple_dump_cfg (dump_file, dump_flags);
3044  if (profile_status_for_fn (fun) == PROFILE_ABSENT)
3045     profile_status_for_fn (fun) = PROFILE_GUESSED;
3046   return 0;
3047 }
3048
3049 } // anon namespace
3050
3051 gimple_opt_pass *
3052 make_pass_profile (gcc::context *ctxt)
3053 {
3054   return new pass_profile (ctxt);
3055 }
3056
3057 namespace {
3058
3059 const pass_data pass_data_strip_predict_hints =
3060 {
3061   GIMPLE_PASS, /* type */
3062   "*strip_predict_hints", /* name */
3063   OPTGROUP_NONE, /* optinfo_flags */
3064   TV_BRANCH_PROB, /* tv_id */
3065   PROP_cfg, /* properties_required */
3066   0, /* properties_provided */
3067   0, /* properties_destroyed */
3068   0, /* todo_flags_start */
3069   0, /* todo_flags_finish */
3070 };
3071
3072 class pass_strip_predict_hints : public gimple_opt_pass
3073 {
3074 public:
3075   pass_strip_predict_hints (gcc::context *ctxt)
3076     : gimple_opt_pass (pass_data_strip_predict_hints, ctxt)
3077   {}
3078
3079   /* opt_pass methods: */
3080   opt_pass * clone () { return new pass_strip_predict_hints (m_ctxt); }
3081   virtual unsigned int execute (function *);
3082
3083 }; // class pass_strip_predict_hints
3084
3085 /* Get rid of all builtin_expect calls and GIMPLE_PREDICT statements
3086    we no longer need.  */
3087 unsigned int
3088 pass_strip_predict_hints::execute (function *fun)
3089 {
3090   basic_block bb;
3091   gimple ass_stmt;
3092   tree var;
3093
3094   FOR_EACH_BB_FN (bb, fun)
3095     {
3096       gimple_stmt_iterator bi;
3097       for (bi = gsi_start_bb (bb); !gsi_end_p (bi);)
3098         {
3099           gimple stmt = gsi_stmt (bi);
3100
3101           if (gimple_code (stmt) == GIMPLE_PREDICT)
3102             {
3103               gsi_remove (&bi, true);
3104               continue;
3105             }
3106           else if (is_gimple_call (stmt))
3107             {
3108               tree fndecl = gimple_call_fndecl (stmt);
3109
3110               if ((fndecl
3111                    && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
3112                    && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_EXPECT
3113                    && gimple_call_num_args (stmt) == 2)
3114                   || (gimple_call_internal_p (stmt)
3115                       && gimple_call_internal_fn (stmt) == IFN_BUILTIN_EXPECT))
3116                 {
3117                   var = gimple_call_lhs (stmt);
3118                   if (var)
3119                     {
3120                       ass_stmt
3121                         = gimple_build_assign (var, gimple_call_arg (stmt, 0));
3122                       gsi_replace (&bi, ass_stmt, true);
3123                     }
3124                   else
3125                     {
3126                       gsi_remove (&bi, true);
3127                       continue;
3128                     }
3129                 }
3130             }
3131           gsi_next (&bi);
3132         }
3133     }
3134   return 0;
3135 }
3136
3137 } // anon namespace
3138
3139 gimple_opt_pass *
3140 make_pass_strip_predict_hints (gcc::context *ctxt)
3141 {
3142   return new pass_strip_predict_hints (ctxt);
3143 }
3144
3145 /* Rebuild function frequencies.  Passes are in general expected to
3146    maintain profile by hand, however in some cases this is not possible:
3147    for example when inlining several functions with loops freuqencies might run
3148    out of scale and thus needs to be recomputed.  */
3149
3150 void
3151 rebuild_frequencies (void)
3152 {
3153   timevar_push (TV_REBUILD_FREQUENCIES);
3154
3155   /* When the max bb count in the function is small, there is a higher
3156      chance that there were truncation errors in the integer scaling
3157      of counts by inlining and other optimizations. This could lead
3158      to incorrect classification of code as being cold when it isn't.
3159      In that case, force the estimation of bb counts/frequencies from the
3160      branch probabilities, rather than computing frequencies from counts,
3161      which may also lead to frequencies incorrectly reduced to 0. There
3162      is less precision in the probabilities, so we only do this for small
3163      max counts.  */
3164   gcov_type count_max = 0;
3165   basic_block bb;
3166   FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
3167     count_max = MAX (bb->count, count_max);
3168
3169   if (profile_status_for_fn (cfun) == PROFILE_GUESSED
3170       || (!flag_auto_profile && profile_status_for_fn (cfun) == PROFILE_READ
3171           && count_max < REG_BR_PROB_BASE/10))
3172     {
3173       loop_optimizer_init (0);
3174       add_noreturn_fake_exit_edges ();
3175       mark_irreducible_loops ();
3176       connect_infinite_loops_to_exit ();
3177       estimate_bb_frequencies (true);
3178       remove_fake_exit_edges ();
3179       loop_optimizer_finalize ();
3180     }
3181   else if (profile_status_for_fn (cfun) == PROFILE_READ)
3182     counts_to_freqs ();
3183   else
3184     gcc_unreachable ();
3185   timevar_pop (TV_REBUILD_FREQUENCIES);
3186 }