gcc/predict.c
1 /* Branch prediction routines for the GNU compiler.
2 Copyright (C) 2000-2015 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 /* References:
21
22 [1] "Branch Prediction for Free"
23 Ball and Larus; PLDI '93.
24 [2] "Static Branch Frequency and Program Profile Analysis"
25 Wu and Larus; MICRO-27.
26 [3] "Corpus-based Static Branch Prediction"
27 Calder, Grunwald, Lindsay, Martin, Mozer, and Zorn; PLDI '95. */
28
29
30 #include "config.h"
31 #include "system.h"
32 #include "coretypes.h"
33 #include "tm.h"
34 #include "alias.h"
35 #include "symtab.h"
36 #include "tree.h"
37 #include "fold-const.h"
38 #include "calls.h"
39 #include "rtl.h"
40 #include "tm_p.h"
41 #include "hard-reg-set.h"
42 #include "predict.h"
43 #include "function.h"
44 #include "dominance.h"
45 #include "cfg.h"
46 #include "cfganal.h"
47 #include "basic-block.h"
48 #include "insn-config.h"
49 #include "regs.h"
50 #include "flags.h"
51 #include "profile.h"
52 #include "except.h"
53 #include "diagnostic-core.h"
54 #include "recog.h"
55 #include "expmed.h"
56 #include "dojump.h"
57 #include "explow.h"
58 #include "emit-rtl.h"
59 #include "varasm.h"
60 #include "stmt.h"
61 #include "expr.h"
62 #include "coverage.h"
63 #include "sreal.h"
64 #include "params.h"
65 #include "target.h"
66 #include "cfgloop.h"
67 #include "tree-ssa-alias.h"
68 #include "internal-fn.h"
69 #include "gimple-expr.h"
70 #include "gimple.h"
71 #include "gimple-iterator.h"
72 #include "gimple-ssa.h"
73 #include "cgraph.h"
74 #include "tree-cfg.h"
75 #include "tree-phinodes.h"
76 #include "ssa-iterators.h"
77 #include "tree-ssa-loop-niter.h"
78 #include "tree-ssa-loop.h"
79 #include "tree-pass.h"
80 #include "tree-scalar-evolution.h"
81
 82 /* Real constants: 1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE,
 83 1/REG_BR_PROB_BASE, 0.5, BB_FREQ_MAX. */
84 static sreal real_almost_one, real_br_prob_base,
85 real_inv_br_prob_base, real_one_half, real_bb_freq_max;
86
87 static void combine_predictions_for_insn (rtx_insn *, basic_block);
88 static void dump_prediction (FILE *, enum br_predictor, int, basic_block, int);
89 static void predict_paths_leading_to (basic_block, enum br_predictor, enum prediction);
90 static void predict_paths_leading_to_edge (edge, enum br_predictor, enum prediction);
91 static bool can_predict_insn_p (const rtx_insn *);
92
93 /* Information we hold about each branch predictor.
94 Filled using information from predict.def. */
95
96 struct predictor_info
97 {
98 const char *const name; /* Name used in the debugging dumps. */
99 const int hitrate; /* Expected hitrate used by
100 predict_insn_def call. */
101 const int flags;
102 };
103
 104 /* Use the given predictor without Dempster-Shafer theory if it matches
 105 using first_match heuristics. */
106 #define PRED_FLAG_FIRST_MATCH 1
107
 108 /* Convert a hitrate given in percent to our REG_BR_PROB_BASE based representation. */
109
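/* For example, with the usual REG_BR_PROB_BASE of 10000, HITRATE (99)
   evaluates to 9900 and HITRATE (50) to 5000. */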
110 #define HITRATE(VAL) ((int) ((VAL) * REG_BR_PROB_BASE + 50) / 100)
111
112 #define DEF_PREDICTOR(ENUM, NAME, HITRATE, FLAGS) {NAME, HITRATE, FLAGS},
113 static const struct predictor_info predictor_info[]= {
114 #include "predict.def"
115
116 /* Upper bound on predictors. */
117 {NULL, 0, 0}
118 };
119 #undef DEF_PREDICTOR
120
121 /* Return TRUE if frequency FREQ is considered to be hot. */
122
123 static inline bool
124 maybe_hot_frequency_p (struct function *fun, int freq)
125 {
126 struct cgraph_node *node = cgraph_node::get (fun->decl);
127 if (!profile_info
128 || !opt_for_fn (fun->decl, flag_branch_probabilities))
129 {
130 if (node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED)
131 return false;
132 if (node->frequency == NODE_FREQUENCY_HOT)
133 return true;
134 }
135 if (profile_status_for_fn (fun) == PROFILE_ABSENT)
136 return true;
137 if (node->frequency == NODE_FREQUENCY_EXECUTED_ONCE
138 && freq < (ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency * 2 / 3))
139 return false;
140 if (PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION) == 0)
141 return false;
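/* A block is considered hot when its frequency is at least
   1/HOT_BB_FREQUENCY_FRACTION of the entry block frequency. */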
142 if (freq < (ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency
143 / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION)))
144 return false;
145 return true;
146 }
147
148 static gcov_type min_count = -1;
149
150 /* Determine the threshold for hot BB counts. */
151
152 gcov_type
153 get_hot_bb_threshold ()
154 {
155 gcov_working_set_t *ws;
156 if (min_count == -1)
157 {
158 ws = find_working_set (PARAM_VALUE (HOT_BB_COUNT_WS_PERMILLE));
159 gcc_assert (ws);
160 min_count = ws->min_counter;
161 }
162 return min_count;
163 }
164
165 /* Set the threshold for hot BB counts. */
166
167 void
168 set_hot_bb_threshold (gcov_type min)
169 {
170 min_count = min;
171 }
172
 173 /* Return TRUE if profile count COUNT is considered to be hot. */
174
175 bool
176 maybe_hot_count_p (struct function *fun, gcov_type count)
177 {
178 if (fun && profile_status_for_fn (fun) != PROFILE_READ)
179 return true;
180 /* Code executed at most once is not hot. */
181 if (profile_info->runs >= count)
182 return false;
183 return (count >= get_hot_bb_threshold ());
184 }
185
186 /* Return true in case BB can be CPU intensive and should be optimized
187 for maximal performance. */
188
189 bool
190 maybe_hot_bb_p (struct function *fun, const_basic_block bb)
191 {
192 gcc_checking_assert (fun);
193 if (profile_status_for_fn (fun) == PROFILE_READ)
194 return maybe_hot_count_p (fun, bb->count);
195 return maybe_hot_frequency_p (fun, bb->frequency);
196 }
197
 198 /* Return true in case edge E can be CPU intensive and should be optimized
 199 for maximal performance. */
200
201 bool
202 maybe_hot_edge_p (edge e)
203 {
204 if (profile_status_for_fn (cfun) == PROFILE_READ)
205 return maybe_hot_count_p (cfun, e->count);
206 return maybe_hot_frequency_p (cfun, EDGE_FREQUENCY (e));
207 }
208
 209 /* Return true if profile COUNT and FREQUENCY, or function FUN's static
 210 node frequency, reflect that the code is never executed. */
211
212 static bool
213 probably_never_executed (struct function *fun,
214 gcov_type count, int frequency)
215 {
216 gcc_checking_assert (fun);
217 if (profile_status_for_fn (fun) == PROFILE_READ)
218 {
219 int unlikely_count_fraction = PARAM_VALUE (UNLIKELY_BB_COUNT_FRACTION);
220 if (count * unlikely_count_fraction >= profile_info->runs)
221 return false;
222 if (!frequency)
223 return true;
224 if (!ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency)
225 return false;
226 if (ENTRY_BLOCK_PTR_FOR_FN (fun)->count)
227 {
228 gcov_type computed_count;
229 /* Check for possibility of overflow, in which case entry bb count
230 is large enough to do the division first without losing much
231 precision. */
232 if (ENTRY_BLOCK_PTR_FOR_FN (fun)->count < REG_BR_PROB_BASE *
233 REG_BR_PROB_BASE)
234 {
235 gcov_type scaled_count
236 = frequency * ENTRY_BLOCK_PTR_FOR_FN (fun)->count *
237 unlikely_count_fraction;
238 computed_count = RDIV (scaled_count,
239 ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency);
240 }
241 else
242 {
243 computed_count = RDIV (ENTRY_BLOCK_PTR_FOR_FN (fun)->count,
244 ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency);
245 computed_count *= frequency * unlikely_count_fraction;
246 }
247 if (computed_count >= profile_info->runs)
248 return false;
249 }
250 return true;
251 }
252 if ((!profile_info || !(opt_for_fn (fun->decl, flag_branch_probabilities)))
253 && (cgraph_node::get (fun->decl)->frequency
254 == NODE_FREQUENCY_UNLIKELY_EXECUTED))
255 return true;
256 return false;
257 }
258
259
260 /* Return true in case BB is probably never executed. */
261
262 bool
263 probably_never_executed_bb_p (struct function *fun, const_basic_block bb)
264 {
265 return probably_never_executed (fun, bb->count, bb->frequency);
266 }
267
268
269 /* Return true in case edge E is probably never executed. */
270
271 bool
272 probably_never_executed_edge_p (struct function *fun, edge e)
273 {
274 return probably_never_executed (fun, e->count, EDGE_FREQUENCY (e));
275 }
276
 277 /* Return true when function FUN should always be optimized for size. */
278
279 bool
280 optimize_function_for_size_p (struct function *fun)
281 {
282 if (!fun || !fun->decl)
283 return optimize_size;
284 cgraph_node *n = cgraph_node::get (fun->decl);
285 return n && n->optimize_for_size_p ();
286 }
287
 288 /* Return true when function FUN should always be optimized for speed. */
289
290 bool
291 optimize_function_for_speed_p (struct function *fun)
292 {
293 return !optimize_function_for_size_p (fun);
294 }
295
296 /* Return TRUE when BB should be optimized for size. */
297
298 bool
299 optimize_bb_for_size_p (const_basic_block bb)
300 {
301 return (optimize_function_for_size_p (cfun)
302 || (bb && !maybe_hot_bb_p (cfun, bb)));
303 }
304
305 /* Return TRUE when BB should be optimized for speed. */
306
307 bool
308 optimize_bb_for_speed_p (const_basic_block bb)
309 {
310 return !optimize_bb_for_size_p (bb);
311 }
312
 313 /* Return TRUE when edge E should be optimized for size. */
314
315 bool
316 optimize_edge_for_size_p (edge e)
317 {
318 return optimize_function_for_size_p (cfun) || !maybe_hot_edge_p (e);
319 }
320
 321 /* Return TRUE when edge E should be optimized for speed. */
322
323 bool
324 optimize_edge_for_speed_p (edge e)
325 {
326 return !optimize_edge_for_size_p (e);
327 }
328
 329 /* Return TRUE when the currently expanded insn should be optimized for size. */
330
331 bool
332 optimize_insn_for_size_p (void)
333 {
334 return optimize_function_for_size_p (cfun) || !crtl->maybe_hot_insn_p;
335 }
336
 337 /* Return TRUE when the currently expanded insn should be optimized for speed. */
338
339 bool
340 optimize_insn_for_speed_p (void)
341 {
342 return !optimize_insn_for_size_p ();
343 }
344
345 /* Return TRUE when LOOP should be optimized for size. */
346
347 bool
348 optimize_loop_for_size_p (struct loop *loop)
349 {
350 return optimize_bb_for_size_p (loop->header);
351 }
352
353 /* Return TRUE when LOOP should be optimized for speed. */
354
355 bool
356 optimize_loop_for_speed_p (struct loop *loop)
357 {
358 return optimize_bb_for_speed_p (loop->header);
359 }
360
361 /* Return TRUE when LOOP nest should be optimized for speed. */
362
363 bool
364 optimize_loop_nest_for_speed_p (struct loop *loop)
365 {
366 struct loop *l = loop;
367 if (optimize_loop_for_speed_p (loop))
368 return true;
369 l = loop->inner;
370 while (l && l != loop)
371 {
372 if (optimize_loop_for_speed_p (l))
373 return true;
374 if (l->inner)
375 l = l->inner;
376 else if (l->next)
377 l = l->next;
378 else
379 {
380 while (l != loop && !l->next)
381 l = loop_outer (l);
382 if (l != loop)
383 l = l->next;
384 }
385 }
386 return false;
387 }
388
389 /* Return TRUE when LOOP nest should be optimized for size. */
390
391 bool
392 optimize_loop_nest_for_size_p (struct loop *loop)
393 {
394 return !optimize_loop_nest_for_speed_p (loop);
395 }
396
 397 /* Return true when edge E is likely to be well predictable by a branch
 398 predictor. */
399
400 bool
401 predictable_edge_p (edge e)
402 {
403 if (profile_status_for_fn (cfun) == PROFILE_ABSENT)
404 return false;
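/* An edge counts as predictable when its probability is within
   PREDICTABLE_BRANCH_OUTCOME percent of either 0 or REG_BR_PROB_BASE. */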
405 if ((e->probability
406 <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100)
407 || (REG_BR_PROB_BASE - e->probability
408 <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100))
409 return true;
410 return false;
411 }
412
413
414 /* Set RTL expansion for BB profile. */
415
416 void
417 rtl_profile_for_bb (basic_block bb)
418 {
419 crtl->maybe_hot_insn_p = maybe_hot_bb_p (cfun, bb);
420 }
421
422 /* Set RTL expansion for edge profile. */
423
424 void
425 rtl_profile_for_edge (edge e)
426 {
427 crtl->maybe_hot_insn_p = maybe_hot_edge_p (e);
428 }
429
430 /* Set RTL expansion to default mode (i.e. when profile info is not known). */
431 void
432 default_rtl_profile (void)
433 {
434 crtl->maybe_hot_insn_p = true;
435 }
436
 437 /* Return true if one of the outgoing edges is already predicted by
 438 PREDICTOR. */
439
440 bool
441 rtl_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
442 {
443 rtx note;
444 if (!INSN_P (BB_END (bb)))
445 return false;
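/* REG_BR_PRED notes carry a CONCAT of the predictor and the probability;
   see predict_insn. */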
446 for (note = REG_NOTES (BB_END (bb)); note; note = XEXP (note, 1))
447 if (REG_NOTE_KIND (note) == REG_BR_PRED
448 && INTVAL (XEXP (XEXP (note, 0), 0)) == (int)predictor)
449 return true;
450 return false;
451 }
452
 453 /* Structure representing predictions at the tree level. */
454
455 struct edge_prediction {
456 struct edge_prediction *ep_next;
457 edge ep_edge;
458 enum br_predictor ep_predictor;
459 int ep_probability;
460 };
461
462 /* This map contains for a basic block the list of predictions for the
463 outgoing edges. */
464
465 static hash_map<const_basic_block, edge_prediction *> *bb_predictions;
466
 467 /* Return true if one of the outgoing edges is already predicted by
 468 PREDICTOR. */
469
470 bool
471 gimple_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
472 {
473 struct edge_prediction *i;
474 edge_prediction **preds = bb_predictions->get (bb);
475
476 if (!preds)
477 return false;
478
479 for (i = *preds; i; i = i->ep_next)
480 if (i->ep_predictor == predictor)
481 return true;
482 return false;
483 }
484
 485 /* Return true when the probability of the edge is reliable.
 486
 487 The profile guessing code is good at predicting branch outcomes (i.e.
 488 taken/not taken); it predicts them right slightly over 75% of the time.
 489 It is however notoriously poor at predicting the probability itself.
 490 In general the guessed profile appears a lot flatter (with probabilities
 491 closer to 50%) than reality, so it is a bad idea to use it to drive
 492 optimizations such as those disabling dynamic branch prediction for well
 493 predictable branches.
 494
 495 There are two exceptions - edges leading to noreturn calls and edges
 496 predicted by the number of iterations heuristics are predicted well.
 497 This predicate should be able to distinguish those, but at the moment it
 498 simply checks for the noreturn heuristic, which is the only one giving a
 499 probability over 99% or below 1%. In the future we might want to propagate
 500 reliability information across the CFG if we find it useful in multiple places. */
501 static bool
502 probability_reliable_p (int prob)
503 {
504 return (profile_status_for_fn (cfun) == PROFILE_READ
505 || (profile_status_for_fn (cfun) == PROFILE_GUESSED
506 && (prob <= HITRATE (1) || prob >= HITRATE (99))));
507 }
508
509 /* Same predicate as above, working on edges. */
510 bool
511 edge_probability_reliable_p (const_edge e)
512 {
513 return probability_reliable_p (e->probability);
514 }
515
516 /* Same predicate as edge_probability_reliable_p, working on notes. */
517 bool
518 br_prob_note_reliable_p (const_rtx note)
519 {
520 gcc_assert (REG_NOTE_KIND (note) == REG_BR_PROB);
521 return probability_reliable_p (XINT (note, 0));
522 }
523
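/* Attach a REG_BR_PRED note to conditional jump INSN, recording that
   PREDICTOR predicts it with probability PROBABILITY.  Does nothing
   unless flag_guess_branch_prob is set. */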
524 static void
525 predict_insn (rtx_insn *insn, enum br_predictor predictor, int probability)
526 {
527 gcc_assert (any_condjump_p (insn));
528 if (!flag_guess_branch_prob)
529 return;
530
531 add_reg_note (insn, REG_BR_PRED,
532 gen_rtx_CONCAT (VOIDmode,
533 GEN_INT ((int) predictor),
534 GEN_INT ((int) probability)));
535 }
536
537 /* Predict insn by given predictor. */
538
539 void
540 predict_insn_def (rtx_insn *insn, enum br_predictor predictor,
541 enum prediction taken)
542 {
543 int probability = predictor_info[(int) predictor].hitrate;
544
545 if (taken != TAKEN)
546 probability = REG_BR_PROB_BASE - probability;
547
548 predict_insn (insn, predictor, probability);
549 }
550
551 /* Predict edge E with given probability if possible. */
552
553 void
554 rtl_predict_edge (edge e, enum br_predictor predictor, int probability)
555 {
556 rtx_insn *last_insn;
557 last_insn = BB_END (e->src);
558
559 /* We can store the branch prediction information only about
560 conditional jumps. */
561 if (!any_condjump_p (last_insn))
562 return;
563
564 /* We always store probability of branching. */
565 if (e->flags & EDGE_FALLTHRU)
566 probability = REG_BR_PROB_BASE - probability;
567
568 predict_insn (last_insn, predictor, probability);
569 }
570
571 /* Predict edge E with the given PROBABILITY. */
572 void
573 gimple_predict_edge (edge e, enum br_predictor predictor, int probability)
574 {
575 gcc_assert (profile_status_for_fn (cfun) != PROFILE_GUESSED);
576 if ((e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun) && EDGE_COUNT (e->src->succs) >
577 1)
578 && flag_guess_branch_prob && optimize)
579 {
580 struct edge_prediction *i = XNEW (struct edge_prediction);
581 edge_prediction *&preds = bb_predictions->get_or_insert (e->src);
582
583 i->ep_next = preds;
584 preds = i;
585 i->ep_probability = probability;
586 i->ep_predictor = predictor;
587 i->ep_edge = e;
588 }
589 }
590
591 /* Remove all predictions on given basic block that are attached
592 to edge E. */
593 void
594 remove_predictions_associated_with_edge (edge e)
595 {
596 if (!bb_predictions)
597 return;
598
599 edge_prediction **preds = bb_predictions->get (e->src);
600
601 if (preds)
602 {
603 struct edge_prediction **prediction = preds;
604 struct edge_prediction *next;
605
606 while (*prediction)
607 {
608 if ((*prediction)->ep_edge == e)
609 {
610 next = (*prediction)->ep_next;
611 free (*prediction);
612 *prediction = next;
613 }
614 else
615 prediction = &((*prediction)->ep_next);
616 }
617 }
618 }
619
620 /* Clears the list of predictions stored for BB. */
621
622 static void
623 clear_bb_predictions (basic_block bb)
624 {
625 edge_prediction **preds = bb_predictions->get (bb);
626 struct edge_prediction *pred, *next;
627
628 if (!preds)
629 return;
630
631 for (pred = *preds; pred; pred = next)
632 {
633 next = pred->ep_next;
634 free (pred);
635 }
636 *preds = NULL;
637 }
638
 639 /* Return true when we can store a prediction on insn INSN.
 640 At the moment we represent predictions only on conditional
 641 jumps, not on computed jumps or other complicated cases. */
642 static bool
643 can_predict_insn_p (const rtx_insn *insn)
644 {
645 return (JUMP_P (insn)
646 && any_condjump_p (insn)
647 && EDGE_COUNT (BLOCK_FOR_INSN (insn)->succs) >= 2);
648 }
649
650 /* Predict edge E by given predictor if possible. */
651
652 void
653 predict_edge_def (edge e, enum br_predictor predictor,
654 enum prediction taken)
655 {
656 int probability = predictor_info[(int) predictor].hitrate;
657
658 if (taken != TAKEN)
659 probability = REG_BR_PROB_BASE - probability;
660
661 predict_edge (e, predictor, probability);
662 }
663
664 /* Invert all branch predictions or probability notes in the INSN. This needs
665 to be done each time we invert the condition used by the jump. */
666
667 void
668 invert_br_probabilities (rtx insn)
669 {
670 rtx note;
671
672 for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
673 if (REG_NOTE_KIND (note) == REG_BR_PROB)
674 XINT (note, 0) = REG_BR_PROB_BASE - XINT (note, 0);
675 else if (REG_NOTE_KIND (note) == REG_BR_PRED)
676 XEXP (XEXP (note, 0), 1)
677 = GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (XEXP (note, 0), 1)));
678 }
679
680 /* Dump information about the branch prediction to the output file. */
681
682 static void
683 dump_prediction (FILE *file, enum br_predictor predictor, int probability,
684 basic_block bb, int used)
685 {
686 edge e;
687 edge_iterator ei;
688
689 if (!file)
690 return;
691
692 FOR_EACH_EDGE (e, ei, bb->succs)
693 if (! (e->flags & EDGE_FALLTHRU))
694 break;
695
696 fprintf (file, " %s heuristics%s: %.1f%%",
697 predictor_info[predictor].name,
698 used ? "" : " (ignored)", probability * 100.0 / REG_BR_PROB_BASE);
699
700 if (bb->count)
701 {
702 fprintf (file, " exec %" PRId64, bb->count);
703 if (e)
704 {
705 fprintf (file, " hit %" PRId64, e->count);
706 fprintf (file, " (%.1f%%)", e->count * 100.0 / bb->count);
707 }
708 }
709
710 fprintf (file, "\n");
711 }
712
 713 /* We cannot predict the probabilities of the outgoing edges of BB. Set them
 714 evenly and hope for the best. */
715 static void
716 set_even_probabilities (basic_block bb)
717 {
718 int nedges = 0;
719 edge e;
720 edge_iterator ei;
721
722 FOR_EACH_EDGE (e, ei, bb->succs)
723 if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
724 nedges ++;
725 FOR_EACH_EDGE (e, ei, bb->succs)
726 if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
727 e->probability = (REG_BR_PROB_BASE + nedges / 2) / nedges;
728 else
729 e->probability = 0;
730 }
731
 732 /* Combine all REG_BR_PRED notes into a single probability and attach a REG_BR_PROB
 733 note if not already present. Remove the now useless REG_BR_PRED notes. */
734
735 static void
736 combine_predictions_for_insn (rtx_insn *insn, basic_block bb)
737 {
738 rtx prob_note;
739 rtx *pnote;
740 rtx note;
741 int best_probability = PROB_EVEN;
742 enum br_predictor best_predictor = END_PREDICTORS;
743 int combined_probability = REG_BR_PROB_BASE / 2;
744 int d;
745 bool first_match = false;
746 bool found = false;
747
748 if (!can_predict_insn_p (insn))
749 {
750 set_even_probabilities (bb);
751 return;
752 }
753
754 prob_note = find_reg_note (insn, REG_BR_PROB, 0);
755 pnote = &REG_NOTES (insn);
756 if (dump_file)
757 fprintf (dump_file, "Predictions for insn %i bb %i\n", INSN_UID (insn),
758 bb->index);
759
 760 /* We implement the "first match" heuristics and use the probability guessed
 761 by the predictor with the smallest index. */
762 for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
763 if (REG_NOTE_KIND (note) == REG_BR_PRED)
764 {
765 enum br_predictor predictor = ((enum br_predictor)
766 INTVAL (XEXP (XEXP (note, 0), 0)));
767 int probability = INTVAL (XEXP (XEXP (note, 0), 1));
768
769 found = true;
770 if (best_predictor > predictor)
771 best_probability = probability, best_predictor = predictor;
772
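/* Combine the probability P1 accumulated so far with this predictor's
   probability P2 using Dempster-Shafer theory:
   P = P1*P2 / (P1*P2 + (1-P1)*(1-P2)),
   computed below in REG_BR_PROB_BASE fixed point with D as the denominator. */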
773 d = (combined_probability * probability
774 + (REG_BR_PROB_BASE - combined_probability)
775 * (REG_BR_PROB_BASE - probability));
776
 777 /* Use FP math to avoid overflows of 32-bit integers. */
778 if (d == 0)
779 /* If one probability is 0% and one 100%, avoid division by zero. */
780 combined_probability = REG_BR_PROB_BASE / 2;
781 else
782 combined_probability = (((double) combined_probability) * probability
783 * REG_BR_PROB_BASE / d + 0.5);
784 }
785
 786 /* Decide which heuristic to use. In case we didn't match anything,
 787 use the no_prediction heuristic; in case we did match, use either
 788 first match or Dempster-Shafer theory depending on the flags. */
789
790 if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
791 first_match = true;
792
793 if (!found)
794 dump_prediction (dump_file, PRED_NO_PREDICTION,
795 combined_probability, bb, true);
796 else
797 {
798 dump_prediction (dump_file, PRED_DS_THEORY, combined_probability,
799 bb, !first_match);
800 dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability,
801 bb, first_match);
802 }
803
804 if (first_match)
805 combined_probability = best_probability;
806 dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);
807
808 while (*pnote)
809 {
810 if (REG_NOTE_KIND (*pnote) == REG_BR_PRED)
811 {
812 enum br_predictor predictor = ((enum br_predictor)
813 INTVAL (XEXP (XEXP (*pnote, 0), 0)));
814 int probability = INTVAL (XEXP (XEXP (*pnote, 0), 1));
815
816 dump_prediction (dump_file, predictor, probability, bb,
817 !first_match || best_predictor == predictor);
818 *pnote = XEXP (*pnote, 1);
819 }
820 else
821 pnote = &XEXP (*pnote, 1);
822 }
823
824 if (!prob_note)
825 {
826 add_int_reg_note (insn, REG_BR_PROB, combined_probability);
827
 828 /* Save the prediction into the CFG in case we are seeing a non-degenerate
 829 conditional jump. */
830 if (!single_succ_p (bb))
831 {
832 BRANCH_EDGE (bb)->probability = combined_probability;
833 FALLTHRU_EDGE (bb)->probability
834 = REG_BR_PROB_BASE - combined_probability;
835 }
836 }
837 else if (!single_succ_p (bb))
838 {
839 int prob = XINT (prob_note, 0);
840
841 BRANCH_EDGE (bb)->probability = prob;
842 FALLTHRU_EDGE (bb)->probability = REG_BR_PROB_BASE - prob;
843 }
844 else
845 single_succ_edge (bb)->probability = REG_BR_PROB_BASE;
846 }
847
 848 /* Combine predictions into a single probability and store them into the CFG.
 849 Remove the now useless prediction entries. */
850
851 static void
852 combine_predictions_for_bb (basic_block bb)
853 {
854 int best_probability = PROB_EVEN;
855 enum br_predictor best_predictor = END_PREDICTORS;
856 int combined_probability = REG_BR_PROB_BASE / 2;
857 int d;
858 bool first_match = false;
859 bool found = false;
860 struct edge_prediction *pred;
861 int nedges = 0;
862 edge e, first = NULL, second = NULL;
863 edge_iterator ei;
864
865 FOR_EACH_EDGE (e, ei, bb->succs)
866 if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
867 {
868 nedges ++;
869 if (first && !second)
870 second = e;
871 if (!first)
872 first = e;
873 }
874
 875 /* When there is no successor or only one choice, prediction is easy.
 876
 877 We are lazy for now and predict only basic blocks with two outgoing
 878 edges. It is possible to predict the generic case too, but we have to
 879 ignore the first match heuristics and do more involved combining. Implement
 880 this later. */
881 if (nedges != 2)
882 {
883 if (!bb->count)
884 set_even_probabilities (bb);
885 clear_bb_predictions (bb);
886 if (dump_file)
887 fprintf (dump_file, "%i edges in bb %i predicted to even probabilities\n",
888 nedges, bb->index);
889 return;
890 }
891
892 if (dump_file)
893 fprintf (dump_file, "Predictions for bb %i\n", bb->index);
894
895 edge_prediction **preds = bb_predictions->get (bb);
896 if (preds)
897 {
 898 /* We implement the "first match" heuristics and use the probability guessed
 899 by the predictor with the smallest index. */
900 for (pred = *preds; pred; pred = pred->ep_next)
901 {
902 enum br_predictor predictor = pred->ep_predictor;
903 int probability = pred->ep_probability;
904
905 if (pred->ep_edge != first)
906 probability = REG_BR_PROB_BASE - probability;
907
908 found = true;
 909 /* First match heuristics would be wildly confused if we predicted
 910 both directions. */
911 if (best_predictor > predictor)
912 {
913 struct edge_prediction *pred2;
914 int prob = probability;
915
916 for (pred2 = (struct edge_prediction *) *preds;
917 pred2; pred2 = pred2->ep_next)
918 if (pred2 != pred && pred2->ep_predictor == pred->ep_predictor)
919 {
920 int probability2 = pred->ep_probability;
921
922 if (pred2->ep_edge != first)
923 probability2 = REG_BR_PROB_BASE - probability2;
924
925 if ((probability < REG_BR_PROB_BASE / 2) !=
926 (probability2 < REG_BR_PROB_BASE / 2))
927 break;
928
 929 /* If the same predictor later gave a better result, go for it! */
930 if ((probability >= REG_BR_PROB_BASE / 2 && (probability2 > probability))
931 || (probability <= REG_BR_PROB_BASE / 2 && (probability2 < probability)))
932 prob = probability2;
933 }
934 if (!pred2)
935 best_probability = prob, best_predictor = predictor;
936 }
937
938 d = (combined_probability * probability
939 + (REG_BR_PROB_BASE - combined_probability)
940 * (REG_BR_PROB_BASE - probability));
941
 942 /* Use FP math to avoid overflows of 32-bit integers. */
943 if (d == 0)
944 /* If one probability is 0% and one 100%, avoid division by zero. */
945 combined_probability = REG_BR_PROB_BASE / 2;
946 else
947 combined_probability = (((double) combined_probability)
948 * probability
949 * REG_BR_PROB_BASE / d + 0.5);
950 }
951 }
952
 953 /* Decide which heuristic to use. In case we didn't match anything,
 954 use the no_prediction heuristic; in case we did match, use either
 955 first match or Dempster-Shafer theory depending on the flags. */
956
957 if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
958 first_match = true;
959
960 if (!found)
961 dump_prediction (dump_file, PRED_NO_PREDICTION, combined_probability, bb, true);
962 else
963 {
964 dump_prediction (dump_file, PRED_DS_THEORY, combined_probability, bb,
965 !first_match);
966 dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability, bb,
967 first_match);
968 }
969
970 if (first_match)
971 combined_probability = best_probability;
972 dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);
973
974 if (preds)
975 {
976 for (pred = (struct edge_prediction *) *preds; pred; pred = pred->ep_next)
977 {
978 enum br_predictor predictor = pred->ep_predictor;
979 int probability = pred->ep_probability;
980
981 if (pred->ep_edge != EDGE_SUCC (bb, 0))
982 probability = REG_BR_PROB_BASE - probability;
983 dump_prediction (dump_file, predictor, probability, bb,
984 !first_match || best_predictor == predictor);
985 }
986 }
987 clear_bb_predictions (bb);
988
989 if (!bb->count)
990 {
991 first->probability = combined_probability;
992 second->probability = REG_BR_PROB_BASE - combined_probability;
993 }
994 }
995
 996 /* Check if T1 and T2 satisfy the IV_COMPARE condition.
 997 Return the SSA_NAME if the condition is satisfied, NULL otherwise.
998
999 T1 and T2 should be one of the following cases:
1000 1. T1 is SSA_NAME, T2 is NULL
1001 2. T1 is SSA_NAME, T2 is INTEGER_CST between [-4, 4]
1002 3. T2 is SSA_NAME, T1 is INTEGER_CST between [-4, 4] */
1003
1004 static tree
1005 strips_small_constant (tree t1, tree t2)
1006 {
1007 tree ret = NULL;
1008 int value = 0;
1009
1010 if (!t1)
1011 return NULL;
1012 else if (TREE_CODE (t1) == SSA_NAME)
1013 ret = t1;
1014 else if (tree_fits_shwi_p (t1))
1015 value = tree_to_shwi (t1);
1016 else
1017 return NULL;
1018
1019 if (!t2)
1020 return ret;
1021 else if (tree_fits_shwi_p (t2))
1022 value = tree_to_shwi (t2);
1023 else if (TREE_CODE (t2) == SSA_NAME)
1024 {
1025 if (ret)
1026 return NULL;
1027 else
1028 ret = t2;
1029 }
1030
1031 if (value <= 4 && value >= -4)
1032 return ret;
1033 else
1034 return NULL;
1035 }
1036
1037 /* Return the SSA_NAME in T or T's operands.
1038 Return NULL if SSA_NAME cannot be found. */
1039
1040 static tree
1041 get_base_value (tree t)
1042 {
1043 if (TREE_CODE (t) == SSA_NAME)
1044 return t;
1045
1046 if (!BINARY_CLASS_P (t))
1047 return NULL;
1048
1049 switch (TREE_OPERAND_LENGTH (t))
1050 {
1051 case 1:
1052 return strips_small_constant (TREE_OPERAND (t, 0), NULL);
1053 case 2:
1054 return strips_small_constant (TREE_OPERAND (t, 0),
1055 TREE_OPERAND (t, 1));
1056 default:
1057 return NULL;
1058 }
1059 }
1060
 1061 /* Check the compare STMT in LOOP. If it compares an induction
 1062 variable to a loop invariant, return true, and save
 1063 LOOP_INVARIANT, COMPARE_CODE and LOOP_STEP.
 1064 Otherwise return false and set LOOP_INVARIANT to NULL. */
1065
1066 static bool
1067 is_comparison_with_loop_invariant_p (gcond *stmt, struct loop *loop,
1068 tree *loop_invariant,
1069 enum tree_code *compare_code,
1070 tree *loop_step,
1071 tree *loop_iv_base)
1072 {
1073 tree op0, op1, bound, base;
1074 affine_iv iv0, iv1;
1075 enum tree_code code;
1076 tree step;
1077
1078 code = gimple_cond_code (stmt);
1079 *loop_invariant = NULL;
1080
1081 switch (code)
1082 {
1083 case GT_EXPR:
1084 case GE_EXPR:
1085 case NE_EXPR:
1086 case LT_EXPR:
1087 case LE_EXPR:
1088 case EQ_EXPR:
1089 break;
1090
1091 default:
1092 return false;
1093 }
1094
1095 op0 = gimple_cond_lhs (stmt);
1096 op1 = gimple_cond_rhs (stmt);
1097
1098 if ((TREE_CODE (op0) != SSA_NAME && TREE_CODE (op0) != INTEGER_CST)
1099 || (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op1) != INTEGER_CST))
1100 return false;
1101 if (!simple_iv (loop, loop_containing_stmt (stmt), op0, &iv0, true))
1102 return false;
1103 if (!simple_iv (loop, loop_containing_stmt (stmt), op1, &iv1, true))
1104 return false;
1105 if (TREE_CODE (iv0.step) != INTEGER_CST
1106 || TREE_CODE (iv1.step) != INTEGER_CST)
1107 return false;
1108 if ((integer_zerop (iv0.step) && integer_zerop (iv1.step))
1109 || (!integer_zerop (iv0.step) && !integer_zerop (iv1.step)))
1110 return false;
1111
1112 if (integer_zerop (iv0.step))
1113 {
1114 if (code != NE_EXPR && code != EQ_EXPR)
1115 code = invert_tree_comparison (code, false);
1116 bound = iv0.base;
1117 base = iv1.base;
1118 if (tree_fits_shwi_p (iv1.step))
1119 step = iv1.step;
1120 else
1121 return false;
1122 }
1123 else
1124 {
1125 bound = iv1.base;
1126 base = iv0.base;
1127 if (tree_fits_shwi_p (iv0.step))
1128 step = iv0.step;
1129 else
1130 return false;
1131 }
1132
1133 if (TREE_CODE (bound) != INTEGER_CST)
1134 bound = get_base_value (bound);
1135 if (!bound)
1136 return false;
1137 if (TREE_CODE (base) != INTEGER_CST)
1138 base = get_base_value (base);
1139 if (!base)
1140 return false;
1141
1142 *loop_invariant = bound;
1143 *compare_code = code;
1144 *loop_step = step;
1145 *loop_iv_base = base;
1146 return true;
1147 }
1148
1149 /* Compare two SSA_NAMEs: returns TRUE if T1 and T2 are value coherent. */
1150
1151 static bool
1152 expr_coherent_p (tree t1, tree t2)
1153 {
1154 gimple stmt;
1155 tree ssa_name_1 = NULL;
1156 tree ssa_name_2 = NULL;
1157
1158 gcc_assert (TREE_CODE (t1) == SSA_NAME || TREE_CODE (t1) == INTEGER_CST);
1159 gcc_assert (TREE_CODE (t2) == SSA_NAME || TREE_CODE (t2) == INTEGER_CST);
1160
1161 if (t1 == t2)
1162 return true;
1163
1164 if (TREE_CODE (t1) == INTEGER_CST && TREE_CODE (t2) == INTEGER_CST)
1165 return true;
1166 if (TREE_CODE (t1) == INTEGER_CST || TREE_CODE (t2) == INTEGER_CST)
1167 return false;
1168
1169 /* Check to see if t1 is expressed/defined with t2. */
1170 stmt = SSA_NAME_DEF_STMT (t1);
1171 gcc_assert (stmt != NULL);
1172 if (is_gimple_assign (stmt))
1173 {
1174 ssa_name_1 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
1175 if (ssa_name_1 && ssa_name_1 == t2)
1176 return true;
1177 }
1178
1179 /* Check to see if t2 is expressed/defined with t1. */
1180 stmt = SSA_NAME_DEF_STMT (t2);
1181 gcc_assert (stmt != NULL);
1182 if (is_gimple_assign (stmt))
1183 {
1184 ssa_name_2 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
1185 if (ssa_name_2 && ssa_name_2 == t1)
1186 return true;
1187 }
1188
1189 /* Compare if t1 and t2's def_stmts are identical. */
1190 if (ssa_name_2 != NULL && ssa_name_1 == ssa_name_2)
1191 return true;
1192 else
1193 return false;
1194 }
1195
1196 /* Predict branch probability of BB when BB contains a branch that compares
1197 an induction variable in LOOP with LOOP_IV_BASE_VAR to LOOP_BOUND_VAR. The
1198 loop exit is compared using LOOP_BOUND_CODE, with step of LOOP_BOUND_STEP.
1199
1200 E.g.
1201 for (int i = 0; i < bound; i++) {
1202 if (i < bound - 2)
1203 computation_1();
1204 else
1205 computation_2();
1206 }
1207
1208 In this loop, we will predict the branch inside the loop to be taken. */
1209
1210 static void
1211 predict_iv_comparison (struct loop *loop, basic_block bb,
1212 tree loop_bound_var,
1213 tree loop_iv_base_var,
1214 enum tree_code loop_bound_code,
1215 int loop_bound_step)
1216 {
1217 gimple stmt;
1218 tree compare_var, compare_base;
1219 enum tree_code compare_code;
1220 tree compare_step_var;
1221 edge then_edge;
1222 edge_iterator ei;
1223
1224 if (predicted_by_p (bb, PRED_LOOP_ITERATIONS_GUESSED)
1225 || predicted_by_p (bb, PRED_LOOP_ITERATIONS)
1226 || predicted_by_p (bb, PRED_LOOP_EXIT))
1227 return;
1228
1229 stmt = last_stmt (bb);
1230 if (!stmt || gimple_code (stmt) != GIMPLE_COND)
1231 return;
1232 if (!is_comparison_with_loop_invariant_p (as_a <gcond *> (stmt),
1233 loop, &compare_var,
1234 &compare_code,
1235 &compare_step_var,
1236 &compare_base))
1237 return;
1238
1239 /* Find the taken edge. */
1240 FOR_EACH_EDGE (then_edge, ei, bb->succs)
1241 if (then_edge->flags & EDGE_TRUE_VALUE)
1242 break;
1243
1244 /* When comparing an IV to a loop invariant, NE is more likely to be
1245 taken while EQ is more likely to be not-taken. */
1246 if (compare_code == NE_EXPR)
1247 {
1248 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1249 return;
1250 }
1251 else if (compare_code == EQ_EXPR)
1252 {
1253 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1254 return;
1255 }
1256
1257 if (!expr_coherent_p (loop_iv_base_var, compare_base))
1258 return;
1259
1260 /* If loop bound, base and compare bound are all constants, we can
1261 calculate the probability directly. */
1262 if (tree_fits_shwi_p (loop_bound_var)
1263 && tree_fits_shwi_p (compare_var)
1264 && tree_fits_shwi_p (compare_base))
1265 {
1266 int probability;
1267 bool overflow, overall_overflow = false;
1268 widest_int compare_count, tem;
1269
1270 /* (loop_bound - base) / compare_step */
1271 tem = wi::sub (wi::to_widest (loop_bound_var),
1272 wi::to_widest (compare_base), SIGNED, &overflow);
1273 overall_overflow |= overflow;
1274 widest_int loop_count = wi::div_trunc (tem,
1275 wi::to_widest (compare_step_var),
1276 SIGNED, &overflow);
1277 overall_overflow |= overflow;
1278
1279 if (!wi::neg_p (wi::to_widest (compare_step_var))
1280 ^ (compare_code == LT_EXPR || compare_code == LE_EXPR))
1281 {
1282 /* (loop_bound - compare_bound) / compare_step */
1283 tem = wi::sub (wi::to_widest (loop_bound_var),
1284 wi::to_widest (compare_var), SIGNED, &overflow);
1285 overall_overflow |= overflow;
1286 compare_count = wi::div_trunc (tem, wi::to_widest (compare_step_var),
1287 SIGNED, &overflow);
1288 overall_overflow |= overflow;
1289 }
1290 else
1291 {
1292 /* (compare_bound - base) / compare_step */
1293 tem = wi::sub (wi::to_widest (compare_var),
1294 wi::to_widest (compare_base), SIGNED, &overflow);
1295 overall_overflow |= overflow;
1296 compare_count = wi::div_trunc (tem, wi::to_widest (compare_step_var),
1297 SIGNED, &overflow);
1298 overall_overflow |= overflow;
1299 }
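/* The probability that the comparison holds is approximately
   compare_count / loop_count, scaled to REG_BR_PROB_BASE and clamped
   to [0, REG_BR_PROB_BASE] by the code below. */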
1300 if (compare_code == LE_EXPR || compare_code == GE_EXPR)
1301 ++compare_count;
1302 if (loop_bound_code == LE_EXPR || loop_bound_code == GE_EXPR)
1303 ++loop_count;
1304 if (wi::neg_p (compare_count))
1305 compare_count = 0;
1306 if (wi::neg_p (loop_count))
1307 loop_count = 0;
1308 if (loop_count == 0)
1309 probability = 0;
1310 else if (wi::cmps (compare_count, loop_count) == 1)
1311 probability = REG_BR_PROB_BASE;
1312 else
1313 {
1314 tem = compare_count * REG_BR_PROB_BASE;
1315 tem = wi::udiv_trunc (tem, loop_count);
1316 probability = tem.to_uhwi ();
1317 }
1318
1319 if (!overall_overflow)
1320 predict_edge (then_edge, PRED_LOOP_IV_COMPARE, probability);
1321
1322 return;
1323 }
1324
1325 if (expr_coherent_p (loop_bound_var, compare_var))
1326 {
1327 if ((loop_bound_code == LT_EXPR || loop_bound_code == LE_EXPR)
1328 && (compare_code == LT_EXPR || compare_code == LE_EXPR))
1329 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1330 else if ((loop_bound_code == GT_EXPR || loop_bound_code == GE_EXPR)
1331 && (compare_code == GT_EXPR || compare_code == GE_EXPR))
1332 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1333 else if (loop_bound_code == NE_EXPR)
1334 {
1335 /* If the loop backedge condition is "(i != bound)", we do
1336 the comparison based on the step of IV:
1337 * step < 0 : backedge condition is like (i > bound)
1338 * step > 0 : backedge condition is like (i < bound) */
1339 gcc_assert (loop_bound_step != 0);
1340 if (loop_bound_step > 0
1341 && (compare_code == LT_EXPR
1342 || compare_code == LE_EXPR))
1343 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1344 else if (loop_bound_step < 0
1345 && (compare_code == GT_EXPR
1346 || compare_code == GE_EXPR))
1347 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1348 else
1349 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1350 }
1351 else
 1352 /* The branch is predicted not-taken if loop_bound_code is
 1353 the opposite of compare_code. */
1354 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1355 }
1356 else if (expr_coherent_p (loop_iv_base_var, compare_var))
1357 {
1358 /* For cases like:
1359 for (i = s; i < h; i++)
1360 if (i > s + 2) ....
1361 The branch should be predicted taken. */
1362 if (loop_bound_step > 0
1363 && (compare_code == GT_EXPR || compare_code == GE_EXPR))
1364 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1365 else if (loop_bound_step < 0
1366 && (compare_code == LT_EXPR || compare_code == LE_EXPR))
1367 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1368 else
1369 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1370 }
1371 }
1372
 1373 /* Predict for extra loop exits that will lead to EXIT_EDGE. The extra loop
 1374 exits result from short-circuit conditions that will generate an
 1375 if_tmp. E.g.:
1376
1377 if (foo() || global > 10)
1378 break;
1379
1380 This will be translated into:
1381
1382 BB3:
1383 loop header...
1384 BB4:
1385 if foo() goto BB6 else goto BB5
1386 BB5:
1387 if global > 10 goto BB6 else goto BB7
1388 BB6:
1389 goto BB7
1390 BB7:
1391 iftmp = (PHI 0(BB5), 1(BB6))
1392 if iftmp == 1 goto BB8 else goto BB3
1393 BB8:
1394 outside of the loop...
1395
1396 The edge BB7->BB8 is loop exit because BB8 is outside of the loop.
1397 From the dataflow, we can infer that BB4->BB6 and BB5->BB6 are also loop
1398 exits. This function takes BB7->BB8 as input, and finds out the extra loop
1399 exits to predict them using PRED_LOOP_EXIT. */
1400
1401 static void
1402 predict_extra_loop_exits (edge exit_edge)
1403 {
1404 unsigned i;
1405 bool check_value_one;
1406 gimple lhs_def_stmt;
1407 gphi *phi_stmt;
1408 tree cmp_rhs, cmp_lhs;
1409 gimple last;
1410 gcond *cmp_stmt;
1411
1412 last = last_stmt (exit_edge->src);
1413 if (!last)
1414 return;
1415 cmp_stmt = dyn_cast <gcond *> (last);
1416 if (!cmp_stmt)
1417 return;
1418
1419 cmp_rhs = gimple_cond_rhs (cmp_stmt);
1420 cmp_lhs = gimple_cond_lhs (cmp_stmt);
1421 if (!TREE_CONSTANT (cmp_rhs)
1422 || !(integer_zerop (cmp_rhs) || integer_onep (cmp_rhs)))
1423 return;
1424 if (TREE_CODE (cmp_lhs) != SSA_NAME)
1425 return;
1426
1427 /* If check_value_one is true, only the phi_args with value '1' will lead
1428 to loop exit. Otherwise, only the phi_args with value '0' will lead to
1429 loop exit. */
1430 check_value_one = (((integer_onep (cmp_rhs))
1431 ^ (gimple_cond_code (cmp_stmt) == EQ_EXPR))
1432 ^ ((exit_edge->flags & EDGE_TRUE_VALUE) != 0));
1433
1434 lhs_def_stmt = SSA_NAME_DEF_STMT (cmp_lhs);
1435 if (!lhs_def_stmt)
1436 return;
1437
1438 phi_stmt = dyn_cast <gphi *> (lhs_def_stmt);
1439 if (!phi_stmt)
1440 return;
1441
1442 for (i = 0; i < gimple_phi_num_args (phi_stmt); i++)
1443 {
1444 edge e1;
1445 edge_iterator ei;
1446 tree val = gimple_phi_arg_def (phi_stmt, i);
1447 edge e = gimple_phi_arg_edge (phi_stmt, i);
1448
1449 if (!TREE_CONSTANT (val) || !(integer_zerop (val) || integer_onep (val)))
1450 continue;
1451 if ((check_value_one ^ integer_onep (val)) == 1)
1452 continue;
1453 if (EDGE_COUNT (e->src->succs) != 1)
1454 {
1455 predict_paths_leading_to_edge (e, PRED_LOOP_EXIT, NOT_TAKEN);
1456 continue;
1457 }
1458
1459 FOR_EACH_EDGE (e1, ei, e->src->preds)
1460 predict_paths_leading_to_edge (e1, PRED_LOOP_EXIT, NOT_TAKEN);
1461 }
1462 }
1463
1464 /* Predict edge probabilities by exploiting loop structure. */
1465
1466 static void
1467 predict_loops (void)
1468 {
1469 struct loop *loop;
1470
1471 /* Try to predict out blocks in a loop that are not part of a
1472 natural loop. */
1473 FOR_EACH_LOOP (loop, 0)
1474 {
1475 basic_block bb, *bbs;
1476 unsigned j, n_exits;
1477 vec<edge> exits;
1478 struct tree_niter_desc niter_desc;
1479 edge ex;
1480 struct nb_iter_bound *nb_iter;
1481 enum tree_code loop_bound_code = ERROR_MARK;
1482 tree loop_bound_step = NULL;
1483 tree loop_bound_var = NULL;
1484 tree loop_iv_base = NULL;
1485 gcond *stmt = NULL;
1486
1487 exits = get_loop_exit_edges (loop);
1488 n_exits = exits.length ();
1489 if (!n_exits)
1490 {
1491 exits.release ();
1492 continue;
1493 }
1494
1495 FOR_EACH_VEC_ELT (exits, j, ex)
1496 {
1497 tree niter = NULL;
1498 HOST_WIDE_INT nitercst;
1499 int max = PARAM_VALUE (PARAM_MAX_PREDICTED_ITERATIONS);
1500 int probability;
1501 enum br_predictor predictor;
1502
1503 predict_extra_loop_exits (ex);
1504
1505 if (number_of_iterations_exit (loop, ex, &niter_desc, false, false))
1506 niter = niter_desc.niter;
1507 if (!niter || TREE_CODE (niter_desc.niter) != INTEGER_CST)
1508 niter = loop_niter_by_eval (loop, ex);
1509
1510 if (TREE_CODE (niter) == INTEGER_CST)
1511 {
1512 if (tree_fits_uhwi_p (niter)
1513 && max
1514 && compare_tree_int (niter, max - 1) == -1)
1515 nitercst = tree_to_uhwi (niter) + 1;
1516 else
1517 nitercst = max;
1518 predictor = PRED_LOOP_ITERATIONS;
1519 }
1520 /* If we have just one exit and we can derive some information about
1521 the number of iterations of the loop from the statements inside
1522 the loop, use it to predict this exit. */
1523 else if (n_exits == 1)
1524 {
1525 nitercst = estimated_stmt_executions_int (loop);
1526 if (nitercst < 0)
1527 continue;
1528 if (nitercst > max)
1529 nitercst = max;
1530
1531 predictor = PRED_LOOP_ITERATIONS_GUESSED;
1532 }
1533 else
1534 continue;
1535
1536 /* If the prediction for number of iterations is zero, do not
1537 predict the exit edges. */
1538 if (nitercst == 0)
1539 continue;
1540
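/* Predict the exit to be taken roughly once per NITERCST iterations,
   i.e. with probability 1/nitercst rounded to the nearest value. */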
1541 probability = ((REG_BR_PROB_BASE + nitercst / 2) / nitercst);
1542 predict_edge (ex, predictor, probability);
1543 }
1544 exits.release ();
1545
1546 /* Find information about loop bound variables. */
1547 for (nb_iter = loop->bounds; nb_iter;
1548 nb_iter = nb_iter->next)
1549 if (nb_iter->stmt
1550 && gimple_code (nb_iter->stmt) == GIMPLE_COND)
1551 {
1552 stmt = as_a <gcond *> (nb_iter->stmt);
1553 break;
1554 }
1555 if (!stmt && last_stmt (loop->header)
1556 && gimple_code (last_stmt (loop->header)) == GIMPLE_COND)
1557 stmt = as_a <gcond *> (last_stmt (loop->header));
1558 if (stmt)
1559 is_comparison_with_loop_invariant_p (stmt, loop,
1560 &loop_bound_var,
1561 &loop_bound_code,
1562 &loop_bound_step,
1563 &loop_iv_base);
1564
1565 bbs = get_loop_body (loop);
1566
1567 for (j = 0; j < loop->num_nodes; j++)
1568 {
1569 int header_found = 0;
1570 edge e;
1571 edge_iterator ei;
1572
1573 bb = bbs[j];
1574
 1575 /* Bypass loop heuristics on continue statements. These
 1576 statements construct loops via "non-loop" constructs
 1577 in the source language and are better handled
 1578 separately. */
1579 if (predicted_by_p (bb, PRED_CONTINUE))
1580 continue;
1581
1582 /* Loop branch heuristics - predict an edge back to a
1583 loop's head as taken. */
1584 if (bb == loop->latch)
1585 {
1586 e = find_edge (loop->latch, loop->header);
1587 if (e)
1588 {
1589 header_found = 1;
1590 predict_edge_def (e, PRED_LOOP_BRANCH, TAKEN);
1591 }
1592 }
1593
 1594 /* Loop exit heuristics - predict an edge exiting the loop as not taken
 1595 if the conditional has no loop header successor. */
1596 if (!header_found
1597 /* If we already used more reliable loop exit predictors, do not
1598 bother with PRED_LOOP_EXIT. */
1599 && !predicted_by_p (bb, PRED_LOOP_ITERATIONS_GUESSED)
1600 && !predicted_by_p (bb, PRED_LOOP_ITERATIONS))
1601 {
 1602 /* For loops with many exits we don't want to predict all exits
 1603 with a pretty large probability, because if all exits are
 1604 considered in a row, the loop would be predicted to iterate
 1605 almost never. The code dividing the probability by the number of
 1606 exits is very rough. It should compute the number of exits
 1607 taken in each path through the function (not the overall number
 1608 of exits that might be a lot higher for loops with wide switch
 1609 statements in them) and compute the n-th root.
 1610
 1611 We limit the minimal probability to 2% to avoid
 1612 EDGE_PROBABILITY_RELIABLE from trusting the branch prediction
 1613 as this was causing a regression in the perl benchmark containing such
 1614 a wide loop. */
1615
1616 int probability = ((REG_BR_PROB_BASE
1617 - predictor_info [(int) PRED_LOOP_EXIT].hitrate)
1618 / n_exits);
1619 if (probability < HITRATE (2))
1620 probability = HITRATE (2);
1621 FOR_EACH_EDGE (e, ei, bb->succs)
1622 if (e->dest->index < NUM_FIXED_BLOCKS
1623 || !flow_bb_inside_loop_p (loop, e->dest))
1624 predict_edge (e, PRED_LOOP_EXIT, probability);
1625 }
1626 if (loop_bound_var)
1627 predict_iv_comparison (loop, bb, loop_bound_var, loop_iv_base,
1628 loop_bound_code,
1629 tree_to_shwi (loop_bound_step));
1630 }
1631
1632 /* Free basic blocks from get_loop_body. */
1633 free (bbs);
1634 }
1635 }
1636
1637 /* Attempt to predict probabilities of BB outgoing edges using local
1638 properties. */
1639 static void
1640 bb_estimate_probability_locally (basic_block bb)
1641 {
1642 rtx_insn *last_insn = BB_END (bb);
1643 rtx cond;
1644
1645 if (! can_predict_insn_p (last_insn))
1646 return;
1647 cond = get_condition (last_insn, NULL, false, false);
1648 if (! cond)
1649 return;
1650
1651 /* Try "pointer heuristic."
1652 A comparison ptr == 0 is predicted as false.
1653 Similarly, a comparison ptr1 == ptr2 is predicted as false. */
1654 if (COMPARISON_P (cond)
1655 && ((REG_P (XEXP (cond, 0)) && REG_POINTER (XEXP (cond, 0)))
1656 || (REG_P (XEXP (cond, 1)) && REG_POINTER (XEXP (cond, 1)))))
1657 {
1658 if (GET_CODE (cond) == EQ)
1659 predict_insn_def (last_insn, PRED_POINTER, NOT_TAKEN);
1660 else if (GET_CODE (cond) == NE)
1661 predict_insn_def (last_insn, PRED_POINTER, TAKEN);
1662 }
1663 else
1664
1665 /* Try "opcode heuristic."
1666 EQ tests are usually false and NE tests are usually true. Also,
1667 most quantities are positive, so we can make the appropriate guesses
1668 about signed comparisons against zero. */
1669 switch (GET_CODE (cond))
1670 {
1671 case CONST_INT:
1672 /* Unconditional branch. */
1673 predict_insn_def (last_insn, PRED_UNCONDITIONAL,
1674 cond == const0_rtx ? NOT_TAKEN : TAKEN);
1675 break;
1676
1677 case EQ:
1678 case UNEQ:
 1679 /* Floating point comparisons appear to behave in a very
 1680 unpredictable way because of the special role of = tests in
 1681 FP code. */
1682 if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
1683 ;
1684 /* Comparisons with 0 are often used for booleans and there is
1685 nothing useful to predict about them. */
1686 else if (XEXP (cond, 1) == const0_rtx
1687 || XEXP (cond, 0) == const0_rtx)
1688 ;
1689 else
1690 predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, NOT_TAKEN);
1691 break;
1692
1693 case NE:
1694 case LTGT:
 1695 /* Floating point comparisons appear to behave in a very
 1696 unpredictable way because of the special role of = tests in
 1697 FP code. */
1698 if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
1699 ;
1700 /* Comparisons with 0 are often used for booleans and there is
1701 nothing useful to predict about them. */
1702 else if (XEXP (cond, 1) == const0_rtx
1703 || XEXP (cond, 0) == const0_rtx)
1704 ;
1705 else
1706 predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, TAKEN);
1707 break;
1708
1709 case ORDERED:
1710 predict_insn_def (last_insn, PRED_FPOPCODE, TAKEN);
1711 break;
1712
1713 case UNORDERED:
1714 predict_insn_def (last_insn, PRED_FPOPCODE, NOT_TAKEN);
1715 break;
1716
1717 case LE:
1718 case LT:
1719 if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
1720 || XEXP (cond, 1) == constm1_rtx)
1721 predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, NOT_TAKEN);
1722 break;
1723
1724 case GE:
1725 case GT:
1726 if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
1727 || XEXP (cond, 1) == constm1_rtx)
1728 predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, TAKEN);
1729 break;
1730
1731 default:
1732 break;
1733 }
1734 }
1735
1736 /* Set edge->probability for each successor edge of BB. */
1737 void
1738 guess_outgoing_edge_probabilities (basic_block bb)
1739 {
1740 bb_estimate_probability_locally (bb);
1741 combine_predictions_for_insn (BB_END (bb), bb);
1742 }
1743 \f
1744 static tree expr_expected_value (tree, bitmap, enum br_predictor *predictor);
1745
1746 /* Helper function for expr_expected_value. */
1747
1748 static tree
1749 expr_expected_value_1 (tree type, tree op0, enum tree_code code,
1750 tree op1, bitmap visited, enum br_predictor *predictor)
1751 {
1752 gimple def;
1753
1754 if (predictor)
1755 *predictor = PRED_UNCONDITIONAL;
1756
1757 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1758 {
1759 if (TREE_CONSTANT (op0))
1760 return op0;
1761
1762 if (code != SSA_NAME)
1763 return NULL_TREE;
1764
1765 def = SSA_NAME_DEF_STMT (op0);
1766
1767 /* If we were already here, break the infinite cycle. */
1768 if (!bitmap_set_bit (visited, SSA_NAME_VERSION (op0)))
1769 return NULL;
1770
1771 if (gimple_code (def) == GIMPLE_PHI)
1772 {
 1773 /* All the arguments of the PHI node must have the same expected
 1774 constant value. */
1775 int i, n = gimple_phi_num_args (def);
1776 tree val = NULL, new_val;
1777
1778 for (i = 0; i < n; i++)
1779 {
1780 tree arg = PHI_ARG_DEF (def, i);
1781 enum br_predictor predictor2;
1782
 1783 /* If this PHI has itself as an argument, we cannot
 1784 determine the expected value of this argument. However,
 1785 if we can find an expected constant value for the other
 1786 PHI args then we can still be sure that this is
 1787 likely a constant. So be optimistic and just
 1788 continue with the next argument. */
1789 if (arg == PHI_RESULT (def))
1790 continue;
1791
1792 new_val = expr_expected_value (arg, visited, &predictor2);
1793
 1794 /* It is difficult to combine value predictors. Simply assume
 1795 that the later predictor is weaker and take its prediction. */
1796 if (predictor && *predictor < predictor2)
1797 *predictor = predictor2;
1798 if (!new_val)
1799 return NULL;
1800 if (!val)
1801 val = new_val;
1802 else if (!operand_equal_p (val, new_val, false))
1803 return NULL;
1804 }
1805 return val;
1806 }
1807 if (is_gimple_assign (def))
1808 {
1809 if (gimple_assign_lhs (def) != op0)
1810 return NULL;
1811
1812 return expr_expected_value_1 (TREE_TYPE (gimple_assign_lhs (def)),
1813 gimple_assign_rhs1 (def),
1814 gimple_assign_rhs_code (def),
1815 gimple_assign_rhs2 (def),
1816 visited, predictor);
1817 }
1818
1819 if (is_gimple_call (def))
1820 {
1821 tree decl = gimple_call_fndecl (def);
1822 if (!decl)
1823 {
1824 if (gimple_call_internal_p (def)
1825 && gimple_call_internal_fn (def) == IFN_BUILTIN_EXPECT)
1826 {
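/* IFN_BUILTIN_EXPECT takes three arguments: the actual value, the
   expected value and the predictor to use. */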
1827 gcc_assert (gimple_call_num_args (def) == 3);
1828 tree val = gimple_call_arg (def, 0);
1829 if (TREE_CONSTANT (val))
1830 return val;
1831 if (predictor)
1832 {
1833 tree val2 = gimple_call_arg (def, 2);
1834 gcc_assert (TREE_CODE (val2) == INTEGER_CST
1835 && tree_fits_uhwi_p (val2)
1836 && tree_to_uhwi (val2) < END_PREDICTORS);
1837 *predictor = (enum br_predictor) tree_to_uhwi (val2);
1838 }
1839 return gimple_call_arg (def, 1);
1840 }
1841 return NULL;
1842 }
1843 if (DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
1844 switch (DECL_FUNCTION_CODE (decl))
1845 {
1846 case BUILT_IN_EXPECT:
1847 {
1848 tree val;
1849 if (gimple_call_num_args (def) != 2)
1850 return NULL;
1851 val = gimple_call_arg (def, 0);
1852 if (TREE_CONSTANT (val))
1853 return val;
1854 if (predictor)
1855 *predictor = PRED_BUILTIN_EXPECT;
1856 return gimple_call_arg (def, 1);
1857 }
1858
1859 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_N:
1860 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_1:
1861 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_2:
1862 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_4:
1863 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_8:
1864 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_16:
1865 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE:
1866 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_N:
1867 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_1:
1868 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_2:
1869 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_4:
1870 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_8:
1871 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_16:
1872 /* Assume that any given atomic operation has low contention,
1873 and thus the compare-and-swap operation succeeds. */
1874 if (predictor)
1875 *predictor = PRED_COMPARE_AND_SWAP;
1876 return boolean_true_node;
1877 default:
1878 break;
1879 }
1880 }
1881
1882 return NULL;
1883 }
1884
1885 if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS)
1886 {
1887 tree res;
1888 enum br_predictor predictor2;
1889 op0 = expr_expected_value (op0, visited, predictor);
1890 if (!op0)
1891 return NULL;
1892 op1 = expr_expected_value (op1, visited, &predictor2);
1893 if (predictor && *predictor < predictor2)
1894 *predictor = predictor2;
1895 if (!op1)
1896 return NULL;
1897 res = fold_build2 (code, type, op0, op1);
1898 if (TREE_CONSTANT (res))
1899 return res;
1900 return NULL;
1901 }
1902 if (get_gimple_rhs_class (code) == GIMPLE_UNARY_RHS)
1903 {
1904 tree res;
1905 op0 = expr_expected_value (op0, visited, predictor);
1906 if (!op0)
1907 return NULL;
1908 res = fold_build1 (code, type, op0);
1909 if (TREE_CONSTANT (res))
1910 return res;
1911 return NULL;
1912 }
1913 return NULL;
1914 }
1915
1916 /* Return the constant value EXPR is likely to have at execution time, or
1917 NULL if unknown. The function is used by the builtin_expect branch
1918 predictor, so the evidence must come from that construct and from any additional constant folding.
1919
1920 We may want to implement more involved value guesses (such as value range
1921 propagation based prediction), but such tricks should go into a new
1922 implementation. */
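/* For instance (an illustrative sketch, not an exact dump), for source like

     if (__builtin_expect (ptr != NULL, 1))
       ...

   the SSA name tested by the GIMPLE_COND is defined by the __builtin_expect
   call, so the expected value 1 is propagated through the comparison and the
   branch ends up predicted by PRED_BUILTIN_EXPECT.  */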
1923
1924 static tree
1925 expr_expected_value (tree expr, bitmap visited,
1926 enum br_predictor *predictor)
1927 {
1928 enum tree_code code;
1929 tree op0, op1;
1930
1931 if (TREE_CONSTANT (expr))
1932 {
1933 if (predictor)
1934 *predictor = PRED_UNCONDITIONAL;
1935 return expr;
1936 }
1937
1938 extract_ops_from_tree (expr, &code, &op0, &op1);
1939 return expr_expected_value_1 (TREE_TYPE (expr),
1940 op0, code, op1, visited, predictor);
1941 }
1942 \f
1943 /* Predict using opcode of the last statement in basic block. */
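/* For example (illustrative only): "if (p == NULL)" is predicted not taken
   by the pointer heuristic below, and "if (n < 0)" is predicted not taken
   by the opcode heuristic, since most signed quantities are non-negative.  */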
1944 static void
1945 tree_predict_by_opcode (basic_block bb)
1946 {
1947 gimple stmt = last_stmt (bb);
1948 edge then_edge;
1949 tree op0, op1;
1950 tree type;
1951 tree val;
1952 enum tree_code cmp;
1953 bitmap visited;
1954 edge_iterator ei;
1955 enum br_predictor predictor;
1956
1957 if (!stmt || gimple_code (stmt) != GIMPLE_COND)
1958 return;
1959 FOR_EACH_EDGE (then_edge, ei, bb->succs)
1960 if (then_edge->flags & EDGE_TRUE_VALUE)
1961 break;
1962 op0 = gimple_cond_lhs (stmt);
1963 op1 = gimple_cond_rhs (stmt);
1964 cmp = gimple_cond_code (stmt);
1965 type = TREE_TYPE (op0);
1966 visited = BITMAP_ALLOC (NULL);
1967 val = expr_expected_value_1 (boolean_type_node, op0, cmp, op1, visited,
1968 &predictor);
1969 BITMAP_FREE (visited);
1970 if (val && TREE_CODE (val) == INTEGER_CST)
1971 {
1972 if (predictor == PRED_BUILTIN_EXPECT)
1973 {
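/* BUILTIN_EXPECT_PROBABILITY is the --param builtin-expect-probability
   value (90 by default), so e.g. "if (__builtin_expect (cond, 0))"
   predicts the THEN edge with a hit rate of 100 - 90 = 10 percent.  */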
1974 int percent = PARAM_VALUE (BUILTIN_EXPECT_PROBABILITY);
1975
1976 gcc_assert (percent >= 0 && percent <= 100);
1977 if (integer_zerop (val))
1978 percent = 100 - percent;
1979 predict_edge (then_edge, PRED_BUILTIN_EXPECT, HITRATE (percent));
1980 }
1981 else
1982 predict_edge (then_edge, predictor,
1983 integer_zerop (val) ? NOT_TAKEN : TAKEN);
1984 }
1985 /* Try "pointer heuristic."
1986 A comparison ptr == 0 is predicted as false.
1987 Similarly, a comparison ptr1 == ptr2 is predicted as false. */
1988 if (POINTER_TYPE_P (type))
1989 {
1990 if (cmp == EQ_EXPR)
1991 predict_edge_def (then_edge, PRED_TREE_POINTER, NOT_TAKEN);
1992 else if (cmp == NE_EXPR)
1993 predict_edge_def (then_edge, PRED_TREE_POINTER, TAKEN);
1994 }
1995 else
1996
1997 /* Try "opcode heuristic."
1998 EQ tests are usually false and NE tests are usually true. Also,
1999 most quantities are positive, so we can make the appropriate guesses
2000 about signed comparisons against zero. */
2001 switch (cmp)
2002 {
2003 case EQ_EXPR:
2004 case UNEQ_EXPR:
2005 /* Floating point comparisons appear to behave in a very
2006 unpredictable way because of the special role of equality tests in
2007 FP code. */
2008 if (FLOAT_TYPE_P (type))
2009 ;
2010 /* Comparisons with 0 are often used for booleans and there is
2011 nothing useful to predict about them. */
2012 else if (integer_zerop (op0) || integer_zerop (op1))
2013 ;
2014 else
2015 predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, NOT_TAKEN);
2016 break;
2017
2018 case NE_EXPR:
2019 case LTGT_EXPR:
2020 /* Floating point comparisons appear to behave in a very
2021 unpredictable way because of the special role of equality tests in
2022 FP code. */
2023 if (FLOAT_TYPE_P (type))
2024 ;
2025 /* Comparisons with 0 are often used for booleans and there is
2026 nothing useful to predict about them. */
2027 else if (integer_zerop (op0)
2028 || integer_zerop (op1))
2029 ;
2030 else
2031 predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, TAKEN);
2032 break;
2033
2034 case ORDERED_EXPR:
2035 predict_edge_def (then_edge, PRED_TREE_FPOPCODE, TAKEN);
2036 break;
2037
2038 case UNORDERED_EXPR:
2039 predict_edge_def (then_edge, PRED_TREE_FPOPCODE, NOT_TAKEN);
2040 break;
2041
2042 case LE_EXPR:
2043 case LT_EXPR:
2044 if (integer_zerop (op1)
2045 || integer_onep (op1)
2046 || integer_all_onesp (op1)
2047 || real_zerop (op1)
2048 || real_onep (op1)
2049 || real_minus_onep (op1))
2050 predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, NOT_TAKEN);
2051 break;
2052
2053 case GE_EXPR:
2054 case GT_EXPR:
2055 if (integer_zerop (op1)
2056 || integer_onep (op1)
2057 || integer_all_onesp (op1)
2058 || real_zerop (op1)
2059 || real_onep (op1)
2060 || real_minus_onep (op1))
2061 predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, TAKEN);
2062 break;
2063
2064 default:
2065 break;
2066 }
2067 }
2068
2069 /* Try to guess whether the returned value represents an error code. */
2070
2071 static enum br_predictor
2072 return_prediction (tree val, enum prediction *prediction)
2073 {
2074 /* VOID. */
2075 if (!val)
2076 return PRED_NO_PREDICTION;
2077 /* Different heuristics for pointers and scalars. */
2078 if (POINTER_TYPE_P (TREE_TYPE (val)))
2079 {
2080 /* NULL is usually not returned. */
2081 if (integer_zerop (val))
2082 {
2083 *prediction = NOT_TAKEN;
2084 return PRED_NULL_RETURN;
2085 }
2086 }
2087 else if (INTEGRAL_TYPE_P (TREE_TYPE (val)))
2088 {
2089 /* Negative return values are often used to indicate
2090 errors. */
2091 if (TREE_CODE (val) == INTEGER_CST
2092 && tree_int_cst_sgn (val) < 0)
2093 {
2094 *prediction = NOT_TAKEN;
2095 return PRED_NEGATIVE_RETURN;
2096 }
2097 /* Constant return values seem to be commonly taken.
2098 Zero/one often represent booleans so exclude them from the
2099 heuristics. */
2100 if (TREE_CONSTANT (val)
2101 && (!integer_zerop (val) && !integer_onep (val)))
2102 {
2103 *prediction = TAKEN;
2104 return PRED_CONST_RETURN;
2105 }
2106 }
2107 return PRED_NO_PREDICTION;
2108 }
2109
2110 /* Find the basic block with the return expression, look up its possible
2111 return values and try to apply the RETURN_PREDICTION heuristics. */
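/* For instance (a sketch), in

     int f (void)
     {
       if (!initialize ())
         return -1;
       ...
       return 0;
     }

   the PHI node feeding the single merged return typically combines -1 and 0;
   the edge bringing the negative constant is predicted as unlikely via
   PRED_NEGATIVE_RETURN, while the 0 value yields no prediction.  */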
2112 static void
2113 apply_return_prediction (void)
2114 {
2115 greturn *return_stmt = NULL;
2116 tree return_val;
2117 edge e;
2118 gphi *phi;
2119 int phi_num_args, i;
2120 enum br_predictor pred;
2121 enum prediction direction;
2122 edge_iterator ei;
2123
2124 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
2125 {
2126 gimple last = last_stmt (e->src);
2127 if (last
2128 && gimple_code (last) == GIMPLE_RETURN)
2129 {
2130 return_stmt = as_a <greturn *> (last);
2131 break;
2132 }
2133 }
2134 if (!e)
2135 return;
2136 return_val = gimple_return_retval (return_stmt);
2137 if (!return_val)
2138 return;
2139 if (TREE_CODE (return_val) != SSA_NAME
2140 || !SSA_NAME_DEF_STMT (return_val)
2141 || gimple_code (SSA_NAME_DEF_STMT (return_val)) != GIMPLE_PHI)
2142 return;
2143 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (return_val));
2144 phi_num_args = gimple_phi_num_args (phi);
2145 pred = return_prediction (PHI_ARG_DEF (phi, 0), &direction);
2146
2147 /* Avoid the degenerate case where all return values from the function
2148 belong to the same category (i.e. they are all positive constants)
2149 so we can hardly say anything useful about them. */
2150 for (i = 1; i < phi_num_args; i++)
2151 if (pred != return_prediction (PHI_ARG_DEF (phi, i), &direction))
2152 break;
2153 if (i != phi_num_args)
2154 for (i = 0; i < phi_num_args; i++)
2155 {
2156 pred = return_prediction (PHI_ARG_DEF (phi, i), &direction);
2157 if (pred != PRED_NO_PREDICTION)
2158 predict_paths_leading_to_edge (gimple_phi_arg_edge (phi, i), pred,
2159 direction);
2160 }
2161 }
2162
2163 /* Look for basic blocks that contain unlikely-to-happen events
2164 (such as noreturn calls) and mark all paths leading to execution
2165 of these basic blocks as unlikely. */
2166
2167 static void
2168 tree_bb_level_predictions (void)
2169 {
2170 basic_block bb;
2171 bool has_return_edges = false;
2172 edge e;
2173 edge_iterator ei;
2174
2175 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
2176 if (!(e->flags & (EDGE_ABNORMAL | EDGE_FAKE | EDGE_EH)))
2177 {
2178 has_return_edges = true;
2179 break;
2180 }
2181
2182 apply_return_prediction ();
2183
2184 FOR_EACH_BB_FN (bb, cfun)
2185 {
2186 gimple_stmt_iterator gsi;
2187
2188 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2189 {
2190 gimple stmt = gsi_stmt (gsi);
2191 tree decl;
2192
2193 if (is_gimple_call (stmt))
2194 {
2195 if ((gimple_call_flags (stmt) & ECF_NORETURN)
2196 && has_return_edges)
2197 predict_paths_leading_to (bb, PRED_NORETURN,
2198 NOT_TAKEN);
2199 decl = gimple_call_fndecl (stmt);
2200 if (decl
2201 && lookup_attribute ("cold",
2202 DECL_ATTRIBUTES (decl)))
2203 predict_paths_leading_to (bb, PRED_COLD_FUNCTION,
2204 NOT_TAKEN);
2205 }
2206 else if (gimple_code (stmt) == GIMPLE_PREDICT)
2207 {
2208 predict_paths_leading_to (bb, gimple_predict_predictor (stmt),
2209 gimple_predict_outcome (stmt));
2210 /* Keep GIMPLE_PREDICT around so early inlining will propagate
2211 hints to callers. */
2212 }
2213 }
2214 }
2215 }
2216
2217 #ifdef ENABLE_CHECKING
2218
2219 /* Callback for hash_map::traverse, asserts that the pointer map is
2220 empty. */
2221
2222 bool
2223 assert_is_empty (const_basic_block const &, edge_prediction *const &value,
2224 void *)
2225 {
2226 gcc_assert (!value);
2227 return false;
2228 }
2229 #endif
2230
2231 /* Predict branch probabilities and estimate profile for basic block BB. */
2232
2233 static void
2234 tree_estimate_probability_bb (basic_block bb)
2235 {
2236 edge e;
2237 edge_iterator ei;
2238 gimple last;
2239
2240 FOR_EACH_EDGE (e, ei, bb->succs)
2241 {
2242 /* Predict edges to user labels with attributes. */
2243 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
2244 {
2245 gimple_stmt_iterator gi;
2246 for (gi = gsi_start_bb (e->dest); !gsi_end_p (gi); gsi_next (&gi))
2247 {
2248 glabel *label_stmt = dyn_cast <glabel *> (gsi_stmt (gi));
2249 tree decl;
2250
2251 if (!label_stmt)
2252 break;
2253 decl = gimple_label_label (label_stmt);
2254 if (DECL_ARTIFICIAL (decl))
2255 continue;
2256
2257 /* Finally, we have a user-defined label. */
2258 if (lookup_attribute ("cold", DECL_ATTRIBUTES (decl)))
2259 predict_edge_def (e, PRED_COLD_LABEL, NOT_TAKEN);
2260 else if (lookup_attribute ("hot", DECL_ATTRIBUTES (decl)))
2261 predict_edge_def (e, PRED_HOT_LABEL, TAKEN);
2262 }
2263 }
2264
2265 /* Predict early returns to be probable, as we've already taken
2266 care of error returns, and the other cases are often used for
2267 fast paths through the function.
2268
2269 Since we've already removed the return statements, we are
2270 looking for CFG like:
2271
2272 if (conditional)
2273 {
2274 ..
2275 goto return_block
2276 }
2277 some other blocks
2278 return_block:
2279 return_stmt. */
2280 if (e->dest != bb->next_bb
2281 && e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2282 && single_succ_p (e->dest)
2283 && single_succ_edge (e->dest)->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)
2284 && (last = last_stmt (e->dest)) != NULL
2285 && gimple_code (last) == GIMPLE_RETURN)
2286 {
2287 edge e1;
2288 edge_iterator ei1;
2289
2290 if (single_succ_p (bb))
2291 {
2292 FOR_EACH_EDGE (e1, ei1, bb->preds)
2293 if (!predicted_by_p (e1->src, PRED_NULL_RETURN)
2294 && !predicted_by_p (e1->src, PRED_CONST_RETURN)
2295 && !predicted_by_p (e1->src, PRED_NEGATIVE_RETURN))
2296 predict_edge_def (e1, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
2297 }
2298 else
2299 if (!predicted_by_p (e->src, PRED_NULL_RETURN)
2300 && !predicted_by_p (e->src, PRED_CONST_RETURN)
2301 && !predicted_by_p (e->src, PRED_NEGATIVE_RETURN))
2302 predict_edge_def (e, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
2303 }
2304
2305 /* Look for the block we are guarding (i.e. we dominate it,
2306 but it doesn't postdominate us). */
2307 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun) && e->dest != bb
2308 && dominated_by_p (CDI_DOMINATORS, e->dest, e->src)
2309 && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e->dest))
2310 {
2311 gimple_stmt_iterator bi;
2312
2313 /* The call heuristic claims that a guarded function call
2314 is improbable. This is because such calls are often used
2315 to signal exceptional situations such as printing error
2316 messages. */
2317 for (bi = gsi_start_bb (e->dest); !gsi_end_p (bi);
2318 gsi_next (&bi))
2319 {
2320 gimple stmt = gsi_stmt (bi);
2321 if (is_gimple_call (stmt)
2322 /* Constant and pure calls are hardly used to signal
2323 something exceptional. */
2324 && gimple_has_side_effects (stmt))
2325 {
2326 predict_edge_def (e, PRED_CALL, NOT_TAKEN);
2327 break;
2328 }
2329 }
2330 }
2331 }
2332 tree_predict_by_opcode (bb);
2333 }
2334
2335 /* Predict branch probabilities and estimate profile of the tree CFG.
2336 This function can be called from the loop optimizers to recompute
2337 the profile information. */
2338
2339 void
2340 tree_estimate_probability (void)
2341 {
2342 basic_block bb;
2343
2344 add_noreturn_fake_exit_edges ();
2345 connect_infinite_loops_to_exit ();
2346 /* We use loop_niter_by_eval, which requires that the loops have
2347 preheaders. */
2348 create_preheaders (CP_SIMPLE_PREHEADERS);
2349 calculate_dominance_info (CDI_POST_DOMINATORS);
2350
2351 bb_predictions = new hash_map<const_basic_block, edge_prediction *>;
2352 tree_bb_level_predictions ();
2353 record_loop_exits ();
2354
2355 if (number_of_loops (cfun) > 1)
2356 predict_loops ();
2357
2358 FOR_EACH_BB_FN (bb, cfun)
2359 tree_estimate_probability_bb (bb);
2360
2361 FOR_EACH_BB_FN (bb, cfun)
2362 combine_predictions_for_bb (bb);
2363
2364 #ifdef ENABLE_CHECKING
2365 bb_predictions->traverse<void *, assert_is_empty> (NULL);
2366 #endif
2367 delete bb_predictions;
2368 bb_predictions = NULL;
2369
2370 estimate_bb_frequencies (false);
2371 free_dominance_info (CDI_POST_DOMINATORS);
2372 remove_fake_exit_edges ();
2373 }
2374 \f
2375 /* Predict edges to successors of CUR whose sources are not postdominated by
2376 BB by PRED and recurse to all postdominators. */
2377
2378 static void
2379 predict_paths_for_bb (basic_block cur, basic_block bb,
2380 enum br_predictor pred,
2381 enum prediction taken,
2382 bitmap visited)
2383 {
2384 edge e;
2385 edge_iterator ei;
2386 basic_block son;
2387
2388 /* We are looking for all edges forming an edge cut induced by the
2389 set of all blocks postdominated by BB. */
2390 FOR_EACH_EDGE (e, ei, cur->preds)
2391 if (e->src->index >= NUM_FIXED_BLOCKS
2392 && !dominated_by_p (CDI_POST_DOMINATORS, e->src, bb))
2393 {
2394 edge e2;
2395 edge_iterator ei2;
2396 bool found = false;
2397
2398 /* Ignore fake edges and eh, we predict them as not taken anyway. */
2399 if (e->flags & (EDGE_EH | EDGE_FAKE))
2400 continue;
2401 gcc_assert (bb == cur || dominated_by_p (CDI_POST_DOMINATORS, cur, bb));
2402
2403 /* See if there is an edge from e->src that is not abnormal
2404 and does not lead to BB. */
2405 FOR_EACH_EDGE (e2, ei2, e->src->succs)
2406 if (e2 != e
2407 && !(e2->flags & (EDGE_EH | EDGE_FAKE))
2408 && !dominated_by_p (CDI_POST_DOMINATORS, e2->dest, bb))
2409 {
2410 found = true;
2411 break;
2412 }
2413
2414 /* If there is a non-abnormal path leaving e->src, predict the edge
2415 using the predictor. Otherwise we need to look for paths
2416 leading to e->src.
2417
2418 The second case may lead to an infinite loop if we are predicting
2419 regions that are only reachable by abnormal edges. We simply
2420 prevent visiting a given BB twice. */
2421 if (found)
2422 predict_edge_def (e, pred, taken);
2423 else if (bitmap_set_bit (visited, e->src->index))
2424 predict_paths_for_bb (e->src, e->src, pred, taken, visited);
2425 }
2426 for (son = first_dom_son (CDI_POST_DOMINATORS, cur);
2427 son;
2428 son = next_dom_son (CDI_POST_DOMINATORS, son))
2429 predict_paths_for_bb (son, bb, pred, taken, visited);
2430 }
2431
2432 /* Set branch probabilities of all paths leading to BB according to
2433 the predictor PRED and the expected outcome TAKEN. */
2434
2435 static void
2436 predict_paths_leading_to (basic_block bb, enum br_predictor pred,
2437 enum prediction taken)
2438 {
2439 bitmap visited = BITMAP_ALLOC (NULL);
2440 predict_paths_for_bb (bb, bb, pred, taken, visited);
2441 BITMAP_FREE (visited);
2442 }
2443
2444 /* Like predict_paths_leading_to but take edge instead of basic block. */
2445
2446 static void
2447 predict_paths_leading_to_edge (edge e, enum br_predictor pred,
2448 enum prediction taken)
2449 {
2450 bool has_nonloop_edge = false;
2451 edge_iterator ei;
2452 edge e2;
2453
2454 basic_block bb = e->src;
2455 FOR_EACH_EDGE (e2, ei, bb->succs)
2456 if (e2->dest != e->src && e2->dest != e->dest
2457 && !(e2->flags & (EDGE_EH | EDGE_FAKE))
2458 && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e2->dest))
2459 {
2460 has_nonloop_edge = true;
2461 break;
2462 }
2463 if (!has_nonloop_edge)
2464 {
2465 bitmap visited = BITMAP_ALLOC (NULL);
2466 predict_paths_for_bb (bb, bb, pred, taken, visited);
2467 BITMAP_FREE (visited);
2468 }
2469 else
2470 predict_edge_def (e, pred, taken);
2471 }
2472 \f
2473 /* This is used to carry information about basic blocks. It is
2474 attached to the AUX field of the standard CFG block. */
2475
2476 struct block_info
2477 {
2478 /* Estimated frequency of execution of basic_block. */
2479 sreal frequency;
2480
2481 /* To keep queue of basic blocks to process. */
2482 basic_block next;
2483
2484 /* Number of predecessors we need to visit first. */
2485 int npredecessors;
2486 };
2487
2488 /* Similar information for edges. */
2489 struct edge_prob_info
2490 {
2491 /* In case the edge is a loopback edge, the probability that the edge will
2492 be reached provided the header is. The estimated number of iterations of
2493 the loop can then be computed as 1 / (1 - back_edge_prob). */
2494 sreal back_edge_prob;
2495 /* True if the edge is a loopback edge in the natural loop. */
2496 unsigned int back_edge:1;
2497 };
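/* For example (illustrative): if the back edge of a loop is taken with
   probability 0.9 whenever the header executes, back_edge_prob is 0.9 and the
   expected number of iterations is 1 / (1 - 0.9) = 10.  */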
2498
2499 #define BLOCK_INFO(B) ((block_info *) (B)->aux)
2500 #undef EDGE_INFO
2501 #define EDGE_INFO(E) ((edge_prob_info *) (E)->aux)
2502
2503 /* Helper function for estimate_bb_frequencies.
2504 Propagate the frequencies in blocks marked in
2505 TOVISIT, starting in HEAD. */
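/* Blocks are processed in topological order within the region, ignoring back
   edges: a block is queued once all of its non-back-edge predecessors have
   been processed, and loops are handled by accumulating the cyclic
   probability contributed by their back edges.  */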
2506
2507 static void
2508 propagate_freq (basic_block head, bitmap tovisit)
2509 {
2510 basic_block bb;
2511 basic_block last;
2512 unsigned i;
2513 edge e;
2514 basic_block nextbb;
2515 bitmap_iterator bi;
2516
2517 /* For each basic block we need to visit, count the number of its
2518 predecessors that we need to visit first. */
2519 EXECUTE_IF_SET_IN_BITMAP (tovisit, 0, i, bi)
2520 {
2521 edge_iterator ei;
2522 int count = 0;
2523
2524 bb = BASIC_BLOCK_FOR_FN (cfun, i);
2525
2526 FOR_EACH_EDGE (e, ei, bb->preds)
2527 {
2528 bool visit = bitmap_bit_p (tovisit, e->src->index);
2529
2530 if (visit && !(e->flags & EDGE_DFS_BACK))
2531 count++;
2532 else if (visit && dump_file && !EDGE_INFO (e)->back_edge)
2533 fprintf (dump_file,
2534 "Irreducible region hit, ignoring edge %i->%i\n",
2535 e->src->index, bb->index);
2536 }
2537 BLOCK_INFO (bb)->npredecessors = count;
2538 /* When the function never returns, we will never process the exit block. */
2539 if (!count && bb == EXIT_BLOCK_PTR_FOR_FN (cfun))
2540 bb->count = bb->frequency = 0;
2541 }
2542
2543 BLOCK_INFO (head)->frequency = 1;
2544 last = head;
2545 for (bb = head; bb; bb = nextbb)
2546 {
2547 edge_iterator ei;
2548 sreal cyclic_probability = 0;
2549 sreal frequency = 0;
2550
2551 nextbb = BLOCK_INFO (bb)->next;
2552 BLOCK_INFO (bb)->next = NULL;
2553
2554 /* Compute frequency of basic block. */
2555 if (bb != head)
2556 {
2557 #ifdef ENABLE_CHECKING
2558 FOR_EACH_EDGE (e, ei, bb->preds)
2559 gcc_assert (!bitmap_bit_p (tovisit, e->src->index)
2560 || (e->flags & EDGE_DFS_BACK));
2561 #endif
2562
2563 FOR_EACH_EDGE (e, ei, bb->preds)
2564 if (EDGE_INFO (e)->back_edge)
2565 {
2566 cyclic_probability += EDGE_INFO (e)->back_edge_prob;
2567 }
2568 else if (!(e->flags & EDGE_DFS_BACK))
2569 {
2570 /* frequency += (e->probability
2571 * BLOCK_INFO (e->src)->frequency /
2572 REG_BR_PROB_BASE); */
2573
2574 sreal tmp = e->probability;
2575 tmp *= BLOCK_INFO (e->src)->frequency;
2576 tmp *= real_inv_br_prob_base;
2577 frequency += tmp;
2578 }
2579
2580 if (cyclic_probability == 0)
2581 {
2582 BLOCK_INFO (bb)->frequency = frequency;
2583 }
2584 else
2585 {
2586 if (cyclic_probability > real_almost_one)
2587 cyclic_probability = real_almost_one;
2588
2589 /* BLOCK_INFO (bb)->frequency = frequency
2590 / (1 - cyclic_probability) */
2591
2592 cyclic_probability = sreal (1) - cyclic_probability;
2593 BLOCK_INFO (bb)->frequency = frequency / cyclic_probability;
2594 }
2595 }
2596
2597 bitmap_clear_bit (tovisit, bb->index);
2598
2599 e = find_edge (bb, head);
2600 if (e)
2601 {
2602 /* EDGE_INFO (e)->back_edge_prob
2603 = ((e->probability * BLOCK_INFO (bb)->frequency)
2604 / REG_BR_PROB_BASE); */
2605
2606 sreal tmp = e->probability;
2607 tmp *= BLOCK_INFO (bb)->frequency;
2608 EDGE_INFO (e)->back_edge_prob = tmp * real_inv_br_prob_base;
2609 }
2610
2611 /* Propagate to successor blocks. */
2612 FOR_EACH_EDGE (e, ei, bb->succs)
2613 if (!(e->flags & EDGE_DFS_BACK)
2614 && BLOCK_INFO (e->dest)->npredecessors)
2615 {
2616 BLOCK_INFO (e->dest)->npredecessors--;
2617 if (!BLOCK_INFO (e->dest)->npredecessors)
2618 {
2619 if (!nextbb)
2620 nextbb = e->dest;
2621 else
2622 BLOCK_INFO (last)->next = e->dest;
2623
2624 last = e->dest;
2625 }
2626 }
2627 }
2628 }
2629
2630 /* Estimate frequencies in loops at same nest level. */
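/* Inner loops are processed first, so by the time a loop's own body is
   propagated, its inner loops already have their back edge probabilities
   (and hence expected trip counts) computed.  */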
2631
2632 static void
2633 estimate_loops_at_level (struct loop *first_loop)
2634 {
2635 struct loop *loop;
2636
2637 for (loop = first_loop; loop; loop = loop->next)
2638 {
2639 edge e;
2640 basic_block *bbs;
2641 unsigned i;
2642 bitmap tovisit = BITMAP_ALLOC (NULL);
2643
2644 estimate_loops_at_level (loop->inner);
2645
2646 /* Find current loop back edge and mark it. */
2647 e = loop_latch_edge (loop);
2648 EDGE_INFO (e)->back_edge = 1;
2649
2650 bbs = get_loop_body (loop);
2651 for (i = 0; i < loop->num_nodes; i++)
2652 bitmap_set_bit (tovisit, bbs[i]->index);
2653 free (bbs);
2654 propagate_freq (loop->header, tovisit);
2655 BITMAP_FREE (tovisit);
2656 }
2657 }
2658
2659 /* Propagates frequencies through structure of loops. */
2660
2661 static void
2662 estimate_loops (void)
2663 {
2664 bitmap tovisit = BITMAP_ALLOC (NULL);
2665 basic_block bb;
2666
2667 /* Start by estimating the frequencies in the loops. */
2668 if (number_of_loops (cfun) > 1)
2669 estimate_loops_at_level (current_loops->tree_root->inner);
2670
2671 /* Now propagate the frequencies through all the blocks. */
2672 FOR_ALL_BB_FN (bb, cfun)
2673 {
2674 bitmap_set_bit (tovisit, bb->index);
2675 }
2676 propagate_freq (ENTRY_BLOCK_PTR_FOR_FN (cfun), tovisit);
2677 BITMAP_FREE (tovisit);
2678 }
2679
2680 /* Drop the profile for NODE to guessed, and update its frequency based on
2681 whether it is expected to be hot given the CALL_COUNT. */
2682
2683 static void
2684 drop_profile (struct cgraph_node *node, gcov_type call_count)
2685 {
2686 struct function *fn = DECL_STRUCT_FUNCTION (node->decl);
2687 /* In the case where this was called by another function with a
2688 dropped profile, call_count will be 0. Since there are no
2689 non-zero call counts to this function, we don't know for sure
2690 whether it is hot, and therefore it will be marked normal below. */
2691 bool hot = maybe_hot_count_p (NULL, call_count);
2692
2693 if (dump_file)
2694 fprintf (dump_file,
2695 "Dropping 0 profile for %s/%i. %s based on calls.\n",
2696 node->name (), node->order,
2697 hot ? "Function is hot" : "Function is normal");
2698 /* We only expect to miss profiles for functions that are reached
2699 via non-zero call edges in cases where the function may have
2700 been linked from another module or library (COMDATs and extern
2701 templates). See the comments below for handle_missing_profiles.
2702 Also, only warn in cases where the missing counts exceed the
2703 number of training runs. In certain cases with an execv followed
2704 by a no-return call the profile for the no-return call is not
2705 dumped and there can be a mismatch. */
2706 if (!DECL_COMDAT (node->decl) && !DECL_EXTERNAL (node->decl)
2707 && call_count > profile_info->runs)
2708 {
2709 if (flag_profile_correction)
2710 {
2711 if (dump_file)
2712 fprintf (dump_file,
2713 "Missing counts for called function %s/%i\n",
2714 node->name (), node->order);
2715 }
2716 else
2717 warning (0, "Missing counts for called function %s/%i",
2718 node->name (), node->order);
2719 }
2720
2721 profile_status_for_fn (fn)
2722 = (flag_guess_branch_prob ? PROFILE_GUESSED : PROFILE_ABSENT);
2723 node->frequency
2724 = hot ? NODE_FREQUENCY_HOT : NODE_FREQUENCY_NORMAL;
2725 }
2726
2727 /* In the case of COMDAT routines, multiple object files will contain the same
2728 function and the linker will select one for the binary. In that case
2729 all the other copies from the profile-instrumented binary will be missing
2730 profile counts. Look for cases where this happened, due to non-zero
2731 call counts going to 0-count functions, and drop the profile to guessed
2732 so that we can use the estimated probabilities and avoid optimizing only
2733 for size.
2734
2735 The other case where the profile may be missing is when the routine
2736 is not going to be emitted to the object file, e.g. for "extern template"
2737 class methods. Those will be marked DECL_EXTERNAL. Emit a warning in
2738 all other cases of non-zero calls to 0-count functions. */
2739
2740 void
2741 handle_missing_profiles (void)
2742 {
2743 struct cgraph_node *node;
2744 int unlikely_count_fraction = PARAM_VALUE (UNLIKELY_BB_COUNT_FRACTION);
2745 vec<struct cgraph_node *> worklist;
2746 worklist.create (64);
2747
2748 /* See if a 0-count function has non-0-count callers. In this case we
2749 lost some profile. Drop its function profile to PROFILE_GUESSED. */
2750 FOR_EACH_DEFINED_FUNCTION (node)
2751 {
2752 struct cgraph_edge *e;
2753 gcov_type call_count = 0;
2754 gcov_type max_tp_first_run = 0;
2755 struct function *fn = DECL_STRUCT_FUNCTION (node->decl);
2756
2757 if (node->count)
2758 continue;
2759 for (e = node->callers; e; e = e->next_caller)
2760 {
2761 call_count += e->count;
2762
2763 if (e->caller->tp_first_run > max_tp_first_run)
2764 max_tp_first_run = e->caller->tp_first_run;
2765 }
2766
2767 /* If the time profile is missing, assign the maximum that comes from
2768 the caller functions. */
2769 if (!node->tp_first_run && max_tp_first_run)
2770 node->tp_first_run = max_tp_first_run + 1;
2771
2772 if (call_count
2773 && fn && fn->cfg
2774 && (call_count * unlikely_count_fraction >= profile_info->runs))
2775 {
2776 drop_profile (node, call_count);
2777 worklist.safe_push (node);
2778 }
2779 }
2780
2781 /* Propagate the profile dropping to other 0-count COMDATs that are
2782 potentially called by COMDATs we already dropped the profile on. */
2783 while (worklist.length () > 0)
2784 {
2785 struct cgraph_edge *e;
2786
2787 node = worklist.pop ();
2788 for (e = node->callees; e; e = e->next_callee)
2789 {
2790 struct cgraph_node *callee = e->callee;
2791 struct function *fn = DECL_STRUCT_FUNCTION (callee->decl);
2792
2793 if (callee->count > 0)
2794 continue;
2795 if (DECL_COMDAT (callee->decl) && fn && fn->cfg
2796 && profile_status_for_fn (fn) == PROFILE_READ)
2797 {
2798 drop_profile (callee, 0);
2799 worklist.safe_push (callee);
2800 }
2801 }
2802 }
2803 worklist.release ();
2804 }
2805
2806 /* Convert counts measured by profile driven feedback to frequencies.
2807 Return nonzero iff there was any nonzero execution count. */
2808
2809 int
2810 counts_to_freqs (void)
2811 {
2812 gcov_type count_max, true_count_max = 0;
2813 basic_block bb;
2814
2815 /* Don't overwrite the estimated frequencies when the profile for
2816 the function is missing. We may drop this function to PROFILE_GUESSED
2817 later in drop_profile (). */
2818 if (!flag_auto_profile && !ENTRY_BLOCK_PTR_FOR_FN (cfun)->count)
2819 return 0;
2820
2821 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
2822 true_count_max = MAX (bb->count, true_count_max);
2823
2824 count_max = MAX (true_count_max, 1);
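/* Scale counts linearly into the range [0, BB_FREQ_MAX], rounding to the
   nearest integer; the hottest block ends up with frequency BB_FREQ_MAX.  */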
2825 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
2826 bb->frequency = (bb->count * BB_FREQ_MAX + count_max / 2) / count_max;
2827
2828 return true_count_max;
2829 }
2830
2831 /* Return true if the function is likely to be expensive, so there is no point
2832 in optimizing the performance of the prologue or epilogue, or doing inlining,
2833 at the expense of code size growth. THRESHOLD is the limit on the number of
2834 instructions the function can execute on average to still be considered not expensive. */
2835
2836 bool
2837 expensive_function_p (int threshold)
2838 {
2839 unsigned int sum = 0;
2840 basic_block bb;
2841 unsigned int limit;
2842
2843 /* We cannot compute this accurately for large thresholds due to the scaled
2844 frequencies. */
2845 gcc_assert (threshold <= BB_FREQ_MAX);
2846
2847 /* Frequencies are out of range. This either means that the function contains
2848 an internal loop executing more than BB_FREQ_MAX times, or that profile
2849 feedback is available and the function has not been executed at all. */
2850 if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency == 0)
2851 return true;
2852
2853 /* Maximally BB_FREQ_MAX^2 so overflow won't happen. */
2854 limit = ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency * threshold;
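/* SUM accumulates the block frequency once per active insn, so it
   approximates the average number of instructions executed per invocation,
   scaled by the entry block frequency; LIMIT is THRESHOLD on the same
   scale.  */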
2855 FOR_EACH_BB_FN (bb, cfun)
2856 {
2857 rtx_insn *insn;
2858
2859 FOR_BB_INSNS (bb, insn)
2860 if (active_insn_p (insn))
2861 {
2862 sum += bb->frequency;
2863 if (sum > limit)
2864 return true;
2865 }
2866 }
2867
2868 return false;
2869 }
2870
2871 /* Estimate and propagate basic block frequencies using the given branch
2872 probabilities. If FORCE is true, the frequencies are used to estimate
2873 the counts even when there are already non-zero profile counts. */
2874
2875 void
2876 estimate_bb_frequencies (bool force)
2877 {
2878 basic_block bb;
2879 sreal freq_max;
2880
2881 if (force || profile_status_for_fn (cfun) != PROFILE_READ || !counts_to_freqs ())
2882 {
2883 static int real_values_initialized = 0;
2884
2885 if (!real_values_initialized)
2886 {
2887 real_values_initialized = 1;
2888 real_br_prob_base = REG_BR_PROB_BASE;
2889 real_bb_freq_max = BB_FREQ_MAX;
2890 real_one_half = sreal (1, -1);
2891 real_inv_br_prob_base = sreal (1) / real_br_prob_base;
2892 real_almost_one = sreal (1) - real_inv_br_prob_base;
2893 }
2894
2895 mark_dfs_back_edges ();
2896
2897 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun))->probability =
2898 REG_BR_PROB_BASE;
2899
2900 /* Set up block info for each basic block. */
2901 alloc_aux_for_blocks (sizeof (block_info));
2902 alloc_aux_for_edges (sizeof (edge_prob_info));
2903 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
2904 {
2905 edge e;
2906 edge_iterator ei;
2907
2908 FOR_EACH_EDGE (e, ei, bb->succs)
2909 {
2910 EDGE_INFO (e)->back_edge_prob = e->probability;
2911 EDGE_INFO (e)->back_edge_prob *= real_inv_br_prob_base;
2912 }
2913 }
2914
2915 /* First compute frequencies locally for each loop from innermost
2916 to outermost to examine frequencies for back edges. */
2917 estimate_loops ();
2918
2919 freq_max = 0;
2920 FOR_EACH_BB_FN (bb, cfun)
2921 if (freq_max < BLOCK_INFO (bb)->frequency)
2922 freq_max = BLOCK_INFO (bb)->frequency;
2923
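/* Scale the propagated sreal frequencies so that the hottest block gets
   frequency BB_FREQ_MAX, then round each block's frequency to an integer.  */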
2924 freq_max = real_bb_freq_max / freq_max;
2925 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
2926 {
2927 sreal tmp = BLOCK_INFO (bb)->frequency * freq_max + real_one_half;
2928 bb->frequency = tmp.to_int ();
2929 }
2930
2931 free_aux_for_blocks ();
2932 free_aux_for_edges ();
2933 }
2934 compute_function_frequency ();
2935 }
2936
2937 /* Decide whether function is hot, cold or unlikely executed. */
2938 void
2939 compute_function_frequency (void)
2940 {
2941 basic_block bb;
2942 struct cgraph_node *node = cgraph_node::get (current_function_decl);
2943
2944 if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
2945 || MAIN_NAME_P (DECL_NAME (current_function_decl)))
2946 node->only_called_at_startup = true;
2947 if (DECL_STATIC_DESTRUCTOR (current_function_decl))
2948 node->only_called_at_exit = true;
2949
2950 if (profile_status_for_fn (cfun) != PROFILE_READ)
2951 {
2952 int flags = flags_from_decl_or_type (current_function_decl);
2953 if (lookup_attribute ("cold", DECL_ATTRIBUTES (current_function_decl))
2954 != NULL)
2955 node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
2956 else if (lookup_attribute ("hot", DECL_ATTRIBUTES (current_function_decl))
2957 != NULL)
2958 node->frequency = NODE_FREQUENCY_HOT;
2959 else if (flags & ECF_NORETURN)
2960 node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
2961 else if (MAIN_NAME_P (DECL_NAME (current_function_decl)))
2962 node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
2963 else if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
2964 || DECL_STATIC_DESTRUCTOR (current_function_decl))
2965 node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
2966 return;
2967 }
2968
2969 /* Only the first time around try to drop the function into the unlikely
2970 executed category; after inlining the roundoff errors may confuse us.
2971 The ipa-profile pass will drop functions only called from unlikely
2972 functions to unlikely, and that is most of what we care about. */
2973 if (!cfun->after_inlining)
2974 node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
2975 FOR_EACH_BB_FN (bb, cfun)
2976 {
2977 if (maybe_hot_bb_p (cfun, bb))
2978 {
2979 node->frequency = NODE_FREQUENCY_HOT;
2980 return;
2981 }
2982 if (!probably_never_executed_bb_p (cfun, bb))
2983 node->frequency = NODE_FREQUENCY_NORMAL;
2984 }
2985 }
2986
2987 /* Build PREDICT_EXPR. */
2988 tree
2989 build_predict_expr (enum br_predictor predictor, enum prediction taken)
2990 {
2991 tree t = build1 (PREDICT_EXPR, void_type_node,
2992 build_int_cst (integer_type_node, predictor));
2993 SET_PREDICT_EXPR_OUTCOME (t, taken);
2994 return t;
2995 }
2996
2997 const char *
2998 predictor_name (enum br_predictor predictor)
2999 {
3000 return predictor_info[predictor].name;
3001 }
3002
3003 /* Predict branch probabilities and estimate profile of the tree CFG. */
3004
3005 namespace {
3006
3007 const pass_data pass_data_profile =
3008 {
3009 GIMPLE_PASS, /* type */
3010 "profile_estimate", /* name */
3011 OPTGROUP_NONE, /* optinfo_flags */
3012 TV_BRANCH_PROB, /* tv_id */
3013 PROP_cfg, /* properties_required */
3014 0, /* properties_provided */
3015 0, /* properties_destroyed */
3016 0, /* todo_flags_start */
3017 0, /* todo_flags_finish */
3018 };
3019
3020 class pass_profile : public gimple_opt_pass
3021 {
3022 public:
3023 pass_profile (gcc::context *ctxt)
3024 : gimple_opt_pass (pass_data_profile, ctxt)
3025 {}
3026
3027 /* opt_pass methods: */
3028 virtual bool gate (function *) { return flag_guess_branch_prob; }
3029 virtual unsigned int execute (function *);
3030
3031 }; // class pass_profile
3032
3033 unsigned int
3034 pass_profile::execute (function *fun)
3035 {
3036 unsigned nb_loops;
3037
3038 if (profile_status_for_fn (cfun) == PROFILE_GUESSED)
3039 return 0;
3040
3041 loop_optimizer_init (LOOPS_NORMAL);
3042 if (dump_file && (dump_flags & TDF_DETAILS))
3043 flow_loops_dump (dump_file, NULL, 0);
3044
3045 mark_irreducible_loops ();
3046
3047 nb_loops = number_of_loops (fun);
3048 if (nb_loops > 1)
3049 scev_initialize ();
3050
3051 tree_estimate_probability ();
3052
3053 if (nb_loops > 1)
3054 scev_finalize ();
3055
3056 loop_optimizer_finalize ();
3057 if (dump_file && (dump_flags & TDF_DETAILS))
3058 gimple_dump_cfg (dump_file, dump_flags);
3059 if (profile_status_for_fn (fun) == PROFILE_ABSENT)
3060 profile_status_for_fn (fun) = PROFILE_GUESSED;
3061 return 0;
3062 }
3063
3064 } // anon namespace
3065
3066 gimple_opt_pass *
3067 make_pass_profile (gcc::context *ctxt)
3068 {
3069 return new pass_profile (ctxt);
3070 }
3071
3072 namespace {
3073
3074 const pass_data pass_data_strip_predict_hints =
3075 {
3076 GIMPLE_PASS, /* type */
3077 "*strip_predict_hints", /* name */
3078 OPTGROUP_NONE, /* optinfo_flags */
3079 TV_BRANCH_PROB, /* tv_id */
3080 PROP_cfg, /* properties_required */
3081 0, /* properties_provided */
3082 0, /* properties_destroyed */
3083 0, /* todo_flags_start */
3084 0, /* todo_flags_finish */
3085 };
3086
3087 class pass_strip_predict_hints : public gimple_opt_pass
3088 {
3089 public:
3090 pass_strip_predict_hints (gcc::context *ctxt)
3091 : gimple_opt_pass (pass_data_strip_predict_hints, ctxt)
3092 {}
3093
3094 /* opt_pass methods: */
3095 opt_pass * clone () { return new pass_strip_predict_hints (m_ctxt); }
3096 virtual unsigned int execute (function *);
3097
3098 }; // class pass_strip_predict_hints
3099
3100 /* Get rid of all builtin_expect calls and GIMPLE_PREDICT statements
3101 we no longer need. */
3102 unsigned int
3103 pass_strip_predict_hints::execute (function *fun)
3104 {
3105 basic_block bb;
3106 gimple ass_stmt;
3107 tree var;
3108
3109 FOR_EACH_BB_FN (bb, fun)
3110 {
3111 gimple_stmt_iterator bi;
3112 for (bi = gsi_start_bb (bb); !gsi_end_p (bi);)
3113 {
3114 gimple stmt = gsi_stmt (bi);
3115
3116 if (gimple_code (stmt) == GIMPLE_PREDICT)
3117 {
3118 gsi_remove (&bi, true);
3119 continue;
3120 }
3121 else if (is_gimple_call (stmt))
3122 {
3123 tree fndecl = gimple_call_fndecl (stmt);
3124
3125 if ((fndecl
3126 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
3127 && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_EXPECT
3128 && gimple_call_num_args (stmt) == 2)
3129 || (gimple_call_internal_p (stmt)
3130 && gimple_call_internal_fn (stmt) == IFN_BUILTIN_EXPECT))
3131 {
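/* Rewrite "lhs = __builtin_expect (val, expected)" (or the internal
   IFN_BUILTIN_EXPECT form) into "lhs = val"; the hint has already been
   consumed by the profile estimation pass.  */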
3132 var = gimple_call_lhs (stmt);
3133 if (var)
3134 {
3135 ass_stmt
3136 = gimple_build_assign (var, gimple_call_arg (stmt, 0));
3137 gsi_replace (&bi, ass_stmt, true);
3138 }
3139 else
3140 {
3141 gsi_remove (&bi, true);
3142 continue;
3143 }
3144 }
3145 }
3146 gsi_next (&bi);
3147 }
3148 }
3149 return 0;
3150 }
3151
3152 } // anon namespace
3153
3154 gimple_opt_pass *
3155 make_pass_strip_predict_hints (gcc::context *ctxt)
3156 {
3157 return new pass_strip_predict_hints (ctxt);
3158 }
3159
3160 /* Rebuild function frequencies. Passes are in general expected to
3161 maintain the profile by hand; however, in some cases this is not possible:
3162 for example, when inlining several functions with loops, frequencies might run
3163 out of scale and thus need to be recomputed. */
3164
3165 void
3166 rebuild_frequencies (void)
3167 {
3168 timevar_push (TV_REBUILD_FREQUENCIES);
3169
3170 /* When the max bb count in the function is small, there is a higher
3171 chance that there were truncation errors in the integer scaling
3172 of counts by inlining and other optimizations. This could lead
3173 to incorrect classification of code as being cold when it isn't.
3174 In that case, force the estimation of bb counts/frequencies from the
3175 branch probabilities, rather than computing frequencies from counts,
3176 which may also lead to frequencies incorrectly reduced to 0. There
3177 is less precision in the probabilities, so we only do this for small
3178 max counts. */
3179 gcov_type count_max = 0;
3180 basic_block bb;
3181 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
3182 count_max = MAX (bb->count, count_max);
3183
3184 if (profile_status_for_fn (cfun) == PROFILE_GUESSED
3185 || (!flag_auto_profile && profile_status_for_fn (cfun) == PROFILE_READ
3186 && count_max < REG_BR_PROB_BASE/10))
3187 {
3188 loop_optimizer_init (0);
3189 add_noreturn_fake_exit_edges ();
3190 mark_irreducible_loops ();
3191 connect_infinite_loops_to_exit ();
3192 estimate_bb_frequencies (true);
3193 remove_fake_exit_edges ();
3194 loop_optimizer_finalize ();
3195 }
3196 else if (profile_status_for_fn (cfun) == PROFILE_READ)
3197 counts_to_freqs ();
3198 else
3199 gcc_unreachable ();
3200 timevar_pop (TV_REBUILD_FREQUENCIES);
3201 }