[ARM/AArch64][testsuite] Add vmull tests.
[gcc.git] / gcc / predict.c
1 /* Branch prediction routines for the GNU compiler.
2 Copyright (C) 2000-2015 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 /* References:
21
22 [1] "Branch Prediction for Free"
23 Ball and Larus; PLDI '93.
24 [2] "Static Branch Frequency and Program Profile Analysis"
25 Wu and Larus; MICRO-27.
26 [3] "Corpus-based Static Branch Prediction"
27 Calder, Grunwald, Lindsay, Martin, Mozer, and Zorn; PLDI '95. */
28
29
30 #include "config.h"
31 #include "system.h"
32 #include "coretypes.h"
33 #include "tm.h"
34 #include "hash-set.h"
35 #include "machmode.h"
36 #include "vec.h"
37 #include "double-int.h"
38 #include "input.h"
39 #include "alias.h"
40 #include "symtab.h"
41 #include "wide-int.h"
42 #include "inchash.h"
43 #include "tree.h"
44 #include "fold-const.h"
45 #include "calls.h"
46 #include "rtl.h"
47 #include "tm_p.h"
48 #include "hard-reg-set.h"
49 #include "predict.h"
50 #include "function.h"
51 #include "dominance.h"
52 #include "cfg.h"
53 #include "cfganal.h"
54 #include "basic-block.h"
55 #include "insn-config.h"
56 #include "regs.h"
57 #include "flags.h"
58 #include "profile.h"
59 #include "except.h"
60 #include "diagnostic-core.h"
61 #include "recog.h"
62 #include "hashtab.h"
63 #include "statistics.h"
64 #include "real.h"
65 #include "fixed-value.h"
66 #include "expmed.h"
67 #include "dojump.h"
68 #include "explow.h"
69 #include "emit-rtl.h"
70 #include "varasm.h"
71 #include "stmt.h"
72 #include "expr.h"
73 #include "coverage.h"
74 #include "sreal.h"
75 #include "params.h"
76 #include "target.h"
77 #include "cfgloop.h"
78 #include "hash-map.h"
79 #include "tree-ssa-alias.h"
80 #include "internal-fn.h"
81 #include "gimple-expr.h"
82 #include "is-a.h"
83 #include "gimple.h"
84 #include "gimple-iterator.h"
85 #include "gimple-ssa.h"
86 #include "plugin-api.h"
87 #include "ipa-ref.h"
88 #include "cgraph.h"
89 #include "tree-cfg.h"
90 #include "tree-phinodes.h"
91 #include "ssa-iterators.h"
92 #include "tree-ssa-loop-niter.h"
93 #include "tree-ssa-loop.h"
94 #include "tree-pass.h"
95 #include "tree-scalar-evolution.h"
96
/* Real constants used by the probability estimation code:
   1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE, 1/REG_BR_PROB_BASE, 0.5 and
   BB_FREQ_MAX.  Initialized lazily elsewhere in this file.  */
static sreal real_almost_one, real_br_prob_base,
	     real_inv_br_prob_base, real_one_half, real_bb_freq_max;

/* Forward declarations for helpers defined later in this file.  */
static void combine_predictions_for_insn (rtx_insn *, basic_block);
static void dump_prediction (FILE *, enum br_predictor, int, basic_block, int);
static void predict_paths_leading_to (basic_block, enum br_predictor, enum prediction);
static void predict_paths_leading_to_edge (edge, enum br_predictor, enum prediction);
static bool can_predict_insn_p (const rtx_insn *);
107
/* Information we hold about each branch predictor.
   Filled using information from predict.def.  */

struct predictor_info
{
  const char *const name;	/* Name used in the debugging dumps.  */
  const int hitrate;		/* Expected hitrate used by
				   predict_insn_def call.  */
  const int flags;		/* Bitmask of the PRED_FLAG_* values below.  */
};

/* Use given predictor without Dempster-Shaffer theory if it matches
   using first_match heuristics.  */
#define PRED_FLAG_FIRST_MATCH 1

/* Recompute hitrate in percent to our representation.  */

#define HITRATE(VAL) ((int) ((VAL) * REG_BR_PROB_BASE + 50) / 100)

/* Expand each DEF_PREDICTOR entry of predict.def into one table row;
   the table is indexed by enum br_predictor.  */
#define DEF_PREDICTOR(ENUM, NAME, HITRATE, FLAGS) {NAME, HITRATE, FLAGS},
static const struct predictor_info predictor_info[]= {
#include "predict.def"

  /* Upper bound on predictors.  */
  {NULL, 0, 0}
};
#undef DEF_PREDICTOR
135
/* Return TRUE if frequency FREQ is considered to be hot in function FUN.
   Consults the cgraph node's static frequency attribute when no profile
   feedback is available, then falls back to comparing FREQ against a
   fraction of the entry block frequency.  */

static inline bool
maybe_hot_frequency_p (struct function *fun, int freq)
{
  struct cgraph_node *node = cgraph_node::get (fun->decl);
  /* Without measured profile data, trust the statically declared
     node frequency when it gives a definite answer.  */
  if (!profile_info
      || !opt_for_fn (fun->decl, flag_branch_probabilities))
    {
      if (node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED)
	return false;
      if (node->frequency == NODE_FREQUENCY_HOT)
	return true;
    }
  /* With no profile at all, optimistically treat everything as hot.  */
  if (profile_status_for_fn (fun) == PROFILE_ABSENT)
    return true;
  /* In a function executed once, blocks noticeably colder than the entry
     (less than 2/3 of its frequency) are not hot.  */
  if (node->frequency == NODE_FREQUENCY_EXECUTED_ONCE
      && freq < (ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency * 2 / 3))
    return false;
  /* A fraction parameter of 0 means nothing qualifies as hot.  */
  if (PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION) == 0)
    return false;
  if (freq < (ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency
	      / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION)))
    return false;
  return true;
}
162
163 static gcov_type min_count = -1;
164
165 /* Determine the threshold for hot BB counts. */
166
167 gcov_type
168 get_hot_bb_threshold ()
169 {
170 gcov_working_set_t *ws;
171 if (min_count == -1)
172 {
173 ws = find_working_set (PARAM_VALUE (HOT_BB_COUNT_WS_PERMILLE));
174 gcc_assert (ws);
175 min_count = ws->min_counter;
176 }
177 return min_count;
178 }
179
/* Set the threshold for hot BB counts to MIN, overriding any lazily
   computed value (see get_hot_bb_threshold).  */

void
set_hot_bb_threshold (gcov_type min)
{
  min_count = min;
}
187
/* Return TRUE if profile count COUNT is considered to be hot in
   function FUN.  Without read profile data, everything is assumed hot.  */

bool
maybe_hot_count_p (struct function *fun, gcov_type count)
{
  if (fun && profile_status_for_fn (fun) != PROFILE_READ)
    return true;
  /* Code executed at most once (COUNT no larger than the number of
     training runs) is not hot.  */
  if (profile_info->runs >= count)
    return false;
  return (count >= get_hot_bb_threshold ());
}
200
201 /* Return true in case BB can be CPU intensive and should be optimized
202 for maximal performance. */
203
204 bool
205 maybe_hot_bb_p (struct function *fun, const_basic_block bb)
206 {
207 gcc_checking_assert (fun);
208 if (profile_status_for_fn (fun) == PROFILE_READ)
209 return maybe_hot_count_p (fun, bb->count);
210 return maybe_hot_frequency_p (fun, bb->frequency);
211 }
212
213 /* Return true in case BB can be CPU intensive and should be optimized
214 for maximal performance. */
215
216 bool
217 maybe_hot_edge_p (edge e)
218 {
219 if (profile_status_for_fn (cfun) == PROFILE_READ)
220 return maybe_hot_count_p (cfun, e->count);
221 return maybe_hot_frequency_p (cfun, EDGE_FREQUENCY (e));
222 }
223
/* Return true if profile COUNT and FREQUENCY, or function FUN static
   node frequency reflects never being executed.  */

static bool
probably_never_executed (struct function *fun,
			 gcov_type count, int frequency)
{
  gcc_checking_assert (fun);
  if (profile_status_for_fn (fun) == PROFILE_READ)
    {
      int unlikely_count_fraction = PARAM_VALUE (UNLIKELY_BB_COUNT_FRACTION);
      /* A count that is at least runs/fraction means real execution.  */
      if (count * unlikely_count_fraction >= profile_info->runs)
	return false;
      if (!frequency)
	return true;
      if (!ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency)
	return false;
      if (ENTRY_BLOCK_PTR_FOR_FN (fun)->count)
	{
	  gcov_type computed_count;
	  /* Check for possibility of overflow, in which case entry bb count
	     is large enough to do the division first without losing much
	     precision.  */
	  if (ENTRY_BLOCK_PTR_FOR_FN (fun)->count < REG_BR_PROB_BASE *
	      REG_BR_PROB_BASE)
	    {
	      /* Scale FREQUENCY to a count, multiplying before dividing.  */
	      gcov_type scaled_count
		= frequency * ENTRY_BLOCK_PTR_FOR_FN (fun)->count *
	     unlikely_count_fraction;
	      computed_count = RDIV (scaled_count,
				     ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency);
	    }
	  else
	    {
	      /* Divide first to keep the intermediate in range.  */
	      computed_count = RDIV (ENTRY_BLOCK_PTR_FOR_FN (fun)->count,
				     ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency);
	      computed_count *= frequency * unlikely_count_fraction;
	    }
	  if (computed_count >= profile_info->runs)
	    return false;
	}
      return true;
    }
  /* Without profile feedback, trust the static node frequency.  */
  if ((!profile_info || !(opt_for_fn (fun->decl, flag_branch_probabilities)))
      && (cgraph_node::get (fun->decl)->frequency
	  == NODE_FREQUENCY_UNLIKELY_EXECUTED))
    return true;
  return false;
}
273
274
275 /* Return true in case BB is probably never executed. */
276
277 bool
278 probably_never_executed_bb_p (struct function *fun, const_basic_block bb)
279 {
280 return probably_never_executed (fun, bb->count, bb->frequency);
281 }
282
283
284 /* Return true in case edge E is probably never executed. */
285
286 bool
287 probably_never_executed_edge_p (struct function *fun, edge e)
288 {
289 return probably_never_executed (fun, e->count, EDGE_FREQUENCY (e));
290 }
291
292 /* Return true when current function should always be optimized for size. */
293
294 bool
295 optimize_function_for_size_p (struct function *fun)
296 {
297 if (!fun || !fun->decl)
298 return optimize_size;
299 cgraph_node *n = cgraph_node::get (fun->decl);
300 return n && n->optimize_for_size_p ();
301 }
302
303 /* Return true when current function should always be optimized for speed. */
304
305 bool
306 optimize_function_for_speed_p (struct function *fun)
307 {
308 return !optimize_function_for_size_p (fun);
309 }
310
311 /* Return TRUE when BB should be optimized for size. */
312
313 bool
314 optimize_bb_for_size_p (const_basic_block bb)
315 {
316 return (optimize_function_for_size_p (cfun)
317 || (bb && !maybe_hot_bb_p (cfun, bb)));
318 }
319
320 /* Return TRUE when BB should be optimized for speed. */
321
322 bool
323 optimize_bb_for_speed_p (const_basic_block bb)
324 {
325 return !optimize_bb_for_size_p (bb);
326 }
327
328 /* Return TRUE when BB should be optimized for size. */
329
330 bool
331 optimize_edge_for_size_p (edge e)
332 {
333 return optimize_function_for_size_p (cfun) || !maybe_hot_edge_p (e);
334 }
335
336 /* Return TRUE when BB should be optimized for speed. */
337
338 bool
339 optimize_edge_for_speed_p (edge e)
340 {
341 return !optimize_edge_for_size_p (e);
342 }
343
344 /* Return TRUE when BB should be optimized for size. */
345
346 bool
347 optimize_insn_for_size_p (void)
348 {
349 return optimize_function_for_size_p (cfun) || !crtl->maybe_hot_insn_p;
350 }
351
352 /* Return TRUE when BB should be optimized for speed. */
353
354 bool
355 optimize_insn_for_speed_p (void)
356 {
357 return !optimize_insn_for_size_p ();
358 }
359
360 /* Return TRUE when LOOP should be optimized for size. */
361
362 bool
363 optimize_loop_for_size_p (struct loop *loop)
364 {
365 return optimize_bb_for_size_p (loop->header);
366 }
367
368 /* Return TRUE when LOOP should be optimized for speed. */
369
370 bool
371 optimize_loop_for_speed_p (struct loop *loop)
372 {
373 return optimize_bb_for_speed_p (loop->header);
374 }
375
376 /* Return TRUE when LOOP nest should be optimized for speed. */
377
378 bool
379 optimize_loop_nest_for_speed_p (struct loop *loop)
380 {
381 struct loop *l = loop;
382 if (optimize_loop_for_speed_p (loop))
383 return true;
384 l = loop->inner;
385 while (l && l != loop)
386 {
387 if (optimize_loop_for_speed_p (l))
388 return true;
389 if (l->inner)
390 l = l->inner;
391 else if (l->next)
392 l = l->next;
393 else
394 {
395 while (l != loop && !l->next)
396 l = loop_outer (l);
397 if (l != loop)
398 l = l->next;
399 }
400 }
401 return false;
402 }
403
404 /* Return TRUE when LOOP nest should be optimized for size. */
405
406 bool
407 optimize_loop_nest_for_size_p (struct loop *loop)
408 {
409 return !optimize_loop_nest_for_speed_p (loop);
410 }
411
412 /* Return true when edge E is likely to be well predictable by branch
413 predictor. */
414
415 bool
416 predictable_edge_p (edge e)
417 {
418 if (profile_status_for_fn (cfun) == PROFILE_ABSENT)
419 return false;
420 if ((e->probability
421 <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100)
422 || (REG_BR_PROB_BASE - e->probability
423 <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100))
424 return true;
425 return false;
426 }
427
428
/* Set RTL expansion for BB profile: record whether BB is hot so
   optimize_insn_for_size_p/speed_p reflect it during expansion.  */

void
rtl_profile_for_bb (basic_block bb)
{
  crtl->maybe_hot_insn_p = maybe_hot_bb_p (cfun, bb);
}
436
/* Set RTL expansion for edge profile: record whether E is hot so
   optimize_insn_for_size_p/speed_p reflect it during expansion.  */

void
rtl_profile_for_edge (edge e)
{
  crtl->maybe_hot_insn_p = maybe_hot_edge_p (e);
}
444
/* Set RTL expansion to default mode (i.e. when profile info is not known);
   optimistically assume hot.  */
void
default_rtl_profile (void)
{
  crtl->maybe_hot_insn_p = true;
}
451
452 /* Return true if the one of outgoing edges is already predicted by
453 PREDICTOR. */
454
455 bool
456 rtl_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
457 {
458 rtx note;
459 if (!INSN_P (BB_END (bb)))
460 return false;
461 for (note = REG_NOTES (BB_END (bb)); note; note = XEXP (note, 1))
462 if (REG_NOTE_KIND (note) == REG_BR_PRED
463 && INTVAL (XEXP (XEXP (note, 0), 0)) == (int)predictor)
464 return true;
465 return false;
466 }
467
/* Structure representing predictions in tree level.  One node of a
   singly linked list of predictions made for edges out of one block.  */

struct edge_prediction {
  struct edge_prediction *ep_next;	/* Next prediction for same block.  */
  edge ep_edge;				/* Edge the prediction refers to.  */
  enum br_predictor ep_predictor;	/* Heuristic that produced it.  */
  int ep_probability;			/* Probability of EP_EDGE being taken.  */
};

/* This map contains for a basic block the list of predictions for the
   outgoing edges.  */

static hash_map<const_basic_block, edge_prediction *> *bb_predictions;
481
482 /* Return true if the one of outgoing edges is already predicted by
483 PREDICTOR. */
484
485 bool
486 gimple_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
487 {
488 struct edge_prediction *i;
489 edge_prediction **preds = bb_predictions->get (bb);
490
491 if (!preds)
492 return false;
493
494 for (i = *preds; i; i = i->ep_next)
495 if (i->ep_predictor == predictor)
496 return true;
497 return false;
498 }
499
500 /* Return true when the probability of edge is reliable.
501
502 The profile guessing code is good at predicting branch outcome (ie.
503 taken/not taken), that is predicted right slightly over 75% of time.
504 It is however notoriously poor on predicting the probability itself.
505 In general the profile appear a lot flatter (with probabilities closer
506 to 50%) than the reality so it is bad idea to use it to drive optimization
507 such as those disabling dynamic branch prediction for well predictable
508 branches.
509
510 There are two exceptions - edges leading to noreturn edges and edges
511 predicted by number of iterations heuristics are predicted well. This macro
512 should be able to distinguish those, but at the moment it simply check for
513 noreturn heuristic that is only one giving probability over 99% or bellow
514 1%. In future we might want to propagate reliability information across the
515 CFG if we find this information useful on multiple places. */
516 static bool
517 probability_reliable_p (int prob)
518 {
519 return (profile_status_for_fn (cfun) == PROFILE_READ
520 || (profile_status_for_fn (cfun) == PROFILE_GUESSED
521 && (prob <= HITRATE (1) || prob >= HITRATE (99))));
522 }
523
/* Same predicate as probability_reliable_p, working on edges.  */
bool
edge_probability_reliable_p (const_edge e)
{
  return probability_reliable_p (e->probability);
}
530
/* Same predicate as edge_probability_reliable_p, working on REG_BR_PROB
   notes; NOTE must be of that kind.  */
bool
br_prob_note_reliable_p (const_rtx note)
{
  gcc_assert (REG_NOTE_KIND (note) == REG_BR_PROB);
  return probability_reliable_p (XINT (note, 0));
}
538
539 static void
540 predict_insn (rtx_insn *insn, enum br_predictor predictor, int probability)
541 {
542 gcc_assert (any_condjump_p (insn));
543 if (!flag_guess_branch_prob)
544 return;
545
546 add_reg_note (insn, REG_BR_PRED,
547 gen_rtx_CONCAT (VOIDmode,
548 GEN_INT ((int) predictor),
549 GEN_INT ((int) probability)));
550 }
551
552 /* Predict insn by given predictor. */
553
554 void
555 predict_insn_def (rtx_insn *insn, enum br_predictor predictor,
556 enum prediction taken)
557 {
558 int probability = predictor_info[(int) predictor].hitrate;
559
560 if (taken != TAKEN)
561 probability = REG_BR_PROB_BASE - probability;
562
563 predict_insn (insn, predictor, probability);
564 }
565
566 /* Predict edge E with given probability if possible. */
567
568 void
569 rtl_predict_edge (edge e, enum br_predictor predictor, int probability)
570 {
571 rtx_insn *last_insn;
572 last_insn = BB_END (e->src);
573
574 /* We can store the branch prediction information only about
575 conditional jumps. */
576 if (!any_condjump_p (last_insn))
577 return;
578
579 /* We always store probability of branching. */
580 if (e->flags & EDGE_FALLTHRU)
581 probability = REG_BR_PROB_BASE - probability;
582
583 predict_insn (last_insn, predictor, probability);
584 }
585
586 /* Predict edge E with the given PROBABILITY. */
587 void
588 gimple_predict_edge (edge e, enum br_predictor predictor, int probability)
589 {
590 gcc_assert (profile_status_for_fn (cfun) != PROFILE_GUESSED);
591 if ((e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun) && EDGE_COUNT (e->src->succs) >
592 1)
593 && flag_guess_branch_prob && optimize)
594 {
595 struct edge_prediction *i = XNEW (struct edge_prediction);
596 edge_prediction *&preds = bb_predictions->get_or_insert (e->src);
597
598 i->ep_next = preds;
599 preds = i;
600 i->ep_probability = probability;
601 i->ep_predictor = predictor;
602 i->ep_edge = e;
603 }
604 }
605
/* Remove all predictions on given basic block that are attached
   to edge E.  */
void
remove_predictions_associated_with_edge (edge e)
{
  if (!bb_predictions)
    return;

  edge_prediction **preds = bb_predictions->get (e->src);

  if (preds)
    {
      /* Walk with a pointer-to-pointer so matching nodes can be unlinked
	 in place without tracking a separate previous node.  */
      struct edge_prediction **prediction = preds;
      struct edge_prediction *next;

      while (*prediction)
	{
	  if ((*prediction)->ep_edge == e)
	    {
	      next = (*prediction)->ep_next;
	      free (*prediction);
	      *prediction = next;
	    }
	  else
	    prediction = &((*prediction)->ep_next);
	}
    }
}
634
635 /* Clears the list of predictions stored for BB. */
636
637 static void
638 clear_bb_predictions (basic_block bb)
639 {
640 edge_prediction **preds = bb_predictions->get (bb);
641 struct edge_prediction *pred, *next;
642
643 if (!preds)
644 return;
645
646 for (pred = *preds; pred; pred = next)
647 {
648 next = pred->ep_next;
649 free (pred);
650 }
651 *preds = NULL;
652 }
653
654 /* Return true when we can store prediction on insn INSN.
655 At the moment we represent predictions only on conditional
656 jumps, not at computed jump or other complicated cases. */
657 static bool
658 can_predict_insn_p (const rtx_insn *insn)
659 {
660 return (JUMP_P (insn)
661 && any_condjump_p (insn)
662 && EDGE_COUNT (BLOCK_FOR_INSN (insn)->succs) >= 2);
663 }
664
665 /* Predict edge E by given predictor if possible. */
666
667 void
668 predict_edge_def (edge e, enum br_predictor predictor,
669 enum prediction taken)
670 {
671 int probability = predictor_info[(int) predictor].hitrate;
672
673 if (taken != TAKEN)
674 probability = REG_BR_PROB_BASE - probability;
675
676 predict_edge (e, predictor, probability);
677 }
678
/* Invert all branch predictions or probability notes in the INSN.  This needs
   to be done each time we invert the condition used by the jump.  */

void
invert_br_probabilities (rtx insn)
{
  rtx note;

  for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
    /* A REG_BR_PROB note stores the probability directly ...  */
    if (REG_NOTE_KIND (note) == REG_BR_PROB)
      XINT (note, 0) = REG_BR_PROB_BASE - XINT (note, 0);
    /* ... while a REG_BR_PRED note stores it in the second operand of a
       (predictor . probability) CONCAT.  */
    else if (REG_NOTE_KIND (note) == REG_BR_PRED)
      XEXP (XEXP (note, 0), 1)
	= GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (XEXP (note, 0), 1)));
}
694
695 /* Dump information about the branch prediction to the output file. */
696
697 static void
698 dump_prediction (FILE *file, enum br_predictor predictor, int probability,
699 basic_block bb, int used)
700 {
701 edge e;
702 edge_iterator ei;
703
704 if (!file)
705 return;
706
707 FOR_EACH_EDGE (e, ei, bb->succs)
708 if (! (e->flags & EDGE_FALLTHRU))
709 break;
710
711 fprintf (file, " %s heuristics%s: %.1f%%",
712 predictor_info[predictor].name,
713 used ? "" : " (ignored)", probability * 100.0 / REG_BR_PROB_BASE);
714
715 if (bb->count)
716 {
717 fprintf (file, " exec %"PRId64, bb->count);
718 if (e)
719 {
720 fprintf (file, " hit %"PRId64, e->count);
721 fprintf (file, " (%.1f%%)", e->count * 100.0 / bb->count);
722 }
723 }
724
725 fprintf (file, "\n");
726 }
727
728 /* We can not predict the probabilities of outgoing edges of bb. Set them
729 evenly and hope for the best. */
730 static void
731 set_even_probabilities (basic_block bb)
732 {
733 int nedges = 0;
734 edge e;
735 edge_iterator ei;
736
737 FOR_EACH_EDGE (e, ei, bb->succs)
738 if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
739 nedges ++;
740 FOR_EACH_EDGE (e, ei, bb->succs)
741 if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
742 e->probability = (REG_BR_PROB_BASE + nedges / 2) / nedges;
743 else
744 e->probability = 0;
745 }
746
/* Combine all REG_BR_PRED notes into single probability and attach REG_BR_PROB
   note if not already present.  Remove now useless REG_BR_PRED notes.  */

static void
combine_predictions_for_insn (rtx_insn *insn, basic_block bb)
{
  rtx prob_note;
  rtx *pnote;
  rtx note;
  int best_probability = PROB_EVEN;
  enum br_predictor best_predictor = END_PREDICTORS;
  int combined_probability = REG_BR_PROB_BASE / 2;
  int d;
  bool first_match = false;
  bool found = false;

  if (!can_predict_insn_p (insn))
    {
      set_even_probabilities (bb);
      return;
    }

  prob_note = find_reg_note (insn, REG_BR_PROB, 0);
  pnote = &REG_NOTES (insn);
  if (dump_file)
    fprintf (dump_file, "Predictions for insn %i bb %i\n", INSN_UID (insn),
	     bb->index);

  /* We implement "first match" heuristics and use probability guessed
     by predictor with smallest index.  */
  for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
    if (REG_NOTE_KIND (note) == REG_BR_PRED)
      {
	enum br_predictor predictor = ((enum br_predictor)
				       INTVAL (XEXP (XEXP (note, 0), 0)));
	int probability = INTVAL (XEXP (XEXP (note, 0), 1));

	found = true;
	/* Smaller predictor index wins the first-match slot.  */
	if (best_predictor > predictor)
	  best_probability = probability, best_predictor = predictor;

	/* Dempster-Shaffer combination of the probability with the
	   running combined probability.  */
	d = (combined_probability * probability
	     + (REG_BR_PROB_BASE - combined_probability)
	     * (REG_BR_PROB_BASE - probability));

	/* Use FP math to avoid overflows of 32bit integers.  */
	if (d == 0)
	  /* If one probability is 0% and one 100%, avoid division by zero.  */
	  combined_probability = REG_BR_PROB_BASE / 2;
	else
	  combined_probability = (((double) combined_probability) * probability
				  * REG_BR_PROB_BASE / d + 0.5);
      }

  /* Decide which heuristic to use.  In case we didn't match anything,
     use no_prediction heuristic, in case we did match, use either
     first match or Dempster-Shaffer theory depending on the flags.  */

  if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
    first_match = true;

  if (!found)
    dump_prediction (dump_file, PRED_NO_PREDICTION,
		     combined_probability, bb, true);
  else
    {
      dump_prediction (dump_file, PRED_DS_THEORY, combined_probability,
		       bb, !first_match);
      dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability,
		       bb, first_match);
    }

  if (first_match)
    combined_probability = best_probability;
  dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);

  /* Dump and strip the now-useless REG_BR_PRED notes.  */
  while (*pnote)
    {
      if (REG_NOTE_KIND (*pnote) == REG_BR_PRED)
	{
	  enum br_predictor predictor = ((enum br_predictor)
					 INTVAL (XEXP (XEXP (*pnote, 0), 0)));
	  int probability = INTVAL (XEXP (XEXP (*pnote, 0), 1));

	  dump_prediction (dump_file, predictor, probability, bb,
			   !first_match || best_predictor == predictor);
	  *pnote = XEXP (*pnote, 1);
	}
      else
	pnote = &XEXP (*pnote, 1);
    }

  if (!prob_note)
    {
      add_int_reg_note (insn, REG_BR_PROB, combined_probability);

      /* Save the prediction into CFG in case we are seeing non-degenerated
	 conditional jump.  */
      if (!single_succ_p (bb))
	{
	  BRANCH_EDGE (bb)->probability = combined_probability;
	  FALLTHRU_EDGE (bb)->probability
	    = REG_BR_PROB_BASE - combined_probability;
	}
    }
  else if (!single_succ_p (bb))
    {
      /* A REG_BR_PROB note already exists; propagate its value instead.  */
      int prob = XINT (prob_note, 0);

      BRANCH_EDGE (bb)->probability = prob;
      FALLTHRU_EDGE (bb)->probability = REG_BR_PROB_BASE - prob;
    }
  else
    single_succ_edge (bb)->probability = REG_BR_PROB_BASE;
}
862
863 /* Combine predictions into single probability and store them into CFG.
864 Remove now useless prediction entries. */
865
866 static void
867 combine_predictions_for_bb (basic_block bb)
868 {
869 int best_probability = PROB_EVEN;
870 enum br_predictor best_predictor = END_PREDICTORS;
871 int combined_probability = REG_BR_PROB_BASE / 2;
872 int d;
873 bool first_match = false;
874 bool found = false;
875 struct edge_prediction *pred;
876 int nedges = 0;
877 edge e, first = NULL, second = NULL;
878 edge_iterator ei;
879
880 FOR_EACH_EDGE (e, ei, bb->succs)
881 if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
882 {
883 nedges ++;
884 if (first && !second)
885 second = e;
886 if (!first)
887 first = e;
888 }
889
890 /* When there is no successor or only one choice, prediction is easy.
891
892 We are lazy for now and predict only basic blocks with two outgoing
893 edges. It is possible to predict generic case too, but we have to
894 ignore first match heuristics and do more involved combining. Implement
895 this later. */
896 if (nedges != 2)
897 {
898 if (!bb->count)
899 set_even_probabilities (bb);
900 clear_bb_predictions (bb);
901 if (dump_file)
902 fprintf (dump_file, "%i edges in bb %i predicted to even probabilities\n",
903 nedges, bb->index);
904 return;
905 }
906
907 if (dump_file)
908 fprintf (dump_file, "Predictions for bb %i\n", bb->index);
909
910 edge_prediction **preds = bb_predictions->get (bb);
911 if (preds)
912 {
913 /* We implement "first match" heuristics and use probability guessed
914 by predictor with smallest index. */
915 for (pred = *preds; pred; pred = pred->ep_next)
916 {
917 enum br_predictor predictor = pred->ep_predictor;
918 int probability = pred->ep_probability;
919
920 if (pred->ep_edge != first)
921 probability = REG_BR_PROB_BASE - probability;
922
923 found = true;
924 /* First match heuristics would be widly confused if we predicted
925 both directions. */
926 if (best_predictor > predictor)
927 {
928 struct edge_prediction *pred2;
929 int prob = probability;
930
931 for (pred2 = (struct edge_prediction *) *preds;
932 pred2; pred2 = pred2->ep_next)
933 if (pred2 != pred && pred2->ep_predictor == pred->ep_predictor)
934 {
935 int probability2 = pred->ep_probability;
936
937 if (pred2->ep_edge != first)
938 probability2 = REG_BR_PROB_BASE - probability2;
939
940 if ((probability < REG_BR_PROB_BASE / 2) !=
941 (probability2 < REG_BR_PROB_BASE / 2))
942 break;
943
944 /* If the same predictor later gave better result, go for it! */
945 if ((probability >= REG_BR_PROB_BASE / 2 && (probability2 > probability))
946 || (probability <= REG_BR_PROB_BASE / 2 && (probability2 < probability)))
947 prob = probability2;
948 }
949 if (!pred2)
950 best_probability = prob, best_predictor = predictor;
951 }
952
953 d = (combined_probability * probability
954 + (REG_BR_PROB_BASE - combined_probability)
955 * (REG_BR_PROB_BASE - probability));
956
957 /* Use FP math to avoid overflows of 32bit integers. */
958 if (d == 0)
959 /* If one probability is 0% and one 100%, avoid division by zero. */
960 combined_probability = REG_BR_PROB_BASE / 2;
961 else
962 combined_probability = (((double) combined_probability)
963 * probability
964 * REG_BR_PROB_BASE / d + 0.5);
965 }
966 }
967
968 /* Decide which heuristic to use. In case we didn't match anything,
969 use no_prediction heuristic, in case we did match, use either
970 first match or Dempster-Shaffer theory depending on the flags. */
971
972 if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
973 first_match = true;
974
975 if (!found)
976 dump_prediction (dump_file, PRED_NO_PREDICTION, combined_probability, bb, true);
977 else
978 {
979 dump_prediction (dump_file, PRED_DS_THEORY, combined_probability, bb,
980 !first_match);
981 dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability, bb,
982 first_match);
983 }
984
985 if (first_match)
986 combined_probability = best_probability;
987 dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);
988
989 if (preds)
990 {
991 for (pred = (struct edge_prediction *) *preds; pred; pred = pred->ep_next)
992 {
993 enum br_predictor predictor = pred->ep_predictor;
994 int probability = pred->ep_probability;
995
996 if (pred->ep_edge != EDGE_SUCC (bb, 0))
997 probability = REG_BR_PROB_BASE - probability;
998 dump_prediction (dump_file, predictor, probability, bb,
999 !first_match || best_predictor == predictor);
1000 }
1001 }
1002 clear_bb_predictions (bb);
1003
1004 if (!bb->count)
1005 {
1006 first->probability = combined_probability;
1007 second->probability = REG_BR_PROB_BASE - combined_probability;
1008 }
1009 }
1010
/* Check if T1 and T2 satisfy the IV_COMPARE condition.
   Return the SSA_NAME if the condition satisfies, NULL otherwise.

   T1 and T2 should be one of the following cases:
     1. T1 is SSA_NAME, T2 is NULL
     2. T1 is SSA_NAME, T2 is INTEGER_CST between [-4, 4]
     3. T2 is SSA_NAME, T1 is INTEGER_CST between [-4, 4]  */

static tree
strips_small_constant (tree t1, tree t2)
{
  tree ret = NULL;
  /* Value of whichever operand is the constant; stays 0 when neither
     operand supplies one, which trivially passes the [-4, 4] check.  */
  int value = 0;

  if (!t1)
    return NULL;
  else if (TREE_CODE (t1) == SSA_NAME)
    ret = t1;
  else if (tree_fits_shwi_p (t1))
    value = tree_to_shwi (t1);
  else
    return NULL;

  if (!t2)
    return ret;
  else if (tree_fits_shwi_p (t2))
    value = tree_to_shwi (t2);
  else if (TREE_CODE (t2) == SSA_NAME)
    {
      /* Both operands SSA_NAMEs: no single candidate to return.  */
      if (ret)
	return NULL;
      else
	ret = t2;
    }

  if (value <= 4 && value >= -4)
    return ret;
  else
    return NULL;
}
1051
/* Return the SSA_NAME in T or T's operands.
   Return NULL if SSA_NAME cannot be found.  */

static tree
get_base_value (tree t)
{
  if (TREE_CODE (t) == SSA_NAME)
    return t;

  if (!BINARY_CLASS_P (t))
    return NULL;

  switch (TREE_OPERAND_LENGTH (t))
    {
    case 1:
      /* NOTE(review): a binary-class tree should have two operands, so
	 this case looks unreachable after the BINARY_CLASS_P guard —
	 confirm before relying on it.  */
      return strips_small_constant (TREE_OPERAND (t, 0), NULL);
    case 2:
      return strips_small_constant (TREE_OPERAND (t, 0),
				    TREE_OPERAND (t, 1));
    default:
      return NULL;
    }
}
1075
/* Check the compare STMT in LOOP.  If it compares an induction
   variable to a loop invariant, return true, and save
   LOOP_INVARIANT, COMPARE_CODE and LOOP_STEP.
   Otherwise return false and set LOOP_INVARIANT to NULL.  */

static bool
is_comparison_with_loop_invariant_p (gcond *stmt, struct loop *loop,
				     tree *loop_invariant,
				     enum tree_code *compare_code,
				     tree *loop_step,
				     tree *loop_iv_base)
{
  tree op0, op1, bound, base;
  affine_iv iv0, iv1;
  enum tree_code code;
  tree step;

  code = gimple_cond_code (stmt);
  *loop_invariant = NULL;

  /* Only ordinary comparisons are handled.  */
  switch (code)
    {
    case GT_EXPR:
    case GE_EXPR:
    case NE_EXPR:
    case LT_EXPR:
    case LE_EXPR:
    case EQ_EXPR:
      break;

    default:
      return false;
    }

  op0 = gimple_cond_lhs (stmt);
  op1 = gimple_cond_rhs (stmt);

  if ((TREE_CODE (op0) != SSA_NAME && TREE_CODE (op0) != INTEGER_CST)
      || (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op1) != INTEGER_CST))
    return false;
  if (!simple_iv (loop, loop_containing_stmt (stmt), op0, &iv0, true))
    return false;
  if (!simple_iv (loop, loop_containing_stmt (stmt), op1, &iv1, true))
    return false;
  if (TREE_CODE (iv0.step) != INTEGER_CST
      || TREE_CODE (iv1.step) != INTEGER_CST)
    return false;
  /* Exactly one side must evolve (the IV); the other must be invariant
     (zero step).  */
  if ((integer_zerop (iv0.step) && integer_zerop (iv1.step))
      || (!integer_zerop (iv0.step) && !integer_zerop (iv1.step)))
    return false;

  if (integer_zerop (iv0.step))
    {
      /* OP0 is the invariant; canonicalize so the IV is on the left by
	 inverting the comparison (except for EQ/NE, which are symmetric).  */
      if (code != NE_EXPR && code != EQ_EXPR)
	code = invert_tree_comparison (code, false);
      bound = iv0.base;
      base = iv1.base;
      if (tree_fits_shwi_p (iv1.step))
	step = iv1.step;
      else
	return false;
    }
  else
    {
      bound = iv1.base;
      base = iv0.base;
      if (tree_fits_shwi_p (iv0.step))
	step = iv0.step;
      else
	return false;
    }

  /* Reduce non-constant bound/base to an underlying SSA_NAME if any.  */
  if (TREE_CODE (bound) != INTEGER_CST)
    bound = get_base_value (bound);
  if (!bound)
    return false;
  if (TREE_CODE (base) != INTEGER_CST)
    base = get_base_value (base);
  if (!base)
    return false;

  *loop_invariant = bound;
  *compare_code = code;
  *loop_step = step;
  *loop_iv_base = base;
  return true;
}
1163
1164 /* Compare two SSA_NAMEs: returns TRUE if T1 and T2 are value coherent. */
1165
1166 static bool
1167 expr_coherent_p (tree t1, tree t2)
1168 {
1169 gimple stmt;
1170 tree ssa_name_1 = NULL;
1171 tree ssa_name_2 = NULL;
1172
1173 gcc_assert (TREE_CODE (t1) == SSA_NAME || TREE_CODE (t1) == INTEGER_CST);
1174 gcc_assert (TREE_CODE (t2) == SSA_NAME || TREE_CODE (t2) == INTEGER_CST);
1175
1176 if (t1 == t2)
1177 return true;
1178
1179 if (TREE_CODE (t1) == INTEGER_CST && TREE_CODE (t2) == INTEGER_CST)
1180 return true;
1181 if (TREE_CODE (t1) == INTEGER_CST || TREE_CODE (t2) == INTEGER_CST)
1182 return false;
1183
1184 /* Check to see if t1 is expressed/defined with t2. */
1185 stmt = SSA_NAME_DEF_STMT (t1);
1186 gcc_assert (stmt != NULL);
1187 if (is_gimple_assign (stmt))
1188 {
1189 ssa_name_1 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
1190 if (ssa_name_1 && ssa_name_1 == t2)
1191 return true;
1192 }
1193
1194 /* Check to see if t2 is expressed/defined with t1. */
1195 stmt = SSA_NAME_DEF_STMT (t2);
1196 gcc_assert (stmt != NULL);
1197 if (is_gimple_assign (stmt))
1198 {
1199 ssa_name_2 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
1200 if (ssa_name_2 && ssa_name_2 == t1)
1201 return true;
1202 }
1203
1204 /* Compare if t1 and t2's def_stmts are identical. */
1205 if (ssa_name_2 != NULL && ssa_name_1 == ssa_name_2)
1206 return true;
1207 else
1208 return false;
1209 }
1210
/* Predict branch probability of BB when BB contains a branch that compares
   an induction variable in LOOP with LOOP_IV_BASE_VAR to LOOP_BOUND_VAR.  The
   loop exit is compared using LOOP_BOUND_CODE, with step of LOOP_BOUND_STEP.

   E.g.
     for (int i = 0; i < bound; i++) {
       if (i < bound - 2)
	 computation_1();
       else
	 computation_2();
     }

  In this loop, we will predict the branch inside the loop to be taken.  */

static void
predict_iv_comparison (struct loop *loop, basic_block bb,
		       tree loop_bound_var,
		       tree loop_iv_base_var,
		       enum tree_code loop_bound_code,
		       int loop_bound_step)
{
  gimple stmt;
  tree compare_var, compare_base;
  enum tree_code compare_code;
  tree compare_step_var;
  edge then_edge;
  edge_iterator ei;

  /* Do not override predictions made by more reliable heuristics.  */
  if (predicted_by_p (bb, PRED_LOOP_ITERATIONS_GUESSED)
      || predicted_by_p (bb, PRED_LOOP_ITERATIONS)
      || predicted_by_p (bb, PRED_LOOP_EXIT))
    return;

  /* BB must end in a conditional comparing an IV with a loop invariant.  */
  stmt = last_stmt (bb);
  if (!stmt || gimple_code (stmt) != GIMPLE_COND)
    return;
  if (!is_comparison_with_loop_invariant_p (as_a <gcond *> (stmt),
					    loop, &compare_var,
					    &compare_code,
					    &compare_step_var,
					    &compare_base))
    return;

  /* Find the taken edge.  */
  FOR_EACH_EDGE (then_edge, ei, bb->succs)
    if (then_edge->flags & EDGE_TRUE_VALUE)
      break;

  /* When comparing an IV to a loop invariant, NE is more likely to be
     taken while EQ is more likely to be not-taken.  */
  if (compare_code == NE_EXPR)
    {
      predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
      return;
    }
  else if (compare_code == EQ_EXPR)
    {
      predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
      return;
    }

  /* The remaining heuristics only apply when the branch iterates over
     the same base IV as the loop bound condition.  */
  if (!expr_coherent_p (loop_iv_base_var, compare_base))
    return;

  /* If loop bound, base and compare bound are all constants, we can
     calculate the probability directly.  */
  if (tree_fits_shwi_p (loop_bound_var)
      && tree_fits_shwi_p (compare_var)
      && tree_fits_shwi_p (compare_base))
    {
      int probability;
      bool overflow, overall_overflow = false;
      widest_int compare_count, tem;

      /* (loop_bound - base) / compare_step */
      tem = wi::sub (wi::to_widest (loop_bound_var),
		     wi::to_widest (compare_base), SIGNED, &overflow);
      overall_overflow |= overflow;
      widest_int loop_count = wi::div_trunc (tem,
					     wi::to_widest (compare_step_var),
					     SIGNED, &overflow);
      overall_overflow |= overflow;

      /* Pick the sub-range counted by the branch depending on whether
	 the IV steps toward or away from the compare bound.  */
      if (!wi::neg_p (wi::to_widest (compare_step_var))
          ^ (compare_code == LT_EXPR || compare_code == LE_EXPR))
	{
	  /* (loop_bound - compare_bound) / compare_step */
	  tem = wi::sub (wi::to_widest (loop_bound_var),
			 wi::to_widest (compare_var), SIGNED, &overflow);
	  overall_overflow |= overflow;
	  compare_count = wi::div_trunc (tem, wi::to_widest (compare_step_var),
					 SIGNED, &overflow);
	  overall_overflow |= overflow;
	}
      else
	{
	  /* (compare_bound - base) / compare_step */
	  tem = wi::sub (wi::to_widest (compare_var),
			 wi::to_widest (compare_base), SIGNED, &overflow);
	  overall_overflow |= overflow;
	  compare_count = wi::div_trunc (tem, wi::to_widest (compare_step_var),
					 SIGNED, &overflow);
	  overall_overflow |= overflow;
	}
      /* Inclusive bounds cover one extra iteration.  */
      if (compare_code == LE_EXPR || compare_code == GE_EXPR)
	++compare_count;
      if (loop_bound_code == LE_EXPR || loop_bound_code == GE_EXPR)
	++loop_count;
      /* Clamp negative counts to zero before forming the ratio.  */
      if (wi::neg_p (compare_count))
        compare_count = 0;
      if (wi::neg_p (loop_count))
        loop_count = 0;
      if (loop_count == 0)
	probability = 0;
      else if (wi::cmps (compare_count, loop_count) == 1)
	probability = REG_BR_PROB_BASE;
      else
	{
	  /* Probability = compare_count / loop_count, scaled to
	     REG_BR_PROB_BASE.  */
	  tem = compare_count * REG_BR_PROB_BASE;
	  tem = wi::udiv_trunc (tem, loop_count);
	  probability = tem.to_uhwi ();
	}

      /* Only record the prediction if no intermediate step overflowed.  */
      if (!overall_overflow)
        predict_edge (then_edge, PRED_LOOP_IV_COMPARE, probability);

      return;
    }

  if (expr_coherent_p (loop_bound_var, compare_var))
    {
      /* The branch compares against (roughly) the same bound as the
	 loop condition: predict taken when the directions agree.  */
      if ((loop_bound_code == LT_EXPR || loop_bound_code == LE_EXPR)
	  && (compare_code == LT_EXPR || compare_code == LE_EXPR))
	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
      else if ((loop_bound_code == GT_EXPR || loop_bound_code == GE_EXPR)
	       && (compare_code == GT_EXPR || compare_code == GE_EXPR))
	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
      else if (loop_bound_code == NE_EXPR)
	{
	  /* If the loop backedge condition is "(i != bound)", we do
	     the comparison based on the step of IV:
	     * step < 0 : backedge condition is like (i > bound)
	     * step > 0 : backedge condition is like (i < bound)  */
	  gcc_assert (loop_bound_step != 0);
	  if (loop_bound_step > 0
	      && (compare_code == LT_EXPR
		  || compare_code == LE_EXPR))
	    predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
	  else if (loop_bound_step < 0
		   && (compare_code == GT_EXPR
		       || compare_code == GE_EXPR))
	    predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
	  else
	    predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
	}
      else
	/* The branch is predicted not-taken if loop_bound_code is
	   opposite with compare_code.  */
	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
    }
  else if (expr_coherent_p (loop_iv_base_var, compare_var))
    {
      /* For cases like:
	   for (i = s; i < h; i++)
	     if (i > s + 2) ....
	 The branch should be predicted taken.  */
      if (loop_bound_step > 0
	  && (compare_code == GT_EXPR || compare_code == GE_EXPR))
	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
      else if (loop_bound_step < 0
	       && (compare_code == LT_EXPR || compare_code == LE_EXPR))
	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
      else
	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
    }
}
1387
/* Predict for extra loop exits that will lead to EXIT_EDGE.  The extra loop
   exits result from short-circuit conditions that will generate an
   if_tmp.  E.g.:

   if (foo() || global > 10)
     break;

   This will be translated into:

   BB3:
     loop header...
   BB4:
     if foo() goto BB6 else goto BB5
   BB5:
     if global > 10 goto BB6 else goto BB7
   BB6:
     goto BB7
   BB7:
     iftmp = (PHI 0(BB5), 1(BB6))
     if iftmp == 1 goto BB8 else goto BB3
   BB8:
     outside of the loop...

   The edge BB7->BB8 is loop exit because BB8 is outside of the loop.
   From the dataflow, we can infer that BB4->BB6 and BB5->BB6 are also loop
   exits.  This function takes BB7->BB8 as input, and finds out the extra loop
   exits to predict them using PRED_LOOP_EXIT.  */

static void
predict_extra_loop_exits (edge exit_edge)
{
  unsigned i;
  bool check_value_one;
  gimple lhs_def_stmt;
  gphi *phi_stmt;
  tree cmp_rhs, cmp_lhs;
  gimple last;
  gcond *cmp_stmt;

  /* The exit block must end in "if (iftmp ==/!= 0/1)".  */
  last = last_stmt (exit_edge->src);
  if (!last)
    return;
  cmp_stmt = dyn_cast <gcond *> (last);
  if (!cmp_stmt)
    return;

  cmp_rhs = gimple_cond_rhs (cmp_stmt);
  cmp_lhs = gimple_cond_lhs (cmp_stmt);
  if (!TREE_CONSTANT (cmp_rhs)
      || !(integer_zerop (cmp_rhs) || integer_onep (cmp_rhs)))
    return;
  if (TREE_CODE (cmp_lhs) != SSA_NAME)
    return;

  /* If check_value_one is true, only the phi_args with value '1' will lead
     to loop exit.  Otherwise, only the phi_args with value '0' will lead to
     loop exit.  The XOR chain folds together the compared constant, the
     comparison code, and which of the two edges actually exits.  */
  check_value_one = (((integer_onep (cmp_rhs))
		    ^ (gimple_cond_code (cmp_stmt) == EQ_EXPR))
		    ^ ((exit_edge->flags & EDGE_TRUE_VALUE) != 0));

  /* The iftmp must be merged by a PHI from the short-circuit blocks.  */
  lhs_def_stmt = SSA_NAME_DEF_STMT (cmp_lhs);
  if (!lhs_def_stmt)
    return;

  phi_stmt = dyn_cast <gphi *> (lhs_def_stmt);
  if (!phi_stmt)
    return;

  for (i = 0; i < gimple_phi_num_args (phi_stmt); i++)
    {
      edge e1;
      edge_iterator ei;
      tree val = gimple_phi_arg_def (phi_stmt, i);
      edge e = gimple_phi_arg_edge (phi_stmt, i);

      /* Skip PHI arguments that do not lead to the loop exit.  */
      if (!TREE_CONSTANT (val) || !(integer_zerop (val) || integer_onep (val)))
	continue;
      if ((check_value_one ^ integer_onep (val)) == 1)
	continue;
      /* If the predecessor branches, predict the edge itself; otherwise
	 it is a forwarder block, so predict the paths feeding into it.  */
      if (EDGE_COUNT (e->src->succs) != 1)
	{
	  predict_paths_leading_to_edge (e, PRED_LOOP_EXIT, NOT_TAKEN);
	  continue;
	}

      FOR_EACH_EDGE (e1, ei, e->src->preds)
	predict_paths_leading_to_edge (e1, PRED_LOOP_EXIT, NOT_TAKEN);
    }
}
1478
/* Predict edge probabilities by exploiting loop structure.  */

static void
predict_loops (void)
{
  struct loop *loop;

  /* Try to predict out blocks in a loop that are not part of a
     natural loop.  */
  FOR_EACH_LOOP (loop, 0)
    {
      basic_block bb, *bbs;
      unsigned j, n_exits;
      vec<edge> exits;
      struct tree_niter_desc niter_desc;
      edge ex;
      struct nb_iter_bound *nb_iter;
      enum tree_code loop_bound_code = ERROR_MARK;
      tree loop_bound_step = NULL;
      tree loop_bound_var = NULL;
      tree loop_iv_base = NULL;
      gcond *stmt = NULL;

      exits = get_loop_exit_edges (loop);
      n_exits = exits.length ();
      if (!n_exits)
	{
	  exits.release ();
	  continue;
	}

      /* Predict each exit edge from an iteration-count estimate.  */
      FOR_EACH_VEC_ELT (exits, j, ex)
	{
	  tree niter = NULL;
	  HOST_WIDE_INT nitercst;
	  int max = PARAM_VALUE (PARAM_MAX_PREDICTED_ITERATIONS);
	  int probability;
	  enum br_predictor predictor;

	  predict_extra_loop_exits (ex);

	  /* Prefer an exact analysis; fall back to brute-force
	     evaluation of the first few iterations.  */
	  if (number_of_iterations_exit (loop, ex, &niter_desc, false, false))
	    niter = niter_desc.niter;
	  if (!niter || TREE_CODE (niter_desc.niter) != INTEGER_CST)
	    niter = loop_niter_by_eval (loop, ex);

	  if (TREE_CODE (niter) == INTEGER_CST)
	    {
	      /* Cap the predicted iteration count at MAX.  */
	      if (tree_fits_uhwi_p (niter)
		  && max
		  && compare_tree_int (niter, max - 1) == -1)
		nitercst = tree_to_uhwi (niter) + 1;
	      else
		nitercst = max;
	      predictor = PRED_LOOP_ITERATIONS;
	    }
	  /* If we have just one exit and we can derive some information about
	     the number of iterations of the loop from the statements inside
	     the loop, use it to predict this exit.  */
	  else if (n_exits == 1)
	    {
	      nitercst = estimated_stmt_executions_int (loop);
	      if (nitercst < 0)
		continue;
	      if (nitercst > max)
		nitercst = max;

	      predictor = PRED_LOOP_ITERATIONS_GUESSED;
	    }
	  else
	    continue;

	  /* If the prediction for number of iterations is zero, do not
	     predict the exit edges.  */
	  if (nitercst == 0)
	    continue;

	  /* Exit probability is 1/nitercst, rounded to nearest.  */
	  probability = ((REG_BR_PROB_BASE + nitercst / 2) / nitercst);
	  predict_edge (ex, predictor, probability);
	}
      exits.release ();

      /* Find information about loop bound variables.  */
      for (nb_iter = loop->bounds; nb_iter;
	   nb_iter = nb_iter->next)
	if (nb_iter->stmt
	    && gimple_code (nb_iter->stmt) == GIMPLE_COND)
	  {
	    stmt = as_a <gcond *> (nb_iter->stmt);
	    break;
	  }
      if (!stmt && last_stmt (loop->header)
	  && gimple_code (last_stmt (loop->header)) == GIMPLE_COND)
	stmt = as_a <gcond *> (last_stmt (loop->header));
      if (stmt)
	is_comparison_with_loop_invariant_p (stmt, loop,
					     &loop_bound_var,
					     &loop_bound_code,
					     &loop_bound_step,
					     &loop_iv_base);

      bbs = get_loop_body (loop);

      for (j = 0; j < loop->num_nodes; j++)
	{
	  int header_found = 0;
	  edge e;
	  edge_iterator ei;

	  bb = bbs[j];

	  /* Bypass loop heuristics on continue statement.  These
	     statements construct loops via "non-loop" constructs
	     in the source language and are better to be handled
	     separately.  */
	  if (predicted_by_p (bb, PRED_CONTINUE))
	    continue;

	  /* Loop branch heuristics - predict an edge back to a
	     loop's head as taken.  */
	  if (bb == loop->latch)
	    {
	      e = find_edge (loop->latch, loop->header);
	      if (e)
		{
		  header_found = 1;
		  predict_edge_def (e, PRED_LOOP_BRANCH, TAKEN);
		}
	    }

	  /* Loop exit heuristics - predict an edge exiting the loop if the
	     conditional has no loop header successors as not taken.  */
	  if (!header_found
	      /* If we already used more reliable loop exit predictors, do not
		 bother with PRED_LOOP_EXIT.  */
	      && !predicted_by_p (bb, PRED_LOOP_ITERATIONS_GUESSED)
	      && !predicted_by_p (bb, PRED_LOOP_ITERATIONS))
	    {
	      /* For loop with many exits we don't want to predict all exits
		 with the pretty large probability, because if all exits are
		 considered in row, the loop would be predicted to iterate
		 almost never.  The code to divide probability by number of
		 exits is very rough.  It should compute the number of exits
		 taken in each path through function (not the overall number
		 of exits that might be a lot higher for loops with wide switch
		 statements in them) and compute n-th square root.

		 We limit the minimal probability by 2% to avoid
		 EDGE_PROBABILITY_RELIABLE from trusting the branch prediction
		 as this was causing regression in perl benchmark containing such
		 a wide loop.  */

	      int probability = ((REG_BR_PROB_BASE
				  - predictor_info [(int) PRED_LOOP_EXIT].hitrate)
				 / n_exits);
	      if (probability < HITRATE (2))
		probability = HITRATE (2);
	      FOR_EACH_EDGE (e, ei, bb->succs)
		if (e->dest->index < NUM_FIXED_BLOCKS
		    || !flow_bb_inside_loop_p (loop, e->dest))
		  predict_edge (e, PRED_LOOP_EXIT, probability);
	    }
	  /* Apply the IV-vs-invariant heuristic inside the loop body.  */
	  if (loop_bound_var)
	    predict_iv_comparison (loop, bb, loop_bound_var, loop_iv_base,
				   loop_bound_code,
				   tree_to_shwi (loop_bound_step));
	}

      /* Free basic blocks from get_loop_body.  */
      free (bbs);
    }
}
1651
/* Attempt to predict probabilities of BB outgoing edges using local
   properties.  */
static void
bb_estimate_probability_locally (basic_block bb)
{
  rtx_insn *last_insn = BB_END (bb);
  rtx cond;

  if (! can_predict_insn_p (last_insn))
    return;
  cond = get_condition (last_insn, NULL, false, false);
  if (! cond)
    return;

  /* Try "pointer heuristic."
     A comparison ptr == 0 is predicted as false.
     Similarly, a comparison ptr1 == ptr2 is predicted as false.  */
  if (COMPARISON_P (cond)
      && ((REG_P (XEXP (cond, 0)) && REG_POINTER (XEXP (cond, 0)))
	  || (REG_P (XEXP (cond, 1)) && REG_POINTER (XEXP (cond, 1)))))
    {
      if (GET_CODE (cond) == EQ)
	predict_insn_def (last_insn, PRED_POINTER, NOT_TAKEN);
      else if (GET_CODE (cond) == NE)
	predict_insn_def (last_insn, PRED_POINTER, TAKEN);
    }
  else

  /* Try "opcode heuristic."
     EQ tests are usually false and NE tests are usually true.  Also,
     most quantities are positive, so we can make the appropriate guesses
     about signed comparisons against zero.  */
    switch (GET_CODE (cond))
      {
      case CONST_INT:
	/* Unconditional branch.  */
	predict_insn_def (last_insn, PRED_UNCONDITIONAL,
			  cond == const0_rtx ? NOT_TAKEN : TAKEN);
	break;

      case EQ:
      case UNEQ:
	/* Floating point comparisons appears to behave in a very
	   unpredictable way because of special role of = tests in
	   FP code.  */
	if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
	  ;
	/* Comparisons with 0 are often used for booleans and there is
	   nothing useful to predict about them.  */
	else if (XEXP (cond, 1) == const0_rtx
		 || XEXP (cond, 0) == const0_rtx)
	  ;
	else
	  predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, NOT_TAKEN);
	break;

      case NE:
      case LTGT:
	/* Floating point comparisons appears to behave in a very
	   unpredictable way because of special role of = tests in
	   FP code.  */
	if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
	  ;
	/* Comparisons with 0 are often used for booleans and there is
	   nothing useful to predict about them.  */
	else if (XEXP (cond, 1) == const0_rtx
		 || XEXP (cond, 0) == const0_rtx)
	  ;
	else
	  predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, TAKEN);
	break;

      case ORDERED:
	predict_insn_def (last_insn, PRED_FPOPCODE, TAKEN);
	break;

      case UNORDERED:
	predict_insn_def (last_insn, PRED_FPOPCODE, NOT_TAKEN);
	break;

      case LE:
      case LT:
	/* x <= 0, x < 1, x <= -1: likely false — most values are
	   positive.  */
	if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
	    || XEXP (cond, 1) == constm1_rtx)
	  predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, NOT_TAKEN);
	break;

      case GE:
      case GT:
	/* x >= 0, x > 1, x >= -1: likely true for the same reason.  */
	if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
	    || XEXP (cond, 1) == constm1_rtx)
	  predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, TAKEN);
	break;

      default:
	break;
      }
}
1750
/* Set edge->probability for each successor edge of BB, by first applying
   the local RTL heuristics and then combining the collected predictions
   into a single probability per edge.  */
void
guess_outgoing_edge_probabilities (basic_block bb)
{
  bb_estimate_probability_locally (bb);
  combine_predictions_for_insn (BB_END (bb), bb);
}
1758 \f
1759 static tree expr_expected_value (tree, bitmap, enum br_predictor *predictor);
1760
/* Helper function for expr_expected_value.  Compute the expected value of
   the rhs "OP0 CODE OP1" of type TYPE; for a single-operand rhs OP1 is
   unused.  VISITED guards against SSA_NAME cycles.  If PREDICTOR is
   non-NULL, store there the predictor the result is based on.  Return the
   expected constant value, or NULL if none can be determined.  */

static tree
expr_expected_value_1 (tree type, tree op0, enum tree_code code,
		       tree op1, bitmap visited, enum br_predictor *predictor)
{
  gimple def;

  if (predictor)
    *predictor = PRED_UNCONDITIONAL;

  if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
    {
      if (TREE_CONSTANT (op0))
	return op0;

      if (code != SSA_NAME)
	return NULL_TREE;

      def = SSA_NAME_DEF_STMT (op0);

      /* If we were already here, break the infinite cycle.  */
      if (!bitmap_set_bit (visited, SSA_NAME_VERSION (op0)))
	return NULL;

      if (gimple_code (def) == GIMPLE_PHI)
	{
	  /* All the arguments of the PHI node must have the same constant
	     length.  */
	  int i, n = gimple_phi_num_args (def);
	  tree val = NULL, new_val;

	  for (i = 0; i < n; i++)
	    {
	      tree arg = PHI_ARG_DEF (def, i);
	      enum br_predictor predictor2;

	      /* If this PHI has itself as an argument, we cannot
		 determine the string length of this argument.  However,
		 if we can find an expected constant value for the other
		 PHI args then we can still be sure that this is
		 likely a constant.  So be optimistic and just
		 continue with the next argument.  */
	      if (arg == PHI_RESULT (def))
		continue;

	      new_val = expr_expected_value (arg, visited, &predictor2);

	      /* It is difficult to combine value predictors.  Simply assume
		 that later predictor is weaker and take its prediction.  */
	      if (predictor && *predictor < predictor2)
		*predictor = predictor2;
	      if (!new_val)
		return NULL;
	      if (!val)
		val = new_val;
	      else if (!operand_equal_p (val, new_val, false))
		return NULL;
	    }
	  return val;
	}
      if (is_gimple_assign (def))
	{
	  if (gimple_assign_lhs (def) != op0)
	    return NULL;

	  /* Recurse into the defining assignment's rhs.  */
	  return expr_expected_value_1 (TREE_TYPE (gimple_assign_lhs (def)),
					gimple_assign_rhs1 (def),
					gimple_assign_rhs_code (def),
					gimple_assign_rhs2 (def),
					visited, predictor);
	}

      if (is_gimple_call (def))
	{
	  tree decl = gimple_call_fndecl (def);
	  if (!decl)
	    {
	      /* Internal IFN_BUILTIN_EXPECT carries the expected value as
		 arg 1 and the predictor enum as arg 2.  */
	      if (gimple_call_internal_p (def)
		  && gimple_call_internal_fn (def) == IFN_BUILTIN_EXPECT)
		{
		  gcc_assert (gimple_call_num_args (def) == 3);
		  tree val = gimple_call_arg (def, 0);
		  if (TREE_CONSTANT (val))
		    return val;
		  if (predictor)
		    {
		      tree val2 = gimple_call_arg (def, 2);
		      gcc_assert (TREE_CODE (val2) == INTEGER_CST
				  && tree_fits_uhwi_p (val2)
				  && tree_to_uhwi (val2) < END_PREDICTORS);
		      *predictor = (enum br_predictor) tree_to_uhwi (val2);
		    }
		  return gimple_call_arg (def, 1);
		}
	      return NULL;
	    }
	  if (DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
	    switch (DECL_FUNCTION_CODE (decl))
	      {
	      case BUILT_IN_EXPECT:
		{
		  tree val;
		  if (gimple_call_num_args (def) != 2)
		    return NULL;
		  val = gimple_call_arg (def, 0);
		  if (TREE_CONSTANT (val))
		    return val;
		  if (predictor)
		    *predictor = PRED_BUILTIN_EXPECT;
		  return gimple_call_arg (def, 1);
		}

	      case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_N:
	      case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_1:
	      case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_2:
	      case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_4:
	      case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_8:
	      case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_16:
	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE:
	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_N:
	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_1:
	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_2:
	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_4:
	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_8:
	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_16:
		/* Assume that any given atomic operation has low contention,
		   and thus the compare-and-swap operation succeeds.  */
		if (predictor)
		  *predictor = PRED_COMPARE_AND_SWAP;
		return boolean_true_node;
	      default:
		break;
	      }
	}

      return NULL;
    }

  if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS)
    {
      tree res;
      enum br_predictor predictor2;
      /* Fold the operation if both operands have expected values.  */
      op0 = expr_expected_value (op0, visited, predictor);
      if (!op0)
	return NULL;
      op1 = expr_expected_value (op1, visited, &predictor2);
      if (predictor && *predictor < predictor2)
	*predictor = predictor2;
      if (!op1)
	return NULL;
      res = fold_build2 (code, type, op0, op1);
      if (TREE_CONSTANT (res))
	return res;
      return NULL;
    }
  if (get_gimple_rhs_class (code) == GIMPLE_UNARY_RHS)
    {
      tree res;
      op0 = expr_expected_value (op0, visited, predictor);
      if (!op0)
	return NULL;
      res = fold_build1 (code, type, op0);
      if (TREE_CONSTANT (res))
	return res;
      return NULL;
    }
  return NULL;
}
1930
1931 /* Return constant EXPR will likely have at execution time, NULL if unknown.
1932 The function is used by builtin_expect branch predictor so the evidence
1933 must come from this construct and additional possible constant folding.
1934
1935 We may want to implement more involved value guess (such as value range
1936 propagation based prediction), but such tricks shall go to new
1937 implementation. */
1938
1939 static tree
1940 expr_expected_value (tree expr, bitmap visited,
1941 enum br_predictor *predictor)
1942 {
1943 enum tree_code code;
1944 tree op0, op1;
1945
1946 if (TREE_CONSTANT (expr))
1947 {
1948 if (predictor)
1949 *predictor = PRED_UNCONDITIONAL;
1950 return expr;
1951 }
1952
1953 extract_ops_from_tree (expr, &code, &op0, &op1);
1954 return expr_expected_value_1 (TREE_TYPE (expr),
1955 op0, code, op1, visited, predictor);
1956 }
1957 \f
/* Predict using opcode of the last statement in basic block.  */
static void
tree_predict_by_opcode (basic_block bb)
{
  gimple stmt = last_stmt (bb);
  edge then_edge;
  tree op0, op1;
  tree type;
  tree val;
  enum tree_code cmp;
  bitmap visited;
  edge_iterator ei;
  enum br_predictor predictor;

  if (!stmt || gimple_code (stmt) != GIMPLE_COND)
    return;
  /* Find the edge taken when the condition is true.  */
  FOR_EACH_EDGE (then_edge, ei, bb->succs)
    if (then_edge->flags & EDGE_TRUE_VALUE)
      break;
  op0 = gimple_cond_lhs (stmt);
  op1 = gimple_cond_rhs (stmt);
  cmp = gimple_cond_code (stmt);
  type = TREE_TYPE (op0);
  /* First try the __builtin_expect-style expected-value machinery.  */
  visited = BITMAP_ALLOC (NULL);
  val = expr_expected_value_1 (boolean_type_node, op0, cmp, op1, visited,
			       &predictor);
  BITMAP_FREE (visited);
  if (val && TREE_CODE (val) == INTEGER_CST)
    {
      if (predictor == PRED_BUILTIN_EXPECT)
	{
	  int percent = PARAM_VALUE (BUILTIN_EXPECT_PROBABILITY);

	  gcc_assert (percent >= 0 && percent <= 100);
	  if (integer_zerop (val))
	    percent = 100 - percent;
	  predict_edge (then_edge, PRED_BUILTIN_EXPECT, HITRATE (percent));
	}
      else
	predict_edge (then_edge, predictor,
		      integer_zerop (val) ? NOT_TAKEN : TAKEN);
    }
  /* Try "pointer heuristic."
     A comparison ptr == 0 is predicted as false.
     Similarly, a comparison ptr1 == ptr2 is predicted as false.  */
  if (POINTER_TYPE_P (type))
    {
      if (cmp == EQ_EXPR)
	predict_edge_def (then_edge, PRED_TREE_POINTER, NOT_TAKEN);
      else if (cmp == NE_EXPR)
	predict_edge_def (then_edge, PRED_TREE_POINTER, TAKEN);
    }
  else

  /* Try "opcode heuristic."
     EQ tests are usually false and NE tests are usually true.  Also,
     most quantities are positive, so we can make the appropriate guesses
     about signed comparisons against zero.  */
    switch (cmp)
      {
      case EQ_EXPR:
      case UNEQ_EXPR:
	/* Floating point comparisons appears to behave in a very
	   unpredictable way because of special role of = tests in
	   FP code.  */
	if (FLOAT_TYPE_P (type))
	  ;
	/* Comparisons with 0 are often used for booleans and there is
	   nothing useful to predict about them.  */
	else if (integer_zerop (op0) || integer_zerop (op1))
	  ;
	else
	  predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, NOT_TAKEN);
	break;

      case NE_EXPR:
      case LTGT_EXPR:
	/* Floating point comparisons appears to behave in a very
	   unpredictable way because of special role of = tests in
	   FP code.  */
	if (FLOAT_TYPE_P (type))
	  ;
	/* Comparisons with 0 are often used for booleans and there is
	   nothing useful to predict about them.  */
	else if (integer_zerop (op0)
		 || integer_zerop (op1))
	  ;
	else
	  predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, TAKEN);
	break;

      case ORDERED_EXPR:
	predict_edge_def (then_edge, PRED_TREE_FPOPCODE, TAKEN);
	break;

      case UNORDERED_EXPR:
	predict_edge_def (then_edge, PRED_TREE_FPOPCODE, NOT_TAKEN);
	break;

      case LE_EXPR:
      case LT_EXPR:
	/* x <= 0, x < 1, x <= -1: likely false — most values are
	   positive.  */
	if (integer_zerop (op1)
	    || integer_onep (op1)
	    || integer_all_onesp (op1)
	    || real_zerop (op1)
	    || real_onep (op1)
	    || real_minus_onep (op1))
	  predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, NOT_TAKEN);
	break;

      case GE_EXPR:
      case GT_EXPR:
	/* x >= 0, x > 1, x >= -1: likely true for the same reason.  */
	if (integer_zerop (op1)
	    || integer_onep (op1)
	    || integer_all_onesp (op1)
	    || real_zerop (op1)
	    || real_onep (op1)
	    || real_minus_onep (op1))
	  predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, TAKEN);
	break;

      default:
	break;
      }
}
2083
2084 /* Try to guess whether the value of return means error code. */
2085
2086 static enum br_predictor
2087 return_prediction (tree val, enum prediction *prediction)
2088 {
2089 /* VOID. */
2090 if (!val)
2091 return PRED_NO_PREDICTION;
2092 /* Different heuristics for pointers and scalars. */
2093 if (POINTER_TYPE_P (TREE_TYPE (val)))
2094 {
2095 /* NULL is usually not returned. */
2096 if (integer_zerop (val))
2097 {
2098 *prediction = NOT_TAKEN;
2099 return PRED_NULL_RETURN;
2100 }
2101 }
2102 else if (INTEGRAL_TYPE_P (TREE_TYPE (val)))
2103 {
2104 /* Negative return values are often used to indicate
2105 errors. */
2106 if (TREE_CODE (val) == INTEGER_CST
2107 && tree_int_cst_sgn (val) < 0)
2108 {
2109 *prediction = NOT_TAKEN;
2110 return PRED_NEGATIVE_RETURN;
2111 }
2112 /* Constant return values seems to be commonly taken.
2113 Zero/one often represent booleans so exclude them from the
2114 heuristics. */
2115 if (TREE_CONSTANT (val)
2116 && (!integer_zerop (val) && !integer_onep (val)))
2117 {
2118 *prediction = TAKEN;
2119 return PRED_CONST_RETURN;
2120 }
2121 }
2122 return PRED_NO_PREDICTION;
2123 }
2124
/* Find the basic block with return expression and look up for possible
   return value trying to apply RETURN_PREDICTION heuristics.  */
static void
apply_return_prediction (void)
{
  greturn *return_stmt = NULL;
  tree return_val;
  edge e;
  gphi *phi;
  int phi_num_args, i;
  enum br_predictor pred;
  enum prediction direction;
  edge_iterator ei;

  /* Locate a predecessor of the exit block that ends in a return.  */
  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    {
      gimple last = last_stmt (e->src);
      if (last
	  && gimple_code (last) == GIMPLE_RETURN)
	{
	  return_stmt = as_a <greturn *> (last);
	  break;
	}
    }
  if (!e)
    return;
  return_val = gimple_return_retval (return_stmt);
  if (!return_val)
    return;
  /* Only a return value merged by a PHI gives us per-path information.  */
  if (TREE_CODE (return_val) != SSA_NAME
      || !SSA_NAME_DEF_STMT (return_val)
      || gimple_code (SSA_NAME_DEF_STMT (return_val)) != GIMPLE_PHI)
    return;
  phi = as_a <gphi *> (SSA_NAME_DEF_STMT (return_val));
  phi_num_args = gimple_phi_num_args (phi);
  pred = return_prediction (PHI_ARG_DEF (phi, 0), &direction);

  /* Avoid the degenerate case where all return values from the function
     belong to same category (ie they are all positive constants)
     so we can hardly say something about them.  */
  for (i = 1; i < phi_num_args; i++)
    if (pred != return_prediction (PHI_ARG_DEF (phi, i), &direction))
      break;
  if (i != phi_num_args)
    for (i = 0; i < phi_num_args; i++)
      {
	pred = return_prediction (PHI_ARG_DEF (phi, i), &direction);
	if (pred != PRED_NO_PREDICTION)
	  predict_paths_leading_to_edge (gimple_phi_arg_edge (phi, i), pred,
					 direction);
      }
}
2177
/* Look for basic block that contains unlikely to happen events
   (such as noreturn calls) and mark all paths leading to execution
   of this basic blocks as unlikely.  */

static void
tree_bb_level_predictions (void)
{
  basic_block bb;
  bool has_return_edges = false;
  edge e;
  edge_iterator ei;

  /* Check whether the function has at least one normal (non-abnormal,
     non-fake, non-EH) return edge; the noreturn heuristic below only
     makes sense in that case.  */
  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    if (!(e->flags & (EDGE_ABNORMAL | EDGE_FAKE | EDGE_EH)))
      {
	has_return_edges = true;
	break;
      }

  apply_return_prediction ();

  FOR_EACH_BB_FN (bb, cfun)
    {
      gimple_stmt_iterator gsi;

      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple stmt = gsi_stmt (gsi);
	  tree decl;

	  if (is_gimple_call (stmt))
	    {
	      /* Paths reaching a noreturn call are unlikely, provided
		 the function can return at all.  */
	      if ((gimple_call_flags (stmt) & ECF_NORETURN)
		  && has_return_edges)
		predict_paths_leading_to (bb, PRED_NORETURN,
					  NOT_TAKEN);
	      /* Likewise for calls to functions the user marked with
		 __attribute__((cold)).  */
	      decl = gimple_call_fndecl (stmt);
	      if (decl
		  && lookup_attribute ("cold",
				       DECL_ATTRIBUTES (decl)))
		predict_paths_leading_to (bb, PRED_COLD_FUNCTION,
					  NOT_TAKEN);
	    }
	  else if (gimple_code (stmt) == GIMPLE_PREDICT)
	    {
	      predict_paths_leading_to (bb, gimple_predict_predictor (stmt),
					gimple_predict_outcome (stmt));
	      /* Keep GIMPLE_PREDICT around so early inlining will propagate
	         hints to callers.  */
	    }
	}
    }
}
2231
2232 #ifdef ENABLE_CHECKING
2233
/* Callback for hash_map::traverse, asserts that the pointer map is
   empty, i.e. that every recorded prediction was consumed by
   combine_predictions_for_bb.  Always returns false so traversal
   continues over all entries.  */

bool
assert_is_empty (const_basic_block const &, edge_prediction *const &value,
		 void *)
{
  gcc_assert (!value);
  return false;
}
2244 #endif
2245
/* Predict branch probabilities and estimate profile for basic block BB.
   Applies, per outgoing edge: hot/cold user-label heuristics, the early
   return heuristic and the guarded-call heuristic, then opcode-based
   heuristics for BB's own condition.  */

static void
tree_estimate_probability_bb (basic_block bb)
{
  edge e;
  edge_iterator ei;
  gimple last;

  FOR_EACH_EDGE (e, ei, bb->succs)
    {
      /* Predict edges to user labels with attributes.  */
      if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
	{
	  gimple_stmt_iterator gi;
	  for (gi = gsi_start_bb (e->dest); !gsi_end_p (gi); gsi_next (&gi))
	    {
	      glabel *label_stmt = dyn_cast <glabel *> (gsi_stmt (gi));
	      tree decl;

	      /* Labels can only appear at the start of a block; stop at
		 the first non-label statement.  */
	      if (!label_stmt)
		break;
	      decl = gimple_label_label (label_stmt);
	      if (DECL_ARTIFICIAL (decl))
		continue;

	      /* Finally, we have a user-defined label.  */
	      if (lookup_attribute ("cold", DECL_ATTRIBUTES (decl)))
		predict_edge_def (e, PRED_COLD_LABEL, NOT_TAKEN);
	      else if (lookup_attribute ("hot", DECL_ATTRIBUTES (decl)))
		predict_edge_def (e, PRED_HOT_LABEL, TAKEN);
	    }
	}

      /* Predict early returns to be probable, as we've already taken
         care for error returns and other cases are often used for
         fast paths through function.

	 Since we've already removed the return statements, we are
	 looking for CFG like:

	   if (conditional)
	     {
	       ..
	       goto return_block
	     }
	   some other blocks
	 return_block:
	   return_stmt.  */
      if (e->dest != bb->next_bb
	  && e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
	  && single_succ_p (e->dest)
	  && single_succ_edge (e->dest)->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)
	  && (last = last_stmt (e->dest)) != NULL
	  && gimple_code (last) == GIMPLE_RETURN)
	{
	  edge e1;
	  edge_iterator ei1;

	  /* Do not override the more specific return-value heuristics
	     (null/const/negative return) applied earlier.  When BB has
	     a single successor, predict its predecessors instead.  */
	  if (single_succ_p (bb))
	    {
	      FOR_EACH_EDGE (e1, ei1, bb->preds)
		if (!predicted_by_p (e1->src, PRED_NULL_RETURN)
		    && !predicted_by_p (e1->src, PRED_CONST_RETURN)
		    && !predicted_by_p (e1->src, PRED_NEGATIVE_RETURN))
		  predict_edge_def (e1, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
	    }
	  else
	    if (!predicted_by_p (e->src, PRED_NULL_RETURN)
		&& !predicted_by_p (e->src, PRED_CONST_RETURN)
		&& !predicted_by_p (e->src, PRED_NEGATIVE_RETURN))
	      predict_edge_def (e, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
	}

      /* Look for block we are guarding (ie we dominate it,
	 but it doesn't postdominate us).  */
      if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun) && e->dest != bb
	  && dominated_by_p (CDI_DOMINATORS, e->dest, e->src)
	  && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e->dest))
	{
	  gimple_stmt_iterator bi;

	  /* The call heuristic claims that a guarded function call
	     is improbable.  This is because such calls are often used
	     to signal exceptional situations such as printing error
	     messages.  */
	  for (bi = gsi_start_bb (e->dest); !gsi_end_p (bi);
	       gsi_next (&bi))
	    {
	      gimple stmt = gsi_stmt (bi);
	      if (is_gimple_call (stmt)
		  /* Constant and pure calls are hardly used to signalize
		     something exceptional.  */
		  && gimple_has_side_effects (stmt))
		{
		  predict_edge_def (e, PRED_CALL, NOT_TAKEN);
		  break;
		}
	    }
	}
    }
  tree_predict_by_opcode (bb);
}
2349
/* Predict branch probabilities and estimate profile of the tree CFG.
   This function can be called from the loop optimizers to recompute
   the profile information.  */

void
tree_estimate_probability (void)
{
  basic_block bb;

  /* Fake edges keep the postdominator info well-defined in the presence
     of noreturn calls and infinite loops.  */
  add_noreturn_fake_exit_edges ();
  connect_infinite_loops_to_exit ();
  /* We use loop_niter_by_eval, which requires that the loops have
     preheaders.  */
  create_preheaders (CP_SIMPLE_PREHEADERS);
  calculate_dominance_info (CDI_POST_DOMINATORS);

  /* BB_PREDICTIONS collects per-edge predictions; it is consumed (and
     expected to be emptied) by combine_predictions_for_bb below.  */
  bb_predictions = new hash_map<const_basic_block, edge_prediction *>;
  tree_bb_level_predictions ();
  record_loop_exits ();

  if (number_of_loops (cfun) > 1)
    predict_loops ();

  FOR_EACH_BB_FN (bb, cfun)
    tree_estimate_probability_bb (bb);

  FOR_EACH_BB_FN (bb, cfun)
    combine_predictions_for_bb (bb);

#ifdef ENABLE_CHECKING
  bb_predictions->traverse<void *, assert_is_empty> (NULL);
#endif
  delete bb_predictions;
  bb_predictions = NULL;

  /* Turn the combined edge probabilities into basic block frequencies.  */
  estimate_bb_frequencies (false);
  free_dominance_info (CDI_POST_DOMINATORS);
  remove_fake_exit_edges ();
}
2389 \f
/* Predict edges to successors of CUR whose sources are not postdominated by
   BB by PRED and recurse to all postdominators.  VISITED guards against
   revisiting the same block (see comment below).  */

static void
predict_paths_for_bb (basic_block cur, basic_block bb,
		      enum br_predictor pred,
		      enum prediction taken,
		      bitmap visited)
{
  edge e;
  edge_iterator ei;
  basic_block son;

  /* We are looking for all edges forming edge cut induced by
     set of all blocks postdominated by BB.  */
  FOR_EACH_EDGE (e, ei, cur->preds)
    if (e->src->index >= NUM_FIXED_BLOCKS
	&& !dominated_by_p (CDI_POST_DOMINATORS, e->src, bb))
      {
	edge e2;
	edge_iterator ei2;
	bool found = false;

	/* Ignore fake edges and eh, we predict them as not taken anyway.  */
	if (e->flags & (EDGE_EH | EDGE_FAKE))
	  continue;
	gcc_assert (bb == cur || dominated_by_p (CDI_POST_DOMINATORS, cur, bb));

	/* See if there is an edge from e->src that is not abnormal
	   and does not lead to BB.  */
	FOR_EACH_EDGE (e2, ei2, e->src->succs)
	  if (e2 != e
	      && !(e2->flags & (EDGE_EH | EDGE_FAKE))
	      && !dominated_by_p (CDI_POST_DOMINATORS, e2->dest, bb))
	    {
	      found = true;
	      break;
	    }

	/* If there is non-abnormal path leaving e->src, predict edge
	   using predictor.  Otherwise we need to look for paths
	   leading to e->src.

	   The second may lead to infinite loop in the case we are predicting
	   regions that are only reachable by abnormal edges.  We simply
	   prevent visiting given BB twice.  */
	if (found)
	  predict_edge_def (e, pred, taken);
	else if (bitmap_set_bit (visited, e->src->index))
	  predict_paths_for_bb (e->src, e->src, pred, taken, visited);
      }
  /* Recurse into the blocks this one postdominates; their incoming edges
     are part of the same edge cut.  */
  for (son = first_dom_son (CDI_POST_DOMINATORS, cur);
       son;
       son = next_dom_son (CDI_POST_DOMINATORS, son))
    predict_paths_for_bb (son, bb, pred, taken, visited);
}
2446
2447 /* Sets branch probabilities according to PREDiction and
2448 FLAGS. */
2449
2450 static void
2451 predict_paths_leading_to (basic_block bb, enum br_predictor pred,
2452 enum prediction taken)
2453 {
2454 bitmap visited = BITMAP_ALLOC (NULL);
2455 predict_paths_for_bb (bb, bb, pred, taken, visited);
2456 BITMAP_FREE (visited);
2457 }
2458
2459 /* Like predict_paths_leading_to but take edge instead of basic block. */
2460
2461 static void
2462 predict_paths_leading_to_edge (edge e, enum br_predictor pred,
2463 enum prediction taken)
2464 {
2465 bool has_nonloop_edge = false;
2466 edge_iterator ei;
2467 edge e2;
2468
2469 basic_block bb = e->src;
2470 FOR_EACH_EDGE (e2, ei, bb->succs)
2471 if (e2->dest != e->src && e2->dest != e->dest
2472 && !(e->flags & (EDGE_EH | EDGE_FAKE))
2473 && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e2->dest))
2474 {
2475 has_nonloop_edge = true;
2476 break;
2477 }
2478 if (!has_nonloop_edge)
2479 {
2480 bitmap visited = BITMAP_ALLOC (NULL);
2481 predict_paths_for_bb (bb, bb, pred, taken, visited);
2482 BITMAP_FREE (visited);
2483 }
2484 else
2485 predict_edge_def (e, pred, taken);
2486 }
2487 \f
/* This is used to carry information about basic blocks.  It is
   attached to the AUX field of the standard CFG block while
   estimate_bb_frequencies runs.  */

struct block_info
{
  /* Estimated frequency of execution of basic_block.  */
  sreal frequency;

  /* To keep queue of basic blocks to process.  Linked by propagate_freq
     when all of a block's predecessors have been handled.  */
  basic_block next;

  /* Number of predecessors we need to visit first.  */
  int npredecessors;
};
2502
/* Similar information for edges, attached to each edge's AUX field.  */
struct edge_prob_info
{
  /* In case edge is a loopback edge, the probability edge will be reached
     in case header is.  Estimated number of iterations of the loop can be
     then computed as 1 / (1 - back_edge_prob).  */
  sreal back_edge_prob;
  /* True if the edge is a loopback edge in the natural loop.  */
  unsigned int back_edge:1;
};

/* Accessors for the scratch data hung off the AUX pointers above.
   EDGE_INFO is #undef'd first since a different definition may already
   be in scope from an included header.  */
#define BLOCK_INFO(B) ((block_info *) (B)->aux)
#undef EDGE_INFO
#define EDGE_INFO(E) ((edge_prob_info *) (E)->aux)
2517
/* Helper function for estimate_bb_frequencies.
   Propagate the frequencies in blocks marked in
   TOVISIT, starting in HEAD.  Blocks are processed in an order where
   every non-back-edge predecessor is handled first; back edges feed the
   cyclic_probability term that models loop iteration counts.  */

static void
propagate_freq (basic_block head, bitmap tovisit)
{
  basic_block bb;
  basic_block last;
  unsigned i;
  edge e;
  basic_block nextbb;
  bitmap_iterator bi;

  /* For each basic block we need to visit, count the number of its
     predecessors that must be visited first.  */
  EXECUTE_IF_SET_IN_BITMAP (tovisit, 0, i, bi)
    {
      edge_iterator ei;
      int count = 0;

      bb = BASIC_BLOCK_FOR_FN (cfun, i);

      FOR_EACH_EDGE (e, ei, bb->preds)
	{
	  bool visit = bitmap_bit_p (tovisit, e->src->index);

	  if (visit && !(e->flags & EDGE_DFS_BACK))
	    count++;
	  /* A DFS back edge that is not a natural-loop latch indicates
	     an irreducible region; it is ignored (dumped for
	     diagnosis).  */
	  else if (visit && dump_file && !EDGE_INFO (e)->back_edge)
	    fprintf (dump_file,
		     "Irreducible region hit, ignoring edge to %i->%i\n",
		     e->src->index, bb->index);
	}
      BLOCK_INFO (bb)->npredecessors = count;
      /* When function never returns, we will never process exit block.  */
      if (!count && bb == EXIT_BLOCK_PTR_FOR_FN (cfun))
	bb->count = bb->frequency = 0;
    }

  BLOCK_INFO (head)->frequency = 1;
  last = head;
  /* Worklist walk: NEXTBB/LAST implement a FIFO threaded through
     BLOCK_INFO (...)->next.  */
  for (bb = head; bb; bb = nextbb)
    {
      edge_iterator ei;
      sreal cyclic_probability = 0;
      sreal frequency = 0;

      nextbb = BLOCK_INFO (bb)->next;
      BLOCK_INFO (bb)->next = NULL;

      /* Compute frequency of basic block.  */
      if (bb != head)
	{
#ifdef ENABLE_CHECKING
	  /* All forward-edge predecessors must already be processed.  */
	  FOR_EACH_EDGE (e, ei, bb->preds)
	    gcc_assert (!bitmap_bit_p (tovisit, e->src->index)
			|| (e->flags & EDGE_DFS_BACK));
#endif

	  FOR_EACH_EDGE (e, ei, bb->preds)
	    if (EDGE_INFO (e)->back_edge)
	      {
		cyclic_probability += EDGE_INFO (e)->back_edge_prob;
	      }
	    else if (!(e->flags & EDGE_DFS_BACK))
	      {
		/* frequency += (e->probability
		  	        * BLOCK_INFO (e->src)->frequency /
			        REG_BR_PROB_BASE);  */

		sreal tmp = e->probability;
		tmp *= BLOCK_INFO (e->src)->frequency;
		tmp *= real_inv_br_prob_base;
		frequency += tmp;
	      }

	  if (cyclic_probability == 0)
	    {
	      BLOCK_INFO (bb)->frequency = frequency;
	    }
	  else
	    {
	      /* Clamp so division below stays finite even for (nearly)
		 infinite loops.  */
	      if (cyclic_probability > real_almost_one)
		cyclic_probability = real_almost_one;

	      /* BLOCK_INFO (bb)->frequency = frequency
					      / (1 - cyclic_probability) */

	      cyclic_probability = sreal (1) - cyclic_probability;
	      BLOCK_INFO (bb)->frequency = frequency / cyclic_probability;
	    }
	}

      bitmap_clear_bit (tovisit, bb->index);

      /* If BB loops back to HEAD, record the probability that the back
	 edge is reached given HEAD is, for the enclosing level.  */
      e = find_edge (bb, head);
      if (e)
	{
	  /* EDGE_INFO (e)->back_edge_prob
	     = ((e->probability * BLOCK_INFO (bb)->frequency)
	     / REG_BR_PROB_BASE); */

	  sreal tmp = e->probability;
	  tmp *= BLOCK_INFO (bb)->frequency;
	  EDGE_INFO (e)->back_edge_prob = tmp * real_inv_br_prob_base;
	}

      /* Propagate to successor blocks.  */
      FOR_EACH_EDGE (e, ei, bb->succs)
	if (!(e->flags & EDGE_DFS_BACK)
	    && BLOCK_INFO (e->dest)->npredecessors)
	  {
	    BLOCK_INFO (e->dest)->npredecessors--;
	    if (!BLOCK_INFO (e->dest)->npredecessors)
	      {
		if (!nextbb)
		  nextbb = e->dest;
		else
		  BLOCK_INFO (last)->next = e->dest;

		last = e->dest;
	      }
	  }
    }
}
2644
/* Estimate frequencies in loops at same nest level.  Inner loops are
   processed first so that their back-edge probabilities are available
   when the enclosing loop body is propagated.  */

static void
estimate_loops_at_level (struct loop *first_loop)
{
  struct loop *loop;

  for (loop = first_loop; loop; loop = loop->next)
    {
      edge e;
      basic_block *bbs;
      unsigned i;
      bitmap tovisit = BITMAP_ALLOC (NULL);

      estimate_loops_at_level (loop->inner);

      /* Find current loop back edge and mark it.  */
      e = loop_latch_edge (loop);
      EDGE_INFO (e)->back_edge = 1;

      /* Propagate frequencies within the body of this loop only.  */
      bbs = get_loop_body (loop);
      for (i = 0; i < loop->num_nodes; i++)
	bitmap_set_bit (tovisit, bbs[i]->index);
      free (bbs);
      propagate_freq (loop->header, tovisit);
      BITMAP_FREE (tovisit);
    }
}
2673
2674 /* Propagates frequencies through structure of loops. */
2675
2676 static void
2677 estimate_loops (void)
2678 {
2679 bitmap tovisit = BITMAP_ALLOC (NULL);
2680 basic_block bb;
2681
2682 /* Start by estimating the frequencies in the loops. */
2683 if (number_of_loops (cfun) > 1)
2684 estimate_loops_at_level (current_loops->tree_root->inner);
2685
2686 /* Now propagate the frequencies through all the blocks. */
2687 FOR_ALL_BB_FN (bb, cfun)
2688 {
2689 bitmap_set_bit (tovisit, bb->index);
2690 }
2691 propagate_freq (ENTRY_BLOCK_PTR_FOR_FN (cfun), tovisit);
2692 BITMAP_FREE (tovisit);
2693 }
2694
/* Drop the profile for NODE to guessed, and update its frequency based on
   whether it is expected to be hot given the CALL_COUNT.  */

static void
drop_profile (struct cgraph_node *node, gcov_type call_count)
{
  struct function *fn = DECL_STRUCT_FUNCTION (node->decl);
  /* In the case where this was called by another function with a
     dropped profile, call_count will be 0.  Since there are no
     non-zero call counts to this function, we don't know for sure
     whether it is hot, and therefore it will be marked normal below.  */
  bool hot = maybe_hot_count_p (NULL, call_count);

  if (dump_file)
    fprintf (dump_file,
             "Dropping 0 profile for %s/%i. %s based on calls.\n",
             node->name (), node->order,
             hot ? "Function is hot" : "Function is normal");
  /* We only expect to miss profiles for functions that are reached
     via non-zero call edges in cases where the function may have
     been linked from another module or library (COMDATs and extern
     templates). See the comments below for handle_missing_profiles.
     Also, only warn in cases where the missing counts exceed the
     number of training runs. In certain cases with an execv followed
     by a no-return call the profile for the no-return call is not
     dumped and there can be a mismatch.  */
  if (!DECL_COMDAT (node->decl) && !DECL_EXTERNAL (node->decl)
      && call_count > profile_info->runs)
    {
      if (flag_profile_correction)
        {
          if (dump_file)
            fprintf (dump_file,
                     "Missing counts for called function %s/%i\n",
                     node->name (), node->order);
        }
      else
        warning (0, "Missing counts for called function %s/%i",
                 node->name (), node->order);
    }

  /* Fall back to the guessed profile (or none at all when guessing is
     disabled) and reclassify the node's hotness.  */
  profile_status_for_fn (fn)
      = (flag_guess_branch_prob ? PROFILE_GUESSED : PROFILE_ABSENT);
  node->frequency
      = hot ? NODE_FREQUENCY_HOT : NODE_FREQUENCY_NORMAL;
}
2741
2742 /* In the case of COMDAT routines, multiple object files will contain the same
2743 function and the linker will select one for the binary. In that case
2744 all the other copies from the profile instrument binary will be missing
2745 profile counts. Look for cases where this happened, due to non-zero
2746 call counts going to 0-count functions, and drop the profile to guessed
2747 so that we can use the estimated probabilities and avoid optimizing only
2748 for size.
2749
2750 The other case where the profile may be missing is when the routine
2751 is not going to be emitted to the object file, e.g. for "extern template"
2752 class methods. Those will be marked DECL_EXTERNAL. Emit a warning in
2753 all other cases of non-zero calls to 0-count functions. */
2754
2755 void
2756 handle_missing_profiles (void)
2757 {
2758 struct cgraph_node *node;
2759 int unlikely_count_fraction = PARAM_VALUE (UNLIKELY_BB_COUNT_FRACTION);
2760 vec<struct cgraph_node *> worklist;
2761 worklist.create (64);
2762
2763 /* See if 0 count function has non-0 count callers. In this case we
2764 lost some profile. Drop its function profile to PROFILE_GUESSED. */
2765 FOR_EACH_DEFINED_FUNCTION (node)
2766 {
2767 struct cgraph_edge *e;
2768 gcov_type call_count = 0;
2769 gcov_type max_tp_first_run = 0;
2770 struct function *fn = DECL_STRUCT_FUNCTION (node->decl);
2771
2772 if (node->count)
2773 continue;
2774 for (e = node->callers; e; e = e->next_caller)
2775 {
2776 call_count += e->count;
2777
2778 if (e->caller->tp_first_run > max_tp_first_run)
2779 max_tp_first_run = e->caller->tp_first_run;
2780 }
2781
2782 /* If time profile is missing, let assign the maximum that comes from
2783 caller functions. */
2784 if (!node->tp_first_run && max_tp_first_run)
2785 node->tp_first_run = max_tp_first_run + 1;
2786
2787 if (call_count
2788 && fn && fn->cfg
2789 && (call_count * unlikely_count_fraction >= profile_info->runs))
2790 {
2791 drop_profile (node, call_count);
2792 worklist.safe_push (node);
2793 }
2794 }
2795
2796 /* Propagate the profile dropping to other 0-count COMDATs that are
2797 potentially called by COMDATs we already dropped the profile on. */
2798 while (worklist.length () > 0)
2799 {
2800 struct cgraph_edge *e;
2801
2802 node = worklist.pop ();
2803 for (e = node->callees; e; e = e->next_caller)
2804 {
2805 struct cgraph_node *callee = e->callee;
2806 struct function *fn = DECL_STRUCT_FUNCTION (callee->decl);
2807
2808 if (callee->count > 0)
2809 continue;
2810 if (DECL_COMDAT (callee->decl) && fn && fn->cfg
2811 && profile_status_for_fn (fn) == PROFILE_READ)
2812 {
2813 drop_profile (node, 0);
2814 worklist.safe_push (callee);
2815 }
2816 }
2817 }
2818 worklist.release ();
2819 }
2820
/* Convert counts measured by profile driven feedback to frequencies.
   Return nonzero iff there was any nonzero execution count.  */

int
counts_to_freqs (void)
{
  gcov_type count_max, true_count_max = 0;
  basic_block bb;

  /* Don't overwrite the estimated frequencies when the profile for
     the function is missing.  We may drop this function PROFILE_GUESSED
     later in drop_profile ().  */
  if (!flag_auto_profile && !ENTRY_BLOCK_PTR_FOR_FN (cfun)->count)
    return 0;

  FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
    true_count_max = MAX (bb->count, true_count_max);

  /* Scale counts into the [0, BB_FREQ_MAX] frequency range; the
     "+ count_max / 2" rounds to nearest.  COUNT_MAX is at least 1 to
     avoid division by zero when all counts are zero.  */
  count_max = MAX (true_count_max, 1);
  FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
    bb->frequency = (bb->count * BB_FREQ_MAX + count_max / 2) / count_max;

  return true_count_max;
}
2845
/* Return true if function is likely to be expensive, so there is no point to
   optimize performance of prologue, epilogue or do inlining at the expense
   of code size growth.  THRESHOLD is the limit of number of instructions
   function can execute at average to be still considered not expensive.  */

bool
expensive_function_p (int threshold)
{
  unsigned int sum = 0;
  basic_block bb;
  unsigned int limit;

  /* We can not compute accurately for large thresholds due to scaled
     frequencies.  */
  gcc_assert (threshold <= BB_FREQ_MAX);

  /* Frequencies are out of range.  This either means that function contains
     internal loop executing more than BB_FREQ_MAX times or profile feedback
     is available and function has not been executed at all.  */
  if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency == 0)
    return true;

  /* Maximally BB_FREQ_MAX^2 so overflow won't happen.  */
  limit = ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency * threshold;
  FOR_EACH_BB_FN (bb, cfun)
    {
      rtx_insn *insn;

      /* Sum the frequency of each active insn's block; bail out as soon
	 as the running total exceeds the scaled threshold.  */
      FOR_BB_INSNS (bb, insn)
	if (active_insn_p (insn))
	  {
	    sum += bb->frequency;
	    if (sum > limit)
	      return true;
	  }
    }

  return false;
}
2885
/* Estimate and propagate basic block frequencies using the given branch
   probabilities.  If FORCE is true, the frequencies are used to estimate
   the counts even when there are already non-zero profile counts.  */

void
estimate_bb_frequencies (bool force)
{
  basic_block bb;
  sreal freq_max;

  if (force || profile_status_for_fn (cfun) != PROFILE_READ || !counts_to_freqs ())
    {
      static int real_values_initialized = 0;

      /* Lazily initialize the sreal constants shared with
	 propagate_freq; they never change afterwards.  */
      if (!real_values_initialized)
        {
	  real_values_initialized = 1;
	  real_br_prob_base = REG_BR_PROB_BASE;
	  real_bb_freq_max = BB_FREQ_MAX;
	  real_one_half = sreal (1, -1);
	  real_inv_br_prob_base = sreal (1) / real_br_prob_base;
	  real_almost_one = sreal (1) - real_inv_br_prob_base;
	}

      mark_dfs_back_edges ();

      single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun))->probability =
	 REG_BR_PROB_BASE;

      /* Set up block info for each basic block.  */
      alloc_aux_for_blocks (sizeof (block_info));
      alloc_aux_for_edges (sizeof (edge_prob_info));
      FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
	{
	  edge e;
	  edge_iterator ei;

	  /* Seed each edge with its branch probability scaled to [0,1].  */
	  FOR_EACH_EDGE (e, ei, bb->succs)
	    {
	      EDGE_INFO (e)->back_edge_prob = e->probability;
	      EDGE_INFO (e)->back_edge_prob *= real_inv_br_prob_base;
	    }
	}

      /* First compute frequencies locally for each loop from innermost
         to outermost to examine frequencies for back edges.  */
      estimate_loops ();

      freq_max = 0;
      FOR_EACH_BB_FN (bb, cfun)
	if (freq_max < BLOCK_INFO (bb)->frequency)
	  freq_max = BLOCK_INFO (bb)->frequency;

      /* Normalize so the hottest block gets frequency BB_FREQ_MAX;
	 real_one_half rounds to nearest on conversion.  */
      freq_max = real_bb_freq_max / freq_max;
      FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
	{
	  sreal tmp = BLOCK_INFO (bb)->frequency * freq_max + real_one_half;
	  bb->frequency = tmp.to_int ();
	}

      free_aux_for_blocks ();
      free_aux_for_edges ();
    }
  compute_function_frequency ();
}
2951
/* Decide whether function is hot, cold or unlikely executed, and set the
   corresponding NODE_FREQUENCY_* classification on its cgraph node.  */
void
compute_function_frequency (void)
{
  basic_block bb;
  struct cgraph_node *node = cgraph_node::get (current_function_decl);

  if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
      || MAIN_NAME_P (DECL_NAME (current_function_decl)))
    node->only_called_at_startup = true;
  if (DECL_STATIC_DESTRUCTOR (current_function_decl))
    node->only_called_at_exit = true;

  /* Without real profile data, classify from declaration attributes and
     flags only.  */
  if (profile_status_for_fn (cfun) != PROFILE_READ)
    {
      int flags = flags_from_decl_or_type (current_function_decl);
      if (lookup_attribute ("cold", DECL_ATTRIBUTES (current_function_decl))
	  != NULL)
        node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
      else if (lookup_attribute ("hot", DECL_ATTRIBUTES (current_function_decl))
	       != NULL)
        node->frequency = NODE_FREQUENCY_HOT;
      else if (flags & ECF_NORETURN)
        node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
      else if (MAIN_NAME_P (DECL_NAME (current_function_decl)))
        node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
      else if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
	       || DECL_STATIC_DESTRUCTOR (current_function_decl))
        node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
      return;
    }

  /* Only first time try to drop function into unlikely executed.
     After inlining the roundoff errors may confuse us.
     Ipa-profile pass will drop functions only called from unlikely
     functions to unlikely and that is most of what we care about.  */
  if (!cfun->after_inlining)
    node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
  /* Upgrade the classification based on the hottest basic block found.  */
  FOR_EACH_BB_FN (bb, cfun)
    {
      if (maybe_hot_bb_p (cfun, bb))
	{
	  node->frequency = NODE_FREQUENCY_HOT;
	  return;
	}
      if (!probably_never_executed_bb_p (cfun, bb))
	node->frequency = NODE_FREQUENCY_NORMAL;
    }
}
3001
3002 /* Build PREDICT_EXPR. */
3003 tree
3004 build_predict_expr (enum br_predictor predictor, enum prediction taken)
3005 {
3006 tree t = build1 (PREDICT_EXPR, void_type_node,
3007 build_int_cst (integer_type_node, predictor));
3008 SET_PREDICT_EXPR_OUTCOME (t, taken);
3009 return t;
3010 }
3011
/* Return the human-readable name of PREDICTOR, taken from the
   predictor_info table.  */
const char *
predictor_name (enum br_predictor predictor)
{
  return predictor_info[predictor].name;
}
3017
3018 /* Predict branch probabilities and estimate profile of the tree CFG. */
3019
3020 namespace {
3021
/* Pass descriptor for the branch-probability estimation pass.  */
const pass_data pass_data_profile =
{
  GIMPLE_PASS, /* type */
  "profile_estimate", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_BRANCH_PROB, /* tv_id */
  PROP_cfg, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

/* GIMPLE pass wrapper around tree_estimate_probability; only runs when
   guessing branch probabilities is enabled.  */
class pass_profile : public gimple_opt_pass
{
public:
  pass_profile (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_profile, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *) { return flag_guess_branch_prob; }
  virtual unsigned int execute (function *);

}; // class pass_profile
3047
/* Main entry of the profile_estimate pass: set up loops and scalar
   evolutions, run the tree-level probability estimation, and record that
   the profile is now guessed.  */
unsigned int
pass_profile::execute (function *fun)
{
  unsigned nb_loops;

  loop_optimizer_init (LOOPS_NORMAL);
  if (dump_file && (dump_flags & TDF_DETAILS))
    flow_loops_dump (dump_file, NULL, 0);

  mark_irreducible_loops ();

  /* SCEV is only needed by the loop heuristics, which run when there is
     more than the trivial outermost "loop".  */
  nb_loops = number_of_loops (fun);
  if (nb_loops > 1)
    scev_initialize ();

  tree_estimate_probability ();

  if (nb_loops > 1)
    scev_finalize ();

  loop_optimizer_finalize ();
  if (dump_file && (dump_flags & TDF_DETAILS))
    gimple_dump_cfg (dump_file, dump_flags);
 if (profile_status_for_fn (fun) == PROFILE_ABSENT)
    profile_status_for_fn (fun) = PROFILE_GUESSED;
 return 0;
}
3075
3076 } // anon namespace
3077
/* Factory for the profile_estimate pass; ownership of the returned pass
   object transfers to the pass manager.  */
gimple_opt_pass *
make_pass_profile (gcc::context *ctxt)
{
  return new pass_profile (ctxt);
}
3083
3084 namespace {
3085
/* Pass descriptor for stripping prediction hints once consumed.  */
const pass_data pass_data_strip_predict_hints =
{
  GIMPLE_PASS, /* type */
  "*strip_predict_hints", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_BRANCH_PROB, /* tv_id */
  PROP_cfg, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

/* GIMPLE pass that removes GIMPLE_PREDICT statements and
   __builtin_expect calls.  Cloneable: it is scheduled more than once
   in the pass pipeline.  */
class pass_strip_predict_hints : public gimple_opt_pass
{
public:
  pass_strip_predict_hints (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_strip_predict_hints, ctxt)
  {}

  /* opt_pass methods: */
  opt_pass * clone () { return new pass_strip_predict_hints (m_ctxt); }
  virtual unsigned int execute (function *);

}; // class pass_strip_predict_hints
3111
/* Get rid of all builtin_expect calls and GIMPLE_PREDICT statements
   we no longer need.  __builtin_expect (and the internal-fn variant)
   collapses to a plain copy of its first argument, or disappears when
   its result is unused.  */
unsigned int
pass_strip_predict_hints::execute (function *fun)
{
  basic_block bb;
  gimple ass_stmt;
  tree var;

  FOR_EACH_BB_FN (bb, fun)
    {
      gimple_stmt_iterator bi;
      /* NOTE: gsi_next is only called when the current statement was
	 not removed/replaced; gsi_remove already advances the
	 iterator.  */
      for (bi = gsi_start_bb (bb); !gsi_end_p (bi);)
	{
	  gimple stmt = gsi_stmt (bi);

	  if (gimple_code (stmt) == GIMPLE_PREDICT)
	    {
	      gsi_remove (&bi, true);
	      continue;
	    }
	  else if (is_gimple_call (stmt))
	    {
	      tree fndecl = gimple_call_fndecl (stmt);

	      if ((fndecl
		   && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
		   && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_EXPECT
		   && gimple_call_num_args (stmt) == 2)
		  || (gimple_call_internal_p (stmt)
		      && gimple_call_internal_fn (stmt) == IFN_BUILTIN_EXPECT))
		{
		  var = gimple_call_lhs (stmt);
		  if (var)
		    {
		      /* Replace LHS = __builtin_expect (arg, hint) with a
			 plain LHS = arg assignment.  */
		      ass_stmt
			= gimple_build_assign (var, gimple_call_arg (stmt, 0));
		      gsi_replace (&bi, ass_stmt, true);
		    }
		  else
		    {
		      /* Result unused; just drop the call.  */
		      gsi_remove (&bi, true);
		      continue;
		    }
		}
	    }
	  gsi_next (&bi);
	}
    }
  return 0;
}
3163
3164 } // anon namespace
3165
/* Factory for the strip_predict_hints pass; ownership of the returned
   pass object transfers to the pass manager.  */
gimple_opt_pass *
make_pass_strip_predict_hints (gcc::context *ctxt)
{
  return new pass_strip_predict_hints (ctxt);
}
3171
/* Rebuild function frequencies.  Passes are in general expected to
   maintain profile by hand, however in some cases this is not possible:
   for example when inlining several functions with loops frequencies might run
   out of scale and thus needs to be recomputed.  */

void
rebuild_frequencies (void)
{
  timevar_push (TV_REBUILD_FREQUENCIES);

  /* When the max bb count in the function is small, there is a higher
     chance that there were truncation errors in the integer scaling
     of counts by inlining and other optimizations. This could lead
     to incorrect classification of code as being cold when it isn't.
     In that case, force the estimation of bb counts/frequencies from the
     branch probabilities, rather than computing frequencies from counts,
     which may also lead to frequencies incorrectly reduced to 0. There
     is less precision in the probabilities, so we only do this for small
     max counts.  */
  gcov_type count_max = 0;
  basic_block bb;
  FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
    count_max = MAX (bb->count, count_max);

  if (profile_status_for_fn (cfun) == PROFILE_GUESSED
      || (!flag_auto_profile && profile_status_for_fn (cfun) == PROFILE_READ
	  && count_max < REG_BR_PROB_BASE/10))
    {
      /* Re-estimate from branch probabilities; needs loops and fake
	 exit edges just like the initial estimation.  */
      loop_optimizer_init (0);
      add_noreturn_fake_exit_edges ();
      mark_irreducible_loops ();
      connect_infinite_loops_to_exit ();
      estimate_bb_frequencies (true);
      remove_fake_exit_edges ();
      loop_optimizer_finalize ();
    }
  else if (profile_status_for_fn (cfun) == PROFILE_READ)
    counts_to_freqs ();
  else
    gcc_unreachable ();
  timevar_pop (TV_REBUILD_FREQUENCIES);
}