gcc/predict.c

   1 /* Branch prediction routines for the GNU compiler.
   2    Copyright (C) 2000-2016 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it under
   7 the terms of the GNU General Public License as published by the Free
   8 Software Foundation; either version 3, or (at your option) any later
   9 version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 /* References:
  21
  22    [1] "Branch Prediction for Free"
  23        Ball and Larus; PLDI '93.
  24    [2] "Static Branch Frequency and Program Profile Analysis"
  25        Wu and Larus; MICRO-27.
  26    [3] "Corpus-based Static Branch Prediction"
  27        Calder, Grunwald, Lindsay, Martin, Mozer, and Zorn; PLDI '95.  */
  28
  29
  30 #include "config.h"
  31 #include "system.h"
  32 #include "coretypes.h"
  33 #include "backend.h"
  34 #include "rtl.h"
  35 #include "tree.h"
  36 #include "gimple.h"
  37 #include "cfghooks.h"
  38 #include "tree-pass.h"
  39 #include "ssa.h"
  40 #include "emit-rtl.h"
  41 #include "cgraph.h"
  42 #include "coverage.h"
  43 #include "diagnostic-core.h"
  44 #include "gimple-predict.h"
  45 #include "fold-const.h"
  46 #include "calls.h"
  47 #include "cfganal.h"
  48 #include "profile.h"
  49 #include "sreal.h"
  50 #include "params.h"
  51 #include "cfgloop.h"
  52 #include "gimple-iterator.h"
  53 #include "tree-cfg.h"
  54 #include "tree-ssa-loop-niter.h"
  55 #include "tree-ssa-loop.h"
  56 #include "tree-scalar-evolution.h"
  57
/* File-local sreal constants.  Judging by their names they hold, in
   declaration order: 1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE,
   1/REG_BR_PROB_BASE, 0.5 and BB_FREQ_MAX.  (An earlier version of this
   comment also listed 0 and 1, which are not declared here.)  */
static sreal real_almost_one, real_br_prob_base,
             real_inv_br_prob_base, real_one_half, real_bb_freq_max;

/* Forward declarations of file-local (static) helpers.  */
static void combine_predictions_for_insn (rtx_insn *, basic_block);
static void dump_prediction (FILE *, enum br_predictor, int, basic_block, int);
static void predict_paths_leading_to (basic_block, enum br_predictor, enum prediction);
static void predict_paths_leading_to_edge (edge, enum br_predictor, enum prediction);
static bool can_predict_insn_p (const rtx_insn *);
  68
/* Information we hold about each branch predictor.
   Filled using information from predict.def.  */

struct predictor_info
{
  const char *const name;       /* Name used in the debugging dumps.  */
  const int hitrate;            /* Expected hitrate used by
                                   predict_insn_def call.  */
  const int flags;              /* Bitmask of PRED_FLAG_* values.  */
};

/* Use given predictor without Dempster-Shafer theory if it matches
   using first_match heuristics.  */
#define PRED_FLAG_FIRST_MATCH 1

/* Recompute hitrate in percent to our representation: VAL percent is
   scaled by REG_BR_PROB_BASE, and 50 is added before the integer division
   by 100 so that the result is rounded rather than truncated.  */

#define HITRATE(VAL) ((int) ((VAL) * REG_BR_PROB_BASE + 50) / 100)

/* Table of predictors.  Each DEF_PREDICTOR entry of predict.def expands to
   one {NAME, HITRATE, FLAGS} initializer; the ENUM argument is unused
   here.  */
#define DEF_PREDICTOR(ENUM, NAME, HITRATE, FLAGS) {NAME, HITRATE, FLAGS},
static const struct predictor_info predictor_info[]= {
#include "predict.def"

  /* Upper bound on predictors.  */
  {NULL, 0, 0}
};
#undef DEF_PREDICTOR
  96
  97 /* Return TRUE if frequency FREQ is considered to be hot.  */
  98
  99 static inline bool
 100 maybe_hot_frequency_p (struct function *fun, int freq)
 101 {
 102   struct cgraph_node *node = cgraph_node::get (fun->decl);
 103   if (!profile_info
 104       || !opt_for_fn (fun->decl, flag_branch_probabilities))
 105     {
 106       if (node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED)
 107         return false;
 108       if (node->frequency == NODE_FREQUENCY_HOT)
 109         return true;
 110     }
 111   if (profile_status_for_fn (fun) == PROFILE_ABSENT)
 112     return true;
 113   if (node->frequency == NODE_FREQUENCY_EXECUTED_ONCE
 114       && freq < (ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency * 2 / 3))
 115     return false;
 116   if (PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION) == 0)
 117     return false;
 118   if (freq < (ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency
 119               / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION)))
 120     return false;
 121   return true;
 122 }
 123
 124 static gcov_type min_count = -1;
 125
 126 /* Determine the threshold for hot BB counts.  */
 127
 128 gcov_type
 129 get_hot_bb_threshold ()
 130 {
 131   gcov_working_set_t *ws;
 132   if (min_count == -1)
 133     {
 134       ws = find_working_set (PARAM_VALUE (HOT_BB_COUNT_WS_PERMILLE));
 135       gcc_assert (ws);
 136       min_count = ws->min_counter;
 137     }
 138   return min_count;
 139 }
 140
 141 /* Set the threshold for hot BB counts.  */
 142
 143 void
 144 set_hot_bb_threshold (gcov_type min)
 145 {
 146   min_count = min;
 147 }
 148
 149 /* Return TRUE if frequency FREQ is considered to be hot.  */
 150
 151 bool
 152 maybe_hot_count_p (struct function *fun, gcov_type count)
 153 {
 154   if (fun && profile_status_for_fn (fun) != PROFILE_READ)
 155     return true;
 156   /* Code executed at most once is not hot.  */
 157   if (profile_info->runs >= count)
 158     return false;
 159   return (count >= get_hot_bb_threshold ());
 160 }
 161
 162 /* Return true in case BB can be CPU intensive and should be optimized
 163    for maximal performance.  */
 164
 165 bool
 166 maybe_hot_bb_p (struct function *fun, const_basic_block bb)
 167 {
 168   gcc_checking_assert (fun);
 169   if (profile_status_for_fn (fun) == PROFILE_READ)
 170     return maybe_hot_count_p (fun, bb->count);
 171   return maybe_hot_frequency_p (fun, bb->frequency);
 172 }
 173
 174 /* Return true in case BB can be CPU intensive and should be optimized
 175    for maximal performance.  */
 176
 177 bool
 178 maybe_hot_edge_p (edge e)
 179 {
 180   if (profile_status_for_fn (cfun) == PROFILE_READ)
 181     return maybe_hot_count_p (cfun, e->count);
 182   return maybe_hot_frequency_p (cfun, EDGE_FREQUENCY (e));
 183 }
 184
 185 /* Return true if profile COUNT and FREQUENCY, or function FUN static
 186    node frequency reflects never being executed.  */
 187
 188 static bool
 189 probably_never_executed (struct function *fun,
 190                          gcov_type count, int frequency)
 191 {
 192   gcc_checking_assert (fun);
 193   if (profile_status_for_fn (fun) == PROFILE_READ)
 194     {
 195       int unlikely_count_fraction = PARAM_VALUE (UNLIKELY_BB_COUNT_FRACTION);
 196       if (count * unlikely_count_fraction >= profile_info->runs)
 197         return false;
 198       if (!frequency)
 199         return true;
 200       if (!ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency)
 201         return false;
 202       if (ENTRY_BLOCK_PTR_FOR_FN (fun)->count)
 203         {
 204           gcov_type computed_count;
 205           /* Check for possibility of overflow, in which case entry bb count
 206              is large enough to do the division first without losing much
 207              precision.  */
 208           if (ENTRY_BLOCK_PTR_FOR_FN (fun)->count < REG_BR_PROB_BASE *
 209               REG_BR_PROB_BASE)
 210             {
 211               gcov_type scaled_count
 212                   = frequency * ENTRY_BLOCK_PTR_FOR_FN (fun)->count *
 213              unlikely_count_fraction;
 214               computed_count = RDIV (scaled_count,
 215                                      ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency);
 216             }
 217           else
 218             {
 219               computed_count = RDIV (ENTRY_BLOCK_PTR_FOR_FN (fun)->count,
 220                                      ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency);
 221               computed_count *= frequency * unlikely_count_fraction;
 222             }
 223           if (computed_count >= profile_info->runs)
 224             return false;
 225         }
 226       return true;
 227     }
 228   if ((!profile_info || !(opt_for_fn (fun->decl, flag_branch_probabilities)))
 229       && (cgraph_node::get (fun->decl)->frequency
 230           == NODE_FREQUENCY_UNLIKELY_EXECUTED))
 231     return true;
 232   return false;
 233 }
 234
 235
 236 /* Return true in case BB is probably never executed.  */
 237
 238 bool
 239 probably_never_executed_bb_p (struct function *fun, const_basic_block bb)
 240 {
 241   return probably_never_executed (fun, bb->count, bb->frequency);
 242 }
 243
 244
 245 /* Return true in case edge E is probably never executed.  */
 246
 247 bool
 248 probably_never_executed_edge_p (struct function *fun, edge e)
 249 {
 250   return probably_never_executed (fun, e->count, EDGE_FREQUENCY (e));
 251 }
 252
 253 /* Return true when current function should always be optimized for size.  */
 254
 255 bool
 256 optimize_function_for_size_p (struct function *fun)
 257 {
 258   if (!fun || !fun->decl)
 259     return optimize_size;
 260   cgraph_node *n = cgraph_node::get (fun->decl);
 261   return n && n->optimize_for_size_p ();
 262 }
 263
 264 /* Return true when current function should always be optimized for speed.  */
 265
 266 bool
 267 optimize_function_for_speed_p (struct function *fun)
 268 {
 269   return !optimize_function_for_size_p (fun);
 270 }
 271
 272 /* Return the optimization type that should be used for the function FUN.  */
 273
 274 optimization_type
 275 function_optimization_type (struct function *fun)
 276 {
 277   return (optimize_function_for_speed_p (fun)
 278           ? OPTIMIZE_FOR_SPEED
 279           : OPTIMIZE_FOR_SIZE);
 280 }
 281
 282 /* Return TRUE when BB should be optimized for size.  */
 283
 284 bool
 285 optimize_bb_for_size_p (const_basic_block bb)
 286 {
 287   return (optimize_function_for_size_p (cfun)
 288           || (bb && !maybe_hot_bb_p (cfun, bb)));
 289 }
 290
 291 /* Return TRUE when BB should be optimized for speed.  */
 292
 293 bool
 294 optimize_bb_for_speed_p (const_basic_block bb)
 295 {
 296   return !optimize_bb_for_size_p (bb);
 297 }
 298
 299 /* Return the optimization type that should be used for block BB.  */
 300
 301 optimization_type
 302 bb_optimization_type (const_basic_block bb)
 303 {
 304   return (optimize_bb_for_speed_p (bb)
 305           ? OPTIMIZE_FOR_SPEED
 306           : OPTIMIZE_FOR_SIZE);
 307 }
 308
 309 /* Return TRUE when BB should be optimized for size.  */
 310
 311 bool
 312 optimize_edge_for_size_p (edge e)
 313 {
 314   return optimize_function_for_size_p (cfun) || !maybe_hot_edge_p (e);
 315 }
 316
 317 /* Return TRUE when BB should be optimized for speed.  */
 318
 319 bool
 320 optimize_edge_for_speed_p (edge e)
 321 {
 322   return !optimize_edge_for_size_p (e);
 323 }
 324
 325 /* Return TRUE when BB should be optimized for size.  */
 326
 327 bool
 328 optimize_insn_for_size_p (void)
 329 {
 330   return optimize_function_for_size_p (cfun) || !crtl->maybe_hot_insn_p;
 331 }
 332
 333 /* Return TRUE when BB should be optimized for speed.  */
 334
 335 bool
 336 optimize_insn_for_speed_p (void)
 337 {
 338   return !optimize_insn_for_size_p ();
 339 }
 340
 341 /* Return TRUE when LOOP should be optimized for size.  */
 342
 343 bool
 344 optimize_loop_for_size_p (struct loop *loop)
 345 {
 346   return optimize_bb_for_size_p (loop->header);
 347 }
 348
 349 /* Return TRUE when LOOP should be optimized for speed.  */
 350
 351 bool
 352 optimize_loop_for_speed_p (struct loop *loop)
 353 {
 354   return optimize_bb_for_speed_p (loop->header);
 355 }
 356
 357 /* Return TRUE when LOOP nest should be optimized for speed.  */
 358
 359 bool
 360 optimize_loop_nest_for_speed_p (struct loop *loop)
 361 {
 362   struct loop *l = loop;
 363   if (optimize_loop_for_speed_p (loop))
 364     return true;
 365   l = loop->inner;
 366   while (l && l != loop)
 367     {
 368       if (optimize_loop_for_speed_p (l))
 369         return true;
 370       if (l->inner)
 371         l = l->inner;
 372       else if (l->next)
 373         l = l->next;
 374       else
 375         {
 376           while (l != loop && !l->next)
 377             l = loop_outer (l);
 378           if (l != loop)
 379             l = l->next;
 380         }
 381     }
 382   return false;
 383 }
 384
 385 /* Return TRUE when LOOP nest should be optimized for size.  */
 386
 387 bool
 388 optimize_loop_nest_for_size_p (struct loop *loop)
 389 {
 390   return !optimize_loop_nest_for_speed_p (loop);
 391 }
 392
 393 /* Return true when edge E is likely to be well predictable by branch
 394    predictor.  */
 395
 396 bool
 397 predictable_edge_p (edge e)
 398 {
 399   if (profile_status_for_fn (cfun) == PROFILE_ABSENT)
 400     return false;
 401   if ((e->probability
 402        <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100)
 403       || (REG_BR_PROB_BASE - e->probability
 404           <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100))
 405     return true;
 406   return false;
 407 }
 408
 409
 410 /* Set RTL expansion for BB profile.  */
 411
 412 void
 413 rtl_profile_for_bb (basic_block bb)
 414 {
 415   crtl->maybe_hot_insn_p = maybe_hot_bb_p (cfun, bb);
 416 }
 417
 418 /* Set RTL expansion for edge profile.  */
 419
 420 void
 421 rtl_profile_for_edge (edge e)
 422 {
 423   crtl->maybe_hot_insn_p = maybe_hot_edge_p (e);
 424 }
 425
 426 /* Set RTL expansion to default mode (i.e. when profile info is not known).  */
 427 void
 428 default_rtl_profile (void)
 429 {
 430   crtl->maybe_hot_insn_p = true;
 431 }
 432
 433 /* Return true if the one of outgoing edges is already predicted by
 434    PREDICTOR.  */
 435
 436 bool
 437 rtl_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
 438 {
 439   rtx note;
 440   if (!INSN_P (BB_END (bb)))
 441     return false;
 442   for (note = REG_NOTES (BB_END (bb)); note; note = XEXP (note, 1))
 443     if (REG_NOTE_KIND (note) == REG_BR_PRED
 444         && INTVAL (XEXP (XEXP (note, 0), 0)) == (int)predictor)
 445       return true;
 446   return false;
 447 }
 448
/* Structure representing predictions in tree level.  The predictions
   recorded for one basic block form a singly linked list.  */

struct edge_prediction {
    struct edge_prediction *ep_next;   /* Next prediction in the list.  */
    edge ep_edge;                      /* Edge the prediction is about.  */
    enum br_predictor ep_predictor;    /* Predictor that produced it.  */
    int ep_probability;                /* Probability given to EP_EDGE.  */
};

/* This map contains for a basic block the list of predictions for the
   outgoing edges.  */

static hash_map<const_basic_block, edge_prediction *> *bb_predictions;
 462
 463 /* Return true if the one of outgoing edges is already predicted by
 464    PREDICTOR.  */
 465
 466 bool
 467 gimple_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
 468 {
 469   struct edge_prediction *i;
 470   edge_prediction **preds = bb_predictions->get (bb);
 471
 472   if (!preds)
 473     return false;
 474
 475   for (i = *preds; i; i = i->ep_next)
 476     if (i->ep_predictor == predictor)
 477       return true;
 478   return false;
 479 }
 480
 481 /* Return true if the one of outgoing edges is already predicted by
 482    PREDICTOR for edge E predicted as TAKEN.  */
 483
 484 bool
 485 edge_predicted_by_p (edge e, enum br_predictor predictor, bool taken)
 486 {
 487   struct edge_prediction *i;
 488   basic_block bb = e->src;
 489   edge_prediction **preds = bb_predictions->get (bb);
 490   if (!preds)
 491     return false;
 492
 493   int probability = predictor_info[(int) predictor].hitrate;
 494
 495   if (taken != TAKEN)
 496     probability = REG_BR_PROB_BASE - probability;
 497
 498   for (i = *preds; i; i = i->ep_next)
 499     if (i->ep_predictor == predictor
 500         && i->ep_edge == e
 501         && i->ep_probability == probability)
 502       return true;
 503   return false;
 504 }
 505
 506 /* Return true when the probability of edge is reliable.
 507
 508    The profile guessing code is good at predicting branch outcome (ie.
 509    taken/not taken), that is predicted right slightly over 75% of time.
 510    It is however notoriously poor on predicting the probability itself.
 511    In general the profile appear a lot flatter (with probabilities closer
 512    to 50%) than the reality so it is bad idea to use it to drive optimization
 513    such as those disabling dynamic branch prediction for well predictable
 514    branches.
 515
 516    There are two exceptions - edges leading to noreturn edges and edges
 517    predicted by number of iterations heuristics are predicted well.  This macro
 518    should be able to distinguish those, but at the moment it simply check for
 519    noreturn heuristic that is only one giving probability over 99% or bellow
 520    1%.  In future we might want to propagate reliability information across the
 521    CFG if we find this information useful on multiple places.   */
 522 static bool
 523 probability_reliable_p (int prob)
 524 {
 525   return (profile_status_for_fn (cfun) == PROFILE_READ
 526           || (profile_status_for_fn (cfun) == PROFILE_GUESSED
 527               && (prob <= HITRATE (1) || prob >= HITRATE (99))));
 528 }
 529
 530 /* Same predicate as above, working on edges.  */
 531 bool
 532 edge_probability_reliable_p (const_edge e)
 533 {
 534   return probability_reliable_p (e->probability);
 535 }
 536
 537 /* Same predicate as edge_probability_reliable_p, working on notes.  */
 538 bool
 539 br_prob_note_reliable_p (const_rtx note)
 540 {
 541   gcc_assert (REG_NOTE_KIND (note) == REG_BR_PROB);
 542   return probability_reliable_p (XINT (note, 0));
 543 }
 544
 545 static void
 546 predict_insn (rtx_insn *insn, enum br_predictor predictor, int probability)
 547 {
 548   gcc_assert (any_condjump_p (insn));
 549   if (!flag_guess_branch_prob)
 550     return;
 551
 552   add_reg_note (insn, REG_BR_PRED,
 553                 gen_rtx_CONCAT (VOIDmode,
 554                                 GEN_INT ((int) predictor),
 555                                 GEN_INT ((int) probability)));
 556 }
 557
 558 /* Predict insn by given predictor.  */
 559
 560 void
 561 predict_insn_def (rtx_insn *insn, enum br_predictor predictor,
 562                   enum prediction taken)
 563 {
 564    int probability = predictor_info[(int) predictor].hitrate;
 565
 566    if (taken != TAKEN)
 567      probability = REG_BR_PROB_BASE - probability;
 568
 569    predict_insn (insn, predictor, probability);
 570 }
 571
 572 /* Predict edge E with given probability if possible.  */
 573
 574 void
 575 rtl_predict_edge (edge e, enum br_predictor predictor, int probability)
 576 {
 577   rtx_insn *last_insn;
 578   last_insn = BB_END (e->src);
 579
 580   /* We can store the branch prediction information only about
 581      conditional jumps.  */
 582   if (!any_condjump_p (last_insn))
 583     return;
 584
 585   /* We always store probability of branching.  */
 586   if (e->flags & EDGE_FALLTHRU)
 587     probability = REG_BR_PROB_BASE - probability;
 588
 589   predict_insn (last_insn, predictor, probability);
 590 }
 591
 592 /* Predict edge E with the given PROBABILITY.  */
 593 void
 594 gimple_predict_edge (edge e, enum br_predictor predictor, int probability)
 595 {
 596   gcc_assert (profile_status_for_fn (cfun) != PROFILE_GUESSED);
 597   if ((e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun) && EDGE_COUNT (e->src->succs) >
 598        1)
 599       && flag_guess_branch_prob && optimize)
 600     {
 601       struct edge_prediction *i = XNEW (struct edge_prediction);
 602       edge_prediction *&preds = bb_predictions->get_or_insert (e->src);
 603
 604       i->ep_next = preds;
 605       preds = i;
 606       i->ep_probability = probability;
 607       i->ep_predictor = predictor;
 608       i->ep_edge = e;
 609     }
 610 }
 611
 612 /* Remove all predictions on given basic block that are attached
 613    to edge E.  */
 614 void
 615 remove_predictions_associated_with_edge (edge e)
 616 {
 617   if (!bb_predictions)
 618     return;
 619
 620   edge_prediction **preds = bb_predictions->get (e->src);
 621
 622   if (preds)
 623     {
 624       struct edge_prediction **prediction = preds;
 625       struct edge_prediction *next;
 626
 627       while (*prediction)
 628         {
 629           if ((*prediction)->ep_edge == e)
 630             {
 631               next = (*prediction)->ep_next;
 632               free (*prediction);
 633               *prediction = next;
 634             }
 635           else
 636             prediction = &((*prediction)->ep_next);
 637         }
 638     }
 639 }
 640
 641 /* Clears the list of predictions stored for BB.  */
 642
 643 static void
 644 clear_bb_predictions (basic_block bb)
 645 {
 646   edge_prediction **preds = bb_predictions->get (bb);
 647   struct edge_prediction *pred, *next;
 648
 649   if (!preds)
 650     return;
 651
 652   for (pred = *preds; pred; pred = next)
 653     {
 654       next = pred->ep_next;
 655       free (pred);
 656     }
 657   *preds = NULL;
 658 }
 659
 660 /* Return true when we can store prediction on insn INSN.
 661    At the moment we represent predictions only on conditional
 662    jumps, not at computed jump or other complicated cases.  */
 663 static bool
 664 can_predict_insn_p (const rtx_insn *insn)
 665 {
 666   return (JUMP_P (insn)
 667           && any_condjump_p (insn)
 668           && EDGE_COUNT (BLOCK_FOR_INSN (insn)->succs) >= 2);
 669 }
 670
 671 /* Predict edge E by given predictor if possible.  */
 672
 673 void
 674 predict_edge_def (edge e, enum br_predictor predictor,
 675                   enum prediction taken)
 676 {
 677    int probability = predictor_info[(int) predictor].hitrate;
 678
 679    if (taken != TAKEN)
 680      probability = REG_BR_PROB_BASE - probability;
 681
 682    predict_edge (e, predictor, probability);
 683 }
 684
 685 /* Invert all branch predictions or probability notes in the INSN.  This needs
 686    to be done each time we invert the condition used by the jump.  */
 687
 688 void
 689 invert_br_probabilities (rtx insn)
 690 {
 691   rtx note;
 692
 693   for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
 694     if (REG_NOTE_KIND (note) == REG_BR_PROB)
 695       XINT (note, 0) = REG_BR_PROB_BASE - XINT (note, 0);
 696     else if (REG_NOTE_KIND (note) == REG_BR_PRED)
 697       XEXP (XEXP (note, 0), 1)
 698         = GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (XEXP (note, 0), 1)));
 699 }
 700
 701 /* Dump information about the branch prediction to the output file.  */
 702
 703 static void
 704 dump_prediction (FILE *file, enum br_predictor predictor, int probability,
 705                  basic_block bb, int used)
 706 {
 707   edge e;
 708   edge_iterator ei;
 709
 710   if (!file)
 711     return;
 712
 713   FOR_EACH_EDGE (e, ei, bb->succs)
 714     if (! (e->flags & EDGE_FALLTHRU))
 715       break;
 716
 717   fprintf (file, "  %s heuristics%s: %.1f%%",
 718            predictor_info[predictor].name,
 719            used ? "" : " (ignored)", probability * 100.0 / REG_BR_PROB_BASE);
 720
 721   if (bb->count)
 722     {
 723       fprintf (file, "  exec %" PRId64, bb->count);
 724       if (e)
 725         {
 726           fprintf (file, " hit %" PRId64, e->count);
 727           fprintf (file, " (%.1f%%)", e->count * 100.0 / bb->count);
 728         }
 729     }
 730
 731   fprintf (file, "\n");
 732 }
 733
 734 /* We can not predict the probabilities of outgoing edges of bb.  Set them
 735    evenly and hope for the best.  */
 736 static void
 737 set_even_probabilities (basic_block bb)
 738 {
 739   int nedges = 0;
 740   edge e;
 741   edge_iterator ei;
 742
 743   FOR_EACH_EDGE (e, ei, bb->succs)
 744     if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
 745       nedges ++;
 746   FOR_EACH_EDGE (e, ei, bb->succs)
 747     if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
 748       e->probability = (REG_BR_PROB_BASE + nedges / 2) / nedges;
 749     else
 750       e->probability = 0;
 751 }
 752
 753 /* Combine all REG_BR_PRED notes into single probability and attach REG_BR_PROB
 754    note if not already present.  Remove now useless REG_BR_PRED notes.  */
 755
 756 static void
 757 combine_predictions_for_insn (rtx_insn *insn, basic_block bb)
 758 {
 759   rtx prob_note;
 760   rtx *pnote;
 761   rtx note;
 762   int best_probability = PROB_EVEN;
 763   enum br_predictor best_predictor = END_PREDICTORS;
 764   int combined_probability = REG_BR_PROB_BASE / 2;
 765   int d;
 766   bool first_match = false;
 767   bool found = false;
 768
 769   if (!can_predict_insn_p (insn))
 770     {
 771       set_even_probabilities (bb);
 772       return;
 773     }
 774
 775   prob_note = find_reg_note (insn, REG_BR_PROB, 0);
 776   pnote = &REG_NOTES (insn);
 777   if (dump_file)
 778     fprintf (dump_file, "Predictions for insn %i bb %i\n", INSN_UID (insn),
 779              bb->index);
 780
 781   /* We implement "first match" heuristics and use probability guessed
 782      by predictor with smallest index.  */
 783   for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
 784     if (REG_NOTE_KIND (note) == REG_BR_PRED)
 785       {
 786         enum br_predictor predictor = ((enum br_predictor)
 787                                        INTVAL (XEXP (XEXP (note, 0), 0)));
 788         int probability = INTVAL (XEXP (XEXP (note, 0), 1));
 789
 790         found = true;
 791         if (best_predictor > predictor)
 792           best_probability = probability, best_predictor = predictor;
 793
 794         d = (combined_probability * probability
 795              + (REG_BR_PROB_BASE - combined_probability)
 796              * (REG_BR_PROB_BASE - probability));
 797
 798         /* Use FP math to avoid overflows of 32bit integers.  */
 799         if (d == 0)
 800           /* If one probability is 0% and one 100%, avoid division by zero.  */
 801           combined_probability = REG_BR_PROB_BASE / 2;
 802         else
 803           combined_probability = (((double) combined_probability) * probability
 804                                   * REG_BR_PROB_BASE / d + 0.5);
 805       }
 806
 807   /* Decide which heuristic to use.  In case we didn't match anything,
 808      use no_prediction heuristic, in case we did match, use either
 809      first match or Dempster-Shaffer theory depending on the flags.  */
 810
 811   if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
 812     first_match = true;
 813
 814   if (!found)
 815     dump_prediction (dump_file, PRED_NO_PREDICTION,
 816                      combined_probability, bb, true);
 817   else
 818     {
 819       dump_prediction (dump_file, PRED_DS_THEORY, combined_probability,
 820                        bb, !first_match);
 821       dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability,
 822                        bb, first_match);
 823     }
 824
 825   if (first_match)
 826     combined_probability = best_probability;
 827   dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);
 828
 829   while (*pnote)
 830     {
 831       if (REG_NOTE_KIND (*pnote) == REG_BR_PRED)
 832         {
 833           enum br_predictor predictor = ((enum br_predictor)
 834                                          INTVAL (XEXP (XEXP (*pnote, 0), 0)));
 835           int probability = INTVAL (XEXP (XEXP (*pnote, 0), 1));
 836
 837           dump_prediction (dump_file, predictor, probability, bb,
 838                            !first_match || best_predictor == predictor);
 839           *pnote = XEXP (*pnote, 1);
 840         }
 841       else
 842         pnote = &XEXP (*pnote, 1);
 843     }
 844
 845   if (!prob_note)
 846     {
 847       add_int_reg_note (insn, REG_BR_PROB, combined_probability);
 848
 849       /* Save the prediction into CFG in case we are seeing non-degenerated
 850          conditional jump.  */
 851       if (!single_succ_p (bb))
 852         {
 853           BRANCH_EDGE (bb)->probability = combined_probability;
 854           FALLTHRU_EDGE (bb)->probability
 855             = REG_BR_PROB_BASE - combined_probability;
 856         }
 857     }
 858   else if (!single_succ_p (bb))
 859     {
 860       int prob = XINT (prob_note, 0);
 861
 862       BRANCH_EDGE (bb)->probability = prob;
 863       FALLTHRU_EDGE (bb)->probability = REG_BR_PROB_BASE - prob;
 864     }
 865   else
 866     single_succ_edge (bb)->probability = REG_BR_PROB_BASE;
 867 }
 868
 869 /* Combine predictions into single probability and store them into CFG.
 870    Remove now useless prediction entries.
 871    If DRY_RUN is set, only produce dumps and do not modify profile.  */
 872
 873 static void
 874 combine_predictions_for_bb (basic_block bb, bool dry_run)
 875 {
 876   int best_probability = PROB_EVEN;
 877   enum br_predictor best_predictor = END_PREDICTORS;
 878   int combined_probability = REG_BR_PROB_BASE / 2;
 879   int d;
 880   bool first_match = false;
 881   bool found = false;
 882   struct edge_prediction *pred;
 883   int nedges = 0;
 884   edge e, first = NULL, second = NULL;
 885   edge_iterator ei;
 886
 887   FOR_EACH_EDGE (e, ei, bb->succs)
 888     if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
 889       {
 890         nedges ++;
 891         if (first && !second)
 892           second = e;
 893         if (!first)
 894           first = e;
 895       }
 896
 897   /* When there is no successor or only one choice, prediction is easy.
 898
 899      We are lazy for now and predict only basic blocks with two outgoing
 900      edges.  It is possible to predict generic case too, but we have to
 901      ignore first match heuristics and do more involved combining.  Implement
 902      this later.  */
 903   if (nedges != 2)
 904     {
 905       if (!bb->count && !dry_run)
 906         set_even_probabilities (bb);
 907       clear_bb_predictions (bb);
 908       if (dump_file)
 909         fprintf (dump_file, "%i edges in bb %i predicted to even probabilities\n",
 910                  nedges, bb->index);
 911       return;
 912     }
 913
 914   if (dump_file)
 915     fprintf (dump_file, "Predictions for bb %i\n", bb->index);
 916
 917   edge_prediction **preds = bb_predictions->get (bb);
 918   if (preds)
 919     {
 920       /* We implement "first match" heuristics and use probability guessed
 921          by predictor with smallest index.  */
 922       for (pred = *preds; pred; pred = pred->ep_next)
 923         {
 924           enum br_predictor predictor = pred->ep_predictor;
 925           int probability = pred->ep_probability;
 926
 927           if (pred->ep_edge != first)
 928             probability = REG_BR_PROB_BASE - probability;
 929
 930           found = true;
          /* First match heuristics would be wildly confused if we predicted
             both directions.  */
 933           if (best_predictor > predictor)
 934             {
 935               struct edge_prediction *pred2;
 936               int prob = probability;
 937
 938               for (pred2 = (struct edge_prediction *) *preds;
 939                    pred2; pred2 = pred2->ep_next)
 940                if (pred2 != pred && pred2->ep_predictor == pred->ep_predictor)
 941                  {
 942                    int probability2 = pred->ep_probability;
 943
 944                    if (pred2->ep_edge != first)
 945                      probability2 = REG_BR_PROB_BASE - probability2;
 946
 947                    if ((probability < REG_BR_PROB_BASE / 2) !=
 948                        (probability2 < REG_BR_PROB_BASE / 2))
 949                      break;
 950
 951                    /* If the same predictor later gave better result, go for it! */
 952                    if ((probability >= REG_BR_PROB_BASE / 2 && (probability2 > probability))
 953                        || (probability <= REG_BR_PROB_BASE / 2 && (probability2 < probability)))
 954                      prob = probability2;
 955                  }
 956               if (!pred2)
 957                 best_probability = prob, best_predictor = predictor;
 958             }
 959
 960           d = (combined_probability * probability
 961                + (REG_BR_PROB_BASE - combined_probability)
 962                * (REG_BR_PROB_BASE - probability));
 963
 964           /* Use FP math to avoid overflows of 32bit integers.  */
 965           if (d == 0)
 966             /* If one probability is 0% and one 100%, avoid division by zero.  */
 967             combined_probability = REG_BR_PROB_BASE / 2;
 968           else
 969             combined_probability = (((double) combined_probability)
 970                                     * probability
 971                                     * REG_BR_PROB_BASE / d + 0.5);
 972         }
 973     }
 974
 975   /* Decide which heuristic to use.  In case we didn't match anything,
 976      use no_prediction heuristic, in case we did match, use either
 977      first match or Dempster-Shaffer theory depending on the flags.  */
 978
 979   if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
 980     first_match = true;
 981
 982   if (!found)
 983     dump_prediction (dump_file, PRED_NO_PREDICTION, combined_probability, bb, true);
 984   else
 985     {
 986       dump_prediction (dump_file, PRED_DS_THEORY, combined_probability, bb,
 987                        !first_match);
 988       dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability, bb,
 989                        first_match);
 990     }
 991
 992   if (first_match)
 993     combined_probability = best_probability;
 994   dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);
 995
 996   if (preds)
 997     {
 998       for (pred = (struct edge_prediction *) *preds; pred; pred = pred->ep_next)
 999         {
1000           enum br_predictor predictor = pred->ep_predictor;
1001           int probability = pred->ep_probability;
1002
1003           if (pred->ep_edge != EDGE_SUCC (bb, 0))
1004             probability = REG_BR_PROB_BASE - probability;
1005           dump_prediction (dump_file, predictor, probability, bb,
1006                            !first_match || best_predictor == predictor);
1007         }
1008     }
1009   clear_bb_predictions (bb);
1010
1011   if (!bb->count && !dry_run)
1012     {
1013       first->probability = combined_probability;
1014       second->probability = REG_BR_PROB_BASE - combined_probability;
1015     }
1016 }
1017
1018 /* Check if T1 and T2 satisfy the IV_COMPARE condition.
1019    Return the SSA_NAME if the condition satisfies, NULL otherwise.
1020
1021    T1 and T2 should be one of the following cases:
1022      1. T1 is SSA_NAME, T2 is NULL
1023      2. T1 is SSA_NAME, T2 is INTEGER_CST between [-4, 4]
1024      3. T2 is SSA_NAME, T1 is INTEGER_CST between [-4, 4]  */
1025
1026 static tree
1027 strips_small_constant (tree t1, tree t2)
1028 {
1029   tree ret = NULL;
1030   int value = 0;
1031
1032   if (!t1)
1033     return NULL;
1034   else if (TREE_CODE (t1) == SSA_NAME)
1035     ret = t1;
1036   else if (tree_fits_shwi_p (t1))
1037     value = tree_to_shwi (t1);
1038   else
1039     return NULL;
1040
1041   if (!t2)
1042     return ret;
1043   else if (tree_fits_shwi_p (t2))
1044     value = tree_to_shwi (t2);
1045   else if (TREE_CODE (t2) == SSA_NAME)
1046     {
1047       if (ret)
1048         return NULL;
1049       else
1050         ret = t2;
1051     }
1052
1053   if (value <= 4 && value >= -4)
1054     return ret;
1055   else
1056     return NULL;
1057 }
1058
1059 /* Return the SSA_NAME in T or T's operands.
1060    Return NULL if SSA_NAME cannot be found.  */
1061
1062 static tree
1063 get_base_value (tree t)
1064 {
1065   if (TREE_CODE (t) == SSA_NAME)
1066     return t;
1067
1068   if (!BINARY_CLASS_P (t))
1069     return NULL;
1070
1071   switch (TREE_OPERAND_LENGTH (t))
1072     {
1073     case 1:
1074       return strips_small_constant (TREE_OPERAND (t, 0), NULL);
1075     case 2:
1076       return strips_small_constant (TREE_OPERAND (t, 0),
1077                                     TREE_OPERAND (t, 1));
1078     default:
1079       return NULL;
1080     }
1081 }
1082
1083 /* Check the compare STMT in LOOP. If it compares an induction
1084    variable to a loop invariant, return true, and save
1085    LOOP_INVARIANT, COMPARE_CODE and LOOP_STEP.
1086    Otherwise return false and set LOOP_INVAIANT to NULL.  */
1087
1088 static bool
1089 is_comparison_with_loop_invariant_p (gcond *stmt, struct loop *loop,
1090                                      tree *loop_invariant,
1091                                      enum tree_code *compare_code,
1092                                      tree *loop_step,
1093                                      tree *loop_iv_base)
1094 {
1095   tree op0, op1, bound, base;
1096   affine_iv iv0, iv1;
1097   enum tree_code code;
1098   tree step;
1099
1100   code = gimple_cond_code (stmt);
1101   *loop_invariant = NULL;
1102
1103   switch (code)
1104     {
1105     case GT_EXPR:
1106     case GE_EXPR:
1107     case NE_EXPR:
1108     case LT_EXPR:
1109     case LE_EXPR:
1110     case EQ_EXPR:
1111       break;
1112
1113     default:
1114       return false;
1115     }
1116
1117   op0 = gimple_cond_lhs (stmt);
1118   op1 = gimple_cond_rhs (stmt);
1119
1120   if ((TREE_CODE (op0) != SSA_NAME && TREE_CODE (op0) != INTEGER_CST)
1121        || (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op1) != INTEGER_CST))
1122     return false;
1123   if (!simple_iv (loop, loop_containing_stmt (stmt), op0, &iv0, true))
1124     return false;
1125   if (!simple_iv (loop, loop_containing_stmt (stmt), op1, &iv1, true))
1126     return false;
1127   if (TREE_CODE (iv0.step) != INTEGER_CST
1128       || TREE_CODE (iv1.step) != INTEGER_CST)
1129     return false;
1130   if ((integer_zerop (iv0.step) && integer_zerop (iv1.step))
1131       || (!integer_zerop (iv0.step) && !integer_zerop (iv1.step)))
1132     return false;
1133
1134   if (integer_zerop (iv0.step))
1135     {
1136       if (code != NE_EXPR && code != EQ_EXPR)
1137         code = invert_tree_comparison (code, false);
1138       bound = iv0.base;
1139       base = iv1.base;
1140       if (tree_fits_shwi_p (iv1.step))
1141         step = iv1.step;
1142       else
1143         return false;
1144     }
1145   else
1146     {
1147       bound = iv1.base;
1148       base = iv0.base;
1149       if (tree_fits_shwi_p (iv0.step))
1150         step = iv0.step;
1151       else
1152         return false;
1153     }
1154
1155   if (TREE_CODE (bound) != INTEGER_CST)
1156     bound = get_base_value (bound);
1157   if (!bound)
1158     return false;
1159   if (TREE_CODE (base) != INTEGER_CST)
1160     base = get_base_value (base);
1161   if (!base)
1162     return false;
1163
1164   *loop_invariant = bound;
1165   *compare_code = code;
1166   *loop_step = step;
1167   *loop_iv_base = base;
1168   return true;
1169 }
1170
1171 /* Compare two SSA_NAMEs: returns TRUE if T1 and T2 are value coherent.  */
1172
1173 static bool
1174 expr_coherent_p (tree t1, tree t2)
1175 {
1176   gimple *stmt;
1177   tree ssa_name_1 = NULL;
1178   tree ssa_name_2 = NULL;
1179
1180   gcc_assert (TREE_CODE (t1) == SSA_NAME || TREE_CODE (t1) == INTEGER_CST);
1181   gcc_assert (TREE_CODE (t2) == SSA_NAME || TREE_CODE (t2) == INTEGER_CST);
1182
1183   if (t1 == t2)
1184     return true;
1185
1186   if (TREE_CODE (t1) == INTEGER_CST && TREE_CODE (t2) == INTEGER_CST)
1187     return true;
1188   if (TREE_CODE (t1) == INTEGER_CST || TREE_CODE (t2) == INTEGER_CST)
1189     return false;
1190
1191   /* Check to see if t1 is expressed/defined with t2.  */
1192   stmt = SSA_NAME_DEF_STMT (t1);
1193   gcc_assert (stmt != NULL);
1194   if (is_gimple_assign (stmt))
1195     {
1196       ssa_name_1 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
1197       if (ssa_name_1 && ssa_name_1 == t2)
1198         return true;
1199     }
1200
1201   /* Check to see if t2 is expressed/defined with t1.  */
1202   stmt = SSA_NAME_DEF_STMT (t2);
1203   gcc_assert (stmt != NULL);
1204   if (is_gimple_assign (stmt))
1205     {
1206       ssa_name_2 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
1207       if (ssa_name_2 && ssa_name_2 == t1)
1208         return true;
1209     }
1210
1211   /* Compare if t1 and t2's def_stmts are identical.  */
1212   if (ssa_name_2 != NULL && ssa_name_1 == ssa_name_2)
1213     return true;
1214   else
1215     return false;
1216 }
1217
1218 /* Predict branch probability of BB when BB contains a branch that compares
1219    an induction variable in LOOP with LOOP_IV_BASE_VAR to LOOP_BOUND_VAR. The
1220    loop exit is compared using LOOP_BOUND_CODE, with step of LOOP_BOUND_STEP.
1221
1222    E.g.
1223      for (int i = 0; i < bound; i++) {
1224        if (i < bound - 2)
1225          computation_1();
1226        else
1227          computation_2();
1228      }
1229
1230   In this loop, we will predict the branch inside the loop to be taken.  */
1231
1232 static void
1233 predict_iv_comparison (struct loop *loop, basic_block bb,
1234                        tree loop_bound_var,
1235                        tree loop_iv_base_var,
1236                        enum tree_code loop_bound_code,
1237                        int loop_bound_step)
1238 {
1239   gimple *stmt;
1240   tree compare_var, compare_base;
1241   enum tree_code compare_code;
1242   tree compare_step_var;
1243   edge then_edge;
1244   edge_iterator ei;
1245
1246   if (predicted_by_p (bb, PRED_LOOP_ITERATIONS_GUESSED)
1247       || predicted_by_p (bb, PRED_LOOP_ITERATIONS)
1248       || predicted_by_p (bb, PRED_LOOP_EXIT))
1249     return;
1250
1251   stmt = last_stmt (bb);
1252   if (!stmt || gimple_code (stmt) != GIMPLE_COND)
1253     return;
1254   if (!is_comparison_with_loop_invariant_p (as_a <gcond *> (stmt),
1255                                             loop, &compare_var,
1256                                             &compare_code,
1257                                             &compare_step_var,
1258                                             &compare_base))
1259     return;
1260
1261   /* Find the taken edge.  */
1262   FOR_EACH_EDGE (then_edge, ei, bb->succs)
1263     if (then_edge->flags & EDGE_TRUE_VALUE)
1264       break;
1265
1266   /* When comparing an IV to a loop invariant, NE is more likely to be
1267      taken while EQ is more likely to be not-taken.  */
1268   if (compare_code == NE_EXPR)
1269     {
1270       predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1271       return;
1272     }
1273   else if (compare_code == EQ_EXPR)
1274     {
1275       predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1276       return;
1277     }
1278
1279   if (!expr_coherent_p (loop_iv_base_var, compare_base))
1280     return;
1281
1282   /* If loop bound, base and compare bound are all constants, we can
1283      calculate the probability directly.  */
1284   if (tree_fits_shwi_p (loop_bound_var)
1285       && tree_fits_shwi_p (compare_var)
1286       && tree_fits_shwi_p (compare_base))
1287     {
1288       int probability;
1289       bool overflow, overall_overflow = false;
1290       widest_int compare_count, tem;
1291
1292       /* (loop_bound - base) / compare_step */
1293       tem = wi::sub (wi::to_widest (loop_bound_var),
1294                      wi::to_widest (compare_base), SIGNED, &overflow);
1295       overall_overflow |= overflow;
1296       widest_int loop_count = wi::div_trunc (tem,
1297                                              wi::to_widest (compare_step_var),
1298                                              SIGNED, &overflow);
1299       overall_overflow |= overflow;
1300
1301       if (!wi::neg_p (wi::to_widest (compare_step_var))
1302           ^ (compare_code == LT_EXPR || compare_code == LE_EXPR))
1303         {
1304           /* (loop_bound - compare_bound) / compare_step */
1305           tem = wi::sub (wi::to_widest (loop_bound_var),
1306                          wi::to_widest (compare_var), SIGNED, &overflow);
1307           overall_overflow |= overflow;
1308           compare_count = wi::div_trunc (tem, wi::to_widest (compare_step_var),
1309                                          SIGNED, &overflow);
1310           overall_overflow |= overflow;
1311         }
1312       else
1313         {
1314           /* (compare_bound - base) / compare_step */
1315           tem = wi::sub (wi::to_widest (compare_var),
1316                          wi::to_widest (compare_base), SIGNED, &overflow);
1317           overall_overflow |= overflow;
1318           compare_count = wi::div_trunc (tem, wi::to_widest (compare_step_var),
1319                                          SIGNED, &overflow);
1320           overall_overflow |= overflow;
1321         }
1322       if (compare_code == LE_EXPR || compare_code == GE_EXPR)
1323         ++compare_count;
1324       if (loop_bound_code == LE_EXPR || loop_bound_code == GE_EXPR)
1325         ++loop_count;
1326       if (wi::neg_p (compare_count))
1327         compare_count = 0;
1328       if (wi::neg_p (loop_count))
1329         loop_count = 0;
1330       if (loop_count == 0)
1331         probability = 0;
1332       else if (wi::cmps (compare_count, loop_count) == 1)
1333         probability = REG_BR_PROB_BASE;
1334       else
1335         {
1336           tem = compare_count * REG_BR_PROB_BASE;
1337           tem = wi::udiv_trunc (tem, loop_count);
1338           probability = tem.to_uhwi ();
1339         }
1340
1341       if (!overall_overflow)
1342         predict_edge (then_edge, PRED_LOOP_IV_COMPARE, probability);
1343
1344       return;
1345     }
1346
1347   if (expr_coherent_p (loop_bound_var, compare_var))
1348     {
1349       if ((loop_bound_code == LT_EXPR || loop_bound_code == LE_EXPR)
1350           && (compare_code == LT_EXPR || compare_code == LE_EXPR))
1351         predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1352       else if ((loop_bound_code == GT_EXPR || loop_bound_code == GE_EXPR)
1353                && (compare_code == GT_EXPR || compare_code == GE_EXPR))
1354         predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1355       else if (loop_bound_code == NE_EXPR)
1356         {
1357           /* If the loop backedge condition is "(i != bound)", we do
1358              the comparison based on the step of IV:
1359              * step < 0 : backedge condition is like (i > bound)
1360              * step > 0 : backedge condition is like (i < bound)  */
1361           gcc_assert (loop_bound_step != 0);
1362           if (loop_bound_step > 0
1363               && (compare_code == LT_EXPR
1364                   || compare_code == LE_EXPR))
1365             predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1366           else if (loop_bound_step < 0
1367                    && (compare_code == GT_EXPR
1368                        || compare_code == GE_EXPR))
1369             predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1370           else
1371             predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1372         }
1373       else
1374         /* The branch is predicted not-taken if loop_bound_code is
1375            opposite with compare_code.  */
1376         predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1377     }
1378   else if (expr_coherent_p (loop_iv_base_var, compare_var))
1379     {
1380       /* For cases like:
1381            for (i = s; i < h; i++)
1382              if (i > s + 2) ....
1383          The branch should be predicted taken.  */
1384       if (loop_bound_step > 0
1385           && (compare_code == GT_EXPR || compare_code == GE_EXPR))
1386         predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1387       else if (loop_bound_step < 0
1388                && (compare_code == LT_EXPR || compare_code == LE_EXPR))
1389         predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1390       else
1391         predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1392     }
1393 }
1394
1395 /* Predict for extra loop exits that will lead to EXIT_EDGE. The extra loop
1396    exits are resulted from short-circuit conditions that will generate an
1397    if_tmp. E.g.:
1398
1399    if (foo() || global > 10)
1400      break;
1401
1402    This will be translated into:
1403
1404    BB3:
1405      loop header...
1406    BB4:
1407      if foo() goto BB6 else goto BB5
1408    BB5:
1409      if global > 10 goto BB6 else goto BB7
1410    BB6:
1411      goto BB7
1412    BB7:
1413      iftmp = (PHI 0(BB5), 1(BB6))
1414      if iftmp == 1 goto BB8 else goto BB3
1415    BB8:
1416      outside of the loop...
1417
1418    The edge BB7->BB8 is loop exit because BB8 is outside of the loop.
1419    From the dataflow, we can infer that BB4->BB6 and BB5->BB6 are also loop
1420    exits. This function takes BB7->BB8 as input, and finds out the extra loop
1421    exits to predict them using PRED_LOOP_EXIT.  */
1422
1423 static void
1424 predict_extra_loop_exits (edge exit_edge)
1425 {
1426   unsigned i;
1427   bool check_value_one;
1428   gimple *lhs_def_stmt;
1429   gphi *phi_stmt;
1430   tree cmp_rhs, cmp_lhs;
1431   gimple *last;
1432   gcond *cmp_stmt;
1433
1434   last = last_stmt (exit_edge->src);
1435   if (!last)
1436     return;
1437   cmp_stmt = dyn_cast <gcond *> (last);
1438   if (!cmp_stmt)
1439     return;
1440
1441   cmp_rhs = gimple_cond_rhs (cmp_stmt);
1442   cmp_lhs = gimple_cond_lhs (cmp_stmt);
1443   if (!TREE_CONSTANT (cmp_rhs)
1444       || !(integer_zerop (cmp_rhs) || integer_onep (cmp_rhs)))
1445     return;
1446   if (TREE_CODE (cmp_lhs) != SSA_NAME)
1447     return;
1448
1449   /* If check_value_one is true, only the phi_args with value '1' will lead
1450      to loop exit. Otherwise, only the phi_args with value '0' will lead to
1451      loop exit.  */
1452   check_value_one = (((integer_onep (cmp_rhs))
1453                     ^ (gimple_cond_code (cmp_stmt) == EQ_EXPR))
1454                     ^ ((exit_edge->flags & EDGE_TRUE_VALUE) != 0));
1455
1456   lhs_def_stmt = SSA_NAME_DEF_STMT (cmp_lhs);
1457   if (!lhs_def_stmt)
1458     return;
1459
1460   phi_stmt = dyn_cast <gphi *> (lhs_def_stmt);
1461   if (!phi_stmt)
1462     return;
1463
1464   for (i = 0; i < gimple_phi_num_args (phi_stmt); i++)
1465     {
1466       edge e1;
1467       edge_iterator ei;
1468       tree val = gimple_phi_arg_def (phi_stmt, i);
1469       edge e = gimple_phi_arg_edge (phi_stmt, i);
1470
1471       if (!TREE_CONSTANT (val) || !(integer_zerop (val) || integer_onep (val)))
1472         continue;
1473       if ((check_value_one ^ integer_onep (val)) == 1)
1474         continue;
1475       if (EDGE_COUNT (e->src->succs) != 1)
1476         {
1477           predict_paths_leading_to_edge (e, PRED_LOOP_EXIT, NOT_TAKEN);
1478           continue;
1479         }
1480
1481       FOR_EACH_EDGE (e1, ei, e->src->preds)
1482         predict_paths_leading_to_edge (e1, PRED_LOOP_EXIT, NOT_TAKEN);
1483     }
1484 }
1485
1486 /* Predict edge probabilities by exploiting loop structure.  */
1487
1488 static void
1489 predict_loops (void)
1490 {
1491   struct loop *loop;
1492
1493   /* Try to predict out blocks in a loop that are not part of a
1494      natural loop.  */
1495   FOR_EACH_LOOP (loop, 0)
1496     {
1497       basic_block bb, *bbs;
1498       unsigned j, n_exits;
1499       vec<edge> exits;
1500       struct tree_niter_desc niter_desc;
1501       edge ex;
1502       struct nb_iter_bound *nb_iter;
1503       enum tree_code loop_bound_code = ERROR_MARK;
1504       tree loop_bound_step = NULL;
1505       tree loop_bound_var = NULL;
1506       tree loop_iv_base = NULL;
1507       gcond *stmt = NULL;
1508
1509       exits = get_loop_exit_edges (loop);
1510       n_exits = exits.length ();
1511       if (!n_exits)
1512         {
1513           exits.release ();
1514           continue;
1515         }
1516
1517       FOR_EACH_VEC_ELT (exits, j, ex)
1518         {
1519           tree niter = NULL;
1520           HOST_WIDE_INT nitercst;
1521           int max = PARAM_VALUE (PARAM_MAX_PREDICTED_ITERATIONS);
1522           int probability;
1523           enum br_predictor predictor;
1524
1525           predict_extra_loop_exits (ex);
1526
1527           if (number_of_iterations_exit (loop, ex, &niter_desc, false, false))
1528             niter = niter_desc.niter;
1529           if (!niter || TREE_CODE (niter_desc.niter) != INTEGER_CST)
1530             niter = loop_niter_by_eval (loop, ex);
1531
1532           if (TREE_CODE (niter) == INTEGER_CST)
1533             {
1534               if (tree_fits_uhwi_p (niter)
1535                   && max
1536                   && compare_tree_int (niter, max - 1) == -1)
1537                 nitercst = tree_to_uhwi (niter) + 1;
1538               else
1539                 nitercst = max;
1540               predictor = PRED_LOOP_ITERATIONS;
1541             }
1542           /* If we have just one exit and we can derive some information about
1543              the number of iterations of the loop from the statements inside
1544              the loop, use it to predict this exit.  */
1545           else if (n_exits == 1)
1546             {
1547               nitercst = estimated_stmt_executions_int (loop);
1548               if (nitercst < 0)
1549                 continue;
1550               if (nitercst > max)
1551                 nitercst = max;
1552
1553               predictor = PRED_LOOP_ITERATIONS_GUESSED;
1554             }
1555           else
1556             continue;
1557
1558           /* If the prediction for number of iterations is zero, do not
1559              predict the exit edges.  */
1560           if (nitercst == 0)
1561             continue;
1562
1563           probability = ((REG_BR_PROB_BASE + nitercst / 2) / nitercst);
1564           predict_edge (ex, predictor, probability);
1565         }
1566       exits.release ();
1567
1568       /* Find information about loop bound variables.  */
1569       for (nb_iter = loop->bounds; nb_iter;
1570            nb_iter = nb_iter->next)
1571         if (nb_iter->stmt
1572             && gimple_code (nb_iter->stmt) == GIMPLE_COND)
1573           {
1574             stmt = as_a <gcond *> (nb_iter->stmt);
1575             break;
1576           }
1577       if (!stmt && last_stmt (loop->header)
1578           && gimple_code (last_stmt (loop->header)) == GIMPLE_COND)
1579         stmt = as_a <gcond *> (last_stmt (loop->header));
1580       if (stmt)
1581         is_comparison_with_loop_invariant_p (stmt, loop,
1582                                              &loop_bound_var,
1583                                              &loop_bound_code,
1584                                              &loop_bound_step,
1585                                              &loop_iv_base);
1586
1587       bbs = get_loop_body (loop);
1588
1589       for (j = 0; j < loop->num_nodes; j++)
1590         {
1591           int header_found = 0;
1592           edge e;
1593           edge_iterator ei;
1594
1595           bb = bbs[j];
1596
1597           /* Bypass loop heuristics on continue statement.  These
1598              statements construct loops via "non-loop" constructs
1599              in the source language and are better to be handled
1600              separately.  */
1601           if (predicted_by_p (bb, PRED_CONTINUE))
1602             continue;
1603
1604           /* Loop branch heuristics - predict an edge back to a
1605              loop's head as taken.  */
1606           if (bb == loop->latch)
1607             {
1608               e = find_edge (loop->latch, loop->header);
1609               if (e)
1610                 {
1611                   header_found = 1;
1612                   predict_edge_def (e, PRED_LOOP_BRANCH, TAKEN);
1613                 }
1614             }
1615
1616           /* Loop exit heuristics - predict an edge exiting the loop if the
1617              conditional has no loop header successors as not taken.  */
1618           if (!header_found
1619               /* If we already used more reliable loop exit predictors, do not
1620                  bother with PRED_LOOP_EXIT.  */
1621               && !predicted_by_p (bb, PRED_LOOP_ITERATIONS_GUESSED)
1622               && !predicted_by_p (bb, PRED_LOOP_ITERATIONS))
1623             {
1624               /* For loop with many exits we don't want to predict all exits
1625                  with the pretty large probability, because if all exits are
1626                  considered in row, the loop would be predicted to iterate
1627                  almost never.  The code to divide probability by number of
1628                  exits is very rough.  It should compute the number of exits
1629                  taken in each patch through function (not the overall number
1630                  of exits that might be a lot higher for loops with wide switch
1631                  statements in them) and compute n-th square root.
1632
1633                  We limit the minimal probability by 2% to avoid
1634                  EDGE_PROBABILITY_RELIABLE from trusting the branch prediction
1635                  as this was causing regression in perl benchmark containing such
1636                  a wide loop.  */
1637
1638               int probability = ((REG_BR_PROB_BASE
1639                                   - predictor_info [(int) PRED_LOOP_EXIT].hitrate)
1640                                  / n_exits);
1641               if (probability < HITRATE (2))
1642                 probability = HITRATE (2);
1643               FOR_EACH_EDGE (e, ei, bb->succs)
1644                 if (e->dest->index < NUM_FIXED_BLOCKS
1645                     || !flow_bb_inside_loop_p (loop, e->dest))
1646                   predict_edge (e, PRED_LOOP_EXIT, probability);
1647             }
1648           if (loop_bound_var)
1649             predict_iv_comparison (loop, bb, loop_bound_var, loop_iv_base,
1650                                    loop_bound_code,
1651                                    tree_to_shwi (loop_bound_step));
1652         }
1653
1654       /* Free basic blocks from get_loop_body.  */
1655       free (bbs);
1656     }
1657 }
1658
1659 /* Attempt to predict probabilities of BB outgoing edges using local
1660    properties.  */
1661 static void
1662 bb_estimate_probability_locally (basic_block bb)
1663 {
1664   rtx_insn *last_insn = BB_END (bb);
1665   rtx cond;
1666
1667   if (! can_predict_insn_p (last_insn))
1668     return;
1669   cond = get_condition (last_insn, NULL, false, false);
1670   if (! cond)
1671     return;
1672
1673   /* Try "pointer heuristic."
1674      A comparison ptr == 0 is predicted as false.
1675      Similarly, a comparison ptr1 == ptr2 is predicted as false.  */
1676   if (COMPARISON_P (cond)
1677       && ((REG_P (XEXP (cond, 0)) && REG_POINTER (XEXP (cond, 0)))
1678           || (REG_P (XEXP (cond, 1)) && REG_POINTER (XEXP (cond, 1)))))
1679     {
1680       if (GET_CODE (cond) == EQ)
1681         predict_insn_def (last_insn, PRED_POINTER, NOT_TAKEN);
1682       else if (GET_CODE (cond) == NE)
1683         predict_insn_def (last_insn, PRED_POINTER, TAKEN);
1684     }
1685   else
1686
1687   /* Try "opcode heuristic."
1688      EQ tests are usually false and NE tests are usually true. Also,
1689      most quantities are positive, so we can make the appropriate guesses
1690      about signed comparisons against zero.  */
1691     switch (GET_CODE (cond))
1692       {
1693       case CONST_INT:
1694         /* Unconditional branch.  */
1695         predict_insn_def (last_insn, PRED_UNCONDITIONAL,
1696                           cond == const0_rtx ? NOT_TAKEN : TAKEN);
1697         break;
1698
1699       case EQ:
1700       case UNEQ:
1701         /* Floating point comparisons appears to behave in a very
1702            unpredictable way because of special role of = tests in
1703            FP code.  */
1704         if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
1705           ;
1706         /* Comparisons with 0 are often used for booleans and there is
1707            nothing useful to predict about them.  */
1708         else if (XEXP (cond, 1) == const0_rtx
1709                  || XEXP (cond, 0) == const0_rtx)
1710           ;
1711         else
1712           predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, NOT_TAKEN);
1713         break;
1714
1715       case NE:
1716       case LTGT:
1717         /* Floating point comparisons appears to behave in a very
1718            unpredictable way because of special role of = tests in
1719            FP code.  */
1720         if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
1721           ;
1722         /* Comparisons with 0 are often used for booleans and there is
1723            nothing useful to predict about them.  */
1724         else if (XEXP (cond, 1) == const0_rtx
1725                  || XEXP (cond, 0) == const0_rtx)
1726           ;
1727         else
1728           predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, TAKEN);
1729         break;
1730
1731       case ORDERED:
1732         predict_insn_def (last_insn, PRED_FPOPCODE, TAKEN);
1733         break;
1734
1735       case UNORDERED:
1736         predict_insn_def (last_insn, PRED_FPOPCODE, NOT_TAKEN);
1737         break;
1738
1739       case LE:
1740       case LT:
1741         if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
1742             || XEXP (cond, 1) == constm1_rtx)
1743           predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, NOT_TAKEN);
1744         break;
1745
1746       case GE:
1747       case GT:
1748         if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
1749             || XEXP (cond, 1) == constm1_rtx)
1750           predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, TAKEN);
1751         break;
1752
1753       default:
1754         break;
1755       }
1756 }
1757
1758 /* Set edge->probability for each successor edge of BB.  */
1759 void
1760 guess_outgoing_edge_probabilities (basic_block bb)
1761 {
1762   bb_estimate_probability_locally (bb);
1763   combine_predictions_for_insn (BB_END (bb), bb);
1764 }
1765 \f
1766 static tree expr_expected_value (tree, bitmap, enum br_predictor *predictor);  /* Declared early: mutually recursive with expr_expected_value_1.  */
1767
1768 /* Helper function for expr_expected_value: guess the constant produced by CODE applied to OP0 (and OP1); TYPE is the type used when folding.  */
1769 /* VISITED records SSA versions already walked; *PREDICTOR, if non-null, receives the predictor backing the guess.  Returns NULL if unknown.  */
1770 static tree
1771 expr_expected_value_1 (tree type, tree op0, enum tree_code code,
1772                        tree op1, bitmap visited, enum br_predictor *predictor)
1773 {
1774   gimple *def;
1775
1776   if (predictor)
1777     *predictor = PRED_UNCONDITIONAL;  /* Default; replaced below when a hint is found.  */
1778
1779   if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1780     {
1781       if (TREE_CONSTANT (op0))
1782         return op0;
1783
1784       if (code != SSA_NAME)
1785         return NULL_TREE;  /* Only SSA names are traced to their defining statement.  */
1786
1787       def = SSA_NAME_DEF_STMT (op0);
1788
1789       /* If we were already here, break the infinite cycle.  */
1790       if (!bitmap_set_bit (visited, SSA_NAME_VERSION (op0)))
1791         return NULL;
1792
1793       if (gimple_code (def) == GIMPLE_PHI)
1794         {
1795           /* All the arguments of the PHI node must have the same expected
1796              constant value.  */
1797           int i, n = gimple_phi_num_args (def);
1798           tree val = NULL, new_val;
1799
1800           for (i = 0; i < n; i++)
1801             {
1802               tree arg = PHI_ARG_DEF (def, i);
1803               enum br_predictor predictor2;
1804
1805               /* If this PHI has itself as an argument, we cannot
1806                  determine the expected value of this argument.  However,
1807                  if we can find an expected constant value for the other
1808                  PHI args then we can still be sure that this is
1809                  likely a constant.  So be optimistic and just
1810                  continue with the next argument.  */
1811               if (arg == PHI_RESULT (def))
1812                 continue;
1813
1814               new_val = expr_expected_value (arg, visited, &predictor2);
1815
1816               /* It is difficult to combine value predictors.  Simply assume
1817                  that the later (larger-valued) predictor is weaker and keep it.  */
1818               if (predictor && *predictor < predictor2)
1819                 *predictor = predictor2;
1820               if (!new_val)
1821                 return NULL;
1822               if (!val)
1823                 val = new_val;
1824               else if (!operand_equal_p (val, new_val, false))
1825                 return NULL;  /* Two arguments disagree: no single expected value.  */
1826             }
1827           return val;
1828         }
1829       if (is_gimple_assign (def))
1830         {
1831           if (gimple_assign_lhs (def) != op0)
1832             return NULL;
1833
1834           return expr_expected_value_1 (TREE_TYPE (gimple_assign_lhs (def)),  /* Recurse on the right-hand side of DEF.  */
1835                                         gimple_assign_rhs1 (def),
1836                                         gimple_assign_rhs_code (def),
1837                                         gimple_assign_rhs2 (def),
1838                                         visited, predictor);
1839         }
1840
1841       if (is_gimple_call (def))
1842         {
1843           tree decl = gimple_call_fndecl (def);
1844           if (!decl)
1845             {
1846               if (gimple_call_internal_p (def)  /* Without a fndecl only the internal builtin_expect is handled.  */
1847                   && gimple_call_internal_fn (def) == IFN_BUILTIN_EXPECT)
1848                 {
1849                   gcc_assert (gimple_call_num_args (def) == 3);
1850                   tree val = gimple_call_arg (def, 0);
1851                   if (TREE_CONSTANT (val))
1852                     return val;  /* The tested value itself is already constant.  */
1853                   if (predictor)
1854                     {
1855                       tree val2 = gimple_call_arg (def, 2);  /* Third argument carries the predictor number.  */
1856                       gcc_assert (TREE_CODE (val2) == INTEGER_CST
1857                                   && tree_fits_uhwi_p (val2)
1858                                   && tree_to_uhwi (val2) < END_PREDICTORS);
1859                       *predictor = (enum br_predictor) tree_to_uhwi (val2);
1860                     }
1861                   return gimple_call_arg (def, 1);  /* Second argument is the expected value.  */
1862                 }
1863               return NULL;
1864             }
1865           if (DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
1866             switch (DECL_FUNCTION_CODE (decl))
1867               {
1868               case BUILT_IN_EXPECT:
1869                 {
1870                   tree val;
1871                   if (gimple_call_num_args (def) != 2)
1872                     return NULL;
1873                   val = gimple_call_arg (def, 0);
1874                   if (TREE_CONSTANT (val))
1875                     return val;
1876                   if (predictor)
1877                     *predictor = PRED_BUILTIN_EXPECT;
1878                   return gimple_call_arg (def, 1);  /* Second argument is the expected value.  */
1879                 }
1880
1881               case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_N:
1882               case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_1:
1883               case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_2:
1884               case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_4:
1885               case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_8:
1886               case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_16:
1887               case BUILT_IN_ATOMIC_COMPARE_EXCHANGE:
1888               case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_N:
1889               case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_1:
1890               case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_2:
1891               case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_4:
1892               case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_8:
1893               case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_16:
1894                 /* Assume that any given atomic operation has low contention,
1895                    and thus the compare-and-swap operation succeeds.  */
1896                 if (predictor)
1897                   *predictor = PRED_COMPARE_AND_SWAP;
1898                 return boolean_true_node;
1899               default:
1900                 break;
1901             }
1902         }
1903
1904       return NULL;
1905     }
1906
1907   if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS)
1908     {
1909       tree res;
1910       enum br_predictor predictor2;
1911       op0 = expr_expected_value (op0, visited, predictor);
1912       if (!op0)
1913         return NULL;
1914       op1 = expr_expected_value (op1, visited, &predictor2);
1915       if (predictor && *predictor < predictor2)
1916         *predictor = predictor2;  /* Keep the larger predictor value of the two operands.  */
1917       if (!op1)
1918         return NULL;
1919       res = fold_build2 (code, type, op0, op1);
1920       if (TREE_CONSTANT (res))
1921         return res;  /* Only a result that folded to a constant is returned.  */
1922       return NULL;
1923     }
1924   if (get_gimple_rhs_class (code) == GIMPLE_UNARY_RHS)
1925     {
1926       tree res;
1927       op0 = expr_expected_value (op0, visited, predictor);
1928       if (!op0)
1929         return NULL;
1930       res = fold_build1 (code, type, op0);
1931       if (TREE_CONSTANT (res))
1932         return res;
1933       return NULL;
1934     }
1935   return NULL;  /* Any other rhs class is not handled.  */
1936 }
1937
1938 /* Return constant EXPR will likely have at execution time, NULL if unknown.
1939    The function is used by builtin_expect branch predictor so the evidence
1940    must come from this construct and additional possible constant folding.
1941
1942    We may want to implement more involved value guess (such as value range
1943    propagation based prediction), but such tricks shall go to new
1944    implementation.  */
1945
1946 static tree
1947 expr_expected_value (tree expr, bitmap visited,  /* VISITED: SSA versions already walked, passed through to the helper.  */
1948                      enum br_predictor *predictor)  /* May be NULL; otherwise receives the predictor behind the value.  */
1949 {
1950   enum tree_code code;
1951   tree op0, op1;
1952
1953   if (TREE_CONSTANT (expr))
1954     {
1955       if (predictor)
1956         *predictor = PRED_UNCONDITIONAL;
1957       return expr;  /* A constant is its own expected value.  */
1958     }
1959
1960   extract_ops_from_tree (expr, &code, &op0, &op1);  /* Split EXPR into code and operands for the helper.  */
1961   return expr_expected_value_1 (TREE_TYPE (expr),
1962                                 op0, code, op1, visited, predictor);
1963 }
1964 \f
1965 /* Predict using opcode of the last statement in basic block BB: first from the expected value of its condition, then by pointer/opcode heuristics.  */
1966 static void
1967 tree_predict_by_opcode (basic_block bb)
1968 {
1969   gimple *stmt = last_stmt (bb);
1970   edge then_edge;
1971   tree op0, op1;
1972   tree type;
1973   tree val;
1974   enum tree_code cmp;
1975   bitmap visited;
1976   edge_iterator ei;
1977   enum br_predictor predictor;
1978
1979   if (!stmt || gimple_code (stmt) != GIMPLE_COND)
1980     return;  /* Only blocks ending in a GIMPLE_COND are handled.  */
1981   FOR_EACH_EDGE (then_edge, ei, bb->succs)  /* Locate the successor edge flagged EDGE_TRUE_VALUE.  */
1982     if (then_edge->flags & EDGE_TRUE_VALUE)
1983       break;
1984   op0 = gimple_cond_lhs (stmt);
1985   op1 = gimple_cond_rhs (stmt);
1986   cmp = gimple_cond_code (stmt);
1987   type = TREE_TYPE (op0);  /* Operand type selects the pointer/FP handling below.  */
1988   visited = BITMAP_ALLOC (NULL);
1989   val = expr_expected_value_1 (boolean_type_node, op0, cmp, op1, visited,  /* Expected value of the whole condition OP0 CMP OP1.  */
1990                                &predictor);
1991   BITMAP_FREE (visited);
1992   if (val && TREE_CODE (val) == INTEGER_CST)
1993     {
1994       if (predictor == PRED_BUILTIN_EXPECT)
1995         {
1996           int percent = PARAM_VALUE (BUILTIN_EXPECT_PROBABILITY);
1997
1998           gcc_assert (percent >= 0 && percent <= 100);
1999           if (integer_zerop (val))
2000             percent = 100 - percent;  /* Condition expected false: invert the then-edge hit rate.  */
2001           predict_edge (then_edge, PRED_BUILTIN_EXPECT, HITRATE (percent));
2002         }
2003       else
2004         predict_edge (then_edge, predictor,
2005                       integer_zerop (val) ? NOT_TAKEN : TAKEN);
2006     }
2007   /* Try "pointer heuristic."
2008      A comparison ptr == 0 is predicted as false.
2009      Similarly, a comparison ptr1 == ptr2 is predicted as false.
2010      Note this runs whether or not an expected value was found above.  */
2010   if (POINTER_TYPE_P (type))
2011     {
2012       if (cmp == EQ_EXPR)
2013         predict_edge_def (then_edge, PRED_TREE_POINTER, NOT_TAKEN);
2014       else if (cmp == NE_EXPR)
2015         predict_edge_def (then_edge, PRED_TREE_POINTER, TAKEN);
2016     }
2017   else
2018
2019   /* Try "opcode heuristic."
2020      EQ tests are usually false and NE tests are usually true.  Also,
2021      most quantities are positive, so we can make the appropriate guesses
2022      about signed comparisons against zero.  */
2023     switch (cmp)
2024       {
2025       case EQ_EXPR:
2026       case UNEQ_EXPR:
2027         /* Floating point comparisons appear to behave in a very
2028            unpredictable way because of the special role of = tests in
2029            FP code.  */
2030         if (FLOAT_TYPE_P (type))
2031           ;
2032         /* Comparisons with 0 are often used for booleans and there is
2033            nothing useful to predict about them.  */
2034         else if (integer_zerop (op0) || integer_zerop (op1))
2035           ;
2036         else
2037           predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, NOT_TAKEN);
2038         break;
2039
2040       case NE_EXPR:
2041       case LTGT_EXPR:
2042         /* Floating point comparisons appear to behave in a very
2043            unpredictable way because of the special role of = tests in
2044            FP code.  */
2045         if (FLOAT_TYPE_P (type))
2046           ;
2047         /* Comparisons with 0 are often used for booleans and there is
2048            nothing useful to predict about them.  */
2049         else if (integer_zerop (op0)
2050                  || integer_zerop (op1))
2051           ;
2052         else
2053           predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, TAKEN);
2054         break;
2055
2056       case ORDERED_EXPR:
2057         predict_edge_def (then_edge, PRED_TREE_FPOPCODE, TAKEN);
2058         break;
2059
2060       case UNORDERED_EXPR:
2061         predict_edge_def (then_edge, PRED_TREE_FPOPCODE, NOT_TAKEN);
2062         break;
2063
2064       case LE_EXPR:
2065       case LT_EXPR:
2066         if (integer_zerop (op1)  /* Only comparisons against 0, 1 or -1 (integer or real) are predicted.  */
2067             || integer_onep (op1)
2068             || integer_all_onesp (op1)
2069             || real_zerop (op1)
2070             || real_onep (op1)
2071             || real_minus_onep (op1))
2072           predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, NOT_TAKEN);
2073         break;
2074
2075       case GE_EXPR:
2076       case GT_EXPR:
2077         if (integer_zerop (op1)  /* Same set of constants as for LE/LT, opposite outcome.  */
2078             || integer_onep (op1)
2079             || integer_all_onesp (op1)
2080             || real_zerop (op1)
2081             || real_onep (op1)
2082             || real_minus_onep (op1))
2083           predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, TAKEN);
2084         break;
2085
2086       default:
2087         break;
2088       }
2089 }
2090
2091 /* Try to guess whether the value of return means error code.  Returns the predictor to apply for VAL, or PRED_NO_PREDICTION.  */
2092 /* *PREDICTION is written only when a predictor other than PRED_NO_PREDICTION is returned.  */
2093 static enum br_predictor
2094 return_prediction (tree val, enum prediction *prediction)
2095 {
2096   /* VOID.  */
2097   if (!val)
2098     return PRED_NO_PREDICTION;
2099   /* Different heuristics for pointers and scalars.  */
2100   if (POINTER_TYPE_P (TREE_TYPE (val)))
2101     {
2102       /* NULL is usually not returned.  */
2103       if (integer_zerop (val))
2104         {
2105           *prediction = NOT_TAKEN;
2106           return PRED_NULL_RETURN;
2107         }
2108     }
2109   else if (INTEGRAL_TYPE_P (TREE_TYPE (val)))
2110     {
2111       /* Negative return values are often used to indicate
2112          errors.  */
2113       if (TREE_CODE (val) == INTEGER_CST
2114           && tree_int_cst_sgn (val) < 0)
2115         {
2116           *prediction = NOT_TAKEN;
2117           return PRED_NEGATIVE_RETURN;
2118         }
2119       /* Constant return values seem to be commonly taken.
2120          Zero/one often represent booleans so exclude them from the
2121          heuristics.  */
2122       if (TREE_CONSTANT (val)
2123           && (!integer_zerop (val) && !integer_onep (val)))
2124         {
2125           *prediction = TAKEN;
2126           return PRED_CONST_RETURN;
2127         }
2128     }
2129   return PRED_NO_PREDICTION;  /* Other types, or no heuristic matched.  */
2130 }
2131
2132 /* Find the basic block with return expression and, when the returned value
2133    is defined by a PHI, apply RETURN_PREDICTION heuristics to each PHI argument's edge.  */
2134 static void
2135 apply_return_prediction (void)
2136 {
2137   greturn *return_stmt = NULL;
2138   tree return_val;
2139   edge e;
2140   gphi *phi;
2141   int phi_num_args, i;
2142   enum br_predictor pred;
2143   enum prediction direction;
2144   edge_iterator ei;
2145
2146   FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)  /* Stop at the first exit predecessor ending in a return.  */
2147     {
2148       gimple *last = last_stmt (e->src);
2149       if (last
2150           && gimple_code (last) == GIMPLE_RETURN)
2151         {
2152           return_stmt = as_a <greturn *> (last);
2153           break;
2154         }
2155     }
2156   if (!e)  /* Presumably E is NULL when the loop above ended without a break -- no return found.  */
2157     return;
2158   return_val = gimple_return_retval (return_stmt);
2159   if (!return_val)
2160     return;
2161   if (TREE_CODE (return_val) != SSA_NAME  /* Only a return value defined by a PHI node is analyzed.  */
2162       || !SSA_NAME_DEF_STMT (return_val)
2163       || gimple_code (SSA_NAME_DEF_STMT (return_val)) != GIMPLE_PHI)
2164     return;
2165   phi = as_a <gphi *> (SSA_NAME_DEF_STMT (return_val));
2166   phi_num_args = gimple_phi_num_args (phi);
2167   pred = return_prediction (PHI_ARG_DEF (phi, 0), &direction);
2168
2169   /* Avoid the degenerate case where all return values from the function
2170      belong to the same category (i.e. they are all positive constants)
2171      so we can hardly say something about them.  */
2172   for (i = 1; i < phi_num_args; i++)
2173     if (pred != return_prediction (PHI_ARG_DEF (phi, i), &direction))
2174       break;
2175   if (i != phi_num_args)  /* Some argument got a different predictor than argument 0.  */
2176     for (i = 0; i < phi_num_args; i++)
2177       {
2178         pred = return_prediction (PHI_ARG_DEF (phi, i), &direction);
2179         if (pred != PRED_NO_PREDICTION)
2180           predict_paths_leading_to_edge (gimple_phi_arg_edge (phi, i), pred,
2181                                          direction);
2182       }
2183 }
2184
2185 /* Look for basic block that contains unlikely to happen events
2186    (such as noreturn calls) and mark all paths leading to execution
2187    of this basic blocks as unlikely.  Also applies the return-value and GIMPLE_PREDICT hints.  */
2188
2189 static void
2190 tree_bb_level_predictions (void)
2191 {
2192   basic_block bb;
2193   bool has_return_edges = false;
2194   edge e;
2195   edge_iterator ei;
2196
2197   FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)  /* Is there any exit edge that is not abnormal, fake or EH?  */
2198     if (!(e->flags & (EDGE_ABNORMAL | EDGE_FAKE | EDGE_EH)))
2199       {
2200         has_return_edges = true;
2201         break;
2202       }
2203
2204   apply_return_prediction ();
2205
2206   FOR_EACH_BB_FN (bb, cfun)
2207     {
2208       gimple_stmt_iterator gsi;
2209
2210       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2211         {
2212           gimple *stmt = gsi_stmt (gsi);
2213           tree decl;
2214
2215           if (is_gimple_call (stmt))
2216             {
2217               if ((gimple_call_flags (stmt) & ECF_NORETURN)  /* Noreturn calls count only when the function has a return edge.  */
2218                   && has_return_edges)
2219                 predict_paths_leading_to (bb, PRED_NORETURN,
2220                                           NOT_TAKEN);
2221               decl = gimple_call_fndecl (stmt);
2222               if (decl
2223                   && lookup_attribute ("cold",  /* Calls to functions carrying the "cold" attribute.  */
2224                                        DECL_ATTRIBUTES (decl)))
2225                 predict_paths_leading_to (bb, PRED_COLD_FUNCTION,
2226                                           NOT_TAKEN);
2227             }
2228           else if (gimple_code (stmt) == GIMPLE_PREDICT)
2229             {
2230               predict_paths_leading_to (bb, gimple_predict_predictor (stmt),
2231                                         gimple_predict_outcome (stmt));
2232               /* Keep GIMPLE_PREDICT around so early inlining will propagate
2233                  hints to callers.  */
2234             }
2235         }
2236     }
2237 }
2238
2239 /* Callback for hash_map::traverse, asserts that the pointer map is
2240    empty, i.e. that the VALUE stored for the visited key is null.  */
2241
2242 bool
2243 assert_is_empty (const_basic_block const &, edge_prediction *const &value,
2244                  void *)
2245 {
2246   gcc_assert (!value);
2247   return false;  /* Always returns false; key and data arguments are unused.  */
2248 }
2249
2250 /* Predict branch probabilities and estimate profile for basic block BB: label, early-return and call heuristics per successor edge, then the opcode heuristic.  */
2251
2252 static void
2253 tree_estimate_probability_bb (basic_block bb)
2254 {
2255   edge e;
2256   edge_iterator ei;
2257   gimple *last;
2258
2259   FOR_EACH_EDGE (e, ei, bb->succs)
2260     {
2261       /* Predict edges to user labels with attributes.  */
2262       if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
2263         {
2264           gimple_stmt_iterator gi;
2265           for (gi = gsi_start_bb (e->dest); !gsi_end_p (gi); gsi_next (&gi))
2266             {
2267               glabel *label_stmt = dyn_cast <glabel *> (gsi_stmt (gi));
2268               tree decl;
2269
2270               if (!label_stmt)
2271                 break;  /* Stop at the first statement that is not a label.  */
2272               decl = gimple_label_label (label_stmt);
2273               if (DECL_ARTIFICIAL (decl))
2274                 continue;
2275
2276               /* Finally, we have a user-defined label.  */
2277               if (lookup_attribute ("cold", DECL_ATTRIBUTES (decl)))
2278                 predict_edge_def (e, PRED_COLD_LABEL, NOT_TAKEN);
2279               else if (lookup_attribute ("hot", DECL_ATTRIBUTES (decl)))
2280                 predict_edge_def (e, PRED_HOT_LABEL, TAKEN);
2281             }
2282         }
2283
2284       /* Predict early returns to be probable, as we've already taken
2285          care for error returns and other cases are often used for
2286          fast paths through function.
2287          NOTE(review): the edges below are predicted NOT_TAKEN, which reads as the opposite of "probable" -- confirm intent.
2288          Since we've already removed the return statements, we are
2289          looking for CFG like:
2290
2291          if (conditional)
2292          {
2293          ..
2294          goto return_block
2295          }
2296          some other blocks
2297          return_block:
2298          return_stmt.  */
2299       if (e->dest != bb->next_bb
2300           && e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2301           && single_succ_p (e->dest)
2302           && single_succ_edge (e->dest)->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)
2303           && (last = last_stmt (e->dest)) != NULL
2304           && gimple_code (last) == GIMPLE_RETURN)
2305         {
2306           edge e1;
2307           edge_iterator ei1;
2308
2309           if (single_succ_p (bb))  /* BB only jumps to the return: predict its incoming edges instead.  */
2310             {
2311               FOR_EACH_EDGE (e1, ei1, bb->preds)
2312                 if (!predicted_by_p (e1->src, PRED_NULL_RETURN)  /* Skip sources already covered by a return-value predictor.  */
2313                     && !predicted_by_p (e1->src, PRED_CONST_RETURN)
2314                     && !predicted_by_p (e1->src, PRED_NEGATIVE_RETURN))
2315                   predict_edge_def (e1, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
2316             }
2317           else
2318             if (!predicted_by_p (e->src, PRED_NULL_RETURN)
2319                 && !predicted_by_p (e->src, PRED_CONST_RETURN)
2320                 && !predicted_by_p (e->src, PRED_NEGATIVE_RETURN))
2321               predict_edge_def (e, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
2322         }
2323
2324       /* Look for block we are guarding (i.e. we dominate it,
2325          but it doesn't postdominate us).  */
2326       if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun) && e->dest != bb
2327           && dominated_by_p (CDI_DOMINATORS, e->dest, e->src)
2328           && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e->dest))
2329         {
2330           gimple_stmt_iterator bi;
2331
2332           /* The call heuristic claims that a guarded function call
2333              is improbable.  This is because such calls are often used
2334              to signal exceptional situations such as printing error
2335              messages.  */
2336           for (bi = gsi_start_bb (e->dest); !gsi_end_p (bi);
2337                gsi_next (&bi))
2338             {
2339               gimple *stmt = gsi_stmt (bi);
2340               if (is_gimple_call (stmt)
2341                   /* Constant and pure calls are hardly used to signal
2342                      something exceptional.  */
2343                   && gimple_has_side_effects (stmt))
2344                 {
2345                   predict_edge_def (e, PRED_CALL, NOT_TAKEN);
2346                   break;  /* One qualifying call is enough for this edge.  */
2347                 }
2348             }
2349         }
2350     }
2351   tree_predict_by_opcode (bb);
2352 }
2353
2354 /* Predict branch probabilities and estimate profile of the tree CFG.
2355    This function can be called from the loop optimizers to recompute
2356    the profile information.
2357    If DRY_RUN is set, do not modify CFG and only produce dump files.  */
2358
2359 void
2360 tree_estimate_probability (bool dry_run)
2361 {
2362   basic_block bb;
2363
2364   add_noreturn_fake_exit_edges ();  /* Undone by remove_fake_exit_edges at the end.  */
2365   connect_infinite_loops_to_exit ();
2366   /* We use loop_niter_by_eval, which requires that the loops have
2367      preheaders.  */
2368   create_preheaders (CP_SIMPLE_PREHEADERS);
2369   calculate_dominance_info (CDI_POST_DOMINATORS);  /* Freed again below.  */
2370
2371   bb_predictions = new hash_map<const_basic_block, edge_prediction *>;  /* Map used while predictions are collected; deleted below.  */
2372   tree_bb_level_predictions ();
2373   record_loop_exits ();
2374
2375   if (number_of_loops (cfun) > 1)
2376     predict_loops ();
2377
2378   FOR_EACH_BB_FN (bb, cfun)  /* First pass: record predictions for every block.  */
2379     tree_estimate_probability_bb (bb);
2380
2381   FOR_EACH_BB_FN (bb, cfun)  /* Second pass: combine what was recorded.  */
2382     combine_predictions_for_bb (bb, dry_run);
2383
2384   if (flag_checking)
2385     bb_predictions->traverse<void *, assert_is_empty> (NULL);  /* Every remaining map value must be null.  */
2386
2387   delete bb_predictions;
2388   bb_predictions = NULL;
2389
2390   if (!dry_run)
2391     estimate_bb_frequencies (false);
2392   free_dominance_info (CDI_POST_DOMINATORS);
2393   remove_fake_exit_edges ();
2394 }
2395 \f
2396 /* Predict edges to successors of CUR whose sources are not postdominated by
2397    BB by PRED and recurse to all postdominators.  TAKEN is the outcome to record; VISITED holds block indexes already restarted from.  */
2398
2399 static void
2400 predict_paths_for_bb (basic_block cur, basic_block bb,
2401                       enum br_predictor pred,
2402                       enum prediction taken,
2403                       bitmap visited)
2404 {
2405   edge e;
2406   edge_iterator ei;
2407   basic_block son;
2408
2409   /* We are looking for all edges forming edge cut induced by
2410      set of all blocks postdominated by BB.  */
2411   FOR_EACH_EDGE (e, ei, cur->preds)
2412     if (e->src->index >= NUM_FIXED_BLOCKS
2413         && !dominated_by_p (CDI_POST_DOMINATORS, e->src, bb))
2414     {
2415       edge e2;
2416       edge_iterator ei2;
2417       bool found = false;
2418
2419       /* Ignore fake edges and eh, we predict them as not taken anyway.  */
2420       if (e->flags & (EDGE_EH | EDGE_FAKE))
2421         continue;
2422       gcc_assert (bb == cur || dominated_by_p (CDI_POST_DOMINATORS, cur, bb));
2423
2424       /* See if there is an edge from e->src that is not abnormal
2425          and does not lead to BB.  */
2426       FOR_EACH_EDGE (e2, ei2, e->src->succs)
2427         if (e2 != e
2428             && !(e2->flags & (EDGE_EH | EDGE_FAKE))
2429             && !dominated_by_p (CDI_POST_DOMINATORS, e2->dest, bb))
2430           {
2431             found = true;
2432             break;
2433           }
2434
2435       /* If there is non-abnormal path leaving e->src, predict edge
2436          using predictor.  Otherwise we need to look for paths
2437          leading to e->src.
2438
2439          The second may lead to infinite loop in the case we are predicting
2440          regions that are only reachable by abnormal edges.  We simply
2441          prevent visiting given BB twice.  */
2442       if (found)
2443         {
2444           if (!edge_predicted_by_p (e, pred, taken))  /* Do not record the same prediction twice.  */
2445             predict_edge_def (e, pred, taken);
2446         }
2447       else if (bitmap_set_bit (visited, e->src->index))
2448         predict_paths_for_bb (e->src, e->src, pred, taken, visited);  /* Restart the search with e->src as the new BB.  */
2449     }
2450   for (son = first_dom_son (CDI_POST_DOMINATORS, cur);  /* Walk the post-dominator sons of CUR.  */
2451        son;
2452        son = next_dom_son (CDI_POST_DOMINATORS, son))
2453     predict_paths_for_bb (son, bb, pred, taken, visited);
2454 }
2455
2456 /* Record prediction PRED with outcome TAKEN on the paths leading to BB,
2457    by running predict_paths_for_bb from BB with a fresh visited bitmap.  */
2458
2459 static void
2460 predict_paths_leading_to (basic_block bb, enum br_predictor pred,
2461                           enum prediction taken)
2462 {
2463   bitmap visited = BITMAP_ALLOC (NULL);
2464   predict_paths_for_bb (bb, bb, pred, taken, visited);  /* CUR and BB both start at BB.  */
2465   BITMAP_FREE (visited);
2466 }
2467
2468 /* Like predict_paths_leading_to but take edge instead of basic block: E itself is predicted when its source has another real way out, else the paths leading to E's source.  */
2469
2470 static void
2471 predict_paths_leading_to_edge (edge e, enum br_predictor pred,
2472                                enum prediction taken)
2473 {
2474   bool has_nonloop_edge = false;
2475   edge_iterator ei;
2476   edge e2;
2477
2478   basic_block bb = e->src;
2479   FOR_EACH_EDGE (e2, ei, bb->succs)  /* Look for an alternative successor edge E2 of E's source.  */
2480     if (e2->dest != e->src && e2->dest != e->dest
2481         && !(e2->flags & (EDGE_EH | EDGE_FAKE))  /* Test the candidate E2, not E: EH/fake edges are no real alternative (matches predict_paths_for_bb).  */
2482         && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e2->dest))
2483       {
2484         has_nonloop_edge = true;
2485         break;
2486       }
2487   if (!has_nonloop_edge)
2488     {
2489       bitmap visited = BITMAP_ALLOC (NULL);
2490       predict_paths_for_bb (bb, bb, pred, taken, visited);  /* No alternative: push the prediction back to paths reaching BB.  */
2491       BITMAP_FREE (visited);
2492     }
2493   else
2494     predict_edge_def (e, pred, taken);
2495 }
2496 \f
2497 /* This is used to carry information about basic blocks.  It is
2498    attached to the AUX field of the standard CFG block (read back through BLOCK_INFO).  */
2499
2500 struct block_info
2501 {
2502   /* Estimated frequency of execution of basic_block.  */
2503   sreal frequency;
2504
2505   /* To keep queue of basic blocks to process (next block in propagate_freq's work list).  */
2506   basic_block next;
2507
2508   /* Number of predecessors we need to visit first.  */
2509   int npredecessors;
2510 };
2511
2512 /* Similar information for edges, read back from the edge's AUX field through EDGE_INFO.  */
2513 struct edge_prob_info
2514 {
2515   /* In case edge is a loopback edge, the probability edge will be reached
2516      in case header is.  Estimated number of iterations of the loop can be
2517      then computed as 1 / (1 - back_edge_prob).  */
2518   sreal back_edge_prob;
2519   /* True if the edge is a loopback edge in the natural loop.  */
2520   unsigned int back_edge:1;
2521 };
2522
2523 #define BLOCK_INFO(B)   ((block_info *) (B)->aux)  /* View block B's aux pointer as block_info.  */
2524 #undef EDGE_INFO  /* Drop any earlier definition before redefining it here.  */
2525 #define EDGE_INFO(E)    ((edge_prob_info *) (E)->aux)  /* View edge E's aux pointer as edge_prob_info.  */
2526
2527 /* Helper function for estimate_bb_frequencies.
2528    Propagate the frequencies in blocks marked in
2529    TOVISIT, starting in HEAD.  Bits of TOVISIT are cleared as blocks are processed.  */
2530
2531 static void
2532 propagate_freq (basic_block head, bitmap tovisit)
2533 {
2534   basic_block bb;
2535   basic_block last;
2536   unsigned i;
2537   edge e;
2538   basic_block nextbb;
2539   bitmap_iterator bi;
2540
2541   /* For each basic block we need to visit, count the number of its
2542      predecessors we need to visit first.  */
2543   EXECUTE_IF_SET_IN_BITMAP (tovisit, 0, i, bi)
2544     {
2545       edge_iterator ei;
2546       int count = 0;
2547
2548       bb = BASIC_BLOCK_FOR_FN (cfun, i);
2549
2550       FOR_EACH_EDGE (e, ei, bb->preds)
2551         {
2552           bool visit = bitmap_bit_p (tovisit, e->src->index);
2553
2554           if (visit && !(e->flags & EDGE_DFS_BACK))  /* Only non-back edges from blocks still to visit count.  */
2555             count++;
2556           else if (visit && dump_file && !EDGE_INFO (e)->back_edge)
2557             fprintf (dump_file,
2558                      "Irreducible region hit, ignoring edge to %i->%i\n",
2559                      e->src->index, bb->index);
2560         }
2561       BLOCK_INFO (bb)->npredecessors = count;
2562       /* When function never returns, we will never process exit block.  */
2563       if (!count && bb == EXIT_BLOCK_PTR_FOR_FN (cfun))
2564         bb->count = bb->frequency = 0;
2565     }
2566
2567   BLOCK_INFO (head)->frequency = 1;  /* HEAD is the reference point with frequency 1.  */
2568   last = head;
2569   for (bb = head; bb; bb = nextbb)  /* Work list threaded through BLOCK_INFO->next; LAST is its tail.  */
2570     {
2571       edge_iterator ei;
2572       sreal cyclic_probability = 0;
2573       sreal frequency = 0;
2574
2575       nextbb = BLOCK_INFO (bb)->next;
2576       BLOCK_INFO (bb)->next = NULL;
2577
2578       /* Compute frequency of basic block.  */
2579       if (bb != head)
2580         {
2581           if (flag_checking)
2582             FOR_EACH_EDGE (e, ei, bb->preds)  /* All non-back predecessors must already be processed.  */
2583               gcc_assert (!bitmap_bit_p (tovisit, e->src->index)
2584                           || (e->flags & EDGE_DFS_BACK));
2585
2586           FOR_EACH_EDGE (e, ei, bb->preds)
2587             if (EDGE_INFO (e)->back_edge)
2588               {
2589                 cyclic_probability += EDGE_INFO (e)->back_edge_prob;
2590               }
2591             else if (!(e->flags & EDGE_DFS_BACK))
2592               {
2593                 /*  frequency += (e->probability
2594                                   * BLOCK_INFO (e->src)->frequency /
2595                                   REG_BR_PROB_BASE);  */
2596
2597                 sreal tmp = e->probability;
2598                 tmp *= BLOCK_INFO (e->src)->frequency;
2599                 tmp *= real_inv_br_prob_base;
2600                 frequency += tmp;
2601               }
2602
2603           if (cyclic_probability == 0)
2604             {
2605               BLOCK_INFO (bb)->frequency = frequency;
2606             }
2607           else
2608             {
2609               if (cyclic_probability > real_almost_one)  /* Clamp so the divisor below stays non-zero.  */
2610                 cyclic_probability = real_almost_one;
2611
2612               /* BLOCK_INFO (bb)->frequency = frequency
2613                                               / (1 - cyclic_probability) */
2614
2615               cyclic_probability = sreal (1) - cyclic_probability;
2616               BLOCK_INFO (bb)->frequency = frequency / cyclic_probability;
2617             }
2618         }
2619
2620       bitmap_clear_bit (tovisit, bb->index);  /* BB is done.  */
2621
2622       e = find_edge (bb, head);  /* An edge from BB back to HEAD, if any.  */
2623       if (e)
2624         {
2625           /* EDGE_INFO (e)->back_edge_prob
2626              = ((e->probability * BLOCK_INFO (bb)->frequency)
2627              / REG_BR_PROB_BASE); */
2628
2629           sreal tmp = e->probability;
2630           tmp *= BLOCK_INFO (bb)->frequency;
2631           EDGE_INFO (e)->back_edge_prob = tmp * real_inv_br_prob_base;
2632         }
2633
2634       /* Propagate to successor blocks.  */
2635       FOR_EACH_EDGE (e, ei, bb->succs)
2636         if (!(e->flags & EDGE_DFS_BACK)
2637             && BLOCK_INFO (e->dest)->npredecessors)
2638           {
2639             BLOCK_INFO (e->dest)->npredecessors--;
2640             if (!BLOCK_INFO (e->dest)->npredecessors)  /* Last pending predecessor handled: queue the successor.  */
2641               {
2642                 if (!nextbb)
2643                   nextbb = e->dest;
2644                 else
2645                   BLOCK_INFO (last)->next = e->dest;
2646
2647                 last = e->dest;
2648               }
2649           }
2650     }
2651 }
2652
2653 /* Estimate frequencies in loops at same nest level, starting at FIRST_LOOP and following the ->next chain.  */
2654
2655 static void
2656 estimate_loops_at_level (struct loop *first_loop)
2657 {
2658   struct loop *loop;
2659
2660   for (loop = first_loop; loop; loop = loop->next)
2661     {
2662       edge e;
2663       basic_block *bbs;
2664       unsigned i;
2665       bitmap tovisit = BITMAP_ALLOC (NULL);
2666
2667       estimate_loops_at_level (loop->inner);  /* Inner loops are handled before the loop itself.  */
2668
2669       /* Find current loop back edge and mark it.  */
2670       e = loop_latch_edge (loop);
2671       EDGE_INFO (e)->back_edge = 1;
2672
2673       bbs = get_loop_body (loop);
2674       for (i = 0; i < loop->num_nodes; i++)  /* Mark every block of the loop body for propagation.  */
2675         bitmap_set_bit (tovisit, bbs[i]->index);
2676       free (bbs);
2677       propagate_freq (loop->header, tovisit);
2678       BITMAP_FREE (tovisit);
2679     }
2680 }
2681
2682 /* Propagates frequencies through structure of loops: loops first, then all blocks from the entry block.  */
2683
2684 static void
2685 estimate_loops (void)
2686 {
2687   bitmap tovisit = BITMAP_ALLOC (NULL);
2688   basic_block bb;
2689
2690   /* Start by estimating the frequencies in the loops.  */
2691   if (number_of_loops (cfun) > 1)
2692     estimate_loops_at_level (current_loops->tree_root->inner);
2693
2694   /* Now propagate the frequencies through all the blocks.  */
2695   FOR_ALL_BB_FN (bb, cfun)
2696     {
2697       bitmap_set_bit (tovisit, bb->index);
2698     }
2699   propagate_freq (ENTRY_BLOCK_PTR_FOR_FN (cfun), tovisit);
2700   BITMAP_FREE (tovisit);
2701 }
2702
2703 /* Drop the profile for NODE to guessed, and update its frequency based on
2704    whether it is expected to be hot given the CALL_COUNT.  */
2705
2706 static void
2707 drop_profile (struct cgraph_node *node, gcov_type call_count)
2708 {
2709   struct function *fn = DECL_STRUCT_FUNCTION (node->decl);
2710   /* In the case where this was called by another function with a
2711      dropped profile, call_count will be 0. Since there are no
2712      non-zero call counts to this function, we don't know for sure
2713      whether it is hot, and therefore it will be marked normal below.  */
2714   bool hot = maybe_hot_count_p (NULL, call_count);
2715
2716   if (dump_file)
2717     fprintf (dump_file,
2718              "Dropping 0 profile for %s/%i. %s based on calls.\n",
2719              node->name (), node->order,
2720              hot ? "Function is hot" : "Function is normal");
2721   /* We only expect to miss profiles for functions that are reached
2722      via non-zero call edges in cases where the function may have
2723      been linked from another module or library (COMDATs and extern
2724      templates). See the comments below for handle_missing_profiles.
2725      Also, only warn in cases where the missing counts exceed the
2726      number of training runs. In certain cases with an execv followed
2727      by a no-return call the profile for the no-return call is not
2728      dumped and there can be a mismatch.  */
2729   if (!DECL_COMDAT (node->decl) && !DECL_EXTERNAL (node->decl)
2730       && call_count > profile_info->runs)
2731     {
2732       if (flag_profile_correction)
2733         {
2734           if (dump_file)
2735             fprintf (dump_file,
2736                      "Missing counts for called function %s/%i\n",
2737                      node->name (), node->order);
2738         }
2739       else
2740         warning (0, "Missing counts for called function %s/%i",
2741                  node->name (), node->order);
2742     }
2743
2744   profile_status_for_fn (fn)
2745       = (flag_guess_branch_prob ? PROFILE_GUESSED : PROFILE_ABSENT);
2746   node->frequency
2747       = hot ? NODE_FREQUENCY_HOT : NODE_FREQUENCY_NORMAL;
2748 }
2749
2750 /* In the case of COMDAT routines, multiple object files will contain the same
2751    function and the linker will select one for the binary. In that case
2752    all the other copies from the profile instrument binary will be missing
2753    profile counts. Look for cases where this happened, due to non-zero
2754    call counts going to 0-count functions, and drop the profile to guessed
2755    so that we can use the estimated probabilities and avoid optimizing only
2756    for size.
2757
2758    The other case where the profile may be missing is when the routine
2759    is not going to be emitted to the object file, e.g. for "extern template"
2760    class methods. Those will be marked DECL_EXTERNAL. Emit a warning in
2761    all other cases of non-zero calls to 0-count functions.  */
2762
2763 void
2764 handle_missing_profiles (void)
2765 {
2766   struct cgraph_node *node;
2767   int unlikely_count_fraction = PARAM_VALUE (UNLIKELY_BB_COUNT_FRACTION);
2768   vec<struct cgraph_node *> worklist;
2769   worklist.create (64);
2770
2771   /* See if 0 count function has non-0 count callers.  In this case we
2772      lost some profile.  Drop its function profile to PROFILE_GUESSED.  */
2773   FOR_EACH_DEFINED_FUNCTION (node)
2774     {
2775       struct cgraph_edge *e;
2776       gcov_type call_count = 0;
2777       gcov_type max_tp_first_run = 0;
2778       struct function *fn = DECL_STRUCT_FUNCTION (node->decl);
2779
2780       if (node->count)
2781         continue;
2782       for (e = node->callers; e; e = e->next_caller)
2783       {
2784         call_count += e->count;
2785
2786         if (e->caller->tp_first_run > max_tp_first_run)
2787           max_tp_first_run = e->caller->tp_first_run;
2788       }
2789
2790       /* If time profile is missing, let assign the maximum that comes from
2791          caller functions.  */
2792       if (!node->tp_first_run && max_tp_first_run)
2793         node->tp_first_run = max_tp_first_run + 1;
2794
2795       if (call_count
2796           && fn && fn->cfg
2797           && (call_count * unlikely_count_fraction >= profile_info->runs))
2798         {
2799           drop_profile (node, call_count);
2800           worklist.safe_push (node);
2801         }
2802     }
2803
2804   /* Propagate the profile dropping to other 0-count COMDATs that are
2805      potentially called by COMDATs we already dropped the profile on.  */
2806   while (worklist.length () > 0)
2807     {
2808       struct cgraph_edge *e;
2809
2810       node = worklist.pop ();
2811       for (e = node->callees; e; e = e->next_caller)
2812         {
2813           struct cgraph_node *callee = e->callee;
2814           struct function *fn = DECL_STRUCT_FUNCTION (callee->decl);
2815
2816           if (callee->count > 0)
2817             continue;
2818           if (DECL_COMDAT (callee->decl) && fn && fn->cfg
2819               && profile_status_for_fn (fn) == PROFILE_READ)
2820             {
2821               drop_profile (node, 0);
2822               worklist.safe_push (callee);
2823             }
2824         }
2825     }
2826   worklist.release ();
2827 }
2828
2829 /* Convert counts measured by profile driven feedback to frequencies.
2830    Return nonzero iff there was any nonzero execution count.  */
2831
2832 int
2833 counts_to_freqs (void)
2834 {
2835   gcov_type count_max, true_count_max = 0;
2836   basic_block bb;
2837
2838   /* Don't overwrite the estimated frequencies when the profile for
2839      the function is missing.  We may drop this function PROFILE_GUESSED
2840      later in drop_profile ().  */
2841   if (!flag_auto_profile && !ENTRY_BLOCK_PTR_FOR_FN (cfun)->count)
2842     return 0;
2843
2844   FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
2845     true_count_max = MAX (bb->count, true_count_max);
2846
2847   count_max = MAX (true_count_max, 1);
2848   FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
2849     bb->frequency = (bb->count * BB_FREQ_MAX + count_max / 2) / count_max;
2850
2851   return true_count_max;
2852 }
2853
2854 /* Return true if function is likely to be expensive, so there is no point to
2855    optimize performance of prologue, epilogue or do inlining at the expense
2856    of code size growth.  THRESHOLD is the limit of number of instructions
2857    function can execute at average to be still considered not expensive.  */
2858
2859 bool
2860 expensive_function_p (int threshold)
2861 {
2862   unsigned int sum = 0;
2863   basic_block bb;
2864   unsigned int limit;
2865
2866   /* We can not compute accurately for large thresholds due to scaled
2867      frequencies.  */
2868   gcc_assert (threshold <= BB_FREQ_MAX);
2869
2870   /* Frequencies are out of range.  This either means that function contains
2871      internal loop executing more than BB_FREQ_MAX times or profile feedback
2872      is available and function has not been executed at all.  */
2873   if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency == 0)
2874     return true;
2875
2876   /* Maximally BB_FREQ_MAX^2 so overflow won't happen.  */
2877   limit = ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency * threshold;
2878   FOR_EACH_BB_FN (bb, cfun)
2879     {
2880       rtx_insn *insn;
2881
2882       FOR_BB_INSNS (bb, insn)
2883         if (active_insn_p (insn))
2884           {
2885             sum += bb->frequency;
2886             if (sum > limit)
2887               return true;
2888         }
2889     }
2890
2891   return false;
2892 }
2893
2894 /* Estimate and propagate basic block frequencies using the given branch
2895    probabilities.  If FORCE is true, the frequencies are used to estimate
2896    the counts even when there are already non-zero profile counts.  */
2897
2898 void
2899 estimate_bb_frequencies (bool force)
2900 {
2901   basic_block bb;
2902   sreal freq_max;
2903
2904   if (force || profile_status_for_fn (cfun) != PROFILE_READ || !counts_to_freqs ())
2905     {
2906       static int real_values_initialized = 0;
2907
2908       if (!real_values_initialized)
2909         {
2910           real_values_initialized = 1;
2911           real_br_prob_base = REG_BR_PROB_BASE;
2912           real_bb_freq_max = BB_FREQ_MAX;
2913           real_one_half = sreal (1, -1);
2914           real_inv_br_prob_base = sreal (1) / real_br_prob_base;
2915           real_almost_one = sreal (1) - real_inv_br_prob_base;
2916         }
2917
2918       mark_dfs_back_edges ();
2919
2920       single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun))->probability =
2921          REG_BR_PROB_BASE;
2922
2923       /* Set up block info for each basic block.  */
2924       alloc_aux_for_blocks (sizeof (block_info));
2925       alloc_aux_for_edges (sizeof (edge_prob_info));
2926       FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
2927         {
2928           edge e;
2929           edge_iterator ei;
2930
2931           FOR_EACH_EDGE (e, ei, bb->succs)
2932             {
2933               EDGE_INFO (e)->back_edge_prob = e->probability;
2934               EDGE_INFO (e)->back_edge_prob *= real_inv_br_prob_base;
2935             }
2936         }
2937
2938       /* First compute frequencies locally for each loop from innermost
2939          to outermost to examine frequencies for back edges.  */
2940       estimate_loops ();
2941
2942       freq_max = 0;
2943       FOR_EACH_BB_FN (bb, cfun)
2944         if (freq_max < BLOCK_INFO (bb)->frequency)
2945           freq_max = BLOCK_INFO (bb)->frequency;
2946
2947       freq_max = real_bb_freq_max / freq_max;
2948       FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
2949         {
2950           sreal tmp = BLOCK_INFO (bb)->frequency * freq_max + real_one_half;
2951           bb->frequency = tmp.to_int ();
2952         }
2953
2954       free_aux_for_blocks ();
2955       free_aux_for_edges ();
2956     }
2957   compute_function_frequency ();
2958 }
2959
2960 /* Decide whether function is hot, cold or unlikely executed.  */
2961 void
2962 compute_function_frequency (void)
2963 {
2964   basic_block bb;
2965   struct cgraph_node *node = cgraph_node::get (current_function_decl);
2966
2967   if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
2968       || MAIN_NAME_P (DECL_NAME (current_function_decl)))
2969     node->only_called_at_startup = true;
2970   if (DECL_STATIC_DESTRUCTOR (current_function_decl))
2971     node->only_called_at_exit = true;
2972
2973   if (profile_status_for_fn (cfun) != PROFILE_READ)
2974     {
2975       int flags = flags_from_decl_or_type (current_function_decl);
2976       if (lookup_attribute ("cold", DECL_ATTRIBUTES (current_function_decl))
2977           != NULL)
2978         node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
2979       else if (lookup_attribute ("hot", DECL_ATTRIBUTES (current_function_decl))
2980                != NULL)
2981         node->frequency = NODE_FREQUENCY_HOT;
2982       else if (flags & ECF_NORETURN)
2983         node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
2984       else if (MAIN_NAME_P (DECL_NAME (current_function_decl)))
2985         node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
2986       else if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
2987                || DECL_STATIC_DESTRUCTOR (current_function_decl))
2988         node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
2989       return;
2990     }
2991
2992   /* Only first time try to drop function into unlikely executed.
2993      After inlining the roundoff errors may confuse us.
2994      Ipa-profile pass will drop functions only called from unlikely
2995      functions to unlikely and that is most of what we care about.  */
2996   if (!cfun->after_inlining)
2997     node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
2998   FOR_EACH_BB_FN (bb, cfun)
2999     {
3000       if (maybe_hot_bb_p (cfun, bb))
3001         {
3002           node->frequency = NODE_FREQUENCY_HOT;
3003           return;
3004         }
3005       if (!probably_never_executed_bb_p (cfun, bb))
3006         node->frequency = NODE_FREQUENCY_NORMAL;
3007     }
3008 }
3009
3010 /* Build PREDICT_EXPR.  */
3011 tree
3012 build_predict_expr (enum br_predictor predictor, enum prediction taken)
3013 {
3014   tree t = build1 (PREDICT_EXPR, void_type_node,
3015                    build_int_cst (integer_type_node, predictor));
3016   SET_PREDICT_EXPR_OUTCOME (t, taken);
3017   return t;
3018 }
3019
3020 const char *
3021 predictor_name (enum br_predictor predictor)
3022 {
3023   return predictor_info[predictor].name;
3024 }
3025
3026 /* Predict branch probabilities and estimate profile of the tree CFG. */
3027
3028 namespace {
3029
3030 const pass_data pass_data_profile =
3031 {
3032   GIMPLE_PASS, /* type */
3033   "profile_estimate", /* name */
3034   OPTGROUP_NONE, /* optinfo_flags */
3035   TV_BRANCH_PROB, /* tv_id */
3036   PROP_cfg, /* properties_required */
3037   0, /* properties_provided */
3038   0, /* properties_destroyed */
3039   0, /* todo_flags_start */
3040   0, /* todo_flags_finish */
3041 };
3042
3043 class pass_profile : public gimple_opt_pass
3044 {
3045 public:
3046   pass_profile (gcc::context *ctxt)
3047     : gimple_opt_pass (pass_data_profile, ctxt)
3048   {}
3049
3050   /* opt_pass methods: */
3051   virtual bool gate (function *) { return flag_guess_branch_prob; }
3052   virtual unsigned int execute (function *);
3053
3054 }; // class pass_profile
3055
3056 unsigned int
3057 pass_profile::execute (function *fun)
3058 {
3059   unsigned nb_loops;
3060
3061   if (profile_status_for_fn (cfun) == PROFILE_GUESSED)
3062     return 0;
3063
3064   loop_optimizer_init (LOOPS_NORMAL);
3065   if (dump_file && (dump_flags & TDF_DETAILS))
3066     flow_loops_dump (dump_file, NULL, 0);
3067
3068   mark_irreducible_loops ();
3069
3070   nb_loops = number_of_loops (fun);
3071   if (nb_loops > 1)
3072     scev_initialize ();
3073
3074   tree_estimate_probability (false);
3075
3076   if (nb_loops > 1)
3077     scev_finalize ();
3078
3079   loop_optimizer_finalize ();
3080   if (dump_file && (dump_flags & TDF_DETAILS))
3081     gimple_dump_cfg (dump_file, dump_flags);
3082  if (profile_status_for_fn (fun) == PROFILE_ABSENT)
3083     profile_status_for_fn (fun) = PROFILE_GUESSED;
3084   return 0;
3085 }
3086
3087 } // anon namespace
3088
3089 gimple_opt_pass *
3090 make_pass_profile (gcc::context *ctxt)
3091 {
3092   return new pass_profile (ctxt);
3093 }
3094
3095 namespace {
3096
3097 const pass_data pass_data_strip_predict_hints =
3098 {
3099   GIMPLE_PASS, /* type */
3100   "*strip_predict_hints", /* name */
3101   OPTGROUP_NONE, /* optinfo_flags */
3102   TV_BRANCH_PROB, /* tv_id */
3103   PROP_cfg, /* properties_required */
3104   0, /* properties_provided */
3105   0, /* properties_destroyed */
3106   0, /* todo_flags_start */
3107   0, /* todo_flags_finish */
3108 };
3109
3110 class pass_strip_predict_hints : public gimple_opt_pass
3111 {
3112 public:
3113   pass_strip_predict_hints (gcc::context *ctxt)
3114     : gimple_opt_pass (pass_data_strip_predict_hints, ctxt)
3115   {}
3116
3117   /* opt_pass methods: */
3118   opt_pass * clone () { return new pass_strip_predict_hints (m_ctxt); }
3119   virtual unsigned int execute (function *);
3120
3121 }; // class pass_strip_predict_hints
3122
3123 /* Get rid of all builtin_expect calls and GIMPLE_PREDICT statements
3124    we no longer need.  */
3125 unsigned int
3126 pass_strip_predict_hints::execute (function *fun)
3127 {
3128   basic_block bb;
3129   gimple *ass_stmt;
3130   tree var;
3131
3132   FOR_EACH_BB_FN (bb, fun)
3133     {
3134       gimple_stmt_iterator bi;
3135       for (bi = gsi_start_bb (bb); !gsi_end_p (bi);)
3136         {
3137           gimple *stmt = gsi_stmt (bi);
3138
3139           if (gimple_code (stmt) == GIMPLE_PREDICT)
3140             {
3141               gsi_remove (&bi, true);
3142               continue;
3143             }
3144           else if (is_gimple_call (stmt))
3145             {
3146               tree fndecl = gimple_call_fndecl (stmt);
3147
3148               if ((fndecl
3149                    && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
3150                    && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_EXPECT
3151                    && gimple_call_num_args (stmt) == 2)
3152                   || (gimple_call_internal_p (stmt)
3153                       && gimple_call_internal_fn (stmt) == IFN_BUILTIN_EXPECT))
3154                 {
3155                   var = gimple_call_lhs (stmt);
3156                   if (var)
3157                     {
3158                       ass_stmt
3159                         = gimple_build_assign (var, gimple_call_arg (stmt, 0));
3160                       gsi_replace (&bi, ass_stmt, true);
3161                     }
3162                   else
3163                     {
3164                       gsi_remove (&bi, true);
3165                       continue;
3166                     }
3167                 }
3168             }
3169           gsi_next (&bi);
3170         }
3171     }
3172   return 0;
3173 }
3174
3175 } // anon namespace
3176
3177 gimple_opt_pass *
3178 make_pass_strip_predict_hints (gcc::context *ctxt)
3179 {
3180   return new pass_strip_predict_hints (ctxt);
3181 }
3182
3183 /* Rebuild function frequencies.  Passes are in general expected to
3184    maintain profile by hand, however in some cases this is not possible:
3185    for example when inlining several functions with loops freuqencies might run
3186    out of scale and thus needs to be recomputed.  */
3187
3188 void
3189 rebuild_frequencies (void)
3190 {
3191   timevar_push (TV_REBUILD_FREQUENCIES);
3192
3193   /* When the max bb count in the function is small, there is a higher
3194      chance that there were truncation errors in the integer scaling
3195      of counts by inlining and other optimizations. This could lead
3196      to incorrect classification of code as being cold when it isn't.
3197      In that case, force the estimation of bb counts/frequencies from the
3198      branch probabilities, rather than computing frequencies from counts,
3199      which may also lead to frequencies incorrectly reduced to 0. There
3200      is less precision in the probabilities, so we only do this for small
3201      max counts.  */
3202   gcov_type count_max = 0;
3203   basic_block bb;
3204   FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
3205     count_max = MAX (bb->count, count_max);
3206
3207   if (profile_status_for_fn (cfun) == PROFILE_GUESSED
3208       || (!flag_auto_profile && profile_status_for_fn (cfun) == PROFILE_READ
3209           && count_max < REG_BR_PROB_BASE/10))
3210     {
3211       loop_optimizer_init (0);
3212       add_noreturn_fake_exit_edges ();
3213       mark_irreducible_loops ();
3214       connect_infinite_loops_to_exit ();
3215       estimate_bb_frequencies (true);
3216       remove_fake_exit_edges ();
3217       loop_optimizer_finalize ();
3218     }
3219   else if (profile_status_for_fn (cfun) == PROFILE_READ)
3220     counts_to_freqs ();
3221   else
3222     gcc_unreachable ();
3223   timevar_pop (TV_REBUILD_FREQUENCIES);
3224 }
3225
3226 /* Perform a dry run of the branch prediction pass and report comparsion of
3227    the predicted and real profile into the dump file.  */
3228
3229 void
3230 report_predictor_hitrates (void)
3231 {
3232   unsigned nb_loops;
3233
3234   loop_optimizer_init (LOOPS_NORMAL);
3235   if (dump_file && (dump_flags & TDF_DETAILS))
3236     flow_loops_dump (dump_file, NULL, 0);
3237
3238   mark_irreducible_loops ();
3239
3240   nb_loops = number_of_loops (cfun);
3241   if (nb_loops > 1)
3242     scev_initialize ();
3243
3244   tree_estimate_probability (true);
3245
3246   if (nb_loops > 1)
3247     scev_finalize ();
3248
3249   loop_optimizer_finalize ();
3250 }
3251