gcc/predict.c

   1 /* Branch prediction routines for the GNU compiler.
   2    Copyright (C) 2000-2013 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it under
   7 the terms of the GNU General Public License as published by the Free
   8 Software Foundation; either version 3, or (at your option) any later
   9 version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 /* References:
  21
  22    [1] "Branch Prediction for Free"
  23        Ball and Larus; PLDI '93.
  24    [2] "Static Branch Frequency and Program Profile Analysis"
  25        Wu and Larus; MICRO-27.
  26    [3] "Corpus-based Static Branch Prediction"
  27        Calder, Grunwald, Lindsay, Martin, Mozer, and Zorn; PLDI '95.  */
  28
  29
  30 #include "config.h"
  31 #include "system.h"
  32 #include "coretypes.h"
  33 #include "tm.h"
  34 #include "tree.h"
  35 #include "calls.h"
  36 #include "rtl.h"
  37 #include "tm_p.h"
  38 #include "hard-reg-set.h"
  39 #include "basic-block.h"
  40 #include "insn-config.h"
  41 #include "regs.h"
  42 #include "flags.h"
  43 #include "function.h"
  44 #include "except.h"
  45 #include "diagnostic-core.h"
  46 #include "recog.h"
  47 #include "expr.h"
  48 #include "predict.h"
  49 #include "coverage.h"
  50 #include "sreal.h"
  51 #include "params.h"
  52 #include "target.h"
  53 #include "cfgloop.h"
  54 #include "gimple.h"
  55 #include "gimple-iterator.h"
  56 #include "gimple-ssa.h"
  57 #include "cgraph.h"
  58 #include "tree-cfg.h"
  59 #include "tree-phinodes.h"
  60 #include "ssa-iterators.h"
  61 #include "tree-ssa-loop-niter.h"
  62 #include "tree-ssa-loop.h"
  63 #include "ggc.h"
  64 #include "tree-pass.h"
  65 #include "tree-scalar-evolution.h"
  66 #include "cfgloop.h"
  67 #include "pointer-set.h"
  68
  69 /* real constants: 0, 1, 1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE,
  70                    1/REG_BR_PROB_BASE, 0.5, BB_FREQ_MAX.  */
  71 static sreal real_zero, real_one, real_almost_one, real_br_prob_base,
  72              real_inv_br_prob_base, real_one_half, real_bb_freq_max;
  73
  74 /* Random guesstimation given names.
  75    PROV_VERY_UNLIKELY should be small enough so basic block predicted
  76    by it gets below HOT_BB_FREQUENCY_FRACTION.  */
  77 #define PROB_VERY_UNLIKELY      (REG_BR_PROB_BASE / 2000 - 1)
  78 #define PROB_EVEN               (REG_BR_PROB_BASE / 2)
  79 #define PROB_VERY_LIKELY        (REG_BR_PROB_BASE - PROB_VERY_UNLIKELY)
  80 #define PROB_ALWAYS             (REG_BR_PROB_BASE)
  81
  82 static void combine_predictions_for_insn (rtx, basic_block);
  83 static void dump_prediction (FILE *, enum br_predictor, int, basic_block, int);
  84 static void predict_paths_leading_to (basic_block, enum br_predictor, enum prediction);
  85 static void predict_paths_leading_to_edge (edge, enum br_predictor, enum prediction);
  86 static bool can_predict_insn_p (const_rtx);
  87
  88 /* Information we hold about each branch predictor.
  89    Filled using information from predict.def.  */
  90
  91 struct predictor_info
  92 {
  93   const char *const name;       /* Name used in the debugging dumps.  */
  94   const int hitrate;            /* Expected hitrate used by
  95                                    predict_insn_def call.  */
  96   const int flags;
  97 };
  98
  99 /* Use given predictor without Dempster-Shaffer theory if it matches
 100    using first_match heuristics.  */
 101 #define PRED_FLAG_FIRST_MATCH 1
 102
 103 /* Recompute hitrate in percent to our representation.  */
 104
 105 #define HITRATE(VAL) ((int) ((VAL) * REG_BR_PROB_BASE + 50) / 100)
 106
 107 #define DEF_PREDICTOR(ENUM, NAME, HITRATE, FLAGS) {NAME, HITRATE, FLAGS},
 108 static const struct predictor_info predictor_info[]= {
 109 #include "predict.def"
 110
 111   /* Upper bound on predictors.  */
 112   {NULL, 0, 0}
 113 };
 114 #undef DEF_PREDICTOR
 115
 116 /* Return TRUE if frequency FREQ is considered to be hot.  */
 117
 118 static inline bool
 119 maybe_hot_frequency_p (struct function *fun, int freq)
 120 {
 121   struct cgraph_node *node = cgraph_get_node (fun->decl);
 122   if (!profile_info || !flag_branch_probabilities)
 123     {
 124       if (node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED)
 125         return false;
 126       if (node->frequency == NODE_FREQUENCY_HOT)
 127         return true;
 128     }
 129   if (profile_status_for_function (fun) == PROFILE_ABSENT)
 130     return true;
 131   if (node->frequency == NODE_FREQUENCY_EXECUTED_ONCE
 132       && freq < (ENTRY_BLOCK_PTR_FOR_FUNCTION (fun)->frequency * 2 / 3))
 133     return false;
 134   if (PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION) == 0)
 135     return false;
 136   if (freq < (ENTRY_BLOCK_PTR_FOR_FUNCTION (fun)->frequency
 137               / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION)))
 138     return false;
 139   return true;
 140 }
 141
 142 static gcov_type min_count = -1;
 143
 144 /* Determine the threshold for hot BB counts.  */
 145
 146 gcov_type
 147 get_hot_bb_threshold ()
 148 {
 149   gcov_working_set_t *ws;
 150   if (min_count == -1)
 151     {
 152       ws = find_working_set (PARAM_VALUE (HOT_BB_COUNT_WS_PERMILLE));
 153       gcc_assert (ws);
 154       min_count = ws->min_counter;
 155     }
 156   return min_count;
 157 }
 158
 159 /* Set the threshold for hot BB counts.  */
 160
 161 void
 162 set_hot_bb_threshold (gcov_type min)
 163 {
 164   min_count = min;
 165 }
 166
 167 /* Return TRUE if frequency FREQ is considered to be hot.  */
 168
 169 static inline bool
 170 maybe_hot_count_p (struct function *fun, gcov_type count)
 171 {
 172   if (fun && profile_status_for_function (fun) != PROFILE_READ)
 173     return true;
 174   /* Code executed at most once is not hot.  */
 175   if (profile_info->runs >= count)
 176     return false;
 177   return (count >= get_hot_bb_threshold ());
 178 }
 179
 180 /* Return true in case BB can be CPU intensive and should be optimized
 181    for maximal performance.  */
 182
 183 bool
 184 maybe_hot_bb_p (struct function *fun, const_basic_block bb)
 185 {
 186   gcc_checking_assert (fun);
 187   if (profile_status_for_function (fun) == PROFILE_READ)
 188     return maybe_hot_count_p (fun, bb->count);
 189   return maybe_hot_frequency_p (fun, bb->frequency);
 190 }
 191
 192 /* Return true if the call can be hot.  */
 193
 194 bool
 195 cgraph_maybe_hot_edge_p (struct cgraph_edge *edge)
 196 {
 197   if (profile_info && flag_branch_probabilities
 198       && !maybe_hot_count_p (NULL,
 199                              edge->count))
 200     return false;
 201   if (edge->caller->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED
 202       || (edge->callee
 203           && edge->callee->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED))
 204     return false;
 205   if (edge->caller->frequency > NODE_FREQUENCY_UNLIKELY_EXECUTED
 206       && (edge->callee
 207           && edge->callee->frequency <= NODE_FREQUENCY_EXECUTED_ONCE))
 208     return false;
 209   if (optimize_size)
 210     return false;
 211   if (edge->caller->frequency == NODE_FREQUENCY_HOT)
 212     return true;
 213   if (edge->caller->frequency == NODE_FREQUENCY_EXECUTED_ONCE
 214       && edge->frequency < CGRAPH_FREQ_BASE * 3 / 2)
 215     return false;
 216   if (flag_guess_branch_prob)
 217     {
 218       if (PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION) == 0
 219           || edge->frequency <= (CGRAPH_FREQ_BASE
 220                                  / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION)))
 221         return false;
 222     }
 223   return true;
 224 }
 225
 226 /* Return true in case BB can be CPU intensive and should be optimized
 227    for maximal performance.  */
 228
 229 bool
 230 maybe_hot_edge_p (edge e)
 231 {
 232   if (profile_status == PROFILE_READ)
 233     return maybe_hot_count_p (cfun, e->count);
 234   return maybe_hot_frequency_p (cfun, EDGE_FREQUENCY (e));
 235 }
 236
 237
 238
 239 /* Return true if profile COUNT and FREQUENCY, or function FUN static
 240    node frequency reflects never being executed.  */
 241
 242 static bool
 243 probably_never_executed (struct function *fun,
 244                          gcov_type count, int frequency)
 245 {
 246   gcc_checking_assert (fun);
 247   if (profile_status_for_function (fun) == PROFILE_READ)
 248     {
 249       int unlikely_count_fraction = PARAM_VALUE (UNLIKELY_BB_COUNT_FRACTION);
 250       if (count * unlikely_count_fraction >= profile_info->runs)
 251         return false;
 252       if (!frequency)
 253         return true;
 254       if (!ENTRY_BLOCK_PTR->frequency)
 255         return false;
 256       if (ENTRY_BLOCK_PTR->count)
 257         {
 258           gcov_type computed_count;
 259           /* Check for possibility of overflow, in which case entry bb count
 260              is large enough to do the division first without losing much
 261              precision.  */
 262           if (ENTRY_BLOCK_PTR->count < REG_BR_PROB_BASE * REG_BR_PROB_BASE)
 263             {
 264               gcov_type scaled_count
 265                   = frequency * ENTRY_BLOCK_PTR->count * unlikely_count_fraction;
 266               computed_count = RDIV (scaled_count, ENTRY_BLOCK_PTR->frequency);
 267             }
 268           else
 269             {
 270               computed_count = RDIV (ENTRY_BLOCK_PTR->count,
 271                                      ENTRY_BLOCK_PTR->frequency);
 272               computed_count *= frequency * unlikely_count_fraction;
 273             }
 274           if (computed_count >= profile_info->runs)
 275             return false;
 276         }
 277       return true;
 278     }
 279   if ((!profile_info || !flag_branch_probabilities)
 280       && (cgraph_get_node (fun->decl)->frequency
 281           == NODE_FREQUENCY_UNLIKELY_EXECUTED))
 282     return true;
 283   return false;
 284 }
 285
 286
 287 /* Return true in case BB is probably never executed.  */
 288
 289 bool
 290 probably_never_executed_bb_p (struct function *fun, const_basic_block bb)
 291 {
 292   return probably_never_executed (fun, bb->count, bb->frequency);
 293 }
 294
 295
 296 /* Return true in case edge E is probably never executed.  */
 297
 298 bool
 299 probably_never_executed_edge_p (struct function *fun, edge e)
 300 {
 301   return probably_never_executed (fun, e->count, EDGE_FREQUENCY (e));
 302 }
 303
 304 /* Return true if NODE should be optimized for size.  */
 305
 306 bool
 307 cgraph_optimize_for_size_p (struct cgraph_node *node)
 308 {
 309   if (optimize_size)
 310     return true;
 311   if (node && (node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED))
 312     return true;
 313   else
 314     return false;
 315 }
 316
 317 /* Return true when current function should always be optimized for size.  */
 318
 319 bool
 320 optimize_function_for_size_p (struct function *fun)
 321 {
 322   if (optimize_size)
 323     return true;
 324   if (!fun || !fun->decl)
 325     return false;
 326   return cgraph_optimize_for_size_p (cgraph_get_node (fun->decl));
 327 }
 328
 329 /* Return true when current function should always be optimized for speed.  */
 330
 331 bool
 332 optimize_function_for_speed_p (struct function *fun)
 333 {
 334   return !optimize_function_for_size_p (fun);
 335 }
 336
 337 /* Return TRUE when BB should be optimized for size.  */
 338
 339 bool
 340 optimize_bb_for_size_p (const_basic_block bb)
 341 {
 342   return optimize_function_for_size_p (cfun) || !maybe_hot_bb_p (cfun, bb);
 343 }
 344
 345 /* Return TRUE when BB should be optimized for speed.  */
 346
 347 bool
 348 optimize_bb_for_speed_p (const_basic_block bb)
 349 {
 350   return !optimize_bb_for_size_p (bb);
 351 }
 352
 353 /* Return TRUE when BB should be optimized for size.  */
 354
 355 bool
 356 optimize_edge_for_size_p (edge e)
 357 {
 358   return optimize_function_for_size_p (cfun) || !maybe_hot_edge_p (e);
 359 }
 360
 361 /* Return TRUE when BB should be optimized for speed.  */
 362
 363 bool
 364 optimize_edge_for_speed_p (edge e)
 365 {
 366   return !optimize_edge_for_size_p (e);
 367 }
 368
 369 /* Return TRUE when BB should be optimized for size.  */
 370
 371 bool
 372 optimize_insn_for_size_p (void)
 373 {
 374   return optimize_function_for_size_p (cfun) || !crtl->maybe_hot_insn_p;
 375 }
 376
 377 /* Return TRUE when BB should be optimized for speed.  */
 378
 379 bool
 380 optimize_insn_for_speed_p (void)
 381 {
 382   return !optimize_insn_for_size_p ();
 383 }
 384
 385 /* Return TRUE when LOOP should be optimized for size.  */
 386
 387 bool
 388 optimize_loop_for_size_p (struct loop *loop)
 389 {
 390   return optimize_bb_for_size_p (loop->header);
 391 }
 392
 393 /* Return TRUE when LOOP should be optimized for speed.  */
 394
 395 bool
 396 optimize_loop_for_speed_p (struct loop *loop)
 397 {
 398   return optimize_bb_for_speed_p (loop->header);
 399 }
 400
 401 /* Return TRUE when LOOP nest should be optimized for speed.  */
 402
 403 bool
 404 optimize_loop_nest_for_speed_p (struct loop *loop)
 405 {
 406   struct loop *l = loop;
 407   if (optimize_loop_for_speed_p (loop))
 408     return true;
 409   l = loop->inner;
 410   while (l && l != loop)
 411     {
 412       if (optimize_loop_for_speed_p (l))
 413         return true;
 414       if (l->inner)
 415         l = l->inner;
 416       else if (l->next)
 417         l = l->next;
 418       else
 419         {
 420           while (l != loop && !l->next)
 421             l = loop_outer (l);
 422           if (l != loop)
 423             l = l->next;
 424         }
 425     }
 426   return false;
 427 }
 428
 429 /* Return TRUE when LOOP nest should be optimized for size.  */
 430
 431 bool
 432 optimize_loop_nest_for_size_p (struct loop *loop)
 433 {
 434   return !optimize_loop_nest_for_speed_p (loop);
 435 }
 436
 437 /* Return true when edge E is likely to be well predictable by branch
 438    predictor.  */
 439
 440 bool
 441 predictable_edge_p (edge e)
 442 {
 443   if (profile_status == PROFILE_ABSENT)
 444     return false;
 445   if ((e->probability
 446        <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100)
 447       || (REG_BR_PROB_BASE - e->probability
 448           <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100))
 449     return true;
 450   return false;
 451 }
 452
 453
 454 /* Set RTL expansion for BB profile.  */
 455
 456 void
 457 rtl_profile_for_bb (basic_block bb)
 458 {
 459   crtl->maybe_hot_insn_p = maybe_hot_bb_p (cfun, bb);
 460 }
 461
 462 /* Set RTL expansion for edge profile.  */
 463
 464 void
 465 rtl_profile_for_edge (edge e)
 466 {
 467   crtl->maybe_hot_insn_p = maybe_hot_edge_p (e);
 468 }
 469
 470 /* Set RTL expansion to default mode (i.e. when profile info is not known).  */
 471 void
 472 default_rtl_profile (void)
 473 {
 474   crtl->maybe_hot_insn_p = true;
 475 }
 476
 477 /* Return true if the one of outgoing edges is already predicted by
 478    PREDICTOR.  */
 479
 480 bool
 481 rtl_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
 482 {
 483   rtx note;
 484   if (!INSN_P (BB_END (bb)))
 485     return false;
 486   for (note = REG_NOTES (BB_END (bb)); note; note = XEXP (note, 1))
 487     if (REG_NOTE_KIND (note) == REG_BR_PRED
 488         && INTVAL (XEXP (XEXP (note, 0), 0)) == (int)predictor)
 489       return true;
 490   return false;
 491 }
 492
 493 /* This map contains for a basic block the list of predictions for the
 494    outgoing edges.  */
 495
 496 static struct pointer_map_t *bb_predictions;
 497
 498 /*  Structure representing predictions in tree level. */
 499
 500 struct edge_prediction {
 501     struct edge_prediction *ep_next;
 502     edge ep_edge;
 503     enum br_predictor ep_predictor;
 504     int ep_probability;
 505 };
 506
 507 /* Return true if the one of outgoing edges is already predicted by
 508    PREDICTOR.  */
 509
 510 bool
 511 gimple_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
 512 {
 513   struct edge_prediction *i;
 514   void **preds = pointer_map_contains (bb_predictions, bb);
 515
 516   if (!preds)
 517     return false;
 518
 519   for (i = (struct edge_prediction *) *preds; i; i = i->ep_next)
 520     if (i->ep_predictor == predictor)
 521       return true;
 522   return false;
 523 }
 524
 525 /* Return true when the probability of edge is reliable.
 526
 527    The profile guessing code is good at predicting branch outcome (ie.
 528    taken/not taken), that is predicted right slightly over 75% of time.
 529    It is however notoriously poor on predicting the probability itself.
 530    In general the profile appear a lot flatter (with probabilities closer
 531    to 50%) than the reality so it is bad idea to use it to drive optimization
 532    such as those disabling dynamic branch prediction for well predictable
 533    branches.
 534
 535    There are two exceptions - edges leading to noreturn edges and edges
 536    predicted by number of iterations heuristics are predicted well.  This macro
 537    should be able to distinguish those, but at the moment it simply check for
 538    noreturn heuristic that is only one giving probability over 99% or bellow
 539    1%.  In future we might want to propagate reliability information across the
 540    CFG if we find this information useful on multiple places.   */
 541 static bool
 542 probability_reliable_p (int prob)
 543 {
 544   return (profile_status == PROFILE_READ
 545           || (profile_status == PROFILE_GUESSED
 546               && (prob <= HITRATE (1) || prob >= HITRATE (99))));
 547 }
 548
 549 /* Same predicate as above, working on edges.  */
 550 bool
 551 edge_probability_reliable_p (const_edge e)
 552 {
 553   return probability_reliable_p (e->probability);
 554 }
 555
 556 /* Same predicate as edge_probability_reliable_p, working on notes.  */
 557 bool
 558 br_prob_note_reliable_p (const_rtx note)
 559 {
 560   gcc_assert (REG_NOTE_KIND (note) == REG_BR_PROB);
 561   return probability_reliable_p (XINT (note, 0));
 562 }
 563
 564 static void
 565 predict_insn (rtx insn, enum br_predictor predictor, int probability)
 566 {
 567   gcc_assert (any_condjump_p (insn));
 568   if (!flag_guess_branch_prob)
 569     return;
 570
 571   add_reg_note (insn, REG_BR_PRED,
 572                 gen_rtx_CONCAT (VOIDmode,
 573                                 GEN_INT ((int) predictor),
 574                                 GEN_INT ((int) probability)));
 575 }
 576
 577 /* Predict insn by given predictor.  */
 578
 579 void
 580 predict_insn_def (rtx insn, enum br_predictor predictor,
 581                   enum prediction taken)
 582 {
 583    int probability = predictor_info[(int) predictor].hitrate;
 584
 585    if (taken != TAKEN)
 586      probability = REG_BR_PROB_BASE - probability;
 587
 588    predict_insn (insn, predictor, probability);
 589 }
 590
 591 /* Predict edge E with given probability if possible.  */
 592
 593 void
 594 rtl_predict_edge (edge e, enum br_predictor predictor, int probability)
 595 {
 596   rtx last_insn;
 597   last_insn = BB_END (e->src);
 598
 599   /* We can store the branch prediction information only about
 600      conditional jumps.  */
 601   if (!any_condjump_p (last_insn))
 602     return;
 603
 604   /* We always store probability of branching.  */
 605   if (e->flags & EDGE_FALLTHRU)
 606     probability = REG_BR_PROB_BASE - probability;
 607
 608   predict_insn (last_insn, predictor, probability);
 609 }
 610
 611 /* Predict edge E with the given PROBABILITY.  */
 612 void
 613 gimple_predict_edge (edge e, enum br_predictor predictor, int probability)
 614 {
 615   gcc_assert (profile_status != PROFILE_GUESSED);
 616   if ((e->src != ENTRY_BLOCK_PTR && EDGE_COUNT (e->src->succs) > 1)
 617       && flag_guess_branch_prob && optimize)
 618     {
 619       struct edge_prediction *i = XNEW (struct edge_prediction);
 620       void **preds = pointer_map_insert (bb_predictions, e->src);
 621
 622       i->ep_next = (struct edge_prediction *) *preds;
 623       *preds = i;
 624       i->ep_probability = probability;
 625       i->ep_predictor = predictor;
 626       i->ep_edge = e;
 627     }
 628 }
 629
 630 /* Remove all predictions on given basic block that are attached
 631    to edge E.  */
 632 void
 633 remove_predictions_associated_with_edge (edge e)
 634 {
 635   void **preds;
 636
 637   if (!bb_predictions)
 638     return;
 639
 640   preds = pointer_map_contains (bb_predictions, e->src);
 641
 642   if (preds)
 643     {
 644       struct edge_prediction **prediction = (struct edge_prediction **) preds;
 645       struct edge_prediction *next;
 646
 647       while (*prediction)
 648         {
 649           if ((*prediction)->ep_edge == e)
 650             {
 651               next = (*prediction)->ep_next;
 652               free (*prediction);
 653               *prediction = next;
 654             }
 655           else
 656             prediction = &((*prediction)->ep_next);
 657         }
 658     }
 659 }
 660
 661 /* Clears the list of predictions stored for BB.  */
 662
 663 static void
 664 clear_bb_predictions (basic_block bb)
 665 {
 666   void **preds = pointer_map_contains (bb_predictions, bb);
 667   struct edge_prediction *pred, *next;
 668
 669   if (!preds)
 670     return;
 671
 672   for (pred = (struct edge_prediction *) *preds; pred; pred = next)
 673     {
 674       next = pred->ep_next;
 675       free (pred);
 676     }
 677   *preds = NULL;
 678 }
 679
 680 /* Return true when we can store prediction on insn INSN.
 681    At the moment we represent predictions only on conditional
 682    jumps, not at computed jump or other complicated cases.  */
 683 static bool
 684 can_predict_insn_p (const_rtx insn)
 685 {
 686   return (JUMP_P (insn)
 687           && any_condjump_p (insn)
 688           && EDGE_COUNT (BLOCK_FOR_INSN (insn)->succs) >= 2);
 689 }
 690
 691 /* Predict edge E by given predictor if possible.  */
 692
 693 void
 694 predict_edge_def (edge e, enum br_predictor predictor,
 695                   enum prediction taken)
 696 {
 697    int probability = predictor_info[(int) predictor].hitrate;
 698
 699    if (taken != TAKEN)
 700      probability = REG_BR_PROB_BASE - probability;
 701
 702    predict_edge (e, predictor, probability);
 703 }
 704
 705 /* Invert all branch predictions or probability notes in the INSN.  This needs
 706    to be done each time we invert the condition used by the jump.  */
 707
 708 void
 709 invert_br_probabilities (rtx insn)
 710 {
 711   rtx note;
 712
 713   for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
 714     if (REG_NOTE_KIND (note) == REG_BR_PROB)
 715       XINT (note, 0) = REG_BR_PROB_BASE - XINT (note, 0);
 716     else if (REG_NOTE_KIND (note) == REG_BR_PRED)
 717       XEXP (XEXP (note, 0), 1)
 718         = GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (XEXP (note, 0), 1)));
 719 }
 720
 721 /* Dump information about the branch prediction to the output file.  */
 722
 723 static void
 724 dump_prediction (FILE *file, enum br_predictor predictor, int probability,
 725                  basic_block bb, int used)
 726 {
 727   edge e;
 728   edge_iterator ei;
 729
 730   if (!file)
 731     return;
 732
 733   FOR_EACH_EDGE (e, ei, bb->succs)
 734     if (! (e->flags & EDGE_FALLTHRU))
 735       break;
 736
 737   fprintf (file, "  %s heuristics%s: %.1f%%",
 738            predictor_info[predictor].name,
 739            used ? "" : " (ignored)", probability * 100.0 / REG_BR_PROB_BASE);
 740
 741   if (bb->count)
 742     {
 743       fprintf (file, "  exec ");
 744       fprintf (file, HOST_WIDEST_INT_PRINT_DEC, bb->count);
 745       if (e)
 746         {
 747           fprintf (file, " hit ");
 748           fprintf (file, HOST_WIDEST_INT_PRINT_DEC, e->count);
 749           fprintf (file, " (%.1f%%)", e->count * 100.0 / bb->count);
 750         }
 751     }
 752
 753   fprintf (file, "\n");
 754 }
 755
 756 /* We can not predict the probabilities of outgoing edges of bb.  Set them
 757    evenly and hope for the best.  */
 758 static void
 759 set_even_probabilities (basic_block bb)
 760 {
 761   int nedges = 0;
 762   edge e;
 763   edge_iterator ei;
 764
 765   FOR_EACH_EDGE (e, ei, bb->succs)
 766     if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
 767       nedges ++;
 768   FOR_EACH_EDGE (e, ei, bb->succs)
 769     if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
 770       e->probability = (REG_BR_PROB_BASE + nedges / 2) / nedges;
 771     else
 772       e->probability = 0;
 773 }
 774
 775 /* Combine all REG_BR_PRED notes into single probability and attach REG_BR_PROB
 776    note if not already present.  Remove now useless REG_BR_PRED notes.  */
 777
 778 static void
 779 combine_predictions_for_insn (rtx insn, basic_block bb)
 780 {
 781   rtx prob_note;
 782   rtx *pnote;
 783   rtx note;
 784   int best_probability = PROB_EVEN;
 785   enum br_predictor best_predictor = END_PREDICTORS;
 786   int combined_probability = REG_BR_PROB_BASE / 2;
 787   int d;
 788   bool first_match = false;
 789   bool found = false;
 790
 791   if (!can_predict_insn_p (insn))
 792     {
 793       set_even_probabilities (bb);
 794       return;
 795     }
 796
 797   prob_note = find_reg_note (insn, REG_BR_PROB, 0);
 798   pnote = &REG_NOTES (insn);
 799   if (dump_file)
 800     fprintf (dump_file, "Predictions for insn %i bb %i\n", INSN_UID (insn),
 801              bb->index);
 802
 803   /* We implement "first match" heuristics and use probability guessed
 804      by predictor with smallest index.  */
 805   for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
 806     if (REG_NOTE_KIND (note) == REG_BR_PRED)
 807       {
 808         enum br_predictor predictor = ((enum br_predictor)
 809                                        INTVAL (XEXP (XEXP (note, 0), 0)));
 810         int probability = INTVAL (XEXP (XEXP (note, 0), 1));
 811
 812         found = true;
 813         if (best_predictor > predictor)
 814           best_probability = probability, best_predictor = predictor;
 815
 816         d = (combined_probability * probability
 817              + (REG_BR_PROB_BASE - combined_probability)
 818              * (REG_BR_PROB_BASE - probability));
 819
 820         /* Use FP math to avoid overflows of 32bit integers.  */
 821         if (d == 0)
 822           /* If one probability is 0% and one 100%, avoid division by zero.  */
 823           combined_probability = REG_BR_PROB_BASE / 2;
 824         else
 825           combined_probability = (((double) combined_probability) * probability
 826                                   * REG_BR_PROB_BASE / d + 0.5);
 827       }
 828
 829   /* Decide which heuristic to use.  In case we didn't match anything,
 830      use no_prediction heuristic, in case we did match, use either
 831      first match or Dempster-Shaffer theory depending on the flags.  */
 832
 833   if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
 834     first_match = true;
 835
 836   if (!found)
 837     dump_prediction (dump_file, PRED_NO_PREDICTION,
 838                      combined_probability, bb, true);
 839   else
 840     {
 841       dump_prediction (dump_file, PRED_DS_THEORY, combined_probability,
 842                        bb, !first_match);
 843       dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability,
 844                        bb, first_match);
 845     }
 846
 847   if (first_match)
 848     combined_probability = best_probability;
 849   dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);
 850
 851   while (*pnote)
 852     {
 853       if (REG_NOTE_KIND (*pnote) == REG_BR_PRED)
 854         {
 855           enum br_predictor predictor = ((enum br_predictor)
 856                                          INTVAL (XEXP (XEXP (*pnote, 0), 0)));
 857           int probability = INTVAL (XEXP (XEXP (*pnote, 0), 1));
 858
 859           dump_prediction (dump_file, predictor, probability, bb,
 860                            !first_match || best_predictor == predictor);
 861           *pnote = XEXP (*pnote, 1);
 862         }
 863       else
 864         pnote = &XEXP (*pnote, 1);
 865     }
 866
 867   if (!prob_note)
 868     {
 869       add_int_reg_note (insn, REG_BR_PROB, combined_probability);
 870
 871       /* Save the prediction into CFG in case we are seeing non-degenerated
 872          conditional jump.  */
 873       if (!single_succ_p (bb))
 874         {
 875           BRANCH_EDGE (bb)->probability = combined_probability;
 876           FALLTHRU_EDGE (bb)->probability
 877             = REG_BR_PROB_BASE - combined_probability;
 878         }
 879     }
 880   else if (!single_succ_p (bb))
 881     {
 882       int prob = XINT (prob_note, 0);
 883
 884       BRANCH_EDGE (bb)->probability = prob;
 885       FALLTHRU_EDGE (bb)->probability = REG_BR_PROB_BASE - prob;
 886     }
 887   else
 888     single_succ_edge (bb)->probability = REG_BR_PROB_BASE;
 889 }
 890
 891 /* Combine predictions into single probability and store them into CFG.
 892    Remove now useless prediction entries.  */
 893
 894 static void
 895 combine_predictions_for_bb (basic_block bb)
 896 {
 897   int best_probability = PROB_EVEN;
 898   enum br_predictor best_predictor = END_PREDICTORS;
 899   int combined_probability = REG_BR_PROB_BASE / 2;
 900   int d;
 901   bool first_match = false;
 902   bool found = false;
 903   struct edge_prediction *pred;
 904   int nedges = 0;
 905   edge e, first = NULL, second = NULL;
 906   edge_iterator ei;
 907   void **preds;
 908
 909   FOR_EACH_EDGE (e, ei, bb->succs)
 910     if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
 911       {
 912         nedges ++;
 913         if (first && !second)
 914           second = e;
 915         if (!first)
 916           first = e;
 917       }
 918
 919   /* When there is no successor or only one choice, prediction is easy.
 920
 921      We are lazy for now and predict only basic blocks with two outgoing
 922      edges.  It is possible to predict generic case too, but we have to
 923      ignore first match heuristics and do more involved combining.  Implement
 924      this later.  */
 925   if (nedges != 2)
 926     {
 927       if (!bb->count)
 928         set_even_probabilities (bb);
 929       clear_bb_predictions (bb);
 930       if (dump_file)
 931         fprintf (dump_file, "%i edges in bb %i predicted to even probabilities\n",
 932                  nedges, bb->index);
 933       return;
 934     }
 935
 936   if (dump_file)
 937     fprintf (dump_file, "Predictions for bb %i\n", bb->index);
 938
 939   preds = pointer_map_contains (bb_predictions, bb);
 940   if (preds)
 941     {
 942       /* We implement "first match" heuristics and use probability guessed
 943          by predictor with smallest index.  */
 944       for (pred = (struct edge_prediction *) *preds; pred; pred = pred->ep_next)
 945         {
 946           enum br_predictor predictor = pred->ep_predictor;
 947           int probability = pred->ep_probability;
 948
 949           if (pred->ep_edge != first)
 950             probability = REG_BR_PROB_BASE - probability;
 951
 952           found = true;
 953           /* First match heuristics would be widly confused if we predicted
 954              both directions.  */
 955           if (best_predictor > predictor)
 956             {
 957               struct edge_prediction *pred2;
 958               int prob = probability;
 959
 960               for (pred2 = (struct edge_prediction *) *preds; pred2; pred2 = pred2->ep_next)
 961                if (pred2 != pred && pred2->ep_predictor == pred->ep_predictor)
 962                  {
 963                    int probability2 = pred->ep_probability;
 964
 965                    if (pred2->ep_edge != first)
 966                      probability2 = REG_BR_PROB_BASE - probability2;
 967
 968                    if ((probability < REG_BR_PROB_BASE / 2) !=
 969                        (probability2 < REG_BR_PROB_BASE / 2))
 970                      break;
 971
 972                    /* If the same predictor later gave better result, go for it! */
 973                    if ((probability >= REG_BR_PROB_BASE / 2 && (probability2 > probability))
 974                        || (probability <= REG_BR_PROB_BASE / 2 && (probability2 < probability)))
 975                      prob = probability2;
 976                  }
 977               if (!pred2)
 978                 best_probability = prob, best_predictor = predictor;
 979             }
 980
 981           d = (combined_probability * probability
 982                + (REG_BR_PROB_BASE - combined_probability)
 983                * (REG_BR_PROB_BASE - probability));
 984
 985           /* Use FP math to avoid overflows of 32bit integers.  */
 986           if (d == 0)
 987             /* If one probability is 0% and one 100%, avoid division by zero.  */
 988             combined_probability = REG_BR_PROB_BASE / 2;
 989           else
 990             combined_probability = (((double) combined_probability)
 991                                     * probability
 992                                     * REG_BR_PROB_BASE / d + 0.5);
 993         }
 994     }
 995
 996   /* Decide which heuristic to use.  In case we didn't match anything,
 997      use no_prediction heuristic, in case we did match, use either
 998      first match or Dempster-Shaffer theory depending on the flags.  */
 999
1000   if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
1001     first_match = true;
1002
1003   if (!found)
1004     dump_prediction (dump_file, PRED_NO_PREDICTION, combined_probability, bb, true);
1005   else
1006     {
1007       dump_prediction (dump_file, PRED_DS_THEORY, combined_probability, bb,
1008                        !first_match);
1009       dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability, bb,
1010                        first_match);
1011     }
1012
1013   if (first_match)
1014     combined_probability = best_probability;
1015   dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);
1016
1017   if (preds)
1018     {
1019       for (pred = (struct edge_prediction *) *preds; pred; pred = pred->ep_next)
1020         {
1021           enum br_predictor predictor = pred->ep_predictor;
1022           int probability = pred->ep_probability;
1023
1024           if (pred->ep_edge != EDGE_SUCC (bb, 0))
1025             probability = REG_BR_PROB_BASE - probability;
1026           dump_prediction (dump_file, predictor, probability, bb,
1027                            !first_match || best_predictor == predictor);
1028         }
1029     }
1030   clear_bb_predictions (bb);
1031
1032   if (!bb->count)
1033     {
1034       first->probability = combined_probability;
1035       second->probability = REG_BR_PROB_BASE - combined_probability;
1036     }
1037 }
1038
1039 /* Check if T1 and T2 satisfy the IV_COMPARE condition.
1040    Return the SSA_NAME if the condition satisfies, NULL otherwise.
1041
1042    T1 and T2 should be one of the following cases:
1043      1. T1 is SSA_NAME, T2 is NULL
1044      2. T1 is SSA_NAME, T2 is INTEGER_CST between [-4, 4]
1045      3. T2 is SSA_NAME, T1 is INTEGER_CST between [-4, 4]  */
1046
1047 static tree
1048 strips_small_constant (tree t1, tree t2)
1049 {
1050   tree ret = NULL;
1051   int value = 0;
1052
1053   if (!t1)
1054     return NULL;
1055   else if (TREE_CODE (t1) == SSA_NAME)
1056     ret = t1;
1057   else if (tree_fits_shwi_p (t1))
1058     value = tree_to_shwi (t1);
1059   else
1060     return NULL;
1061
1062   if (!t2)
1063     return ret;
1064   else if (tree_fits_shwi_p (t2))
1065     value = tree_to_shwi (t2);
1066   else if (TREE_CODE (t2) == SSA_NAME)
1067     {
1068       if (ret)
1069         return NULL;
1070       else
1071         ret = t2;
1072     }
1073
1074   if (value <= 4 && value >= -4)
1075     return ret;
1076   else
1077     return NULL;
1078 }
1079
1080 /* Return the SSA_NAME in T or T's operands.
1081    Return NULL if SSA_NAME cannot be found.  */
1082
1083 static tree
1084 get_base_value (tree t)
1085 {
1086   if (TREE_CODE (t) == SSA_NAME)
1087     return t;
1088
1089   if (!BINARY_CLASS_P (t))
1090     return NULL;
1091
1092   switch (TREE_OPERAND_LENGTH (t))
1093     {
1094     case 1:
1095       return strips_small_constant (TREE_OPERAND (t, 0), NULL);
1096     case 2:
1097       return strips_small_constant (TREE_OPERAND (t, 0),
1098                                     TREE_OPERAND (t, 1));
1099     default:
1100       return NULL;
1101     }
1102 }
1103
1104 /* Check the compare STMT in LOOP. If it compares an induction
1105    variable to a loop invariant, return true, and save
1106    LOOP_INVARIANT, COMPARE_CODE and LOOP_STEP.
1107    Otherwise return false and set LOOP_INVAIANT to NULL.  */
1108
1109 static bool
1110 is_comparison_with_loop_invariant_p (gimple stmt, struct loop *loop,
1111                                      tree *loop_invariant,
1112                                      enum tree_code *compare_code,
1113                                      tree *loop_step,
1114                                      tree *loop_iv_base)
1115 {
1116   tree op0, op1, bound, base;
1117   affine_iv iv0, iv1;
1118   enum tree_code code;
1119   tree step;
1120
1121   code = gimple_cond_code (stmt);
1122   *loop_invariant = NULL;
1123
1124   switch (code)
1125     {
1126     case GT_EXPR:
1127     case GE_EXPR:
1128     case NE_EXPR:
1129     case LT_EXPR:
1130     case LE_EXPR:
1131     case EQ_EXPR:
1132       break;
1133
1134     default:
1135       return false;
1136     }
1137
1138   op0 = gimple_cond_lhs (stmt);
1139   op1 = gimple_cond_rhs (stmt);
1140
1141   if ((TREE_CODE (op0) != SSA_NAME && TREE_CODE (op0) != INTEGER_CST)
1142        || (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op1) != INTEGER_CST))
1143     return false;
1144   if (!simple_iv (loop, loop_containing_stmt (stmt), op0, &iv0, true))
1145     return false;
1146   if (!simple_iv (loop, loop_containing_stmt (stmt), op1, &iv1, true))
1147     return false;
1148   if (TREE_CODE (iv0.step) != INTEGER_CST
1149       || TREE_CODE (iv1.step) != INTEGER_CST)
1150     return false;
1151   if ((integer_zerop (iv0.step) && integer_zerop (iv1.step))
1152       || (!integer_zerop (iv0.step) && !integer_zerop (iv1.step)))
1153     return false;
1154
1155   if (integer_zerop (iv0.step))
1156     {
1157       if (code != NE_EXPR && code != EQ_EXPR)
1158         code = invert_tree_comparison (code, false);
1159       bound = iv0.base;
1160       base = iv1.base;
1161       if (tree_fits_shwi_p (iv1.step))
1162         step = iv1.step;
1163       else
1164         return false;
1165     }
1166   else
1167     {
1168       bound = iv1.base;
1169       base = iv0.base;
1170       if (tree_fits_shwi_p (iv0.step))
1171         step = iv0.step;
1172       else
1173         return false;
1174     }
1175
1176   if (TREE_CODE (bound) != INTEGER_CST)
1177     bound = get_base_value (bound);
1178   if (!bound)
1179     return false;
1180   if (TREE_CODE (base) != INTEGER_CST)
1181     base = get_base_value (base);
1182   if (!base)
1183     return false;
1184
1185   *loop_invariant = bound;
1186   *compare_code = code;
1187   *loop_step = step;
1188   *loop_iv_base = base;
1189   return true;
1190 }
1191
1192 /* Compare two SSA_NAMEs: returns TRUE if T1 and T2 are value coherent.  */
1193
1194 static bool
1195 expr_coherent_p (tree t1, tree t2)
1196 {
1197   gimple stmt;
1198   tree ssa_name_1 = NULL;
1199   tree ssa_name_2 = NULL;
1200
1201   gcc_assert (TREE_CODE (t1) == SSA_NAME || TREE_CODE (t1) == INTEGER_CST);
1202   gcc_assert (TREE_CODE (t2) == SSA_NAME || TREE_CODE (t2) == INTEGER_CST);
1203
1204   if (t1 == t2)
1205     return true;
1206
1207   if (TREE_CODE (t1) == INTEGER_CST && TREE_CODE (t2) == INTEGER_CST)
1208     return true;
1209   if (TREE_CODE (t1) == INTEGER_CST || TREE_CODE (t2) == INTEGER_CST)
1210     return false;
1211
1212   /* Check to see if t1 is expressed/defined with t2.  */
1213   stmt = SSA_NAME_DEF_STMT (t1);
1214   gcc_assert (stmt != NULL);
1215   if (is_gimple_assign (stmt))
1216     {
1217       ssa_name_1 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
1218       if (ssa_name_1 && ssa_name_1 == t2)
1219         return true;
1220     }
1221
1222   /* Check to see if t2 is expressed/defined with t1.  */
1223   stmt = SSA_NAME_DEF_STMT (t2);
1224   gcc_assert (stmt != NULL);
1225   if (is_gimple_assign (stmt))
1226     {
1227       ssa_name_2 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
1228       if (ssa_name_2 && ssa_name_2 == t1)
1229         return true;
1230     }
1231
1232   /* Compare if t1 and t2's def_stmts are identical.  */
1233   if (ssa_name_2 != NULL && ssa_name_1 == ssa_name_2)
1234     return true;
1235   else
1236     return false;
1237 }
1238
1239 /* Predict branch probability of BB when BB contains a branch that compares
1240    an induction variable in LOOP with LOOP_IV_BASE_VAR to LOOP_BOUND_VAR. The
1241    loop exit is compared using LOOP_BOUND_CODE, with step of LOOP_BOUND_STEP.
1242
1243    E.g.
1244      for (int i = 0; i < bound; i++) {
1245        if (i < bound - 2)
1246          computation_1();
1247        else
1248          computation_2();
1249      }
1250
1251   In this loop, we will predict the branch inside the loop to be taken.  */
1252
1253 static void
1254 predict_iv_comparison (struct loop *loop, basic_block bb,
1255                        tree loop_bound_var,
1256                        tree loop_iv_base_var,
1257                        enum tree_code loop_bound_code,
1258                        int loop_bound_step)
1259 {
1260   gimple stmt;
1261   tree compare_var, compare_base;
1262   enum tree_code compare_code;
1263   tree compare_step_var;
1264   edge then_edge;
1265   edge_iterator ei;
1266
1267   if (predicted_by_p (bb, PRED_LOOP_ITERATIONS_GUESSED)
1268       || predicted_by_p (bb, PRED_LOOP_ITERATIONS)
1269       || predicted_by_p (bb, PRED_LOOP_EXIT))
1270     return;
1271
1272   stmt = last_stmt (bb);
1273   if (!stmt || gimple_code (stmt) != GIMPLE_COND)
1274     return;
1275   if (!is_comparison_with_loop_invariant_p (stmt, loop, &compare_var,
1276                                             &compare_code,
1277                                             &compare_step_var,
1278                                             &compare_base))
1279     return;
1280
1281   /* Find the taken edge.  */
1282   FOR_EACH_EDGE (then_edge, ei, bb->succs)
1283     if (then_edge->flags & EDGE_TRUE_VALUE)
1284       break;
1285
1286   /* When comparing an IV to a loop invariant, NE is more likely to be
1287      taken while EQ is more likely to be not-taken.  */
1288   if (compare_code == NE_EXPR)
1289     {
1290       predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1291       return;
1292     }
1293   else if (compare_code == EQ_EXPR)
1294     {
1295       predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1296       return;
1297     }
1298
1299   if (!expr_coherent_p (loop_iv_base_var, compare_base))
1300     return;
1301
1302   /* If loop bound, base and compare bound are all constants, we can
1303      calculate the probability directly.  */
1304   if (tree_fits_shwi_p (loop_bound_var)
1305       && tree_fits_shwi_p (compare_var)
1306       && tree_fits_shwi_p (compare_base))
1307     {
1308       int probability;
1309       bool of, overflow = false;
1310       double_int mod, compare_count, tem, loop_count;
1311
1312       double_int loop_bound = tree_to_double_int (loop_bound_var);
1313       double_int compare_bound = tree_to_double_int (compare_var);
1314       double_int base = tree_to_double_int (compare_base);
1315       double_int compare_step = tree_to_double_int (compare_step_var);
1316
1317       /* (loop_bound - base) / compare_step */
1318       tem = loop_bound.sub_with_overflow (base, &of);
1319       overflow |= of;
1320       loop_count = tem.divmod_with_overflow (compare_step,
1321                                               0, TRUNC_DIV_EXPR,
1322                                               &mod, &of);
1323       overflow |= of;
1324
1325       if ((!compare_step.is_negative ())
1326           ^ (compare_code == LT_EXPR || compare_code == LE_EXPR))
1327         {
1328           /* (loop_bound - compare_bound) / compare_step */
1329           tem = loop_bound.sub_with_overflow (compare_bound, &of);
1330           overflow |= of;
1331           compare_count = tem.divmod_with_overflow (compare_step,
1332                                                      0, TRUNC_DIV_EXPR,
1333                                                      &mod, &of);
1334           overflow |= of;
1335         }
1336       else
1337         {
1338           /* (compare_bound - base) / compare_step */
1339           tem = compare_bound.sub_with_overflow (base, &of);
1340           overflow |= of;
1341           compare_count = tem.divmod_with_overflow (compare_step,
1342                                                      0, TRUNC_DIV_EXPR,
1343                                                      &mod, &of);
1344           overflow |= of;
1345         }
1346       if (compare_code == LE_EXPR || compare_code == GE_EXPR)
1347         ++compare_count;
1348       if (loop_bound_code == LE_EXPR || loop_bound_code == GE_EXPR)
1349         ++loop_count;
1350       if (compare_count.is_negative ())
1351         compare_count = double_int_zero;
1352       if (loop_count.is_negative ())
1353         loop_count = double_int_zero;
1354       if (loop_count.is_zero ())
1355         probability = 0;
1356       else if (compare_count.scmp (loop_count) == 1)
1357         probability = REG_BR_PROB_BASE;
1358       else
1359         {
1360           /* If loop_count is too big, such that REG_BR_PROB_BASE * loop_count
1361              could overflow, shift both loop_count and compare_count right
1362              a bit so that it doesn't overflow.  Note both counts are known not
1363              to be negative at this point.  */
1364           int clz_bits = clz_hwi (loop_count.high);
1365           gcc_assert (REG_BR_PROB_BASE < 32768);
1366           if (clz_bits < 16)
1367             {
1368               loop_count.arshift (16 - clz_bits, HOST_BITS_PER_DOUBLE_INT);
1369               compare_count.arshift (16 - clz_bits, HOST_BITS_PER_DOUBLE_INT);
1370             }
1371           tem = compare_count.mul_with_sign (double_int::from_shwi
1372                                             (REG_BR_PROB_BASE), true, &of);
1373           gcc_assert (!of);
1374           tem = tem.divmod (loop_count, true, TRUNC_DIV_EXPR, &mod);
1375           probability = tem.to_uhwi ();
1376         }
1377
1378       if (!overflow)
1379         predict_edge (then_edge, PRED_LOOP_IV_COMPARE, probability);
1380
1381       return;
1382     }
1383
1384   if (expr_coherent_p (loop_bound_var, compare_var))
1385     {
1386       if ((loop_bound_code == LT_EXPR || loop_bound_code == LE_EXPR)
1387           && (compare_code == LT_EXPR || compare_code == LE_EXPR))
1388         predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1389       else if ((loop_bound_code == GT_EXPR || loop_bound_code == GE_EXPR)
1390                && (compare_code == GT_EXPR || compare_code == GE_EXPR))
1391         predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1392       else if (loop_bound_code == NE_EXPR)
1393         {
1394           /* If the loop backedge condition is "(i != bound)", we do
1395              the comparison based on the step of IV:
1396              * step < 0 : backedge condition is like (i > bound)
1397              * step > 0 : backedge condition is like (i < bound)  */
1398           gcc_assert (loop_bound_step != 0);
1399           if (loop_bound_step > 0
1400               && (compare_code == LT_EXPR
1401                   || compare_code == LE_EXPR))
1402             predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1403           else if (loop_bound_step < 0
1404                    && (compare_code == GT_EXPR
1405                        || compare_code == GE_EXPR))
1406             predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1407           else
1408             predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1409         }
1410       else
1411         /* The branch is predicted not-taken if loop_bound_code is
1412            opposite with compare_code.  */
1413         predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1414     }
1415   else if (expr_coherent_p (loop_iv_base_var, compare_var))
1416     {
1417       /* For cases like:
1418            for (i = s; i < h; i++)
1419              if (i > s + 2) ....
1420          The branch should be predicted taken.  */
1421       if (loop_bound_step > 0
1422           && (compare_code == GT_EXPR || compare_code == GE_EXPR))
1423         predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1424       else if (loop_bound_step < 0
1425                && (compare_code == LT_EXPR || compare_code == LE_EXPR))
1426         predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1427       else
1428         predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1429     }
1430 }
1431
1432 /* Predict for extra loop exits that will lead to EXIT_EDGE. The extra loop
1433    exits are resulted from short-circuit conditions that will generate an
1434    if_tmp. E.g.:
1435
1436    if (foo() || global > 10)
1437      break;
1438
1439    This will be translated into:
1440
1441    BB3:
1442      loop header...
1443    BB4:
1444      if foo() goto BB6 else goto BB5
1445    BB5:
1446      if global > 10 goto BB6 else goto BB7
1447    BB6:
1448      goto BB7
1449    BB7:
1450      iftmp = (PHI 0(BB5), 1(BB6))
1451      if iftmp == 1 goto BB8 else goto BB3
1452    BB8:
1453      outside of the loop...
1454
1455    The edge BB7->BB8 is loop exit because BB8 is outside of the loop.
1456    From the dataflow, we can infer that BB4->BB6 and BB5->BB6 are also loop
1457    exits. This function takes BB7->BB8 as input, and finds out the extra loop
1458    exits to predict them using PRED_LOOP_EXIT.  */
1459
1460 static void
1461 predict_extra_loop_exits (edge exit_edge)
1462 {
1463   unsigned i;
1464   bool check_value_one;
1465   gimple phi_stmt;
1466   tree cmp_rhs, cmp_lhs;
1467   gimple cmp_stmt = last_stmt (exit_edge->src);
1468
1469   if (!cmp_stmt || gimple_code (cmp_stmt) != GIMPLE_COND)
1470     return;
1471   cmp_rhs = gimple_cond_rhs (cmp_stmt);
1472   cmp_lhs = gimple_cond_lhs (cmp_stmt);
1473   if (!TREE_CONSTANT (cmp_rhs)
1474       || !(integer_zerop (cmp_rhs) || integer_onep (cmp_rhs)))
1475     return;
1476   if (TREE_CODE (cmp_lhs) != SSA_NAME)
1477     return;
1478
1479   /* If check_value_one is true, only the phi_args with value '1' will lead
1480      to loop exit. Otherwise, only the phi_args with value '0' will lead to
1481      loop exit.  */
1482   check_value_one = (((integer_onep (cmp_rhs))
1483                     ^ (gimple_cond_code (cmp_stmt) == EQ_EXPR))
1484                     ^ ((exit_edge->flags & EDGE_TRUE_VALUE) != 0));
1485
1486   phi_stmt = SSA_NAME_DEF_STMT (cmp_lhs);
1487   if (!phi_stmt || gimple_code (phi_stmt) != GIMPLE_PHI)
1488     return;
1489
1490   for (i = 0; i < gimple_phi_num_args (phi_stmt); i++)
1491     {
1492       edge e1;
1493       edge_iterator ei;
1494       tree val = gimple_phi_arg_def (phi_stmt, i);
1495       edge e = gimple_phi_arg_edge (phi_stmt, i);
1496
1497       if (!TREE_CONSTANT (val) || !(integer_zerop (val) || integer_onep (val)))
1498         continue;
1499       if ((check_value_one ^ integer_onep (val)) == 1)
1500         continue;
1501       if (EDGE_COUNT (e->src->succs) != 1)
1502         {
1503           predict_paths_leading_to_edge (e, PRED_LOOP_EXIT, NOT_TAKEN);
1504           continue;
1505         }
1506
1507       FOR_EACH_EDGE (e1, ei, e->src->preds)
1508         predict_paths_leading_to_edge (e1, PRED_LOOP_EXIT, NOT_TAKEN);
1509     }
1510 }
1511
1512 /* Predict edge probabilities by exploiting loop structure.  */
1513
1514 static void
1515 predict_loops (void)
1516 {
1517   struct loop *loop;
1518
1519   /* Try to predict out blocks in a loop that are not part of a
1520      natural loop.  */
1521   FOR_EACH_LOOP (loop, 0)
1522     {
1523       basic_block bb, *bbs;
1524       unsigned j, n_exits;
1525       vec<edge> exits;
1526       struct tree_niter_desc niter_desc;
1527       edge ex;
1528       struct nb_iter_bound *nb_iter;
1529       enum tree_code loop_bound_code = ERROR_MARK;
1530       tree loop_bound_step = NULL;
1531       tree loop_bound_var = NULL;
1532       tree loop_iv_base = NULL;
1533       gimple stmt = NULL;
1534
1535       exits = get_loop_exit_edges (loop);
1536       n_exits = exits.length ();
1537       if (!n_exits)
1538         {
1539           exits.release ();
1540           continue;
1541         }
1542
1543       FOR_EACH_VEC_ELT (exits, j, ex)
1544         {
1545           tree niter = NULL;
1546           HOST_WIDE_INT nitercst;
1547           int max = PARAM_VALUE (PARAM_MAX_PREDICTED_ITERATIONS);
1548           int probability;
1549           enum br_predictor predictor;
1550
1551           predict_extra_loop_exits (ex);
1552
1553           if (number_of_iterations_exit (loop, ex, &niter_desc, false, false))
1554             niter = niter_desc.niter;
1555           if (!niter || TREE_CODE (niter_desc.niter) != INTEGER_CST)
1556             niter = loop_niter_by_eval (loop, ex);
1557
1558           if (TREE_CODE (niter) == INTEGER_CST)
1559             {
1560               if (tree_fits_uhwi_p (niter)
1561                   && max
1562                   && compare_tree_int (niter, max - 1) == -1)
1563                 nitercst = tree_to_uhwi (niter) + 1;
1564               else
1565                 nitercst = max;
1566               predictor = PRED_LOOP_ITERATIONS;
1567             }
1568           /* If we have just one exit and we can derive some information about
1569              the number of iterations of the loop from the statements inside
1570              the loop, use it to predict this exit.  */
1571           else if (n_exits == 1)
1572             {
1573               nitercst = estimated_stmt_executions_int (loop);
1574               if (nitercst < 0)
1575                 continue;
1576               if (nitercst > max)
1577                 nitercst = max;
1578
1579               predictor = PRED_LOOP_ITERATIONS_GUESSED;
1580             }
1581           else
1582             continue;
1583
1584           /* If the prediction for number of iterations is zero, do not
1585              predict the exit edges.  */
1586           if (nitercst == 0)
1587             continue;
1588
1589           probability = ((REG_BR_PROB_BASE + nitercst / 2) / nitercst);
1590           predict_edge (ex, predictor, probability);
1591         }
1592       exits.release ();
1593
1594       /* Find information about loop bound variables.  */
1595       for (nb_iter = loop->bounds; nb_iter;
1596            nb_iter = nb_iter->next)
1597         if (nb_iter->stmt
1598             && gimple_code (nb_iter->stmt) == GIMPLE_COND)
1599           {
1600             stmt = nb_iter->stmt;
1601             break;
1602           }
1603       if (!stmt && last_stmt (loop->header)
1604           && gimple_code (last_stmt (loop->header)) == GIMPLE_COND)
1605         stmt = last_stmt (loop->header);
1606       if (stmt)
1607         is_comparison_with_loop_invariant_p (stmt, loop,
1608                                              &loop_bound_var,
1609                                              &loop_bound_code,
1610                                              &loop_bound_step,
1611                                              &loop_iv_base);
1612
1613       bbs = get_loop_body (loop);
1614
1615       for (j = 0; j < loop->num_nodes; j++)
1616         {
1617           int header_found = 0;
1618           edge e;
1619           edge_iterator ei;
1620
1621           bb = bbs[j];
1622
1623           /* Bypass loop heuristics on continue statement.  These
1624              statements construct loops via "non-loop" constructs
1625              in the source language and are better to be handled
1626              separately.  */
1627           if (predicted_by_p (bb, PRED_CONTINUE))
1628             continue;
1629
1630           /* Loop branch heuristics - predict an edge back to a
1631              loop's head as taken.  */
1632           if (bb == loop->latch)
1633             {
1634               e = find_edge (loop->latch, loop->header);
1635               if (e)
1636                 {
1637                   header_found = 1;
1638                   predict_edge_def (e, PRED_LOOP_BRANCH, TAKEN);
1639                 }
1640             }
1641
1642           /* Loop exit heuristics - predict an edge exiting the loop if the
1643              conditional has no loop header successors as not taken.  */
1644           if (!header_found
1645               /* If we already used more reliable loop exit predictors, do not
1646                  bother with PRED_LOOP_EXIT.  */
1647               && !predicted_by_p (bb, PRED_LOOP_ITERATIONS_GUESSED)
1648               && !predicted_by_p (bb, PRED_LOOP_ITERATIONS))
1649             {
1650               /* For loop with many exits we don't want to predict all exits
1651                  with the pretty large probability, because if all exits are
1652                  considered in row, the loop would be predicted to iterate
1653                  almost never.  The code to divide probability by number of
1654                  exits is very rough.  It should compute the number of exits
1655                  taken in each patch through function (not the overall number
1656                  of exits that might be a lot higher for loops with wide switch
1657                  statements in them) and compute n-th square root.
1658
1659                  We limit the minimal probability by 2% to avoid
1660                  EDGE_PROBABILITY_RELIABLE from trusting the branch prediction
1661                  as this was causing regression in perl benchmark containing such
1662                  a wide loop.  */
1663
1664               int probability = ((REG_BR_PROB_BASE
1665                                   - predictor_info [(int) PRED_LOOP_EXIT].hitrate)
1666                                  / n_exits);
1667               if (probability < HITRATE (2))
1668                 probability = HITRATE (2);
1669               FOR_EACH_EDGE (e, ei, bb->succs)
1670                 if (e->dest->index < NUM_FIXED_BLOCKS
1671                     || !flow_bb_inside_loop_p (loop, e->dest))
1672                   predict_edge (e, PRED_LOOP_EXIT, probability);
1673             }
1674           if (loop_bound_var)
1675             predict_iv_comparison (loop, bb, loop_bound_var, loop_iv_base,
1676                                    loop_bound_code,
1677                                    tree_to_shwi (loop_bound_step));
1678         }
1679
1680       /* Free basic blocks from get_loop_body.  */
1681       free (bbs);
1682     }
1683 }
1684
1685 /* Attempt to predict probabilities of BB outgoing edges using local
1686    properties.  */
1687 static void
1688 bb_estimate_probability_locally (basic_block bb)
1689 {
1690   rtx last_insn = BB_END (bb);
1691   rtx cond;
1692
1693   if (! can_predict_insn_p (last_insn))
1694     return;
1695   cond = get_condition (last_insn, NULL, false, false);
1696   if (! cond)
1697     return;
1698
1699   /* Try "pointer heuristic."
1700      A comparison ptr == 0 is predicted as false.
1701      Similarly, a comparison ptr1 == ptr2 is predicted as false.  */
1702   if (COMPARISON_P (cond)
1703       && ((REG_P (XEXP (cond, 0)) && REG_POINTER (XEXP (cond, 0)))
1704           || (REG_P (XEXP (cond, 1)) && REG_POINTER (XEXP (cond, 1)))))
1705     {
1706       if (GET_CODE (cond) == EQ)
1707         predict_insn_def (last_insn, PRED_POINTER, NOT_TAKEN);
1708       else if (GET_CODE (cond) == NE)
1709         predict_insn_def (last_insn, PRED_POINTER, TAKEN);
1710     }
1711   else
1712
1713   /* Try "opcode heuristic."
1714      EQ tests are usually false and NE tests are usually true. Also,
1715      most quantities are positive, so we can make the appropriate guesses
1716      about signed comparisons against zero.  */
1717     switch (GET_CODE (cond))
1718       {
1719       case CONST_INT:
1720         /* Unconditional branch.  */
1721         predict_insn_def (last_insn, PRED_UNCONDITIONAL,
1722                           cond == const0_rtx ? NOT_TAKEN : TAKEN);
1723         break;
1724
1725       case EQ:
1726       case UNEQ:
1727         /* Floating point comparisons appears to behave in a very
1728            unpredictable way because of special role of = tests in
1729            FP code.  */
1730         if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
1731           ;
1732         /* Comparisons with 0 are often used for booleans and there is
1733            nothing useful to predict about them.  */
1734         else if (XEXP (cond, 1) == const0_rtx
1735                  || XEXP (cond, 0) == const0_rtx)
1736           ;
1737         else
1738           predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, NOT_TAKEN);
1739         break;
1740
1741       case NE:
1742       case LTGT:
1743         /* Floating point comparisons appears to behave in a very
1744            unpredictable way because of special role of = tests in
1745            FP code.  */
1746         if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
1747           ;
1748         /* Comparisons with 0 are often used for booleans and there is
1749            nothing useful to predict about them.  */
1750         else if (XEXP (cond, 1) == const0_rtx
1751                  || XEXP (cond, 0) == const0_rtx)
1752           ;
1753         else
1754           predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, TAKEN);
1755         break;
1756
1757       case ORDERED:
1758         predict_insn_def (last_insn, PRED_FPOPCODE, TAKEN);
1759         break;
1760
1761       case UNORDERED:
1762         predict_insn_def (last_insn, PRED_FPOPCODE, NOT_TAKEN);
1763         break;
1764
1765       case LE:
1766       case LT:
1767         if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
1768             || XEXP (cond, 1) == constm1_rtx)
1769           predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, NOT_TAKEN);
1770         break;
1771
1772       case GE:
1773       case GT:
1774         if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
1775             || XEXP (cond, 1) == constm1_rtx)
1776           predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, TAKEN);
1777         break;
1778
1779       default:
1780         break;
1781       }
1782 }
1783
1784 /* Set edge->probability for each successor edge of BB.  */
1785 void
1786 guess_outgoing_edge_probabilities (basic_block bb)
1787 {
1788   bb_estimate_probability_locally (bb);
1789   combine_predictions_for_insn (BB_END (bb), bb);
1790 }
1791 \f
1792 static tree expr_expected_value (tree, bitmap);
1793
1794 /* Helper function for expr_expected_value.  */
1795
1796 static tree
1797 expr_expected_value_1 (tree type, tree op0, enum tree_code code,
1798                        tree op1, bitmap visited)
1799 {
1800   gimple def;
1801
1802   if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1803     {
1804       if (TREE_CONSTANT (op0))
1805         return op0;
1806
1807       if (code != SSA_NAME)
1808         return NULL_TREE;
1809
1810       def = SSA_NAME_DEF_STMT (op0);
1811
1812       /* If we were already here, break the infinite cycle.  */
1813       if (!bitmap_set_bit (visited, SSA_NAME_VERSION (op0)))
1814         return NULL;
1815
1816       if (gimple_code (def) == GIMPLE_PHI)
1817         {
1818           /* All the arguments of the PHI node must have the same constant
1819              length.  */
1820           int i, n = gimple_phi_num_args (def);
1821           tree val = NULL, new_val;
1822
1823           for (i = 0; i < n; i++)
1824             {
1825               tree arg = PHI_ARG_DEF (def, i);
1826
1827               /* If this PHI has itself as an argument, we cannot
1828                  determine the string length of this argument.  However,
1829                  if we can find an expected constant value for the other
1830                  PHI args then we can still be sure that this is
1831                  likely a constant.  So be optimistic and just
1832                  continue with the next argument.  */
1833               if (arg == PHI_RESULT (def))
1834                 continue;
1835
1836               new_val = expr_expected_value (arg, visited);
1837               if (!new_val)
1838                 return NULL;
1839               if (!val)
1840                 val = new_val;
1841               else if (!operand_equal_p (val, new_val, false))
1842                 return NULL;
1843             }
1844           return val;
1845         }
1846       if (is_gimple_assign (def))
1847         {
1848           if (gimple_assign_lhs (def) != op0)
1849             return NULL;
1850
1851           return expr_expected_value_1 (TREE_TYPE (gimple_assign_lhs (def)),
1852                                         gimple_assign_rhs1 (def),
1853                                         gimple_assign_rhs_code (def),
1854                                         gimple_assign_rhs2 (def),
1855                                         visited);
1856         }
1857
1858       if (is_gimple_call (def))
1859         {
1860           tree decl = gimple_call_fndecl (def);
1861           if (!decl)
1862             return NULL;
1863           if (DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
1864             switch (DECL_FUNCTION_CODE (decl))
1865               {
1866               case BUILT_IN_EXPECT:
1867                 {
1868                   tree val;
1869                   if (gimple_call_num_args (def) != 2)
1870                     return NULL;
1871                   val = gimple_call_arg (def, 0);
1872                   if (TREE_CONSTANT (val))
1873                     return val;
1874                   return gimple_call_arg (def, 1);
1875                 }
1876
1877               case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_N:
1878               case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_1:
1879               case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_2:
1880               case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_4:
1881               case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_8:
1882               case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_16:
1883               case BUILT_IN_ATOMIC_COMPARE_EXCHANGE:
1884               case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_N:
1885               case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_1:
1886               case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_2:
1887               case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_4:
1888               case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_8:
1889               case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_16:
1890                 /* Assume that any given atomic operation has low contention,
1891                    and thus the compare-and-swap operation succeeds.  */
1892                 return boolean_true_node;
1893             }
1894         }
1895
1896       return NULL;
1897     }
1898
1899   if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS)
1900     {
1901       tree res;
1902       op0 = expr_expected_value (op0, visited);
1903       if (!op0)
1904         return NULL;
1905       op1 = expr_expected_value (op1, visited);
1906       if (!op1)
1907         return NULL;
1908       res = fold_build2 (code, type, op0, op1);
1909       if (TREE_CONSTANT (res))
1910         return res;
1911       return NULL;
1912     }
1913   if (get_gimple_rhs_class (code) == GIMPLE_UNARY_RHS)
1914     {
1915       tree res;
1916       op0 = expr_expected_value (op0, visited);
1917       if (!op0)
1918         return NULL;
1919       res = fold_build1 (code, type, op0);
1920       if (TREE_CONSTANT (res))
1921         return res;
1922       return NULL;
1923     }
1924   return NULL;
1925 }
1926
1927 /* Return constant EXPR will likely have at execution time, NULL if unknown.
1928    The function is used by builtin_expect branch predictor so the evidence
1929    must come from this construct and additional possible constant folding.
1930
1931    We may want to implement more involved value guess (such as value range
1932    propagation based prediction), but such tricks shall go to new
1933    implementation.  */
1934
1935 static tree
1936 expr_expected_value (tree expr, bitmap visited)
1937 {
1938   enum tree_code code;
1939   tree op0, op1;
1940
1941   if (TREE_CONSTANT (expr))
1942     return expr;
1943
1944   extract_ops_from_tree (expr, &code, &op0, &op1);
1945   return expr_expected_value_1 (TREE_TYPE (expr),
1946                                 op0, code, op1, visited);
1947 }
1948
1949 \f
1950 /* Get rid of all builtin_expect calls and GIMPLE_PREDICT statements
1951    we no longer need.  */
1952 static unsigned int
1953 strip_predict_hints (void)
1954 {
1955   basic_block bb;
1956   gimple ass_stmt;
1957   tree var;
1958
1959   FOR_EACH_BB (bb)
1960     {
1961       gimple_stmt_iterator bi;
1962       for (bi = gsi_start_bb (bb); !gsi_end_p (bi);)
1963         {
1964           gimple stmt = gsi_stmt (bi);
1965
1966           if (gimple_code (stmt) == GIMPLE_PREDICT)
1967             {
1968               gsi_remove (&bi, true);
1969               continue;
1970             }
1971           else if (gimple_code (stmt) == GIMPLE_CALL)
1972             {
1973               tree fndecl = gimple_call_fndecl (stmt);
1974
1975               if (fndecl
1976                   && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
1977                   && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_EXPECT
1978                   && gimple_call_num_args (stmt) == 2)
1979                 {
1980                   var = gimple_call_lhs (stmt);
1981                   if (var)
1982                     {
1983                       ass_stmt
1984                         = gimple_build_assign (var, gimple_call_arg (stmt, 0));
1985                       gsi_replace (&bi, ass_stmt, true);
1986                     }
1987                   else
1988                     {
1989                       gsi_remove (&bi, true);
1990                       continue;
1991                     }
1992                 }
1993             }
1994           gsi_next (&bi);
1995         }
1996     }
1997   return 0;
1998 }
1999 \f
2000 /* Predict using opcode of the last statement in basic block.  */
2001 static void
2002 tree_predict_by_opcode (basic_block bb)
2003 {
2004   gimple stmt = last_stmt (bb);
2005   edge then_edge;
2006   tree op0, op1;
2007   tree type;
2008   tree val;
2009   enum tree_code cmp;
2010   bitmap visited;
2011   edge_iterator ei;
2012
2013   if (!stmt || gimple_code (stmt) != GIMPLE_COND)
2014     return;
2015   FOR_EACH_EDGE (then_edge, ei, bb->succs)
2016     if (then_edge->flags & EDGE_TRUE_VALUE)
2017       break;
2018   op0 = gimple_cond_lhs (stmt);
2019   op1 = gimple_cond_rhs (stmt);
2020   cmp = gimple_cond_code (stmt);
2021   type = TREE_TYPE (op0);
2022   visited = BITMAP_ALLOC (NULL);
2023   val = expr_expected_value_1 (boolean_type_node, op0, cmp, op1, visited);
2024   BITMAP_FREE (visited);
2025   if (val)
2026     {
2027       int percent = PARAM_VALUE (BUILTIN_EXPECT_PROBABILITY);
2028
2029       gcc_assert (percent >= 0 && percent <= 100);
2030       if (integer_zerop (val))
2031         percent = 100 - percent;
2032       predict_edge (then_edge, PRED_BUILTIN_EXPECT, HITRATE (percent));
2033     }
2034   /* Try "pointer heuristic."
2035      A comparison ptr == 0 is predicted as false.
2036      Similarly, a comparison ptr1 == ptr2 is predicted as false.  */
2037   if (POINTER_TYPE_P (type))
2038     {
2039       if (cmp == EQ_EXPR)
2040         predict_edge_def (then_edge, PRED_TREE_POINTER, NOT_TAKEN);
2041       else if (cmp == NE_EXPR)
2042         predict_edge_def (then_edge, PRED_TREE_POINTER, TAKEN);
2043     }
2044   else
2045
2046   /* Try "opcode heuristic."
2047      EQ tests are usually false and NE tests are usually true. Also,
2048      most quantities are positive, so we can make the appropriate guesses
2049      about signed comparisons against zero.  */
2050     switch (cmp)
2051       {
2052       case EQ_EXPR:
2053       case UNEQ_EXPR:
2054         /* Floating point comparisons appears to behave in a very
2055            unpredictable way because of special role of = tests in
2056            FP code.  */
2057         if (FLOAT_TYPE_P (type))
2058           ;
2059         /* Comparisons with 0 are often used for booleans and there is
2060            nothing useful to predict about them.  */
2061         else if (integer_zerop (op0) || integer_zerop (op1))
2062           ;
2063         else
2064           predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, NOT_TAKEN);
2065         break;
2066
2067       case NE_EXPR:
2068       case LTGT_EXPR:
2069         /* Floating point comparisons appears to behave in a very
2070            unpredictable way because of special role of = tests in
2071            FP code.  */
2072         if (FLOAT_TYPE_P (type))
2073           ;
2074         /* Comparisons with 0 are often used for booleans and there is
2075            nothing useful to predict about them.  */
2076         else if (integer_zerop (op0)
2077                  || integer_zerop (op1))
2078           ;
2079         else
2080           predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, TAKEN);
2081         break;
2082
2083       case ORDERED_EXPR:
2084         predict_edge_def (then_edge, PRED_TREE_FPOPCODE, TAKEN);
2085         break;
2086
2087       case UNORDERED_EXPR:
2088         predict_edge_def (then_edge, PRED_TREE_FPOPCODE, NOT_TAKEN);
2089         break;
2090
2091       case LE_EXPR:
2092       case LT_EXPR:
2093         if (integer_zerop (op1)
2094             || integer_onep (op1)
2095             || integer_all_onesp (op1)
2096             || real_zerop (op1)
2097             || real_onep (op1)
2098             || real_minus_onep (op1))
2099           predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, NOT_TAKEN);
2100         break;
2101
2102       case GE_EXPR:
2103       case GT_EXPR:
2104         if (integer_zerop (op1)
2105             || integer_onep (op1)
2106             || integer_all_onesp (op1)
2107             || real_zerop (op1)
2108             || real_onep (op1)
2109             || real_minus_onep (op1))
2110           predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, TAKEN);
2111         break;
2112
2113       default:
2114         break;
2115       }
2116 }
2117
2118 /* Try to guess whether the value of return means error code.  */
2119
2120 static enum br_predictor
2121 return_prediction (tree val, enum prediction *prediction)
2122 {
2123   /* VOID.  */
2124   if (!val)
2125     return PRED_NO_PREDICTION;
2126   /* Different heuristics for pointers and scalars.  */
2127   if (POINTER_TYPE_P (TREE_TYPE (val)))
2128     {
2129       /* NULL is usually not returned.  */
2130       if (integer_zerop (val))
2131         {
2132           *prediction = NOT_TAKEN;
2133           return PRED_NULL_RETURN;
2134         }
2135     }
2136   else if (INTEGRAL_TYPE_P (TREE_TYPE (val)))
2137     {
2138       /* Negative return values are often used to indicate
2139          errors.  */
2140       if (TREE_CODE (val) == INTEGER_CST
2141           && tree_int_cst_sgn (val) < 0)
2142         {
2143           *prediction = NOT_TAKEN;
2144           return PRED_NEGATIVE_RETURN;
2145         }
2146       /* Constant return values seems to be commonly taken.
2147          Zero/one often represent booleans so exclude them from the
2148          heuristics.  */
2149       if (TREE_CONSTANT (val)
2150           && (!integer_zerop (val) && !integer_onep (val)))
2151         {
2152           *prediction = TAKEN;
2153           return PRED_CONST_RETURN;
2154         }
2155     }
2156   return PRED_NO_PREDICTION;
2157 }
2158
2159 /* Find the basic block with return expression and look up for possible
2160    return value trying to apply RETURN_PREDICTION heuristics.  */
2161 static void
2162 apply_return_prediction (void)
2163 {
2164   gimple return_stmt = NULL;
2165   tree return_val;
2166   edge e;
2167   gimple phi;
2168   int phi_num_args, i;
2169   enum br_predictor pred;
2170   enum prediction direction;
2171   edge_iterator ei;
2172
2173   FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
2174     {
2175       return_stmt = last_stmt (e->src);
2176       if (return_stmt
2177           && gimple_code (return_stmt) == GIMPLE_RETURN)
2178         break;
2179     }
2180   if (!e)
2181     return;
2182   return_val = gimple_return_retval (return_stmt);
2183   if (!return_val)
2184     return;
2185   if (TREE_CODE (return_val) != SSA_NAME
2186       || !SSA_NAME_DEF_STMT (return_val)
2187       || gimple_code (SSA_NAME_DEF_STMT (return_val)) != GIMPLE_PHI)
2188     return;
2189   phi = SSA_NAME_DEF_STMT (return_val);
2190   phi_num_args = gimple_phi_num_args (phi);
2191   pred = return_prediction (PHI_ARG_DEF (phi, 0), &direction);
2192
2193   /* Avoid the degenerate case where all return values form the function
2194      belongs to same category (ie they are all positive constants)
2195      so we can hardly say something about them.  */
2196   for (i = 1; i < phi_num_args; i++)
2197     if (pred != return_prediction (PHI_ARG_DEF (phi, i), &direction))
2198       break;
2199   if (i != phi_num_args)
2200     for (i = 0; i < phi_num_args; i++)
2201       {
2202         pred = return_prediction (PHI_ARG_DEF (phi, i), &direction);
2203         if (pred != PRED_NO_PREDICTION)
2204           predict_paths_leading_to_edge (gimple_phi_arg_edge (phi, i), pred,
2205                                          direction);
2206       }
2207 }
2208
2209 /* Look for basic block that contains unlikely to happen events
2210    (such as noreturn calls) and mark all paths leading to execution
2211    of this basic blocks as unlikely.  */
2212
2213 static void
2214 tree_bb_level_predictions (void)
2215 {
2216   basic_block bb;
2217   bool has_return_edges = false;
2218   edge e;
2219   edge_iterator ei;
2220
2221   FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
2222     if (!(e->flags & (EDGE_ABNORMAL | EDGE_FAKE | EDGE_EH)))
2223       {
2224         has_return_edges = true;
2225         break;
2226       }
2227
2228   apply_return_prediction ();
2229
2230   FOR_EACH_BB (bb)
2231     {
2232       gimple_stmt_iterator gsi;
2233
2234       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2235         {
2236           gimple stmt = gsi_stmt (gsi);
2237           tree decl;
2238
2239           if (is_gimple_call (stmt))
2240             {
2241               if ((gimple_call_flags (stmt) & ECF_NORETURN)
2242                   && has_return_edges)
2243                 predict_paths_leading_to (bb, PRED_NORETURN,
2244                                           NOT_TAKEN);
2245               decl = gimple_call_fndecl (stmt);
2246               if (decl
2247                   && lookup_attribute ("cold",
2248                                        DECL_ATTRIBUTES (decl)))
2249                 predict_paths_leading_to (bb, PRED_COLD_FUNCTION,
2250                                           NOT_TAKEN);
2251             }
2252           else if (gimple_code (stmt) == GIMPLE_PREDICT)
2253             {
2254               predict_paths_leading_to (bb, gimple_predict_predictor (stmt),
2255                                         gimple_predict_outcome (stmt));
2256               /* Keep GIMPLE_PREDICT around so early inlining will propagate
2257                  hints to callers.  */
2258             }
2259         }
2260     }
2261 }
2262
2263 #ifdef ENABLE_CHECKING
2264
2265 /* Callback for pointer_map_traverse, asserts that the pointer map is
2266    empty.  */
2267
2268 static bool
2269 assert_is_empty (const void *key ATTRIBUTE_UNUSED, void **value,
2270                  void *data ATTRIBUTE_UNUSED)
2271 {
2272   gcc_assert (!*value);
2273   return false;
2274 }
2275 #endif
2276
2277 /* Predict branch probabilities and estimate profile for basic block BB.  */
2278
2279 static void
2280 tree_estimate_probability_bb (basic_block bb)
2281 {
2282   edge e;
2283   edge_iterator ei;
2284   gimple last;
2285
2286   FOR_EACH_EDGE (e, ei, bb->succs)
2287     {
2288       /* Predict edges to user labels with attributes.  */
2289       if (e->dest != EXIT_BLOCK_PTR)
2290         {
2291           gimple_stmt_iterator gi;
2292           for (gi = gsi_start_bb (e->dest); !gsi_end_p (gi); gsi_next (&gi))
2293             {
2294               gimple stmt = gsi_stmt (gi);
2295               tree decl;
2296
2297               if (gimple_code (stmt) != GIMPLE_LABEL)
2298                 break;
2299               decl = gimple_label_label (stmt);
2300               if (DECL_ARTIFICIAL (decl))
2301                 continue;
2302
2303               /* Finally, we have a user-defined label.  */
2304               if (lookup_attribute ("cold", DECL_ATTRIBUTES (decl)))
2305                 predict_edge_def (e, PRED_COLD_LABEL, NOT_TAKEN);
2306               else if (lookup_attribute ("hot", DECL_ATTRIBUTES (decl)))
2307                 predict_edge_def (e, PRED_HOT_LABEL, TAKEN);
2308             }
2309         }
2310
2311       /* Predict early returns to be probable, as we've already taken
2312          care for error returns and other cases are often used for
2313          fast paths through function.
2314
2315          Since we've already removed the return statements, we are
2316          looking for CFG like:
2317
2318          if (conditional)
2319          {
2320          ..
2321          goto return_block
2322          }
2323          some other blocks
2324          return_block:
2325          return_stmt.  */
2326       if (e->dest != bb->next_bb
2327           && e->dest != EXIT_BLOCK_PTR
2328           && single_succ_p (e->dest)
2329           && single_succ_edge (e->dest)->dest == EXIT_BLOCK_PTR
2330           && (last = last_stmt (e->dest)) != NULL
2331           && gimple_code (last) == GIMPLE_RETURN)
2332         {
2333           edge e1;
2334           edge_iterator ei1;
2335
2336           if (single_succ_p (bb))
2337             {
2338               FOR_EACH_EDGE (e1, ei1, bb->preds)
2339                 if (!predicted_by_p (e1->src, PRED_NULL_RETURN)
2340                     && !predicted_by_p (e1->src, PRED_CONST_RETURN)
2341                     && !predicted_by_p (e1->src, PRED_NEGATIVE_RETURN))
2342                   predict_edge_def (e1, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
2343             }
2344           else
2345             if (!predicted_by_p (e->src, PRED_NULL_RETURN)
2346                 && !predicted_by_p (e->src, PRED_CONST_RETURN)
2347                 && !predicted_by_p (e->src, PRED_NEGATIVE_RETURN))
2348               predict_edge_def (e, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
2349         }
2350
2351       /* Look for block we are guarding (ie we dominate it,
2352          but it doesn't postdominate us).  */
2353       if (e->dest != EXIT_BLOCK_PTR && e->dest != bb
2354           && dominated_by_p (CDI_DOMINATORS, e->dest, e->src)
2355           && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e->dest))
2356         {
2357           gimple_stmt_iterator bi;
2358
2359           /* The call heuristic claims that a guarded function call
2360              is improbable.  This is because such calls are often used
2361              to signal exceptional situations such as printing error
2362              messages.  */
2363           for (bi = gsi_start_bb (e->dest); !gsi_end_p (bi);
2364                gsi_next (&bi))
2365             {
2366               gimple stmt = gsi_stmt (bi);
2367               if (is_gimple_call (stmt)
2368                   /* Constant and pure calls are hardly used to signalize
2369                      something exceptional.  */
2370                   && gimple_has_side_effects (stmt))
2371                 {
2372                   predict_edge_def (e, PRED_CALL, NOT_TAKEN);
2373                   break;
2374                 }
2375             }
2376         }
2377     }
2378   tree_predict_by_opcode (bb);
2379 }
2380
2381 /* Predict branch probabilities and estimate profile of the tree CFG.
2382    This function can be called from the loop optimizers to recompute
2383    the profile information.  */
2384
2385 void
2386 tree_estimate_probability (void)
2387 {
2388   basic_block bb;
2389
2390   add_noreturn_fake_exit_edges ();
2391   connect_infinite_loops_to_exit ();
2392   /* We use loop_niter_by_eval, which requires that the loops have
2393      preheaders.  */
2394   create_preheaders (CP_SIMPLE_PREHEADERS);
2395   calculate_dominance_info (CDI_POST_DOMINATORS);
2396
2397   bb_predictions = pointer_map_create ();
2398   tree_bb_level_predictions ();
2399   record_loop_exits ();
2400
2401   if (number_of_loops (cfun) > 1)
2402     predict_loops ();
2403
2404   FOR_EACH_BB (bb)
2405     tree_estimate_probability_bb (bb);
2406
2407   FOR_EACH_BB (bb)
2408     combine_predictions_for_bb (bb);
2409
2410 #ifdef ENABLE_CHECKING
2411   pointer_map_traverse (bb_predictions, assert_is_empty, NULL);
2412 #endif
2413   pointer_map_destroy (bb_predictions);
2414   bb_predictions = NULL;
2415
2416   estimate_bb_frequencies (false);
2417   free_dominance_info (CDI_POST_DOMINATORS);
2418   remove_fake_exit_edges ();
2419 }
2420
2421 /* Predict branch probabilities and estimate profile of the tree CFG.
2422    This is the driver function for PASS_PROFILE.  */
2423
2424 static unsigned int
2425 tree_estimate_probability_driver (void)
2426 {
2427   unsigned nb_loops;
2428
2429   loop_optimizer_init (LOOPS_NORMAL);
2430   if (dump_file && (dump_flags & TDF_DETAILS))
2431     flow_loops_dump (dump_file, NULL, 0);
2432
2433   mark_irreducible_loops ();
2434
2435   nb_loops = number_of_loops (cfun);
2436   if (nb_loops > 1)
2437     scev_initialize ();
2438
2439   tree_estimate_probability ();
2440
2441   if (nb_loops > 1)
2442     scev_finalize ();
2443
2444   loop_optimizer_finalize ();
2445   if (dump_file && (dump_flags & TDF_DETAILS))
2446     gimple_dump_cfg (dump_file, dump_flags);
2447   if (profile_status == PROFILE_ABSENT)
2448     profile_status = PROFILE_GUESSED;
2449   return 0;
2450 }
2451 \f
2452 /* Predict edges to successors of CUR whose sources are not postdominated by
2453    BB by PRED and recurse to all postdominators.  */
2454
2455 static void
2456 predict_paths_for_bb (basic_block cur, basic_block bb,
2457                       enum br_predictor pred,
2458                       enum prediction taken,
2459                       bitmap visited)
2460 {
2461   edge e;
2462   edge_iterator ei;
2463   basic_block son;
2464
2465   /* We are looking for all edges forming edge cut induced by
2466      set of all blocks postdominated by BB.  */
2467   FOR_EACH_EDGE (e, ei, cur->preds)
2468     if (e->src->index >= NUM_FIXED_BLOCKS
2469         && !dominated_by_p (CDI_POST_DOMINATORS, e->src, bb))
2470     {
2471       edge e2;
2472       edge_iterator ei2;
2473       bool found = false;
2474
2475       /* Ignore fake edges and eh, we predict them as not taken anyway.  */
2476       if (e->flags & (EDGE_EH | EDGE_FAKE))
2477         continue;
2478       gcc_assert (bb == cur || dominated_by_p (CDI_POST_DOMINATORS, cur, bb));
2479
2480       /* See if there is an edge from e->src that is not abnormal
2481          and does not lead to BB.  */
2482       FOR_EACH_EDGE (e2, ei2, e->src->succs)
2483         if (e2 != e
2484             && !(e2->flags & (EDGE_EH | EDGE_FAKE))
2485             && !dominated_by_p (CDI_POST_DOMINATORS, e2->dest, bb))
2486           {
2487             found = true;
2488             break;
2489           }
2490
2491       /* If there is non-abnormal path leaving e->src, predict edge
2492          using predictor.  Otherwise we need to look for paths
2493          leading to e->src.
2494
2495          The second may lead to infinite loop in the case we are predicitng
2496          regions that are only reachable by abnormal edges.  We simply
2497          prevent visiting given BB twice.  */
2498       if (found)
2499         predict_edge_def (e, pred, taken);
2500       else if (bitmap_set_bit (visited, e->src->index))
2501         predict_paths_for_bb (e->src, e->src, pred, taken, visited);
2502     }
2503   for (son = first_dom_son (CDI_POST_DOMINATORS, cur);
2504        son;
2505        son = next_dom_son (CDI_POST_DOMINATORS, son))
2506     predict_paths_for_bb (son, bb, pred, taken, visited);
2507 }
2508
2509 /* Sets branch probabilities according to PREDiction and
2510    FLAGS.  */
2511
2512 static void
2513 predict_paths_leading_to (basic_block bb, enum br_predictor pred,
2514                           enum prediction taken)
2515 {
2516   bitmap visited = BITMAP_ALLOC (NULL);
2517   predict_paths_for_bb (bb, bb, pred, taken, visited);
2518   BITMAP_FREE (visited);
2519 }
2520
2521 /* Like predict_paths_leading_to but take edge instead of basic block.  */
2522
2523 static void
2524 predict_paths_leading_to_edge (edge e, enum br_predictor pred,
2525                                enum prediction taken)
2526 {
2527   bool has_nonloop_edge = false;
2528   edge_iterator ei;
2529   edge e2;
2530
2531   basic_block bb = e->src;
2532   FOR_EACH_EDGE (e2, ei, bb->succs)
2533     if (e2->dest != e->src && e2->dest != e->dest
2534         && !(e->flags & (EDGE_EH | EDGE_FAKE))
2535         && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e2->dest))
2536       {
2537         has_nonloop_edge = true;
2538         break;
2539       }
2540   if (!has_nonloop_edge)
2541     {
2542       bitmap visited = BITMAP_ALLOC (NULL);
2543       predict_paths_for_bb (bb, bb, pred, taken, visited);
2544       BITMAP_FREE (visited);
2545     }
2546   else
2547     predict_edge_def (e, pred, taken);
2548 }
2549 \f
2550 /* This is used to carry information about basic blocks.  It is
2551    attached to the AUX field of the standard CFG block.  */
2552
2553 typedef struct block_info_def
2554 {
2555   /* Estimated frequency of execution of basic_block.  */
2556   sreal frequency;
2557
2558   /* To keep queue of basic blocks to process.  */
2559   basic_block next;
2560
2561   /* Number of predecessors we need to visit first.  */
2562   int npredecessors;
2563 } *block_info;
2564
2565 /* Similar information for edges.  */
2566 typedef struct edge_info_def
2567 {
2568   /* In case edge is a loopback edge, the probability edge will be reached
2569      in case header is.  Estimated number of iterations of the loop can be
2570      then computed as 1 / (1 - back_edge_prob).  */
2571   sreal back_edge_prob;
2572   /* True if the edge is a loopback edge in the natural loop.  */
2573   unsigned int back_edge:1;
2574 } *edge_info;
2575
2576 #define BLOCK_INFO(B)   ((block_info) (B)->aux)
2577 #define EDGE_INFO(E)    ((edge_info) (E)->aux)
2578
2579 /* Helper function for estimate_bb_frequencies.
2580    Propagate the frequencies in blocks marked in
2581    TOVISIT, starting in HEAD.  */
2582
2583 static void
2584 propagate_freq (basic_block head, bitmap tovisit)
2585 {
2586   basic_block bb;
2587   basic_block last;
2588   unsigned i;
2589   edge e;
2590   basic_block nextbb;
2591   bitmap_iterator bi;
2592
2593   /* For each basic block we need to visit count number of his predecessors
2594      we need to visit first.  */
2595   EXECUTE_IF_SET_IN_BITMAP (tovisit, 0, i, bi)
2596     {
2597       edge_iterator ei;
2598       int count = 0;
2599
2600       bb = BASIC_BLOCK (i);
2601
2602       FOR_EACH_EDGE (e, ei, bb->preds)
2603         {
2604           bool visit = bitmap_bit_p (tovisit, e->src->index);
2605
2606           if (visit && !(e->flags & EDGE_DFS_BACK))
2607             count++;
2608           else if (visit && dump_file && !EDGE_INFO (e)->back_edge)
2609             fprintf (dump_file,
2610                      "Irreducible region hit, ignoring edge to %i->%i\n",
2611                      e->src->index, bb->index);
2612         }
2613       BLOCK_INFO (bb)->npredecessors = count;
2614       /* When function never returns, we will never process exit block.  */
2615       if (!count && bb == EXIT_BLOCK_PTR)
2616         bb->count = bb->frequency = 0;
2617     }
2618
2619   memcpy (&BLOCK_INFO (head)->frequency, &real_one, sizeof (real_one));
2620   last = head;
2621   for (bb = head; bb; bb = nextbb)
2622     {
2623       edge_iterator ei;
2624       sreal cyclic_probability, frequency;
2625
2626       memcpy (&cyclic_probability, &real_zero, sizeof (real_zero));
2627       memcpy (&frequency, &real_zero, sizeof (real_zero));
2628
2629       nextbb = BLOCK_INFO (bb)->next;
2630       BLOCK_INFO (bb)->next = NULL;
2631
2632       /* Compute frequency of basic block.  */
2633       if (bb != head)
2634         {
2635 #ifdef ENABLE_CHECKING
2636           FOR_EACH_EDGE (e, ei, bb->preds)
2637             gcc_assert (!bitmap_bit_p (tovisit, e->src->index)
2638                         || (e->flags & EDGE_DFS_BACK));
2639 #endif
2640
2641           FOR_EACH_EDGE (e, ei, bb->preds)
2642             if (EDGE_INFO (e)->back_edge)
2643               {
2644                 sreal_add (&cyclic_probability, &cyclic_probability,
2645                            &EDGE_INFO (e)->back_edge_prob);
2646               }
2647             else if (!(e->flags & EDGE_DFS_BACK))
2648               {
2649                 sreal tmp;
2650
2651                 /*  frequency += (e->probability
2652                                   * BLOCK_INFO (e->src)->frequency /
2653                                   REG_BR_PROB_BASE);  */
2654
2655                 sreal_init (&tmp, e->probability, 0);
2656                 sreal_mul (&tmp, &tmp, &BLOCK_INFO (e->src)->frequency);
2657                 sreal_mul (&tmp, &tmp, &real_inv_br_prob_base);
2658                 sreal_add (&frequency, &frequency, &tmp);
2659               }
2660
2661           if (sreal_compare (&cyclic_probability, &real_zero) == 0)
2662             {
2663               memcpy (&BLOCK_INFO (bb)->frequency, &frequency,
2664                       sizeof (frequency));
2665             }
2666           else
2667             {
2668               if (sreal_compare (&cyclic_probability, &real_almost_one) > 0)
2669                 {
2670                   memcpy (&cyclic_probability, &real_almost_one,
2671                           sizeof (real_almost_one));
2672                 }
2673
2674               /* BLOCK_INFO (bb)->frequency = frequency
2675                                               / (1 - cyclic_probability) */
2676
2677               sreal_sub (&cyclic_probability, &real_one, &cyclic_probability);
2678               sreal_div (&BLOCK_INFO (bb)->frequency,
2679                          &frequency, &cyclic_probability);
2680             }
2681         }
2682
2683       bitmap_clear_bit (tovisit, bb->index);
2684
2685       e = find_edge (bb, head);
2686       if (e)
2687         {
2688           sreal tmp;
2689
2690           /* EDGE_INFO (e)->back_edge_prob
2691              = ((e->probability * BLOCK_INFO (bb)->frequency)
2692              / REG_BR_PROB_BASE); */
2693
2694           sreal_init (&tmp, e->probability, 0);
2695           sreal_mul (&tmp, &tmp, &BLOCK_INFO (bb)->frequency);
2696           sreal_mul (&EDGE_INFO (e)->back_edge_prob,
2697                      &tmp, &real_inv_br_prob_base);
2698         }
2699
2700       /* Propagate to successor blocks.  */
2701       FOR_EACH_EDGE (e, ei, bb->succs)
2702         if (!(e->flags & EDGE_DFS_BACK)
2703             && BLOCK_INFO (e->dest)->npredecessors)
2704           {
2705             BLOCK_INFO (e->dest)->npredecessors--;
2706             if (!BLOCK_INFO (e->dest)->npredecessors)
2707               {
2708                 if (!nextbb)
2709                   nextbb = e->dest;
2710                 else
2711                   BLOCK_INFO (last)->next = e->dest;
2712
2713                 last = e->dest;
2714               }
2715           }
2716     }
2717 }
2718
2719 /* Estimate frequencies in loops at same nest level.  */
2720
2721 static void
2722 estimate_loops_at_level (struct loop *first_loop)
2723 {
2724   struct loop *loop;
2725
2726   for (loop = first_loop; loop; loop = loop->next)
2727     {
2728       edge e;
2729       basic_block *bbs;
2730       unsigned i;
2731       bitmap tovisit = BITMAP_ALLOC (NULL);
2732
2733       estimate_loops_at_level (loop->inner);
2734
2735       /* Find current loop back edge and mark it.  */
2736       e = loop_latch_edge (loop);
2737       EDGE_INFO (e)->back_edge = 1;
2738
2739       bbs = get_loop_body (loop);
2740       for (i = 0; i < loop->num_nodes; i++)
2741         bitmap_set_bit (tovisit, bbs[i]->index);
2742       free (bbs);
2743       propagate_freq (loop->header, tovisit);
2744       BITMAP_FREE (tovisit);
2745     }
2746 }
2747
2748 /* Propagates frequencies through structure of loops.  */
2749
2750 static void
2751 estimate_loops (void)
2752 {
2753   bitmap tovisit = BITMAP_ALLOC (NULL);
2754   basic_block bb;
2755
2756   /* Start by estimating the frequencies in the loops.  */
2757   if (number_of_loops (cfun) > 1)
2758     estimate_loops_at_level (current_loops->tree_root->inner);
2759
2760   /* Now propagate the frequencies through all the blocks.  */
2761   FOR_ALL_BB (bb)
2762     {
2763       bitmap_set_bit (tovisit, bb->index);
2764     }
2765   propagate_freq (ENTRY_BLOCK_PTR, tovisit);
2766   BITMAP_FREE (tovisit);
2767 }
2768
2769 /* Drop the profile for NODE to guessed, and update its frequency based on
2770    whether it is expected to be hot given the CALL_COUNT.  */
2771
2772 static void
2773 drop_profile (struct cgraph_node *node, gcov_type call_count)
2774 {
2775   struct function *fn = DECL_STRUCT_FUNCTION (node->decl);
2776   /* In the case where this was called by another function with a
2777      dropped profile, call_count will be 0. Since there are no
2778      non-zero call counts to this function, we don't know for sure
2779      whether it is hot, and therefore it will be marked normal below.  */
2780   bool hot = maybe_hot_count_p (NULL, call_count);
2781
2782   if (dump_file)
2783     fprintf (dump_file,
2784              "Dropping 0 profile for %s/%i. %s based on calls.\n",
2785              node->name (), node->order,
2786              hot ? "Function is hot" : "Function is normal");
2787   /* We only expect to miss profiles for functions that are reached
2788      via non-zero call edges in cases where the function may have
2789      been linked from another module or library (COMDATs and extern
2790      templates). See the comments below for handle_missing_profiles.
2791      Also, only warn in cases where the missing counts exceed the
2792      number of training runs. In certain cases with an execv followed
2793      by a no-return call the profile for the no-return call is not
2794      dumped and there can be a mismatch.  */
2795   if (!DECL_COMDAT (node->decl) && !DECL_EXTERNAL (node->decl)
2796       && call_count > profile_info->runs)
2797     {
2798       if (flag_profile_correction)
2799         {
2800           if (dump_file)
2801             fprintf (dump_file,
2802                      "Missing counts for called function %s/%i\n",
2803                      node->name (), node->order);
2804         }
2805       else
2806         warning (0, "Missing counts for called function %s/%i",
2807                  node->name (), node->order);
2808     }
2809
2810   profile_status_for_function (fn)
2811       = (flag_guess_branch_prob ? PROFILE_GUESSED : PROFILE_ABSENT);
2812   node->frequency
2813       = hot ? NODE_FREQUENCY_HOT : NODE_FREQUENCY_NORMAL;
2814 }
2815
2816 /* In the case of COMDAT routines, multiple object files will contain the same
2817    function and the linker will select one for the binary. In that case
2818    all the other copies from the profile instrument binary will be missing
2819    profile counts. Look for cases where this happened, due to non-zero
2820    call counts going to 0-count functions, and drop the profile to guessed
2821    so that we can use the estimated probabilities and avoid optimizing only
2822    for size.
2823
2824    The other case where the profile may be missing is when the routine
2825    is not going to be emitted to the object file, e.g. for "extern template"
2826    class methods. Those will be marked DECL_EXTERNAL. Emit a warning in
2827    all other cases of non-zero calls to 0-count functions.  */
2828
2829 void
2830 handle_missing_profiles (void)
2831 {
2832   struct cgraph_node *node;
2833   int unlikely_count_fraction = PARAM_VALUE (UNLIKELY_BB_COUNT_FRACTION);
2834   vec<struct cgraph_node *> worklist;
2835   worklist.create (64);
2836
2837   /* See if 0 count function has non-0 count callers.  In this case we
2838      lost some profile.  Drop its function profile to PROFILE_GUESSED.  */
2839   FOR_EACH_DEFINED_FUNCTION (node)
2840     {
2841       struct cgraph_edge *e;
2842       gcov_type call_count = 0;
2843       struct function *fn = DECL_STRUCT_FUNCTION (node->decl);
2844
2845       if (node->count)
2846         continue;
2847       for (e = node->callers; e; e = e->next_caller)
2848         call_count += e->count;
2849       if (call_count
2850           && fn && fn->cfg
2851           && (call_count * unlikely_count_fraction >= profile_info->runs))
2852         {
2853           drop_profile (node, call_count);
2854           worklist.safe_push (node);
2855         }
2856     }
2857
2858   /* Propagate the profile dropping to other 0-count COMDATs that are
2859      potentially called by COMDATs we already dropped the profile on.  */
2860   while (worklist.length () > 0)
2861     {
2862       struct cgraph_edge *e;
2863
2864       node = worklist.pop ();
2865       for (e = node->callees; e; e = e->next_caller)
2866         {
2867           struct cgraph_node *callee = e->callee;
2868           struct function *fn = DECL_STRUCT_FUNCTION (callee->decl);
2869
2870           if (callee->count > 0)
2871             continue;
2872           if (DECL_COMDAT (callee->decl) && fn && fn->cfg
2873               && profile_status_for_function (fn) == PROFILE_READ)
2874             {
2875               drop_profile (node, 0);
2876               worklist.safe_push (callee);
2877             }
2878         }
2879     }
2880   worklist.release ();
2881 }
2882
2883 /* Convert counts measured by profile driven feedback to frequencies.
2884    Return nonzero iff there was any nonzero execution count.  */
2885
2886 int
2887 counts_to_freqs (void)
2888 {
2889   gcov_type count_max, true_count_max = 0;
2890   basic_block bb;
2891
2892   /* Don't overwrite the estimated frequencies when the profile for
2893      the function is missing.  We may drop this function PROFILE_GUESSED
2894      later in drop_profile ().  */
2895   if (!ENTRY_BLOCK_PTR->count)
2896     return 0;
2897
2898   FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
2899     true_count_max = MAX (bb->count, true_count_max);
2900
2901   count_max = MAX (true_count_max, 1);
2902   FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
2903     bb->frequency = (bb->count * BB_FREQ_MAX + count_max / 2) / count_max;
2904
2905   return true_count_max;
2906 }
2907
2908 /* Return true if function is likely to be expensive, so there is no point to
2909    optimize performance of prologue, epilogue or do inlining at the expense
2910    of code size growth.  THRESHOLD is the limit of number of instructions
2911    function can execute at average to be still considered not expensive.  */
2912
2913 bool
2914 expensive_function_p (int threshold)
2915 {
2916   unsigned int sum = 0;
2917   basic_block bb;
2918   unsigned int limit;
2919
2920   /* We can not compute accurately for large thresholds due to scaled
2921      frequencies.  */
2922   gcc_assert (threshold <= BB_FREQ_MAX);
2923
2924   /* Frequencies are out of range.  This either means that function contains
2925      internal loop executing more than BB_FREQ_MAX times or profile feedback
2926      is available and function has not been executed at all.  */
2927   if (ENTRY_BLOCK_PTR->frequency == 0)
2928     return true;
2929
2930   /* Maximally BB_FREQ_MAX^2 so overflow won't happen.  */
2931   limit = ENTRY_BLOCK_PTR->frequency * threshold;
2932   FOR_EACH_BB (bb)
2933     {
2934       rtx insn;
2935
2936       FOR_BB_INSNS (bb, insn)
2937         if (active_insn_p (insn))
2938           {
2939             sum += bb->frequency;
2940             if (sum > limit)
2941               return true;
2942         }
2943     }
2944
2945   return false;
2946 }
2947
2948 /* Estimate and propagate basic block frequencies using the given branch
2949    probabilities.  If FORCE is true, the frequencies are used to estimate
2950    the counts even when there are already non-zero profile counts.  */
2951
2952 void
2953 estimate_bb_frequencies (bool force)
2954 {
2955   basic_block bb;
2956   sreal freq_max;
2957
2958   if (force || profile_status != PROFILE_READ || !counts_to_freqs ())
2959     {
2960       static int real_values_initialized = 0;
2961
2962       if (!real_values_initialized)
2963         {
2964           real_values_initialized = 1;
2965           sreal_init (&real_zero, 0, 0);
2966           sreal_init (&real_one, 1, 0);
2967           sreal_init (&real_br_prob_base, REG_BR_PROB_BASE, 0);
2968           sreal_init (&real_bb_freq_max, BB_FREQ_MAX, 0);
2969           sreal_init (&real_one_half, 1, -1);
2970           sreal_div (&real_inv_br_prob_base, &real_one, &real_br_prob_base);
2971           sreal_sub (&real_almost_one, &real_one, &real_inv_br_prob_base);
2972         }
2973
2974       mark_dfs_back_edges ();
2975
2976       single_succ_edge (ENTRY_BLOCK_PTR)->probability = REG_BR_PROB_BASE;
2977
2978       /* Set up block info for each basic block.  */
2979       alloc_aux_for_blocks (sizeof (struct block_info_def));
2980       alloc_aux_for_edges (sizeof (struct edge_info_def));
2981       FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
2982         {
2983           edge e;
2984           edge_iterator ei;
2985
2986           FOR_EACH_EDGE (e, ei, bb->succs)
2987             {
2988               sreal_init (&EDGE_INFO (e)->back_edge_prob, e->probability, 0);
2989               sreal_mul (&EDGE_INFO (e)->back_edge_prob,
2990                          &EDGE_INFO (e)->back_edge_prob,
2991                          &real_inv_br_prob_base);
2992             }
2993         }
2994
2995       /* First compute frequencies locally for each loop from innermost
2996          to outermost to examine frequencies for back edges.  */
2997       estimate_loops ();
2998
2999       memcpy (&freq_max, &real_zero, sizeof (real_zero));
3000       FOR_EACH_BB (bb)
3001         if (sreal_compare (&freq_max, &BLOCK_INFO (bb)->frequency) < 0)
3002           memcpy (&freq_max, &BLOCK_INFO (bb)->frequency, sizeof (freq_max));
3003
3004       sreal_div (&freq_max, &real_bb_freq_max, &freq_max);
3005       FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
3006         {
3007           sreal tmp;
3008
3009           sreal_mul (&tmp, &BLOCK_INFO (bb)->frequency, &freq_max);
3010           sreal_add (&tmp, &tmp, &real_one_half);
3011           bb->frequency = sreal_to_int (&tmp);
3012         }
3013
3014       free_aux_for_blocks ();
3015       free_aux_for_edges ();
3016     }
3017   compute_function_frequency ();
3018 }
3019
3020 /* Decide whether function is hot, cold or unlikely executed.  */
3021 void
3022 compute_function_frequency (void)
3023 {
3024   basic_block bb;
3025   struct cgraph_node *node = cgraph_get_node (current_function_decl);
3026
3027   if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
3028       || MAIN_NAME_P (DECL_NAME (current_function_decl)))
3029     node->only_called_at_startup = true;
3030   if (DECL_STATIC_DESTRUCTOR (current_function_decl))
3031     node->only_called_at_exit = true;
3032
3033   if (profile_status != PROFILE_READ)
3034     {
3035       int flags = flags_from_decl_or_type (current_function_decl);
3036       if (lookup_attribute ("cold", DECL_ATTRIBUTES (current_function_decl))
3037           != NULL)
3038         node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
3039       else if (lookup_attribute ("hot", DECL_ATTRIBUTES (current_function_decl))
3040                != NULL)
3041         node->frequency = NODE_FREQUENCY_HOT;
3042       else if (flags & ECF_NORETURN)
3043         node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
3044       else if (MAIN_NAME_P (DECL_NAME (current_function_decl)))
3045         node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
3046       else if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
3047                || DECL_STATIC_DESTRUCTOR (current_function_decl))
3048         node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
3049       return;
3050     }
3051
3052   /* Only first time try to drop function into unlikely executed.
3053      After inlining the roundoff errors may confuse us.
3054      Ipa-profile pass will drop functions only called from unlikely
3055      functions to unlikely and that is most of what we care about.  */
3056   if (!cfun->after_inlining)
3057     node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
3058   FOR_EACH_BB (bb)
3059     {
3060       if (maybe_hot_bb_p (cfun, bb))
3061         {
3062           node->frequency = NODE_FREQUENCY_HOT;
3063           return;
3064         }
3065       if (!probably_never_executed_bb_p (cfun, bb))
3066         node->frequency = NODE_FREQUENCY_NORMAL;
3067     }
3068 }
3069
3070 static bool
3071 gate_estimate_probability (void)
3072 {
3073   return flag_guess_branch_prob;
3074 }
3075
3076 /* Build PREDICT_EXPR.  */
3077 tree
3078 build_predict_expr (enum br_predictor predictor, enum prediction taken)
3079 {
3080   tree t = build1 (PREDICT_EXPR, void_type_node,
3081                    build_int_cst (integer_type_node, predictor));
3082   SET_PREDICT_EXPR_OUTCOME (t, taken);
3083   return t;
3084 }
3085
3086 const char *
3087 predictor_name (enum br_predictor predictor)
3088 {
3089   return predictor_info[predictor].name;
3090 }
3091
3092 namespace {
3093
3094 const pass_data pass_data_profile =
3095 {
3096   GIMPLE_PASS, /* type */
3097   "profile_estimate", /* name */
3098   OPTGROUP_NONE, /* optinfo_flags */
3099   true, /* has_gate */
3100   true, /* has_execute */
3101   TV_BRANCH_PROB, /* tv_id */
3102   PROP_cfg, /* properties_required */
3103   0, /* properties_provided */
3104   0, /* properties_destroyed */
3105   0, /* todo_flags_start */
3106   TODO_verify_ssa, /* todo_flags_finish */
3107 };
3108
3109 class pass_profile : public gimple_opt_pass
3110 {
3111 public:
3112   pass_profile (gcc::context *ctxt)
3113     : gimple_opt_pass (pass_data_profile, ctxt)
3114   {}
3115
3116   /* opt_pass methods: */
3117   bool gate () { return gate_estimate_probability (); }
3118   unsigned int execute () { return tree_estimate_probability_driver (); }
3119
3120 }; // class pass_profile
3121
3122 } // anon namespace
3123
3124 gimple_opt_pass *
3125 make_pass_profile (gcc::context *ctxt)
3126 {
3127   return new pass_profile (ctxt);
3128 }
3129
3130 namespace {
3131
3132 const pass_data pass_data_strip_predict_hints =
3133 {
3134   GIMPLE_PASS, /* type */
3135   "*strip_predict_hints", /* name */
3136   OPTGROUP_NONE, /* optinfo_flags */
3137   false, /* has_gate */
3138   true, /* has_execute */
3139   TV_BRANCH_PROB, /* tv_id */
3140   PROP_cfg, /* properties_required */
3141   0, /* properties_provided */
3142   0, /* properties_destroyed */
3143   0, /* todo_flags_start */
3144   TODO_verify_ssa, /* todo_flags_finish */
3145 };
3146
3147 class pass_strip_predict_hints : public gimple_opt_pass
3148 {
3149 public:
3150   pass_strip_predict_hints (gcc::context *ctxt)
3151     : gimple_opt_pass (pass_data_strip_predict_hints, ctxt)
3152   {}
3153
3154   /* opt_pass methods: */
3155   opt_pass * clone () { return new pass_strip_predict_hints (m_ctxt); }
3156   unsigned int execute () { return strip_predict_hints (); }
3157
3158 }; // class pass_strip_predict_hints
3159
3160 } // anon namespace
3161
3162 gimple_opt_pass *
3163 make_pass_strip_predict_hints (gcc::context *ctxt)
3164 {
3165   return new pass_strip_predict_hints (ctxt);
3166 }
3167
3168 /* Rebuild function frequencies.  Passes are in general expected to
3169    maintain profile by hand, however in some cases this is not possible:
3170    for example when inlining several functions with loops freuqencies might run
3171    out of scale and thus needs to be recomputed.  */
3172
3173 void
3174 rebuild_frequencies (void)
3175 {
3176   timevar_push (TV_REBUILD_FREQUENCIES);
3177
3178   /* When the max bb count in the function is small, there is a higher
3179      chance that there were truncation errors in the integer scaling
3180      of counts by inlining and other optimizations. This could lead
3181      to incorrect classification of code as being cold when it isn't.
3182      In that case, force the estimation of bb counts/frequencies from the
3183      branch probabilities, rather than computing frequencies from counts,
3184      which may also lead to frequencies incorrectly reduced to 0. There
3185      is less precision in the probabilities, so we only do this for small
3186      max counts.  */
3187   gcov_type count_max = 0;
3188   basic_block bb;
3189   FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
3190     count_max = MAX (bb->count, count_max);
3191
3192   if (profile_status == PROFILE_GUESSED
3193       || (profile_status == PROFILE_READ && count_max < REG_BR_PROB_BASE/10))
3194     {
3195       loop_optimizer_init (0);
3196       add_noreturn_fake_exit_edges ();
3197       mark_irreducible_loops ();
3198       connect_infinite_loops_to_exit ();
3199       estimate_bb_frequencies (true);
3200       remove_fake_exit_edges ();
3201       loop_optimizer_finalize ();
3202     }
3203   else if (profile_status == PROFILE_READ)
3204     counts_to_freqs ();
3205   else
3206     gcc_unreachable ();
3207   timevar_pop (TV_REBUILD_FREQUENCIES);
3208 }