gcc/predict.c

   1 /* Branch prediction routines for the GNU compiler.
   2    Copyright (C) 2000-2013 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it under
   7 the terms of the GNU General Public License as published by the Free
   8 Software Foundation; either version 3, or (at your option) any later
   9 version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 /* References:
  21
  22    [1] "Branch Prediction for Free"
  23        Ball and Larus; PLDI '93.
  24    [2] "Static Branch Frequency and Program Profile Analysis"
  25        Wu and Larus; MICRO-27.
  26    [3] "Corpus-based Static Branch Prediction"
  27        Calder, Grunwald, Lindsay, Martin, Mozer, and Zorn; PLDI '95.  */
  28
  29
  30 #include "config.h"
  31 #include "system.h"
  32 #include "coretypes.h"
  33 #include "tm.h"
  34 #include "tree.h"
  35 #include "rtl.h"
  36 #include "tm_p.h"
  37 #include "hard-reg-set.h"
  38 #include "basic-block.h"
  39 #include "insn-config.h"
  40 #include "regs.h"
  41 #include "flags.h"
  42 #include "function.h"
  43 #include "except.h"
  44 #include "diagnostic-core.h"
  45 #include "recog.h"
  46 #include "expr.h"
  47 #include "predict.h"
  48 #include "coverage.h"
  49 #include "sreal.h"
  50 #include "params.h"
  51 #include "target.h"
  52 #include "cfgloop.h"
  53 #include "tree-ssa.h"
  54 #include "ggc.h"
  55 #include "tree-pass.h"
  56 #include "tree-scalar-evolution.h"
  57 #include "cfgloop.h"
  58 #include "pointer-set.h"
  59
  60 /* real constants: 0, 1, 1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE,
  61                    1/REG_BR_PROB_BASE, 0.5, BB_FREQ_MAX.  */
  62 static sreal real_zero, real_one, real_almost_one, real_br_prob_base,
  63              real_inv_br_prob_base, real_one_half, real_bb_freq_max;
  64
/* Random guesstimation given names.
   PROB_VERY_UNLIKELY should be small enough so that a basic block predicted
   by it gets below HOT_BB_FREQUENCY_FRACTION.  */
  68 #define PROB_VERY_UNLIKELY      (REG_BR_PROB_BASE / 2000 - 1)
  69 #define PROB_EVEN               (REG_BR_PROB_BASE / 2)
  70 #define PROB_VERY_LIKELY        (REG_BR_PROB_BASE - PROB_VERY_UNLIKELY)
  71 #define PROB_ALWAYS             (REG_BR_PROB_BASE)
  72
  73 static void combine_predictions_for_insn (rtx, basic_block);
  74 static void dump_prediction (FILE *, enum br_predictor, int, basic_block, int);
  75 static void predict_paths_leading_to (basic_block, enum br_predictor, enum prediction);
  76 static void predict_paths_leading_to_edge (edge, enum br_predictor, enum prediction);
  77 static bool can_predict_insn_p (const_rtx);
  78
/* Information we hold about each branch predictor.
   Filled using information from predict.def.  */

struct predictor_info
{
  const char *const name;       /* Name used in the debugging dumps.  */
  const int hitrate;            /* Expected hitrate used by
                                   predict_insn_def call.  */
  const int flags;              /* Bitmask of PRED_FLAG_* values; tested
                                   against PRED_FLAG_FIRST_MATCH when
                                   predictions are combined.  */
};
  89
/* Use the given predictor without Dempster-Shafer theory if it matches
   using first_match heuristics.  */
  92 #define PRED_FLAG_FIRST_MATCH 1
  93
  94 /* Recompute hitrate in percent to our representation.  */
  95
  96 #define HITRATE(VAL) ((int) ((VAL) * REG_BR_PROB_BASE + 50) / 100)
  97
  98 #define DEF_PREDICTOR(ENUM, NAME, HITRATE, FLAGS) {NAME, HITRATE, FLAGS},
  99 static const struct predictor_info predictor_info[]= {
 100 #include "predict.def"
 101
 102   /* Upper bound on predictors.  */
 103   {NULL, 0, 0}
 104 };
 105 #undef DEF_PREDICTOR
 106
 107 /* Return TRUE if frequency FREQ is considered to be hot.  */
 108
 109 static inline bool
 110 maybe_hot_frequency_p (struct function *fun, int freq)
 111 {
 112   struct cgraph_node *node = cgraph_get_node (fun->decl);
 113   if (!profile_info || !flag_branch_probabilities)
 114     {
 115       if (node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED)
 116         return false;
 117       if (node->frequency == NODE_FREQUENCY_HOT)
 118         return true;
 119     }
 120   if (profile_status_for_function (fun) == PROFILE_ABSENT)
 121     return true;
 122   if (node->frequency == NODE_FREQUENCY_EXECUTED_ONCE
 123       && freq < (ENTRY_BLOCK_PTR_FOR_FUNCTION (fun)->frequency * 2 / 3))
 124     return false;
 125   if (PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION) == 0)
 126     return false;
 127   if (freq < (ENTRY_BLOCK_PTR_FOR_FUNCTION (fun)->frequency
 128               / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION)))
 129     return false;
 130   return true;
 131 }
 132
 133 static gcov_type min_count = -1;
 134
 135 /* Determine the threshold for hot BB counts.  */
 136
 137 gcov_type
 138 get_hot_bb_threshold ()
 139 {
 140   gcov_working_set_t *ws;
 141   if (min_count == -1)
 142     {
 143       ws = find_working_set (PARAM_VALUE (HOT_BB_COUNT_WS_PERMILLE));
 144       gcc_assert (ws);
 145       min_count = ws->min_counter;
 146     }
 147   return min_count;
 148 }
 149
/* Set the threshold for hot BB counts to MIN.  The value is stored in
   min_count, which get_hot_bb_threshold returns as long as it differs
   from -1.  */

void
set_hot_bb_threshold (gcov_type min)
{
  min_count = min;
}
 157
 158 /* Return TRUE if frequency FREQ is considered to be hot.  */
 159
 160 static inline bool
 161 maybe_hot_count_p (struct function *fun, gcov_type count)
 162 {
 163   if (fun && profile_status_for_function (fun) != PROFILE_READ)
 164     return true;
 165   /* Code executed at most once is not hot.  */
 166   if (profile_info->runs >= count)
 167     return false;
 168   return (count >= get_hot_bb_threshold ());
 169 }
 170
 171 /* Return true in case BB can be CPU intensive and should be optimized
 172    for maximal performance.  */
 173
 174 bool
 175 maybe_hot_bb_p (struct function *fun, const_basic_block bb)
 176 {
 177   gcc_checking_assert (fun);
 178   if (profile_status_for_function (fun) == PROFILE_READ)
 179     return maybe_hot_count_p (fun, bb->count);
 180   return maybe_hot_frequency_p (fun, bb->frequency);
 181 }
 182
 183 /* Return true if the call can be hot.  */
 184
 185 bool
 186 cgraph_maybe_hot_edge_p (struct cgraph_edge *edge)
 187 {
 188   if (profile_info && flag_branch_probabilities
 189       && !maybe_hot_count_p (NULL,
 190                              edge->count))
 191     return false;
 192   if (edge->caller->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED
 193       || (edge->callee
 194           && edge->callee->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED))
 195     return false;
 196   if (edge->caller->frequency > NODE_FREQUENCY_UNLIKELY_EXECUTED
 197       && (edge->callee
 198           && edge->callee->frequency <= NODE_FREQUENCY_EXECUTED_ONCE))
 199     return false;
 200   if (optimize_size)
 201     return false;
 202   if (edge->caller->frequency == NODE_FREQUENCY_HOT)
 203     return true;
 204   if (edge->caller->frequency == NODE_FREQUENCY_EXECUTED_ONCE
 205       && edge->frequency < CGRAPH_FREQ_BASE * 3 / 2)
 206     return false;
 207   if (flag_guess_branch_prob)
 208     {
 209       if (PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION) == 0
 210           || edge->frequency <= (CGRAPH_FREQ_BASE
 211                                  / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION)))
 212         return false;
 213     }
 214   return true;
 215 }
 216
 217 /* Return true in case BB can be CPU intensive and should be optimized
 218    for maximal performance.  */
 219
 220 bool
 221 maybe_hot_edge_p (edge e)
 222 {
 223   if (profile_status == PROFILE_READ)
 224     return maybe_hot_count_p (cfun, e->count);
 225   return maybe_hot_frequency_p (cfun, EDGE_FREQUENCY (e));
 226 }
 227
 228
 229
 230 /* Return true if profile COUNT and FREQUENCY, or function FUN static
 231    node frequency reflects never being executed.  */
 232
 233 static bool
 234 probably_never_executed (struct function *fun,
 235                          gcov_type count, int frequency)
 236 {
 237   gcc_checking_assert (fun);
 238   if (profile_status_for_function (fun) == PROFILE_READ)
 239     {
 240       int unlikely_count_fraction = PARAM_VALUE (UNLIKELY_BB_COUNT_FRACTION);
 241       if (count * unlikely_count_fraction >= profile_info->runs)
 242         return false;
 243       if (!frequency)
 244         return true;
 245       if (!ENTRY_BLOCK_PTR->frequency)
 246         return false;
 247       if (ENTRY_BLOCK_PTR->count)
 248         {
 249           gcov_type computed_count;
 250           /* Check for possibility of overflow, in which case entry bb count
 251              is large enough to do the division first without losing much
 252              precision.  */
 253           if (ENTRY_BLOCK_PTR->count < REG_BR_PROB_BASE * REG_BR_PROB_BASE)
 254             {
 255               gcov_type scaled_count
 256                   = frequency * ENTRY_BLOCK_PTR->count * unlikely_count_fraction;
 257               computed_count = RDIV (scaled_count, ENTRY_BLOCK_PTR->frequency);
 258             }
 259           else
 260             {
 261               computed_count = RDIV (ENTRY_BLOCK_PTR->count,
 262                                      ENTRY_BLOCK_PTR->frequency);
 263               computed_count *= frequency * unlikely_count_fraction;
 264             }
 265           if (computed_count >= profile_info->runs)
 266             return false;
 267         }
 268       return true;
 269     }
 270   if ((!profile_info || !flag_branch_probabilities)
 271       && (cgraph_get_node (fun->decl)->frequency
 272           == NODE_FREQUENCY_UNLIKELY_EXECUTED))
 273     return true;
 274   return false;
 275 }
 276
 277
 278 /* Return true in case BB is probably never executed.  */
 279
 280 bool
 281 probably_never_executed_bb_p (struct function *fun, const_basic_block bb)
 282 {
 283   return probably_never_executed (fun, bb->count, bb->frequency);
 284 }
 285
 286
 287 /* Return true in case edge E is probably never executed.  */
 288
 289 bool
 290 probably_never_executed_edge_p (struct function *fun, edge e)
 291 {
 292   return probably_never_executed (fun, e->count, EDGE_FREQUENCY (e));
 293 }
 294
 295 /* Return true if NODE should be optimized for size.  */
 296
 297 bool
 298 cgraph_optimize_for_size_p (struct cgraph_node *node)
 299 {
 300   if (optimize_size)
 301     return true;
 302   if (node && (node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED))
 303     return true;
 304   else
 305     return false;
 306 }
 307
 308 /* Return true when current function should always be optimized for size.  */
 309
 310 bool
 311 optimize_function_for_size_p (struct function *fun)
 312 {
 313   if (optimize_size)
 314     return true;
 315   if (!fun || !fun->decl)
 316     return false;
 317   return cgraph_optimize_for_size_p (cgraph_get_node (fun->decl));
 318 }
 319
 320 /* Return true when current function should always be optimized for speed.  */
 321
 322 bool
 323 optimize_function_for_speed_p (struct function *fun)
 324 {
 325   return !optimize_function_for_size_p (fun);
 326 }
 327
 328 /* Return TRUE when BB should be optimized for size.  */
 329
 330 bool
 331 optimize_bb_for_size_p (const_basic_block bb)
 332 {
 333   return optimize_function_for_size_p (cfun) || !maybe_hot_bb_p (cfun, bb);
 334 }
 335
 336 /* Return TRUE when BB should be optimized for speed.  */
 337
 338 bool
 339 optimize_bb_for_speed_p (const_basic_block bb)
 340 {
 341   return !optimize_bb_for_size_p (bb);
 342 }
 343
 344 /* Return TRUE when BB should be optimized for size.  */
 345
 346 bool
 347 optimize_edge_for_size_p (edge e)
 348 {
 349   return optimize_function_for_size_p (cfun) || !maybe_hot_edge_p (e);
 350 }
 351
 352 /* Return TRUE when BB should be optimized for speed.  */
 353
 354 bool
 355 optimize_edge_for_speed_p (edge e)
 356 {
 357   return !optimize_edge_for_size_p (e);
 358 }
 359
 360 /* Return TRUE when BB should be optimized for size.  */
 361
 362 bool
 363 optimize_insn_for_size_p (void)
 364 {
 365   return optimize_function_for_size_p (cfun) || !crtl->maybe_hot_insn_p;
 366 }
 367
 368 /* Return TRUE when BB should be optimized for speed.  */
 369
 370 bool
 371 optimize_insn_for_speed_p (void)
 372 {
 373   return !optimize_insn_for_size_p ();
 374 }
 375
 376 /* Return TRUE when LOOP should be optimized for size.  */
 377
 378 bool
 379 optimize_loop_for_size_p (struct loop *loop)
 380 {
 381   return optimize_bb_for_size_p (loop->header);
 382 }
 383
 384 /* Return TRUE when LOOP should be optimized for speed.  */
 385
 386 bool
 387 optimize_loop_for_speed_p (struct loop *loop)
 388 {
 389   return optimize_bb_for_speed_p (loop->header);
 390 }
 391
 392 /* Return TRUE when LOOP nest should be optimized for speed.  */
 393
 394 bool
 395 optimize_loop_nest_for_speed_p (struct loop *loop)
 396 {
 397   struct loop *l = loop;
 398   if (optimize_loop_for_speed_p (loop))
 399     return true;
 400   l = loop->inner;
 401   while (l && l != loop)
 402     {
 403       if (optimize_loop_for_speed_p (l))
 404         return true;
 405       if (l->inner)
 406         l = l->inner;
 407       else if (l->next)
 408         l = l->next;
 409       else
 410         {
 411           while (l != loop && !l->next)
 412             l = loop_outer (l);
 413           if (l != loop)
 414             l = l->next;
 415         }
 416     }
 417   return false;
 418 }
 419
 420 /* Return TRUE when LOOP nest should be optimized for size.  */
 421
 422 bool
 423 optimize_loop_nest_for_size_p (struct loop *loop)
 424 {
 425   return !optimize_loop_nest_for_speed_p (loop);
 426 }
 427
 428 /* Return true when edge E is likely to be well predictable by branch
 429    predictor.  */
 430
 431 bool
 432 predictable_edge_p (edge e)
 433 {
 434   if (profile_status == PROFILE_ABSENT)
 435     return false;
 436   if ((e->probability
 437        <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100)
 438       || (REG_BR_PROB_BASE - e->probability
 439           <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100))
 440     return true;
 441   return false;
 442 }
 443
 444
 445 /* Set RTL expansion for BB profile.  */
 446
 447 void
 448 rtl_profile_for_bb (basic_block bb)
 449 {
 450   crtl->maybe_hot_insn_p = maybe_hot_bb_p (cfun, bb);
 451 }
 452
 453 /* Set RTL expansion for edge profile.  */
 454
 455 void
 456 rtl_profile_for_edge (edge e)
 457 {
 458   crtl->maybe_hot_insn_p = maybe_hot_edge_p (e);
 459 }
 460
/* Set RTL expansion to default mode (i.e. when profile info is not known).
   In this mode every insn is treated as possibly hot.  */
void
default_rtl_profile (void)
{
  crtl->maybe_hot_insn_p = true;
}
 467
 468 /* Return true if the one of outgoing edges is already predicted by
 469    PREDICTOR.  */
 470
 471 bool
 472 rtl_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
 473 {
 474   rtx note;
 475   if (!INSN_P (BB_END (bb)))
 476     return false;
 477   for (note = REG_NOTES (BB_END (bb)); note; note = XEXP (note, 1))
 478     if (REG_NOTE_KIND (note) == REG_BR_PRED
 479         && INTVAL (XEXP (XEXP (note, 0), 0)) == (int)predictor)
 480       return true;
 481   return false;
 482 }
 483
/* This map contains for a basic block the list of predictions for the
   outgoing edges.  */

static struct pointer_map_t *bb_predictions;

/* Structure representing a prediction at the tree level.  The predictions
   recorded for one basic block are chained through ep_next.  */

struct edge_prediction {
    struct edge_prediction *ep_next;   /* Next prediction in the list.  */
    edge ep_edge;                      /* Edge the prediction is about.  */
    enum br_predictor ep_predictor;    /* Predictor that made it.  */
    int ep_probability;                /* Predicted probability of ep_edge.  */
};
 497
 498 /* Return true if the one of outgoing edges is already predicted by
 499    PREDICTOR.  */
 500
 501 bool
 502 gimple_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
 503 {
 504   struct edge_prediction *i;
 505   void **preds = pointer_map_contains (bb_predictions, bb);
 506
 507   if (!preds)
 508     return false;
 509
 510   for (i = (struct edge_prediction *) *preds; i; i = i->ep_next)
 511     if (i->ep_predictor == predictor)
 512       return true;
 513   return false;
 514 }
 515
 516 /* Return true when the probability of edge is reliable.
 517
 518    The profile guessing code is good at predicting branch outcome (ie.
 519    taken/not taken), that is predicted right slightly over 75% of time.
 520    It is however notoriously poor on predicting the probability itself.
 521    In general the profile appear a lot flatter (with probabilities closer
 522    to 50%) than the reality so it is bad idea to use it to drive optimization
 523    such as those disabling dynamic branch prediction for well predictable
 524    branches.
 525
 526    There are two exceptions - edges leading to noreturn edges and edges
 527    predicted by number of iterations heuristics are predicted well.  This macro
 528    should be able to distinguish those, but at the moment it simply check for
 529    noreturn heuristic that is only one giving probability over 99% or bellow
 530    1%.  In future we might want to propagate reliability information across the
 531    CFG if we find this information useful on multiple places.   */
 532 static bool
 533 probability_reliable_p (int prob)
 534 {
 535   return (profile_status == PROFILE_READ
 536           || (profile_status == PROFILE_GUESSED
 537               && (prob <= HITRATE (1) || prob >= HITRATE (99))));
 538 }
 539
 540 /* Same predicate as above, working on edges.  */
 541 bool
 542 edge_probability_reliable_p (const_edge e)
 543 {
 544   return probability_reliable_p (e->probability);
 545 }
 546
 547 /* Same predicate as edge_probability_reliable_p, working on notes.  */
 548 bool
 549 br_prob_note_reliable_p (const_rtx note)
 550 {
 551   gcc_assert (REG_NOTE_KIND (note) == REG_BR_PROB);
 552   return probability_reliable_p (XINT (note, 0));
 553 }
 554
 555 static void
 556 predict_insn (rtx insn, enum br_predictor predictor, int probability)
 557 {
 558   gcc_assert (any_condjump_p (insn));
 559   if (!flag_guess_branch_prob)
 560     return;
 561
 562   add_reg_note (insn, REG_BR_PRED,
 563                 gen_rtx_CONCAT (VOIDmode,
 564                                 GEN_INT ((int) predictor),
 565                                 GEN_INT ((int) probability)));
 566 }
 567
 568 /* Predict insn by given predictor.  */
 569
 570 void
 571 predict_insn_def (rtx insn, enum br_predictor predictor,
 572                   enum prediction taken)
 573 {
 574    int probability = predictor_info[(int) predictor].hitrate;
 575
 576    if (taken != TAKEN)
 577      probability = REG_BR_PROB_BASE - probability;
 578
 579    predict_insn (insn, predictor, probability);
 580 }
 581
 582 /* Predict edge E with given probability if possible.  */
 583
 584 void
 585 rtl_predict_edge (edge e, enum br_predictor predictor, int probability)
 586 {
 587   rtx last_insn;
 588   last_insn = BB_END (e->src);
 589
 590   /* We can store the branch prediction information only about
 591      conditional jumps.  */
 592   if (!any_condjump_p (last_insn))
 593     return;
 594
 595   /* We always store probability of branching.  */
 596   if (e->flags & EDGE_FALLTHRU)
 597     probability = REG_BR_PROB_BASE - probability;
 598
 599   predict_insn (last_insn, predictor, probability);
 600 }
 601
 602 /* Predict edge E with the given PROBABILITY.  */
 603 void
 604 gimple_predict_edge (edge e, enum br_predictor predictor, int probability)
 605 {
 606   gcc_assert (profile_status != PROFILE_GUESSED);
 607   if ((e->src != ENTRY_BLOCK_PTR && EDGE_COUNT (e->src->succs) > 1)
 608       && flag_guess_branch_prob && optimize)
 609     {
 610       struct edge_prediction *i = XNEW (struct edge_prediction);
 611       void **preds = pointer_map_insert (bb_predictions, e->src);
 612
 613       i->ep_next = (struct edge_prediction *) *preds;
 614       *preds = i;
 615       i->ep_probability = probability;
 616       i->ep_predictor = predictor;
 617       i->ep_edge = e;
 618     }
 619 }
 620
 621 /* Remove all predictions on given basic block that are attached
 622    to edge E.  */
 623 void
 624 remove_predictions_associated_with_edge (edge e)
 625 {
 626   void **preds;
 627
 628   if (!bb_predictions)
 629     return;
 630
 631   preds = pointer_map_contains (bb_predictions, e->src);
 632
 633   if (preds)
 634     {
 635       struct edge_prediction **prediction = (struct edge_prediction **) preds;
 636       struct edge_prediction *next;
 637
 638       while (*prediction)
 639         {
 640           if ((*prediction)->ep_edge == e)
 641             {
 642               next = (*prediction)->ep_next;
 643               free (*prediction);
 644               *prediction = next;
 645             }
 646           else
 647             prediction = &((*prediction)->ep_next);
 648         }
 649     }
 650 }
 651
 652 /* Clears the list of predictions stored for BB.  */
 653
 654 static void
 655 clear_bb_predictions (basic_block bb)
 656 {
 657   void **preds = pointer_map_contains (bb_predictions, bb);
 658   struct edge_prediction *pred, *next;
 659
 660   if (!preds)
 661     return;
 662
 663   for (pred = (struct edge_prediction *) *preds; pred; pred = next)
 664     {
 665       next = pred->ep_next;
 666       free (pred);
 667     }
 668   *preds = NULL;
 669 }
 670
 671 /* Return true when we can store prediction on insn INSN.
 672    At the moment we represent predictions only on conditional
 673    jumps, not at computed jump or other complicated cases.  */
 674 static bool
 675 can_predict_insn_p (const_rtx insn)
 676 {
 677   return (JUMP_P (insn)
 678           && any_condjump_p (insn)
 679           && EDGE_COUNT (BLOCK_FOR_INSN (insn)->succs) >= 2);
 680 }
 681
 682 /* Predict edge E by given predictor if possible.  */
 683
 684 void
 685 predict_edge_def (edge e, enum br_predictor predictor,
 686                   enum prediction taken)
 687 {
 688    int probability = predictor_info[(int) predictor].hitrate;
 689
 690    if (taken != TAKEN)
 691      probability = REG_BR_PROB_BASE - probability;
 692
 693    predict_edge (e, predictor, probability);
 694 }
 695
 696 /* Invert all branch predictions or probability notes in the INSN.  This needs
 697    to be done each time we invert the condition used by the jump.  */
 698
 699 void
 700 invert_br_probabilities (rtx insn)
 701 {
 702   rtx note;
 703
 704   for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
 705     if (REG_NOTE_KIND (note) == REG_BR_PROB)
 706       XINT (note, 0) = REG_BR_PROB_BASE - XINT (note, 0);
 707     else if (REG_NOTE_KIND (note) == REG_BR_PRED)
 708       XEXP (XEXP (note, 0), 1)
 709         = GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (XEXP (note, 0), 1)));
 710 }
 711
 712 /* Dump information about the branch prediction to the output file.  */
 713
 714 static void
 715 dump_prediction (FILE *file, enum br_predictor predictor, int probability,
 716                  basic_block bb, int used)
 717 {
 718   edge e;
 719   edge_iterator ei;
 720
 721   if (!file)
 722     return;
 723
 724   FOR_EACH_EDGE (e, ei, bb->succs)
 725     if (! (e->flags & EDGE_FALLTHRU))
 726       break;
 727
 728   fprintf (file, "  %s heuristics%s: %.1f%%",
 729            predictor_info[predictor].name,
 730            used ? "" : " (ignored)", probability * 100.0 / REG_BR_PROB_BASE);
 731
 732   if (bb->count)
 733     {
 734       fprintf (file, "  exec ");
 735       fprintf (file, HOST_WIDEST_INT_PRINT_DEC, bb->count);
 736       if (e)
 737         {
 738           fprintf (file, " hit ");
 739           fprintf (file, HOST_WIDEST_INT_PRINT_DEC, e->count);
 740           fprintf (file, " (%.1f%%)", e->count * 100.0 / bb->count);
 741         }
 742     }
 743
 744   fprintf (file, "\n");
 745 }
 746
 747 /* We can not predict the probabilities of outgoing edges of bb.  Set them
 748    evenly and hope for the best.  */
 749 static void
 750 set_even_probabilities (basic_block bb)
 751 {
 752   int nedges = 0;
 753   edge e;
 754   edge_iterator ei;
 755
 756   FOR_EACH_EDGE (e, ei, bb->succs)
 757     if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
 758       nedges ++;
 759   FOR_EACH_EDGE (e, ei, bb->succs)
 760     if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
 761       e->probability = (REG_BR_PROB_BASE + nedges / 2) / nedges;
 762     else
 763       e->probability = 0;
 764 }
 765
 766 /* Combine all REG_BR_PRED notes into single probability and attach REG_BR_PROB
 767    note if not already present.  Remove now useless REG_BR_PRED notes.  */
 768
 769 static void
 770 combine_predictions_for_insn (rtx insn, basic_block bb)
 771 {
 772   rtx prob_note;
 773   rtx *pnote;
 774   rtx note;
 775   int best_probability = PROB_EVEN;
 776   enum br_predictor best_predictor = END_PREDICTORS;
 777   int combined_probability = REG_BR_PROB_BASE / 2;
 778   int d;
 779   bool first_match = false;
 780   bool found = false;
 781
 782   if (!can_predict_insn_p (insn))
 783     {
 784       set_even_probabilities (bb);
 785       return;
 786     }
 787
 788   prob_note = find_reg_note (insn, REG_BR_PROB, 0);
 789   pnote = &REG_NOTES (insn);
 790   if (dump_file)
 791     fprintf (dump_file, "Predictions for insn %i bb %i\n", INSN_UID (insn),
 792              bb->index);
 793
 794   /* We implement "first match" heuristics and use probability guessed
 795      by predictor with smallest index.  */
 796   for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
 797     if (REG_NOTE_KIND (note) == REG_BR_PRED)
 798       {
 799         enum br_predictor predictor = ((enum br_predictor)
 800                                        INTVAL (XEXP (XEXP (note, 0), 0)));
 801         int probability = INTVAL (XEXP (XEXP (note, 0), 1));
 802
 803         found = true;
 804         if (best_predictor > predictor)
 805           best_probability = probability, best_predictor = predictor;
 806
 807         d = (combined_probability * probability
 808              + (REG_BR_PROB_BASE - combined_probability)
 809              * (REG_BR_PROB_BASE - probability));
 810
 811         /* Use FP math to avoid overflows of 32bit integers.  */
 812         if (d == 0)
 813           /* If one probability is 0% and one 100%, avoid division by zero.  */
 814           combined_probability = REG_BR_PROB_BASE / 2;
 815         else
 816           combined_probability = (((double) combined_probability) * probability
 817                                   * REG_BR_PROB_BASE / d + 0.5);
 818       }
 819
 820   /* Decide which heuristic to use.  In case we didn't match anything,
 821      use no_prediction heuristic, in case we did match, use either
 822      first match or Dempster-Shaffer theory depending on the flags.  */
 823
 824   if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
 825     first_match = true;
 826
 827   if (!found)
 828     dump_prediction (dump_file, PRED_NO_PREDICTION,
 829                      combined_probability, bb, true);
 830   else
 831     {
 832       dump_prediction (dump_file, PRED_DS_THEORY, combined_probability,
 833                        bb, !first_match);
 834       dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability,
 835                        bb, first_match);
 836     }
 837
 838   if (first_match)
 839     combined_probability = best_probability;
 840   dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);
 841
 842   while (*pnote)
 843     {
 844       if (REG_NOTE_KIND (*pnote) == REG_BR_PRED)
 845         {
 846           enum br_predictor predictor = ((enum br_predictor)
 847                                          INTVAL (XEXP (XEXP (*pnote, 0), 0)));
 848           int probability = INTVAL (XEXP (XEXP (*pnote, 0), 1));
 849
 850           dump_prediction (dump_file, predictor, probability, bb,
 851                            !first_match || best_predictor == predictor);
 852           *pnote = XEXP (*pnote, 1);
 853         }
 854       else
 855         pnote = &XEXP (*pnote, 1);
 856     }
 857
 858   if (!prob_note)
 859     {
 860       add_int_reg_note (insn, REG_BR_PROB, combined_probability);
 861
 862       /* Save the prediction into CFG in case we are seeing non-degenerated
 863          conditional jump.  */
 864       if (!single_succ_p (bb))
 865         {
 866           BRANCH_EDGE (bb)->probability = combined_probability;
 867           FALLTHRU_EDGE (bb)->probability
 868             = REG_BR_PROB_BASE - combined_probability;
 869         }
 870     }
 871   else if (!single_succ_p (bb))
 872     {
 873       int prob = XINT (prob_note, 0);
 874
 875       BRANCH_EDGE (bb)->probability = prob;
 876       FALLTHRU_EDGE (bb)->probability = REG_BR_PROB_BASE - prob;
 877     }
 878   else
 879     single_succ_edge (bb)->probability = REG_BR_PROB_BASE;
 880 }
 881
 882 /* Combine predictions into single probability and store them into CFG.
 883    Remove now useless prediction entries.  */
 884
 885 static void
 886 combine_predictions_for_bb (basic_block bb)
 887 {
 888   int best_probability = PROB_EVEN;
 889   enum br_predictor best_predictor = END_PREDICTORS;
 890   int combined_probability = REG_BR_PROB_BASE / 2;
 891   int d;
 892   bool first_match = false;
 893   bool found = false;
 894   struct edge_prediction *pred;
 895   int nedges = 0;
 896   edge e, first = NULL, second = NULL;
 897   edge_iterator ei;
 898   void **preds;
 899
 900   FOR_EACH_EDGE (e, ei, bb->succs)
 901     if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
 902       {
 903         nedges ++;
 904         if (first && !second)
 905           second = e;
 906         if (!first)
 907           first = e;
 908       }
 909
 910   /* When there is no successor or only one choice, prediction is easy.
 911
 912      We are lazy for now and predict only basic blocks with two outgoing
 913      edges.  It is possible to predict generic case too, but we have to
 914      ignore first match heuristics and do more involved combining.  Implement
 915      this later.  */
 916   if (nedges != 2)
 917     {
 918       if (!bb->count)
 919         set_even_probabilities (bb);
 920       clear_bb_predictions (bb);
 921       if (dump_file)
 922         fprintf (dump_file, "%i edges in bb %i predicted to even probabilities\n",
 923                  nedges, bb->index);
 924       return;
 925     }
 926
 927   if (dump_file)
 928     fprintf (dump_file, "Predictions for bb %i\n", bb->index);
 929
 930   preds = pointer_map_contains (bb_predictions, bb);
 931   if (preds)
 932     {
 933       /* We implement "first match" heuristics and use probability guessed
 934          by predictor with smallest index.  */
 935       for (pred = (struct edge_prediction *) *preds; pred; pred = pred->ep_next)
 936         {
 937           enum br_predictor predictor = pred->ep_predictor;
 938           int probability = pred->ep_probability;
 939
 940           if (pred->ep_edge != first)
 941             probability = REG_BR_PROB_BASE - probability;
 942
 943           found = true;
 944           /* First match heuristics would be widly confused if we predicted
 945              both directions.  */
 946           if (best_predictor > predictor)
 947             {
 948               struct edge_prediction *pred2;
 949               int prob = probability;
 950
 951               for (pred2 = (struct edge_prediction *) *preds; pred2; pred2 = pred2->ep_next)
 952                if (pred2 != pred && pred2->ep_predictor == pred->ep_predictor)
 953                  {
 954                    int probability2 = pred->ep_probability;
 955
 956                    if (pred2->ep_edge != first)
 957                      probability2 = REG_BR_PROB_BASE - probability2;
 958
 959                    if ((probability < REG_BR_PROB_BASE / 2) !=
 960                        (probability2 < REG_BR_PROB_BASE / 2))
 961                      break;
 962
 963                    /* If the same predictor later gave better result, go for it! */
 964                    if ((probability >= REG_BR_PROB_BASE / 2 && (probability2 > probability))
 965                        || (probability <= REG_BR_PROB_BASE / 2 && (probability2 < probability)))
 966                      prob = probability2;
 967                  }
 968               if (!pred2)
 969                 best_probability = prob, best_predictor = predictor;
 970             }
 971
 972           d = (combined_probability * probability
 973                + (REG_BR_PROB_BASE - combined_probability)
 974                * (REG_BR_PROB_BASE - probability));
 975
 976           /* Use FP math to avoid overflows of 32bit integers.  */
 977           if (d == 0)
 978             /* If one probability is 0% and one 100%, avoid division by zero.  */
 979             combined_probability = REG_BR_PROB_BASE / 2;
 980           else
 981             combined_probability = (((double) combined_probability)
 982                                     * probability
 983                                     * REG_BR_PROB_BASE / d + 0.5);
 984         }
 985     }
 986
 987   /* Decide which heuristic to use.  In case we didn't match anything,
 988      use no_prediction heuristic, in case we did match, use either
 989      first match or Dempster-Shaffer theory depending on the flags.  */
 990
 991   if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
 992     first_match = true;
 993
 994   if (!found)
 995     dump_prediction (dump_file, PRED_NO_PREDICTION, combined_probability, bb, true);
 996   else
 997     {
 998       dump_prediction (dump_file, PRED_DS_THEORY, combined_probability, bb,
 999                        !first_match);
1000       dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability, bb,
1001                        first_match);
1002     }
1003
1004   if (first_match)
1005     combined_probability = best_probability;
1006   dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);
1007
1008   if (preds)
1009     {
1010       for (pred = (struct edge_prediction *) *preds; pred; pred = pred->ep_next)
1011         {
1012           enum br_predictor predictor = pred->ep_predictor;
1013           int probability = pred->ep_probability;
1014
1015           if (pred->ep_edge != EDGE_SUCC (bb, 0))
1016             probability = REG_BR_PROB_BASE - probability;
1017           dump_prediction (dump_file, predictor, probability, bb,
1018                            !first_match || best_predictor == predictor);
1019         }
1020     }
1021   clear_bb_predictions (bb);
1022
1023   if (!bb->count)
1024     {
1025       first->probability = combined_probability;
1026       second->probability = REG_BR_PROB_BASE - combined_probability;
1027     }
1028 }
1029
1030 /* Check if T1 and T2 satisfy the IV_COMPARE condition.
1031    Return the SSA_NAME if the condition satisfies, NULL otherwise.
1032
1033    T1 and T2 should be one of the following cases:
1034      1. T1 is SSA_NAME, T2 is NULL
1035      2. T1 is SSA_NAME, T2 is INTEGER_CST between [-4, 4]
1036      3. T2 is SSA_NAME, T1 is INTEGER_CST between [-4, 4]  */
1037
1038 static tree
1039 strips_small_constant (tree t1, tree t2)
1040 {
1041   tree ret = NULL;
1042   int value = 0;
1043
1044   if (!t1)
1045     return NULL;
1046   else if (TREE_CODE (t1) == SSA_NAME)
1047     ret = t1;
1048   else if (host_integerp (t1, 0))
1049     value = tree_low_cst (t1, 0);
1050   else
1051     return NULL;
1052
1053   if (!t2)
1054     return ret;
1055   else if (host_integerp (t2, 0))
1056     value = tree_low_cst (t2, 0);
1057   else if (TREE_CODE (t2) == SSA_NAME)
1058     {
1059       if (ret)
1060         return NULL;
1061       else
1062         ret = t2;
1063     }
1064
1065   if (value <= 4 && value >= -4)
1066     return ret;
1067   else
1068     return NULL;
1069 }
1070
1071 /* Return the SSA_NAME in T or T's operands.
1072    Return NULL if SSA_NAME cannot be found.  */
1073
1074 static tree
1075 get_base_value (tree t)
1076 {
1077   if (TREE_CODE (t) == SSA_NAME)
1078     return t;
1079
1080   if (!BINARY_CLASS_P (t))
1081     return NULL;
1082
1083   switch (TREE_OPERAND_LENGTH (t))
1084     {
1085     case 1:
1086       return strips_small_constant (TREE_OPERAND (t, 0), NULL);
1087     case 2:
1088       return strips_small_constant (TREE_OPERAND (t, 0),
1089                                     TREE_OPERAND (t, 1));
1090     default:
1091       return NULL;
1092     }
1093 }
1094
1095 /* Check the compare STMT in LOOP. If it compares an induction
1096    variable to a loop invariant, return true, and save
1097    LOOP_INVARIANT, COMPARE_CODE and LOOP_STEP.
1098    Otherwise return false and set LOOP_INVAIANT to NULL.  */
1099
1100 static bool
1101 is_comparison_with_loop_invariant_p (gimple stmt, struct loop *loop,
1102                                      tree *loop_invariant,
1103                                      enum tree_code *compare_code,
1104                                      tree *loop_step,
1105                                      tree *loop_iv_base)
1106 {
1107   tree op0, op1, bound, base;
1108   affine_iv iv0, iv1;
1109   enum tree_code code;
1110   tree step;
1111
1112   code = gimple_cond_code (stmt);
1113   *loop_invariant = NULL;
1114
1115   switch (code)
1116     {
1117     case GT_EXPR:
1118     case GE_EXPR:
1119     case NE_EXPR:
1120     case LT_EXPR:
1121     case LE_EXPR:
1122     case EQ_EXPR:
1123       break;
1124
1125     default:
1126       return false;
1127     }
1128
1129   op0 = gimple_cond_lhs (stmt);
1130   op1 = gimple_cond_rhs (stmt);
1131
1132   if ((TREE_CODE (op0) != SSA_NAME && TREE_CODE (op0) != INTEGER_CST)
1133        || (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op1) != INTEGER_CST))
1134     return false;
1135   if (!simple_iv (loop, loop_containing_stmt (stmt), op0, &iv0, true))
1136     return false;
1137   if (!simple_iv (loop, loop_containing_stmt (stmt), op1, &iv1, true))
1138     return false;
1139   if (TREE_CODE (iv0.step) != INTEGER_CST
1140       || TREE_CODE (iv1.step) != INTEGER_CST)
1141     return false;
1142   if ((integer_zerop (iv0.step) && integer_zerop (iv1.step))
1143       || (!integer_zerop (iv0.step) && !integer_zerop (iv1.step)))
1144     return false;
1145
1146   if (integer_zerop (iv0.step))
1147     {
1148       if (code != NE_EXPR && code != EQ_EXPR)
1149         code = invert_tree_comparison (code, false);
1150       bound = iv0.base;
1151       base = iv1.base;
1152       if (host_integerp (iv1.step, 0))
1153         step = iv1.step;
1154       else
1155         return false;
1156     }
1157   else
1158     {
1159       bound = iv1.base;
1160       base = iv0.base;
1161       if (host_integerp (iv0.step, 0))
1162         step = iv0.step;
1163       else
1164         return false;
1165     }
1166
1167   if (TREE_CODE (bound) != INTEGER_CST)
1168     bound = get_base_value (bound);
1169   if (!bound)
1170     return false;
1171   if (TREE_CODE (base) != INTEGER_CST)
1172     base = get_base_value (base);
1173   if (!base)
1174     return false;
1175
1176   *loop_invariant = bound;
1177   *compare_code = code;
1178   *loop_step = step;
1179   *loop_iv_base = base;
1180   return true;
1181 }
1182
1183 /* Compare two SSA_NAMEs: returns TRUE if T1 and T2 are value coherent.  */
1184
1185 static bool
1186 expr_coherent_p (tree t1, tree t2)
1187 {
1188   gimple stmt;
1189   tree ssa_name_1 = NULL;
1190   tree ssa_name_2 = NULL;
1191
1192   gcc_assert (TREE_CODE (t1) == SSA_NAME || TREE_CODE (t1) == INTEGER_CST);
1193   gcc_assert (TREE_CODE (t2) == SSA_NAME || TREE_CODE (t2) == INTEGER_CST);
1194
1195   if (t1 == t2)
1196     return true;
1197
1198   if (TREE_CODE (t1) == INTEGER_CST && TREE_CODE (t2) == INTEGER_CST)
1199     return true;
1200   if (TREE_CODE (t1) == INTEGER_CST || TREE_CODE (t2) == INTEGER_CST)
1201     return false;
1202
1203   /* Check to see if t1 is expressed/defined with t2.  */
1204   stmt = SSA_NAME_DEF_STMT (t1);
1205   gcc_assert (stmt != NULL);
1206   if (is_gimple_assign (stmt))
1207     {
1208       ssa_name_1 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
1209       if (ssa_name_1 && ssa_name_1 == t2)
1210         return true;
1211     }
1212
1213   /* Check to see if t2 is expressed/defined with t1.  */
1214   stmt = SSA_NAME_DEF_STMT (t2);
1215   gcc_assert (stmt != NULL);
1216   if (is_gimple_assign (stmt))
1217     {
1218       ssa_name_2 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
1219       if (ssa_name_2 && ssa_name_2 == t1)
1220         return true;
1221     }
1222
1223   /* Compare if t1 and t2's def_stmts are identical.  */
1224   if (ssa_name_2 != NULL && ssa_name_1 == ssa_name_2)
1225     return true;
1226   else
1227     return false;
1228 }
1229
1230 /* Predict branch probability of BB when BB contains a branch that compares
1231    an induction variable in LOOP with LOOP_IV_BASE_VAR to LOOP_BOUND_VAR. The
1232    loop exit is compared using LOOP_BOUND_CODE, with step of LOOP_BOUND_STEP.
1233
1234    E.g.
1235      for (int i = 0; i < bound; i++) {
1236        if (i < bound - 2)
1237          computation_1();
1238        else
1239          computation_2();
1240      }
1241
1242   In this loop, we will predict the branch inside the loop to be taken.  */
1243
1244 static void
1245 predict_iv_comparison (struct loop *loop, basic_block bb,
1246                        tree loop_bound_var,
1247                        tree loop_iv_base_var,
1248                        enum tree_code loop_bound_code,
1249                        int loop_bound_step)
1250 {
1251   gimple stmt;
1252   tree compare_var, compare_base;
1253   enum tree_code compare_code;
1254   tree compare_step_var;
1255   edge then_edge;
1256   edge_iterator ei;
1257
1258   if (predicted_by_p (bb, PRED_LOOP_ITERATIONS_GUESSED)
1259       || predicted_by_p (bb, PRED_LOOP_ITERATIONS)
1260       || predicted_by_p (bb, PRED_LOOP_EXIT))
1261     return;
1262
1263   stmt = last_stmt (bb);
1264   if (!stmt || gimple_code (stmt) != GIMPLE_COND)
1265     return;
1266   if (!is_comparison_with_loop_invariant_p (stmt, loop, &compare_var,
1267                                             &compare_code,
1268                                             &compare_step_var,
1269                                             &compare_base))
1270     return;
1271
1272   /* Find the taken edge.  */
1273   FOR_EACH_EDGE (then_edge, ei, bb->succs)
1274     if (then_edge->flags & EDGE_TRUE_VALUE)
1275       break;
1276
1277   /* When comparing an IV to a loop invariant, NE is more likely to be
1278      taken while EQ is more likely to be not-taken.  */
1279   if (compare_code == NE_EXPR)
1280     {
1281       predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1282       return;
1283     }
1284   else if (compare_code == EQ_EXPR)
1285     {
1286       predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1287       return;
1288     }
1289
1290   if (!expr_coherent_p (loop_iv_base_var, compare_base))
1291     return;
1292
1293   /* If loop bound, base and compare bound are all constants, we can
1294      calculate the probability directly.  */
1295   if (host_integerp (loop_bound_var, 0)
1296       && host_integerp (compare_var, 0)
1297       && host_integerp (compare_base, 0))
1298     {
1299       int probability;
1300       bool of, overflow = false;
1301       double_int mod, compare_count, tem, loop_count;
1302
1303       double_int loop_bound = tree_to_double_int (loop_bound_var);
1304       double_int compare_bound = tree_to_double_int (compare_var);
1305       double_int base = tree_to_double_int (compare_base);
1306       double_int compare_step = tree_to_double_int (compare_step_var);
1307
1308       /* (loop_bound - base) / compare_step */
1309       tem = loop_bound.sub_with_overflow (base, &of);
1310       overflow |= of;
1311       loop_count = tem.divmod_with_overflow (compare_step,
1312                                               0, TRUNC_DIV_EXPR,
1313                                               &mod, &of);
1314       overflow |= of;
1315
1316       if ((!compare_step.is_negative ())
1317           ^ (compare_code == LT_EXPR || compare_code == LE_EXPR))
1318         {
1319           /* (loop_bound - compare_bound) / compare_step */
1320           tem = loop_bound.sub_with_overflow (compare_bound, &of);
1321           overflow |= of;
1322           compare_count = tem.divmod_with_overflow (compare_step,
1323                                                      0, TRUNC_DIV_EXPR,
1324                                                      &mod, &of);
1325           overflow |= of;
1326         }
1327       else
1328         {
1329           /* (compare_bound - base) / compare_step */
1330           tem = compare_bound.sub_with_overflow (base, &of);
1331           overflow |= of;
1332           compare_count = tem.divmod_with_overflow (compare_step,
1333                                                      0, TRUNC_DIV_EXPR,
1334                                                      &mod, &of);
1335           overflow |= of;
1336         }
1337       if (compare_code == LE_EXPR || compare_code == GE_EXPR)
1338         ++compare_count;
1339       if (loop_bound_code == LE_EXPR || loop_bound_code == GE_EXPR)
1340         ++loop_count;
1341       if (compare_count.is_negative ())
1342         compare_count = double_int_zero;
1343       if (loop_count.is_negative ())
1344         loop_count = double_int_zero;
1345       if (loop_count.is_zero ())
1346         probability = 0;
1347       else if (compare_count.scmp (loop_count) == 1)
1348         probability = REG_BR_PROB_BASE;
1349       else
1350         {
1351           /* If loop_count is too big, such that REG_BR_PROB_BASE * loop_count
1352              could overflow, shift both loop_count and compare_count right
1353              a bit so that it doesn't overflow.  Note both counts are known not
1354              to be negative at this point.  */
1355           int clz_bits = clz_hwi (loop_count.high);
1356           gcc_assert (REG_BR_PROB_BASE < 32768);
1357           if (clz_bits < 16)
1358             {
1359               loop_count.arshift (16 - clz_bits, HOST_BITS_PER_DOUBLE_INT);
1360               compare_count.arshift (16 - clz_bits, HOST_BITS_PER_DOUBLE_INT);
1361             }
1362           tem = compare_count.mul_with_sign (double_int::from_shwi
1363                                             (REG_BR_PROB_BASE), true, &of);
1364           gcc_assert (!of);
1365           tem = tem.divmod (loop_count, true, TRUNC_DIV_EXPR, &mod);
1366           probability = tem.to_uhwi ();
1367         }
1368
1369       if (!overflow)
1370         predict_edge (then_edge, PRED_LOOP_IV_COMPARE, probability);
1371
1372       return;
1373     }
1374
1375   if (expr_coherent_p (loop_bound_var, compare_var))
1376     {
1377       if ((loop_bound_code == LT_EXPR || loop_bound_code == LE_EXPR)
1378           && (compare_code == LT_EXPR || compare_code == LE_EXPR))
1379         predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1380       else if ((loop_bound_code == GT_EXPR || loop_bound_code == GE_EXPR)
1381                && (compare_code == GT_EXPR || compare_code == GE_EXPR))
1382         predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1383       else if (loop_bound_code == NE_EXPR)
1384         {
1385           /* If the loop backedge condition is "(i != bound)", we do
1386              the comparison based on the step of IV:
1387              * step < 0 : backedge condition is like (i > bound)
1388              * step > 0 : backedge condition is like (i < bound)  */
1389           gcc_assert (loop_bound_step != 0);
1390           if (loop_bound_step > 0
1391               && (compare_code == LT_EXPR
1392                   || compare_code == LE_EXPR))
1393             predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1394           else if (loop_bound_step < 0
1395                    && (compare_code == GT_EXPR
1396                        || compare_code == GE_EXPR))
1397             predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1398           else
1399             predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1400         }
1401       else
1402         /* The branch is predicted not-taken if loop_bound_code is
1403            opposite with compare_code.  */
1404         predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1405     }
1406   else if (expr_coherent_p (loop_iv_base_var, compare_var))
1407     {
1408       /* For cases like:
1409            for (i = s; i < h; i++)
1410              if (i > s + 2) ....
1411          The branch should be predicted taken.  */
1412       if (loop_bound_step > 0
1413           && (compare_code == GT_EXPR || compare_code == GE_EXPR))
1414         predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1415       else if (loop_bound_step < 0
1416                && (compare_code == LT_EXPR || compare_code == LE_EXPR))
1417         predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1418       else
1419         predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1420     }
1421 }
1422
1423 /* Predict for extra loop exits that will lead to EXIT_EDGE. The extra loop
1424    exits are resulted from short-circuit conditions that will generate an
1425    if_tmp. E.g.:
1426
1427    if (foo() || global > 10)
1428      break;
1429
1430    This will be translated into:
1431
1432    BB3:
1433      loop header...
1434    BB4:
1435      if foo() goto BB6 else goto BB5
1436    BB5:
1437      if global > 10 goto BB6 else goto BB7
1438    BB6:
1439      goto BB7
1440    BB7:
1441      iftmp = (PHI 0(BB5), 1(BB6))
1442      if iftmp == 1 goto BB8 else goto BB3
1443    BB8:
1444      outside of the loop...
1445
1446    The edge BB7->BB8 is loop exit because BB8 is outside of the loop.
1447    From the dataflow, we can infer that BB4->BB6 and BB5->BB6 are also loop
1448    exits. This function takes BB7->BB8 as input, and finds out the extra loop
1449    exits to predict them using PRED_LOOP_EXIT.  */
1450
1451 static void
1452 predict_extra_loop_exits (edge exit_edge)
1453 {
1454   unsigned i;
1455   bool check_value_one;
1456   gimple phi_stmt;
1457   tree cmp_rhs, cmp_lhs;
1458   gimple cmp_stmt = last_stmt (exit_edge->src);
1459
1460   if (!cmp_stmt || gimple_code (cmp_stmt) != GIMPLE_COND)
1461     return;
1462   cmp_rhs = gimple_cond_rhs (cmp_stmt);
1463   cmp_lhs = gimple_cond_lhs (cmp_stmt);
1464   if (!TREE_CONSTANT (cmp_rhs)
1465       || !(integer_zerop (cmp_rhs) || integer_onep (cmp_rhs)))
1466     return;
1467   if (TREE_CODE (cmp_lhs) != SSA_NAME)
1468     return;
1469
1470   /* If check_value_one is true, only the phi_args with value '1' will lead
1471      to loop exit. Otherwise, only the phi_args with value '0' will lead to
1472      loop exit.  */
1473   check_value_one = (((integer_onep (cmp_rhs))
1474                     ^ (gimple_cond_code (cmp_stmt) == EQ_EXPR))
1475                     ^ ((exit_edge->flags & EDGE_TRUE_VALUE) != 0));
1476
1477   phi_stmt = SSA_NAME_DEF_STMT (cmp_lhs);
1478   if (!phi_stmt || gimple_code (phi_stmt) != GIMPLE_PHI)
1479     return;
1480
1481   for (i = 0; i < gimple_phi_num_args (phi_stmt); i++)
1482     {
1483       edge e1;
1484       edge_iterator ei;
1485       tree val = gimple_phi_arg_def (phi_stmt, i);
1486       edge e = gimple_phi_arg_edge (phi_stmt, i);
1487
1488       if (!TREE_CONSTANT (val) || !(integer_zerop (val) || integer_onep (val)))
1489         continue;
1490       if ((check_value_one ^ integer_onep (val)) == 1)
1491         continue;
1492       if (EDGE_COUNT (e->src->succs) != 1)
1493         {
1494           predict_paths_leading_to_edge (e, PRED_LOOP_EXIT, NOT_TAKEN);
1495           continue;
1496         }
1497
1498       FOR_EACH_EDGE (e1, ei, e->src->preds)
1499         predict_paths_leading_to_edge (e1, PRED_LOOP_EXIT, NOT_TAKEN);
1500     }
1501 }
1502
1503 /* Predict edge probabilities by exploiting loop structure.  */
1504
1505 static void
1506 predict_loops (void)
1507 {
1508   loop_iterator li;
1509   struct loop *loop;
1510
1511   /* Try to predict out blocks in a loop that are not part of a
1512      natural loop.  */
1513   FOR_EACH_LOOP (li, loop, 0)
1514     {
1515       basic_block bb, *bbs;
1516       unsigned j, n_exits;
1517       vec<edge> exits;
1518       struct tree_niter_desc niter_desc;
1519       edge ex;
1520       struct nb_iter_bound *nb_iter;
1521       enum tree_code loop_bound_code = ERROR_MARK;
1522       tree loop_bound_step = NULL;
1523       tree loop_bound_var = NULL;
1524       tree loop_iv_base = NULL;
1525       gimple stmt = NULL;
1526
1527       exits = get_loop_exit_edges (loop);
1528       n_exits = exits.length ();
1529       if (!n_exits)
1530         {
1531           exits.release ();
1532           continue;
1533         }
1534
1535       FOR_EACH_VEC_ELT (exits, j, ex)
1536         {
1537           tree niter = NULL;
1538           HOST_WIDE_INT nitercst;
1539           int max = PARAM_VALUE (PARAM_MAX_PREDICTED_ITERATIONS);
1540           int probability;
1541           enum br_predictor predictor;
1542
1543           predict_extra_loop_exits (ex);
1544
1545           if (number_of_iterations_exit (loop, ex, &niter_desc, false, false))
1546             niter = niter_desc.niter;
1547           if (!niter || TREE_CODE (niter_desc.niter) != INTEGER_CST)
1548             niter = loop_niter_by_eval (loop, ex);
1549
1550           if (TREE_CODE (niter) == INTEGER_CST)
1551             {
1552               if (host_integerp (niter, 1)
1553                   && max
1554                   && compare_tree_int (niter, max - 1) == -1)
1555                 nitercst = tree_low_cst (niter, 1) + 1;
1556               else
1557                 nitercst = max;
1558               predictor = PRED_LOOP_ITERATIONS;
1559             }
1560           /* If we have just one exit and we can derive some information about
1561              the number of iterations of the loop from the statements inside
1562              the loop, use it to predict this exit.  */
1563           else if (n_exits == 1)
1564             {
1565               nitercst = estimated_stmt_executions_int (loop);
1566               if (nitercst < 0)
1567                 continue;
1568               if (nitercst > max)
1569                 nitercst = max;
1570
1571               predictor = PRED_LOOP_ITERATIONS_GUESSED;
1572             }
1573           else
1574             continue;
1575
1576           /* If the prediction for number of iterations is zero, do not
1577              predict the exit edges.  */
1578           if (nitercst == 0)
1579             continue;
1580
1581           probability = ((REG_BR_PROB_BASE + nitercst / 2) / nitercst);
1582           predict_edge (ex, predictor, probability);
1583         }
1584       exits.release ();
1585
1586       /* Find information about loop bound variables.  */
1587       for (nb_iter = loop->bounds; nb_iter;
1588            nb_iter = nb_iter->next)
1589         if (nb_iter->stmt
1590             && gimple_code (nb_iter->stmt) == GIMPLE_COND)
1591           {
1592             stmt = nb_iter->stmt;
1593             break;
1594           }
1595       if (!stmt && last_stmt (loop->header)
1596           && gimple_code (last_stmt (loop->header)) == GIMPLE_COND)
1597         stmt = last_stmt (loop->header);
1598       if (stmt)
1599         is_comparison_with_loop_invariant_p (stmt, loop,
1600                                              &loop_bound_var,
1601                                              &loop_bound_code,
1602                                              &loop_bound_step,
1603                                              &loop_iv_base);
1604
1605       bbs = get_loop_body (loop);
1606
1607       for (j = 0; j < loop->num_nodes; j++)
1608         {
1609           int header_found = 0;
1610           edge e;
1611           edge_iterator ei;
1612
1613           bb = bbs[j];
1614
1615           /* Bypass loop heuristics on continue statement.  These
1616              statements construct loops via "non-loop" constructs
1617              in the source language and are better to be handled
1618              separately.  */
1619           if (predicted_by_p (bb, PRED_CONTINUE))
1620             continue;
1621
1622           /* Loop branch heuristics - predict an edge back to a
1623              loop's head as taken.  */
1624           if (bb == loop->latch)
1625             {
1626               e = find_edge (loop->latch, loop->header);
1627               if (e)
1628                 {
1629                   header_found = 1;
1630                   predict_edge_def (e, PRED_LOOP_BRANCH, TAKEN);
1631                 }
1632             }
1633
1634           /* Loop exit heuristics - predict an edge exiting the loop if the
1635              conditional has no loop header successors as not taken.  */
1636           if (!header_found
1637               /* If we already used more reliable loop exit predictors, do not
1638                  bother with PRED_LOOP_EXIT.  */
1639               && !predicted_by_p (bb, PRED_LOOP_ITERATIONS_GUESSED)
1640               && !predicted_by_p (bb, PRED_LOOP_ITERATIONS))
1641             {
1642               /* For loop with many exits we don't want to predict all exits
1643                  with the pretty large probability, because if all exits are
1644                  considered in row, the loop would be predicted to iterate
1645                  almost never.  The code to divide probability by number of
1646                  exits is very rough.  It should compute the number of exits
1647                  taken in each patch through function (not the overall number
1648                  of exits that might be a lot higher for loops with wide switch
1649                  statements in them) and compute n-th square root.
1650
1651                  We limit the minimal probability by 2% to avoid
1652                  EDGE_PROBABILITY_RELIABLE from trusting the branch prediction
1653                  as this was causing regression in perl benchmark containing such
1654                  a wide loop.  */
1655
1656               int probability = ((REG_BR_PROB_BASE
1657                                   - predictor_info [(int) PRED_LOOP_EXIT].hitrate)
1658                                  / n_exits);
1659               if (probability < HITRATE (2))
1660                 probability = HITRATE (2);
1661               FOR_EACH_EDGE (e, ei, bb->succs)
1662                 if (e->dest->index < NUM_FIXED_BLOCKS
1663                     || !flow_bb_inside_loop_p (loop, e->dest))
1664                   predict_edge (e, PRED_LOOP_EXIT, probability);
1665             }
1666           if (loop_bound_var)
1667             predict_iv_comparison (loop, bb, loop_bound_var, loop_iv_base,
1668                                    loop_bound_code,
1669                                    tree_low_cst (loop_bound_step, 0));
1670         }
1671
1672       /* Free basic blocks from get_loop_body.  */
1673       free (bbs);
1674     }
1675 }
1676
1677 /* Attempt to predict probabilities of BB outgoing edges using local
1678    properties.  */
1679 static void
1680 bb_estimate_probability_locally (basic_block bb)
1681 {
1682   rtx last_insn = BB_END (bb);
1683   rtx cond;
1684
1685   if (! can_predict_insn_p (last_insn))
1686     return;
1687   cond = get_condition (last_insn, NULL, false, false);
1688   if (! cond)
1689     return;
1690
1691   /* Try "pointer heuristic."
1692      A comparison ptr == 0 is predicted as false.
1693      Similarly, a comparison ptr1 == ptr2 is predicted as false.  */
1694   if (COMPARISON_P (cond)
1695       && ((REG_P (XEXP (cond, 0)) && REG_POINTER (XEXP (cond, 0)))
1696           || (REG_P (XEXP (cond, 1)) && REG_POINTER (XEXP (cond, 1)))))
1697     {
1698       if (GET_CODE (cond) == EQ)
1699         predict_insn_def (last_insn, PRED_POINTER, NOT_TAKEN);
1700       else if (GET_CODE (cond) == NE)
1701         predict_insn_def (last_insn, PRED_POINTER, TAKEN);
1702     }
1703   else
1704
1705   /* Try "opcode heuristic."
1706      EQ tests are usually false and NE tests are usually true. Also,
1707      most quantities are positive, so we can make the appropriate guesses
1708      about signed comparisons against zero.  */
1709     switch (GET_CODE (cond))
1710       {
1711       case CONST_INT:
1712         /* Unconditional branch.  */
1713         predict_insn_def (last_insn, PRED_UNCONDITIONAL,
1714                           cond == const0_rtx ? NOT_TAKEN : TAKEN);
1715         break;
1716
1717       case EQ:
1718       case UNEQ:
1719         /* Floating point comparisons appears to behave in a very
1720            unpredictable way because of special role of = tests in
1721            FP code.  */
1722         if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
1723           ;
1724         /* Comparisons with 0 are often used for booleans and there is
1725            nothing useful to predict about them.  */
1726         else if (XEXP (cond, 1) == const0_rtx
1727                  || XEXP (cond, 0) == const0_rtx)
1728           ;
1729         else
1730           predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, NOT_TAKEN);
1731         break;
1732
1733       case NE:
1734       case LTGT:
1735         /* Floating point comparisons appears to behave in a very
1736            unpredictable way because of special role of = tests in
1737            FP code.  */
1738         if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
1739           ;
1740         /* Comparisons with 0 are often used for booleans and there is
1741            nothing useful to predict about them.  */
1742         else if (XEXP (cond, 1) == const0_rtx
1743                  || XEXP (cond, 0) == const0_rtx)
1744           ;
1745         else
1746           predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, TAKEN);
1747         break;
1748
1749       case ORDERED:
1750         predict_insn_def (last_insn, PRED_FPOPCODE, TAKEN);
1751         break;
1752
1753       case UNORDERED:
1754         predict_insn_def (last_insn, PRED_FPOPCODE, NOT_TAKEN);
1755         break;
1756
1757       case LE:
1758       case LT:
1759         if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
1760             || XEXP (cond, 1) == constm1_rtx)
1761           predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, NOT_TAKEN);
1762         break;
1763
1764       case GE:
1765       case GT:
1766         if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
1767             || XEXP (cond, 1) == constm1_rtx)
1768           predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, TAKEN);
1769         break;
1770
1771       default:
1772         break;
1773       }
1774 }
1775
1776 /* Set edge->probability for each successor edge of BB.  */
1777 void
1778 guess_outgoing_edge_probabilities (basic_block bb)
1779 {
1780   bb_estimate_probability_locally (bb);
1781   combine_predictions_for_insn (BB_END (bb), bb);
1782 }
1783 \f
1784 static tree expr_expected_value (tree, bitmap);
1785
1786 /* Helper function for expr_expected_value.  */
1787
1788 static tree
1789 expr_expected_value_1 (tree type, tree op0, enum tree_code code,
1790                        tree op1, bitmap visited)
1791 {
1792   gimple def;
1793
1794   if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1795     {
1796       if (TREE_CONSTANT (op0))
1797         return op0;
1798
1799       if (code != SSA_NAME)
1800         return NULL_TREE;
1801
1802       def = SSA_NAME_DEF_STMT (op0);
1803
1804       /* If we were already here, break the infinite cycle.  */
1805       if (!bitmap_set_bit (visited, SSA_NAME_VERSION (op0)))
1806         return NULL;
1807
1808       if (gimple_code (def) == GIMPLE_PHI)
1809         {
1810           /* All the arguments of the PHI node must have the same constant
1811              length.  */
1812           int i, n = gimple_phi_num_args (def);
1813           tree val = NULL, new_val;
1814
1815           for (i = 0; i < n; i++)
1816             {
1817               tree arg = PHI_ARG_DEF (def, i);
1818
1819               /* If this PHI has itself as an argument, we cannot
1820                  determine the string length of this argument.  However,
1821                  if we can find an expected constant value for the other
1822                  PHI args then we can still be sure that this is
1823                  likely a constant.  So be optimistic and just
1824                  continue with the next argument.  */
1825               if (arg == PHI_RESULT (def))
1826                 continue;
1827
1828               new_val = expr_expected_value (arg, visited);
1829               if (!new_val)
1830                 return NULL;
1831               if (!val)
1832                 val = new_val;
1833               else if (!operand_equal_p (val, new_val, false))
1834                 return NULL;
1835             }
1836           return val;
1837         }
1838       if (is_gimple_assign (def))
1839         {
1840           if (gimple_assign_lhs (def) != op0)
1841             return NULL;
1842
1843           return expr_expected_value_1 (TREE_TYPE (gimple_assign_lhs (def)),
1844                                         gimple_assign_rhs1 (def),
1845                                         gimple_assign_rhs_code (def),
1846                                         gimple_assign_rhs2 (def),
1847                                         visited);
1848         }
1849
1850       if (is_gimple_call (def))
1851         {
1852           tree decl = gimple_call_fndecl (def);
1853           if (!decl)
1854             return NULL;
1855           if (DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
1856             switch (DECL_FUNCTION_CODE (decl))
1857               {
1858               case BUILT_IN_EXPECT:
1859                 {
1860                   tree val;
1861                   if (gimple_call_num_args (def) != 2)
1862                     return NULL;
1863                   val = gimple_call_arg (def, 0);
1864                   if (TREE_CONSTANT (val))
1865                     return val;
1866                   return gimple_call_arg (def, 1);
1867                 }
1868
1869               case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_N:
1870               case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_1:
1871               case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_2:
1872               case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_4:
1873               case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_8:
1874               case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_16:
1875               case BUILT_IN_ATOMIC_COMPARE_EXCHANGE:
1876               case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_N:
1877               case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_1:
1878               case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_2:
1879               case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_4:
1880               case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_8:
1881               case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_16:
1882                 /* Assume that any given atomic operation has low contention,
1883                    and thus the compare-and-swap operation succeeds.  */
1884                 return boolean_true_node;
1885             }
1886         }
1887
1888       return NULL;
1889     }
1890
1891   if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS)
1892     {
1893       tree res;
1894       op0 = expr_expected_value (op0, visited);
1895       if (!op0)
1896         return NULL;
1897       op1 = expr_expected_value (op1, visited);
1898       if (!op1)
1899         return NULL;
1900       res = fold_build2 (code, type, op0, op1);
1901       if (TREE_CONSTANT (res))
1902         return res;
1903       return NULL;
1904     }
1905   if (get_gimple_rhs_class (code) == GIMPLE_UNARY_RHS)
1906     {
1907       tree res;
1908       op0 = expr_expected_value (op0, visited);
1909       if (!op0)
1910         return NULL;
1911       res = fold_build1 (code, type, op0);
1912       if (TREE_CONSTANT (res))
1913         return res;
1914       return NULL;
1915     }
1916   return NULL;
1917 }
1918
1919 /* Return constant EXPR will likely have at execution time, NULL if unknown.
1920    The function is used by builtin_expect branch predictor so the evidence
1921    must come from this construct and additional possible constant folding.
1922
1923    We may want to implement more involved value guess (such as value range
1924    propagation based prediction), but such tricks shall go to new
1925    implementation.  */
1926
1927 static tree
1928 expr_expected_value (tree expr, bitmap visited)
1929 {
1930   enum tree_code code;
1931   tree op0, op1;
1932
1933   if (TREE_CONSTANT (expr))
1934     return expr;
1935
1936   extract_ops_from_tree (expr, &code, &op0, &op1);
1937   return expr_expected_value_1 (TREE_TYPE (expr),
1938                                 op0, code, op1, visited);
1939 }
1940
1941 \f
1942 /* Get rid of all builtin_expect calls and GIMPLE_PREDICT statements
1943    we no longer need.  */
1944 static unsigned int
1945 strip_predict_hints (void)
1946 {
1947   basic_block bb;
1948   gimple ass_stmt;
1949   tree var;
1950
1951   FOR_EACH_BB (bb)
1952     {
1953       gimple_stmt_iterator bi;
1954       for (bi = gsi_start_bb (bb); !gsi_end_p (bi);)
1955         {
1956           gimple stmt = gsi_stmt (bi);
1957
1958           if (gimple_code (stmt) == GIMPLE_PREDICT)
1959             {
1960               gsi_remove (&bi, true);
1961               continue;
1962             }
1963           else if (gimple_code (stmt) == GIMPLE_CALL)
1964             {
1965               tree fndecl = gimple_call_fndecl (stmt);
1966
1967               if (fndecl
1968                   && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
1969                   && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_EXPECT
1970                   && gimple_call_num_args (stmt) == 2)
1971                 {
1972                   var = gimple_call_lhs (stmt);
1973                   if (var)
1974                     {
1975                       ass_stmt
1976                         = gimple_build_assign (var, gimple_call_arg (stmt, 0));
1977                       gsi_replace (&bi, ass_stmt, true);
1978                     }
1979                   else
1980                     {
1981                       gsi_remove (&bi, true);
1982                       continue;
1983                     }
1984                 }
1985             }
1986           gsi_next (&bi);
1987         }
1988     }
1989   return 0;
1990 }
1991 \f
1992 /* Predict using opcode of the last statement in basic block.  */
1993 static void
1994 tree_predict_by_opcode (basic_block bb)
1995 {
1996   gimple stmt = last_stmt (bb);
1997   edge then_edge;
1998   tree op0, op1;
1999   tree type;
2000   tree val;
2001   enum tree_code cmp;
2002   bitmap visited;
2003   edge_iterator ei;
2004
2005   if (!stmt || gimple_code (stmt) != GIMPLE_COND)
2006     return;
2007   FOR_EACH_EDGE (then_edge, ei, bb->succs)
2008     if (then_edge->flags & EDGE_TRUE_VALUE)
2009       break;
2010   op0 = gimple_cond_lhs (stmt);
2011   op1 = gimple_cond_rhs (stmt);
2012   cmp = gimple_cond_code (stmt);
2013   type = TREE_TYPE (op0);
2014   visited = BITMAP_ALLOC (NULL);
2015   val = expr_expected_value_1 (boolean_type_node, op0, cmp, op1, visited);
2016   BITMAP_FREE (visited);
2017   if (val)
2018     {
2019       int percent = PARAM_VALUE (BUILTIN_EXPECT_PROBABILITY);
2020
2021       gcc_assert (percent >= 0 && percent <= 100);
2022       if (integer_zerop (val))
2023         percent = 100 - percent;
2024       predict_edge (then_edge, PRED_BUILTIN_EXPECT, HITRATE (percent));
2025     }
2026   /* Try "pointer heuristic."
2027      A comparison ptr == 0 is predicted as false.
2028      Similarly, a comparison ptr1 == ptr2 is predicted as false.  */
2029   if (POINTER_TYPE_P (type))
2030     {
2031       if (cmp == EQ_EXPR)
2032         predict_edge_def (then_edge, PRED_TREE_POINTER, NOT_TAKEN);
2033       else if (cmp == NE_EXPR)
2034         predict_edge_def (then_edge, PRED_TREE_POINTER, TAKEN);
2035     }
2036   else
2037
2038   /* Try "opcode heuristic."
2039      EQ tests are usually false and NE tests are usually true. Also,
2040      most quantities are positive, so we can make the appropriate guesses
2041      about signed comparisons against zero.  */
2042     switch (cmp)
2043       {
2044       case EQ_EXPR:
2045       case UNEQ_EXPR:
2046         /* Floating point comparisons appears to behave in a very
2047            unpredictable way because of special role of = tests in
2048            FP code.  */
2049         if (FLOAT_TYPE_P (type))
2050           ;
2051         /* Comparisons with 0 are often used for booleans and there is
2052            nothing useful to predict about them.  */
2053         else if (integer_zerop (op0) || integer_zerop (op1))
2054           ;
2055         else
2056           predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, NOT_TAKEN);
2057         break;
2058
2059       case NE_EXPR:
2060       case LTGT_EXPR:
2061         /* Floating point comparisons appears to behave in a very
2062            unpredictable way because of special role of = tests in
2063            FP code.  */
2064         if (FLOAT_TYPE_P (type))
2065           ;
2066         /* Comparisons with 0 are often used for booleans and there is
2067            nothing useful to predict about them.  */
2068         else if (integer_zerop (op0)
2069                  || integer_zerop (op1))
2070           ;
2071         else
2072           predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, TAKEN);
2073         break;
2074
2075       case ORDERED_EXPR:
2076         predict_edge_def (then_edge, PRED_TREE_FPOPCODE, TAKEN);
2077         break;
2078
2079       case UNORDERED_EXPR:
2080         predict_edge_def (then_edge, PRED_TREE_FPOPCODE, NOT_TAKEN);
2081         break;
2082
2083       case LE_EXPR:
2084       case LT_EXPR:
2085         if (integer_zerop (op1)
2086             || integer_onep (op1)
2087             || integer_all_onesp (op1)
2088             || real_zerop (op1)
2089             || real_onep (op1)
2090             || real_minus_onep (op1))
2091           predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, NOT_TAKEN);
2092         break;
2093
2094       case GE_EXPR:
2095       case GT_EXPR:
2096         if (integer_zerop (op1)
2097             || integer_onep (op1)
2098             || integer_all_onesp (op1)
2099             || real_zerop (op1)
2100             || real_onep (op1)
2101             || real_minus_onep (op1))
2102           predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, TAKEN);
2103         break;
2104
2105       default:
2106         break;
2107       }
2108 }
2109
2110 /* Try to guess whether the value of return means error code.  */
2111
2112 static enum br_predictor
2113 return_prediction (tree val, enum prediction *prediction)
2114 {
2115   /* VOID.  */
2116   if (!val)
2117     return PRED_NO_PREDICTION;
2118   /* Different heuristics for pointers and scalars.  */
2119   if (POINTER_TYPE_P (TREE_TYPE (val)))
2120     {
2121       /* NULL is usually not returned.  */
2122       if (integer_zerop (val))
2123         {
2124           *prediction = NOT_TAKEN;
2125           return PRED_NULL_RETURN;
2126         }
2127     }
2128   else if (INTEGRAL_TYPE_P (TREE_TYPE (val)))
2129     {
2130       /* Negative return values are often used to indicate
2131          errors.  */
2132       if (TREE_CODE (val) == INTEGER_CST
2133           && tree_int_cst_sgn (val) < 0)
2134         {
2135           *prediction = NOT_TAKEN;
2136           return PRED_NEGATIVE_RETURN;
2137         }
2138       /* Constant return values seems to be commonly taken.
2139          Zero/one often represent booleans so exclude them from the
2140          heuristics.  */
2141       if (TREE_CONSTANT (val)
2142           && (!integer_zerop (val) && !integer_onep (val)))
2143         {
2144           *prediction = TAKEN;
2145           return PRED_CONST_RETURN;
2146         }
2147     }
2148   return PRED_NO_PREDICTION;
2149 }
2150
2151 /* Find the basic block with return expression and look up for possible
2152    return value trying to apply RETURN_PREDICTION heuristics.  */
2153 static void
2154 apply_return_prediction (void)
2155 {
2156   gimple return_stmt = NULL;
2157   tree return_val;
2158   edge e;
2159   gimple phi;
2160   int phi_num_args, i;
2161   enum br_predictor pred;
2162   enum prediction direction;
2163   edge_iterator ei;
2164
2165   FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
2166     {
2167       return_stmt = last_stmt (e->src);
2168       if (return_stmt
2169           && gimple_code (return_stmt) == GIMPLE_RETURN)
2170         break;
2171     }
2172   if (!e)
2173     return;
2174   return_val = gimple_return_retval (return_stmt);
2175   if (!return_val)
2176     return;
2177   if (TREE_CODE (return_val) != SSA_NAME
2178       || !SSA_NAME_DEF_STMT (return_val)
2179       || gimple_code (SSA_NAME_DEF_STMT (return_val)) != GIMPLE_PHI)
2180     return;
2181   phi = SSA_NAME_DEF_STMT (return_val);
2182   phi_num_args = gimple_phi_num_args (phi);
2183   pred = return_prediction (PHI_ARG_DEF (phi, 0), &direction);
2184
2185   /* Avoid the degenerate case where all return values form the function
2186      belongs to same category (ie they are all positive constants)
2187      so we can hardly say something about them.  */
2188   for (i = 1; i < phi_num_args; i++)
2189     if (pred != return_prediction (PHI_ARG_DEF (phi, i), &direction))
2190       break;
2191   if (i != phi_num_args)
2192     for (i = 0; i < phi_num_args; i++)
2193       {
2194         pred = return_prediction (PHI_ARG_DEF (phi, i), &direction);
2195         if (pred != PRED_NO_PREDICTION)
2196           predict_paths_leading_to_edge (gimple_phi_arg_edge (phi, i), pred,
2197                                          direction);
2198       }
2199 }
2200
2201 /* Look for basic block that contains unlikely to happen events
2202    (such as noreturn calls) and mark all paths leading to execution
2203    of this basic blocks as unlikely.  */
2204
2205 static void
2206 tree_bb_level_predictions (void)
2207 {
2208   basic_block bb;
2209   bool has_return_edges = false;
2210   edge e;
2211   edge_iterator ei;
2212
2213   FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
2214     if (!(e->flags & (EDGE_ABNORMAL | EDGE_FAKE | EDGE_EH)))
2215       {
2216         has_return_edges = true;
2217         break;
2218       }
2219
2220   apply_return_prediction ();
2221
2222   FOR_EACH_BB (bb)
2223     {
2224       gimple_stmt_iterator gsi;
2225
2226       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2227         {
2228           gimple stmt = gsi_stmt (gsi);
2229           tree decl;
2230
2231           if (is_gimple_call (stmt))
2232             {
2233               if ((gimple_call_flags (stmt) & ECF_NORETURN)
2234                   && has_return_edges)
2235                 predict_paths_leading_to (bb, PRED_NORETURN,
2236                                           NOT_TAKEN);
2237               decl = gimple_call_fndecl (stmt);
2238               if (decl
2239                   && lookup_attribute ("cold",
2240                                        DECL_ATTRIBUTES (decl)))
2241                 predict_paths_leading_to (bb, PRED_COLD_FUNCTION,
2242                                           NOT_TAKEN);
2243             }
2244           else if (gimple_code (stmt) == GIMPLE_PREDICT)
2245             {
2246               predict_paths_leading_to (bb, gimple_predict_predictor (stmt),
2247                                         gimple_predict_outcome (stmt));
2248               /* Keep GIMPLE_PREDICT around so early inlining will propagate
2249                  hints to callers.  */
2250             }
2251         }
2252     }
2253 }
2254
#ifdef ENABLE_CHECKING

/* Callback for pointer_map_traverse; asserts that the slot VALUE points
   to holds a null pointer, which is how an empty pointer map is checked.
   KEY and DATA are ignored.  Always returns false.  */

static bool
assert_is_empty (const void *key ATTRIBUTE_UNUSED, void **value,
                 void *data ATTRIBUTE_UNUSED)
{
  gcc_assert (*value == NULL);
  return false;
}
#endif
2268
2269 /* Predict branch probabilities and estimate profile for basic block BB.  */
2270
2271 static void
2272 tree_estimate_probability_bb (basic_block bb)
2273 {
2274   edge e;
2275   edge_iterator ei;
2276   gimple last;
2277
2278   FOR_EACH_EDGE (e, ei, bb->succs)
2279     {
2280       /* Predict edges to user labels with attributes.  */
2281       if (e->dest != EXIT_BLOCK_PTR)
2282         {
2283           gimple_stmt_iterator gi;
2284           for (gi = gsi_start_bb (e->dest); !gsi_end_p (gi); gsi_next (&gi))
2285             {
2286               gimple stmt = gsi_stmt (gi);
2287               tree decl;
2288
2289               if (gimple_code (stmt) != GIMPLE_LABEL)
2290                 break;
2291               decl = gimple_label_label (stmt);
2292               if (DECL_ARTIFICIAL (decl))
2293                 continue;
2294
2295               /* Finally, we have a user-defined label.  */
2296               if (lookup_attribute ("cold", DECL_ATTRIBUTES (decl)))
2297                 predict_edge_def (e, PRED_COLD_LABEL, NOT_TAKEN);
2298               else if (lookup_attribute ("hot", DECL_ATTRIBUTES (decl)))
2299                 predict_edge_def (e, PRED_HOT_LABEL, TAKEN);
2300             }
2301         }
2302
2303       /* Predict early returns to be probable, as we've already taken
2304          care for error returns and other cases are often used for
2305          fast paths through function.
2306
2307          Since we've already removed the return statements, we are
2308          looking for CFG like:
2309
2310          if (conditional)
2311          {
2312          ..
2313          goto return_block
2314          }
2315          some other blocks
2316          return_block:
2317          return_stmt.  */
2318       if (e->dest != bb->next_bb
2319           && e->dest != EXIT_BLOCK_PTR
2320           && single_succ_p (e->dest)
2321           && single_succ_edge (e->dest)->dest == EXIT_BLOCK_PTR
2322           && (last = last_stmt (e->dest)) != NULL
2323           && gimple_code (last) == GIMPLE_RETURN)
2324         {
2325           edge e1;
2326           edge_iterator ei1;
2327
2328           if (single_succ_p (bb))
2329             {
2330               FOR_EACH_EDGE (e1, ei1, bb->preds)
2331                 if (!predicted_by_p (e1->src, PRED_NULL_RETURN)
2332                     && !predicted_by_p (e1->src, PRED_CONST_RETURN)
2333                     && !predicted_by_p (e1->src, PRED_NEGATIVE_RETURN))
2334                   predict_edge_def (e1, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
2335             }
2336           else
2337             if (!predicted_by_p (e->src, PRED_NULL_RETURN)
2338                 && !predicted_by_p (e->src, PRED_CONST_RETURN)
2339                 && !predicted_by_p (e->src, PRED_NEGATIVE_RETURN))
2340               predict_edge_def (e, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
2341         }
2342
2343       /* Look for block we are guarding (ie we dominate it,
2344          but it doesn't postdominate us).  */
2345       if (e->dest != EXIT_BLOCK_PTR && e->dest != bb
2346           && dominated_by_p (CDI_DOMINATORS, e->dest, e->src)
2347           && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e->dest))
2348         {
2349           gimple_stmt_iterator bi;
2350
2351           /* The call heuristic claims that a guarded function call
2352              is improbable.  This is because such calls are often used
2353              to signal exceptional situations such as printing error
2354              messages.  */
2355           for (bi = gsi_start_bb (e->dest); !gsi_end_p (bi);
2356                gsi_next (&bi))
2357             {
2358               gimple stmt = gsi_stmt (bi);
2359               if (is_gimple_call (stmt)
2360                   /* Constant and pure calls are hardly used to signalize
2361                      something exceptional.  */
2362                   && gimple_has_side_effects (stmt))
2363                 {
2364                   predict_edge_def (e, PRED_CALL, NOT_TAKEN);
2365                   break;
2366                 }
2367             }
2368         }
2369     }
2370   tree_predict_by_opcode (bb);
2371 }
2372
2373 /* Predict branch probabilities and estimate profile of the tree CFG.
2374    This function can be called from the loop optimizers to recompute
2375    the profile information.  */
2376
2377 void
2378 tree_estimate_probability (void)
2379 {
2380   basic_block bb;
2381
2382   add_noreturn_fake_exit_edges ();
2383   connect_infinite_loops_to_exit ();
2384   /* We use loop_niter_by_eval, which requires that the loops have
2385      preheaders.  */
2386   create_preheaders (CP_SIMPLE_PREHEADERS);
2387   calculate_dominance_info (CDI_POST_DOMINATORS);
2388
2389   bb_predictions = pointer_map_create ();
2390   tree_bb_level_predictions ();
2391   record_loop_exits ();
2392
2393   if (number_of_loops (cfun) > 1)
2394     predict_loops ();
2395
2396   FOR_EACH_BB (bb)
2397     tree_estimate_probability_bb (bb);
2398
2399   FOR_EACH_BB (bb)
2400     combine_predictions_for_bb (bb);
2401
2402 #ifdef ENABLE_CHECKING
2403   pointer_map_traverse (bb_predictions, assert_is_empty, NULL);
2404 #endif
2405   pointer_map_destroy (bb_predictions);
2406   bb_predictions = NULL;
2407
2408   estimate_bb_frequencies (false);
2409   free_dominance_info (CDI_POST_DOMINATORS);
2410   remove_fake_exit_edges ();
2411 }
2412
2413 /* Predict branch probabilities and estimate profile of the tree CFG.
2414    This is the driver function for PASS_PROFILE.  */
2415
2416 static unsigned int
2417 tree_estimate_probability_driver (void)
2418 {
2419   unsigned nb_loops;
2420
2421   loop_optimizer_init (LOOPS_NORMAL);
2422   if (dump_file && (dump_flags & TDF_DETAILS))
2423     flow_loops_dump (dump_file, NULL, 0);
2424
2425   mark_irreducible_loops ();
2426
2427   nb_loops = number_of_loops (cfun);
2428   if (nb_loops > 1)
2429     scev_initialize ();
2430
2431   tree_estimate_probability ();
2432
2433   if (nb_loops > 1)
2434     scev_finalize ();
2435
2436   loop_optimizer_finalize ();
2437   if (dump_file && (dump_flags & TDF_DETAILS))
2438     gimple_dump_cfg (dump_file, dump_flags);
2439   if (profile_status == PROFILE_ABSENT)
2440     profile_status = PROFILE_GUESSED;
2441   return 0;
2442 }
2443 \f
2444 /* Predict edges to successors of CUR whose sources are not postdominated by
2445    BB by PRED and recurse to all postdominators.  */
2446
2447 static void
2448 predict_paths_for_bb (basic_block cur, basic_block bb,
2449                       enum br_predictor pred,
2450                       enum prediction taken,
2451                       bitmap visited)
2452 {
2453   edge e;
2454   edge_iterator ei;
2455   basic_block son;
2456
2457   /* We are looking for all edges forming edge cut induced by
2458      set of all blocks postdominated by BB.  */
2459   FOR_EACH_EDGE (e, ei, cur->preds)
2460     if (e->src->index >= NUM_FIXED_BLOCKS
2461         && !dominated_by_p (CDI_POST_DOMINATORS, e->src, bb))
2462     {
2463       edge e2;
2464       edge_iterator ei2;
2465       bool found = false;
2466
2467       /* Ignore fake edges and eh, we predict them as not taken anyway.  */
2468       if (e->flags & (EDGE_EH | EDGE_FAKE))
2469         continue;
2470       gcc_assert (bb == cur || dominated_by_p (CDI_POST_DOMINATORS, cur, bb));
2471
2472       /* See if there is an edge from e->src that is not abnormal
2473          and does not lead to BB.  */
2474       FOR_EACH_EDGE (e2, ei2, e->src->succs)
2475         if (e2 != e
2476             && !(e2->flags & (EDGE_EH | EDGE_FAKE))
2477             && !dominated_by_p (CDI_POST_DOMINATORS, e2->dest, bb))
2478           {
2479             found = true;
2480             break;
2481           }
2482
2483       /* If there is non-abnormal path leaving e->src, predict edge
2484          using predictor.  Otherwise we need to look for paths
2485          leading to e->src.
2486
2487          The second may lead to infinite loop in the case we are predicitng
2488          regions that are only reachable by abnormal edges.  We simply
2489          prevent visiting given BB twice.  */
2490       if (found)
2491         predict_edge_def (e, pred, taken);
2492       else if (bitmap_set_bit (visited, e->src->index))
2493         predict_paths_for_bb (e->src, e->src, pred, taken, visited);
2494     }
2495   for (son = first_dom_son (CDI_POST_DOMINATORS, cur);
2496        son;
2497        son = next_dom_son (CDI_POST_DOMINATORS, son))
2498     predict_paths_for_bb (son, bb, pred, taken, visited);
2499 }
2500
2501 /* Sets branch probabilities according to PREDiction and
2502    FLAGS.  */
2503
2504 static void
2505 predict_paths_leading_to (basic_block bb, enum br_predictor pred,
2506                           enum prediction taken)
2507 {
2508   bitmap visited = BITMAP_ALLOC (NULL);
2509   predict_paths_for_bb (bb, bb, pred, taken, visited);
2510   BITMAP_FREE (visited);
2511 }
2512
2513 /* Like predict_paths_leading_to but take edge instead of basic block.  */
2514
2515 static void
2516 predict_paths_leading_to_edge (edge e, enum br_predictor pred,
2517                                enum prediction taken)
2518 {
2519   bool has_nonloop_edge = false;
2520   edge_iterator ei;
2521   edge e2;
2522
2523   basic_block bb = e->src;
2524   FOR_EACH_EDGE (e2, ei, bb->succs)
2525     if (e2->dest != e->src && e2->dest != e->dest
2526         && !(e->flags & (EDGE_EH | EDGE_FAKE))
2527         && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e2->dest))
2528       {
2529         has_nonloop_edge = true;
2530         break;
2531       }
2532   if (!has_nonloop_edge)
2533     {
2534       bitmap visited = BITMAP_ALLOC (NULL);
2535       predict_paths_for_bb (bb, bb, pred, taken, visited);
2536       BITMAP_FREE (visited);
2537     }
2538   else
2539     predict_edge_def (e, pred, taken);
2540 }
2541 \f
2542 /* This is used to carry information about basic blocks.  It is
2543    attached to the AUX field of the standard CFG block.  */
2544
2545 typedef struct block_info_def
2546 {
2547   /* Estimated frequency of execution of basic_block.  */
2548   sreal frequency;
2549
2550   /* To keep queue of basic blocks to process.  */
2551   basic_block next;
2552
2553   /* Number of predecessors we need to visit first.  */
2554   int npredecessors;
2555 } *block_info;
2556
2557 /* Similar information for edges.  */
2558 typedef struct edge_info_def
2559 {
2560   /* In case edge is a loopback edge, the probability edge will be reached
2561      in case header is.  Estimated number of iterations of the loop can be
2562      then computed as 1 / (1 - back_edge_prob).  */
2563   sreal back_edge_prob;
2564   /* True if the edge is a loopback edge in the natural loop.  */
2565   unsigned int back_edge:1;
2566 } *edge_info;
2567
2568 #define BLOCK_INFO(B)   ((block_info) (B)->aux)
2569 #define EDGE_INFO(E)    ((edge_info) (E)->aux)
2570
2571 /* Helper function for estimate_bb_frequencies.
2572    Propagate the frequencies in blocks marked in
2573    TOVISIT, starting in HEAD.  */
2574
2575 static void
2576 propagate_freq (basic_block head, bitmap tovisit)
2577 {
2578   basic_block bb;
2579   basic_block last;
2580   unsigned i;
2581   edge e;
2582   basic_block nextbb;
2583   bitmap_iterator bi;
2584
2585   /* For each basic block we need to visit count number of his predecessors
2586      we need to visit first.  */
2587   EXECUTE_IF_SET_IN_BITMAP (tovisit, 0, i, bi)
2588     {
2589       edge_iterator ei;
2590       int count = 0;
2591
2592       bb = BASIC_BLOCK (i);
2593
2594       FOR_EACH_EDGE (e, ei, bb->preds)
2595         {
2596           bool visit = bitmap_bit_p (tovisit, e->src->index);
2597
2598           if (visit && !(e->flags & EDGE_DFS_BACK))
2599             count++;
2600           else if (visit && dump_file && !EDGE_INFO (e)->back_edge)
2601             fprintf (dump_file,
2602                      "Irreducible region hit, ignoring edge to %i->%i\n",
2603                      e->src->index, bb->index);
2604         }
2605       BLOCK_INFO (bb)->npredecessors = count;
2606       /* When function never returns, we will never process exit block.  */
2607       if (!count && bb == EXIT_BLOCK_PTR)
2608         bb->count = bb->frequency = 0;
2609     }
2610
2611   memcpy (&BLOCK_INFO (head)->frequency, &real_one, sizeof (real_one));
2612   last = head;
2613   for (bb = head; bb; bb = nextbb)
2614     {
2615       edge_iterator ei;
2616       sreal cyclic_probability, frequency;
2617
2618       memcpy (&cyclic_probability, &real_zero, sizeof (real_zero));
2619       memcpy (&frequency, &real_zero, sizeof (real_zero));
2620
2621       nextbb = BLOCK_INFO (bb)->next;
2622       BLOCK_INFO (bb)->next = NULL;
2623
2624       /* Compute frequency of basic block.  */
2625       if (bb != head)
2626         {
2627 #ifdef ENABLE_CHECKING
2628           FOR_EACH_EDGE (e, ei, bb->preds)
2629             gcc_assert (!bitmap_bit_p (tovisit, e->src->index)
2630                         || (e->flags & EDGE_DFS_BACK));
2631 #endif
2632
2633           FOR_EACH_EDGE (e, ei, bb->preds)
2634             if (EDGE_INFO (e)->back_edge)
2635               {
2636                 sreal_add (&cyclic_probability, &cyclic_probability,
2637                            &EDGE_INFO (e)->back_edge_prob);
2638               }
2639             else if (!(e->flags & EDGE_DFS_BACK))
2640               {
2641                 sreal tmp;
2642
2643                 /*  frequency += (e->probability
2644                                   * BLOCK_INFO (e->src)->frequency /
2645                                   REG_BR_PROB_BASE);  */
2646
2647                 sreal_init (&tmp, e->probability, 0);
2648                 sreal_mul (&tmp, &tmp, &BLOCK_INFO (e->src)->frequency);
2649                 sreal_mul (&tmp, &tmp, &real_inv_br_prob_base);
2650                 sreal_add (&frequency, &frequency, &tmp);
2651               }
2652
2653           if (sreal_compare (&cyclic_probability, &real_zero) == 0)
2654             {
2655               memcpy (&BLOCK_INFO (bb)->frequency, &frequency,
2656                       sizeof (frequency));
2657             }
2658           else
2659             {
2660               if (sreal_compare (&cyclic_probability, &real_almost_one) > 0)
2661                 {
2662                   memcpy (&cyclic_probability, &real_almost_one,
2663                           sizeof (real_almost_one));
2664                 }
2665
2666               /* BLOCK_INFO (bb)->frequency = frequency
2667                                               / (1 - cyclic_probability) */
2668
2669               sreal_sub (&cyclic_probability, &real_one, &cyclic_probability);
2670               sreal_div (&BLOCK_INFO (bb)->frequency,
2671                          &frequency, &cyclic_probability);
2672             }
2673         }
2674
2675       bitmap_clear_bit (tovisit, bb->index);
2676
2677       e = find_edge (bb, head);
2678       if (e)
2679         {
2680           sreal tmp;
2681
2682           /* EDGE_INFO (e)->back_edge_prob
2683              = ((e->probability * BLOCK_INFO (bb)->frequency)
2684              / REG_BR_PROB_BASE); */
2685
2686           sreal_init (&tmp, e->probability, 0);
2687           sreal_mul (&tmp, &tmp, &BLOCK_INFO (bb)->frequency);
2688           sreal_mul (&EDGE_INFO (e)->back_edge_prob,
2689                      &tmp, &real_inv_br_prob_base);
2690         }
2691
2692       /* Propagate to successor blocks.  */
2693       FOR_EACH_EDGE (e, ei, bb->succs)
2694         if (!(e->flags & EDGE_DFS_BACK)
2695             && BLOCK_INFO (e->dest)->npredecessors)
2696           {
2697             BLOCK_INFO (e->dest)->npredecessors--;
2698             if (!BLOCK_INFO (e->dest)->npredecessors)
2699               {
2700                 if (!nextbb)
2701                   nextbb = e->dest;
2702                 else
2703                   BLOCK_INFO (last)->next = e->dest;
2704
2705                 last = e->dest;
2706               }
2707           }
2708     }
2709 }
2710
2711 /* Estimate frequencies in loops at same nest level.  */
2712
2713 static void
2714 estimate_loops_at_level (struct loop *first_loop)
2715 {
2716   struct loop *loop;
2717
2718   for (loop = first_loop; loop; loop = loop->next)
2719     {
2720       edge e;
2721       basic_block *bbs;
2722       unsigned i;
2723       bitmap tovisit = BITMAP_ALLOC (NULL);
2724
2725       estimate_loops_at_level (loop->inner);
2726
2727       /* Find current loop back edge and mark it.  */
2728       e = loop_latch_edge (loop);
2729       EDGE_INFO (e)->back_edge = 1;
2730
2731       bbs = get_loop_body (loop);
2732       for (i = 0; i < loop->num_nodes; i++)
2733         bitmap_set_bit (tovisit, bbs[i]->index);
2734       free (bbs);
2735       propagate_freq (loop->header, tovisit);
2736       BITMAP_FREE (tovisit);
2737     }
2738 }
2739
2740 /* Propagates frequencies through structure of loops.  */
2741
2742 static void
2743 estimate_loops (void)
2744 {
2745   bitmap tovisit = BITMAP_ALLOC (NULL);
2746   basic_block bb;
2747
2748   /* Start by estimating the frequencies in the loops.  */
2749   if (number_of_loops (cfun) > 1)
2750     estimate_loops_at_level (current_loops->tree_root->inner);
2751
2752   /* Now propagate the frequencies through all the blocks.  */
2753   FOR_ALL_BB (bb)
2754     {
2755       bitmap_set_bit (tovisit, bb->index);
2756     }
2757   propagate_freq (ENTRY_BLOCK_PTR, tovisit);
2758   BITMAP_FREE (tovisit);
2759 }
2760
2761 /* Convert counts measured by profile driven feedback to frequencies.
2762    Return nonzero iff there was any nonzero execution count.  */
2763
2764 int
2765 counts_to_freqs (void)
2766 {
2767   gcov_type count_max, true_count_max = 0;
2768   basic_block bb;
2769
2770   FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
2771     true_count_max = MAX (bb->count, true_count_max);
2772
2773   count_max = MAX (true_count_max, 1);
2774   FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
2775     bb->frequency = (bb->count * BB_FREQ_MAX + count_max / 2) / count_max;
2776
2777   return true_count_max;
2778 }
2779
2780 /* Return true if function is likely to be expensive, so there is no point to
2781    optimize performance of prologue, epilogue or do inlining at the expense
2782    of code size growth.  THRESHOLD is the limit of number of instructions
2783    function can execute at average to be still considered not expensive.  */
2784
2785 bool
2786 expensive_function_p (int threshold)
2787 {
2788   unsigned int sum = 0;
2789   basic_block bb;
2790   unsigned int limit;
2791
2792   /* We can not compute accurately for large thresholds due to scaled
2793      frequencies.  */
2794   gcc_assert (threshold <= BB_FREQ_MAX);
2795
2796   /* Frequencies are out of range.  This either means that function contains
2797      internal loop executing more than BB_FREQ_MAX times or profile feedback
2798      is available and function has not been executed at all.  */
2799   if (ENTRY_BLOCK_PTR->frequency == 0)
2800     return true;
2801
2802   /* Maximally BB_FREQ_MAX^2 so overflow won't happen.  */
2803   limit = ENTRY_BLOCK_PTR->frequency * threshold;
2804   FOR_EACH_BB (bb)
2805     {
2806       rtx insn;
2807
2808       FOR_BB_INSNS (bb, insn)
2809         if (active_insn_p (insn))
2810           {
2811             sum += bb->frequency;
2812             if (sum > limit)
2813               return true;
2814         }
2815     }
2816
2817   return false;
2818 }
2819
2820 /* Estimate and propagate basic block frequencies using the given branch
2821    probabilities.  If FORCE is true, the frequencies are used to estimate
2822    the counts even when there are already non-zero profile counts.  */
2823
2824 void
2825 estimate_bb_frequencies (bool force)
2826 {
2827   basic_block bb;
2828   sreal freq_max;
2829
2830   if (force || profile_status != PROFILE_READ || !counts_to_freqs ())
2831     {
2832       static int real_values_initialized = 0;
2833
2834       if (!real_values_initialized)
2835         {
2836           real_values_initialized = 1;
2837           sreal_init (&real_zero, 0, 0);
2838           sreal_init (&real_one, 1, 0);
2839           sreal_init (&real_br_prob_base, REG_BR_PROB_BASE, 0);
2840           sreal_init (&real_bb_freq_max, BB_FREQ_MAX, 0);
2841           sreal_init (&real_one_half, 1, -1);
2842           sreal_div (&real_inv_br_prob_base, &real_one, &real_br_prob_base);
2843           sreal_sub (&real_almost_one, &real_one, &real_inv_br_prob_base);
2844         }
2845
2846       mark_dfs_back_edges ();
2847
2848       single_succ_edge (ENTRY_BLOCK_PTR)->probability = REG_BR_PROB_BASE;
2849
2850       /* Set up block info for each basic block.  */
2851       alloc_aux_for_blocks (sizeof (struct block_info_def));
2852       alloc_aux_for_edges (sizeof (struct edge_info_def));
2853       FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
2854         {
2855           edge e;
2856           edge_iterator ei;
2857
2858           FOR_EACH_EDGE (e, ei, bb->succs)
2859             {
2860               sreal_init (&EDGE_INFO (e)->back_edge_prob, e->probability, 0);
2861               sreal_mul (&EDGE_INFO (e)->back_edge_prob,
2862                          &EDGE_INFO (e)->back_edge_prob,
2863                          &real_inv_br_prob_base);
2864             }
2865         }
2866
2867       /* First compute frequencies locally for each loop from innermost
2868          to outermost to examine frequencies for back edges.  */
2869       estimate_loops ();
2870
2871       memcpy (&freq_max, &real_zero, sizeof (real_zero));
2872       FOR_EACH_BB (bb)
2873         if (sreal_compare (&freq_max, &BLOCK_INFO (bb)->frequency) < 0)
2874           memcpy (&freq_max, &BLOCK_INFO (bb)->frequency, sizeof (freq_max));
2875
2876       sreal_div (&freq_max, &real_bb_freq_max, &freq_max);
2877       FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
2878         {
2879           sreal tmp;
2880
2881           sreal_mul (&tmp, &BLOCK_INFO (bb)->frequency, &freq_max);
2882           sreal_add (&tmp, &tmp, &real_one_half);
2883           bb->frequency = sreal_to_int (&tmp);
2884         }
2885
2886       free_aux_for_blocks ();
2887       free_aux_for_edges ();
2888     }
2889   compute_function_frequency ();
2890 }
2891
2892 /* Decide whether function is hot, cold or unlikely executed.  */
2893 void
2894 compute_function_frequency (void)
2895 {
2896   basic_block bb;
2897   struct cgraph_node *node = cgraph_get_node (current_function_decl);
2898
2899   if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
2900       || MAIN_NAME_P (DECL_NAME (current_function_decl)))
2901     node->only_called_at_startup = true;
2902   if (DECL_STATIC_DESTRUCTOR (current_function_decl))
2903     node->only_called_at_exit = true;
2904
2905   if (profile_status != PROFILE_READ)
2906     {
2907       int flags = flags_from_decl_or_type (current_function_decl);
2908       if (lookup_attribute ("cold", DECL_ATTRIBUTES (current_function_decl))
2909           != NULL)
2910         node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
2911       else if (lookup_attribute ("hot", DECL_ATTRIBUTES (current_function_decl))
2912                != NULL)
2913         node->frequency = NODE_FREQUENCY_HOT;
2914       else if (flags & ECF_NORETURN)
2915         node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
2916       else if (MAIN_NAME_P (DECL_NAME (current_function_decl)))
2917         node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
2918       else if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
2919                || DECL_STATIC_DESTRUCTOR (current_function_decl))
2920         node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
2921       return;
2922     }
2923
2924   /* Only first time try to drop function into unlikely executed.
2925      After inlining the roundoff errors may confuse us.
2926      Ipa-profile pass will drop functions only called from unlikely
2927      functions to unlikely and that is most of what we care about.  */
2928   if (!cfun->after_inlining)
2929     node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
2930   FOR_EACH_BB (bb)
2931     {
2932       if (maybe_hot_bb_p (cfun, bb))
2933         {
2934           node->frequency = NODE_FREQUENCY_HOT;
2935           return;
2936         }
2937       if (!probably_never_executed_bb_p (cfun, bb))
2938         node->frequency = NODE_FREQUENCY_NORMAL;
2939     }
2940 }
2941
2942 static bool
2943 gate_estimate_probability (void)
2944 {
2945   return flag_guess_branch_prob;
2946 }
2947
2948 /* Build PREDICT_EXPR.  */
2949 tree
2950 build_predict_expr (enum br_predictor predictor, enum prediction taken)
2951 {
2952   tree t = build1 (PREDICT_EXPR, void_type_node,
2953                    build_int_cst (integer_type_node, predictor));
2954   SET_PREDICT_EXPR_OUTCOME (t, taken);
2955   return t;
2956 }
2957
2958 const char *
2959 predictor_name (enum br_predictor predictor)
2960 {
2961   return predictor_info[predictor].name;
2962 }
2963
2964 namespace {
2965
2966 const pass_data pass_data_profile =
2967 {
2968   GIMPLE_PASS, /* type */
2969   "profile_estimate", /* name */
2970   OPTGROUP_NONE, /* optinfo_flags */
2971   true, /* has_gate */
2972   true, /* has_execute */
2973   TV_BRANCH_PROB, /* tv_id */
2974   PROP_cfg, /* properties_required */
2975   0, /* properties_provided */
2976   0, /* properties_destroyed */
2977   0, /* todo_flags_start */
2978   TODO_verify_ssa, /* todo_flags_finish */
2979 };
2980
2981 class pass_profile : public gimple_opt_pass
2982 {
2983 public:
2984   pass_profile (gcc::context *ctxt)
2985     : gimple_opt_pass (pass_data_profile, ctxt)
2986   {}
2987
2988   /* opt_pass methods: */
2989   bool gate () { return gate_estimate_probability (); }
2990   unsigned int execute () { return tree_estimate_probability_driver (); }
2991
2992 }; // class pass_profile
2993
2994 } // anon namespace
2995
2996 gimple_opt_pass *
2997 make_pass_profile (gcc::context *ctxt)
2998 {
2999   return new pass_profile (ctxt);
3000 }
3001
3002 namespace {
3003
3004 const pass_data pass_data_strip_predict_hints =
3005 {
3006   GIMPLE_PASS, /* type */
3007   "*strip_predict_hints", /* name */
3008   OPTGROUP_NONE, /* optinfo_flags */
3009   false, /* has_gate */
3010   true, /* has_execute */
3011   TV_BRANCH_PROB, /* tv_id */
3012   PROP_cfg, /* properties_required */
3013   0, /* properties_provided */
3014   0, /* properties_destroyed */
3015   0, /* todo_flags_start */
3016   TODO_verify_ssa, /* todo_flags_finish */
3017 };
3018
3019 class pass_strip_predict_hints : public gimple_opt_pass
3020 {
3021 public:
3022   pass_strip_predict_hints (gcc::context *ctxt)
3023     : gimple_opt_pass (pass_data_strip_predict_hints, ctxt)
3024   {}
3025
3026   /* opt_pass methods: */
3027   opt_pass * clone () { return new pass_strip_predict_hints (m_ctxt); }
3028   unsigned int execute () { return strip_predict_hints (); }
3029
3030 }; // class pass_strip_predict_hints
3031
3032 } // anon namespace
3033
3034 gimple_opt_pass *
3035 make_pass_strip_predict_hints (gcc::context *ctxt)
3036 {
3037   return new pass_strip_predict_hints (ctxt);
3038 }
3039
3040 /* Rebuild function frequencies.  Passes are in general expected to
3041    maintain profile by hand, however in some cases this is not possible:
3042    for example when inlining several functions with loops freuqencies might run
3043    out of scale and thus needs to be recomputed.  */
3044
3045 void
3046 rebuild_frequencies (void)
3047 {
3048   timevar_push (TV_REBUILD_FREQUENCIES);
3049
3050   /* When the max bb count in the function is small, there is a higher
3051      chance that there were truncation errors in the integer scaling
3052      of counts by inlining and other optimizations. This could lead
3053      to incorrect classification of code as being cold when it isn't.
3054      In that case, force the estimation of bb counts/frequencies from the
3055      branch probabilities, rather than computing frequencies from counts,
3056      which may also lead to frequencies incorrectly reduced to 0. There
3057      is less precision in the probabilities, so we only do this for small
3058      max counts.  */
3059   gcov_type count_max = 0;
3060   basic_block bb;
3061   FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
3062     count_max = MAX (bb->count, count_max);
3063
3064   if (profile_status == PROFILE_GUESSED
3065       || (profile_status == PROFILE_READ && count_max < REG_BR_PROB_BASE/10))
3066     {
3067       loop_optimizer_init (0);
3068       add_noreturn_fake_exit_edges ();
3069       mark_irreducible_loops ();
3070       connect_infinite_loops_to_exit ();
3071       estimate_bb_frequencies (true);
3072       remove_fake_exit_edges ();
3073       loop_optimizer_finalize ();
3074     }
3075   else if (profile_status == PROFILE_READ)
3076     counts_to_freqs ();
3077   else
3078     gcc_unreachable ();
3079   timevar_pop (TV_REBUILD_FREQUENCIES);
3080 }