predict.c (probably_never_executed): New function.
1 /* Branch prediction routines for the GNU compiler.
2 Copyright (C) 2000-2013 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 /* References:
21
22 [1] "Branch Prediction for Free"
23 Ball and Larus; PLDI '93.
24 [2] "Static Branch Frequency and Program Profile Analysis"
25 Wu and Larus; MICRO-27.
26 [3] "Corpus-based Static Branch Prediction"
27 Calder, Grunwald, Lindsay, Martin, Mozer, and Zorn; PLDI '95. */
28
29
30 #include "config.h"
31 #include "system.h"
32 #include "coretypes.h"
33 #include "tm.h"
34 #include "tree.h"
35 #include "rtl.h"
36 #include "tm_p.h"
37 #include "hard-reg-set.h"
38 #include "basic-block.h"
39 #include "insn-config.h"
40 #include "regs.h"
41 #include "flags.h"
42 #include "function.h"
43 #include "except.h"
44 #include "diagnostic-core.h"
45 #include "recog.h"
46 #include "expr.h"
47 #include "predict.h"
48 #include "coverage.h"
49 #include "sreal.h"
50 #include "params.h"
51 #include "target.h"
52 #include "cfgloop.h"
53 #include "tree-ssa.h"
54 #include "ggc.h"
55 #include "tree-pass.h"
56 #include "tree-scalar-evolution.h"
57 #include "cfgloop.h"
58 #include "pointer-set.h"
59
60 /* real constants: 0, 1, 1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE,
61 1/REG_BR_PROB_BASE, 0.5, BB_FREQ_MAX. */
62 static sreal real_zero, real_one, real_almost_one, real_br_prob_base,
63 real_inv_br_prob_base, real_one_half, real_bb_freq_max;
64
65 /* Rough probability guesses, given names.
66 PROB_VERY_UNLIKELY should be small enough so that a basic block
67 predicted by it stays below HOT_BB_FREQUENCY_FRACTION. */
68 #define PROB_VERY_UNLIKELY (REG_BR_PROB_BASE / 2000 - 1)
69 #define PROB_EVEN (REG_BR_PROB_BASE / 2)
70 #define PROB_VERY_LIKELY (REG_BR_PROB_BASE - PROB_VERY_UNLIKELY)
71 #define PROB_ALWAYS (REG_BR_PROB_BASE)
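/* For illustration, assuming REG_BR_PROB_BASE has its usual value of 10000:
   PROB_VERY_UNLIKELY is then 4 (0.04%), PROB_EVEN 5000 (50%),
   PROB_VERY_LIKELY 9996 (99.96%) and PROB_ALWAYS 10000 (100%).  */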
72
73 static void combine_predictions_for_insn (rtx, basic_block);
74 static void dump_prediction (FILE *, enum br_predictor, int, basic_block, int);
75 static void predict_paths_leading_to (basic_block, enum br_predictor, enum prediction);
76 static void predict_paths_leading_to_edge (edge, enum br_predictor, enum prediction);
77 static bool can_predict_insn_p (const_rtx);
78
79 /* Information we hold about each branch predictor.
80 Filled using information from predict.def. */
81
82 struct predictor_info
83 {
84 const char *const name; /* Name used in the debugging dumps. */
85 const int hitrate; /* Expected hitrate used by
86 predict_insn_def call. */
87 const int flags;
88 };
89
90 /* Use the given predictor without Dempster-Shafer theory if it matches
91 using the first_match heuristic. */
92 #define PRED_FLAG_FIRST_MATCH 1
93
94 /* Convert a hit rate given in percent to our internal representation. */
95
96 #define HITRATE(VAL) ((int) ((VAL) * REG_BR_PROB_BASE + 50) / 100)
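/* For example, with REG_BR_PROB_BASE assumed to be 10000, HITRATE (50)
   evaluates to 5000 and HITRATE (90) to 9000, i.e. a hit rate of N percent
   maps to N * 100 in the internal scale.  */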
97
98 #define DEF_PREDICTOR(ENUM, NAME, HITRATE, FLAGS) {NAME, HITRATE, FLAGS},
99 static const struct predictor_info predictor_info[]= {
100 #include "predict.def"
101
102 /* Upper bound on predictors. */
103 {NULL, 0, 0}
104 };
105 #undef DEF_PREDICTOR
106
107 /* Return TRUE if frequency FREQ is considered to be hot. */
108
109 static inline bool
110 maybe_hot_frequency_p (struct function *fun, int freq)
111 {
112 struct cgraph_node *node = cgraph_get_node (fun->decl);
113 if (!profile_info || !flag_branch_probabilities)
114 {
115 if (node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED)
116 return false;
117 if (node->frequency == NODE_FREQUENCY_HOT)
118 return true;
119 }
120 if (profile_status_for_function (fun) == PROFILE_ABSENT)
121 return true;
122 if (node->frequency == NODE_FREQUENCY_EXECUTED_ONCE
123 && freq < (ENTRY_BLOCK_PTR_FOR_FUNCTION (fun)->frequency * 2 / 3))
124 return false;
125 if (PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION) == 0)
126 return false;
127 if (freq < (ENTRY_BLOCK_PTR_FOR_FUNCTION (fun)->frequency
128 / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION)))
129 return false;
130 return true;
131 }
132
133 static gcov_type min_count = -1;
134
135 /* Determine the threshold for hot BB counts. */
136
137 gcov_type
138 get_hot_bb_threshold ()
139 {
140 gcov_working_set_t *ws;
141 if (min_count == -1)
142 {
143 ws = find_working_set (PARAM_VALUE (HOT_BB_COUNT_WS_PERMILLE));
144 gcc_assert (ws);
145 min_count = ws->min_counter;
146 }
147 return min_count;
148 }
149
150 /* Set the threshold for hot BB counts. */
151
152 void
153 set_hot_bb_threshold (gcov_type min)
154 {
155 min_count = min;
156 }
157
158 /* Return TRUE if profile count COUNT is considered to be hot. */
159
160 static inline bool
161 maybe_hot_count_p (struct function *fun, gcov_type count)
162 {
163 if (fun && profile_status_for_function (fun) != PROFILE_READ)
164 return true;
165 /* Code executed at most once is not hot. */
166 if (profile_info->runs >= count)
167 return false;
168 return (count >= get_hot_bb_threshold ());
169 }
170
171 /* Return true in case BB can be CPU intensive and should be optimized
172 for maximal performance. */
173
174 bool
175 maybe_hot_bb_p (struct function *fun, const_basic_block bb)
176 {
177 gcc_checking_assert (fun);
178 if (profile_status_for_function (fun) == PROFILE_READ)
179 return maybe_hot_count_p (fun, bb->count);
180 return maybe_hot_frequency_p (fun, bb->frequency);
181 }
182
183 /* Return true if the call can be hot. */
184
185 bool
186 cgraph_maybe_hot_edge_p (struct cgraph_edge *edge)
187 {
188 if (profile_info && flag_branch_probabilities
189 && !maybe_hot_count_p (NULL,
190 edge->count))
191 return false;
192 if (edge->caller->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED
193 || (edge->callee
194 && edge->callee->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED))
195 return false;
196 if (edge->caller->frequency > NODE_FREQUENCY_UNLIKELY_EXECUTED
197 && (edge->callee
198 && edge->callee->frequency <= NODE_FREQUENCY_EXECUTED_ONCE))
199 return false;
200 if (optimize_size)
201 return false;
202 if (edge->caller->frequency == NODE_FREQUENCY_HOT)
203 return true;
204 if (edge->caller->frequency == NODE_FREQUENCY_EXECUTED_ONCE
205 && edge->frequency < CGRAPH_FREQ_BASE * 3 / 2)
206 return false;
207 if (flag_guess_branch_prob)
208 {
209 if (PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION) == 0
210 || edge->frequency <= (CGRAPH_FREQ_BASE
211 / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION)))
212 return false;
213 }
214 return true;
215 }
216
217 /* Return true in case edge E can be CPU intensive and should be optimized
218 for maximal performance. */
219
220 bool
221 maybe_hot_edge_p (edge e)
222 {
223 if (profile_status == PROFILE_READ)
224 return maybe_hot_count_p (cfun, e->count);
225 return maybe_hot_frequency_p (cfun, EDGE_FREQUENCY (e));
226 }
227
228
229
230 /* Return true if profile COUNT and FREQUENCY, or function FUN's static
231 node frequency, reflect never being executed. */
232
233 static bool
234 probably_never_executed (struct function *fun,
235 gcov_type count, int frequency)
236 {
237 gcc_checking_assert (fun);
238 if (profile_status_for_function (fun) == PROFILE_READ)
239 {
240 if ((count * 4 + profile_info->runs / 2) / profile_info->runs > 0)
241 return false;
242 if (!frequency)
243 return true;
244 if (!ENTRY_BLOCK_PTR->frequency)
245 return false;
246 if (ENTRY_BLOCK_PTR->count && ENTRY_BLOCK_PTR->count < REG_BR_PROB_BASE)
247 {
248 return (RDIV (frequency * ENTRY_BLOCK_PTR->count,
249 ENTRY_BLOCK_PTR->frequency)
250 < REG_BR_PROB_BASE / 4);
251 }
252 return true;
253 }
254 if ((!profile_info || !flag_branch_probabilities)
255 && (cgraph_get_node (fun->decl)->frequency
256 == NODE_FREQUENCY_UNLIKELY_EXECUTED))
257 return true;
258 return false;
259 }
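/* A small worked example of the counter test above: with
   profile_info->runs == 100, a count of 13 gives (13 * 4 + 50) / 100 == 1 and
   the block is not treated as never executed, while a count of 12 gives 0 and
   the decision falls through to the frequency-based checks.  */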
260
261
262 /* Return true in case BB is probably never executed. */
263
264 bool
265 probably_never_executed_bb_p (struct function *fun, const_basic_block bb)
266 {
267 return probably_never_executed (fun, bb->count, bb->frequency);
268 }
269
270
271 /* Return true in case edge E is probably never executed. */
272
273 bool
274 probably_never_executed_edge_p (struct function *fun, edge e)
275 {
276 return probably_never_executed (fun, e->count, EDGE_FREQUENCY (e));
277 }
278
279 /* Return true if NODE should be optimized for size. */
280
281 bool
282 cgraph_optimize_for_size_p (struct cgraph_node *node)
283 {
284 if (optimize_size)
285 return true;
286 if (node && (node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED))
287 return true;
288 else
289 return false;
290 }
291
292 /* Return true when current function should always be optimized for size. */
293
294 bool
295 optimize_function_for_size_p (struct function *fun)
296 {
297 if (optimize_size)
298 return true;
299 if (!fun || !fun->decl)
300 return false;
301 return cgraph_optimize_for_size_p (cgraph_get_node (fun->decl));
302 }
303
304 /* Return true when current function should always be optimized for speed. */
305
306 bool
307 optimize_function_for_speed_p (struct function *fun)
308 {
309 return !optimize_function_for_size_p (fun);
310 }
311
312 /* Return TRUE when BB should be optimized for size. */
313
314 bool
315 optimize_bb_for_size_p (const_basic_block bb)
316 {
317 return optimize_function_for_size_p (cfun) || !maybe_hot_bb_p (cfun, bb);
318 }
319
320 /* Return TRUE when BB should be optimized for speed. */
321
322 bool
323 optimize_bb_for_speed_p (const_basic_block bb)
324 {
325 return !optimize_bb_for_size_p (bb);
326 }
327
328 /* Return TRUE when edge E should be optimized for size. */
329
330 bool
331 optimize_edge_for_size_p (edge e)
332 {
333 return optimize_function_for_size_p (cfun) || !maybe_hot_edge_p (e);
334 }
335
336 /* Return TRUE when edge E should be optimized for speed. */
337
338 bool
339 optimize_edge_for_speed_p (edge e)
340 {
341 return !optimize_edge_for_size_p (e);
342 }
343
344 /* Return TRUE when the current instruction should be optimized for size. */
345
346 bool
347 optimize_insn_for_size_p (void)
348 {
349 return optimize_function_for_size_p (cfun) || !crtl->maybe_hot_insn_p;
350 }
351
352 /* Return TRUE when the current instruction should be optimized for speed. */
353
354 bool
355 optimize_insn_for_speed_p (void)
356 {
357 return !optimize_insn_for_size_p ();
358 }
359
360 /* Return TRUE when LOOP should be optimized for size. */
361
362 bool
363 optimize_loop_for_size_p (struct loop *loop)
364 {
365 return optimize_bb_for_size_p (loop->header);
366 }
367
368 /* Return TRUE when LOOP should be optimized for speed. */
369
370 bool
371 optimize_loop_for_speed_p (struct loop *loop)
372 {
373 return optimize_bb_for_speed_p (loop->header);
374 }
375
376 /* Return TRUE when LOOP nest should be optimized for speed. */
377
378 bool
379 optimize_loop_nest_for_speed_p (struct loop *loop)
380 {
381 struct loop *l = loop;
382 if (optimize_loop_for_speed_p (loop))
383 return true;
384 l = loop->inner;
385 while (l && l != loop)
386 {
387 if (optimize_loop_for_speed_p (l))
388 return true;
389 if (l->inner)
390 l = l->inner;
391 else if (l->next)
392 l = l->next;
393 else
394 {
395 while (l != loop && !l->next)
396 l = loop_outer (l);
397 if (l != loop)
398 l = l->next;
399 }
400 }
401 return false;
402 }
403
404 /* Return TRUE when LOOP nest should be optimized for size. */
405
406 bool
407 optimize_loop_nest_for_size_p (struct loop *loop)
408 {
409 return !optimize_loop_nest_for_speed_p (loop);
410 }
411
412 /* Return true when edge E is likely to be well predictable by branch
413 predictor. */
414
415 bool
416 predictable_edge_p (edge e)
417 {
418 if (profile_status == PROFILE_ABSENT)
419 return false;
420 if ((e->probability
421 <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100)
422 || (REG_BR_PROB_BASE - e->probability
423 <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100))
424 return true;
425 return false;
426 }
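/* For illustration: with PARAM_PREDICTABLE_BRANCH_OUTCOME at its assumed
   default of 2, an edge counts as predictable when its probability is at most
   2% or at least 98% of REG_BR_PROB_BASE.  */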
427
428
429 /* Set RTL expansion for BB profile. */
430
431 void
432 rtl_profile_for_bb (basic_block bb)
433 {
434 crtl->maybe_hot_insn_p = maybe_hot_bb_p (cfun, bb);
435 }
436
437 /* Set RTL expansion for edge profile. */
438
439 void
440 rtl_profile_for_edge (edge e)
441 {
442 crtl->maybe_hot_insn_p = maybe_hot_edge_p (e);
443 }
444
445 /* Set RTL expansion to default mode (i.e. when profile info is not known). */
446 void
447 default_rtl_profile (void)
448 {
449 crtl->maybe_hot_insn_p = true;
450 }
451
452 /* Return true if one of the outgoing edges of BB is already predicted by
453 PREDICTOR. */
454
455 bool
456 rtl_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
457 {
458 rtx note;
459 if (!INSN_P (BB_END (bb)))
460 return false;
461 for (note = REG_NOTES (BB_END (bb)); note; note = XEXP (note, 1))
462 if (REG_NOTE_KIND (note) == REG_BR_PRED
463 && INTVAL (XEXP (XEXP (note, 0), 0)) == (int)predictor)
464 return true;
465 return false;
466 }
467
468 /* This map contains for a basic block the list of predictions for the
469 outgoing edges. */
470
471 static struct pointer_map_t *bb_predictions;
472
473 /* Structure representing predictions at the tree level. */
474
475 struct edge_prediction {
476 struct edge_prediction *ep_next;
477 edge ep_edge;
478 enum br_predictor ep_predictor;
479 int ep_probability;
480 };
481
482 /* Return true if one of the outgoing edges of BB is already predicted by
483 PREDICTOR. */
484
485 bool
486 gimple_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
487 {
488 struct edge_prediction *i;
489 void **preds = pointer_map_contains (bb_predictions, bb);
490
491 if (!preds)
492 return false;
493
494 for (i = (struct edge_prediction *) *preds; i; i = i->ep_next)
495 if (i->ep_predictor == predictor)
496 return true;
497 return false;
498 }
499
500 /* Return true when the probability of the edge is reliable.
501 
502 The profile guessing code is good at predicting the branch outcome (i.e.
503 taken/not taken), which it gets right slightly over 75% of the time.
504 It is, however, notoriously poor at predicting the probability itself.
505 In general the guessed profile appears a lot flatter (with probabilities
506 closer to 50%) than reality, so it is a bad idea to use it to drive
507 optimizations such as those disabling dynamic branch prediction for well
508 predictable branches.
509 
510 There are two exceptions - edges leading to noreturn calls and edges
511 predicted by the number of iterations heuristics are predicted well. This
512 predicate should be able to distinguish those, but at the moment it simply
513 checks for the noreturn heuristic, which is the only one giving probabilities
514 over 99% or below 1%. In the future we might want to propagate reliability
515 information across the CFG if we find it useful in multiple places. */
516 static bool
517 probability_reliable_p (int prob)
518 {
519 return (profile_status == PROFILE_READ
520 || (profile_status == PROFILE_GUESSED
521 && (prob <= HITRATE (1) || prob >= HITRATE (99))));
522 }
523
524 /* Same predicate as above, working on edges. */
525 bool
526 edge_probability_reliable_p (const_edge e)
527 {
528 return probability_reliable_p (e->probability);
529 }
530
531 /* Same predicate as edge_probability_reliable_p, working on notes. */
532 bool
533 br_prob_note_reliable_p (const_rtx note)
534 {
535 gcc_assert (REG_NOTE_KIND (note) == REG_BR_PROB);
536 return probability_reliable_p (XINT (note, 0));
537 }
538
539 static void
540 predict_insn (rtx insn, enum br_predictor predictor, int probability)
541 {
542 gcc_assert (any_condjump_p (insn));
543 if (!flag_guess_branch_prob)
544 return;
545
546 add_reg_note (insn, REG_BR_PRED,
547 gen_rtx_CONCAT (VOIDmode,
548 GEN_INT ((int) predictor),
549 GEN_INT ((int) probability)));
550 }
551
552 /* Predict insn by given predictor. */
553
554 void
555 predict_insn_def (rtx insn, enum br_predictor predictor,
556 enum prediction taken)
557 {
558 int probability = predictor_info[(int) predictor].hitrate;
559
560 if (taken != TAKEN)
561 probability = REG_BR_PROB_BASE - probability;
562
563 predict_insn (insn, predictor, probability);
564 }
565
566 /* Predict edge E with given probability if possible. */
567
568 void
569 rtl_predict_edge (edge e, enum br_predictor predictor, int probability)
570 {
571 rtx last_insn;
572 last_insn = BB_END (e->src);
573
574 /* We can store the branch prediction information only about
575 conditional jumps. */
576 if (!any_condjump_p (last_insn))
577 return;
578
579 /* We always store probability of branching. */
580 if (e->flags & EDGE_FALLTHRU)
581 probability = REG_BR_PROB_BASE - probability;
582
583 predict_insn (last_insn, predictor, probability);
584 }
585
586 /* Predict edge E with the given PROBABILITY. */
587 void
588 gimple_predict_edge (edge e, enum br_predictor predictor, int probability)
589 {
590 gcc_assert (profile_status != PROFILE_GUESSED);
591 if ((e->src != ENTRY_BLOCK_PTR && EDGE_COUNT (e->src->succs) > 1)
592 && flag_guess_branch_prob && optimize)
593 {
594 struct edge_prediction *i = XNEW (struct edge_prediction);
595 void **preds = pointer_map_insert (bb_predictions, e->src);
596
597 i->ep_next = (struct edge_prediction *) *preds;
598 *preds = i;
599 i->ep_probability = probability;
600 i->ep_predictor = predictor;
601 i->ep_edge = e;
602 }
603 }
604
605 /* Remove all predictions on given basic block that are attached
606 to edge E. */
607 void
608 remove_predictions_associated_with_edge (edge e)
609 {
610 void **preds;
611
612 if (!bb_predictions)
613 return;
614
615 preds = pointer_map_contains (bb_predictions, e->src);
616
617 if (preds)
618 {
619 struct edge_prediction **prediction = (struct edge_prediction **) preds;
620 struct edge_prediction *next;
621
622 while (*prediction)
623 {
624 if ((*prediction)->ep_edge == e)
625 {
626 next = (*prediction)->ep_next;
627 free (*prediction);
628 *prediction = next;
629 }
630 else
631 prediction = &((*prediction)->ep_next);
632 }
633 }
634 }
635
636 /* Clears the list of predictions stored for BB. */
637
638 static void
639 clear_bb_predictions (basic_block bb)
640 {
641 void **preds = pointer_map_contains (bb_predictions, bb);
642 struct edge_prediction *pred, *next;
643
644 if (!preds)
645 return;
646
647 for (pred = (struct edge_prediction *) *preds; pred; pred = next)
648 {
649 next = pred->ep_next;
650 free (pred);
651 }
652 *preds = NULL;
653 }
654
655 /* Return true when we can store a prediction on insn INSN.
656 At the moment we represent predictions only on conditional
657 jumps, not on computed jumps or other complicated cases. */
658 static bool
659 can_predict_insn_p (const_rtx insn)
660 {
661 return (JUMP_P (insn)
662 && any_condjump_p (insn)
663 && EDGE_COUNT (BLOCK_FOR_INSN (insn)->succs) >= 2);
664 }
665
666 /* Predict edge E by given predictor if possible. */
667
668 void
669 predict_edge_def (edge e, enum br_predictor predictor,
670 enum prediction taken)
671 {
672 int probability = predictor_info[(int) predictor].hitrate;
673
674 if (taken != TAKEN)
675 probability = REG_BR_PROB_BASE - probability;
676
677 predict_edge (e, predictor, probability);
678 }
679
680 /* Invert all branch predictions or probability notes in the INSN. This needs
681 to be done each time we invert the condition used by the jump. */
682
683 void
684 invert_br_probabilities (rtx insn)
685 {
686 rtx note;
687
688 for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
689 if (REG_NOTE_KIND (note) == REG_BR_PROB)
690 XINT (note, 0) = REG_BR_PROB_BASE - XINT (note, 0);
691 else if (REG_NOTE_KIND (note) == REG_BR_PRED)
692 XEXP (XEXP (note, 0), 1)
693 = GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (XEXP (note, 0), 1)));
694 }
695
696 /* Dump information about the branch prediction to the output file. */
697
698 static void
699 dump_prediction (FILE *file, enum br_predictor predictor, int probability,
700 basic_block bb, int used)
701 {
702 edge e;
703 edge_iterator ei;
704
705 if (!file)
706 return;
707
708 FOR_EACH_EDGE (e, ei, bb->succs)
709 if (! (e->flags & EDGE_FALLTHRU))
710 break;
711
712 fprintf (file, " %s heuristics%s: %.1f%%",
713 predictor_info[predictor].name,
714 used ? "" : " (ignored)", probability * 100.0 / REG_BR_PROB_BASE);
715
716 if (bb->count)
717 {
718 fprintf (file, " exec ");
719 fprintf (file, HOST_WIDEST_INT_PRINT_DEC, bb->count);
720 if (e)
721 {
722 fprintf (file, " hit ");
723 fprintf (file, HOST_WIDEST_INT_PRINT_DEC, e->count);
724 fprintf (file, " (%.1f%%)", e->count * 100.0 / bb->count);
725 }
726 }
727
728 fprintf (file, "\n");
729 }
730
731 /* We cannot predict the probabilities of the outgoing edges of BB. Set them
732 evenly and hope for the best. */
733 static void
734 set_even_probabilities (basic_block bb)
735 {
736 int nedges = 0;
737 edge e;
738 edge_iterator ei;
739
740 FOR_EACH_EDGE (e, ei, bb->succs)
741 if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
742 nedges ++;
743 FOR_EACH_EDGE (e, ei, bb->succs)
744 if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
745 e->probability = (REG_BR_PROB_BASE + nedges / 2) / nedges;
746 else
747 e->probability = 0;
748 }
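/* For example, a basic block with three non-EH, non-fake successors and
   REG_BR_PROB_BASE of 10000 gives each of them (10000 + 1) / 3 == 3333.  */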
749
750 /* Combine all REG_BR_PRED notes into a single probability and attach a
751 REG_BR_PROB note if not already present. Remove the now useless REG_BR_PRED notes. */
752
753 static void
754 combine_predictions_for_insn (rtx insn, basic_block bb)
755 {
756 rtx prob_note;
757 rtx *pnote;
758 rtx note;
759 int best_probability = PROB_EVEN;
760 enum br_predictor best_predictor = END_PREDICTORS;
761 int combined_probability = REG_BR_PROB_BASE / 2;
762 int d;
763 bool first_match = false;
764 bool found = false;
765
766 if (!can_predict_insn_p (insn))
767 {
768 set_even_probabilities (bb);
769 return;
770 }
771
772 prob_note = find_reg_note (insn, REG_BR_PROB, 0);
773 pnote = &REG_NOTES (insn);
774 if (dump_file)
775 fprintf (dump_file, "Predictions for insn %i bb %i\n", INSN_UID (insn),
776 bb->index);
777
778 /* We implement the "first match" heuristic and use the probability guessed
779 by the predictor with the smallest index. */
780 for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
781 if (REG_NOTE_KIND (note) == REG_BR_PRED)
782 {
783 enum br_predictor predictor = ((enum br_predictor)
784 INTVAL (XEXP (XEXP (note, 0), 0)));
785 int probability = INTVAL (XEXP (XEXP (note, 0), 1));
786
787 found = true;
788 if (best_predictor > predictor)
789 best_probability = probability, best_predictor = predictor;
790
791 d = (combined_probability * probability
792 + (REG_BR_PROB_BASE - combined_probability)
793 * (REG_BR_PROB_BASE - probability));
794
795 /* Use FP math to avoid overflows of 32bit integers. */
796 if (d == 0)
797 /* If one probability is 0% and one 100%, avoid division by zero. */
798 combined_probability = REG_BR_PROB_BASE / 2;
799 else
800 combined_probability = (((double) combined_probability) * probability
801 * REG_BR_PROB_BASE / d + 0.5);
802 }
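/* A worked example of the combination above, assuming REG_BR_PROB_BASE is
   10000: starting from the neutral 5000 and combining with a note of 7000
   gives d = 5000*7000 + 5000*3000 == 50000000 and a combined probability of
   7000; combining that with a further agreeing note of 8000 gives
   d = 7000*8000 + 3000*2000 == 62000000 and about 9032, i.e. two agreeing
   70% and 80% predictions reinforce each other to roughly 90%.  */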
803
804 /* Decide which heuristic to use. In case we didn't match anything,
805 use the no_prediction heuristic; in case we did match, use either
806 first match or Dempster-Shafer theory depending on the flags. */
807
808 if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
809 first_match = true;
810
811 if (!found)
812 dump_prediction (dump_file, PRED_NO_PREDICTION,
813 combined_probability, bb, true);
814 else
815 {
816 dump_prediction (dump_file, PRED_DS_THEORY, combined_probability,
817 bb, !first_match);
818 dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability,
819 bb, first_match);
820 }
821
822 if (first_match)
823 combined_probability = best_probability;
824 dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);
825
826 while (*pnote)
827 {
828 if (REG_NOTE_KIND (*pnote) == REG_BR_PRED)
829 {
830 enum br_predictor predictor = ((enum br_predictor)
831 INTVAL (XEXP (XEXP (*pnote, 0), 0)));
832 int probability = INTVAL (XEXP (XEXP (*pnote, 0), 1));
833
834 dump_prediction (dump_file, predictor, probability, bb,
835 !first_match || best_predictor == predictor);
836 *pnote = XEXP (*pnote, 1);
837 }
838 else
839 pnote = &XEXP (*pnote, 1);
840 }
841
842 if (!prob_note)
843 {
844 add_int_reg_note (insn, REG_BR_PROB, combined_probability);
845
846 /* Save the prediction into the CFG in case we are looking at a
847 non-degenerate conditional jump. */
848 if (!single_succ_p (bb))
849 {
850 BRANCH_EDGE (bb)->probability = combined_probability;
851 FALLTHRU_EDGE (bb)->probability
852 = REG_BR_PROB_BASE - combined_probability;
853 }
854 }
855 else if (!single_succ_p (bb))
856 {
857 int prob = XINT (prob_note, 0);
858
859 BRANCH_EDGE (bb)->probability = prob;
860 FALLTHRU_EDGE (bb)->probability = REG_BR_PROB_BASE - prob;
861 }
862 else
863 single_succ_edge (bb)->probability = REG_BR_PROB_BASE;
864 }
865
866 /* Combine predictions into a single probability and store it in the CFG.
867 Remove the now useless prediction entries. */
868
869 static void
870 combine_predictions_for_bb (basic_block bb)
871 {
872 int best_probability = PROB_EVEN;
873 enum br_predictor best_predictor = END_PREDICTORS;
874 int combined_probability = REG_BR_PROB_BASE / 2;
875 int d;
876 bool first_match = false;
877 bool found = false;
878 struct edge_prediction *pred;
879 int nedges = 0;
880 edge e, first = NULL, second = NULL;
881 edge_iterator ei;
882 void **preds;
883
884 FOR_EACH_EDGE (e, ei, bb->succs)
885 if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
886 {
887 nedges ++;
888 if (first && !second)
889 second = e;
890 if (!first)
891 first = e;
892 }
893
894 /* When there is no successor or only one choice, prediction is easy.
895 
896 We are lazy for now and predict only basic blocks with two outgoing
897 edges. It is possible to predict the generic case too, but we would have to
898 ignore the first match heuristic and do more involved combining. Implement
899 this later. */
900 if (nedges != 2)
901 {
902 if (!bb->count)
903 set_even_probabilities (bb);
904 clear_bb_predictions (bb);
905 if (dump_file)
906 fprintf (dump_file, "%i edges in bb %i predicted to even probabilities\n",
907 nedges, bb->index);
908 return;
909 }
910
911 if (dump_file)
912 fprintf (dump_file, "Predictions for bb %i\n", bb->index);
913
914 preds = pointer_map_contains (bb_predictions, bb);
915 if (preds)
916 {
917 /* We implement the "first match" heuristic and use the probability guessed
918 by the predictor with the smallest index. */
919 for (pred = (struct edge_prediction *) *preds; pred; pred = pred->ep_next)
920 {
921 enum br_predictor predictor = pred->ep_predictor;
922 int probability = pred->ep_probability;
923
924 if (pred->ep_edge != first)
925 probability = REG_BR_PROB_BASE - probability;
926
927 found = true;
928 /* The first match heuristic would be wildly confused if we predicted
929 both directions. */
930 if (best_predictor > predictor)
931 {
932 struct edge_prediction *pred2;
933 int prob = probability;
934
935 for (pred2 = (struct edge_prediction *) *preds; pred2; pred2 = pred2->ep_next)
936 if (pred2 != pred && pred2->ep_predictor == pred->ep_predictor)
937 {
938 int probability2 = pred2->ep_probability;
939
940 if (pred2->ep_edge != first)
941 probability2 = REG_BR_PROB_BASE - probability2;
942
943 if ((probability < REG_BR_PROB_BASE / 2) !=
944 (probability2 < REG_BR_PROB_BASE / 2))
945 break;
946
947 /* If the same predictor later gave a better result, go for it! */
948 if ((probability >= REG_BR_PROB_BASE / 2 && (probability2 > probability))
949 || (probability <= REG_BR_PROB_BASE / 2 && (probability2 < probability)))
950 prob = probability2;
951 }
952 if (!pred2)
953 best_probability = prob, best_predictor = predictor;
954 }
955
956 d = (combined_probability * probability
957 + (REG_BR_PROB_BASE - combined_probability)
958 * (REG_BR_PROB_BASE - probability));
959
960 /* Use FP math to avoid overflows of 32bit integers. */
961 if (d == 0)
962 /* If one probability is 0% and one 100%, avoid division by zero. */
963 combined_probability = REG_BR_PROB_BASE / 2;
964 else
965 combined_probability = (((double) combined_probability)
966 * probability
967 * REG_BR_PROB_BASE / d + 0.5);
968 }
969 }
970
971 /* Decide which heuristic to use. In case we didn't match anything,
972 use the no_prediction heuristic; in case we did match, use either
973 first match or Dempster-Shafer theory depending on the flags. */
974
975 if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
976 first_match = true;
977
978 if (!found)
979 dump_prediction (dump_file, PRED_NO_PREDICTION, combined_probability, bb, true);
980 else
981 {
982 dump_prediction (dump_file, PRED_DS_THEORY, combined_probability, bb,
983 !first_match);
984 dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability, bb,
985 first_match);
986 }
987
988 if (first_match)
989 combined_probability = best_probability;
990 dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);
991
992 if (preds)
993 {
994 for (pred = (struct edge_prediction *) *preds; pred; pred = pred->ep_next)
995 {
996 enum br_predictor predictor = pred->ep_predictor;
997 int probability = pred->ep_probability;
998
999 if (pred->ep_edge != EDGE_SUCC (bb, 0))
1000 probability = REG_BR_PROB_BASE - probability;
1001 dump_prediction (dump_file, predictor, probability, bb,
1002 !first_match || best_predictor == predictor);
1003 }
1004 }
1005 clear_bb_predictions (bb);
1006
1007 if (!bb->count)
1008 {
1009 first->probability = combined_probability;
1010 second->probability = REG_BR_PROB_BASE - combined_probability;
1011 }
1012 }
1013
1014 /* Check if T1 and T2 satisfy the IV_COMPARE condition.
1015 Return the SSA_NAME if the condition is satisfied, NULL otherwise.
1016
1017 T1 and T2 should be one of the following cases:
1018 1. T1 is SSA_NAME, T2 is NULL
1019 2. T1 is SSA_NAME, T2 is INTEGER_CST between [-4, 4]
1020 3. T2 is SSA_NAME, T1 is INTEGER_CST between [-4, 4] */
1021
1022 static tree
1023 strips_small_constant (tree t1, tree t2)
1024 {
1025 tree ret = NULL;
1026 int value = 0;
1027
1028 if (!t1)
1029 return NULL;
1030 else if (TREE_CODE (t1) == SSA_NAME)
1031 ret = t1;
1032 else if (host_integerp (t1, 0))
1033 value = tree_low_cst (t1, 0);
1034 else
1035 return NULL;
1036
1037 if (!t2)
1038 return ret;
1039 else if (host_integerp (t2, 0))
1040 value = tree_low_cst (t2, 0);
1041 else if (TREE_CODE (t2) == SSA_NAME)
1042 {
1043 if (ret)
1044 return NULL;
1045 else
1046 ret = t2;
1047 }
1048
1049 if (value <= 4 && value >= -4)
1050 return ret;
1051 else
1052 return NULL;
1053 }
1054
1055 /* Return the SSA_NAME in T or in T's operands.
1056 Return NULL if no SSA_NAME can be found. */
1057
1058 static tree
1059 get_base_value (tree t)
1060 {
1061 if (TREE_CODE (t) == SSA_NAME)
1062 return t;
1063
1064 if (!BINARY_CLASS_P (t))
1065 return NULL;
1066
1067 switch (TREE_OPERAND_LENGTH (t))
1068 {
1069 case 1:
1070 return strips_small_constant (TREE_OPERAND (t, 0), NULL);
1071 case 2:
1072 return strips_small_constant (TREE_OPERAND (t, 0),
1073 TREE_OPERAND (t, 1));
1074 default:
1075 return NULL;
1076 }
1077 }
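/* For example, applied to the expression i_4 + 3 this returns i_4 (the
   constant 3 lies within [-4, 4]), while for i_4 + 8 it returns NULL.  */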
1078
1079 /* Check the compare STMT in LOOP. If it compares an induction
1080 variable to a loop invariant, return true, and save
1081 LOOP_INVARIANT, COMPARE_CODE and LOOP_STEP.
1082 Otherwise return false and set LOOP_INVARIANT to NULL. */
1083
1084 static bool
1085 is_comparison_with_loop_invariant_p (gimple stmt, struct loop *loop,
1086 tree *loop_invariant,
1087 enum tree_code *compare_code,
1088 tree *loop_step,
1089 tree *loop_iv_base)
1090 {
1091 tree op0, op1, bound, base;
1092 affine_iv iv0, iv1;
1093 enum tree_code code;
1094 tree step;
1095
1096 code = gimple_cond_code (stmt);
1097 *loop_invariant = NULL;
1098
1099 switch (code)
1100 {
1101 case GT_EXPR:
1102 case GE_EXPR:
1103 case NE_EXPR:
1104 case LT_EXPR:
1105 case LE_EXPR:
1106 case EQ_EXPR:
1107 break;
1108
1109 default:
1110 return false;
1111 }
1112
1113 op0 = gimple_cond_lhs (stmt);
1114 op1 = gimple_cond_rhs (stmt);
1115
1116 if ((TREE_CODE (op0) != SSA_NAME && TREE_CODE (op0) != INTEGER_CST)
1117 || (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op1) != INTEGER_CST))
1118 return false;
1119 if (!simple_iv (loop, loop_containing_stmt (stmt), op0, &iv0, true))
1120 return false;
1121 if (!simple_iv (loop, loop_containing_stmt (stmt), op1, &iv1, true))
1122 return false;
1123 if (TREE_CODE (iv0.step) != INTEGER_CST
1124 || TREE_CODE (iv1.step) != INTEGER_CST)
1125 return false;
1126 if ((integer_zerop (iv0.step) && integer_zerop (iv1.step))
1127 || (!integer_zerop (iv0.step) && !integer_zerop (iv1.step)))
1128 return false;
1129
1130 if (integer_zerop (iv0.step))
1131 {
1132 if (code != NE_EXPR && code != EQ_EXPR)
1133 code = invert_tree_comparison (code, false);
1134 bound = iv0.base;
1135 base = iv1.base;
1136 if (host_integerp (iv1.step, 0))
1137 step = iv1.step;
1138 else
1139 return false;
1140 }
1141 else
1142 {
1143 bound = iv1.base;
1144 base = iv0.base;
1145 if (host_integerp (iv0.step, 0))
1146 step = iv0.step;
1147 else
1148 return false;
1149 }
1150
1151 if (TREE_CODE (bound) != INTEGER_CST)
1152 bound = get_base_value (bound);
1153 if (!bound)
1154 return false;
1155 if (TREE_CODE (base) != INTEGER_CST)
1156 base = get_base_value (base);
1157 if (!base)
1158 return false;
1159
1160 *loop_invariant = bound;
1161 *compare_code = code;
1162 *loop_step = step;
1163 *loop_iv_base = base;
1164 return true;
1165 }
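/* As an illustration, for a GIMPLE_COND "i_1 < n_2" where i_1 is an
   induction variable with base 0 and step 4 and n_2 is loop invariant, this
   returns true with *LOOP_INVARIANT = n_2, *COMPARE_CODE = LT_EXPR,
   *LOOP_STEP = 4 and *LOOP_IV_BASE = 0.  */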
1166
1167 /* Compare two SSA_NAMEs: returns TRUE if T1 and T2 are value coherent. */
1168
1169 static bool
1170 expr_coherent_p (tree t1, tree t2)
1171 {
1172 gimple stmt;
1173 tree ssa_name_1 = NULL;
1174 tree ssa_name_2 = NULL;
1175
1176 gcc_assert (TREE_CODE (t1) == SSA_NAME || TREE_CODE (t1) == INTEGER_CST);
1177 gcc_assert (TREE_CODE (t2) == SSA_NAME || TREE_CODE (t2) == INTEGER_CST);
1178
1179 if (t1 == t2)
1180 return true;
1181
1182 if (TREE_CODE (t1) == INTEGER_CST && TREE_CODE (t2) == INTEGER_CST)
1183 return true;
1184 if (TREE_CODE (t1) == INTEGER_CST || TREE_CODE (t2) == INTEGER_CST)
1185 return false;
1186
1187 /* Check to see if t1 is expressed/defined with t2. */
1188 stmt = SSA_NAME_DEF_STMT (t1);
1189 gcc_assert (stmt != NULL);
1190 if (is_gimple_assign (stmt))
1191 {
1192 ssa_name_1 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
1193 if (ssa_name_1 && ssa_name_1 == t2)
1194 return true;
1195 }
1196
1197 /* Check to see if t2 is expressed/defined with t1. */
1198 stmt = SSA_NAME_DEF_STMT (t2);
1199 gcc_assert (stmt != NULL);
1200 if (is_gimple_assign (stmt))
1201 {
1202 ssa_name_2 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
1203 if (ssa_name_2 && ssa_name_2 == t1)
1204 return true;
1205 }
1206
1207 /* Compare if t1 and t2's def_stmts are identical. */
1208 if (ssa_name_2 != NULL && ssa_name_1 == ssa_name_2)
1209 return true;
1210 else
1211 return false;
1212 }
1213
1214 /* Predict the branch probability of BB when BB contains a branch that compares
1215 an induction variable in LOOP, whose base is LOOP_IV_BASE_VAR, to LOOP_BOUND_VAR.
1216 The loop exit condition uses LOOP_BOUND_CODE with a step of LOOP_BOUND_STEP.
1217
1218 E.g.
1219 for (int i = 0; i < bound; i++) {
1220 if (i < bound - 2)
1221 computation_1();
1222 else
1223 computation_2();
1224 }
1225
1226 In this loop, we will predict the branch inside the loop to be taken. */
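/* A worked example of the all-constant case handled below: with bound == 100
   and the inner test "i < 98" (base 0, step 1), loop_count is 100,
   compare_count is 98 and the inner branch gets a probability of
   98 * REG_BR_PROB_BASE / 100, i.e. 9800 assuming the usual
   REG_BR_PROB_BASE of 10000.  */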
1227
1228 static void
1229 predict_iv_comparison (struct loop *loop, basic_block bb,
1230 tree loop_bound_var,
1231 tree loop_iv_base_var,
1232 enum tree_code loop_bound_code,
1233 int loop_bound_step)
1234 {
1235 gimple stmt;
1236 tree compare_var, compare_base;
1237 enum tree_code compare_code;
1238 tree compare_step_var;
1239 edge then_edge;
1240 edge_iterator ei;
1241
1242 if (predicted_by_p (bb, PRED_LOOP_ITERATIONS_GUESSED)
1243 || predicted_by_p (bb, PRED_LOOP_ITERATIONS)
1244 || predicted_by_p (bb, PRED_LOOP_EXIT))
1245 return;
1246
1247 stmt = last_stmt (bb);
1248 if (!stmt || gimple_code (stmt) != GIMPLE_COND)
1249 return;
1250 if (!is_comparison_with_loop_invariant_p (stmt, loop, &compare_var,
1251 &compare_code,
1252 &compare_step_var,
1253 &compare_base))
1254 return;
1255
1256 /* Find the taken edge. */
1257 FOR_EACH_EDGE (then_edge, ei, bb->succs)
1258 if (then_edge->flags & EDGE_TRUE_VALUE)
1259 break;
1260
1261 /* When comparing an IV to a loop invariant, NE is more likely to be
1262 taken while EQ is more likely to be not-taken. */
1263 if (compare_code == NE_EXPR)
1264 {
1265 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1266 return;
1267 }
1268 else if (compare_code == EQ_EXPR)
1269 {
1270 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1271 return;
1272 }
1273
1274 if (!expr_coherent_p (loop_iv_base_var, compare_base))
1275 return;
1276
1277 /* If loop bound, base and compare bound are all constants, we can
1278 calculate the probability directly. */
1279 if (host_integerp (loop_bound_var, 0)
1280 && host_integerp (compare_var, 0)
1281 && host_integerp (compare_base, 0))
1282 {
1283 int probability;
1284 bool of, overflow = false;
1285 double_int mod, compare_count, tem, loop_count;
1286
1287 double_int loop_bound = tree_to_double_int (loop_bound_var);
1288 double_int compare_bound = tree_to_double_int (compare_var);
1289 double_int base = tree_to_double_int (compare_base);
1290 double_int compare_step = tree_to_double_int (compare_step_var);
1291
1292 /* (loop_bound - base) / compare_step */
1293 tem = loop_bound.sub_with_overflow (base, &of);
1294 overflow |= of;
1295 loop_count = tem.divmod_with_overflow (compare_step,
1296 0, TRUNC_DIV_EXPR,
1297 &mod, &of);
1298 overflow |= of;
1299
1300 if ((!compare_step.is_negative ())
1301 ^ (compare_code == LT_EXPR || compare_code == LE_EXPR))
1302 {
1303 /* (loop_bound - compare_bound) / compare_step */
1304 tem = loop_bound.sub_with_overflow (compare_bound, &of);
1305 overflow |= of;
1306 compare_count = tem.divmod_with_overflow (compare_step,
1307 0, TRUNC_DIV_EXPR,
1308 &mod, &of);
1309 overflow |= of;
1310 }
1311 else
1312 {
1313 /* (compare_bound - base) / compare_step */
1314 tem = compare_bound.sub_with_overflow (base, &of);
1315 overflow |= of;
1316 compare_count = tem.divmod_with_overflow (compare_step,
1317 0, TRUNC_DIV_EXPR,
1318 &mod, &of);
1319 overflow |= of;
1320 }
1321 if (compare_code == LE_EXPR || compare_code == GE_EXPR)
1322 ++compare_count;
1323 if (loop_bound_code == LE_EXPR || loop_bound_code == GE_EXPR)
1324 ++loop_count;
1325 if (compare_count.is_negative ())
1326 compare_count = double_int_zero;
1327 if (loop_count.is_negative ())
1328 loop_count = double_int_zero;
1329 if (loop_count.is_zero ())
1330 probability = 0;
1331 else if (compare_count.scmp (loop_count) == 1)
1332 probability = REG_BR_PROB_BASE;
1333 else
1334 {
1335 /* If loop_count is too big, such that REG_BR_PROB_BASE * loop_count
1336 could overflow, shift both loop_count and compare_count right
1337 a bit so that it doesn't overflow. Note both counts are known not
1338 to be negative at this point. */
1339 int clz_bits = clz_hwi (loop_count.high);
1340 gcc_assert (REG_BR_PROB_BASE < 32768);
1341 if (clz_bits < 16)
1342 {
1343 loop_count = loop_count.arshift (16 - clz_bits, HOST_BITS_PER_DOUBLE_INT);
1344 compare_count = compare_count.arshift (16 - clz_bits, HOST_BITS_PER_DOUBLE_INT);
1345 }
1346 tem = compare_count.mul_with_sign (double_int::from_shwi
1347 (REG_BR_PROB_BASE), true, &of);
1348 gcc_assert (!of);
1349 tem = tem.divmod (loop_count, true, TRUNC_DIV_EXPR, &mod);
1350 probability = tem.to_uhwi ();
1351 }
1352
1353 if (!overflow)
1354 predict_edge (then_edge, PRED_LOOP_IV_COMPARE, probability);
1355
1356 return;
1357 }
1358
1359 if (expr_coherent_p (loop_bound_var, compare_var))
1360 {
1361 if ((loop_bound_code == LT_EXPR || loop_bound_code == LE_EXPR)
1362 && (compare_code == LT_EXPR || compare_code == LE_EXPR))
1363 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1364 else if ((loop_bound_code == GT_EXPR || loop_bound_code == GE_EXPR)
1365 && (compare_code == GT_EXPR || compare_code == GE_EXPR))
1366 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1367 else if (loop_bound_code == NE_EXPR)
1368 {
1369 /* If the loop backedge condition is "(i != bound)", we do
1370 the comparison based on the step of IV:
1371 * step < 0 : backedge condition is like (i > bound)
1372 * step > 0 : backedge condition is like (i < bound) */
1373 gcc_assert (loop_bound_step != 0);
1374 if (loop_bound_step > 0
1375 && (compare_code == LT_EXPR
1376 || compare_code == LE_EXPR))
1377 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1378 else if (loop_bound_step < 0
1379 && (compare_code == GT_EXPR
1380 || compare_code == GE_EXPR))
1381 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1382 else
1383 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1384 }
1385 else
1386 /* The branch is predicted not-taken if loop_bound_code is
1387 the opposite of compare_code. */
1388 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1389 }
1390 else if (expr_coherent_p (loop_iv_base_var, compare_var))
1391 {
1392 /* For cases like:
1393 for (i = s; i < h; i++)
1394 if (i > s + 2) ....
1395 The branch should be predicted taken. */
1396 if (loop_bound_step > 0
1397 && (compare_code == GT_EXPR || compare_code == GE_EXPR))
1398 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1399 else if (loop_bound_step < 0
1400 && (compare_code == LT_EXPR || compare_code == LE_EXPR))
1401 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1402 else
1403 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1404 }
1405 }
1406
1407 /* Predict for extra loop exits that will lead to EXIT_EDGE. The extra loop
1408 exits result from short-circuit conditions that will generate an
1409 if_tmp. E.g.:
1410
1411 if (foo() || global > 10)
1412 break;
1413
1414 This will be translated into:
1415
1416 BB3:
1417 loop header...
1418 BB4:
1419 if foo() goto BB6 else goto BB5
1420 BB5:
1421 if global > 10 goto BB6 else goto BB7
1422 BB6:
1423 goto BB7
1424 BB7:
1425 iftmp = (PHI 0(BB5), 1(BB6))
1426 if iftmp == 1 goto BB8 else goto BB3
1427 BB8:
1428 outside of the loop...
1429
1430 The edge BB7->BB8 is a loop exit because BB8 is outside of the loop.
1431 From the dataflow, we can infer that BB4->BB6 and BB5->BB6 are also loop
1432 exits. This function takes BB7->BB8 as input, and finds out the extra loop
1433 exits to predict them using PRED_LOOP_EXIT. */
1434
1435 static void
1436 predict_extra_loop_exits (edge exit_edge)
1437 {
1438 unsigned i;
1439 bool check_value_one;
1440 gimple phi_stmt;
1441 tree cmp_rhs, cmp_lhs;
1442 gimple cmp_stmt = last_stmt (exit_edge->src);
1443
1444 if (!cmp_stmt || gimple_code (cmp_stmt) != GIMPLE_COND)
1445 return;
1446 cmp_rhs = gimple_cond_rhs (cmp_stmt);
1447 cmp_lhs = gimple_cond_lhs (cmp_stmt);
1448 if (!TREE_CONSTANT (cmp_rhs)
1449 || !(integer_zerop (cmp_rhs) || integer_onep (cmp_rhs)))
1450 return;
1451 if (TREE_CODE (cmp_lhs) != SSA_NAME)
1452 return;
1453
1454 /* If check_value_one is true, only the phi_args with value '1' will lead
1455 to loop exit. Otherwise, only the phi_args with value '0' will lead to
1456 loop exit. */
1457 check_value_one = (((integer_onep (cmp_rhs))
1458 ^ (gimple_cond_code (cmp_stmt) == EQ_EXPR))
1459 ^ ((exit_edge->flags & EDGE_TRUE_VALUE) != 0));
1460
1461 phi_stmt = SSA_NAME_DEF_STMT (cmp_lhs);
1462 if (!phi_stmt || gimple_code (phi_stmt) != GIMPLE_PHI)
1463 return;
1464
1465 for (i = 0; i < gimple_phi_num_args (phi_stmt); i++)
1466 {
1467 edge e1;
1468 edge_iterator ei;
1469 tree val = gimple_phi_arg_def (phi_stmt, i);
1470 edge e = gimple_phi_arg_edge (phi_stmt, i);
1471
1472 if (!TREE_CONSTANT (val) || !(integer_zerop (val) || integer_onep (val)))
1473 continue;
1474 if ((check_value_one ^ integer_onep (val)) == 1)
1475 continue;
1476 if (EDGE_COUNT (e->src->succs) != 1)
1477 {
1478 predict_paths_leading_to_edge (e, PRED_LOOP_EXIT, NOT_TAKEN);
1479 continue;
1480 }
1481
1482 FOR_EACH_EDGE (e1, ei, e->src->preds)
1483 predict_paths_leading_to_edge (e1, PRED_LOOP_EXIT, NOT_TAKEN);
1484 }
1485 }
1486
1487 /* Predict edge probabilities by exploiting loop structure. */
1488
1489 static void
1490 predict_loops (void)
1491 {
1492 loop_iterator li;
1493 struct loop *loop;
1494
1495 /* Try to predict out blocks in a loop that are not part of a
1496 natural loop. */
1497 FOR_EACH_LOOP (li, loop, 0)
1498 {
1499 basic_block bb, *bbs;
1500 unsigned j, n_exits;
1501 vec<edge> exits;
1502 struct tree_niter_desc niter_desc;
1503 edge ex;
1504 struct nb_iter_bound *nb_iter;
1505 enum tree_code loop_bound_code = ERROR_MARK;
1506 tree loop_bound_step = NULL;
1507 tree loop_bound_var = NULL;
1508 tree loop_iv_base = NULL;
1509 gimple stmt = NULL;
1510
1511 exits = get_loop_exit_edges (loop);
1512 n_exits = exits.length ();
1513 if (!n_exits)
1514 {
1515 exits.release ();
1516 continue;
1517 }
1518
1519 FOR_EACH_VEC_ELT (exits, j, ex)
1520 {
1521 tree niter = NULL;
1522 HOST_WIDE_INT nitercst;
1523 int max = PARAM_VALUE (PARAM_MAX_PREDICTED_ITERATIONS);
1524 int probability;
1525 enum br_predictor predictor;
1526
1527 predict_extra_loop_exits (ex);
1528
1529 if (number_of_iterations_exit (loop, ex, &niter_desc, false, false))
1530 niter = niter_desc.niter;
1531 if (!niter || TREE_CODE (niter_desc.niter) != INTEGER_CST)
1532 niter = loop_niter_by_eval (loop, ex);
1533
1534 if (TREE_CODE (niter) == INTEGER_CST)
1535 {
1536 if (host_integerp (niter, 1)
1537 && max
1538 && compare_tree_int (niter, max - 1) == -1)
1539 nitercst = tree_low_cst (niter, 1) + 1;
1540 else
1541 nitercst = max;
1542 predictor = PRED_LOOP_ITERATIONS;
1543 }
1544 /* If we have just one exit and we can derive some information about
1545 the number of iterations of the loop from the statements inside
1546 the loop, use it to predict this exit. */
1547 else if (n_exits == 1)
1548 {
1549 nitercst = estimated_stmt_executions_int (loop);
1550 if (nitercst < 0)
1551 continue;
1552 if (nitercst > max)
1553 nitercst = max;
1554
1555 predictor = PRED_LOOP_ITERATIONS_GUESSED;
1556 }
1557 else
1558 continue;
1559
1560 /* If the prediction for number of iterations is zero, do not
1561 predict the exit edges. */
1562 if (nitercst == 0)
1563 continue;
1564
1565 probability = ((REG_BR_PROB_BASE + nitercst / 2) / nitercst);
1566 predict_edge (ex, predictor, probability);
1567 }
1568 exits.release ();
1569
1570 /* Find information about loop bound variables. */
1571 for (nb_iter = loop->bounds; nb_iter;
1572 nb_iter = nb_iter->next)
1573 if (nb_iter->stmt
1574 && gimple_code (nb_iter->stmt) == GIMPLE_COND)
1575 {
1576 stmt = nb_iter->stmt;
1577 break;
1578 }
1579 if (!stmt && last_stmt (loop->header)
1580 && gimple_code (last_stmt (loop->header)) == GIMPLE_COND)
1581 stmt = last_stmt (loop->header);
1582 if (stmt)
1583 is_comparison_with_loop_invariant_p (stmt, loop,
1584 &loop_bound_var,
1585 &loop_bound_code,
1586 &loop_bound_step,
1587 &loop_iv_base);
1588
1589 bbs = get_loop_body (loop);
1590
1591 for (j = 0; j < loop->num_nodes; j++)
1592 {
1593 int header_found = 0;
1594 edge e;
1595 edge_iterator ei;
1596
1597 bb = bbs[j];
1598
1599 /* Bypass loop heuristics on continue statements. These
1600 statements construct loops via "non-loop" constructs
1601 in the source language and are better handled
1602 separately. */
1603 if (predicted_by_p (bb, PRED_CONTINUE))
1604 continue;
1605
1606 /* Loop branch heuristics - predict an edge back to a
1607 loop's head as taken. */
1608 if (bb == loop->latch)
1609 {
1610 e = find_edge (loop->latch, loop->header);
1611 if (e)
1612 {
1613 header_found = 1;
1614 predict_edge_def (e, PRED_LOOP_BRANCH, TAKEN);
1615 }
1616 }
1617
1618 /* Loop exit heuristics - predict an edge exiting the loop as not taken
1619 if the conditional has no successor that is the loop header. */
1620 if (!header_found
1621 /* If we already used more reliable loop exit predictors, do not
1622 bother with PRED_LOOP_EXIT. */
1623 && !predicted_by_p (bb, PRED_LOOP_ITERATIONS_GUESSED)
1624 && !predicted_by_p (bb, PRED_LOOP_ITERATIONS))
1625 {
1626 /* For loops with many exits we don't want to predict all exits
1627 with a rather large probability, because if all exits are
1628 considered in a row, the loop would be predicted to iterate
1629 almost never. The code dividing the probability by the number of
1630 exits is very rough. It should compute the number of exits
1631 taken on each path through the function (not the overall number
1632 of exits, which might be a lot higher for loops with wide switch
1633 statements in them) and take the n-th root.
1634 
1635 We limit the minimal probability to 2% to prevent
1636 EDGE_PROBABILITY_RELIABLE from trusting the branch prediction,
1637 as this was causing a regression in the perl benchmark containing such
1638 a wide loop. */
1639
1640 int probability = ((REG_BR_PROB_BASE
1641 - predictor_info [(int) PRED_LOOP_EXIT].hitrate)
1642 / n_exits);
1643 if (probability < HITRATE (2))
1644 probability = HITRATE (2);
1645 FOR_EACH_EDGE (e, ei, bb->succs)
1646 if (e->dest->index < NUM_FIXED_BLOCKS
1647 || !flow_bb_inside_loop_p (loop, e->dest))
1648 predict_edge (e, PRED_LOOP_EXIT, probability);
1649 }
1650 if (loop_bound_var)
1651 predict_iv_comparison (loop, bb, loop_bound_var, loop_iv_base,
1652 loop_bound_code,
1653 tree_low_cst (loop_bound_step, 0));
1654 }
1655
1656 /* Free basic blocks from get_loop_body. */
1657 free (bbs);
1658 }
1659 }
1660
1661 /* Attempt to predict probabilities of BB outgoing edges using local
1662 properties. */
1663 static void
1664 bb_estimate_probability_locally (basic_block bb)
1665 {
1666 rtx last_insn = BB_END (bb);
1667 rtx cond;
1668
1669 if (! can_predict_insn_p (last_insn))
1670 return;
1671 cond = get_condition (last_insn, NULL, false, false);
1672 if (! cond)
1673 return;
1674
1675 /* Try "pointer heuristic."
1676 A comparison ptr == 0 is predicted as false.
1677 Similarly, a comparison ptr1 == ptr2 is predicted as false. */
1678 if (COMPARISON_P (cond)
1679 && ((REG_P (XEXP (cond, 0)) && REG_POINTER (XEXP (cond, 0)))
1680 || (REG_P (XEXP (cond, 1)) && REG_POINTER (XEXP (cond, 1)))))
1681 {
1682 if (GET_CODE (cond) == EQ)
1683 predict_insn_def (last_insn, PRED_POINTER, NOT_TAKEN);
1684 else if (GET_CODE (cond) == NE)
1685 predict_insn_def (last_insn, PRED_POINTER, TAKEN);
1686 }
1687 else
1688
1689 /* Try "opcode heuristic."
1690 EQ tests are usually false and NE tests are usually true. Also,
1691 most quantities are positive, so we can make the appropriate guesses
1692 about signed comparisons against zero. */
1693 switch (GET_CODE (cond))
1694 {
1695 case CONST_INT:
1696 /* Unconditional branch. */
1697 predict_insn_def (last_insn, PRED_UNCONDITIONAL,
1698 cond == const0_rtx ? NOT_TAKEN : TAKEN);
1699 break;
1700
1701 case EQ:
1702 case UNEQ:
1703 /* Floating point comparisons appear to behave in a very
1704 unpredictable way because of the special role of equality tests in
1705 FP code. */
1706 if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
1707 ;
1708 /* Comparisons with 0 are often used for booleans and there is
1709 nothing useful to predict about them. */
1710 else if (XEXP (cond, 1) == const0_rtx
1711 || XEXP (cond, 0) == const0_rtx)
1712 ;
1713 else
1714 predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, NOT_TAKEN);
1715 break;
1716
1717 case NE:
1718 case LTGT:
1719 /* Floating point comparisons appear to behave in a very
1720 unpredictable way because of the special role of equality tests in
1721 FP code. */
1722 if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
1723 ;
1724 /* Comparisons with 0 are often used for booleans and there is
1725 nothing useful to predict about them. */
1726 else if (XEXP (cond, 1) == const0_rtx
1727 || XEXP (cond, 0) == const0_rtx)
1728 ;
1729 else
1730 predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, TAKEN);
1731 break;
1732
1733 case ORDERED:
1734 predict_insn_def (last_insn, PRED_FPOPCODE, TAKEN);
1735 break;
1736
1737 case UNORDERED:
1738 predict_insn_def (last_insn, PRED_FPOPCODE, NOT_TAKEN);
1739 break;
1740
1741 case LE:
1742 case LT:
1743 if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
1744 || XEXP (cond, 1) == constm1_rtx)
1745 predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, NOT_TAKEN);
1746 break;
1747
1748 case GE:
1749 case GT:
1750 if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
1751 || XEXP (cond, 1) == constm1_rtx)
1752 predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, TAKEN);
1753 break;
1754
1755 default:
1756 break;
1757 }
1758 }
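/* For instance, for a conditional jump generated from "if (p == NULL)" the
   pointer heuristic above predicts the branch not taken, while for
   "if (n > 0)" the opcode heuristic predicts it taken.  */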
1759
1760 /* Set edge->probability for each successor edge of BB. */
1761 void
1762 guess_outgoing_edge_probabilities (basic_block bb)
1763 {
1764 bb_estimate_probability_locally (bb);
1765 combine_predictions_for_insn (BB_END (bb), bb);
1766 }
1767 \f
1768 static tree expr_expected_value (tree, bitmap);
1769
1770 /* Helper function for expr_expected_value. */
1771
1772 static tree
1773 expr_expected_value_1 (tree type, tree op0, enum tree_code code,
1774 tree op1, bitmap visited)
1775 {
1776 gimple def;
1777
1778 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1779 {
1780 if (TREE_CONSTANT (op0))
1781 return op0;
1782
1783 if (code != SSA_NAME)
1784 return NULL_TREE;
1785
1786 def = SSA_NAME_DEF_STMT (op0);
1787
1788 /* If we were already here, break the infinite cycle. */
1789 if (!bitmap_set_bit (visited, SSA_NAME_VERSION (op0)))
1790 return NULL;
1791
1792 if (gimple_code (def) == GIMPLE_PHI)
1793 {
1794 /* All the arguments of the PHI node must have the same expected
1795 constant value. */
1796 int i, n = gimple_phi_num_args (def);
1797 tree val = NULL, new_val;
1798
1799 for (i = 0; i < n; i++)
1800 {
1801 tree arg = PHI_ARG_DEF (def, i);
1802
1803 /* If this PHI has itself as an argument, we cannot
1804 determine the expected value of this argument. However,
1805 if we can find an expected constant value for the other
1806 PHI args then we can still be sure that this is
1807 likely a constant. So be optimistic and just
1808 continue with the next argument. */
1809 if (arg == PHI_RESULT (def))
1810 continue;
1811
1812 new_val = expr_expected_value (arg, visited);
1813 if (!new_val)
1814 return NULL;
1815 if (!val)
1816 val = new_val;
1817 else if (!operand_equal_p (val, new_val, false))
1818 return NULL;
1819 }
1820 return val;
1821 }
1822 if (is_gimple_assign (def))
1823 {
1824 if (gimple_assign_lhs (def) != op0)
1825 return NULL;
1826
1827 return expr_expected_value_1 (TREE_TYPE (gimple_assign_lhs (def)),
1828 gimple_assign_rhs1 (def),
1829 gimple_assign_rhs_code (def),
1830 gimple_assign_rhs2 (def),
1831 visited);
1832 }
1833
1834 if (is_gimple_call (def))
1835 {
1836 tree decl = gimple_call_fndecl (def);
1837 if (!decl)
1838 return NULL;
1839 if (DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
1840 switch (DECL_FUNCTION_CODE (decl))
1841 {
1842 case BUILT_IN_EXPECT:
1843 {
1844 tree val;
1845 if (gimple_call_num_args (def) != 2)
1846 return NULL;
1847 val = gimple_call_arg (def, 0);
1848 if (TREE_CONSTANT (val))
1849 return val;
1850 return gimple_call_arg (def, 1);
1851 }
1852
1853 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_N:
1854 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_1:
1855 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_2:
1856 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_4:
1857 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_8:
1858 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_16:
1859 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE:
1860 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_N:
1861 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_1:
1862 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_2:
1863 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_4:
1864 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_8:
1865 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_16:
1866 /* Assume that any given atomic operation has low contention,
1867 and thus the compare-and-swap operation succeeds. */
1868 return boolean_true_node;
1869 }
1870 }
1871
1872 return NULL;
1873 }
1874
1875 if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS)
1876 {
1877 tree res;
1878 op0 = expr_expected_value (op0, visited);
1879 if (!op0)
1880 return NULL;
1881 op1 = expr_expected_value (op1, visited);
1882 if (!op1)
1883 return NULL;
1884 res = fold_build2 (code, type, op0, op1);
1885 if (TREE_CONSTANT (res))
1886 return res;
1887 return NULL;
1888 }
1889 if (get_gimple_rhs_class (code) == GIMPLE_UNARY_RHS)
1890 {
1891 tree res;
1892 op0 = expr_expected_value (op0, visited);
1893 if (!op0)
1894 return NULL;
1895 res = fold_build1 (code, type, op0);
1896 if (TREE_CONSTANT (res))
1897 return res;
1898 return NULL;
1899 }
1900 return NULL;
1901 }
1902
1903 /* Return the constant value EXPR is likely to have at execution time,
1904 or NULL if unknown. The function is used by the builtin_expect branch
1905 predictor, so the evidence must come from that construct and any
1906 additional constant folding it enables.
1907
1908 We may want to implement a more involved value guess (such as value range
1909 propagation based prediction), but such tricks shall go to a new implementation. */
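 /* Illustrative example (hypothetical user code, not from the sources):
    given

	n = __builtin_expect (count, 0);
	if (n > 10)
	  ...

    the first argument of the BUILT_IN_EXPECT call is not constant, so the
    expected value of N is the second argument, 0, and folding the
    comparison 0 > 10 gives an expected value of false for the condition.  */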
1910
1911 static tree
1912 expr_expected_value (tree expr, bitmap visited)
1913 {
1914 enum tree_code code;
1915 tree op0, op1;
1916
1917 if (TREE_CONSTANT (expr))
1918 return expr;
1919
1920 extract_ops_from_tree (expr, &code, &op0, &op1);
1921 return expr_expected_value_1 (TREE_TYPE (expr),
1922 op0, code, op1, visited);
1923 }
1924
1925 \f
1926 /* Get rid of all builtin_expect calls and GIMPLE_PREDICT statements
1927 we no longer need. */
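 /* For example (hypothetical GIMPLE), a statement

	tmp_1 = __builtin_expect (cond_2, 1);

    is replaced by the plain copy

	tmp_1 = cond_2;

    while a __builtin_expect call whose result is unused is removed
    outright.  */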
1928 static unsigned int
1929 strip_predict_hints (void)
1930 {
1931 basic_block bb;
1932 gimple ass_stmt;
1933 tree var;
1934
1935 FOR_EACH_BB (bb)
1936 {
1937 gimple_stmt_iterator bi;
1938 for (bi = gsi_start_bb (bb); !gsi_end_p (bi);)
1939 {
1940 gimple stmt = gsi_stmt (bi);
1941
1942 if (gimple_code (stmt) == GIMPLE_PREDICT)
1943 {
1944 gsi_remove (&bi, true);
1945 continue;
1946 }
1947 else if (gimple_code (stmt) == GIMPLE_CALL)
1948 {
1949 tree fndecl = gimple_call_fndecl (stmt);
1950
1951 if (fndecl
1952 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
1953 && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_EXPECT
1954 && gimple_call_num_args (stmt) == 2)
1955 {
1956 var = gimple_call_lhs (stmt);
1957 if (var)
1958 {
1959 ass_stmt
1960 = gimple_build_assign (var, gimple_call_arg (stmt, 0));
1961 gsi_replace (&bi, ass_stmt, true);
1962 }
1963 else
1964 {
1965 gsi_remove (&bi, true);
1966 continue;
1967 }
1968 }
1969 }
1970 gsi_next (&bi);
1971 }
1972 }
1973 return 0;
1974 }
1975 \f
1976 /* Predict using opcode of the last statement in basic block. */
1977 static void
1978 tree_predict_by_opcode (basic_block bb)
1979 {
1980 gimple stmt = last_stmt (bb);
1981 edge then_edge;
1982 tree op0, op1;
1983 tree type;
1984 tree val;
1985 enum tree_code cmp;
1986 bitmap visited;
1987 edge_iterator ei;
1988
1989 if (!stmt || gimple_code (stmt) != GIMPLE_COND)
1990 return;
1991 FOR_EACH_EDGE (then_edge, ei, bb->succs)
1992 if (then_edge->flags & EDGE_TRUE_VALUE)
1993 break;
1994 op0 = gimple_cond_lhs (stmt);
1995 op1 = gimple_cond_rhs (stmt);
1996 cmp = gimple_cond_code (stmt);
1997 type = TREE_TYPE (op0);
1998 visited = BITMAP_ALLOC (NULL);
1999 val = expr_expected_value_1 (boolean_type_node, op0, cmp, op1, visited);
2000 BITMAP_FREE (visited);
2001 if (val)
2002 {
2003 if (integer_zerop (val))
2004 predict_edge_def (then_edge, PRED_BUILTIN_EXPECT, NOT_TAKEN);
2005 else
2006 predict_edge_def (then_edge, PRED_BUILTIN_EXPECT, TAKEN);
2007 return;
2008 }
2009 /* Try "pointer heuristic."
2010 A comparison ptr == 0 is predicted as false.
2011 Similarly, a comparison ptr1 == ptr2 is predicted as false. */
2012 if (POINTER_TYPE_P (type))
2013 {
2014 if (cmp == EQ_EXPR)
2015 predict_edge_def (then_edge, PRED_TREE_POINTER, NOT_TAKEN);
2016 else if (cmp == NE_EXPR)
2017 predict_edge_def (then_edge, PRED_TREE_POINTER, TAKEN);
2018 }
2019 else
2020
2021 /* Try "opcode heuristic."
2022 EQ tests are usually false and NE tests are usually true. Also,
2023 most quantities are positive, so we can make the appropriate guesses
2024 about signed comparisons against zero. */
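     /* For example (illustrative): "if (n < 0)" is predicted as not taken
	by PRED_TREE_OPCODE_POSITIVE below, while a pointer comparison
	"if (p != q)" is predicted as taken by PRED_TREE_POINTER above.  */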
2025 switch (cmp)
2026 {
2027 case EQ_EXPR:
2028 case UNEQ_EXPR:
2029 /* Floating point comparisons appear to behave in a very
2030 unpredictable way because of the special role of equality
2031 tests in FP code. */
2032 if (FLOAT_TYPE_P (type))
2033 ;
2034 /* Comparisons with 0 are often used for booleans and there is
2035 nothing useful to predict about them. */
2036 else if (integer_zerop (op0) || integer_zerop (op1))
2037 ;
2038 else
2039 predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, NOT_TAKEN);
2040 break;
2041
2042 case NE_EXPR:
2043 case LTGT_EXPR:
2044 /* Floating point comparisons appear to behave in a very
2045 unpredictable way because of the special role of equality
2046 tests in FP code. */
2047 if (FLOAT_TYPE_P (type))
2048 ;
2049 /* Comparisons with 0 are often used for booleans and there is
2050 nothing useful to predict about them. */
2051 else if (integer_zerop (op0)
2052 || integer_zerop (op1))
2053 ;
2054 else
2055 predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, TAKEN);
2056 break;
2057
2058 case ORDERED_EXPR:
2059 predict_edge_def (then_edge, PRED_TREE_FPOPCODE, TAKEN);
2060 break;
2061
2062 case UNORDERED_EXPR:
2063 predict_edge_def (then_edge, PRED_TREE_FPOPCODE, NOT_TAKEN);
2064 break;
2065
2066 case LE_EXPR:
2067 case LT_EXPR:
2068 if (integer_zerop (op1)
2069 || integer_onep (op1)
2070 || integer_all_onesp (op1)
2071 || real_zerop (op1)
2072 || real_onep (op1)
2073 || real_minus_onep (op1))
2074 predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, NOT_TAKEN);
2075 break;
2076
2077 case GE_EXPR:
2078 case GT_EXPR:
2079 if (integer_zerop (op1)
2080 || integer_onep (op1)
2081 || integer_all_onesp (op1)
2082 || real_zerop (op1)
2083 || real_onep (op1)
2084 || real_minus_onep (op1))
2085 predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, TAKEN);
2086 break;
2087
2088 default:
2089 break;
2090 }
2091 }
2092
2093 /* Try to guess whether the return value indicates an error code. */
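 /* For example (illustrative): "return NULL;" yields PRED_NULL_RETURN with
    prediction NOT_TAKEN, "return -1;" yields PRED_NEGATIVE_RETURN with
    prediction NOT_TAKEN, and "return 42;" yields PRED_CONST_RETURN with
    prediction TAKEN.  */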
2094
2095 static enum br_predictor
2096 return_prediction (tree val, enum prediction *prediction)
2097 {
2098 /* VOID. */
2099 if (!val)
2100 return PRED_NO_PREDICTION;
2101 /* Different heuristics for pointers and scalars. */
2102 if (POINTER_TYPE_P (TREE_TYPE (val)))
2103 {
2104 /* NULL is usually not returned. */
2105 if (integer_zerop (val))
2106 {
2107 *prediction = NOT_TAKEN;
2108 return PRED_NULL_RETURN;
2109 }
2110 }
2111 else if (INTEGRAL_TYPE_P (TREE_TYPE (val)))
2112 {
2113 /* Negative return values are often used to indicate
2114 errors. */
2115 if (TREE_CODE (val) == INTEGER_CST
2116 && tree_int_cst_sgn (val) < 0)
2117 {
2118 *prediction = NOT_TAKEN;
2119 return PRED_NEGATIVE_RETURN;
2120 }
2121 /* Constant return values seem to be commonly taken.
2122 Zero/one often represent booleans, so exclude them from the
2123 heuristics. */
2124 if (TREE_CONSTANT (val)
2125 && (!integer_zerop (val) && !integer_onep (val)))
2126 {
2127 *prediction = TAKEN;
2128 return PRED_CONST_RETURN;
2129 }
2130 }
2131 return PRED_NO_PREDICTION;
2132 }
2133
2134 /* Find the basic block with the return expression and look at the possible
2135 return values, trying to apply the RETURN_PREDICTION heuristics. */
2136 static void
2137 apply_return_prediction (void)
2138 {
2139 gimple return_stmt = NULL;
2140 tree return_val;
2141 edge e;
2142 gimple phi;
2143 int phi_num_args, i;
2144 enum br_predictor pred;
2145 enum prediction direction;
2146 edge_iterator ei;
2147
2148 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
2149 {
2150 return_stmt = last_stmt (e->src);
2151 if (return_stmt
2152 && gimple_code (return_stmt) == GIMPLE_RETURN)
2153 break;
2154 }
2155 if (!e)
2156 return;
2157 return_val = gimple_return_retval (return_stmt);
2158 if (!return_val)
2159 return;
2160 if (TREE_CODE (return_val) != SSA_NAME
2161 || !SSA_NAME_DEF_STMT (return_val)
2162 || gimple_code (SSA_NAME_DEF_STMT (return_val)) != GIMPLE_PHI)
2163 return;
2164 phi = SSA_NAME_DEF_STMT (return_val);
2165 phi_num_args = gimple_phi_num_args (phi);
2166 pred = return_prediction (PHI_ARG_DEF (phi, 0), &direction);
2167
2168 /* Avoid the degenerate case where all return values from the function
2169 belong to the same category (i.e. they are all positive constants),
2170 in which case we can hardly say anything useful about them. */
2171 for (i = 1; i < phi_num_args; i++)
2172 if (pred != return_prediction (PHI_ARG_DEF (phi, i), &direction))
2173 break;
2174 if (i != phi_num_args)
2175 for (i = 0; i < phi_num_args; i++)
2176 {
2177 pred = return_prediction (PHI_ARG_DEF (phi, i), &direction);
2178 if (pred != PRED_NO_PREDICTION)
2179 predict_paths_leading_to_edge (gimple_phi_arg_edge (phi, i), pred,
2180 direction);
2181 }
2182 }
2183
2184 /* Look for basic blocks that contain unlikely-to-happen events
2185 (such as noreturn calls) and mark all paths leading to execution
2186 of these basic blocks as unlikely. */
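 /* For example (illustrative): a block containing a call to a noreturn
    function such as abort () has all paths leading to it predicted as not
    taken (provided the function has normal return edges at all), and calls
    to functions carrying the "cold" attribute are treated the same way.  */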
2187
2188 static void
2189 tree_bb_level_predictions (void)
2190 {
2191 basic_block bb;
2192 bool has_return_edges = false;
2193 edge e;
2194 edge_iterator ei;
2195
2196 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
2197 if (!(e->flags & (EDGE_ABNORMAL | EDGE_FAKE | EDGE_EH)))
2198 {
2199 has_return_edges = true;
2200 break;
2201 }
2202
2203 apply_return_prediction ();
2204
2205 FOR_EACH_BB (bb)
2206 {
2207 gimple_stmt_iterator gsi;
2208
2209 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2210 {
2211 gimple stmt = gsi_stmt (gsi);
2212 tree decl;
2213
2214 if (is_gimple_call (stmt))
2215 {
2216 if ((gimple_call_flags (stmt) & ECF_NORETURN)
2217 && has_return_edges)
2218 predict_paths_leading_to (bb, PRED_NORETURN,
2219 NOT_TAKEN);
2220 decl = gimple_call_fndecl (stmt);
2221 if (decl
2222 && lookup_attribute ("cold",
2223 DECL_ATTRIBUTES (decl)))
2224 predict_paths_leading_to (bb, PRED_COLD_FUNCTION,
2225 NOT_TAKEN);
2226 }
2227 else if (gimple_code (stmt) == GIMPLE_PREDICT)
2228 {
2229 predict_paths_leading_to (bb, gimple_predict_predictor (stmt),
2230 gimple_predict_outcome (stmt));
2231 /* Keep GIMPLE_PREDICT around so early inlining will propagate
2232 hints to callers. */
2233 }
2234 }
2235 }
2236 }
2237
2238 #ifdef ENABLE_CHECKING
2239
2240 /* Callback for pointer_map_traverse, asserts that the pointer map is
2241 empty. */
2242
2243 static bool
2244 assert_is_empty (const void *key ATTRIBUTE_UNUSED, void **value,
2245 void *data ATTRIBUTE_UNUSED)
2246 {
2247 gcc_assert (!*value);
2248 return false;
2249 }
2250 #endif
2251
2252 /* Predict branch probabilities and estimate profile for basic block BB. */
2253
2254 static void
2255 tree_estimate_probability_bb (basic_block bb)
2256 {
2257 edge e;
2258 edge_iterator ei;
2259 gimple last;
2260
2261 FOR_EACH_EDGE (e, ei, bb->succs)
2262 {
2263 /* Predict edges to user labels with attributes. */
2264 if (e->dest != EXIT_BLOCK_PTR)
2265 {
2266 gimple_stmt_iterator gi;
2267 for (gi = gsi_start_bb (e->dest); !gsi_end_p (gi); gsi_next (&gi))
2268 {
2269 gimple stmt = gsi_stmt (gi);
2270 tree decl;
2271
2272 if (gimple_code (stmt) != GIMPLE_LABEL)
2273 break;
2274 decl = gimple_label_label (stmt);
2275 if (DECL_ARTIFICIAL (decl))
2276 continue;
2277
2278 /* Finally, we have a user-defined label. */
2279 if (lookup_attribute ("cold", DECL_ATTRIBUTES (decl)))
2280 predict_edge_def (e, PRED_COLD_LABEL, NOT_TAKEN);
2281 else if (lookup_attribute ("hot", DECL_ATTRIBUTES (decl)))
2282 predict_edge_def (e, PRED_HOT_LABEL, TAKEN);
2283 }
2284 }
2285
2286 /* Predict early returns to be probable, as we've already taken
2287 care of error returns and the remaining cases are often used for
2288 fast paths through the function.
2289
2290 Since we've already removed the return statements, we are
2291 looking for CFG like:
2292
2293 if (conditional)
2294 {
2295 ..
2296 goto return_block
2297 }
2298 some other blocks
2299 return_block:
2300 return_stmt. */
2301 if (e->dest != bb->next_bb
2302 && e->dest != EXIT_BLOCK_PTR
2303 && single_succ_p (e->dest)
2304 && single_succ_edge (e->dest)->dest == EXIT_BLOCK_PTR
2305 && (last = last_stmt (e->dest)) != NULL
2306 && gimple_code (last) == GIMPLE_RETURN)
2307 {
2308 edge e1;
2309 edge_iterator ei1;
2310
2311 if (single_succ_p (bb))
2312 {
2313 FOR_EACH_EDGE (e1, ei1, bb->preds)
2314 if (!predicted_by_p (e1->src, PRED_NULL_RETURN)
2315 && !predicted_by_p (e1->src, PRED_CONST_RETURN)
2316 && !predicted_by_p (e1->src, PRED_NEGATIVE_RETURN))
2317 predict_edge_def (e1, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
2318 }
2319 else
2320 if (!predicted_by_p (e->src, PRED_NULL_RETURN)
2321 && !predicted_by_p (e->src, PRED_CONST_RETURN)
2322 && !predicted_by_p (e->src, PRED_NEGATIVE_RETURN))
2323 predict_edge_def (e, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
2324 }
2325
2326 /* Look for a block we are guarding (i.e. we dominate it,
2327 but it doesn't postdominate us). */
2328 if (e->dest != EXIT_BLOCK_PTR && e->dest != bb
2329 && dominated_by_p (CDI_DOMINATORS, e->dest, e->src)
2330 && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e->dest))
2331 {
2332 gimple_stmt_iterator bi;
2333
2334 /* The call heuristic claims that a guarded function call
2335 is improbable. This is because such calls are often used
2336 to signal exceptional situations such as printing error
2337 messages. */
2338 for (bi = gsi_start_bb (e->dest); !gsi_end_p (bi);
2339 gsi_next (&bi))
2340 {
2341 gimple stmt = gsi_stmt (bi);
2342 if (is_gimple_call (stmt)
2343 /* Constant and pure calls are hardly ever used to signal
2344 something exceptional. */
2345 && gimple_has_side_effects (stmt))
2346 {
2347 predict_edge_def (e, PRED_CALL, NOT_TAKEN);
2348 break;
2349 }
2350 }
2351 }
2352 }
2353 tree_predict_by_opcode (bb);
2354 }
2355
2356 /* Predict branch probabilities and estimate profile of the tree CFG.
2357 This function can be called from the loop optimizers to recompute
2358 the profile information. */
2359
2360 void
2361 tree_estimate_probability (void)
2362 {
2363 basic_block bb;
2364
2365 add_noreturn_fake_exit_edges ();
2366 connect_infinite_loops_to_exit ();
2367 /* We use loop_niter_by_eval, which requires that the loops have
2368 preheaders. */
2369 create_preheaders (CP_SIMPLE_PREHEADERS);
2370 calculate_dominance_info (CDI_POST_DOMINATORS);
2371
2372 bb_predictions = pointer_map_create ();
2373 tree_bb_level_predictions ();
2374 record_loop_exits ();
2375
2376 if (number_of_loops (cfun) > 1)
2377 predict_loops ();
2378
2379 FOR_EACH_BB (bb)
2380 tree_estimate_probability_bb (bb);
2381
2382 FOR_EACH_BB (bb)
2383 combine_predictions_for_bb (bb);
2384
2385 #ifdef ENABLE_CHECKING
2386 pointer_map_traverse (bb_predictions, assert_is_empty, NULL);
2387 #endif
2388 pointer_map_destroy (bb_predictions);
2389 bb_predictions = NULL;
2390
2391 estimate_bb_frequencies ();
2392 free_dominance_info (CDI_POST_DOMINATORS);
2393 remove_fake_exit_edges ();
2394 }
2395
2396 /* Predict branch probabilities and estimate profile of the tree CFG.
2397 This is the driver function for PASS_PROFILE. */
2398
2399 static unsigned int
2400 tree_estimate_probability_driver (void)
2401 {
2402 unsigned nb_loops;
2403
2404 loop_optimizer_init (LOOPS_NORMAL);
2405 if (dump_file && (dump_flags & TDF_DETAILS))
2406 flow_loops_dump (dump_file, NULL, 0);
2407
2408 mark_irreducible_loops ();
2409
2410 nb_loops = number_of_loops (cfun);
2411 if (nb_loops > 1)
2412 scev_initialize ();
2413
2414 tree_estimate_probability ();
2415
2416 if (nb_loops > 1)
2417 scev_finalize ();
2418
2419 loop_optimizer_finalize ();
2420 if (dump_file && (dump_flags & TDF_DETAILS))
2421 gimple_dump_cfg (dump_file, dump_flags);
2422 if (profile_status == PROFILE_ABSENT)
2423 profile_status = PROFILE_GUESSED;
2424 return 0;
2425 }
2426 \f
2427 /* Predict, using PRED, the edges into CUR whose sources are not postdominated
2428 by BB, and recurse into the blocks postdominated by CUR. */
2429
2430 static void
2431 predict_paths_for_bb (basic_block cur, basic_block bb,
2432 enum br_predictor pred,
2433 enum prediction taken,
2434 bitmap visited)
2435 {
2436 edge e;
2437 edge_iterator ei;
2438 basic_block son;
2439
2440 /* We are looking for all edges forming the edge cut induced by
2441 the set of all blocks postdominated by BB. */
2442 FOR_EACH_EDGE (e, ei, cur->preds)
2443 if (e->src->index >= NUM_FIXED_BLOCKS
2444 && !dominated_by_p (CDI_POST_DOMINATORS, e->src, bb))
2445 {
2446 edge e2;
2447 edge_iterator ei2;
2448 bool found = false;
2449
2450 /* Ignore fake and EH edges; we predict them as not taken anyway. */
2451 if (e->flags & (EDGE_EH | EDGE_FAKE))
2452 continue;
2453 gcc_assert (bb == cur || dominated_by_p (CDI_POST_DOMINATORS, cur, bb));
2454
2455 /* See if there is an edge from e->src that is not abnormal
2456 and does not lead to BB. */
2457 FOR_EACH_EDGE (e2, ei2, e->src->succs)
2458 if (e2 != e
2459 && !(e2->flags & (EDGE_EH | EDGE_FAKE))
2460 && !dominated_by_p (CDI_POST_DOMINATORS, e2->dest, bb))
2461 {
2462 found = true;
2463 break;
2464 }
2465
2466 /* If there is a non-abnormal path leaving e->src, predict the edge
2467 using the predictor. Otherwise we need to look for paths
2468 leading to e->src.
2469
2470 The second case may lead to an infinite loop when we are predicting
2471 regions that are only reachable by abnormal edges. We simply
2472 prevent visiting a given BB twice. */
2473 if (found)
2474 predict_edge_def (e, pred, taken);
2475 else if (bitmap_set_bit (visited, e->src->index))
2476 predict_paths_for_bb (e->src, e->src, pred, taken, visited);
2477 }
2478 for (son = first_dom_son (CDI_POST_DOMINATORS, cur);
2479 son;
2480 son = next_dom_son (CDI_POST_DOMINATORS, son))
2481 predict_paths_for_bb (son, bb, pred, taken, visited);
2482 }
2483
2484 /* Set branch probabilities on all paths leading to BB according to
2485 predictor PRED and prediction TAKEN. */
2486
2487 static void
2488 predict_paths_leading_to (basic_block bb, enum br_predictor pred,
2489 enum prediction taken)
2490 {
2491 bitmap visited = BITMAP_ALLOC (NULL);
2492 predict_paths_for_bb (bb, bb, pred, taken, visited);
2493 BITMAP_FREE (visited);
2494 }
2495
2496 /* Like predict_paths_leading_to but take edge instead of basic block. */
2497
2498 static void
2499 predict_paths_leading_to_edge (edge e, enum br_predictor pred,
2500 enum prediction taken)
2501 {
2502 bool has_nonloop_edge = false;
2503 edge_iterator ei;
2504 edge e2;
2505
2506 basic_block bb = e->src;
2507 FOR_EACH_EDGE (e2, ei, bb->succs)
2508 if (e2->dest != e->src && e2->dest != e->dest
2509 && !(e->flags & (EDGE_EH | EDGE_FAKE))
2510 && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e2->dest))
2511 {
2512 has_nonloop_edge = true;
2513 break;
2514 }
2515 if (!has_nonloop_edge)
2516 {
2517 bitmap visited = BITMAP_ALLOC (NULL);
2518 predict_paths_for_bb (bb, bb, pred, taken, visited);
2519 BITMAP_FREE (visited);
2520 }
2521 else
2522 predict_edge_def (e, pred, taken);
2523 }
2524 \f
2525 /* This is used to carry information about basic blocks. It is
2526 attached to the AUX field of the standard CFG block. */
2527
2528 typedef struct block_info_def
2529 {
2530 /* Estimated frequency of execution of basic_block. */
2531 sreal frequency;
2532
2533 /* To keep queue of basic blocks to process. */
2534 basic_block next;
2535
2536 /* Number of predecessors we need to visit first. */
2537 int npredecessors;
2538 } *block_info;
2539
2540 /* Similar information for edges. */
2541 typedef struct edge_info_def
2542 {
2543 /* In case the edge is a loopback edge, the probability that the edge is
2544 reached given that the header is. The estimated number of iterations of
2545 the loop is then 1 / (1 - back_edge_prob); see the example below. */
2546 sreal back_edge_prob;
2547 /* True if the edge is a loopback edge in the natural loop. */
2548 unsigned int back_edge:1;
2549 } *edge_info;
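 /* Worked example (illustrative numbers): if back_edge_prob is 0.9, the
    estimated number of iterations is 1 / (1 - 0.9) = 10, i.e. the loop
    body is expected to execute about ten times per entry into the loop.  */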
2550
2551 #define BLOCK_INFO(B) ((block_info) (B)->aux)
2552 #define EDGE_INFO(E) ((edge_info) (E)->aux)
2553
2554 /* Helper function for estimate_bb_frequencies.
2555 Propagate the frequencies in blocks marked in
2556 TOVISIT, starting in HEAD. */
2557
2558 static void
2559 propagate_freq (basic_block head, bitmap tovisit)
2560 {
2561 basic_block bb;
2562 basic_block last;
2563 unsigned i;
2564 edge e;
2565 basic_block nextbb;
2566 bitmap_iterator bi;
2567
2568 /* For each basic block we need to visit, count the number of its
2569 predecessors that we need to visit first. */
2570 EXECUTE_IF_SET_IN_BITMAP (tovisit, 0, i, bi)
2571 {
2572 edge_iterator ei;
2573 int count = 0;
2574
2575 bb = BASIC_BLOCK (i);
2576
2577 FOR_EACH_EDGE (e, ei, bb->preds)
2578 {
2579 bool visit = bitmap_bit_p (tovisit, e->src->index);
2580
2581 if (visit && !(e->flags & EDGE_DFS_BACK))
2582 count++;
2583 else if (visit && dump_file && !EDGE_INFO (e)->back_edge)
2584 fprintf (dump_file,
2585 "Irreducible region hit, ignoring edge to %i->%i\n",
2586 e->src->index, bb->index);
2587 }
2588 BLOCK_INFO (bb)->npredecessors = count;
2589 /* When function never returns, we will never process exit block. */
2590 if (!count && bb == EXIT_BLOCK_PTR)
2591 bb->count = bb->frequency = 0;
2592 }
2593
2594 memcpy (&BLOCK_INFO (head)->frequency, &real_one, sizeof (real_one));
2595 last = head;
2596 for (bb = head; bb; bb = nextbb)
2597 {
2598 edge_iterator ei;
2599 sreal cyclic_probability, frequency;
2600
2601 memcpy (&cyclic_probability, &real_zero, sizeof (real_zero));
2602 memcpy (&frequency, &real_zero, sizeof (real_zero));
2603
2604 nextbb = BLOCK_INFO (bb)->next;
2605 BLOCK_INFO (bb)->next = NULL;
2606
2607 /* Compute frequency of basic block. */
2608 if (bb != head)
2609 {
2610 #ifdef ENABLE_CHECKING
2611 FOR_EACH_EDGE (e, ei, bb->preds)
2612 gcc_assert (!bitmap_bit_p (tovisit, e->src->index)
2613 || (e->flags & EDGE_DFS_BACK));
2614 #endif
2615
2616 FOR_EACH_EDGE (e, ei, bb->preds)
2617 if (EDGE_INFO (e)->back_edge)
2618 {
2619 sreal_add (&cyclic_probability, &cyclic_probability,
2620 &EDGE_INFO (e)->back_edge_prob);
2621 }
2622 else if (!(e->flags & EDGE_DFS_BACK))
2623 {
2624 sreal tmp;
2625
2626 /* frequency += (e->probability
2627 * BLOCK_INFO (e->src)->frequency /
2628 REG_BR_PROB_BASE); */
2629
2630 sreal_init (&tmp, e->probability, 0);
2631 sreal_mul (&tmp, &tmp, &BLOCK_INFO (e->src)->frequency);
2632 sreal_mul (&tmp, &tmp, &real_inv_br_prob_base);
2633 sreal_add (&frequency, &frequency, &tmp);
2634 }
2635
2636 if (sreal_compare (&cyclic_probability, &real_zero) == 0)
2637 {
2638 memcpy (&BLOCK_INFO (bb)->frequency, &frequency,
2639 sizeof (frequency));
2640 }
2641 else
2642 {
2643 if (sreal_compare (&cyclic_probability, &real_almost_one) > 0)
2644 {
2645 memcpy (&cyclic_probability, &real_almost_one,
2646 sizeof (real_almost_one));
2647 }
2648
2649 /* BLOCK_INFO (bb)->frequency = frequency
2650 / (1 - cyclic_probability) */
2651
2652 sreal_sub (&cyclic_probability, &real_one, &cyclic_probability);
2653 sreal_div (&BLOCK_INFO (bb)->frequency,
2654 &frequency, &cyclic_probability);
2655 }
2656 }
2657
2658 bitmap_clear_bit (tovisit, bb->index);
2659
2660 e = find_edge (bb, head);
2661 if (e)
2662 {
2663 sreal tmp;
2664
2665 /* EDGE_INFO (e)->back_edge_prob
2666 = ((e->probability * BLOCK_INFO (bb)->frequency)
2667 / REG_BR_PROB_BASE); */
2668
2669 sreal_init (&tmp, e->probability, 0);
2670 sreal_mul (&tmp, &tmp, &BLOCK_INFO (bb)->frequency);
2671 sreal_mul (&EDGE_INFO (e)->back_edge_prob,
2672 &tmp, &real_inv_br_prob_base);
2673 }
2674
2675 /* Propagate to successor blocks. */
2676 FOR_EACH_EDGE (e, ei, bb->succs)
2677 if (!(e->flags & EDGE_DFS_BACK)
2678 && BLOCK_INFO (e->dest)->npredecessors)
2679 {
2680 BLOCK_INFO (e->dest)->npredecessors--;
2681 if (!BLOCK_INFO (e->dest)->npredecessors)
2682 {
2683 if (!nextbb)
2684 nextbb = e->dest;
2685 else
2686 BLOCK_INFO (last)->next = e->dest;
2687
2688 last = e->dest;
2689 }
2690 }
2691 }
2692 }
2693
2694 /* Estimate probabilities of loopback edges in loops at the same nesting level. */
2695
2696 static void
2697 estimate_loops_at_level (struct loop *first_loop)
2698 {
2699 struct loop *loop;
2700
2701 for (loop = first_loop; loop; loop = loop->next)
2702 {
2703 edge e;
2704 basic_block *bbs;
2705 unsigned i;
2706 bitmap tovisit = BITMAP_ALLOC (NULL);
2707
2708 estimate_loops_at_level (loop->inner);
2709
2710 /* Find current loop back edge and mark it. */
2711 e = loop_latch_edge (loop);
2712 EDGE_INFO (e)->back_edge = 1;
2713
2714 bbs = get_loop_body (loop);
2715 for (i = 0; i < loop->num_nodes; i++)
2716 bitmap_set_bit (tovisit, bbs[i]->index);
2717 free (bbs);
2718 propagate_freq (loop->header, tovisit);
2719 BITMAP_FREE (tovisit);
2720 }
2721 }
2722
2723 /* Propagate frequencies through the structure of loops. */
2724
2725 static void
2726 estimate_loops (void)
2727 {
2728 bitmap tovisit = BITMAP_ALLOC (NULL);
2729 basic_block bb;
2730
2731 /* Start by estimating the frequencies in the loops. */
2732 if (number_of_loops (cfun) > 1)
2733 estimate_loops_at_level (current_loops->tree_root->inner);
2734
2735 /* Now propagate the frequencies through all the blocks. */
2736 FOR_ALL_BB (bb)
2737 {
2738 bitmap_set_bit (tovisit, bb->index);
2739 }
2740 propagate_freq (ENTRY_BLOCK_PTR, tovisit);
2741 BITMAP_FREE (tovisit);
2742 }
2743
2744 /* Convert counts measured by profile driven feedback to frequencies.
2745 Return nonzero iff there was any nonzero execution count. */
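 /* Worked example (illustrative numbers): with count_max = 1000 and
    bb->count = 250, the block gets frequency
    (250 * BB_FREQ_MAX + 500) / 1000, i.e. roughly BB_FREQ_MAX / 4.  */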
2746
2747 int
2748 counts_to_freqs (void)
2749 {
2750 gcov_type count_max, true_count_max = 0;
2751 basic_block bb;
2752
2753 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
2754 true_count_max = MAX (bb->count, true_count_max);
2755
2756 count_max = MAX (true_count_max, 1);
2757 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
2758 bb->frequency = (bb->count * BB_FREQ_MAX + count_max / 2) / count_max;
2759
2760 return true_count_max;
2761 }
2762
2763 /* Return true if the function is likely to be expensive, so there is no
2764 point in optimizing the prologue or epilogue for speed, or inlining, at the
2765 expense of code size growth. THRESHOLD is the limit on the average number
2766 of instructions the function may execute and still be considered cheap. */
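 /* Worked example (illustrative numbers): with an entry block frequency of
    1000 and THRESHOLD of 100, LIMIT is 100000; the function is considered
    expensive once the accumulated bb->frequency over its active insns
    exceeds that, i.e. once it executes more than about 100 insns per
    invocation on average.  */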
2767
2768 bool
2769 expensive_function_p (int threshold)
2770 {
2771 unsigned int sum = 0;
2772 basic_block bb;
2773 unsigned int limit;
2774
2775 /* We cannot compute this accurately for large thresholds due to the
2776 scaled frequencies. */
2777 gcc_assert (threshold <= BB_FREQ_MAX);
2778
2779 /* Frequencies are out of range. This either means that the function contains
2780 an internal loop executing more than BB_FREQ_MAX times, or profile feedback
2781 is available and the function has not been executed at all. */
2782 if (ENTRY_BLOCK_PTR->frequency == 0)
2783 return true;
2784
2785 /* At most BB_FREQ_MAX^2, so overflow won't happen. */
2786 limit = ENTRY_BLOCK_PTR->frequency * threshold;
2787 FOR_EACH_BB (bb)
2788 {
2789 rtx insn;
2790
2791 FOR_BB_INSNS (bb, insn)
2792 if (active_insn_p (insn))
2793 {
2794 sum += bb->frequency;
2795 if (sum > limit)
2796 return true;
2797 }
2798 }
2799
2800 return false;
2801 }
2802
2803 /* Estimate basic block frequencies from the given branch probabilities. */
2804
2805 void
2806 estimate_bb_frequencies (void)
2807 {
2808 basic_block bb;
2809 sreal freq_max;
2810
2811 if (profile_status != PROFILE_READ || !counts_to_freqs ())
2812 {
2813 static int real_values_initialized = 0;
2814
2815 if (!real_values_initialized)
2816 {
2817 real_values_initialized = 1;
2818 sreal_init (&real_zero, 0, 0);
2819 sreal_init (&real_one, 1, 0);
2820 sreal_init (&real_br_prob_base, REG_BR_PROB_BASE, 0);
2821 sreal_init (&real_bb_freq_max, BB_FREQ_MAX, 0);
2822 sreal_init (&real_one_half, 1, -1);
2823 sreal_div (&real_inv_br_prob_base, &real_one, &real_br_prob_base);
2824 sreal_sub (&real_almost_one, &real_one, &real_inv_br_prob_base);
2825 }
2826
2827 mark_dfs_back_edges ();
2828
2829 single_succ_edge (ENTRY_BLOCK_PTR)->probability = REG_BR_PROB_BASE;
2830
2831 /* Set up block info for each basic block. */
2832 alloc_aux_for_blocks (sizeof (struct block_info_def));
2833 alloc_aux_for_edges (sizeof (struct edge_info_def));
2834 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
2835 {
2836 edge e;
2837 edge_iterator ei;
2838
2839 FOR_EACH_EDGE (e, ei, bb->succs)
2840 {
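	      /* EDGE_INFO (e)->back_edge_prob
		 = e->probability / REG_BR_PROB_BASE;  */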
2841 sreal_init (&EDGE_INFO (e)->back_edge_prob, e->probability, 0);
2842 sreal_mul (&EDGE_INFO (e)->back_edge_prob,
2843 &EDGE_INFO (e)->back_edge_prob,
2844 &real_inv_br_prob_base);
2845 }
2846 }
2847
2848 /* First compute probabilities locally for each loop from innermost
2849 to outermost to examine probabilities for back edges. */
2850 estimate_loops ();
2851
2852 memcpy (&freq_max, &real_zero, sizeof (real_zero));
2853 FOR_EACH_BB (bb)
2854 if (sreal_compare (&freq_max, &BLOCK_INFO (bb)->frequency) < 0)
2855 memcpy (&freq_max, &BLOCK_INFO (bb)->frequency, sizeof (freq_max));
2856
2857 sreal_div (&freq_max, &real_bb_freq_max, &freq_max);
2858 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
2859 {
2860 sreal tmp;
2861
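	  /* bb->frequency = BLOCK_INFO (bb)->frequency * freq_max + 1/2,
	     where FREQ_MAX now holds BB_FREQ_MAX divided by the largest
	     block frequency, so the hottest block ends up with a frequency
	     close to BB_FREQ_MAX.  */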
2862 sreal_mul (&tmp, &BLOCK_INFO (bb)->frequency, &freq_max);
2863 sreal_add (&tmp, &tmp, &real_one_half);
2864 bb->frequency = sreal_to_int (&tmp);
2865 }
2866
2867 free_aux_for_blocks ();
2868 free_aux_for_edges ();
2869 }
2870 compute_function_frequency ();
2871 }
2872
2873 /* Decide whether function is hot, cold or unlikely executed. */
2874 void
2875 compute_function_frequency (void)
2876 {
2877 basic_block bb;
2878 struct cgraph_node *node = cgraph_get_node (current_function_decl);
2879
2880 if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
2881 || MAIN_NAME_P (DECL_NAME (current_function_decl)))
2882 node->only_called_at_startup = true;
2883 if (DECL_STATIC_DESTRUCTOR (current_function_decl))
2884 node->only_called_at_exit = true;
2885
2886 if (profile_status != PROFILE_READ)
2887 {
2888 int flags = flags_from_decl_or_type (current_function_decl);
2889 if (lookup_attribute ("cold", DECL_ATTRIBUTES (current_function_decl))
2890 != NULL)
2891 node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
2892 else if (lookup_attribute ("hot", DECL_ATTRIBUTES (current_function_decl))
2893 != NULL)
2894 node->frequency = NODE_FREQUENCY_HOT;
2895 else if (flags & ECF_NORETURN)
2896 node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
2897 else if (MAIN_NAME_P (DECL_NAME (current_function_decl)))
2898 node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
2899 else if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
2900 || DECL_STATIC_DESTRUCTOR (current_function_decl))
2901 node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
2902 return;
2903 }
2904
2905 /* Only try to drop the function into the unlikely-executed category the
2906 first time around; after inlining, roundoff errors may confuse us.
2907 The ipa-profile pass will later drop functions that are only called from
2908 unlikely functions to unlikely, and that is most of what we care about. */
2909 if (!cfun->after_inlining)
2910 node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
2911 FOR_EACH_BB (bb)
2912 {
2913 if (maybe_hot_bb_p (cfun, bb))
2914 {
2915 node->frequency = NODE_FREQUENCY_HOT;
2916 return;
2917 }
2918 if (!probably_never_executed_bb_p (cfun, bb))
2919 node->frequency = NODE_FREQUENCY_NORMAL;
2920 }
2921 }
2922
2923 static bool
2924 gate_estimate_probability (void)
2925 {
2926 return flag_guess_branch_prob;
2927 }
2928
2929 /* Build PREDICT_EXPR. */
2930 tree
2931 build_predict_expr (enum br_predictor predictor, enum prediction taken)
2932 {
2933 tree t = build1 (PREDICT_EXPR, void_type_node,
2934 build_int_cst (integer_type_node, predictor));
2935 SET_PREDICT_EXPR_OUTCOME (t, taken);
2936 return t;
2937 }
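 /* Illustrative use (hypothetical): build_predict_expr (PRED_NORETURN,
    NOT_TAKEN) builds a PREDICT_EXPR hinting that the paths leading to the
    statement containing it are not expected to be executed.  */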
2938
2939 const char *
2940 predictor_name (enum br_predictor predictor)
2941 {
2942 return predictor_info[predictor].name;
2943 }
2944
2945 namespace {
2946
2947 const pass_data pass_data_profile =
2948 {
2949 GIMPLE_PASS, /* type */
2950 "profile_estimate", /* name */
2951 OPTGROUP_NONE, /* optinfo_flags */
2952 true, /* has_gate */
2953 true, /* has_execute */
2954 TV_BRANCH_PROB, /* tv_id */
2955 PROP_cfg, /* properties_required */
2956 0, /* properties_provided */
2957 0, /* properties_destroyed */
2958 0, /* todo_flags_start */
2959 TODO_verify_ssa, /* todo_flags_finish */
2960 };
2961
2962 class pass_profile : public gimple_opt_pass
2963 {
2964 public:
2965 pass_profile (gcc::context *ctxt)
2966 : gimple_opt_pass (pass_data_profile, ctxt)
2967 {}
2968
2969 /* opt_pass methods: */
2970 bool gate () { return gate_estimate_probability (); }
2971 unsigned int execute () { return tree_estimate_probability_driver (); }
2972
2973 }; // class pass_profile
2974
2975 } // anon namespace
2976
2977 gimple_opt_pass *
2978 make_pass_profile (gcc::context *ctxt)
2979 {
2980 return new pass_profile (ctxt);
2981 }
2982
2983 namespace {
2984
2985 const pass_data pass_data_strip_predict_hints =
2986 {
2987 GIMPLE_PASS, /* type */
2988 "*strip_predict_hints", /* name */
2989 OPTGROUP_NONE, /* optinfo_flags */
2990 false, /* has_gate */
2991 true, /* has_execute */
2992 TV_BRANCH_PROB, /* tv_id */
2993 PROP_cfg, /* properties_required */
2994 0, /* properties_provided */
2995 0, /* properties_destroyed */
2996 0, /* todo_flags_start */
2997 TODO_verify_ssa, /* todo_flags_finish */
2998 };
2999
3000 class pass_strip_predict_hints : public gimple_opt_pass
3001 {
3002 public:
3003 pass_strip_predict_hints (gcc::context *ctxt)
3004 : gimple_opt_pass (pass_data_strip_predict_hints, ctxt)
3005 {}
3006
3007 /* opt_pass methods: */
3008 opt_pass * clone () { return new pass_strip_predict_hints (m_ctxt); }
3009 unsigned int execute () { return strip_predict_hints (); }
3010
3011 }; // class pass_strip_predict_hints
3012
3013 } // anon namespace
3014
3015 gimple_opt_pass *
3016 make_pass_strip_predict_hints (gcc::context *ctxt)
3017 {
3018 return new pass_strip_predict_hints (ctxt);
3019 }
3020
3021 /* Rebuild function frequencies. Passes are in general expected to
3022 maintain the profile by hand; however, in some cases this is not possible:
3023 for example, when inlining several functions with loops, frequencies might
3024 run out of scale and thus need to be recomputed. */
3025
3026 void
3027 rebuild_frequencies (void)
3028 {
3029 timevar_push (TV_REBUILD_FREQUENCIES);
3030 if (profile_status == PROFILE_GUESSED)
3031 {
3032 loop_optimizer_init (0);
3033 add_noreturn_fake_exit_edges ();
3034 mark_irreducible_loops ();
3035 connect_infinite_loops_to_exit ();
3036 estimate_bb_frequencies ();
3037 remove_fake_exit_edges ();
3038 loop_optimizer_finalize ();
3039 }
3040 else if (profile_status == PROFILE_READ)
3041 counts_to_freqs ();
3042 else
3043 gcc_unreachable ();
3044 timevar_pop (TV_REBUILD_FREQUENCIES);
3045 }