gcc/ipa-inline.c

   1 /* Inlining decision heuristics.
   2    Copyright (C) 2003, 2004, 2007, 2008, 2009, 2010, 2011
   3    Free Software Foundation, Inc.
   4    Contributed by Jan Hubicka
   5
   6 This file is part of GCC.
   7
   8 GCC is free software; you can redistribute it and/or modify it under
   9 the terms of the GNU General Public License as published by the Free
  10 Software Foundation; either version 3, or (at your option) any later
  11 version.
  12
  13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  16 for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GCC; see the file COPYING3.  If not see
  20 <http://www.gnu.org/licenses/>.  */
  21
  22 /*  Inlining decision heuristics
  23
  24     The implementation of inliner is organized as follows:
  25
  26     inlining heuristics limits
  27
  28       can_inline_edge_p checks that a particular inlining is allowed
  29       by the limits specified by the user (allowed function growth, stack
  30       frame growth and so on).
  31
  32       Functions are inlined when it is obvious the result is profitable (such
  33       as functions called once or when inlining reduce code size).
  34       In addition to that we perform inlining of small functions and recursive
  35       inlining.
  36
  37     inlining heuristics
  38
  39        The inliner itself is split into two passes:
  40
  41        pass_early_inlining
  42
  43          Simple local inlining pass inlining callees into current function.
  44          This pass makes no use of whole unit analysis and thus it can do only
  45          very simple decisions based on local properties.
  46
  47          The strength of the pass is that it is run in topological order
  48          (reverse postorder) on the callgraph. Functions are converted into SSA
  49          form just before this pass and optimized subsequently. As a result, the
  50          callees of the function seen by the early inliner were already optimized
  51          and the results of early inlining add a lot of optimization opportunities
  52          for the local optimization.
  53
  54          The pass handles the obvious inlining decisions within the compilation
  55          unit - inlining auto inline functions, inlining for size and
  56          flattening.
  57
  58          The main strength of the pass is the ability to eliminate abstraction
  59          penalty in C++ code (via combination of inlining and early
  60          optimization) and thus improve quality of analysis done by real IPA
  61          optimizers.
  62
  63          Because of lack of whole unit knowledge, the pass can not really make
  64          good code size/performance tradeoffs.  It however does very simple
  65          speculative inlining allowing code size to grow by
  66          EARLY_INLINING_INSNS when callee is leaf function.  In this case the
  67          optimizations performed later are very likely to eliminate the cost.
  68
  69        pass_ipa_inline
  70
  71          This is the real inliner able to handle inlining with whole program
  72          knowledge. It performs following steps:
  73
  74          1) Inlining of small functions.  This is implemented by a greedy
  75          algorithm ordering all inlinable cgraph edges by their badness and
  76          inlining them in this order as long as the inline limits allow doing so.
  77
  78          This heuristic is not very good at inlining recursive calls. Recursive
  79          calls can be inlined with results similar to loop unrolling. To do so,
  80          a special purpose recursive inliner is executed on a function when
  81          a recursive edge is met as a viable candidate.
  82
  83          2) Unreachable functions are removed from callgraph.  Inlining leads
  84          to devirtualization and other modification of callgraph so functions
  85          may become unreachable during the process. Also functions declared as
  86          extern inline or virtual functions are removed, since after inlining
  87          we no longer need the offline bodies.
  88
  89          3) Functions called once and not exported from the unit are inlined.
  90          This should almost always lead to reduction of code size by eliminating
  91          the need for offline copy of the function.  */
  92
  93 #include "config.h"
  94 #include "system.h"
  95 #include "coretypes.h"
  96 #include "tm.h"
  97 #include "tree.h"
  98 #include "tree-inline.h"
  99 #include "langhooks.h"
 100 #include "flags.h"
 101 #include "cgraph.h"
 102 #include "diagnostic.h"
 103 #include "gimple-pretty-print.h"
 104 #include "timevar.h"
 105 #include "params.h"
 106 #include "fibheap.h"
 107 #include "intl.h"
 108 #include "tree-pass.h"
 109 #include "coverage.h"
 110 #include "ggc.h"
 111 #include "rtl.h"
 112 #include "tree-flow.h"
 113 #include "ipa-prop.h"
 114 #include "except.h"
 115 #include "target.h"
 116 #include "ipa-inline.h"
 117 #include "ipa-utils.h"
 118
 119 /* Statistics we collect about the inlining algorithm.  max_count is tested against zero by the heuristics below to choose profile-count based decisions over frequency based ones, and edge counts are scaled by it.  */
 120 static int overall_size;
 121 static gcov_type max_count;
 122
 123 /* Return false when inlining edge E would lead to violating
 124    limits on function unit growth or stack usage growth.
 125
 126    The relative function body growth limit is present generally
 127    to avoid problems with non-linear behavior of the compiler.
 128    To allow inlining huge functions into tiny wrapper, the limit
 129    is always based on the bigger of the two functions considered.
 130
 131    For stack growth limits we always base the growth in stack usage
 132    of the callers.  We want to prevent applications from segfaulting
 133    on stack overflow when functions with huge stack frames gets
 134    inlined. */
 135
 136 static bool
 137 caller_growth_limits (struct cgraph_edge *e)
 138 {
 139   struct cgraph_node *to = e->caller;
 140   struct cgraph_node *what = e->callee;
 141   int newsize;
 142   int limit = 0;
 143   HOST_WIDE_INT stack_size_limit = 0, inlined_stack;
 144   struct inline_summary *info, *what_info, *outer_info = inline_summary (to);
 145
 146   /* Look for function e->caller is inlined to.  While doing
 147      so work out the largest function body on the way.  As
 148      described above, we want to base our function growth
 149      limits based on that.  Not on the self size of the
 150      outer function, not on the self size of inline code
 151      we immediately inline to.  This is the most relaxed
 152      interpretation of the rule "do not grow large functions
 153      too much in order to prevent compiler from exploding".  */
 154   while (true)
 155     {
 156       info = inline_summary (to);
 157       if (limit < info->self_size)
 158         limit = info->self_size;
 159       if (stack_size_limit < info->estimated_self_stack_size)
 160         stack_size_limit = info->estimated_self_stack_size;
 161       if (to->global.inlined_to)
 162         to = to->callers->caller;
 163       else
 164         break;
 165     }
 166
 167   what_info = inline_summary (what);
 168
 169   if (limit < what_info->self_size)
 170     limit = what_info->self_size;
 171
 172   limit += limit * PARAM_VALUE (PARAM_LARGE_FUNCTION_GROWTH) / 100;
 173
 174   /* Check the size after inlining against the function limits.  But allow
 175      the function to shrink if it went over the limits by forced inlining.  */
 176   newsize = estimate_size_after_inlining (to, e);
 177   if (newsize >= info->size
 178       && newsize > PARAM_VALUE (PARAM_LARGE_FUNCTION_INSNS)
 179       && newsize > limit)
 180     {
 181       e->inline_failed = CIF_LARGE_FUNCTION_GROWTH_LIMIT;
 182       return false;
 183     }
 184
 185   if (!what_info->estimated_stack_size)
 186     return true;
 187
 188   /* FIXME: Stack size limit often prevents inlining in Fortran programs
 189      due to large i/o datastructures used by the Fortran front-end.
 190      We ought to ignore this limit when we know that the edge is executed
 191      on every invocation of the caller (i.e. its call statement dominates
 192      exit block).  We do not track this information, yet.  */
 193   stack_size_limit += ((gcov_type)stack_size_limit
 194                        * PARAM_VALUE (PARAM_STACK_FRAME_GROWTH) / 100);
 195
 196   inlined_stack = (outer_info->stack_frame_offset
 197                    + outer_info->estimated_self_stack_size
 198                    + what_info->estimated_stack_size);
 199   /* Check new stack consumption with stack consumption at the place
 200      stack is used.  */
 201   if (inlined_stack > stack_size_limit
 202       /* If function already has large stack usage from sibling
 203          inline call, we can inline, too.
 204          This bit overoptimistically assume that we are good at stack
 205          packing.  */
 206       && inlined_stack > info->estimated_stack_size
 207       && inlined_stack > PARAM_VALUE (PARAM_LARGE_STACK_FRAME))
 208     {
 209       e->inline_failed = CIF_LARGE_STACK_FRAME_GROWTH_LIMIT;
 210       return false;
 211     }
 212   return true;
 213 }
 214
 215 /* Dump info about why inlining has failed.  */
 216
 217 static void
 218 report_inline_failed_reason (struct cgraph_edge *e)
 219 {
 220   if (dump_file)
 221     {
 222       fprintf (dump_file, "  not inlinable: %s/%i -> %s/%i, %s\n",
 223                cgraph_node_name (e->caller), e->caller->uid,
 224                cgraph_node_name (e->callee), e->callee->uid,
 225                cgraph_inline_failed_string (e->inline_failed));
 226     }
 227 }
 228
 229 /* Decide if we can inline the edge and possibly update
 230    inline_failed reason.
 231    We check whether inlining is possible at all and whether
 232    caller growth limits allow doing so.
 233
 234    if REPORT is true, output reason to the dump file.  */
 235
 236 static bool
 237 can_inline_edge_p (struct cgraph_edge *e, bool report)
 238 {
 239   bool inlinable = true;
 240   tree caller_tree = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (e->caller->decl);
 241   tree callee_tree = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (e->callee->decl);
 242
 243   gcc_assert (e->inline_failed);
 244
 245   if (!e->callee->analyzed)
 246     {
 247       e->inline_failed = CIF_BODY_NOT_AVAILABLE;
 248       inlinable = false;
 249     }
 250   else if (!inline_summary (e->callee)->inlinable)
 251     {
 252       e->inline_failed = CIF_FUNCTION_NOT_INLINABLE;
 253       inlinable = false;
 254     }
 255   else if (cgraph_function_body_availability (e->callee) <= AVAIL_OVERWRITABLE)
 256     {
 257       e->inline_failed = CIF_OVERWRITABLE;
 258       return false;
 259     }
 260   else if (e->call_stmt_cannot_inline_p)
 261     {
 262       e->inline_failed = CIF_MISMATCHED_ARGUMENTS;
 263       inlinable = false;
 264     }
 265   /* Don't inline if the functions have different EH personalities.  */
 266   else if (DECL_FUNCTION_PERSONALITY (e->caller->decl)
 267            && DECL_FUNCTION_PERSONALITY (e->callee->decl)
 268            && (DECL_FUNCTION_PERSONALITY (e->caller->decl)
 269                != DECL_FUNCTION_PERSONALITY (e->callee->decl)))
 270     {
 271       e->inline_failed = CIF_EH_PERSONALITY;
 272       inlinable = false;
 273     }
 274   /* Don't inline if the callee can throw non-call exceptions but the
 275      caller cannot.
 276      FIXME: this is obviously wrong for LTO where STRUCT_FUNCTION is missing.
 277      Move the flag into cgraph node or mirror it in the inline summary.  */
 278   else if (DECL_STRUCT_FUNCTION (e->callee->decl)
 279            && DECL_STRUCT_FUNCTION
 280                 (e->callee->decl)->can_throw_non_call_exceptions
 281            && !(DECL_STRUCT_FUNCTION (e->caller->decl)
 282                 && DECL_STRUCT_FUNCTION
 283                      (e->caller->decl)->can_throw_non_call_exceptions))
 284     {
 285       e->inline_failed = CIF_NON_CALL_EXCEPTIONS;
 286       inlinable = false;
 287     }
 288   /* Check compatibility of target optimization options.  */
 289   else if (!targetm.target_option.can_inline_p (e->caller->decl,
 290                                                 e->callee->decl))
 291     {
 292       e->inline_failed = CIF_TARGET_OPTION_MISMATCH;
 293       inlinable = false;
 294     }
 295   /* Check if caller growth allows the inlining.  */
 296   else if (!DECL_DISREGARD_INLINE_LIMITS (e->callee->decl)
 297            && !lookup_attribute ("flatten",
 298                                  DECL_ATTRIBUTES
 299                                    (e->caller->global.inlined_to
 300                                     ? e->caller->global.inlined_to->decl
 301                                     : e->caller->decl))
 302            && !caller_growth_limits (e))
 303     inlinable = false;
 304   /* Don't inline a function with a higher optimization level than the
 305      caller.  FIXME: this is really just tip of iceberg of handling
 306      optimization attribute.  */
 307   else if (caller_tree != callee_tree)
 308     {
 309       struct cl_optimization *caller_opt
 310         = TREE_OPTIMIZATION ((caller_tree)
 311                              ? caller_tree
 312                              : optimization_default_node);
 313
 314       struct cl_optimization *callee_opt
 315         = TREE_OPTIMIZATION ((callee_tree)
 316                              ? callee_tree
 317                              : optimization_default_node);
 318
 319       if ((caller_opt->x_optimize > callee_opt->x_optimize)
 320           || (caller_opt->x_optimize_size != callee_opt->x_optimize_size))
 321         {
 322           e->inline_failed = CIF_TARGET_OPTIMIZATION_MISMATCH;
 323           inlinable = false;
 324         }
 325     }
 326
 327   /* Be sure that the cannot_inline_p flag is up to date.  */
 328   gcc_checking_assert (!e->call_stmt
 329                        || (gimple_call_cannot_inline_p (e->call_stmt)
 330                            == e->call_stmt_cannot_inline_p)
 331                        /* In -flto-partition=none mode we really keep things out of
 332                           sync because call_stmt_cannot_inline_p is set at cgraph
 333                           merging when function bodies are not there yet.  */
 334                        || (in_lto_p && !gimple_call_cannot_inline_p (e->call_stmt)));
 335   if (!inlinable && report)
 336     report_inline_failed_reason (e);
 337   return inlinable;
 338 }
 339
 340
 341 /* Return true if the edge E is inlinable during early inlining.  */
 342
 343 static bool
 344 can_early_inline_edge_p (struct cgraph_edge *e)
 345 {
 346   /* Early inliner might get called at WPA stage when IPA pass adds new
 347      function.  In this case we can not really do any of early inlining
 348      because function bodies are missing.  */
 349   if (!gimple_has_body_p (e->callee->decl))
 350     {
 351       e->inline_failed = CIF_BODY_NOT_AVAILABLE;
 352       return false;
 353     }
 354   /* In early inliner some of callees may not be in SSA form yet
 355      (i.e. the callgraph is cyclic and we did not process
 356      the callee by early inliner, yet).  We don't have CIF code for this
 357      case; later we will re-do the decision in the real inliner.  */
 358   if (!gimple_in_ssa_p (DECL_STRUCT_FUNCTION (e->caller->decl))
 359       || !gimple_in_ssa_p (DECL_STRUCT_FUNCTION (e->callee->decl)))
 360     {
 361       if (dump_file)
 362         fprintf (dump_file, "  edge not inlinable: not in SSA form\n");
 363       return false;
 364     }
 365   if (!can_inline_edge_p (e, true))
 366     return false;
 367   return true;
 368 }
 369
 370
 371 /* Return true when N is leaf function.  Accept cheap builtins
 372    in leaf functions.  */
 373
 374 static bool
 375 leaf_node_p (struct cgraph_node *n)
 376 {
 377   struct cgraph_edge *e;
 378   for (e = n->callees; e; e = e->next_callee)
 379     if (!is_inexpensive_builtin (e->callee->decl))
 380       return false;
 381   return true;
 382 }
 383
 384
 385 /* Return true if we are interested in inlining small function.  */
 386
 387 static bool
 388 want_early_inline_function_p (struct cgraph_edge *e)
 389 {
 390   bool want_inline = true;
 391
 392   if (DECL_DISREGARD_INLINE_LIMITS (e->callee->decl))
 393     ;
 394   else if (!DECL_DECLARED_INLINE_P (e->callee->decl)
 395            && !flag_inline_small_functions)
 396     {
 397       e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
 398       report_inline_failed_reason (e);
 399       want_inline = false;
 400     }
 401   else
 402     {
 403       int growth = estimate_edge_growth (e);
 404       if (growth <= 0)
 405         ;
 406       else if (!cgraph_maybe_hot_edge_p (e)
 407                && growth > 0)
 408         {
 409           if (dump_file)
 410             fprintf (dump_file, "  will not early inline: %s/%i->%s/%i, "
 411                      "call is cold and code would grow by %i\n",
 412                      cgraph_node_name (e->caller), e->caller->uid,
 413                      cgraph_node_name (e->callee), e->callee->uid,
 414                      growth);
 415           want_inline = false;
 416         }
 417       else if (!leaf_node_p (e->callee)
 418                && growth > 0)
 419         {
 420           if (dump_file)
 421             fprintf (dump_file, "  will not early inline: %s/%i->%s/%i, "
 422                      "callee is not leaf and code would grow by %i\n",
 423                      cgraph_node_name (e->caller), e->caller->uid,
 424                      cgraph_node_name (e->callee), e->callee->uid,
 425                      growth);
 426           want_inline = false;
 427         }
 428       else if (growth > PARAM_VALUE (PARAM_EARLY_INLINING_INSNS))
 429         {
 430           if (dump_file)
 431             fprintf (dump_file, "  will not early inline: %s/%i->%s/%i, "
 432                      "growth %i exceeds --param early-inlining-insns\n",
 433                      cgraph_node_name (e->caller), e->caller->uid,
 434                      cgraph_node_name (e->callee), e->callee->uid,
 435                      growth);
 436           want_inline = false;
 437         }
 438     }
 439   return want_inline;
 440 }
 441
 442 /* Return true if we are interested in inlining small function.
 443    When REPORT is true, report reason to dump file.  */
 444
 445 static bool
 446 want_inline_small_function_p (struct cgraph_edge *e, bool report)
 447 {
 448   bool want_inline = true;
 449
 450   if (DECL_DISREGARD_INLINE_LIMITS (e->callee->decl))
 451     ;
 452   else if (!DECL_DECLARED_INLINE_P (e->callee->decl)
 453            && !flag_inline_small_functions)
 454     {
 455       e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
 456       want_inline = false;
 457     }
 458   else
 459     {
 460       int growth = estimate_edge_growth (e);
 461
 462       if (growth <= 0)
 463         ;
 464       else if (DECL_DECLARED_INLINE_P (e->callee->decl)
 465                && growth >= MAX_INLINE_INSNS_SINGLE)
 466         {
 467           e->inline_failed = CIF_MAX_INLINE_INSNS_SINGLE_LIMIT;
 468           want_inline = false;
 469         }
 470       else if (!DECL_DECLARED_INLINE_P (e->callee->decl)
 471                && !flag_inline_functions)
 472         {
 473           e->inline_failed = CIF_NOT_DECLARED_INLINED;
 474           want_inline = false;
 475         }
 476       else if (!DECL_DECLARED_INLINE_P (e->callee->decl)
 477                && growth >= MAX_INLINE_INSNS_AUTO)
 478         {
 479           e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
 480           want_inline = false;
 481         }
 482       /* If call is cold, do not inline when function body would grow.
 483          Still inline when the overall unit size will shrink because the offline
 484          copy of function being eliminated.
 485
 486          This is slightly wrong on aggressive side:  it is entirely possible
 487          that function is called many times with a context where inlining
 488          reduces code size and few times with a context where inlining increase
 489          code size.  Resoluting growth estimate will be negative even if it
 490          would make more sense to keep offline copy and do not inline into the
 491          call sites that makes the code size grow.
 492
 493          When badness orders the calls in a way that code reducing calls come
 494          first, this situation is not a problem at all: after inlining all
 495          "good" calls, we will realize that keeping the function around is
 496          better.  */
 497       else if (!cgraph_maybe_hot_edge_p (e)
 498                && (DECL_EXTERNAL (e->callee->decl)
 499
 500                    /* Unlike for functions called once, we play unsafe with
 501                       COMDATs.  We can allow that since we know functions
 502                       in consideration are small (and thus risk is small) and
 503                       moreover grow estimates already accounts that COMDAT
 504                       functions may or may not disappear when eliminated from
 505                       current unit. With good probability making aggressive
 506                       choice in all units is going to make overall program
 507                       smaller.
 508
 509                       Consequently we ask cgraph_can_remove_if_no_direct_calls_p
 510                       instead of
 511                       cgraph_will_be_removed_from_program_if_no_direct_calls  */
 512
 513                    || !cgraph_can_remove_if_no_direct_calls_p (e->callee)
 514                    || estimate_growth (e->callee) > 0))
 515         {
 516           e->inline_failed = CIF_UNLIKELY_CALL;
 517           want_inline = false;
 518         }
 519     }
 520   if (!want_inline && report)
 521     report_inline_failed_reason (e);
 522   return want_inline;
 523 }
 524
 525 /* EDGE is self recursive edge.
 526    We hand two cases - when function A is inlining into itself
 527    or when function A is being inlined into another inliner copy of function
 528    A within function B.
 529
 530    In first case OUTER_NODE points to the toplevel copy of A, while
 531    in the second case OUTER_NODE points to the outermost copy of A in B.
 532
 533    In both cases we want to be extra selective since
 534    inlining the call will just introduce new recursive calls to appear.  */
 535
 536 static bool
 537 want_inline_self_recursive_call_p (struct cgraph_edge *edge,
 538                                    struct cgraph_node *outer_node,
 539                                    bool peeling,
 540                                    int depth)
 541 {
 542   char const *reason = NULL;
 543   bool want_inline = true;
 544   int caller_freq = CGRAPH_FREQ_BASE;
 545   int max_depth = PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH_AUTO);
 546
 547   if (DECL_DECLARED_INLINE_P (edge->callee->decl))
 548     max_depth = PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH);
 549
 550   if (!cgraph_maybe_hot_edge_p (edge))
 551     {
 552       reason = "recursive call is cold";
 553       want_inline = false;
 554     }
 555   else if (max_count && !outer_node->count)
 556     {
 557       reason = "not executed in profile";
 558       want_inline = false;
 559     }
 560   else if (depth > max_depth)
 561     {
 562       reason = "--param max-inline-recursive-depth exceeded.";
 563       want_inline = false;
 564     }
 565
 566   if (outer_node->global.inlined_to)
 567     caller_freq = outer_node->callers->frequency;
 568
 569   if (!want_inline)
 570     ;
 571   /* Inlining of self recursive function into copy of itself within other function
 572      is transformation similar to loop peeling.
 573
 574      Peeling is profitable if we can inline enough copies to make probability
 575      of actual call to the self recursive function very small.  Be sure that
 576      the probability of recursion is small.
 577
 578      We ensure that the frequency of recursing is at most 1 - (1/max_depth).
 579      This way the expected number of recision is at most max_depth.  */
 580   else if (peeling)
 581     {
 582       int max_prob = CGRAPH_FREQ_BASE - ((CGRAPH_FREQ_BASE + max_depth - 1)
 583                                          / max_depth);
 584       int i;
 585       for (i = 1; i < depth; i++)
 586         max_prob = max_prob * max_prob / CGRAPH_FREQ_BASE;
 587       if (max_count
 588           && (edge->count * CGRAPH_FREQ_BASE / outer_node->count
 589               >= max_prob))
 590         {
 591           reason = "profile of recursive call is too large";
 592           want_inline = false;
 593         }
 594       if (!max_count
 595           && (edge->frequency * CGRAPH_FREQ_BASE / caller_freq
 596               >= max_prob))
 597         {
 598           reason = "frequency of recursive call is too large";
 599           want_inline = false;
 600         }
 601     }
 602   /* Recursive inlining, i.e. equivalent of unrolling, is profitable if recursion
 603      depth is large.  We reduce function call overhead and increase chances that
 604      things fit in hardware return predictor.
 605
 606      Recursive inlining might however increase cost of stack frame setup
 607      actually slowing down functions whose recursion tree is wide rather than
 608      deep.
 609
 610      Deciding reliably on when to do recursive inlining without profile feedback
 611      is tricky.  For now we disable recursive inlining when probability of self
 612      recursion is low.
 613
 614      Recursive inlining of self recursive call within loop also results in large loop
 615      depths that generally optimize badly.  We may want to throttle down inlining
 616      in those cases.  In particular this seems to happen in one of libstdc++ rb tree
 617      methods.  */
 618   else
 619     {
 620       if (max_count
 621           && (edge->count * 100 / outer_node->count
 622               <= PARAM_VALUE (PARAM_MIN_INLINE_RECURSIVE_PROBABILITY)))
 623         {
 624           reason = "profile of recursive call is too small";
 625           want_inline = false;
 626         }
 627       else if (!max_count
 628                && (edge->frequency * 100 / caller_freq
 629                    <= PARAM_VALUE (PARAM_MIN_INLINE_RECURSIVE_PROBABILITY)))
 630         {
 631           reason = "frequency of recursive call is too small";
 632           want_inline = false;
 633         }
 634     }
 635   if (!want_inline && dump_file)
 636     fprintf (dump_file, "   not inlining recursively: %s\n", reason);
 637   return want_inline;
 638 }
 639
 640
 641 /* Decide if NODE is called once inlining it would eliminate need
 642    for the offline copy of function.  */
 643
 644 static bool
 645 want_inline_function_called_once_p (struct cgraph_node *node)
 646 {
 647    /* Already inlined?  */
 648    if (node->global.inlined_to)
 649      return false;
 650    /* Zero or more then one callers?  */
 651    if (!node->callers
 652        || node->callers->next_caller)
 653      return false;
 654    /* Recursive call makes no sense to inline.  */
 655    if (node->callers->caller == node)
 656      return false;
 657    /* External functions are not really in the unit, so inlining
 658       them when called once would just increase the program size.  */
 659    if (DECL_EXTERNAL (node->decl))
 660      return false;
 661    /* Offline body must be optimized out.  */
 662    if (!cgraph_will_be_removed_from_program_if_no_direct_calls (node))
 663      return false;
 664    if (!can_inline_edge_p (node->callers, true))
 665      return false;
 666    return true;
 667 }
 668
 669 /* A cost model driving the inlining heuristics in a way so the edges with
 670    smallest badness are inlined first.  After each inlining is performed
 671    the costs of all caller edges of nodes affected are recomputed so the
 672    metrics may accurately depend on values such as number of inlinable callers
 673    of the function or function body size.  */
 674
 675 static int
 676 edge_badness (struct cgraph_edge *edge, bool dump)
 677 {
 678   gcov_type badness;
 679   int growth, time_growth;
 680   struct inline_summary *callee_info = inline_summary (edge->callee);
 681
 682   if (DECL_DISREGARD_INLINE_LIMITS (edge->callee->decl))
 683     return INT_MIN;
 684
 685   growth = estimate_edge_growth (edge);
 686   time_growth = estimate_edge_time (edge);
 687
 688   if (dump)
 689     {
 690       fprintf (dump_file, "    Badness calculation for %s -> %s\n",
 691                cgraph_node_name (edge->caller),
 692                cgraph_node_name (edge->callee));
 693       fprintf (dump_file, "      growth size %i, time %i\n",
 694                growth,
 695                time_growth);
 696     }
 697
 698   /* Always prefer inlining saving code size.  */
 699   if (growth <= 0)
 700     {
 701       badness = INT_MIN - growth;
 702       if (dump)
 703         fprintf (dump_file, "      %i: Growth %i < 0\n", (int) badness,
 704                  growth);
 705     }
 706
 707   /* When profiling is available, base priorities -(#calls / growth).
 708      So we optimize for overall number of "executed" inlined calls.  */
 709   else if (max_count)
 710     {
 711       int benefitperc;
 712       benefitperc = (((gcov_type)callee_info->time
 713                      * edge->frequency / CGRAPH_FREQ_BASE - time_growth) * 100
 714                      / (callee_info->time + 1) + 1);
 715       benefitperc = MIN (benefitperc, 100);
 716       benefitperc = MAX (benefitperc, 0);
 717       badness =
 718         ((int)
 719          ((double) edge->count * INT_MIN / max_count / 100) *
 720          benefitperc) / growth;
 721
 722       /* Be sure that insanity of the profile won't lead to increasing counts
 723          in the scalling and thus to overflow in the computation above.  */
 724       gcc_assert (max_count >= edge->count);
 725       if (dump)
 726         {
 727           fprintf (dump_file,
 728                    "      %i (relative %f): profile info. Relative count %f"
 729                    " * Relative benefit %f\n",
 730                    (int) badness, (double) badness / INT_MIN,
 731                    (double) edge->count / max_count,
 732                    (double) benefitperc);
 733         }
 734     }
 735
 736   /* When function local profile is available, base priorities on
 737      growth / frequency, so we optimize for overall frequency of inlined
 738      calls.  This is not too accurate since while the call might be frequent
 739      within function, the function itself is infrequent.
 740
 741      Other objective to optimize for is number of different calls inlined.
 742      We add the estimated growth after inlining all functions to bias the
 743      priorities slightly in this direction (so fewer times called functions
 744      of the same size gets priority).  */
 745   else if (flag_guess_branch_prob)
 746     {
 747       int div = edge->frequency * 100 / CGRAPH_FREQ_BASE + 1;
 748       int benefitperc;
 749       int growth_for_all;
 750       badness = growth * 10000;
 751       benefitperc = (((gcov_type)callee_info->time
 752                      * edge->frequency / CGRAPH_FREQ_BASE - time_growth) * 100
 753                      / (callee_info->time + 1) + 1);
 754       benefitperc = MIN (benefitperc, 100);
 755       benefitperc = MAX (benefitperc, 0);
 756       div *= benefitperc;
 757
 758       /* Decrease badness if call is nested.  */
 759       /* Compress the range so we don't overflow.  */
 760       if (div > 10000)
 761         div = 10000 + ceil_log2 (div) - 8;
 762       if (div < 1)
 763         div = 1;
 764       if (badness > 0)
 765         badness /= div;
 766       growth_for_all = estimate_growth (edge->callee);
 767       badness += growth_for_all;
 768       if (badness > INT_MAX)
 769         badness = INT_MAX;
 770       if (dump)
 771         {
 772           fprintf (dump_file,
 773                    "      %i: guessed profile. frequency %i, overall growth %i,"
 774                    " benefit %i%%, divisor %i\n",
 775                    (int) badness, edge->frequency, growth_for_all,
 776                    benefitperc, div);
 777         }
 778     }
 779   /* When function local profile is not available or it does not give
 780      useful information (ie frequency is zero), base the cost on
 781      loop nest and overall size growth, so we optimize for overall number
 782      of functions fully inlined in program.  */
 783   else
 784     {
 785       int nest = MIN (inline_edge_summary (edge)->loop_depth, 8);
 786       badness = estimate_growth (edge->callee) * 256;
 787
 788       /* Decrease badness if call is nested.  */
 789       if (badness > 0)
 790         badness >>= nest;
 791       else
 792         {
 793           badness <<= nest;
 794         }
 795       if (dump)
 796         fprintf (dump_file, "      %i: no profile. nest %i\n", (int) badness,
 797                  nest);
 798     }
 799
 800   /* Ensure that we did not overflow in all the fixed point math above.  */
 801   gcc_assert (badness >= INT_MIN);
 802   gcc_assert (badness <= INT_MAX - 1);
 803   /* Make recursive inlining happen always after other inlining is done.  */
 804   if (cgraph_edge_recursive_p (edge))
 805     return badness + 1;
 806   else
 807     return badness;
 808 }
 809
 810 /* Recompute badness of EDGE and update its key in HEAP if needed.  */
 811 static inline void
 812 update_edge_key (fibheap_t heap, struct cgraph_edge *edge)
 813 {
 814   int badness = edge_badness (edge, false);
 815   if (edge->aux)
 816     {
 817       fibnode_t n = (fibnode_t) edge->aux;
 818       gcc_checking_assert (n->data == edge);
 819
 820       /* fibheap_replace_key only decrease the keys.
 821          When we increase the key we do not update heap
 822          and instead re-insert the element once it becomes
 823          a minimum of heap.  */
 824       if (badness < n->key)
 825         {
 826           fibheap_replace_key (heap, n, badness);
 827           if (dump_file && (dump_flags & TDF_DETAILS))
 828             {
 829               fprintf (dump_file,
 830                        "  decreasing badness %s/%i -> %s/%i, %i to %i\n",
 831                        cgraph_node_name (edge->caller), edge->caller->uid,
 832                        cgraph_node_name (edge->callee), edge->callee->uid,
 833                        (int)n->key,
 834                        badness);
 835             }
 836           gcc_checking_assert (n->key == badness);
 837         }
 838     }
 839   else
 840     {
 841        if (dump_file && (dump_flags & TDF_DETAILS))
 842          {
 843            fprintf (dump_file,
 844                     "  enqueuing call %s/%i -> %s/%i, badness %i\n",
 845                     cgraph_node_name (edge->caller), edge->caller->uid,
 846                     cgraph_node_name (edge->callee), edge->callee->uid,
 847                     badness);
 848          }
 849       edge->aux = fibheap_insert (heap, badness, edge);
 850     }
 851 }
 852
 853
 854 /* NODE was inlined.
 855    All caller edges needs to be resetted because
 856    size estimates change. Similarly callees needs reset
 857    because better context may be known.  */
 858
 859 static void
 860 reset_edge_caches (struct cgraph_node *node)
 861 {
 862   struct cgraph_edge *edge;
 863   struct cgraph_edge *e = node->callees;
 864   struct cgraph_node *where = node;
 865
 866   if (where->global.inlined_to)
 867     where = where->global.inlined_to;
 868
 869   /* WHERE body size has changed, the cached growth is invalid.  */
 870   reset_node_growth_cache (where);
 871
 872   for (edge = where->callers; edge; edge = edge->next_caller)
 873     if (edge->inline_failed)
 874       reset_edge_growth_cache (edge);
 875
 876   if (!e)
 877     return;
 878
 879   while (true)
 880     if (!e->inline_failed && e->callee->callees)
 881       e = e->callee->callees;
 882     else
 883       {
 884         if (e->inline_failed)
 885           reset_edge_growth_cache (e);
 886         if (e->next_callee)
 887           e = e->next_callee;
 888         else
 889           {
 890             do
 891               {
 892                 if (e->caller == node)
 893                   return;
 894                 e = e->caller->callers;
 895               }
 896             while (!e->next_callee);
 897             e = e->next_callee;
 898           }
 899       }
 900 }
 901
 902 /* Recompute HEAP nodes for each of caller of NODE.
 903    UPDATED_NODES track nodes we already visited, to avoid redundant work.
 904    When CHECK_INLINABLITY_FOR is set, re-check for specified edge that
 905    it is inlinable. Otherwise check all edges.  */
 906
 907 static void
 908 update_caller_keys (fibheap_t heap, struct cgraph_node *node,
 909                     bitmap updated_nodes,
 910                     struct cgraph_edge *check_inlinablity_for)
 911 {
 912   struct cgraph_edge *edge;
 913
 914   if (!inline_summary (node)->inlinable
 915       || cgraph_function_body_availability (node) <= AVAIL_OVERWRITABLE
 916       || node->global.inlined_to)
 917     return;
 918   if (!bitmap_set_bit (updated_nodes, node->uid))
 919     return;
 920
 921   for (edge = node->callers; edge; edge = edge->next_caller)
 922     if (edge->inline_failed)
 923       {
 924         if (!check_inlinablity_for
 925             || check_inlinablity_for == edge)
 926           {
 927             if (can_inline_edge_p (edge, false)
 928                 && want_inline_small_function_p (edge, false))
 929               update_edge_key (heap, edge);
 930             else if (edge->aux)
 931               {
 932                 report_inline_failed_reason (edge);
 933                 fibheap_delete_node (heap, (fibnode_t) edge->aux);
 934                 edge->aux = NULL;
 935               }
 936           }
 937         else if (edge->aux)
 938           update_edge_key (heap, edge);
 939       }
 940 }
 941
 942 /* Recompute HEAP nodes for each uninlined call in NODE.
 943    This is used when we know that edge badnesses are going only to increase
 944    (we introduced new call site) and thus all we need is to insert newly
 945    created edges into heap.  */
 946
 947 static void
 948 update_callee_keys (fibheap_t heap, struct cgraph_node *node,
 949                     bitmap updated_nodes)
 950 {
 951   struct cgraph_edge *e = node->callees;
 952
 953   if (!e)
 954     return;
 955   while (true)
 956     if (!e->inline_failed && e->callee->callees)
 957       e = e->callee->callees;
 958     else
 959       {
 960         /* We inlined and thus callees might have different number of calls.
 961            Reset their caches  */
 962         reset_node_growth_cache (e->callee);
 963         if (e->inline_failed
 964             && inline_summary (e->callee)->inlinable
 965             && cgraph_function_body_availability (e->callee) >= AVAIL_AVAILABLE
 966             && !bitmap_bit_p (updated_nodes, e->callee->uid))
 967           {
 968             if (can_inline_edge_p (e, false)
 969                 && want_inline_small_function_p (e, false))
 970               update_edge_key (heap, e);
 971             else if (e->aux)
 972               {
 973                 report_inline_failed_reason (e);
 974                 fibheap_delete_node (heap, (fibnode_t) e->aux);
 975                 e->aux = NULL;
 976               }
 977           }
 978         if (e->next_callee)
 979           e = e->next_callee;
 980         else
 981           {
 982             do
 983               {
 984                 if (e->caller == node)
 985                   return;
 986                 e = e->caller->callers;
 987               }
 988             while (!e->next_callee);
 989             e = e->next_callee;
 990           }
 991       }
 992 }
 993
 994 /* Recompute heap nodes for each of caller edges of each of callees.
 995    Walk recursively into all inline clones.  */
 996
 997 static void
 998 update_all_callee_keys (fibheap_t heap, struct cgraph_node *node,
 999                         bitmap updated_nodes)
1000 {
1001   struct cgraph_edge *e = node->callees;
1002
1003   if (!e)
1004     return;
1005   while (true)
1006     if (!e->inline_failed && e->callee->callees)
1007       e = e->callee->callees;
1008     else
1009       {
1010         /* We inlined and thus callees might have different number of calls.
1011            Reset their caches  */
1012         reset_node_growth_cache (e->callee);
1013         if (e->inline_failed)
1014           update_caller_keys (heap, e->callee, updated_nodes, e);
1015         if (e->next_callee)
1016           e = e->next_callee;
1017         else
1018           {
1019             do
1020               {
1021                 if (e->caller == node)
1022                   return;
1023                 e = e->caller->callers;
1024               }
1025             while (!e->next_callee);
1026             e = e->next_callee;
1027           }
1028       }
1029 }
1030
1031 /* Enqueue all recursive calls from NODE into priority queue depending on
1032    how likely we want to recursively inline the call.  */
1033
1034 static void
1035 lookup_recursive_calls (struct cgraph_node *node, struct cgraph_node *where,
1036                         fibheap_t heap)
1037 {
1038   struct cgraph_edge *e;
1039   for (e = where->callees; e; e = e->next_callee)
1040     if (e->callee == node)
1041       {
1042         /* When profile feedback is available, prioritize by expected number
1043            of calls.  */
1044         fibheap_insert (heap,
1045                         !max_count ? -e->frequency
1046                         : -(e->count / ((max_count + (1<<24) - 1) / (1<<24))),
1047                         e);
1048       }
1049   for (e = where->callees; e; e = e->next_callee)
1050     if (!e->inline_failed)
1051       lookup_recursive_calls (node, e->callee, heap);
1052 }
1053
1054 /* Decide on recursive inlining: in the case function has recursive calls,
1055    inline until body size reaches given argument.  If any new indirect edges
1056    are discovered in the process, add them to *NEW_EDGES, unless NEW_EDGES
1057    is NULL.  */
1058
1059 static bool
1060 recursive_inlining (struct cgraph_edge *edge,
1061                     VEC (cgraph_edge_p, heap) **new_edges)
1062 {
1063   int limit = PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE_AUTO);
1064   fibheap_t heap;
1065   struct cgraph_node *node;
1066   struct cgraph_edge *e;
1067   struct cgraph_node *master_clone = NULL, *next;
1068   int depth = 0;
1069   int n = 0;
1070
1071   node = edge->caller;
1072   if (node->global.inlined_to)
1073     node = node->global.inlined_to;
1074
1075   if (DECL_DECLARED_INLINE_P (node->decl))
1076     limit = PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE);
1077
1078   /* Make sure that function is small enough to be considered for inlining.  */
1079   if (estimate_size_after_inlining (node, edge)  >= limit)
1080     return false;
1081   heap = fibheap_new ();
1082   lookup_recursive_calls (node, node, heap);
1083   if (fibheap_empty (heap))
1084     {
1085       fibheap_delete (heap);
1086       return false;
1087     }
1088
1089   if (dump_file)
1090     fprintf (dump_file,
1091              "  Performing recursive inlining on %s\n",
1092              cgraph_node_name (node));
1093
1094   /* Do the inlining and update list of recursive call during process.  */
1095   while (!fibheap_empty (heap))
1096     {
1097       struct cgraph_edge *curr
1098         = (struct cgraph_edge *) fibheap_extract_min (heap);
1099       struct cgraph_node *cnode;
1100
1101       if (estimate_size_after_inlining (node, curr) > limit)
1102         break;
1103
1104       if (!can_inline_edge_p (curr, true))
1105         continue;
1106
1107       depth = 1;
1108       for (cnode = curr->caller;
1109            cnode->global.inlined_to; cnode = cnode->callers->caller)
1110         if (node->decl == curr->callee->decl)
1111           depth++;
1112
1113       if (!want_inline_self_recursive_call_p (curr, node, false, depth))
1114         continue;
1115
1116       if (dump_file)
1117         {
1118           fprintf (dump_file,
1119                    "   Inlining call of depth %i", depth);
1120           if (node->count)
1121             {
1122               fprintf (dump_file, " called approx. %.2f times per call",
1123                        (double)curr->count / node->count);
1124             }
1125           fprintf (dump_file, "\n");
1126         }
1127       if (!master_clone)
1128         {
1129           /* We need original clone to copy around.  */
1130           master_clone = cgraph_clone_node (node, node->decl,
1131                                             node->count, CGRAPH_FREQ_BASE,
1132                                             false, NULL);
1133           for (e = master_clone->callees; e; e = e->next_callee)
1134             if (!e->inline_failed)
1135               clone_inlined_nodes (e, true, false, NULL);
1136         }
1137
1138       cgraph_redirect_edge_callee (curr, master_clone);
1139       inline_call (curr, false, new_edges, &overall_size);
1140       lookup_recursive_calls (node, curr->callee, heap);
1141       n++;
1142     }
1143
1144   if (!fibheap_empty (heap) && dump_file)
1145     fprintf (dump_file, "    Recursive inlining growth limit met.\n");
1146   fibheap_delete (heap);
1147
1148   if (!master_clone)
1149     return false;
1150
1151   if (dump_file)
1152     fprintf (dump_file,
1153              "\n   Inlined %i times, "
1154              "body grown from size %i to %i, time %i to %i\n", n,
1155              inline_summary (master_clone)->size, inline_summary (node)->size,
1156              inline_summary (master_clone)->time, inline_summary (node)->time);
1157
1158   /* Remove master clone we used for inlining.  We rely that clones inlined
1159      into master clone gets queued just before master clone so we don't
1160      need recursion.  */
1161   for (node = cgraph_nodes; node != master_clone;
1162        node = next)
1163     {
1164       next = node->next;
1165       if (node->global.inlined_to == master_clone)
1166         cgraph_remove_node (node);
1167     }
1168   cgraph_remove_node (master_clone);
1169   return true;
1170 }
1171
1172
1173 /* Given whole compilation unit estimate of INSNS, compute how large we can
1174    allow the unit to grow.  */
1175
1176 static int
1177 compute_max_insns (int insns)
1178 {
1179   int max_insns = insns;
1180   if (max_insns < PARAM_VALUE (PARAM_LARGE_UNIT_INSNS))
1181     max_insns = PARAM_VALUE (PARAM_LARGE_UNIT_INSNS);
1182
1183   return ((HOST_WIDEST_INT) max_insns
1184           * (100 + PARAM_VALUE (PARAM_INLINE_UNIT_GROWTH)) / 100);
1185 }
1186
1187
1188 /* Compute badness of all edges in NEW_EDGES and add them to the HEAP.  */
1189
1190 static void
1191 add_new_edges_to_heap (fibheap_t heap, VEC (cgraph_edge_p, heap) *new_edges)
1192 {
1193   while (VEC_length (cgraph_edge_p, new_edges) > 0)
1194     {
1195       struct cgraph_edge *edge = VEC_pop (cgraph_edge_p, new_edges);
1196
1197       gcc_assert (!edge->aux);
1198       if (inline_summary (edge->callee)->inlinable
1199           && edge->inline_failed
1200           && can_inline_edge_p (edge, true)
1201           && want_inline_small_function_p (edge, true))
1202         edge->aux = fibheap_insert (heap, edge_badness (edge, false), edge);
1203     }
1204 }
1205
1206
1207 /* We use greedy algorithm for inlining of small functions:
1208    All inline candidates are put into prioritized heap ordered in
1209    increasing badness.
1210
1211    The inlining of small functions is bounded by unit growth parameters.  */
1212
1213 static void
1214 inline_small_functions (void)
1215 {
1216   struct cgraph_node *node;
1217   struct cgraph_edge *edge;
1218   fibheap_t heap = fibheap_new ();
1219   bitmap updated_nodes = BITMAP_ALLOC (NULL);
1220   int min_size, max_size;
1221   VEC (cgraph_edge_p, heap) *new_indirect_edges = NULL;
1222   int initial_size = 0;
1223
1224   if (flag_indirect_inlining)
1225     new_indirect_edges = VEC_alloc (cgraph_edge_p, heap, 8);
1226
1227   if (dump_file)
1228     fprintf (dump_file,
1229              "\nDeciding on inlining of small functions.  Starting with size %i.\n",
1230              initial_size);
1231
1232   /* Compute overall unit size and other global parameters used by badness
1233      metrics.  */
1234
1235   max_count = 0;
1236   initialize_growth_caches ();
1237
1238   for (node = cgraph_nodes; node; node = node->next)
1239     if (node->analyzed
1240         && !node->global.inlined_to)
1241       {
1242         struct inline_summary *info = inline_summary (node);
1243
1244         if (!DECL_EXTERNAL (node->decl))
1245           initial_size += info->size;
1246
1247         for (edge = node->callers; edge; edge = edge->next_caller)
1248           if (max_count < edge->count)
1249             max_count = edge->count;
1250       }
1251
1252   overall_size = initial_size;
1253   max_size = compute_max_insns (overall_size);
1254   min_size = overall_size;
1255
1256   /* Populate the heeap with all edges we might inline.  */
1257
1258   for (node = cgraph_nodes; node; node = node->next)
1259     if (node->analyzed
1260         && !node->global.inlined_to)
1261       {
1262         if (dump_file)
1263           fprintf (dump_file, "Enqueueing calls of %s/%i.\n",
1264                    cgraph_node_name (node), node->uid);
1265
1266         for (edge = node->callers; edge; edge = edge->next_caller)
1267           if (edge->inline_failed
1268               && can_inline_edge_p (edge, true)
1269               && want_inline_small_function_p (edge, true)
1270               && edge->inline_failed)
1271             {
1272               gcc_assert (!edge->aux);
1273               update_edge_key (heap, edge);
1274             }
1275       }
1276
1277   gcc_assert (in_lto_p
1278               || !max_count
1279               || (profile_info && flag_branch_probabilities));
1280
1281   while (!fibheap_empty (heap))
1282     {
1283       int old_size = overall_size;
1284       struct cgraph_node *where, *callee;
1285       int badness = fibheap_min_key (heap);
1286       int current_badness;
1287       int growth;
1288
1289       edge = (struct cgraph_edge *) fibheap_extract_min (heap);
1290       gcc_assert (edge->aux);
1291       edge->aux = NULL;
1292       if (!edge->inline_failed)
1293         continue;
1294
1295       /* Be sure that caches are maintained consistent.  */
1296 #ifdef ENABLE_CHECKING
1297       reset_edge_growth_cache (edge);
1298       reset_node_growth_cache (edge->callee);
1299 #endif
1300
1301       /* When updating the edge costs, we only decrease badness in the keys.
1302          Increases of badness are handled lazilly; when we see key with out
1303          of date value on it, we re-insert it now.  */
1304       current_badness = edge_badness (edge, false);
1305       gcc_assert (current_badness >= badness);
1306       if (current_badness != badness)
1307         {
1308           edge->aux = fibheap_insert (heap, current_badness, edge);
1309           continue;
1310         }
1311
1312       if (!can_inline_edge_p (edge, true))
1313         continue;
1314
1315       callee = edge->callee;
1316       growth = estimate_edge_growth (edge);
1317       if (dump_file)
1318         {
1319           fprintf (dump_file,
1320                    "\nConsidering %s with %i size\n",
1321                    cgraph_node_name (edge->callee),
1322                    inline_summary (edge->callee)->size);
1323           fprintf (dump_file,
1324                    " to be inlined into %s in %s:%i\n"
1325                    " Estimated growth after inlined into all is %+i insns.\n"
1326                    " Estimated badness is %i, frequency %.2f.\n",
1327                    cgraph_node_name (edge->caller),
1328                    flag_wpa ? "unknown"
1329                    : gimple_filename ((const_gimple) edge->call_stmt),
1330                    flag_wpa ? -1
1331                    : gimple_lineno ((const_gimple) edge->call_stmt),
1332                    estimate_growth (edge->callee),
1333                    badness,
1334                    edge->frequency / (double)CGRAPH_FREQ_BASE);
1335           if (edge->count)
1336             fprintf (dump_file," Called "HOST_WIDEST_INT_PRINT_DEC"x\n",
1337                      edge->count);
1338           if (dump_flags & TDF_DETAILS)
1339             edge_badness (edge, true);
1340         }
1341
1342       if (overall_size + growth > max_size
1343           && !DECL_DISREGARD_INLINE_LIMITS (edge->callee->decl))
1344         {
1345           edge->inline_failed = CIF_INLINE_UNIT_GROWTH_LIMIT;
1346           report_inline_failed_reason (edge);
1347           continue;
1348         }
1349
1350       if (!want_inline_small_function_p (edge, true))
1351         continue;
1352
1353       /* Heuristics for inlining small functions works poorly for
1354          recursive calls where we do efect similar to loop unrolling.
1355          When inliing such edge seems profitable, leave decision on
1356          specific inliner.  */
1357       if (cgraph_edge_recursive_p (edge))
1358         {
1359           where = edge->caller;
1360           if (where->global.inlined_to)
1361             where = where->global.inlined_to;
1362           if (!recursive_inlining (edge,
1363                                    flag_indirect_inlining
1364                                    ? &new_indirect_edges : NULL))
1365             {
1366               edge->inline_failed = CIF_RECURSIVE_INLINING;
1367               continue;
1368             }
1369           reset_edge_caches (where);
1370           /* Recursive inliner inlines all recursive calls of the function
1371              at once. Consequently we need to update all callee keys.  */
1372           if (flag_indirect_inlining)
1373             add_new_edges_to_heap (heap, new_indirect_edges);
1374           update_all_callee_keys (heap, where, updated_nodes);
1375         }
1376       else
1377         {
1378           struct cgraph_node *callee;
1379           struct cgraph_node *outer_node = NULL;
1380           int depth = 0;
1381
1382           /* Consider the case where self recursive function A is inlined into B.
1383              This is desired optimization in some cases, since it leads to effect
1384              similar of loop peeling and we might completely optimize out the
1385              recursive call.  However we must be extra selective.  */
1386
1387           where = edge->caller;
1388           while (where->global.inlined_to)
1389             {
1390               if (where->decl == edge->callee->decl)
1391                 outer_node = where, depth++;
1392               where = where->callers->caller;
1393             }
1394           if (outer_node
1395               && !want_inline_self_recursive_call_p (edge, outer_node,
1396                                                      true, depth))
1397             {
1398               edge->inline_failed
1399                 = (DECL_DISREGARD_INLINE_LIMITS (edge->callee->decl)
1400                    ? CIF_RECURSIVE_INLINING : CIF_UNSPECIFIED);
1401               continue;
1402             }
1403           else if (depth && dump_file)
1404             fprintf (dump_file, " Peeling recursion with depth %i\n", depth);
1405
1406           callee = edge->callee;
1407           gcc_checking_assert (!callee->global.inlined_to);
1408           inline_call (edge, true, &new_indirect_edges, &overall_size);
1409           if (flag_indirect_inlining)
1410             add_new_edges_to_heap (heap, new_indirect_edges);
1411
1412           reset_edge_caches (edge->callee);
1413           reset_node_growth_cache (callee);
1414
1415           /* We inlined last offline copy to the body.  This might lead
1416              to callees of function having fewer call sites and thus they
1417              may need updating.  */
1418           if (callee->global.inlined_to)
1419             update_all_callee_keys (heap, callee, updated_nodes);
1420           else
1421             update_callee_keys (heap, edge->callee, updated_nodes);
1422         }
1423       where = edge->caller;
1424       if (where->global.inlined_to)
1425         where = where->global.inlined_to;
1426
1427       /* Our profitability metric can depend on local properties
1428          such as number of inlinable calls and size of the function body.
1429          After inlining these properties might change for the function we
1430          inlined into (since it's body size changed) and for the functions
1431          called by function we inlined (since number of it inlinable callers
1432          might change).  */
1433       update_caller_keys (heap, where, updated_nodes, NULL);
1434
1435       /* We removed one call of the function we just inlined.  If offline
1436          copy is still needed, be sure to update the keys.  */
1437       if (callee != where && !callee->global.inlined_to)
1438         update_caller_keys (heap, callee, updated_nodes, NULL);
1439       bitmap_clear (updated_nodes);
1440
1441       if (dump_file)
1442         {
1443           fprintf (dump_file,
1444                    " Inlined into %s which now has time %i and size %i,"
1445                    "net change of %+i.\n",
1446                    cgraph_node_name (edge->caller),
1447                    inline_summary (edge->caller)->time,
1448                    inline_summary (edge->caller)->size,
1449                    overall_size - old_size);
1450         }
1451       if (min_size > overall_size)
1452         {
1453           min_size = overall_size;
1454           max_size = compute_max_insns (min_size);
1455
1456           if (dump_file)
1457             fprintf (dump_file, "New minimal size reached: %i\n", min_size);
1458         }
1459     }
1460
1461   free_growth_caches ();
1462   if (new_indirect_edges)
1463     VEC_free (cgraph_edge_p, heap, new_indirect_edges);
1464   fibheap_delete (heap);
1465   if (dump_file)
1466     fprintf (dump_file,
1467              "Unit growth for small function inlining: %i->%i (%i%%)\n",
1468              initial_size, overall_size,
1469              initial_size ? overall_size * 100 / (initial_size) - 100: 0);
1470   BITMAP_FREE (updated_nodes);
1471 }
1472
1473 /* Flatten NODE.  Performed both during early inlining and
1474    at IPA inlining time.  */
1475
1476 static void
1477 flatten_function (struct cgraph_node *node, bool early)
1478 {
1479   struct cgraph_edge *e;
1480
1481   /* We shouldn't be called recursively when we are being processed.  */
1482   gcc_assert (node->aux == NULL);
1483
1484   node->aux = (void *) node;
1485
1486   for (e = node->callees; e; e = e->next_callee)
1487     {
1488       struct cgraph_node *orig_callee;
1489
1490       /* We've hit cycle?  It is time to give up.  */
1491       if (e->callee->aux)
1492         {
1493           if (dump_file)
1494             fprintf (dump_file,
1495                      "Not inlining %s into %s to avoid cycle.\n",
1496                      cgraph_node_name (e->callee),
1497                      cgraph_node_name (e->caller));
1498           e->inline_failed = CIF_RECURSIVE_INLINING;
1499           continue;
1500         }
1501
1502       /* When the edge is already inlined, we just need to recurse into
1503          it in order to fully flatten the leaves.  */
1504       if (!e->inline_failed)
1505         {
1506           flatten_function (e->callee, early);
1507           continue;
1508         }
1509
1510       /* Flatten attribute needs to be processed during late inlining. For
1511          extra code quality we however do flattening during early optimization,
1512          too.  */
1513       if (!early
1514           ? !can_inline_edge_p (e, true)
1515           : !can_early_inline_edge_p (e))
1516         continue;
1517
1518       if (cgraph_edge_recursive_p (e))
1519         {
1520           if (dump_file)
1521             fprintf (dump_file, "Not inlining: recursive call.\n");
1522           continue;
1523         }
1524
1525       if (gimple_in_ssa_p (DECL_STRUCT_FUNCTION (node->decl))
1526           != gimple_in_ssa_p (DECL_STRUCT_FUNCTION (e->callee->decl)))
1527         {
1528           if (dump_file)
1529             fprintf (dump_file, "Not inlining: SSA form does not match.\n");
1530           continue;
1531         }
1532
1533       /* Inline the edge and flatten the inline clone.  Avoid
1534          recursing through the original node if the node was cloned.  */
1535       if (dump_file)
1536         fprintf (dump_file, " Inlining %s into %s.\n",
1537                  cgraph_node_name (e->callee),
1538                  cgraph_node_name (e->caller));
1539       orig_callee = e->callee;
1540       inline_call (e, true, NULL, NULL);
1541       if (e->callee != orig_callee)
1542         orig_callee->aux = (void *) node;
1543       flatten_function (e->callee, early);
1544       if (e->callee != orig_callee)
1545         orig_callee->aux = NULL;
1546     }
1547
1548   node->aux = NULL;
1549 }
1550
1551 /* Decide on the inlining.  We do so in the topological order to avoid
1552    expenses on updating data structures.  */
1553
1554 static unsigned int
1555 ipa_inline (void)
1556 {
1557   struct cgraph_node *node;
1558   int nnodes;
1559   struct cgraph_node **order =
1560     XCNEWVEC (struct cgraph_node *, cgraph_n_nodes);
1561   int i;
1562
1563   if (in_lto_p && flag_indirect_inlining)
1564     ipa_update_after_lto_read ();
1565   if (flag_indirect_inlining)
1566     ipa_create_all_structures_for_iinln ();
1567
1568   if (dump_file)
1569     dump_inline_summaries (dump_file);
1570
1571   nnodes = ipa_reverse_postorder (order);
1572
1573   for (node = cgraph_nodes; node; node = node->next)
1574     node->aux = 0;
1575
1576   if (dump_file)
1577     fprintf (dump_file, "\nFlattening functions:\n");
1578
1579   /* In the first pass handle functions to be flattened.  Do this with
1580      a priority so none of our later choices will make this impossible.  */
1581   for (i = nnodes - 1; i >= 0; i--)
1582     {
1583       node = order[i];
1584
1585       /* Handle nodes to be flattened.
1586          Ideally when processing callees we stop inlining at the
1587          entry of cycles, possibly cloning that entry point and
1588          try to flatten itself turning it into a self-recursive
1589          function.  */
1590       if (lookup_attribute ("flatten",
1591                             DECL_ATTRIBUTES (node->decl)) != NULL)
1592         {
1593           if (dump_file)
1594             fprintf (dump_file,
1595                      "Flattening %s\n", cgraph_node_name (node));
1596           flatten_function (node, false);
1597         }
1598     }
1599
1600   inline_small_functions ();
1601   cgraph_remove_unreachable_nodes (true, dump_file);
1602   free (order);
1603
1604   /* We already perform some inlining of functions called once during
1605      inlining small functions above.  After unreachable nodes are removed,
1606      we still might do a quick check that nothing new is found.  */
1607   if (flag_inline_functions_called_once)
1608     {
1609       int cold;
1610       if (dump_file)
1611         fprintf (dump_file, "\nDeciding on functions called once:\n");
1612
1613       /* Inlining one function called once has good chance of preventing
1614          inlining other function into the same callee.  Ideally we should
1615          work in priority order, but probably inlining hot functions first
1616          is good cut without the extra pain of maintaining the queue.
1617
1618          ??? this is not really fitting the bill perfectly: inlining function
1619          into callee often leads to better optimization of callee due to
1620          increased context for optimization.
1621          For example, if main() calls a function that outputs help and
1622          then a function that does the main optimization, we should inline
1623          the second with priority even if both calls are cold by themselves.
1624
1625          We probably want to implement new predicate replacing our use of
1626          maybe_hot_edge interpreted as maybe_hot_edge || callee is known
1627          to be hot.  */
1628       for (cold = 0; cold <= 1; cold ++)
1629         {
1630           for (node = cgraph_nodes; node; node = node->next)
1631             {
1632               if (want_inline_function_called_once_p (node)
1633                   && (cold
1634                       || cgraph_maybe_hot_edge_p (node->callers)))
1635                 {
1636                   struct cgraph_node *caller = node->callers->caller;
1637
1638                   if (dump_file)
1639                     {
1640                       fprintf (dump_file,
1641                                "\nInlining %s size %i.\n",
1642                                cgraph_node_name (node), inline_summary (node)->size);
1643                       fprintf (dump_file,
1644                                " Called once from %s %i insns.\n",
1645                                cgraph_node_name (node->callers->caller),
1646                                inline_summary (node->callers->caller)->size);
1647                     }
1648
1649                   inline_call (node->callers, true, NULL, NULL);
1650                   if (dump_file)
1651                     fprintf (dump_file,
1652                              " Inlined into %s which now has %i size\n",
1653                              cgraph_node_name (caller),
1654                              inline_summary (caller)->size);
1655                 }
1656             }
1657         }
1658     }
1659
1660   /* Free ipa-prop structures if they are no longer needed.  */
1661   if (flag_indirect_inlining)
1662     ipa_free_all_structures_after_iinln ();
1663
1664   if (dump_file)
1665     fprintf (dump_file,
1666              "\nInlined %i calls, eliminated %i functions\n\n",
1667              ncalls_inlined, nfunctions_inlined);
1668
1669   if (dump_file)
1670     dump_inline_summaries (dump_file);
1671   /* In WPA we use inline summaries for partitioning process.  */
1672   if (!flag_wpa)
1673     inline_free_summary ();
1674   return 0;
1675 }
1676
1677 /* Inline always-inline function calls in NODE.  */
1678
1679 static bool
1680 inline_always_inline_functions (struct cgraph_node *node)
1681 {
1682   struct cgraph_edge *e;
1683   bool inlined = false;
1684
1685   for (e = node->callees; e; e = e->next_callee)
1686     {
1687       if (!DECL_DISREGARD_INLINE_LIMITS (e->callee->decl))
1688         continue;
1689
1690       if (cgraph_edge_recursive_p (e))
1691         {
1692           if (dump_file)
1693             fprintf (dump_file, "  Not inlining recursive call to %s.\n",
1694                      cgraph_node_name (e->callee));
1695           e->inline_failed = CIF_RECURSIVE_INLINING;
1696           continue;
1697         }
1698
1699       if (!can_early_inline_edge_p (e))
1700         continue;
1701
1702       if (dump_file)
1703         fprintf (dump_file, "  Inlining %s into %s (always_inline).\n",
1704                  cgraph_node_name (e->callee),
1705                  cgraph_node_name (e->caller));
1706       inline_call (e, true, NULL, NULL);
1707       inlined = true;
1708     }
1709
1710   return inlined;
1711 }
1712
1713 /* Decide on the inlining.  We do so in the topological order to avoid
1714    expenses on updating data structures.  */
1715
1716 static bool
1717 early_inline_small_functions (struct cgraph_node *node)
1718 {
1719   struct cgraph_edge *e;
1720   bool inlined = false;
1721
1722   for (e = node->callees; e; e = e->next_callee)
1723     {
1724       if (!inline_summary (e->callee)->inlinable
1725           || !e->inline_failed)
1726         continue;
1727
1728       /* Do not consider functions not declared inline.  */
1729       if (!DECL_DECLARED_INLINE_P (e->callee->decl)
1730           && !flag_inline_small_functions
1731           && !flag_inline_functions)
1732         continue;
1733
1734       if (dump_file)
1735         fprintf (dump_file, "Considering inline candidate %s.\n",
1736                  cgraph_node_name (e->callee));
1737
1738       if (!can_early_inline_edge_p (e))
1739         continue;
1740
1741       if (cgraph_edge_recursive_p (e))
1742         {
1743           if (dump_file)
1744             fprintf (dump_file, "  Not inlining: recursive call.\n");
1745           continue;
1746         }
1747
1748       if (!want_early_inline_function_p (e))
1749         continue;
1750
1751       if (dump_file)
1752         fprintf (dump_file, " Inlining %s into %s.\n",
1753                  cgraph_node_name (e->callee),
1754                  cgraph_node_name (e->caller));
1755       inline_call (e, true, NULL, NULL);
1756       inlined = true;
1757     }
1758
1759   return inlined;
1760 }
1761
1762 /* Do inlining of small functions.  Doing so early helps profiling and other
1763    passes to be somewhat more effective and avoids some code duplication in
1764    later real inlining pass for testcases with very many function calls.  */
1765 static unsigned int
1766 early_inliner (void)
1767 {
1768   struct cgraph_node *node = cgraph_get_node (current_function_decl);
1769   struct cgraph_edge *edge;
1770   unsigned int todo = 0;
1771   int iterations = 0;
1772   bool inlined = false;
1773
1774   if (seen_error ())
1775     return 0;
1776
1777   /* Do nothing if datastructures for ipa-inliner are already computed.  This
1778      happens when some pass decides to construct new function and
1779      cgraph_add_new_function calls lowering passes and early optimization on
1780      it.  This may confuse ourself when early inliner decide to inline call to
1781      function clone, because function clones don't have parameter list in
1782      ipa-prop matching their signature.  */
1783   if (ipa_node_params_vector)
1784     return 0;
1785
1786 #ifdef ENABLE_CHECKING
1787   verify_cgraph_node (node);
1788 #endif
1789
1790   /* Even when not optimizing or not inlining inline always-inline
1791      functions.  */
1792   inlined = inline_always_inline_functions (node);
1793
1794   if (!optimize
1795       || flag_no_inline
1796       || !flag_early_inlining
1797       /* Never inline regular functions into always-inline functions
1798          during incremental inlining.  This sucks as functions calling
1799          always inline functions will get less optimized, but at the
1800          same time inlining of functions calling always inline
1801          function into an always inline function might introduce
1802          cycles of edges to be always inlined in the callgraph.
1803
1804          We might want to be smarter and just avoid this type of inlining.  */
1805       || DECL_DISREGARD_INLINE_LIMITS (node->decl))
1806     ;
1807   else if (lookup_attribute ("flatten",
1808                              DECL_ATTRIBUTES (node->decl)) != NULL)
1809     {
1810       /* When the function is marked to be flattened, recursively inline
1811          all calls in it.  */
1812       if (dump_file)
1813         fprintf (dump_file,
1814                  "Flattening %s\n", cgraph_node_name (node));
1815       flatten_function (node, true);
1816       inlined = true;
1817     }
1818   else
1819     {
1820       /* We iterate incremental inlining to get trivial cases of indirect
1821          inlining.  */
1822       while (iterations < PARAM_VALUE (PARAM_EARLY_INLINER_MAX_ITERATIONS)
1823              && early_inline_small_functions (node))
1824         {
1825           timevar_push (TV_INTEGRATION);
1826           todo |= optimize_inline_calls (current_function_decl);
1827
1828           /* Technically we ought to recompute inline parameters so the new
1829              iteration of early inliner works as expected.  We however have
1830              values approximately right and thus we only need to update edge
1831              info that might be cleared out for newly discovered edges.  */
1832           for (edge = node->callees; edge; edge = edge->next_callee)
1833             {
1834               struct inline_edge_summary *es = inline_edge_summary (edge);
1835               es->call_stmt_size
1836                 = estimate_num_insns (edge->call_stmt, &eni_size_weights);
1837               es->call_stmt_time
1838                 = estimate_num_insns (edge->call_stmt, &eni_time_weights);
1839             }
1840           timevar_pop (TV_INTEGRATION);
1841           iterations++;
1842           inlined = false;
1843         }
1844       if (dump_file)
1845         fprintf (dump_file, "Iterations: %i\n", iterations);
1846     }
1847
1848   if (inlined)
1849     {
1850       timevar_push (TV_INTEGRATION);
1851       todo |= optimize_inline_calls (current_function_decl);
1852       timevar_pop (TV_INTEGRATION);
1853     }
1854
1855   cfun->always_inline_functions_inlined = true;
1856
1857   return todo;
1858 }
1859
/* Pass descriptor for the early inliner.  This is a GIMPLE pass named
   "einline" that has no gate (so the pass manager is not asked whether to
   run it) and is executed by early_inliner.  It requires PROP_ssa and
   requests TODO_dump_func when it finishes.  */
struct gimple_opt_pass pass_early_inline =
{
 {
  GIMPLE_PASS,
  "einline",                            /* name */
  NULL,                                 /* gate */
  early_inliner,                        /* execute */
  NULL,                                 /* sub */
  NULL,                                 /* next */
  0,                                    /* static_pass_number */
  TV_INLINE_HEURISTICS,                 /* tv_id */
  PROP_ssa,                             /* properties_required */
  0,                                    /* properties_provided */
  0,                                    /* properties_destroyed */
  0,                                    /* todo_flags_start */
  TODO_dump_func                        /* todo_flags_finish */
 }
};
1878
1879
/* Gate of the IPA inlining pass: the pass is always enabled.  Inlining
   of always-inline functions itself happens during early inlining.  */

static bool
gate_ipa_inline (void)
{
  /* ???  Ideally this would return false when not optimizing or not
     inlining, since early inlining has already processed all
     always-inline functions.  Doing so at least breaks EH with C++,
     because fixup_cfg has to run unconditionally even at -O0.
     So the pass stays on unconditionally for now.  */
  return true;
}
1893
/* Pass descriptor for the IPA inliner.  This is an IPA pass named "inline",
   gated by gate_ipa_inline and executed by ipa_inline.  The summary hooks
   (inline_generate_summary, inline_write_summary, inline_read_summary) and
   the per-function transform hook (inline_transform) follow the embedded
   pass description.  */
struct ipa_opt_pass_d pass_ipa_inline =
{
 {
  IPA_PASS,
  "inline",                             /* name */
  gate_ipa_inline,                      /* gate */
  ipa_inline,                           /* execute */
  NULL,                                 /* sub */
  NULL,                                 /* next */
  0,                                    /* static_pass_number */
  TV_INLINE_HEURISTICS,                 /* tv_id */
  0,                                    /* properties_required */
  0,                                    /* properties_provided */
  0,                                    /* properties_destroyed */
  TODO_remove_functions,                /* todo_flags_start */
  TODO_dump_cgraph | TODO_dump_func
  | TODO_remove_functions | TODO_ggc_collect    /* todo_flags_finish */
 },
 inline_generate_summary,               /* generate_summary */
 inline_write_summary,                  /* write_summary */
 inline_read_summary,                   /* read_summary */
 NULL,                                  /* write_optimization_summary */
 NULL,                                  /* read_optimization_summary */
 NULL,                                  /* stmt_fixup */
 0,                                     /* TODOs */
 inline_transform,                      /* function_transform */
 NULL,                                  /* variable_transform */
};