gcc/ipa-inline.c

   1 /* Inlining decision heuristics.
   2    Copyright (C) 2003, 2004, 2007, 2008, 2009, 2010, 2011
   3    Free Software Foundation, Inc.
   4    Contributed by Jan Hubicka
   5
   6 This file is part of GCC.
   7
   8 GCC is free software; you can redistribute it and/or modify it under
   9 the terms of the GNU General Public License as published by the Free
  10 Software Foundation; either version 3, or (at your option) any later
  11 version.
  12
  13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  16 for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GCC; see the file COPYING3.  If not see
  20 <http://www.gnu.org/licenses/>.  */
  21
  22 /*  Inlining decision heuristics
  23
  24     The implementation of inliner is organized as follows:
  25
  26     inlining heuristics limits
  27
      can_inline_edge_p checks that a particular inlining is allowed
      by the limits specified by the user (allowed function growth, stack
      frame growth and so on).
  31
  32       Functions are inlined when it is obvious the result is profitable (such
  33       as functions called once or when inlining reduce code size).
  34       In addition to that we perform inlining of small functions and recursive
  35       inlining.
  36
  37     inlining heuristics
  38
  39        The inliner itself is split into two passes:
  40
  41        pass_early_inlining
  42
  43          Simple local inlining pass inlining callees into current function.
  44          This pass makes no use of whole unit analysis and thus it can do only
  45          very simple decisions based on local properties.
  46
         The strength of the pass is that it is run in topological order
         (reverse postorder) on the callgraph.  Functions are converted into
         SSA form just before this pass and optimized subsequently.  As a
         result, the callees of the function seen by the early inliner were
         already optimized, and the results of early inlining add a lot of
         optimization opportunities for the local optimization.
  53
         The pass handles the obvious inlining decisions within the
         compilation unit - inlining auto inline functions, inlining for size
         and flattening.
  57
         The main strength of the pass is the ability to eliminate abstraction
         penalty in C++ code (via combination of inlining and early
         optimization) and thus improve quality of analysis done by real IPA
         optimizers.
  62
  63          Because of lack of whole unit knowledge, the pass can not really make
  64          good code size/performance tradeoffs.  It however does very simple
  65          speculative inlining allowing code size to grow by
  66          EARLY_INLINING_INSNS when callee is leaf function.  In this case the
  67          optimizations performed later are very likely to eliminate the cost.
  68
  69        pass_ipa_inline
  70
  71          This is the real inliner able to handle inlining with whole program
  72          knowledge. It performs following steps:
  73
         1) Inlining of small functions.  This is implemented by a greedy
         algorithm ordering all inlinable cgraph edges by their badness and
         inlining them in this order as long as the inline limits allow it.
  77
         This heuristic is not very good at inlining recursive calls.  Recursive
         calls can be inlined with results similar to loop unrolling.  To do so,
         a special purpose recursive inliner is executed on the function when
         a recursive edge is met as a viable candidate.
  82
  83          2) Unreachable functions are removed from callgraph.  Inlining leads
  84          to devirtualization and other modification of callgraph so functions
  85          may become unreachable during the process. Also functions declared as
  86          extern inline or virtual functions are removed, since after inlining
  87          we no longer need the offline bodies.
  88
  89          3) Functions called once and not exported from the unit are inlined.
  90          This should almost always lead to reduction of code size by eliminating
  91          the need for offline copy of the function.  */
  92
  93 #include "config.h"
  94 #include "system.h"
  95 #include "coretypes.h"
  96 #include "tm.h"
  97 #include "tree.h"
  98 #include "tree-inline.h"
  99 #include "langhooks.h"
 100 #include "flags.h"
 101 #include "cgraph.h"
 102 #include "diagnostic.h"
 103 #include "gimple-pretty-print.h"
 104 #include "timevar.h"
 105 #include "params.h"
 106 #include "fibheap.h"
 107 #include "intl.h"
 108 #include "tree-pass.h"
 109 #include "coverage.h"
 110 #include "ggc.h"
 111 #include "rtl.h"
 112 #include "tree-flow.h"
 113 #include "ipa-prop.h"
 114 #include "except.h"
 115 #include "target.h"
 116 #include "ipa-inline.h"
 117 #include "ipa-utils.h"
 118
 119 /* Statistics we collect about inlining algorithm.  */
 120 static int overall_size;
 121 static gcov_type max_count;
 122
 123 /* Return false when inlining edge E would lead to violating
 124    limits on function unit growth or stack usage growth.
 125
 126    The relative function body growth limit is present generally
 127    to avoid problems with non-linear behavior of the compiler.
 128    To allow inlining huge functions into tiny wrapper, the limit
 129    is always based on the bigger of the two functions considered.
 130
 131    For stack growth limits we always base the growth in stack usage
 132    of the callers.  We want to prevent applications from segfaulting
 133    on stack overflow when functions with huge stack frames gets
 134    inlined. */
 135
 136 static bool
 137 caller_growth_limits (struct cgraph_edge *e)
 138 {
 139   struct cgraph_node *to = e->caller;
 140   struct cgraph_node *what = e->callee;
 141   int newsize;
 142   int limit = 0;
 143   HOST_WIDE_INT stack_size_limit = 0, inlined_stack;
 144   struct inline_summary *info, *what_info, *outer_info = inline_summary (to);
 145
 146   /* Look for function e->caller is inlined to.  While doing
 147      so work out the largest function body on the way.  As
 148      described above, we want to base our function growth
 149      limits based on that.  Not on the self size of the
 150      outer function, not on the self size of inline code
 151      we immediately inline to.  This is the most relaxed
 152      interpretation of the rule "do not grow large functions
 153      too much in order to prevent compiler from exploding".  */
 154   while (true)
 155     {
 156       info = inline_summary (to);
 157       if (limit < info->self_size)
 158         limit = info->self_size;
 159       if (stack_size_limit < info->estimated_self_stack_size)
 160         stack_size_limit = info->estimated_self_stack_size;
 161       if (to->global.inlined_to)
 162         to = to->callers->caller;
 163       else
 164         break;
 165     }
 166
 167   what_info = inline_summary (what);
 168
 169   if (limit < what_info->self_size)
 170     limit = what_info->self_size;
 171
 172   limit += limit * PARAM_VALUE (PARAM_LARGE_FUNCTION_GROWTH) / 100;
 173
 174   /* Check the size after inlining against the function limits.  But allow
 175      the function to shrink if it went over the limits by forced inlining.  */
 176   newsize = estimate_size_after_inlining (to, e);
 177   if (newsize >= info->size
 178       && newsize > PARAM_VALUE (PARAM_LARGE_FUNCTION_INSNS)
 179       && newsize > limit)
 180     {
 181       e->inline_failed = CIF_LARGE_FUNCTION_GROWTH_LIMIT;
 182       return false;
 183     }
 184
 185   if (!what_info->estimated_stack_size)
 186     return true;
 187
 188   /* FIXME: Stack size limit often prevents inlining in Fortran programs
 189      due to large i/o datastructures used by the Fortran front-end.
 190      We ought to ignore this limit when we know that the edge is executed
 191      on every invocation of the caller (i.e. its call statement dominates
 192      exit block).  We do not track this information, yet.  */
 193   stack_size_limit += ((gcov_type)stack_size_limit
 194                        * PARAM_VALUE (PARAM_STACK_FRAME_GROWTH) / 100);
 195
 196   inlined_stack = (outer_info->stack_frame_offset
 197                    + outer_info->estimated_self_stack_size
 198                    + what_info->estimated_stack_size);
 199   /* Check new stack consumption with stack consumption at the place
 200      stack is used.  */
 201   if (inlined_stack > stack_size_limit
 202       /* If function already has large stack usage from sibling
 203          inline call, we can inline, too.
 204          This bit overoptimistically assume that we are good at stack
 205          packing.  */
 206       && inlined_stack > info->estimated_stack_size
 207       && inlined_stack > PARAM_VALUE (PARAM_LARGE_STACK_FRAME))
 208     {
 209       e->inline_failed = CIF_LARGE_STACK_FRAME_GROWTH_LIMIT;
 210       return false;
 211     }
 212   return true;
 213 }
 214
 215 /* Dump info about why inlining has failed.  */
 216
 217 static void
 218 report_inline_failed_reason (struct cgraph_edge *e)
 219 {
 220   if (dump_file)
 221     {
 222       fprintf (dump_file, "  not inlinable: %s/%i -> %s/%i, %s\n",
 223                cgraph_node_name (e->caller), e->caller->uid,
 224                cgraph_node_name (e->callee), e->callee->uid,
 225                cgraph_inline_failed_string (e->inline_failed));
 226     }
 227 }
 228
 229 /* Decide if we can inline the edge and possibly update
 230    inline_failed reason.
 231    We check whether inlining is possible at all and whether
 232    caller growth limits allow doing so.
 233
 234    if REPORT is true, output reason to the dump file.  */
 235
 236 static bool
 237 can_inline_edge_p (struct cgraph_edge *e, bool report)
 238 {
 239   bool inlinable = true;
 240   tree caller_tree = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (e->caller->decl);
 241   tree callee_tree = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (e->callee->decl);
 242
 243   gcc_assert (e->inline_failed);
 244
 245   if (!e->callee->analyzed)
 246     {
 247       e->inline_failed = CIF_BODY_NOT_AVAILABLE;
 248       inlinable = false;
 249     }
 250   else if (!inline_summary (e->callee)->inlinable)
 251     {
 252       e->inline_failed = CIF_FUNCTION_NOT_INLINABLE;
 253       inlinable = false;
 254     }
 255   else if (cgraph_function_body_availability (e->callee) <= AVAIL_OVERWRITABLE)
 256     {
 257       e->inline_failed = CIF_OVERWRITABLE;
 258       return false;
 259     }
 260   else if (e->call_stmt_cannot_inline_p)
 261     {
 262       e->inline_failed = CIF_MISMATCHED_ARGUMENTS;
 263       inlinable = false;
 264     }
 265   /* Don't inline if the functions have different EH personalities.  */
 266   else if (DECL_FUNCTION_PERSONALITY (e->caller->decl)
 267            && DECL_FUNCTION_PERSONALITY (e->callee->decl)
 268            && (DECL_FUNCTION_PERSONALITY (e->caller->decl)
 269                != DECL_FUNCTION_PERSONALITY (e->callee->decl)))
 270     {
 271       e->inline_failed = CIF_EH_PERSONALITY;
 272       inlinable = false;
 273     }
 274   /* Don't inline if the callee can throw non-call exceptions but the
 275      caller cannot.
 276      FIXME: this is obviously wrong for LTO where STRUCT_FUNCTION is missing.
 277      Move the flag into cgraph node or mirror it in the inline summary.  */
 278   else if (DECL_STRUCT_FUNCTION (e->callee->decl)
 279            && DECL_STRUCT_FUNCTION
 280                 (e->callee->decl)->can_throw_non_call_exceptions
 281            && !(DECL_STRUCT_FUNCTION (e->caller->decl)
 282                 && DECL_STRUCT_FUNCTION
 283                      (e->caller->decl)->can_throw_non_call_exceptions))
 284     {
 285       e->inline_failed = CIF_NON_CALL_EXCEPTIONS;
 286       inlinable = false;
 287     }
 288   /* Check compatibility of target optimization options.  */
 289   else if (!targetm.target_option.can_inline_p (e->caller->decl,
 290                                                 e->callee->decl))
 291     {
 292       e->inline_failed = CIF_TARGET_OPTION_MISMATCH;
 293       inlinable = false;
 294     }
 295   /* Check if caller growth allows the inlining.  */
 296   else if (!DECL_DISREGARD_INLINE_LIMITS (e->callee->decl)
 297            && !lookup_attribute ("flatten",
 298                                  DECL_ATTRIBUTES
 299                                    (e->caller->global.inlined_to
 300                                     ? e->caller->global.inlined_to->decl
 301                                     : e->caller->decl))
 302            && !caller_growth_limits (e))
 303     inlinable = false;
 304   /* Don't inline a function with a higher optimization level than the
 305      caller.  FIXME: this is really just tip of iceberg of handling
 306      optimization attribute.  */
 307   else if (caller_tree != callee_tree)
 308     {
 309       struct cl_optimization *caller_opt
 310         = TREE_OPTIMIZATION ((caller_tree)
 311                              ? caller_tree
 312                              : optimization_default_node);
 313
 314       struct cl_optimization *callee_opt
 315         = TREE_OPTIMIZATION ((callee_tree)
 316                              ? callee_tree
 317                              : optimization_default_node);
 318
 319       if ((caller_opt->x_optimize > callee_opt->x_optimize)
 320           || (caller_opt->x_optimize_size != callee_opt->x_optimize_size))
 321         {
 322           e->inline_failed = CIF_TARGET_OPTIMIZATION_MISMATCH;
 323           inlinable = false;
 324         }
 325     }
 326
 327   /* Be sure that the cannot_inline_p flag is up to date.  */
 328   gcc_checking_assert (!e->call_stmt
 329                        || (gimple_call_cannot_inline_p (e->call_stmt)
 330                            == e->call_stmt_cannot_inline_p)
 331                        /* In -flto-partition=none mode we really keep things out of
 332                           sync because call_stmt_cannot_inline_p is set at cgraph
 333                           merging when function bodies are not there yet.  */
 334                        || (in_lto_p && !gimple_call_cannot_inline_p (e->call_stmt)));
 335   if (!inlinable && report)
 336     report_inline_failed_reason (e);
 337   return inlinable;
 338 }
 339
 340
 341 /* Return true if the edge E is inlinable during early inlining.  */
 342
 343 static bool
 344 can_early_inline_edge_p (struct cgraph_edge *e)
 345 {
 346   /* Early inliner might get called at WPA stage when IPA pass adds new
 347      function.  In this case we can not really do any of early inlining
 348      because function bodies are missing.  */
 349   if (!gimple_has_body_p (e->callee->decl))
 350     {
 351       e->inline_failed = CIF_BODY_NOT_AVAILABLE;
 352       return false;
 353     }
 354   /* In early inliner some of callees may not be in SSA form yet
 355      (i.e. the callgraph is cyclic and we did not process
 356      the callee by early inliner, yet).  We don't have CIF code for this
 357      case; later we will re-do the decision in the real inliner.  */
 358   if (!gimple_in_ssa_p (DECL_STRUCT_FUNCTION (e->caller->decl))
 359       || !gimple_in_ssa_p (DECL_STRUCT_FUNCTION (e->callee->decl)))
 360     {
 361       if (dump_file)
 362         fprintf (dump_file, "  edge not inlinable: not in SSA form\n");
 363       return false;
 364     }
 365   if (!can_inline_edge_p (e, true))
 366     return false;
 367   return true;
 368 }
 369
 370
 371 /* Return true when N is leaf function.  Accept cheap builtins
 372    in leaf functions.  */
 373
 374 static bool
 375 leaf_node_p (struct cgraph_node *n)
 376 {
 377   struct cgraph_edge *e;
 378   for (e = n->callees; e; e = e->next_callee)
 379     if (!is_inexpensive_builtin (e->callee->decl))
 380       return false;
 381   return true;
 382 }
 383
 384
 385 /* Return true if we are interested in inlining small function.  */
 386
 387 static bool
 388 want_early_inline_function_p (struct cgraph_edge *e)
 389 {
 390   bool want_inline = true;
 391
 392   if (DECL_DISREGARD_INLINE_LIMITS (e->callee->decl))
 393     ;
 394   else if (!DECL_DECLARED_INLINE_P (e->callee->decl)
 395            && !flag_inline_small_functions)
 396     {
 397       e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
 398       report_inline_failed_reason (e);
 399       want_inline = false;
 400     }
 401   else
 402     {
 403       int growth = estimate_edge_growth (e);
 404       if (growth <= 0)
 405         ;
 406       else if (!cgraph_maybe_hot_edge_p (e)
 407                && growth > 0)
 408         {
 409           if (dump_file)
 410             fprintf (dump_file, "  will not early inline: %s/%i->%s/%i, "
 411                      "call is cold and code would grow by %i\n",
 412                      cgraph_node_name (e->caller), e->caller->uid,
 413                      cgraph_node_name (e->callee), e->callee->uid,
 414                      growth);
 415           want_inline = false;
 416         }
 417       else if (!leaf_node_p (e->callee)
 418                && growth > 0)
 419         {
 420           if (dump_file)
 421             fprintf (dump_file, "  will not early inline: %s/%i->%s/%i, "
 422                      "callee is not leaf and code would grow by %i\n",
 423                      cgraph_node_name (e->caller), e->caller->uid,
 424                      cgraph_node_name (e->callee), e->callee->uid,
 425                      growth);
 426           want_inline = false;
 427         }
 428       else if (growth > PARAM_VALUE (PARAM_EARLY_INLINING_INSNS))
 429         {
 430           if (dump_file)
 431             fprintf (dump_file, "  will not early inline: %s/%i->%s/%i, "
 432                      "growth %i exceeds --param early-inlining-insns\n",
 433                      cgraph_node_name (e->caller), e->caller->uid,
 434                      cgraph_node_name (e->callee), e->callee->uid,
 435                      growth);
 436           want_inline = false;
 437         }
 438     }
 439   return want_inline;
 440 }
 441
 442 /* Return true if we are interested in inlining small function.
 443    When REPORT is true, report reason to dump file.  */
 444
 445 static bool
 446 want_inline_small_function_p (struct cgraph_edge *e, bool report)
 447 {
 448   bool want_inline = true;
 449
 450   if (DECL_DISREGARD_INLINE_LIMITS (e->callee->decl))
 451     ;
 452   else if (!DECL_DECLARED_INLINE_P (e->callee->decl)
 453            && !flag_inline_small_functions)
 454     {
 455       e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
 456       want_inline = false;
 457     }
 458   else
 459     {
 460       int growth = estimate_edge_growth (e);
 461
 462       if (growth <= 0)
 463         ;
 464       else if (DECL_DECLARED_INLINE_P (e->callee->decl)
 465                && growth >= MAX_INLINE_INSNS_SINGLE)
 466         {
 467           e->inline_failed = CIF_MAX_INLINE_INSNS_SINGLE_LIMIT;
 468           want_inline = false;
 469         }
 470       else if (!DECL_DECLARED_INLINE_P (e->callee->decl)
 471                && !flag_inline_functions)
 472         {
 473           e->inline_failed = CIF_NOT_DECLARED_INLINED;
 474           want_inline = false;
 475         }
 476       else if (!DECL_DECLARED_INLINE_P (e->callee->decl)
 477                && growth >= MAX_INLINE_INSNS_AUTO)
 478         {
 479           e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
 480           want_inline = false;
 481         }
 482       /* If call is cold, do not inline when function body would grow.
 483          Still inline when the overall unit size will shrink because the offline
 484          copy of function being eliminated.
 485
 486          This is slightly wrong on aggressive side:  it is entirely possible
 487          that function is called many times with a context where inlining
 488          reduces code size and few times with a context where inlining increase
 489          code size.  Resoluting growth estimate will be negative even if it
 490          would make more sense to keep offline copy and do not inline into the
 491          call sites that makes the code size grow.
 492
 493          When badness orders the calls in a way that code reducing calls come
 494          first, this situation is not a problem at all: after inlining all
 495          "good" calls, we will realize that keeping the function around is
 496          better.  */
 497       else if (!cgraph_maybe_hot_edge_p (e)
 498                && (DECL_EXTERNAL (e->callee->decl)
 499
 500                    /* Unlike for functions called once, we play unsafe with
 501                       COMDATs.  We can allow that since we know functions
 502                       in consideration are small (and thus risk is small) and
 503                       moreover grow estimates already accounts that COMDAT
 504                       functions may or may not disappear when eliminated from
 505                       current unit. With good probability making aggressive
 506                       choice in all units is going to make overall program
 507                       smaller.
 508
 509                       Consequently we ask cgraph_can_remove_if_no_direct_calls_p
 510                       instead of
 511                       cgraph_will_be_removed_from_program_if_no_direct_calls  */
 512
 513                    || !cgraph_can_remove_if_no_direct_calls_p (e->callee)
 514                    || estimate_growth (e->callee) > 0))
 515         {
 516           e->inline_failed = CIF_UNLIKELY_CALL;
 517           want_inline = false;
 518         }
 519     }
 520   if (!want_inline && report)
 521     report_inline_failed_reason (e);
 522   return want_inline;
 523 }
 524
 525 /* EDGE is self recursive edge.
 526    We hand two cases - when function A is inlining into itself
 527    or when function A is being inlined into another inliner copy of function
 528    A within function B.
 529
 530    In first case OUTER_NODE points to the toplevel copy of A, while
 531    in the second case OUTER_NODE points to the outermost copy of A in B.
 532
 533    In both cases we want to be extra selective since
 534    inlining the call will just introduce new recursive calls to appear.  */
 535
 536 static bool
 537 want_inline_self_recursive_call_p (struct cgraph_edge *edge,
 538                                    struct cgraph_node *outer_node,
 539                                    bool peeling,
 540                                    int depth)
 541 {
 542   char const *reason = NULL;
 543   bool want_inline = true;
 544   int caller_freq = CGRAPH_FREQ_BASE;
 545   int max_depth = PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH_AUTO);
 546
 547   if (DECL_DECLARED_INLINE_P (edge->callee->decl))
 548     max_depth = PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH);
 549
 550   if (!cgraph_maybe_hot_edge_p (edge))
 551     {
 552       reason = "recursive call is cold";
 553       want_inline = false;
 554     }
 555   else if (max_count && !outer_node->count)
 556     {
 557       reason = "not executed in profile";
 558       want_inline = false;
 559     }
 560   else if (depth > max_depth)
 561     {
 562       reason = "--param max-inline-recursive-depth exceeded.";
 563       want_inline = false;
 564     }
 565
 566   if (outer_node->global.inlined_to)
 567     caller_freq = outer_node->callers->frequency;
 568
 569   if (!want_inline)
 570     ;
 571   /* Inlining of self recursive function into copy of itself within other function
 572      is transformation similar to loop peeling.
 573
 574      Peeling is profitable if we can inline enough copies to make probability
 575      of actual call to the self recursive function very small.  Be sure that
 576      the probability of recursion is small.
 577
 578      We ensure that the frequency of recursing is at most 1 - (1/max_depth).
 579      This way the expected number of recision is at most max_depth.  */
 580   else if (peeling)
 581     {
 582       int max_prob = CGRAPH_FREQ_BASE - ((CGRAPH_FREQ_BASE + max_depth - 1)
 583                                          / max_depth);
 584       int i;
 585       for (i = 1; i < depth; i++)
 586         max_prob = max_prob * max_prob / CGRAPH_FREQ_BASE;
 587       if (max_count
 588           && (edge->count * CGRAPH_FREQ_BASE / outer_node->count
 589               >= max_prob))
 590         {
 591           reason = "profile of recursive call is too large";
 592           want_inline = false;
 593         }
 594       if (!max_count
 595           && (edge->frequency * CGRAPH_FREQ_BASE / caller_freq
 596               >= max_prob))
 597         {
 598           reason = "frequency of recursive call is too large";
 599           want_inline = false;
 600         }
 601     }
 602   /* Recursive inlining, i.e. equivalent of unrolling, is profitable if recursion
 603      depth is large.  We reduce function call overhead and increase chances that
 604      things fit in hardware return predictor.
 605
 606      Recursive inlining might however increase cost of stack frame setup
 607      actually slowing down functions whose recursion tree is wide rather than
 608      deep.
 609
 610      Deciding reliably on when to do recursive inlining without profile feedback
 611      is tricky.  For now we disable recursive inlining when probability of self
 612      recursion is low.
 613
 614      Recursive inlining of self recursive call within loop also results in large loop
 615      depths that generally optimize badly.  We may want to throttle down inlining
 616      in those cases.  In particular this seems to happen in one of libstdc++ rb tree
 617      methods.  */
 618   else
 619     {
 620       if (max_count
 621           && (edge->count * 100 / outer_node->count
 622               <= PARAM_VALUE (PARAM_MIN_INLINE_RECURSIVE_PROBABILITY)))
 623         {
 624           reason = "profile of recursive call is too small";
 625           want_inline = false;
 626         }
 627       else if (!max_count
 628                && (edge->frequency * 100 / caller_freq
 629                    <= PARAM_VALUE (PARAM_MIN_INLINE_RECURSIVE_PROBABILITY)))
 630         {
 631           reason = "frequency of recursive call is too small";
 632           want_inline = false;
 633         }
 634     }
 635   if (!want_inline && dump_file)
 636     fprintf (dump_file, "   not inlining recursively: %s\n", reason);
 637   return want_inline;
 638 }
 639
 640
 641 /* Decide if NODE is called once inlining it would eliminate need
 642    for the offline copy of function.  */
 643
 644 static bool
 645 want_inline_function_called_once_p (struct cgraph_node *node)
 646 {
 647    /* Already inlined?  */
 648    if (node->global.inlined_to)
 649      return false;
 650    /* Zero or more then one callers?  */
 651    if (!node->callers
 652        || node->callers->next_caller)
 653      return false;
 654    /* Recursive call makes no sense to inline.  */
 655    if (node->callers->caller == node)
 656      return false;
 657    /* External functions are not really in the unit, so inlining
 658       them when called once would just increase the program size.  */
 659    if (DECL_EXTERNAL (node->decl))
 660      return false;
 661    /* Offline body must be optimized out.  */
 662    if (!cgraph_will_be_removed_from_program_if_no_direct_calls (node))
 663      return false;
 664    if (!can_inline_edge_p (node->callers, true))
 665      return false;
 666    return true;
 667 }
 668
 669 /* A cost model driving the inlining heuristics in a way so the edges with
 670    smallest badness are inlined first.  After each inlining is performed
 671    the costs of all caller edges of nodes affected are recomputed so the
 672    metrics may accurately depend on values such as number of inlinable callers
 673    of the function or function body size.  */
 674
 675 static int
 676 edge_badness (struct cgraph_edge *edge, bool dump)
 677 {
 678   gcov_type badness;
 679   int growth, time_growth;
 680   struct inline_summary *callee_info = inline_summary (edge->callee);
 681
 682   if (DECL_DISREGARD_INLINE_LIMITS (edge->callee->decl))
 683     return INT_MIN;
 684
 685   growth = estimate_edge_growth (edge);
 686   time_growth = estimate_edge_time (edge);
 687
 688   if (dump)
 689     {
 690       fprintf (dump_file, "    Badness calculation for %s -> %s\n",
 691                cgraph_node_name (edge->caller),
 692                cgraph_node_name (edge->callee));
 693       fprintf (dump_file, "      growth size %i, time %i\n",
 694                growth,
 695                time_growth);
 696     }
 697
 698   /* Always prefer inlining saving code size.  */
 699   if (growth <= 0)
 700     {
 701       badness = INT_MIN - growth;
 702       if (dump)
 703         fprintf (dump_file, "      %i: Growth %i < 0\n", (int) badness,
 704                  growth);
 705     }
 706
 707   /* When profiling is available, base priorities -(#calls / growth).
 708      So we optimize for overall number of "executed" inlined calls.  */
 709   else if (max_count)
 710     {
 711       int benefitperc;
 712       benefitperc = (((gcov_type)callee_info->time
 713                      * edge->frequency / CGRAPH_FREQ_BASE - time_growth) * 100
 714                      / (callee_info->time + 1) + 1);
 715       benefitperc = MIN (benefitperc, 100);
 716       benefitperc = MAX (benefitperc, 0);
 717       badness =
 718         ((int)
 719          ((double) edge->count * INT_MIN / max_count / 100) *
 720          benefitperc) / growth;
 721
 722       /* Be sure that insanity of the profile won't lead to increasing counts
 723          in the scalling and thus to overflow in the computation above.  */
 724       gcc_assert (max_count >= edge->count);
 725       if (dump)
 726         {
 727           fprintf (dump_file,
 728                    "      %i (relative %f): profile info. Relative count %f"
 729                    " * Relative benefit %f\n",
 730                    (int) badness, (double) badness / INT_MIN,
 731                    (double) edge->count / max_count,
 732                    (double) benefitperc);
 733         }
 734     }
 735
 736   /* When function local profile is available, base priorities on
 737      growth / frequency, so we optimize for overall frequency of inlined
 738      calls.  This is not too accurate since while the call might be frequent
 739      within function, the function itself is infrequent.
 740
 741      Other objective to optimize for is number of different calls inlined.
 742      We add the estimated growth after inlining all functions to bias the
 743      priorities slightly in this direction (so fewer times called functions
 744      of the same size gets priority).  */
 745   else if (flag_guess_branch_prob)
 746     {
 747       int div = edge->frequency * 100 / CGRAPH_FREQ_BASE + 1;
 748       int benefitperc;
 749       int growth_for_all;
 750       badness = growth * 10000;
 751       benefitperc = (((gcov_type)callee_info->time
 752                      * edge->frequency / CGRAPH_FREQ_BASE - time_growth) * 100
 753                      / (callee_info->time + 1) + 1);
 754       benefitperc = MIN (benefitperc, 100);
 755       benefitperc = MAX (benefitperc, 0);
 756       div *= benefitperc;
 757
 758       /* Decrease badness if call is nested.  */
 759       /* Compress the range so we don't overflow.  */
 760       if (div > 10000)
 761         div = 10000 + ceil_log2 (div) - 8;
 762       if (div < 1)
 763         div = 1;
 764       if (badness > 0)
 765         badness /= div;
 766       growth_for_all = estimate_growth (edge->callee);
 767       badness += growth_for_all;
 768       if (badness > INT_MAX)
 769         badness = INT_MAX;
 770       if (dump)
 771         {
 772           fprintf (dump_file,
 773                    "      %i: guessed profile. frequency %i, overall growth %i,"
 774                    " benefit %i%%, divisor %i\n",
 775                    (int) badness, edge->frequency, growth_for_all,
 776                    benefitperc, div);
 777         }
 778     }
 779   /* When function local profile is not available or it does not give
 780      useful information (ie frequency is zero), base the cost on
 781      loop nest and overall size growth, so we optimize for overall number
 782      of functions fully inlined in program.  */
 783   else
 784     {
 785       int nest = MIN (inline_edge_summary (edge)->loop_depth, 8);
 786       badness = estimate_growth (edge->callee) * 256;
 787
 788       /* Decrease badness if call is nested.  */
 789       if (badness > 0)
 790         badness >>= nest;
 791       else
 792         {
 793           badness <<= nest;
 794         }
 795       if (dump)
 796         fprintf (dump_file, "      %i: no profile. nest %i\n", (int) badness,
 797                  nest);
 798     }
 799
 800   /* Ensure that we did not overflow in all the fixed point math above.  */
 801   gcc_assert (badness >= INT_MIN);
 802   gcc_assert (badness <= INT_MAX - 1);
 803   /* Make recursive inlining happen always after other inlining is done.  */
 804   if (cgraph_edge_recursive_p (edge))
 805     return badness + 1;
 806   else
 807     return badness;
 808 }
 809
 810 /* Recompute badness of EDGE and update its key in HEAP if needed.  */
 811 static inline void
 812 update_edge_key (fibheap_t heap, struct cgraph_edge *edge)
 813 {
 814   int badness = edge_badness (edge, false);
 815   if (edge->aux)
 816     {
 817       fibnode_t n = (fibnode_t) edge->aux;
 818       gcc_checking_assert (n->data == edge);
 819
 820       /* fibheap_replace_key only decrease the keys.
 821          When we increase the key we do not update heap
 822          and instead re-insert the element once it becomes
 823          a minimum of heap.  */
 824       if (badness < n->key)
 825         {
 826           fibheap_replace_key (heap, n, badness);
 827           if (dump_file && (dump_flags & TDF_DETAILS))
 828             {
 829               fprintf (dump_file,
 830                        "  decreasing badness %s/%i -> %s/%i, %i to %i\n",
 831                        cgraph_node_name (edge->caller), edge->caller->uid,
 832                        cgraph_node_name (edge->callee), edge->callee->uid,
 833                        (int)n->key,
 834                        badness);
 835             }
 836           gcc_checking_assert (n->key == badness);
 837         }
 838     }
 839   else
 840     {
 841        if (dump_file && (dump_flags & TDF_DETAILS))
 842          {
 843            fprintf (dump_file,
 844                     "  enqueuing call %s/%i -> %s/%i, badness %i\n",
 845                     cgraph_node_name (edge->caller), edge->caller->uid,
 846                     cgraph_node_name (edge->callee), edge->callee->uid,
 847                     badness);
 848          }
 849       edge->aux = fibheap_insert (heap, badness, edge);
 850     }
 851 }
 852
 853
 854 /* NODE was inlined.
 855    All caller edges needs to be resetted because
 856    size estimates change. Similarly callees needs reset
 857    because better context may be known.  */
 858
 859 static void
 860 reset_edge_caches (struct cgraph_node *node)
 861 {
 862   struct cgraph_edge *edge;
 863   struct cgraph_edge *e = node->callees;
 864   struct cgraph_node *where = node;
 865
 866   if (where->global.inlined_to)
 867     where = where->global.inlined_to;
 868
 869   /* WHERE body size has changed, the cached growth is invalid.  */
 870   reset_node_growth_cache (where);
 871
 872   for (edge = where->callers; edge; edge = edge->next_caller)
 873     if (edge->inline_failed)
 874       reset_edge_growth_cache (edge);
 875
 876   if (!e)
 877     return;
 878
 879   while (true)
 880     if (!e->inline_failed && e->callee->callees)
 881       e = e->callee->callees;
 882     else
 883       {
 884         if (e->inline_failed)
 885           reset_edge_growth_cache (e);
 886         if (e->next_callee)
 887           e = e->next_callee;
 888         else
 889           {
 890             do
 891               {
 892                 if (e->caller == node)
 893                   return;
 894                 e = e->caller->callers;
 895               }
 896             while (!e->next_callee);
 897             e = e->next_callee;
 898           }
 899       }
 900 }
 901
 902 /* Recompute HEAP nodes for each of caller of NODE.
 903    UPDATED_NODES track nodes we already visited, to avoid redundant work.
 904    When CHECK_INLINABLITY_FOR is set, re-check for specified edge that
 905    it is inlinable. Otherwise check all edges.  */
 906
 907 static void
 908 update_caller_keys (fibheap_t heap, struct cgraph_node *node,
 909                     bitmap updated_nodes,
 910                     struct cgraph_edge *check_inlinablity_for)
 911 {
 912   struct cgraph_edge *edge;
 913
 914   if (!inline_summary (node)->inlinable
 915       || cgraph_function_body_availability (node) <= AVAIL_OVERWRITABLE
 916       || node->global.inlined_to)
 917     return;
 918   if (!bitmap_set_bit (updated_nodes, node->uid))
 919     return;
 920
 921   for (edge = node->callers; edge; edge = edge->next_caller)
 922     if (edge->inline_failed)
 923       {
 924         if (!check_inlinablity_for
 925             || check_inlinablity_for == edge)
 926           {
 927             if (can_inline_edge_p (edge, false)
 928                 && want_inline_small_function_p (edge, false))
 929               update_edge_key (heap, edge);
 930             else if (edge->aux)
 931               {
 932                 report_inline_failed_reason (edge);
 933                 fibheap_delete_node (heap, (fibnode_t) edge->aux);
 934                 edge->aux = NULL;
 935               }
 936           }
 937         else if (edge->aux)
 938           update_edge_key (heap, edge);
 939       }
 940 }
 941
 942 /* Recompute HEAP nodes for each uninlined call in NODE.
 943    This is used when we know that edge badnesses are going only to increase
 944    (we introduced new call site) and thus all we need is to insert newly
 945    created edges into heap.  */
 946
 947 static void
 948 update_callee_keys (fibheap_t heap, struct cgraph_node *node,
 949                     bitmap updated_nodes)
 950 {
 951   struct cgraph_edge *e = node->callees;
 952
 953   if (!e)
 954     return;
 955   while (true)
 956     if (!e->inline_failed && e->callee->callees)
 957       e = e->callee->callees;
 958     else
 959       {
 960         /* We do not reset callee growth cache here.  Since we added a new call,
 961            growth chould have just increased and consequentely badness metric
 962            don't need updating.  */
 963         if (e->inline_failed
 964             && inline_summary (e->callee)->inlinable
 965             && cgraph_function_body_availability (e->callee) >= AVAIL_AVAILABLE
 966             && !bitmap_bit_p (updated_nodes, e->callee->uid))
 967           {
 968             if (can_inline_edge_p (e, false)
 969                 && want_inline_small_function_p (e, false))
 970               update_edge_key (heap, e);
 971             else if (e->aux)
 972               {
 973                 report_inline_failed_reason (e);
 974                 fibheap_delete_node (heap, (fibnode_t) e->aux);
 975                 e->aux = NULL;
 976               }
 977           }
 978         if (e->next_callee)
 979           e = e->next_callee;
 980         else
 981           {
 982             do
 983               {
 984                 if (e->caller == node)
 985                   return;
 986                 e = e->caller->callers;
 987               }
 988             while (!e->next_callee);
 989             e = e->next_callee;
 990           }
 991       }
 992 }
 993
 994 /* Recompute heap nodes for each of caller edges of each of callees.
 995    Walk recursively into all inline clones.  */
 996
 997 static void
 998 update_all_callee_keys (fibheap_t heap, struct cgraph_node *node,
 999                         bitmap updated_nodes)
1000 {
1001   struct cgraph_edge *e = node->callees;
1002
1003   if (!e)
1004     return;
1005   while (true)
1006     if (!e->inline_failed && e->callee->callees)
1007       e = e->callee->callees;
1008     else
1009       {
1010         /* We inlined and thus callees might have different number of calls.
1011            Reset their caches  */
1012         reset_node_growth_cache (e->callee);
1013         if (e->inline_failed)
1014           update_caller_keys (heap, e->callee, updated_nodes, e);
1015         if (e->next_callee)
1016           e = e->next_callee;
1017         else
1018           {
1019             do
1020               {
1021                 if (e->caller == node)
1022                   return;
1023                 e = e->caller->callers;
1024               }
1025             while (!e->next_callee);
1026             e = e->next_callee;
1027           }
1028       }
1029 }
1030
1031 /* Enqueue all recursive calls from NODE into priority queue depending on
1032    how likely we want to recursively inline the call.  */
1033
1034 static void
1035 lookup_recursive_calls (struct cgraph_node *node, struct cgraph_node *where,
1036                         fibheap_t heap)
1037 {
1038   struct cgraph_edge *e;
1039   for (e = where->callees; e; e = e->next_callee)
1040     if (e->callee == node)
1041       {
1042         /* When profile feedback is available, prioritize by expected number
1043            of calls.  */
1044         fibheap_insert (heap,
1045                         !max_count ? -e->frequency
1046                         : -(e->count / ((max_count + (1<<24) - 1) / (1<<24))),
1047                         e);
1048       }
1049   for (e = where->callees; e; e = e->next_callee)
1050     if (!e->inline_failed)
1051       lookup_recursive_calls (node, e->callee, heap);
1052 }
1053
1054 /* Decide on recursive inlining: in the case function has recursive calls,
1055    inline until body size reaches given argument.  If any new indirect edges
1056    are discovered in the process, add them to *NEW_EDGES, unless NEW_EDGES
1057    is NULL.  */
1058
1059 static bool
1060 recursive_inlining (struct cgraph_edge *edge,
1061                     VEC (cgraph_edge_p, heap) **new_edges)
1062 {
1063   int limit = PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE_AUTO);
1064   fibheap_t heap;
1065   struct cgraph_node *node;
1066   struct cgraph_edge *e;
1067   struct cgraph_node *master_clone = NULL, *next;
1068   int depth = 0;
1069   int n = 0;
1070
1071   node = edge->caller;
1072   if (node->global.inlined_to)
1073     node = node->global.inlined_to;
1074
1075   if (DECL_DECLARED_INLINE_P (node->decl))
1076     limit = PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE);
1077
1078   /* Make sure that function is small enough to be considered for inlining.  */
1079   if (estimate_size_after_inlining (node, edge)  >= limit)
1080     return false;
1081   heap = fibheap_new ();
1082   lookup_recursive_calls (node, node, heap);
1083   if (fibheap_empty (heap))
1084     {
1085       fibheap_delete (heap);
1086       return false;
1087     }
1088
1089   if (dump_file)
1090     fprintf (dump_file,
1091              "  Performing recursive inlining on %s\n",
1092              cgraph_node_name (node));
1093
1094   /* Do the inlining and update list of recursive call during process.  */
1095   while (!fibheap_empty (heap))
1096     {
1097       struct cgraph_edge *curr
1098         = (struct cgraph_edge *) fibheap_extract_min (heap);
1099       struct cgraph_node *cnode;
1100
1101       if (estimate_size_after_inlining (node, curr) > limit)
1102         break;
1103
1104       if (!can_inline_edge_p (curr, true))
1105         continue;
1106
1107       depth = 1;
1108       for (cnode = curr->caller;
1109            cnode->global.inlined_to; cnode = cnode->callers->caller)
1110         if (node->decl == curr->callee->decl)
1111           depth++;
1112
1113       if (!want_inline_self_recursive_call_p (curr, node, false, depth))
1114         continue;
1115
1116       if (dump_file)
1117         {
1118           fprintf (dump_file,
1119                    "   Inlining call of depth %i", depth);
1120           if (node->count)
1121             {
1122               fprintf (dump_file, " called approx. %.2f times per call",
1123                        (double)curr->count / node->count);
1124             }
1125           fprintf (dump_file, "\n");
1126         }
1127       if (!master_clone)
1128         {
1129           /* We need original clone to copy around.  */
1130           master_clone = cgraph_clone_node (node, node->decl,
1131                                             node->count, CGRAPH_FREQ_BASE,
1132                                             false, NULL);
1133           for (e = master_clone->callees; e; e = e->next_callee)
1134             if (!e->inline_failed)
1135               clone_inlined_nodes (e, true, false, NULL);
1136         }
1137
1138       cgraph_redirect_edge_callee (curr, master_clone);
1139       inline_call (curr, false, new_edges, &overall_size);
1140       lookup_recursive_calls (node, curr->callee, heap);
1141       n++;
1142     }
1143
1144   if (!fibheap_empty (heap) && dump_file)
1145     fprintf (dump_file, "    Recursive inlining growth limit met.\n");
1146   fibheap_delete (heap);
1147
1148   if (!master_clone)
1149     return false;
1150
1151   if (dump_file)
1152     fprintf (dump_file,
1153              "\n   Inlined %i times, "
1154              "body grown from size %i to %i, time %i to %i\n", n,
1155              inline_summary (master_clone)->size, inline_summary (node)->size,
1156              inline_summary (master_clone)->time, inline_summary (node)->time);
1157
1158   /* Remove master clone we used for inlining.  We rely that clones inlined
1159      into master clone gets queued just before master clone so we don't
1160      need recursion.  */
1161   for (node = cgraph_nodes; node != master_clone;
1162        node = next)
1163     {
1164       next = node->next;
1165       if (node->global.inlined_to == master_clone)
1166         cgraph_remove_node (node);
1167     }
1168   cgraph_remove_node (master_clone);
1169   return true;
1170 }
1171
1172
1173 /* Given whole compilation unit estimate of INSNS, compute how large we can
1174    allow the unit to grow.  */
1175
1176 static int
1177 compute_max_insns (int insns)
1178 {
1179   int max_insns = insns;
1180   if (max_insns < PARAM_VALUE (PARAM_LARGE_UNIT_INSNS))
1181     max_insns = PARAM_VALUE (PARAM_LARGE_UNIT_INSNS);
1182
1183   return ((HOST_WIDEST_INT) max_insns
1184           * (100 + PARAM_VALUE (PARAM_INLINE_UNIT_GROWTH)) / 100);
1185 }
1186
1187
1188 /* Compute badness of all edges in NEW_EDGES and add them to the HEAP.  */
1189
1190 static void
1191 add_new_edges_to_heap (fibheap_t heap, VEC (cgraph_edge_p, heap) *new_edges)
1192 {
1193   while (VEC_length (cgraph_edge_p, new_edges) > 0)
1194     {
1195       struct cgraph_edge *edge = VEC_pop (cgraph_edge_p, new_edges);
1196
1197       gcc_assert (!edge->aux);
1198       if (inline_summary (edge->callee)->inlinable
1199           && edge->inline_failed
1200           && can_inline_edge_p (edge, true)
1201           && want_inline_small_function_p (edge, true))
1202         edge->aux = fibheap_insert (heap, edge_badness (edge, false), edge);
1203     }
1204 }
1205
1206
1207 /* We use greedy algorithm for inlining of small functions:
1208    All inline candidates are put into prioritized heap ordered in
1209    increasing badness.
1210
1211    The inlining of small functions is bounded by unit growth parameters.  */
1212
1213 static void
1214 inline_small_functions (void)
1215 {
1216   struct cgraph_node *node;
1217   struct cgraph_edge *edge;
1218   fibheap_t heap = fibheap_new ();
1219   bitmap updated_nodes = BITMAP_ALLOC (NULL);
1220   int min_size, max_size;
1221   VEC (cgraph_edge_p, heap) *new_indirect_edges = NULL;
1222   int initial_size = 0;
1223
1224   if (flag_indirect_inlining)
1225     new_indirect_edges = VEC_alloc (cgraph_edge_p, heap, 8);
1226
1227   if (dump_file)
1228     fprintf (dump_file,
1229              "\nDeciding on inlining of small functions.  Starting with size %i.\n",
1230              initial_size);
1231
1232   /* Compute overall unit size and other global parameters used by badness
1233      metrics.  */
1234
1235   max_count = 0;
1236   initialize_growth_caches ();
1237
1238   FOR_EACH_DEFINED_FUNCTION (node)
1239     if (!node->global.inlined_to)
1240       {
1241         struct inline_summary *info = inline_summary (node);
1242
1243         if (!DECL_EXTERNAL (node->decl))
1244           initial_size += info->size;
1245
1246         for (edge = node->callers; edge; edge = edge->next_caller)
1247           if (max_count < edge->count)
1248             max_count = edge->count;
1249       }
1250
1251   overall_size = initial_size;
1252   max_size = compute_max_insns (overall_size);
1253   min_size = overall_size;
1254
1255   /* Populate the heeap with all edges we might inline.  */
1256
1257   FOR_EACH_DEFINED_FUNCTION (node)
1258     if (!node->global.inlined_to)
1259       {
1260         if (dump_file)
1261           fprintf (dump_file, "Enqueueing calls of %s/%i.\n",
1262                    cgraph_node_name (node), node->uid);
1263
1264         for (edge = node->callers; edge; edge = edge->next_caller)
1265           if (edge->inline_failed
1266               && can_inline_edge_p (edge, true)
1267               && want_inline_small_function_p (edge, true)
1268               && edge->inline_failed)
1269             {
1270               gcc_assert (!edge->aux);
1271               update_edge_key (heap, edge);
1272             }
1273       }
1274
1275   gcc_assert (in_lto_p
1276               || !max_count
1277               || (profile_info && flag_branch_probabilities));
1278
1279   while (!fibheap_empty (heap))
1280     {
1281       int old_size = overall_size;
1282       struct cgraph_node *where, *callee;
1283       int badness = fibheap_min_key (heap);
1284       int current_badness;
1285       int growth;
1286
1287       edge = (struct cgraph_edge *) fibheap_extract_min (heap);
1288       gcc_assert (edge->aux);
1289       edge->aux = NULL;
1290       if (!edge->inline_failed)
1291         continue;
1292
1293       /* Be sure that caches are maintained consistent.  */
1294 #ifdef ENABLE_CHECKING
1295       reset_edge_growth_cache (edge);
1296       reset_node_growth_cache (edge->callee);
1297 #endif
1298
1299       /* When updating the edge costs, we only decrease badness in the keys.
1300          Increases of badness are handled lazilly; when we see key with out
1301          of date value on it, we re-insert it now.  */
1302       current_badness = edge_badness (edge, false);
1303       gcc_assert (current_badness >= badness);
1304       if (current_badness != badness)
1305         {
1306           edge->aux = fibheap_insert (heap, current_badness, edge);
1307           continue;
1308         }
1309
1310       if (!can_inline_edge_p (edge, true))
1311         continue;
1312
1313       callee = edge->callee;
1314       growth = estimate_edge_growth (edge);
1315       if (dump_file)
1316         {
1317           fprintf (dump_file,
1318                    "\nConsidering %s with %i size\n",
1319                    cgraph_node_name (edge->callee),
1320                    inline_summary (edge->callee)->size);
1321           fprintf (dump_file,
1322                    " to be inlined into %s in %s:%i\n"
1323                    " Estimated growth after inlined into all is %+i insns.\n"
1324                    " Estimated badness is %i, frequency %.2f.\n",
1325                    cgraph_node_name (edge->caller),
1326                    flag_wpa ? "unknown"
1327                    : gimple_filename ((const_gimple) edge->call_stmt),
1328                    flag_wpa ? -1
1329                    : gimple_lineno ((const_gimple) edge->call_stmt),
1330                    estimate_growth (edge->callee),
1331                    badness,
1332                    edge->frequency / (double)CGRAPH_FREQ_BASE);
1333           if (edge->count)
1334             fprintf (dump_file," Called "HOST_WIDEST_INT_PRINT_DEC"x\n",
1335                      edge->count);
1336           if (dump_flags & TDF_DETAILS)
1337             edge_badness (edge, true);
1338         }
1339
1340       if (overall_size + growth > max_size
1341           && !DECL_DISREGARD_INLINE_LIMITS (edge->callee->decl))
1342         {
1343           edge->inline_failed = CIF_INLINE_UNIT_GROWTH_LIMIT;
1344           report_inline_failed_reason (edge);
1345           continue;
1346         }
1347
1348       if (!want_inline_small_function_p (edge, true))
1349         continue;
1350
1351       /* Heuristics for inlining small functions works poorly for
1352          recursive calls where we do efect similar to loop unrolling.
1353          When inliing such edge seems profitable, leave decision on
1354          specific inliner.  */
1355       if (cgraph_edge_recursive_p (edge))
1356         {
1357           where = edge->caller;
1358           if (where->global.inlined_to)
1359             where = where->global.inlined_to;
1360           if (!recursive_inlining (edge,
1361                                    flag_indirect_inlining
1362                                    ? &new_indirect_edges : NULL))
1363             {
1364               edge->inline_failed = CIF_RECURSIVE_INLINING;
1365               continue;
1366             }
1367           reset_edge_caches (where);
1368           /* Recursive inliner inlines all recursive calls of the function
1369              at once. Consequently we need to update all callee keys.  */
1370           if (flag_indirect_inlining)
1371             add_new_edges_to_heap (heap, new_indirect_edges);
1372           update_all_callee_keys (heap, where, updated_nodes);
1373         }
1374       else
1375         {
1376           struct cgraph_node *callee;
1377           struct cgraph_node *outer_node = NULL;
1378           int depth = 0;
1379
1380           /* Consider the case where self recursive function A is inlined into B.
1381              This is desired optimization in some cases, since it leads to effect
1382              similar of loop peeling and we might completely optimize out the
1383              recursive call.  However we must be extra selective.  */
1384
1385           where = edge->caller;
1386           while (where->global.inlined_to)
1387             {
1388               if (where->decl == edge->callee->decl)
1389                 outer_node = where, depth++;
1390               where = where->callers->caller;
1391             }
1392           if (outer_node
1393               && !want_inline_self_recursive_call_p (edge, outer_node,
1394                                                      true, depth))
1395             {
1396               edge->inline_failed
1397                 = (DECL_DISREGARD_INLINE_LIMITS (edge->callee->decl)
1398                    ? CIF_RECURSIVE_INLINING : CIF_UNSPECIFIED);
1399               continue;
1400             }
1401           else if (depth && dump_file)
1402             fprintf (dump_file, " Peeling recursion with depth %i\n", depth);
1403
1404           callee = edge->callee;
1405           gcc_checking_assert (!callee->global.inlined_to);
1406           inline_call (edge, true, &new_indirect_edges, &overall_size);
1407           if (flag_indirect_inlining)
1408             add_new_edges_to_heap (heap, new_indirect_edges);
1409
1410           reset_edge_caches (edge->callee);
1411           reset_node_growth_cache (callee);
1412
1413           /* We inlined last offline copy to the body.  This might lead
1414              to callees of function having fewer call sites and thus they
1415              may need updating.  */
1416           if (callee->global.inlined_to)
1417             update_all_callee_keys (heap, callee, updated_nodes);
1418           else
1419             update_callee_keys (heap, edge->callee, updated_nodes);
1420         }
1421       where = edge->caller;
1422       if (where->global.inlined_to)
1423         where = where->global.inlined_to;
1424
1425       /* Our profitability metric can depend on local properties
1426          such as number of inlinable calls and size of the function body.
1427          After inlining these properties might change for the function we
1428          inlined into (since it's body size changed) and for the functions
1429          called by function we inlined (since number of it inlinable callers
1430          might change).  */
1431       update_caller_keys (heap, where, updated_nodes, NULL);
1432
1433       /* We removed one call of the function we just inlined.  If offline
1434          copy is still needed, be sure to update the keys.  */
1435       if (callee != where && !callee->global.inlined_to)
1436         update_caller_keys (heap, callee, updated_nodes, NULL);
1437       bitmap_clear (updated_nodes);
1438
1439       if (dump_file)
1440         {
1441           fprintf (dump_file,
1442                    " Inlined into %s which now has time %i and size %i,"
1443                    "net change of %+i.\n",
1444                    cgraph_node_name (edge->caller),
1445                    inline_summary (edge->caller)->time,
1446                    inline_summary (edge->caller)->size,
1447                    overall_size - old_size);
1448         }
1449       if (min_size > overall_size)
1450         {
1451           min_size = overall_size;
1452           max_size = compute_max_insns (min_size);
1453
1454           if (dump_file)
1455             fprintf (dump_file, "New minimal size reached: %i\n", min_size);
1456         }
1457     }
1458
1459   free_growth_caches ();
1460   if (new_indirect_edges)
1461     VEC_free (cgraph_edge_p, heap, new_indirect_edges);
1462   fibheap_delete (heap);
1463   if (dump_file)
1464     fprintf (dump_file,
1465              "Unit growth for small function inlining: %i->%i (%i%%)\n",
1466              initial_size, overall_size,
1467              initial_size ? overall_size * 100 / (initial_size) - 100: 0);
1468   BITMAP_FREE (updated_nodes);
1469 }
1470
1471 /* Flatten NODE.  Performed both during early inlining and
1472    at IPA inlining time.  */
1473
1474 static void
1475 flatten_function (struct cgraph_node *node, bool early)
1476 {
1477   struct cgraph_edge *e;
1478
1479   /* We shouldn't be called recursively when we are being processed.  */
1480   gcc_assert (node->aux == NULL);
1481
1482   node->aux = (void *) node;
1483
1484   for (e = node->callees; e; e = e->next_callee)
1485     {
1486       struct cgraph_node *orig_callee;
1487
1488       /* We've hit cycle?  It is time to give up.  */
1489       if (e->callee->aux)
1490         {
1491           if (dump_file)
1492             fprintf (dump_file,
1493                      "Not inlining %s into %s to avoid cycle.\n",
1494                      cgraph_node_name (e->callee),
1495                      cgraph_node_name (e->caller));
1496           e->inline_failed = CIF_RECURSIVE_INLINING;
1497           continue;
1498         }
1499
1500       /* When the edge is already inlined, we just need to recurse into
1501          it in order to fully flatten the leaves.  */
1502       if (!e->inline_failed)
1503         {
1504           flatten_function (e->callee, early);
1505           continue;
1506         }
1507
1508       /* Flatten attribute needs to be processed during late inlining. For
1509          extra code quality we however do flattening during early optimization,
1510          too.  */
1511       if (!early
1512           ? !can_inline_edge_p (e, true)
1513           : !can_early_inline_edge_p (e))
1514         continue;
1515
1516       if (cgraph_edge_recursive_p (e))
1517         {
1518           if (dump_file)
1519             fprintf (dump_file, "Not inlining: recursive call.\n");
1520           continue;
1521         }
1522
1523       if (gimple_in_ssa_p (DECL_STRUCT_FUNCTION (node->decl))
1524           != gimple_in_ssa_p (DECL_STRUCT_FUNCTION (e->callee->decl)))
1525         {
1526           if (dump_file)
1527             fprintf (dump_file, "Not inlining: SSA form does not match.\n");
1528           continue;
1529         }
1530
1531       /* Inline the edge and flatten the inline clone.  Avoid
1532          recursing through the original node if the node was cloned.  */
1533       if (dump_file)
1534         fprintf (dump_file, " Inlining %s into %s.\n",
1535                  cgraph_node_name (e->callee),
1536                  cgraph_node_name (e->caller));
1537       orig_callee = e->callee;
1538       inline_call (e, true, NULL, NULL);
1539       if (e->callee != orig_callee)
1540         orig_callee->aux = (void *) node;
1541       flatten_function (e->callee, early);
1542       if (e->callee != orig_callee)
1543         orig_callee->aux = NULL;
1544     }
1545
1546   node->aux = NULL;
1547 }
1548
1549 /* Decide on the inlining.  We do so in the topological order to avoid
1550    expenses on updating data structures.  */
1551
1552 static unsigned int
1553 ipa_inline (void)
1554 {
1555   struct cgraph_node *node;
1556   int nnodes;
1557   struct cgraph_node **order =
1558     XCNEWVEC (struct cgraph_node *, cgraph_n_nodes);
1559   int i;
1560
1561   if (in_lto_p && flag_indirect_inlining)
1562     ipa_update_after_lto_read ();
1563   if (flag_indirect_inlining)
1564     ipa_create_all_structures_for_iinln ();
1565
1566   if (dump_file)
1567     dump_inline_summaries (dump_file);
1568
1569   nnodes = ipa_reverse_postorder (order);
1570
1571   for (node = cgraph_nodes; node; node = node->next)
1572     node->aux = 0;
1573
1574   if (dump_file)
1575     fprintf (dump_file, "\nFlattening functions:\n");
1576
1577   /* In the first pass handle functions to be flattened.  Do this with
1578      a priority so none of our later choices will make this impossible.  */
1579   for (i = nnodes - 1; i >= 0; i--)
1580     {
1581       node = order[i];
1582
1583       /* Handle nodes to be flattened.
1584          Ideally when processing callees we stop inlining at the
1585          entry of cycles, possibly cloning that entry point and
1586          try to flatten itself turning it into a self-recursive
1587          function.  */
1588       if (lookup_attribute ("flatten",
1589                             DECL_ATTRIBUTES (node->decl)) != NULL)
1590         {
1591           if (dump_file)
1592             fprintf (dump_file,
1593                      "Flattening %s\n", cgraph_node_name (node));
1594           flatten_function (node, false);
1595         }
1596     }
1597
1598   inline_small_functions ();
1599   cgraph_remove_unreachable_nodes (true, dump_file);
1600   free (order);
1601
1602   /* We already perform some inlining of functions called once during
1603      inlining small functions above.  After unreachable nodes are removed,
1604      we still might do a quick check that nothing new is found.  */
1605   if (flag_inline_functions_called_once)
1606     {
1607       int cold;
1608       if (dump_file)
1609         fprintf (dump_file, "\nDeciding on functions called once:\n");
1610
1611       /* Inlining one function called once has good chance of preventing
1612          inlining other function into the same callee.  Ideally we should
1613          work in priority order, but probably inlining hot functions first
1614          is good cut without the extra pain of maintaining the queue.
1615
1616          ??? this is not really fitting the bill perfectly: inlining function
1617          into callee often leads to better optimization of callee due to
1618          increased context for optimization.
1619          For example if main() function calls a function that outputs help
1620          and then function that does the main optimization, we should inline
1621          the second with priority even if both calls are cold by themselves.
1622
1623          We probably want to implement new predicate replacing our use of
1624          maybe_hot_edge interpreted as maybe_hot_edge || callee is known
1625          to be hot.  */
1626       for (cold = 0; cold <= 1; cold ++)
1627         {
1628           for (node = cgraph_nodes; node; node = node->next)
1629             {
1630               if (want_inline_function_called_once_p (node)
1631                   && (cold
1632                       || cgraph_maybe_hot_edge_p (node->callers)))
1633                 {
1634                   struct cgraph_node *caller = node->callers->caller;
1635
1636                   if (dump_file)
1637                     {
1638                       fprintf (dump_file,
1639                                "\nInlining %s size %i.\n",
1640                                cgraph_node_name (node), inline_summary (node)->size);
1641                       fprintf (dump_file,
1642                                " Called once from %s %i insns.\n",
1643                                cgraph_node_name (node->callers->caller),
1644                                inline_summary (node->callers->caller)->size);
1645                     }
1646
1647                   inline_call (node->callers, true, NULL, NULL);
1648                   if (dump_file)
1649                     fprintf (dump_file,
1650                              " Inlined into %s which now has %i size\n",
1651                              cgraph_node_name (caller),
1652                              inline_summary (caller)->size);
1653                 }
1654             }
1655         }
1656     }
1657
1658   /* Free ipa-prop structures if they are no longer needed.  */
1659   if (flag_indirect_inlining)
1660     ipa_free_all_structures_after_iinln ();
1661
1662   if (dump_file)
1663     fprintf (dump_file,
1664              "\nInlined %i calls, eliminated %i functions\n\n",
1665              ncalls_inlined, nfunctions_inlined);
1666
1667   if (dump_file)
1668     dump_inline_summaries (dump_file);
1669   /* In WPA we use inline summaries for partitioning process.  */
1670   if (!flag_wpa)
1671     inline_free_summary ();
1672   return 0;
1673 }
1674
1675 /* Inline always-inline function calls in NODE.  */
1676
1677 static bool
1678 inline_always_inline_functions (struct cgraph_node *node)
1679 {
1680   struct cgraph_edge *e;
1681   bool inlined = false;
1682
1683   for (e = node->callees; e; e = e->next_callee)
1684     {
1685       if (!DECL_DISREGARD_INLINE_LIMITS (e->callee->decl))
1686         continue;
1687
1688       if (cgraph_edge_recursive_p (e))
1689         {
1690           if (dump_file)
1691             fprintf (dump_file, "  Not inlining recursive call to %s.\n",
1692                      cgraph_node_name (e->callee));
1693           e->inline_failed = CIF_RECURSIVE_INLINING;
1694           continue;
1695         }
1696
1697       if (!can_early_inline_edge_p (e))
1698         continue;
1699
1700       if (dump_file)
1701         fprintf (dump_file, "  Inlining %s into %s (always_inline).\n",
1702                  cgraph_node_name (e->callee),
1703                  cgraph_node_name (e->caller));
1704       inline_call (e, true, NULL, NULL);
1705       inlined = true;
1706     }
1707
1708   return inlined;
1709 }
1710
1711 /* Decide on the inlining.  We do so in the topological order to avoid
1712    expenses on updating data structures.  */
1713
1714 static bool
1715 early_inline_small_functions (struct cgraph_node *node)
1716 {
1717   struct cgraph_edge *e;
1718   bool inlined = false;
1719
1720   for (e = node->callees; e; e = e->next_callee)
1721     {
1722       if (!inline_summary (e->callee)->inlinable
1723           || !e->inline_failed)
1724         continue;
1725
1726       /* Do not consider functions not declared inline.  */
1727       if (!DECL_DECLARED_INLINE_P (e->callee->decl)
1728           && !flag_inline_small_functions
1729           && !flag_inline_functions)
1730         continue;
1731
1732       if (dump_file)
1733         fprintf (dump_file, "Considering inline candidate %s.\n",
1734                  cgraph_node_name (e->callee));
1735
1736       if (!can_early_inline_edge_p (e))
1737         continue;
1738
1739       if (cgraph_edge_recursive_p (e))
1740         {
1741           if (dump_file)
1742             fprintf (dump_file, "  Not inlining: recursive call.\n");
1743           continue;
1744         }
1745
1746       if (!want_early_inline_function_p (e))
1747         continue;
1748
1749       if (dump_file)
1750         fprintf (dump_file, " Inlining %s into %s.\n",
1751                  cgraph_node_name (e->callee),
1752                  cgraph_node_name (e->caller));
1753       inline_call (e, true, NULL, NULL);
1754       inlined = true;
1755     }
1756
1757   return inlined;
1758 }
1759
1760 /* Do inlining of small functions.  Doing so early helps profiling and other
1761    passes to be somewhat more effective and avoids some code duplication in
1762    later real inlining pass for testcases with very many function calls.  */
1763 static unsigned int
1764 early_inliner (void)
1765 {
1766   struct cgraph_node *node = cgraph_get_node (current_function_decl);
1767   struct cgraph_edge *edge;
1768   unsigned int todo = 0;
1769   int iterations = 0;
1770   bool inlined = false;
1771
1772   if (seen_error ())
1773     return 0;
1774
1775   /* Do nothing if datastructures for ipa-inliner are already computed.  This
1776      happens when some pass decides to construct new function and
1777      cgraph_add_new_function calls lowering passes and early optimization on
1778      it.  This may confuse ourself when early inliner decide to inline call to
1779      function clone, because function clones don't have parameter list in
1780      ipa-prop matching their signature.  */
1781   if (ipa_node_params_vector)
1782     return 0;
1783
1784 #ifdef ENABLE_CHECKING
1785   verify_cgraph_node (node);
1786 #endif
1787
1788   /* Even when not optimizing or not inlining inline always-inline
1789      functions.  */
1790   inlined = inline_always_inline_functions (node);
1791
1792   if (!optimize
1793       || flag_no_inline
1794       || !flag_early_inlining
1795       /* Never inline regular functions into always-inline functions
1796          during incremental inlining.  This sucks as functions calling
1797          always inline functions will get less optimized, but at the
1798          same time inlining of functions calling always inline
1799          function into an always inline function might introduce
1800          cycles of edges to be always inlined in the callgraph.
1801
1802          We might want to be smarter and just avoid this type of inlining.  */
1803       || DECL_DISREGARD_INLINE_LIMITS (node->decl))
1804     ;
1805   else if (lookup_attribute ("flatten",
1806                              DECL_ATTRIBUTES (node->decl)) != NULL)
1807     {
1808       /* When the function is marked to be flattened, recursively inline
1809          all calls in it.  */
1810       if (dump_file)
1811         fprintf (dump_file,
1812                  "Flattening %s\n", cgraph_node_name (node));
1813       flatten_function (node, true);
1814       inlined = true;
1815     }
1816   else
1817     {
1818       /* We iterate incremental inlining to get trivial cases of indirect
1819          inlining.  */
1820       while (iterations < PARAM_VALUE (PARAM_EARLY_INLINER_MAX_ITERATIONS)
1821              && early_inline_small_functions (node))
1822         {
1823           timevar_push (TV_INTEGRATION);
1824           todo |= optimize_inline_calls (current_function_decl);
1825
1826           /* Technically we ought to recompute inline parameters so the new
1827              iteration of early inliner works as expected.  We however have
1828              values approximately right and thus we only need to update edge
1829              info that might be cleared out for newly discovered edges.  */
1830           for (edge = node->callees; edge; edge = edge->next_callee)
1831             {
1832               struct inline_edge_summary *es = inline_edge_summary (edge);
1833               es->call_stmt_size
1834                 = estimate_num_insns (edge->call_stmt, &eni_size_weights);
1835               es->call_stmt_time
1836                 = estimate_num_insns (edge->call_stmt, &eni_time_weights);
1837             }
1838           timevar_pop (TV_INTEGRATION);
1839           iterations++;
1840           inlined = false;
1841         }
1842       if (dump_file)
1843         fprintf (dump_file, "Iterations: %i\n", iterations);
1844     }
1845
1846   if (inlined)
1847     {
1848       timevar_push (TV_INTEGRATION);
1849       todo |= optimize_inline_calls (current_function_decl);
1850       timevar_pop (TV_INTEGRATION);
1851     }
1852
1853   cfun->always_inline_functions_inlined = true;
1854
1855   return todo;
1856 }
1857
1858 struct gimple_opt_pass pass_early_inline =
1859 {
1860  {
1861   GIMPLE_PASS,
1862   "einline",                            /* name */
1863   NULL,                                 /* gate */
1864   early_inliner,                        /* execute */
1865   NULL,                                 /* sub */
1866   NULL,                                 /* next */
1867   0,                                    /* static_pass_number */
1868   TV_INLINE_HEURISTICS,                 /* tv_id */
1869   PROP_ssa,                             /* properties_required */
1870   0,                                    /* properties_provided */
1871   0,                                    /* properties_destroyed */
1872   0,                                    /* todo_flags_start */
1873   TODO_dump_func                        /* todo_flags_finish */
1874  }
1875 };
1876
1877
/* Gate of the IPA inlining pass.  Inlining of always-inline functions
   already happens during early inlining.  */

static bool
gate_ipa_inline (void)
{
  /* ???  Skipping this pass when not optimizing or not inlining would be
     nice, since early inlining has processed all always-inline functions
     by then.  That however at least breaks EH with C++, because fixup_cfg
     has to be run unconditionally even at -O0.  So the pass stays enabled
     unconditionally for now.  */
  return true;
}
1891
1892 struct ipa_opt_pass_d pass_ipa_inline =
1893 {
1894  {
1895   IPA_PASS,
1896   "inline",                             /* name */
1897   gate_ipa_inline,                      /* gate */
1898   ipa_inline,                           /* execute */
1899   NULL,                                 /* sub */
1900   NULL,                                 /* next */
1901   0,                                    /* static_pass_number */
1902   TV_INLINE_HEURISTICS,                 /* tv_id */
1903   0,                                    /* properties_required */
1904   0,                                    /* properties_provided */
1905   0,                                    /* properties_destroyed */
1906   TODO_remove_functions,                /* todo_flags_finish */
1907   TODO_dump_cgraph | TODO_dump_func
1908   | TODO_remove_functions | TODO_ggc_collect    /* todo_flags_finish */
1909  },
1910  inline_generate_summary,               /* generate_summary */
1911  inline_write_summary,                  /* write_summary */
1912  inline_read_summary,                   /* read_summary */
1913  NULL,                                  /* write_optimization_summary */
1914  NULL,                                  /* read_optimization_summary */
1915  NULL,                                  /* stmt_fixup */
1916  0,                                     /* TODOs */
1917  inline_transform,                      /* function_transform */
1918  NULL,                                  /* variable_transform */
1919 };