gcc/ipa-inline.c

   1 /* Inlining decision heuristics.
   2    Copyright (C) 2003-2020 Free Software Foundation, Inc.
   3    Contributed by Jan Hubicka
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 3, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21 /*  Inlining decision heuristics
  22
  23     The implementation of inliner is organized as follows:
  24
  25     inlining heuristics limits
  26
  27       can_inline_edge_p allows checking that a particular inlining is allowed
  28       by the limits specified by user (allowed function growth, growth and so
  29       on).
  30
  31       Functions are inlined when it is obvious the result is profitable (such
  32       as functions called once or when inlining reduce code size).
  33       In addition to that we perform inlining of small functions and recursive
  34       inlining.
  35
  36     inlining heuristics
  37
  38        The inliner itself is split into two passes:
  39
  40        pass_early_inlining
  41
  42          Simple local inlining pass inlining callees into current function.
  43          This pass makes no use of whole unit analysis and thus it can do only
  44          very simple decisions based on local properties.
  45
  46          The strength of the pass is that it is run in topological order
  47          (reverse postorder) on the callgraph. Functions are converted into SSA
  48          form just before this pass and optimized subsequently. As a result, the
  49          callees of the function seen by the early inliner were already optimized
  50          and results of early inlining add a lot of optimization opportunities
  51          for the local optimization.
  52
  53          The pass handles the obvious inlining decisions within the compilation
  54          unit - inlining auto inline functions, inlining for size and
  55          flattening.
  56
  57          The main strength of the pass is the ability to eliminate abstraction
  58          penalty in C++ code (via combination of inlining and early
  59          optimization) and thus improve quality of analysis done by real IPA
  60          optimizers.
  61
  62          Because of lack of whole unit knowledge, the pass cannot really make
  63          good code size/performance tradeoffs.  It however does very simple
  64          speculative inlining allowing code size to grow by
  65          EARLY_INLINING_INSNS when callee is leaf function.  In this case the
  66          optimizations performed later are very likely to eliminate the cost.
  67
  68        pass_ipa_inline
  69
  70          This is the real inliner able to handle inlining with whole program
  71          knowledge. It performs following steps:
  72
  73          1) inlining of small functions.  This is implemented by greedy
  74          algorithm ordering all inlinable cgraph edges by their badness and
  75          inlining them in this order as long as inline limits allows doing so.
  76
  77          This heuristic is not very good at inlining recursive calls. Recursive
  78          calls can be inlined with results similar to loop unrolling. To do so,
  79          special purpose recursive inliner is executed on function when
  80          recursive edge is met as viable candidate.
  81
  82          2) Unreachable functions are removed from callgraph.  Inlining leads
  83          to devirtualization and other modification of callgraph so functions
  84          may become unreachable during the process. Also functions declared as
  85          extern inline or virtual functions are removed, since after inlining
  86          we no longer need the offline bodies.
  87
  88          3) Functions called once and not exported from the unit are inlined.
  89          This should almost always lead to reduction of code size by eliminating
  90          the need for offline copy of the function.  */
  91
  92 #include "config.h"
  93 #include "system.h"
  94 #include "coretypes.h"
  95 #include "backend.h"
  96 #include "target.h"
  97 #include "rtl.h"
  98 #include "tree.h"
  99 #include "gimple.h"
 100 #include "alloc-pool.h"
 101 #include "tree-pass.h"
 102 #include "gimple-ssa.h"
 103 #include "cgraph.h"
 104 #include "lto-streamer.h"
 105 #include "trans-mem.h"
 106 #include "calls.h"
 107 #include "tree-inline.h"
 108 #include "profile.h"
 109 #include "symbol-summary.h"
 110 #include "tree-vrp.h"
 111 #include "ipa-prop.h"
 112 #include "ipa-fnsummary.h"
 113 #include "ipa-inline.h"
 114 #include "ipa-utils.h"
 115 #include "sreal.h"
 116 #include "auto-profile.h"
 117 #include "builtins.h"
 118 #include "fibonacci_heap.h"
 119 #include "stringpool.h"
 120 #include "attribs.h"
 121 #include "asan.h"
 122
 123 typedef fibonacci_heap <sreal, cgraph_edge> edge_heap_t;
 124 typedef fibonacci_node <sreal, cgraph_edge> edge_heap_node_t;
 125
 126 /* Statistics we collect about inlining algorithm.  */
 127 static int overall_size;
 128 static profile_count max_count;
 129 static profile_count spec_rem;
 130
 131 /* Return false when inlining edge E would lead to violating
 132    limits on function unit growth or stack usage growth.
 133
 134    The relative function body growth limit is present generally
 135    to avoid problems with non-linear behavior of the compiler.
 136    To allow inlining huge functions into tiny wrapper, the limit
 137    is always based on the bigger of the two functions considered.
 138
 139    For stack growth limits we always base the growth in stack usage
 140    of the callers.  We want to prevent applications from segfaulting
 141    on stack overflow when functions with huge stack frames gets
 142    inlined. */
 143
 144 static bool
 145 caller_growth_limits (struct cgraph_edge *e)
 146 {
 147   struct cgraph_node *to = e->caller;
 148   struct cgraph_node *what = e->callee->ultimate_alias_target ();
 149   int newsize;
 150   int limit = 0;
 151   HOST_WIDE_INT stack_size_limit = 0, inlined_stack;
 152   ipa_size_summary *outer_info = ipa_size_summaries->get (to);
 153
 154   /* Look for function e->caller is inlined to.  While doing
 155      so work out the largest function body on the way.  As
 156      described above, we want to base our function growth
 157      limits based on that.  Not on the self size of the
 158      outer function, not on the self size of inline code
 159      we immediately inline to.  This is the most relaxed
 160      interpretation of the rule "do not grow large functions
 161      too much in order to prevent compiler from exploding".  */
 162   while (true)
 163     {
 164       ipa_size_summary *size_info = ipa_size_summaries->get (to);
 165       if (limit < size_info->self_size)
 166         limit = size_info->self_size;
 167       if (stack_size_limit < size_info->estimated_self_stack_size)
 168         stack_size_limit = size_info->estimated_self_stack_size;
 169       if (to->inlined_to)
 170         to = to->callers->caller;
 171       else
 172         break;
 173     }
 174
 175   ipa_fn_summary *what_info = ipa_fn_summaries->get (what);
 176   ipa_size_summary *what_size_info = ipa_size_summaries->get (what);
 177
 178   if (limit < what_size_info->self_size)
 179     limit = what_size_info->self_size;
 180
 181   limit += limit * opt_for_fn (to->decl, param_large_function_growth) / 100;
 182
 183   /* Check the size after inlining against the function limits.  But allow
 184      the function to shrink if it went over the limits by forced inlining.  */
 185   newsize = estimate_size_after_inlining (to, e);
 186   if (newsize >= ipa_size_summaries->get (what)->size
 187       && newsize > opt_for_fn (to->decl, param_large_function_insns)
 188       && newsize > limit)
 189     {
 190       e->inline_failed = CIF_LARGE_FUNCTION_GROWTH_LIMIT;
 191       return false;
 192     }
 193
 194   if (!what_info->estimated_stack_size)
 195     return true;
 196
 197   /* FIXME: Stack size limit often prevents inlining in Fortran programs
 198      due to large i/o datastructures used by the Fortran front-end.
 199      We ought to ignore this limit when we know that the edge is executed
 200      on every invocation of the caller (i.e. its call statement dominates
 201      exit block).  We do not track this information, yet.  */
 202   stack_size_limit += ((gcov_type)stack_size_limit
 203                        * opt_for_fn (to->decl, param_stack_frame_growth)
 204                        / 100);
 205
 206   inlined_stack = (ipa_get_stack_frame_offset (to)
 207                    + outer_info->estimated_self_stack_size
 208                    + what_info->estimated_stack_size);
 209   /* Check new stack consumption with stack consumption at the place
 210      stack is used.  */
 211   if (inlined_stack > stack_size_limit
 212       /* If function already has large stack usage from sibling
 213          inline call, we can inline, too.
 214          This bit overoptimistically assume that we are good at stack
 215          packing.  */
 216       && inlined_stack > ipa_fn_summaries->get (to)->estimated_stack_size
 217       && inlined_stack > opt_for_fn (to->decl, param_large_stack_frame))
 218     {
 219       e->inline_failed = CIF_LARGE_STACK_FRAME_GROWTH_LIMIT;
 220       return false;
 221     }
 222   return true;
 223 }
 224
 225 /* Dump info about why inlining has failed.  */
 226
 227 static void
 228 report_inline_failed_reason (struct cgraph_edge *e)
 229 {
 230   if (dump_enabled_p ())
 231     {
 232       dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
 233                        "  not inlinable: %C -> %C, %s\n",
 234                        e->caller, e->callee,
 235                        cgraph_inline_failed_string (e->inline_failed));
 236       if ((e->inline_failed == CIF_TARGET_OPTION_MISMATCH
 237            || e->inline_failed == CIF_OPTIMIZATION_MISMATCH)
 238           && e->caller->lto_file_data
 239           && e->callee->ultimate_alias_target ()->lto_file_data)
 240         {
 241           dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
 242                            "  LTO objects: %s, %s\n",
 243                            e->caller->lto_file_data->file_name,
 244                            e->callee->ultimate_alias_target ()->lto_file_data->file_name);
 245         }
 246       if (e->inline_failed == CIF_TARGET_OPTION_MISMATCH)
 247         if (dump_file)
 248           cl_target_option_print_diff
 249             (dump_file, 2, target_opts_for_fn (e->caller->decl),
 250              target_opts_for_fn (e->callee->ultimate_alias_target ()->decl));
 251       if (e->inline_failed == CIF_OPTIMIZATION_MISMATCH)
 252         if (dump_file)
 253           cl_optimization_print_diff
 254             (dump_file, 2, opts_for_fn (e->caller->decl),
 255              opts_for_fn (e->callee->ultimate_alias_target ()->decl));
 256     }
 257 }
 258
 259  /* Decide whether sanitizer-related attributes allow inlining. */
 260
 261 static bool
 262 sanitize_attrs_match_for_inline_p (const_tree caller, const_tree callee)
 263 {
 264   if (!caller || !callee)
 265     return true;
 266
 267   /* Follow clang and allow inlining for always_inline functions.  */
 268   if (lookup_attribute ("always_inline", DECL_ATTRIBUTES (callee)))
 269     return true;
 270
 271   const sanitize_code codes[] =
 272     {
 273       SANITIZE_ADDRESS,
 274       SANITIZE_THREAD,
 275       SANITIZE_UNDEFINED,
 276       SANITIZE_UNDEFINED_NONDEFAULT,
 277       SANITIZE_POINTER_COMPARE,
 278       SANITIZE_POINTER_SUBTRACT
 279     };
 280
 281   for (unsigned i = 0; i < sizeof (codes) / sizeof (codes[0]); i++)
 282     if (sanitize_flags_p (codes[i], caller)
 283         != sanitize_flags_p (codes[i], callee))
 284       return false;
 285
 286   return true;
 287 }
 288
 289 /* Used for flags where it is safe to inline when caller's value is
 290    greater than callee's.  Expands to true when FLAG forbids inlining:
        the caller's and callee's values differ and either always_inline is
        false or the caller's value is lower than the callee's.  */
 291 #define check_maybe_up(flag) \
 292       (opts_for_fn (caller->decl)->x_##flag             \
 293        != opts_for_fn (callee->decl)->x_##flag          \
 294        && (!always_inline                               \
 295            || opts_for_fn (caller->decl)->x_##flag      \
 296               < opts_for_fn (callee->decl)->x_##flag))
 297 /* Used for flags where it is safe to inline when caller's value is
 298    smaller than callee's.  Expands to true when FLAG forbids inlining:
        the caller's and callee's values differ and either always_inline is
        false or the caller's value is higher than the callee's.  */
 299 #define check_maybe_down(flag) \
 300       (opts_for_fn (caller->decl)->x_##flag             \
 301        != opts_for_fn (callee->decl)->x_##flag          \
 302        && (!always_inline                               \
 303            || opts_for_fn (caller->decl)->x_##flag      \
 304               > opts_for_fn (callee->decl)->x_##flag))
 305 /* Used for flags where exact match is needed for correctness.  Expands
        to true when the caller's and callee's values of FLAG differ.

        All three macros take CALLER and CALLEE (and, for the maybe_up and
        maybe_down variants, ALWAYS_INLINE) from the scope in which they
        are used.  FLAG appears only as an operand of ##, so the argument
        is pasted as written and is not macro-expanded first; keep it that
        way when changing these macros.  */
 306 #define check_match(flag) \
 307       (opts_for_fn (caller->decl)->x_##flag             \
 308        != opts_for_fn (callee->decl)->x_##flag)
 309
 310 /* Decide if we can inline the edge and possibly update
 311    inline_failed reason.
 312    We check whether inlining is possible at all and whether
 313    caller growth limits allow doing so.
 314
 315    if REPORT is true, output reason to the dump file. */
 316
 317 static bool
 318 can_inline_edge_p (struct cgraph_edge *e, bool report,
 319                    bool early = false)
 320 {
 321   gcc_checking_assert (e->inline_failed);
 322
 323   if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
 324     {
 325       if (report)
 326         report_inline_failed_reason (e);
 327       return false;
 328     }
 329
 330   bool inlinable = true;
 331   enum availability avail;
 332   cgraph_node *caller = (e->caller->inlined_to
 333                          ? e->caller->inlined_to : e->caller);
 334   cgraph_node *callee = e->callee->ultimate_alias_target (&avail, caller);
 335
 336   if (!callee->definition)
 337     {
 338       e->inline_failed = CIF_BODY_NOT_AVAILABLE;
 339       inlinable = false;
 340     }
 341   if (!early && (!opt_for_fn (callee->decl, optimize)
 342                  || !opt_for_fn (caller->decl, optimize)))
 343     {
 344       e->inline_failed = CIF_FUNCTION_NOT_OPTIMIZED;
 345       inlinable = false;
 346     }
 347   else if (callee->calls_comdat_local)
 348     {
 349       e->inline_failed = CIF_USES_COMDAT_LOCAL;
 350       inlinable = false;
 351     }
 352   else if (avail <= AVAIL_INTERPOSABLE)
 353     {
 354       e->inline_failed = CIF_OVERWRITABLE;
 355       inlinable = false;
 356     }
 357   /* All edges with call_stmt_cannot_inline_p should have inline_failed
 358      initialized to one of FINAL_ERROR reasons.  */
 359   else if (e->call_stmt_cannot_inline_p)
 360     gcc_unreachable ();
 361   /* Don't inline if the functions have different EH personalities.  */
 362   else if (DECL_FUNCTION_PERSONALITY (caller->decl)
 363            && DECL_FUNCTION_PERSONALITY (callee->decl)
 364            && (DECL_FUNCTION_PERSONALITY (caller->decl)
 365                != DECL_FUNCTION_PERSONALITY (callee->decl)))
 366     {
 367       e->inline_failed = CIF_EH_PERSONALITY;
 368       inlinable = false;
 369     }
 370   /* TM pure functions should not be inlined into non-TM_pure
 371      functions.  */
 372   else if (is_tm_pure (callee->decl) && !is_tm_pure (caller->decl))
 373     {
 374       e->inline_failed = CIF_UNSPECIFIED;
 375       inlinable = false;
 376     }
 377   /* Check compatibility of target optimization options.  */
 378   else if (!targetm.target_option.can_inline_p (caller->decl,
 379                                                 callee->decl))
 380     {
 381       e->inline_failed = CIF_TARGET_OPTION_MISMATCH;
 382       inlinable = false;
 383     }
 384   else if (ipa_fn_summaries->get (callee) == NULL
 385            || !ipa_fn_summaries->get (callee)->inlinable)
 386     {
 387       e->inline_failed = CIF_FUNCTION_NOT_INLINABLE;
 388       inlinable = false;
 389     }
 390   /* Don't inline a function with mismatched sanitization attributes. */
 391   else if (!sanitize_attrs_match_for_inline_p (caller->decl, callee->decl))
 392     {
 393       e->inline_failed = CIF_SANITIZE_ATTRIBUTE_MISMATCH;
 394       inlinable = false;
 395     }
 396   if (!inlinable && report)
 397     report_inline_failed_reason (e);
 398   return inlinable;
 399 }
 400
 401 /* Return inlining_insns_single limit for function N. If HINT is true
 402    scale up the bound.  */
 403
 404 static int
 405 inline_insns_single (cgraph_node *n, bool hint)
 406 {
 407   if (hint)
 408     return opt_for_fn (n->decl, param_max_inline_insns_single)
 409            * opt_for_fn (n->decl, param_inline_heuristics_hint_percent) / 100;
 410   return opt_for_fn (n->decl, param_max_inline_insns_single);
 411 }
 412
 413 /* Return inlining_insns_auto limit for function N. If HINT is true
 414    scale up the bound.   */
 415
 416 static int
 417 inline_insns_auto (cgraph_node *n, bool hint)
 418 {
 419   int max_inline_insns_auto = opt_for_fn (n->decl, param_max_inline_insns_auto);
 420   if (hint)
 421     return max_inline_insns_auto
 422            * opt_for_fn (n->decl, param_inline_heuristics_hint_percent) / 100;
 423   return max_inline_insns_auto;
 424 }
 425
 426 /* Decide if we can inline the edge and possibly update
 427    inline_failed reason.
 428    We check whether inlining is possible at all and whether
 429    caller growth limits allow doing so.
 430
 431    if REPORT is true, output reason to the dump file.
 432
 433    if DISREGARD_LIMITS is true, ignore size limits.  */
 434
 435 static bool
 436 can_inline_edge_by_limits_p (struct cgraph_edge *e, bool report,
 437                              bool disregard_limits = false, bool early = false)
 438 {
 439   gcc_checking_assert (e->inline_failed);
 440
 441   if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
 442     {
 443       if (report)
 444         report_inline_failed_reason (e);
 445       return false;
 446     }
 447
 448   bool inlinable = true;
 449   enum availability avail;
 450   cgraph_node *caller = (e->caller->inlined_to
 451                          ? e->caller->inlined_to : e->caller);
 452   cgraph_node *callee = e->callee->ultimate_alias_target (&avail, caller);
 453   tree caller_tree = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (caller->decl);
 454   tree callee_tree
 455     = callee ? DECL_FUNCTION_SPECIFIC_OPTIMIZATION (callee->decl) : NULL;
 456   /* Check if caller growth allows the inlining.  */
 457   if (!DECL_DISREGARD_INLINE_LIMITS (callee->decl)
 458       && !disregard_limits
 459       && !lookup_attribute ("flatten",
 460                  DECL_ATTRIBUTES (caller->decl))
 461       && !caller_growth_limits (e))
 462     inlinable = false;
 463   else if (callee->externally_visible
 464            && !DECL_DISREGARD_INLINE_LIMITS (callee->decl)
 465            && flag_live_patching == LIVE_PATCHING_INLINE_ONLY_STATIC)
 466     {
 467       e->inline_failed = CIF_EXTERN_LIVE_ONLY_STATIC;
 468       inlinable = false;
 469     }
 470   /* Don't inline a function with a higher optimization level than the
 471      caller.  FIXME: this is really just tip of iceberg of handling
 472      optimization attribute.  */
 473   else if (caller_tree != callee_tree)
 474     {
 475       bool always_inline =
 476              (DECL_DISREGARD_INLINE_LIMITS (callee->decl)
 477               && lookup_attribute ("always_inline",
 478                                    DECL_ATTRIBUTES (callee->decl)));
 479       ipa_fn_summary *caller_info = ipa_fn_summaries->get (caller);
 480       ipa_fn_summary *callee_info = ipa_fn_summaries->get (callee);
 481
 482      /* Until GCC 4.9 we did not check the semantics-altering flags
 483         below and inlined across optimization boundaries.
 484         Enabling checks below breaks several packages by refusing
 485         to inline library always_inline functions. See PR65873.
 486         Disable the check for early inlining for now until better solution
 487         is found.  */
 488      if (always_inline && early)
 489         ;
 490       /* There are some options that change IL semantics which means
 491          we cannot inline in these cases for correctness reason.
 492          Not even for always_inline declared functions.  */
 493      else if (check_match (flag_wrapv)
 494               || check_match (flag_trapv)
 495               || check_match (flag_pcc_struct_return)
 496               || check_maybe_down (optimize_debug)
 497               /* When caller or callee does FP math, be sure FP codegen flags
 498                  compatible.  */
 499               || ((caller_info->fp_expressions && callee_info->fp_expressions)
 500                   && (check_maybe_up (flag_rounding_math)
 501                       || check_maybe_up (flag_trapping_math)
 502                       || check_maybe_down (flag_unsafe_math_optimizations)
 503                       || check_maybe_down (flag_finite_math_only)
 504                       || check_maybe_up (flag_signaling_nans)
 505                       || check_maybe_down (flag_cx_limited_range)
 506                       || check_maybe_up (flag_signed_zeros)
 507                       || check_maybe_down (flag_associative_math)
 508                       || check_maybe_down (flag_reciprocal_math)
 509                       || check_maybe_down (flag_fp_int_builtin_inexact)
 510                       /* Strictly speaking only when the callee contains function
 511                          calls that may end up setting errno.  */
 512                       || check_maybe_up (flag_errno_math)))
 513               /* We do not want to make code compiled with exceptions to be
 514                  brought into a non-EH function unless we know that the callee
 515                  does not throw.
 516                  This is tracked by DECL_FUNCTION_PERSONALITY.  */
 517               || (check_maybe_up (flag_non_call_exceptions)
 518                   && DECL_FUNCTION_PERSONALITY (callee->decl))
 519               || (check_maybe_up (flag_exceptions)
 520                   && DECL_FUNCTION_PERSONALITY (callee->decl))
 521               /* When devirtualization is disabled for callee, it is not safe
 522                  to inline it as we possibly mangled the type info.
 523                  Allow early inlining of always inlines.  */
 524               || (!early && check_maybe_down (flag_devirtualize)))
 525         {
 526           e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
 527           inlinable = false;
 528         }
 529       /* gcc.dg/pr43564.c.  Apply user-forced inline even at -O0.  */
 530       else if (always_inline)
 531         ;
 532       /* When user added an attribute to the callee honor it.  */
 533       else if (lookup_attribute ("optimize", DECL_ATTRIBUTES (callee->decl))
 534                && opts_for_fn (caller->decl) != opts_for_fn (callee->decl))
 535         {
 536           e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
 537           inlinable = false;
 538         }
 539       /* If explicit optimize attribute are not used, the mismatch is caused
 540          by different command line options used to build different units.
 541          Do not care about COMDAT functions - those are intended to be
 542          optimized with the optimization flags of module they are used in.
 543          Also do not care about mixing up size/speed optimization when
 544          DECL_DISREGARD_INLINE_LIMITS is set.  */
 545       else if ((callee->merged_comdat
 546                 && !lookup_attribute ("optimize",
 547                                       DECL_ATTRIBUTES (caller->decl)))
 548                || DECL_DISREGARD_INLINE_LIMITS (callee->decl))
 549         ;
 550       /* If mismatch is caused by merging two LTO units with different
 551          optimization flags we want to be bit nicer.  However never inline
 552          if one of functions is not optimized at all.  */
 553       else if (!opt_for_fn (callee->decl, optimize)
 554                || !opt_for_fn (caller->decl, optimize))
 555         {
 556           e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
 557           inlinable = false;
 558         }
 559       /* If callee is optimized for size and caller is not, allow inlining if
 560          code shrinks or we are in param_max_inline_insns_single limit and
 561          callee is inline (and thus likely an unified comdat).
 562          This will allow caller to run faster.  */
 563       else if (opt_for_fn (callee->decl, optimize_size)
 564                > opt_for_fn (caller->decl, optimize_size))
 565         {
 566           int growth = estimate_edge_growth (e);
 567           if (growth > opt_for_fn (caller->decl, param_max_inline_insns_size)
 568               && (!DECL_DECLARED_INLINE_P (callee->decl)
 569                   && growth >= MAX (inline_insns_single (caller, false),
 570                                     inline_insns_auto (caller, false))))
 571             {
 572               e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
 573               inlinable = false;
 574             }
 575         }
 576       /* If callee is more aggressively optimized for performance than caller,
 577          we generally want to inline only cheap (runtime wise) functions.  */
 578       else if (opt_for_fn (callee->decl, optimize_size)
 579                < opt_for_fn (caller->decl, optimize_size)
 580                || (opt_for_fn (callee->decl, optimize)
 581                    > opt_for_fn (caller->decl, optimize)))
 582         {
 583           if (estimate_edge_time (e)
 584               >= 20 + ipa_call_summaries->get (e)->call_stmt_time)
 585             {
 586               e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
 587               inlinable = false;
 588             }
 589         }
 590
 591     }
 592
 593   if (!inlinable && report)
 594     report_inline_failed_reason (e);
 595   return inlinable;
 596 }
 597
 598
 599 /* Return true if the edge E is inlinable during early inlining.  */
 600
 601 static bool
 602 can_early_inline_edge_p (struct cgraph_edge *e)
 603 {
 604   struct cgraph_node *callee = e->callee->ultimate_alias_target ();
 605   /* Early inliner might get called at WPA stage when IPA pass adds new
 606      function.  In this case we cannot really do any of early inlining
 607      because function bodies are missing.  */
 608   if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
 609     return false;
 610   if (!gimple_has_body_p (callee->decl))
 611     {
 612       e->inline_failed = CIF_BODY_NOT_AVAILABLE;
 613       return false;
 614     }
 615   /* In early inliner some of callees may not be in SSA form yet
 616      (i.e. the callgraph is cyclic and we did not process
 617      the callee by early inliner, yet).  We don't have CIF code for this
 618      case; later we will re-do the decision in the real inliner.  */
 619   if (!gimple_in_ssa_p (DECL_STRUCT_FUNCTION (e->caller->decl))
 620       || !gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee->decl)))
 621     {
 622       if (dump_enabled_p ())
 623         dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
 624                          "  edge not inlinable: not in SSA form\n");
 625       return false;
 626     }
 627   if (!can_inline_edge_p (e, true, true)
 628       || !can_inline_edge_by_limits_p (e, true, false, true))
 629     return false;
 630   return true;
 631 }
 632
 633
 634 /* Return number of calls in N.  Ignore cheap builtins.  */
 635
 636 static int
 637 num_calls (struct cgraph_node *n)
 638 {
 639   struct cgraph_edge *e;
 640   int num = 0;
 641
 642   for (e = n->callees; e; e = e->next_callee)
 643     if (!is_inexpensive_builtin (e->callee->decl))
 644       num++;
 645   return num;
 646 }
 647
 648
 649 /* Return true if we are interested in inlining small function.  */
 650
 651 static bool
 652 want_early_inline_function_p (struct cgraph_edge *e)
 653 {
 654   bool want_inline = true;
 655   struct cgraph_node *callee = e->callee->ultimate_alias_target ();
 656
 657   if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
 658     ;
 659   /* For AutoFDO, we need to make sure that before profile summary, all
 660      hot paths' IR look exactly the same as profiled binary. As a result,
 661      in einliner, we will disregard size limit and inline those callsites
 662      that are:
 663        * inlined in the profiled binary, and
 664        * the cloned callee has enough samples to be considered "hot".  */
 665   else if (flag_auto_profile && afdo_callsite_hot_enough_for_early_inline (e))
 666     ;
 667   else if (!DECL_DECLARED_INLINE_P (callee->decl)
 668            && !opt_for_fn (e->caller->decl, flag_inline_small_functions))
 669     {
 670       e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
 671       report_inline_failed_reason (e);
 672       want_inline = false;
 673     }
 674   else
 675     {
 676       /* First take care of very large functions.  */
 677       int min_growth = estimate_min_edge_growth (e), growth = 0;
 678       int n;
 679       int early_inlining_insns = param_early_inlining_insns;
 680
 681       if (min_growth > early_inlining_insns)
 682         {
 683           if (dump_enabled_p ())
 684             dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
 685                              "  will not early inline: %C->%C, "
 686                              "call is cold and code would grow "
 687                              "at least by %i\n",
 688                              e->caller, callee,
 689                              min_growth);
 690           want_inline = false;
 691         }
 692       else
 693         growth = estimate_edge_growth (e);
 694
 695
 696       if (!want_inline || growth <= param_max_inline_insns_size)
 697         ;
 698       else if (!e->maybe_hot_p ())
 699         {
 700           if (dump_enabled_p ())
 701             dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
 702                              "  will not early inline: %C->%C, "
 703                              "call is cold and code would grow by %i\n",
 704                              e->caller, callee,
 705                              growth);
 706           want_inline = false;
 707         }
 708       else if (growth > early_inlining_insns)
 709         {
 710           if (dump_enabled_p ())
 711             dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
 712                              "  will not early inline: %C->%C, "
 713                              "growth %i exceeds --param early-inlining-insns\n",
 714                              e->caller, callee, growth);
 715           want_inline = false;
 716         }
 717       else if ((n = num_calls (callee)) != 0
 718                && growth * (n + 1) > early_inlining_insns)
 719         {
 720           if (dump_enabled_p ())
 721             dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
 722                              "  will not early inline: %C->%C, "
 723                              "growth %i exceeds --param early-inlining-insns "
 724                              "divided by number of calls\n",
 725                              e->caller, callee, growth);
 726           want_inline = false;
 727         }
 728     }
 729   return want_inline;
 730 }
 731
 732 /* Compute time of the edge->caller + edge->callee execution when inlining
 733    does not happen.  */
 734
 735 inline sreal
 736 compute_uninlined_call_time (struct cgraph_edge *edge,
 737                              sreal uninlined_call_time,
 738                              sreal freq)
 739 {
 740   cgraph_node *caller = (edge->caller->inlined_to
 741                          ? edge->caller->inlined_to
 742                          : edge->caller);
 743
 744   if (freq > 0)
 745     uninlined_call_time *= freq;
 746   else
 747     uninlined_call_time = uninlined_call_time >> 11;
 748
 749   sreal caller_time = ipa_fn_summaries->get (caller)->time;
 750   return uninlined_call_time + caller_time;
 751 }
 752
 753 /* Same as compute_uinlined_call_time but compute time when inlining
 754    does happen.  */
 755
 756 inline sreal
 757 compute_inlined_call_time (struct cgraph_edge *edge,
 758                            sreal time,
 759                            sreal freq)
 760 {
 761   cgraph_node *caller = (edge->caller->inlined_to
 762                          ? edge->caller->inlined_to
 763                          : edge->caller);
 764   sreal caller_time = ipa_fn_summaries->get (caller)->time;
 765
 766   if (freq > 0)
 767     time *= freq;
 768   else
 769     time = time >> 11;
 770
 771   /* This calculation should match one in ipa-inline-analysis.c
 772      (estimate_edge_size_and_time).  */
 773   time -= (sreal)ipa_call_summaries->get (edge)->call_stmt_time * freq;
 774   time += caller_time;
 775   if (time <= 0)
 776     time = ((sreal) 1) >> 8;
 777   gcc_checking_assert (time >= 0);
 778   return time;
 779 }
 780
 781 /* Determine time saved by inlining EDGE of frequency FREQ
 782    where callee's runtime w/o inlining is UNINLINED_TYPE
 783    and with inlined is INLINED_TYPE.  */
 784
 785 inline sreal
 786 inlining_speedup (struct cgraph_edge *edge,
 787                   sreal freq,
 788                   sreal uninlined_time,
 789                   sreal inlined_time)
 790 {
 791   sreal speedup = uninlined_time - inlined_time;
 792   /* Handling of call_time should match one in ipa-inline-fnsummary.c
 793      (estimate_edge_size_and_time).  */
 794   sreal call_time = ipa_call_summaries->get (edge)->call_stmt_time;
 795
 796   if (freq > 0)
 797     {
 798       speedup = (speedup + call_time);
 799       if (freq != 1)
 800        speedup = speedup * freq;
 801     }
 802   else if (freq == 0)
 803     speedup = speedup >> 11;
 804   gcc_checking_assert (speedup >= 0);
 805   return speedup;
 806 }
 807
 808 /* Return true if the speedup for inlining E is bigger than
 809    PARAM_MAX_INLINE_MIN_SPEEDUP.  */
 810
 811 static bool
 812 big_speedup_p (struct cgraph_edge *e)
 813 {
 814   sreal unspec_time;
 815   sreal spec_time = estimate_edge_time (e, &unspec_time);
 816   sreal freq = e->sreal_frequency ();
 817   sreal time = compute_uninlined_call_time (e, unspec_time, freq);
 818   sreal inlined_time = compute_inlined_call_time (e, spec_time, freq);
 819   cgraph_node *caller = (e->caller->inlined_to
 820                          ? e->caller->inlined_to
 821                          : e->caller);
 822   int limit = opt_for_fn (caller->decl, param_inline_min_speedup);
 823
 824   if ((time - inlined_time) * 100 > time * limit)
 825     return true;
 826   return false;
 827 }
 828
 829 /* Return true if we are interested in inlining small function.
 830    When REPORT is true, report reason to dump file.
        E is the call edge under consideration.  Whenever the answer is
        false the reason is stored in E->inline_failed, except when E
        already carries a failure of type CIF_FINAL_ERROR, which is left
        as it is.  The tests are ordered so that the cheap ones run first
        and big_speedup_p / growth_positive_p are only evaluated when they
        can still change the outcome.  */
 831
 832 static bool
 833 want_inline_small_function_p (struct cgraph_edge *e, bool report)
 834 {
 835   bool want_inline = true;
 836   struct cgraph_node *callee = e->callee->ultimate_alias_target ();
       /* TO is the function the caller has been inlined into, if any,
          otherwise the caller itself.  */
 837   cgraph_node *to  = (e->caller->inlined_to
 838                       ? e->caller->inlined_to : e->caller);
 839
 840   /* Allow this function to be called before can_inline_edge_p,
 841      since it's usually cheaper.  */
 842   if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
 843     want_inline = false;
       /* Callees that disregard inline limits skip every size check
          below.  */
 844   else if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
 845     ;
       /* Not declared inline and automatic inlining of small functions is
          disabled for the caller.  */
 846   else if (!DECL_DECLARED_INLINE_P (callee->decl)
 847            && !opt_for_fn (e->caller->decl, flag_inline_small_functions))
 848     {
 849       e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
 850       want_inline = false;
 851     }
 852   /* Do fast and conservative check if the function can be good
 853      inline candidate.  */
 854   else if ((!DECL_DECLARED_INLINE_P (callee->decl)
 855            && (!e->count.ipa ().initialized_p () || !e->maybe_hot_p ()))
 856            && ipa_fn_summaries->get (callee)->min_size
 857                 - ipa_call_summaries->get (e)->call_stmt_size
 858               > inline_insns_auto (e->caller, true))
 859     {
 860       e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
 861       want_inline = false;
 862     }
       /* Same quick check against the "single" limit for callees declared
          inline or edges with a nonzero IPA count.  */
 863   else if ((DECL_DECLARED_INLINE_P (callee->decl)
 864             || e->count.ipa ().nonzero_p ())
 865            && ipa_fn_summaries->get (callee)->min_size
 866                 - ipa_call_summaries->get (e)->call_stmt_size
 867               > inline_insns_single (e->caller, true))
 868     {
 869       e->inline_failed = (DECL_DECLARED_INLINE_P (callee->decl)
 870                           ? CIF_MAX_INLINE_INSNS_SINGLE_LIMIT
 871                           : CIF_MAX_INLINE_INSNS_AUTO_LIMIT);
 872       want_inline = false;
 873     }
 874   else
 875     {
 876       int growth = estimate_edge_growth (e);
 877       ipa_hints hints = estimate_edge_hints (e);
           /* APPLY_HINTS is set when any of the listed hints is present;
              it is passed on to inline_insns_single and inline_insns_auto
              below.  */
 878       bool apply_hints = (hints & (INLINE_HINT_indirect_call
 879                                    | INLINE_HINT_known_hot
 880                                    | INLINE_HINT_loop_iterations
 881                                    | INLINE_HINT_loop_stride));
 882
           /* Growth within param_max_inline_insns_size is always
              accepted.  */
 883       if (growth <= opt_for_fn (to->decl,
 884                                 param_max_inline_insns_size))
 885         ;
 886       /* Apply param_max_inline_insns_single limit.  Do not do so when
 887          hints suggests that inlining given function is very profitable.
 888          Avoid computation of big_speedup_p when not necessary to change
 889          outcome of decision.  */
 890       else if (DECL_DECLARED_INLINE_P (callee->decl)
 891                && growth >= inline_insns_single (e->caller, apply_hints)
 892                && (apply_hints
 893                    || growth >= inline_insns_single (e->caller, true)
 894                    || !big_speedup_p (e)))
 895         {
 896           e->inline_failed = CIF_MAX_INLINE_INSNS_SINGLE_LIMIT;
 897           want_inline = false;
 898         }
           /* Callee is not declared inline and flag_inline_functions is
              off for the caller: growth of param_max_inline_insns_small
              or more is rejected if the checks below confirm it.  */
 899       else if (!DECL_DECLARED_INLINE_P (callee->decl)
 900                && !opt_for_fn (e->caller->decl, flag_inline_functions)
 901                && growth >= opt_for_fn (to->decl,
 902                                         param_max_inline_insns_small))
 903         {
 904           /* growth_positive_p is expensive, always test it last.  */
 905           if (growth >= inline_insns_single (e->caller, false)
 906               || growth_positive_p (callee, e, growth))
 907             {
 908               e->inline_failed = CIF_NOT_DECLARED_INLINED;
 909               want_inline = false;
 910             }
 911         }
 912       /* Apply param_max_inline_insns_auto limit for functions not declared
 913          inline.  Bypass the limit when speedup seems big.  */
 914       else if (!DECL_DECLARED_INLINE_P (callee->decl)
 915                && growth >= inline_insns_auto (e->caller, apply_hints)
 916                && (apply_hints
 917                    || growth >= inline_insns_auto (e->caller, true)
 918                    || !big_speedup_p (e)))
 919         {
 920           /* growth_positive_p is expensive, always test it last.  */
 921           if (growth >= inline_insns_single (e->caller, false)
 922               || growth_positive_p (callee, e, growth))
 923             {
 924               e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
 925               want_inline = false;
 926             }
 927         }
 928       /* If call is cold, do not inline when function body would grow. */
 929       else if (!e->maybe_hot_p ()
 930                && (growth >= inline_insns_single (e->caller, false)
 931                    || growth_positive_p (callee, e, growth)))
 932         {
 933           e->inline_failed = CIF_UNLIKELY_CALL;
 934           want_inline = false;
 935         }
 936     }
 937   if (!want_inline && report)
 938     report_inline_failed_reason (e);
 939   return want_inline;
 940 }
 941
 942 /* EDGE is self recursive edge.
 943    We handle two cases - when function A is inlining into itself
 944    or when function A is being inlined into another inliner copy of function
 945    A within function B.
 946
 947    In first case OUTER_NODE points to the toplevel copy of A, while
 948    in the second case OUTER_NODE points to the outermost copy of A in B.
 949
 950    In both cases we want to be extra selective since
 951    inlining the call will just introduce new recursive calls to appear.  */
 952
 953 static bool
 954 want_inline_self_recursive_call_p (struct cgraph_edge *edge,
 955                                    struct cgraph_node *outer_node,
 956                                    bool peeling,
 957                                    int depth)
 958 {
 959   char const *reason = NULL;
 960   bool want_inline = true;
 961   sreal caller_freq = 1;
 962   int max_depth = opt_for_fn (outer_node->decl,
 963                               param_max_inline_recursive_depth_auto);
 964
 965   if (DECL_DECLARED_INLINE_P (edge->caller->decl))
 966     max_depth = opt_for_fn (outer_node->decl,
 967                             param_max_inline_recursive_depth);
 968
 969   if (!edge->maybe_hot_p ())
 970     {
 971       reason = "recursive call is cold";
 972       want_inline = false;
 973     }
 974   else if (depth > max_depth)
 975     {
 976       reason = "--param max-inline-recursive-depth exceeded.";
 977       want_inline = false;
 978     }
 979   else if (outer_node->inlined_to
 980            && (caller_freq = outer_node->callers->sreal_frequency ()) == 0)
 981     {
 982       reason = "caller frequency is 0";
 983       want_inline = false;
 984     }
 985
 986   if (!want_inline)
 987     ;
 988   /* Inlining of self recursive function into copy of itself within other
 989      function is transformation similar to loop peeling.
 990
 991      Peeling is profitable if we can inline enough copies to make probability
 992      of actual call to the self recursive function very small.  Be sure that
 993      the probability of recursion is small.
 994
 995      We ensure that the frequency of recursing is at most 1 - (1/max_depth).
 996      This way the expected number of recursion is at most max_depth.  */
 997   else if (peeling)
 998     {
 999       sreal max_prob = (sreal)1 - ((sreal)1 / (sreal)max_depth);
1000       int i;
1001       for (i = 1; i < depth; i++)
1002         max_prob = max_prob * max_prob;
1003       if (edge->sreal_frequency () >= max_prob * caller_freq)
1004         {
1005           reason = "frequency of recursive call is too large";
1006           want_inline = false;
1007         }
1008     }
1009   /* Recursive inlining, i.e. equivalent of unrolling, is profitable if
1010      recursion depth is large.  We reduce function call overhead and increase
1011      chances that things fit in hardware return predictor.
1012
1013      Recursive inlining might however increase cost of stack frame setup
1014      actually slowing down functions whose recursion tree is wide rather than
1015      deep.
1016
1017      Deciding reliably on when to do recursive inlining without profile feedback
1018      is tricky.  For now we disable recursive inlining when probability of self
1019      recursion is low.
1020
1021      Recursive inlining of self recursive call within loop also results in
1022      large loop depths that generally optimize badly.  We may want to throttle
1023      down inlining in those cases.  In particular this seems to happen in one
1024      of libstdc++ rb tree methods.  */
1025   else
1026     {
1027       if (edge->sreal_frequency () * 100
1028           <= caller_freq
1029              * opt_for_fn (outer_node->decl,
1030                            param_min_inline_recursive_probability))
1031         {
1032           reason = "frequency of recursive call is too small";
1033           want_inline = false;
1034         }
1035     }
1036   if (!want_inline && dump_enabled_p ())
1037     dump_printf_loc (MSG_MISSED_OPTIMIZATION, edge->call_stmt,
1038                      "   not inlining recursively: %s\n", reason);
1039   return want_inline;
1040 }
1041
1042 /* Return true when NODE has uninlinable caller;
1043    set HAS_HOT_CALL if it has hot call.
1044    Worker for cgraph_for_node_and_aliases.  */
1045
1046 static bool
1047 check_callers (struct cgraph_node *node, void *has_hot_call)
1048 {
1049   struct cgraph_edge *e;
1050    for (e = node->callers; e; e = e->next_caller)
1051      {
1052        if (!opt_for_fn (e->caller->decl, flag_inline_functions_called_once)
1053            || !opt_for_fn (e->caller->decl, optimize))
1054          return true;
1055        if (!can_inline_edge_p (e, true))
1056          return true;
1057        if (e->recursive_p ())
1058          return true;
1059        if (!can_inline_edge_by_limits_p (e, true))
1060          return true;
1061        if (!(*(bool *)has_hot_call) && e->maybe_hot_p ())
1062          *(bool *)has_hot_call = true;
1063      }
1064   return false;
1065 }
1066
1067 /* If NODE has a caller, return true.  */
1068
1069 static bool
1070 has_caller_p (struct cgraph_node *node, void *data ATTRIBUTE_UNUSED)
1071 {
1072   if (node->callers)
1073     return true;
1074   return false;
1075 }
1076
1077 /* Decide if inlining NODE would reduce unit size by eliminating
1078    the offline copy of function.
1079    When COLD is true the cold calls are considered, too.  */
1080
1081 static bool
1082 want_inline_function_to_all_callers_p (struct cgraph_node *node, bool cold)
1083 {
1084   bool has_hot_call = false;
1085
1086   /* Aliases gets inlined along with the function they alias.  */
1087   if (node->alias)
1088     return false;
1089   /* Already inlined?  */
1090   if (node->inlined_to)
1091     return false;
1092   /* Does it have callers?  */
1093   if (!node->call_for_symbol_and_aliases (has_caller_p, NULL, true))
1094     return false;
1095   /* Inlining into all callers would increase size?  */
1096   if (growth_positive_p (node, NULL, INT_MIN) > 0)
1097     return false;
1098   /* All inlines must be possible.  */
1099   if (node->call_for_symbol_and_aliases (check_callers, &has_hot_call,
1100                                          true))
1101     return false;
1102   if (!cold && !has_hot_call)
1103     return false;
1104   return true;
1105 }
1106
1107 /* Return true if WHERE of SIZE is a possible candidate for wrapper heuristics
1108    in estimate_edge_badness.  */
1109
1110 static bool
1111 wrapper_heuristics_may_apply (struct cgraph_node *where, int size)
1112 {
1113   return size < (DECL_DECLARED_INLINE_P (where->decl)
1114                  ? inline_insns_single (where, false)
1115                  : inline_insns_auto (where, false));
1116 }
1117
1118 /* A cost model driving the inlining heuristics in a way so the edges with
1119    smallest badness are inlined first.  After each inlining is performed
1120    the costs of all caller edges of nodes affected are recomputed so the
1121    metrics may accurately depend on values such as number of inlinable callers
1122    of the function or function body size.
        EDGE is the call whose badness is computed.  When DUMP is true the
        individual steps of the computation are printed to dump_file.  */
1123
1124 static sreal
1125 edge_badness (struct cgraph_edge *edge, bool dump)
1126 {
1127   sreal badness;
1128   int growth;
1129   sreal edge_time, unspec_edge_time;
1130   struct cgraph_node *callee = edge->callee->ultimate_alias_target ();
1131   class ipa_fn_summary *callee_info = ipa_fn_summaries->get (callee);
1132   ipa_hints hints;
       /* CALLER is the function the edge's caller has been inlined into,
          if any, otherwise the edge's caller itself.  */
1133   cgraph_node *caller = (edge->caller->inlined_to
1134                          ? edge->caller->inlined_to
1135                          : edge->caller);
1136
1137   growth = estimate_edge_growth (edge);
1138   edge_time = estimate_edge_time (edge, &unspec_edge_time);
1139   hints = estimate_edge_hints (edge);
1140   gcc_checking_assert (edge_time >= 0);
1141   /* Check that inlined time is better, but tolerate some roundoff issues.
1142      FIXME: When callee profile drops to 0 we account calls more.  This
1143      should be fixed by never doing that.  */
1144   gcc_checking_assert ((edge_time * 100
1145                         - callee_info->time * 101).to_int () <= 0
1146                         || callee->count.ipa ().initialized_p ());
1147   gcc_checking_assert (growth <= ipa_size_summaries->get (callee)->size);
1148
1149   if (dump)
1150     {
1151       fprintf (dump_file, "    Badness calculation for %s -> %s\n",
1152                edge->caller->dump_name (),
1153                edge->callee->dump_name ());
1154       fprintf (dump_file, "      size growth %i, time %f unspec %f ",
1155                growth,
1156                edge_time.to_double (),
1157                unspec_edge_time.to_double ());
1158       ipa_dump_hints (dump_file, hints);
1159       if (big_speedup_p (edge))
1160         fprintf (dump_file, " big_speedup");
1161       fprintf (dump_file, "\n");
1162     }
1163
1164   /* Always prefer inlining saving code size.  */
1165   if (growth <= 0)
1166     {
1167       badness = (sreal) (-SREAL_MIN_SIG + growth) << (SREAL_MAX_EXP / 256);
1168       if (dump)
1169         fprintf (dump_file, "      %f: Growth %d <= 0\n", badness.to_double (),
1170                  growth);
1171     }
1172    /* Inlining into EXTERNAL functions is not going to change anything unless
1173       they are themselves inlined.  */
1174    else if (DECL_EXTERNAL (caller->decl))
1175     {
1176       if (dump)
1177         fprintf (dump_file, "      max: function is external\n");
           /* Note that this path returns directly and skips the hint
              adjustments at the end of the function.  */
1178       return sreal::max ();
1179     }
1180   /* When profile is available. Compute badness as:
1181
1182                  time_saved * caller_count
1183      goodness =  -------------------------------------------------
1184                  growth_of_caller * overall_growth * combined_size
1185
1186      badness = - goodness
1187
1188      Again use negative value to make calls with profile appear hotter
1189      than calls without.
1190   */
1191   else if (opt_for_fn (caller->decl, flag_guess_branch_prob)
1192            || caller->count.ipa ().nonzero_p ())
1193     {
1194       sreal numerator, denominator;
1195       int overall_growth;
1196       sreal freq = edge->sreal_frequency ();
1197
1198       numerator = inlining_speedup (edge, freq, unspec_edge_time, edge_time);
           /* Keep the numerator strictly positive so that the badness
              computed below is never zero.  */
1199       if (numerator <= 0)
1200         numerator = ((sreal) 1 >> 8);
1201       if (caller->count.ipa ().nonzero_p ())
1202         numerator *= caller->count.ipa ().to_gcov_type ();
1203       else if (caller->count.ipa ().initialized_p ())
1204         numerator = numerator >> 11;
1205       denominator = growth;
1206
1207       overall_growth = callee_info->growth;
1208
1209       /* Look for inliner wrappers of the form:
1210
1211          inline_caller ()
1212            {
1213              do_fast_job...
1214              if (need_more_work)
1215                noninline_callee ();
1216            }
1217          Without penalizing this case, we usually inline noninline_callee
1218          into the inline_caller because overall_growth is small preventing
1219          further inlining of inline_caller.
1220
1221          Penalize only callgraph edges to functions with small overall
1222          growth ...
1223         */
1224       if (growth > overall_growth
1225           /* ... and having only one caller which is not inlined ... */
1226           && callee_info->single_caller
1227           && !edge->caller->inlined_to
1228           /* ... and edges executed only conditionally ... */
1229           && freq < 1
1230           /* ... consider case where callee is not inline but caller is ... */
1231           && ((!DECL_DECLARED_INLINE_P (edge->callee->decl)
1232                && DECL_DECLARED_INLINE_P (caller->decl))
1233               /* ... or when early optimizers decided to split and edge
1234                  frequency still indicates splitting is a win ... */
1235               || (callee->split_part && !caller->split_part
1236                   && freq * 100
1237                          < opt_for_fn (caller->decl,
1238                                        param_partial_inlining_entry_probability)
1239                   /* ... and do not overwrite user specified hints.   */
1240                   && (!DECL_DECLARED_INLINE_P (edge->callee->decl)
1241                       || DECL_DECLARED_INLINE_P (caller->decl)))))
1242         {
1243           ipa_fn_summary *caller_info = ipa_fn_summaries->get (caller);
1244           int caller_growth = caller_info->growth;
1245
1246           /* Only apply the penalty when caller looks like inline candidate,
1247              and it is not called once.  */
1248           if (!caller_info->single_caller && overall_growth < caller_growth
1249               && caller_info->inlinable
1250               && wrapper_heuristics_may_apply
1251                  (caller, ipa_size_summaries->get (caller)->size))
1252             {
1253               if (dump)
1254                 fprintf (dump_file,
1255                          "     Wrapper penalty. Increasing growth %i to %i\n",
1256                          overall_growth, caller_growth);
1257               overall_growth = caller_growth;
1258             }
1259         }
1260       if (overall_growth > 0)
1261         {
1262           /* Strongly prefer functions with few callers that can be inlined
1263              fully.  The square root here leads to smaller binaries at average.
1264              Watch however for extreme cases and return to linear function
1265              when growth is large.  */
               /* Both forms yield 256 * 256 at overall_growth == 256, so
                  the switch from quadratic to linear is continuous.  */
1266           if (overall_growth < 256)
1267             overall_growth *= overall_growth;
1268           else
1269             overall_growth += 256 * 256 - 256;
1270           denominator *= overall_growth;
1271         }
1272       denominator *= ipa_size_summaries->get (caller)->size + growth;
1273
1274       badness = - numerator / denominator;
1275
1276       if (dump)
1277         {
1278           fprintf (dump_file,
1279                    "      %f: guessed profile. frequency %f, count %" PRId64
1280                    " caller count %" PRId64
1281                    " time saved %f"
1282                    " overall growth %i (current) %i (original)"
1283                    " %i (compensated)\n",
1284                    badness.to_double (),
1285                    freq.to_double (),
1286                    edge->count.ipa ().initialized_p () ? edge->count.ipa ().to_gcov_type () : -1,
1287                    caller->count.ipa ().initialized_p () ? caller->count.ipa ().to_gcov_type () : -1,
1288                    inlining_speedup (edge, freq, unspec_edge_time, edge_time).to_double (),
1289                    estimate_growth (callee),
1290                    callee_info->growth, overall_growth);
1291         }
1292     }
1293   /* When function local profile is not available or it does not give
1294      useful information (i.e. frequency is zero), base the cost on
1295      loop nest and overall size growth, so we optimize for overall number
1296      of functions fully inlined in program.  */
1297   else
1298     {
           /* The loop depth used for scaling is capped at 8.  */
1299       int nest = MIN (ipa_call_summaries->get (edge)->loop_depth, 8);
1300       badness = growth;
1301
1302       /* Decrease badness if call is nested.  */
1303       if (badness > 0)
1304         badness = badness >> nest;
1305       else
1306         badness = badness << nest;
1307       if (dump)
1308         fprintf (dump_file, "      %f: no profile. nest %i\n",
1309                  badness.to_double (), nest);
1310     }
1311   gcc_checking_assert (badness != 0);
1312
       /* Adjust the badness according to properties of the edge and the
          collected hints.  The shift amount depends on the sign of the
          badness.  NOTE(review): assuming sreal::shift (N) scales by 2^N,
          each adjustment moves positive and negative badness in the same
          direction on the number line - confirm against sreal.h.  */
1313   if (edge->recursive_p ())
1314     badness = badness.shift (badness > 0 ? 4 : -4);
1315   if ((hints & (INLINE_HINT_indirect_call
1316                 | INLINE_HINT_loop_iterations
1317                 | INLINE_HINT_loop_stride))
1318       || callee_info->growth <= 0)
1319     badness = badness.shift (badness > 0 ? -2 : 2);
       /* At most one of the following three adjustments is applied.  */
1320   if (hints & (INLINE_HINT_same_scc))
1321     badness = badness.shift (badness > 0 ? 3 : -3);
1322   else if (hints & (INLINE_HINT_in_scc))
1323     badness = badness.shift (badness > 0 ? 2 : -2);
1324   else if (hints & (INLINE_HINT_cross_module))
1325     badness = badness.shift (badness > 0 ? 1 : -1);
1326   if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
1327     badness = badness.shift (badness > 0 ? -4 : 4);
1328   else if ((hints & INLINE_HINT_declared_inline))
1329     badness = badness.shift (badness > 0 ? -3 : 3);
1330   if (dump)
1331     fprintf (dump_file, "      Adjusted by hints %f\n", badness.to_double ());
1332   return badness;
1333 }
1334
1335 /* Recompute badness of EDGE and update its key in HEAP if needed.  */
1336 static inline void
1337 update_edge_key (edge_heap_t *heap, struct cgraph_edge *edge)
1338 {
1339   sreal badness = edge_badness (edge, false);
1340   if (edge->aux)
1341     {
1342       edge_heap_node_t *n = (edge_heap_node_t *) edge->aux;
1343       gcc_checking_assert (n->get_data () == edge);
1344
1345       /* fibonacci_heap::replace_key does busy updating of the
1346          heap that is unnecessarily expensive.
1347          We do lazy increases: after extracting minimum if the key
1348          turns out to be out of date, it is re-inserted into heap
1349          with correct value.  */
1350       if (badness < n->get_key ())
1351         {
1352           if (dump_file && (dump_flags & TDF_DETAILS))
1353             {
1354               fprintf (dump_file,
1355                        "  decreasing badness %s -> %s, %f to %f\n",
1356                        edge->caller->dump_name (),
1357                        edge->callee->dump_name (),
1358                        n->get_key ().to_double (),
1359                        badness.to_double ());
1360             }
1361           heap->decrease_key (n, badness);
1362         }
1363     }
1364   else
1365     {
1366        if (dump_file && (dump_flags & TDF_DETAILS))
1367          {
1368            fprintf (dump_file,
1369                     "  enqueuing call %s -> %s, badness %f\n",
1370                     edge->caller->dump_name (),
1371                     edge->callee->dump_name (),
1372                     badness.to_double ());
1373          }
1374       edge->aux = heap->insert (badness, edge);
1375     }
1376 }
1377
1378
1379 /* NODE was inlined.
1380    All caller edges need to be reset because
1381    size estimates change.  Similarly callees need to be reset
1382    because better context may be known.  */
1383
1384 static void
1385 reset_edge_caches (struct cgraph_node *node)
1386 {
1387   struct cgraph_edge *edge;
1388   struct cgraph_edge *e = node->callees;
1389   struct cgraph_node *where = node;
1390   struct ipa_ref *ref;
1391
       /* Caller-side caches are reset for the function NODE has been
          inlined into, if any, otherwise for NODE itself.  */
1392   if (where->inlined_to)
1393     where = where->inlined_to;
1394
1395   reset_node_cache (where);
1396
       /* Drop the cached growth of every not yet inlined call to WHERE.  */
1397   if (edge_growth_cache != NULL)
1398     for (edge = where->callers; edge; edge = edge->next_caller)
1399       if (edge->inline_failed)
1400         edge_growth_cache->remove (edge);
1401
       /* Do the same for every alias of WHERE.  */
1402   FOR_EACH_ALIAS (where, ref)
1403     reset_edge_caches (dyn_cast <cgraph_node *> (ref->referring));
1404
1405   if (!e)
1406     return;
1407
       /* Walk all call edges below NODE without recursion: descend into the
          callees of edges that have been inlined, drop the cached growth of
          edges that have not, and once a callee list is exhausted climb
          back up until an edge with a following sibling is found or NODE
          itself is reached.  */
1408   while (true)
1409     if (!e->inline_failed && e->callee->callees)
1410       e = e->callee->callees;
1411     else
1412       {
1413         if (edge_growth_cache != NULL && e->inline_failed)
1414           edge_growth_cache->remove (e);
1415         if (e->next_callee)
1416           e = e->next_callee;
1417         else
1418           {
1419             do
1420               {
1421                 if (e->caller == node)
1422                   return;
                     /* NOTE(review): presumably the caller is an inline
                        clone here, so its callers list holds just the edge
                        it was inlined through - confirm.  */
1423                 e = e->caller->callers;
1424               }
1425             while (!e->next_callee);
1426             e = e->next_callee;
1427           }
1428       }
1429 }
1430
1431 /* Recompute HEAP nodes for each of caller of NODE.
1432    UPDATED_NODES track nodes we already visited, to avoid redundant work.
1433    When CHECK_INLINABLITY_FOR is set, re-check for specified edge that
1434    it is inlinable. Otherwise check all edges.  */
1435
1436 static void
1437 update_caller_keys (edge_heap_t *heap, struct cgraph_node *node,
1438                     bitmap updated_nodes,
1439                     struct cgraph_edge *check_inlinablity_for)
1440 {
1441   struct cgraph_edge *edge;
1442   struct ipa_ref *ref;
1443
1444   if ((!node->alias && !ipa_fn_summaries->get (node)->inlinable)
1445       || node->inlined_to)
1446     return;
1447   if (!bitmap_set_bit (updated_nodes, node->get_uid ()))
1448     return;
1449
1450   FOR_EACH_ALIAS (node, ref)
1451     {
1452       struct cgraph_node *alias = dyn_cast <cgraph_node *> (ref->referring);
1453       update_caller_keys (heap, alias, updated_nodes, check_inlinablity_for);
1454     }
1455
1456   for (edge = node->callers; edge; edge = edge->next_caller)
1457     if (edge->inline_failed)
1458       {
1459         if (!check_inlinablity_for
1460             || check_inlinablity_for == edge)
1461           {
1462             if (can_inline_edge_p (edge, false)
1463                 && want_inline_small_function_p (edge, false)
1464                 && can_inline_edge_by_limits_p (edge, false))
1465               update_edge_key (heap, edge);
1466             else if (edge->aux)
1467               {
1468                 report_inline_failed_reason (edge);
1469                 heap->delete_node ((edge_heap_node_t *) edge->aux);
1470                 edge->aux = NULL;
1471               }
1472           }
1473         else if (edge->aux)
1474           update_edge_key (heap, edge);
1475       }
1476 }
1477
1478 /* Recompute HEAP nodes for each uninlined call in NODE
1479    If UPDATE_SINCE is non-NULL check if edges called within that function
1480    are inlinable (typically UPDATE_SINCE is the inline clone we introduced
1481    where all edges have new context).
1482
1483    This is used when we know that edge badnesses are going only to increase
1484    (we introduced new call site) and thus all we need is to insert newly
1485    created edges into heap.
        Edges whose callee is already marked in UPDATED_NODES are left
        alone.  */
1486
1487 static void
1488 update_callee_keys (edge_heap_t *heap, struct cgraph_node *node,
1489                     struct cgraph_node *update_since,
1490                     bitmap updated_nodes)
1491 {
1492   struct cgraph_edge *e = node->callees;
       /* True while the walk is at or below UPDATE_SINCE.  */
1493   bool check_inlinability = update_since == node;
1494
1495   if (!e)
1496     return;
       /* Walk all call edges below NODE without recursion: descend through
          edges that have been inlined, process the others, and climb back
          up once a callee list is exhausted.  */
1497   while (true)
1498     if (!e->inline_failed && e->callee->callees)
1499       {
1500         if (e->callee == update_since)
1501           check_inlinability = true;
1502         e = e->callee->callees;
1503       }
1504     else
1505       {
1506         enum availability avail;
1507         struct cgraph_node *callee;
             /* Outside of UPDATE_SINCE only refresh the key of edges that
                are already queued.  */
1508         if (!check_inlinability)
1509           {
1510             if (e->aux
1511                 && !bitmap_bit_p (updated_nodes,
1512                                   e->callee->ultimate_alias_target
1513                                     (&avail, e->caller)->get_uid ()))
1514               update_edge_key (heap, e);
1515           }
1516         /* We do not reset callee growth cache here.  Since we added a new call,
1517            growth should have just increased and consequently badness metric
1518            don't need updating.  */
1519         else if (e->inline_failed
1520                  && (callee = e->callee->ultimate_alias_target (&avail,
1521                                                                 e->caller))
1522                  && avail >= AVAIL_AVAILABLE
1523                  && ipa_fn_summaries->get (callee) != NULL
1524                  && ipa_fn_summaries->get (callee)->inlinable
1525                  && !bitmap_bit_p (updated_nodes, callee->get_uid ()))
1526           {
1527             if (can_inline_edge_p (e, false)
1528                 && want_inline_small_function_p (e, false)
1529                 && can_inline_edge_by_limits_p (e, false))
1530               {
                     /* NOTE(review): this branch is only reached with
                        check_inlinability set, so both assertions below
                        hold trivially.  */
1531                 gcc_checking_assert (check_inlinability || can_inline_edge_p (e, false));
1532                 gcc_checking_assert (check_inlinability || e->aux);
1533                 update_edge_key (heap, e);
1534               }
1535             else if (e->aux)
1536               {
1537                 report_inline_failed_reason (e);
1538                 heap->delete_node ((edge_heap_node_t *) e->aux);
1539                 e->aux = NULL;
1540               }
1541           }
1542         /* In case we redirected to unreachable node we only need to remove the
1543            fibheap entry.  */
1544         else if (e->aux)
1545           {
1546             heap->delete_node ((edge_heap_node_t *) e->aux);
1547             e->aux = NULL;
1548           }
1549         if (e->next_callee)
1550           e = e->next_callee;
1551         else
1552           {
1553             do
1554               {
1555                 if (e->caller == node)
1556                   return;
                     /* Leaving UPDATE_SINCE again: stop re-checking
                        inlinability.  */
1557                 if (e->caller == update_since)
1558                   check_inlinability = false;
1559                 e = e->caller->callers;
1560               }
1561             while (!e->next_callee);
1562             e = e->next_callee;
1563           }
1564       }
1565 }
1566
1567 /* Enqueue all recursive calls from NODE into priority queue depending on
1568    how likely we want to recursively inline the call.  */
1569
1570 static void
1571 lookup_recursive_calls (struct cgraph_node *node, struct cgraph_node *where,
1572                         edge_heap_t *heap)
1573 {
1574   struct cgraph_edge *e;
1575   enum availability avail;
1576
1577   for (e = where->callees; e; e = e->next_callee)
1578     if (e->callee == node
1579         || (e->callee->ultimate_alias_target (&avail, e->caller) == node
1580             && avail > AVAIL_INTERPOSABLE))
1581       heap->insert (-e->sreal_frequency (), e);
1582   for (e = where->callees; e; e = e->next_callee)
1583     if (!e->inline_failed)
1584       lookup_recursive_calls (node, e->callee, heap);
1585 }
1586
1587 /* Decide on recursive inlining: in the case function has recursive calls,
1588    inline until body size reaches given argument.  If any new indirect edges
1589    are discovered in the process, add them to *NEW_EDGES, unless NEW_EDGES
1590    is NULL.  */
1591
1592 static bool
1593 recursive_inlining (struct cgraph_edge *edge,
1594                     vec<cgraph_edge *> *new_edges)
1595 {
1596   cgraph_node *to  = (edge->caller->inlined_to
1597                       ? edge->caller->inlined_to : edge->caller);
1598   int limit = opt_for_fn (to->decl,
1599                           param_max_inline_insns_recursive_auto);
1600   edge_heap_t heap (sreal::min ());
1601   struct cgraph_node *node;
1602   struct cgraph_edge *e;
1603   struct cgraph_node *master_clone = NULL, *next;
1604   int depth = 0;
1605   int n = 0;
1606
1607   node = edge->caller;
1608   if (node->inlined_to)
1609     node = node->inlined_to;
1610
1611   if (DECL_DECLARED_INLINE_P (node->decl))
1612     limit = opt_for_fn (to->decl, param_max_inline_insns_recursive);
1613
1614   /* Make sure that function is small enough to be considered for inlining.  */
1615   if (estimate_size_after_inlining (node, edge)  >= limit)
1616     return false;
1617   lookup_recursive_calls (node, node, &heap);
1618   if (heap.empty ())
1619     return false;
1620
1621   if (dump_file)
1622     fprintf (dump_file,
1623              "  Performing recursive inlining on %s\n", node->dump_name ());
1624
1625   /* Do the inlining and update list of recursive call during process.  */
1626   while (!heap.empty ())
1627     {
1628       struct cgraph_edge *curr = heap.extract_min ();
1629       struct cgraph_node *cnode, *dest = curr->callee;
1630
1631       if (!can_inline_edge_p (curr, true)
1632           || !can_inline_edge_by_limits_p (curr, true))
1633         continue;
1634
1635       /* MASTER_CLONE is produced in the case we already started modified
1636          the function. Be sure to redirect edge to the original body before
1637          estimating growths otherwise we will be seeing growths after inlining
1638          the already modified body.  */
1639       if (master_clone)
1640         {
1641           curr->redirect_callee (master_clone);
1642           if (edge_growth_cache != NULL)
1643             edge_growth_cache->remove (curr);
1644         }
1645
1646       if (estimate_size_after_inlining (node, curr) > limit)
1647         {
1648           curr->redirect_callee (dest);
1649           if (edge_growth_cache != NULL)
1650             edge_growth_cache->remove (curr);
1651           break;
1652         }
1653
1654       depth = 1;
1655       for (cnode = curr->caller;
1656            cnode->inlined_to; cnode = cnode->callers->caller)
1657         if (node->decl
1658             == curr->callee->ultimate_alias_target ()->decl)
1659           depth++;
1660
1661       if (!want_inline_self_recursive_call_p (curr, node, false, depth))
1662         {
1663           curr->redirect_callee (dest);
1664           if (edge_growth_cache != NULL)
1665             edge_growth_cache->remove (curr);
1666           continue;
1667         }
1668
1669       if (dump_file)
1670         {
1671           fprintf (dump_file,
1672                    "   Inlining call of depth %i", depth);
1673           if (node->count.nonzero_p () && curr->count.initialized_p ())
1674             {
1675               fprintf (dump_file, " called approx. %.2f times per call",
1676                        (double)curr->count.to_gcov_type ()
1677                        / node->count.to_gcov_type ());
1678             }
1679           fprintf (dump_file, "\n");
1680         }
1681       if (!master_clone)
1682         {
1683           /* We need original clone to copy around.  */
1684           master_clone = node->create_clone (node->decl, node->count,
1685             false, vNULL, true, NULL, NULL);
1686           for (e = master_clone->callees; e; e = e->next_callee)
1687             if (!e->inline_failed)
1688               clone_inlined_nodes (e, true, false, NULL);
1689           curr->redirect_callee (master_clone);
1690           if (edge_growth_cache != NULL)
1691             edge_growth_cache->remove (curr);
1692         }
1693
1694       inline_call (curr, false, new_edges, &overall_size, true);
1695       reset_node_cache (node);
1696       lookup_recursive_calls (node, curr->callee, &heap);
1697       n++;
1698     }
1699
1700   if (!heap.empty () && dump_file)
1701     fprintf (dump_file, "    Recursive inlining growth limit met.\n");
1702
1703   if (!master_clone)
1704     return false;
1705
1706   if (dump_enabled_p ())
1707     dump_printf_loc (MSG_NOTE, edge->call_stmt,
1708                      "\n   Inlined %i times, "
1709                      "body grown from size %i to %i, time %f to %f\n", n,
1710                      ipa_size_summaries->get (master_clone)->size,
1711                      ipa_size_summaries->get (node)->size,
1712                      ipa_fn_summaries->get (master_clone)->time.to_double (),
1713                      ipa_fn_summaries->get (node)->time.to_double ());
1714
1715   /* Remove master clone we used for inlining.  We rely that clones inlined
1716      into master clone gets queued just before master clone so we don't
1717      need recursion.  */
1718   for (node = symtab->first_function (); node != master_clone;
1719        node = next)
1720     {
1721       next = symtab->next_function (node);
1722       if (node->inlined_to == master_clone)
1723         node->remove ();
1724     }
1725   master_clone->remove ();
1726   return true;
1727 }
1728
1729
1730 /* Given whole compilation unit estimate of INSNS, compute how large we can
1731    allow the unit to grow.  */
1732
1733 static int64_t
1734 compute_max_insns (cgraph_node *node, int insns)
1735 {
1736   int max_insns = insns;
1737   if (max_insns < opt_for_fn (node->decl, param_large_unit_insns))
1738     max_insns = opt_for_fn (node->decl, param_large_unit_insns);
1739
1740   return ((int64_t) max_insns
1741           * (100 + opt_for_fn (node->decl, param_inline_unit_growth)) / 100);
1742 }
1743
1744
1745 /* Compute badness of all edges in NEW_EDGES and add them to the HEAP.  */
1746
1747 static void
1748 add_new_edges_to_heap (edge_heap_t *heap, vec<cgraph_edge *> new_edges)
1749 {
1750   while (new_edges.length () > 0)
1751     {
1752       struct cgraph_edge *edge = new_edges.pop ();
1753
1754       gcc_assert (!edge->aux);
1755       gcc_assert (edge->callee);
1756       if (edge->inline_failed
1757           && can_inline_edge_p (edge, true)
1758           && want_inline_small_function_p (edge, true)
1759           && can_inline_edge_by_limits_p (edge, true))
1760         edge->aux = heap->insert (edge_badness (edge, false), edge);
1761     }
1762 }
1763
1764 /* Remove EDGE from the fibheap.  */
1765
1766 static void
1767 heap_edge_removal_hook (struct cgraph_edge *e, void *data)
1768 {
1769   if (e->aux)
1770     {
1771       ((edge_heap_t *)data)->delete_node ((edge_heap_node_t *)e->aux);
1772       e->aux = NULL;
1773     }
1774 }
1775
1776 /* Return true if speculation of edge E seems useful.
1777    If ANTICIPATE_INLINING is true, be conservative and hope that E
1778    may get inlined.  */
1779
1780 bool
1781 speculation_useful_p (struct cgraph_edge *e, bool anticipate_inlining)
1782 {
1783   /* If we have already decided to inline the edge, it seems useful.  */
1784   if (!e->inline_failed)
1785     return true;
1786
1787   enum availability avail;
1788   struct cgraph_node *target = e->callee->ultimate_alias_target (&avail,
1789                                                                  e->caller);
1790
1791   gcc_assert (e->speculative && !e->indirect_unknown_callee);
1792
1793   if (!e->maybe_hot_p ())
1794     return false;
1795
1796   /* See if IP optimizations found something potentially useful about the
1797      function.  For now we look only for CONST/PURE flags.  Almost everything
1798      else we propagate is useless.  */
1799   if (avail >= AVAIL_AVAILABLE)
1800     {
1801       int ecf_flags = flags_from_decl_or_type (target->decl);
1802       if (ecf_flags & ECF_CONST)
1803         {
1804           if (!(e->speculative_call_indirect_edge ()->indirect_info
1805                 ->ecf_flags & ECF_CONST))
1806             return true;
1807         }
1808       else if (ecf_flags & ECF_PURE)
1809         {
1810           if (!(e->speculative_call_indirect_edge ()->indirect_info
1811                 ->ecf_flags & ECF_PURE))
1812             return true;
1813         }
1814     }
1815   /* If we did not managed to inline the function nor redirect
1816      to an ipa-cp clone (that are seen by having local flag set),
1817      it is probably pointless to inline it unless hardware is missing
1818      indirect call predictor.  */
1819   if (!anticipate_inlining && !target->local)
1820     return false;
1821   /* For overwritable targets there is not much to do.  */
1822   if (!can_inline_edge_p (e, false)
1823       || !can_inline_edge_by_limits_p (e, false, true))
1824     return false;
1825   /* OK, speculation seems interesting.  */
1826   return true;
1827 }
1828
1829 /* We know that EDGE is not going to be inlined.
1830    See if we can remove speculation.  */
1831
1832 static void
1833 resolve_noninline_speculation (edge_heap_t *edge_heap, struct cgraph_edge *edge)
1834 {
1835   if (edge->speculative && !speculation_useful_p (edge, false))
1836     {
1837       struct cgraph_node *node = edge->caller;
1838       struct cgraph_node *where = node->inlined_to
1839                                   ? node->inlined_to : node;
1840       auto_bitmap updated_nodes;
1841
1842       if (edge->count.ipa ().initialized_p ())
1843         spec_rem += edge->count.ipa ();
1844       cgraph_edge::resolve_speculation (edge);
1845       reset_edge_caches (where);
1846       ipa_update_overall_fn_summary (where);
1847       update_caller_keys (edge_heap, where,
1848                           updated_nodes, NULL);
1849       update_callee_keys (edge_heap, where, NULL,
1850                           updated_nodes);
1851     }
1852 }
1853
1854 /* Return true if NODE should be accounted for overall size estimate.
1855    Skip all nodes optimized for size so we can measure the growth of hot
1856    part of program no matter of the padding.  */
1857
1858 bool
1859 inline_account_function_p (struct cgraph_node *node)
1860 {
1861    return (!DECL_EXTERNAL (node->decl)
1862            && !opt_for_fn (node->decl, optimize_size)
1863            && node->frequency != NODE_FREQUENCY_UNLIKELY_EXECUTED);
1864 }
1865
1866 /* Count number of callers of NODE and store it into DATA (that
1867    points to int.  Worker for cgraph_for_node_and_aliases.  */
1868
1869 static bool
1870 sum_callers (struct cgraph_node *node, void *data)
1871 {
1872   struct cgraph_edge *e;
1873   int *num_calls = (int *)data;
1874
1875   for (e = node->callers; e; e = e->next_caller)
1876     (*num_calls)++;
1877   return false;
1878 }
1879
1880 /* We only propagate across edges with non-interposable callee.  */
1881
1882 inline bool
1883 ignore_edge_p (struct cgraph_edge *e)
1884 {
1885   enum availability avail;
1886   e->callee->function_or_virtual_thunk_symbol (&avail, e->caller);
1887   return (avail <= AVAIL_INTERPOSABLE);
1888 }
1889
1890 /* We use greedy algorithm for inlining of small functions:
1891    All inline candidates are put into prioritized heap ordered in
1892    increasing badness.
1893
1894    The inlining of small functions is bounded by unit growth parameters.  */
1895
1896 static void
1897 inline_small_functions (void)
1898 {
1899   struct cgraph_node *node;
1900   struct cgraph_edge *edge;
1901   edge_heap_t edge_heap (sreal::min ());
1902   auto_bitmap updated_nodes;
1903   int min_size;
1904   auto_vec<cgraph_edge *> new_indirect_edges;
1905   int initial_size = 0;
1906   struct cgraph_node **order = XCNEWVEC (cgraph_node *, symtab->cgraph_count);
1907   struct cgraph_edge_hook_list *edge_removal_hook_holder;
1908   new_indirect_edges.create (8);
1909
1910   edge_removal_hook_holder
1911     = symtab->add_edge_removal_hook (&heap_edge_removal_hook, &edge_heap);
1912
1913   /* Compute overall unit size and other global parameters used by badness
1914      metrics.  */
1915
1916   max_count = profile_count::uninitialized ();
1917   ipa_reduced_postorder (order, true, ignore_edge_p);
1918   free (order);
1919
1920   FOR_EACH_DEFINED_FUNCTION (node)
1921     if (!node->inlined_to)
1922       {
1923         if (!node->alias && node->analyzed
1924             && (node->has_gimple_body_p () || node->thunk.thunk_p)
1925             && opt_for_fn (node->decl, optimize))
1926           {
1927             class ipa_fn_summary *info = ipa_fn_summaries->get (node);
1928             struct ipa_dfs_info *dfs = (struct ipa_dfs_info *) node->aux;
1929
1930             /* Do not account external functions, they will be optimized out
1931                if not inlined.  Also only count the non-cold portion of program.  */
1932             if (inline_account_function_p (node))
1933               initial_size += ipa_size_summaries->get (node)->size;
1934             info->growth = estimate_growth (node);
1935
1936             int num_calls = 0;
1937             node->call_for_symbol_and_aliases (sum_callers, &num_calls,
1938                                                true);
1939             if (num_calls == 1)
1940               info->single_caller = true;
1941             if (dfs && dfs->next_cycle)
1942               {
1943                 struct cgraph_node *n2;
1944                 int id = dfs->scc_no + 1;
1945                 for (n2 = node; n2;
1946                      n2 = ((struct ipa_dfs_info *) n2->aux)->next_cycle)
1947                   if (opt_for_fn (n2->decl, optimize))
1948                     {
1949                       ipa_fn_summary *info2 = ipa_fn_summaries->get
1950                          (n2->inlined_to ? n2->inlined_to : n2);
1951                       if (info2->scc_no)
1952                         break;
1953                       info2->scc_no = id;
1954                     }
1955               }
1956           }
1957
1958         for (edge = node->callers; edge; edge = edge->next_caller)
1959           max_count = max_count.max (edge->count.ipa ());
1960       }
1961   ipa_free_postorder_info ();
1962   initialize_growth_caches ();
1963
1964   if (dump_file)
1965     fprintf (dump_file,
1966              "\nDeciding on inlining of small functions.  Starting with size %i.\n",
1967              initial_size);
1968
1969   overall_size = initial_size;
1970   min_size = overall_size;
1971
1972   /* Populate the heap with all edges we might inline.  */
1973
1974   FOR_EACH_DEFINED_FUNCTION (node)
1975     {
1976       bool update = false;
1977       struct cgraph_edge *next = NULL;
1978       bool has_speculative = false;
1979
1980       if (!opt_for_fn (node->decl, optimize))
1981         continue;
1982
1983       if (dump_file)
1984         fprintf (dump_file, "Enqueueing calls in %s.\n", node->dump_name ());
1985
1986       for (edge = node->callees; edge; edge = edge->next_callee)
1987         {
1988           if (edge->inline_failed
1989               && !edge->aux
1990               && can_inline_edge_p (edge, true)
1991               && want_inline_small_function_p (edge, true)
1992               && can_inline_edge_by_limits_p (edge, true)
1993               && edge->inline_failed)
1994             {
1995               gcc_assert (!edge->aux);
1996               update_edge_key (&edge_heap, edge);
1997             }
1998           if (edge->speculative)
1999             has_speculative = true;
2000         }
2001       if (has_speculative)
2002         for (edge = node->callees; edge; edge = next)
2003           {
2004             next = edge->next_callee;
2005             if (edge->speculative
2006                 && !speculation_useful_p (edge, edge->aux != NULL))
2007               {
2008                 cgraph_edge::resolve_speculation (edge);
2009                 update = true;
2010               }
2011           }
2012       if (update)
2013         {
2014           struct cgraph_node *where = node->inlined_to
2015                                       ? node->inlined_to : node;
2016           ipa_update_overall_fn_summary (where);
2017           reset_edge_caches (where);
2018           update_caller_keys (&edge_heap, where,
2019                               updated_nodes, NULL);
2020           update_callee_keys (&edge_heap, where, NULL,
2021                               updated_nodes);
2022           bitmap_clear (updated_nodes);
2023         }
2024     }
2025
2026   gcc_assert (in_lto_p
2027               || !(max_count > 0)
2028               || (profile_info && flag_branch_probabilities));
2029
2030   while (!edge_heap.empty ())
2031     {
2032       int old_size = overall_size;
2033       struct cgraph_node *where, *callee;
2034       sreal badness = edge_heap.min_key ();
2035       sreal current_badness;
2036       int growth;
2037
2038       edge = edge_heap.extract_min ();
2039       gcc_assert (edge->aux);
2040       edge->aux = NULL;
2041       if (!edge->inline_failed || !edge->callee->analyzed)
2042         continue;
2043
2044       /* Be sure that caches are maintained consistent.
2045          This check is affected by scaling roundoff errors when compiling for
2046          IPA this we skip it in that case.  */
2047       if (flag_checking && !edge->callee->count.ipa_p ()
2048           && (!max_count.initialized_p () || !max_count.nonzero_p ()))
2049         {
2050           sreal cached_badness = edge_badness (edge, false);
2051
2052           int old_size_est = estimate_edge_size (edge);
2053           sreal old_time_est = estimate_edge_time (edge);
2054           int old_hints_est = estimate_edge_hints (edge);
2055
2056           if (edge_growth_cache != NULL)
2057             edge_growth_cache->remove (edge);
2058           reset_node_cache (edge->caller->inlined_to
2059                             ? edge->caller->inlined_to
2060                             : edge->caller);
2061           gcc_assert (old_size_est == estimate_edge_size (edge));
2062           gcc_assert (old_time_est == estimate_edge_time (edge));
2063           /* FIXME:
2064
2065              gcc_assert (old_hints_est == estimate_edge_hints (edge));
2066
2067              fails with profile feedback because some hints depends on
2068              maybe_hot_edge_p predicate and because callee gets inlined to other
2069              calls, the edge may become cold.
2070              This ought to be fixed by computing relative probabilities
2071              for given invocation but that will be better done once whole
2072              code is converted to sreals.  Disable for now and revert to "wrong"
2073              value so enable/disable checking paths agree.  */
2074           edge_growth_cache->get (edge)->hints = old_hints_est + 1;
2075
2076           /* When updating the edge costs, we only decrease badness in the keys.
2077              Increases of badness are handled lazily; when we see key with out
2078              of date value on it, we re-insert it now.  */
2079           current_badness = edge_badness (edge, false);
2080           gcc_assert (cached_badness == current_badness);
2081           gcc_assert (current_badness >= badness);
2082         }
2083       else
2084         current_badness = edge_badness (edge, false);
2085       if (current_badness != badness)
2086         {
2087           if (edge_heap.min () && current_badness > edge_heap.min_key ())
2088             {
2089               edge->aux = edge_heap.insert (current_badness, edge);
2090               continue;
2091             }
2092           else
2093             badness = current_badness;
2094         }
2095
2096       if (!can_inline_edge_p (edge, true)
2097           || !can_inline_edge_by_limits_p (edge, true))
2098         {
2099           resolve_noninline_speculation (&edge_heap, edge);
2100           continue;
2101         }
2102
2103       callee = edge->callee->ultimate_alias_target ();
2104       growth = estimate_edge_growth (edge);
2105       if (dump_file)
2106         {
2107           fprintf (dump_file,
2108                    "\nConsidering %s with %i size\n",
2109                    callee->dump_name (),
2110                    ipa_size_summaries->get (callee)->size);
2111           fprintf (dump_file,
2112                    " to be inlined into %s in %s:%i\n"
2113                    " Estimated badness is %f, frequency %.2f.\n",
2114                    edge->caller->dump_name (),
2115                    edge->call_stmt
2116                    && (LOCATION_LOCUS (gimple_location ((const gimple *)
2117                                                         edge->call_stmt))
2118                        > BUILTINS_LOCATION)
2119                    ? gimple_filename ((const gimple *) edge->call_stmt)
2120                    : "unknown",
2121                    edge->call_stmt
2122                    ? gimple_lineno ((const gimple *) edge->call_stmt)
2123                    : -1,
2124                    badness.to_double (),
2125                    edge->sreal_frequency ().to_double ());
2126           if (edge->count.ipa ().initialized_p ())
2127             {
2128               fprintf (dump_file, " Called ");
2129               edge->count.ipa ().dump (dump_file);
2130               fprintf (dump_file, " times\n");
2131             }
2132           if (dump_flags & TDF_DETAILS)
2133             edge_badness (edge, true);
2134         }
2135
2136       where = edge->caller;
2137
2138       if (overall_size + growth > compute_max_insns (where, min_size)
2139           && !DECL_DISREGARD_INLINE_LIMITS (callee->decl))
2140         {
2141           edge->inline_failed = CIF_INLINE_UNIT_GROWTH_LIMIT;
2142           report_inline_failed_reason (edge);
2143           resolve_noninline_speculation (&edge_heap, edge);
2144           continue;
2145         }
2146
2147       if (!want_inline_small_function_p (edge, true))
2148         {
2149           resolve_noninline_speculation (&edge_heap, edge);
2150           continue;
2151         }
2152
2153       profile_count old_count = callee->count;
2154
2155       /* Heuristics for inlining small functions work poorly for
2156          recursive calls where we do effects similar to loop unrolling.
2157          When inlining such edge seems profitable, leave decision on
2158          specific inliner.  */
2159       if (edge->recursive_p ())
2160         {
2161           if (where->inlined_to)
2162             where = where->inlined_to;
2163           if (!recursive_inlining (edge,
2164                                    opt_for_fn (edge->caller->decl,
2165                                                flag_indirect_inlining)
2166                                    ? &new_indirect_edges : NULL))
2167             {
2168               edge->inline_failed = CIF_RECURSIVE_INLINING;
2169               resolve_noninline_speculation (&edge_heap, edge);
2170               continue;
2171             }
2172           reset_edge_caches (where);
2173           /* Recursive inliner inlines all recursive calls of the function
2174              at once. Consequently we need to update all callee keys.  */
2175           if (opt_for_fn (edge->caller->decl, flag_indirect_inlining))
2176             add_new_edges_to_heap (&edge_heap, new_indirect_edges);
2177           update_callee_keys (&edge_heap, where, where, updated_nodes);
2178           bitmap_clear (updated_nodes);
2179         }
2180       else
2181         {
2182           struct cgraph_node *outer_node = NULL;
2183           int depth = 0;
2184
2185           /* Consider the case where self recursive function A is inlined
2186              into B.  This is desired optimization in some cases, since it
2187              leads to effect similar of loop peeling and we might completely
2188              optimize out the recursive call.  However we must be extra
2189              selective.  */
2190
2191           where = edge->caller;
2192           while (where->inlined_to)
2193             {
2194               if (where->decl == callee->decl)
2195                 outer_node = where, depth++;
2196               where = where->callers->caller;
2197             }
2198           if (outer_node
2199               && !want_inline_self_recursive_call_p (edge, outer_node,
2200                                                      true, depth))
2201             {
2202               edge->inline_failed
2203                 = (DECL_DISREGARD_INLINE_LIMITS (edge->callee->decl)
2204                    ? CIF_RECURSIVE_INLINING : CIF_UNSPECIFIED);
2205               resolve_noninline_speculation (&edge_heap, edge);
2206               continue;
2207             }
2208           else if (depth && dump_file)
2209             fprintf (dump_file, " Peeling recursion with depth %i\n", depth);
2210
2211           gcc_checking_assert (!callee->inlined_to);
2212
2213           int old_size = ipa_size_summaries->get (where)->size;
2214           sreal old_time = ipa_fn_summaries->get (where)->time;
2215
2216           inline_call (edge, true, &new_indirect_edges, &overall_size, true);
2217           reset_edge_caches (edge->callee);
2218           add_new_edges_to_heap (&edge_heap, new_indirect_edges);
2219
2220           /* If caller's size and time increased we do not need to update
2221              all edges because badness is not going to decrease.  */
2222           if (old_size <= ipa_size_summaries->get (where)->size
2223               && old_time <= ipa_fn_summaries->get (where)->time
2224               /* Wrapper penalty may be non-monotonous in this respect.
2225                  Fortunately it only affects small functions.  */
2226               && !wrapper_heuristics_may_apply (where, old_size))
2227             update_callee_keys (&edge_heap, edge->callee, edge->callee,
2228                                 updated_nodes);
2229           else
2230             update_callee_keys (&edge_heap, where,
2231                                 edge->callee,
2232                                 updated_nodes);
2233         }
2234       where = edge->caller;
2235       if (where->inlined_to)
2236         where = where->inlined_to;
2237
2238       /* Our profitability metric can depend on local properties
2239          such as number of inlinable calls and size of the function body.
2240          After inlining these properties might change for the function we
2241          inlined into (since it's body size changed) and for the functions
2242          called by function we inlined (since number of it inlinable callers
2243          might change).  */
2244       update_caller_keys (&edge_heap, where, updated_nodes, NULL);
2245       /* Offline copy count has possibly changed, recompute if profile is
2246          available.  */
2247       struct cgraph_node *n
2248               = cgraph_node::get (edge->callee->decl)->ultimate_alias_target ();
2249       if (n != edge->callee && n->analyzed && !(n->count == old_count)
2250           && n->count.ipa_p ())
2251         update_callee_keys (&edge_heap, n, NULL, updated_nodes);
2252       bitmap_clear (updated_nodes);
2253
2254       if (dump_enabled_p ())
2255         {
2256           ipa_fn_summary *s = ipa_fn_summaries->get (where);
2257
2258           /* dump_printf can't handle %+i.  */
2259           char buf_net_change[100];
2260           snprintf (buf_net_change, sizeof buf_net_change, "%+i",
2261                     overall_size - old_size);
2262
2263           dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, edge->call_stmt,
2264                            " Inlined %C into %C which now has time %f and "
2265                            "size %i, net change of %s%s.\n",
2266                            edge->callee, edge->caller,
2267                            s->time.to_double (),
2268                            ipa_size_summaries->get (edge->caller)->size,
2269                            buf_net_change,
2270                            cross_module_call_p (edge) ? " (cross module)":"");
2271         }
2272       if (min_size > overall_size)
2273         {
2274           min_size = overall_size;
2275
2276           if (dump_file)
2277             fprintf (dump_file, "New minimal size reached: %i\n", min_size);
2278         }
2279     }
2280
2281   free_growth_caches ();
2282   if (dump_enabled_p ())
2283     dump_printf (MSG_NOTE,
2284                  "Unit growth for small function inlining: %i->%i (%i%%)\n",
2285                  initial_size, overall_size,
2286                  initial_size ? overall_size * 100 / (initial_size) - 100: 0);
2287   symtab->remove_edge_removal_hook (edge_removal_hook_holder);
2288 }
2289
2290 /* Flatten NODE.  Performed both during early inlining and
2291    at IPA inlining time.  */
2292
2293 static void
2294 flatten_function (struct cgraph_node *node, bool early, bool update)
2295 {
2296   struct cgraph_edge *e;
2297
2298   /* We shouldn't be called recursively when we are being processed.  */
2299   gcc_assert (node->aux == NULL);
2300
2301   node->aux = (void *) node;
2302
2303   for (e = node->callees; e; e = e->next_callee)
2304     {
2305       struct cgraph_node *orig_callee;
2306       struct cgraph_node *callee = e->callee->ultimate_alias_target ();
2307
2308       /* We've hit cycle?  It is time to give up.  */
2309       if (callee->aux)
2310         {
2311           if (dump_enabled_p ())
2312             dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2313                              "Not inlining %C into %C to avoid cycle.\n",
2314                              callee, e->caller);
2315           if (cgraph_inline_failed_type (e->inline_failed) != CIF_FINAL_ERROR)
2316             e->inline_failed = CIF_RECURSIVE_INLINING;
2317           continue;
2318         }
2319
2320       /* When the edge is already inlined, we just need to recurse into
2321          it in order to fully flatten the leaves.  */
2322       if (!e->inline_failed)
2323         {
2324           flatten_function (callee, early, false);
2325           continue;
2326         }
2327
2328       /* Flatten attribute needs to be processed during late inlining. For
2329          extra code quality we however do flattening during early optimization,
2330          too.  */
2331       if (!early
2332           ? !can_inline_edge_p (e, true)
2333             && !can_inline_edge_by_limits_p (e, true)
2334           : !can_early_inline_edge_p (e))
2335         continue;
2336
2337       if (e->recursive_p ())
2338         {
2339           if (dump_enabled_p ())
2340             dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2341                              "Not inlining: recursive call.\n");
2342           continue;
2343         }
2344
2345       if (gimple_in_ssa_p (DECL_STRUCT_FUNCTION (node->decl))
2346           != gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee->decl)))
2347         {
2348           if (dump_enabled_p ())
2349             dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2350                              "Not inlining: SSA form does not match.\n");
2351           continue;
2352         }
2353
2354       /* Inline the edge and flatten the inline clone.  Avoid
2355          recursing through the original node if the node was cloned.  */
2356       if (dump_enabled_p ())
2357         dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, e->call_stmt,
2358                          " Inlining %C into %C.\n",
2359                          callee, e->caller);
2360       orig_callee = callee;
2361       inline_call (e, true, NULL, NULL, false);
2362       if (e->callee != orig_callee)
2363         orig_callee->aux = (void *) node;
2364       flatten_function (e->callee, early, false);
2365       if (e->callee != orig_callee)
2366         orig_callee->aux = NULL;
2367     }
2368
2369   node->aux = NULL;
2370   cgraph_node *where = node->inlined_to ? node->inlined_to : node;
2371   if (update && opt_for_fn (where->decl, optimize))
2372     ipa_update_overall_fn_summary (where);
2373 }
2374
2375 /* Inline NODE to all callers.  Worker for cgraph_for_node_and_aliases.
2376    DATA points to number of calls originally found so we avoid infinite
2377    recursion.  */
2378
2379 static bool
2380 inline_to_all_callers_1 (struct cgraph_node *node, void *data,
2381                          hash_set<cgraph_node *> *callers)
2382 {
2383   int *num_calls = (int *)data;
2384   bool callee_removed = false;
2385
2386   while (node->callers && !node->inlined_to)
2387     {
2388       struct cgraph_node *caller = node->callers->caller;
2389
2390       if (!can_inline_edge_p (node->callers, true)
2391           || !can_inline_edge_by_limits_p (node->callers, true)
2392           || node->callers->recursive_p ())
2393         {
2394           if (dump_file)
2395             fprintf (dump_file, "Uninlinable call found; giving up.\n");
2396           *num_calls = 0;
2397           return false;
2398         }
2399
2400       if (dump_file)
2401         {
2402           cgraph_node *ultimate = node->ultimate_alias_target ();
2403           fprintf (dump_file,
2404                    "\nInlining %s size %i.\n",
2405                    ultimate->dump_name (),
2406                    ipa_size_summaries->get (ultimate)->size);
2407           fprintf (dump_file,
2408                    " Called once from %s %i insns.\n",
2409                    node->callers->caller->dump_name (),
2410                    ipa_size_summaries->get (node->callers->caller)->size);
2411         }
2412
2413       /* Remember which callers we inlined to, delaying updating the
2414          overall summary.  */
2415       callers->add (node->callers->caller);
2416       inline_call (node->callers, true, NULL, NULL, false, &callee_removed);
2417       if (dump_file)
2418         fprintf (dump_file,
2419                  " Inlined into %s which now has %i size\n",
2420                  caller->dump_name (),
2421                  ipa_size_summaries->get (caller)->size);
2422       if (!(*num_calls)--)
2423         {
2424           if (dump_file)
2425             fprintf (dump_file, "New calls found; giving up.\n");
2426           return callee_removed;
2427         }
2428       if (callee_removed)
2429         return true;
2430     }
2431   return false;
2432 }
2433
2434 /* Wrapper around inline_to_all_callers_1 doing delayed overall summary
2435    update.  */
2436
2437 static bool
2438 inline_to_all_callers (struct cgraph_node *node, void *data)
2439 {
2440   hash_set<cgraph_node *> callers;
2441   bool res = inline_to_all_callers_1 (node, data, &callers);
2442   /* Perform the delayed update of the overall summary of all callers
2443      processed.  This avoids quadratic behavior in the cases where
2444      we have a lot of calls to the same function.  */
2445   for (hash_set<cgraph_node *>::iterator i = callers.begin ();
2446        i != callers.end (); ++i)
2447     ipa_update_overall_fn_summary ((*i)->inlined_to ? (*i)->inlined_to : *i);
2448   return res;
2449 }
2450
2451 /* Output overall time estimate.  */
2452 static void
2453 dump_overall_stats (void)
2454 {
2455   sreal sum_weighted = 0, sum = 0;
2456   struct cgraph_node *node;
2457
2458   FOR_EACH_DEFINED_FUNCTION (node)
2459     if (!node->inlined_to
2460         && !node->alias)
2461       {
2462         ipa_fn_summary *s = ipa_fn_summaries->get (node);
2463         if (s != NULL)
2464           {
2465           sum += s->time;
2466           if (node->count.ipa ().initialized_p ())
2467             sum_weighted += s->time * node->count.ipa ().to_gcov_type ();
2468           }
2469       }
2470   fprintf (dump_file, "Overall time estimate: "
2471            "%f weighted by profile: "
2472            "%f\n", sum.to_double (), sum_weighted.to_double ());
2473 }
2474
2475 /* Output some useful stats about inlining.  */
2476
2477 static void
2478 dump_inline_stats (void)
2479 {
2480   int64_t inlined_cnt = 0, inlined_indir_cnt = 0;
2481   int64_t inlined_virt_cnt = 0, inlined_virt_indir_cnt = 0;
2482   int64_t noninlined_cnt = 0, noninlined_indir_cnt = 0;
2483   int64_t noninlined_virt_cnt = 0, noninlined_virt_indir_cnt = 0;
2484   int64_t  inlined_speculative = 0, inlined_speculative_ply = 0;
2485   int64_t indirect_poly_cnt = 0, indirect_cnt = 0;
2486   int64_t reason[CIF_N_REASONS][2];
2487   sreal reason_freq[CIF_N_REASONS];
2488   int i;
2489   struct cgraph_node *node;
2490
2491   memset (reason, 0, sizeof (reason));
2492   for (i=0; i < CIF_N_REASONS; i++)
2493     reason_freq[i] = 0;
2494   FOR_EACH_DEFINED_FUNCTION (node)
2495   {
2496     struct cgraph_edge *e;
2497     for (e = node->callees; e; e = e->next_callee)
2498       {
2499         if (e->inline_failed)
2500           {
2501             if (e->count.ipa ().initialized_p ())
2502               reason[(int) e->inline_failed][0] += e->count.ipa ().to_gcov_type ();
2503             reason_freq[(int) e->inline_failed] += e->sreal_frequency ();
2504             reason[(int) e->inline_failed][1] ++;
2505             if (DECL_VIRTUAL_P (e->callee->decl)
2506                 && e->count.ipa ().initialized_p ())
2507               {
2508                 if (e->indirect_inlining_edge)
2509                   noninlined_virt_indir_cnt += e->count.ipa ().to_gcov_type ();
2510                 else
2511                   noninlined_virt_cnt += e->count.ipa ().to_gcov_type ();
2512               }
2513             else if (e->count.ipa ().initialized_p ())
2514               {
2515                 if (e->indirect_inlining_edge)
2516                   noninlined_indir_cnt += e->count.ipa ().to_gcov_type ();
2517                 else
2518                   noninlined_cnt += e->count.ipa ().to_gcov_type ();
2519               }
2520           }
2521         else if (e->count.ipa ().initialized_p ())
2522           {
2523             if (e->speculative)
2524               {
2525                 if (DECL_VIRTUAL_P (e->callee->decl))
2526                   inlined_speculative_ply += e->count.ipa ().to_gcov_type ();
2527                 else
2528                   inlined_speculative += e->count.ipa ().to_gcov_type ();
2529               }
2530             else if (DECL_VIRTUAL_P (e->callee->decl))
2531               {
2532                 if (e->indirect_inlining_edge)
2533                   inlined_virt_indir_cnt += e->count.ipa ().to_gcov_type ();
2534                 else
2535                   inlined_virt_cnt += e->count.ipa ().to_gcov_type ();
2536               }
2537             else
2538               {
2539                 if (e->indirect_inlining_edge)
2540                   inlined_indir_cnt += e->count.ipa ().to_gcov_type ();
2541                 else
2542                   inlined_cnt += e->count.ipa ().to_gcov_type ();
2543               }
2544           }
2545       }
2546     for (e = node->indirect_calls; e; e = e->next_callee)
2547       if (e->indirect_info->polymorphic
2548           & e->count.ipa ().initialized_p ())
2549         indirect_poly_cnt += e->count.ipa ().to_gcov_type ();
2550       else if (e->count.ipa ().initialized_p ())
2551         indirect_cnt += e->count.ipa ().to_gcov_type ();
2552   }
2553   if (max_count.initialized_p ())
2554     {
2555       fprintf (dump_file,
2556                "Inlined %" PRId64 " + speculative "
2557                "%" PRId64 " + speculative polymorphic "
2558                "%" PRId64 " + previously indirect "
2559                "%" PRId64 " + virtual "
2560                "%" PRId64 " + virtual and previously indirect "
2561                "%" PRId64 "\n" "Not inlined "
2562                "%" PRId64 " + previously indirect "
2563                "%" PRId64 " + virtual "
2564                "%" PRId64 " + virtual and previously indirect "
2565                "%" PRId64 " + still indirect "
2566                "%" PRId64 " + still indirect polymorphic "
2567                "%" PRId64 "\n", inlined_cnt,
2568                inlined_speculative, inlined_speculative_ply,
2569                inlined_indir_cnt, inlined_virt_cnt, inlined_virt_indir_cnt,
2570                noninlined_cnt, noninlined_indir_cnt, noninlined_virt_cnt,
2571                noninlined_virt_indir_cnt, indirect_cnt, indirect_poly_cnt);
2572       fprintf (dump_file, "Removed speculations ");
2573       spec_rem.dump (dump_file);
2574       fprintf (dump_file, "\n");
2575     }
2576   dump_overall_stats ();
2577   fprintf (dump_file, "\nWhy inlining failed?\n");
2578   for (i = 0; i < CIF_N_REASONS; i++)
2579     if (reason[i][1])
2580       fprintf (dump_file, "%-50s: %8i calls, %8f freq, %" PRId64" count\n",
2581                cgraph_inline_failed_string ((cgraph_inline_failed_t) i),
2582                (int) reason[i][1], reason_freq[i].to_double (), reason[i][0]);
2583 }
2584
2585 /* Called when node is removed.  */
2586
2587 static void
2588 flatten_remove_node_hook (struct cgraph_node *node, void *data)
2589 {
2590   if (lookup_attribute ("flatten", DECL_ATTRIBUTES (node->decl)) == NULL)
2591     return;
2592
2593   hash_set<struct cgraph_node *> *removed
2594     = (hash_set<struct cgraph_node *> *) data;
2595   removed->add (node);
2596 }
2597
/* Decide on the inlining.  We do so in the topological order to avoid
   expenses on updating data structures.

   The work is done in three stages, in this order:
     1. functions carrying the "flatten" attribute are flattened;
     2. small functions are inlined (inline_small_functions);
     3. useless speculative edges are resolved and functions are inlined
        into all of their callers where that is wanted, first for hot
        and then for cold candidates.

   Returns TODO_remove_functions when stage 3 resolved a speculation or
   selected a function for inlining into all callers, and 0 otherwise.  */

static unsigned int
ipa_inline (void)
{
  struct cgraph_node *node;
  int nnodes;
  struct cgraph_node **order;
  int i, j;
  int cold;
  bool remove_functions = false;

  order = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);

  if (dump_file)
    ipa_dump_fn_summaries (dump_file);

  nnodes = ipa_reverse_postorder (order);
  spec_rem = profile_count::zero ();

  FOR_EACH_FUNCTION (node)
    {
      /* The aux field is used as a visit mark by flatten_function, so it
         has to start out cleared.  */
      node->aux = 0;

      /* Recompute the default reasons for inlining because they may have
         changed during merging.  */
      if (in_lto_p)
        {
          for (cgraph_edge *e = node->callees; e; e = e->next_callee)
            {
              gcc_assert (e->inline_failed);
              initialize_inline_failed (e);
            }
          for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee)
            initialize_inline_failed (e);
        }
    }

  if (dump_file)
    fprintf (dump_file, "\nFlattening functions:\n");

  /* First shrink order array, so that it only contains nodes with
     flatten attribute.  The kept nodes are compacted towards the end of
     the array, preserving their relative order.  */
  for (i = nnodes - 1, j = i; i >= 0; i--)
    {
      node = order[i];
      if (node->definition
          /* Do not try to flatten aliases.  These may happen for example when
             creating local aliases.  */
          && !node->alias
          && lookup_attribute ("flatten",
                               DECL_ATTRIBUTES (node->decl)) != NULL)
        order[j--] = order[i];
    }

  /* After the above loop, order[j + 1] ... order[nnodes - 1] contain
     nodes with flatten attribute.  If there is more than one such
     node, we need to register a node removal hook, as flatten_function
     could remove other nodes with flatten attribute.  See PR82801.
     (nnodes - 1 - j nodes were kept, so j < nnodes - 2 means that at
     least two of them exist.)  */
  struct cgraph_node_hook_list *node_removal_hook_holder = NULL;
  hash_set<struct cgraph_node *> *flatten_removed_nodes = NULL;
  if (j < nnodes - 2)
    {
      flatten_removed_nodes = new hash_set<struct cgraph_node *>;
      node_removal_hook_holder
        = symtab->add_cgraph_removal_hook (&flatten_remove_node_hook,
                                           flatten_removed_nodes);
    }

  /* In the first pass handle functions to be flattened.  Do this with
     a priority so none of our later choices will make this impossible.  */
  for (i = nnodes - 1; i > j; i--)
    {
      node = order[i];
      /* Skip nodes that the removal hook recorded as removed while an
         earlier node was being flattened.  */
      if (flatten_removed_nodes
          && flatten_removed_nodes->contains (node))
        continue;

      /* Handle nodes to be flattened.
         Ideally when processing callees we stop inlining at the
         entry of cycles, possibly cloning that entry point and
         try to flatten itself turning it into a self-recursive
         function.  */
      if (dump_file)
        fprintf (dump_file, "Flattening %s\n", node->dump_name ());
      flatten_function (node, false, true);
    }

  /* Same condition as the one under which the hook was registered.  */
  if (j < nnodes - 2)
    {
      symtab->remove_cgraph_removal_hook (node_removal_hook_holder);
      delete flatten_removed_nodes;
    }
  free (order);

  if (dump_file)
    dump_overall_stats ();

  inline_small_functions ();

  gcc_assert (symtab->state == IPA_SSA);
  symtab->state = IPA_SSA_AFTER_INLINING;
  /* Do first after-inlining removal.  We want to remove all "stale" extern
     inline functions and virtual functions so we really know what is called
     once.  */
  symtab->remove_unreachable_nodes (dump_file);

  /* Inline functions with a property that after inlining into all callers the
     code size will shrink because the out-of-line copy is eliminated.
     We do this regardless on the callee size as long as function growth limits
     are met.  */
  if (dump_file)
    fprintf (dump_file,
             "\nDeciding on functions to be inlined into all callers and "
             "removing useless speculations:\n");

  /* Inlining one function called once has good chance of preventing
     inlining other function into the same callee.  Ideally we should
     work in priority order, but probably inlining hot functions first
     is good cut without the extra pain of maintaining the queue.

     ??? this is not really fitting the bill perfectly: inlining function
     into callee often leads to better optimization of callee due to
     increased context for optimization.
     For example if main() function calls a function that outputs help
     and then function that does the main optimization, we should inline
     the second with priority even if both calls are cold by themselves.

     We probably want to implement new predicate replacing our use of
     maybe_hot_edge interpreted as maybe_hot_edge || callee is known
     to be hot.  */
  for (cold = 0; cold <= 1; cold ++)
    {
      FOR_EACH_DEFINED_FUNCTION (node)
        {
          struct cgraph_edge *edge, *next;
          bool update=false;

          if (!opt_for_fn (node->decl, optimize)
              || !opt_for_fn (node->decl, flag_inline_functions_called_once))
            continue;

          /* NEXT is fetched before EDGE is processed because the edge is
             handed to resolve_speculation below.  */
          for (edge = node->callees; edge; edge = next)
            {
              next = edge->next_callee;
              if (edge->speculative && !speculation_useful_p (edge, false))
                {
                  if (edge->count.ipa ().initialized_p ())
                    spec_rem += edge->count.ipa ();
                  cgraph_edge::resolve_speculation (edge);
                  update = true;
                  remove_functions = true;
                }
            }
          if (update)
            {
              struct cgraph_node *where = node->inlined_to
                                          ? node->inlined_to : node;
              reset_edge_caches (where);
              ipa_update_overall_fn_summary (where);
            }
          if (want_inline_function_to_all_callers_p (node, cold))
            {
              /* NUM_CALLS is filled in by sum_callers and then serves as
                 the budget of inline_to_all_callers; the walk is repeated
                 for as long as it returns true.  */
              int num_calls = 0;
              node->call_for_symbol_and_aliases (sum_callers, &num_calls,
                                                 true);
              while (node->call_for_symbol_and_aliases
                       (inline_to_all_callers, &num_calls, true))
                ;
              remove_functions = true;
            }
        }
    }

  /* Free ipa-prop structures if they are no longer needed.  */
  ipa_free_all_structures_after_iinln ();

  if (dump_enabled_p ())
    dump_printf (MSG_NOTE,
                 "\nInlined %i calls, eliminated %i functions\n\n",
                 ncalls_inlined, nfunctions_inlined);
  if (dump_file)
    dump_inline_stats ();

  if (dump_file)
    ipa_dump_fn_summaries (dump_file);
  return remove_functions ? TODO_remove_functions : 0;
}
2787
2788 /* Inline always-inline function calls in NODE.  */
2789
2790 static bool
2791 inline_always_inline_functions (struct cgraph_node *node)
2792 {
2793   struct cgraph_edge *e;
2794   bool inlined = false;
2795
2796   for (e = node->callees; e; e = e->next_callee)
2797     {
2798       struct cgraph_node *callee = e->callee->ultimate_alias_target ();
2799       if (!DECL_DISREGARD_INLINE_LIMITS (callee->decl))
2800         continue;
2801
2802       if (e->recursive_p ())
2803         {
2804           if (dump_enabled_p ())
2805             dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2806                              "  Not inlining recursive call to %C.\n",
2807                              e->callee);
2808           e->inline_failed = CIF_RECURSIVE_INLINING;
2809           continue;
2810         }
2811
2812       if (!can_early_inline_edge_p (e))
2813         {
2814           /* Set inlined to true if the callee is marked "always_inline" but
2815              is not inlinable.  This will allow flagging an error later in
2816              expand_call_inline in tree-inline.c.  */
2817           if (lookup_attribute ("always_inline",
2818                                  DECL_ATTRIBUTES (callee->decl)) != NULL)
2819             inlined = true;
2820           continue;
2821         }
2822
2823       if (dump_enabled_p ())
2824         dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, e->call_stmt,
2825                          "  Inlining %C into %C (always_inline).\n",
2826                          e->callee, e->caller);
2827       inline_call (e, true, NULL, NULL, false);
2828       inlined = true;
2829     }
2830   if (inlined)
2831     ipa_update_overall_fn_summary (node);
2832
2833   return inlined;
2834 }
2835
2836 /* Decide on the inlining.  We do so in the topological order to avoid
2837    expenses on updating data structures.  */
2838
2839 static bool
2840 early_inline_small_functions (struct cgraph_node *node)
2841 {
2842   struct cgraph_edge *e;
2843   bool inlined = false;
2844
2845   for (e = node->callees; e; e = e->next_callee)
2846     {
2847       struct cgraph_node *callee = e->callee->ultimate_alias_target ();
2848
2849       /* We can encounter not-yet-analyzed function during
2850          early inlining on callgraphs with strongly
2851          connected components.  */
2852       ipa_fn_summary *s = ipa_fn_summaries->get (callee);
2853       if (s == NULL || !s->inlinable || !e->inline_failed)
2854         continue;
2855
2856       /* Do not consider functions not declared inline.  */
2857       if (!DECL_DECLARED_INLINE_P (callee->decl)
2858           && !opt_for_fn (node->decl, flag_inline_small_functions)
2859           && !opt_for_fn (node->decl, flag_inline_functions))
2860         continue;
2861
2862       if (dump_enabled_p ())
2863         dump_printf_loc (MSG_NOTE, e->call_stmt,
2864                          "Considering inline candidate %C.\n",
2865                          callee);
2866
2867       if (!can_early_inline_edge_p (e))
2868         continue;
2869
2870       if (e->recursive_p ())
2871         {
2872           if (dump_enabled_p ())
2873             dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2874                              "  Not inlining: recursive call.\n");
2875           continue;
2876         }
2877
2878       if (!want_early_inline_function_p (e))
2879         continue;
2880
2881       if (dump_enabled_p ())
2882         dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, e->call_stmt,
2883                          " Inlining %C into %C.\n",
2884                          callee, e->caller);
2885       inline_call (e, true, NULL, NULL, false);
2886       inlined = true;
2887     }
2888
2889   if (inlined)
2890     ipa_update_overall_fn_summary (node);
2891
2892   return inlined;
2893 }
2894
/* Entry point of the early inliner for FUN; the work is done on the cgraph
   node of current_function_decl.  Calls to always-inline functions are
   inlined first.  Then, depending on the options and on the attributes of
   the function, it is either left alone, flattened, or run through up to
   param_early_inliner_max_iterations rounds of small function inlining.
   Returns the TODO flags accumulated from optimize_inline_calls; returns 0
   without doing anything when an error was seen or when the ipa-inliner
   data structures already exist.  */

unsigned int
early_inliner (function *fun)
{
  struct cgraph_node *node = cgraph_node::get (current_function_decl);
  struct cgraph_edge *edge;
  unsigned int todo = 0;
  int iterations = 0;
  /* Set while there are inlining decisions that have not been applied by
     optimize_inline_calls yet.  */
  bool inlined = false;

  if (seen_error ())
    return 0;

  /* Do nothing if datastructures for ipa-inliner are already computed.  This
     happens when some pass decides to construct new function and
     cgraph_add_new_function calls lowering passes and early optimization on
     it.  This may confuse ourself when early inliner decide to inline call to
     function clone, because function clones don't have parameter list in
     ipa-prop matching their signature.  */
  if (ipa_node_params_sum)
    return 0;

  if (flag_checking)
    node->verify ();
  node->remove_all_references ();

  /* Even when not optimizing or not inlining inline always-inline
     functions.  */
  inlined = inline_always_inline_functions (node);

  if (!optimize
      || flag_no_inline
      || !flag_early_inlining
      /* Never inline regular functions into always-inline functions
         during incremental inlining.  This sucks as functions calling
         always inline functions will get less optimized, but at the
         same time inlining of functions calling always inline
         function into an always inline function might introduce
         cycles of edges to be always inlined in the callgraph.

         We might want to be smarter and just avoid this type of inlining.  */
      || (DECL_DISREGARD_INLINE_LIMITS (node->decl)
          && lookup_attribute ("always_inline",
                               DECL_ATTRIBUTES (node->decl))))
    /* Nothing more to decide; any always-inline decisions made above are
       applied at the end of the function.  */
    ;
  else if (lookup_attribute ("flatten",
                             DECL_ATTRIBUTES (node->decl)) != NULL)
    {
      /* When the function is marked to be flattened, recursively inline
         all calls in it.  */
      if (dump_enabled_p ())
        dump_printf (MSG_OPTIMIZED_LOCATIONS,
                     "Flattening %C\n", node);
      flatten_function (node, true, true);
      inlined = true;
    }
  else
    {
      /* If some always_inline functions was inlined, apply the changes.
         This way we will not account always inline into growth limits and
         moreover we will inline calls from always inlines that we skipped
         previously because of conditional above.  */
      if (inlined)
        {
          timevar_push (TV_INTEGRATION);
          todo |= optimize_inline_calls (current_function_decl);
          /* optimize_inline_calls call above might have introduced new
             statements that don't have inline parameters computed.  */
          for (edge = node->callees; edge; edge = edge->next_callee)
            {
              /* We can encounter not-yet-analyzed function during
                 early inlining on callgraphs with strongly
                 connected components.  */
              ipa_call_summary *es = ipa_call_summaries->get_create (edge);
              es->call_stmt_size
                = estimate_num_insns (edge->call_stmt, &eni_size_weights);
              es->call_stmt_time
                = estimate_num_insns (edge->call_stmt, &eni_time_weights);
            }
          ipa_update_overall_fn_summary (node);
          inlined = false;
          timevar_pop (TV_INTEGRATION);
        }
      /* We iterate incremental inlining to get trivial cases of indirect
         inlining.  */
      while (iterations < opt_for_fn (node->decl,
                                      param_early_inliner_max_iterations)
             && early_inline_small_functions (node))
        {
          timevar_push (TV_INTEGRATION);
          todo |= optimize_inline_calls (current_function_decl);

          /* Technically we ought to recompute inline parameters so the new
             iteration of early inliner works as expected.  We however have
             values approximately right and thus we only need to update edge
             info that might be cleared out for newly discovered edges.  */
          for (edge = node->callees; edge; edge = edge->next_callee)
            {
              /* We have no summary for new bound store calls yet.  */
              ipa_call_summary *es = ipa_call_summaries->get_create (edge);
              es->call_stmt_size
                = estimate_num_insns (edge->call_stmt, &eni_size_weights);
              es->call_stmt_time
                = estimate_num_insns (edge->call_stmt, &eni_time_weights);
            }
          /* The summary update is skipped on the last permitted iteration,
             after which the loop cannot run again.  */
          if (iterations < opt_for_fn (node->decl,
                                       param_early_inliner_max_iterations) - 1)
            ipa_update_overall_fn_summary (node);
          timevar_pop (TV_INTEGRATION);
          iterations++;
          inlined = false;
        }
      if (dump_file)
        fprintf (dump_file, "Iterations: %i\n", iterations);
    }

  /* Apply decisions that are still pending (always-inline calls in the
     first branch above, or flattening).  */
  if (inlined)
    {
      timevar_push (TV_INTEGRATION);
      todo |= optimize_inline_calls (current_function_decl);
      timevar_pop (TV_INTEGRATION);
    }

  fun->always_inline_functions_inlined = true;

  return todo;
}
3021
3022 /* Do inlining of small functions.  Doing so early helps profiling and other
3023    passes to be somewhat more effective and avoids some code duplication in
3024    later real inlining pass for testcases with very many function calls.  */
3025
3026 namespace {
3027
3028 const pass_data pass_data_early_inline =
3029 {
3030   GIMPLE_PASS, /* type */
3031   "einline", /* name */
3032   OPTGROUP_INLINE, /* optinfo_flags */
3033   TV_EARLY_INLINING, /* tv_id */
3034   PROP_ssa, /* properties_required */
3035   0, /* properties_provided */
3036   0, /* properties_destroyed */
3037   0, /* todo_flags_start */
3038   0, /* todo_flags_finish */
3039 };
3040
3041 class pass_early_inline : public gimple_opt_pass
3042 {
3043 public:
3044   pass_early_inline (gcc::context *ctxt)
3045     : gimple_opt_pass (pass_data_early_inline, ctxt)
3046   {}
3047
3048   /* opt_pass methods: */
3049   virtual unsigned int execute (function *);
3050
3051 }; // class pass_early_inline
3052
3053 unsigned int
3054 pass_early_inline::execute (function *fun)
3055 {
3056   return early_inliner (fun);
3057 }
3058
3059 } // anon namespace
3060
3061 gimple_opt_pass *
3062 make_pass_early_inline (gcc::context *ctxt)
3063 {
3064   return new pass_early_inline (ctxt);
3065 }
3066
3067 namespace {
3068
3069 const pass_data pass_data_ipa_inline =
3070 {
3071   IPA_PASS, /* type */
3072   "inline", /* name */
3073   OPTGROUP_INLINE, /* optinfo_flags */
3074   TV_IPA_INLINING, /* tv_id */
3075   0, /* properties_required */
3076   0, /* properties_provided */
3077   0, /* properties_destroyed */
3078   0, /* todo_flags_start */
3079   ( TODO_dump_symtab ), /* todo_flags_finish */
3080 };
3081
3082 class pass_ipa_inline : public ipa_opt_pass_d
3083 {
3084 public:
3085   pass_ipa_inline (gcc::context *ctxt)
3086     : ipa_opt_pass_d (pass_data_ipa_inline, ctxt,
3087                       NULL, /* generate_summary */
3088                       NULL, /* write_summary */
3089                       NULL, /* read_summary */
3090                       NULL, /* write_optimization_summary */
3091                       NULL, /* read_optimization_summary */
3092                       NULL, /* stmt_fixup */
3093                       0, /* function_transform_todo_flags_start */
3094                       inline_transform, /* function_transform */
3095                       NULL) /* variable_transform */
3096   {}
3097
3098   /* opt_pass methods: */
3099   virtual unsigned int execute (function *) { return ipa_inline (); }
3100
3101 }; // class pass_ipa_inline
3102
3103 } // anon namespace
3104
3105 ipa_opt_pass_d *
3106 make_pass_ipa_inline (gcc::context *ctxt)
3107 {
3108   return new pass_ipa_inline (ctxt);
3109 }