gcc/ipa-inline.c

   1 /* Inlining decision heuristics.
   2    Copyright (C) 2003-2013 Free Software Foundation, Inc.
   3    Contributed by Jan Hubicka
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 3, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21 /*  Inlining decision heuristics
  22
  23     The implementation of inliner is organized as follows:
  24
  25     inlining heuristics limits
  26
  27       can_inline_edge_p allow to check that particular inlining is allowed
  28       by the limits specified by user (allowed function growth, growth and so
  29       on).
  30
  31       Functions are inlined when it is obvious the result is profitable (such
  32       as functions called once or when inlining reduce code size).
  33       In addition to that we perform inlining of small functions and recursive
  34       inlining.
  35
  36     inlining heuristics
  37
  38        The inliner itself is split into two passes:
  39
  40        pass_early_inlining
  41
  42          Simple local inlining pass inlining callees into current function.
  43          This pass makes no use of whole unit analysis and thus it can do only
  44          very simple decisions based on local properties.
  45
  46          The strength of the pass is that it is run in topological order
  47          (reverse postorder) on the callgraph. Functions are converted into SSA
  48          form just before this pass and optimized subsequently. As a result, the
  49          callees of the function seen by the early inliner was already optimized
  50          and results of early inlining adds a lot of optimization opportunities
  51          for the local optimization.
  52
  53          The pass handle the obvious inlining decisions within the compilation
  54          unit - inlining auto inline functions, inlining for size and
  55          flattening.
  56
  57          main strength of the pass is the ability to eliminate abstraction
  58          penalty in C++ code (via combination of inlining and early
  59          optimization) and thus improve quality of analysis done by real IPA
  60          optimizers.
  61
  62          Because of lack of whole unit knowledge, the pass can not really make
  63          good code size/performance tradeoffs.  It however does very simple
  64          speculative inlining allowing code size to grow by
  65          EARLY_INLINING_INSNS when callee is leaf function.  In this case the
  66          optimizations performed later are very likely to eliminate the cost.
  67
  68        pass_ipa_inline
  69
  70          This is the real inliner able to handle inlining with whole program
  71          knowledge. It performs following steps:
  72
  73          1) inlining of small functions.  This is implemented by greedy
  74          algorithm ordering all inlinable cgraph edges by their badness and
  75          inlining them in this order as long as inline limits allows doing so.
  76
  77          This heuristics is not very good on inlining recursive calls. Recursive
  78          calls can be inlined with results similar to loop unrolling. To do so,
  79          special purpose recursive inliner is executed on function when
  80          recursive edge is met as viable candidate.
  81
  82          2) Unreachable functions are removed from callgraph.  Inlining leads
  83          to devirtualization and other modification of callgraph so functions
  84          may become unreachable during the process. Also functions declared as
  85          extern inline or virtual functions are removed, since after inlining
  86          we no longer need the offline bodies.
  87
  88          3) Functions called once and not exported from the unit are inlined.
  89          This should almost always lead to reduction of code size by eliminating
  90          the need for offline copy of the function.  */
  91
  92 #include "config.h"
  93 #include "system.h"
  94 #include "coretypes.h"
  95 #include "tm.h"
  96 #include "tree.h"
  97 #include "trans-mem.h"
  98 #include "calls.h"
  99 #include "tree-inline.h"
 100 #include "langhooks.h"
 101 #include "flags.h"
 102 #include "diagnostic.h"
 103 #include "gimple-pretty-print.h"
 104 #include "params.h"
 105 #include "fibheap.h"
 106 #include "intl.h"
 107 #include "tree-pass.h"
 108 #include "coverage.h"
 109 #include "ggc.h"
 110 #include "rtl.h"
 111 #include "bitmap.h"
 112 #include "gimple.h"
 113 #include "gimple-ssa.h"
 114 #include "ipa-prop.h"
 115 #include "except.h"
 116 #include "target.h"
 117 #include "ipa-inline.h"
 118 #include "ipa-utils.h"
 119 #include "sreal.h"
 120 #include "cilk.h"
 121
 122 /* Statistics we collect about inlining algorithm.  */
 123 static int overall_size;
 124 static gcov_type max_count;
 125 static sreal max_count_real, max_relbenefit_real, half_int_min_real;
 126
 127 /* Return false when inlining edge E would lead to violating
 128    limits on function unit growth or stack usage growth.
 129
 130    The relative function body growth limit is present generally
 131    to avoid problems with non-linear behavior of the compiler.
 132    To allow inlining huge functions into tiny wrapper, the limit
 133    is always based on the bigger of the two functions considered.
 134
 135    For stack growth limits we always base the growth in stack usage
 136    of the callers.  We want to prevent applications from segfaulting
 137    on stack overflow when functions with huge stack frames gets
 138    inlined. */
 139
 140 static bool
 141 caller_growth_limits (struct cgraph_edge *e)
 142 {
 143   struct cgraph_node *to = e->caller;
 144   struct cgraph_node *what = cgraph_function_or_thunk_node (e->callee, NULL);
 145   int newsize;
 146   int limit = 0;
 147   HOST_WIDE_INT stack_size_limit = 0, inlined_stack;
 148   struct inline_summary *info, *what_info, *outer_info = inline_summary (to);
 149
 150   /* Look for function e->caller is inlined to.  While doing
 151      so work out the largest function body on the way.  As
 152      described above, we want to base our function growth
 153      limits based on that.  Not on the self size of the
 154      outer function, not on the self size of inline code
 155      we immediately inline to.  This is the most relaxed
 156      interpretation of the rule "do not grow large functions
 157      too much in order to prevent compiler from exploding".  */
 158   while (true)
 159     {
 160       info = inline_summary (to);
 161       if (limit < info->self_size)
 162         limit = info->self_size;
 163       if (stack_size_limit < info->estimated_self_stack_size)
 164         stack_size_limit = info->estimated_self_stack_size;
 165       if (to->global.inlined_to)
 166         to = to->callers->caller;
 167       else
 168         break;
 169     }
 170
 171   what_info = inline_summary (what);
 172
 173   if (limit < what_info->self_size)
 174     limit = what_info->self_size;
 175
 176   limit += limit * PARAM_VALUE (PARAM_LARGE_FUNCTION_GROWTH) / 100;
 177
 178   /* Check the size after inlining against the function limits.  But allow
 179      the function to shrink if it went over the limits by forced inlining.  */
 180   newsize = estimate_size_after_inlining (to, e);
 181   if (newsize >= info->size
 182       && newsize > PARAM_VALUE (PARAM_LARGE_FUNCTION_INSNS)
 183       && newsize > limit)
 184     {
 185       e->inline_failed = CIF_LARGE_FUNCTION_GROWTH_LIMIT;
 186       return false;
 187     }
 188
 189   if (!what_info->estimated_stack_size)
 190     return true;
 191
 192   /* FIXME: Stack size limit often prevents inlining in Fortran programs
 193      due to large i/o datastructures used by the Fortran front-end.
 194      We ought to ignore this limit when we know that the edge is executed
 195      on every invocation of the caller (i.e. its call statement dominates
 196      exit block).  We do not track this information, yet.  */
 197   stack_size_limit += ((gcov_type)stack_size_limit
 198                        * PARAM_VALUE (PARAM_STACK_FRAME_GROWTH) / 100);
 199
 200   inlined_stack = (outer_info->stack_frame_offset
 201                    + outer_info->estimated_self_stack_size
 202                    + what_info->estimated_stack_size);
 203   /* Check new stack consumption with stack consumption at the place
 204      stack is used.  */
 205   if (inlined_stack > stack_size_limit
 206       /* If function already has large stack usage from sibling
 207          inline call, we can inline, too.
 208          This bit overoptimistically assume that we are good at stack
 209          packing.  */
 210       && inlined_stack > info->estimated_stack_size
 211       && inlined_stack > PARAM_VALUE (PARAM_LARGE_STACK_FRAME))
 212     {
 213       e->inline_failed = CIF_LARGE_STACK_FRAME_GROWTH_LIMIT;
 214       return false;
 215     }
 216   return true;
 217 }
 218
 219 /* Dump info about why inlining has failed.  */
 220
 221 static void
 222 report_inline_failed_reason (struct cgraph_edge *e)
 223 {
 224   if (dump_file)
 225     {
 226       fprintf (dump_file, "  not inlinable: %s/%i -> %s/%i, %s\n",
 227                xstrdup (e->caller->name ()), e->caller->order,
 228                xstrdup (e->callee->name ()), e->callee->order,
 229                cgraph_inline_failed_string (e->inline_failed));
 230     }
 231 }
 232
 233 /* Decide if we can inline the edge and possibly update
 234    inline_failed reason.
 235    We check whether inlining is possible at all and whether
 236    caller growth limits allow doing so.
 237
 238    if REPORT is true, output reason to the dump file.
 239
 240    if DISREGARD_LIMITES is true, ignore size limits.*/
 241
 242 static bool
 243 can_inline_edge_p (struct cgraph_edge *e, bool report,
 244                    bool disregard_limits = false)
 245 {
 246   bool inlinable = true;
 247   enum availability avail;
 248   struct cgraph_node *callee
 249     = cgraph_function_or_thunk_node (e->callee, &avail);
 250   tree caller_tree = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (e->caller->decl);
 251   tree callee_tree
 252     = callee ? DECL_FUNCTION_SPECIFIC_OPTIMIZATION (callee->decl) : NULL;
 253   struct function *caller_cfun = DECL_STRUCT_FUNCTION (e->caller->decl);
 254   struct function *callee_cfun
 255     = callee ? DECL_STRUCT_FUNCTION (callee->decl) : NULL;
 256
 257   if (!caller_cfun && e->caller->clone_of)
 258     caller_cfun = DECL_STRUCT_FUNCTION (e->caller->clone_of->decl);
 259
 260   if (!callee_cfun && callee && callee->clone_of)
 261     callee_cfun = DECL_STRUCT_FUNCTION (callee->clone_of->decl);
 262
 263   gcc_assert (e->inline_failed);
 264
 265   if (!callee || !callee->definition)
 266     {
 267       e->inline_failed = CIF_BODY_NOT_AVAILABLE;
 268       inlinable = false;
 269     }
 270   else if (!inline_summary (callee)->inlinable
 271            || (caller_cfun && fn_contains_cilk_spawn_p (caller_cfun)))
 272     {
 273       e->inline_failed = CIF_FUNCTION_NOT_INLINABLE;
 274       inlinable = false;
 275     }
 276   else if (avail <= AVAIL_OVERWRITABLE)
 277     {
 278       e->inline_failed = CIF_OVERWRITABLE;
 279       inlinable = false;
 280     }
 281   else if (e->call_stmt_cannot_inline_p)
 282     {
 283       if (e->inline_failed != CIF_FUNCTION_NOT_OPTIMIZED)
 284         e->inline_failed = CIF_MISMATCHED_ARGUMENTS;
 285       inlinable = false;
 286     }
 287   /* Don't inline if the functions have different EH personalities.  */
 288   else if (DECL_FUNCTION_PERSONALITY (e->caller->decl)
 289            && DECL_FUNCTION_PERSONALITY (callee->decl)
 290            && (DECL_FUNCTION_PERSONALITY (e->caller->decl)
 291                != DECL_FUNCTION_PERSONALITY (callee->decl)))
 292     {
 293       e->inline_failed = CIF_EH_PERSONALITY;
 294       inlinable = false;
 295     }
 296   /* TM pure functions should not be inlined into non-TM_pure
 297      functions.  */
 298   else if (is_tm_pure (callee->decl)
 299            && !is_tm_pure (e->caller->decl))
 300     {
 301       e->inline_failed = CIF_UNSPECIFIED;
 302       inlinable = false;
 303     }
 304   /* Don't inline if the callee can throw non-call exceptions but the
 305      caller cannot.
 306      FIXME: this is obviously wrong for LTO where STRUCT_FUNCTION is missing.
 307      Move the flag into cgraph node or mirror it in the inline summary.  */
 308   else if (callee_cfun && callee_cfun->can_throw_non_call_exceptions
 309            && !(caller_cfun && caller_cfun->can_throw_non_call_exceptions))
 310     {
 311       e->inline_failed = CIF_NON_CALL_EXCEPTIONS;
 312       inlinable = false;
 313     }
 314   /* Check compatibility of target optimization options.  */
 315   else if (!targetm.target_option.can_inline_p (e->caller->decl,
 316                                                 callee->decl))
 317     {
 318       e->inline_failed = CIF_TARGET_OPTION_MISMATCH;
 319       inlinable = false;
 320     }
 321   /* Check if caller growth allows the inlining.  */
 322   else if (!DECL_DISREGARD_INLINE_LIMITS (callee->decl)
 323            && !disregard_limits
 324            && !lookup_attribute ("flatten",
 325                                  DECL_ATTRIBUTES
 326                                    (e->caller->global.inlined_to
 327                                     ? e->caller->global.inlined_to->decl
 328                                     : e->caller->decl))
 329            && !caller_growth_limits (e))
 330     inlinable = false;
 331   /* Don't inline a function with a higher optimization level than the
 332      caller.  FIXME: this is really just tip of iceberg of handling
 333      optimization attribute.  */
 334   else if (caller_tree != callee_tree)
 335     {
 336       struct cl_optimization *caller_opt
 337         = TREE_OPTIMIZATION ((caller_tree)
 338                              ? caller_tree
 339                              : optimization_default_node);
 340
 341       struct cl_optimization *callee_opt
 342         = TREE_OPTIMIZATION ((callee_tree)
 343                              ? callee_tree
 344                              : optimization_default_node);
 345
 346       if (((caller_opt->x_optimize > callee_opt->x_optimize)
 347            || (caller_opt->x_optimize_size != callee_opt->x_optimize_size))
 348           /* gcc.dg/pr43564.c.  Look at forced inline even in -O0.  */
 349           && !DECL_DISREGARD_INLINE_LIMITS (e->callee->decl))
 350         {
 351           e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
 352           inlinable = false;
 353         }
 354     }
 355
 356   if (!inlinable && report)
 357     report_inline_failed_reason (e);
 358   return inlinable;
 359 }
 360
 361
 362 /* Return true if the edge E is inlinable during early inlining.  */
 363
 364 static bool
 365 can_early_inline_edge_p (struct cgraph_edge *e)
 366 {
 367   struct cgraph_node *callee = cgraph_function_or_thunk_node (e->callee,
 368                                                               NULL);
 369   /* Early inliner might get called at WPA stage when IPA pass adds new
 370      function.  In this case we can not really do any of early inlining
 371      because function bodies are missing.  */
 372   if (!gimple_has_body_p (callee->decl))
 373     {
 374       e->inline_failed = CIF_BODY_NOT_AVAILABLE;
 375       return false;
 376     }
 377   /* In early inliner some of callees may not be in SSA form yet
 378      (i.e. the callgraph is cyclic and we did not process
 379      the callee by early inliner, yet).  We don't have CIF code for this
 380      case; later we will re-do the decision in the real inliner.  */
 381   if (!gimple_in_ssa_p (DECL_STRUCT_FUNCTION (e->caller->decl))
 382       || !gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee->decl)))
 383     {
 384       if (dump_file)
 385         fprintf (dump_file, "  edge not inlinable: not in SSA form\n");
 386       return false;
 387     }
 388   if (!can_inline_edge_p (e, true))
 389     return false;
 390   return true;
 391 }
 392
 393
 394 /* Return number of calls in N.  Ignore cheap builtins.  */
 395
 396 static int
 397 num_calls (struct cgraph_node *n)
 398 {
 399   struct cgraph_edge *e;
 400   int num = 0;
 401
 402   for (e = n->callees; e; e = e->next_callee)
 403     if (!is_inexpensive_builtin (e->callee->decl))
 404       num++;
 405   return num;
 406 }
 407
 408
 409 /* Return true if we are interested in inlining small function.  */
 410
 411 static bool
 412 want_early_inline_function_p (struct cgraph_edge *e)
 413 {
 414   bool want_inline = true;
 415   struct cgraph_node *callee = cgraph_function_or_thunk_node (e->callee, NULL);
 416
 417   if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
 418     ;
 419   else if (!DECL_DECLARED_INLINE_P (callee->decl)
 420            && !flag_inline_small_functions)
 421     {
 422       e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
 423       report_inline_failed_reason (e);
 424       want_inline = false;
 425     }
 426   else
 427     {
 428       int growth = estimate_edge_growth (e);
 429       int n;
 430
 431       if (growth <= 0)
 432         ;
 433       else if (!cgraph_maybe_hot_edge_p (e)
 434                && growth > 0)
 435         {
 436           if (dump_file)
 437             fprintf (dump_file, "  will not early inline: %s/%i->%s/%i, "
 438                      "call is cold and code would grow by %i\n",
 439                      xstrdup (e->caller->name ()),
 440                      e->caller->order,
 441                      xstrdup (callee->name ()), callee->order,
 442                      growth);
 443           want_inline = false;
 444         }
 445       else if (growth > PARAM_VALUE (PARAM_EARLY_INLINING_INSNS))
 446         {
 447           if (dump_file)
 448             fprintf (dump_file, "  will not early inline: %s/%i->%s/%i, "
 449                      "growth %i exceeds --param early-inlining-insns\n",
 450                      xstrdup (e->caller->name ()),
 451                      e->caller->order,
 452                      xstrdup (callee->name ()), callee->order,
 453                      growth);
 454           want_inline = false;
 455         }
 456       else if ((n = num_calls (callee)) != 0
 457                && growth * (n + 1) > PARAM_VALUE (PARAM_EARLY_INLINING_INSNS))
 458         {
 459           if (dump_file)
 460             fprintf (dump_file, "  will not early inline: %s/%i->%s/%i, "
 461                      "growth %i exceeds --param early-inlining-insns "
 462                      "divided by number of calls\n",
 463                      xstrdup (e->caller->name ()),
 464                      e->caller->order,
 465                      xstrdup (callee->name ()), callee->order,
 466                      growth);
 467           want_inline = false;
 468         }
 469     }
 470   return want_inline;
 471 }
 472
 473 /* Compute time of the edge->caller + edge->callee execution when inlining
 474    does not happen.  */
 475
 476 inline gcov_type
 477 compute_uninlined_call_time (struct inline_summary *callee_info,
 478                              struct cgraph_edge *edge)
 479 {
 480   gcov_type uninlined_call_time =
 481     RDIV ((gcov_type)callee_info->time * MAX (edge->frequency, 1),
 482           CGRAPH_FREQ_BASE);
 483   gcov_type caller_time = inline_summary (edge->caller->global.inlined_to
 484                                           ? edge->caller->global.inlined_to
 485                                           : edge->caller)->time;
 486   return uninlined_call_time + caller_time;
 487 }
 488
 489 /* Same as compute_uinlined_call_time but compute time when inlining
 490    does happen.  */
 491
 492 inline gcov_type
 493 compute_inlined_call_time (struct cgraph_edge *edge,
 494                            int edge_time)
 495 {
 496   gcov_type caller_time = inline_summary (edge->caller->global.inlined_to
 497                                           ? edge->caller->global.inlined_to
 498                                           : edge->caller)->time;
 499   gcov_type time = (caller_time
 500                     + RDIV (((gcov_type) edge_time
 501                              - inline_edge_summary (edge)->call_stmt_time)
 502                     * MAX (edge->frequency, 1), CGRAPH_FREQ_BASE));
 503   /* Possible one roundoff error, but watch for overflows.  */
 504   gcc_checking_assert (time >= INT_MIN / 2);
 505   if (time < 0)
 506     time = 0;
 507   return time;
 508 }
 509
 510 /* Return true if the speedup for inlining E is bigger than
 511    PARAM_MAX_INLINE_MIN_SPEEDUP.  */
 512
 513 static bool
 514 big_speedup_p (struct cgraph_edge *e)
 515 {
 516   gcov_type time = compute_uninlined_call_time (inline_summary (e->callee),
 517                                                 e);
 518   gcov_type inlined_time = compute_inlined_call_time (e,
 519                                                       estimate_edge_time (e));
 520   if (time - inlined_time
 521       > RDIV (time * PARAM_VALUE (PARAM_INLINE_MIN_SPEEDUP), 100))
 522     return true;
 523   return false;
 524 }
 525
 526 /* Return true if we are interested in inlining small function.
 527    When REPORT is true, report reason to dump file.  */
 528
 529 static bool
 530 want_inline_small_function_p (struct cgraph_edge *e, bool report)
 531 {
 532   bool want_inline = true;
 533   struct cgraph_node *callee = cgraph_function_or_thunk_node (e->callee, NULL);
 534
 535   if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
 536     ;
 537   else if (!DECL_DECLARED_INLINE_P (callee->decl)
 538            && !flag_inline_small_functions)
 539     {
 540       e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
 541       want_inline = false;
 542     }
 543   else
 544     {
 545       int growth = estimate_edge_growth (e);
 546       inline_hints hints = estimate_edge_hints (e);
 547       bool big_speedup = big_speedup_p (e);
 548
 549       if (growth <= 0)
 550         ;
 551       /* Apply MAX_INLINE_INSNS_SINGLE limit.  Do not do so when
 552          hints suggests that inlining given function is very profitable.  */
 553       else if (DECL_DECLARED_INLINE_P (callee->decl)
 554                && growth >= MAX_INLINE_INSNS_SINGLE
 555                && !big_speedup
 556                && !(hints & (INLINE_HINT_indirect_call
 557                              | INLINE_HINT_loop_iterations
 558                              | INLINE_HINT_array_index
 559                              | INLINE_HINT_loop_stride)))
 560         {
 561           e->inline_failed = CIF_MAX_INLINE_INSNS_SINGLE_LIMIT;
 562           want_inline = false;
 563         }
 564       /* Before giving up based on fact that caller size will grow, allow
 565          functions that are called few times and eliminating the offline
 566          copy will lead to overall code size reduction.
 567          Not all of these will be handled by subsequent inlining of functions
 568          called once: in particular weak functions are not handled or funcitons
 569          that inline to multiple calls but a lot of bodies is optimized out.
 570          Finally we want to inline earlier to allow inlining of callbacks.
 571
 572          This is slightly wrong on aggressive side:  it is entirely possible
 573          that function is called many times with a context where inlining
 574          reduces code size and few times with a context where inlining increase
 575          code size.  Resoluting growth estimate will be negative even if it
 576          would make more sense to keep offline copy and do not inline into the
 577          call sites that makes the code size grow.
 578
 579          When badness orders the calls in a way that code reducing calls come
 580          first, this situation is not a problem at all: after inlining all
 581          "good" calls, we will realize that keeping the function around is
 582          better.  */
 583       else if (growth <= MAX_INLINE_INSNS_SINGLE
 584                /* Unlike for functions called once, we play unsafe with
 585                   COMDATs.  We can allow that since we know functions
 586                   in consideration are small (and thus risk is small) and
 587                   moreover grow estimates already accounts that COMDAT
 588                   functions may or may not disappear when eliminated from
 589                   current unit. With good probability making aggressive
 590                   choice in all units is going to make overall program
 591                   smaller.
 592
 593                   Consequently we ask cgraph_can_remove_if_no_direct_calls_p
 594                   instead of
 595                   cgraph_will_be_removed_from_program_if_no_direct_calls  */
 596                 && !DECL_EXTERNAL (callee->decl)
 597                 && cgraph_can_remove_if_no_direct_calls_p (callee)
 598                 && estimate_growth (callee) <= 0)
 599         ;
 600       else if (!DECL_DECLARED_INLINE_P (callee->decl)
 601                && !flag_inline_functions)
 602         {
 603           e->inline_failed = CIF_NOT_DECLARED_INLINED;
 604           want_inline = false;
 605         }
 606       /* Apply MAX_INLINE_INSNS_AUTO limit for functions not declared inline
 607          Upgrade it to MAX_INLINE_INSNS_SINGLE when hints suggests that
 608          inlining given function is very profitable.  */
 609       else if (!DECL_DECLARED_INLINE_P (callee->decl)
 610                && !big_speedup
 611                && growth >= ((hints & (INLINE_HINT_indirect_call
 612                                        | INLINE_HINT_loop_iterations
 613                                        | INLINE_HINT_array_index
 614                                        | INLINE_HINT_loop_stride))
 615                              ? MAX (MAX_INLINE_INSNS_AUTO,
 616                                     MAX_INLINE_INSNS_SINGLE)
 617                              : MAX_INLINE_INSNS_AUTO))
 618         {
 619           e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
 620           want_inline = false;
 621         }
 622       /* If call is cold, do not inline when function body would grow. */
 623       else if (!cgraph_maybe_hot_edge_p (e))
 624         {
 625           e->inline_failed = CIF_UNLIKELY_CALL;
 626           want_inline = false;
 627         }
 628     }
 629   if (!want_inline && report)
 630     report_inline_failed_reason (e);
 631   return want_inline;
 632 }
 633
 634 /* EDGE is self recursive edge.
 635    We hand two cases - when function A is inlining into itself
 636    or when function A is being inlined into another inliner copy of function
 637    A within function B.
 638
 639    In first case OUTER_NODE points to the toplevel copy of A, while
 640    in the second case OUTER_NODE points to the outermost copy of A in B.
 641
 642    In both cases we want to be extra selective since
 643    inlining the call will just introduce new recursive calls to appear.  */
 644
 645 static bool
 646 want_inline_self_recursive_call_p (struct cgraph_edge *edge,
 647                                    struct cgraph_node *outer_node,
 648                                    bool peeling,
 649                                    int depth)
 650 {
 651   char const *reason = NULL;
 652   bool want_inline = true;
 653   int caller_freq = CGRAPH_FREQ_BASE;
 654   int max_depth = PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH_AUTO);
 655
 656   if (DECL_DECLARED_INLINE_P (edge->caller->decl))
 657     max_depth = PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH);
 658
 659   if (!cgraph_maybe_hot_edge_p (edge))
 660     {
 661       reason = "recursive call is cold";
 662       want_inline = false;
 663     }
 664   else if (max_count && !outer_node->count)
 665     {
 666       reason = "not executed in profile";
 667       want_inline = false;
 668     }
 669   else if (depth > max_depth)
 670     {
 671       reason = "--param max-inline-recursive-depth exceeded.";
 672       want_inline = false;
 673     }
 674
 675   if (outer_node->global.inlined_to)
 676     caller_freq = outer_node->callers->frequency;
 677
 678   if (!want_inline)
 679     ;
 680   /* Inlining of self recursive function into copy of itself within other function
 681      is transformation similar to loop peeling.
 682
 683      Peeling is profitable if we can inline enough copies to make probability
 684      of actual call to the self recursive function very small.  Be sure that
 685      the probability of recursion is small.
 686
 687      We ensure that the frequency of recursing is at most 1 - (1/max_depth).
 688      This way the expected number of recision is at most max_depth.  */
 689   else if (peeling)
 690     {
 691       int max_prob = CGRAPH_FREQ_BASE - ((CGRAPH_FREQ_BASE + max_depth - 1)
 692                                          / max_depth);
 693       int i;
 694       for (i = 1; i < depth; i++)
 695         max_prob = max_prob * max_prob / CGRAPH_FREQ_BASE;
 696       if (max_count
 697           && (edge->count * CGRAPH_FREQ_BASE / outer_node->count
 698               >= max_prob))
 699         {
 700           reason = "profile of recursive call is too large";
 701           want_inline = false;
 702         }
 703       if (!max_count
 704           && (edge->frequency * CGRAPH_FREQ_BASE / caller_freq
 705               >= max_prob))
 706         {
 707           reason = "frequency of recursive call is too large";
 708           want_inline = false;
 709         }
 710     }
 711   /* Recursive inlining, i.e. equivalent of unrolling, is profitable if recursion
 712      depth is large.  We reduce function call overhead and increase chances that
 713      things fit in hardware return predictor.
 714
 715      Recursive inlining might however increase cost of stack frame setup
 716      actually slowing down functions whose recursion tree is wide rather than
 717      deep.
 718
 719      Deciding reliably on when to do recursive inlining without profile feedback
 720      is tricky.  For now we disable recursive inlining when probability of self
 721      recursion is low.
 722
 723      Recursive inlining of self recursive call within loop also results in large loop
 724      depths that generally optimize badly.  We may want to throttle down inlining
 725      in those cases.  In particular this seems to happen in one of libstdc++ rb tree
 726      methods.  */
 727   else
 728     {
 729       if (max_count
 730           && (edge->count * 100 / outer_node->count
 731               <= PARAM_VALUE (PARAM_MIN_INLINE_RECURSIVE_PROBABILITY)))
 732         {
 733           reason = "profile of recursive call is too small";
 734           want_inline = false;
 735         }
 736       else if (!max_count
 737                && (edge->frequency * 100 / caller_freq
 738                    <= PARAM_VALUE (PARAM_MIN_INLINE_RECURSIVE_PROBABILITY)))
 739         {
 740           reason = "frequency of recursive call is too small";
 741           want_inline = false;
 742         }
 743     }
 744   if (!want_inline && dump_file)
 745     fprintf (dump_file, "   not inlining recursively: %s\n", reason);
 746   return want_inline;
 747 }
 748
 749 /* Return true when NODE has uninlinable caller;
 750    set HAS_HOT_CALL if it has hot call.
 751    Worker for cgraph_for_node_and_aliases.  */
 752
 753 static bool
 754 check_callers (struct cgraph_node *node, void *has_hot_call)
 755 {
 756   struct cgraph_edge *e;
 757    for (e = node->callers; e; e = e->next_caller)
 758      {
 759        if (!can_inline_edge_p (e, true))
 760          return true;
 761        if (!has_hot_call && cgraph_maybe_hot_edge_p (e))
 762          *(bool *)has_hot_call = true;
 763      }
 764   return false;
 765 }
 766
 767 /* If NODE has a caller, return true.  */
 768
 769 static bool
 770 has_caller_p (struct cgraph_node *node, void *data ATTRIBUTE_UNUSED)
 771 {
 772   if (node->callers)
 773     return true;
 774   return false;
 775 }
 776
 777 /* Decide if inlining NODE would reduce unit size by eliminating
 778    the offline copy of function.
 779    When COLD is true the cold calls are considered, too.  */
 780
 781 static bool
 782 want_inline_function_to_all_callers_p (struct cgraph_node *node, bool cold)
 783 {
 784    struct cgraph_node *function = cgraph_function_or_thunk_node (node, NULL);
 785    bool has_hot_call = false;
 786
 787    /* Does it have callers?  */
 788    if (!cgraph_for_node_and_aliases (node, has_caller_p, NULL, true))
 789      return false;
 790    /* Already inlined?  */
 791    if (function->global.inlined_to)
 792      return false;
 793    if (cgraph_function_or_thunk_node (node, NULL) != node)
 794      return false;
 795    /* Inlining into all callers would increase size?  */
 796    if (estimate_growth (node) > 0)
 797      return false;
 798    /* All inlines must be possible.  */
 799    if (cgraph_for_node_and_aliases (node, check_callers, &has_hot_call, true))
 800      return false;
 801    if (!cold && !has_hot_call)
 802      return false;
 803    return true;
 804 }
 805
 806 #define RELATIVE_TIME_BENEFIT_RANGE (INT_MAX / 64)
 807
 808 /* Return relative time improvement for inlining EDGE in range
 809    1...RELATIVE_TIME_BENEFIT_RANGE  */
 810
 811 static inline int
 812 relative_time_benefit (struct inline_summary *callee_info,
 813                        struct cgraph_edge *edge,
 814                        int edge_time)
 815 {
 816   gcov_type relbenefit;
 817   gcov_type uninlined_call_time = compute_uninlined_call_time (callee_info, edge);
 818   gcov_type inlined_call_time = compute_inlined_call_time (edge, edge_time);
 819
 820   /* Inlining into extern inline function is not a win.  */
 821   if (DECL_EXTERNAL (edge->caller->global.inlined_to
 822                      ? edge->caller->global.inlined_to->decl
 823                      : edge->caller->decl))
 824     return 1;
 825
 826   /* Watch overflows.  */
 827   gcc_checking_assert (uninlined_call_time >= 0);
 828   gcc_checking_assert (inlined_call_time >= 0);
 829   gcc_checking_assert (uninlined_call_time >= inlined_call_time);
 830
 831   /* Compute relative time benefit, i.e. how much the call becomes faster.
 832      ??? perhaps computing how much the caller+calle together become faster
 833      would lead to more realistic results.  */
 834   if (!uninlined_call_time)
 835     uninlined_call_time = 1;
 836   relbenefit =
 837     RDIV (((gcov_type)uninlined_call_time - inlined_call_time) * RELATIVE_TIME_BENEFIT_RANGE,
 838           uninlined_call_time);
 839   relbenefit = MIN (relbenefit, RELATIVE_TIME_BENEFIT_RANGE);
 840   gcc_checking_assert (relbenefit >= 0);
 841   relbenefit = MAX (relbenefit, 1);
 842   return relbenefit;
 843 }
 844
 845
 846 /* A cost model driving the inlining heuristics in a way so the edges with
 847    smallest badness are inlined first.  After each inlining is performed
 848    the costs of all caller edges of nodes affected are recomputed so the
 849    metrics may accurately depend on values such as number of inlinable callers
 850    of the function or function body size.  */
 851
 852 static int
 853 edge_badness (struct cgraph_edge *edge, bool dump)
 854 {
 855   gcov_type badness;
 856   int growth, edge_time;
 857   struct cgraph_node *callee = cgraph_function_or_thunk_node (edge->callee,
 858                                                               NULL);
 859   struct inline_summary *callee_info = inline_summary (callee);
 860   inline_hints hints;
 861
 862   if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
 863     return INT_MIN;
 864
 865   growth = estimate_edge_growth (edge);
 866   edge_time = estimate_edge_time (edge);
 867   hints = estimate_edge_hints (edge);
 868   gcc_checking_assert (edge_time >= 0);
 869   gcc_checking_assert (edge_time <= callee_info->time);
 870   gcc_checking_assert (growth <= callee_info->size);
 871
 872   if (dump)
 873     {
 874       fprintf (dump_file, "    Badness calculation for %s/%i -> %s/%i\n",
 875                xstrdup (edge->caller->name ()),
 876                edge->caller->order,
 877                xstrdup (callee->name ()),
 878                edge->callee->order);
 879       fprintf (dump_file, "      size growth %i, time %i ",
 880                growth,
 881                edge_time);
 882       dump_inline_hints (dump_file, hints);
 883       if (big_speedup_p (edge))
 884         fprintf (dump_file, " big_speedup");
 885       fprintf (dump_file, "\n");
 886     }
 887
 888   /* Always prefer inlining saving code size.  */
 889   if (growth <= 0)
 890     {
 891       badness = INT_MIN / 2 + growth;
 892       if (dump)
 893         fprintf (dump_file, "      %i: Growth %i <= 0\n", (int) badness,
 894                  growth);
 895     }
 896
 897   /* When profiling is available, compute badness as:
 898
 899                 relative_edge_count * relative_time_benefit
 900      goodness = -------------------------------------------
 901                 growth_f_caller
 902      badness = -goodness
 903
 904     The fraction is upside down, because on edge counts and time beneits
 905     the bounds are known. Edge growth is essentially unlimited.  */
 906
 907   else if (max_count)
 908     {
 909       sreal tmp, relbenefit_real, growth_real;
 910       int relbenefit = relative_time_benefit (callee_info, edge, edge_time);
 911       /* Capping edge->count to max_count. edge->count can be larger than
 912          max_count if an inline adds new edges which increase max_count
 913          after max_count is computed.  */
 914       gcov_type edge_count = edge->count > max_count ? max_count : edge->count;
 915
 916       sreal_init (&relbenefit_real, relbenefit, 0);
 917       sreal_init (&growth_real, growth, 0);
 918
 919       /* relative_edge_count.  */
 920       sreal_init (&tmp, edge_count, 0);
 921       sreal_div (&tmp, &tmp, &max_count_real);
 922
 923       /* relative_time_benefit.  */
 924       sreal_mul (&tmp, &tmp, &relbenefit_real);
 925       sreal_div (&tmp, &tmp, &max_relbenefit_real);
 926
 927       /* growth_f_caller.  */
 928       sreal_mul (&tmp, &tmp, &half_int_min_real);
 929       sreal_div (&tmp, &tmp, &growth_real);
 930
 931       badness = -1 * sreal_to_int (&tmp);
 932
 933       if (dump)
 934         {
 935           fprintf (dump_file,
 936                    "      %i (relative %f): profile info. Relative count %f%s"
 937                    " * Relative benefit %f\n",
 938                    (int) badness, (double) badness / INT_MIN,
 939                    (double) edge_count / max_count,
 940                    edge->count > max_count ? " (capped to max_count)" : "",
 941                    relbenefit * 100.0 / RELATIVE_TIME_BENEFIT_RANGE);
 942         }
 943     }
 944
 945   /* When function local profile is available. Compute badness as:
 946
 947                  relative_time_benefit
 948      goodness =  ---------------------------------
 949                  growth_of_caller * overall_growth
 950
 951      badness = - goodness
 952
 953      compensated by the inline hints.
 954   */
 955   else if (flag_guess_branch_prob)
 956     {
 957       badness = (relative_time_benefit (callee_info, edge, edge_time)
 958                  * (INT_MIN / 16 / RELATIVE_TIME_BENEFIT_RANGE));
 959       badness /= (MIN (65536/2, growth) * MIN (65536/2, MAX (1, callee_info->growth)));
 960       gcc_checking_assert (badness <=0 && badness >= INT_MIN / 16);
 961       if ((hints & (INLINE_HINT_indirect_call
 962                     | INLINE_HINT_loop_iterations
 963                     | INLINE_HINT_array_index
 964                     | INLINE_HINT_loop_stride))
 965           || callee_info->growth <= 0)
 966         badness *= 8;
 967       if (hints & (INLINE_HINT_same_scc))
 968         badness /= 16;
 969       else if (hints & (INLINE_HINT_in_scc))
 970         badness /= 8;
 971       else if (hints & (INLINE_HINT_cross_module))
 972         badness /= 2;
 973       gcc_checking_assert (badness <= 0 && badness >= INT_MIN / 2);
 974       if ((hints & INLINE_HINT_declared_inline) && badness >= INT_MIN / 32)
 975         badness *= 16;
 976       if (dump)
 977         {
 978           fprintf (dump_file,
 979                    "      %i: guessed profile. frequency %f,"
 980                    " benefit %f%%, time w/o inlining %i, time w inlining %i"
 981                    " overall growth %i (current) %i (original)\n",
 982                    (int) badness, (double)edge->frequency / CGRAPH_FREQ_BASE,
 983                    relative_time_benefit (callee_info, edge, edge_time) * 100.0
 984                    / RELATIVE_TIME_BENEFIT_RANGE,
 985                    (int)compute_uninlined_call_time (callee_info, edge),
 986                    (int)compute_inlined_call_time (edge, edge_time),
 987                    estimate_growth (callee),
 988                    callee_info->growth);
 989         }
 990     }
 991   /* When function local profile is not available or it does not give
 992      useful information (ie frequency is zero), base the cost on
 993      loop nest and overall size growth, so we optimize for overall number
 994      of functions fully inlined in program.  */
 995   else
 996     {
 997       int nest = MIN (inline_edge_summary (edge)->loop_depth, 8);
 998       badness = growth * 256;
 999
1000       /* Decrease badness if call is nested.  */
1001       if (badness > 0)
1002         badness >>= nest;
1003       else
1004         {
1005           badness <<= nest;
1006         }
1007       if (dump)
1008         fprintf (dump_file, "      %i: no profile. nest %i\n", (int) badness,
1009                  nest);
1010     }
1011
1012   /* Ensure that we did not overflow in all the fixed point math above.  */
1013   gcc_assert (badness >= INT_MIN);
1014   gcc_assert (badness <= INT_MAX - 1);
1015   /* Make recursive inlining happen always after other inlining is done.  */
1016   if (cgraph_edge_recursive_p (edge))
1017     return badness + 1;
1018   else
1019     return badness;
1020 }
1021
1022 /* Recompute badness of EDGE and update its key in HEAP if needed.  */
1023 static inline void
1024 update_edge_key (fibheap_t heap, struct cgraph_edge *edge)
1025 {
1026   int badness = edge_badness (edge, false);
1027   if (edge->aux)
1028     {
1029       fibnode_t n = (fibnode_t) edge->aux;
1030       gcc_checking_assert (n->data == edge);
1031
1032       /* fibheap_replace_key only decrease the keys.
1033          When we increase the key we do not update heap
1034          and instead re-insert the element once it becomes
1035          a minimum of heap.  */
1036       if (badness < n->key)
1037         {
1038           if (dump_file && (dump_flags & TDF_DETAILS))
1039             {
1040               fprintf (dump_file,
1041                        "  decreasing badness %s/%i -> %s/%i, %i to %i\n",
1042                        xstrdup (edge->caller->name ()),
1043                        edge->caller->order,
1044                        xstrdup (edge->callee->name ()),
1045                        edge->callee->order,
1046                        (int)n->key,
1047                        badness);
1048             }
1049           fibheap_replace_key (heap, n, badness);
1050           gcc_checking_assert (n->key == badness);
1051         }
1052     }
1053   else
1054     {
1055        if (dump_file && (dump_flags & TDF_DETAILS))
1056          {
1057            fprintf (dump_file,
1058                     "  enqueuing call %s/%i -> %s/%i, badness %i\n",
1059                     xstrdup (edge->caller->name ()),
1060                     edge->caller->order,
1061                     xstrdup (edge->callee->name ()),
1062                     edge->callee->order,
1063                     badness);
1064          }
1065       edge->aux = fibheap_insert (heap, badness, edge);
1066     }
1067 }
1068
1069
1070 /* NODE was inlined.
1071    All caller edges needs to be resetted because
1072    size estimates change. Similarly callees needs reset
1073    because better context may be known.  */
1074
1075 static void
1076 reset_edge_caches (struct cgraph_node *node)
1077 {
1078   struct cgraph_edge *edge;
1079   struct cgraph_edge *e = node->callees;
1080   struct cgraph_node *where = node;
1081   int i;
1082   struct ipa_ref *ref;
1083
1084   if (where->global.inlined_to)
1085     where = where->global.inlined_to;
1086
1087   /* WHERE body size has changed, the cached growth is invalid.  */
1088   reset_node_growth_cache (where);
1089
1090   for (edge = where->callers; edge; edge = edge->next_caller)
1091     if (edge->inline_failed)
1092       reset_edge_growth_cache (edge);
1093   for (i = 0; ipa_ref_list_referring_iterate (&where->ref_list,
1094                                               i, ref); i++)
1095     if (ref->use == IPA_REF_ALIAS)
1096       reset_edge_caches (ipa_ref_referring_node (ref));
1097
1098   if (!e)
1099     return;
1100
1101   while (true)
1102     if (!e->inline_failed && e->callee->callees)
1103       e = e->callee->callees;
1104     else
1105       {
1106         if (e->inline_failed)
1107           reset_edge_growth_cache (e);
1108         if (e->next_callee)
1109           e = e->next_callee;
1110         else
1111           {
1112             do
1113               {
1114                 if (e->caller == node)
1115                   return;
1116                 e = e->caller->callers;
1117               }
1118             while (!e->next_callee);
1119             e = e->next_callee;
1120           }
1121       }
1122 }
1123
1124 /* Recompute HEAP nodes for each of caller of NODE.
1125    UPDATED_NODES track nodes we already visited, to avoid redundant work.
1126    When CHECK_INLINABLITY_FOR is set, re-check for specified edge that
1127    it is inlinable. Otherwise check all edges.  */
1128
1129 static void
1130 update_caller_keys (fibheap_t heap, struct cgraph_node *node,
1131                     bitmap updated_nodes,
1132                     struct cgraph_edge *check_inlinablity_for)
1133 {
1134   struct cgraph_edge *edge;
1135   int i;
1136   struct ipa_ref *ref;
1137
1138   if ((!node->alias && !inline_summary (node)->inlinable)
1139       || node->global.inlined_to)
1140     return;
1141   if (!bitmap_set_bit (updated_nodes, node->uid))
1142     return;
1143
1144   for (i = 0; ipa_ref_list_referring_iterate (&node->ref_list,
1145                                               i, ref); i++)
1146     if (ref->use == IPA_REF_ALIAS)
1147       {
1148         struct cgraph_node *alias = ipa_ref_referring_node (ref);
1149         update_caller_keys (heap, alias, updated_nodes, check_inlinablity_for);
1150       }
1151
1152   for (edge = node->callers; edge; edge = edge->next_caller)
1153     if (edge->inline_failed)
1154       {
1155         if (!check_inlinablity_for
1156             || check_inlinablity_for == edge)
1157           {
1158             if (can_inline_edge_p (edge, false)
1159                 && want_inline_small_function_p (edge, false))
1160               update_edge_key (heap, edge);
1161             else if (edge->aux)
1162               {
1163                 report_inline_failed_reason (edge);
1164                 fibheap_delete_node (heap, (fibnode_t) edge->aux);
1165                 edge->aux = NULL;
1166               }
1167           }
1168         else if (edge->aux)
1169           update_edge_key (heap, edge);
1170       }
1171 }
1172
1173 /* Recompute HEAP nodes for each uninlined call in NODE.
1174    This is used when we know that edge badnesses are going only to increase
1175    (we introduced new call site) and thus all we need is to insert newly
1176    created edges into heap.  */
1177
1178 static void
1179 update_callee_keys (fibheap_t heap, struct cgraph_node *node,
1180                     bitmap updated_nodes)
1181 {
1182   struct cgraph_edge *e = node->callees;
1183
1184   if (!e)
1185     return;
1186   while (true)
1187     if (!e->inline_failed && e->callee->callees)
1188       e = e->callee->callees;
1189     else
1190       {
1191         enum availability avail;
1192         struct cgraph_node *callee;
1193         /* We do not reset callee growth cache here.  Since we added a new call,
1194            growth chould have just increased and consequentely badness metric
1195            don't need updating.  */
1196         if (e->inline_failed
1197             && (callee = cgraph_function_or_thunk_node (e->callee, &avail))
1198             && inline_summary (callee)->inlinable
1199             && avail >= AVAIL_AVAILABLE
1200             && !bitmap_bit_p (updated_nodes, callee->uid))
1201           {
1202             if (can_inline_edge_p (e, false)
1203                 && want_inline_small_function_p (e, false))
1204               update_edge_key (heap, e);
1205             else if (e->aux)
1206               {
1207                 report_inline_failed_reason (e);
1208                 fibheap_delete_node (heap, (fibnode_t) e->aux);
1209                 e->aux = NULL;
1210               }
1211           }
1212         if (e->next_callee)
1213           e = e->next_callee;
1214         else
1215           {
1216             do
1217               {
1218                 if (e->caller == node)
1219                   return;
1220                 e = e->caller->callers;
1221               }
1222             while (!e->next_callee);
1223             e = e->next_callee;
1224           }
1225       }
1226 }
1227
1228 /* Enqueue all recursive calls from NODE into priority queue depending on
1229    how likely we want to recursively inline the call.  */
1230
1231 static void
1232 lookup_recursive_calls (struct cgraph_node *node, struct cgraph_node *where,
1233                         fibheap_t heap)
1234 {
1235   struct cgraph_edge *e;
1236   enum availability avail;
1237
1238   for (e = where->callees; e; e = e->next_callee)
1239     if (e->callee == node
1240         || (cgraph_function_or_thunk_node (e->callee, &avail) == node
1241             && avail > AVAIL_OVERWRITABLE))
1242       {
1243         /* When profile feedback is available, prioritize by expected number
1244            of calls.  */
1245         fibheap_insert (heap,
1246                         !max_count ? -e->frequency
1247                         : -(e->count / ((max_count + (1<<24) - 1) / (1<<24))),
1248                         e);
1249       }
1250   for (e = where->callees; e; e = e->next_callee)
1251     if (!e->inline_failed)
1252       lookup_recursive_calls (node, e->callee, heap);
1253 }
1254
1255 /* Decide on recursive inlining: in the case function has recursive calls,
1256    inline until body size reaches given argument.  If any new indirect edges
1257    are discovered in the process, add them to *NEW_EDGES, unless NEW_EDGES
1258    is NULL.  */
1259
1260 static bool
1261 recursive_inlining (struct cgraph_edge *edge,
1262                     vec<cgraph_edge_p> *new_edges)
1263 {
1264   int limit = PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE_AUTO);
1265   fibheap_t heap;
1266   struct cgraph_node *node;
1267   struct cgraph_edge *e;
1268   struct cgraph_node *master_clone = NULL, *next;
1269   int depth = 0;
1270   int n = 0;
1271
1272   node = edge->caller;
1273   if (node->global.inlined_to)
1274     node = node->global.inlined_to;
1275
1276   if (DECL_DECLARED_INLINE_P (node->decl))
1277     limit = PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE);
1278
1279   /* Make sure that function is small enough to be considered for inlining.  */
1280   if (estimate_size_after_inlining (node, edge)  >= limit)
1281     return false;
1282   heap = fibheap_new ();
1283   lookup_recursive_calls (node, node, heap);
1284   if (fibheap_empty (heap))
1285     {
1286       fibheap_delete (heap);
1287       return false;
1288     }
1289
1290   if (dump_file)
1291     fprintf (dump_file,
1292              "  Performing recursive inlining on %s\n",
1293              node->name ());
1294
1295   /* Do the inlining and update list of recursive call during process.  */
1296   while (!fibheap_empty (heap))
1297     {
1298       struct cgraph_edge *curr
1299         = (struct cgraph_edge *) fibheap_extract_min (heap);
1300       struct cgraph_node *cnode, *dest = curr->callee;
1301
1302       if (!can_inline_edge_p (curr, true))
1303         continue;
1304
1305       /* MASTER_CLONE is produced in the case we already started modified
1306          the function. Be sure to redirect edge to the original body before
1307          estimating growths otherwise we will be seeing growths after inlining
1308          the already modified body.  */
1309       if (master_clone)
1310         {
1311           cgraph_redirect_edge_callee (curr, master_clone);
1312           reset_edge_growth_cache (curr);
1313         }
1314
1315       if (estimate_size_after_inlining (node, curr) > limit)
1316         {
1317           cgraph_redirect_edge_callee (curr, dest);
1318           reset_edge_growth_cache (curr);
1319           break;
1320         }
1321
1322       depth = 1;
1323       for (cnode = curr->caller;
1324            cnode->global.inlined_to; cnode = cnode->callers->caller)
1325         if (node->decl
1326             == cgraph_function_or_thunk_node (curr->callee, NULL)->decl)
1327           depth++;
1328
1329       if (!want_inline_self_recursive_call_p (curr, node, false, depth))
1330         {
1331           cgraph_redirect_edge_callee (curr, dest);
1332           reset_edge_growth_cache (curr);
1333           continue;
1334         }
1335
1336       if (dump_file)
1337         {
1338           fprintf (dump_file,
1339                    "   Inlining call of depth %i", depth);
1340           if (node->count)
1341             {
1342               fprintf (dump_file, " called approx. %.2f times per call",
1343                        (double)curr->count / node->count);
1344             }
1345           fprintf (dump_file, "\n");
1346         }
1347       if (!master_clone)
1348         {
1349           /* We need original clone to copy around.  */
1350           master_clone = cgraph_clone_node (node, node->decl,
1351                                             node->count, CGRAPH_FREQ_BASE,
1352                                             false, vNULL, true, NULL);
1353           for (e = master_clone->callees; e; e = e->next_callee)
1354             if (!e->inline_failed)
1355               clone_inlined_nodes (e, true, false, NULL);
1356           cgraph_redirect_edge_callee (curr, master_clone);
1357           reset_edge_growth_cache (curr);
1358         }
1359
1360       inline_call (curr, false, new_edges, &overall_size, true);
1361       lookup_recursive_calls (node, curr->callee, heap);
1362       n++;
1363     }
1364
1365   if (!fibheap_empty (heap) && dump_file)
1366     fprintf (dump_file, "    Recursive inlining growth limit met.\n");
1367   fibheap_delete (heap);
1368
1369   if (!master_clone)
1370     return false;
1371
1372   if (dump_file)
1373     fprintf (dump_file,
1374              "\n   Inlined %i times, "
1375              "body grown from size %i to %i, time %i to %i\n", n,
1376              inline_summary (master_clone)->size, inline_summary (node)->size,
1377              inline_summary (master_clone)->time, inline_summary (node)->time);
1378
1379   /* Remove master clone we used for inlining.  We rely that clones inlined
1380      into master clone gets queued just before master clone so we don't
1381      need recursion.  */
1382   for (node = cgraph_first_function (); node != master_clone;
1383        node = next)
1384     {
1385       next = cgraph_next_function (node);
1386       if (node->global.inlined_to == master_clone)
1387         cgraph_remove_node (node);
1388     }
1389   cgraph_remove_node (master_clone);
1390   return true;
1391 }
1392
1393
1394 /* Given whole compilation unit estimate of INSNS, compute how large we can
1395    allow the unit to grow.  */
1396
1397 static int
1398 compute_max_insns (int insns)
1399 {
1400   int max_insns = insns;
1401   if (max_insns < PARAM_VALUE (PARAM_LARGE_UNIT_INSNS))
1402     max_insns = PARAM_VALUE (PARAM_LARGE_UNIT_INSNS);
1403
1404   return ((HOST_WIDEST_INT) max_insns
1405           * (100 + PARAM_VALUE (PARAM_INLINE_UNIT_GROWTH)) / 100);
1406 }
1407
1408
1409 /* Compute badness of all edges in NEW_EDGES and add them to the HEAP.  */
1410
1411 static void
1412 add_new_edges_to_heap (fibheap_t heap, vec<cgraph_edge_p> new_edges)
1413 {
1414   while (new_edges.length () > 0)
1415     {
1416       struct cgraph_edge *edge = new_edges.pop ();
1417
1418       gcc_assert (!edge->aux);
1419       if (edge->inline_failed
1420           && can_inline_edge_p (edge, true)
1421           && want_inline_small_function_p (edge, true))
1422         edge->aux = fibheap_insert (heap, edge_badness (edge, false), edge);
1423     }
1424 }
1425
1426 /* Remove EDGE from the fibheap.  */
1427
1428 static void
1429 heap_edge_removal_hook (struct cgraph_edge *e, void *data)
1430 {
1431   if (e->callee)
1432     reset_node_growth_cache (e->callee);
1433   if (e->aux)
1434     {
1435       fibheap_delete_node ((fibheap_t)data, (fibnode_t)e->aux);
1436       e->aux = NULL;
1437     }
1438 }
1439
1440 /* Return true if speculation of edge E seems useful.
1441    If ANTICIPATE_INLINING is true, be conservative and hope that E
1442    may get inlined.  */
1443
1444 bool
1445 speculation_useful_p (struct cgraph_edge *e, bool anticipate_inlining)
1446 {
1447   enum availability avail;
1448   struct cgraph_node *target = cgraph_function_or_thunk_node (e->callee, &avail);
1449   struct cgraph_edge *direct, *indirect;
1450   struct ipa_ref *ref;
1451
1452   gcc_assert (e->speculative && !e->indirect_unknown_callee);
1453
1454   if (!cgraph_maybe_hot_edge_p (e))
1455     return false;
1456
1457   /* See if IP optimizations found something potentially useful about the
1458      function.  For now we look only for CONST/PURE flags.  Almost everything
1459      else we propagate is useless.  */
1460   if (avail >= AVAIL_AVAILABLE)
1461     {
1462       int ecf_flags = flags_from_decl_or_type (target->decl);
1463       if (ecf_flags & ECF_CONST)
1464         {
1465           cgraph_speculative_call_info (e, direct, indirect, ref);
1466           if (!(indirect->indirect_info->ecf_flags & ECF_CONST))
1467             return true;
1468         }
1469       else if (ecf_flags & ECF_PURE)
1470         {
1471           cgraph_speculative_call_info (e, direct, indirect, ref);
1472           if (!(indirect->indirect_info->ecf_flags & ECF_PURE))
1473             return true;
1474         }
1475     }
1476   /* If we did not managed to inline the function nor redirect
1477      to an ipa-cp clone (that are seen by having local flag set),
1478      it is probably pointless to inline it unless hardware is missing
1479      indirect call predictor.  */
1480   if (!anticipate_inlining && e->inline_failed && !target->local.local)
1481     return false;
1482   /* For overwritable targets there is not much to do.  */
1483   if (e->inline_failed && !can_inline_edge_p (e, false, true))
1484     return false;
1485   /* OK, speculation seems interesting.  */
1486   return true;
1487 }
1488
1489 /* We know that EDGE is not going to be inlined.
1490    See if we can remove speculation.  */
1491
1492 static void
1493 resolve_noninline_speculation (fibheap_t edge_heap, struct cgraph_edge *edge)
1494 {
1495   if (edge->speculative && !speculation_useful_p (edge, false))
1496     {
1497       struct cgraph_node *node = edge->caller;
1498       struct cgraph_node *where = node->global.inlined_to
1499                                   ? node->global.inlined_to : node;
1500       bitmap updated_nodes = BITMAP_ALLOC (NULL);
1501
1502       cgraph_resolve_speculation (edge, NULL);
1503       reset_edge_caches (where);
1504       inline_update_overall_summary (where);
1505       update_caller_keys (edge_heap, where,
1506                           updated_nodes, NULL);
1507       update_callee_keys (edge_heap, where,
1508                           updated_nodes);
1509       BITMAP_FREE (updated_nodes);
1510     }
1511 }
1512
1513 /* We use greedy algorithm for inlining of small functions:
1514    All inline candidates are put into prioritized heap ordered in
1515    increasing badness.
1516
1517    The inlining of small functions is bounded by unit growth parameters.  */
1518
1519 static void
1520 inline_small_functions (void)
1521 {
1522   struct cgraph_node *node;
1523   struct cgraph_edge *edge;
1524   fibheap_t edge_heap = fibheap_new ();
1525   bitmap updated_nodes = BITMAP_ALLOC (NULL);
1526   int min_size, max_size;
1527   auto_vec<cgraph_edge_p> new_indirect_edges;
1528   int initial_size = 0;
1529   struct cgraph_node **order = XCNEWVEC (struct cgraph_node *, cgraph_n_nodes);
1530   struct cgraph_edge_hook_list *edge_removal_hook_holder;
1531
1532   if (flag_indirect_inlining)
1533     new_indirect_edges.create (8);
1534
1535   edge_removal_hook_holder
1536     = cgraph_add_edge_removal_hook (&heap_edge_removal_hook, edge_heap);
1537
1538   /* Compute overall unit size and other global parameters used by badness
1539      metrics.  */
1540
1541   max_count = 0;
1542   ipa_reduced_postorder (order, true, true, NULL);
1543   free (order);
1544
1545   FOR_EACH_DEFINED_FUNCTION (node)
1546     if (!node->global.inlined_to)
1547       {
1548         if (cgraph_function_with_gimple_body_p (node)
1549             || node->thunk.thunk_p)
1550           {
1551             struct inline_summary *info = inline_summary (node);
1552             struct ipa_dfs_info *dfs = (struct ipa_dfs_info *) node->aux;
1553
1554             if (!DECL_EXTERNAL (node->decl))
1555               initial_size += info->size;
1556             info->growth = estimate_growth (node);
1557             if (dfs && dfs->next_cycle)
1558               {
1559                 struct cgraph_node *n2;
1560                 int id = dfs->scc_no + 1;
1561                 for (n2 = node; n2;
1562                      n2 = ((struct ipa_dfs_info *) node->aux)->next_cycle)
1563                   {
1564                     struct inline_summary *info2 = inline_summary (n2);
1565                     if (info2->scc_no)
1566                       break;
1567                     info2->scc_no = id;
1568                   }
1569               }
1570           }
1571
1572         for (edge = node->callers; edge; edge = edge->next_caller)
1573           if (max_count < edge->count)
1574             max_count = edge->count;
1575       }
1576   sreal_init (&max_count_real, max_count, 0);
1577   sreal_init (&max_relbenefit_real, RELATIVE_TIME_BENEFIT_RANGE, 0);
1578   sreal_init (&half_int_min_real, INT_MAX / 2, 0);
1579   ipa_free_postorder_info ();
1580   initialize_growth_caches ();
1581
1582   if (dump_file)
1583     fprintf (dump_file,
1584              "\nDeciding on inlining of small functions.  Starting with size %i.\n",
1585              initial_size);
1586
1587   overall_size = initial_size;
1588   max_size = compute_max_insns (overall_size);
1589   min_size = overall_size;
1590
1591   /* Populate the heeap with all edges we might inline.  */
1592
1593   FOR_EACH_DEFINED_FUNCTION (node)
1594     {
1595       bool update = false;
1596       struct cgraph_edge *next;
1597
1598       if (dump_file)
1599         fprintf (dump_file, "Enqueueing calls in %s/%i.\n",
1600                  node->name (), node->order);
1601
1602       for (edge = node->callees; edge; edge = next)
1603         {
1604           next = edge->next_callee;
1605           if (edge->inline_failed
1606               && !edge->aux
1607               && can_inline_edge_p (edge, true)
1608               && want_inline_small_function_p (edge, true)
1609               && edge->inline_failed)
1610             {
1611               gcc_assert (!edge->aux);
1612               update_edge_key (edge_heap, edge);
1613             }
1614           if (edge->speculative && !speculation_useful_p (edge, edge->aux != NULL))
1615             {
1616               cgraph_resolve_speculation (edge, NULL);
1617               update = true;
1618             }
1619         }
1620       if (update)
1621         {
1622           struct cgraph_node *where = node->global.inlined_to
1623                                       ? node->global.inlined_to : node;
1624           inline_update_overall_summary (where);
1625           reset_node_growth_cache (where);
1626           reset_edge_caches (where);
1627           update_caller_keys (edge_heap, where,
1628                               updated_nodes, NULL);
1629           bitmap_clear (updated_nodes);
1630         }
1631     }
1632
1633   gcc_assert (in_lto_p
1634               || !max_count
1635               || (profile_info && flag_branch_probabilities));
1636
1637   while (!fibheap_empty (edge_heap))
1638     {
1639       int old_size = overall_size;
1640       struct cgraph_node *where, *callee;
1641       int badness = fibheap_min_key (edge_heap);
1642       int current_badness;
1643       int cached_badness;
1644       int growth;
1645
1646       edge = (struct cgraph_edge *) fibheap_extract_min (edge_heap);
1647       gcc_assert (edge->aux);
1648       edge->aux = NULL;
1649       if (!edge->inline_failed)
1650         continue;
1651
1652       /* Be sure that caches are maintained consistent.
1653          We can not make this ENABLE_CHECKING only because it cause different
1654          updates of the fibheap queue.  */
1655       cached_badness = edge_badness (edge, false);
1656       reset_edge_growth_cache (edge);
1657       reset_node_growth_cache (edge->callee);
1658
1659       /* When updating the edge costs, we only decrease badness in the keys.
1660          Increases of badness are handled lazilly; when we see key with out
1661          of date value on it, we re-insert it now.  */
1662       current_badness = edge_badness (edge, false);
1663       gcc_assert (cached_badness == current_badness);
1664       gcc_assert (current_badness >= badness);
1665       if (current_badness != badness)
1666         {
1667           edge->aux = fibheap_insert (edge_heap, current_badness, edge);
1668           continue;
1669         }
1670
1671       if (!can_inline_edge_p (edge, true))
1672         {
1673           resolve_noninline_speculation (edge_heap, edge);
1674           continue;
1675         }
1676
1677       callee = cgraph_function_or_thunk_node (edge->callee, NULL);
1678       growth = estimate_edge_growth (edge);
1679       if (dump_file)
1680         {
1681           fprintf (dump_file,
1682                    "\nConsidering %s/%i with %i size\n",
1683                    callee->name (), callee->order,
1684                    inline_summary (callee)->size);
1685           fprintf (dump_file,
1686                    " to be inlined into %s/%i in %s:%i\n"
1687                    " Estimated growth after inlined into all is %+i insns.\n"
1688                    " Estimated badness is %i, frequency %.2f.\n",
1689                    edge->caller->name (), edge->caller->order,
1690                    flag_wpa ? "unknown"
1691                    : gimple_filename ((const_gimple) edge->call_stmt),
1692                    flag_wpa ? -1
1693                    : gimple_lineno ((const_gimple) edge->call_stmt),
1694                    estimate_growth (callee),
1695                    badness,
1696                    edge->frequency / (double)CGRAPH_FREQ_BASE);
1697           if (edge->count)
1698             fprintf (dump_file," Called "HOST_WIDEST_INT_PRINT_DEC"x\n",
1699                      edge->count);
1700           if (dump_flags & TDF_DETAILS)
1701             edge_badness (edge, true);
1702         }
1703
1704       if (overall_size + growth > max_size
1705           && !DECL_DISREGARD_INLINE_LIMITS (callee->decl))
1706         {
1707           edge->inline_failed = CIF_INLINE_UNIT_GROWTH_LIMIT;
1708           report_inline_failed_reason (edge);
1709           resolve_noninline_speculation (edge_heap, edge);
1710           continue;
1711         }
1712
1713       if (!want_inline_small_function_p (edge, true))
1714         {
1715           resolve_noninline_speculation (edge_heap, edge);
1716           continue;
1717         }
1718
1719       /* Heuristics for inlining small functions works poorly for
1720          recursive calls where we do efect similar to loop unrolling.
1721          When inliing such edge seems profitable, leave decision on
1722          specific inliner.  */
1723       if (cgraph_edge_recursive_p (edge))
1724         {
1725           where = edge->caller;
1726           if (where->global.inlined_to)
1727             where = where->global.inlined_to;
1728           if (!recursive_inlining (edge,
1729                                    flag_indirect_inlining
1730                                    ? &new_indirect_edges : NULL))
1731             {
1732               edge->inline_failed = CIF_RECURSIVE_INLINING;
1733               resolve_noninline_speculation (edge_heap, edge);
1734               continue;
1735             }
1736           reset_edge_caches (where);
1737           /* Recursive inliner inlines all recursive calls of the function
1738              at once. Consequently we need to update all callee keys.  */
1739           if (flag_indirect_inlining)
1740             add_new_edges_to_heap (edge_heap, new_indirect_edges);
1741           update_callee_keys (edge_heap, where, updated_nodes);
1742           bitmap_clear (updated_nodes);
1743         }
1744       else
1745         {
1746           struct cgraph_node *outer_node = NULL;
1747           int depth = 0;
1748
1749           /* Consider the case where self recursive function A is inlined into B.
1750              This is desired optimization in some cases, since it leads to effect
1751              similar of loop peeling and we might completely optimize out the
1752              recursive call.  However we must be extra selective.  */
1753
1754           where = edge->caller;
1755           while (where->global.inlined_to)
1756             {
1757               if (where->decl == callee->decl)
1758                 outer_node = where, depth++;
1759               where = where->callers->caller;
1760             }
1761           if (outer_node
1762               && !want_inline_self_recursive_call_p (edge, outer_node,
1763                                                      true, depth))
1764             {
1765               edge->inline_failed
1766                 = (DECL_DISREGARD_INLINE_LIMITS (edge->callee->decl)
1767                    ? CIF_RECURSIVE_INLINING : CIF_UNSPECIFIED);
1768               resolve_noninline_speculation (edge_heap, edge);
1769               continue;
1770             }
1771           else if (depth && dump_file)
1772             fprintf (dump_file, " Peeling recursion with depth %i\n", depth);
1773
1774           gcc_checking_assert (!callee->global.inlined_to);
1775           inline_call (edge, true, &new_indirect_edges, &overall_size, true);
1776           if (flag_indirect_inlining)
1777             add_new_edges_to_heap (edge_heap, new_indirect_edges);
1778
1779           reset_edge_caches (edge->callee);
1780           reset_node_growth_cache (callee);
1781
1782           update_callee_keys (edge_heap, where, updated_nodes);
1783         }
1784       where = edge->caller;
1785       if (where->global.inlined_to)
1786         where = where->global.inlined_to;
1787
1788       /* Our profitability metric can depend on local properties
1789          such as number of inlinable calls and size of the function body.
1790          After inlining these properties might change for the function we
1791          inlined into (since it's body size changed) and for the functions
1792          called by function we inlined (since number of it inlinable callers
1793          might change).  */
1794       update_caller_keys (edge_heap, where, updated_nodes, NULL);
1795       bitmap_clear (updated_nodes);
1796
1797       if (dump_file)
1798         {
1799           fprintf (dump_file,
1800                    " Inlined into %s which now has time %i and size %i,"
1801                    "net change of %+i.\n",
1802                    edge->caller->name (),
1803                    inline_summary (edge->caller)->time,
1804                    inline_summary (edge->caller)->size,
1805                    overall_size - old_size);
1806         }
1807       if (min_size > overall_size)
1808         {
1809           min_size = overall_size;
1810           max_size = compute_max_insns (min_size);
1811
1812           if (dump_file)
1813             fprintf (dump_file, "New minimal size reached: %i\n", min_size);
1814         }
1815     }
1816
1817   free_growth_caches ();
1818   fibheap_delete (edge_heap);
1819   if (dump_file)
1820     fprintf (dump_file,
1821              "Unit growth for small function inlining: %i->%i (%i%%)\n",
1822              initial_size, overall_size,
1823              initial_size ? overall_size * 100 / (initial_size) - 100: 0);
1824   BITMAP_FREE (updated_nodes);
1825   cgraph_remove_edge_removal_hook (edge_removal_hook_holder);
1826 }
1827
1828 /* Flatten NODE.  Performed both during early inlining and
1829    at IPA inlining time.  */
1830
1831 static void
1832 flatten_function (struct cgraph_node *node, bool early)
1833 {
1834   struct cgraph_edge *e;
1835
1836   /* We shouldn't be called recursively when we are being processed.  */
1837   gcc_assert (node->aux == NULL);
1838
1839   node->aux = (void *) node;
1840
1841   for (e = node->callees; e; e = e->next_callee)
1842     {
1843       struct cgraph_node *orig_callee;
1844       struct cgraph_node *callee = cgraph_function_or_thunk_node (e->callee, NULL);
1845
1846       /* We've hit cycle?  It is time to give up.  */
1847       if (callee->aux)
1848         {
1849           if (dump_file)
1850             fprintf (dump_file,
1851                      "Not inlining %s into %s to avoid cycle.\n",
1852                      xstrdup (callee->name ()),
1853                      xstrdup (e->caller->name ()));
1854           e->inline_failed = CIF_RECURSIVE_INLINING;
1855           continue;
1856         }
1857
1858       /* When the edge is already inlined, we just need to recurse into
1859          it in order to fully flatten the leaves.  */
1860       if (!e->inline_failed)
1861         {
1862           flatten_function (callee, early);
1863           continue;
1864         }
1865
1866       /* Flatten attribute needs to be processed during late inlining. For
1867          extra code quality we however do flattening during early optimization,
1868          too.  */
1869       if (!early
1870           ? !can_inline_edge_p (e, true)
1871           : !can_early_inline_edge_p (e))
1872         continue;
1873
1874       if (cgraph_edge_recursive_p (e))
1875         {
1876           if (dump_file)
1877             fprintf (dump_file, "Not inlining: recursive call.\n");
1878           continue;
1879         }
1880
1881       if (gimple_in_ssa_p (DECL_STRUCT_FUNCTION (node->decl))
1882           != gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee->decl)))
1883         {
1884           if (dump_file)
1885             fprintf (dump_file, "Not inlining: SSA form does not match.\n");
1886           continue;
1887         }
1888
1889       /* Inline the edge and flatten the inline clone.  Avoid
1890          recursing through the original node if the node was cloned.  */
1891       if (dump_file)
1892         fprintf (dump_file, " Inlining %s into %s.\n",
1893                  xstrdup (callee->name ()),
1894                  xstrdup (e->caller->name ()));
1895       orig_callee = callee;
1896       inline_call (e, true, NULL, NULL, false);
1897       if (e->callee != orig_callee)
1898         orig_callee->aux = (void *) node;
1899       flatten_function (e->callee, early);
1900       if (e->callee != orig_callee)
1901         orig_callee->aux = NULL;
1902     }
1903
1904   node->aux = NULL;
1905   if (!node->global.inlined_to)
1906     inline_update_overall_summary (node);
1907 }
1908
1909 /* Count number of callers of NODE and store it into DATA (that
1910    points to int.  Worker for cgraph_for_node_and_aliases.  */
1911
1912 static bool
1913 sum_callers (struct cgraph_node *node, void *data)
1914 {
1915   struct cgraph_edge *e;
1916   int *num_calls = (int *)data;
1917
1918   for (e = node->callers; e; e = e->next_caller)
1919     (*num_calls)++;
1920   return false;
1921 }
1922
1923 /* Inline NODE to all callers.  Worker for cgraph_for_node_and_aliases.
1924    DATA points to number of calls originally found so we avoid infinite
1925    recursion.  */
1926
1927 static bool
1928 inline_to_all_callers (struct cgraph_node *node, void *data)
1929 {
1930   int *num_calls = (int *)data;
1931   while (node->callers && !node->global.inlined_to)
1932     {
1933       struct cgraph_node *caller = node->callers->caller;
1934
1935       if (dump_file)
1936         {
1937           fprintf (dump_file,
1938                    "\nInlining %s size %i.\n",
1939                    node->name (),
1940                    inline_summary (node)->size);
1941           fprintf (dump_file,
1942                    " Called once from %s %i insns.\n",
1943                    node->callers->caller->name (),
1944                    inline_summary (node->callers->caller)->size);
1945         }
1946
1947       inline_call (node->callers, true, NULL, NULL, true);
1948       if (dump_file)
1949         fprintf (dump_file,
1950                  " Inlined into %s which now has %i size\n",
1951                  caller->name (),
1952                  inline_summary (caller)->size);
1953       if (!(*num_calls)--)
1954         {
1955           if (dump_file)
1956             fprintf (dump_file, "New calls found; giving up.\n");
1957           return true;
1958         }
1959     }
1960   return false;
1961 }
1962
1963 /* Decide on the inlining.  We do so in the topological order to avoid
1964    expenses on updating data structures.  */
1965
1966 static unsigned int
1967 ipa_inline (void)
1968 {
1969   struct cgraph_node *node;
1970   int nnodes;
1971   struct cgraph_node **order;
1972   int i;
1973   int cold;
1974   bool remove_functions = false;
1975
1976   if (!optimize)
1977     return 0;
1978
1979   order = XCNEWVEC (struct cgraph_node *, cgraph_n_nodes);
1980
1981   if (in_lto_p && optimize)
1982     ipa_update_after_lto_read ();
1983
1984   if (dump_file)
1985     dump_inline_summaries (dump_file);
1986
1987   nnodes = ipa_reverse_postorder (order);
1988
1989   FOR_EACH_FUNCTION (node)
1990     node->aux = 0;
1991
1992   if (dump_file)
1993     fprintf (dump_file, "\nFlattening functions:\n");
1994
1995   /* In the first pass handle functions to be flattened.  Do this with
1996      a priority so none of our later choices will make this impossible.  */
1997   for (i = nnodes - 1; i >= 0; i--)
1998     {
1999       node = order[i];
2000
2001       /* Handle nodes to be flattened.
2002          Ideally when processing callees we stop inlining at the
2003          entry of cycles, possibly cloning that entry point and
2004          try to flatten itself turning it into a self-recursive
2005          function.  */
2006       if (lookup_attribute ("flatten",
2007                             DECL_ATTRIBUTES (node->decl)) != NULL)
2008         {
2009           if (dump_file)
2010             fprintf (dump_file,
2011                      "Flattening %s\n", node->name ());
2012           flatten_function (node, false);
2013         }
2014     }
2015
2016   inline_small_functions ();
2017
2018   /* Do first after-inlining removal.  We want to remove all "stale" extern inline
2019      functions and virtual functions so we really know what is called once.  */
2020   symtab_remove_unreachable_nodes (false, dump_file);
2021   free (order);
2022
2023   /* Inline functions with a property that after inlining into all callers the
2024      code size will shrink because the out-of-line copy is eliminated.
2025      We do this regardless on the callee size as long as function growth limits
2026      are met.  */
2027   if (dump_file)
2028     fprintf (dump_file,
2029              "\nDeciding on functions to be inlined into all callers and removing useless speculations:\n");
2030
2031   /* Inlining one function called once has good chance of preventing
2032      inlining other function into the same callee.  Ideally we should
2033      work in priority order, but probably inlining hot functions first
2034      is good cut without the extra pain of maintaining the queue.
2035
2036      ??? this is not really fitting the bill perfectly: inlining function
2037      into callee often leads to better optimization of callee due to
2038      increased context for optimization.
2039      For example if main() function calls a function that outputs help
2040      and then function that does the main optmization, we should inline
2041      the second with priority even if both calls are cold by themselves.
2042
2043      We probably want to implement new predicate replacing our use of
2044      maybe_hot_edge interpreted as maybe_hot_edge || callee is known
2045      to be hot.  */
2046   for (cold = 0; cold <= 1; cold ++)
2047     {
2048       FOR_EACH_DEFINED_FUNCTION (node)
2049         {
2050           struct cgraph_edge *edge, *next;
2051           bool update=false;
2052
2053           for (edge = node->callees; edge; edge = next)
2054             {
2055               next = edge->next_callee;
2056               if (edge->speculative && !speculation_useful_p (edge, false))
2057                 {
2058                   cgraph_resolve_speculation (edge, NULL);
2059                   update = true;
2060                   remove_functions = true;
2061                 }
2062             }
2063           if (update)
2064             {
2065               struct cgraph_node *where = node->global.inlined_to
2066                                           ? node->global.inlined_to : node;
2067               reset_node_growth_cache (where);
2068               reset_edge_caches (where);
2069               inline_update_overall_summary (where);
2070             }
2071           if (flag_inline_functions_called_once
2072               && want_inline_function_to_all_callers_p (node, cold))
2073             {
2074               int num_calls = 0;
2075               cgraph_for_node_and_aliases (node, sum_callers,
2076                                            &num_calls, true);
2077               cgraph_for_node_and_aliases (node, inline_to_all_callers,
2078                                            &num_calls, true);
2079               remove_functions = true;
2080             }
2081         }
2082     }
2083
2084   /* Free ipa-prop structures if they are no longer needed.  */
2085   if (optimize)
2086     ipa_free_all_structures_after_iinln ();
2087
2088   if (dump_file)
2089     fprintf (dump_file,
2090              "\nInlined %i calls, eliminated %i functions\n\n",
2091              ncalls_inlined, nfunctions_inlined);
2092
2093   if (dump_file)
2094     dump_inline_summaries (dump_file);
2095   /* In WPA we use inline summaries for partitioning process.  */
2096   if (!flag_wpa)
2097     inline_free_summary ();
2098   return remove_functions ? TODO_remove_functions : 0;
2099 }
2100
2101 /* Inline always-inline function calls in NODE.  */
2102
2103 static bool
2104 inline_always_inline_functions (struct cgraph_node *node)
2105 {
2106   struct cgraph_edge *e;
2107   bool inlined = false;
2108
2109   for (e = node->callees; e; e = e->next_callee)
2110     {
2111       struct cgraph_node *callee = cgraph_function_or_thunk_node (e->callee, NULL);
2112       if (!DECL_DISREGARD_INLINE_LIMITS (callee->decl))
2113         continue;
2114
2115       if (cgraph_edge_recursive_p (e))
2116         {
2117           if (dump_file)
2118             fprintf (dump_file, "  Not inlining recursive call to %s.\n",
2119                      e->callee->name ());
2120           e->inline_failed = CIF_RECURSIVE_INLINING;
2121           continue;
2122         }
2123
2124       if (!can_early_inline_edge_p (e))
2125         {
2126           /* Set inlined to true if the callee is marked "always_inline" but
2127              is not inlinable.  This will allow flagging an error later in
2128              expand_call_inline in tree-inline.c.  */
2129           if (lookup_attribute ("always_inline",
2130                                  DECL_ATTRIBUTES (callee->decl)) != NULL)
2131             inlined = true;
2132           continue;
2133         }
2134
2135       if (dump_file)
2136         fprintf (dump_file, "  Inlining %s into %s (always_inline).\n",
2137                  xstrdup (e->callee->name ()),
2138                  xstrdup (e->caller->name ()));
2139       inline_call (e, true, NULL, NULL, false);
2140       inlined = true;
2141     }
2142   if (inlined)
2143     inline_update_overall_summary (node);
2144
2145   return inlined;
2146 }
2147
2148 /* Decide on the inlining.  We do so in the topological order to avoid
2149    expenses on updating data structures.  */
2150
2151 static bool
2152 early_inline_small_functions (struct cgraph_node *node)
2153 {
2154   struct cgraph_edge *e;
2155   bool inlined = false;
2156
2157   for (e = node->callees; e; e = e->next_callee)
2158     {
2159       struct cgraph_node *callee = cgraph_function_or_thunk_node (e->callee, NULL);
2160       if (!inline_summary (callee)->inlinable
2161           || !e->inline_failed)
2162         continue;
2163
2164       /* Do not consider functions not declared inline.  */
2165       if (!DECL_DECLARED_INLINE_P (callee->decl)
2166           && !flag_inline_small_functions
2167           && !flag_inline_functions)
2168         continue;
2169
2170       if (dump_file)
2171         fprintf (dump_file, "Considering inline candidate %s.\n",
2172                  callee->name ());
2173
2174       if (!can_early_inline_edge_p (e))
2175         continue;
2176
2177       if (cgraph_edge_recursive_p (e))
2178         {
2179           if (dump_file)
2180             fprintf (dump_file, "  Not inlining: recursive call.\n");
2181           continue;
2182         }
2183
2184       if (!want_early_inline_function_p (e))
2185         continue;
2186
2187       if (dump_file)
2188         fprintf (dump_file, " Inlining %s into %s.\n",
2189                  xstrdup (callee->name ()),
2190                  xstrdup (e->caller->name ()));
2191       inline_call (e, true, NULL, NULL, true);
2192       inlined = true;
2193     }
2194
2195   return inlined;
2196 }
2197
2198 /* Do inlining of small functions.  Doing so early helps profiling and other
2199    passes to be somewhat more effective and avoids some code duplication in
2200    later real inlining pass for testcases with very many function calls.  */
2201 static unsigned int
2202 early_inliner (void)
2203 {
2204   struct cgraph_node *node = cgraph_get_node (current_function_decl);
2205   struct cgraph_edge *edge;
2206   unsigned int todo = 0;
2207   int iterations = 0;
2208   bool inlined = false;
2209
2210   if (seen_error ())
2211     return 0;
2212
2213   /* Do nothing if datastructures for ipa-inliner are already computed.  This
2214      happens when some pass decides to construct new function and
2215      cgraph_add_new_function calls lowering passes and early optimization on
2216      it.  This may confuse ourself when early inliner decide to inline call to
2217      function clone, because function clones don't have parameter list in
2218      ipa-prop matching their signature.  */
2219   if (ipa_node_params_vector.exists ())
2220     return 0;
2221
2222 #ifdef ENABLE_CHECKING
2223   verify_cgraph_node (node);
2224 #endif
2225   ipa_remove_all_references (&node->ref_list);
2226
2227   /* Even when not optimizing or not inlining inline always-inline
2228      functions.  */
2229   inlined = inline_always_inline_functions (node);
2230
2231   if (!optimize
2232       || flag_no_inline
2233       || !flag_early_inlining
2234       /* Never inline regular functions into always-inline functions
2235          during incremental inlining.  This sucks as functions calling
2236          always inline functions will get less optimized, but at the
2237          same time inlining of functions calling always inline
2238          function into an always inline function might introduce
2239          cycles of edges to be always inlined in the callgraph.
2240
2241          We might want to be smarter and just avoid this type of inlining.  */
2242       || DECL_DISREGARD_INLINE_LIMITS (node->decl))
2243     ;
2244   else if (lookup_attribute ("flatten",
2245                              DECL_ATTRIBUTES (node->decl)) != NULL)
2246     {
2247       /* When the function is marked to be flattened, recursively inline
2248          all calls in it.  */
2249       if (dump_file)
2250         fprintf (dump_file,
2251                  "Flattening %s\n", node->name ());
2252       flatten_function (node, true);
2253       inlined = true;
2254     }
2255   else
2256     {
2257       /* We iterate incremental inlining to get trivial cases of indirect
2258          inlining.  */
2259       while (iterations < PARAM_VALUE (PARAM_EARLY_INLINER_MAX_ITERATIONS)
2260              && early_inline_small_functions (node))
2261         {
2262           timevar_push (TV_INTEGRATION);
2263           todo |= optimize_inline_calls (current_function_decl);
2264
2265           /* Technically we ought to recompute inline parameters so the new
2266              iteration of early inliner works as expected.  We however have
2267              values approximately right and thus we only need to update edge
2268              info that might be cleared out for newly discovered edges.  */
2269           for (edge = node->callees; edge; edge = edge->next_callee)
2270             {
2271               struct inline_edge_summary *es = inline_edge_summary (edge);
2272               es->call_stmt_size
2273                 = estimate_num_insns (edge->call_stmt, &eni_size_weights);
2274               es->call_stmt_time
2275                 = estimate_num_insns (edge->call_stmt, &eni_time_weights);
2276               if (edge->callee->decl
2277                   && !gimple_check_call_matching_types (
2278                       edge->call_stmt, edge->callee->decl, false))
2279                 edge->call_stmt_cannot_inline_p = true;
2280             }
2281           timevar_pop (TV_INTEGRATION);
2282           iterations++;
2283           inlined = false;
2284         }
2285       if (dump_file)
2286         fprintf (dump_file, "Iterations: %i\n", iterations);
2287     }
2288
2289   if (inlined)
2290     {
2291       timevar_push (TV_INTEGRATION);
2292       todo |= optimize_inline_calls (current_function_decl);
2293       timevar_pop (TV_INTEGRATION);
2294     }
2295
2296   cfun->always_inline_functions_inlined = true;
2297
2298   return todo;
2299 }
2300
2301 namespace {
2302
2303 const pass_data pass_data_early_inline =
2304 {
2305   GIMPLE_PASS, /* type */
2306   "einline", /* name */
2307   OPTGROUP_INLINE, /* optinfo_flags */
2308   false, /* has_gate */
2309   true, /* has_execute */
2310   TV_EARLY_INLINING, /* tv_id */
2311   PROP_ssa, /* properties_required */
2312   0, /* properties_provided */
2313   0, /* properties_destroyed */
2314   0, /* todo_flags_start */
2315   0, /* todo_flags_finish */
2316 };
2317
2318 class pass_early_inline : public gimple_opt_pass
2319 {
2320 public:
2321   pass_early_inline (gcc::context *ctxt)
2322     : gimple_opt_pass (pass_data_early_inline, ctxt)
2323   {}
2324
2325   /* opt_pass methods: */
2326   unsigned int execute () { return early_inliner (); }
2327
2328 }; // class pass_early_inline
2329
2330 } // anon namespace
2331
2332 gimple_opt_pass *
2333 make_pass_early_inline (gcc::context *ctxt)
2334 {
2335   return new pass_early_inline (ctxt);
2336 }
2337
2338
2339 /* When to run IPA inlining.  Inlining of always-inline functions
2340    happens during early inlining.
2341
2342    Enable inlining unconditoinally, because callgraph redirection
2343    happens here.   */
2344
2345 static bool
2346 gate_ipa_inline (void)
2347 {
2348   return true;
2349 }
2350
2351 namespace {
2352
2353 const pass_data pass_data_ipa_inline =
2354 {
2355   IPA_PASS, /* type */
2356   "inline", /* name */
2357   OPTGROUP_INLINE, /* optinfo_flags */
2358   true, /* has_gate */
2359   true, /* has_execute */
2360   TV_IPA_INLINING, /* tv_id */
2361   0, /* properties_required */
2362   0, /* properties_provided */
2363   0, /* properties_destroyed */
2364   TODO_remove_functions, /* todo_flags_start */
2365   ( TODO_dump_symtab ), /* todo_flags_finish */
2366 };
2367
2368 class pass_ipa_inline : public ipa_opt_pass_d
2369 {
2370 public:
2371   pass_ipa_inline (gcc::context *ctxt)
2372     : ipa_opt_pass_d (pass_data_ipa_inline, ctxt,
2373                       inline_generate_summary, /* generate_summary */
2374                       inline_write_summary, /* write_summary */
2375                       inline_read_summary, /* read_summary */
2376                       NULL, /* write_optimization_summary */
2377                       NULL, /* read_optimization_summary */
2378                       NULL, /* stmt_fixup */
2379                       0, /* function_transform_todo_flags_start */
2380                       inline_transform, /* function_transform */
2381                       NULL) /* variable_transform */
2382   {}
2383
2384   /* opt_pass methods: */
2385   bool gate () { return gate_ipa_inline (); }
2386   unsigned int execute () { return ipa_inline (); }
2387
2388 }; // class pass_ipa_inline
2389
2390 } // anon namespace
2391
2392 ipa_opt_pass_d *
2393 make_pass_ipa_inline (gcc::context *ctxt)
2394 {
2395   return new pass_ipa_inline (ctxt);
2396 }