1 /* Inlining decision heuristics.
2 Copyright (C) 2003, 2004 Free Software Foundation, Inc.
3 Contributed by Jan Hubicka
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
20 02110-1301, USA. */
21
22 /* Inlining decision heuristics
23
24 We separate inlining decisions from the inliner itself and store them
25 inside the callgraph as a so-called inline plan. Refer to the cgraph.c
26 documentation for the particular representation of inline plans in the
27 callgraph.
28
29 There are three major parts of this file:
30
31 cgraph_mark_inline implementation
32
33 This function marks a given call inlined and performs the necessary
34 modifications of the cgraph (production of the clones and updating of the
35 overall statistics).
36
37 inlining heuristics limits
38
39 These functions check that a particular inlining is allowed
40 by the limits specified by the user (allowed function growth, overall unit
41 growth and so on).
42
43 inlining heuristics
44
45 This is the implementation of the IPA pass that aims to get as much benefit
46 from inlining as possible while obeying the limits checked above.
47
48 The implementation of the particular heuristics is separated from
49 the rest of the code to make it easier to replace it with a more complicated
50 implementation in the future. The rest of the inlining code acts as a
51 library aimed at modifying the callgraph and verifying that the limits
52 on code size growth are honored.
53
54 To mark a given call inlined, use the cgraph_mark_inline function; the
55 verification is performed by cgraph_default_inline_p and
56 cgraph_check_inline_limits.
57
58 The heuristics implement a simple knapsack-style algorithm that orders
59 all functions by their "profitability" (estimated by code size growth)
60 and inlines them in priority order.
61
62 cgraph_decide_inlining implements the heuristics taking the whole callgraph
63 into account, while cgraph_decide_inlining_incrementally considers
64 only one function at a time and is used in non-unit-at-a-time mode. */
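
/* Illustrative sketch (an editorial aid, not part of the pass itself): the
   small function inliner below can be pictured as

       put every inlinable edge E into a fibheap keyed by cgraph_edge_badness (E)
       while (overall_insns <= max_insns && the heap is not empty)
         {
           E = fibheap_extract_min (heap)    -- smallest badness first
           if (cgraph_default_inline_p and cgraph_check_inline_limits allow it)
             inline E and recompute the keys of the affected edges
         }

   The concrete loop lives in cgraph_decide_inlining_of_small_functions;
   everything named above exists in this file, only the layout of the loop
   is simplified here. */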
65
66 #include "config.h"
67 #include "system.h"
68 #include "coretypes.h"
69 #include "tm.h"
70 #include "tree.h"
71 #include "tree-inline.h"
72 #include "langhooks.h"
73 #include "flags.h"
74 #include "cgraph.h"
75 #include "diagnostic.h"
76 #include "timevar.h"
77 #include "params.h"
78 #include "fibheap.h"
79 #include "intl.h"
80 #include "tree-pass.h"
81 #include "hashtab.h"
82 #include "coverage.h"
83 #include "ggc.h"
84
85 /* Statistics we collect about inlining algorithm. */
86 static int ncalls_inlined;
87 static int nfunctions_inlined;
88 static int initial_insns;
89 static int overall_insns;
90 static int max_insns;
91 static gcov_type max_count;
92
93 /* Estimate the size of function TO after inlining WHAT into it TIMES times. */
94
95 static int
96 cgraph_estimate_size_after_inlining (int times, struct cgraph_node *to,
97 struct cgraph_node *what)
98 {
99 int size;
100 tree fndecl = what->decl, arg;
101 int call_insns = PARAM_VALUE (PARAM_INLINE_CALL_COST);
102
103 for (arg = DECL_ARGUMENTS (fndecl); arg; arg = TREE_CHAIN (arg))
104 call_insns += estimate_move_cost (TREE_TYPE (arg));
105 size = (what->global.insns - call_insns) * times + to->global.insns;
106 gcc_assert (size >= 0);
107 return size;
108 }
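
/* Worked example for the estimate above (illustrative only; the concrete
   numbers are assumptions, not defaults taken from params.def): if WHAT has
   global.insns == 100, two arguments whose estimated move cost is 1 insn
   each, and PARAM_INLINE_CALL_COST is 16, then call_insns == 16 + 1 + 1 == 18
   and inlining WHAT once into a TO of 50 insns is estimated as
   (100 - 18) * 1 + 50 == 132 insns. */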
109
110 /* E is expected to be an edge being inlined. Clone the destination node of
111 the edge and redirect it to the new clone.
112 DUPLICATE is used for bookkeeping on whether we are actually creating new
113 clones or re-using the node originally representing the out-of-line function
114 call. */
115 void
116 cgraph_clone_inlined_nodes (struct cgraph_edge *e, bool duplicate, bool update_original)
117 {
118 HOST_WIDE_INT peak;
119 if (duplicate)
120 {
121 /* We may eliminate the need for the out-of-line copy to be output.
122 In that case just go ahead and re-use it. */
123 if (!e->callee->callers->next_caller
124 && !e->callee->needed
125 && flag_unit_at_a_time)
126 {
127 gcc_assert (!e->callee->global.inlined_to);
128 if (DECL_SAVED_TREE (e->callee->decl))
129 overall_insns -= e->callee->global.insns, nfunctions_inlined++;
130 duplicate = false;
131 }
132 else
133 {
134 struct cgraph_node *n;
135 n = cgraph_clone_node (e->callee, e->count, e->loop_nest,
136 update_original);
137 cgraph_redirect_edge_callee (e, n);
138 }
139 }
140
141 if (e->caller->global.inlined_to)
142 e->callee->global.inlined_to = e->caller->global.inlined_to;
143 else
144 e->callee->global.inlined_to = e->caller;
145 e->callee->global.stack_frame_offset
146 = e->caller->global.stack_frame_offset + e->caller->local.estimated_self_stack_size;
147 peak = e->callee->global.stack_frame_offset + e->callee->local.estimated_self_stack_size;
148 if (e->callee->global.inlined_to->global.estimated_stack_size < peak)
149 e->callee->global.inlined_to->global.estimated_stack_size = peak;
150
151 /* Recursively clone all bodies. */
152 for (e = e->callee->callees; e; e = e->next_callee)
153 if (!e->inline_failed)
154 cgraph_clone_inlined_nodes (e, duplicate, update_original);
155 }
156
157 /* Mark edge E as inlined and update the callgraph accordingly.
158 UPDATE_ORIGINAL specifies whether the profile of the original function
159 should be updated. */
160
161 void
162 cgraph_mark_inline_edge (struct cgraph_edge *e, bool update_original)
163 {
164 int old_insns = 0, new_insns = 0;
165 struct cgraph_node *to = NULL, *what;
166
167 if (e->callee->inline_decl)
168 cgraph_redirect_edge_callee (e, cgraph_node (e->callee->inline_decl));
169
170 gcc_assert (e->inline_failed);
171 e->inline_failed = NULL;
172
173 if (!e->callee->global.inlined && flag_unit_at_a_time)
174 DECL_POSSIBLY_INLINED (e->callee->decl) = true;
175 e->callee->global.inlined = true;
176
177 cgraph_clone_inlined_nodes (e, true, update_original);
178
179 what = e->callee;
180
181 /* Now update the size of the caller and of all functions the caller is inlined into. */
182 for (;e && !e->inline_failed; e = e->caller->callers)
183 {
184 old_insns = e->caller->global.insns;
185 new_insns = cgraph_estimate_size_after_inlining (1, e->caller,
186 what);
187 gcc_assert (new_insns >= 0);
188 to = e->caller;
189 to->global.insns = new_insns;
190 }
191 gcc_assert (what->global.inlined_to == to);
192 if (new_insns > old_insns)
193 overall_insns += new_insns - old_insns;
194 ncalls_inlined++;
195 }
196
197 /* Mark all calls of EDGE->CALLEE inlined into EDGE->CALLER.
198 Return the following unredirected edge in the list of callers
199 of EDGE->CALLEE. */
200
201 static struct cgraph_edge *
202 cgraph_mark_inline (struct cgraph_edge *edge)
203 {
204 struct cgraph_node *to = edge->caller;
205 struct cgraph_node *what = edge->callee;
206 struct cgraph_edge *e, *next;
207 int times = 0;
208
209 /* Look for all calls, mark them inlined and recursively clone
210 all inlined functions. */
211 for (e = what->callers; e; e = next)
212 {
213 next = e->next_caller;
214 if (e->caller == to && e->inline_failed)
215 {
216 cgraph_mark_inline_edge (e, true);
217 if (e == edge)
218 edge = next;
219 times++;
220 }
221 }
222 gcc_assert (times);
223 return edge;
224 }
225
226 /* Estimate the growth caused by inlining NODE into all of its callers. */
227
228 static int
229 cgraph_estimate_growth (struct cgraph_node *node)
230 {
231 int growth = 0;
232 struct cgraph_edge *e;
233 if (node->global.estimated_growth != INT_MIN)
234 return node->global.estimated_growth;
235
236 for (e = node->callers; e; e = e->next_caller)
237 if (e->inline_failed)
238 growth += (cgraph_estimate_size_after_inlining (1, e->caller, node)
239 - e->caller->global.insns);
240
241 /* ??? Wrong for self-recursive functions or cases where we decide not to
242 inline for different reasons, but it is not a big deal, as in that case
243 we will keep the body around but we will also avoid some inlining. */
244 if (!node->needed && !DECL_EXTERNAL (node->decl))
245 growth -= node->global.insns;
246
247 node->global.estimated_growth = growth;
248 return growth;
249 }
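
/* Worked example for cgraph_estimate_growth (illustrative, made-up numbers):
   suppose NODE has 60 insns, two not-yet-inlined callers, and the per-call
   overhead computed by cgraph_estimate_size_after_inlining is 20 insns.
   Each call site then grows by 60 - 20 == 40 insns, so the loop accumulates
   2 * 40 == 80. If the offline body can go away afterwards (NODE is neither
   needed nor external), its 60 insns are subtracted, giving an estimated
   overall growth of 20 insns for inlining NODE everywhere. */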
250
251 /* Return false when inlining WHAT into TO is not a good idea
252 as it would cause too large a growth of function bodies.
253 When ONE_ONLY is true, assume that only one call site is going
254 to be inlined, otherwise figure out how many call sites in
255 TO call WHAT and verify that all of them can be inlined.
256 */
257
258 static bool
259 cgraph_check_inline_limits (struct cgraph_node *to, struct cgraph_node *what,
260 const char **reason, bool one_only)
261 {
262 int times = 0;
263 struct cgraph_edge *e;
264 int newsize;
265 int limit;
266 HOST_WIDE_INT stack_size_limit, inlined_stack;
267
268 if (one_only)
269 times = 1;
270 else
271 for (e = to->callees; e; e = e->next_callee)
272 if (e->callee == what)
273 times++;
274
275 if (to->global.inlined_to)
276 to = to->global.inlined_to;
277
278 /* When inlining a large function body called once into a small function,
279 take the inlined function as the base for limiting the growth. */
280 if (to->local.self_insns > what->local.self_insns)
281 limit = to->local.self_insns;
282 else
283 limit = what->local.self_insns;
284
285 limit += limit * PARAM_VALUE (PARAM_LARGE_FUNCTION_GROWTH) / 100;
286
287 /* Check the size after inlining against the function limits. But allow
288 the function to shrink if it went over the limits by forced inlining. */
289 newsize = cgraph_estimate_size_after_inlining (times, to, what);
290 if (newsize >= to->global.insns
291 && newsize > PARAM_VALUE (PARAM_LARGE_FUNCTION_INSNS)
292 && newsize > limit)
293 {
294 if (reason)
295 *reason = N_("--param large-function-growth limit reached");
296 return false;
297 }
298
299 stack_size_limit = to->local.estimated_self_stack_size;
300
301 stack_size_limit += stack_size_limit * PARAM_VALUE (PARAM_STACK_FRAME_GROWTH) / 100;
302
303 inlined_stack = (to->global.stack_frame_offset
304 + to->local.estimated_self_stack_size
305 + what->global.estimated_stack_size);
306 if (inlined_stack > stack_size_limit
307 && inlined_stack > PARAM_VALUE (PARAM_LARGE_STACK_FRAME))
308 {
309 if (reason)
310 *reason = N_("--param large-stack-frame-growth limit reached");
311 return false;
312 }
313 return true;
314 }
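
/* Worked example for the limits above (illustrative; the --param values are
   assumptions, not necessarily the defaults): with
   --param large-function-growth=100, a TO of 500 self insns inlining a WHAT
   of 30 self insns gets limit == 500 + 500 * 100 / 100 == 1000. The inlining
   is rejected only when the estimated new size does not shrink TO and exceeds
   both --param large-function-insns and this limit, so small functions never
   trip the check. The stack frame test works the same way with
   --param large-stack-frame-growth and --param large-stack-frame. */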
315
316 /* Return true when function N is small enough to be inlined. */
317
318 bool
319 cgraph_default_inline_p (struct cgraph_node *n, const char **reason)
320 {
321 tree decl = n->decl;
322
323 if (n->inline_decl)
324 decl = n->inline_decl;
325 if (!DECL_INLINE (decl))
326 {
327 if (reason)
328 *reason = N_("function not inlinable");
329 return false;
330 }
331
332 if (!DECL_STRUCT_FUNCTION (decl)->cfg)
333 {
334 if (reason)
335 *reason = N_("function body not available");
336 return false;
337 }
338
339 if (DECL_DECLARED_INLINE_P (decl))
340 {
341 if (n->global.insns >= MAX_INLINE_INSNS_SINGLE)
342 {
343 if (reason)
344 *reason = N_("--param max-inline-insns-single limit reached");
345 return false;
346 }
347 }
348 else
349 {
350 if (n->global.insns >= MAX_INLINE_INSNS_AUTO)
351 {
352 if (reason)
353 *reason = N_("--param max-inline-insns-auto limit reached");
354 return false;
355 }
356 }
357
358 return true;
359 }
360
361 /* Return true when inlining WHAT into TO would create recursive inlining.
362 We call recursive inlining all cases where the same function appears more
363 than once in a single recursion nest path in the inline graph. */
364
365 static bool
366 cgraph_recursive_inlining_p (struct cgraph_node *to,
367 struct cgraph_node *what,
368 const char **reason)
369 {
370 bool recursive;
371 if (to->global.inlined_to)
372 recursive = what->decl == to->global.inlined_to->decl;
373 else
374 recursive = what->decl == to->decl;
375 /* Marking a recursive function inline has sane semantics and thus we should
376 not warn about it. */
377 if (recursive && reason)
378 *reason = (what->local.disregard_inline_limits
379 ? N_("recursive inlining") : "");
380 return recursive;
381 }
382
383 /* Return true if the call can be hot. */
384 static bool
385 cgraph_maybe_hot_edge_p (struct cgraph_edge *edge)
386 {
387 if (profile_info && flag_branch_probabilities
388 && (edge->count
389 <= profile_info->sum_max / PARAM_VALUE (HOT_BB_COUNT_FRACTION)))
390 return false;
391 return true;
392 }
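
/* Illustrative numbers for the test above (assumed, not defaults): with
   feedback giving profile_info->sum_max == 1000000 and
   --param hot-bb-count-fraction=10000, an edge is considered cold once its
   count is at most 100; without -fbranch-probabilities every call is treated
   as possibly hot. */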
393
394 /* A cost model driving the inlining heuristics in a way such that the edges
395 with the smallest badness are inlined first. After each inlining is
396 performed, the costs of all caller edges of the nodes affected are
397 recomputed so that the metrics may accurately depend on values such as the
398 number of inlinable callers of the function or the function body size.
399
400 With profiling we use the number of executions of each edge to drive the
401 cost. We should also distinguish hot and cold calls, where cold calls are
402 inlined only when code size improves overall.
403 */
404
405 static int
406 cgraph_edge_badness (struct cgraph_edge *edge)
407 {
408 if (max_count)
409 {
410 int growth =
411 cgraph_estimate_size_after_inlining (1, edge->caller, edge->callee);
412 growth -= edge->caller->global.insns;
413
414 /* Always prefer inlining that saves code size. */
415 if (growth <= 0)
416 return INT_MIN - growth;
417 return ((int)((double)edge->count * INT_MIN / max_count)) / growth;
418 }
419 else
420 {
421 int nest = MIN (edge->loop_nest, 8);
422 int badness = cgraph_estimate_growth (edge->callee) * 256;
423
424 /* Decrease badness if call is nested. */
425 if (badness > 0)
426 badness >>= nest;
427 else
428 badness <<= nest;
429
430 /* Always make recursive inlining happen after other inlining is done. */
431 if (cgraph_recursive_inlining_p (edge->caller, edge->callee, NULL))
432 return badness + 1;
433 else
434 return badness;
435 }
436 }
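
/* Worked examples for cgraph_edge_badness (illustrative, made-up numbers):
   without profile data, a callee whose estimated overall growth is 40 insns
   gets a base badness of 40 * 256 == 10240; if the call sits in a loop nest
   of depth 2 this is shifted down to 10240 >> 2 == 2560, so more deeply
   nested calls are preferred. With profile data, an edge whose inlining
   would shrink code (growth <= 0) is mapped next to INT_MIN and always wins,
   while among growing edges a hotter one (larger count relative to max_count)
   gets a proportionally more negative key for the same growth. */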
437
438 /* Recompute heap nodes for each caller edge of NODE. */
439
440 static void
441 update_caller_keys (fibheap_t heap, struct cgraph_node *node,
442 bitmap updated_nodes)
443 {
444 struct cgraph_edge *edge;
445 const char *failed_reason;
446
447 if (!node->local.inlinable || node->local.disregard_inline_limits
448 || node->global.inlined_to)
449 return;
450 if (bitmap_bit_p (updated_nodes, node->uid))
451 return;
452 bitmap_set_bit (updated_nodes, node->uid);
453 node->global.estimated_growth = INT_MIN;
454
455 if (!node->local.inlinable)
456 return;
457 /* Prune out edges we won't inline into anymore. */
458 if (!cgraph_default_inline_p (node, &failed_reason))
459 {
460 for (edge = node->callers; edge; edge = edge->next_caller)
461 if (edge->aux)
462 {
463 fibheap_delete_node (heap, edge->aux);
464 edge->aux = NULL;
465 if (edge->inline_failed)
466 edge->inline_failed = failed_reason;
467 }
468 return;
469 }
470
471 for (edge = node->callers; edge; edge = edge->next_caller)
472 if (edge->inline_failed)
473 {
474 int badness = cgraph_edge_badness (edge);
475 if (edge->aux)
476 {
477 fibnode_t n = edge->aux;
478 gcc_assert (n->data == edge);
479 if (n->key == badness)
480 continue;
481
482 /* fibheap_replace_key only decreases keys; otherwise delete the node and re-insert it below. */
483 if (fibheap_replace_key (heap, n, badness))
484 continue;
485 fibheap_delete_node (heap, edge->aux);
486 }
487 edge->aux = fibheap_insert (heap, badness, edge);
488 }
489 }
490
491 /* Recompute heap nodes for the caller edges of each of NODE's callees. */
492
493 static void
494 update_callee_keys (fibheap_t heap, struct cgraph_node *node,
495 bitmap updated_nodes)
496 {
497 struct cgraph_edge *e;
498 node->global.estimated_growth = INT_MIN;
499
500 for (e = node->callees; e; e = e->next_callee)
501 if (e->inline_failed)
502 update_caller_keys (heap, e->callee, updated_nodes);
503 else if (!e->inline_failed)
504 update_callee_keys (heap, e->callee, updated_nodes);
505 }
506
507 /* Enqueue into the priority queue HEAP all recursive calls to NODE found in
508 WHERE, prioritized by how eagerly we want to inline each call. */
509
510 static void
511 lookup_recursive_calls (struct cgraph_node *node, struct cgraph_node *where,
512 fibheap_t heap)
513 {
514 static int priority;
515 struct cgraph_edge *e;
516 for (e = where->callees; e; e = e->next_callee)
517 if (e->callee == node)
518 {
519 /* When profile feedback is available, prioritize by the expected number
520 of calls. Without profile feedback we maintain a simple queue
521 to order candidates by recursion depth. */
522 fibheap_insert (heap,
523 !max_count ? priority++
524 : -(e->count / ((max_count + (1<<24) - 1) / (1<<24))),
525 e);
526 }
527 for (e = where->callees; e; e = e->next_callee)
528 if (!e->inline_failed)
529 lookup_recursive_calls (node, e->callee, heap);
530 }
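
/* Note on the fibheap key above: with profile feedback the count is divided
   by ceil (max_count / 2^24) so that even very large gcov counts fit into the
   integer key; e.g. (assumed numbers) with max_count == 2^30 the divisor is
   64 and an edge executed 6400 times gets key -6400 / 64 == -100. Hotter
   edges therefore come out of the heap first. */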
531
532 /* Find callgraph nodes that close a cycle in the graph. The
533 resulting hashtab can be used to avoid walking the cycles.
534 Uses the cgraph nodes' ->aux field, which needs to be zero
535 before and will be zero after the operation. */
536
537 static void
538 cgraph_find_cycles (struct cgraph_node *node, htab_t cycles)
539 {
540 struct cgraph_edge *e;
541
542 if (node->aux)
543 {
544 void **slot;
545 slot = htab_find_slot (cycles, node, INSERT);
546 if (!*slot)
547 {
548 if (dump_file)
549 fprintf (dump_file, "Cycle contains %s\n", cgraph_node_name (node));
550 *slot = node;
551 }
552 return;
553 }
554
555 node->aux = node;
556 for (e = node->callees; e; e = e->next_callee)
557 cgraph_find_cycles (e->callee, cycles);
558 node->aux = 0;
559 }
560
561 /* Flatten the cgraph node. We have to be careful when recursing
562 so as not to run endlessly in cycles of the callgraph.
563 We do so by using a hashtab of cycle-entering nodes as generated
564 by cgraph_find_cycles. */
565
566 static void
567 cgraph_flatten_node (struct cgraph_node *node, htab_t cycles)
568 {
569 struct cgraph_edge *e;
570
571 for (e = node->callees; e; e = e->next_callee)
572 {
573 /* Inline the call, if possible, and recurse. Be sure we are not
574 entering callgraph cycles here. */
575 if (e->inline_failed
576 && e->callee->local.inlinable
577 && !cgraph_recursive_inlining_p (node, e->callee,
578 &e->inline_failed)
579 && !htab_find (cycles, e->callee))
580 {
581 if (dump_file)
582 fprintf (dump_file, " inlining %s", cgraph_node_name (e->callee));
583 cgraph_mark_inline_edge (e, true);
584 cgraph_flatten_node (e->callee, cycles);
585 }
586 else if (dump_file)
587 fprintf (dump_file, " !inlining %s", cgraph_node_name (e->callee));
588 }
589 }
590
591 /* Decide on recursive inlining: when the function has recursive calls,
592 inline until the body size reaches the given limit. */
593
594 static bool
595 cgraph_decide_recursive_inlining (struct cgraph_node *node)
596 {
597 int limit = PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE_AUTO);
598 int max_depth = PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH_AUTO);
599 int probability = PARAM_VALUE (PARAM_MIN_INLINE_RECURSIVE_PROBABILITY);
600 fibheap_t heap;
601 struct cgraph_edge *e;
602 struct cgraph_node *master_clone, *next;
603 int depth = 0;
604 int n = 0;
605
606 if (DECL_DECLARED_INLINE_P (node->decl))
607 {
608 limit = PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE);
609 max_depth = PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH);
610 }
611
612 /* Make sure that function is small enough to be considered for inlining. */
613 if (!max_depth
614 || cgraph_estimate_size_after_inlining (1, node, node) >= limit)
615 return false;
616 heap = fibheap_new ();
617 lookup_recursive_calls (node, node, heap);
618 if (fibheap_empty (heap))
619 {
620 fibheap_delete (heap);
621 return false;
622 }
623
624 if (dump_file)
625 fprintf (dump_file,
626 " Performing recursive inlining on %s\n",
627 cgraph_node_name (node));
628
629 /* We need original clone to copy around. */
630 master_clone = cgraph_clone_node (node, node->count, 1, false);
631 master_clone->needed = true;
632 for (e = master_clone->callees; e; e = e->next_callee)
633 if (!e->inline_failed)
634 cgraph_clone_inlined_nodes (e, true, false);
635
636 /* Do the inlining and update the list of recursive calls during the process. */
637 while (!fibheap_empty (heap)
638 && (cgraph_estimate_size_after_inlining (1, node, master_clone)
639 <= limit))
640 {
641 struct cgraph_edge *curr = fibheap_extract_min (heap);
642 struct cgraph_node *cnode;
643
644 depth = 1;
645 for (cnode = curr->caller;
646 cnode->global.inlined_to; cnode = cnode->callers->caller)
647 if (node->decl == curr->callee->decl)
648 depth++;
649 if (depth > max_depth)
650 {
651 if (dump_file)
652 fprintf (dump_file,
653 " maxmal depth reached\n");
654 continue;
655 }
656
657 if (max_count)
658 {
659 if (!cgraph_maybe_hot_edge_p (curr))
660 {
661 if (dump_file)
662 fprintf (dump_file, " Not inlining cold call\n");
663 continue;
664 }
665 if (curr->count * 100 / node->count < probability)
666 {
667 if (dump_file)
668 fprintf (dump_file,
669 " Probability of edge is too small\n");
670 continue;
671 }
672 }
673
674 if (dump_file)
675 {
676 fprintf (dump_file,
677 " Inlining call of depth %i", depth);
678 if (node->count)
679 {
680 fprintf (dump_file, " called approx. %.2f times per call",
681 (double)curr->count / node->count);
682 }
683 fprintf (dump_file, "\n");
684 }
685 cgraph_redirect_edge_callee (curr, master_clone);
686 cgraph_mark_inline_edge (curr, false);
687 lookup_recursive_calls (node, curr->callee, heap);
688 n++;
689 }
690 if (!fibheap_empty (heap) && dump_file)
691 fprintf (dump_file, " Recursive inlining growth limit met.\n");
692
693 fibheap_delete (heap);
694 if (dump_file)
695 fprintf (dump_file,
696 "\n Inlined %i times, body grown from %i to %i insns\n", n,
697 master_clone->global.insns, node->global.insns);
698
699 /* Remove the master clone we used for inlining. We rely on the fact that
700 clones inlined into the master clone get queued just before the master
701 clone, so we don't need recursion. */
702 for (node = cgraph_nodes; node != master_clone;
703 node = next)
704 {
705 next = node->next;
706 if (node->global.inlined_to == master_clone)
707 cgraph_remove_node (node);
708 }
709 cgraph_remove_node (master_clone);
710 /* FIXME: Recursive inlining actually reduces the number of calls of the
711 function. At this point we should probably walk the function and
712 inline clones and compensate the counts accordingly. This probably
713 doesn't matter much in practice. */
714 return n > 0;
715 }
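
/* Illustrative note on the limits above (the figures are assumptions, not
   the documented parameter defaults): the loop keeps inlining recursive call
   sites into the master clone until either the estimated size would exceed
   the applicable max-inline-insns-recursive limit or every remaining
   candidate is nested deeper than the applicable max-inline-recursive-depth.
   E.g. if each inlined copy adds roughly 40 insns net of the removed call,
   a 50-insn body with a size limit of 450 admits about ten recursive copies
   before the size test stops the loop. */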
716
717 /* Set inline_failed for all callers of given function to REASON. */
718
719 static void
720 cgraph_set_inline_failed (struct cgraph_node *node, const char *reason)
721 {
722 struct cgraph_edge *e;
723
724 if (dump_file)
725 fprintf (dump_file, "Inlining failed: %s\n", reason);
726 for (e = node->callers; e; e = e->next_caller)
727 if (e->inline_failed)
728 e->inline_failed = reason;
729 }
730
731 /* We use a greedy algorithm for inlining small functions:
732 all inline candidates are put into a prioritized heap based on the estimated
733 growth of the overall number of instructions; the estimates are then updated.
734
735 INLINED and INLINED_CALLEES are just pointers to arrays large enough
736 to be passed to cgraph_inlined_into and cgraph_inlined_callees. */
737
738 static void
739 cgraph_decide_inlining_of_small_functions (void)
740 {
741 struct cgraph_node *node;
742 struct cgraph_edge *edge;
743 const char *failed_reason;
744 fibheap_t heap = fibheap_new ();
745 bitmap updated_nodes = BITMAP_ALLOC (NULL);
746
747 if (dump_file)
748 fprintf (dump_file, "\nDeciding on smaller functions:\n");
749
750 /* Put all inline candidates into the heap. */
751
752 for (node = cgraph_nodes; node; node = node->next)
753 {
754 if (!node->local.inlinable || !node->callers
755 || node->local.disregard_inline_limits)
756 continue;
757 if (dump_file)
758 fprintf (dump_file, "Considering inline candidate %s.\n", cgraph_node_name (node));
759
760 node->global.estimated_growth = INT_MIN;
761 if (!cgraph_default_inline_p (node, &failed_reason))
762 {
763 cgraph_set_inline_failed (node, failed_reason);
764 continue;
765 }
766
767 for (edge = node->callers; edge; edge = edge->next_caller)
768 if (edge->inline_failed)
769 {
770 gcc_assert (!edge->aux);
771 edge->aux = fibheap_insert (heap, cgraph_edge_badness (edge), edge);
772 }
773 }
774 while (overall_insns <= max_insns && (edge = fibheap_extract_min (heap)))
775 {
776 int old_insns = overall_insns;
777 struct cgraph_node *where;
778 int growth =
779 cgraph_estimate_size_after_inlining (1, edge->caller, edge->callee);
780
781 growth -= edge->caller->global.insns;
782
783 if (dump_file)
784 {
785 fprintf (dump_file,
786 "\nConsidering %s with %i insns\n",
787 cgraph_node_name (edge->callee),
788 edge->callee->global.insns);
789 fprintf (dump_file,
790 " to be inlined into %s\n"
791 " Estimated growth after inlined into all callees is %+i insns.\n"
792 " Estimated badness is %i.\n",
793 cgraph_node_name (edge->caller),
794 cgraph_estimate_growth (edge->callee),
795 cgraph_edge_badness (edge));
796 if (edge->count)
797 fprintf (dump_file," Called "HOST_WIDEST_INT_PRINT_DEC"x\n", edge->count);
798 }
799 gcc_assert (edge->aux);
800 edge->aux = NULL;
801 if (!edge->inline_failed)
802 continue;
803
804 /* When profile info is not available we don't weight in any way the
805 position of the call in the procedure itself. This means that if a call of
806 function A from function B seems profitable to inline, the recursive
807 call of function A in the inline copy of A in B will look profitable too,
808 and we end up inlining until we reach the maximal function growth. This
809 is not a good idea, so prohibit the recursive inlining.
810
811 ??? When the frequencies are taken into account we might not need this
812 restriction. */
813 if (!max_count)
814 {
815 where = edge->caller;
816 while (where->global.inlined_to)
817 {
818 if (where->decl == edge->callee->decl)
819 break;
820 where = where->callers->caller;
821 }
822 if (where->global.inlined_to)
823 {
824 edge->inline_failed
825 = (edge->callee->local.disregard_inline_limits ? N_("recursive inlining") : "");
826 if (dump_file)
827 fprintf (dump_file, " inline_failed:Recursive inlining performed only for function itself.\n");
828 continue;
829 }
830 }
831
832 if (!cgraph_maybe_hot_edge_p (edge) && growth > 0)
833 {
834 if (!cgraph_recursive_inlining_p (edge->caller, edge->callee,
835 &edge->inline_failed))
836 {
837 edge->inline_failed =
838 N_("call is unlikely");
839 if (dump_file)
840 fprintf (dump_file, " inline_failed:%s.\n", edge->inline_failed);
841 }
842 continue;
843 }
844 if (!cgraph_default_inline_p (edge->callee, &edge->inline_failed))
845 {
846 if (!cgraph_recursive_inlining_p (edge->caller, edge->callee,
847 &edge->inline_failed))
848 {
849 if (dump_file)
850 fprintf (dump_file, " inline_failed:%s.\n", edge->inline_failed);
851 }
852 continue;
853 }
854 if (cgraph_recursive_inlining_p (edge->caller, edge->callee,
855 &edge->inline_failed))
856 {
857 where = edge->caller;
858 if (where->global.inlined_to)
859 where = where->global.inlined_to;
860 if (!cgraph_decide_recursive_inlining (where))
861 continue;
862 update_callee_keys (heap, where, updated_nodes);
863 }
864 else
865 {
866 struct cgraph_node *callee;
867 if (!cgraph_check_inline_limits (edge->caller, edge->callee,
868 &edge->inline_failed, true))
869 {
870 if (dump_file)
871 fprintf (dump_file, " Not inlining into %s:%s.\n",
872 cgraph_node_name (edge->caller), edge->inline_failed);
873 continue;
874 }
875 callee = edge->callee;
876 cgraph_mark_inline_edge (edge, true);
877 update_callee_keys (heap, callee, updated_nodes);
878 }
879 where = edge->caller;
880 if (where->global.inlined_to)
881 where = where->global.inlined_to;
882
883 /* Our profitability metric can depend on local properties
884 such as the number of inlinable calls and the size of the function body.
885 After inlining these properties might change for the function we
886 inlined into (since its body size changed) and for the functions
887 called by the function we inlined (since the number of their inlinable
888 callers might change). */
889 update_caller_keys (heap, where, updated_nodes);
890 bitmap_clear (updated_nodes);
891
892 if (dump_file)
893 {
894 fprintf (dump_file,
895 " Inlined into %s which now has %i insns,"
896 "net change of %+i insns.\n",
897 cgraph_node_name (edge->caller),
898 edge->caller->global.insns,
899 overall_insns - old_insns);
900 }
901 }
902 while ((edge = fibheap_extract_min (heap)) != NULL)
903 {
904 gcc_assert (edge->aux);
905 edge->aux = NULL;
906 if (!edge->callee->local.disregard_inline_limits && edge->inline_failed
907 && !cgraph_recursive_inlining_p (edge->caller, edge->callee,
908 &edge->inline_failed))
909 edge->inline_failed = N_("--param inline-unit-growth limit reached");
910 }
911 fibheap_delete (heap);
912 BITMAP_FREE (updated_nodes);
913 }
914
915 /* Decide on the inlining. We do so in topological order to avoid the
916 expense of updating the data structures. */
917
918 static unsigned int
919 cgraph_decide_inlining (void)
920 {
921 struct cgraph_node *node;
922 int nnodes;
923 struct cgraph_node **order =
924 XCNEWVEC (struct cgraph_node *, cgraph_n_nodes);
925 int old_insns = 0;
926 int i;
927
928 max_count = 0;
929 for (node = cgraph_nodes; node; node = node->next)
930 if (node->analyzed && (node->needed || node->reachable))
931 {
932 struct cgraph_edge *e;
933
934 /* At the moment, no IPA passes change function bodies before inlining.
935 Save some time by not recomputing function body sizes if early inlining
936 already did so. */
937 if (!flag_early_inlining)
938 node->local.self_insns = node->global.insns
939 = estimate_num_insns (node->decl);
940
941 initial_insns += node->local.self_insns;
942 gcc_assert (node->local.self_insns == node->global.insns);
943 for (e = node->callees; e; e = e->next_callee)
944 if (max_count < e->count)
945 max_count = e->count;
946 }
947 overall_insns = initial_insns;
948 gcc_assert (!max_count || (profile_info && flag_branch_probabilities));
949
950 max_insns = overall_insns;
951 if (max_insns < PARAM_VALUE (PARAM_LARGE_UNIT_INSNS))
952 max_insns = PARAM_VALUE (PARAM_LARGE_UNIT_INSNS);
953
954 max_insns = ((HOST_WIDEST_INT) max_insns
955 * (100 + PARAM_VALUE (PARAM_INLINE_UNIT_GROWTH)) / 100);
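
 /* Illustrative arithmetic (assumed values, not defaults): with
    initial_insns == 20000 and --param inline-unit-growth=50 the cap becomes
    20000 * 150 / 100 == 30000 insns, i.e. the small function inliner below
    may grow the unit by at most 50%. For very small units the
    --param large-unit-insns floor applies first. */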
956
957 nnodes = cgraph_postorder (order);
958
959 if (dump_file)
960 fprintf (dump_file,
961 "\nDeciding on inlining. Starting with %i insns.\n",
962 initial_insns);
963
964 for (node = cgraph_nodes; node; node = node->next)
965 node->aux = 0;
966
967 if (dump_file)
968 fprintf (dump_file, "\nInlining always_inline functions:\n");
969
970 /* In the first pass mark all always_inline edges. Do this with a priority
971 so none of our later choices will make this impossible. */
972 for (i = nnodes - 1; i >= 0; i--)
973 {
974 struct cgraph_edge *e, *next;
975
976 node = order[i];
977
978 /* Handle nodes to be flattened, but don't update overall unit size. */
979 if (lookup_attribute ("flatten", DECL_ATTRIBUTES (node->decl)) != NULL)
980 {
981 int old_overall_insns = overall_insns;
982 htab_t cycles;
983 if (dump_file)
984 fprintf (dump_file,
985 "Flattening %s\n", cgraph_node_name (node));
986 cycles = htab_create (7, htab_hash_pointer, htab_eq_pointer, NULL);
987 cgraph_find_cycles (node, cycles);
988 cgraph_flatten_node (node, cycles);
989 htab_delete (cycles);
990 overall_insns = old_overall_insns;
991 /* We don't need to consider always_inline functions inside the flattened
992 function anymore. */
993 continue;
994 }
995
996 if (!node->local.disregard_inline_limits)
997 continue;
998 if (dump_file)
999 fprintf (dump_file,
1000 "\nConsidering %s %i insns (always inline)\n",
1001 cgraph_node_name (node), node->global.insns);
1002 old_insns = overall_insns;
1003 for (e = node->callers; e; e = next)
1004 {
1005 next = e->next_caller;
1006 if (!e->inline_failed)
1007 continue;
1008 if (cgraph_recursive_inlining_p (e->caller, e->callee,
1009 &e->inline_failed))
1010 continue;
1011 cgraph_mark_inline_edge (e, true);
1012 if (dump_file)
1013 fprintf (dump_file,
1014 " Inlined into %s which now has %i insns.\n",
1015 cgraph_node_name (e->caller),
1016 e->caller->global.insns);
1017 }
1018 if (dump_file)
1019 fprintf (dump_file,
1020 " Inlined for a net change of %+i insns.\n",
1021 overall_insns - old_insns);
1022 }
1023
1024 if (!flag_really_no_inline)
1025 cgraph_decide_inlining_of_small_functions ();
1026
1027 if (!flag_really_no_inline
1028 && flag_inline_functions_called_once)
1029 {
1030 if (dump_file)
1031 fprintf (dump_file, "\nDeciding on functions called once:\n");
1032
1033 /* And finally decide what functions are called once. */
1034
1035 for (i = nnodes - 1; i >= 0; i--)
1036 {
1037 node = order[i];
1038
1039 if (node->callers && !node->callers->next_caller && !node->needed
1040 && node->local.inlinable && node->callers->inline_failed
1041 && !DECL_EXTERNAL (node->decl) && !DECL_COMDAT (node->decl))
1042 {
1043 if (dump_file)
1044 {
1045 fprintf (dump_file,
1046 "\nConsidering %s %i insns.\n",
1047 cgraph_node_name (node), node->global.insns);
1048 fprintf (dump_file,
1049 " Called once from %s %i insns.\n",
1050 cgraph_node_name (node->callers->caller),
1051 node->callers->caller->global.insns);
1052 }
1053
1054 old_insns = overall_insns;
1055
1056 if (cgraph_check_inline_limits (node->callers->caller, node,
1057 NULL, false))
1058 {
1059 cgraph_mark_inline (node->callers);
1060 if (dump_file)
1061 fprintf (dump_file,
1062 " Inlined into %s which now has %i insns"
1063 " for a net change of %+i insns.\n",
1064 cgraph_node_name (node->callers->caller),
1065 node->callers->caller->global.insns,
1066 overall_insns - old_insns);
1067 }
1068 else
1069 {
1070 if (dump_file)
1071 fprintf (dump_file,
1072 " Inline limit reached, not inlined.\n");
1073 }
1074 }
1075 }
1076 }
1077
1078 if (dump_file)
1079 fprintf (dump_file,
1080 "\nInlined %i calls, eliminated %i functions, "
1081 "%i insns turned to %i insns.\n\n",
1082 ncalls_inlined, nfunctions_inlined, initial_insns,
1083 overall_insns);
1084 free (order);
1085 return 0;
1086 }
1087
1088 /* Decide on inlining of calls in NODE. Unlike cgraph_decide_inlining, this
1089 considers only one function at a time; it is used by early inlining and in non-unit-at-a-time mode. */
1090
1091 bool
1092 cgraph_decide_inlining_incrementally (struct cgraph_node *node, bool early)
1093 {
1094 struct cgraph_edge *e;
1095 bool inlined = false;
1096 const char *failed_reason;
1097
1098 /* First of all look for always inline functions. */
1099 for (e = node->callees; e; e = e->next_callee)
1100 if (e->callee->local.disregard_inline_limits
1101 && e->inline_failed
1102 && !cgraph_recursive_inlining_p (node, e->callee, &e->inline_failed)
1103 /* ??? It is possible that renaming variable removed the function body
1104 in duplicate_decls. See gcc.c-torture/compile/20011119-2.c */
1105 && (DECL_SAVED_TREE (e->callee->decl) || e->callee->inline_decl))
1106 {
1107 if (dump_file && early)
1108 {
1109 fprintf (dump_file, " Early inlining %s",
1110 cgraph_node_name (e->callee));
1111 fprintf (dump_file, " into %s\n", cgraph_node_name (node));
1112 }
1113 cgraph_mark_inline (e);
1114 inlined = true;
1115 }
1116
1117 /* Now do the automatic inlining. */
1118 if (!flag_really_no_inline)
1119 for (e = node->callees; e; e = e->next_callee)
1120 if (e->callee->local.inlinable
1121 && e->inline_failed
1122 && !e->callee->local.disregard_inline_limits
1123 && !cgraph_recursive_inlining_p (node, e->callee, &e->inline_failed)
1124 && (!early
1125 || (cgraph_estimate_size_after_inlining (1, e->caller, e->callee)
1126 <= e->caller->global.insns))
1127 && cgraph_check_inline_limits (node, e->callee, &e->inline_failed,
1128 false)
1129 && (DECL_SAVED_TREE (e->callee->decl) || e->callee->inline_decl))
1130 {
1131 if (cgraph_default_inline_p (e->callee, &failed_reason))
1132 {
1133 if (dump_file && early)
1134 {
1135 fprintf (dump_file, " Early inlining %s",
1136 cgraph_node_name (e->callee));
1137 fprintf (dump_file, " into %s\n", cgraph_node_name (node));
1138 }
1139 cgraph_mark_inline (e);
1140 inlined = true;
1141 }
1142 else if (!early)
1143 e->inline_failed = failed_reason;
1144 }
1145 if (early && inlined)
1146 {
1147 timevar_push (TV_INTEGRATION);
1148 push_cfun (DECL_STRUCT_FUNCTION (node->decl));
1149 tree_register_cfg_hooks ();
1150 current_function_decl = node->decl;
1151 optimize_inline_calls (current_function_decl);
1152 node->local.self_insns = node->global.insns;
1153 current_function_decl = NULL;
1154 pop_cfun ();
1155 timevar_pop (TV_INTEGRATION);
1156 }
1157 return inlined;
1158 }
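
/* Illustrative note on the EARLY restriction above (numbers are assumptions):
   during early inlining a call is only considered when
   cgraph_estimate_size_after_inlining does not exceed the caller's current
   size, i.e. when the callee body minus the estimated call overhead is not
   positive. For instance a 10-insn callee with an estimated call cost of 16
   insns shrinks its caller and is inlined early; a 40-insn callee with the
   same call cost is left for the real IPA inliner. */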
1159
1160 /* Return true when inlining shall be performed. */
1161 static bool
1162 cgraph_gate_inlining (void)
1163 {
1164 return flag_inline_trees;
1165 }
1166
1167 struct tree_opt_pass pass_ipa_inline =
1168 {
1169 "inline", /* name */
1170 cgraph_gate_inlining, /* gate */
1171 cgraph_decide_inlining, /* execute */
1172 NULL, /* sub */
1173 NULL, /* next */
1174 0, /* static_pass_number */
1175 TV_INLINE_HEURISTICS, /* tv_id */
1176 0, /* properties_required */
1177 PROP_cfg, /* properties_provided */
1178 0, /* properties_destroyed */
1179 0, /* todo_flags_start */
1180 TODO_dump_cgraph | TODO_dump_func
1181 | TODO_remove_functions, /* todo_flags_finish */
1182 0 /* letter */
1183 };
1184
1185 /* Because inlining might remove no-longer-reachable nodes, we need to
1186 keep the array visible to the garbage collector to avoid reading from
1187 collected-out nodes. */
1188 static int nnodes;
1189 static GTY ((length ("nnodes"))) struct cgraph_node **order;
1190
1191 /* Do inlining of small functions. Doing so early helps profiling and other
1192 passes to be somewhat more effective and avoids some code duplication in
1193 the later real inlining pass for testcases with very many function calls. */
1194 static unsigned int
1195 cgraph_early_inlining (void)
1196 {
1197 struct cgraph_node *node;
1198 int i;
1199
1200 if (sorrycount || errorcount)
1201 return 0;
1202 #ifdef ENABLE_CHECKING
1203 for (node = cgraph_nodes; node; node = node->next)
1204 gcc_assert (!node->aux);
1205 #endif
1206
1207 order = ggc_alloc (sizeof (*order) * cgraph_n_nodes);
1208 nnodes = cgraph_postorder (order);
1209 for (i = nnodes - 1; i >= 0; i--)
1210 {
1211 node = order[i];
1212 if (node->analyzed && (node->needed || node->reachable))
1213 node->local.self_insns = node->global.insns
1214 = estimate_num_insns (node->decl);
1215 }
1216 for (i = nnodes - 1; i >= 0; i--)
1217 {
1218 node = order[i];
1219 if (node->analyzed && node->local.inlinable
1220 && (node->needed || node->reachable)
1221 && node->callers)
1222 {
1223 if (cgraph_decide_inlining_incrementally (node, true))
1224 ggc_collect ();
1225 }
1226 }
1227 #ifdef ENABLE_CHECKING
1228 for (node = cgraph_nodes; node; node = node->next)
1229 gcc_assert (!node->global.inlined_to);
1230 #endif
1231 ggc_free (order);
1232 order = NULL;
1233 nnodes = 0;
1234 return 0;
1235 }
1236
1237 /* Return true when early inlining shall be performed. */
1238 static bool
1239 cgraph_gate_early_inlining (void)
1240 {
1241 return flag_inline_trees && flag_early_inlining;
1242 }
1243
1244 struct tree_opt_pass pass_early_ipa_inline =
1245 {
1246 "einline", /* name */
1247 cgraph_gate_early_inlining, /* gate */
1248 cgraph_early_inlining, /* execute */
1249 NULL, /* sub */
1250 NULL, /* next */
1251 0, /* static_pass_number */
1252 TV_INLINE_HEURISTICS, /* tv_id */
1253 0, /* properties_required */
1254 PROP_cfg, /* properties_provided */
1255 0, /* properties_destroyed */
1256 0, /* todo_flags_start */
1257 TODO_dump_cgraph | TODO_dump_func
1258 | TODO_remove_functions, /* todo_flags_finish */
1259 0 /* letter */
1260 };
1261
1262 #include "gt-ipa-inline.h"