1 /* Inlining decision heuristics.
2 Copyright (C) 2003, 2004 Free Software Foundation, Inc.
3 Contributed by Jan Hubicka
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
22 /* Inlining decision heuristics
24 We separate inlining decisions from the inliner itself and store them
25 inside the callgraph as a so-called inline plan. Refer to cgraph.c
26 documentation about particular representation of inline plans in the
29 There are three major parts of this file:
31 cgraph_mark_inline implementation
33 This function allows to mark given call inline and performs necessary
34 modifications of cgraph (production of the clones and updating overall
37 inlining heuristics limits
39 These functions allow to check that particular inlining is allowed
40 by the limits specified by user (allowed function growth, overall unit
45 This is implementation of IPA pass aiming to get as much of benefit
46 from inlining obeying the limits checked above.
48 The implementation of particular heuristics is separated from
49 the rest of code to make it easier to replace it with more complicated
50 implementation in the future. The rest of inlining code acts as a
51 library aimed to modify the callgraph and verify that the parameters
52 on code size growth fits.
54 To mark given call inline, use cgraph_mark_inline function, the
55 verification is performed by cgraph_default_inline_p and
56 cgraph_check_inline_limits.
58 The heuristics implements simple knapsack style algorithm ordering
59 all functions by their "profitability" (estimated by code size growth)
60 and inlining them in priority order.
62 cgraph_decide_inlining implements heuristics taking whole callgraph
63 into account, while cgraph_decide_inlining_incrementally considers
64 only one function at a time and is used in non-unit-at-a-time mode.
66 The inliner itself is split into several passes:
68 pass_inline_parameters
70 This pass computes local properties of functions that are used by inliner:
71 estimated function body size, whether function is inlinable at all and
72 stack frame consumption.
74 Before executing any of inliner passes, this local pass has to be applied
75 to each function in the callgraph (ie run as subpass of some earlier
76 IPA pass). The results are made out of date by any optimization applied
81 Simple local inlining pass inlining callees into current function. This
82 pass makes no global whole compilation unit analysis and thus, when allowed
83 to do inlining that expands code size, it might result in unbounded growth of
86 This is the main inlining pass in non-unit-at-a-time.
88 With unit-at-a-time the pass is run during conversion into SSA form.
89 Only functions already converted into SSA form are inlined, so the
90 conversion must happen in topological order on the callgraph (that is
91 maintained by pass manager). The functions after inlining are early
92 optimized so the early inliner sees unoptimized function itself, but
93 all considered callees are already optimized allowing it to unfold
94 abstraction penalty on C++ effectively and cheaply.
96 pass_ipa_early_inlining
98 With profiling, the early inlining is also necessary to reduce
99 instrumentation costs on program with high abstraction penalty (doing
100 many redundant calls). This can't happen in parallel with early
101 optimization and profile instrumentation, because we would end up
102 re-instrumenting already instrumented function bodies we brought in via
105 To avoid this, this pass is executed as IPA pass before profiling. It is
106 simple wrapper to pass_early_inlining and ensures first inlining.
110 This is the main pass implementing simple greedy algorithm to do inlining
111 of small functions that results in overall growth of compilation unit and
112 inlining of functions called once. The pass compute just so called inline
113 plan (representation of inlining to be done in callgraph) and unlike early
114 inlining it is not performing the inlining itself.
118 This pass performs actual inlining according to pass_ipa_inline on given
119 function. Possibly the function body before inlining is saved when it is
120 needed for further inlining later.
125 #include "coretypes.h"
128 #include "tree-inline.h"
129 #include "langhooks.h"
132 #include "diagnostic.h"
137 #include "tree-pass.h"
139 #include "coverage.h"
141 #include "tree-flow.h"
143 /* Statistics we collect about inlining algorithm. */
/* Number of call sites inlined so far (reported in the final dump).  */
144 static int ncalls_inlined
;
/* Number of function bodies inlined; bumped when the out-of-line copy
   of a function becomes unnecessary and is reused.  */
145 static int nfunctions_inlined
;
/* Sum of self_insns of all analyzed functions before any inlining.  */
146 static int initial_insns
;
/* Running estimate of the unit-wide instruction count, updated as
   inlining decisions are made.  */
147 static int overall_insns
;
/* Cap on OVERALL_INSNS derived from --param large-unit-insns and
   --param inline-unit-growth.  */
148 static int max_insns
;
/* Largest execution count seen on any call edge; nonzero only when
   profile feedback is available.  */
149 static gcov_type max_count
;
151 /* Estimate size of the function after inlining WHAT into TO. */
154 cgraph_estimate_size_after_inlining (int times
, struct cgraph_node
*to
,
155 struct cgraph_node
*what
)
158 tree fndecl
= what
->decl
, arg
;
159 int call_insns
= PARAM_VALUE (PARAM_INLINE_CALL_COST
);
161 for (arg
= DECL_ARGUMENTS (fndecl
); arg
; arg
= TREE_CHAIN (arg
))
162 call_insns
+= estimate_move_cost (TREE_TYPE (arg
));
163 size
= (what
->global
.insns
- call_insns
) * times
+ to
->global
.insns
;
164 gcc_assert (size
>= 0);
168 /* E is expected to be an edge being inlined. Clone destination node of
169 the edge and redirect it to the new clone.
170 DUPLICATE is used for bookkeeping on whether we are actually creating new
171 clones or re-using node originally representing out-of-line function call.
174 cgraph_clone_inlined_nodes (struct cgraph_edge
*e
, bool duplicate
, bool update_original
)
179 /* We may eliminate the need for out-of-line copy to be output.
180 In that case just go ahead and re-use it. */
181 if (!e
->callee
->callers
->next_caller
182 && !e
->callee
->needed
183 && flag_unit_at_a_time
)
185 gcc_assert (!e
->callee
->global
.inlined_to
);
186 if (DECL_SAVED_TREE (e
->callee
->decl
))
187 overall_insns
-= e
->callee
->global
.insns
, nfunctions_inlined
++;
192 struct cgraph_node
*n
;
193 n
= cgraph_clone_node (e
->callee
, e
->count
, e
->loop_nest
,
195 cgraph_redirect_edge_callee (e
, n
);
199 if (e
->caller
->global
.inlined_to
)
200 e
->callee
->global
.inlined_to
= e
->caller
->global
.inlined_to
;
202 e
->callee
->global
.inlined_to
= e
->caller
;
203 e
->callee
->global
.stack_frame_offset
204 = e
->caller
->global
.stack_frame_offset
+ e
->caller
->local
.estimated_self_stack_size
;
205 peak
= e
->callee
->global
.stack_frame_offset
+ e
->callee
->local
.estimated_self_stack_size
;
206 if (e
->callee
->global
.inlined_to
->global
.estimated_stack_size
< peak
)
207 e
->callee
->global
.inlined_to
->global
.estimated_stack_size
= peak
;
209 /* Recursively clone all bodies. */
210 for (e
= e
->callee
->callees
; e
; e
= e
->next_callee
)
211 if (!e
->inline_failed
)
212 cgraph_clone_inlined_nodes (e
, duplicate
, update_original
);
215 /* Mark edge E as inlined and update callgraph accordingly.
216 UPDATE_ORIGINAL specify whether profile of original function should be
220 cgraph_mark_inline_edge (struct cgraph_edge
*e
, bool update_original
)
222 int old_insns
= 0, new_insns
= 0;
223 struct cgraph_node
*to
= NULL
, *what
;
225 if (e
->callee
->inline_decl
)
226 cgraph_redirect_edge_callee (e
, cgraph_node (e
->callee
->inline_decl
));
228 gcc_assert (e
->inline_failed
);
229 e
->inline_failed
= NULL
;
231 if (!e
->callee
->global
.inlined
&& flag_unit_at_a_time
)
232 DECL_POSSIBLY_INLINED (e
->callee
->decl
) = true;
233 e
->callee
->global
.inlined
= true;
235 cgraph_clone_inlined_nodes (e
, true, update_original
);
239 /* Now update size of caller and all functions caller is inlined into. */
240 for (;e
&& !e
->inline_failed
; e
= e
->caller
->callers
)
242 old_insns
= e
->caller
->global
.insns
;
243 new_insns
= cgraph_estimate_size_after_inlining (1, e
->caller
,
245 gcc_assert (new_insns
>= 0);
247 to
->global
.insns
= new_insns
;
249 gcc_assert (what
->global
.inlined_to
== to
);
250 if (new_insns
> old_insns
)
251 overall_insns
+= new_insns
- old_insns
;
255 /* Mark all calls of EDGE->CALLEE inlined into EDGE->CALLER.
256 Return following unredirected edge in the list of callers
259 static struct cgraph_edge
*
260 cgraph_mark_inline (struct cgraph_edge
*edge
)
262 struct cgraph_node
*to
= edge
->caller
;
263 struct cgraph_node
*what
= edge
->callee
;
264 struct cgraph_edge
*e
, *next
;
267 /* Look for all calls, mark them inline and clone recursively
268 all inlined functions. */
269 for (e
= what
->callers
; e
; e
= next
)
271 next
= e
->next_caller
;
272 if (e
->caller
== to
&& e
->inline_failed
)
274 cgraph_mark_inline_edge (e
, true);
284 /* Estimate the growth caused by inlining NODE into all callees. */
287 cgraph_estimate_growth (struct cgraph_node
*node
)
290 struct cgraph_edge
*e
;
291 if (node
->global
.estimated_growth
!= INT_MIN
)
292 return node
->global
.estimated_growth
;
294 for (e
= node
->callers
; e
; e
= e
->next_caller
)
295 if (e
->inline_failed
)
296 growth
+= (cgraph_estimate_size_after_inlining (1, e
->caller
, node
)
297 - e
->caller
->global
.insns
);
299 /* ??? Wrong for self recursive functions or cases where we decide to not
300 inline for different reasons, but it is not big deal as in that case
301 we will keep the body around, but we will also avoid some inlining. */
302 if (!node
->needed
&& !DECL_EXTERNAL (node
->decl
))
303 growth
-= node
->global
.insns
;
305 node
->global
.estimated_growth
= growth
;
309 /* Return false when inlining WHAT into TO is not good idea
310 as it would cause too large growth of function bodies.
311 When ONE_ONLY is true, assume that only one call site is going
312 to be inlined, otherwise figure out how many call sites in
313 TO calls WHAT and verify that all can be inlined.
317 cgraph_check_inline_limits (struct cgraph_node
*to
, struct cgraph_node
*what
,
318 const char **reason
, bool one_only
)
321 struct cgraph_edge
*e
;
324 HOST_WIDE_INT stack_size_limit
, inlined_stack
;
329 for (e
= to
->callees
; e
; e
= e
->next_callee
)
330 if (e
->callee
== what
)
333 if (to
->global
.inlined_to
)
334 to
= to
->global
.inlined_to
;
336 /* When inlining large function body called once into small function,
337 take the inlined function as base for limiting the growth. */
338 if (to
->local
.self_insns
> what
->local
.self_insns
)
339 limit
= to
->local
.self_insns
;
341 limit
= what
->local
.self_insns
;
343 limit
+= limit
* PARAM_VALUE (PARAM_LARGE_FUNCTION_GROWTH
) / 100;
345 /* Check the size after inlining against the function limits. But allow
346 the function to shrink if it went over the limits by forced inlining. */
347 newsize
= cgraph_estimate_size_after_inlining (times
, to
, what
);
348 if (newsize
>= to
->global
.insns
349 && newsize
> PARAM_VALUE (PARAM_LARGE_FUNCTION_INSNS
)
353 *reason
= N_("--param large-function-growth limit reached");
357 stack_size_limit
= to
->local
.estimated_self_stack_size
;
359 stack_size_limit
+= stack_size_limit
* PARAM_VALUE (PARAM_STACK_FRAME_GROWTH
) / 100;
361 inlined_stack
= (to
->global
.stack_frame_offset
362 + to
->local
.estimated_self_stack_size
363 + what
->global
.estimated_stack_size
);
364 if (inlined_stack
> stack_size_limit
365 && inlined_stack
> PARAM_VALUE (PARAM_LARGE_STACK_FRAME
))
368 *reason
= N_("--param large-stack-frame-growth limit reached");
374 /* Return true when function N is small enough to be inlined. */
377 cgraph_default_inline_p (struct cgraph_node
*n
, const char **reason
)
382 decl
= n
->inline_decl
;
383 if (!DECL_INLINE (decl
))
386 *reason
= N_("function not inlinable");
390 if (!DECL_STRUCT_FUNCTION (decl
)->cfg
)
393 *reason
= N_("function body not available");
397 if (DECL_DECLARED_INLINE_P (decl
))
399 if (n
->global
.insns
>= MAX_INLINE_INSNS_SINGLE
)
402 *reason
= N_("--param max-inline-insns-single limit reached");
408 if (n
->global
.insns
>= MAX_INLINE_INSNS_AUTO
)
411 *reason
= N_("--param max-inline-insns-auto limit reached");
419 /* Return true when inlining WHAT would create recursive inlining.
420 We call recursive inlining all cases where same function appears more than
421 once in the single recursion nest path in the inline graph. */
424 cgraph_recursive_inlining_p (struct cgraph_node
*to
,
425 struct cgraph_node
*what
,
429 if (to
->global
.inlined_to
)
430 recursive
= what
->decl
== to
->global
.inlined_to
->decl
;
432 recursive
= what
->decl
== to
->decl
;
433 /* Marking recursive function inline has sane semantic and thus we should
435 if (recursive
&& reason
)
436 *reason
= (what
->local
.disregard_inline_limits
437 ? N_("recursive inlining") : "");
441 /* Return true if the call can be hot. */
443 cgraph_maybe_hot_edge_p (struct cgraph_edge
*edge
)
445 if (profile_info
&& flag_branch_probabilities
447 <= profile_info
->sum_max
/ PARAM_VALUE (HOT_BB_COUNT_FRACTION
)))
452 /* A cost model driving the inlining heuristics in a way so the edges with
453 smallest badness are inlined first. After each inlining is performed
454 the costs of all caller edges of nodes affected are recomputed so the
455 metrics may accurately depend on values such as number of inlinable callers
456 of the function or function body size.
458 With profiling we use number of executions of each edge to drive the cost.
459 We also should distinguish hot and cold calls where the cold calls are
460 inlined into only when code size is overall improved.
464 cgraph_edge_badness (struct cgraph_edge
*edge
)
469 cgraph_estimate_size_after_inlining (1, edge
->caller
, edge
->callee
);
470 growth
-= edge
->caller
->global
.insns
;
472 /* Always prefer inlining saving code size. */
474 return INT_MIN
- growth
;
475 return ((int)((double)edge
->count
* INT_MIN
/ max_count
)) / growth
;
479 int nest
= MIN (edge
->loop_nest
, 8);
480 int badness
= cgraph_estimate_growth (edge
->callee
) * 256;
482 /* Decrease badness if call is nested. */
488 /* Make recursive inlining happen always after other inlining is done. */
489 if (cgraph_recursive_inlining_p (edge
->caller
, edge
->callee
, NULL
))
496 /* Recompute heap nodes for each of caller edge. */
499 update_caller_keys (fibheap_t heap
, struct cgraph_node
*node
,
500 bitmap updated_nodes
)
502 struct cgraph_edge
*edge
;
503 const char *failed_reason
;
505 if (!node
->local
.inlinable
|| node
->local
.disregard_inline_limits
506 || node
->global
.inlined_to
)
508 if (bitmap_bit_p (updated_nodes
, node
->uid
))
510 bitmap_set_bit (updated_nodes
, node
->uid
);
511 node
->global
.estimated_growth
= INT_MIN
;
513 if (!node
->local
.inlinable
)
515 /* Prune out edges we won't inline into anymore. */
516 if (!cgraph_default_inline_p (node
, &failed_reason
))
518 for (edge
= node
->callers
; edge
; edge
= edge
->next_caller
)
521 fibheap_delete_node (heap
, edge
->aux
);
523 if (edge
->inline_failed
)
524 edge
->inline_failed
= failed_reason
;
529 for (edge
= node
->callers
; edge
; edge
= edge
->next_caller
)
530 if (edge
->inline_failed
)
532 int badness
= cgraph_edge_badness (edge
);
535 fibnode_t n
= edge
->aux
;
536 gcc_assert (n
->data
== edge
);
537 if (n
->key
== badness
)
540 /* fibheap_replace_key only increase the keys. */
541 if (fibheap_replace_key (heap
, n
, badness
))
543 fibheap_delete_node (heap
, edge
->aux
);
545 edge
->aux
= fibheap_insert (heap
, badness
, edge
);
549 /* Recompute heap nodes for each of caller edges of each of callees. */
552 update_callee_keys (fibheap_t heap
, struct cgraph_node
*node
,
553 bitmap updated_nodes
)
555 struct cgraph_edge
*e
;
556 node
->global
.estimated_growth
= INT_MIN
;
558 for (e
= node
->callees
; e
; e
= e
->next_callee
)
559 if (e
->inline_failed
)
560 update_caller_keys (heap
, e
->callee
, updated_nodes
);
561 else if (!e
->inline_failed
)
562 update_callee_keys (heap
, e
->callee
, updated_nodes
);
565 /* Enqueue all recursive calls from NODE into priority queue depending on
566 how likely we want to recursively inline the call. */
569 lookup_recursive_calls (struct cgraph_node
*node
, struct cgraph_node
*where
,
573 struct cgraph_edge
*e
;
574 for (e
= where
->callees
; e
; e
= e
->next_callee
)
575 if (e
->callee
== node
)
577 /* When profile feedback is available, prioritize by expected number
578 of calls. Without profile feedback we maintain simple queue
579 to order candidates via recursive depths. */
580 fibheap_insert (heap
,
581 !max_count
? priority
++
582 : -(e
->count
/ ((max_count
+ (1<<24) - 1) / (1<<24))),
585 for (e
= where
->callees
; e
; e
= e
->next_callee
)
586 if (!e
->inline_failed
)
587 lookup_recursive_calls (node
, e
->callee
, heap
);
590 /* Find callgraph nodes closing a circle in the graph. The
591 resulting hashtab can be used to avoid walking the circles.
592 Uses the cgraph nodes ->aux field which needs to be zero
593 before and will be zero after operation. */
596 cgraph_find_cycles (struct cgraph_node
*node
, htab_t cycles
)
598 struct cgraph_edge
*e
;
603 slot
= htab_find_slot (cycles
, node
, INSERT
);
607 fprintf (dump_file
, "Cycle contains %s\n", cgraph_node_name (node
));
614 for (e
= node
->callees
; e
; e
= e
->next_callee
)
615 cgraph_find_cycles (e
->callee
, cycles
);
619 /* Flatten the cgraph node. We have to be careful in recursing
620 as to not run endlessly in circles of the callgraph.
621 We do so by using a hashtab of cycle entering nodes as generated
622 by cgraph_find_cycles. */
625 cgraph_flatten_node (struct cgraph_node
*node
, htab_t cycles
)
627 struct cgraph_edge
*e
;
629 for (e
= node
->callees
; e
; e
= e
->next_callee
)
631 /* Inline call, if possible, and recurse. Be sure we are not
632 entering callgraph circles here. */
634 && e
->callee
->local
.inlinable
635 && !cgraph_recursive_inlining_p (node
, e
->callee
,
637 && !htab_find (cycles
, e
->callee
))
640 fprintf (dump_file
, " inlining %s", cgraph_node_name (e
->callee
));
641 cgraph_mark_inline_edge (e
, true);
642 cgraph_flatten_node (e
->callee
, cycles
);
645 fprintf (dump_file
, " !inlining %s", cgraph_node_name (e
->callee
));
649 /* Decide on recursive inlining: in the case function has recursive calls,
650 inline until body size reaches given argument. */
653 cgraph_decide_recursive_inlining (struct cgraph_node
*node
)
655 int limit
= PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE_AUTO
);
656 int max_depth
= PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH_AUTO
);
657 int probability
= PARAM_VALUE (PARAM_MIN_INLINE_RECURSIVE_PROBABILITY
);
659 struct cgraph_edge
*e
;
660 struct cgraph_node
*master_clone
, *next
;
664 if (DECL_DECLARED_INLINE_P (node
->decl
))
666 limit
= PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE
);
667 max_depth
= PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH
);
670 /* Make sure that function is small enough to be considered for inlining. */
672 || cgraph_estimate_size_after_inlining (1, node
, node
) >= limit
)
674 heap
= fibheap_new ();
675 lookup_recursive_calls (node
, node
, heap
);
676 if (fibheap_empty (heap
))
678 fibheap_delete (heap
);
684 " Performing recursive inlining on %s\n",
685 cgraph_node_name (node
));
687 /* We need original clone to copy around. */
688 master_clone
= cgraph_clone_node (node
, node
->count
, 1, false);
689 master_clone
->needed
= true;
690 for (e
= master_clone
->callees
; e
; e
= e
->next_callee
)
691 if (!e
->inline_failed
)
692 cgraph_clone_inlined_nodes (e
, true, false);
694 /* Do the inlining and update list of recursive call during process. */
695 while (!fibheap_empty (heap
)
696 && (cgraph_estimate_size_after_inlining (1, node
, master_clone
)
699 struct cgraph_edge
*curr
= fibheap_extract_min (heap
);
700 struct cgraph_node
*cnode
;
703 for (cnode
= curr
->caller
;
704 cnode
->global
.inlined_to
; cnode
= cnode
->callers
->caller
)
705 if (node
->decl
== curr
->callee
->decl
)
707 if (depth
> max_depth
)
711 " maxmal depth reached\n");
717 if (!cgraph_maybe_hot_edge_p (curr
))
720 fprintf (dump_file
, " Not inlining cold call\n");
723 if (curr
->count
* 100 / node
->count
< probability
)
727 " Probability of edge is too small\n");
735 " Inlining call of depth %i", depth
);
738 fprintf (dump_file
, " called approx. %.2f times per call",
739 (double)curr
->count
/ node
->count
);
741 fprintf (dump_file
, "\n");
743 cgraph_redirect_edge_callee (curr
, master_clone
);
744 cgraph_mark_inline_edge (curr
, false);
745 lookup_recursive_calls (node
, curr
->callee
, heap
);
748 if (!fibheap_empty (heap
) && dump_file
)
749 fprintf (dump_file
, " Recursive inlining growth limit met.\n");
751 fibheap_delete (heap
);
754 "\n Inlined %i times, body grown from %i to %i insns\n", n
,
755 master_clone
->global
.insns
, node
->global
.insns
);
757 /* Remove master clone we used for inlining. We rely that clones inlined
758 into master clone gets queued just before master clone so we don't
760 for (node
= cgraph_nodes
; node
!= master_clone
;
764 if (node
->global
.inlined_to
== master_clone
)
765 cgraph_remove_node (node
);
767 cgraph_remove_node (master_clone
);
768 /* FIXME: Recursive inlining actually reduces number of calls of the
769 function. At this place we should probably walk the function and
770 inline clones and compensate the counts accordingly. This probably
771 doesn't matter much in practice. */
775 /* Set inline_failed for all callers of given function to REASON. */
778 cgraph_set_inline_failed (struct cgraph_node
*node
, const char *reason
)
780 struct cgraph_edge
*e
;
783 fprintf (dump_file
, "Inlining failed: %s\n", reason
);
784 for (e
= node
->callers
; e
; e
= e
->next_caller
)
785 if (e
->inline_failed
)
786 e
->inline_failed
= reason
;
789 /* We use greedy algorithm for inlining of small functions:
790 All inline candidates are put into prioritized heap based on estimated
791 growth of the overall number of instructions and then update the estimates.
793 INLINED and INLINED_CALEES are just pointers to arrays large enough
794 to be passed to cgraph_inlined_into and cgraph_inlined_callees. */
797 cgraph_decide_inlining_of_small_functions (void)
799 struct cgraph_node
*node
;
800 struct cgraph_edge
*edge
;
801 const char *failed_reason
;
802 fibheap_t heap
= fibheap_new ();
803 bitmap updated_nodes
= BITMAP_ALLOC (NULL
);
806 fprintf (dump_file
, "\nDeciding on smaller functions:\n");
808 /* Put all inline candidates into the heap. */
810 for (node
= cgraph_nodes
; node
; node
= node
->next
)
812 if (!node
->local
.inlinable
|| !node
->callers
813 || node
->local
.disregard_inline_limits
)
816 fprintf (dump_file
, "Considering inline candidate %s.\n", cgraph_node_name (node
));
818 node
->global
.estimated_growth
= INT_MIN
;
819 if (!cgraph_default_inline_p (node
, &failed_reason
))
821 cgraph_set_inline_failed (node
, failed_reason
);
825 for (edge
= node
->callers
; edge
; edge
= edge
->next_caller
)
826 if (edge
->inline_failed
)
828 gcc_assert (!edge
->aux
);
829 edge
->aux
= fibheap_insert (heap
, cgraph_edge_badness (edge
), edge
);
832 while (overall_insns
<= max_insns
&& (edge
= fibheap_extract_min (heap
)))
834 int old_insns
= overall_insns
;
835 struct cgraph_node
*where
;
837 cgraph_estimate_size_after_inlining (1, edge
->caller
, edge
->callee
);
839 growth
-= edge
->caller
->global
.insns
;
844 "\nConsidering %s with %i insns\n",
845 cgraph_node_name (edge
->callee
),
846 edge
->callee
->global
.insns
);
848 " to be inlined into %s\n"
849 " Estimated growth after inlined into all callees is %+i insns.\n"
850 " Estimated badness is %i.\n",
851 cgraph_node_name (edge
->caller
),
852 cgraph_estimate_growth (edge
->callee
),
853 cgraph_edge_badness (edge
));
855 fprintf (dump_file
," Called "HOST_WIDEST_INT_PRINT_DEC
"x\n", edge
->count
);
857 gcc_assert (edge
->aux
);
859 if (!edge
->inline_failed
)
862 /* When not having profile info ready we don't weight by any way the
863 position of call in procedure itself. This means if call of
864 function A from function B seems profitable to inline, the recursive
865 call of function A in inline copy of A in B will look profitable too
866 and we end up inlining until reaching maximal function growth. This
867 is not good idea so prohibit the recursive inlining.
869 ??? When the frequencies are taken into account we might not need this
873 where
= edge
->caller
;
874 while (where
->global
.inlined_to
)
876 if (where
->decl
== edge
->callee
->decl
)
878 where
= where
->callers
->caller
;
880 if (where
->global
.inlined_to
)
883 = (edge
->callee
->local
.disregard_inline_limits
? N_("recursive inlining") : "");
885 fprintf (dump_file
, " inline_failed:Recursive inlining performed only for function itself.\n");
890 if (!cgraph_maybe_hot_edge_p (edge
) && growth
> 0)
892 if (!cgraph_recursive_inlining_p (edge
->caller
, edge
->callee
,
893 &edge
->inline_failed
))
895 edge
->inline_failed
=
896 N_("call is unlikely");
898 fprintf (dump_file
, " inline_failed:%s.\n", edge
->inline_failed
);
902 if (!cgraph_default_inline_p (edge
->callee
, &edge
->inline_failed
))
904 if (!cgraph_recursive_inlining_p (edge
->caller
, edge
->callee
,
905 &edge
->inline_failed
))
908 fprintf (dump_file
, " inline_failed:%s.\n", edge
->inline_failed
);
912 if (cgraph_recursive_inlining_p (edge
->caller
, edge
->callee
,
913 &edge
->inline_failed
))
915 where
= edge
->caller
;
916 if (where
->global
.inlined_to
)
917 where
= where
->global
.inlined_to
;
918 if (!cgraph_decide_recursive_inlining (where
))
920 update_callee_keys (heap
, where
, updated_nodes
);
924 struct cgraph_node
*callee
;
925 if (!cgraph_check_inline_limits (edge
->caller
, edge
->callee
,
926 &edge
->inline_failed
, true))
929 fprintf (dump_file
, " Not inlining into %s:%s.\n",
930 cgraph_node_name (edge
->caller
), edge
->inline_failed
);
933 callee
= edge
->callee
;
934 cgraph_mark_inline_edge (edge
, true);
935 update_callee_keys (heap
, callee
, updated_nodes
);
937 where
= edge
->caller
;
938 if (where
->global
.inlined_to
)
939 where
= where
->global
.inlined_to
;
941 /* Our profitability metric can depend on local properties
942 such as number of inlinable calls and size of the function body.
943 After inlining these properties might change for the function we
944 inlined into (since it's body size changed) and for the functions
945 called by function we inlined (since number of it inlinable callers
947 update_caller_keys (heap
, where
, updated_nodes
);
948 bitmap_clear (updated_nodes
);
953 " Inlined into %s which now has %i insns,"
954 "net change of %+i insns.\n",
955 cgraph_node_name (edge
->caller
),
956 edge
->caller
->global
.insns
,
957 overall_insns
- old_insns
);
960 while ((edge
= fibheap_extract_min (heap
)) != NULL
)
962 gcc_assert (edge
->aux
);
964 if (!edge
->callee
->local
.disregard_inline_limits
&& edge
->inline_failed
965 && !cgraph_recursive_inlining_p (edge
->caller
, edge
->callee
,
966 &edge
->inline_failed
))
967 edge
->inline_failed
= N_("--param inline-unit-growth limit reached");
969 fibheap_delete (heap
);
970 BITMAP_FREE (updated_nodes
);
973 /* Decide on the inlining. We do so in the topological order to avoid
974 expenses on updating data structures. */
977 cgraph_decide_inlining (void)
979 struct cgraph_node
*node
;
981 struct cgraph_node
**order
=
982 XCNEWVEC (struct cgraph_node
*, cgraph_n_nodes
);
987 for (node
= cgraph_nodes
; node
; node
= node
->next
)
988 if (node
->analyzed
&& (node
->needed
|| node
->reachable
))
990 struct cgraph_edge
*e
;
992 initial_insns
+= node
->local
.self_insns
;
993 gcc_assert (node
->local
.self_insns
== node
->global
.insns
);
994 for (e
= node
->callees
; e
; e
= e
->next_callee
)
995 if (max_count
< e
->count
)
996 max_count
= e
->count
;
998 overall_insns
= initial_insns
;
999 gcc_assert (!max_count
|| (profile_info
&& flag_branch_probabilities
));
1001 max_insns
= overall_insns
;
1002 if (max_insns
< PARAM_VALUE (PARAM_LARGE_UNIT_INSNS
))
1003 max_insns
= PARAM_VALUE (PARAM_LARGE_UNIT_INSNS
);
1005 max_insns
= ((HOST_WIDEST_INT
) max_insns
1006 * (100 + PARAM_VALUE (PARAM_INLINE_UNIT_GROWTH
)) / 100);
1008 nnodes
= cgraph_postorder (order
);
1012 "\nDeciding on inlining. Starting with %i insns.\n",
1015 for (node
= cgraph_nodes
; node
; node
= node
->next
)
1019 fprintf (dump_file
, "\nInlining always_inline functions:\n");
1021 /* In the first pass mark all always_inline edges. Do this with a priority
1022 so none of our later choices will make this impossible. */
1023 for (i
= nnodes
- 1; i
>= 0; i
--)
1025 struct cgraph_edge
*e
, *next
;
1029 /* Handle nodes to be flattened, but don't update overall unit size. */
1030 if (lookup_attribute ("flatten", DECL_ATTRIBUTES (node
->decl
)) != NULL
)
1032 int old_overall_insns
= overall_insns
;
1036 "Flattening %s\n", cgraph_node_name (node
));
1037 cycles
= htab_create (7, htab_hash_pointer
, htab_eq_pointer
, NULL
);
1038 cgraph_find_cycles (node
, cycles
);
1039 cgraph_flatten_node (node
, cycles
);
1040 htab_delete (cycles
);
1041 overall_insns
= old_overall_insns
;
1042 /* We don't need to consider always_inline functions inside the flattened
1043 function anymore. */
1047 if (!node
->local
.disregard_inline_limits
)
1051 "\nConsidering %s %i insns (always inline)\n",
1052 cgraph_node_name (node
), node
->global
.insns
);
1053 old_insns
= overall_insns
;
1054 for (e
= node
->callers
; e
; e
= next
)
1056 next
= e
->next_caller
;
1057 if (!e
->inline_failed
)
1059 if (cgraph_recursive_inlining_p (e
->caller
, e
->callee
,
1062 cgraph_mark_inline_edge (e
, true);
1065 " Inlined into %s which now has %i insns.\n",
1066 cgraph_node_name (e
->caller
),
1067 e
->caller
->global
.insns
);
1071 " Inlined for a net change of %+i insns.\n",
1072 overall_insns
- old_insns
);
1075 if (!flag_really_no_inline
)
1076 cgraph_decide_inlining_of_small_functions ();
1078 if (!flag_really_no_inline
1079 && flag_inline_functions_called_once
)
1082 fprintf (dump_file
, "\nDeciding on functions called once:\n");
1084 /* And finally decide what functions are called once. */
1086 for (i
= nnodes
- 1; i
>= 0; i
--)
1090 if (node
->callers
&& !node
->callers
->next_caller
&& !node
->needed
1091 && node
->local
.inlinable
&& node
->callers
->inline_failed
1092 && !DECL_EXTERNAL (node
->decl
) && !DECL_COMDAT (node
->decl
))
1097 "\nConsidering %s %i insns.\n",
1098 cgraph_node_name (node
), node
->global
.insns
);
1100 " Called once from %s %i insns.\n",
1101 cgraph_node_name (node
->callers
->caller
),
1102 node
->callers
->caller
->global
.insns
);
1105 old_insns
= overall_insns
;
1107 if (cgraph_check_inline_limits (node
->callers
->caller
, node
,
1110 cgraph_mark_inline (node
->callers
);
1113 " Inlined into %s which now has %i insns"
1114 " for a net change of %+i insns.\n",
1115 cgraph_node_name (node
->callers
->caller
),
1116 node
->callers
->caller
->global
.insns
,
1117 overall_insns
- old_insns
);
1123 " Inline limit reached, not inlined.\n");
1131 "\nInlined %i calls, eliminated %i functions, "
1132 "%i insns turned to %i insns.\n\n",
1133 ncalls_inlined
, nfunctions_inlined
, initial_insns
,
1139 /* Decide on the inlining. We do so in the topological order to avoid
1140 expenses on updating data structures. */
1143 cgraph_decide_inlining_incrementally (struct cgraph_node
*node
, bool early
)
1145 struct cgraph_edge
*e
;
1146 bool inlined
= false;
1147 const char *failed_reason
;
1148 unsigned int todo
= 0;
1150 #ifdef ENABLE_CHECKING
1151 verify_cgraph_node (node
);
1154 /* First of all look for always inline functions. */
1155 for (e
= node
->callees
; e
; e
= e
->next_callee
)
1156 if (e
->callee
->local
.disregard_inline_limits
1158 && (gimple_in_ssa_p (DECL_STRUCT_FUNCTION (node
->decl
))
1159 == gimple_in_ssa_p (DECL_STRUCT_FUNCTION (e
->callee
->decl
)))
1160 && !cgraph_recursive_inlining_p (node
, e
->callee
, &e
->inline_failed
)
1161 /* ??? It is possible that renaming variable removed the function body
1162 in duplicate_decls. See gcc.c-torture/compile/20011119-2.c */
1163 && (DECL_SAVED_TREE (e
->callee
->decl
) || e
->callee
->inline_decl
))
1165 if (dump_file
&& early
)
1167 fprintf (dump_file
, " Early inlining %s",
1168 cgraph_node_name (e
->callee
));
1169 fprintf (dump_file
, " into %s\n", cgraph_node_name (node
));
1171 cgraph_mark_inline (e
);
1172 /* In order to fully inline alway_inline functions at -O0, we need to
1173 recurse here, since the inlined functions might not be processed by
1174 incremental inlining at all yet. */
1176 if (!flag_unit_at_a_time
)
1177 cgraph_decide_inlining_incrementally (e
->callee
, early
);
1182 /* Now do the automatic inlining. */
1183 if (!flag_really_no_inline
)
1184 for (e
= node
->callees
; e
; e
= e
->next_callee
)
1185 if (e
->callee
->local
.inlinable
1187 && !e
->callee
->local
.disregard_inline_limits
1188 && !cgraph_recursive_inlining_p (node
, e
->callee
, &e
->inline_failed
)
1189 && (gimple_in_ssa_p (DECL_STRUCT_FUNCTION (node
->decl
))
1190 == gimple_in_ssa_p (DECL_STRUCT_FUNCTION (e
->callee
->decl
)))
1192 || (cgraph_estimate_size_after_inlining (1, e
->caller
, e
->callee
)
1193 <= e
->caller
->global
.insns
))
1194 && cgraph_check_inline_limits (node
, e
->callee
, &e
->inline_failed
,
1196 && (DECL_SAVED_TREE (e
->callee
->decl
) || e
->callee
->inline_decl
))
1198 if (cgraph_default_inline_p (e
->callee
, &failed_reason
))
1200 if (dump_file
&& early
)
1202 fprintf (dump_file
, " Early inlining %s",
1203 cgraph_node_name (e
->callee
));
1204 fprintf (dump_file
, " into %s\n", cgraph_node_name (node
));
1206 cgraph_mark_inline (e
);
1210 e
->inline_failed
= failed_reason
;
1212 if (early
&& inlined
&& !node
->global
.inlined_to
)
1214 timevar_push (TV_INTEGRATION
);
1215 todo
= optimize_inline_calls (current_function_decl
);
1216 timevar_pop (TV_INTEGRATION
);
1221 /* When inlining shall be performed. */
1223 cgraph_gate_inlining (void)
1225 return flag_inline_trees
;
1228 struct tree_opt_pass pass_ipa_inline
=
1230 "inline", /* name */
1231 cgraph_gate_inlining
, /* gate */
1232 cgraph_decide_inlining
, /* execute */
1235 0, /* static_pass_number */
1236 TV_INLINE_HEURISTICS
, /* tv_id */
1237 0, /* properties_required */
1238 PROP_cfg
, /* properties_provided */
1239 0, /* properties_destroyed */
1240 TODO_remove_functions
, /* todo_flags_finish */
1241 TODO_dump_cgraph
| TODO_dump_func
1242 | TODO_remove_functions
, /* todo_flags_finish */
1246 /* Because inlining might remove no-longer reachable nodes, we need to
1247 keep the array visible to garbage collector to avoid reading collected
1250 static GTY ((length ("nnodes"))) struct cgraph_node
**order
;
1252 /* Do inlining of small functions. Doing so early helps profiling and other
1253 passes to be somewhat more effective and avoids some code duplication in
1254 later real inlining pass for testcases with very many function calls. */
1256 cgraph_early_inlining (void)
1258 struct cgraph_node
*node
= cgraph_node (current_function_decl
);
1260 if (sorrycount
|| errorcount
)
1262 return cgraph_decide_inlining_incrementally (node
, flag_unit_at_a_time
);
1265 /* When inlining shall be performed. */
1267 cgraph_gate_early_inlining (void)
1269 return flag_inline_trees
&& flag_early_inlining
;
1272 struct tree_opt_pass pass_early_inline
=
1274 "einline", /* name */
1275 cgraph_gate_early_inlining
, /* gate */
1276 cgraph_early_inlining
, /* execute */
1279 0, /* static_pass_number */
1280 TV_INLINE_HEURISTICS
, /* tv_id */
1281 0, /* properties_required */
1282 PROP_cfg
, /* properties_provided */
1283 0, /* properties_destroyed */
1284 0, /* todo_flags_start */
1285 TODO_dump_func
, /* todo_flags_finish */
1289 /* When inlining shall be performed. */
1291 cgraph_gate_ipa_early_inlining (void)
1293 return (flag_inline_trees
&& flag_early_inlining
1294 && (flag_branch_probabilities
|| flag_test_coverage
1295 || profile_arc_flag
));
1298 /* IPA pass wrapper for early inlining pass. We need to run early inlining
1299 before tree profiling so we have stand alone IPA pass for doing so. */
1300 struct tree_opt_pass pass_ipa_early_inline
=
1302 "einline_ipa", /* name */
1303 cgraph_gate_ipa_early_inlining
, /* gate */
1307 0, /* static_pass_number */
1308 TV_INLINE_HEURISTICS
, /* tv_id */
1309 0, /* properties_required */
1310 PROP_cfg
, /* properties_provided */
1311 0, /* properties_destroyed */
1312 0, /* todo_flags_start */
1313 TODO_dump_cgraph
, /* todo_flags_finish */
1317 /* Compute parameters of functions used by inliner. */
1319 compute_inline_parameters (void)
1321 struct cgraph_node
*node
= cgraph_node (current_function_decl
);
1323 gcc_assert (!node
->global
.inlined_to
);
1324 node
->local
.estimated_self_stack_size
= estimated_stack_frame_size ();
1325 node
->global
.estimated_stack_size
= node
->local
.estimated_self_stack_size
;
1326 node
->global
.stack_frame_offset
= 0;
1327 node
->local
.inlinable
= tree_inlinable_function_p (current_function_decl
);
1328 node
->local
.self_insns
= estimate_num_insns (current_function_decl
);
1329 if (node
->local
.inlinable
)
1330 node
->local
.disregard_inline_limits
1331 = lang_hooks
.tree_inlining
.disregard_inline_limits (current_function_decl
);
1332 if (flag_really_no_inline
&& !node
->local
.disregard_inline_limits
)
1333 node
->local
.inlinable
= 0;
1334 /* Inlining characteristics are maintained by the cgraph_mark_inline. */
1335 node
->global
.insns
= node
->local
.self_insns
;
1339 /* When inlining shall be performed. */
1341 gate_inline_passes (void)
1343 return flag_inline_trees
;
1346 struct tree_opt_pass pass_inline_parameters
=
1349 gate_inline_passes
, /* gate */
1350 compute_inline_parameters
, /* execute */
1353 0, /* static_pass_number */
1354 TV_INLINE_HEURISTICS
, /* tv_id */
1355 0, /* properties_required */
1356 PROP_cfg
, /* properties_provided */
1357 0, /* properties_destroyed */
1358 0, /* todo_flags_start */
1359 0, /* todo_flags_finish */
1363 /* Apply inline plan to the function. */
1367 unsigned int todo
= 0;
1368 struct cgraph_edge
*e
;
1369 struct cgraph_node
*node
= cgraph_node (current_function_decl
);
1371 /* Even when not optimizing, ensure that always_inline functions get inlined.
1374 cgraph_decide_inlining_incrementally (node
, false);
1376 /* We might need the body of this function so that we can expand
1377 it inline somewhere else. */
1378 if (cgraph_preserve_function_body_p (current_function_decl
))
1379 save_inline_function_body (node
);
1381 for (e
= node
->callees
; e
; e
= e
->next_callee
)
1382 if (!e
->inline_failed
|| warn_inline
)
1386 timevar_push (TV_INTEGRATION
);
1387 todo
= optimize_inline_calls (current_function_decl
);
1388 timevar_pop (TV_INTEGRATION
);
1390 /* In non-unit-at-a-time we must mark all referenced functions as needed. */
1391 if (!flag_unit_at_a_time
)
1393 struct cgraph_edge
*e
;
1394 for (e
= node
->callees
; e
; e
= e
->next_callee
)
1395 if (e
->callee
->analyzed
)
1396 cgraph_mark_needed_node (e
->callee
);
1398 return todo
| execute_fixup_cfg ();
1401 struct tree_opt_pass pass_apply_inline
=
1403 "apply_inline", /* name */
1405 apply_inline
, /* execute */
1408 0, /* static_pass_number */
1409 TV_INLINE_HEURISTICS
, /* tv_id */
1410 0, /* properties_required */
1411 PROP_cfg
, /* properties_provided */
1412 0, /* properties_destroyed */
1413 0, /* todo_flags_start */
1414 TODO_dump_func
| TODO_verify_flow
1415 | TODO_verify_stmts
, /* todo_flags_finish */
1419 #include "gt-ipa-inline.h"