1 /* Inlining decision heuristics.
2 Copyright (C) 2003, 2004 Free Software Foundation, Inc.
3 Contributed by Jan Hubicka
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
20 02110-1301, USA. */
21
22 /* Inlining decision heuristics
23
24 We separate inlining decisions from the inliner itself and store them
25 inside the callgraph as a so-called inline plan. Refer to the cgraph.c
26 documentation for the particular representation of inline plans in the
27 callgraph.
28
29 There are three major parts of this file:
30
31 cgraph_mark_inline implementation
32
33 This function marks a given call inlined and performs the necessary
34 modifications of the cgraph (production of the clones and updating of the
35 overall statistics).
36
37 inlining heuristics limits
38
39 These functions check that a particular inlining is allowed
40 by the limits specified by the user (allowed function growth, overall unit
41 growth and so on).
42
43 inlining heuristics
44
45 This is the implementation of the IPA pass that aims to get as much benefit
46 from inlining as possible while obeying the limits checked above.
47
48 The implementation of the particular heuristics is separated from
49 the rest of the code to make it easier to replace it with a more complicated
50 implementation in the future. The rest of the inlining code acts as a
51 library aimed at modifying the callgraph and verifying that the limits
52 on code size growth are honored.
53
54 To mark a given call inlined, use the cgraph_mark_inline function; the
55 verification is performed by cgraph_default_inline_p and
56 cgraph_check_inline_limits.
57
58 The heuristics implement a simple knapsack-style algorithm that orders
59 all functions by their "profitability" (estimated by code size growth)
60 and inlines them in priority order.
61
62 cgraph_decide_inlining implements the heuristics taking the whole callgraph
63 into account, while cgraph_decide_inlining_incrementally considers
64 only one function at a time and is used in non-unit-at-a-time mode. */
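
/* Illustrative sketch (an editorial aid, not part of the pass itself): the
   small function inliner below can be pictured as

       put every inlinable edge E into a fibheap keyed by cgraph_edge_badness (E)
       while (overall_insns <= max_insns && the heap is not empty)
         {
           E = fibheap_extract_min (heap)    -- smallest badness first
           if (cgraph_default_inline_p and cgraph_check_inline_limits allow it)
             inline E and recompute the keys of the affected edges
         }

   The concrete loop lives in cgraph_decide_inlining_of_small_functions;
   everything named above exists in this file, only the layout of the loop
   is simplified here. */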
65
66 #include "config.h"
67 #include "system.h"
68 #include "coretypes.h"
69 #include "tm.h"
70 #include "tree.h"
71 #include "tree-inline.h"
72 #include "langhooks.h"
73 #include "flags.h"
74 #include "cgraph.h"
75 #include "diagnostic.h"
76 #include "timevar.h"
77 #include "params.h"
78 #include "fibheap.h"
79 #include "intl.h"
80 #include "tree-pass.h"
81 #include "hashtab.h"
82 #include "coverage.h"
83 #include "ggc.h"
84
85 /* Statistics we collect about inlining algorithm. */
86 static int ncalls_inlined;
87 static int nfunctions_inlined;
88 static int initial_insns;
89 static int overall_insns;
90 static int max_insns;
91 static gcov_type max_count;
92
93 /* Estimate the size of function TO after inlining WHAT into it TIMES times. */
94
95 static int
96 cgraph_estimate_size_after_inlining (int times, struct cgraph_node *to,
97 struct cgraph_node *what)
98 {
99 int size;
100 tree fndecl = what->decl, arg;
101 int call_insns = PARAM_VALUE (PARAM_INLINE_CALL_COST);
102
103 for (arg = DECL_ARGUMENTS (fndecl); arg; arg = TREE_CHAIN (arg))
104 call_insns += estimate_move_cost (TREE_TYPE (arg));
105 size = (what->global.insns - call_insns) * times + to->global.insns;
106 gcc_assert (size >= 0);
107 return size;
108 }
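
/* Worked example for the estimate above (illustrative only; the concrete
   numbers are assumptions, not defaults taken from params.def): if WHAT has
   global.insns == 100, two arguments whose estimated move cost is 1 insn
   each, and PARAM_INLINE_CALL_COST is 16, then call_insns == 16 + 1 + 1 == 18
   and inlining WHAT once into a TO of 50 insns is estimated as
   (100 - 18) * 1 + 50 == 132 insns. */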
109
110 /* E is expected to be an edge being inlined. Clone the destination node of
111 the edge and redirect it to the new clone.
112 DUPLICATE is used for bookkeeping on whether we are actually creating new
113 clones or re-using the node originally representing the out-of-line function
114 call. */
115 void
116 cgraph_clone_inlined_nodes (struct cgraph_edge *e, bool duplicate, bool update_original)
117 {
118 HOST_WIDE_INT peak;
119 if (duplicate)
120 {
121 /* We may eliminate the need for the out-of-line copy to be output.
122 In that case just go ahead and re-use it. */
123 if (!e->callee->callers->next_caller
124 && !e->callee->needed
125 && flag_unit_at_a_time)
126 {
127 gcc_assert (!e->callee->global.inlined_to);
128 if (DECL_SAVED_TREE (e->callee->decl))
129 overall_insns -= e->callee->global.insns, nfunctions_inlined++;
130 duplicate = false;
131 }
132 else
133 {
134 struct cgraph_node *n;
135 n = cgraph_clone_node (e->callee, e->count, e->loop_nest,
136 update_original);
137 cgraph_redirect_edge_callee (e, n);
138 }
139 }
140
141 if (e->caller->global.inlined_to)
142 e->callee->global.inlined_to = e->caller->global.inlined_to;
143 else
144 e->callee->global.inlined_to = e->caller;
145 e->callee->global.stack_frame_offset
146 = e->caller->global.stack_frame_offset + e->caller->local.estimated_self_stack_size;
147 peak = e->callee->global.stack_frame_offset + e->callee->local.estimated_self_stack_size;
148 if (e->callee->global.inlined_to->global.estimated_stack_size < peak)
149 e->callee->global.inlined_to->global.estimated_stack_size = peak;
150
151 /* Recursively clone all bodies. */
152 for (e = e->callee->callees; e; e = e->next_callee)
153 if (!e->inline_failed)
154 cgraph_clone_inlined_nodes (e, duplicate, update_original);
155 }
156
157 /* Mark edge E as inlined and update the callgraph accordingly.
158 UPDATE_ORIGINAL specifies whether the profile of the original function
159 should be updated. */
160
161 void
162 cgraph_mark_inline_edge (struct cgraph_edge *e, bool update_original)
163 {
164 int old_insns = 0, new_insns = 0;
165 struct cgraph_node *to = NULL, *what;
166
167 if (e->callee->inline_decl)
168 cgraph_redirect_edge_callee (e, cgraph_node (e->callee->inline_decl));
169
170 gcc_assert (e->inline_failed);
171 e->inline_failed = NULL;
172
173 if (!e->callee->global.inlined && flag_unit_at_a_time)
174 DECL_POSSIBLY_INLINED (e->callee->decl) = true;
175 e->callee->global.inlined = true;
176
177 cgraph_clone_inlined_nodes (e, true, update_original);
178
179 what = e->callee;
180
181 /* Now update the size of the caller and of all functions the caller is inlined into. */
182 for (;e && !e->inline_failed; e = e->caller->callers)
183 {
184 old_insns = e->caller->global.insns;
185 new_insns = cgraph_estimate_size_after_inlining (1, e->caller,
186 what);
187 gcc_assert (new_insns >= 0);
188 to = e->caller;
189 to->global.insns = new_insns;
190 }
191 gcc_assert (what->global.inlined_to == to);
192 if (new_insns > old_insns)
193 overall_insns += new_insns - old_insns;
194 ncalls_inlined++;
195 }
196
197 /* Mark all calls of EDGE->CALLEE inlined into EDGE->CALLER.
198 Return the following unredirected edge in the list of callers
199 of EDGE->CALLEE. */
200
201 static struct cgraph_edge *
202 cgraph_mark_inline (struct cgraph_edge *edge)
203 {
204 struct cgraph_node *to = edge->caller;
205 struct cgraph_node *what = edge->callee;
206 struct cgraph_edge *e, *next;
207 int times = 0;
208
209 /* Look for all calls, mark them inlined and recursively clone
210 all inlined functions. */
211 for (e = what->callers; e; e = next)
212 {
213 next = e->next_caller;
214 if (e->caller == to && e->inline_failed)
215 {
216 cgraph_mark_inline_edge (e, true);
217 if (e == edge)
218 edge = next;
219 times++;
220 }
221 }
222 gcc_assert (times);
223 return edge;
224 }
225
226 /* Estimate the growth caused by inlining NODE into all of its callers. */
227
228 static int
229 cgraph_estimate_growth (struct cgraph_node *node)
230 {
231 int growth = 0;
232 struct cgraph_edge *e;
233 if (node->global.estimated_growth != INT_MIN)
234 return node->global.estimated_growth;
235
236 for (e = node->callers; e; e = e->next_caller)
237 if (e->inline_failed)
238 growth += (cgraph_estimate_size_after_inlining (1, e->caller, node)
239 - e->caller->global.insns);
240
241 /* ??? Wrong for self-recursive functions or cases where we decide not to
242 inline for different reasons, but it is not a big deal, as in that case
243 we will keep the body around but we will also avoid some inlining. */
244 if (!node->needed && !DECL_EXTERNAL (node->decl))
245 growth -= node->global.insns;
246
247 node->global.estimated_growth = growth;
248 return growth;
249 }
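
/* Worked example for cgraph_estimate_growth (illustrative, made-up numbers):
   suppose NODE has 60 insns, two not-yet-inlined callers, and the per-call
   overhead computed by cgraph_estimate_size_after_inlining is 20 insns.
   Each call site then grows by 60 - 20 == 40 insns, so the loop accumulates
   2 * 40 == 80. If the offline body can go away afterwards (NODE is neither
   needed nor external), its 60 insns are subtracted, giving an estimated
   overall growth of 20 insns for inlining NODE everywhere. */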
250
251 /* Return false when inlining WHAT into TO is not a good idea
252 as it would cause too large a growth of function bodies.
253 When ONE_ONLY is true, assume that only one call site is going
254 to be inlined, otherwise figure out how many call sites in
255 TO call WHAT and verify that all of them can be inlined.
256 */
257
258 static bool
259 cgraph_check_inline_limits (struct cgraph_node *to, struct cgraph_node *what,
260 const char **reason, bool one_only)
261 {
262 int times = 0;
263 struct cgraph_edge *e;
264 int newsize;
265 int limit;
266 HOST_WIDE_INT stack_size_limit, inlined_stack;
267
268 if (one_only)
269 times = 1;
270 else
271 for (e = to->callees; e; e = e->next_callee)
272 if (e->callee == what)
273 times++;
274
275 if (to->global.inlined_to)
276 to = to->global.inlined_to;
277
278 /* When inlining a large function body called once into a small function,
279 take the inlined function as the base for limiting the growth. */
280 if (to->local.self_insns > what->local.self_insns)
281 limit = to->local.self_insns;
282 else
283 limit = what->local.self_insns;
284
285 limit += limit * PARAM_VALUE (PARAM_LARGE_FUNCTION_GROWTH) / 100;
286
287 /* Check the size after inlining against the function limits. But allow
288 the function to shrink if it went over the limits by forced inlining. */
289 newsize = cgraph_estimate_size_after_inlining (times, to, what);
290 if (newsize >= to->global.insns
291 && newsize > PARAM_VALUE (PARAM_LARGE_FUNCTION_INSNS)
292 && newsize > limit)
293 {
294 if (reason)
295 *reason = N_("--param large-function-growth limit reached");
296 return false;
297 }
298
299 stack_size_limit = to->local.estimated_self_stack_size;
300
301 stack_size_limit += stack_size_limit * PARAM_VALUE (PARAM_STACK_FRAME_GROWTH) / 100;
302
303 inlined_stack = (to->global.stack_frame_offset
304 + to->local.estimated_self_stack_size
305 + what->global.estimated_stack_size);
306 if (inlined_stack > stack_size_limit
307 && inlined_stack > PARAM_VALUE (PARAM_LARGE_STACK_FRAME))
308 {
309 if (reason)
310 *reason = N_("--param large-stack-frame-growth limit reached");
311 return false;
312 }
313 return true;
314 }
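
/* Worked example for the limits above (illustrative; the --param values are
   assumptions, not necessarily the defaults): with
   --param large-function-growth=100, a TO of 500 self insns inlining a WHAT
   of 30 self insns gets limit == 500 + 500 * 100 / 100 == 1000. The inlining
   is rejected only when the estimated new size does not shrink TO and exceeds
   both --param large-function-insns and this limit, so small functions never
   trip the check. The stack frame test works the same way with
   --param large-stack-frame-growth and --param large-stack-frame. */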
315
316 /* Return true when function N is small enough to be inlined. */
317
318 bool
319 cgraph_default_inline_p (struct cgraph_node *n, const char **reason)
320 {
321 tree decl = n->decl;
322
323 if (n->inline_decl)
324 decl = n->inline_decl;
325 if (!DECL_INLINE (decl))
326 {
327 if (reason)
328 *reason = N_("function not inlinable");
329 return false;
330 }
331
332 if (!DECL_STRUCT_FUNCTION (decl)->cfg)
333 {
334 if (reason)
335 *reason = N_("function body not available");
336 return false;
337 }
338
339 if (DECL_DECLARED_INLINE_P (decl))
340 {
341 if (n->global.insns >= MAX_INLINE_INSNS_SINGLE)
342 {
343 if (reason)
344 *reason = N_("--param max-inline-insns-single limit reached");
345 return false;
346 }
347 }
348 else
349 {
350 if (n->global.insns >= MAX_INLINE_INSNS_AUTO)
351 {
352 if (reason)
353 *reason = N_("--param max-inline-insns-auto limit reached");
354 return false;
355 }
356 }
357
358 return true;
359 }
360
361 /* Return true when inlining WHAT into TO would create recursive inlining.
362 We call recursive inlining all cases where the same function appears more
363 than once in a single recursion nest path in the inline graph. */
364
365 static bool
366 cgraph_recursive_inlining_p (struct cgraph_node *to,
367 struct cgraph_node *what,
368 const char **reason)
369 {
370 bool recursive;
371 if (to->global.inlined_to)
372 recursive = what->decl == to->global.inlined_to->decl;
373 else
374 recursive = what->decl == to->decl;
375 /* Marking a recursive function inline has sane semantics and thus we should
376 not warn about it. */
377 if (recursive && reason)
378 *reason = (what->local.disregard_inline_limits
379 ? N_("recursive inlining") : "");
380 return recursive;
381 }
382
383 /* Return true if the call can be hot. */
384 static bool
385 cgraph_maybe_hot_edge_p (struct cgraph_edge *edge)
386 {
387 if (profile_info && flag_branch_probabilities
388 && (edge->count
389 <= profile_info->sum_max / PARAM_VALUE (HOT_BB_COUNT_FRACTION)))
390 return false;
391 return true;
392 }
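
/* Illustrative numbers for the test above (assumed, not defaults): with
   feedback giving profile_info->sum_max == 1000000 and
   --param hot-bb-count-fraction=10000, an edge is considered cold once its
   count is at most 100; without -fbranch-probabilities every call is treated
   as possibly hot. */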
393
394 /* A cost model driving the inlining heuristics in a way such that the edges
395 with the smallest badness are inlined first. After each inlining is
396 performed, the costs of all caller edges of the nodes affected are
397 recomputed so that the metrics may accurately depend on values such as the
398 number of inlinable callers of the function or the function body size.
399
400 With profiling we use the number of executions of each edge to drive the
401 cost. We should also distinguish hot and cold calls, where cold calls are
402 inlined only when code size improves overall.
403 */
404
405 static int
406 cgraph_edge_badness (struct cgraph_edge *edge)
407 {
408 if (max_count)
409 {
410 int growth =
411 cgraph_estimate_size_after_inlining (1, edge->caller, edge->callee);
412 growth -= edge->caller->global.insns;
413
414 /* Always prefer inlining that saves code size. */
415 if (growth <= 0)
416 return INT_MIN - growth;
417 return ((int)((double)edge->count * INT_MIN / max_count)) / growth;
418 }
419 else
420 {
421 int nest = MIN (edge->loop_nest, 8);
422 int badness = cgraph_estimate_growth (edge->callee) * 256;
423
424 /* Decrease badness if call is nested. */
425 if (badness > 0)
426 badness >>= nest;
427 else
428 badness <<= nest;
429
430 /* Always make recursive inlining happen after other inlining is done. */
431 if (cgraph_recursive_inlining_p (edge->caller, edge->callee, NULL))
432 return badness + 1;
433 else
434 return badness;
435 }
436 }
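
/* Worked examples for cgraph_edge_badness (illustrative, made-up numbers):
   without profile data, a callee whose estimated overall growth is 40 insns
   gets a base badness of 40 * 256 == 10240; if the call sits in a loop nest
   of depth 2 this is shifted down to 10240 >> 2 == 2560, so more deeply
   nested calls are preferred. With profile data, an edge whose inlining
   would shrink code (growth <= 0) is mapped next to INT_MIN and always wins,
   while among growing edges a hotter one (larger count relative to max_count)
   gets a proportionally more negative key for the same growth. */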
437
438 /* Recompute heap nodes for each caller edge of NODE. */
439
440 static void
441 update_caller_keys (fibheap_t heap, struct cgraph_node *node,
442 bitmap updated_nodes)
443 {
444 struct cgraph_edge *edge;
445 const char *failed_reason;
446
447 if (!node->local.inlinable || node->local.disregard_inline_limits
448 || node->global.inlined_to)
449 return;
450 if (bitmap_bit_p (updated_nodes, node->uid))
451 return;
452 bitmap_set_bit (updated_nodes, node->uid);
453 node->global.estimated_growth = INT_MIN;
454
455 if (!node->local.inlinable)
456 return;
457 /* Prune out edges we won't inline into anymore. */
458 if (!cgraph_default_inline_p (node, &failed_reason))
459 {
460 for (edge = node->callers; edge; edge = edge->next_caller)
461 if (edge->aux)
462 {
463 fibheap_delete_node (heap, edge->aux);
464 edge->aux = NULL;
465 if (edge->inline_failed)
466 edge->inline_failed = failed_reason;
467 }
468 return;
469 }
470
471 for (edge = node->callers; edge; edge = edge->next_caller)
472 if (edge->inline_failed)
473 {
474 int badness = cgraph_edge_badness (edge);
475 if (edge->aux)
476 {
477 fibnode_t n = edge->aux;
478 gcc_assert (n->data == edge);
479 if (n->key == badness)
480 continue;
481
482 /* fibheap_replace_key only decreases keys; otherwise delete the node and re-insert it below. */
483 if (fibheap_replace_key (heap, n, badness))
484 continue;
485 fibheap_delete_node (heap, edge->aux);
486 }
487 edge->aux = fibheap_insert (heap, badness, edge);
488 }
489 }
490
491 /* Recompute heap nodes for the caller edges of each of NODE's callees. */
492
493 static void
494 update_callee_keys (fibheap_t heap, struct cgraph_node *node,
495 bitmap updated_nodes)
496 {
497 struct cgraph_edge *e;
498 node->global.estimated_growth = INT_MIN;
499
500 for (e = node->callees; e; e = e->next_callee)
501 if (e->inline_failed)
502 update_caller_keys (heap, e->callee, updated_nodes);
503 else if (!e->inline_failed)
504 update_callee_keys (heap, e->callee, updated_nodes);
505 }
506
507 /* Enqueue into the priority queue HEAP all recursive calls to NODE found in
508 WHERE, prioritized by how eagerly we want to inline each call. */
509
510 static void
511 lookup_recursive_calls (struct cgraph_node *node, struct cgraph_node *where,
512 fibheap_t heap)
513 {
514 static int priority;
515 struct cgraph_edge *e;
516 for (e = where->callees; e; e = e->next_callee)
517 if (e->callee == node)
518 {
519 /* When profile feedback is available, prioritize by the expected number
520 of calls. Without profile feedback we maintain a simple queue
521 to order candidates by recursion depth. */
522 fibheap_insert (heap,
523 !max_count ? priority++
524 : -(e->count / ((max_count + (1<<24) - 1) / (1<<24))),
525 e);
526 }
527 for (e = where->callees; e; e = e->next_callee)
528 if (!e->inline_failed)
529 lookup_recursive_calls (node, e->callee, heap);
530 }
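
/* Note on the fibheap key above: with profile feedback the count is divided
   by ceil (max_count / 2^24) so that even very large gcov counts fit into the
   integer key; e.g. (assumed numbers) with max_count == 2^30 the divisor is
   64 and an edge executed 6400 times gets key -6400 / 64 == -100. Hotter
   edges therefore come out of the heap first. */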
531
532 /* Find callgraph nodes that close a cycle in the graph. The
533 resulting hashtab can be used to avoid walking the cycles.
534 Uses the cgraph nodes' ->aux field, which needs to be zero
535 before and will be zero after the operation. */
536
537 static void
538 cgraph_find_cycles (struct cgraph_node *node, htab_t cycles)
539 {
540 struct cgraph_edge *e;
541
542 if (node->aux)
543 {
544 void **slot;
545 slot = htab_find_slot (cycles, node, INSERT);
546 if (!*slot)
547 {
548 if (dump_file)
549 fprintf (dump_file, "Cycle contains %s\n", cgraph_node_name (node));
550 *slot = node;
551 }
552 return;
553 }
554
555 node->aux = node;
556 for (e = node->callees; e; e = e->next_callee)
557 cgraph_find_cycles (e->callee, cycles);
558 node->aux = 0;
559 }
560
561 /* Flatten the cgraph node. We have to be careful when recursing
562 so as not to run endlessly in cycles of the callgraph.
563 We do so by using a hashtab of cycle-entering nodes as generated
564 by cgraph_find_cycles. */
565
566 static void
567 cgraph_flatten_node (struct cgraph_node *node, htab_t cycles)
568 {
569 struct cgraph_edge *e;
570
571 for (e = node->callees; e; e = e->next_callee)
572 {
573 /* Inline the call, if possible, and recurse. Be sure we are not
574 entering callgraph cycles here. */
575 if (e->inline_failed
576 && e->callee->local.inlinable
577 && !cgraph_recursive_inlining_p (node, e->callee,
578 &e->inline_failed)
579 && !htab_find (cycles, e->callee))
580 {
581 if (dump_file)
582 fprintf (dump_file, " inlining %s", cgraph_node_name (e->callee));
583 cgraph_mark_inline_edge (e, true);
584 cgraph_flatten_node (e->callee, cycles);
585 }
586 else if (dump_file)
587 fprintf (dump_file, " !inlining %s", cgraph_node_name (e->callee));
588 }
589 }
590
591 /* Decide on recursive inlining: when the function has recursive calls,
592 inline until the body size reaches the given limit. */
593
594 static bool
595 cgraph_decide_recursive_inlining (struct cgraph_node *node)
596 {
597 int limit = PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE_AUTO);
598 int max_depth = PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH_AUTO);
599 int probability = PARAM_VALUE (PARAM_MIN_INLINE_RECURSIVE_PROBABILITY);
600 fibheap_t heap;
601 struct cgraph_edge *e;
602 struct cgraph_node *master_clone, *next;
603 int depth = 0;
604 int n = 0;
605
606 if (DECL_DECLARED_INLINE_P (node->decl))
607 {
608 limit = PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE);
609 max_depth = PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH);
610 }
611
612 /* Make sure that function is small enough to be considered for inlining. */
613 if (!max_depth
614 || cgraph_estimate_size_after_inlining (1, node, node) >= limit)
615 return false;
616 heap = fibheap_new ();
617 lookup_recursive_calls (node, node, heap);
618 if (fibheap_empty (heap))
619 {
620 fibheap_delete (heap);
621 return false;
622 }
623
624 if (dump_file)
625 fprintf (dump_file,
626 " Performing recursive inlining on %s\n",
627 cgraph_node_name (node));
628
629 /* We need original clone to copy around. */
630 master_clone = cgraph_clone_node (node, node->count, 1, false);
631 master_clone->needed = true;
632 for (e = master_clone->callees; e; e = e->next_callee)
633 if (!e->inline_failed)
634 cgraph_clone_inlined_nodes (e, true, false);
635
636 /* Do the inlining and update the list of recursive calls during the process. */
637 while (!fibheap_empty (heap)
638 && (cgraph_estimate_size_after_inlining (1, node, master_clone)
639 <= limit))
640 {
641 struct cgraph_edge *curr = fibheap_extract_min (heap);
642 struct cgraph_node *cnode;
643
644 depth = 1;
645 for (cnode = curr->caller;
646 cnode->global.inlined_to; cnode = cnode->callers->caller)
647 if (node->decl == curr->callee->decl)
648 depth++;
649 if (depth > max_depth)
650 {
651 if (dump_file)
652 fprintf (dump_file,
653 " maxmal depth reached\n");
654 continue;
655 }
656
657 if (max_count)
658 {
659 if (!cgraph_maybe_hot_edge_p (curr))
660 {
661 if (dump_file)
662 fprintf (dump_file, " Not inlining cold call\n");
663 continue;
664 }
665 if (curr->count * 100 / node->count < probability)
666 {
667 if (dump_file)
668 fprintf (dump_file,
669 " Probability of edge is too small\n");
670 continue;
671 }
672 }
673
674 if (dump_file)
675 {
676 fprintf (dump_file,
677 " Inlining call of depth %i", depth);
678 if (node->count)
679 {
680 fprintf (dump_file, " called approx. %.2f times per call",
681 (double)curr->count / node->count);
682 }
683 fprintf (dump_file, "\n");
684 }
685 cgraph_redirect_edge_callee (curr, master_clone);
686 cgraph_mark_inline_edge (curr, false);
687 lookup_recursive_calls (node, curr->callee, heap);
688 n++;
689 }
690 if (!fibheap_empty (heap) && dump_file)
691 fprintf (dump_file, " Recursive inlining growth limit met.\n");
692
693 fibheap_delete (heap);
694 if (dump_file)
695 fprintf (dump_file,
696 "\n Inlined %i times, body grown from %i to %i insns\n", n,
697 master_clone->global.insns, node->global.insns);
698
699 /* Remove the master clone we used for inlining. We rely on the fact that
700 clones inlined into the master clone get queued just before the master
701 clone, so we don't need recursion. */
702 for (node = cgraph_nodes; node != master_clone;
703 node = next)
704 {
705 next = node->next;
706 if (node->global.inlined_to == master_clone)
707 cgraph_remove_node (node);
708 }
709 cgraph_remove_node (master_clone);
710 /* FIXME: Recursive inlining actually reduces the number of calls of the
711 function. At this point we should probably walk the function and
712 inline clones and compensate the counts accordingly. This probably
713 doesn't matter much in practice. */
714 return n > 0;
715 }
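
/* Illustrative note on the limits above (the figures are assumptions, not
   the documented parameter defaults): the loop keeps inlining recursive call
   sites into the master clone until either the estimated size would exceed
   the applicable max-inline-insns-recursive limit or every remaining
   candidate is nested deeper than the applicable max-inline-recursive-depth.
   E.g. if each inlined copy adds roughly 40 insns net of the removed call,
   a 50-insn body with a size limit of 450 admits about ten recursive copies
   before the size test stops the loop. */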
716
717 /* Set inline_failed for all callers of given function to REASON. */
718
719 static void
720 cgraph_set_inline_failed (struct cgraph_node *node, const char *reason)
721 {
722 struct cgraph_edge *e;
723
724 if (dump_file)
725 fprintf (dump_file, "Inlining failed: %s\n", reason);
726 for (e = node->callers; e; e = e->next_caller)
727 if (e->inline_failed)
728 e->inline_failed = reason;
729 }
730
731 /* We use a greedy algorithm for inlining small functions:
732 all inline candidates are put into a prioritized heap based on the estimated
733 growth of the overall number of instructions; the estimates are then updated.
734
735 INLINED and INLINED_CALLEES are just pointers to arrays large enough
736 to be passed to cgraph_inlined_into and cgraph_inlined_callees. */
737
738 static void
739 cgraph_decide_inlining_of_small_functions (void)
740 {
741 struct cgraph_node *node;
742 struct cgraph_edge *edge;
743 const char *failed_reason;
744 fibheap_t heap = fibheap_new ();
745 bitmap updated_nodes = BITMAP_ALLOC (NULL);
746
747 if (dump_file)
748 fprintf (dump_file, "\nDeciding on smaller functions:\n");
749
750 /* Put all inline candidates into the heap. */
751
752 for (node = cgraph_nodes; node; node = node->next)
753 {
754 if (!node->local.inlinable || !node->callers
755 || node->local.disregard_inline_limits)
756 continue;
757 if (dump_file)
758 fprintf (dump_file, "Considering inline candidate %s.\n", cgraph_node_name (node));
759
760 node->global.estimated_growth = INT_MIN;
761 if (!cgraph_default_inline_p (node, &failed_reason))
762 {
763 cgraph_set_inline_failed (node, failed_reason);
764 continue;
765 }
766
767 for (edge = node->callers; edge; edge = edge->next_caller)
768 if (edge->inline_failed)
769 {
770 gcc_assert (!edge->aux);
771 edge->aux = fibheap_insert (heap, cgraph_edge_badness (edge), edge);
772 }
773 }
774 while (overall_insns <= max_insns && (edge = fibheap_extract_min (heap)))
775 {
776 int old_insns = overall_insns;
777 struct cgraph_node *where;
778 int growth =
779 cgraph_estimate_size_after_inlining (1, edge->caller, edge->callee);
780
781 growth -= edge->caller->global.insns;
782
783 if (dump_file)
784 {
785 fprintf (dump_file,
786 "\nConsidering %s with %i insns\n",
787 cgraph_node_name (edge->callee),
788 edge->callee->global.insns);
789 fprintf (dump_file,
790 " to be inlined into %s\n"
791 " Estimated growth after inlined into all callees is %+i insns.\n"
792 " Estimated badness is %i.\n",
793 cgraph_node_name (edge->caller),
794 cgraph_estimate_growth (edge->callee),
795 cgraph_edge_badness (edge));
796 if (edge->count)
797 fprintf (dump_file," Called "HOST_WIDEST_INT_PRINT_DEC"x\n", edge->count);
798 }
799 gcc_assert (edge->aux);
800 edge->aux = NULL;
801 if (!edge->inline_failed)
802 continue;
803
804 /* When profile info is not available we don't weight in any way the
805 position of the call in the procedure itself. This means that if a call of
806 function A from function B seems profitable to inline, the recursive
807 call of function A in the inline copy of A in B will look profitable too,
808 and we end up inlining until we reach the maximal function growth. This
809 is not a good idea, so prohibit the recursive inlining.
810
811 ??? When the frequencies are taken into account we might not need this
812 restriction. */
813 if (!max_count)
814 {
815 where = edge->caller;
816 while (where->global.inlined_to)
817 {
818 if (where->decl == edge->callee->decl)
819 break;
820 where = where->callers->caller;
821 }
822 if (where->global.inlined_to)
823 {
824 edge->inline_failed
825 = (edge->callee->local.disregard_inline_limits ? N_("recursive inlining") : "");
826 if (dump_file)
827 fprintf (dump_file, " inline_failed:Recursive inlining performed only for function itself.\n");
828 continue;
829 }
830 }
831
832 if (!cgraph_maybe_hot_edge_p (edge) && growth > 0)
833 {
834 if (!cgraph_recursive_inlining_p (edge->caller, edge->callee,
835 &edge->inline_failed))
836 {
837 edge->inline_failed =
838 N_("call is unlikely");
839 if (dump_file)
840 fprintf (dump_file, " inline_failed:%s.\n", edge->inline_failed);
841 }
842 continue;
843 }
844 if (!cgraph_default_inline_p (edge->callee, &edge->inline_failed))
845 {
846 if (!cgraph_recursive_inlining_p (edge->caller, edge->callee,
847 &edge->inline_failed))
848 {
849 if (dump_file)
850 fprintf (dump_file, " inline_failed:%s.\n", edge->inline_failed);
851 }
852 continue;
853 }
854 if (cgraph_recursive_inlining_p (edge->caller, edge->callee,
855 &edge->inline_failed))
856 {
857 where = edge->caller;
858 if (where->global.inlined_to)
859 where = where->global.inlined_to;
860 if (!cgraph_decide_recursive_inlining (where))
861 continue;
862 update_callee_keys (heap, where, updated_nodes);
863 }
864 else
865 {
866 struct cgraph_node *callee;
867 if (!cgraph_check_inline_limits (edge->caller, edge->callee,
868 &edge->inline_failed, true))
869 {
870 if (dump_file)
871 fprintf (dump_file, " Not inlining into %s:%s.\n",
872 cgraph_node_name (edge->caller), edge->inline_failed);
873 continue;
874 }
875 callee = edge->callee;
876 cgraph_mark_inline_edge (edge, true);
877 update_callee_keys (heap, callee, updated_nodes);
878 }
879 where = edge->caller;
880 if (where->global.inlined_to)
881 where = where->global.inlined_to;
882
883 /* Our profitability metric can depend on local properties
884 such as the number of inlinable calls and the size of the function body.
885 After inlining these properties might change for the function we
886 inlined into (since its body size changed) and for the functions
887 called by the function we inlined (since the number of their inlinable
888 callers might change). */
889 update_caller_keys (heap, where, updated_nodes);
890 bitmap_clear (updated_nodes);
891
892 if (dump_file)
893 {
894 fprintf (dump_file,
895 " Inlined into %s which now has %i insns,"
896 "net change of %+i insns.\n",
897 cgraph_node_name (edge->caller),
898 edge->caller->global.insns,
899 overall_insns - old_insns);
900 }
901 }
902 while ((edge = fibheap_extract_min (heap)) != NULL)
903 {
904 gcc_assert (edge->aux);
905 edge->aux = NULL;
906 if (!edge->callee->local.disregard_inline_limits && edge->inline_failed
907 && !cgraph_recursive_inlining_p (edge->caller, edge->callee,
908 &edge->inline_failed))
909 edge->inline_failed = N_("--param inline-unit-growth limit reached");
910 }
911 fibheap_delete (heap);
912 BITMAP_FREE (updated_nodes);
913 }
914
915 /* Decide on the inlining. We do so in topological order to avoid the
916 expense of updating the data structures. */
917
918 static unsigned int
919 cgraph_decide_inlining (void)
920 {
921 struct cgraph_node *node;
922 int nnodes;
923 struct cgraph_node **order =
924 XCNEWVEC (struct cgraph_node *, cgraph_n_nodes);
925 int old_insns = 0;
926 int i;
927
928 max_count = 0;
929 for (node = cgraph_nodes; node; node = node->next)
930 if (node->analyzed && (node->needed || node->reachable))
931 {
932 struct cgraph_edge *e;
933
934 /* At the moment, no IPA passes change function bodies before inlining.
935 Save some time by not recomputing function body sizes if early inlining
936 already did so. */
937 if (!flag_early_inlining)
938 node->local.self_insns = node->global.insns
939 = estimate_num_insns (node->decl);
940
941 initial_insns += node->local.self_insns;
942 gcc_assert (node->local.self_insns == node->global.insns);
943 for (e = node->callees; e; e = e->next_callee)
944 if (max_count < e->count)
945 max_count = e->count;
946 }
947 overall_insns = initial_insns;
948 gcc_assert (!max_count || (profile_info && flag_branch_probabilities));
949
950 max_insns = overall_insns;
951 if (max_insns < PARAM_VALUE (PARAM_LARGE_UNIT_INSNS))
952 max_insns = PARAM_VALUE (PARAM_LARGE_UNIT_INSNS);
953
954 max_insns = ((HOST_WIDEST_INT) max_insns
955 * (100 + PARAM_VALUE (PARAM_INLINE_UNIT_GROWTH)) / 100);
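
 /* Illustrative arithmetic (assumed values, not defaults): with
    initial_insns == 20000 and --param inline-unit-growth=50 the cap becomes
    20000 * 150 / 100 == 30000 insns, i.e. the small function inliner below
    may grow the unit by at most 50%. For very small units the
    --param large-unit-insns floor applies first. */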
956
957 nnodes = cgraph_postorder (order);
958
959 if (dump_file)
960 fprintf (dump_file,
961 "\nDeciding on inlining. Starting with %i insns.\n",
962 initial_insns);
963
964 for (node = cgraph_nodes; node; node = node->next)
965 node->aux = 0;
966
967 if (dump_file)
968 fprintf (dump_file, "\nInlining always_inline functions:\n");
969
970 /* In the first pass mark all always_inline edges. Do this with a priority
971 so none of our later choices will make this impossible. */
972 for (i = nnodes - 1; i >= 0; i--)
973 {
974 struct cgraph_edge *e, *next;
975
976 node = order[i];
977
978 /* Handle nodes to be flattened, but don't update overall unit size. */
979 if (lookup_attribute ("flatten", DECL_ATTRIBUTES (node->decl)) != NULL)
980 {
981 int old_overall_insns = overall_insns;
982 htab_t cycles;
983 if (dump_file)
984 fprintf (dump_file,
985 "Flattening %s\n", cgraph_node_name (node));
986 cycles = htab_create (7, htab_hash_pointer, htab_eq_pointer, NULL);
987 cgraph_find_cycles (node, cycles);
988 cgraph_flatten_node (node, cycles);
989 htab_delete (cycles);
990 overall_insns = old_overall_insns;
991 /* We don't need to consider always_inline functions inside the flattened
992 function anymore. */
993 continue;
994 }
995
996 if (!node->local.disregard_inline_limits)
997 continue;
998 if (dump_file)
999 fprintf (dump_file,
1000 "\nConsidering %s %i insns (always inline)\n",
1001 cgraph_node_name (node), node->global.insns);
1002 old_insns = overall_insns;
1003 for (e = node->callers; e; e = next)
1004 {
1005 next = e->next_caller;
1006 if (!e->inline_failed)
1007 continue;
1008 if (cgraph_recursive_inlining_p (e->caller, e->callee,
1009 &e->inline_failed))
1010 continue;
1011 cgraph_mark_inline_edge (e, true);
1012 if (dump_file)
1013 fprintf (dump_file,
1014 " Inlined into %s which now has %i insns.\n",
1015 cgraph_node_name (e->caller),
1016 e->caller->global.insns);
1017 }
1018 if (dump_file)
1019 fprintf (dump_file,
1020 " Inlined for a net change of %+i insns.\n",
1021 overall_insns - old_insns);
1022 }
1023
1024 if (!flag_really_no_inline)
1025 cgraph_decide_inlining_of_small_functions ();
1026
1027 if (!flag_really_no_inline
1028 && flag_inline_functions_called_once)
1029 {
1030 if (dump_file)
1031 fprintf (dump_file, "\nDeciding on functions called once:\n");
1032
1033 /* And finally decide what functions are called once. */
1034
1035 for (i = nnodes - 1; i >= 0; i--)
1036 {
1037 node = order[i];
1038
1039 if (node->callers && !node->callers->next_caller && !node->needed
1040 && node->local.inlinable && node->callers->inline_failed
1041 && !DECL_EXTERNAL (node->decl) && !DECL_COMDAT (node->decl))
1042 {
1043 if (dump_file)
1044 {
1045 fprintf (dump_file,
1046 "\nConsidering %s %i insns.\n",
1047 cgraph_node_name (node), node->global.insns);
1048 fprintf (dump_file,
1049 " Called once from %s %i insns.\n",
1050 cgraph_node_name (node->callers->caller),
1051 node->callers->caller->global.insns);
1052 }
1053
1054 old_insns = overall_insns;
1055
1056 if (cgraph_check_inline_limits (node->callers->caller, node,
1057 NULL, false))
1058 {
1059 cgraph_mark_inline (node->callers);
1060 if (dump_file)
1061 fprintf (dump_file,
1062 " Inlined into %s which now has %i insns"
1063 " for a net change of %+i insns.\n",
1064 cgraph_node_name (node->callers->caller),
1065 node->callers->caller->global.insns,
1066 overall_insns - old_insns);
1067 }
1068 else
1069 {
1070 if (dump_file)
1071 fprintf (dump_file,
1072 " Inline limit reached, not inlined.\n");
1073 }
1074 }
1075 }
1076 }
1077
1078 if (dump_file)
1079 fprintf (dump_file,
1080 "\nInlined %i calls, eliminated %i functions, "
1081 "%i insns turned to %i insns.\n\n",
1082 ncalls_inlined, nfunctions_inlined, initial_insns,
1083 overall_insns);
1084 free (order);
1085 return 0;
1086 }
1087
1088 /* Decide on inlining of calls in NODE. Unlike cgraph_decide_inlining, this
1089 considers only one function at a time; it is used by early inlining and in non-unit-at-a-time mode. */
1090
1091 bool
1092 cgraph_decide_inlining_incrementally (struct cgraph_node *node, bool early)
1093 {
1094 struct cgraph_edge *e;
1095 bool inlined = false;
1096 const char *failed_reason;
1097
1098 /* First of all look for always inline functions. */
1099 for (e = node->callees; e; e = e->next_callee)
1100 if (e->callee->local.disregard_inline_limits
1101 && e->inline_failed
1102 && !cgraph_recursive_inlining_p (node, e->callee, &e->inline_failed)
1103 /* ??? It is possible that renaming variable removed the function body
1104 in duplicate_decls. See gcc.c-torture/compile/20011119-2.c */
1105 && (DECL_SAVED_TREE (e->callee->decl) || e->callee->inline_decl))
1106 {
1107 if (dump_file && early)
1108 {
1109 fprintf (dump_file, " Early inlining %s",
1110 cgraph_node_name (e->callee));
1111 fprintf (dump_file, " into %s\n", cgraph_node_name (node));
1112 }
1113 cgraph_mark_inline (e);
1114 inlined = true;
1115 }
1116
1117 /* Now do the automatic inlining. */
1118 if (!flag_really_no_inline)
1119 for (e = node->callees; e; e = e->next_callee)
1120 if (e->callee->local.inlinable
1121 && e->inline_failed
1122 && !e->callee->local.disregard_inline_limits
1123 && !cgraph_recursive_inlining_p (node, e->callee, &e->inline_failed)
1124 && (!early
1125 || (cgraph_estimate_size_after_inlining (1, e->caller, e->callee)
1126 <= e->caller->global.insns))
1127 && cgraph_check_inline_limits (node, e->callee, &e->inline_failed,
1128 false)
1129 && (DECL_SAVED_TREE (e->callee->decl) || e->callee->inline_decl))
1130 {
1131 if (cgraph_default_inline_p (e->callee, &failed_reason))
1132 {
1133 if (dump_file && early)
1134 {
1135 fprintf (dump_file, " Early inlining %s",
1136 cgraph_node_name (e->callee));
1137 fprintf (dump_file, " into %s\n", cgraph_node_name (node));
1138 }
1139 cgraph_mark_inline (e);
1140 inlined = true;
1141 }
1142 else if (!early)
1143 e->inline_failed = failed_reason;
1144 }
1145 if (early && inlined)
1146 {
1147 timevar_push (TV_INTEGRATION);
1148 push_cfun (DECL_STRUCT_FUNCTION (node->decl));
1149 tree_register_cfg_hooks ();
1150 current_function_decl = node->decl;
1151 optimize_inline_calls (current_function_decl);
1152 node->local.self_insns = node->global.insns;
1153 current_function_decl = NULL;
1154 pop_cfun ();
1155 timevar_pop (TV_INTEGRATION);
1156 }
1157 return inlined;
1158 }
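
/* Illustrative note on the EARLY restriction above (numbers are assumptions):
   during early inlining a call is only considered when
   cgraph_estimate_size_after_inlining does not exceed the caller's current
   size, i.e. when the callee body minus the estimated call overhead is not
   positive. For instance a 10-insn callee with an estimated call cost of 16
   insns shrinks its caller and is inlined early; a 40-insn callee with the
   same call cost is left for the real IPA inliner. */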
1159
1160 /* Return true when inlining shall be performed. */
1161 static bool
1162 cgraph_gate_inlining (void)
1163 {
1164 return flag_inline_trees;
1165 }
1166
1167 struct tree_opt_pass pass_ipa_inline =
1168 {
1169 "inline", /* name */
1170 cgraph_gate_inlining, /* gate */
1171 cgraph_decide_inlining, /* execute */
1172 NULL, /* sub */
1173 NULL, /* next */
1174 0, /* static_pass_number */
1175 TV_INLINE_HEURISTICS, /* tv_id */
1176 0, /* properties_required */
1177 PROP_cfg, /* properties_provided */
1178 0, /* properties_destroyed */
1179 0, /* todo_flags_start */
1180 TODO_dump_cgraph | TODO_dump_func
1181 | TODO_remove_functions, /* todo_flags_finish */
1182 0 /* letter */
1183 };
1184
1185 /* Because inlining might remove no-longer-reachable nodes, we need to
1186 keep the array visible to the garbage collector to avoid reading from
1187 collected-out nodes. */
1188 static int nnodes;
1189 static GTY ((length ("nnodes"))) struct cgraph_node **order;
1190
1191 /* Do inlining of small functions. Doing so early helps profiling and other
1192 passes to be somewhat more effective and avoids some code duplication in
1193 the later real inlining pass for testcases with very many function calls. */
1194 static unsigned int
1195 cgraph_early_inlining (void)
1196 {
1197 struct cgraph_node *node;
1198 int i;
1199
1200 if (sorrycount || errorcount)
1201 return 0;
1202 #ifdef ENABLE_CHECKING
1203 for (node = cgraph_nodes; node; node = node->next)
1204 gcc_assert (!node->aux);
1205 #endif
1206
1207 order = ggc_alloc (sizeof (*order) * cgraph_n_nodes);
1208 nnodes = cgraph_postorder (order);
1209 for (i = nnodes - 1; i >= 0; i--)
1210 {
1211 node = order[i];
1212 if (node->analyzed && (node->needed || node->reachable))
1213 node->local.self_insns = node->global.insns
1214 = estimate_num_insns (node->decl);
1215 }
1216 for (i = nnodes - 1; i >= 0; i--)
1217 {
1218 node = order[i];
1219 if (node->analyzed && node->local.inlinable
1220 && (node->needed || node->reachable)
1221 && node->callers)
1222 {
1223 if (cgraph_decide_inlining_incrementally (node, true))
1224 ggc_collect ();
1225 }
1226 }
1227 #ifdef ENABLE_CHECKING
1228 for (node = cgraph_nodes; node; node = node->next)
1229 gcc_assert (!node->global.inlined_to);
1230 #endif
1231 ggc_free (order);
1232 order = NULL;
1233 nnodes = 0;
1234 return 0;
1235 }
1236
1237 /* Return true when early inlining shall be performed. */
1238 static bool
1239 cgraph_gate_early_inlining (void)
1240 {
1241 return flag_inline_trees && flag_early_inlining;
1242 }
1243
1244 struct tree_opt_pass pass_early_ipa_inline =
1245 {
1246 "einline", /* name */
1247 cgraph_gate_early_inlining, /* gate */
1248 cgraph_early_inlining, /* execute */
1249 NULL, /* sub */
1250 NULL, /* next */
1251 0, /* static_pass_number */
1252 TV_INLINE_HEURISTICS, /* tv_id */
1253 0, /* properties_required */
1254 PROP_cfg, /* properties_provided */
1255 0, /* properties_destroyed */
1256 0, /* todo_flags_start */
1257 TODO_dump_cgraph | TODO_dump_func
1258 | TODO_remove_functions, /* todo_flags_finish */
1259 0 /* letter */
1260 };
1261
1262 #include "gt-ipa-inline.h"