gcc/ipa-inline.c
1 /* Inlining decision heuristics.
2 Copyright (C) 2003-2013 Free Software Foundation, Inc.
3 Contributed by Jan Hubicka
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 /* Inlining decision heuristics
22
23    The implementation of the inliner is organized as follows:
24
25    inlining heuristics limits
26
27      can_inline_edge_p allows us to check that a particular inlining is
28      allowed by the limits specified by the user (allowed function growth,
29      overall unit growth and so on).
30
31      Functions are inlined when it is obvious the result is profitable (such
32      as functions called once or when inlining reduces code size).
33      In addition to that we perform inlining of small functions and recursive
34      inlining.
35
36 inlining heuristics
37
38 The inliner itself is split into two passes:
39
40 pass_early_inlining
41
42 Simple local inlining pass inlining callees into the current function.
43 This pass makes no use of whole unit analysis and thus it can make only
44 very simple decisions based on local properties.
45
46 The strength of the pass is that it is run in topological order
47 (reverse postorder) on the callgraph.  Functions are converted into SSA
48 form just before this pass and optimized subsequently.  As a result, the
49 callees of the function seen by the early inliner have already been
50 optimized, and the results of early inlining expose a lot of optimization
51 opportunities for local optimization.
52
53 The pass handles the obvious inlining decisions within the compilation
54 unit - inlining auto inline functions, inlining for size and
55 flattening.
56
57 The main strength of the pass is the ability to eliminate the abstraction
58 penalty in C++ code (via a combination of inlining and early
59 optimization) and thus improve the quality of the analysis done by the
60 real IPA optimizers.
61
62 Because of the lack of whole unit knowledge, the pass cannot really make
63 good code size/performance tradeoffs.  It does, however, perform very simple
64 speculative inlining, allowing code size to grow by
65 EARLY_INLINING_INSNS when the callee is a leaf function.  In this case the
66 optimizations performed later are very likely to eliminate the cost.
67
68 pass_ipa_inline
69
70 This is the real inliner, able to handle inlining with whole program
71 knowledge.  It performs the following steps:
72
73 1) Inlining of small functions.  This is implemented by a greedy
74 algorithm that orders all inlinable cgraph edges by their badness and
75 inlines them in this order as long as the inline limits allow doing so.
76
77 This heuristic is not very good at inlining recursive calls.  Recursive
78 calls can be inlined with results similar to loop unrolling.  To do so,
79 a special purpose recursive inliner is executed on the function when a
80 recursive edge is met as a viable candidate.
81
82 2) Unreachable functions are removed from the callgraph.  Inlining leads
83 to devirtualization and other modifications of the callgraph, so functions
84 may become unreachable during the process.  Functions declared as
85 extern inline and virtual functions are also removed, since after inlining
86 we no longer need the offline bodies.
87
88 3) Functions called once and not exported from the unit are inlined.
89 This should almost always lead to a reduction in code size by eliminating
90 the need for an offline copy of the function.  */
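/* For orientation only (an informal sketch, not the exact code -- see
   inline_small_functions below for the real implementation), the small
   function inlining step of pass_ipa_inline roughly does:

     put every inlinable call edge into a heap keyed by edge_badness ();
     while the heap is non-empty and unit growth stays under compute_max_insns ():
       pop the edge with the smallest badness;
       if its cached badness is out of date, re-insert it and continue;
       if can_inline_edge_p and want_inline_small_function_p still agree,
	 inline_call () and update the keys of the affected callers/callees.  */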
91
92 #include "config.h"
93 #include "system.h"
94 #include "coretypes.h"
95 #include "tm.h"
96 #include "tree.h"
97 #include "trans-mem.h"
98 #include "calls.h"
99 #include "tree-inline.h"
100 #include "langhooks.h"
101 #include "flags.h"
102 #include "diagnostic.h"
103 #include "gimple-pretty-print.h"
104 #include "params.h"
105 #include "fibheap.h"
106 #include "intl.h"
107 #include "tree-pass.h"
108 #include "coverage.h"
109 #include "ggc.h"
110 #include "rtl.h"
111 #include "bitmap.h"
112 #include "gimple.h"
113 #include "gimple-ssa.h"
114 #include "ipa-prop.h"
115 #include "except.h"
116 #include "target.h"
117 #include "ipa-inline.h"
118 #include "ipa-utils.h"
119 #include "sreal.h"
120 #include "cilk.h"
121
122 /* Statistics we collect about inlining algorithm. */
123 static int overall_size;
124 static gcov_type max_count;
125 static sreal max_count_real, max_relbenefit_real, half_int_min_real;
126
127 /* Return false when inlining edge E would lead to violating
128 limits on function unit growth or stack usage growth.
129
130 The relative function body growth limit is present generally
131 to avoid problems with the non-linear behavior of the compiler.
132 To allow inlining huge functions into a tiny wrapper, the limit
133 is always based on the bigger of the two functions considered.
134
135 For stack growth limits we always base the growth on the stack usage
136 of the caller.  We want to prevent applications from segfaulting
137 on stack overflow when functions with huge stack frames get
138 inlined. */
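/* Illustrative example (numbers made up): if the largest body on the chain
   of functions E->caller is inlined into has self_size 100 and
   --param large-function-growth is 100, the limit below works out to
   100 + 100 * 100 / 100 = 200.  An inlining whose estimated result size
   exceeds this limit (and also exceeds --param large-function-insns) is
   rejected with CIF_LARGE_FUNCTION_GROWTH_LIMIT.  */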
139
140 static bool
141 caller_growth_limits (struct cgraph_edge *e)
142 {
143 struct cgraph_node *to = e->caller;
144 struct cgraph_node *what = cgraph_function_or_thunk_node (e->callee, NULL);
145 int newsize;
146 int limit = 0;
147 HOST_WIDE_INT stack_size_limit = 0, inlined_stack;
148 struct inline_summary *info, *what_info, *outer_info = inline_summary (to);
149
150 /* Look for the function e->caller is inlined into.  While doing
151 so, work out the largest function body on the way.  As
152 described above, we want to base our function growth
153 limits on that, not on the self size of the
154 outer function nor on the self size of the inline code
155 we immediately inline into.  This is the most relaxed
156 interpretation of the rule "do not grow large functions
157 too much in order to prevent the compiler from exploding". */
158 while (true)
159 {
160 info = inline_summary (to);
161 if (limit < info->self_size)
162 limit = info->self_size;
163 if (stack_size_limit < info->estimated_self_stack_size)
164 stack_size_limit = info->estimated_self_stack_size;
165 if (to->global.inlined_to)
166 to = to->callers->caller;
167 else
168 break;
169 }
170
171 what_info = inline_summary (what);
172
173 if (limit < what_info->self_size)
174 limit = what_info->self_size;
175
176 limit += limit * PARAM_VALUE (PARAM_LARGE_FUNCTION_GROWTH) / 100;
177
178 /* Check the size after inlining against the function limits. But allow
179 the function to shrink if it went over the limits by forced inlining. */
180 newsize = estimate_size_after_inlining (to, e);
181 if (newsize >= info->size
182 && newsize > PARAM_VALUE (PARAM_LARGE_FUNCTION_INSNS)
183 && newsize > limit)
184 {
185 e->inline_failed = CIF_LARGE_FUNCTION_GROWTH_LIMIT;
186 return false;
187 }
188
189 if (!what_info->estimated_stack_size)
190 return true;
191
192 /* FIXME: The stack size limit often prevents inlining in Fortran programs
193 due to large I/O data structures used by the Fortran front-end.
194 We ought to ignore this limit when we know that the edge is executed
195 on every invocation of the caller (i.e. its call statement dominates
196 the exit block).  We do not track this information, yet. */
197 stack_size_limit += ((gcov_type)stack_size_limit
198 * PARAM_VALUE (PARAM_STACK_FRAME_GROWTH) / 100);
199
200 inlined_stack = (outer_info->stack_frame_offset
201 + outer_info->estimated_self_stack_size
202 + what_info->estimated_stack_size);
203 /* Compare the new stack consumption with the stack consumption at the place
204 the stack is used. */
205 if (inlined_stack > stack_size_limit
206 /* If the function already has large stack usage from a sibling
207 inline call, we can inline, too.
208 This bit overoptimistically assumes that we are good at stack
209 packing. */
210 && inlined_stack > info->estimated_stack_size
211 && inlined_stack > PARAM_VALUE (PARAM_LARGE_STACK_FRAME))
212 {
213 e->inline_failed = CIF_LARGE_STACK_FRAME_GROWTH_LIMIT;
214 return false;
215 }
216 return true;
217 }
218
219 /* Dump info about why inlining has failed. */
220
221 static void
222 report_inline_failed_reason (struct cgraph_edge *e)
223 {
224 if (dump_file)
225 {
226 fprintf (dump_file, " not inlinable: %s/%i -> %s/%i, %s\n",
227 xstrdup (e->caller->name ()), e->caller->order,
228 xstrdup (e->callee->name ()), e->callee->order,
229 cgraph_inline_failed_string (e->inline_failed));
230 }
231 }
232
233 /* Decide if we can inline the edge and possibly update
234 the inline_failed reason.
235 We check whether inlining is possible at all and whether
236 caller growth limits allow doing so.
237
238 If REPORT is true, output the reason to the dump file.
239
240 If DISREGARD_LIMITS is true, ignore size limits.  */
241
242 static bool
243 can_inline_edge_p (struct cgraph_edge *e, bool report,
244 bool disregard_limits = false)
245 {
246 bool inlinable = true;
247 enum availability avail;
248 struct cgraph_node *callee
249 = cgraph_function_or_thunk_node (e->callee, &avail);
250 tree caller_tree = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (e->caller->decl);
251 tree callee_tree
252 = callee ? DECL_FUNCTION_SPECIFIC_OPTIMIZATION (callee->decl) : NULL;
253 struct function *caller_cfun = DECL_STRUCT_FUNCTION (e->caller->decl);
254 struct function *callee_cfun
255 = callee ? DECL_STRUCT_FUNCTION (callee->decl) : NULL;
256
257 if (!caller_cfun && e->caller->clone_of)
258 caller_cfun = DECL_STRUCT_FUNCTION (e->caller->clone_of->decl);
259
260 if (!callee_cfun && callee && callee->clone_of)
261 callee_cfun = DECL_STRUCT_FUNCTION (callee->clone_of->decl);
262
263 gcc_assert (e->inline_failed);
264
265 if (!callee || !callee->definition)
266 {
267 e->inline_failed = CIF_BODY_NOT_AVAILABLE;
268 inlinable = false;
269 }
270 else if (!inline_summary (callee)->inlinable
271 || (caller_cfun && fn_contains_cilk_spawn_p (caller_cfun)))
272 {
273 e->inline_failed = CIF_FUNCTION_NOT_INLINABLE;
274 inlinable = false;
275 }
276 else if (avail <= AVAIL_OVERWRITABLE)
277 {
278 e->inline_failed = CIF_OVERWRITABLE;
279 inlinable = false;
280 }
281 else if (e->call_stmt_cannot_inline_p)
282 {
283 if (e->inline_failed != CIF_FUNCTION_NOT_OPTIMIZED)
284 e->inline_failed = CIF_MISMATCHED_ARGUMENTS;
285 inlinable = false;
286 }
287 /* Don't inline if the functions have different EH personalities. */
288 else if (DECL_FUNCTION_PERSONALITY (e->caller->decl)
289 && DECL_FUNCTION_PERSONALITY (callee->decl)
290 && (DECL_FUNCTION_PERSONALITY (e->caller->decl)
291 != DECL_FUNCTION_PERSONALITY (callee->decl)))
292 {
293 e->inline_failed = CIF_EH_PERSONALITY;
294 inlinable = false;
295 }
296 /* TM pure functions should not be inlined into non-TM_pure
297 functions. */
298 else if (is_tm_pure (callee->decl)
299 && !is_tm_pure (e->caller->decl))
300 {
301 e->inline_failed = CIF_UNSPECIFIED;
302 inlinable = false;
303 }
304 /* Don't inline if the callee can throw non-call exceptions but the
305 caller cannot.
306 FIXME: this is obviously wrong for LTO where STRUCT_FUNCTION is missing.
307 Move the flag into cgraph node or mirror it in the inline summary. */
308 else if (callee_cfun && callee_cfun->can_throw_non_call_exceptions
309 && !(caller_cfun && caller_cfun->can_throw_non_call_exceptions))
310 {
311 e->inline_failed = CIF_NON_CALL_EXCEPTIONS;
312 inlinable = false;
313 }
314 /* Check compatibility of target optimization options. */
315 else if (!targetm.target_option.can_inline_p (e->caller->decl,
316 callee->decl))
317 {
318 e->inline_failed = CIF_TARGET_OPTION_MISMATCH;
319 inlinable = false;
320 }
321 /* Check if caller growth allows the inlining. */
322 else if (!DECL_DISREGARD_INLINE_LIMITS (callee->decl)
323 && !disregard_limits
324 && !lookup_attribute ("flatten",
325 DECL_ATTRIBUTES
326 (e->caller->global.inlined_to
327 ? e->caller->global.inlined_to->decl
328 : e->caller->decl))
329 && !caller_growth_limits (e))
330 inlinable = false;
331 /* Don't inline a function with a higher optimization level than the
332 caller.  FIXME: this is really just the tip of the iceberg of handling
333 the optimization attribute. */
334 else if (caller_tree != callee_tree)
335 {
336 struct cl_optimization *caller_opt
337 = TREE_OPTIMIZATION ((caller_tree)
338 ? caller_tree
339 : optimization_default_node);
340
341 struct cl_optimization *callee_opt
342 = TREE_OPTIMIZATION ((callee_tree)
343 ? callee_tree
344 : optimization_default_node);
345
346 if (((caller_opt->x_optimize > callee_opt->x_optimize)
347 || (caller_opt->x_optimize_size != callee_opt->x_optimize_size))
348 /* gcc.dg/pr43564.c. Look at forced inline even in -O0. */
349 && !DECL_DISREGARD_INLINE_LIMITS (e->callee->decl))
350 {
351 e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
352 inlinable = false;
353 }
354 }
355
356 if (!inlinable && report)
357 report_inline_failed_reason (e);
358 return inlinable;
359 }
360
361
362 /* Return true if the edge E is inlinable during early inlining. */
363
364 static bool
365 can_early_inline_edge_p (struct cgraph_edge *e)
366 {
367 struct cgraph_node *callee = cgraph_function_or_thunk_node (e->callee,
368 NULL);
369 /* The early inliner might get called at the WPA stage when an IPA pass adds
370 a new function.  In this case we cannot really do any early inlining
371 because the function bodies are missing. */
372 if (!gimple_has_body_p (callee->decl))
373 {
374 e->inline_failed = CIF_BODY_NOT_AVAILABLE;
375 return false;
376 }
377 /* In the early inliner some of the callees may not be in SSA form yet
378 (i.e. the callgraph is cyclic and we have not processed
379 the callee by the early inliner yet).  We don't have a CIF code for this
380 case; later we will re-do the decision in the real inliner. */
381 if (!gimple_in_ssa_p (DECL_STRUCT_FUNCTION (e->caller->decl))
382 || !gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee->decl)))
383 {
384 if (dump_file)
385 fprintf (dump_file, " edge not inlinable: not in SSA form\n");
386 return false;
387 }
388 if (!can_inline_edge_p (e, true))
389 return false;
390 return true;
391 }
392
393
394 /* Return number of calls in N. Ignore cheap builtins. */
395
396 static int
397 num_calls (struct cgraph_node *n)
398 {
399 struct cgraph_edge *e;
400 int num = 0;
401
402 for (e = n->callees; e; e = e->next_callee)
403 if (!is_inexpensive_builtin (e->callee->decl))
404 num++;
405 return num;
406 }
407
408
409 /* Return true if we are interested in inlining the small function early.  */
410
411 static bool
412 want_early_inline_function_p (struct cgraph_edge *e)
413 {
414 bool want_inline = true;
415 struct cgraph_node *callee = cgraph_function_or_thunk_node (e->callee, NULL);
416
417 if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
418 ;
419 else if (!DECL_DECLARED_INLINE_P (callee->decl)
420 && !flag_inline_small_functions)
421 {
422 e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
423 report_inline_failed_reason (e);
424 want_inline = false;
425 }
426 else
427 {
428 int growth = estimate_edge_growth (e);
429 int n;
430
431 if (growth <= 0)
432 ;
433 else if (!cgraph_maybe_hot_edge_p (e)
434 && growth > 0)
435 {
436 if (dump_file)
437 fprintf (dump_file, " will not early inline: %s/%i->%s/%i, "
438 "call is cold and code would grow by %i\n",
439 xstrdup (e->caller->name ()),
440 e->caller->order,
441 xstrdup (callee->name ()), callee->order,
442 growth);
443 want_inline = false;
444 }
445 else if (growth > PARAM_VALUE (PARAM_EARLY_INLINING_INSNS))
446 {
447 if (dump_file)
448 fprintf (dump_file, " will not early inline: %s/%i->%s/%i, "
449 "growth %i exceeds --param early-inlining-insns\n",
450 xstrdup (e->caller->name ()),
451 e->caller->order,
452 xstrdup (callee->name ()), callee->order,
453 growth);
454 want_inline = false;
455 }
456 else if ((n = num_calls (callee)) != 0
457 && growth * (n + 1) > PARAM_VALUE (PARAM_EARLY_INLINING_INSNS))
458 {
459 if (dump_file)
460 fprintf (dump_file, " will not early inline: %s/%i->%s/%i, "
461 "growth %i exceeds --param early-inlining-insns "
462 "divided by number of calls\n",
463 xstrdup (e->caller->name ()),
464 e->caller->order,
465 xstrdup (callee->name ()), callee->order,
466 growth);
467 want_inline = false;
468 }
469 }
470 return want_inline;
471 }
472
473 /* Compute the time of the edge->caller + edge->callee execution when inlining
474 does not happen. */
475
476 inline gcov_type
477 compute_uninlined_call_time (struct inline_summary *callee_info,
478 struct cgraph_edge *edge)
479 {
480 gcov_type uninlined_call_time =
481 RDIV ((gcov_type)callee_info->time * MAX (edge->frequency, 1),
482 CGRAPH_FREQ_BASE);
483 gcov_type caller_time = inline_summary (edge->caller->global.inlined_to
484 ? edge->caller->global.inlined_to
485 : edge->caller)->time;
486 return uninlined_call_time + caller_time;
487 }
488
489 /* Same as compute_uninlined_call_time but compute the time when inlining
490 does happen. */
491
492 inline gcov_type
493 compute_inlined_call_time (struct cgraph_edge *edge,
494 int edge_time)
495 {
496 gcov_type caller_time = inline_summary (edge->caller->global.inlined_to
497 ? edge->caller->global.inlined_to
498 : edge->caller)->time;
499 gcov_type time = (caller_time
500 + RDIV (((gcov_type) edge_time
501 - inline_edge_summary (edge)->call_stmt_time)
502 * MAX (edge->frequency, 1), CGRAPH_FREQ_BASE));
503 /* Possibly one roundoff error, but watch for overflows. */
504 gcc_checking_assert (time >= INT_MIN / 2);
505 if (time < 0)
506 time = 0;
507 return time;
508 }
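/* Informally, the two helpers above compute:

     uninlined time = caller_time + callee_time * freq / CGRAPH_FREQ_BASE
     inlined time   = caller_time
		      + (edge_time - call_stmt_time) * freq / CGRAPH_FREQ_BASE

   and their difference is what big_speedup_p and relative_time_benefit
   below are based on.  */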
509
510 /* Return true if the speedup for inlining E is bigger than
511 PARAM_MAX_INLINE_MIN_SPEEDUP. */
512
513 static bool
514 big_speedup_p (struct cgraph_edge *e)
515 {
516 gcov_type time = compute_uninlined_call_time (inline_summary (e->callee),
517 e);
518 gcov_type inlined_time = compute_inlined_call_time (e,
519 estimate_edge_time (e));
520 if (time - inlined_time
521 > RDIV (time * PARAM_VALUE (PARAM_INLINE_MIN_SPEEDUP), 100))
522 return true;
523 return false;
524 }
525
526 /* Return true if we are interested in inlining the small function.
527 When REPORT is true, report the reason to the dump file. */
528
529 static bool
530 want_inline_small_function_p (struct cgraph_edge *e, bool report)
531 {
532 bool want_inline = true;
533 struct cgraph_node *callee = cgraph_function_or_thunk_node (e->callee, NULL);
534
535 if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
536 ;
537 else if (!DECL_DECLARED_INLINE_P (callee->decl)
538 && !flag_inline_small_functions)
539 {
540 e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
541 want_inline = false;
542 }
543 else
544 {
545 int growth = estimate_edge_growth (e);
546 inline_hints hints = estimate_edge_hints (e);
547 bool big_speedup = big_speedup_p (e);
548
549 if (growth <= 0)
550 ;
551 /* Apply the MAX_INLINE_INSNS_SINGLE limit.  Do not do so when the
552 hints suggest that inlining the given function is very profitable. */
553 else if (DECL_DECLARED_INLINE_P (callee->decl)
554 && growth >= MAX_INLINE_INSNS_SINGLE
555 && !big_speedup
556 && !(hints & (INLINE_HINT_indirect_call
557 | INLINE_HINT_loop_iterations
558 | INLINE_HINT_array_index
559 | INLINE_HINT_loop_stride)))
560 {
561 e->inline_failed = CIF_MAX_INLINE_INSNS_SINGLE_LIMIT;
562 want_inline = false;
563 }
564 /* Before giving up based on the fact that the caller size will grow, allow
565 functions that are called only a few times and for which eliminating the
566 offline copy will lead to an overall code size reduction.
567 Not all of these will be handled by the subsequent inlining of functions
568 called once: in particular weak functions are not handled, nor are functions
569 that inline to multiple calls but whose bodies are largely optimized out.
570 Finally we want to inline earlier to allow inlining of callbacks.
571
572 This is slightly wrong on the aggressive side: it is entirely possible
573 that a function is called many times with a context where inlining
574 reduces code size and a few times with a context where inlining increases
575 code size.  The resulting growth estimate will be negative even if it
576 would make more sense to keep the offline copy and not inline into the
577 call sites that make the code size grow.
578
579 When badness orders the calls in a way that code reducing calls come
580 first, this situation is not a problem at all: after inlining all
581 "good" calls, we will realize that keeping the function around is
582 better. */
583 else if (growth <= MAX_INLINE_INSNS_SINGLE
584 /* Unlike for functions called once, we play unsafe with
585 COMDATs.  We can allow that since we know the functions
586 in consideration are small (and thus the risk is small) and
587 moreover the growth estimates already account for the fact that COMDAT
588 functions may or may not disappear when eliminated from the
589 current unit.  With good probability, making the aggressive
590 choice in all units is going to make the overall program
591 smaller.
592
593 Consequently we ask cgraph_can_remove_if_no_direct_calls_p
594 instead of
595 cgraph_will_be_removed_from_program_if_no_direct_calls.  */
596 && !DECL_EXTERNAL (callee->decl)
597 && cgraph_can_remove_if_no_direct_calls_p (callee)
598 && estimate_growth (callee) <= 0)
599 ;
600 else if (!DECL_DECLARED_INLINE_P (callee->decl)
601 && !flag_inline_functions)
602 {
603 e->inline_failed = CIF_NOT_DECLARED_INLINED;
604 want_inline = false;
605 }
606 /* Apply the MAX_INLINE_INSNS_AUTO limit for functions not declared inline.
607 Upgrade it to MAX_INLINE_INSNS_SINGLE when hints suggest that
608 inlining the given function is very profitable. */
609 else if (!DECL_DECLARED_INLINE_P (callee->decl)
610 && !big_speedup
611 && growth >= ((hints & (INLINE_HINT_indirect_call
612 | INLINE_HINT_loop_iterations
613 | INLINE_HINT_array_index
614 | INLINE_HINT_loop_stride))
615 ? MAX (MAX_INLINE_INSNS_AUTO,
616 MAX_INLINE_INSNS_SINGLE)
617 : MAX_INLINE_INSNS_AUTO))
618 {
619 e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
620 want_inline = false;
621 }
622 /* If the call is cold, do not inline when the function body would grow. */
623 else if (!cgraph_maybe_hot_edge_p (e))
624 {
625 e->inline_failed = CIF_UNLIKELY_CALL;
626 want_inline = false;
627 }
628 }
629 if (!want_inline && report)
630 report_inline_failed_reason (e);
631 return want_inline;
632 }
633
634 /* EDGE is a self recursive edge.
635 We handle two cases - when function A is being inlined into itself
636 or when function A is being inlined into another inlined copy of function
637 A within function B.
638
639 In the first case OUTER_NODE points to the toplevel copy of A, while
640 in the second case OUTER_NODE points to the outermost copy of A in B.
641
642 In both cases we want to be extra selective since
643 inlining the call will just cause new recursive calls to appear.  */
644
645 static bool
646 want_inline_self_recursive_call_p (struct cgraph_edge *edge,
647 struct cgraph_node *outer_node,
648 bool peeling,
649 int depth)
650 {
651 char const *reason = NULL;
652 bool want_inline = true;
653 int caller_freq = CGRAPH_FREQ_BASE;
654 int max_depth = PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH_AUTO);
655
656 if (DECL_DECLARED_INLINE_P (edge->caller->decl))
657 max_depth = PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH);
658
659 if (!cgraph_maybe_hot_edge_p (edge))
660 {
661 reason = "recursive call is cold";
662 want_inline = false;
663 }
664 else if (max_count && !outer_node->count)
665 {
666 reason = "not executed in profile";
667 want_inline = false;
668 }
669 else if (depth > max_depth)
670 {
671 reason = "--param max-inline-recursive-depth exceeded.";
672 want_inline = false;
673 }
674
675 if (outer_node->global.inlined_to)
676 caller_freq = outer_node->callers->frequency;
677
678 if (!want_inline)
679 ;
680 /* Inlining of a self recursive function into a copy of itself within another
681 function is a transformation similar to loop peeling.
682
683 Peeling is profitable if we can inline enough copies to make the probability
684 of an actual call to the self recursive function very small.  Be sure that
685 the probability of recursion is small.
686
687 We ensure that the frequency of recursing is at most 1 - (1/max_depth).
688 This way the expected number of recursions is at most max_depth. */
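/* Worked example (illustrative only): with max_depth equal to 8, the
   computation below caps the recursion probability at 1 - 1/8 = 7/8 of
   CGRAPH_FREQ_BASE for the first level and squares that cap once per
   additional level of depth, so the allowed probability shrinks very
   quickly for deeply nested copies.  */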
689 else if (peeling)
690 {
691 int max_prob = CGRAPH_FREQ_BASE - ((CGRAPH_FREQ_BASE + max_depth - 1)
692 / max_depth);
693 int i;
694 for (i = 1; i < depth; i++)
695 max_prob = max_prob * max_prob / CGRAPH_FREQ_BASE;
696 if (max_count
697 && (edge->count * CGRAPH_FREQ_BASE / outer_node->count
698 >= max_prob))
699 {
700 reason = "profile of recursive call is too large";
701 want_inline = false;
702 }
703 if (!max_count
704 && (edge->frequency * CGRAPH_FREQ_BASE / caller_freq
705 >= max_prob))
706 {
707 reason = "frequency of recursive call is too large";
708 want_inline = false;
709 }
710 }
711 /* Recursive inlining, i.e. the equivalent of unrolling, is profitable if the
712 recursion depth is large.  We reduce function call overhead and increase the
713 chances that things fit in the hardware return address predictor.
714
715 Recursive inlining might however increase the cost of stack frame setup,
716 actually slowing down functions whose recursion tree is wide rather than
717 deep.
718
719 Deciding reliably on when to do recursive inlining without profile feedback
720 is tricky.  For now we disable recursive inlining when the probability of self
721 recursion is low.
722
723 Recursive inlining of a self recursive call within a loop also results in large
724 loop depths that generally optimize badly.  We may want to throttle down
725 inlining in those cases.  In particular this seems to happen in one of the
726 libstdc++ red-black tree methods. */
727 else
728 {
729 if (max_count
730 && (edge->count * 100 / outer_node->count
731 <= PARAM_VALUE (PARAM_MIN_INLINE_RECURSIVE_PROBABILITY)))
732 {
733 reason = "profile of recursive call is too small";
734 want_inline = false;
735 }
736 else if (!max_count
737 && (edge->frequency * 100 / caller_freq
738 <= PARAM_VALUE (PARAM_MIN_INLINE_RECURSIVE_PROBABILITY)))
739 {
740 reason = "frequency of recursive call is too small";
741 want_inline = false;
742 }
743 }
744 if (!want_inline && dump_file)
745 fprintf (dump_file, " not inlining recursively: %s\n", reason);
746 return want_inline;
747 }
748
749 /* Return true when NODE has an uninlinable caller;
750 set HAS_HOT_CALL if it has a hot call.
751 Worker for cgraph_for_node_and_aliases. */
752
753 static bool
754 check_callers (struct cgraph_node *node, void *has_hot_call)
755 {
756 struct cgraph_edge *e;
757 for (e = node->callers; e; e = e->next_caller)
758 {
759 if (!can_inline_edge_p (e, true))
760 return true;
761 if (!has_hot_call && cgraph_maybe_hot_edge_p (e))
762 *(bool *)has_hot_call = true;
763 }
764 return false;
765 }
766
767 /* If NODE has a caller, return true. */
768
769 static bool
770 has_caller_p (struct cgraph_node *node, void *data ATTRIBUTE_UNUSED)
771 {
772 if (node->callers)
773 return true;
774 return false;
775 }
776
777 /* Decide if inlining NODE would reduce unit size by eliminating
778 the offline copy of the function.
779 When COLD is true, cold calls are considered, too. */
780
781 static bool
782 want_inline_function_to_all_callers_p (struct cgraph_node *node, bool cold)
783 {
784 struct cgraph_node *function = cgraph_function_or_thunk_node (node, NULL);
785 bool has_hot_call = false;
786
787 /* Does it have callers? */
788 if (!cgraph_for_node_and_aliases (node, has_caller_p, NULL, true))
789 return false;
790 /* Already inlined? */
791 if (function->global.inlined_to)
792 return false;
793 if (cgraph_function_or_thunk_node (node, NULL) != node)
794 return false;
795 /* Inlining into all callers would increase size? */
796 if (estimate_growth (node) > 0)
797 return false;
798 /* All inlines must be possible. */
799 if (cgraph_for_node_and_aliases (node, check_callers, &has_hot_call, true))
800 return false;
801 if (!cold && !has_hot_call)
802 return false;
803 return true;
804 }
805
806 #define RELATIVE_TIME_BENEFIT_RANGE (INT_MAX / 64)
807
808 /* Return the relative time improvement for inlining EDGE in the range
809 1...RELATIVE_TIME_BENEFIT_RANGE.  */
810
811 static inline int
812 relative_time_benefit (struct inline_summary *callee_info,
813 struct cgraph_edge *edge,
814 int edge_time)
815 {
816 gcov_type relbenefit;
817 gcov_type uninlined_call_time = compute_uninlined_call_time (callee_info, edge);
818 gcov_type inlined_call_time = compute_inlined_call_time (edge, edge_time);
819
820 /* Inlining into extern inline function is not a win. */
821 if (DECL_EXTERNAL (edge->caller->global.inlined_to
822 ? edge->caller->global.inlined_to->decl
823 : edge->caller->decl))
824 return 1;
825
826 /* Watch overflows. */
827 gcc_checking_assert (uninlined_call_time >= 0);
828 gcc_checking_assert (inlined_call_time >= 0);
829 gcc_checking_assert (uninlined_call_time >= inlined_call_time);
830
831 /* Compute the relative time benefit, i.e. how much the call becomes faster.
832 ??? Perhaps computing how much the caller+callee together become faster
833 would lead to more realistic results. */
834 if (!uninlined_call_time)
835 uninlined_call_time = 1;
836 relbenefit =
837 RDIV (((gcov_type)uninlined_call_time - inlined_call_time) * RELATIVE_TIME_BENEFIT_RANGE,
838 uninlined_call_time);
839 relbenefit = MIN (relbenefit, RELATIVE_TIME_BENEFIT_RANGE);
840 gcc_checking_assert (relbenefit >= 0);
841 relbenefit = MAX (relbenefit, 1);
842 return relbenefit;
843 }
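/* Illustration (made-up numbers): if the call takes 400 time units without
   inlining and 300 with inlining, the relative benefit computed above is
   (400 - 300) * RELATIVE_TIME_BENEFIT_RANGE / 400, i.e. a quarter of the
   range, clamped into [1, RELATIVE_TIME_BENEFIT_RANGE].  */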
844
845
846 /* A cost model driving the inlining heuristics in a way so that the edges with
847 the smallest badness are inlined first.  After each inlining is performed
848 the costs of all caller edges of the affected nodes are recomputed so the
849 metrics may accurately depend on values such as the number of inlinable
850 callers of the function or the function body size. */
851
852 static int
853 edge_badness (struct cgraph_edge *edge, bool dump)
854 {
855 gcov_type badness;
856 int growth, edge_time;
857 struct cgraph_node *callee = cgraph_function_or_thunk_node (edge->callee,
858 NULL);
859 struct inline_summary *callee_info = inline_summary (callee);
860 inline_hints hints;
861
862 if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
863 return INT_MIN;
864
865 growth = estimate_edge_growth (edge);
866 edge_time = estimate_edge_time (edge);
867 hints = estimate_edge_hints (edge);
868 gcc_checking_assert (edge_time >= 0);
869 gcc_checking_assert (edge_time <= callee_info->time);
870 gcc_checking_assert (growth <= callee_info->size);
871
872 if (dump)
873 {
874 fprintf (dump_file, " Badness calculation for %s/%i -> %s/%i\n",
875 xstrdup (edge->caller->name ()),
876 edge->caller->order,
877 xstrdup (callee->name ()),
878 edge->callee->order);
879 fprintf (dump_file, " size growth %i, time %i ",
880 growth,
881 edge_time);
882 dump_inline_hints (dump_file, hints);
883 if (big_speedup_p (edge))
884 fprintf (dump_file, " big_speedup");
885 fprintf (dump_file, "\n");
886 }
887
888 /* Always prefer inlining that saves code size. */
889 if (growth <= 0)
890 {
891 badness = INT_MIN / 2 + growth;
892 if (dump)
893 fprintf (dump_file, " %i: Growth %i <= 0\n", (int) badness,
894 growth);
895 }
896
897 /* When profiling is available, compute badness as:
898
899                 relative_edge_count * relative_time_benefit
900      goodness = -------------------------------------------
901                            growth_f_caller
902      badness = -goodness
903
904 The fraction is upside down, because for edge counts and time benefits
905 the bounds are known.  Edge growth is essentially unlimited. */
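/* For instance (illustrative numbers): an edge executed half as often as the
   hottest edge (relative count 0.5), with a relative time benefit of 0.25 of
   RELATIVE_TIME_BENEFIT_RANGE and growth 10, gives goodness proportional to
   0.5 * 0.25 / 10.  The sreal code below scales this by INT_MAX / 2 and
   negates it, so better candidates get a more negative badness and are
   extracted from the heap first.  */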
906
907 else if (max_count)
908 {
909 sreal tmp, relbenefit_real, growth_real;
910 int relbenefit = relative_time_benefit (callee_info, edge, edge_time);
911 /* Cap edge->count to max_count.  edge->count can be larger than
912 max_count if an inlining adds new edges which increase max_count
913 after max_count is computed. */
914 gcov_type edge_count = edge->count > max_count ? max_count : edge->count;
915
916 sreal_init (&relbenefit_real, relbenefit, 0);
917 sreal_init (&growth_real, growth, 0);
918
919 /* relative_edge_count. */
920 sreal_init (&tmp, edge_count, 0);
921 sreal_div (&tmp, &tmp, &max_count_real);
922
923 /* relative_time_benefit. */
924 sreal_mul (&tmp, &tmp, &relbenefit_real);
925 sreal_div (&tmp, &tmp, &max_relbenefit_real);
926
927 /* growth_f_caller. */
928 sreal_mul (&tmp, &tmp, &half_int_min_real);
929 sreal_div (&tmp, &tmp, &growth_real);
930
931 badness = -1 * sreal_to_int (&tmp);
932
933 if (dump)
934 {
935 fprintf (dump_file,
936 " %i (relative %f): profile info. Relative count %f%s"
937 " * Relative benefit %f\n",
938 (int) badness, (double) badness / INT_MIN,
939 (double) edge_count / max_count,
940 edge->count > max_count ? " (capped to max_count)" : "",
941 relbenefit * 100.0 / RELATIVE_TIME_BENEFIT_RANGE);
942 }
943 }
944
945 /* When the function local profile is available, compute badness as:
946
947                    relative_time_benefit
948      goodness = ---------------------------------
949                 growth_of_caller * overall_growth
950
951      badness = - goodness
952
953    compensated by the inline hints.
954 */
955 else if (flag_guess_branch_prob)
956 {
957 badness = (relative_time_benefit (callee_info, edge, edge_time)
958 * (INT_MIN / 16 / RELATIVE_TIME_BENEFIT_RANGE));
959 badness /= (MIN (65536/2, growth) * MIN (65536/2, MAX (1, callee_info->growth)));
960 gcc_checking_assert (badness <=0 && badness >= INT_MIN / 16);
961 if ((hints & (INLINE_HINT_indirect_call
962 | INLINE_HINT_loop_iterations
963 | INLINE_HINT_array_index
964 | INLINE_HINT_loop_stride))
965 || callee_info->growth <= 0)
966 badness *= 8;
967 if (hints & (INLINE_HINT_same_scc))
968 badness /= 16;
969 else if (hints & (INLINE_HINT_in_scc))
970 badness /= 8;
971 else if (hints & (INLINE_HINT_cross_module))
972 badness /= 2;
973 gcc_checking_assert (badness <= 0 && badness >= INT_MIN / 2);
974 if ((hints & INLINE_HINT_declared_inline) && badness >= INT_MIN / 32)
975 badness *= 16;
976 if (dump)
977 {
978 fprintf (dump_file,
979 " %i: guessed profile. frequency %f,"
980 " benefit %f%%, time w/o inlining %i, time w inlining %i"
981 " overall growth %i (current) %i (original)\n",
982 (int) badness, (double)edge->frequency / CGRAPH_FREQ_BASE,
983 relative_time_benefit (callee_info, edge, edge_time) * 100.0
984 / RELATIVE_TIME_BENEFIT_RANGE,
985 (int)compute_uninlined_call_time (callee_info, edge),
986 (int)compute_inlined_call_time (edge, edge_time),
987 estimate_growth (callee),
988 callee_info->growth);
989 }
990 }
991 /* When the function local profile is not available or it does not give
992 useful information (i.e. the frequency is zero), base the cost on
993 the loop nest and overall size growth, so we optimize for the overall number
994 of functions fully inlined in the program. */
995 else
996 {
997 int nest = MIN (inline_edge_summary (edge)->loop_depth, 8);
998 badness = growth * 256;
999
1000 /* Decrease badness if call is nested. */
1001 if (badness > 0)
1002 badness >>= nest;
1003 else
1004 {
1005 badness <<= nest;
1006 }
1007 if (dump)
1008 fprintf (dump_file, " %i: no profile. nest %i\n", (int) badness,
1009 nest);
1010 }
1011
1012 /* Ensure that we did not overflow in all the fixed point math above. */
1013 gcc_assert (badness >= INT_MIN);
1014 gcc_assert (badness <= INT_MAX - 1);
1015 /* Make recursive inlining happen always after other inlining is done. */
1016 if (cgraph_edge_recursive_p (edge))
1017 return badness + 1;
1018 else
1019 return badness;
1020 }
1021
1022 /* Recompute badness of EDGE and update its key in HEAP if needed. */
1023 static inline void
1024 update_edge_key (fibheap_t heap, struct cgraph_edge *edge)
1025 {
1026 int badness = edge_badness (edge, false);
1027 if (edge->aux)
1028 {
1029 fibnode_t n = (fibnode_t) edge->aux;
1030 gcc_checking_assert (n->data == edge);
1031
1032 /* fibheap_replace_key can only decrease the keys.
1033 When we increase the key we do not update the heap
1034 and instead re-insert the element once it becomes
1035 the minimum of the heap. */
1036 if (badness < n->key)
1037 {
1038 if (dump_file && (dump_flags & TDF_DETAILS))
1039 {
1040 fprintf (dump_file,
1041 " decreasing badness %s/%i -> %s/%i, %i to %i\n",
1042 xstrdup (edge->caller->name ()),
1043 edge->caller->order,
1044 xstrdup (edge->callee->name ()),
1045 edge->callee->order,
1046 (int)n->key,
1047 badness);
1048 }
1049 fibheap_replace_key (heap, n, badness);
1050 gcc_checking_assert (n->key == badness);
1051 }
1052 }
1053 else
1054 {
1055 if (dump_file && (dump_flags & TDF_DETAILS))
1056 {
1057 fprintf (dump_file,
1058 " enqueuing call %s/%i -> %s/%i, badness %i\n",
1059 xstrdup (edge->caller->name ()),
1060 edge->caller->order,
1061 xstrdup (edge->callee->name ()),
1062 edge->callee->order,
1063 badness);
1064 }
1065 edge->aux = fibheap_insert (heap, badness, edge);
1066 }
1067 }
1068
1069
1070 /* NODE was inlined.
1071 All caller edges need to be reset because
1072 the size estimates changed.  Similarly, callees need to be reset
1073 because a better context may be known. */
1074
1075 static void
1076 reset_edge_caches (struct cgraph_node *node)
1077 {
1078 struct cgraph_edge *edge;
1079 struct cgraph_edge *e = node->callees;
1080 struct cgraph_node *where = node;
1081 int i;
1082 struct ipa_ref *ref;
1083
1084 if (where->global.inlined_to)
1085 where = where->global.inlined_to;
1086
1087 /* WHERE's body size has changed, so the cached growth is invalid. */
1088 reset_node_growth_cache (where);
1089
1090 for (edge = where->callers; edge; edge = edge->next_caller)
1091 if (edge->inline_failed)
1092 reset_edge_growth_cache (edge);
1093 for (i = 0; ipa_ref_list_referring_iterate (&where->ref_list,
1094 i, ref); i++)
1095 if (ref->use == IPA_REF_ALIAS)
1096 reset_edge_caches (ipa_ref_referring_node (ref));
1097
1098 if (!e)
1099 return;
1100
1101 while (true)
1102 if (!e->inline_failed && e->callee->callees)
1103 e = e->callee->callees;
1104 else
1105 {
1106 if (e->inline_failed)
1107 reset_edge_growth_cache (e);
1108 if (e->next_callee)
1109 e = e->next_callee;
1110 else
1111 {
1112 do
1113 {
1114 if (e->caller == node)
1115 return;
1116 e = e->caller->callers;
1117 }
1118 while (!e->next_callee);
1119 e = e->next_callee;
1120 }
1121 }
1122 }
1123
1124 /* Recompute HEAP nodes for each caller of NODE.
1125 UPDATED_NODES tracks nodes we already visited, to avoid redundant work.
1126 When CHECK_INLINABLITY_FOR is set, re-check for the specified edge that
1127 it is inlinable.  Otherwise check all edges. */
1128
1129 static void
1130 update_caller_keys (fibheap_t heap, struct cgraph_node *node,
1131 bitmap updated_nodes,
1132 struct cgraph_edge *check_inlinablity_for)
1133 {
1134 struct cgraph_edge *edge;
1135 int i;
1136 struct ipa_ref *ref;
1137
1138 if ((!node->alias && !inline_summary (node)->inlinable)
1139 || node->global.inlined_to)
1140 return;
1141 if (!bitmap_set_bit (updated_nodes, node->uid))
1142 return;
1143
1144 for (i = 0; ipa_ref_list_referring_iterate (&node->ref_list,
1145 i, ref); i++)
1146 if (ref->use == IPA_REF_ALIAS)
1147 {
1148 struct cgraph_node *alias = ipa_ref_referring_node (ref);
1149 update_caller_keys (heap, alias, updated_nodes, check_inlinablity_for);
1150 }
1151
1152 for (edge = node->callers; edge; edge = edge->next_caller)
1153 if (edge->inline_failed)
1154 {
1155 if (!check_inlinablity_for
1156 || check_inlinablity_for == edge)
1157 {
1158 if (can_inline_edge_p (edge, false)
1159 && want_inline_small_function_p (edge, false))
1160 update_edge_key (heap, edge);
1161 else if (edge->aux)
1162 {
1163 report_inline_failed_reason (edge);
1164 fibheap_delete_node (heap, (fibnode_t) edge->aux);
1165 edge->aux = NULL;
1166 }
1167 }
1168 else if (edge->aux)
1169 update_edge_key (heap, edge);
1170 }
1171 }
1172
1173 /* Recompute HEAP nodes for each uninlined call in NODE.
1174 This is used when we know that edge badnesses are only going to increase
1175 (we introduced a new call site) and thus all we need is to insert the newly
1176 created edges into the heap. */
1177
1178 static void
1179 update_callee_keys (fibheap_t heap, struct cgraph_node *node,
1180 bitmap updated_nodes)
1181 {
1182 struct cgraph_edge *e = node->callees;
1183
1184 if (!e)
1185 return;
1186 while (true)
1187 if (!e->inline_failed && e->callee->callees)
1188 e = e->callee->callees;
1189 else
1190 {
1191 enum availability avail;
1192 struct cgraph_node *callee;
1193 /* We do not reset the callee growth cache here.  Since we added a new call,
1194 growth could only have increased and consequently the badness metric
1195 doesn't need updating. */
1196 if (e->inline_failed
1197 && (callee = cgraph_function_or_thunk_node (e->callee, &avail))
1198 && inline_summary (callee)->inlinable
1199 && avail >= AVAIL_AVAILABLE
1200 && !bitmap_bit_p (updated_nodes, callee->uid))
1201 {
1202 if (can_inline_edge_p (e, false)
1203 && want_inline_small_function_p (e, false))
1204 update_edge_key (heap, e);
1205 else if (e->aux)
1206 {
1207 report_inline_failed_reason (e);
1208 fibheap_delete_node (heap, (fibnode_t) e->aux);
1209 e->aux = NULL;
1210 }
1211 }
1212 if (e->next_callee)
1213 e = e->next_callee;
1214 else
1215 {
1216 do
1217 {
1218 if (e->caller == node)
1219 return;
1220 e = e->caller->callers;
1221 }
1222 while (!e->next_callee);
1223 e = e->next_callee;
1224 }
1225 }
1226 }
1227
1228 /* Enqueue all recursive calls from NODE into the priority queue, prioritized
1229 by how much we want to recursively inline the call. */
1230
1231 static void
1232 lookup_recursive_calls (struct cgraph_node *node, struct cgraph_node *where,
1233 fibheap_t heap)
1234 {
1235 struct cgraph_edge *e;
1236 enum availability avail;
1237
1238 for (e = where->callees; e; e = e->next_callee)
1239 if (e->callee == node
1240 || (cgraph_function_or_thunk_node (e->callee, &avail) == node
1241 && avail > AVAIL_OVERWRITABLE))
1242 {
1243 /* When profile feedback is available, prioritize by expected number
1244 of calls. */
1245 fibheap_insert (heap,
1246 !max_count ? -e->frequency
1247 : -(e->count / ((max_count + (1<<24) - 1) / (1<<24))),
1248 e);
1249 }
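/* Note: the count above is scaled down by roughly max_count / 2^24 so that
   the negated value fits the integer fibheap key even for very hot
   profiles; the key is an approximation of the expected number of calls,
   not an exact count.  */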
1250 for (e = where->callees; e; e = e->next_callee)
1251 if (!e->inline_failed)
1252 lookup_recursive_calls (node, e->callee, heap);
1253 }
1254
1255 /* Decide on recursive inlining: in case the function has recursive calls,
1256 inline until the body size reaches the given limit.  If any new indirect edges
1257 are discovered in the process, add them to *NEW_EDGES, unless NEW_EDGES
1258 is NULL. */
1259
1260 static bool
1261 recursive_inlining (struct cgraph_edge *edge,
1262 vec<cgraph_edge_p> *new_edges)
1263 {
1264 int limit = PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE_AUTO);
1265 fibheap_t heap;
1266 struct cgraph_node *node;
1267 struct cgraph_edge *e;
1268 struct cgraph_node *master_clone = NULL, *next;
1269 int depth = 0;
1270 int n = 0;
1271
1272 node = edge->caller;
1273 if (node->global.inlined_to)
1274 node = node->global.inlined_to;
1275
1276 if (DECL_DECLARED_INLINE_P (node->decl))
1277 limit = PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE);
1278
1279 /* Make sure that the function is small enough to be considered for inlining. */
1280 if (estimate_size_after_inlining (node, edge) >= limit)
1281 return false;
1282 heap = fibheap_new ();
1283 lookup_recursive_calls (node, node, heap);
1284 if (fibheap_empty (heap))
1285 {
1286 fibheap_delete (heap);
1287 return false;
1288 }
1289
1290 if (dump_file)
1291 fprintf (dump_file,
1292 " Performing recursive inlining on %s\n",
1293 node->name ());
1294
1295 /* Do the inlining and update the list of recursive calls during the process. */
1296 while (!fibheap_empty (heap))
1297 {
1298 struct cgraph_edge *curr
1299 = (struct cgraph_edge *) fibheap_extract_min (heap);
1300 struct cgraph_node *cnode, *dest = curr->callee;
1301
1302 if (!can_inline_edge_p (curr, true))
1303 continue;
1304
1305 /* MASTER_CLONE is produced in the case we have already started modifying
1306 the function.  Be sure to redirect the edge to the original body before
1307 estimating growth; otherwise we would be seeing growth after inlining
1308 the already modified body. */
1309 if (master_clone)
1310 {
1311 cgraph_redirect_edge_callee (curr, master_clone);
1312 reset_edge_growth_cache (curr);
1313 }
1314
1315 if (estimate_size_after_inlining (node, curr) > limit)
1316 {
1317 cgraph_redirect_edge_callee (curr, dest);
1318 reset_edge_growth_cache (curr);
1319 break;
1320 }
1321
1322 depth = 1;
1323 for (cnode = curr->caller;
1324 cnode->global.inlined_to; cnode = cnode->callers->caller)
1325 if (node->decl
1326 == cgraph_function_or_thunk_node (curr->callee, NULL)->decl)
1327 depth++;
1328
1329 if (!want_inline_self_recursive_call_p (curr, node, false, depth))
1330 {
1331 cgraph_redirect_edge_callee (curr, dest);
1332 reset_edge_growth_cache (curr);
1333 continue;
1334 }
1335
1336 if (dump_file)
1337 {
1338 fprintf (dump_file,
1339 " Inlining call of depth %i", depth);
1340 if (node->count)
1341 {
1342 fprintf (dump_file, " called approx. %.2f times per call",
1343 (double)curr->count / node->count);
1344 }
1345 fprintf (dump_file, "\n");
1346 }
1347 if (!master_clone)
1348 {
1349 /* We need the original clone to copy around. */
1350 master_clone = cgraph_clone_node (node, node->decl,
1351 node->count, CGRAPH_FREQ_BASE,
1352 false, vNULL, true, NULL);
1353 for (e = master_clone->callees; e; e = e->next_callee)
1354 if (!e->inline_failed)
1355 clone_inlined_nodes (e, true, false, NULL);
1356 cgraph_redirect_edge_callee (curr, master_clone);
1357 reset_edge_growth_cache (curr);
1358 }
1359
1360 inline_call (curr, false, new_edges, &overall_size, true);
1361 lookup_recursive_calls (node, curr->callee, heap);
1362 n++;
1363 }
1364
1365 if (!fibheap_empty (heap) && dump_file)
1366 fprintf (dump_file, " Recursive inlining growth limit met.\n");
1367 fibheap_delete (heap);
1368
1369 if (!master_clone)
1370 return false;
1371
1372 if (dump_file)
1373 fprintf (dump_file,
1374 "\n Inlined %i times, "
1375 "body grown from size %i to %i, time %i to %i\n", n,
1376 inline_summary (master_clone)->size, inline_summary (node)->size,
1377 inline_summary (master_clone)->time, inline_summary (node)->time);
1378
1379 /* Remove the master clone we used for inlining.  We rely on the fact that
1380 clones inlined into the master clone get queued just before the master clone,
1381 so we don't need recursion. */
1382 for (node = cgraph_first_function (); node != master_clone;
1383 node = next)
1384 {
1385 next = cgraph_next_function (node);
1386 if (node->global.inlined_to == master_clone)
1387 cgraph_remove_node (node);
1388 }
1389 cgraph_remove_node (master_clone);
1390 return true;
1391 }
1392
1393
1394 /* Given whole compilation unit estimate of INSNS, compute how large we can
1395 allow the unit to grow. */
1396
1397 static int
1398 compute_max_insns (int insns)
1399 {
1400 int max_insns = insns;
1401 if (max_insns < PARAM_VALUE (PARAM_LARGE_UNIT_INSNS))
1402 max_insns = PARAM_VALUE (PARAM_LARGE_UNIT_INSNS);
1403
1404 return ((HOST_WIDEST_INT) max_insns
1405 * (100 + PARAM_VALUE (PARAM_INLINE_UNIT_GROWTH)) / 100);
1406 }
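/* Example (made-up numbers): for a unit of 10000 instructions, already at
   least --param large-unit-insns, and --param inline-unit-growth set to 30,
   the cap computed above is 10000 * (100 + 30) / 100 = 13000 instructions.  */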
1407
1408
1409 /* Compute badness of all edges in NEW_EDGES and add them to the HEAP. */
1410
1411 static void
1412 add_new_edges_to_heap (fibheap_t heap, vec<cgraph_edge_p> new_edges)
1413 {
1414 while (new_edges.length () > 0)
1415 {
1416 struct cgraph_edge *edge = new_edges.pop ();
1417
1418 gcc_assert (!edge->aux);
1419 if (edge->inline_failed
1420 && can_inline_edge_p (edge, true)
1421 && want_inline_small_function_p (edge, true))
1422 edge->aux = fibheap_insert (heap, edge_badness (edge, false), edge);
1423 }
1424 }
1425
1426 /* Remove EDGE from the fibheap. */
1427
1428 static void
1429 heap_edge_removal_hook (struct cgraph_edge *e, void *data)
1430 {
1431 if (e->callee)
1432 reset_node_growth_cache (e->callee);
1433 if (e->aux)
1434 {
1435 fibheap_delete_node ((fibheap_t)data, (fibnode_t)e->aux);
1436 e->aux = NULL;
1437 }
1438 }
1439
1440 /* Return true if speculation of edge E seems useful.
1441 If ANTICIPATE_INLINING is true, be conservative and hope that E
1442 may get inlined. */
1443
1444 bool
1445 speculation_useful_p (struct cgraph_edge *e, bool anticipate_inlining)
1446 {
1447 enum availability avail;
1448 struct cgraph_node *target = cgraph_function_or_thunk_node (e->callee, &avail);
1449 struct cgraph_edge *direct, *indirect;
1450 struct ipa_ref *ref;
1451
1452 gcc_assert (e->speculative && !e->indirect_unknown_callee);
1453
1454 if (!cgraph_maybe_hot_edge_p (e))
1455 return false;
1456
1457 /* See if IP optimizations found something potentially useful about the
1458 function. For now we look only for CONST/PURE flags. Almost everything
1459 else we propagate is useless. */
1460 if (avail >= AVAIL_AVAILABLE)
1461 {
1462 int ecf_flags = flags_from_decl_or_type (target->decl);
1463 if (ecf_flags & ECF_CONST)
1464 {
1465 cgraph_speculative_call_info (e, direct, indirect, ref);
1466 if (!(indirect->indirect_info->ecf_flags & ECF_CONST))
1467 return true;
1468 }
1469 else if (ecf_flags & ECF_PURE)
1470 {
1471 cgraph_speculative_call_info (e, direct, indirect, ref);
1472 if (!(indirect->indirect_info->ecf_flags & ECF_PURE))
1473 return true;
1474 }
1475 }
1476 /* If we did not manage to inline the function nor redirect
1477 to an ipa-cp clone (these are recognized by the local flag being set),
1478 it is probably pointless to inline it unless the hardware is missing
1479 an indirect call predictor. */
1480 if (!anticipate_inlining && e->inline_failed && !target->local.local)
1481 return false;
1482 /* For overwritable targets there is not much to do. */
1483 if (e->inline_failed && !can_inline_edge_p (e, false, true))
1484 return false;
1485 /* OK, speculation seems interesting. */
1486 return true;
1487 }
1488
1489 /* We know that EDGE is not going to be inlined.
1490 See if we can remove speculation. */
1491
1492 static void
1493 resolve_noninline_speculation (fibheap_t edge_heap, struct cgraph_edge *edge)
1494 {
1495 if (edge->speculative && !speculation_useful_p (edge, false))
1496 {
1497 struct cgraph_node *node = edge->caller;
1498 struct cgraph_node *where = node->global.inlined_to
1499 ? node->global.inlined_to : node;
1500 bitmap updated_nodes = BITMAP_ALLOC (NULL);
1501
1502 cgraph_resolve_speculation (edge, NULL);
1503 reset_edge_caches (where);
1504 inline_update_overall_summary (where);
1505 update_caller_keys (edge_heap, where,
1506 updated_nodes, NULL);
1507 update_callee_keys (edge_heap, where,
1508 updated_nodes);
1509 BITMAP_FREE (updated_nodes);
1510 }
1511 }
1512
1513 /* We use a greedy algorithm for inlining small functions:
1514 all inline candidates are put into a prioritized heap ordered by
1515 increasing badness.
1516
1517 The inlining of small functions is bounded by the unit growth parameters. */
1518
1519 static void
1520 inline_small_functions (void)
1521 {
1522 struct cgraph_node *node;
1523 struct cgraph_edge *edge;
1524 fibheap_t edge_heap = fibheap_new ();
1525 bitmap updated_nodes = BITMAP_ALLOC (NULL);
1526 int min_size, max_size;
1527 auto_vec<cgraph_edge_p> new_indirect_edges;
1528 int initial_size = 0;
1529 struct cgraph_node **order = XCNEWVEC (struct cgraph_node *, cgraph_n_nodes);
1530 struct cgraph_edge_hook_list *edge_removal_hook_holder;
1531
1532 if (flag_indirect_inlining)
1533 new_indirect_edges.create (8);
1534
1535 edge_removal_hook_holder
1536 = cgraph_add_edge_removal_hook (&heap_edge_removal_hook, edge_heap);
1537
1538 /* Compute overall unit size and other global parameters used by badness
1539 metrics. */
1540
1541 max_count = 0;
1542 ipa_reduced_postorder (order, true, true, NULL);
1543 free (order);
1544
1545 FOR_EACH_DEFINED_FUNCTION (node)
1546 if (!node->global.inlined_to)
1547 {
1548 if (cgraph_function_with_gimple_body_p (node)
1549 || node->thunk.thunk_p)
1550 {
1551 struct inline_summary *info = inline_summary (node);
1552 struct ipa_dfs_info *dfs = (struct ipa_dfs_info *) node->aux;
1553
1554 if (!DECL_EXTERNAL (node->decl))
1555 initial_size += info->size;
1556 info->growth = estimate_growth (node);
1557 if (dfs && dfs->next_cycle)
1558 {
1559 struct cgraph_node *n2;
1560 int id = dfs->scc_no + 1;
1561 for (n2 = node; n2;
1562 n2 = ((struct ipa_dfs_info *) node->aux)->next_cycle)
1563 {
1564 struct inline_summary *info2 = inline_summary (n2);
1565 if (info2->scc_no)
1566 break;
1567 info2->scc_no = id;
1568 }
1569 }
1570 }
1571
1572 for (edge = node->callers; edge; edge = edge->next_caller)
1573 if (max_count < edge->count)
1574 max_count = edge->count;
1575 }
1576 sreal_init (&max_count_real, max_count, 0);
1577 sreal_init (&max_relbenefit_real, RELATIVE_TIME_BENEFIT_RANGE, 0);
1578 sreal_init (&half_int_min_real, INT_MAX / 2, 0);
1579 ipa_free_postorder_info ();
1580 initialize_growth_caches ();
1581
1582 if (dump_file)
1583 fprintf (dump_file,
1584 "\nDeciding on inlining of small functions. Starting with size %i.\n",
1585 initial_size);
1586
1587 overall_size = initial_size;
1588 max_size = compute_max_insns (overall_size);
1589 min_size = overall_size;
1590
1591 /* Populate the heap with all edges we might inline. */
1592
1593 FOR_EACH_DEFINED_FUNCTION (node)
1594 {
1595 bool update = false;
1596 struct cgraph_edge *next;
1597
1598 if (dump_file)
1599 fprintf (dump_file, "Enqueueing calls in %s/%i.\n",
1600 node->name (), node->order);
1601
1602 for (edge = node->callees; edge; edge = next)
1603 {
1604 next = edge->next_callee;
1605 if (edge->inline_failed
1606 && !edge->aux
1607 && can_inline_edge_p (edge, true)
1608 && want_inline_small_function_p (edge, true)
1609 && edge->inline_failed)
1610 {
1611 gcc_assert (!edge->aux);
1612 update_edge_key (edge_heap, edge);
1613 }
1614 if (edge->speculative && !speculation_useful_p (edge, edge->aux != NULL))
1615 {
1616 cgraph_resolve_speculation (edge, NULL);
1617 update = true;
1618 }
1619 }
1620 if (update)
1621 {
1622 struct cgraph_node *where = node->global.inlined_to
1623 ? node->global.inlined_to : node;
1624 inline_update_overall_summary (where);
1625 reset_node_growth_cache (where);
1626 reset_edge_caches (where);
1627 update_caller_keys (edge_heap, where,
1628 updated_nodes, NULL);
1629 bitmap_clear (updated_nodes);
1630 }
1631 }
1632
1633 gcc_assert (in_lto_p
1634 || !max_count
1635 || (profile_info && flag_branch_probabilities));
1636
1637 while (!fibheap_empty (edge_heap))
1638 {
1639 int old_size = overall_size;
1640 struct cgraph_node *where, *callee;
1641 int badness = fibheap_min_key (edge_heap);
1642 int current_badness;
1643 int cached_badness;
1644 int growth;
1645
1646 edge = (struct cgraph_edge *) fibheap_extract_min (edge_heap);
1647 gcc_assert (edge->aux);
1648 edge->aux = NULL;
1649 if (!edge->inline_failed)
1650 continue;
1651
1652 /* Be sure that the caches are kept consistent.
1653 We cannot make this ENABLE_CHECKING only because it causes different
1654 updates of the fibheap queue. */
1655 cached_badness = edge_badness (edge, false);
1656 reset_edge_growth_cache (edge);
1657 reset_node_growth_cache (edge->callee);
1658
1659 /* When updating the edge costs, we only decrease badness in the keys.
1660 Increases of badness are handled lazily; when we see a key with an
1661 out-of-date value on it, we re-insert it now. */
1662 current_badness = edge_badness (edge, false);
1663 gcc_assert (cached_badness == current_badness);
1664 gcc_assert (current_badness >= badness);
1665 if (current_badness != badness)
1666 {
1667 edge->aux = fibheap_insert (edge_heap, current_badness, edge);
1668 continue;
1669 }
1670
1671 if (!can_inline_edge_p (edge, true))
1672 {
1673 resolve_noninline_speculation (edge_heap, edge);
1674 continue;
1675 }
1676
1677 callee = cgraph_function_or_thunk_node (edge->callee, NULL);
1678 growth = estimate_edge_growth (edge);
1679 if (dump_file)
1680 {
1681 fprintf (dump_file,
1682 "\nConsidering %s/%i with %i size\n",
1683 callee->name (), callee->order,
1684 inline_summary (callee)->size);
1685 fprintf (dump_file,
1686 " to be inlined into %s/%i in %s:%i\n"
1687 " Estimated growth after inlined into all is %+i insns.\n"
1688 " Estimated badness is %i, frequency %.2f.\n",
1689 edge->caller->name (), edge->caller->order,
1690 flag_wpa ? "unknown"
1691 : gimple_filename ((const_gimple) edge->call_stmt),
1692 flag_wpa ? -1
1693 : gimple_lineno ((const_gimple) edge->call_stmt),
1694 estimate_growth (callee),
1695 badness,
1696 edge->frequency / (double)CGRAPH_FREQ_BASE);
1697 if (edge->count)
1698 fprintf (dump_file," Called "HOST_WIDEST_INT_PRINT_DEC"x\n",
1699 edge->count);
1700 if (dump_flags & TDF_DETAILS)
1701 edge_badness (edge, true);
1702 }
1703
1704 if (overall_size + growth > max_size
1705 && !DECL_DISREGARD_INLINE_LIMITS (callee->decl))
1706 {
1707 edge->inline_failed = CIF_INLINE_UNIT_GROWTH_LIMIT;
1708 report_inline_failed_reason (edge);
1709 resolve_noninline_speculation (edge_heap, edge);
1710 continue;
1711 }
1712
1713 if (!want_inline_small_function_p (edge, true))
1714 {
1715 resolve_noninline_speculation (edge_heap, edge);
1716 continue;
1717 }
1718
1719 /* The heuristics for inlining small functions work poorly for
1720 recursive calls, where we achieve an effect similar to loop unrolling.
1721 When inlining such an edge seems profitable, leave the decision to the
1722 special purpose recursive inliner. */
1723 if (cgraph_edge_recursive_p (edge))
1724 {
1725 where = edge->caller;
1726 if (where->global.inlined_to)
1727 where = where->global.inlined_to;
1728 if (!recursive_inlining (edge,
1729 flag_indirect_inlining
1730 ? &new_indirect_edges : NULL))
1731 {
1732 edge->inline_failed = CIF_RECURSIVE_INLINING;
1733 resolve_noninline_speculation (edge_heap, edge);
1734 continue;
1735 }
1736 reset_edge_caches (where);
1737 /* The recursive inliner inlines all recursive calls of the function
1738 at once. Consequently we need to update all callee keys. */
1739 if (flag_indirect_inlining)
1740 add_new_edges_to_heap (edge_heap, new_indirect_edges);
1741 update_callee_keys (edge_heap, where, updated_nodes);
1742 bitmap_clear (updated_nodes);
1743 }
1744 else
1745 {
1746 struct cgraph_node *outer_node = NULL;
1747 int depth = 0;
1748
1749 /* Consider the case where a self-recursive function A is inlined into B.
1750 This is a desirable optimization in some cases, since it leads to an
1751 effect similar to loop peeling and we might completely optimize out the
1752 recursive call. However, we must be extra selective. */
1753
1754 where = edge->caller;
1755 while (where->global.inlined_to)
1756 {
1757 if (where->decl == callee->decl)
1758 outer_node = where, depth++;
1759 where = where->callers->caller;
1760 }
1761 if (outer_node
1762 && !want_inline_self_recursive_call_p (edge, outer_node,
1763 true, depth))
1764 {
1765 edge->inline_failed
1766 = (DECL_DISREGARD_INLINE_LIMITS (edge->callee->decl)
1767 ? CIF_RECURSIVE_INLINING : CIF_UNSPECIFIED);
1768 resolve_noninline_speculation (edge_heap, edge);
1769 continue;
1770 }
1771 else if (depth && dump_file)
1772 fprintf (dump_file, " Peeling recursion with depth %i\n", depth);
1773
1774 gcc_checking_assert (!callee->global.inlined_to);
1775 inline_call (edge, true, &new_indirect_edges, &overall_size, true);
1776 if (flag_indirect_inlining)
1777 add_new_edges_to_heap (edge_heap, new_indirect_edges);
1778
1779 reset_edge_caches (edge->callee);
1780 reset_node_growth_cache (callee);
1781
1782 update_callee_keys (edge_heap, where, updated_nodes);
1783 }
1784 where = edge->caller;
1785 if (where->global.inlined_to)
1786 where = where->global.inlined_to;
1787
1788 /* Our profitability metric can depend on local properties
1789 such as the number of inlinable calls and the size of the function body.
1790 After inlining, these properties might change for the function we
1791 inlined into (since its body size changed) and for the functions
1792 called by the function we inlined (since the number of their inlinable
1793 callers might change). */
1794 update_caller_keys (edge_heap, where, updated_nodes, NULL);
1795 bitmap_clear (updated_nodes);
1796
1797 if (dump_file)
1798 {
1799 fprintf (dump_file,
1800 " Inlined into %s which now has time %i and size %i,"
1801 "net change of %+i.\n",
1802 edge->caller->name (),
1803 inline_summary (edge->caller)->time,
1804 inline_summary (edge->caller)->size,
1805 overall_size - old_size);
1806 }
1807 if (min_size > overall_size)
1808 {
1809 min_size = overall_size;
1810 max_size = compute_max_insns (min_size);
1811
1812 if (dump_file)
1813 fprintf (dump_file, "New minimal size reached: %i\n", min_size);
1814 }
1815 }
1816
1817 free_growth_caches ();
1818 fibheap_delete (edge_heap);
1819 if (dump_file)
1820 fprintf (dump_file,
1821 "Unit growth for small function inlining: %i->%i (%i%%)\n",
1822 initial_size, overall_size,
1823 initial_size ? overall_size * 100 / (initial_size) - 100: 0);
1824 BITMAP_FREE (updated_nodes);
1825 cgraph_remove_edge_removal_hook (edge_removal_hook_holder);
1826 }
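
/* A minimal standalone sketch of the lazy key-update pattern used by the
   loop above: pop the minimum badness, and if the cached key turns out to
   be stale, re-insert the edge with its current badness instead of
   processing it.  The toy binary heap and the badness[] array below are
   illustrative assumptions, not GCC data structures.  */

#include <stdio.h>

struct toy_entry { int key; int id; };

static struct toy_entry toy_heap[16];
static int toy_heap_len;

/* Insert (KEY, ID), restoring the min-heap property by sifting up.  */
static void
toy_heap_push (int key, int id)
{
  int i = toy_heap_len++;
  toy_heap[i].key = key;
  toy_heap[i].id = id;
  for (; i > 0 && toy_heap[(i - 1) / 2].key > toy_heap[i].key; i = (i - 1) / 2)
    {
      struct toy_entry tmp = toy_heap[i];
      toy_heap[i] = toy_heap[(i - 1) / 2];
      toy_heap[(i - 1) / 2] = tmp;
    }
}

/* Remove and return the minimum entry, sifting down to restore the heap.  */
static struct toy_entry
toy_heap_pop (void)
{
  struct toy_entry min = toy_heap[0];
  toy_heap[0] = toy_heap[--toy_heap_len];
  for (int i = 0;;)
    {
      int l = 2 * i + 1, r = 2 * i + 2, m = i;
      if (l < toy_heap_len && toy_heap[l].key < toy_heap[m].key)
	m = l;
      if (r < toy_heap_len && toy_heap[r].key < toy_heap[m].key)
	m = r;
      if (m == i)
	break;
      struct toy_entry tmp = toy_heap[i];
      toy_heap[i] = toy_heap[m];
      toy_heap[m] = tmp;
      i = m;
    }
  return min;
}

int
main (void)
{
  /* Current badness of three hypothetical edges; edge 1 gets worse after
     being queued, so its queued key becomes stale.  */
  int badness[3] = { 5, 2, 7 };
  for (int id = 0; id < 3; id++)
    toy_heap_push (badness[id], id);
  badness[1] = 9;

  while (toy_heap_len)
    {
      struct toy_entry e = toy_heap_pop ();
      if (badness[e.id] != e.key)
	{
	  /* Stale key: handle the badness increase lazily by re-queueing.  */
	  toy_heap_push (badness[e.id], e.id);
	  continue;
	}
      printf ("processing edge %d with badness %d\n", e.id, e.key);
    }
  return 0;
}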
1827
1828 /* Flatten NODE. Performed both during early inlining and
1829 at IPA inlining time. */
1830
1831 static void
1832 flatten_function (struct cgraph_node *node, bool early)
1833 {
1834 struct cgraph_edge *e;
1835
1836 /* We shouldn't be called recursively when we are being processed. */
1837 gcc_assert (node->aux == NULL);
1838
1839 node->aux = (void *) node;
1840
1841 for (e = node->callees; e; e = e->next_callee)
1842 {
1843 struct cgraph_node *orig_callee;
1844 struct cgraph_node *callee = cgraph_function_or_thunk_node (e->callee, NULL);
1845
1846 /* If we have hit a cycle, it is time to give up. */
1847 if (callee->aux)
1848 {
1849 if (dump_file)
1850 fprintf (dump_file,
1851 "Not inlining %s into %s to avoid cycle.\n",
1852 xstrdup (callee->name ()),
1853 xstrdup (e->caller->name ()));
1854 e->inline_failed = CIF_RECURSIVE_INLINING;
1855 continue;
1856 }
1857
1858 /* When the edge is already inlined, we just need to recurse into
1859 it in order to fully flatten the leaves. */
1860 if (!e->inline_failed)
1861 {
1862 flatten_function (callee, early);
1863 continue;
1864 }
1865
1866 /* The flatten attribute needs to be processed during late inlining. For
1867 extra code quality, however, we also do flattening during early
1868 optimization. */
1869 if (!early
1870 ? !can_inline_edge_p (e, true)
1871 : !can_early_inline_edge_p (e))
1872 continue;
1873
1874 if (cgraph_edge_recursive_p (e))
1875 {
1876 if (dump_file)
1877 fprintf (dump_file, "Not inlining: recursive call.\n");
1878 continue;
1879 }
1880
1881 if (gimple_in_ssa_p (DECL_STRUCT_FUNCTION (node->decl))
1882 != gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee->decl)))
1883 {
1884 if (dump_file)
1885 fprintf (dump_file, "Not inlining: SSA form does not match.\n");
1886 continue;
1887 }
1888
1889 /* Inline the edge and flatten the inline clone. Avoid
1890 recursing through the original node if the node was cloned. */
1891 if (dump_file)
1892 fprintf (dump_file, " Inlining %s into %s.\n",
1893 xstrdup (callee->name ()),
1894 xstrdup (e->caller->name ()));
1895 orig_callee = callee;
1896 inline_call (e, true, NULL, NULL, false);
1897 if (e->callee != orig_callee)
1898 orig_callee->aux = (void *) node;
1899 flatten_function (e->callee, early);
1900 if (e->callee != orig_callee)
1901 orig_callee->aux = NULL;
1902 }
1903
1904 node->aux = NULL;
1905 if (!node->global.inlined_to)
1906 inline_update_overall_summary (node);
1907 }
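
/* A standalone sketch of the cycle guard used by flatten_function above:
   the node currently being flattened is marked (the analogue of setting
   node->aux), and any callee that is already marked is skipped rather
   than inlined.  The toy adjacency lists below are illustrative, not the
   cgraph representation.  */

#include <stdio.h>

#define TOY_NODES 4

/* Toy call graph: per-node callee lists terminated by -1.
   Node 2 calls back into node 0, forming a cycle.  */
static const int toy_callees[TOY_NODES][TOY_NODES] =
  { { 1, -1 }, { 2, -1 }, { 0, 3, -1 }, { -1 } };

static int toy_in_progress[TOY_NODES];

/* Recursively "flatten" NODE, refusing edges that would close a cycle.  */
static void
toy_flatten (int node)
{
  toy_in_progress[node] = 1;
  for (int i = 0; toy_callees[node][i] != -1; i++)
    {
      int callee = toy_callees[node][i];
      if (toy_in_progress[callee])
	{
	  printf ("not inlining %d into %d to avoid cycle\n", callee, node);
	  continue;
	}
      printf ("inlining %d into %d\n", callee, node);
      toy_flatten (callee);
    }
  toy_in_progress[node] = 0;
}

int
main (void)
{
  toy_flatten (0);	/* The back edge 2 -> 0 is rejected.  */
  return 0;
}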
1908
1909 /* Count the number of callers of NODE and store the count into DATA
1910 (which points to an int). Worker for cgraph_for_node_and_aliases. */
1911
1912 static bool
1913 sum_callers (struct cgraph_node *node, void *data)
1914 {
1915 struct cgraph_edge *e;
1916 int *num_calls = (int *)data;
1917
1918 for (e = node->callers; e; e = e->next_caller)
1919 (*num_calls)++;
1920 return false;
1921 }
1922
1923 /* Inline NODE into all callers. Worker for cgraph_for_node_and_aliases.
1924 DATA points to the number of calls originally found, so that we avoid
1925 infinite recursion. */
1926
1927 static bool
1928 inline_to_all_callers (struct cgraph_node *node, void *data)
1929 {
1930 int *num_calls = (int *)data;
1931 while (node->callers && !node->global.inlined_to)
1932 {
1933 struct cgraph_node *caller = node->callers->caller;
1934
1935 if (dump_file)
1936 {
1937 fprintf (dump_file,
1938 "\nInlining %s size %i.\n",
1939 node->name (),
1940 inline_summary (node)->size);
1941 fprintf (dump_file,
1942 " Called once from %s %i insns.\n",
1943 node->callers->caller->name (),
1944 inline_summary (node->callers->caller)->size);
1945 }
1946
1947 inline_call (node->callers, true, NULL, NULL, true);
1948 if (dump_file)
1949 fprintf (dump_file,
1950 " Inlined into %s which now has %i size\n",
1951 caller->name (),
1952 inline_summary (caller)->size);
1953 if (!(*num_calls)--)
1954 {
1955 if (dump_file)
1956 fprintf (dump_file, "New calls found; giving up.\n");
1957 return true;
1958 }
1959 }
1960 return false;
1961 }
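
/* A standalone sketch of the guard used by inline_to_all_callers above:
   the number of callers is counted up front (by sum_callers) and the
   inlining loop gives up once it has done that many steps, so call sites
   discovered while inlining cannot cause endless work.  The counts below
   are invented for illustration.  */

#include <stdio.h>

int
main (void)
{
  int pending = 3;		/* Known call sites of the node.  */
  int budget = pending;		/* Analogue of the sum_callers result.  */
  int site = 0;

  while (pending > 0)
    {
      printf ("inlining into caller %d\n", site);
      if (site == 1)
	pending += 2;		/* Inlining exposed two new call sites.  */
      site++;
      pending--;
      if (!budget--)
	{
	  printf ("new calls found; giving up\n");
	  break;
	}
    }
  return 0;
}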
1962
1963 /* Decide on the inlining. We do so in topological order to avoid the
1964 expense of updating data structures. */
1965
1966 static unsigned int
1967 ipa_inline (void)
1968 {
1969 struct cgraph_node *node;
1970 int nnodes;
1971 struct cgraph_node **order;
1972 int i;
1973 int cold;
1974 bool remove_functions = false;
1975
1976 if (!optimize)
1977 return 0;
1978
1979 order = XCNEWVEC (struct cgraph_node *, cgraph_n_nodes);
1980
1981 if (in_lto_p && optimize)
1982 ipa_update_after_lto_read ();
1983
1984 if (dump_file)
1985 dump_inline_summaries (dump_file);
1986
1987 nnodes = ipa_reverse_postorder (order);
1988
1989 FOR_EACH_FUNCTION (node)
1990 node->aux = 0;
1991
1992 if (dump_file)
1993 fprintf (dump_file, "\nFlattening functions:\n");
1994
1995 /* In the first pass handle functions to be flattened. Do this with
1996 priority so that none of our later choices will make this impossible. */
1997 for (i = nnodes - 1; i >= 0; i--)
1998 {
1999 node = order[i];
2000
2001 /* Handle nodes to be flattened.
2002 Ideally, when processing callees we would stop inlining at the
2003 entry of cycles, possibly cloning that entry point and trying
2004 to flatten it, turning it into a self-recursive
2005 function. */
2006 if (lookup_attribute ("flatten",
2007 DECL_ATTRIBUTES (node->decl)) != NULL)
2008 {
2009 if (dump_file)
2010 fprintf (dump_file,
2011 "Flattening %s\n", node->name ());
2012 flatten_function (node, false);
2013 }
2014 }
2015
2016 inline_small_functions ();
2017
2018 /* Do the first after-inlining removal. We want to remove all "stale" extern
2019 inline functions and virtual functions so we really know what is called once. */
2020 symtab_remove_unreachable_nodes (false, dump_file);
2021 free (order);
2022
2023 /* Inline functions with the property that after inlining into all callers
2024 the code size will shrink, because the out-of-line copy is eliminated.
2025 We do this regardless of the callee size as long as the function growth
2026 limits are met. */
2027 if (dump_file)
2028 fprintf (dump_file,
2029 "\nDeciding on functions to be inlined into all callers and removing useless speculations:\n");
2030
2031 /* Inlining one function called once has a good chance of preventing
2032 inlining of other functions into the same callee. Ideally we should
2033 work in priority order, but probably inlining hot functions first
2034 is a good cut without the extra pain of maintaining the queue.
2035
2036 ??? This does not really fit the bill perfectly: inlining a function
2037 into a callee often leads to better optimization of the callee due to
2038 increased context for optimization.
2039 For example, if main() calls a function that outputs help
2040 and then a function that does the main optimization, we should inline
2041 the second with priority even if both calls are cold by themselves.
2042
2043 We probably want to implement a new predicate replacing our use of
2044 maybe_hot_edge, interpreted as maybe_hot_edge || callee is known
2045 to be hot. */
2046 for (cold = 0; cold <= 1; cold ++)
2047 {
2048 FOR_EACH_DEFINED_FUNCTION (node)
2049 {
2050 struct cgraph_edge *edge, *next;
2051 bool update = false;
2052
2053 for (edge = node->callees; edge; edge = next)
2054 {
2055 next = edge->next_callee;
2056 if (edge->speculative && !speculation_useful_p (edge, false))
2057 {
2058 cgraph_resolve_speculation (edge, NULL);
2059 update = true;
2060 remove_functions = true;
2061 }
2062 }
2063 if (update)
2064 {
2065 struct cgraph_node *where = node->global.inlined_to
2066 ? node->global.inlined_to : node;
2067 reset_node_growth_cache (where);
2068 reset_edge_caches (where);
2069 inline_update_overall_summary (where);
2070 }
2071 if (flag_inline_functions_called_once
2072 && want_inline_function_to_all_callers_p (node, cold))
2073 {
2074 int num_calls = 0;
2075 cgraph_for_node_and_aliases (node, sum_callers,
2076 &num_calls, true);
2077 cgraph_for_node_and_aliases (node, inline_to_all_callers,
2078 &num_calls, true);
2079 remove_functions = true;
2080 }
2081 }
2082 }
2083
2084 /* Free ipa-prop structures if they are no longer needed. */
2085 if (optimize)
2086 ipa_free_all_structures_after_iinln ();
2087
2088 if (dump_file)
2089 fprintf (dump_file,
2090 "\nInlined %i calls, eliminated %i functions\n\n",
2091 ncalls_inlined, nfunctions_inlined);
2092
2093 if (dump_file)
2094 dump_inline_summaries (dump_file);
2095 /* In WPA we use the inline summaries for the partitioning process. */
2096 if (!flag_wpa)
2097 inline_free_summary ();
2098 return remove_functions ? TODO_remove_functions : 0;
2099 }
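
/* A standalone sketch of the two-pass ordering used in ipa_inline above:
   the cold loop first considers only hot candidates for "inline into all
   callers" and only then the cold ones, so that cold decisions cannot
   crowd out hot opportunities.  The candidate table is invented for
   illustration; the real predicate is want_inline_function_to_all_callers_p.  */

#include <stdio.h>

struct toy_candidate { const char *name; int is_cold; };

static const struct toy_candidate toy_candidates[] =
  { { "hot_helper", 0 }, { "print_usage", 1 }, { "hot_kernel", 0 } };

int
main (void)
{
  for (int cold = 0; cold <= 1; cold++)
    for (unsigned int i = 0;
	 i < sizeof (toy_candidates) / sizeof (toy_candidates[0]); i++)
      if (toy_candidates[i].is_cold == cold)
	printf ("pass %d: considering %s for inlining into all callers\n",
		cold, toy_candidates[i].name);
  return 0;
}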
2100
2101 /* Inline always-inline function calls in NODE. */
2102
2103 static bool
2104 inline_always_inline_functions (struct cgraph_node *node)
2105 {
2106 struct cgraph_edge *e;
2107 bool inlined = false;
2108
2109 for (e = node->callees; e; e = e->next_callee)
2110 {
2111 struct cgraph_node *callee = cgraph_function_or_thunk_node (e->callee, NULL);
2112 if (!DECL_DISREGARD_INLINE_LIMITS (callee->decl))
2113 continue;
2114
2115 if (cgraph_edge_recursive_p (e))
2116 {
2117 if (dump_file)
2118 fprintf (dump_file, " Not inlining recursive call to %s.\n",
2119 e->callee->name ());
2120 e->inline_failed = CIF_RECURSIVE_INLINING;
2121 continue;
2122 }
2123
2124 if (!can_early_inline_edge_p (e))
2125 {
2126 /* Set inlined to true if the callee is marked "always_inline" but
2127 is not inlinable. This will allow flagging an error later in
2128 expand_call_inline in tree-inline.c. */
2129 if (lookup_attribute ("always_inline",
2130 DECL_ATTRIBUTES (callee->decl)) != NULL)
2131 inlined = true;
2132 continue;
2133 }
2134
2135 if (dump_file)
2136 fprintf (dump_file, " Inlining %s into %s (always_inline).\n",
2137 xstrdup (e->callee->name ()),
2138 xstrdup (e->caller->name ()));
2139 inline_call (e, true, NULL, NULL, false);
2140 inlined = true;
2141 }
2142 if (inlined)
2143 inline_update_overall_summary (node);
2144
2145 return inlined;
2146 }
2147
2148 /* Decide on early inlining of small functions called by NODE.
2149 Return true if anything was inlined. */
2150
2151 static bool
2152 early_inline_small_functions (struct cgraph_node *node)
2153 {
2154 struct cgraph_edge *e;
2155 bool inlined = false;
2156
2157 for (e = node->callees; e; e = e->next_callee)
2158 {
2159 struct cgraph_node *callee = cgraph_function_or_thunk_node (e->callee, NULL);
2160 if (!inline_summary (callee)->inlinable
2161 || !e->inline_failed)
2162 continue;
2163
2164 /* Do not consider functions not declared inline. */
2165 if (!DECL_DECLARED_INLINE_P (callee->decl)
2166 && !flag_inline_small_functions
2167 && !flag_inline_functions)
2168 continue;
2169
2170 if (dump_file)
2171 fprintf (dump_file, "Considering inline candidate %s.\n",
2172 callee->name ());
2173
2174 if (!can_early_inline_edge_p (e))
2175 continue;
2176
2177 if (cgraph_edge_recursive_p (e))
2178 {
2179 if (dump_file)
2180 fprintf (dump_file, " Not inlining: recursive call.\n");
2181 continue;
2182 }
2183
2184 if (!want_early_inline_function_p (e))
2185 continue;
2186
2187 if (dump_file)
2188 fprintf (dump_file, " Inlining %s into %s.\n",
2189 xstrdup (callee->name ()),
2190 xstrdup (e->caller->name ()));
2191 inline_call (e, true, NULL, NULL, true);
2192 inlined = true;
2193 }
2194
2195 return inlined;
2196 }
2197
2198 /* Do inlining of small functions. Doing so early helps profiling and other
2199 passes to be somewhat more effective and avoids some code duplication in
2200 the later real inlining pass for testcases with very many function calls. */
2201 static unsigned int
2202 early_inliner (void)
2203 {
2204 struct cgraph_node *node = cgraph_get_node (current_function_decl);
2205 struct cgraph_edge *edge;
2206 unsigned int todo = 0;
2207 int iterations = 0;
2208 bool inlined = false;
2209
2210 if (seen_error ())
2211 return 0;
2212
2213 /* Do nothing if the data structures for the IPA inliner are already computed.
2214 This happens when some pass decides to construct a new function and
2215 cgraph_add_new_function calls the lowering passes and early optimization on
2216 it. This may confuse us when the early inliner decides to inline a call to
2217 a function clone, because function clones don't have a parameter list in
2218 ipa-prop matching their signature. */
2219 if (ipa_node_params_vector.exists ())
2220 return 0;
2221
2222 #ifdef ENABLE_CHECKING
2223 verify_cgraph_node (node);
2224 #endif
2225 ipa_remove_all_references (&node->ref_list);
2226
2227 /* Even when not optimizing or not inlining, inline the always-inline
2228 functions. */
2229 inlined = inline_always_inline_functions (node);
2230
2231 if (!optimize
2232 || flag_no_inline
2233 || !flag_early_inlining
2234 /* Never inline regular functions into always-inline functions
2235 during incremental inlining. This is unfortunate, as functions
2236 calling always-inline functions will get less optimized, but at
2237 the same time inlining a function that calls an always-inline
2238 function into an always-inline function might introduce
2239 cycles of edges to be always inlined in the callgraph.
2240
2241 We might want to be smarter and just avoid this kind of inlining. */
2242 || DECL_DISREGARD_INLINE_LIMITS (node->decl))
2243 ;
2244 else if (lookup_attribute ("flatten",
2245 DECL_ATTRIBUTES (node->decl)) != NULL)
2246 {
2247 /* When the function is marked to be flattened, recursively inline
2248 all calls in it. */
2249 if (dump_file)
2250 fprintf (dump_file,
2251 "Flattening %s\n", node->name ());
2252 flatten_function (node, true);
2253 inlined = true;
2254 }
2255 else
2256 {
2257 /* We iterate incremental inlining to get trivial cases of indirect
2258 inlining. */
2259 while (iterations < PARAM_VALUE (PARAM_EARLY_INLINER_MAX_ITERATIONS)
2260 && early_inline_small_functions (node))
2261 {
2262 timevar_push (TV_INTEGRATION);
2263 todo |= optimize_inline_calls (current_function_decl);
2264
2265 /* Technically we ought to recompute the inline parameters so that the new
2266 iteration of the early inliner works as expected. However, the values
2267 are approximately right, and thus we only need to update the edge
2268 info that might be cleared out for newly discovered edges. */
2269 for (edge = node->callees; edge; edge = edge->next_callee)
2270 {
2271 struct inline_edge_summary *es = inline_edge_summary (edge);
2272 es->call_stmt_size
2273 = estimate_num_insns (edge->call_stmt, &eni_size_weights);
2274 es->call_stmt_time
2275 = estimate_num_insns (edge->call_stmt, &eni_time_weights);
2276 if (edge->callee->decl
2277 && !gimple_check_call_matching_types (
2278 edge->call_stmt, edge->callee->decl, false))
2279 edge->call_stmt_cannot_inline_p = true;
2280 }
2281 timevar_pop (TV_INTEGRATION);
2282 iterations++;
2283 inlined = false;
2284 }
2285 if (dump_file)
2286 fprintf (dump_file, "Iterations: %i\n", iterations);
2287 }
2288
2289 if (inlined)
2290 {
2291 timevar_push (TV_INTEGRATION);
2292 todo |= optimize_inline_calls (current_function_decl);
2293 timevar_pop (TV_INTEGRATION);
2294 }
2295
2296 cfun->always_inline_functions_inlined = true;
2297
2298 return todo;
2299 }
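
/* A standalone sketch of the iteration scheme used by early_inliner above:
   keep inlining small calls until either nothing changes or the iteration
   cap (PARAM_EARLY_INLINER_MAX_ITERATIONS in the real code) is reached;
   each round may expose new call sites, e.g. indirect calls that become
   direct.  The numbers and the toy_inline_round behaviour are invented.  */

#include <stdio.h>

static int toy_remaining_calls = 6;

/* Inline up to two small calls; a full round may expose one new site.  */
static int
toy_inline_round (void)
{
  if (toy_remaining_calls == 0)
    return 0;
  int inlined = toy_remaining_calls < 2 ? toy_remaining_calls : 2;
  toy_remaining_calls -= inlined;
  if (inlined == 2)
    toy_remaining_calls++;	/* A newly discovered call site.  */
  printf ("inlined %d calls, %d left\n", inlined, toy_remaining_calls);
  return 1;
}

int
main (void)
{
  const int max_iterations = 4;	/* Stand-in for the early-inliner param.  */
  int iterations = 0;

  while (iterations < max_iterations && toy_inline_round ())
    iterations++;		/* Folding/cleanup would run here.  */

  printf ("iterations: %d\n", iterations);
  return 0;
}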
2300
2301 namespace {
2302
2303 const pass_data pass_data_early_inline =
2304 {
2305 GIMPLE_PASS, /* type */
2306 "einline", /* name */
2307 OPTGROUP_INLINE, /* optinfo_flags */
2308 false, /* has_gate */
2309 true, /* has_execute */
2310 TV_EARLY_INLINING, /* tv_id */
2311 PROP_ssa, /* properties_required */
2312 0, /* properties_provided */
2313 0, /* properties_destroyed */
2314 0, /* todo_flags_start */
2315 0, /* todo_flags_finish */
2316 };
2317
2318 class pass_early_inline : public gimple_opt_pass
2319 {
2320 public:
2321 pass_early_inline (gcc::context *ctxt)
2322 : gimple_opt_pass (pass_data_early_inline, ctxt)
2323 {}
2324
2325 /* opt_pass methods: */
2326 unsigned int execute () { return early_inliner (); }
2327
2328 }; // class pass_early_inline
2329
2330 } // anon namespace
2331
2332 gimple_opt_pass *
2333 make_pass_early_inline (gcc::context *ctxt)
2334 {
2335 return new pass_early_inline (ctxt);
2336 }
2337
2338
2339 /* When to run IPA inlining. Inlining of always-inline functions
2340 happens during early inlining.
2341
2342 Enable inlining unconditionally, because callgraph redirection
2343 happens here. */
2344
2345 static bool
2346 gate_ipa_inline (void)
2347 {
2348 return true;
2349 }
2350
2351 namespace {
2352
2353 const pass_data pass_data_ipa_inline =
2354 {
2355 IPA_PASS, /* type */
2356 "inline", /* name */
2357 OPTGROUP_INLINE, /* optinfo_flags */
2358 true, /* has_gate */
2359 true, /* has_execute */
2360 TV_IPA_INLINING, /* tv_id */
2361 0, /* properties_required */
2362 0, /* properties_provided */
2363 0, /* properties_destroyed */
2364 TODO_remove_functions, /* todo_flags_start */
2365 ( TODO_dump_symtab ), /* todo_flags_finish */
2366 };
2367
2368 class pass_ipa_inline : public ipa_opt_pass_d
2369 {
2370 public:
2371 pass_ipa_inline (gcc::context *ctxt)
2372 : ipa_opt_pass_d (pass_data_ipa_inline, ctxt,
2373 inline_generate_summary, /* generate_summary */
2374 inline_write_summary, /* write_summary */
2375 inline_read_summary, /* read_summary */
2376 NULL, /* write_optimization_summary */
2377 NULL, /* read_optimization_summary */
2378 NULL, /* stmt_fixup */
2379 0, /* function_transform_todo_flags_start */
2380 inline_transform, /* function_transform */
2381 NULL) /* variable_transform */
2382 {}
2383
2384 /* opt_pass methods: */
2385 bool gate () { return gate_ipa_inline (); }
2386 unsigned int execute () { return ipa_inline (); }
2387
2388 }; // class pass_ipa_inline
2389
2390 } // anon namespace
2391
2392 ipa_opt_pass_d *
2393 make_pass_ipa_inline (gcc::context *ctxt)
2394 {
2395 return new pass_ipa_inline (ctxt);
2396 }