gcc.dg/tree-ssa/ssa-dom-cse-2.c: xfail scan for mmix.
[gcc.git] / gcc / ipa-inline.c
1 /* Inlining decision heuristics.
2 Copyright (C) 2003-2020 Free Software Foundation, Inc.
3 Contributed by Jan Hubicka
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 /* Inlining decision heuristics
22
23 The implementation of inliner is organized as follows:
24
25 inlining heuristics limits
26
27 can_inline_edge_p allow to check that particular inlining is allowed
28 by the limits specified by user (allowed function growth, growth and so
29 on).
30
31 Functions are inlined when it is obvious the result is profitable (such
32 as functions called once or when inlining reduce code size).
33 In addition to that we perform inlining of small functions and recursive
34 inlining.
35
36 inlining heuristics
37
38 The inliner itself is split into two passes:
39
40 pass_early_inlining
41
42 Simple local inlining pass inlining callees into current function.
43 This pass makes no use of whole unit analysis and thus it can do only
44 very simple decisions based on local properties.
45
46 The strength of the pass is that it is run in topological order
47 (reverse postorder) on the callgraph. Functions are converted into SSA
48 form just before this pass and optimized subsequently. As a result, the
49 callees of the function seen by the early inliner were already optimized
50 and the results of early inlining add a lot of optimization opportunities
51 for the local optimization.
52
53 The pass handles the obvious inlining decisions within the compilation
54 unit - inlining auto inline functions, inlining for size and
55 flattening.
56
57 The main strength of the pass is the ability to eliminate abstraction
58 penalty in C++ code (via combination of inlining and early
59 optimization) and thus improve quality of analysis done by real IPA
60 optimizers.
61
62 Because of lack of whole unit knowledge, the pass cannot really make
63 good code size/performance tradeoffs. It however does very simple
64 speculative inlining allowing code size to grow by
65 EARLY_INLINING_INSNS when callee is leaf function. In this case the
66 optimizations performed later are very likely to eliminate the cost.
67
68 pass_ipa_inline
69
70 This is the real inliner able to handle inlining with whole program
71 knowledge. It performs following steps:
72
73 1) inlining of small functions. This is implemented by greedy
74 algorithm ordering all inlinable cgraph edges by their badness and
75 inlining them in this order as long as inline limits allows doing so.
76
77 This heuristic is not very good at inlining recursive calls. Recursive
78 calls can be inlined with results similar to loop unrolling. To do so,
79 special purpose recursive inliner is executed on function when
80 recursive edge is met as viable candidate.
81
82 2) Unreachable functions are removed from callgraph. Inlining leads
83 to devirtualization and other modification of callgraph so functions
84 may become unreachable during the process. Also functions declared as
85 extern inline or virtual functions are removed, since after inlining
86 we no longer need the offline bodies.
87
88 3) Functions called once and not exported from the unit are inlined.
89 This should almost always lead to reduction of code size by eliminating
90 the need for offline copy of the function. */
91
92 #include "config.h"
93 #include "system.h"
94 #include "coretypes.h"
95 #include "backend.h"
96 #include "target.h"
97 #include "rtl.h"
98 #include "tree.h"
99 #include "gimple.h"
100 #include "alloc-pool.h"
101 #include "tree-pass.h"
102 #include "gimple-ssa.h"
103 #include "cgraph.h"
104 #include "lto-streamer.h"
105 #include "trans-mem.h"
106 #include "calls.h"
107 #include "tree-inline.h"
108 #include "profile.h"
109 #include "symbol-summary.h"
110 #include "tree-vrp.h"
111 #include "ipa-prop.h"
112 #include "ipa-fnsummary.h"
113 #include "ipa-inline.h"
114 #include "ipa-utils.h"
115 #include "sreal.h"
116 #include "auto-profile.h"
117 #include "builtins.h"
118 #include "fibonacci_heap.h"
119 #include "stringpool.h"
120 #include "attribs.h"
121 #include "asan.h"
122
123 typedef fibonacci_heap <sreal, cgraph_edge> edge_heap_t;
124 typedef fibonacci_node <sreal, cgraph_edge> edge_heap_node_t;
125
126 /* Statistics we collect about inlining algorithm. */
127 static int overall_size;
128 static profile_count max_count;
129 static profile_count spec_rem;
130
131 /* Return false when inlining edge E would lead to violating
132 limits on function unit growth or stack usage growth.
133
134 The relative function body growth limit is present generally
135 to avoid problems with non-linear behavior of the compiler.
136 To allow inlining huge functions into tiny wrapper, the limit
137 is always based on the bigger of the two functions considered.
138
139 For stack growth limits we always base the growth in stack usage
140 of the callers. We want to prevent applications from segfaulting
141 on stack overflow when functions with huge stack frames gets
142 inlined. */
143
144 static bool
145 caller_growth_limits (struct cgraph_edge *e)
146 {
147 struct cgraph_node *to = e->caller;
148 struct cgraph_node *what = e->callee->ultimate_alias_target ();
149 int newsize;
150 int limit = 0;
151 HOST_WIDE_INT stack_size_limit = 0, inlined_stack;
152 ipa_size_summary *outer_info = ipa_size_summaries->get (to);
153
154 /* Look for function e->caller is inlined to. While doing
155 so work out the largest function body on the way. As
156 described above, we want to base our function growth
157 limits based on that. Not on the self size of the
158 outer function, not on the self size of inline code
159 we immediately inline to. This is the most relaxed
160 interpretation of the rule "do not grow large functions
161 too much in order to prevent compiler from exploding". */
162 while (true)
163 {
164 ipa_size_summary *size_info = ipa_size_summaries->get (to);
165 if (limit < size_info->self_size)
166 limit = size_info->self_size;
167 if (stack_size_limit < size_info->estimated_self_stack_size)
168 stack_size_limit = size_info->estimated_self_stack_size;
169 if (to->inlined_to)
170 to = to->callers->caller;
171 else
172 break;
173 }
174
175 ipa_fn_summary *what_info = ipa_fn_summaries->get (what);
176 ipa_size_summary *what_size_info = ipa_size_summaries->get (what);
177
178 if (limit < what_size_info->self_size)
179 limit = what_size_info->self_size;
180
181 limit += limit * opt_for_fn (to->decl, param_large_function_growth) / 100;
182
183 /* Check the size after inlining against the function limits. But allow
184 the function to shrink if it went over the limits by forced inlining. */
185 newsize = estimate_size_after_inlining (to, e);
186 if (newsize >= ipa_size_summaries->get (what)->size
187 && newsize > opt_for_fn (to->decl, param_large_function_insns)
188 && newsize > limit)
189 {
190 e->inline_failed = CIF_LARGE_FUNCTION_GROWTH_LIMIT;
191 return false;
192 }
193
194 if (!what_info->estimated_stack_size)
195 return true;
196
197 /* FIXME: Stack size limit often prevents inlining in Fortran programs
198 due to large i/o datastructures used by the Fortran front-end.
199 We ought to ignore this limit when we know that the edge is executed
200 on every invocation of the caller (i.e. its call statement dominates
201 exit block). We do not track this information, yet. */
202 stack_size_limit += ((gcov_type)stack_size_limit
203 * opt_for_fn (to->decl, param_stack_frame_growth)
204 / 100);
205
206 inlined_stack = (ipa_get_stack_frame_offset (to)
207 + outer_info->estimated_self_stack_size
208 + what_info->estimated_stack_size);
209 /* Check new stack consumption with stack consumption at the place
210 stack is used. */
211 if (inlined_stack > stack_size_limit
212 /* If function already has large stack usage from sibling
213 inline call, we can inline, too.
214 This bit overoptimistically assume that we are good at stack
215 packing. */
216 && inlined_stack > ipa_fn_summaries->get (to)->estimated_stack_size
217 && inlined_stack > opt_for_fn (to->decl, param_large_stack_frame))
218 {
219 e->inline_failed = CIF_LARGE_STACK_FRAME_GROWTH_LIMIT;
220 return false;
221 }
222 return true;
223 }
224
225 /* Dump info about why inlining has failed. */
226
227 static void
228 report_inline_failed_reason (struct cgraph_edge *e)
229 {
230 if (dump_enabled_p ())
231 {
232 dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
233 " not inlinable: %C -> %C, %s\n",
234 e->caller, e->callee,
235 cgraph_inline_failed_string (e->inline_failed));
236 if ((e->inline_failed == CIF_TARGET_OPTION_MISMATCH
237 || e->inline_failed == CIF_OPTIMIZATION_MISMATCH)
238 && e->caller->lto_file_data
239 && e->callee->ultimate_alias_target ()->lto_file_data)
240 {
241 dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
242 " LTO objects: %s, %s\n",
243 e->caller->lto_file_data->file_name,
244 e->callee->ultimate_alias_target ()->lto_file_data->file_name);
245 }
246 if (e->inline_failed == CIF_TARGET_OPTION_MISMATCH)
247 if (dump_file)
248 cl_target_option_print_diff
249 (dump_file, 2, target_opts_for_fn (e->caller->decl),
250 target_opts_for_fn (e->callee->ultimate_alias_target ()->decl));
251 if (e->inline_failed == CIF_OPTIMIZATION_MISMATCH)
252 if (dump_file)
253 cl_optimization_print_diff
254 (dump_file, 2, opts_for_fn (e->caller->decl),
255 opts_for_fn (e->callee->ultimate_alias_target ()->decl));
256 }
257 }
258
259 /* Decide whether sanitizer-related attributes allow inlining. */
260
261 static bool
262 sanitize_attrs_match_for_inline_p (const_tree caller, const_tree callee)
263 {
264 if (!caller || !callee)
265 return true;
266
267 /* Follow clang and allow inlining for always_inline functions. */
268 if (lookup_attribute ("always_inline", DECL_ATTRIBUTES (callee)))
269 return true;
270
271 const sanitize_code codes[] =
272 {
273 SANITIZE_ADDRESS,
274 SANITIZE_THREAD,
275 SANITIZE_UNDEFINED,
276 SANITIZE_UNDEFINED_NONDEFAULT,
277 SANITIZE_POINTER_COMPARE,
278 SANITIZE_POINTER_SUBTRACT
279 };
280
281 for (unsigned i = 0; i < sizeof (codes) / sizeof (codes[0]); i++)
282 if (sanitize_flags_p (codes[i], caller)
283 != sanitize_flags_p (codes[i], callee))
284 return false;
285
286 return true;
287 }
288
/* The three macros below evaluate to true when the values of option FLAG
   in the caller and in the callee block inlining.  They expect `caller',
   `callee' and (for the first two) `always_inline' to be in scope at the
   point of use.  */

/* Used for flags where it is safe to inline when caller's value is
   greater than callee's.  Without always_inline any difference blocks
   inlining; with always_inline only a smaller caller value does.  */
#define check_maybe_up(flag) \
  (always_inline \
   ? (opts_for_fn (caller->decl)->x_##flag \
      < opts_for_fn (callee->decl)->x_##flag) \
   : (opts_for_fn (caller->decl)->x_##flag \
      != opts_for_fn (callee->decl)->x_##flag))
/* Used for flags where it is safe to inline when caller's value is
   smaller than callee's.  Without always_inline any difference blocks
   inlining; with always_inline only a greater caller value does.  */
#define check_maybe_down(flag) \
  (always_inline \
   ? (opts_for_fn (caller->decl)->x_##flag \
      > opts_for_fn (callee->decl)->x_##flag) \
   : (opts_for_fn (caller->decl)->x_##flag \
      != opts_for_fn (callee->decl)->x_##flag))
/* Used for flags where exact match is needed for correctness.  */
#define check_match(flag) \
  (opts_for_fn (caller->decl)->x_##flag \
   != opts_for_fn (callee->decl)->x_##flag)
309
310 /* Decide if we can inline the edge and possibly update
311 inline_failed reason.
312 We check whether inlining is possible at all and whether
313 caller growth limits allow doing so.
314
315 if REPORT is true, output reason to the dump file. */
316
317 static bool
318 can_inline_edge_p (struct cgraph_edge *e, bool report,
319 bool early = false)
320 {
321 gcc_checking_assert (e->inline_failed);
322
323 if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
324 {
325 if (report)
326 report_inline_failed_reason (e);
327 return false;
328 }
329
330 bool inlinable = true;
331 enum availability avail;
332 cgraph_node *caller = (e->caller->inlined_to
333 ? e->caller->inlined_to : e->caller);
334 cgraph_node *callee = e->callee->ultimate_alias_target (&avail, caller);
335
336 if (!callee->definition)
337 {
338 e->inline_failed = CIF_BODY_NOT_AVAILABLE;
339 inlinable = false;
340 }
341 if (!early && (!opt_for_fn (callee->decl, optimize)
342 || !opt_for_fn (caller->decl, optimize)))
343 {
344 e->inline_failed = CIF_FUNCTION_NOT_OPTIMIZED;
345 inlinable = false;
346 }
347 else if (callee->calls_comdat_local)
348 {
349 e->inline_failed = CIF_USES_COMDAT_LOCAL;
350 inlinable = false;
351 }
352 else if (avail <= AVAIL_INTERPOSABLE)
353 {
354 e->inline_failed = CIF_OVERWRITABLE;
355 inlinable = false;
356 }
357 /* All edges with call_stmt_cannot_inline_p should have inline_failed
358 initialized to one of FINAL_ERROR reasons. */
359 else if (e->call_stmt_cannot_inline_p)
360 gcc_unreachable ();
361 /* Don't inline if the functions have different EH personalities. */
362 else if (DECL_FUNCTION_PERSONALITY (caller->decl)
363 && DECL_FUNCTION_PERSONALITY (callee->decl)
364 && (DECL_FUNCTION_PERSONALITY (caller->decl)
365 != DECL_FUNCTION_PERSONALITY (callee->decl)))
366 {
367 e->inline_failed = CIF_EH_PERSONALITY;
368 inlinable = false;
369 }
370 /* TM pure functions should not be inlined into non-TM_pure
371 functions. */
372 else if (is_tm_pure (callee->decl) && !is_tm_pure (caller->decl))
373 {
374 e->inline_failed = CIF_UNSPECIFIED;
375 inlinable = false;
376 }
377 /* Check compatibility of target optimization options. */
378 else if (!targetm.target_option.can_inline_p (caller->decl,
379 callee->decl))
380 {
381 e->inline_failed = CIF_TARGET_OPTION_MISMATCH;
382 inlinable = false;
383 }
384 else if (ipa_fn_summaries->get (callee) == NULL
385 || !ipa_fn_summaries->get (callee)->inlinable)
386 {
387 e->inline_failed = CIF_FUNCTION_NOT_INLINABLE;
388 inlinable = false;
389 }
390 /* Don't inline a function with mismatched sanitization attributes. */
391 else if (!sanitize_attrs_match_for_inline_p (caller->decl, callee->decl))
392 {
393 e->inline_failed = CIF_SANITIZE_ATTRIBUTE_MISMATCH;
394 inlinable = false;
395 }
396 if (!inlinable && report)
397 report_inline_failed_reason (e);
398 return inlinable;
399 }
400
401 /* Return inlining_insns_single limit for function N. If HINT is true
402 scale up the bound. */
403
404 static int
405 inline_insns_single (cgraph_node *n, bool hint)
406 {
407 if (hint)
408 return opt_for_fn (n->decl, param_max_inline_insns_single)
409 * opt_for_fn (n->decl, param_inline_heuristics_hint_percent) / 100;
410 return opt_for_fn (n->decl, param_max_inline_insns_single);
411 }
412
413 /* Return inlining_insns_auto limit for function N. If HINT is true
414 scale up the bound. */
415
416 static int
417 inline_insns_auto (cgraph_node *n, bool hint)
418 {
419 int max_inline_insns_auto = opt_for_fn (n->decl, param_max_inline_insns_auto);
420 if (hint)
421 return max_inline_insns_auto
422 * opt_for_fn (n->decl, param_inline_heuristics_hint_percent) / 100;
423 return max_inline_insns_auto;
424 }
425
426 /* Decide if we can inline the edge and possibly update
427 inline_failed reason.
428 We check whether inlining is possible at all and whether
429 caller growth limits allow doing so.
430
431 if REPORT is true, output reason to the dump file.
432
433 if DISREGARD_LIMITS is true, ignore size limits. */
434
435 static bool
436 can_inline_edge_by_limits_p (struct cgraph_edge *e, bool report,
437 bool disregard_limits = false, bool early = false)
438 {
439 gcc_checking_assert (e->inline_failed);
440
441 if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
442 {
443 if (report)
444 report_inline_failed_reason (e);
445 return false;
446 }
447
448 bool inlinable = true;
449 enum availability avail;
450 cgraph_node *caller = (e->caller->inlined_to
451 ? e->caller->inlined_to : e->caller);
452 cgraph_node *callee = e->callee->ultimate_alias_target (&avail, caller);
453 tree caller_tree = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (caller->decl);
454 tree callee_tree
455 = callee ? DECL_FUNCTION_SPECIFIC_OPTIMIZATION (callee->decl) : NULL;
456 /* Check if caller growth allows the inlining. */
457 if (!DECL_DISREGARD_INLINE_LIMITS (callee->decl)
458 && !disregard_limits
459 && !lookup_attribute ("flatten",
460 DECL_ATTRIBUTES (caller->decl))
461 && !caller_growth_limits (e))
462 inlinable = false;
463 else if (callee->externally_visible
464 && !DECL_DISREGARD_INLINE_LIMITS (callee->decl)
465 && flag_live_patching == LIVE_PATCHING_INLINE_ONLY_STATIC)
466 {
467 e->inline_failed = CIF_EXTERN_LIVE_ONLY_STATIC;
468 inlinable = false;
469 }
470 /* Don't inline a function with a higher optimization level than the
471 caller. FIXME: this is really just tip of iceberg of handling
472 optimization attribute. */
473 else if (caller_tree != callee_tree)
474 {
475 bool always_inline =
476 (DECL_DISREGARD_INLINE_LIMITS (callee->decl)
477 && lookup_attribute ("always_inline",
478 DECL_ATTRIBUTES (callee->decl)));
479 ipa_fn_summary *caller_info = ipa_fn_summaries->get (caller);
480 ipa_fn_summary *callee_info = ipa_fn_summaries->get (callee);
481
482 /* Until GCC 4.9 we did not check the semantics-altering flags
483 below and inlined across optimization boundaries.
484 Enabling checks below breaks several packages by refusing
485 to inline library always_inline functions. See PR65873.
486 Disable the check for early inlining for now until better solution
487 is found. */
488 if (always_inline && early)
489 ;
490 /* There are some options that change IL semantics which means
491 we cannot inline in these cases for correctness reason.
492 Not even for always_inline declared functions. */
493 else if (check_match (flag_wrapv)
494 || check_match (flag_trapv)
495 || check_match (flag_pcc_struct_return)
496 || check_maybe_down (optimize_debug)
497 /* When caller or callee does FP math, be sure FP codegen flags
498 compatible. */
499 || ((caller_info->fp_expressions && callee_info->fp_expressions)
500 && (check_maybe_up (flag_rounding_math)
501 || check_maybe_up (flag_trapping_math)
502 || check_maybe_down (flag_unsafe_math_optimizations)
503 || check_maybe_down (flag_finite_math_only)
504 || check_maybe_up (flag_signaling_nans)
505 || check_maybe_down (flag_cx_limited_range)
506 || check_maybe_up (flag_signed_zeros)
507 || check_maybe_down (flag_associative_math)
508 || check_maybe_down (flag_reciprocal_math)
509 || check_maybe_down (flag_fp_int_builtin_inexact)
510 /* Strictly speaking only when the callee contains function
511 calls that may end up setting errno. */
512 || check_maybe_up (flag_errno_math)))
513 /* We do not want to make code compiled with exceptions to be
514 brought into a non-EH function unless we know that the callee
515 does not throw.
516 This is tracked by DECL_FUNCTION_PERSONALITY. */
517 || (check_maybe_up (flag_non_call_exceptions)
518 && DECL_FUNCTION_PERSONALITY (callee->decl))
519 || (check_maybe_up (flag_exceptions)
520 && DECL_FUNCTION_PERSONALITY (callee->decl))
521 /* When devirtualization is disabled for callee, it is not safe
522 to inline it as we possibly mangled the type info.
523 Allow early inlining of always inlines. */
524 || (!early && check_maybe_down (flag_devirtualize)))
525 {
526 e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
527 inlinable = false;
528 }
529 /* gcc.dg/pr43564.c. Apply user-forced inline even at -O0. */
530 else if (always_inline)
531 ;
532 /* When user added an attribute to the callee honor it. */
533 else if (lookup_attribute ("optimize", DECL_ATTRIBUTES (callee->decl))
534 && opts_for_fn (caller->decl) != opts_for_fn (callee->decl))
535 {
536 e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
537 inlinable = false;
538 }
539 /* If explicit optimize attribute are not used, the mismatch is caused
540 by different command line options used to build different units.
541 Do not care about COMDAT functions - those are intended to be
542 optimized with the optimization flags of module they are used in.
543 Also do not care about mixing up size/speed optimization when
544 DECL_DISREGARD_INLINE_LIMITS is set. */
545 else if ((callee->merged_comdat
546 && !lookup_attribute ("optimize",
547 DECL_ATTRIBUTES (caller->decl)))
548 || DECL_DISREGARD_INLINE_LIMITS (callee->decl))
549 ;
550 /* If mismatch is caused by merging two LTO units with different
551 optimization flags we want to be bit nicer. However never inline
552 if one of functions is not optimized at all. */
553 else if (!opt_for_fn (callee->decl, optimize)
554 || !opt_for_fn (caller->decl, optimize))
555 {
556 e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
557 inlinable = false;
558 }
559 /* If callee is optimized for size and caller is not, allow inlining if
560 code shrinks or we are in param_max_inline_insns_single limit and
561 callee is inline (and thus likely an unified comdat).
562 This will allow caller to run faster. */
563 else if (opt_for_fn (callee->decl, optimize_size)
564 > opt_for_fn (caller->decl, optimize_size))
565 {
566 int growth = estimate_edge_growth (e);
567 if (growth > opt_for_fn (caller->decl, param_max_inline_insns_size)
568 && (!DECL_DECLARED_INLINE_P (callee->decl)
569 && growth >= MAX (inline_insns_single (caller, false),
570 inline_insns_auto (caller, false))))
571 {
572 e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
573 inlinable = false;
574 }
575 }
576 /* If callee is more aggressively optimized for performance than caller,
577 we generally want to inline only cheap (runtime wise) functions. */
578 else if (opt_for_fn (callee->decl, optimize_size)
579 < opt_for_fn (caller->decl, optimize_size)
580 || (opt_for_fn (callee->decl, optimize)
581 > opt_for_fn (caller->decl, optimize)))
582 {
583 if (estimate_edge_time (e)
584 >= 20 + ipa_call_summaries->get (e)->call_stmt_time)
585 {
586 e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
587 inlinable = false;
588 }
589 }
590
591 }
592
593 if (!inlinable && report)
594 report_inline_failed_reason (e);
595 return inlinable;
596 }
597
598
599 /* Return true if the edge E is inlinable during early inlining. */
600
601 static bool
602 can_early_inline_edge_p (struct cgraph_edge *e)
603 {
604 struct cgraph_node *callee = e->callee->ultimate_alias_target ();
605 /* Early inliner might get called at WPA stage when IPA pass adds new
606 function. In this case we cannot really do any of early inlining
607 because function bodies are missing. */
608 if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
609 return false;
610 if (!gimple_has_body_p (callee->decl))
611 {
612 e->inline_failed = CIF_BODY_NOT_AVAILABLE;
613 return false;
614 }
615 /* In early inliner some of callees may not be in SSA form yet
616 (i.e. the callgraph is cyclic and we did not process
617 the callee by early inliner, yet). We don't have CIF code for this
618 case; later we will re-do the decision in the real inliner. */
619 if (!gimple_in_ssa_p (DECL_STRUCT_FUNCTION (e->caller->decl))
620 || !gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee->decl)))
621 {
622 if (dump_enabled_p ())
623 dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
624 " edge not inlinable: not in SSA form\n");
625 return false;
626 }
627 if (!can_inline_edge_p (e, true, true)
628 || !can_inline_edge_by_limits_p (e, true, false, true))
629 return false;
630 return true;
631 }
632
633
634 /* Return number of calls in N. Ignore cheap builtins. */
635
636 static int
637 num_calls (struct cgraph_node *n)
638 {
639 struct cgraph_edge *e;
640 int num = 0;
641
642 for (e = n->callees; e; e = e->next_callee)
643 if (!is_inexpensive_builtin (e->callee->decl))
644 num++;
645 return num;
646 }
647
648
649 /* Return true if we are interested in inlining small function. */
650
651 static bool
652 want_early_inline_function_p (struct cgraph_edge *e)
653 {
654 bool want_inline = true;
655 struct cgraph_node *callee = e->callee->ultimate_alias_target ();
656
657 if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
658 ;
659 /* For AutoFDO, we need to make sure that before profile summary, all
660 hot paths' IR look exactly the same as profiled binary. As a result,
661 in einliner, we will disregard size limit and inline those callsites
662 that are:
663 * inlined in the profiled binary, and
664 * the cloned callee has enough samples to be considered "hot". */
665 else if (flag_auto_profile && afdo_callsite_hot_enough_for_early_inline (e))
666 ;
667 else if (!DECL_DECLARED_INLINE_P (callee->decl)
668 && !opt_for_fn (e->caller->decl, flag_inline_small_functions))
669 {
670 e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
671 report_inline_failed_reason (e);
672 want_inline = false;
673 }
674 else
675 {
676 /* First take care of very large functions. */
677 int min_growth = estimate_min_edge_growth (e), growth = 0;
678 int n;
679 int early_inlining_insns = param_early_inlining_insns;
680
681 if (min_growth > early_inlining_insns)
682 {
683 if (dump_enabled_p ())
684 dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
685 " will not early inline: %C->%C, "
686 "call is cold and code would grow "
687 "at least by %i\n",
688 e->caller, callee,
689 min_growth);
690 want_inline = false;
691 }
692 else
693 growth = estimate_edge_growth (e);
694
695
696 if (!want_inline || growth <= param_max_inline_insns_size)
697 ;
698 else if (!e->maybe_hot_p ())
699 {
700 if (dump_enabled_p ())
701 dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
702 " will not early inline: %C->%C, "
703 "call is cold and code would grow by %i\n",
704 e->caller, callee,
705 growth);
706 want_inline = false;
707 }
708 else if (growth > early_inlining_insns)
709 {
710 if (dump_enabled_p ())
711 dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
712 " will not early inline: %C->%C, "
713 "growth %i exceeds --param early-inlining-insns\n",
714 e->caller, callee, growth);
715 want_inline = false;
716 }
717 else if ((n = num_calls (callee)) != 0
718 && growth * (n + 1) > early_inlining_insns)
719 {
720 if (dump_enabled_p ())
721 dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
722 " will not early inline: %C->%C, "
723 "growth %i exceeds --param early-inlining-insns "
724 "divided by number of calls\n",
725 e->caller, callee, growth);
726 want_inline = false;
727 }
728 }
729 return want_inline;
730 }
731
732 /* Compute time of the edge->caller + edge->callee execution when inlining
733 does not happen. */
734
735 inline sreal
736 compute_uninlined_call_time (struct cgraph_edge *edge,
737 sreal uninlined_call_time,
738 sreal freq)
739 {
740 cgraph_node *caller = (edge->caller->inlined_to
741 ? edge->caller->inlined_to
742 : edge->caller);
743
744 if (freq > 0)
745 uninlined_call_time *= freq;
746 else
747 uninlined_call_time = uninlined_call_time >> 11;
748
749 sreal caller_time = ipa_fn_summaries->get (caller)->time;
750 return uninlined_call_time + caller_time;
751 }
752
753 /* Same as compute_uinlined_call_time but compute time when inlining
754 does happen. */
755
756 inline sreal
757 compute_inlined_call_time (struct cgraph_edge *edge,
758 sreal time,
759 sreal freq)
760 {
761 cgraph_node *caller = (edge->caller->inlined_to
762 ? edge->caller->inlined_to
763 : edge->caller);
764 sreal caller_time = ipa_fn_summaries->get (caller)->time;
765
766 if (freq > 0)
767 time *= freq;
768 else
769 time = time >> 11;
770
771 /* This calculation should match one in ipa-inline-analysis.c
772 (estimate_edge_size_and_time). */
773 time -= (sreal)ipa_call_summaries->get (edge)->call_stmt_time * freq;
774 time += caller_time;
775 if (time <= 0)
776 time = ((sreal) 1) >> 8;
777 gcc_checking_assert (time >= 0);
778 return time;
779 }
780
781 /* Determine time saved by inlining EDGE of frequency FREQ
782 where callee's runtime w/o inlining is UNINLINED_TYPE
783 and with inlined is INLINED_TYPE. */
784
785 inline sreal
786 inlining_speedup (struct cgraph_edge *edge,
787 sreal freq,
788 sreal uninlined_time,
789 sreal inlined_time)
790 {
791 sreal speedup = uninlined_time - inlined_time;
792 /* Handling of call_time should match one in ipa-inline-fnsummary.c
793 (estimate_edge_size_and_time). */
794 sreal call_time = ipa_call_summaries->get (edge)->call_stmt_time;
795
796 if (freq > 0)
797 {
798 speedup = (speedup + call_time);
799 if (freq != 1)
800 speedup = speedup * freq;
801 }
802 else if (freq == 0)
803 speedup = speedup >> 11;
804 gcc_checking_assert (speedup >= 0);
805 return speedup;
806 }
807
808 /* Return true if the speedup for inlining E is bigger than
809 PARAM_MAX_INLINE_MIN_SPEEDUP. */
810
811 static bool
812 big_speedup_p (struct cgraph_edge *e)
813 {
814 sreal unspec_time;
815 sreal spec_time = estimate_edge_time (e, &unspec_time);
816 sreal freq = e->sreal_frequency ();
817 sreal time = compute_uninlined_call_time (e, unspec_time, freq);
818 sreal inlined_time = compute_inlined_call_time (e, spec_time, freq);
819 cgraph_node *caller = (e->caller->inlined_to
820 ? e->caller->inlined_to
821 : e->caller);
822 int limit = opt_for_fn (caller->decl, param_inline_min_speedup);
823
824 if ((time - inlined_time) * 100 > time * limit)
825 return true;
826 return false;
827 }
828
829 /* Return true if we are interested in inlining small function.
830 When REPORT is true, report reason to dump file. */
831
832 static bool
833 want_inline_small_function_p (struct cgraph_edge *e, bool report)
834 {
835 bool want_inline = true;
836 struct cgraph_node *callee = e->callee->ultimate_alias_target ();
837 cgraph_node *to = (e->caller->inlined_to
838 ? e->caller->inlined_to : e->caller);
839
840 /* Allow this function to be called before can_inline_edge_p,
841 since it's usually cheaper. */
842 if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
843 want_inline = false;
844 else if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
845 ;
846 else if (!DECL_DECLARED_INLINE_P (callee->decl)
847 && !opt_for_fn (e->caller->decl, flag_inline_small_functions))
848 {
849 e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
850 want_inline = false;
851 }
852 /* Do fast and conservative check if the function can be good
853 inline candidate. */
854 else if ((!DECL_DECLARED_INLINE_P (callee->decl)
855 && (!e->count.ipa ().initialized_p () || !e->maybe_hot_p ()))
856 && ipa_fn_summaries->get (callee)->min_size
857 - ipa_call_summaries->get (e)->call_stmt_size
858 > inline_insns_auto (e->caller, true))
859 {
860 e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
861 want_inline = false;
862 }
863 else if ((DECL_DECLARED_INLINE_P (callee->decl)
864 || e->count.ipa ().nonzero_p ())
865 && ipa_fn_summaries->get (callee)->min_size
866 - ipa_call_summaries->get (e)->call_stmt_size
867 > inline_insns_single (e->caller, true))
868 {
869 e->inline_failed = (DECL_DECLARED_INLINE_P (callee->decl)
870 ? CIF_MAX_INLINE_INSNS_SINGLE_LIMIT
871 : CIF_MAX_INLINE_INSNS_AUTO_LIMIT);
872 want_inline = false;
873 }
874 else
875 {
876 int growth = estimate_edge_growth (e);
877 ipa_hints hints = estimate_edge_hints (e);
878 bool apply_hints = (hints & (INLINE_HINT_indirect_call
879 | INLINE_HINT_known_hot
880 | INLINE_HINT_loop_iterations
881 | INLINE_HINT_loop_stride));
882
883 if (growth <= opt_for_fn (to->decl,
884 param_max_inline_insns_size))
885 ;
886 /* Apply param_max_inline_insns_single limit. Do not do so when
887 hints suggests that inlining given function is very profitable.
888 Avoid computation of big_speedup_p when not necessary to change
889 outcome of decision. */
890 else if (DECL_DECLARED_INLINE_P (callee->decl)
891 && growth >= inline_insns_single (e->caller, apply_hints)
892 && (apply_hints
893 || growth >= inline_insns_single (e->caller, true)
894 || !big_speedup_p (e)))
895 {
896 e->inline_failed = CIF_MAX_INLINE_INSNS_SINGLE_LIMIT;
897 want_inline = false;
898 }
899 else if (!DECL_DECLARED_INLINE_P (callee->decl)
900 && !opt_for_fn (e->caller->decl, flag_inline_functions)
901 && growth >= opt_for_fn (to->decl,
902 param_max_inline_insns_small))
903 {
904 /* growth_positive_p is expensive, always test it last. */
905 if (growth >= inline_insns_single (e->caller, false)
906 || growth_positive_p (callee, e, growth))
907 {
908 e->inline_failed = CIF_NOT_DECLARED_INLINED;
909 want_inline = false;
910 }
911 }
912 /* Apply param_max_inline_insns_auto limit for functions not declared
913 inline. Bypass the limit when speedup seems big. */
914 else if (!DECL_DECLARED_INLINE_P (callee->decl)
915 && growth >= inline_insns_auto (e->caller, apply_hints)
916 && (apply_hints
917 || growth >= inline_insns_auto (e->caller, true)
918 || !big_speedup_p (e)))
919 {
920 /* growth_positive_p is expensive, always test it last. */
921 if (growth >= inline_insns_single (e->caller, false)
922 || growth_positive_p (callee, e, growth))
923 {
924 e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
925 want_inline = false;
926 }
927 }
928 /* If call is cold, do not inline when function body would grow. */
929 else if (!e->maybe_hot_p ()
930 && (growth >= inline_insns_single (e->caller, false)
931 || growth_positive_p (callee, e, growth)))
932 {
933 e->inline_failed = CIF_UNLIKELY_CALL;
934 want_inline = false;
935 }
936 }
937 if (!want_inline && report)
938 report_inline_failed_reason (e);
939 return want_inline;
940 }
941
942 /* EDGE is self recursive edge.
943 We handle two cases - when function A is inlining into itself
944 or when function A is being inlined into another inliner copy of function
945 A within function B.
946
947 In first case OUTER_NODE points to the toplevel copy of A, while
948 in the second case OUTER_NODE points to the outermost copy of A in B.
949
950 In both cases we want to be extra selective since
951 inlining the call will just introduce new recursive calls to appear. */
952
953 static bool
954 want_inline_self_recursive_call_p (struct cgraph_edge *edge,
955 struct cgraph_node *outer_node,
956 bool peeling,
957 int depth)
958 {
959 char const *reason = NULL;
960 bool want_inline = true;
961 sreal caller_freq = 1;
962 int max_depth = opt_for_fn (outer_node->decl,
963 param_max_inline_recursive_depth_auto);
964
965 if (DECL_DECLARED_INLINE_P (edge->caller->decl))
966 max_depth = opt_for_fn (outer_node->decl,
967 param_max_inline_recursive_depth);
968
969 if (!edge->maybe_hot_p ())
970 {
971 reason = "recursive call is cold";
972 want_inline = false;
973 }
974 else if (depth > max_depth)
975 {
976 reason = "--param max-inline-recursive-depth exceeded.";
977 want_inline = false;
978 }
979 else if (outer_node->inlined_to
980 && (caller_freq = outer_node->callers->sreal_frequency ()) == 0)
981 {
982 reason = "caller frequency is 0";
983 want_inline = false;
984 }
985
986 if (!want_inline)
987 ;
988 /* Inlining of self recursive function into copy of itself within other
989 function is transformation similar to loop peeling.
990
991 Peeling is profitable if we can inline enough copies to make probability
992 of actual call to the self recursive function very small. Be sure that
993 the probability of recursion is small.
994
995 We ensure that the frequency of recursing is at most 1 - (1/max_depth).
996 This way the expected number of recursion is at most max_depth. */
997 else if (peeling)
998 {
999 sreal max_prob = (sreal)1 - ((sreal)1 / (sreal)max_depth);
1000 int i;
1001 for (i = 1; i < depth; i++)
1002 max_prob = max_prob * max_prob;
1003 if (edge->sreal_frequency () >= max_prob * caller_freq)
1004 {
1005 reason = "frequency of recursive call is too large";
1006 want_inline = false;
1007 }
1008 }
1009 /* Recursive inlining, i.e. equivalent of unrolling, is profitable if
1010 recursion depth is large. We reduce function call overhead and increase
1011 chances that things fit in hardware return predictor.
1012
1013 Recursive inlining might however increase cost of stack frame setup
1014 actually slowing down functions whose recursion tree is wide rather than
1015 deep.
1016
1017 Deciding reliably on when to do recursive inlining without profile feedback
1018 is tricky. For now we disable recursive inlining when probability of self
1019 recursion is low.
1020
1021 Recursive inlining of self recursive call within loop also results in
1022 large loop depths that generally optimize badly. We may want to throttle
1023 down inlining in those cases. In particular this seems to happen in one
1024 of libstdc++ rb tree methods. */
1025 else
1026 {
1027 if (edge->sreal_frequency () * 100
1028 <= caller_freq
1029 * opt_for_fn (outer_node->decl,
1030 param_min_inline_recursive_probability))
1031 {
1032 reason = "frequency of recursive call is too small";
1033 want_inline = false;
1034 }
1035 }
1036 if (!want_inline && dump_enabled_p ())
1037 dump_printf_loc (MSG_MISSED_OPTIMIZATION, edge->call_stmt,
1038 " not inlining recursively: %s\n", reason);
1039 return want_inline;
1040 }
1041
1042 /* Return true when NODE has uninlinable caller;
1043 set HAS_HOT_CALL if it has hot call.
1044 Worker for cgraph_for_node_and_aliases. */
1045
1046 static bool
1047 check_callers (struct cgraph_node *node, void *has_hot_call)
1048 {
1049 struct cgraph_edge *e;
1050 for (e = node->callers; e; e = e->next_caller)
1051 {
1052 if (!opt_for_fn (e->caller->decl, flag_inline_functions_called_once)
1053 || !opt_for_fn (e->caller->decl, optimize))
1054 return true;
1055 if (!can_inline_edge_p (e, true))
1056 return true;
1057 if (e->recursive_p ())
1058 return true;
1059 if (!can_inline_edge_by_limits_p (e, true))
1060 return true;
1061 if (!(*(bool *)has_hot_call) && e->maybe_hot_p ())
1062 *(bool *)has_hot_call = true;
1063 }
1064 return false;
1065 }
1066
1067 /* If NODE has a caller, return true. */
1068
1069 static bool
1070 has_caller_p (struct cgraph_node *node, void *data ATTRIBUTE_UNUSED)
1071 {
1072 if (node->callers)
1073 return true;
1074 return false;
1075 }
1076
1077 /* Decide if inlining NODE would reduce unit size by eliminating
1078 the offline copy of function.
1079 When COLD is true the cold calls are considered, too. */
1080
1081 static bool
1082 want_inline_function_to_all_callers_p (struct cgraph_node *node, bool cold)
1083 {
1084 bool has_hot_call = false;
1085
1086 /* Aliases gets inlined along with the function they alias. */
1087 if (node->alias)
1088 return false;
1089 /* Already inlined? */
1090 if (node->inlined_to)
1091 return false;
1092 /* Does it have callers? */
1093 if (!node->call_for_symbol_and_aliases (has_caller_p, NULL, true))
1094 return false;
1095 /* Inlining into all callers would increase size? */
1096 if (growth_positive_p (node, NULL, INT_MIN) > 0)
1097 return false;
1098 /* All inlines must be possible. */
1099 if (node->call_for_symbol_and_aliases (check_callers, &has_hot_call,
1100 true))
1101 return false;
1102 if (!cold && !has_hot_call)
1103 return false;
1104 return true;
1105 }
1106
1107 /* Return true if WHERE of SIZE is a possible candidate for wrapper heuristics
1108 in estimate_edge_badness. */
1109
1110 static bool
1111 wrapper_heuristics_may_apply (struct cgraph_node *where, int size)
1112 {
1113 return size < (DECL_DECLARED_INLINE_P (where->decl)
1114 ? inline_insns_single (where, false)
1115 : inline_insns_auto (where, false));
1116 }
1117
1118 /* A cost model driving the inlining heuristics in a way so the edges with
1119 smallest badness are inlined first. After each inlining is performed
1120 the costs of all caller edges of nodes affected are recomputed so the
1121 metrics may accurately depend on values such as number of inlinable callers
1122 of the function or function body size. */
1123
1124 static sreal
1125 edge_badness (struct cgraph_edge *edge, bool dump)
1126 {
1127 sreal badness;
1128 int growth;
1129 sreal edge_time, unspec_edge_time;
1130 struct cgraph_node *callee = edge->callee->ultimate_alias_target ();
1131 class ipa_fn_summary *callee_info = ipa_fn_summaries->get (callee);
1132 ipa_hints hints;
1133 cgraph_node *caller = (edge->caller->inlined_to
1134 ? edge->caller->inlined_to
1135 : edge->caller);
1136
1137 growth = estimate_edge_growth (edge);
1138 edge_time = estimate_edge_time (edge, &unspec_edge_time);
1139 hints = estimate_edge_hints (edge);
1140 gcc_checking_assert (edge_time >= 0);
1141 /* Check that inlined time is better, but tolerate some roundoff issues.
1142 FIXME: When callee profile drops to 0 we account calls more. This
1143 should be fixed by never doing that. */
1144 gcc_checking_assert ((edge_time * 100
1145 - callee_info->time * 101).to_int () <= 0
1146 || callee->count.ipa ().initialized_p ());
1147 gcc_checking_assert (growth <= ipa_size_summaries->get (callee)->size);
1148
1149 if (dump)
1150 {
1151 fprintf (dump_file, " Badness calculation for %s -> %s\n",
1152 edge->caller->dump_name (),
1153 edge->callee->dump_name ());
1154 fprintf (dump_file, " size growth %i, time %f unspec %f ",
1155 growth,
1156 edge_time.to_double (),
1157 unspec_edge_time.to_double ());
1158 ipa_dump_hints (dump_file, hints);
1159 if (big_speedup_p (edge))
1160 fprintf (dump_file, " big_speedup");
1161 fprintf (dump_file, "\n");
1162 }
1163
1164 /* Always prefer inlining saving code size. */
1165 if (growth <= 0)
1166 {
1167 badness = (sreal) (-SREAL_MIN_SIG + growth) << (SREAL_MAX_EXP / 256);
1168 if (dump)
1169 fprintf (dump_file, " %f: Growth %d <= 0\n", badness.to_double (),
1170 growth);
1171 }
1172 /* Inlining into EXTERNAL functions is not going to change anything unless
1173 they are themselves inlined. */
1174 else if (DECL_EXTERNAL (caller->decl))
1175 {
1176 if (dump)
1177 fprintf (dump_file, " max: function is external\n");
1178 return sreal::max ();
1179 }
1180 /* When profile is available. Compute badness as:
1181
1182 time_saved * caller_count
1183 goodness = -------------------------------------------------
1184 growth_of_caller * overall_growth * combined_size
1185
1186 badness = - goodness
1187
1188 Again use negative value to make calls with profile appear hotter
1189 then calls without.
1190 */
1191 else if (opt_for_fn (caller->decl, flag_guess_branch_prob)
1192 || caller->count.ipa ().nonzero_p ())
1193 {
1194 sreal numerator, denominator;
1195 int overall_growth;
1196 sreal freq = edge->sreal_frequency ();
1197
1198 numerator = inlining_speedup (edge, freq, unspec_edge_time, edge_time);
1199 if (numerator <= 0)
1200 numerator = ((sreal) 1 >> 8);
1201 if (caller->count.ipa ().nonzero_p ())
1202 numerator *= caller->count.ipa ().to_gcov_type ();
1203 else if (caller->count.ipa ().initialized_p ())
1204 numerator = numerator >> 11;
1205 denominator = growth;
1206
1207 overall_growth = callee_info->growth;
1208
1209 /* Look for inliner wrappers of the form:
1210
1211 inline_caller ()
1212 {
1213 do_fast_job...
1214 if (need_more_work)
1215 noninline_callee ();
1216 }
1217 Without penalizing this case, we usually inline noninline_callee
1218 into the inline_caller because overall_growth is small preventing
1219 further inlining of inline_caller.
1220
1221 Penalize only callgraph edges to functions with small overall
1222 growth ...
1223 */
1224 if (growth > overall_growth
1225 /* ... and having only one caller which is not inlined ... */
1226 && callee_info->single_caller
1227 && !edge->caller->inlined_to
1228 /* ... and edges executed only conditionally ... */
1229 && freq < 1
1230 /* ... consider case where callee is not inline but caller is ... */
1231 && ((!DECL_DECLARED_INLINE_P (edge->callee->decl)
1232 && DECL_DECLARED_INLINE_P (caller->decl))
1233 /* ... or when early optimizers decided to split and edge
1234 frequency still indicates splitting is a win ... */
1235 || (callee->split_part && !caller->split_part
1236 && freq * 100
1237 < opt_for_fn (caller->decl,
1238 param_partial_inlining_entry_probability)
1239 /* ... and do not overwrite user specified hints. */
1240 && (!DECL_DECLARED_INLINE_P (edge->callee->decl)
1241 || DECL_DECLARED_INLINE_P (caller->decl)))))
1242 {
1243 ipa_fn_summary *caller_info = ipa_fn_summaries->get (caller);
1244 int caller_growth = caller_info->growth;
1245
1246 /* Only apply the penalty when caller looks like inline candidate,
1247 and it is not called once. */
1248 if (!caller_info->single_caller && overall_growth < caller_growth
1249 && caller_info->inlinable
1250 && wrapper_heuristics_may_apply
1251 (caller, ipa_size_summaries->get (caller)->size))
1252 {
1253 if (dump)
1254 fprintf (dump_file,
1255 " Wrapper penalty. Increasing growth %i to %i\n",
1256 overall_growth, caller_growth);
1257 overall_growth = caller_growth;
1258 }
1259 }
1260 if (overall_growth > 0)
1261 {
1262 /* Strongly prefer functions with few callers that can be inlined
1263 fully. The square root here leads to smaller binaries at average.
1264 Watch however for extreme cases and return to linear function
1265 when growth is large. */
1266 if (overall_growth < 256)
1267 overall_growth *= overall_growth;
1268 else
1269 overall_growth += 256 * 256 - 256;
1270 denominator *= overall_growth;
1271 }
1272 denominator *= ipa_size_summaries->get (caller)->size + growth;
1273
1274 badness = - numerator / denominator;
1275
1276 if (dump)
1277 {
1278 fprintf (dump_file,
1279 " %f: guessed profile. frequency %f, count %" PRId64
1280 " caller count %" PRId64
1281 " time saved %f"
1282 " overall growth %i (current) %i (original)"
1283 " %i (compensated)\n",
1284 badness.to_double (),
1285 freq.to_double (),
1286 edge->count.ipa ().initialized_p () ? edge->count.ipa ().to_gcov_type () : -1,
1287 caller->count.ipa ().initialized_p () ? caller->count.ipa ().to_gcov_type () : -1,
1288 inlining_speedup (edge, freq, unspec_edge_time, edge_time).to_double (),
1289 estimate_growth (callee),
1290 callee_info->growth, overall_growth);
1291 }
1292 }
1293 /* When function local profile is not available or it does not give
1294 useful information (i.e. frequency is zero), base the cost on
1295 loop nest and overall size growth, so we optimize for overall number
1296 of functions fully inlined in program. */
1297 else
1298 {
1299 int nest = MIN (ipa_call_summaries->get (edge)->loop_depth, 8);
1300 badness = growth;
1301
1302 /* Decrease badness if call is nested. */
1303 if (badness > 0)
1304 badness = badness >> nest;
1305 else
1306 badness = badness << nest;
1307 if (dump)
1308 fprintf (dump_file, " %f: no profile. nest %i\n",
1309 badness.to_double (), nest);
1310 }
1311 gcc_checking_assert (badness != 0);
1312
1313 if (edge->recursive_p ())
1314 badness = badness.shift (badness > 0 ? 4 : -4);
1315 if ((hints & (INLINE_HINT_indirect_call
1316 | INLINE_HINT_loop_iterations
1317 | INLINE_HINT_loop_stride))
1318 || callee_info->growth <= 0)
1319 badness = badness.shift (badness > 0 ? -2 : 2);
1320 if (hints & (INLINE_HINT_same_scc))
1321 badness = badness.shift (badness > 0 ? 3 : -3);
1322 else if (hints & (INLINE_HINT_in_scc))
1323 badness = badness.shift (badness > 0 ? 2 : -2);
1324 else if (hints & (INLINE_HINT_cross_module))
1325 badness = badness.shift (badness > 0 ? 1 : -1);
1326 if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
1327 badness = badness.shift (badness > 0 ? -4 : 4);
1328 else if ((hints & INLINE_HINT_declared_inline))
1329 badness = badness.shift (badness > 0 ? -3 : 3);
1330 if (dump)
1331 fprintf (dump_file, " Adjusted by hints %f\n", badness.to_double ());
1332 return badness;
1333 }
1334
1335 /* Recompute badness of EDGE and update its key in HEAP if needed. */
1336 static inline void
1337 update_edge_key (edge_heap_t *heap, struct cgraph_edge *edge)
1338 {
1339 sreal badness = edge_badness (edge, false);
1340 if (edge->aux)
1341 {
1342 edge_heap_node_t *n = (edge_heap_node_t *) edge->aux;
1343 gcc_checking_assert (n->get_data () == edge);
1344
1345 /* fibonacci_heap::replace_key does busy updating of the
1346 heap that is unnecessarily expensive.
1347 We do lazy increases: after extracting minimum if the key
1348 turns out to be out of date, it is re-inserted into heap
1349 with correct value. */
1350 if (badness < n->get_key ())
1351 {
1352 if (dump_file && (dump_flags & TDF_DETAILS))
1353 {
1354 fprintf (dump_file,
1355 " decreasing badness %s -> %s, %f to %f\n",
1356 edge->caller->dump_name (),
1357 edge->callee->dump_name (),
1358 n->get_key ().to_double (),
1359 badness.to_double ());
1360 }
1361 heap->decrease_key (n, badness);
1362 }
1363 }
1364 else
1365 {
1366 if (dump_file && (dump_flags & TDF_DETAILS))
1367 {
1368 fprintf (dump_file,
1369 " enqueuing call %s -> %s, badness %f\n",
1370 edge->caller->dump_name (),
1371 edge->callee->dump_name (),
1372 badness.to_double ());
1373 }
1374 edge->aux = heap->insert (badness, edge);
1375 }
1376 }
1377
1378
1379 /* NODE was inlined.
1380 All caller edges needs to be reset because
1381 size estimates change. Similarly callees needs reset
1382 because better context may be known. */
1383
1384 static void
1385 reset_edge_caches (struct cgraph_node *node)
1386 {
1387 struct cgraph_edge *edge;
1388 struct cgraph_edge *e = node->callees;
1389 struct cgraph_node *where = node;
1390 struct ipa_ref *ref;
1391
1392 if (where->inlined_to)
1393 where = where->inlined_to;
1394
1395 reset_node_cache (where);
1396
1397 if (edge_growth_cache != NULL)
1398 for (edge = where->callers; edge; edge = edge->next_caller)
1399 if (edge->inline_failed)
1400 edge_growth_cache->remove (edge);
1401
1402 FOR_EACH_ALIAS (where, ref)
1403 reset_edge_caches (dyn_cast <cgraph_node *> (ref->referring));
1404
1405 if (!e)
1406 return;
1407
1408 while (true)
1409 if (!e->inline_failed && e->callee->callees)
1410 e = e->callee->callees;
1411 else
1412 {
1413 if (edge_growth_cache != NULL && e->inline_failed)
1414 edge_growth_cache->remove (e);
1415 if (e->next_callee)
1416 e = e->next_callee;
1417 else
1418 {
1419 do
1420 {
1421 if (e->caller == node)
1422 return;
1423 e = e->caller->callers;
1424 }
1425 while (!e->next_callee);
1426 e = e->next_callee;
1427 }
1428 }
1429 }
1430
1431 /* Recompute HEAP nodes for each of caller of NODE.
1432 UPDATED_NODES track nodes we already visited, to avoid redundant work.
1433 When CHECK_INLINABLITY_FOR is set, re-check for specified edge that
1434 it is inlinable. Otherwise check all edges. */
1435
1436 static void
1437 update_caller_keys (edge_heap_t *heap, struct cgraph_node *node,
1438 bitmap updated_nodes,
1439 struct cgraph_edge *check_inlinablity_for)
1440 {
1441 struct cgraph_edge *edge;
1442 struct ipa_ref *ref;
1443
1444 if ((!node->alias && !ipa_fn_summaries->get (node)->inlinable)
1445 || node->inlined_to)
1446 return;
1447 if (!bitmap_set_bit (updated_nodes, node->get_uid ()))
1448 return;
1449
1450 FOR_EACH_ALIAS (node, ref)
1451 {
1452 struct cgraph_node *alias = dyn_cast <cgraph_node *> (ref->referring);
1453 update_caller_keys (heap, alias, updated_nodes, check_inlinablity_for);
1454 }
1455
1456 for (edge = node->callers; edge; edge = edge->next_caller)
1457 if (edge->inline_failed)
1458 {
1459 if (!check_inlinablity_for
1460 || check_inlinablity_for == edge)
1461 {
1462 if (can_inline_edge_p (edge, false)
1463 && want_inline_small_function_p (edge, false)
1464 && can_inline_edge_by_limits_p (edge, false))
1465 update_edge_key (heap, edge);
1466 else if (edge->aux)
1467 {
1468 report_inline_failed_reason (edge);
1469 heap->delete_node ((edge_heap_node_t *) edge->aux);
1470 edge->aux = NULL;
1471 }
1472 }
1473 else if (edge->aux)
1474 update_edge_key (heap, edge);
1475 }
1476 }
1477
1478 /* Recompute HEAP nodes for each uninlined call in NODE
1479 If UPDATE_SINCE is non-NULL check if edges called within that function
1480 are inlinable (typically UPDATE_SINCE is the inline clone we introduced
1481 where all edges have new context).
1482
1483 This is used when we know that edge badnesses are going only to increase
1484 (we introduced new call site) and thus all we need is to insert newly
1485 created edges into heap. */
1486
1487 static void
1488 update_callee_keys (edge_heap_t *heap, struct cgraph_node *node,
1489 struct cgraph_node *update_since,
1490 bitmap updated_nodes)
1491 {
1492 struct cgraph_edge *e = node->callees;
1493 bool check_inlinability = update_since == node;
1494
1495 if (!e)
1496 return;
1497 while (true)
1498 if (!e->inline_failed && e->callee->callees)
1499 {
1500 if (e->callee == update_since)
1501 check_inlinability = true;
1502 e = e->callee->callees;
1503 }
1504 else
1505 {
1506 enum availability avail;
1507 struct cgraph_node *callee;
1508 if (!check_inlinability)
1509 {
1510 if (e->aux
1511 && !bitmap_bit_p (updated_nodes,
1512 e->callee->ultimate_alias_target
1513 (&avail, e->caller)->get_uid ()))
1514 update_edge_key (heap, e);
1515 }
1516 /* We do not reset callee growth cache here. Since we added a new call,
1517 growth should have just increased and consequently badness metric
1518 don't need updating. */
1519 else if (e->inline_failed
1520 && (callee = e->callee->ultimate_alias_target (&avail,
1521 e->caller))
1522 && avail >= AVAIL_AVAILABLE
1523 && ipa_fn_summaries->get (callee) != NULL
1524 && ipa_fn_summaries->get (callee)->inlinable
1525 && !bitmap_bit_p (updated_nodes, callee->get_uid ()))
1526 {
1527 if (can_inline_edge_p (e, false)
1528 && want_inline_small_function_p (e, false)
1529 && can_inline_edge_by_limits_p (e, false))
1530 {
1531 gcc_checking_assert (check_inlinability || can_inline_edge_p (e, false));
1532 gcc_checking_assert (check_inlinability || e->aux);
1533 update_edge_key (heap, e);
1534 }
1535 else if (e->aux)
1536 {
1537 report_inline_failed_reason (e);
1538 heap->delete_node ((edge_heap_node_t *) e->aux);
1539 e->aux = NULL;
1540 }
1541 }
1542 /* In case we redirected to unreachable node we only need to remove the
1543 fibheap entry. */
1544 else if (e->aux)
1545 {
1546 heap->delete_node ((edge_heap_node_t *) e->aux);
1547 e->aux = NULL;
1548 }
1549 if (e->next_callee)
1550 e = e->next_callee;
1551 else
1552 {
1553 do
1554 {
1555 if (e->caller == node)
1556 return;
1557 if (e->caller == update_since)
1558 check_inlinability = false;
1559 e = e->caller->callers;
1560 }
1561 while (!e->next_callee);
1562 e = e->next_callee;
1563 }
1564 }
1565 }
1566
1567 /* Enqueue all recursive calls from NODE into priority queue depending on
1568 how likely we want to recursively inline the call. */
1569
1570 static void
1571 lookup_recursive_calls (struct cgraph_node *node, struct cgraph_node *where,
1572 edge_heap_t *heap)
1573 {
1574 struct cgraph_edge *e;
1575 enum availability avail;
1576
1577 for (e = where->callees; e; e = e->next_callee)
1578 if (e->callee == node
1579 || (e->callee->ultimate_alias_target (&avail, e->caller) == node
1580 && avail > AVAIL_INTERPOSABLE))
1581 heap->insert (-e->sreal_frequency (), e);
1582 for (e = where->callees; e; e = e->next_callee)
1583 if (!e->inline_failed)
1584 lookup_recursive_calls (node, e->callee, heap);
1585 }
1586
1587 /* Decide on recursive inlining: in the case function has recursive calls,
1588 inline until body size reaches given argument. If any new indirect edges
1589 are discovered in the process, add them to *NEW_EDGES, unless NEW_EDGES
1590 is NULL. */
1591
1592 static bool
1593 recursive_inlining (struct cgraph_edge *edge,
1594 vec<cgraph_edge *> *new_edges)
1595 {
1596 cgraph_node *to = (edge->caller->inlined_to
1597 ? edge->caller->inlined_to : edge->caller);
1598 int limit = opt_for_fn (to->decl,
1599 param_max_inline_insns_recursive_auto);
1600 edge_heap_t heap (sreal::min ());
1601 struct cgraph_node *node;
1602 struct cgraph_edge *e;
1603 struct cgraph_node *master_clone = NULL, *next;
1604 int depth = 0;
1605 int n = 0;
1606
1607 node = edge->caller;
1608 if (node->inlined_to)
1609 node = node->inlined_to;
1610
1611 if (DECL_DECLARED_INLINE_P (node->decl))
1612 limit = opt_for_fn (to->decl, param_max_inline_insns_recursive);
1613
1614 /* Make sure that function is small enough to be considered for inlining. */
1615 if (estimate_size_after_inlining (node, edge) >= limit)
1616 return false;
1617 lookup_recursive_calls (node, node, &heap);
1618 if (heap.empty ())
1619 return false;
1620
1621 if (dump_file)
1622 fprintf (dump_file,
1623 " Performing recursive inlining on %s\n", node->dump_name ());
1624
1625 /* Do the inlining and update list of recursive call during process. */
1626 while (!heap.empty ())
1627 {
1628 struct cgraph_edge *curr = heap.extract_min ();
1629 struct cgraph_node *cnode, *dest = curr->callee;
1630
1631 if (!can_inline_edge_p (curr, true)
1632 || !can_inline_edge_by_limits_p (curr, true))
1633 continue;
1634
1635 /* MASTER_CLONE is produced in the case we already started modified
1636 the function. Be sure to redirect edge to the original body before
1637 estimating growths otherwise we will be seeing growths after inlining
1638 the already modified body. */
1639 if (master_clone)
1640 {
1641 curr->redirect_callee (master_clone);
1642 if (edge_growth_cache != NULL)
1643 edge_growth_cache->remove (curr);
1644 }
1645
1646 if (estimate_size_after_inlining (node, curr) > limit)
1647 {
1648 curr->redirect_callee (dest);
1649 if (edge_growth_cache != NULL)
1650 edge_growth_cache->remove (curr);
1651 break;
1652 }
1653
1654 depth = 1;
1655 for (cnode = curr->caller;
1656 cnode->inlined_to; cnode = cnode->callers->caller)
1657 if (node->decl
1658 == curr->callee->ultimate_alias_target ()->decl)
1659 depth++;
1660
1661 if (!want_inline_self_recursive_call_p (curr, node, false, depth))
1662 {
1663 curr->redirect_callee (dest);
1664 if (edge_growth_cache != NULL)
1665 edge_growth_cache->remove (curr);
1666 continue;
1667 }
1668
1669 if (dump_file)
1670 {
1671 fprintf (dump_file,
1672 " Inlining call of depth %i", depth);
1673 if (node->count.nonzero_p () && curr->count.initialized_p ())
1674 {
1675 fprintf (dump_file, " called approx. %.2f times per call",
1676 (double)curr->count.to_gcov_type ()
1677 / node->count.to_gcov_type ());
1678 }
1679 fprintf (dump_file, "\n");
1680 }
1681 if (!master_clone)
1682 {
1683 /* We need original clone to copy around. */
1684 master_clone = node->create_clone (node->decl, node->count,
1685 false, vNULL, true, NULL, NULL);
1686 for (e = master_clone->callees; e; e = e->next_callee)
1687 if (!e->inline_failed)
1688 clone_inlined_nodes (e, true, false, NULL);
1689 curr->redirect_callee (master_clone);
1690 if (edge_growth_cache != NULL)
1691 edge_growth_cache->remove (curr);
1692 }
1693
1694 inline_call (curr, false, new_edges, &overall_size, true);
1695 reset_node_cache (node);
1696 lookup_recursive_calls (node, curr->callee, &heap);
1697 n++;
1698 }
1699
1700 if (!heap.empty () && dump_file)
1701 fprintf (dump_file, " Recursive inlining growth limit met.\n");
1702
1703 if (!master_clone)
1704 return false;
1705
1706 if (dump_enabled_p ())
1707 dump_printf_loc (MSG_NOTE, edge->call_stmt,
1708 "\n Inlined %i times, "
1709 "body grown from size %i to %i, time %f to %f\n", n,
1710 ipa_size_summaries->get (master_clone)->size,
1711 ipa_size_summaries->get (node)->size,
1712 ipa_fn_summaries->get (master_clone)->time.to_double (),
1713 ipa_fn_summaries->get (node)->time.to_double ());
1714
1715 /* Remove master clone we used for inlining. We rely that clones inlined
1716 into master clone gets queued just before master clone so we don't
1717 need recursion. */
1718 for (node = symtab->first_function (); node != master_clone;
1719 node = next)
1720 {
1721 next = symtab->next_function (node);
1722 if (node->inlined_to == master_clone)
1723 node->remove ();
1724 }
1725 master_clone->remove ();
1726 return true;
1727 }
1728
1729
1730 /* Given whole compilation unit estimate of INSNS, compute how large we can
1731 allow the unit to grow. */
1732
1733 static int64_t
1734 compute_max_insns (cgraph_node *node, int insns)
1735 {
1736 int max_insns = insns;
1737 if (max_insns < opt_for_fn (node->decl, param_large_unit_insns))
1738 max_insns = opt_for_fn (node->decl, param_large_unit_insns);
1739
1740 return ((int64_t) max_insns
1741 * (100 + opt_for_fn (node->decl, param_inline_unit_growth)) / 100);
1742 }
1743
1744
1745 /* Compute badness of all edges in NEW_EDGES and add them to the HEAP. */
1746
1747 static void
1748 add_new_edges_to_heap (edge_heap_t *heap, vec<cgraph_edge *> new_edges)
1749 {
1750 while (new_edges.length () > 0)
1751 {
1752 struct cgraph_edge *edge = new_edges.pop ();
1753
1754 gcc_assert (!edge->aux);
1755 gcc_assert (edge->callee);
1756 if (edge->inline_failed
1757 && can_inline_edge_p (edge, true)
1758 && want_inline_small_function_p (edge, true)
1759 && can_inline_edge_by_limits_p (edge, true))
1760 edge->aux = heap->insert (edge_badness (edge, false), edge);
1761 }
1762 }
1763
1764 /* Remove EDGE from the fibheap. */
1765
1766 static void
1767 heap_edge_removal_hook (struct cgraph_edge *e, void *data)
1768 {
1769 if (e->aux)
1770 {
1771 ((edge_heap_t *)data)->delete_node ((edge_heap_node_t *)e->aux);
1772 e->aux = NULL;
1773 }
1774 }
1775
1776 /* Return true if speculation of edge E seems useful.
1777 If ANTICIPATE_INLINING is true, be conservative and hope that E
1778 may get inlined. */
1779
1780 bool
1781 speculation_useful_p (struct cgraph_edge *e, bool anticipate_inlining)
1782 {
1783 /* If we have already decided to inline the edge, it seems useful. */
1784 if (!e->inline_failed)
1785 return true;
1786
1787 enum availability avail;
1788 struct cgraph_node *target = e->callee->ultimate_alias_target (&avail,
1789 e->caller);
1790
1791 gcc_assert (e->speculative && !e->indirect_unknown_callee);
1792
1793 if (!e->maybe_hot_p ())
1794 return false;
1795
1796 /* See if IP optimizations found something potentially useful about the
1797 function. For now we look only for CONST/PURE flags. Almost everything
1798 else we propagate is useless. */
1799 if (avail >= AVAIL_AVAILABLE)
1800 {
1801 int ecf_flags = flags_from_decl_or_type (target->decl);
1802 if (ecf_flags & ECF_CONST)
1803 {
1804 if (!(e->speculative_call_indirect_edge ()->indirect_info
1805 ->ecf_flags & ECF_CONST))
1806 return true;
1807 }
1808 else if (ecf_flags & ECF_PURE)
1809 {
1810 if (!(e->speculative_call_indirect_edge ()->indirect_info
1811 ->ecf_flags & ECF_PURE))
1812 return true;
1813 }
1814 }
1815 /* If we did not managed to inline the function nor redirect
1816 to an ipa-cp clone (that are seen by having local flag set),
1817 it is probably pointless to inline it unless hardware is missing
1818 indirect call predictor. */
1819 if (!anticipate_inlining && !target->local)
1820 return false;
1821 /* For overwritable targets there is not much to do. */
1822 if (!can_inline_edge_p (e, false)
1823 || !can_inline_edge_by_limits_p (e, false, true))
1824 return false;
1825 /* OK, speculation seems interesting. */
1826 return true;
1827 }
1828
1829 /* We know that EDGE is not going to be inlined.
1830 See if we can remove speculation. */
1831
1832 static void
1833 resolve_noninline_speculation (edge_heap_t *edge_heap, struct cgraph_edge *edge)
1834 {
1835 if (edge->speculative && !speculation_useful_p (edge, false))
1836 {
1837 struct cgraph_node *node = edge->caller;
1838 struct cgraph_node *where = node->inlined_to
1839 ? node->inlined_to : node;
1840 auto_bitmap updated_nodes;
1841
1842 if (edge->count.ipa ().initialized_p ())
1843 spec_rem += edge->count.ipa ();
1844 cgraph_edge::resolve_speculation (edge);
1845 reset_edge_caches (where);
1846 ipa_update_overall_fn_summary (where);
1847 update_caller_keys (edge_heap, where,
1848 updated_nodes, NULL);
1849 update_callee_keys (edge_heap, where, NULL,
1850 updated_nodes);
1851 }
1852 }
1853
1854 /* Return true if NODE should be accounted for overall size estimate.
1855 Skip all nodes optimized for size so we can measure the growth of hot
1856 part of program no matter of the padding. */
1857
1858 bool
1859 inline_account_function_p (struct cgraph_node *node)
1860 {
1861 return (!DECL_EXTERNAL (node->decl)
1862 && !opt_for_fn (node->decl, optimize_size)
1863 && node->frequency != NODE_FREQUENCY_UNLIKELY_EXECUTED);
1864 }
1865
1866 /* Count number of callers of NODE and store it into DATA (that
1867 points to int. Worker for cgraph_for_node_and_aliases. */
1868
1869 static bool
1870 sum_callers (struct cgraph_node *node, void *data)
1871 {
1872 struct cgraph_edge *e;
1873 int *num_calls = (int *)data;
1874
1875 for (e = node->callers; e; e = e->next_caller)
1876 (*num_calls)++;
1877 return false;
1878 }
1879
1880 /* We only propagate across edges with non-interposable callee. */
1881
1882 inline bool
1883 ignore_edge_p (struct cgraph_edge *e)
1884 {
1885 enum availability avail;
1886 e->callee->function_or_virtual_thunk_symbol (&avail, e->caller);
1887 return (avail <= AVAIL_INTERPOSABLE);
1888 }
1889
1890 /* We use greedy algorithm for inlining of small functions:
1891 All inline candidates are put into prioritized heap ordered in
1892 increasing badness.
1893
1894 The inlining of small functions is bounded by unit growth parameters. */
1895
1896 static void
1897 inline_small_functions (void)
1898 {
1899 struct cgraph_node *node;
1900 struct cgraph_edge *edge;
1901 edge_heap_t edge_heap (sreal::min ());
1902 auto_bitmap updated_nodes;
1903 int min_size;
1904 auto_vec<cgraph_edge *> new_indirect_edges;
1905 int initial_size = 0;
1906 struct cgraph_node **order = XCNEWVEC (cgraph_node *, symtab->cgraph_count);
1907 struct cgraph_edge_hook_list *edge_removal_hook_holder;
1908 new_indirect_edges.create (8);
1909
1910 edge_removal_hook_holder
1911 = symtab->add_edge_removal_hook (&heap_edge_removal_hook, &edge_heap);
1912
1913 /* Compute overall unit size and other global parameters used by badness
1914 metrics. */
1915
1916 max_count = profile_count::uninitialized ();
1917 ipa_reduced_postorder (order, true, ignore_edge_p);
1918 free (order);
1919
1920 FOR_EACH_DEFINED_FUNCTION (node)
1921 if (!node->inlined_to)
1922 {
1923 if (!node->alias && node->analyzed
1924 && (node->has_gimple_body_p () || node->thunk.thunk_p)
1925 && opt_for_fn (node->decl, optimize))
1926 {
1927 class ipa_fn_summary *info = ipa_fn_summaries->get (node);
1928 struct ipa_dfs_info *dfs = (struct ipa_dfs_info *) node->aux;
1929
1930 /* Do not account external functions, they will be optimized out
1931 if not inlined. Also only count the non-cold portion of program. */
1932 if (inline_account_function_p (node))
1933 initial_size += ipa_size_summaries->get (node)->size;
1934 info->growth = estimate_growth (node);
1935
1936 int num_calls = 0;
1937 node->call_for_symbol_and_aliases (sum_callers, &num_calls,
1938 true);
1939 if (num_calls == 1)
1940 info->single_caller = true;
1941 if (dfs && dfs->next_cycle)
1942 {
1943 struct cgraph_node *n2;
1944 int id = dfs->scc_no + 1;
1945 for (n2 = node; n2;
1946 n2 = ((struct ipa_dfs_info *) n2->aux)->next_cycle)
1947 if (opt_for_fn (n2->decl, optimize))
1948 {
1949 ipa_fn_summary *info2 = ipa_fn_summaries->get
1950 (n2->inlined_to ? n2->inlined_to : n2);
1951 if (info2->scc_no)
1952 break;
1953 info2->scc_no = id;
1954 }
1955 }
1956 }
1957
1958 for (edge = node->callers; edge; edge = edge->next_caller)
1959 max_count = max_count.max (edge->count.ipa ());
1960 }
1961 ipa_free_postorder_info ();
1962 initialize_growth_caches ();
1963
1964 if (dump_file)
1965 fprintf (dump_file,
1966 "\nDeciding on inlining of small functions. Starting with size %i.\n",
1967 initial_size);
1968
1969 overall_size = initial_size;
1970 min_size = overall_size;
1971
1972 /* Populate the heap with all edges we might inline. */
1973
1974 FOR_EACH_DEFINED_FUNCTION (node)
1975 {
1976 bool update = false;
1977 struct cgraph_edge *next = NULL;
1978 bool has_speculative = false;
1979
1980 if (!opt_for_fn (node->decl, optimize))
1981 continue;
1982
1983 if (dump_file)
1984 fprintf (dump_file, "Enqueueing calls in %s.\n", node->dump_name ());
1985
1986 for (edge = node->callees; edge; edge = edge->next_callee)
1987 {
1988 if (edge->inline_failed
1989 && !edge->aux
1990 && can_inline_edge_p (edge, true)
1991 && want_inline_small_function_p (edge, true)
1992 && can_inline_edge_by_limits_p (edge, true)
1993 && edge->inline_failed)
1994 {
1995 gcc_assert (!edge->aux);
1996 update_edge_key (&edge_heap, edge);
1997 }
1998 if (edge->speculative)
1999 has_speculative = true;
2000 }
2001 if (has_speculative)
2002 for (edge = node->callees; edge; edge = next)
2003 {
2004 next = edge->next_callee;
2005 if (edge->speculative
2006 && !speculation_useful_p (edge, edge->aux != NULL))
2007 {
2008 cgraph_edge::resolve_speculation (edge);
2009 update = true;
2010 }
2011 }
2012 if (update)
2013 {
2014 struct cgraph_node *where = node->inlined_to
2015 ? node->inlined_to : node;
2016 ipa_update_overall_fn_summary (where);
2017 reset_edge_caches (where);
2018 update_caller_keys (&edge_heap, where,
2019 updated_nodes, NULL);
2020 update_callee_keys (&edge_heap, where, NULL,
2021 updated_nodes);
2022 bitmap_clear (updated_nodes);
2023 }
2024 }
2025
2026 gcc_assert (in_lto_p
2027 || !(max_count > 0)
2028 || (profile_info && flag_branch_probabilities));
2029
2030 while (!edge_heap.empty ())
2031 {
2032 int old_size = overall_size;
2033 struct cgraph_node *where, *callee;
2034 sreal badness = edge_heap.min_key ();
2035 sreal current_badness;
2036 int growth;
2037
2038 edge = edge_heap.extract_min ();
2039 gcc_assert (edge->aux);
2040 edge->aux = NULL;
2041 if (!edge->inline_failed || !edge->callee->analyzed)
2042 continue;
2043
2044 /* Be sure that caches are maintained consistent.
2045 This check is affected by scaling roundoff errors when compiling for
2046 IPA this we skip it in that case. */
2047 if (flag_checking && !edge->callee->count.ipa_p ()
2048 && (!max_count.initialized_p () || !max_count.nonzero_p ()))
2049 {
2050 sreal cached_badness = edge_badness (edge, false);
2051
2052 int old_size_est = estimate_edge_size (edge);
2053 sreal old_time_est = estimate_edge_time (edge);
2054 int old_hints_est = estimate_edge_hints (edge);
2055
2056 if (edge_growth_cache != NULL)
2057 edge_growth_cache->remove (edge);
2058 reset_node_cache (edge->caller->inlined_to
2059 ? edge->caller->inlined_to
2060 : edge->caller);
2061 gcc_assert (old_size_est == estimate_edge_size (edge));
2062 gcc_assert (old_time_est == estimate_edge_time (edge));
2063 /* FIXME:
2064
2065 gcc_assert (old_hints_est == estimate_edge_hints (edge));
2066
2067 fails with profile feedback because some hints depends on
2068 maybe_hot_edge_p predicate and because callee gets inlined to other
2069 calls, the edge may become cold.
2070 This ought to be fixed by computing relative probabilities
2071 for given invocation but that will be better done once whole
2072 code is converted to sreals. Disable for now and revert to "wrong"
2073 value so enable/disable checking paths agree. */
2074 edge_growth_cache->get (edge)->hints = old_hints_est + 1;
2075
2076 /* When updating the edge costs, we only decrease badness in the keys.
2077 Increases of badness are handled lazily; when we see key with out
2078 of date value on it, we re-insert it now. */
2079 current_badness = edge_badness (edge, false);
2080 gcc_assert (cached_badness == current_badness);
2081 gcc_assert (current_badness >= badness);
2082 }
2083 else
2084 current_badness = edge_badness (edge, false);
2085 if (current_badness != badness)
2086 {
2087 if (edge_heap.min () && current_badness > edge_heap.min_key ())
2088 {
2089 edge->aux = edge_heap.insert (current_badness, edge);
2090 continue;
2091 }
2092 else
2093 badness = current_badness;
2094 }
2095
2096 if (!can_inline_edge_p (edge, true)
2097 || !can_inline_edge_by_limits_p (edge, true))
2098 {
2099 resolve_noninline_speculation (&edge_heap, edge);
2100 continue;
2101 }
2102
2103 callee = edge->callee->ultimate_alias_target ();
2104 growth = estimate_edge_growth (edge);
2105 if (dump_file)
2106 {
2107 fprintf (dump_file,
2108 "\nConsidering %s with %i size\n",
2109 callee->dump_name (),
2110 ipa_size_summaries->get (callee)->size);
2111 fprintf (dump_file,
2112 " to be inlined into %s in %s:%i\n"
2113 " Estimated badness is %f, frequency %.2f.\n",
2114 edge->caller->dump_name (),
2115 edge->call_stmt
2116 && (LOCATION_LOCUS (gimple_location ((const gimple *)
2117 edge->call_stmt))
2118 > BUILTINS_LOCATION)
2119 ? gimple_filename ((const gimple *) edge->call_stmt)
2120 : "unknown",
2121 edge->call_stmt
2122 ? gimple_lineno ((const gimple *) edge->call_stmt)
2123 : -1,
2124 badness.to_double (),
2125 edge->sreal_frequency ().to_double ());
2126 if (edge->count.ipa ().initialized_p ())
2127 {
2128 fprintf (dump_file, " Called ");
2129 edge->count.ipa ().dump (dump_file);
2130 fprintf (dump_file, " times\n");
2131 }
2132 if (dump_flags & TDF_DETAILS)
2133 edge_badness (edge, true);
2134 }
2135
2136 where = edge->caller;
2137
2138 if (overall_size + growth > compute_max_insns (where, min_size)
2139 && !DECL_DISREGARD_INLINE_LIMITS (callee->decl))
2140 {
2141 edge->inline_failed = CIF_INLINE_UNIT_GROWTH_LIMIT;
2142 report_inline_failed_reason (edge);
2143 resolve_noninline_speculation (&edge_heap, edge);
2144 continue;
2145 }
2146
2147 if (!want_inline_small_function_p (edge, true))
2148 {
2149 resolve_noninline_speculation (&edge_heap, edge);
2150 continue;
2151 }
2152
2153 profile_count old_count = callee->count;
2154
2155 /* Heuristics for inlining small functions work poorly for
2156 recursive calls where we do effects similar to loop unrolling.
2157 When inlining such edge seems profitable, leave decision on
2158 specific inliner. */
2159 if (edge->recursive_p ())
2160 {
2161 if (where->inlined_to)
2162 where = where->inlined_to;
2163 if (!recursive_inlining (edge,
2164 opt_for_fn (edge->caller->decl,
2165 flag_indirect_inlining)
2166 ? &new_indirect_edges : NULL))
2167 {
2168 edge->inline_failed = CIF_RECURSIVE_INLINING;
2169 resolve_noninline_speculation (&edge_heap, edge);
2170 continue;
2171 }
2172 reset_edge_caches (where);
2173 /* Recursive inliner inlines all recursive calls of the function
2174 at once. Consequently we need to update all callee keys. */
2175 if (opt_for_fn (edge->caller->decl, flag_indirect_inlining))
2176 add_new_edges_to_heap (&edge_heap, new_indirect_edges);
2177 update_callee_keys (&edge_heap, where, where, updated_nodes);
2178 bitmap_clear (updated_nodes);
2179 }
2180 else
2181 {
2182 struct cgraph_node *outer_node = NULL;
2183 int depth = 0;
2184
2185 /* Consider the case where self recursive function A is inlined
2186 into B. This is desired optimization in some cases, since it
2187 leads to effect similar of loop peeling and we might completely
2188 optimize out the recursive call. However we must be extra
2189 selective. */
2190
2191 where = edge->caller;
2192 while (where->inlined_to)
2193 {
2194 if (where->decl == callee->decl)
2195 outer_node = where, depth++;
2196 where = where->callers->caller;
2197 }
2198 if (outer_node
2199 && !want_inline_self_recursive_call_p (edge, outer_node,
2200 true, depth))
2201 {
2202 edge->inline_failed
2203 = (DECL_DISREGARD_INLINE_LIMITS (edge->callee->decl)
2204 ? CIF_RECURSIVE_INLINING : CIF_UNSPECIFIED);
2205 resolve_noninline_speculation (&edge_heap, edge);
2206 continue;
2207 }
2208 else if (depth && dump_file)
2209 fprintf (dump_file, " Peeling recursion with depth %i\n", depth);
2210
2211 gcc_checking_assert (!callee->inlined_to);
2212
2213 int old_size = ipa_size_summaries->get (where)->size;
2214 sreal old_time = ipa_fn_summaries->get (where)->time;
2215
2216 inline_call (edge, true, &new_indirect_edges, &overall_size, true);
2217 reset_edge_caches (edge->callee);
2218 add_new_edges_to_heap (&edge_heap, new_indirect_edges);
2219
2220 /* If caller's size and time increased we do not need to update
2221 all edges because badness is not going to decrease. */
2222 if (old_size <= ipa_size_summaries->get (where)->size
2223 && old_time <= ipa_fn_summaries->get (where)->time
2224 /* Wrapper penalty may be non-monotonous in this respect.
2225 Fortunately it only affects small functions. */
2226 && !wrapper_heuristics_may_apply (where, old_size))
2227 update_callee_keys (&edge_heap, edge->callee, edge->callee,
2228 updated_nodes);
2229 else
2230 update_callee_keys (&edge_heap, where,
2231 edge->callee,
2232 updated_nodes);
2233 }
2234 where = edge->caller;
2235 if (where->inlined_to)
2236 where = where->inlined_to;
2237
2238 /* Our profitability metric can depend on local properties
2239 such as number of inlinable calls and size of the function body.
2240 After inlining these properties might change for the function we
2241 inlined into (since it's body size changed) and for the functions
2242 called by function we inlined (since number of it inlinable callers
2243 might change). */
2244 update_caller_keys (&edge_heap, where, updated_nodes, NULL);
2245 /* Offline copy count has possibly changed, recompute if profile is
2246 available. */
2247 struct cgraph_node *n
2248 = cgraph_node::get (edge->callee->decl)->ultimate_alias_target ();
2249 if (n != edge->callee && n->analyzed && !(n->count == old_count)
2250 && n->count.ipa_p ())
2251 update_callee_keys (&edge_heap, n, NULL, updated_nodes);
2252 bitmap_clear (updated_nodes);
2253
2254 if (dump_enabled_p ())
2255 {
2256 ipa_fn_summary *s = ipa_fn_summaries->get (where);
2257
2258 /* dump_printf can't handle %+i. */
2259 char buf_net_change[100];
2260 snprintf (buf_net_change, sizeof buf_net_change, "%+i",
2261 overall_size - old_size);
2262
2263 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, edge->call_stmt,
2264 " Inlined %C into %C which now has time %f and "
2265 "size %i, net change of %s%s.\n",
2266 edge->callee, edge->caller,
2267 s->time.to_double (),
2268 ipa_size_summaries->get (edge->caller)->size,
2269 buf_net_change,
2270 cross_module_call_p (edge) ? " (cross module)":"");
2271 }
2272 if (min_size > overall_size)
2273 {
2274 min_size = overall_size;
2275
2276 if (dump_file)
2277 fprintf (dump_file, "New minimal size reached: %i\n", min_size);
2278 }
2279 }
2280
2281 free_growth_caches ();
2282 if (dump_enabled_p ())
2283 dump_printf (MSG_NOTE,
2284 "Unit growth for small function inlining: %i->%i (%i%%)\n",
2285 initial_size, overall_size,
2286 initial_size ? overall_size * 100 / (initial_size) - 100: 0);
2287 symtab->remove_edge_removal_hook (edge_removal_hook_holder);
2288 }
2289
2290 /* Flatten NODE. Performed both during early inlining and
2291 at IPA inlining time. */
2292
2293 static void
2294 flatten_function (struct cgraph_node *node, bool early, bool update)
2295 {
2296 struct cgraph_edge *e;
2297
2298 /* We shouldn't be called recursively when we are being processed. */
2299 gcc_assert (node->aux == NULL);
2300
2301 node->aux = (void *) node;
2302
2303 for (e = node->callees; e; e = e->next_callee)
2304 {
2305 struct cgraph_node *orig_callee;
2306 struct cgraph_node *callee = e->callee->ultimate_alias_target ();
2307
2308 /* We've hit cycle? It is time to give up. */
2309 if (callee->aux)
2310 {
2311 if (dump_enabled_p ())
2312 dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2313 "Not inlining %C into %C to avoid cycle.\n",
2314 callee, e->caller);
2315 if (cgraph_inline_failed_type (e->inline_failed) != CIF_FINAL_ERROR)
2316 e->inline_failed = CIF_RECURSIVE_INLINING;
2317 continue;
2318 }
2319
2320 /* When the edge is already inlined, we just need to recurse into
2321 it in order to fully flatten the leaves. */
2322 if (!e->inline_failed)
2323 {
2324 flatten_function (callee, early, false);
2325 continue;
2326 }
2327
2328 /* Flatten attribute needs to be processed during late inlining. For
2329 extra code quality we however do flattening during early optimization,
2330 too. */
2331 if (!early
2332 ? !can_inline_edge_p (e, true)
2333 && !can_inline_edge_by_limits_p (e, true)
2334 : !can_early_inline_edge_p (e))
2335 continue;
2336
2337 if (e->recursive_p ())
2338 {
2339 if (dump_enabled_p ())
2340 dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2341 "Not inlining: recursive call.\n");
2342 continue;
2343 }
2344
2345 if (gimple_in_ssa_p (DECL_STRUCT_FUNCTION (node->decl))
2346 != gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee->decl)))
2347 {
2348 if (dump_enabled_p ())
2349 dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2350 "Not inlining: SSA form does not match.\n");
2351 continue;
2352 }
2353
2354 /* Inline the edge and flatten the inline clone. Avoid
2355 recursing through the original node if the node was cloned. */
2356 if (dump_enabled_p ())
2357 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, e->call_stmt,
2358 " Inlining %C into %C.\n",
2359 callee, e->caller);
2360 orig_callee = callee;
2361 inline_call (e, true, NULL, NULL, false);
2362 if (e->callee != orig_callee)
2363 orig_callee->aux = (void *) node;
2364 flatten_function (e->callee, early, false);
2365 if (e->callee != orig_callee)
2366 orig_callee->aux = NULL;
2367 }
2368
2369 node->aux = NULL;
2370 cgraph_node *where = node->inlined_to ? node->inlined_to : node;
2371 if (update && opt_for_fn (where->decl, optimize))
2372 ipa_update_overall_fn_summary (where);
2373 }
2374
2375 /* Inline NODE to all callers. Worker for cgraph_for_node_and_aliases.
2376 DATA points to number of calls originally found so we avoid infinite
2377 recursion. */
2378
2379 static bool
2380 inline_to_all_callers_1 (struct cgraph_node *node, void *data,
2381 hash_set<cgraph_node *> *callers)
2382 {
2383 int *num_calls = (int *)data;
2384 bool callee_removed = false;
2385
2386 while (node->callers && !node->inlined_to)
2387 {
2388 struct cgraph_node *caller = node->callers->caller;
2389
2390 if (!can_inline_edge_p (node->callers, true)
2391 || !can_inline_edge_by_limits_p (node->callers, true)
2392 || node->callers->recursive_p ())
2393 {
2394 if (dump_file)
2395 fprintf (dump_file, "Uninlinable call found; giving up.\n");
2396 *num_calls = 0;
2397 return false;
2398 }
2399
2400 if (dump_file)
2401 {
2402 cgraph_node *ultimate = node->ultimate_alias_target ();
2403 fprintf (dump_file,
2404 "\nInlining %s size %i.\n",
2405 ultimate->dump_name (),
2406 ipa_size_summaries->get (ultimate)->size);
2407 fprintf (dump_file,
2408 " Called once from %s %i insns.\n",
2409 node->callers->caller->dump_name (),
2410 ipa_size_summaries->get (node->callers->caller)->size);
2411 }
2412
2413 /* Remember which callers we inlined to, delaying updating the
2414 overall summary. */
2415 callers->add (node->callers->caller);
2416 inline_call (node->callers, true, NULL, NULL, false, &callee_removed);
2417 if (dump_file)
2418 fprintf (dump_file,
2419 " Inlined into %s which now has %i size\n",
2420 caller->dump_name (),
2421 ipa_size_summaries->get (caller)->size);
2422 if (!(*num_calls)--)
2423 {
2424 if (dump_file)
2425 fprintf (dump_file, "New calls found; giving up.\n");
2426 return callee_removed;
2427 }
2428 if (callee_removed)
2429 return true;
2430 }
2431 return false;
2432 }
2433
2434 /* Wrapper around inline_to_all_callers_1 doing delayed overall summary
2435 update. */
2436
2437 static bool
2438 inline_to_all_callers (struct cgraph_node *node, void *data)
2439 {
2440 hash_set<cgraph_node *> callers;
2441 bool res = inline_to_all_callers_1 (node, data, &callers);
2442 /* Perform the delayed update of the overall summary of all callers
2443 processed. This avoids quadratic behavior in the cases where
2444 we have a lot of calls to the same function. */
2445 for (hash_set<cgraph_node *>::iterator i = callers.begin ();
2446 i != callers.end (); ++i)
2447 ipa_update_overall_fn_summary ((*i)->inlined_to ? (*i)->inlined_to : *i);
2448 return res;
2449 }
2450
2451 /* Output overall time estimate. */
2452 static void
2453 dump_overall_stats (void)
2454 {
2455 sreal sum_weighted = 0, sum = 0;
2456 struct cgraph_node *node;
2457
2458 FOR_EACH_DEFINED_FUNCTION (node)
2459 if (!node->inlined_to
2460 && !node->alias)
2461 {
2462 ipa_fn_summary *s = ipa_fn_summaries->get (node);
2463 if (s != NULL)
2464 {
2465 sum += s->time;
2466 if (node->count.ipa ().initialized_p ())
2467 sum_weighted += s->time * node->count.ipa ().to_gcov_type ();
2468 }
2469 }
2470 fprintf (dump_file, "Overall time estimate: "
2471 "%f weighted by profile: "
2472 "%f\n", sum.to_double (), sum_weighted.to_double ());
2473 }
2474
2475 /* Output some useful stats about inlining. */
2476
2477 static void
2478 dump_inline_stats (void)
2479 {
2480 int64_t inlined_cnt = 0, inlined_indir_cnt = 0;
2481 int64_t inlined_virt_cnt = 0, inlined_virt_indir_cnt = 0;
2482 int64_t noninlined_cnt = 0, noninlined_indir_cnt = 0;
2483 int64_t noninlined_virt_cnt = 0, noninlined_virt_indir_cnt = 0;
2484 int64_t inlined_speculative = 0, inlined_speculative_ply = 0;
2485 int64_t indirect_poly_cnt = 0, indirect_cnt = 0;
2486 int64_t reason[CIF_N_REASONS][2];
2487 sreal reason_freq[CIF_N_REASONS];
2488 int i;
2489 struct cgraph_node *node;
2490
2491 memset (reason, 0, sizeof (reason));
2492 for (i=0; i < CIF_N_REASONS; i++)
2493 reason_freq[i] = 0;
2494 FOR_EACH_DEFINED_FUNCTION (node)
2495 {
2496 struct cgraph_edge *e;
2497 for (e = node->callees; e; e = e->next_callee)
2498 {
2499 if (e->inline_failed)
2500 {
2501 if (e->count.ipa ().initialized_p ())
2502 reason[(int) e->inline_failed][0] += e->count.ipa ().to_gcov_type ();
2503 reason_freq[(int) e->inline_failed] += e->sreal_frequency ();
2504 reason[(int) e->inline_failed][1] ++;
2505 if (DECL_VIRTUAL_P (e->callee->decl)
2506 && e->count.ipa ().initialized_p ())
2507 {
2508 if (e->indirect_inlining_edge)
2509 noninlined_virt_indir_cnt += e->count.ipa ().to_gcov_type ();
2510 else
2511 noninlined_virt_cnt += e->count.ipa ().to_gcov_type ();
2512 }
2513 else if (e->count.ipa ().initialized_p ())
2514 {
2515 if (e->indirect_inlining_edge)
2516 noninlined_indir_cnt += e->count.ipa ().to_gcov_type ();
2517 else
2518 noninlined_cnt += e->count.ipa ().to_gcov_type ();
2519 }
2520 }
2521 else if (e->count.ipa ().initialized_p ())
2522 {
2523 if (e->speculative)
2524 {
2525 if (DECL_VIRTUAL_P (e->callee->decl))
2526 inlined_speculative_ply += e->count.ipa ().to_gcov_type ();
2527 else
2528 inlined_speculative += e->count.ipa ().to_gcov_type ();
2529 }
2530 else if (DECL_VIRTUAL_P (e->callee->decl))
2531 {
2532 if (e->indirect_inlining_edge)
2533 inlined_virt_indir_cnt += e->count.ipa ().to_gcov_type ();
2534 else
2535 inlined_virt_cnt += e->count.ipa ().to_gcov_type ();
2536 }
2537 else
2538 {
2539 if (e->indirect_inlining_edge)
2540 inlined_indir_cnt += e->count.ipa ().to_gcov_type ();
2541 else
2542 inlined_cnt += e->count.ipa ().to_gcov_type ();
2543 }
2544 }
2545 }
2546 for (e = node->indirect_calls; e; e = e->next_callee)
2547 if (e->indirect_info->polymorphic
2548 & e->count.ipa ().initialized_p ())
2549 indirect_poly_cnt += e->count.ipa ().to_gcov_type ();
2550 else if (e->count.ipa ().initialized_p ())
2551 indirect_cnt += e->count.ipa ().to_gcov_type ();
2552 }
2553 if (max_count.initialized_p ())
2554 {
2555 fprintf (dump_file,
2556 "Inlined %" PRId64 " + speculative "
2557 "%" PRId64 " + speculative polymorphic "
2558 "%" PRId64 " + previously indirect "
2559 "%" PRId64 " + virtual "
2560 "%" PRId64 " + virtual and previously indirect "
2561 "%" PRId64 "\n" "Not inlined "
2562 "%" PRId64 " + previously indirect "
2563 "%" PRId64 " + virtual "
2564 "%" PRId64 " + virtual and previously indirect "
2565 "%" PRId64 " + still indirect "
2566 "%" PRId64 " + still indirect polymorphic "
2567 "%" PRId64 "\n", inlined_cnt,
2568 inlined_speculative, inlined_speculative_ply,
2569 inlined_indir_cnt, inlined_virt_cnt, inlined_virt_indir_cnt,
2570 noninlined_cnt, noninlined_indir_cnt, noninlined_virt_cnt,
2571 noninlined_virt_indir_cnt, indirect_cnt, indirect_poly_cnt);
2572 fprintf (dump_file, "Removed speculations ");
2573 spec_rem.dump (dump_file);
2574 fprintf (dump_file, "\n");
2575 }
2576 dump_overall_stats ();
2577 fprintf (dump_file, "\nWhy inlining failed?\n");
2578 for (i = 0; i < CIF_N_REASONS; i++)
2579 if (reason[i][1])
2580 fprintf (dump_file, "%-50s: %8i calls, %8f freq, %" PRId64" count\n",
2581 cgraph_inline_failed_string ((cgraph_inline_failed_t) i),
2582 (int) reason[i][1], reason_freq[i].to_double (), reason[i][0]);
2583 }
2584
2585 /* Called when node is removed. */
2586
2587 static void
2588 flatten_remove_node_hook (struct cgraph_node *node, void *data)
2589 {
2590 if (lookup_attribute ("flatten", DECL_ATTRIBUTES (node->decl)) == NULL)
2591 return;
2592
2593 hash_set<struct cgraph_node *> *removed
2594 = (hash_set<struct cgraph_node *> *) data;
2595 removed->add (node);
2596 }
2597
2598 /* Decide on the inlining. We do so in the topological order to avoid
2599 expenses on updating data structures. */
2600
2601 static unsigned int
2602 ipa_inline (void)
2603 {
2604 struct cgraph_node *node;
2605 int nnodes;
2606 struct cgraph_node **order;
2607 int i, j;
2608 int cold;
2609 bool remove_functions = false;
2610
2611 order = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
2612
2613 if (dump_file)
2614 ipa_dump_fn_summaries (dump_file);
2615
2616 nnodes = ipa_reverse_postorder (order);
2617 spec_rem = profile_count::zero ();
2618
2619 FOR_EACH_FUNCTION (node)
2620 {
2621 node->aux = 0;
2622
2623 /* Recompute the default reasons for inlining because they may have
2624 changed during merging. */
2625 if (in_lto_p)
2626 {
2627 for (cgraph_edge *e = node->callees; e; e = e->next_callee)
2628 {
2629 gcc_assert (e->inline_failed);
2630 initialize_inline_failed (e);
2631 }
2632 for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee)
2633 initialize_inline_failed (e);
2634 }
2635 }
2636
2637 if (dump_file)
2638 fprintf (dump_file, "\nFlattening functions:\n");
2639
2640 /* First shrink order array, so that it only contains nodes with
2641 flatten attribute. */
2642 for (i = nnodes - 1, j = i; i >= 0; i--)
2643 {
2644 node = order[i];
2645 if (node->definition
2646 /* Do not try to flatten aliases. These may happen for example when
2647 creating local aliases. */
2648 && !node->alias
2649 && lookup_attribute ("flatten",
2650 DECL_ATTRIBUTES (node->decl)) != NULL)
2651 order[j--] = order[i];
2652 }
2653
2654 /* After the above loop, order[j + 1] ... order[nnodes - 1] contain
2655 nodes with flatten attribute. If there is more than one such
2656 node, we need to register a node removal hook, as flatten_function
2657 could remove other nodes with flatten attribute. See PR82801. */
2658 struct cgraph_node_hook_list *node_removal_hook_holder = NULL;
2659 hash_set<struct cgraph_node *> *flatten_removed_nodes = NULL;
2660 if (j < nnodes - 2)
2661 {
2662 flatten_removed_nodes = new hash_set<struct cgraph_node *>;
2663 node_removal_hook_holder
2664 = symtab->add_cgraph_removal_hook (&flatten_remove_node_hook,
2665 flatten_removed_nodes);
2666 }
2667
2668 /* In the first pass handle functions to be flattened. Do this with
2669 a priority so none of our later choices will make this impossible. */
2670 for (i = nnodes - 1; i > j; i--)
2671 {
2672 node = order[i];
2673 if (flatten_removed_nodes
2674 && flatten_removed_nodes->contains (node))
2675 continue;
2676
2677 /* Handle nodes to be flattened.
2678 Ideally when processing callees we stop inlining at the
2679 entry of cycles, possibly cloning that entry point and
2680 try to flatten itself turning it into a self-recursive
2681 function. */
2682 if (dump_file)
2683 fprintf (dump_file, "Flattening %s\n", node->dump_name ());
2684 flatten_function (node, false, true);
2685 }
2686
2687 if (j < nnodes - 2)
2688 {
2689 symtab->remove_cgraph_removal_hook (node_removal_hook_holder);
2690 delete flatten_removed_nodes;
2691 }
2692 free (order);
2693
2694 if (dump_file)
2695 dump_overall_stats ();
2696
2697 inline_small_functions ();
2698
2699 gcc_assert (symtab->state == IPA_SSA);
2700 symtab->state = IPA_SSA_AFTER_INLINING;
2701 /* Do first after-inlining removal. We want to remove all "stale" extern
2702 inline functions and virtual functions so we really know what is called
2703 once. */
2704 symtab->remove_unreachable_nodes (dump_file);
2705
2706 /* Inline functions with a property that after inlining into all callers the
2707 code size will shrink because the out-of-line copy is eliminated.
2708 We do this regardless on the callee size as long as function growth limits
2709 are met. */
2710 if (dump_file)
2711 fprintf (dump_file,
2712 "\nDeciding on functions to be inlined into all callers and "
2713 "removing useless speculations:\n");
2714
2715 /* Inlining one function called once has good chance of preventing
2716 inlining other function into the same callee. Ideally we should
2717 work in priority order, but probably inlining hot functions first
2718 is good cut without the extra pain of maintaining the queue.
2719
2720 ??? this is not really fitting the bill perfectly: inlining function
2721 into callee often leads to better optimization of callee due to
2722 increased context for optimization.
2723 For example if main() function calls a function that outputs help
2724 and then function that does the main optimization, we should inline
2725 the second with priority even if both calls are cold by themselves.
2726
2727 We probably want to implement new predicate replacing our use of
2728 maybe_hot_edge interpreted as maybe_hot_edge || callee is known
2729 to be hot. */
2730 for (cold = 0; cold <= 1; cold ++)
2731 {
2732 FOR_EACH_DEFINED_FUNCTION (node)
2733 {
2734 struct cgraph_edge *edge, *next;
2735 bool update=false;
2736
2737 if (!opt_for_fn (node->decl, optimize)
2738 || !opt_for_fn (node->decl, flag_inline_functions_called_once))
2739 continue;
2740
2741 for (edge = node->callees; edge; edge = next)
2742 {
2743 next = edge->next_callee;
2744 if (edge->speculative && !speculation_useful_p (edge, false))
2745 {
2746 if (edge->count.ipa ().initialized_p ())
2747 spec_rem += edge->count.ipa ();
2748 cgraph_edge::resolve_speculation (edge);
2749 update = true;
2750 remove_functions = true;
2751 }
2752 }
2753 if (update)
2754 {
2755 struct cgraph_node *where = node->inlined_to
2756 ? node->inlined_to : node;
2757 reset_edge_caches (where);
2758 ipa_update_overall_fn_summary (where);
2759 }
2760 if (want_inline_function_to_all_callers_p (node, cold))
2761 {
2762 int num_calls = 0;
2763 node->call_for_symbol_and_aliases (sum_callers, &num_calls,
2764 true);
2765 while (node->call_for_symbol_and_aliases
2766 (inline_to_all_callers, &num_calls, true))
2767 ;
2768 remove_functions = true;
2769 }
2770 }
2771 }
2772
2773 /* Free ipa-prop structures if they are no longer needed. */
2774 ipa_free_all_structures_after_iinln ();
2775
2776 if (dump_enabled_p ())
2777 dump_printf (MSG_NOTE,
2778 "\nInlined %i calls, eliminated %i functions\n\n",
2779 ncalls_inlined, nfunctions_inlined);
2780 if (dump_file)
2781 dump_inline_stats ();
2782
2783 if (dump_file)
2784 ipa_dump_fn_summaries (dump_file);
2785 return remove_functions ? TODO_remove_functions : 0;
2786 }
2787
2788 /* Inline always-inline function calls in NODE. */
2789
2790 static bool
2791 inline_always_inline_functions (struct cgraph_node *node)
2792 {
2793 struct cgraph_edge *e;
2794 bool inlined = false;
2795
2796 for (e = node->callees; e; e = e->next_callee)
2797 {
2798 struct cgraph_node *callee = e->callee->ultimate_alias_target ();
2799 if (!DECL_DISREGARD_INLINE_LIMITS (callee->decl))
2800 continue;
2801
2802 if (e->recursive_p ())
2803 {
2804 if (dump_enabled_p ())
2805 dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2806 " Not inlining recursive call to %C.\n",
2807 e->callee);
2808 e->inline_failed = CIF_RECURSIVE_INLINING;
2809 continue;
2810 }
2811
2812 if (!can_early_inline_edge_p (e))
2813 {
2814 /* Set inlined to true if the callee is marked "always_inline" but
2815 is not inlinable. This will allow flagging an error later in
2816 expand_call_inline in tree-inline.c. */
2817 if (lookup_attribute ("always_inline",
2818 DECL_ATTRIBUTES (callee->decl)) != NULL)
2819 inlined = true;
2820 continue;
2821 }
2822
2823 if (dump_enabled_p ())
2824 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, e->call_stmt,
2825 " Inlining %C into %C (always_inline).\n",
2826 e->callee, e->caller);
2827 inline_call (e, true, NULL, NULL, false);
2828 inlined = true;
2829 }
2830 if (inlined)
2831 ipa_update_overall_fn_summary (node);
2832
2833 return inlined;
2834 }
2835
2836 /* Decide on the inlining. We do so in the topological order to avoid
2837 expenses on updating data structures. */
2838
2839 static bool
2840 early_inline_small_functions (struct cgraph_node *node)
2841 {
2842 struct cgraph_edge *e;
2843 bool inlined = false;
2844
2845 for (e = node->callees; e; e = e->next_callee)
2846 {
2847 struct cgraph_node *callee = e->callee->ultimate_alias_target ();
2848
2849 /* We can encounter not-yet-analyzed function during
2850 early inlining on callgraphs with strongly
2851 connected components. */
2852 ipa_fn_summary *s = ipa_fn_summaries->get (callee);
2853 if (s == NULL || !s->inlinable || !e->inline_failed)
2854 continue;
2855
2856 /* Do not consider functions not declared inline. */
2857 if (!DECL_DECLARED_INLINE_P (callee->decl)
2858 && !opt_for_fn (node->decl, flag_inline_small_functions)
2859 && !opt_for_fn (node->decl, flag_inline_functions))
2860 continue;
2861
2862 if (dump_enabled_p ())
2863 dump_printf_loc (MSG_NOTE, e->call_stmt,
2864 "Considering inline candidate %C.\n",
2865 callee);
2866
2867 if (!can_early_inline_edge_p (e))
2868 continue;
2869
2870 if (e->recursive_p ())
2871 {
2872 if (dump_enabled_p ())
2873 dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2874 " Not inlining: recursive call.\n");
2875 continue;
2876 }
2877
2878 if (!want_early_inline_function_p (e))
2879 continue;
2880
2881 if (dump_enabled_p ())
2882 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, e->call_stmt,
2883 " Inlining %C into %C.\n",
2884 callee, e->caller);
2885 inline_call (e, true, NULL, NULL, false);
2886 inlined = true;
2887 }
2888
2889 if (inlined)
2890 ipa_update_overall_fn_summary (node);
2891
2892 return inlined;
2893 }
2894
2895 unsigned int
2896 early_inliner (function *fun)
2897 {
2898 struct cgraph_node *node = cgraph_node::get (current_function_decl);
2899 struct cgraph_edge *edge;
2900 unsigned int todo = 0;
2901 int iterations = 0;
2902 bool inlined = false;
2903
2904 if (seen_error ())
2905 return 0;
2906
2907 /* Do nothing if datastructures for ipa-inliner are already computed. This
2908 happens when some pass decides to construct new function and
2909 cgraph_add_new_function calls lowering passes and early optimization on
2910 it. This may confuse ourself when early inliner decide to inline call to
2911 function clone, because function clones don't have parameter list in
2912 ipa-prop matching their signature. */
2913 if (ipa_node_params_sum)
2914 return 0;
2915
2916 if (flag_checking)
2917 node->verify ();
2918 node->remove_all_references ();
2919
2920 /* Even when not optimizing or not inlining inline always-inline
2921 functions. */
2922 inlined = inline_always_inline_functions (node);
2923
2924 if (!optimize
2925 || flag_no_inline
2926 || !flag_early_inlining
2927 /* Never inline regular functions into always-inline functions
2928 during incremental inlining. This sucks as functions calling
2929 always inline functions will get less optimized, but at the
2930 same time inlining of functions calling always inline
2931 function into an always inline function might introduce
2932 cycles of edges to be always inlined in the callgraph.
2933
2934 We might want to be smarter and just avoid this type of inlining. */
2935 || (DECL_DISREGARD_INLINE_LIMITS (node->decl)
2936 && lookup_attribute ("always_inline",
2937 DECL_ATTRIBUTES (node->decl))))
2938 ;
2939 else if (lookup_attribute ("flatten",
2940 DECL_ATTRIBUTES (node->decl)) != NULL)
2941 {
2942 /* When the function is marked to be flattened, recursively inline
2943 all calls in it. */
2944 if (dump_enabled_p ())
2945 dump_printf (MSG_OPTIMIZED_LOCATIONS,
2946 "Flattening %C\n", node);
2947 flatten_function (node, true, true);
2948 inlined = true;
2949 }
2950 else
2951 {
2952 /* If some always_inline functions was inlined, apply the changes.
2953 This way we will not account always inline into growth limits and
2954 moreover we will inline calls from always inlines that we skipped
2955 previously because of conditional above. */
2956 if (inlined)
2957 {
2958 timevar_push (TV_INTEGRATION);
2959 todo |= optimize_inline_calls (current_function_decl);
2960 /* optimize_inline_calls call above might have introduced new
2961 statements that don't have inline parameters computed. */
2962 for (edge = node->callees; edge; edge = edge->next_callee)
2963 {
2964 /* We can enounter not-yet-analyzed function during
2965 early inlining on callgraphs with strongly
2966 connected components. */
2967 ipa_call_summary *es = ipa_call_summaries->get_create (edge);
2968 es->call_stmt_size
2969 = estimate_num_insns (edge->call_stmt, &eni_size_weights);
2970 es->call_stmt_time
2971 = estimate_num_insns (edge->call_stmt, &eni_time_weights);
2972 }
2973 ipa_update_overall_fn_summary (node);
2974 inlined = false;
2975 timevar_pop (TV_INTEGRATION);
2976 }
2977 /* We iterate incremental inlining to get trivial cases of indirect
2978 inlining. */
2979 while (iterations < opt_for_fn (node->decl,
2980 param_early_inliner_max_iterations)
2981 && early_inline_small_functions (node))
2982 {
2983 timevar_push (TV_INTEGRATION);
2984 todo |= optimize_inline_calls (current_function_decl);
2985
2986 /* Technically we ought to recompute inline parameters so the new
2987 iteration of early inliner works as expected. We however have
2988 values approximately right and thus we only need to update edge
2989 info that might be cleared out for newly discovered edges. */
2990 for (edge = node->callees; edge; edge = edge->next_callee)
2991 {
2992 /* We have no summary for new bound store calls yet. */
2993 ipa_call_summary *es = ipa_call_summaries->get_create (edge);
2994 es->call_stmt_size
2995 = estimate_num_insns (edge->call_stmt, &eni_size_weights);
2996 es->call_stmt_time
2997 = estimate_num_insns (edge->call_stmt, &eni_time_weights);
2998 }
2999 if (iterations < opt_for_fn (node->decl,
3000 param_early_inliner_max_iterations) - 1)
3001 ipa_update_overall_fn_summary (node);
3002 timevar_pop (TV_INTEGRATION);
3003 iterations++;
3004 inlined = false;
3005 }
3006 if (dump_file)
3007 fprintf (dump_file, "Iterations: %i\n", iterations);
3008 }
3009
3010 if (inlined)
3011 {
3012 timevar_push (TV_INTEGRATION);
3013 todo |= optimize_inline_calls (current_function_decl);
3014 timevar_pop (TV_INTEGRATION);
3015 }
3016
3017 fun->always_inline_functions_inlined = true;
3018
3019 return todo;
3020 }
3021
3022 /* Do inlining of small functions. Doing so early helps profiling and other
3023 passes to be somewhat more effective and avoids some code duplication in
3024 later real inlining pass for testcases with very many function calls. */
3025
3026 namespace {
3027
3028 const pass_data pass_data_early_inline =
3029 {
3030 GIMPLE_PASS, /* type */
3031 "einline", /* name */
3032 OPTGROUP_INLINE, /* optinfo_flags */
3033 TV_EARLY_INLINING, /* tv_id */
3034 PROP_ssa, /* properties_required */
3035 0, /* properties_provided */
3036 0, /* properties_destroyed */
3037 0, /* todo_flags_start */
3038 0, /* todo_flags_finish */
3039 };
3040
3041 class pass_early_inline : public gimple_opt_pass
3042 {
3043 public:
3044 pass_early_inline (gcc::context *ctxt)
3045 : gimple_opt_pass (pass_data_early_inline, ctxt)
3046 {}
3047
3048 /* opt_pass methods: */
3049 virtual unsigned int execute (function *);
3050
3051 }; // class pass_early_inline
3052
3053 unsigned int
3054 pass_early_inline::execute (function *fun)
3055 {
3056 return early_inliner (fun);
3057 }
3058
3059 } // anon namespace
3060
3061 gimple_opt_pass *
3062 make_pass_early_inline (gcc::context *ctxt)
3063 {
3064 return new pass_early_inline (ctxt);
3065 }
3066
3067 namespace {
3068
3069 const pass_data pass_data_ipa_inline =
3070 {
3071 IPA_PASS, /* type */
3072 "inline", /* name */
3073 OPTGROUP_INLINE, /* optinfo_flags */
3074 TV_IPA_INLINING, /* tv_id */
3075 0, /* properties_required */
3076 0, /* properties_provided */
3077 0, /* properties_destroyed */
3078 0, /* todo_flags_start */
3079 ( TODO_dump_symtab ), /* todo_flags_finish */
3080 };
3081
3082 class pass_ipa_inline : public ipa_opt_pass_d
3083 {
3084 public:
3085 pass_ipa_inline (gcc::context *ctxt)
3086 : ipa_opt_pass_d (pass_data_ipa_inline, ctxt,
3087 NULL, /* generate_summary */
3088 NULL, /* write_summary */
3089 NULL, /* read_summary */
3090 NULL, /* write_optimization_summary */
3091 NULL, /* read_optimization_summary */
3092 NULL, /* stmt_fixup */
3093 0, /* function_transform_todo_flags_start */
3094 inline_transform, /* function_transform */
3095 NULL) /* variable_transform */
3096 {}
3097
3098 /* opt_pass methods: */
3099 virtual unsigned int execute (function *) { return ipa_inline (); }
3100
3101 }; // class pass_ipa_inline
3102
3103 } // anon namespace
3104
3105 ipa_opt_pass_d *
3106 make_pass_ipa_inline (gcc::context *ctxt)
3107 {
3108 return new pass_ipa_inline (ctxt);
3109 }