gcc/ipa-inline-analysis.c
1 /* Inlining decision heuristics.
2 Copyright (C) 2003-2017 Free Software Foundation, Inc.
3 Contributed by Jan Hubicka
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 /* Analysis used by the inliner and other passes limiting code size growth.
22
23 We estimate for each function
24 - function body size
25 - average function execution time
26 - inlining size benefit (that is how much of function body size
27 and its call sequence is expected to disappear by inlining)
28 - inlining time benefit
29 - function frame size
30 For each call
31 - call statement size and time
32
33 inline_summary data structures store above information locally (i.e.
34 parameters of the function itself) and globally (i.e. parameters of
35 the function created by applying all the inline decisions already
36 present in the callgraph).
37
38 We provide access to the inline_summary data structure and
39 basic logic updating the parameters when inlining is performed.
40
41 The summaries are context sensitive. Context means
42 1) partial assignment of known constant values of operands
43 2) whether function is inlined into the call or not.
44 It is easy to add more variants. To represent function size and time
 45 that depend on context (i.e. it is known to be optimized away when
46 context is known either by inlining or from IP-CP and cloning),
47 we use predicates.
48
49 estimate_edge_size and estimate_edge_growth can be used to query
50 function size/time in the given context. inline_merge_summary merges
51 properties of caller and callee after inlining.
52
53 Finally pass_inline_parameters is exported. This is used to drive
 54 computation of function parameters used by the early inliner. The IPA
 55 inliner performs analysis via its analyze_function method. */
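
/* For example (an illustrative sketch, not code from this file), a
   client such as the inliner can combine the exported queries:

     int size = estimate_edge_size (e);
     int growth = estimate_edge_growth (e);
     if (growth <= 0)
       ... inlining E is expected not to grow the unit at all ...

   Both estimates already reflect the context of the call, i.e. any
   constants known to flow into the callee.  */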
56
57 #include "config.h"
58 #include "system.h"
59 #include "coretypes.h"
60 #include "backend.h"
61 #include "tree.h"
62 #include "gimple.h"
63 #include "alloc-pool.h"
64 #include "tree-pass.h"
65 #include "ssa.h"
66 #include "tree-streamer.h"
67 #include "cgraph.h"
68 #include "diagnostic.h"
69 #include "fold-const.h"
70 #include "print-tree.h"
71 #include "tree-inline.h"
72 #include "gimple-pretty-print.h"
73 #include "params.h"
74 #include "cfganal.h"
75 #include "gimple-iterator.h"
76 #include "tree-cfg.h"
77 #include "tree-ssa-loop-niter.h"
78 #include "tree-ssa-loop.h"
79 #include "symbol-summary.h"
80 #include "ipa-prop.h"
81 #include "ipa-inline.h"
82 #include "cfgloop.h"
83 #include "tree-scalar-evolution.h"
84 #include "ipa-utils.h"
85 #include "cilk.h"
86 #include "cfgexpand.h"
87 #include "gimplify.h"
88
89 /* Summaries. */
90 function_summary <inline_summary *> *inline_summaries;
91 call_summary <ipa_call_summary *> *ipa_call_summaries;
92
93 /* Cached node/edge growths. */
94 vec<edge_growth_cache_entry> edge_growth_cache;
95
 96 /* Edge predicates go here. */
97 static object_allocator<predicate> edge_predicate_pool ("edge predicates");
98
99
100 /* Dump inline hints. */
101 void
102 dump_inline_hints (FILE *f, inline_hints hints)
103 {
104 if (!hints)
105 return;
106 fprintf (f, "inline hints:");
107 if (hints & INLINE_HINT_indirect_call)
108 {
109 hints &= ~INLINE_HINT_indirect_call;
110 fprintf (f, " indirect_call");
111 }
112 if (hints & INLINE_HINT_loop_iterations)
113 {
114 hints &= ~INLINE_HINT_loop_iterations;
115 fprintf (f, " loop_iterations");
116 }
117 if (hints & INLINE_HINT_loop_stride)
118 {
119 hints &= ~INLINE_HINT_loop_stride;
120 fprintf (f, " loop_stride");
121 }
122 if (hints & INLINE_HINT_same_scc)
123 {
124 hints &= ~INLINE_HINT_same_scc;
125 fprintf (f, " same_scc");
126 }
127 if (hints & INLINE_HINT_in_scc)
128 {
129 hints &= ~INLINE_HINT_in_scc;
130 fprintf (f, " in_scc");
131 }
132 if (hints & INLINE_HINT_cross_module)
133 {
134 hints &= ~INLINE_HINT_cross_module;
135 fprintf (f, " cross_module");
136 }
137 if (hints & INLINE_HINT_declared_inline)
138 {
139 hints &= ~INLINE_HINT_declared_inline;
140 fprintf (f, " declared_inline");
141 }
142 if (hints & INLINE_HINT_array_index)
143 {
144 hints &= ~INLINE_HINT_array_index;
145 fprintf (f, " array_index");
146 }
147 if (hints & INLINE_HINT_known_hot)
148 {
149 hints &= ~INLINE_HINT_known_hot;
150 fprintf (f, " known_hot");
151 }
152 gcc_assert (!hints);
153 }
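
/* For instance (an illustrative example), dumping
   hints == (INLINE_HINT_indirect_call | INLINE_HINT_loop_stride)
   prints "inline hints: indirect_call loop_stride"; the assertion
   above guarantees that every hint bit has a matching branch here.  */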
154
155
156 /* Record SIZE and TIME to SUMMARY.
157 The accounted code will be executed when EXEC_PRED is true.
 158 When NONCONST_PRED is false the code will evaluate to a constant and
159 will get optimized out in specialized clones of the function. */
160
161 static void
162 account_size_time (struct inline_summary *summary, int size, sreal time,
163 predicate *exec_pred,
164 predicate *nonconst_pred_ptr)
165 {
166 size_time_entry *e;
167 bool found = false;
168 int i;
169 predicate nonconst_pred;
170
171 if (*exec_pred == false)
172 return;
173
174 nonconst_pred = *nonconst_pred_ptr & *exec_pred;
175
176 if (nonconst_pred == false)
177 return;
178
 179 /* We need to create an initial empty unconditional clause, but otherwise
 180 we don't need to account empty times and sizes. */
181 if (!size && time == 0 && summary->entry)
182 return;
183
184 gcc_assert (time >= 0);
185
186 for (i = 0; vec_safe_iterate (summary->entry, i, &e); i++)
187 if (e->exec_predicate == *exec_pred
188 && e->nonconst_predicate == nonconst_pred)
189 {
190 found = true;
191 break;
192 }
193 if (i == 256)
194 {
195 i = 0;
196 found = true;
197 e = &(*summary->entry)[0];
198 if (dump_file && (dump_flags & TDF_DETAILS))
199 fprintf (dump_file,
200 "\t\tReached limit on number of entries, "
201 "ignoring the predicate.");
202 }
203 if (dump_file && (dump_flags & TDF_DETAILS) && (time != 0 || size))
204 {
205 fprintf (dump_file,
206 "\t\tAccounting size:%3.2f, time:%3.2f on %spredicate exec:",
207 ((double) size) / INLINE_SIZE_SCALE,
208 (time.to_double ()), found ? "" : "new ");
209 exec_pred->dump (dump_file, summary->conds, 0);
210 if (*exec_pred != nonconst_pred)
211 {
212 fprintf (dump_file, " nonconst:");
213 nonconst_pred.dump (dump_file, summary->conds);
214 }
215 else
216 fprintf (dump_file, "\n");
217 }
218 if (!found)
219 {
220 struct size_time_entry new_entry;
221 new_entry.size = size;
222 new_entry.time = time;
223 new_entry.exec_predicate = *exec_pred;
224 new_entry.nonconst_predicate = nonconst_pred;
225 vec_safe_push (summary->entry, new_entry);
226 }
227 else
228 {
229 e->size += size;
230 e->time += time;
231 }
232 }
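
/* An illustrative sketch of a typical use (the predicates and the
   param_size and time variables are made up for the example): code of
   size 2 * INLINE_SIZE_SCALE that always executes, but folds away in
   clones where parameter 0 is a known constant, would be recorded as

     predicate exec = true;
     predicate nonconst = add_condition (summary, 0, param_size, NULL,
					 predicate::changed, NULL_TREE);
     account_size_time (summary, 2 * INLINE_SIZE_SCALE, time,
			&exec, &nonconst);

   A later call with the same pair of predicates is merged into the
   existing size_time_entry instead of pushing a new one.  */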
233
 234 /* We proved E to be unreachable, redirect it to __builtin_unreachable. */
235
236 static struct cgraph_edge *
237 redirect_to_unreachable (struct cgraph_edge *e)
238 {
239 struct cgraph_node *callee = !e->inline_failed ? e->callee : NULL;
240 struct cgraph_node *target = cgraph_node::get_create
241 (builtin_decl_implicit (BUILT_IN_UNREACHABLE));
242
243 if (e->speculative)
244 e = e->resolve_speculation (target->decl);
245 else if (!e->callee)
246 e->make_direct (target);
247 else
248 e->redirect_callee (target);
249 struct ipa_call_summary *es = ipa_call_summaries->get (e);
250 e->inline_failed = CIF_UNREACHABLE;
251 e->frequency = 0;
252 e->count = 0;
253 es->call_stmt_size = 0;
254 es->call_stmt_time = 0;
255 if (callee)
256 callee->remove_symbol_and_inline_clones ();
257 return e;
258 }
259
260 /* Set predicate for edge E. */
261
262 static void
263 edge_set_predicate (struct cgraph_edge *e, predicate *predicate)
264 {
265 /* If the edge is determined to be never executed, redirect it
266 to BUILTIN_UNREACHABLE to save inliner from inlining into it. */
267 if (predicate && *predicate == false
268 /* When handling speculative edges, we need to do the redirection
269 just once. Do it always on the direct edge, so we do not
270 attempt to resolve speculation while duplicating the edge. */
271 && (!e->speculative || e->callee))
272 e = redirect_to_unreachable (e);
273
274 struct ipa_call_summary *es = ipa_call_summaries->get (e);
275 if (predicate && *predicate != true)
276 {
277 if (!es->predicate)
278 es->predicate = edge_predicate_pool.allocate ();
279 *es->predicate = *predicate;
280 }
281 else
282 {
283 if (es->predicate)
284 edge_predicate_pool.remove (es->predicate);
285 es->predicate = NULL;
286 }
287 }
288
289 /* Set predicate for hint *P. */
290
291 static void
292 set_hint_predicate (predicate **p, predicate new_predicate)
293 {
294 if (new_predicate == false || new_predicate == true)
295 {
296 if (*p)
297 edge_predicate_pool.remove (*p);
298 *p = NULL;
299 }
300 else
301 {
302 if (!*p)
303 *p = edge_predicate_pool.allocate ();
304 **p = new_predicate;
305 }
306 }
307
308
 309 /* Compute what conditions may or may not hold given information about
 310 parameters. RET_CLAUSE returns truths that may hold in a specialized copy,
 311 while RET_NONSPEC_CLAUSE returns truths that may hold in a nonspecialized
 312 copy when called in a given context. Each is a bitmask of conditions. A
 313 0 bit means that the condition is known to be false, while a 1 bit means
 314 that the condition may or may not be true. The two differ - for example the
 315 NOT_INLINED condition is always false in the second, and builtin_constant_p
 316 tests cannot use the fact that a parameter is indeed a constant.
317
318 KNOWN_VALS is partial mapping of parameters of NODE to constant values.
 319 KNOWN_AGGS is a vector of aggregate jump functions for each parameter.
320 Return clause of possible truths. When INLINE_P is true, assume that we are
321 inlining.
322
323 ERROR_MARK means compile time invariant. */
324
325 static void
326 evaluate_conditions_for_known_args (struct cgraph_node *node,
327 bool inline_p,
328 vec<tree> known_vals,
329 vec<ipa_agg_jump_function_p>
330 known_aggs,
331 clause_t *ret_clause,
332 clause_t *ret_nonspec_clause)
333 {
334 clause_t clause = inline_p ? 0 : 1 << predicate::not_inlined_condition;
335 clause_t nonspec_clause = 1 << predicate::not_inlined_condition;
336 struct inline_summary *info = inline_summaries->get (node);
337 int i;
338 struct condition *c;
339
340 for (i = 0; vec_safe_iterate (info->conds, i, &c); i++)
341 {
342 tree val;
343 tree res;
344
345 /* We allow call stmt to have fewer arguments than the callee function
 346 (especially for K&R style programs). So we bound-check here (we assume
347 known_aggs vector, if non-NULL, has the same length as
348 known_vals). */
349 gcc_checking_assert (!known_aggs.exists ()
350 || (known_vals.length () == known_aggs.length ()));
351 if (c->operand_num >= (int) known_vals.length ())
352 {
353 clause |= 1 << (i + predicate::first_dynamic_condition);
354 nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
355 continue;
356 }
357
358 if (c->agg_contents)
359 {
360 struct ipa_agg_jump_function *agg;
361
362 if (c->code == predicate::changed
363 && !c->by_ref
364 && (known_vals[c->operand_num] == error_mark_node))
365 continue;
366
367 if (known_aggs.exists ())
368 {
369 agg = known_aggs[c->operand_num];
370 val = ipa_find_agg_cst_for_param (agg, known_vals[c->operand_num],
371 c->offset, c->by_ref);
372 }
373 else
374 val = NULL_TREE;
375 }
376 else
377 {
378 val = known_vals[c->operand_num];
379 if (val == error_mark_node && c->code != predicate::changed)
380 val = NULL_TREE;
381 }
382
383 if (!val)
384 {
385 clause |= 1 << (i + predicate::first_dynamic_condition);
386 nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
387 continue;
388 }
389 if (c->code == predicate::changed)
390 {
391 nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
392 continue;
393 }
394
395 if (tree_to_shwi (TYPE_SIZE (TREE_TYPE (val))) != c->size)
396 {
397 clause |= 1 << (i + predicate::first_dynamic_condition);
398 nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
399 continue;
400 }
401 if (c->code == predicate::is_not_constant)
402 {
403 nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
404 continue;
405 }
406
407 val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (c->val), val);
408 res = val
409 ? fold_binary_to_constant (c->code, boolean_type_node, val, c->val)
410 : NULL;
411
412 if (res && integer_zerop (res))
413 continue;
414
415 clause |= 1 << (i + predicate::first_dynamic_condition);
416 nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
417 }
418 *ret_clause = clause;
419 if (ret_nonspec_clause)
420 *ret_nonspec_clause = nonspec_clause;
421 }
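
/* A worked example (illustrative): suppose condition 0 is
   "param 1 > 8" and known_vals[1] == 4.  The comparison folds to
   false, so bit (0 + predicate::first_dynamic_condition) stays clear
   in both returned clauses - the condition is known to be false.  If
   instead nothing is known about param 1, that bit is set in both
   clauses, meaning the condition may or may not hold.  The
   not_inlined_condition bit is always set in *RET_NONSPEC_CLAUSE and
   is set in *RET_CLAUSE only when INLINE_P is false.  */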
422
423
424 /* Work out what conditions might be true at invocation of E. */
425
426 static void
427 evaluate_properties_for_edge (struct cgraph_edge *e, bool inline_p,
428 clause_t *clause_ptr, clause_t *nonspec_clause_ptr,
429 vec<tree> *known_vals_ptr,
430 vec<ipa_polymorphic_call_context>
431 *known_contexts_ptr,
432 vec<ipa_agg_jump_function_p> *known_aggs_ptr)
433 {
434 struct cgraph_node *callee = e->callee->ultimate_alias_target ();
435 struct inline_summary *info = inline_summaries->get (callee);
436 vec<tree> known_vals = vNULL;
437 vec<ipa_agg_jump_function_p> known_aggs = vNULL;
438
439 if (clause_ptr)
440 *clause_ptr = inline_p ? 0 : 1 << predicate::not_inlined_condition;
441 if (known_vals_ptr)
442 known_vals_ptr->create (0);
443 if (known_contexts_ptr)
444 known_contexts_ptr->create (0);
445
446 if (ipa_node_params_sum
447 && !e->call_stmt_cannot_inline_p
448 && ((clause_ptr && info->conds) || known_vals_ptr || known_contexts_ptr))
449 {
450 struct ipa_node_params *parms_info;
451 struct ipa_edge_args *args = IPA_EDGE_REF (e);
452 struct ipa_call_summary *es = ipa_call_summaries->get (e);
453 int i, count = ipa_get_cs_argument_count (args);
454
455 if (e->caller->global.inlined_to)
456 parms_info = IPA_NODE_REF (e->caller->global.inlined_to);
457 else
458 parms_info = IPA_NODE_REF (e->caller);
459
460 if (count && (info->conds || known_vals_ptr))
461 known_vals.safe_grow_cleared (count);
462 if (count && (info->conds || known_aggs_ptr))
463 known_aggs.safe_grow_cleared (count);
464 if (count && known_contexts_ptr)
465 known_contexts_ptr->safe_grow_cleared (count);
466
467 for (i = 0; i < count; i++)
468 {
469 struct ipa_jump_func *jf = ipa_get_ith_jump_func (args, i);
470 tree cst = ipa_value_from_jfunc (parms_info, jf);
471
472 if (!cst && e->call_stmt
473 && i < (int)gimple_call_num_args (e->call_stmt))
474 {
475 cst = gimple_call_arg (e->call_stmt, i);
476 if (!is_gimple_min_invariant (cst))
477 cst = NULL;
478 }
479 if (cst)
480 {
481 gcc_checking_assert (TREE_CODE (cst) != TREE_BINFO);
482 if (known_vals.exists ())
483 known_vals[i] = cst;
484 }
485 else if (inline_p && !es->param[i].change_prob)
486 known_vals[i] = error_mark_node;
487
488 if (known_contexts_ptr)
489 (*known_contexts_ptr)[i] = ipa_context_from_jfunc (parms_info, e,
490 i, jf);
491 /* TODO: When IPA-CP starts propagating and merging aggregate jump
492 functions, use its knowledge of the caller too, just like the
493 scalar case above. */
494 known_aggs[i] = &jf->agg;
495 }
496 }
497 else if (e->call_stmt && !e->call_stmt_cannot_inline_p
498 && ((clause_ptr && info->conds) || known_vals_ptr))
499 {
500 int i, count = (int)gimple_call_num_args (e->call_stmt);
501
502 if (count && (info->conds || known_vals_ptr))
503 known_vals.safe_grow_cleared (count);
504 for (i = 0; i < count; i++)
505 {
506 tree cst = gimple_call_arg (e->call_stmt, i);
507 if (!is_gimple_min_invariant (cst))
508 cst = NULL;
509 if (cst)
510 known_vals[i] = cst;
511 }
512 }
513
514 evaluate_conditions_for_known_args (callee, inline_p,
515 known_vals, known_aggs, clause_ptr,
516 nonspec_clause_ptr);
517
518 if (known_vals_ptr)
519 *known_vals_ptr = known_vals;
520 else
521 known_vals.release ();
522
523 if (known_aggs_ptr)
524 *known_aggs_ptr = known_aggs;
525 else
526 known_aggs.release ();
527 }
528
529
530 /* Allocate the inline summary vector or resize it to cover all cgraph nodes. */
531
532 static void
533 inline_summary_alloc (void)
534 {
535 if (!inline_summaries)
536 inline_summaries = inline_summary_t::create_ggc (symtab);
537 if (!ipa_call_summaries)
538 ipa_call_summaries = new ipa_call_summary_t (symtab, false);
539 }
540
 541 /* We are called multiple times for a given function; clear
 542 data from previous runs so they are not accumulated. */
543
544 static void
545 reset_ipa_call_summary (struct cgraph_edge *e)
546 {
547 struct ipa_call_summary *es = ipa_call_summaries->get (e);
548
549 es->call_stmt_size = es->call_stmt_time = 0;
550 if (es->predicate)
551 edge_predicate_pool.remove (es->predicate);
552 es->predicate = NULL;
553 es->param.release ();
554 }
555
 556 /* We are called multiple times for a given function; clear
 557 data from previous runs so they are not accumulated. */
558
559 static void
560 reset_inline_summary (struct cgraph_node *node,
561 inline_summary *info)
562 {
563 struct cgraph_edge *e;
564
565 info->self_size = 0;
566 info->self_time = 0;
567 info->estimated_stack_size = 0;
568 info->estimated_self_stack_size = 0;
569 info->stack_frame_offset = 0;
570 info->size = 0;
571 info->time = 0;
572 info->growth = 0;
573 info->scc_no = 0;
574 if (info->loop_iterations)
575 {
576 edge_predicate_pool.remove (info->loop_iterations);
577 info->loop_iterations = NULL;
578 }
579 if (info->loop_stride)
580 {
581 edge_predicate_pool.remove (info->loop_stride);
582 info->loop_stride = NULL;
583 }
584 if (info->array_index)
585 {
586 edge_predicate_pool.remove (info->array_index);
587 info->array_index = NULL;
588 }
589 vec_free (info->conds);
590 vec_free (info->entry);
591 for (e = node->callees; e; e = e->next_callee)
592 reset_ipa_call_summary (e);
593 for (e = node->indirect_calls; e; e = e->next_callee)
594 reset_ipa_call_summary (e);
595 info->fp_expressions = false;
596 }
597
598 /* Hook that is called by cgraph.c when a node is removed. */
599
600 void
601 inline_summary_t::remove (cgraph_node *node, inline_summary *info)
602 {
603 reset_inline_summary (node, info);
604 }
605
606 /* Same as remap_predicate_after_duplication but handle hint predicate *P.
 607 Additionally, take care to allocate a new memory slot for the updated
 608 predicate and set it to NULL when it becomes true or false (and thus
 609 uninteresting). */
610
611 static void
612 remap_hint_predicate_after_duplication (predicate **p,
613 clause_t possible_truths)
614 {
615 predicate new_predicate;
616
617 if (!*p)
618 return;
619
620 new_predicate = (*p)->remap_after_duplication (possible_truths);
 621 /* We do not want to free the previous predicate; it is used by the node's origin. */
622 *p = NULL;
623 set_hint_predicate (p, new_predicate);
624 }
625
626
627 /* Hook that is called by cgraph.c when a node is duplicated. */
628 void
629 inline_summary_t::duplicate (cgraph_node *src,
630 cgraph_node *dst,
631 inline_summary *,
632 inline_summary *info)
633 {
634 inline_summary_alloc ();
635 memcpy (info, inline_summaries->get (src), sizeof (inline_summary));
636 /* TODO: as an optimization, we may avoid copying conditions
637 that are known to be false or true. */
638 info->conds = vec_safe_copy (info->conds);
639
640 /* When there are any replacements in the function body, see if we can figure
641 out that something was optimized out. */
642 if (ipa_node_params_sum && dst->clone.tree_map)
643 {
644 vec<size_time_entry, va_gc> *entry = info->entry;
645 /* Use SRC parm info since it may not be copied yet. */
646 struct ipa_node_params *parms_info = IPA_NODE_REF (src);
647 vec<tree> known_vals = vNULL;
648 int count = ipa_get_param_count (parms_info);
649 int i, j;
650 clause_t possible_truths;
651 predicate true_pred = true;
652 size_time_entry *e;
653 int optimized_out_size = 0;
654 bool inlined_to_p = false;
655 struct cgraph_edge *edge, *next;
656
657 info->entry = 0;
658 known_vals.safe_grow_cleared (count);
659 for (i = 0; i < count; i++)
660 {
661 struct ipa_replace_map *r;
662
663 for (j = 0; vec_safe_iterate (dst->clone.tree_map, j, &r); j++)
664 {
665 if (((!r->old_tree && r->parm_num == i)
666 || (r->old_tree && r->old_tree == ipa_get_param (parms_info, i)))
667 && r->replace_p && !r->ref_p)
668 {
669 known_vals[i] = r->new_tree;
670 break;
671 }
672 }
673 }
674 evaluate_conditions_for_known_args (dst, false,
675 known_vals,
676 vNULL,
677 &possible_truths,
678 /* We are going to specialize,
679 so ignore nonspec truths. */
680 NULL);
681 known_vals.release ();
682
683 account_size_time (info, 0, 0, &true_pred, &true_pred);
684
685 /* Remap size_time vectors.
 686 Simplify the predicate by pruning out alternatives that are known
 687 to be false.
 688 TODO: as an optimization, we can also eliminate conditions known
689 to be true. */
690 for (i = 0; vec_safe_iterate (entry, i, &e); i++)
691 {
692 predicate new_exec_pred;
693 predicate new_nonconst_pred;
694 new_exec_pred = e->exec_predicate.remap_after_duplication
695 (possible_truths);
696 new_nonconst_pred = e->nonconst_predicate.remap_after_duplication
697 (possible_truths);
698 if (new_exec_pred == false || new_nonconst_pred == false)
699 optimized_out_size += e->size;
700 else
701 account_size_time (info, e->size, e->time, &new_exec_pred,
702 &new_nonconst_pred);
703 }
704
705 /* Remap edge predicates with the same simplification as above.
706 Also copy constantness arrays. */
707 for (edge = dst->callees; edge; edge = next)
708 {
709 predicate new_predicate;
710 struct ipa_call_summary *es = ipa_call_summaries->get (edge);
711 next = edge->next_callee;
712
713 if (!edge->inline_failed)
714 inlined_to_p = true;
715 if (!es->predicate)
716 continue;
717 new_predicate = es->predicate->remap_after_duplication
718 (possible_truths);
719 if (new_predicate == false && *es->predicate != false)
720 optimized_out_size += es->call_stmt_size * INLINE_SIZE_SCALE;
721 edge_set_predicate (edge, &new_predicate);
722 }
723
 724 /* Remap indirect edge predicates with the same simplification as above.
725 Also copy constantness arrays. */
726 for (edge = dst->indirect_calls; edge; edge = next)
727 {
728 predicate new_predicate;
729 struct ipa_call_summary *es = ipa_call_summaries->get (edge);
730 next = edge->next_callee;
731
732 gcc_checking_assert (edge->inline_failed);
733 if (!es->predicate)
734 continue;
735 new_predicate = es->predicate->remap_after_duplication
736 (possible_truths);
737 if (new_predicate == false && *es->predicate != false)
738 optimized_out_size += es->call_stmt_size * INLINE_SIZE_SCALE;
739 edge_set_predicate (edge, &new_predicate);
740 }
741 remap_hint_predicate_after_duplication (&info->loop_iterations,
742 possible_truths);
743 remap_hint_predicate_after_duplication (&info->loop_stride,
744 possible_truths);
745 remap_hint_predicate_after_duplication (&info->array_index,
746 possible_truths);
747
 748 /* If the inliner or anyone after it ever starts producing
 749 non-trivial clones, we will get into trouble for lack of information
 750 about updating self sizes, because the size vectors already contain
 751 sizes of the callees. */
752 gcc_assert (!inlined_to_p || !optimized_out_size);
753 }
754 else
755 {
756 info->entry = vec_safe_copy (info->entry);
757 if (info->loop_iterations)
758 {
759 predicate p = *info->loop_iterations;
760 info->loop_iterations = NULL;
761 set_hint_predicate (&info->loop_iterations, p);
762 }
763 if (info->loop_stride)
764 {
765 predicate p = *info->loop_stride;
766 info->loop_stride = NULL;
767 set_hint_predicate (&info->loop_stride, p);
768 }
769 if (info->array_index)
770 {
771 predicate p = *info->array_index;
772 info->array_index = NULL;
773 set_hint_predicate (&info->array_index, p);
774 }
775 }
776 if (!dst->global.inlined_to)
777 inline_update_overall_summary (dst);
778 }
779
780
781 /* Hook that is called by cgraph.c when a node is duplicated. */
782
783 void
784 ipa_call_summary_t::duplicate (struct cgraph_edge *src,
785 struct cgraph_edge *dst,
786 struct ipa_call_summary *srcinfo,
787 struct ipa_call_summary *info)
788 {
789 *info = *srcinfo;
790 info->predicate = NULL;
791 edge_set_predicate (dst, srcinfo->predicate);
792 info->param = srcinfo->param.copy ();
793 if (!dst->indirect_unknown_callee && src->indirect_unknown_callee)
794 {
795 info->call_stmt_size -= (eni_size_weights.indirect_call_cost
796 - eni_size_weights.call_cost);
797 info->call_stmt_time -= (eni_time_weights.indirect_call_cost
798 - eni_time_weights.call_cost);
799 }
800 }
801
802
803 /* Keep edge cache consistent across edge removal. */
804
805 void
806 ipa_call_summary_t::remove (struct cgraph_edge *edge,
807 struct ipa_call_summary *)
808 {
809 if (edge_growth_cache.exists ())
810 reset_edge_growth_cache (edge);
811 reset_ipa_call_summary (edge);
812 }
813
814
815 /* Initialize growth caches. */
816
817 void
818 initialize_growth_caches (void)
819 {
820 if (symtab->edges_max_uid)
821 edge_growth_cache.safe_grow_cleared (symtab->edges_max_uid);
822 }
823
824
825 /* Free growth caches. */
826
827 void
828 free_growth_caches (void)
829 {
830 edge_growth_cache.release ();
831 }
832
833
 834 /* Dump edge summaries associated with NODE and recursively to all clones.
835 Indent by INDENT. */
836
837 static void
838 dump_ipa_call_summary (FILE *f, int indent, struct cgraph_node *node,
839 struct inline_summary *info)
840 {
841 struct cgraph_edge *edge;
842 for (edge = node->callees; edge; edge = edge->next_callee)
843 {
844 struct ipa_call_summary *es = ipa_call_summaries->get (edge);
845 struct cgraph_node *callee = edge->callee->ultimate_alias_target ();
846 int i;
847
848 fprintf (f,
849 "%*s%s/%i %s\n%*s loop depth:%2i freq:%4i size:%2i"
850 " time: %2i callee size:%2i stack:%2i",
851 indent, "", callee->name (), callee->order,
852 !edge->inline_failed
853 ? "inlined" : cgraph_inline_failed_string (edge-> inline_failed),
854 indent, "", es->loop_depth, edge->frequency,
855 es->call_stmt_size, es->call_stmt_time,
856 (int) inline_summaries->get (callee)->size / INLINE_SIZE_SCALE,
857 (int) inline_summaries->get (callee)->estimated_stack_size);
858
859 if (es->predicate)
860 {
861 fprintf (f, " predicate: ");
862 es->predicate->dump (f, info->conds);
863 }
864 else
865 fprintf (f, "\n");
866 if (es->param.exists ())
867 for (i = 0; i < (int) es->param.length (); i++)
868 {
869 int prob = es->param[i].change_prob;
870
871 if (!prob)
872 fprintf (f, "%*s op%i is compile time invariant\n",
873 indent + 2, "", i);
874 else if (prob != REG_BR_PROB_BASE)
875 fprintf (f, "%*s op%i change %f%% of time\n", indent + 2, "", i,
876 prob * 100.0 / REG_BR_PROB_BASE);
877 }
878 if (!edge->inline_failed)
879 {
880 fprintf (f, "%*sStack frame offset %i, callee self size %i,"
881 " callee size %i\n",
882 indent + 2, "",
883 (int) inline_summaries->get (callee)->stack_frame_offset,
884 (int) inline_summaries->get (callee)->estimated_self_stack_size,
885 (int) inline_summaries->get (callee)->estimated_stack_size);
886 dump_ipa_call_summary (f, indent + 2, callee, info);
887 }
888 }
889 for (edge = node->indirect_calls; edge; edge = edge->next_callee)
890 {
891 struct ipa_call_summary *es = ipa_call_summaries->get (edge);
892 fprintf (f, "%*sindirect call loop depth:%2i freq:%4i size:%2i"
893 " time: %2i",
894 indent, "",
895 es->loop_depth,
896 edge->frequency, es->call_stmt_size, es->call_stmt_time);
897 if (es->predicate)
898 {
899 fprintf (f, "predicate: ");
900 es->predicate->dump (f, info->conds);
901 }
902 else
903 fprintf (f, "\n");
904 }
905 }
906
907
908 void
909 dump_inline_summary (FILE *f, struct cgraph_node *node)
910 {
911 if (node->definition)
912 {
913 struct inline_summary *s = inline_summaries->get (node);
914 size_time_entry *e;
915 int i;
916 fprintf (f, "Inline summary for %s/%i", node->name (),
917 node->order);
918 if (DECL_DISREGARD_INLINE_LIMITS (node->decl))
919 fprintf (f, " always_inline");
920 if (s->inlinable)
921 fprintf (f, " inlinable");
922 if (s->contains_cilk_spawn)
923 fprintf (f, " contains_cilk_spawn");
924 if (s->fp_expressions)
925 fprintf (f, " fp_expression");
926 fprintf (f, "\n self time: %f\n", s->self_time.to_double ());
927 fprintf (f, " global time: %f\n", s->time.to_double ());
928 fprintf (f, " self size: %i\n", s->self_size);
929 fprintf (f, " global size: %i\n", s->size);
930 fprintf (f, " min size: %i\n", s->min_size);
931 fprintf (f, " self stack: %i\n",
932 (int) s->estimated_self_stack_size);
933 fprintf (f, " global stack: %i\n", (int) s->estimated_stack_size);
934 if (s->growth)
935 fprintf (f, " estimated growth:%i\n", (int) s->growth);
936 if (s->scc_no)
937 fprintf (f, " In SCC: %i\n", (int) s->scc_no);
938 for (i = 0; vec_safe_iterate (s->entry, i, &e); i++)
939 {
940 fprintf (f, " size:%f, time:%f",
941 (double) e->size / INLINE_SIZE_SCALE,
942 e->time.to_double ());
943 if (e->exec_predicate != true)
944 {
945 fprintf (f, ", executed if:");
946 e->exec_predicate.dump (f, s->conds, 0);
947 }
948 if (e->exec_predicate != e->nonconst_predicate)
949 {
950 fprintf (f, ", nonconst if:");
951 e->nonconst_predicate.dump (f, s->conds, 0);
952 }
953 fprintf (f, "\n");
954 }
955 if (s->loop_iterations)
956 {
957 fprintf (f, " loop iterations:");
958 s->loop_iterations->dump (f, s->conds);
959 }
960 if (s->loop_stride)
961 {
962 fprintf (f, " loop stride:");
963 s->loop_stride->dump (f, s->conds);
964 }
965 if (s->array_index)
966 {
967 fprintf (f, " array index:");
968 s->array_index->dump (f, s->conds);
969 }
970 fprintf (f, " calls:\n");
971 dump_ipa_call_summary (f, 4, node, s);
972 fprintf (f, "\n");
973 }
974 }
975
976 DEBUG_FUNCTION void
977 debug_inline_summary (struct cgraph_node *node)
978 {
979 dump_inline_summary (stderr, node);
980 }
981
982 void
983 dump_inline_summaries (FILE *f)
984 {
985 struct cgraph_node *node;
986
987 FOR_EACH_DEFINED_FUNCTION (node)
988 if (!node->global.inlined_to)
989 dump_inline_summary (f, node);
990 }
991
 992 /* Give an initial reason why inlining would fail on EDGE. This gets either
 993 nullified or, usually, overwritten by a more precise reason later. */
994
995 void
996 initialize_inline_failed (struct cgraph_edge *e)
997 {
998 struct cgraph_node *callee = e->callee;
999
1000 if (e->inline_failed && e->inline_failed != CIF_BODY_NOT_AVAILABLE
1001 && cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
1002 ;
1003 else if (e->indirect_unknown_callee)
1004 e->inline_failed = CIF_INDIRECT_UNKNOWN_CALL;
1005 else if (!callee->definition)
1006 e->inline_failed = CIF_BODY_NOT_AVAILABLE;
1007 else if (callee->local.redefined_extern_inline)
1008 e->inline_failed = CIF_REDEFINED_EXTERN_INLINE;
1009 else
1010 e->inline_failed = CIF_FUNCTION_NOT_CONSIDERED;
1011 gcc_checking_assert (!e->call_stmt_cannot_inline_p
1012 || cgraph_inline_failed_type (e->inline_failed)
1013 == CIF_FINAL_ERROR);
1014 }
1015
 1016 /* Callback of walk_aliased_vdefs. Records that it has been invoked in the
 1017 boolean variable pointed to by DATA. */
1018
1019 static bool
1020 mark_modified (ao_ref *ao ATTRIBUTE_UNUSED, tree vdef ATTRIBUTE_UNUSED,
1021 void *data)
1022 {
1023 bool *b = (bool *) data;
1024 *b = true;
1025 return true;
1026 }
1027
 1028 /* If OP refers to the value of a function parameter, return the corresponding
 1029 parameter. If SIZE_P is non-NULL, the size of the memory load (or of the
 1030 SSA_NAME of the PARM_DECL) will be stored to *SIZE_P in that case too. */
1031
1032 static tree
1033 unmodified_parm_1 (gimple *stmt, tree op, HOST_WIDE_INT *size_p)
1034 {
1035 /* SSA_NAME referring to parm default def? */
1036 if (TREE_CODE (op) == SSA_NAME
1037 && SSA_NAME_IS_DEFAULT_DEF (op)
1038 && TREE_CODE (SSA_NAME_VAR (op)) == PARM_DECL)
1039 {
1040 if (size_p)
1041 *size_p = tree_to_shwi (TYPE_SIZE (TREE_TYPE (op)));
1042 return SSA_NAME_VAR (op);
1043 }
1044 /* Non-SSA parm reference? */
1045 if (TREE_CODE (op) == PARM_DECL)
1046 {
1047 bool modified = false;
1048
1049 ao_ref refd;
1050 ao_ref_init (&refd, op);
1051 walk_aliased_vdefs (&refd, gimple_vuse (stmt), mark_modified, &modified,
1052 NULL);
1053 if (!modified)
1054 {
1055 if (size_p)
1056 *size_p = tree_to_shwi (TYPE_SIZE (TREE_TYPE (op)));
1057 return op;
1058 }
1059 }
1060 return NULL_TREE;
1061 }
1062
 1063 /* If OP refers to the value of a function parameter, return the corresponding
 1064 parameter. Also traverse chains of SSA register assignments. If SIZE_P is
 1065 non-NULL, the size of the memory load (or of the SSA_NAME of the PARM_DECL)
 1066 will be stored to *SIZE_P in that case too. */
1067
1068 static tree
1069 unmodified_parm (gimple *stmt, tree op, HOST_WIDE_INT *size_p)
1070 {
1071 tree res = unmodified_parm_1 (stmt, op, size_p);
1072 if (res)
1073 return res;
1074
1075 if (TREE_CODE (op) == SSA_NAME
1076 && !SSA_NAME_IS_DEFAULT_DEF (op)
1077 && gimple_assign_single_p (SSA_NAME_DEF_STMT (op)))
1078 return unmodified_parm (SSA_NAME_DEF_STMT (op),
1079 gimple_assign_rhs1 (SSA_NAME_DEF_STMT (op)),
1080 size_p);
1081 return NULL_TREE;
1082 }
1083
1084 /* If OP refers to a value of a function parameter or value loaded from an
1085 aggregate passed to a parameter (either by value or reference), return TRUE
1086 and store the number of the parameter to *INDEX_P, the access size into
1087 *SIZE_P, and information whether and how it has been loaded from an
1088 aggregate into *AGGPOS. INFO describes the function parameters, STMT is the
1089 statement in which OP is used or loaded. */
1090
1091 static bool
1092 unmodified_parm_or_parm_agg_item (struct ipa_func_body_info *fbi,
1093 gimple *stmt, tree op, int *index_p,
1094 HOST_WIDE_INT *size_p,
1095 struct agg_position_info *aggpos)
1096 {
1097 tree res = unmodified_parm_1 (stmt, op, size_p);
1098
1099 gcc_checking_assert (aggpos);
1100 if (res)
1101 {
1102 *index_p = ipa_get_param_decl_index (fbi->info, res);
1103 if (*index_p < 0)
1104 return false;
1105 aggpos->agg_contents = false;
1106 aggpos->by_ref = false;
1107 return true;
1108 }
1109
1110 if (TREE_CODE (op) == SSA_NAME)
1111 {
1112 if (SSA_NAME_IS_DEFAULT_DEF (op)
1113 || !gimple_assign_single_p (SSA_NAME_DEF_STMT (op)))
1114 return false;
1115 stmt = SSA_NAME_DEF_STMT (op);
1116 op = gimple_assign_rhs1 (stmt);
1117 if (!REFERENCE_CLASS_P (op))
1118 return unmodified_parm_or_parm_agg_item (fbi, stmt, op, index_p, size_p,
1119 aggpos);
1120 }
1121
1122 aggpos->agg_contents = true;
1123 return ipa_load_from_parm_agg (fbi, fbi->info->descriptors,
1124 stmt, op, index_p, &aggpos->offset,
1125 size_p, &aggpos->by_ref);
1126 }
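
/* For instance (an illustrative example), in

     int f (int x, struct s *p) { return x + p->fld; }

   the use of "x" satisfies the first case (*INDEX_P = 0,
   agg_contents = false), while the load "p->fld" is a load from an
   aggregate passed by reference (*INDEX_P = 1, agg_contents = true,
   by_ref = true and offset the bit offset of "fld").  */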
1127
1128 /* See if statement might disappear after inlining.
 1129 0 - not eliminated
 1130 1 - half of the statements go away
 1131 2 - it is eliminated for sure.
1132 We are not terribly sophisticated, basically looking for simple abstraction
1133 penalty wrappers. */
1134
1135 static int
1136 eliminated_by_inlining_prob (gimple *stmt)
1137 {
1138 enum gimple_code code = gimple_code (stmt);
1139 enum tree_code rhs_code;
1140
1141 if (!optimize)
1142 return 0;
1143
1144 switch (code)
1145 {
1146 case GIMPLE_RETURN:
1147 return 2;
1148 case GIMPLE_ASSIGN:
1149 if (gimple_num_ops (stmt) != 2)
1150 return 0;
1151
1152 rhs_code = gimple_assign_rhs_code (stmt);
1153
1154 /* Casts of parameters, loads from parameters passed by reference
 1155 and stores to the return value or parameters are often free after
 1156 inlining due to SRA and further combining.
 1157 Assume that half of the statements go away. */
1158 if (CONVERT_EXPR_CODE_P (rhs_code)
1159 || rhs_code == VIEW_CONVERT_EXPR
1160 || rhs_code == ADDR_EXPR
1161 || gimple_assign_rhs_class (stmt) == GIMPLE_SINGLE_RHS)
1162 {
1163 tree rhs = gimple_assign_rhs1 (stmt);
1164 tree lhs = gimple_assign_lhs (stmt);
1165 tree inner_rhs = get_base_address (rhs);
1166 tree inner_lhs = get_base_address (lhs);
1167 bool rhs_free = false;
1168 bool lhs_free = false;
1169
1170 if (!inner_rhs)
1171 inner_rhs = rhs;
1172 if (!inner_lhs)
1173 inner_lhs = lhs;
1174
 1175 /* Reads of parameters are expected to be free. */
1176 if (unmodified_parm (stmt, inner_rhs, NULL))
1177 rhs_free = true;
 1178 /* Match expressions of the form &this->field. Those will most likely
1179 combine with something upstream after inlining. */
1180 else if (TREE_CODE (inner_rhs) == ADDR_EXPR)
1181 {
1182 tree op = get_base_address (TREE_OPERAND (inner_rhs, 0));
1183 if (TREE_CODE (op) == PARM_DECL)
1184 rhs_free = true;
1185 else if (TREE_CODE (op) == MEM_REF
1186 && unmodified_parm (stmt, TREE_OPERAND (op, 0), NULL))
1187 rhs_free = true;
1188 }
1189
 1190 /* When a parameter is not an SSA register because its address is taken
 1191 and it is just copied into one, the statement will be completely
 1192 free after inlining (we will copy-propagate backward). */
1193 if (rhs_free && is_gimple_reg (lhs))
1194 return 2;
1195
 1196 /* Reads of parameters passed by reference are
 1197 expected to be free (i.e. optimized out after inlining). */
1198 if (TREE_CODE (inner_rhs) == MEM_REF
1199 && unmodified_parm (stmt, TREE_OPERAND (inner_rhs, 0), NULL))
1200 rhs_free = true;
1201
1202 /* Copying parameter passed by reference into gimple register is
1203 probably also going to copy propagate, but we can't be quite
1204 sure. */
1205 if (rhs_free && is_gimple_reg (lhs))
1206 lhs_free = true;
1207
1208 /* Writes to parameters, parameters passed by value and return value
 1209 (either directly or passed via invisible reference) are free.
1210
1211 TODO: We ought to handle testcase like
1212 struct a {int a,b;};
1213 struct a
 1214 returnstruct (void)
1215 {
1216 struct a a ={1,2};
1217 return a;
1218 }
1219
 1220 This translates into:
1221
 1222 returnstruct ()
1223 {
1224 int a$b;
1225 int a$a;
1226 struct a a;
1227 struct a D.2739;
1228
1229 <bb 2>:
1230 D.2739.a = 1;
1231 D.2739.b = 2;
1232 return D.2739;
1233
1234 }
 1235 For that we would need to copy the ipa-split logic detecting writes
 1236 to the return value. */
1237 if (TREE_CODE (inner_lhs) == PARM_DECL
1238 || TREE_CODE (inner_lhs) == RESULT_DECL
1239 || (TREE_CODE (inner_lhs) == MEM_REF
1240 && (unmodified_parm (stmt, TREE_OPERAND (inner_lhs, 0), NULL)
1241 || (TREE_CODE (TREE_OPERAND (inner_lhs, 0)) == SSA_NAME
1242 && SSA_NAME_VAR (TREE_OPERAND (inner_lhs, 0))
1243 && TREE_CODE (SSA_NAME_VAR (TREE_OPERAND
1244 (inner_lhs,
1245 0))) == RESULT_DECL))))
1246 lhs_free = true;
1247 if (lhs_free
1248 && (is_gimple_reg (rhs) || is_gimple_min_invariant (rhs)))
1249 rhs_free = true;
1250 if (lhs_free && rhs_free)
1251 return 1;
1252 }
1253 return 0;
1254 default:
1255 return 0;
1256 }
1257 }
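
/* For example (illustrative):

     struct s { int fld; };
     int get_fld (struct s *p) { return p->fld; }

   The load of "p->fld" dereferences an unmodified parameter, so it
   scores 1 (half expected to disappear), and the GIMPLE_RETURN scores
   2 (eliminated for sure); such accessor wrappers therefore look
   very cheap to inline.  */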
1258
1259
 1260 /* If BB ends with a conditional that we can turn into predicates, attach
 1261 the corresponding predicates to the CFG edges. */
1262
1263 static void
1264 set_cond_stmt_execution_predicate (struct ipa_func_body_info *fbi,
1265 struct inline_summary *summary,
1266 basic_block bb)
1267 {
1268 gimple *last;
1269 tree op;
1270 int index;
1271 HOST_WIDE_INT size;
1272 struct agg_position_info aggpos;
1273 enum tree_code code, inverted_code;
1274 edge e;
1275 edge_iterator ei;
1276 gimple *set_stmt;
1277 tree op2;
1278
1279 last = last_stmt (bb);
1280 if (!last || gimple_code (last) != GIMPLE_COND)
1281 return;
1282 if (!is_gimple_ip_invariant (gimple_cond_rhs (last)))
1283 return;
1284 op = gimple_cond_lhs (last);
1285 /* TODO: handle conditionals like
1286 var = op0 < 4;
1287 if (var != 0). */
1288 if (unmodified_parm_or_parm_agg_item (fbi, last, op, &index, &size, &aggpos))
1289 {
1290 code = gimple_cond_code (last);
1291 inverted_code = invert_tree_comparison (code, HONOR_NANS (op));
1292
1293 FOR_EACH_EDGE (e, ei, bb->succs)
1294 {
1295 enum tree_code this_code = (e->flags & EDGE_TRUE_VALUE
1296 ? code : inverted_code);
1297 /* invert_tree_comparison will return ERROR_MARK on FP
 1298 comparisons that are not EQ/NE instead of returning the proper
1299 unordered one. Be sure it is not confused with NON_CONSTANT. */
1300 if (this_code != ERROR_MARK)
1301 {
1302 predicate p
1303 = add_condition (summary, index, size, &aggpos, this_code,
1304 unshare_expr_without_location
1305 (gimple_cond_rhs (last)));
1306 e->aux = edge_predicate_pool.allocate ();
1307 *(predicate *) e->aux = p;
1308 }
1309 }
1310 }
1311
1312 if (TREE_CODE (op) != SSA_NAME)
1313 return;
1314 /* Special case
1315 if (builtin_constant_p (op))
1316 constant_code
1317 else
1318 nonconstant_code.
1319 Here we can predicate nonconstant_code. We can't
1320 really handle constant_code since we have no predicate
1321 for this and also the constant code is not known to be
 1322 optimized away when the inliner doesn't see that the operand is constant.
1323 Other optimizers might think otherwise. */
1324 if (gimple_cond_code (last) != NE_EXPR
1325 || !integer_zerop (gimple_cond_rhs (last)))
1326 return;
1327 set_stmt = SSA_NAME_DEF_STMT (op);
1328 if (!gimple_call_builtin_p (set_stmt, BUILT_IN_CONSTANT_P)
1329 || gimple_call_num_args (set_stmt) != 1)
1330 return;
1331 op2 = gimple_call_arg (set_stmt, 0);
1332 if (!unmodified_parm_or_parm_agg_item (fbi, set_stmt, op2, &index, &size,
1333 &aggpos))
1334 return;
1335 FOR_EACH_EDGE (e, ei, bb->succs) if (e->flags & EDGE_FALSE_VALUE)
1336 {
1337 predicate p = add_condition (summary, index, size, &aggpos,
1338 predicate::is_not_constant, NULL_TREE);
1339 e->aux = edge_predicate_pool.allocate ();
1340 *(predicate *) e->aux = p;
1341 }
1342 }
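
/* For example (illustrative), for

     if (param > 4) ... else ...

   the true edge gets the predicate "param > 4" and the false edge the
   inverted "param <= 4", while for

     if (__builtin_constant_p (param)) ... else ...

   only the false edge gets a predicate, "param is not constant".  */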
1343
1344
 1345 /* If BB ends with a switch that we can turn into predicates, attach
 1346 the corresponding predicates to the CFG edges. */
1347
1348 static void
1349 set_switch_stmt_execution_predicate (struct ipa_func_body_info *fbi,
1350 struct inline_summary *summary,
1351 basic_block bb)
1352 {
1353 gimple *lastg;
1354 tree op;
1355 int index;
1356 HOST_WIDE_INT size;
1357 struct agg_position_info aggpos;
1358 edge e;
1359 edge_iterator ei;
1360 size_t n;
1361 size_t case_idx;
1362
1363 lastg = last_stmt (bb);
1364 if (!lastg || gimple_code (lastg) != GIMPLE_SWITCH)
1365 return;
1366 gswitch *last = as_a <gswitch *> (lastg);
1367 op = gimple_switch_index (last);
1368 if (!unmodified_parm_or_parm_agg_item (fbi, last, op, &index, &size, &aggpos))
1369 return;
1370
1371 FOR_EACH_EDGE (e, ei, bb->succs)
1372 {
1373 e->aux = edge_predicate_pool.allocate ();
1374 *(predicate *) e->aux = false;
1375 }
1376 n = gimple_switch_num_labels (last);
1377 for (case_idx = 0; case_idx < n; ++case_idx)
1378 {
1379 tree cl = gimple_switch_label (last, case_idx);
1380 tree min, max;
1381 predicate p;
1382
1383 e = find_edge (bb, label_to_block (CASE_LABEL (cl)));
1384 min = CASE_LOW (cl);
1385 max = CASE_HIGH (cl);
1386
 1387 /* For the default case we might want to construct a predicate that none
 1388 of the cases is met, but that is a bit hard to do without having
 1389 negations of the conditionals handy. */
1390 if (!min && !max)
1391 p = true;
1392 else if (!max)
1393 p = add_condition (summary, index, size, &aggpos, EQ_EXPR,
1394 unshare_expr_without_location (min));
1395 else
1396 {
1397 predicate p1, p2;
1398 p1 = add_condition (summary, index, size, &aggpos, GE_EXPR,
1399 unshare_expr_without_location (min));
1400 p2 = add_condition (summary, index, size, &aggpos, LE_EXPR,
1401 unshare_expr_without_location (max));
1402 p = p1 & p2;
1403 }
1404 *(struct predicate *) e->aux
1405 = p.or_with (summary->conds, *(struct predicate *) e->aux);
1406 }
1407 }
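
/* For example (illustrative), for

     switch (param) { case 0: ... case 3 ... 5: ... default: ... }

   the edge to "case 0" gets the predicate "param == 0", the edge to
   "case 3 ... 5" gets "param >= 3 && param <= 5" and the default
   edge gets the always-true predicate.  */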
1408
1409
 1410 /* For each BB in NODE attach to its AUX pointer the predicate under
 1411 which it is executable. */
1412
1413 static void
1414 compute_bb_predicates (struct ipa_func_body_info *fbi,
1415 struct cgraph_node *node,
1416 struct inline_summary *summary)
1417 {
1418 struct function *my_function = DECL_STRUCT_FUNCTION (node->decl);
1419 bool done = false;
1420 basic_block bb;
1421
1422 FOR_EACH_BB_FN (bb, my_function)
1423 {
1424 set_cond_stmt_execution_predicate (fbi, summary, bb);
1425 set_switch_stmt_execution_predicate (fbi, summary, bb);
1426 }
1427
1428 /* Entry block is always executable. */
1429 ENTRY_BLOCK_PTR_FOR_FN (my_function)->aux
1430 = edge_predicate_pool.allocate ();
1431 *(predicate *) ENTRY_BLOCK_PTR_FOR_FN (my_function)->aux = true;
1432
1433 /* A simple dataflow propagation of predicates forward in the CFG.
1434 TODO: work in reverse postorder. */
1435 while (!done)
1436 {
1437 done = true;
1438 FOR_EACH_BB_FN (bb, my_function)
1439 {
1440 predicate p = false;
1441 edge e;
1442 edge_iterator ei;
1443 FOR_EACH_EDGE (e, ei, bb->preds)
1444 {
1445 if (e->src->aux)
1446 {
1447 predicate this_bb_predicate
1448 = *(predicate *) e->src->aux;
1449 if (e->aux)
1450 this_bb_predicate &= (*(struct predicate *) e->aux);
1451 p = p.or_with (summary->conds, this_bb_predicate);
1452 if (p == true)
1453 break;
1454 }
1455 }
1456 if (p == false)
1457 gcc_checking_assert (!bb->aux);
1458 else
1459 {
1460 if (!bb->aux)
1461 {
1462 done = false;
1463 bb->aux = edge_predicate_pool.allocate ();
1464 *((predicate *) bb->aux) = p;
1465 }
1466 else if (p != *(predicate *) bb->aux)
1467 {
 1468 /* This OR operation is needed to ensure monotone data flow
 1469 in case we hit the limit on the number of clauses and the
 1470 and/or operations above give approximate answers. */
1471 p = p.or_with (summary->conds, *(predicate *)bb->aux);
1472 if (p != *(predicate *) bb->aux)
1473 {
1474 done = false;
1475 *((predicate *) bb->aux) = p;
1476 }
1477 }
1478 }
1479 }
1480 }
1481 }
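
/* The fixpoint computed above solves, for every basic block BB,

     predicate (BB) = OR over incoming edges E of
		      (predicate (E->src) AND predicate (E))

   with the entry block seeded to true.  Blocks whose predicate stays
   false are not executable under any evaluation of the conditions.  */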
1482
1483
1484 /* We keep info about constantness of SSA names. */
1485
1486 typedef predicate predicate_t;
 1487 /* Return a predicate specifying when the result of EXPR might not be
 1488 a compile time constant. */
1489
1490 static predicate
1491 will_be_nonconstant_expr_predicate (struct ipa_node_params *info,
1492 struct inline_summary *summary,
1493 tree expr,
1494 vec<predicate_t> nonconstant_names)
1495 {
1496 tree parm;
1497 int index;
1498 HOST_WIDE_INT size;
1499
1500 while (UNARY_CLASS_P (expr))
1501 expr = TREE_OPERAND (expr, 0);
1502
1503 parm = unmodified_parm (NULL, expr, &size);
1504 if (parm && (index = ipa_get_param_decl_index (info, parm)) >= 0)
1505 return add_condition (summary, index, size, NULL, predicate::changed,
1506 NULL_TREE);
1507 if (is_gimple_min_invariant (expr))
1508 return false;
1509 if (TREE_CODE (expr) == SSA_NAME)
1510 return nonconstant_names[SSA_NAME_VERSION (expr)];
1511 if (BINARY_CLASS_P (expr) || COMPARISON_CLASS_P (expr))
1512 {
1513 predicate p1 = will_be_nonconstant_expr_predicate
1514 (info, summary, TREE_OPERAND (expr, 0),
1515 nonconstant_names);
1516 if (p1 == true)
1517 return p1;
1518
1519 predicate p2;
1520 p2 = will_be_nonconstant_expr_predicate (info, summary,
1521 TREE_OPERAND (expr, 1),
1522 nonconstant_names);
1523 return p1.or_with (summary->conds, p2);
1524 }
1525 else if (TREE_CODE (expr) == COND_EXPR)
1526 {
1527 predicate p1 = will_be_nonconstant_expr_predicate
1528 (info, summary, TREE_OPERAND (expr, 0),
1529 nonconstant_names);
1530 if (p1 == true)
1531 return p1;
1532
1533 predicate p2;
1534 p2 = will_be_nonconstant_expr_predicate (info, summary,
1535 TREE_OPERAND (expr, 1),
1536 nonconstant_names);
1537 if (p2 == true)
1538 return p2;
1539 p1 = p1.or_with (summary->conds, p2);
1540 p2 = will_be_nonconstant_expr_predicate (info, summary,
1541 TREE_OPERAND (expr, 2),
1542 nonconstant_names);
1543 return p2.or_with (summary->conds, p1);
1544 }
1545 else
1546 {
1547 debug_tree (expr);
1548 gcc_unreachable ();
1549 }
1550 return false;
1551 }
1552
1553
 1554 /* Return a predicate specifying when the result of STMT might not be
 1555 a compile time constant. */
1556
1557 static predicate
1558 will_be_nonconstant_predicate (struct ipa_func_body_info *fbi,
1559 struct inline_summary *summary,
1560 gimple *stmt,
1561 vec<predicate_t> nonconstant_names)
1562 {
1563 predicate p = true;
1564 ssa_op_iter iter;
1565 tree use;
1566 predicate op_non_const;
1567 bool is_load;
1568 int base_index;
1569 HOST_WIDE_INT size;
1570 struct agg_position_info aggpos;
1571
 1572 /* Which statements might be optimized away
 1573 when their arguments are constant. */
1574 if (gimple_code (stmt) != GIMPLE_ASSIGN
1575 && gimple_code (stmt) != GIMPLE_COND
1576 && gimple_code (stmt) != GIMPLE_SWITCH
1577 && (gimple_code (stmt) != GIMPLE_CALL
1578 || !(gimple_call_flags (stmt) & ECF_CONST)))
1579 return p;
1580
1581 /* Stores will stay anyway. */
1582 if (gimple_store_p (stmt))
1583 return p;
1584
1585 is_load = gimple_assign_load_p (stmt);
1586
1587 /* Loads can be optimized when the value is known. */
1588 if (is_load)
1589 {
1590 tree op;
1591 gcc_assert (gimple_assign_single_p (stmt));
1592 op = gimple_assign_rhs1 (stmt);
1593 if (!unmodified_parm_or_parm_agg_item (fbi, stmt, op, &base_index, &size,
1594 &aggpos))
1595 return p;
1596 }
1597 else
1598 base_index = -1;
1599
1600 /* See if we understand all operands before we start
1601 adding conditionals. */
1602 FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
1603 {
1604 tree parm = unmodified_parm (stmt, use, NULL);
1605 /* For arguments we can build a condition. */
1606 if (parm && ipa_get_param_decl_index (fbi->info, parm) >= 0)
1607 continue;
1608 if (TREE_CODE (use) != SSA_NAME)
1609 return p;
1610 /* If we know when operand is constant,
1611 we still can say something useful. */
1612 if (nonconstant_names[SSA_NAME_VERSION (use)] != true)
1613 continue;
1614 return p;
1615 }
1616
1617 if (is_load)
1618 op_non_const =
1619 add_condition (summary, base_index, size, &aggpos, predicate::changed,
1620 NULL);
1621 else
1622 op_non_const = false;
1623 FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
1624 {
1625 HOST_WIDE_INT size;
1626 tree parm = unmodified_parm (stmt, use, &size);
1627 int index;
1628
1629 if (parm && (index = ipa_get_param_decl_index (fbi->info, parm)) >= 0)
1630 {
1631 if (index != base_index)
1632 p = add_condition (summary, index, size, NULL, predicate::changed,
1633 NULL_TREE);
1634 else
1635 continue;
1636 }
1637 else
1638 p = nonconstant_names[SSA_NAME_VERSION (use)];
1639 op_non_const = p.or_with (summary->conds, op_non_const);
1640 }
1641 if ((gimple_code (stmt) == GIMPLE_ASSIGN || gimple_code (stmt) == GIMPLE_CALL)
1642 && gimple_op (stmt, 0)
1643 && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
1644 nonconstant_names[SSA_NAME_VERSION (gimple_op (stmt, 0))]
1645 = op_non_const;
1646 return op_non_const;
1647 }
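
/* For example (illustrative), for the statement

     _1 = param_2 + 4;

   the result is a compile time constant whenever param_2 is, so
   nonconstant_names[_1] becomes the predicate "param 2 changed" and
   later uses of _1 inherit that predicate through the SSA name.  */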
1648
1649 struct record_modified_bb_info
1650 {
1651 bitmap bb_set;
1652 gimple *stmt;
1653 };
1654
 1655 /* Value is initialized in INIT_BB and used in USE_BB. We want to compute
 1656 the probability that it changes between executions of USE_BB.
 1657 INIT_BB->frequency/USE_BB->frequency is an estimate, but if INIT_BB
 1658 is in a different loop nest, we can do better.
 1659 This is all just an estimate. In theory we would look for a minimal cut
 1660 separating INIT_BB and USE_BB, but we only want to anticipate loop
 1661 invariant motion anyway. */
1662
1663 static basic_block
1664 get_minimal_bb (basic_block init_bb, basic_block use_bb)
1665 {
1666 struct loop *l = find_common_loop (init_bb->loop_father, use_bb->loop_father);
1667 if (l && l->header->frequency < init_bb->frequency)
1668 return l->header;
1669 return init_bb;
1670 }
1671
1672 /* Callback of walk_aliased_vdefs. Records basic blocks where the value may be
1673 set except for info->stmt. */
1674
1675 static bool
1676 record_modified (ao_ref *ao ATTRIBUTE_UNUSED, tree vdef, void *data)
1677 {
1678 struct record_modified_bb_info *info =
1679 (struct record_modified_bb_info *) data;
1680 if (SSA_NAME_DEF_STMT (vdef) == info->stmt)
1681 return false;
1682 bitmap_set_bit (info->bb_set,
1683 SSA_NAME_IS_DEFAULT_DEF (vdef)
1684 ? ENTRY_BLOCK_PTR_FOR_FN (cfun)->index
1685 : get_minimal_bb
1686 (gimple_bb (SSA_NAME_DEF_STMT (vdef)),
1687 gimple_bb (info->stmt))->index);
1688 return false;
1689 }
1690
 1691 /* Return the probability (based on REG_BR_PROB_BASE) that the I-th parameter
 1692 of STMT will change since the last invocation of STMT.
1693
1694 Value 0 is reserved for compile time invariants.
1695 For common parameters it is REG_BR_PROB_BASE. For loop invariants it
1696 ought to be REG_BR_PROB_BASE / estimated_iters. */
1697
1698 static int
1699 param_change_prob (gimple *stmt, int i)
1700 {
1701 tree op = gimple_call_arg (stmt, i);
1702 basic_block bb = gimple_bb (stmt);
1703
1704 if (TREE_CODE (op) == WITH_SIZE_EXPR)
1705 op = TREE_OPERAND (op, 0);
1706
1707 tree base = get_base_address (op);
1708
1709 /* Global invariants never change. */
1710 if (is_gimple_min_invariant (base))
1711 return 0;
1712
 1713 /* We would have to do non-trivial analysis to really work out the
 1714 probability of the value changing (i.e. when the init statement
 1715 is in a sibling loop of the call).
 1716
 1717 We make a conservative estimate: when the call is executed N times more
 1718 often than the statement defining the value, we take the frequency 1/N. */
1719 if (TREE_CODE (base) == SSA_NAME)
1720 {
1721 int init_freq;
1722
1723 if (!bb->frequency)
1724 return REG_BR_PROB_BASE;
1725
1726 if (SSA_NAME_IS_DEFAULT_DEF (base))
1727 init_freq = ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency;
1728 else
1729 init_freq = get_minimal_bb
1730 (gimple_bb (SSA_NAME_DEF_STMT (base)),
1731 gimple_bb (stmt))->frequency;
1732
1733 if (!init_freq)
1734 init_freq = 1;
1735 if (init_freq < bb->frequency)
1736 return MAX (GCOV_COMPUTE_SCALE (init_freq, bb->frequency), 1);
1737 else
1738 return REG_BR_PROB_BASE;
1739 }
1740 else
1741 {
1742 ao_ref refd;
1743 int max;
1744 struct record_modified_bb_info info;
1745 bitmap_iterator bi;
1746 unsigned index;
1747 tree init = ctor_for_folding (base);
1748
1749 if (init != error_mark_node)
1750 return 0;
1751 if (!bb->frequency)
1752 return REG_BR_PROB_BASE;
1753 ao_ref_init (&refd, op);
1754 info.stmt = stmt;
1755 info.bb_set = BITMAP_ALLOC (NULL);
1756 walk_aliased_vdefs (&refd, gimple_vuse (stmt), record_modified, &info,
1757 NULL);
1758 if (bitmap_bit_p (info.bb_set, bb->index))
1759 {
1760 BITMAP_FREE (info.bb_set);
1761 return REG_BR_PROB_BASE;
1762 }
1763
 1764 /* Assume that all memory is initialized at entry.
 1765 TODO: Can we easily determine if the value is always defined
 1766 and thus skip the entry block? */
1767 if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency)
1768 max = ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency;
1769 else
1770 max = 1;
1771
1772 EXECUTE_IF_SET_IN_BITMAP (info.bb_set, 0, index, bi)
1773 max = MIN (max, BASIC_BLOCK_FOR_FN (cfun, index)->frequency);
1774
1775 BITMAP_FREE (info.bb_set);
1776 if (max < bb->frequency)
1777 return MAX (GCOV_COMPUTE_SCALE (max, bb->frequency), 1);
1778 else
1779 return REG_BR_PROB_BASE;
1780 }
1781 }
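
/* A worked example (illustrative): if the statement defining the
   argument sits in a block with frequency 100 while the call block
   has frequency 1000, the call runs ten times more often than the
   definition, so the function returns
   GCOV_COMPUTE_SCALE (100, 1000) == REG_BR_PROB_BASE / 10.  */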
1782
 1783 /* Find whether a basic block BB is the final block of a (half) diamond CFG
 1784 sub-graph and whether the predicate the condition depends on is known.
 1785 If so, return true and store the predicate in *P. */
1786
1787 static bool
1788 phi_result_unknown_predicate (struct ipa_node_params *info,
1789 inline_summary *summary, basic_block bb,
1790 predicate *p,
1791 vec<predicate_t> nonconstant_names)
1792 {
1793 edge e;
1794 edge_iterator ei;
1795 basic_block first_bb = NULL;
1796 gimple *stmt;
1797
1798 if (single_pred_p (bb))
1799 {
1800 *p = false;
1801 return true;
1802 }
1803
1804 FOR_EACH_EDGE (e, ei, bb->preds)
1805 {
1806 if (single_succ_p (e->src))
1807 {
1808 if (!single_pred_p (e->src))
1809 return false;
1810 if (!first_bb)
1811 first_bb = single_pred (e->src);
1812 else if (single_pred (e->src) != first_bb)
1813 return false;
1814 }
1815 else
1816 {
1817 if (!first_bb)
1818 first_bb = e->src;
1819 else if (e->src != first_bb)
1820 return false;
1821 }
1822 }
1823
1824 if (!first_bb)
1825 return false;
1826
1827 stmt = last_stmt (first_bb);
1828 if (!stmt
1829 || gimple_code (stmt) != GIMPLE_COND
1830 || !is_gimple_ip_invariant (gimple_cond_rhs (stmt)))
1831 return false;
1832
1833 *p = will_be_nonconstant_expr_predicate (info, summary,
1834 gimple_cond_lhs (stmt),
1835 nonconstant_names);
1836 if (*p == true)
1837 return false;
1838 else
1839 return true;
1840 }
1841
1842 /* Given a PHI statement in a function described by inline properties SUMMARY
1843 and *P being the predicate describing whether the selected PHI argument is
1844 known, store a predicate for the result of the PHI statement into
1845 NONCONSTANT_NAMES, if possible. */
1846
1847 static void
1848 predicate_for_phi_result (struct inline_summary *summary, gphi *phi,
1849 predicate *p,
1850 vec<predicate_t> nonconstant_names)
1851 {
1852 unsigned i;
1853
1854 for (i = 0; i < gimple_phi_num_args (phi); i++)
1855 {
1856 tree arg = gimple_phi_arg (phi, i)->def;
1857 if (!is_gimple_min_invariant (arg))
1858 {
1859 gcc_assert (TREE_CODE (arg) == SSA_NAME);
1860 *p = p->or_with (summary->conds,
1861 nonconstant_names[SSA_NAME_VERSION (arg)]);
1862 if (*p == true)
1863 return;
1864 }
1865 }
1866
1867 if (dump_file && (dump_flags & TDF_DETAILS))
1868 {
1869 fprintf (dump_file, "\t\tphi predicate: ");
1870 p->dump (dump_file, summary->conds);
1871 }
1872 nonconstant_names[SSA_NAME_VERSION (gimple_phi_result (phi))] = *p;
1873 }
1874
1875 /* Return a predicate specifying when the array index in access OP becomes non-constant. */
1876
1877 static predicate
1878 array_index_predicate (inline_summary *info,
1879 vec< predicate_t> nonconstant_names, tree op)
1880 {
1881 predicate p = false;
1882 while (handled_component_p (op))
1883 {
1884 if (TREE_CODE (op) == ARRAY_REF || TREE_CODE (op) == ARRAY_RANGE_REF)
1885 {
1886 if (TREE_CODE (TREE_OPERAND (op, 1)) == SSA_NAME)
1887 p = p.or_with (info->conds,
1888 nonconstant_names[SSA_NAME_VERSION
1889 (TREE_OPERAND (op, 1))]);
1890 }
1891 op = TREE_OPERAND (op, 0);
1892 }
1893 return p;
1894 }
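/* For instance, for an access a[i][5] where only i is an SSA name, the
   predicate computed above is simply nonconstant_names[i]; every further
   variable subscript found while stripping the handled components is
   OR-ed in the same way.  */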
1895
1896 /* For a typical usage of __builtin_expect (a <= b, 1), we
1897 may introduce an extra relation stmt:
1898 With the builtin, we have
1899 t1 = a <= b;
1900 t2 = (long int) t1;
1901 t3 = __builtin_expect (t2, 1);
1902 if (t3 != 0)
1903 goto ...
1904 Without the builtin, we have
1905 if (a <= b)
1906 goto ...
1907 This affects the size/time estimation and may have
1908 an impact on early inlining.
1909 Find this pattern here so that its cost can be fixed up later. */
1910
1911 static gimple *
1912 find_foldable_builtin_expect (basic_block bb)
1913 {
1914 gimple_stmt_iterator bsi;
1915
1916 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1917 {
1918 gimple *stmt = gsi_stmt (bsi);
1919 if (gimple_call_builtin_p (stmt, BUILT_IN_EXPECT)
1920 || gimple_call_internal_p (stmt, IFN_BUILTIN_EXPECT))
1921 {
1922 tree var = gimple_call_lhs (stmt);
1923 tree arg = gimple_call_arg (stmt, 0);
1924 use_operand_p use_p;
1925 gimple *use_stmt;
1926 bool match = false;
1927 bool done = false;
1928
1929 if (!var || !arg)
1930 continue;
1931 gcc_assert (TREE_CODE (var) == SSA_NAME);
1932
1933 while (TREE_CODE (arg) == SSA_NAME)
1934 {
1935 gimple *stmt_tmp = SSA_NAME_DEF_STMT (arg);
1936 if (!is_gimple_assign (stmt_tmp))
1937 break;
1938 switch (gimple_assign_rhs_code (stmt_tmp))
1939 {
1940 case LT_EXPR:
1941 case LE_EXPR:
1942 case GT_EXPR:
1943 case GE_EXPR:
1944 case EQ_EXPR:
1945 case NE_EXPR:
1946 match = true;
1947 done = true;
1948 break;
1949 CASE_CONVERT:
1950 break;
1951 default:
1952 done = true;
1953 break;
1954 }
1955 if (done)
1956 break;
1957 arg = gimple_assign_rhs1 (stmt_tmp);
1958 }
1959
1960 if (match && single_imm_use (var, &use_p, &use_stmt)
1961 && gimple_code (use_stmt) == GIMPLE_COND)
1962 return use_stmt;
1963 }
1964 }
1965 return NULL;
1966 }
1967
1968 /* Return true when the basic block contains only clobbers followed by RESX.
1969 Such BBs are kept around to make removal of dead stores possible in the
1970 presence of EH and will be optimized out by optimize_clobbers later in the
1971 game.
1972
1973 NEED_EH is used to recurse in case the clobber has non-EH predecessors
1974 that can be clobber-only, too. When it is false, the RESX is not necessary
1975 at the end of the basic block. */
1976
1977 static bool
1978 clobber_only_eh_bb_p (basic_block bb, bool need_eh = true)
1979 {
1980 gimple_stmt_iterator gsi = gsi_last_bb (bb);
1981 edge_iterator ei;
1982 edge e;
1983
1984 if (need_eh)
1985 {
1986 if (gsi_end_p (gsi))
1987 return false;
1988 if (gimple_code (gsi_stmt (gsi)) != GIMPLE_RESX)
1989 return false;
1990 gsi_prev (&gsi);
1991 }
1992 else if (!single_succ_p (bb))
1993 return false;
1994
1995 for (; !gsi_end_p (gsi); gsi_prev (&gsi))
1996 {
1997 gimple *stmt = gsi_stmt (gsi);
1998 if (is_gimple_debug (stmt))
1999 continue;
2000 if (gimple_clobber_p (stmt))
2001 continue;
2002 if (gimple_code (stmt) == GIMPLE_LABEL)
2003 break;
2004 return false;
2005 }
2006
2007 /* See if all predecessors are either throws or clobber-only BBs. */
2008 FOR_EACH_EDGE (e, ei, bb->preds)
2009 if (!(e->flags & EDGE_EH)
2010 && !clobber_only_eh_bb_p (e->src, false))
2011 return false;
2012
2013 return true;
2014 }
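/* As an illustration, an EH landing pad of the form

     <bb N> (EH): var ={v} {CLOBBER}; resx 1;

   satisfies the test above and is therefore skipped by the size
   estimation, since optimize_clobbers will remove it anyway.  */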
2015
2016 /* Return true if STMT computes a floating-point expression that may be
2017 affected by -ffast-math and similar flags. */
2018
2019 static bool
2020 fp_expression_p (gimple *stmt)
2021 {
2022 ssa_op_iter i;
2023 tree op;
2024
2025 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_DEF|SSA_OP_USE)
2026 if (FLOAT_TYPE_P (TREE_TYPE (op)))
2027 return true;
2028 return false;
2029 }
2030
2031 /* Compute function body size parameters for NODE.
2032 When EARLY is true, we compute only simple summaries without
2033 non-trivial predicates to drive the early inliner. */
2034
2035 static void
2036 estimate_function_body_sizes (struct cgraph_node *node, bool early)
2037 {
2038 sreal time = 0;
2039 /* Estimate static overhead for function prologue/epilogue and alignment. */
2040 int size = 2;
2041 /* Benefits are scaled by the probability of elimination, which is in the
2042 range <0,2>. */
2043 basic_block bb;
2044 struct function *my_function = DECL_STRUCT_FUNCTION (node->decl);
2045 int freq;
2046 struct inline_summary *info = inline_summaries->get (node);
2047 predicate bb_predicate;
2048 struct ipa_func_body_info fbi;
2049 vec<predicate_t> nonconstant_names = vNULL;
2050 int nblocks, n;
2051 int *order;
2052 predicate array_index = true;
2053 gimple *fix_builtin_expect_stmt;
2054
2055 gcc_assert (my_function && my_function->cfg);
2056 gcc_assert (cfun == my_function);
2057
2058 memset (&fbi, 0, sizeof (fbi));
2059 info->conds = NULL;
2060 info->entry = NULL;
2061
2062 /* When optimizing and analyzing for IPA inliner, initialize loop optimizer
2063 so we can produce proper inline hints.
2064
2065 When optimizing and analyzing for early inliner, initialize node params
2066 so we can produce correct BB predicates. */
2067
2068 if (opt_for_fn (node->decl, optimize))
2069 {
2070 calculate_dominance_info (CDI_DOMINATORS);
2071 if (!early)
2072 loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);
2073 else
2074 {
2075 ipa_check_create_node_params ();
2076 ipa_initialize_node_params (node);
2077 }
2078
2079 if (ipa_node_params_sum)
2080 {
2081 fbi.node = node;
2082 fbi.info = IPA_NODE_REF (node);
2083 fbi.bb_infos = vNULL;
2084 fbi.bb_infos.safe_grow_cleared (last_basic_block_for_fn (cfun));
2085 fbi.param_count = count_formal_params (node->decl);
2086 nonconstant_names.safe_grow_cleared
2087 (SSANAMES (my_function)->length ());
2088 }
2089 }
2090
2091 if (dump_file)
2092 fprintf (dump_file, "\nAnalyzing function body size: %s\n",
2093 node->name ());
2094
2095 /* When we run into the maximal number of entries, we assign everything to
2096 the constant truth case. Be sure to have it in the list. */
2097 bb_predicate = true;
2098 account_size_time (info, 0, 0, &bb_predicate, &bb_predicate);
2099
2100 bb_predicate = predicate::not_inlined ();
2101 account_size_time (info, 2 * INLINE_SIZE_SCALE, 0, &bb_predicate,
2102 &bb_predicate);
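/* The entry just added is guarded by the not_inlined predicate, so the
   static prologue/epilogue overhead it models disappears from the
   estimates as soon as the function body is inlined somewhere.  */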
2103
2104 if (fbi.info)
2105 compute_bb_predicates (&fbi, node, info);
2106 order = XNEWVEC (int, n_basic_blocks_for_fn (cfun));
2107 nblocks = pre_and_rev_post_order_compute (NULL, order, false);
2108 for (n = 0; n < nblocks; n++)
2109 {
2110 bb = BASIC_BLOCK_FOR_FN (cfun, order[n]);
2111 freq = compute_call_stmt_bb_frequency (node->decl, bb);
2112 if (clobber_only_eh_bb_p (bb))
2113 {
2114 if (dump_file && (dump_flags & TDF_DETAILS))
2115 fprintf (dump_file, "\n Ignoring BB %i;"
2116 " it will be optimized away by cleanup_clobbers\n",
2117 bb->index);
2118 continue;
2119 }
2120
2121 /* TODO: Obviously predicates can be propagated down across the CFG. */
2122 if (fbi.info)
2123 {
2124 if (bb->aux)
2125 bb_predicate = *(predicate *) bb->aux;
2126 else
2127 bb_predicate = false;
2128 }
2129 else
2130 bb_predicate = true;
2131
2132 if (dump_file && (dump_flags & TDF_DETAILS))
2133 {
2134 fprintf (dump_file, "\n BB %i predicate:", bb->index);
2135 bb_predicate.dump (dump_file, info->conds);
2136 }
2137
2138 if (fbi.info && nonconstant_names.exists ())
2139 {
2140 predicate phi_predicate;
2141 bool first_phi = true;
2142
2143 for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
2144 gsi_next (&bsi))
2145 {
2146 if (first_phi
2147 && !phi_result_unknown_predicate (fbi.info, info, bb,
2148 &phi_predicate,
2149 nonconstant_names))
2150 break;
2151 first_phi = false;
2152 if (dump_file && (dump_flags & TDF_DETAILS))
2153 {
2154 fprintf (dump_file, " ");
2155 print_gimple_stmt (dump_file, gsi_stmt (bsi), 0);
2156 }
2157 predicate_for_phi_result (info, bsi.phi (), &phi_predicate,
2158 nonconstant_names);
2159 }
2160 }
2161
2162 fix_builtin_expect_stmt = find_foldable_builtin_expect (bb);
2163
2164 for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);
2165 gsi_next (&bsi))
2166 {
2167 gimple *stmt = gsi_stmt (bsi);
2168 int this_size = estimate_num_insns (stmt, &eni_size_weights);
2169 int this_time = estimate_num_insns (stmt, &eni_time_weights);
2170 int prob;
2171 predicate will_be_nonconstant;
2172
2173 /* This relation stmt should be folded after we remove the
2174 builtin_expect call. Adjust the cost here. */
2175 if (stmt == fix_builtin_expect_stmt)
2176 {
2177 this_size--;
2178 this_time--;
2179 }
2180
2181 if (dump_file && (dump_flags & TDF_DETAILS))
2182 {
2183 fprintf (dump_file, " ");
2184 print_gimple_stmt (dump_file, stmt, 0);
2185 fprintf (dump_file, "\t\tfreq:%3.2f size:%3i time:%3i\n",
2186 ((double) freq) / CGRAPH_FREQ_BASE, this_size,
2187 this_time);
2188 }
2189
2190 if (gimple_assign_load_p (stmt) && nonconstant_names.exists ())
2191 {
2192 predicate this_array_index;
2193 this_array_index =
2194 array_index_predicate (info, nonconstant_names,
2195 gimple_assign_rhs1 (stmt));
2196 if (this_array_index != false)
2197 array_index &= this_array_index;
2198 }
2199 if (gimple_store_p (stmt) && nonconstant_names.exists ())
2200 {
2201 predicate this_array_index;
2202 this_array_index =
2203 array_index_predicate (info, nonconstant_names,
2204 gimple_get_lhs (stmt));
2205 if (this_array_index != false)
2206 array_index &= this_array_index;
2207 }
2208
2209
2210 if (is_gimple_call (stmt)
2211 && !gimple_call_internal_p (stmt))
2212 {
2213 struct cgraph_edge *edge = node->get_edge (stmt);
2214 struct ipa_call_summary *es = ipa_call_summaries->get (edge);
2215
2216 /* Special case: results of BUILT_IN_CONSTANT_P will always be
2217 resolved as constant. We, however, don't want to optimize
2218 out the cgraph edges. */
2219 if (nonconstant_names.exists ()
2220 && gimple_call_builtin_p (stmt, BUILT_IN_CONSTANT_P)
2221 && gimple_call_lhs (stmt)
2222 && TREE_CODE (gimple_call_lhs (stmt)) == SSA_NAME)
2223 {
2224 predicate false_p = false;
2225 nonconstant_names[SSA_NAME_VERSION (gimple_call_lhs (stmt))]
2226 = false_p;
2227 }
2228 if (ipa_node_params_sum)
2229 {
2230 int count = gimple_call_num_args (stmt);
2231 int i;
2232
2233 if (count)
2234 es->param.safe_grow_cleared (count);
2235 for (i = 0; i < count; i++)
2236 {
2237 int prob = param_change_prob (stmt, i);
2238 gcc_assert (prob >= 0 && prob <= REG_BR_PROB_BASE);
2239 es->param[i].change_prob = prob;
2240 }
2241 }
2242
2243 es->call_stmt_size = this_size;
2244 es->call_stmt_time = this_time;
2245 es->loop_depth = bb_loop_depth (bb);
2246 edge_set_predicate (edge, &bb_predicate);
2247 }
2248
2249 /* TODO: When a conditional jump or switch is known to be constant, but
2250 we did not translate it into the predicates, we could really account
2251 for just the maximum of the possible paths. */
2252 if (fbi.info)
2253 will_be_nonconstant
2254 = will_be_nonconstant_predicate (&fbi, info,
2255 stmt, nonconstant_names);
2256 else
2257 will_be_nonconstant = true;
2258 if (this_time || this_size)
2259 {
2260 this_time *= freq;
2261
2262 prob = eliminated_by_inlining_prob (stmt);
2263 if (prob == 1 && dump_file && (dump_flags & TDF_DETAILS))
2264 fprintf (dump_file,
2265 "\t\t50%% will be eliminated by inlining\n");
2266 if (prob == 2 && dump_file && (dump_flags & TDF_DETAILS))
2267 fprintf (dump_file, "\t\tWill be eliminated by inlining\n");
2268
2269 struct predicate p = bb_predicate & will_be_nonconstant;
2270
2271 /* We can ignore a statement when we have proved it is never going
2272 to be executed, but we cannot do that for call statements
2273 because edges are accounted for specially. */
2274
2275 if (*(is_gimple_call (stmt) ? &bb_predicate : &p) != false)
2276 {
2277 time += this_time;
2278 size += this_size;
2279 }
2280
2281 /* We account everything but the calls. Calls have their own
2282 size/time info attached to cgraph edges. This is necessary
2283 in order to make the cost disappear after inlining. */
2284 if (!is_gimple_call (stmt))
2285 {
2286 if (prob)
2287 {
2288 predicate ip = bb_predicate & predicate::not_inlined ();
2289 account_size_time (info, this_size * prob,
2290 (sreal)(this_time * prob)
2291 / (CGRAPH_FREQ_BASE * 2), &ip,
2292 &p);
2293 }
2294 if (prob != 2)
2295 account_size_time (info, this_size * (2 - prob),
2296 (sreal)(this_time * (2 - prob))
2297 / (CGRAPH_FREQ_BASE * 2),
2298 &bb_predicate,
2299 &p);
2300 }
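/* To spell out the accounting above: eliminated_by_inlining_prob yields
   0, 1 or 2.  With prob == 1 the cost is split 50/50, one half guarded
   by the not-inlined predicate (surviving only in the offline copy) and
   one half unconditional; with prob == 2 the whole cost is guarded by
   the not-inlined predicate and vanishes from inlined copies.  */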
2301
2302 if (!info->fp_expressions && fp_expression_p (stmt))
2303 {
2304 info->fp_expressions = true;
2305 if (dump_file)
2306 fprintf (dump_file, " fp_expression set\n");
2307 }
2308
2309 gcc_assert (time >= 0);
2310 gcc_assert (size >= 0);
2311 }
2312 }
2313 }
2314 set_hint_predicate (&inline_summaries->get (node)->array_index, array_index);
2315 time = time / CGRAPH_FREQ_BASE;
2316 free (order);
2317
2318 if (nonconstant_names.exists () && !early)
2319 {
2320 struct loop *loop;
2321 predicate loop_iterations = true;
2322 predicate loop_stride = true;
2323
2324 if (dump_file && (dump_flags & TDF_DETAILS))
2325 flow_loops_dump (dump_file, NULL, 0);
2326 scev_initialize ();
2327 FOR_EACH_LOOP (loop, 0)
2328 {
2329 vec<edge> exits;
2330 edge ex;
2331 unsigned int j;
2332 struct tree_niter_desc niter_desc;
2333 bb_predicate = *(predicate *) loop->header->aux;
2334
2335 exits = get_loop_exit_edges (loop);
2336 FOR_EACH_VEC_ELT (exits, j, ex)
2337 if (number_of_iterations_exit (loop, ex, &niter_desc, false)
2338 && !is_gimple_min_invariant (niter_desc.niter))
2339 {
2340 predicate will_be_nonconstant
2341 = will_be_nonconstant_expr_predicate (fbi.info, info,
2342 niter_desc.niter,
2343 nonconstant_names);
2344 if (will_be_nonconstant != true)
2345 will_be_nonconstant = bb_predicate & will_be_nonconstant;
2346 if (will_be_nonconstant != true
2347 && will_be_nonconstant != false)
2348 /* This is slightly imprecise. We may want to represent each
2349 loop with an independent predicate. */
2350 loop_iterations &= will_be_nonconstant;
2351 }
2352 exits.release ();
2353 }
2354
2355 /* To avoid quadratic behavior we analyze stride predicates only
2356 with respect to the containing loop. Thus we simply iterate
2357 over all defs in the outermost loop body. */
2358 for (loop = loops_for_fn (cfun)->tree_root->inner;
2359 loop != NULL; loop = loop->next)
2360 {
2361 basic_block *body = get_loop_body (loop);
2362 for (unsigned i = 0; i < loop->num_nodes; i++)
2363 {
2364 gimple_stmt_iterator gsi;
2365 bb_predicate = *(predicate *) body[i]->aux;
2366 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi);
2367 gsi_next (&gsi))
2368 {
2369 gimple *stmt = gsi_stmt (gsi);
2370
2371 if (!is_gimple_assign (stmt))
2372 continue;
2373
2374 tree def = gimple_assign_lhs (stmt);
2375 if (TREE_CODE (def) != SSA_NAME)
2376 continue;
2377
2378 affine_iv iv;
2379 if (!simple_iv (loop_containing_stmt (stmt),
2380 loop_containing_stmt (stmt),
2381 def, &iv, true)
2382 || is_gimple_min_invariant (iv.step))
2383 continue;
2384
2385 predicate will_be_nonconstant
2386 = will_be_nonconstant_expr_predicate (fbi.info, info,
2387 iv.step,
2388 nonconstant_names);
2389 if (will_be_nonconstant != true)
2390 will_be_nonconstant = bb_predicate & will_be_nonconstant;
2391 if (will_be_nonconstant != true
2392 && will_be_nonconstant != false)
2393 /* This is slightly imprecise. We may want to represent
2394 each loop with an independent predicate. */
2395 loop_stride = loop_stride & will_be_nonconstant;
2396 }
2397 }
2398 free (body);
2399 }
2400 set_hint_predicate (&inline_summaries->get (node)->loop_iterations,
2401 loop_iterations);
2402 set_hint_predicate (&inline_summaries->get (node)->loop_stride,
2403 loop_stride);
2404 scev_finalize ();
2405 }
2406 FOR_ALL_BB_FN (bb, my_function)
2407 {
2408 edge e;
2409 edge_iterator ei;
2410
2411 if (bb->aux)
2412 edge_predicate_pool.remove ((predicate *)bb->aux);
2413 bb->aux = NULL;
2414 FOR_EACH_EDGE (e, ei, bb->succs)
2415 {
2416 if (e->aux)
2417 edge_predicate_pool.remove ((predicate *) e->aux);
2418 e->aux = NULL;
2419 }
2420 }
2421 inline_summaries->get (node)->self_time = time;
2422 inline_summaries->get (node)->self_size = size;
2423 nonconstant_names.release ();
2424 ipa_release_body_info (&fbi);
2425 if (opt_for_fn (node->decl, optimize))
2426 {
2427 if (!early)
2428 loop_optimizer_finalize ();
2429 else if (!ipa_edge_args_sum)
2430 ipa_free_all_node_params ();
2431 free_dominance_info (CDI_DOMINATORS);
2432 }
2433 if (dump_file)
2434 {
2435 fprintf (dump_file, "\n");
2436 dump_inline_summary (dump_file, node);
2437 }
2438 }
2439
2440
2441 /* Compute parameters of functions used by the inliner.
2442 EARLY is true when we compute parameters for the early inliner. */
2443
2444 void
2445 compute_inline_parameters (struct cgraph_node *node, bool early)
2446 {
2447 HOST_WIDE_INT self_stack_size;
2448 struct cgraph_edge *e;
2449 struct inline_summary *info;
2450
2451 gcc_assert (!node->global.inlined_to);
2452
2453 inline_summary_alloc ();
2454
2455 info = inline_summaries->get (node);
2456 reset_inline_summary (node, info);
2457
2458 /* Estimate the stack size for the function if we're optimizing. */
2459 self_stack_size = optimize && !node->thunk.thunk_p
2460 ? estimated_stack_frame_size (node) : 0;
2461 info->estimated_self_stack_size = self_stack_size;
2462 info->estimated_stack_size = self_stack_size;
2463 info->stack_frame_offset = 0;
2464
2465 if (node->thunk.thunk_p)
2466 {
2467 struct ipa_call_summary *es = ipa_call_summaries->get (node->callees);
2468 predicate t = true;
2469
2470 node->local.can_change_signature = false;
2471 es->call_stmt_size = eni_size_weights.call_cost;
2472 es->call_stmt_time = eni_time_weights.call_cost;
2473 account_size_time (info, INLINE_SIZE_SCALE * 2,
2474 2, &t, &t);
2475 t = predicate::not_inlined ();
2476 account_size_time (info, 2 * INLINE_SIZE_SCALE, 0, &t, &t);
2477 inline_update_overall_summary (node);
2478 info->self_size = info->size;
2479 info->self_time = info->time;
2480 /* We cannot inline instrumentation clones. */
2481 if (node->thunk.add_pointer_bounds_args)
2482 {
2483 info->inlinable = false;
2484 node->callees->inline_failed = CIF_CHKP;
2485 }
2486 else
2487 info->inlinable = true;
2488 }
2489 else
2490 {
2491 /* Even is_gimple_min_invariant relies on current_function_decl. */
2492 push_cfun (DECL_STRUCT_FUNCTION (node->decl));
2493
2494 /* Can this function be inlined at all? */
2495 if (!opt_for_fn (node->decl, optimize)
2496 && !lookup_attribute ("always_inline",
2497 DECL_ATTRIBUTES (node->decl)))
2498 info->inlinable = false;
2499 else
2500 info->inlinable = tree_inlinable_function_p (node->decl);
2501
2502 info->contains_cilk_spawn = fn_contains_cilk_spawn_p (cfun);
2503
2504 /* Type attributes can use parameter indices to describe them. */
2505 if (TYPE_ATTRIBUTES (TREE_TYPE (node->decl)))
2506 node->local.can_change_signature = false;
2507 else
2508 {
2509 /* Otherwise, inlinable functions can always change their signature. */
2510 if (info->inlinable)
2511 node->local.can_change_signature = true;
2512 else
2513 {
2514 /* Functions calling builtin_apply_args or va_start cannot change their signature. */
2515 for (e = node->callees; e; e = e->next_callee)
2516 {
2517 tree cdecl = e->callee->decl;
2518 if (DECL_BUILT_IN (cdecl)
2519 && DECL_BUILT_IN_CLASS (cdecl) == BUILT_IN_NORMAL
2520 && (DECL_FUNCTION_CODE (cdecl) == BUILT_IN_APPLY_ARGS
2521 || DECL_FUNCTION_CODE (cdecl) == BUILT_IN_VA_START))
2522 break;
2523 }
2524 node->local.can_change_signature = !e;
2525 }
2526 }
2527 /* Functions called by an instrumentation thunk can't change their signature
2528 because instrumentation thunk modification is not supported. */
2529 if (node->local.can_change_signature)
2530 for (e = node->callers; e; e = e->next_caller)
2531 if (e->caller->thunk.thunk_p
2532 && e->caller->thunk.add_pointer_bounds_args)
2533 {
2534 node->local.can_change_signature = false;
2535 break;
2536 }
2537 estimate_function_body_sizes (node, early);
2538 pop_cfun ();
2539 }
2540 for (e = node->callees; e; e = e->next_callee)
2541 if (e->callee->comdat_local_p ())
2542 break;
2543 node->calls_comdat_local = (e != NULL);
2544
2545 /* Inlining characteristics are maintained by cgraph_mark_inline. */
2546 info->time = info->self_time;
2547 info->size = info->self_size;
2548 info->stack_frame_offset = 0;
2549 info->estimated_stack_size = info->estimated_self_stack_size;
2550
2551 /* The code above should compute exactly the same result as
2552 inline_update_overall_summary, but because the computation happens in
2553 a different order, roundoff errors result in slight changes. */
2554 inline_update_overall_summary (node);
2555 gcc_assert (!(info->time - info->self_time).to_int ()
2556 && info->size == info->self_size);
2557 }
2558
2559
2560 /* Compute parameters of functions used by inliner using
2561 current_function_decl. */
2562
2563 static unsigned int
2564 compute_inline_parameters_for_current (void)
2565 {
2566 compute_inline_parameters (cgraph_node::get (current_function_decl), true);
2567 return 0;
2568 }
2569
2570 namespace {
2571
2572 const pass_data pass_data_inline_parameters =
2573 {
2574 GIMPLE_PASS, /* type */
2575 "inline_param", /* name */
2576 OPTGROUP_INLINE, /* optinfo_flags */
2577 TV_INLINE_PARAMETERS, /* tv_id */
2578 0, /* properties_required */
2579 0, /* properties_provided */
2580 0, /* properties_destroyed */
2581 0, /* todo_flags_start */
2582 0, /* todo_flags_finish */
2583 };
2584
2585 class pass_inline_parameters : public gimple_opt_pass
2586 {
2587 public:
2588 pass_inline_parameters (gcc::context *ctxt)
2589 : gimple_opt_pass (pass_data_inline_parameters, ctxt)
2590 {}
2591
2592 /* opt_pass methods: */
2593 opt_pass * clone () { return new pass_inline_parameters (m_ctxt); }
2594 virtual unsigned int execute (function *)
2595 {
2596 return compute_inline_parameters_for_current ();
2597 }
2598
2599 }; // class pass_inline_parameters
2600
2601 } // anon namespace
2602
2603 gimple_opt_pass *
2604 make_pass_inline_parameters (gcc::context *ctxt)
2605 {
2606 return new pass_inline_parameters (ctxt);
2607 }
2608
2609
2610 /* Estimate the benefit of devirtualizing indirect edge IE, given KNOWN_VALS,
2611 KNOWN_CONTEXTS and KNOWN_AGGS. */
2612
2613 static bool
2614 estimate_edge_devirt_benefit (struct cgraph_edge *ie,
2615 int *size, int *time,
2616 vec<tree> known_vals,
2617 vec<ipa_polymorphic_call_context> known_contexts,
2618 vec<ipa_agg_jump_function_p> known_aggs)
2619 {
2620 tree target;
2621 struct cgraph_node *callee;
2622 struct inline_summary *isummary;
2623 enum availability avail;
2624 bool speculative;
2625
2626 if (!known_vals.exists () && !known_contexts.exists ())
2627 return false;
2628 if (!opt_for_fn (ie->caller->decl, flag_indirect_inlining))
2629 return false;
2630
2631 target = ipa_get_indirect_edge_target (ie, known_vals, known_contexts,
2632 known_aggs, &speculative);
2633 if (!target || speculative)
2634 return false;
2635
2636 /* Account for difference in cost between indirect and direct calls. */
2637 *size -= (eni_size_weights.indirect_call_cost - eni_size_weights.call_cost);
2638 *time -= (eni_time_weights.indirect_call_cost - eni_time_weights.call_cost);
2639 gcc_checking_assert (*time >= 0);
2640 gcc_checking_assert (*size >= 0);
2641
2642 callee = cgraph_node::get (target);
2643 if (!callee || !callee->definition)
2644 return false;
2645 callee = callee->function_symbol (&avail);
2646 if (avail < AVAIL_AVAILABLE)
2647 return false;
2648 isummary = inline_summaries->get (callee);
2649 return isummary->inlinable;
2650 }
2651
2652 /* Increase SIZE, MIN_SIZE (if non-NULL) and TIME for size and time needed to
2653 handle edge E with probability PROB.
2654 Set HINTS if edge may be devirtualized.
2655 KNOWN_VALS, KNOWN_AGGS and KNOWN_CONTEXTS describe context of the call
2656 site. */
2657
2658 static inline void
2659 estimate_edge_size_and_time (struct cgraph_edge *e, int *size, int *min_size,
2660 sreal *time,
2661 int prob,
2662 vec<tree> known_vals,
2663 vec<ipa_polymorphic_call_context> known_contexts,
2664 vec<ipa_agg_jump_function_p> known_aggs,
2665 inline_hints *hints)
2666 {
2667 struct ipa_call_summary *es = ipa_call_summaries->get (e);
2668 int call_size = es->call_stmt_size;
2669 int call_time = es->call_stmt_time;
2670 int cur_size;
2671 if (!e->callee
2672 && estimate_edge_devirt_benefit (e, &call_size, &call_time,
2673 known_vals, known_contexts, known_aggs)
2674 && hints && e->maybe_hot_p ())
2675 *hints |= INLINE_HINT_indirect_call;
2676 cur_size = call_size * INLINE_SIZE_SCALE;
2677 *size += cur_size;
2678 if (min_size)
2679 *min_size += cur_size;
2680 if (prob == REG_BR_PROB_BASE)
2681 *time += ((sreal)(call_time * e->frequency)) / CGRAPH_FREQ_BASE;
2682 else
2683 *time += ((sreal)call_time) * (prob * e->frequency)
2684 / (CGRAPH_FREQ_BASE * REG_BR_PROB_BASE);
2685 }
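/* Worked example for the formula above (illustration only): with
   REG_BR_PROB_BASE == 10000 and CGRAPH_FREQ_BASE == 1000, a call of stmt
   time 10 executed twice per invocation (e->frequency == 2000) with
   prob == 5000, i.e. 50%, adds 10 * 5000 * 2000 / (1000 * 10000) == 10
   units of time: the doubled frequency and halved probability cancel.  */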
2686
2687
2688
2689 /* Increase SIZE, MIN_SIZE and TIME for size and time needed to handle all
2690 calls in NODE. POSSIBLE_TRUTHS, KNOWN_VALS, KNOWN_AGGS and KNOWN_CONTEXTS
2691 describe context of the call site. */
2692
2693 static void
2694 estimate_calls_size_and_time (struct cgraph_node *node, int *size,
2695 int *min_size, sreal *time,
2696 inline_hints *hints,
2697 clause_t possible_truths,
2698 vec<tree> known_vals,
2699 vec<ipa_polymorphic_call_context> known_contexts,
2700 vec<ipa_agg_jump_function_p> known_aggs)
2701 {
2702 struct cgraph_edge *e;
2703 for (e = node->callees; e; e = e->next_callee)
2704 {
2705 struct ipa_call_summary *es = ipa_call_summaries->get (e);
2706
2707 /* Do not care about zero-sized builtins. */
2708 if (e->inline_failed && !es->call_stmt_size)
2709 {
2710 gcc_checking_assert (!es->call_stmt_time);
2711 continue;
2712 }
2713 if (!es->predicate
2714 || es->predicate->evaluate (possible_truths))
2715 {
2716 if (e->inline_failed)
2717 {
2718 /* Predicates of calls shall not use NOT_CHANGED codes,
2719 so we do not need to compute probabilities. */
2720 estimate_edge_size_and_time (e, size,
2721 es->predicate ? NULL : min_size,
2722 time, REG_BR_PROB_BASE,
2723 known_vals, known_contexts,
2724 known_aggs, hints);
2725 }
2726 else
2727 estimate_calls_size_and_time (e->callee, size, min_size, time,
2728 hints,
2729 possible_truths,
2730 known_vals, known_contexts,
2731 known_aggs);
2732 }
2733 }
2734 for (e = node->indirect_calls; e; e = e->next_callee)
2735 {
2736 struct ipa_call_summary *es = ipa_call_summaries->get (e);
2737 if (!es->predicate
2738 || es->predicate->evaluate (possible_truths))
2739 estimate_edge_size_and_time (e, size,
2740 es->predicate ? NULL : min_size,
2741 time, REG_BR_PROB_BASE,
2742 known_vals, known_contexts, known_aggs,
2743 hints);
2744 }
2745 }
2746
2747
2748 /* Estimate size and time needed to execute NODE assuming
2749 POSSIBLE_TRUTHS clause, and KNOWN_VALS, KNOWN_AGGS and KNOWN_CONTEXTS
2750 information about NODE's arguments. If non-NULL, also use the probability
2751 information present in the INLINE_PARAM_SUMMARY vector.
2752 Additionally determine the hints implied by the context. Finally compute
2753 the minimal size needed for the call that is independent of the call context
2754 and can be used for fast estimates. Return the values in RET_SIZE,
2755 RET_MIN_SIZE, RET_TIME and RET_HINTS. */
2756
2757 static void
2758 estimate_node_size_and_time (struct cgraph_node *node,
2759 clause_t possible_truths,
2760 clause_t nonspec_possible_truths,
2761 vec<tree> known_vals,
2762 vec<ipa_polymorphic_call_context> known_contexts,
2763 vec<ipa_agg_jump_function_p> known_aggs,
2764 int *ret_size, int *ret_min_size,
2765 sreal *ret_time,
2766 sreal *ret_nonspecialized_time,
2767 inline_hints *ret_hints,
2768 vec<inline_param_summary>
2769 inline_param_summary)
2770 {
2771 struct inline_summary *info = inline_summaries->get (node);
2772 size_time_entry *e;
2773 int size = 0;
2774 sreal time = 0;
2775 int min_size = 0;
2776 inline_hints hints = 0;
2777 int i;
2778
2779 if (dump_file && (dump_flags & TDF_DETAILS))
2780 {
2781 bool found = false;
2782 fprintf (dump_file, " Estimating body: %s/%i\n"
2783 " Known to be false: ", node->name (),
2784 node->order);
2785
2786 for (i = predicate::not_inlined_condition;
2787 i < (predicate::first_dynamic_condition
2788 + (int) vec_safe_length (info->conds)); i++)
2789 if (!(possible_truths & (1 << i)))
2790 {
2791 if (found)
2792 fprintf (dump_file, ", ");
2793 found = true;
2794 dump_condition (dump_file, info->conds, i);
2795 }
2796 }
2797
2798 estimate_calls_size_and_time (node, &size, &min_size, &time, &hints, possible_truths,
2799 known_vals, known_contexts, known_aggs);
2800 sreal nonspecialized_time = time;
2801
2802 for (i = 0; vec_safe_iterate (info->entry, i, &e); i++)
2803 {
2804 bool nonconst = e->nonconst_predicate.evaluate (possible_truths);
2805 bool exec = e->exec_predicate.evaluate (nonspec_possible_truths);
2806 gcc_assert (!nonconst || exec);
2807 if (exec)
2808 {
2809 gcc_checking_assert (e->time >= 0);
2810 gcc_checking_assert (time >= 0);
2811
2812 /* We compute the specialized size only, because the size of the
2813 nonspecialized copy is context independent.
2814
2815 The difference between nonspecialized and specialized execution is
2816 that the nonspecialized copy will not have optimized out the
2817 computations known to be constant in the specialized setting. */
2818 if (nonconst)
2819 size += e->size;
2820 nonspecialized_time += e->time;
2821 if (!nonconst)
2822 ;
2823 else if (!inline_param_summary.exists ())
2824 {
2825 if (nonconst)
2826 time += e->time;
2827 }
2828 else
2829 {
2830 int prob = e->nonconst_predicate.probability
2831 (info->conds, possible_truths,
2832 inline_param_summary);
2833 gcc_checking_assert (prob >= 0);
2834 gcc_checking_assert (prob <= REG_BR_PROB_BASE);
2835 time += e->time * prob / REG_BR_PROB_BASE;
2836 }
2837 gcc_checking_assert (time >= 0);
2838 }
2839 }
2840 gcc_checking_assert ((*info->entry)[0].exec_predicate == true);
2841 gcc_checking_assert ((*info->entry)[0].nonconst_predicate == true);
2842 min_size = (*info->entry)[0].size;
2843 gcc_checking_assert (size >= 0);
2844 gcc_checking_assert (time >= 0);
2845 /* nonspecialized_time should always be bigger than the specialized time;
2846 roundoff issues, however, may get in the way. */
2847 gcc_checking_assert ((nonspecialized_time - time) >= -1);
2848
2849 /* Roundoff issues may make the specialized time bigger than the
2850 nonspecialized time. We do not really want that to happen because some
2851 heuristics may get confused by seeing negative speedups. */
2852 if (time > nonspecialized_time)
2853 time = nonspecialized_time;
2854
2855 if (info->loop_iterations
2856 && !info->loop_iterations->evaluate (possible_truths))
2857 hints |= INLINE_HINT_loop_iterations;
2858 if (info->loop_stride
2859 && !info->loop_stride->evaluate (possible_truths))
2860 hints |= INLINE_HINT_loop_stride;
2861 if (info->array_index
2862 && !info->array_index->evaluate (possible_truths))
2863 hints |= INLINE_HINT_array_index;
2864 if (info->scc_no)
2865 hints |= INLINE_HINT_in_scc;
2866 if (DECL_DECLARED_INLINE_P (node->decl))
2867 hints |= INLINE_HINT_declared_inline;
2868
2869 size = RDIV (size, INLINE_SIZE_SCALE);
2870 min_size = RDIV (min_size, INLINE_SIZE_SCALE);
2871
2872 if (dump_file && (dump_flags & TDF_DETAILS))
2873 fprintf (dump_file, "\n size:%i time:%f nonspec time:%f\n", (int) size,
2874 time.to_double (), nonspecialized_time.to_double ());
2875 if (ret_time)
2876 *ret_time = time;
2877 if (ret_nonspecialized_time)
2878 *ret_nonspecialized_time = nonspecialized_time;
2879 if (ret_size)
2880 *ret_size = size;
2881 if (ret_min_size)
2882 *ret_min_size = min_size;
2883 if (ret_hints)
2884 *ret_hints = hints;
2885 return;
2886 }
2887
2888
2889 /* Estimate the size and time needed to execute NODE, assuming that the
2890 parameters known to be constant are propagated.
2891 KNOWN_VALS and KNOWN_CONTEXTS are vectors of assumed known constant values
2892 and contexts for the parameters. */
2893
2894 void
2895 estimate_ipcp_clone_size_and_time (struct cgraph_node *node,
2896 vec<tree> known_vals,
2897 vec<ipa_polymorphic_call_context>
2898 known_contexts,
2899 vec<ipa_agg_jump_function_p> known_aggs,
2900 int *ret_size, sreal *ret_time,
2901 sreal *ret_nonspec_time,
2902 inline_hints *hints)
2903 {
2904 clause_t clause, nonspec_clause;
2905
2906 evaluate_conditions_for_known_args (node, false, known_vals, known_aggs,
2907 &clause, &nonspec_clause);
2908 estimate_node_size_and_time (node, clause, nonspec_clause,
2909 known_vals, known_contexts,
2910 known_aggs, ret_size, NULL, ret_time,
2911 ret_nonspec_time, hints, vNULL);
2912 }
2913
2914
2915 /* Update summary information of inline clones after inlining.
2916 Compute peak stack usage. */
2917
2918 static void
2919 inline_update_callee_summaries (struct cgraph_node *node, int depth)
2920 {
2921 struct cgraph_edge *e;
2922 struct inline_summary *callee_info = inline_summaries->get (node);
2923 struct inline_summary *caller_info = inline_summaries->get (node->callers->caller);
2924 HOST_WIDE_INT peak;
2925
2926 callee_info->stack_frame_offset
2927 = caller_info->stack_frame_offset
2928 + caller_info->estimated_self_stack_size;
2929 peak = callee_info->stack_frame_offset
2930 + callee_info->estimated_self_stack_size;
2931 if (inline_summaries->get (node->global.inlined_to)->estimated_stack_size < peak)
2932 inline_summaries->get (node->global.inlined_to)->estimated_stack_size = peak;
2933 ipa_propagate_frequency (node);
2934 for (e = node->callees; e; e = e->next_callee)
2935 {
2936 if (!e->inline_failed)
2937 inline_update_callee_summaries (e->callee, depth);
2938 ipa_call_summaries->get (e)->loop_depth += depth;
2939 }
2940 for (e = node->indirect_calls; e; e = e->next_callee)
2941 ipa_call_summaries->get (e)->loop_depth += depth;
2942 }
2943
2944 /* Update change_prob of EDGE after INLINED_EDGE has been inlined.
2945 When function A is inlined in B, and A calls C with a parameter that
2946 changes with probability PROB1, and that parameter is known to be a
2947 passthrough of an argument of B that changes with probability PROB2,
2948 the probability of change is now PROB1*PROB2. */
2949
2950 static void
2951 remap_edge_change_prob (struct cgraph_edge *inlined_edge,
2952 struct cgraph_edge *edge)
2953 {
2954 if (ipa_node_params_sum)
2955 {
2956 int i;
2957 struct ipa_edge_args *args = IPA_EDGE_REF (edge);
2958 struct ipa_call_summary *es = ipa_call_summaries->get (edge);
2959 struct ipa_call_summary *inlined_es
2960 = ipa_call_summaries->get (inlined_edge);
2961
2962 for (i = 0; i < ipa_get_cs_argument_count (args); i++)
2963 {
2964 struct ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, i);
2965 if (jfunc->type == IPA_JF_PASS_THROUGH
2966 || jfunc->type == IPA_JF_ANCESTOR)
2967 {
2968 int id = jfunc->type == IPA_JF_PASS_THROUGH
2969 ? ipa_get_jf_pass_through_formal_id (jfunc)
2970 : ipa_get_jf_ancestor_formal_id (jfunc);
2971 if (id < (int) inlined_es->param.length ())
2972 {
2973 int prob1 = es->param[i].change_prob;
2974 int prob2 = inlined_es->param[id].change_prob;
2975 int prob = combine_probabilities (prob1, prob2);
2976
2977 if (prob1 && prob2 && !prob)
2978 prob = 1;
2979
2980 es->param[i].change_prob = prob;
2981 }
2982 }
2983 }
2984 }
2985 }
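/* For example, with REG_BR_PROB_BASE == 10000, PROB1 == 5000 and
   PROB2 == 5000 (both 50%) combine to 2500, i.e. 25%; the clamping above
   keeps the result at least 1 whenever both input probabilities are
   nonzero.  */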
2986
2987 /* Update edge summaries of NODE after INLINED_EDGE has been inlined.
2988
2989 Remap predicates of callees of NODE. Rest of arguments match
2990 remap_predicate.
2991
2992 Also update change probabilities. */
2993
2994 static void
2995 remap_edge_summaries (struct cgraph_edge *inlined_edge,
2996 struct cgraph_node *node,
2997 struct inline_summary *info,
2998 struct inline_summary *callee_info,
2999 vec<int> operand_map,
3000 vec<int> offset_map,
3001 clause_t possible_truths,
3002 predicate *toplev_predicate)
3003 {
3004 struct cgraph_edge *e, *next;
3005 for (e = node->callees; e; e = next)
3006 {
3007 struct ipa_call_summary *es = ipa_call_summaries->get (e);
3008 predicate p;
3009 next = e->next_callee;
3010
3011 if (e->inline_failed)
3012 {
3013 remap_edge_change_prob (inlined_edge, e);
3014
3015 if (es->predicate)
3016 {
3017 p = es->predicate->remap_after_inlining
3018 (info, callee_info, operand_map,
3019 offset_map, possible_truths,
3020 *toplev_predicate);
3021 edge_set_predicate (e, &p);
3022 }
3023 else
3024 edge_set_predicate (e, toplev_predicate);
3025 }
3026 else
3027 remap_edge_summaries (inlined_edge, e->callee, info, callee_info,
3028 operand_map, offset_map, possible_truths,
3029 toplev_predicate);
3030 }
3031 for (e = node->indirect_calls; e; e = next)
3032 {
3033 struct ipa_call_summary *es = ipa_call_summaries->get (e);
3034 predicate p;
3035 next = e->next_callee;
3036
3037 remap_edge_change_prob (inlined_edge, e);
3038 if (es->predicate)
3039 {
3040 p = es->predicate->remap_after_inlining
3041 (info, callee_info, operand_map, offset_map,
3042 possible_truths, *toplev_predicate);
3043 edge_set_predicate (e, &p);
3044 }
3045 else
3046 edge_set_predicate (e, toplev_predicate);
3047 }
3048 }
3049
3050 /* Remap the hint predicate *HINT after inlining; the remaining arguments match remap_edge_summaries. */
3051
3052 static void
3053 remap_hint_predicate (struct inline_summary *info,
3054 struct inline_summary *callee_info,
3055 predicate **hint,
3056 vec<int> operand_map,
3057 vec<int> offset_map,
3058 clause_t possible_truths,
3059 predicate *toplev_predicate)
3060 {
3061 predicate p;
3062
3063 if (!*hint)
3064 return;
3065 p = (*hint)->remap_after_inlining
3066 (info, callee_info,
3067 operand_map, offset_map,
3068 possible_truths, *toplev_predicate);
3069 if (p != false && p != true)
3070 {
3071 if (!*hint)
3072 set_hint_predicate (hint, p);
3073 else
3074 **hint &= p;
3075 }
3076 }
3077
3078 /* We inlined EDGE. Update summary of the function we inlined into. */
3079
3080 void
3081 inline_merge_summary (struct cgraph_edge *edge)
3082 {
3083 struct inline_summary *callee_info = inline_summaries->get (edge->callee);
3084 struct cgraph_node *to = (edge->caller->global.inlined_to
3085 ? edge->caller->global.inlined_to : edge->caller);
3086 struct inline_summary *info = inline_summaries->get (to);
3087 clause_t clause = 0; /* not_inline is known to be false. */
3088 size_time_entry *e;
3089 vec<int> operand_map = vNULL;
3090 vec<int> offset_map = vNULL;
3091 int i;
3092 predicate toplev_predicate;
3093 predicate true_p = true;
3094 struct ipa_call_summary *es = ipa_call_summaries->get (edge);
3095
3096 if (es->predicate)
3097 toplev_predicate = *es->predicate;
3098 else
3099 toplev_predicate = true;
3100
3101 info->fp_expressions |= callee_info->fp_expressions;
3102
3103 if (callee_info->conds)
3104 evaluate_properties_for_edge (edge, true, &clause, NULL, NULL, NULL, NULL);
3105 if (ipa_node_params_sum && callee_info->conds)
3106 {
3107 struct ipa_edge_args *args = IPA_EDGE_REF (edge);
3108 int count = ipa_get_cs_argument_count (args);
3109 int i;
3110
3111 if (count)
3112 {
3113 operand_map.safe_grow_cleared (count);
3114 offset_map.safe_grow_cleared (count);
3115 }
3116 for (i = 0; i < count; i++)
3117 {
3118 struct ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, i);
3119 int map = -1;
3120
3121 /* TODO: handle non-NOPs when merging. */
3122 if (jfunc->type == IPA_JF_PASS_THROUGH)
3123 {
3124 if (ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
3125 map = ipa_get_jf_pass_through_formal_id (jfunc);
3126 if (!ipa_get_jf_pass_through_agg_preserved (jfunc))
3127 offset_map[i] = -1;
3128 }
3129 else if (jfunc->type == IPA_JF_ANCESTOR)
3130 {
3131 HOST_WIDE_INT offset = ipa_get_jf_ancestor_offset (jfunc);
3132 if (offset >= 0 && offset < INT_MAX)
3133 {
3134 map = ipa_get_jf_ancestor_formal_id (jfunc);
3135 if (!ipa_get_jf_ancestor_agg_preserved (jfunc))
3136 offset = -1;
3137 offset_map[i] = offset;
3138 }
3139 }
3140 operand_map[i] = map;
3141 gcc_assert (map < ipa_get_param_count (IPA_NODE_REF (to)));
3142 }
3143 }
3144 for (i = 0; vec_safe_iterate (callee_info->entry, i, &e); i++)
3145 {
3146 predicate p;
3147 p = e->exec_predicate.remap_after_inlining
3148 (info, callee_info, operand_map,
3149 offset_map, clause,
3150 toplev_predicate);
3151 predicate nonconstp;
3152 nonconstp = e->nonconst_predicate.remap_after_inlining
3153 (info, callee_info, operand_map,
3154 offset_map, clause,
3155 toplev_predicate);
3156 if (p != false && nonconstp != false)
3157 {
3158 sreal add_time = ((sreal)e->time * edge->frequency) / CGRAPH_FREQ_BASE;
3159 int prob = e->nonconst_predicate.probability (callee_info->conds,
3160 clause, es->param);
3161 add_time = add_time * prob / REG_BR_PROB_BASE;
3162 if (prob != REG_BR_PROB_BASE
3163 && dump_file && (dump_flags & TDF_DETAILS))
3164 {
3165 fprintf (dump_file, "\t\tScaling time by probability:%f\n",
3166 (double) prob / REG_BR_PROB_BASE);
3167 }
3168 account_size_time (info, e->size, add_time, &p, &nonconstp);
3169 }
3170 }
3171 remap_edge_summaries (edge, edge->callee, info, callee_info, operand_map,
3172 offset_map, clause, &toplev_predicate);
3173 remap_hint_predicate (info, callee_info,
3174 &callee_info->loop_iterations,
3175 operand_map, offset_map, clause, &toplev_predicate);
3176 remap_hint_predicate (info, callee_info,
3177 &callee_info->loop_stride,
3178 operand_map, offset_map, clause, &toplev_predicate);
3179 remap_hint_predicate (info, callee_info,
3180 &callee_info->array_index,
3181 operand_map, offset_map, clause, &toplev_predicate);
3182
3183 inline_update_callee_summaries (edge->callee,
3184 ipa_call_summaries->get (edge)->loop_depth);
3185
3186 /* We do not maintain predicates of inlined edges, so free it. */
3187 edge_set_predicate (edge, &true_p);
3188 /* Similarly remove param summaries. */
3189 es->param.release ();
3190 operand_map.release ();
3191 offset_map.release ();
3192 }
3193
3194 /* For performance reasons inline_merge_summary does not update the overall
3195 size and time. Recompute them here. */
3196
3197 void
3198 inline_update_overall_summary (struct cgraph_node *node)
3199 {
3200 struct inline_summary *info = inline_summaries->get (node);
3201 size_time_entry *e;
3202 int i;
3203
3204 info->size = 0;
3205 info->time = 0;
3206 for (i = 0; vec_safe_iterate (info->entry, i, &e); i++)
3207 {
3208 info->size += e->size;
3209 info->time += e->time;
3210 }
3211 estimate_calls_size_and_time (node, &info->size, &info->min_size,
3212 &info->time, NULL,
3213 ~(clause_t) (1 << predicate::false_condition),
3214 vNULL, vNULL, vNULL);
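/* Sizes are accumulated in INLINE_SIZE_SCALE units; adding half the
   scale before dividing rounds the total to the nearest unit.  */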
3215 info->size = (info->size + INLINE_SIZE_SCALE / 2) / INLINE_SIZE_SCALE;
3216 }
3217
3218 /* Return hints derived from EDGE. */
3219 int
3220 simple_edge_hints (struct cgraph_edge *edge)
3221 {
3222 int hints = 0;
3223 struct cgraph_node *to = (edge->caller->global.inlined_to
3224 ? edge->caller->global.inlined_to : edge->caller);
3225 struct cgraph_node *callee = edge->callee->ultimate_alias_target ();
3226 if (inline_summaries->get (to)->scc_no
3227 && inline_summaries->get (to)->scc_no
3228 == inline_summaries->get (callee)->scc_no
3229 && !edge->recursive_p ())
3230 hints |= INLINE_HINT_same_scc;
3231
3232 if (callee->lto_file_data && edge->caller->lto_file_data
3233 && edge->caller->lto_file_data != callee->lto_file_data
3234 && !callee->merged_comdat && !callee->icf_merged)
3235 hints |= INLINE_HINT_cross_module;
3236
3237 return hints;
3238 }
3239
3240 /* Estimate the time cost for the caller when inlining EDGE.
3241 Only to be called via estimate_edge_time, that handles the
3242 caching mechanism.
3243
3244 When caching, also update the cache entry. Compute both time and
3245 size, since we always need both metrics eventually. */
3246
3247 sreal
3248 do_estimate_edge_time (struct cgraph_edge *edge)
3249 {
3250 sreal time, nonspec_time;
3251 int size;
3252 inline_hints hints;
3253 struct cgraph_node *callee;
3254 clause_t clause, nonspec_clause;
3255 vec<tree> known_vals;
3256 vec<ipa_polymorphic_call_context> known_contexts;
3257 vec<ipa_agg_jump_function_p> known_aggs;
3258 struct ipa_call_summary *es = ipa_call_summaries->get (edge);
3259 int min_size;
3260
3261 callee = edge->callee->ultimate_alias_target ();
3262
3263 gcc_checking_assert (edge->inline_failed);
3264 evaluate_properties_for_edge (edge, true,
3265 &clause, &nonspec_clause, &known_vals,
3266 &known_contexts, &known_aggs);
3267 estimate_node_size_and_time (callee, clause, nonspec_clause, known_vals,
3268 known_contexts, known_aggs, &size, &min_size,
3269 &time, &nonspec_time, &hints, es->param);
3270
3271 /* When we have profile feedback, we can quite safely identify hot
3272 edges and disable size limits for those. Don't do that when the
3273 probability that the caller will call the callee is low, however, since
3274 it may hurt optimization of the caller's hot path. */
3275 if (edge->count && edge->maybe_hot_p ()
3276 && (edge->count * 2
3277 > (edge->caller->global.inlined_to
3278 ? edge->caller->global.inlined_to->count : edge->caller->count)))
3279 hints |= INLINE_HINT_known_hot;
3280
3281 known_vals.release ();
3282 known_contexts.release ();
3283 known_aggs.release ();
3284 gcc_checking_assert (size >= 0);
3285 gcc_checking_assert (time >= 0);
3286
3287 /* When caching, update the cache entry. */
3288 if (edge_growth_cache.exists ())
3289 {
3290 inline_summaries->get (edge->callee)->min_size = min_size;
3291 if ((int) edge_growth_cache.length () <= edge->uid)
3292 edge_growth_cache.safe_grow_cleared (symtab->edges_max_uid);
3293 edge_growth_cache[edge->uid].time = time;
3294 edge_growth_cache[edge->uid].nonspec_time = nonspec_time;
3295
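/* Size and hints are stored biased by one so that an empty (zero) cache
   entry can be told apart from a computed value of zero; the readers in
   do_estimate_edge_size and do_estimate_edge_hints undo the bias.  */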
3296 edge_growth_cache[edge->uid].size = size + (size >= 0);
3297 hints |= simple_edge_hints (edge);
3298 edge_growth_cache[edge->uid].hints = hints + 1;
3299 }
3300 return time;
3301 }
3302
3303
3304 /* Return estimated callee growth after inlining EDGE.
3305 Only to be called via estimate_edge_size. */
3306
3307 int
3308 do_estimate_edge_size (struct cgraph_edge *edge)
3309 {
3310 int size;
3311 struct cgraph_node *callee;
3312 clause_t clause, nonspec_clause;
3313 vec<tree> known_vals;
3314 vec<ipa_polymorphic_call_context> known_contexts;
3315 vec<ipa_agg_jump_function_p> known_aggs;
3316
3317 /* When we do caching, use do_estimate_edge_time to populate the entry. */
3318
3319 if (edge_growth_cache.exists ())
3320 {
3321 do_estimate_edge_time (edge);
3322 size = edge_growth_cache[edge->uid].size;
3323 gcc_checking_assert (size);
3324 return size - (size > 0);
3325 }
3326
3327 callee = edge->callee->ultimate_alias_target ();
3328
3329 /* Early inliner runs without caching, go ahead and do the dirty work. */
3330 gcc_checking_assert (edge->inline_failed);
3331 evaluate_properties_for_edge (edge, true,
3332 &clause, &nonspec_clause,
3333 &known_vals, &known_contexts,
3334 &known_aggs);
3335 estimate_node_size_and_time (callee, clause, nonspec_clause, known_vals,
3336 known_contexts, known_aggs, &size, NULL, NULL,
3337 NULL, NULL, vNULL);
3338 known_vals.release ();
3339 known_contexts.release ();
3340 known_aggs.release ();
3341 return size;
3342 }
3343
3344
3345 /* Estimate the hints for inlining EDGE.
3346 Only to be called via estimate_edge_hints. */
3347
3348 inline_hints
3349 do_estimate_edge_hints (struct cgraph_edge *edge)
3350 {
3351 inline_hints hints;
3352 struct cgraph_node *callee;
3353 clause_t clause, nonspec_clause;
3354 vec<tree> known_vals;
3355 vec<ipa_polymorphic_call_context> known_contexts;
3356 vec<ipa_agg_jump_function_p> known_aggs;
3357
3358 /* When we do caching, use do_estimate_edge_time to populate the entry. */
3359
3360 if (edge_growth_cache.exists ())
3361 {
3362 do_estimate_edge_time (edge);
3363 hints = edge_growth_cache[edge->uid].hints;
3364 gcc_checking_assert (hints);
3365 return hints - 1;
3366 }
3367
3368 callee = edge->callee->ultimate_alias_target ();
3369
3370 /* Early inliner runs without caching, go ahead and do the dirty work. */
3371 gcc_checking_assert (edge->inline_failed);
3372 evaluate_properties_for_edge (edge, true,
3373 &clause, &nonspec_clause,
3374 &known_vals, &known_contexts,
3375 &known_aggs);
3376 estimate_node_size_and_time (callee, clause, nonspec_clause, known_vals,
3377 known_contexts, known_aggs, NULL, NULL,
3378 NULL, NULL, &hints, vNULL);
3379 known_vals.release ();
3380 known_contexts.release ();
3381 known_aggs.release ();
3382 hints |= simple_edge_hints (edge);
3383 return hints;
3384 }
3385
3386 /* Estimate the size of NODE after inlining EDGE, which should be an
3387 edge to either NODE or a call inlined into NODE. */
3388
3389 int
3390 estimate_size_after_inlining (struct cgraph_node *node,
3391 struct cgraph_edge *edge)
3392 {
3393 struct ipa_call_summary *es = ipa_call_summaries->get (edge);
3394 if (!es->predicate || *es->predicate != false)
3395 {
3396 int size = inline_summaries->get (node)->size + estimate_edge_growth (edge);
3397 gcc_assert (size >= 0);
3398 return size;
3399 }
3400 return inline_summaries->get (node)->size;
3401 }
3402
3403
3404 struct growth_data
3405 {
3406 struct cgraph_node *node;
3407 bool self_recursive;
3408 bool uninlinable;
3409 int growth;
3410 };
3411
3412
3413 /* Worker for do_estimate_growth. Collect growth for all callers. */
3414
3415 static bool
3416 do_estimate_growth_1 (struct cgraph_node *node, void *data)
3417 {
3418 struct cgraph_edge *e;
3419 struct growth_data *d = (struct growth_data *) data;
3420
3421 for (e = node->callers; e; e = e->next_caller)
3422 {
3423 gcc_checking_assert (e->inline_failed);
3424
3425 if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
3426 {
3427 d->uninlinable = true;
3428 continue;
3429 }
3430
3431 if (e->recursive_p ())
3432 {
3433 d->self_recursive = true;
3434 continue;
3435 }
3436 d->growth += estimate_edge_growth (e);
3437 }
3438 return false;
3439 }
3440
3441
3442 /* Estimate the growth caused by inlining NODE into all of its callers. */
3443
3444 int
3445 estimate_growth (struct cgraph_node *node)
3446 {
3447 struct growth_data d = { node, false, false, 0 };
3448 struct inline_summary *info = inline_summaries->get (node);
3449
3450 node->call_for_symbol_and_aliases (do_estimate_growth_1, &d, true);
3451
3452 /* For self-recursive functions the growth estimation really should be
3453 infinity. We don't want to return very large values because the growth
3454 plays various roles in badness computation fractions. Be sure not to
3455 return zero or negative growths. */
3456 if (d.self_recursive)
3457 d.growth = d.growth < info->size ? info->size : d.growth;
3458 else if (DECL_EXTERNAL (node->decl) || d.uninlinable)
3459 ;
3460 else
3461 {
3462 if (node->will_be_removed_from_program_if_no_direct_calls_p ())
3463 d.growth -= info->size;
3464 /* COMDAT functions are very often not shared across multiple units
3465 since they come from various template instantiations.
3466 Take this into account. */
3467 else if (DECL_COMDAT (node->decl)
3468 && node->can_remove_if_no_direct_calls_p ())
3469 d.growth -= (info->size
3470 * (100 - PARAM_VALUE (PARAM_COMDAT_SHARING_PROBABILITY))
3471 + 50) / 100;
3472 }
3473
3474 return d.growth;
3475 }
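/* For instance, a removable COMDAT function of size 100, assuming the
   comdat-sharing-probability parameter is at 20, gets
   (100 * (100 - 20) + 50) / 100 == 80 units subtracted: we expect to
   save the body in roughly 80% of the units instantiating it.  */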
3476
3477 /* Return true if NODE cannot be removed, some caller cannot be inlined, or the number of callers exceeds *MAX_CALLERS. */
3478
3479 static bool
3480 check_callers (cgraph_node *node, int *max_callers)
3481 {
3482 ipa_ref *ref;
3483
3484 if (!node->can_remove_if_no_direct_calls_and_refs_p ())
3485 return true;
3486
3487 for (cgraph_edge *e = node->callers; e; e = e->next_caller)
3488 {
3489 (*max_callers)--;
3490 if (!*max_callers
3491 || cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
3492 return true;
3493 }
3494
3495 FOR_EACH_ALIAS (node, ref)
3496 if (check_callers (dyn_cast <cgraph_node *> (ref->referring), max_callers))
3497 return true;
3498
3499 return false;
3500 }
3501
3502
3503 /* Cheaply estimate whether the growth of NODE is likely positive, knowing
3504 EDGE_GROWTH of one particular edge.
3505 We assume that most other edges will have similar growth
3506 and skip the computation if there are too many callers. */
3507
3508 bool
3509 growth_likely_positive (struct cgraph_node *node,
3510 int edge_growth)
3511 {
3512 int max_callers;
3513 struct cgraph_edge *e;
3514 gcc_checking_assert (edge_growth > 0);
3515
3516 /* First quickly check if NODE is removable at all. */
3517 if (DECL_EXTERNAL (node->decl))
3518 return true;
3519 if (!node->can_remove_if_no_direct_calls_and_refs_p ()
3520 || node->address_taken)
3521 return true;
3522
3523 max_callers = inline_summaries->get (node)->size * 4 / edge_growth + 2;
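/* E.g. for a node of size 120 and edge growth 30 we give up after
   120 * 4 / 30 + 2 == 18 callers; past that point the total growth is
   assumed positive without doing the full estimate.  */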
3524
3525 for (e = node->callers; e; e = e->next_caller)
3526 {
3527 max_callers--;
3528 if (!max_callers
3529 || cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
3530 return true;
3531 }
3532
3533 ipa_ref *ref;
3534 FOR_EACH_ALIAS (node, ref)
3535 if (check_callers (dyn_cast <cgraph_node *> (ref->referring), &max_callers))
3536 return true;
3537
3538 /* Unlike for functions called once, we play unsafe with
3539 COMDATs. We can allow that since we know the functions
3540 under consideration are small (and thus the risk is small) and,
3541 moreover, the growth estimates already account for the fact that
3542 COMDAT functions may or may not disappear when eliminated from the
3543 current unit. With good probability, making the aggressive
3544 choice in all units is going to make the overall program
3545 smaller. */
3546 if (DECL_COMDAT (node->decl))
3547 {
3548 if (!node->can_remove_if_no_direct_calls_p ())
3549 return true;
3550 }
3551 else if (!node->will_be_removed_from_program_if_no_direct_calls_p ())
3552 return true;
3553
3554 return estimate_growth (node) > 0;
3555 }
3556
3557
3558 /* This function performs intraprocedural analysis of NODE that is required
3559 to inline indirect calls. */
3560
3561 static void
3562 inline_indirect_intraprocedural_analysis (struct cgraph_node *node)
3563 {
3564 ipa_analyze_node (node);
3565 if (dump_file && (dump_flags & TDF_DETAILS))
3566 {
3567 ipa_print_node_params (dump_file, node);
3568 ipa_print_node_jump_functions (dump_file, node);
3569 }
3570 }
3571
3572
3573 /* Analyze NODE and compute its inline summary. */
3574
3575 void
3576 inline_analyze_function (struct cgraph_node *node)
3577 {
3578 push_cfun (DECL_STRUCT_FUNCTION (node->decl));
3579
3580 if (dump_file)
3581 fprintf (dump_file, "\nAnalyzing function: %s/%u\n",
3582 node->name (), node->order);
3583 if (opt_for_fn (node->decl, optimize) && !node->thunk.thunk_p)
3584 inline_indirect_intraprocedural_analysis (node);
3585 compute_inline_parameters (node, false);
3586 if (!optimize)
3587 {
3588 struct cgraph_edge *e;
3589 for (e = node->callees; e; e = e->next_callee)
3590 e->inline_failed = CIF_FUNCTION_NOT_OPTIMIZED;
3591 for (e = node->indirect_calls; e; e = e->next_callee)
3592 e->inline_failed = CIF_FUNCTION_NOT_OPTIMIZED;
3593 }
3594
3595 pop_cfun ();
3596 }
3597
3598
3599 /* Called when a new function is inserted into the callgraph late. */
3600
3601 void
3602 inline_summary_t::insert (struct cgraph_node *node, inline_summary *)
3603 {
3604 inline_analyze_function (node);
3605 }
3606
3607 /* Compute inline summaries of all functions in the unit. */
3608
3609 void
3610 inline_generate_summary (void)
3611 {
3612 struct cgraph_node *node;
3613
3614 FOR_EACH_DEFINED_FUNCTION (node)
3615 if (DECL_STRUCT_FUNCTION (node->decl))
3616 node->local.versionable = tree_versionable_function_p (node->decl);
3617
3618 /* When not optimizing, do not bother to analyze. Inlining is still done
3619 because edge redirection needs to happen there. */
3620 if (!optimize && !flag_generate_lto && !flag_generate_offload && !flag_wpa)
3621 return;
3622
3623 if (!inline_summaries)
3624 inline_summaries = (inline_summary_t*) inline_summary_t::create_ggc (symtab);
3625
3626 inline_summaries->enable_insertion_hook ();
3627
3628 ipa_register_cgraph_hooks ();
3629 inline_free_summary ();
3630
3631 FOR_EACH_DEFINED_FUNCTION (node)
3632 if (!node->alias)
3633 inline_analyze_function (node);
3634 }
3635
3636
3637 /* Read the inline summary for edge E from IB. */
3638
3639 static void
3640 read_ipa_call_summary (struct lto_input_block *ib, struct cgraph_edge *e)
3641 {
3642 struct ipa_call_summary *es = ipa_call_summaries->get (e);
3643 predicate p;
3644 int length, i;
3645
3646 es->call_stmt_size = streamer_read_uhwi (ib);
3647 es->call_stmt_time = streamer_read_uhwi (ib);
3648 es->loop_depth = streamer_read_uhwi (ib);
3649 p.stream_in (ib);
3650 edge_set_predicate (e, &p);
3651 length = streamer_read_uhwi (ib);
3652 if (length)
3653 {
3654 es->param.safe_grow_cleared (length);
3655 for (i = 0; i < length; i++)
3656 es->param[i].change_prob = streamer_read_uhwi (ib);
3657 }
3658 }
3659
3660
3661 /* Stream in inline summaries from the section. */

static void
inline_read_section (struct lto_file_decl_data *file_data, const char *data,
		     size_t len)
{
  const struct lto_function_header *header =
    (const struct lto_function_header *) data;
  const int cfg_offset = sizeof (struct lto_function_header);
  const int main_offset = cfg_offset + header->cfg_size;
  const int string_offset = main_offset + header->main_size;
  struct data_in *data_in;
  unsigned int i, count2, j;
  unsigned int f_count;

  lto_input_block ib ((const char *) data + main_offset, header->main_size,
		      file_data->mode_table);

  data_in =
    lto_data_in_create (file_data, (const char *) data + string_offset,
			header->string_size, vNULL);
  f_count = streamer_read_uhwi (&ib);
  for (i = 0; i < f_count; i++)
    {
      unsigned int index;
      struct cgraph_node *node;
      struct inline_summary *info;
      lto_symtab_encoder_t encoder;
      struct bitpack_d bp;
      struct cgraph_edge *e;
      predicate p;

      index = streamer_read_uhwi (&ib);
      encoder = file_data->symtab_node_encoder;
      node = dyn_cast<cgraph_node *> (lto_symtab_encoder_deref (encoder,
								index));
      info = inline_summaries->get (node);

      info->estimated_stack_size
	= info->estimated_self_stack_size = streamer_read_uhwi (&ib);
      info->size = info->self_size = streamer_read_uhwi (&ib);
      info->time = info->self_time = sreal::stream_in (&ib);

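      /* These flag bits must be unpacked in exactly the order in which
	 inline_write_summary packed them.  */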
      bp = streamer_read_bitpack (&ib);
      info->inlinable = bp_unpack_value (&bp, 1);
      info->contains_cilk_spawn = bp_unpack_value (&bp, 1);
      info->fp_expressions = bp_unpack_value (&bp, 1);

      count2 = streamer_read_uhwi (&ib);
      gcc_assert (!info->conds);
      for (j = 0; j < count2; j++)
	{
	  struct condition c;
	  c.operand_num = streamer_read_uhwi (&ib);
	  c.size = streamer_read_uhwi (&ib);
	  c.code = (enum tree_code) streamer_read_uhwi (&ib);
	  c.val = stream_read_tree (&ib, data_in);
	  bp = streamer_read_bitpack (&ib);
	  c.agg_contents = bp_unpack_value (&bp, 1);
	  c.by_ref = bp_unpack_value (&bp, 1);
	  if (c.agg_contents)
	    c.offset = streamer_read_uhwi (&ib);
	  vec_safe_push (info->conds, c);
	}
      count2 = streamer_read_uhwi (&ib);
      gcc_assert (!info->entry);
      for (j = 0; j < count2; j++)
	{
	  struct size_time_entry e;

	  e.size = streamer_read_uhwi (&ib);
	  e.time = sreal::stream_in (&ib);
	  e.exec_predicate.stream_in (&ib);
	  e.nonconst_predicate.stream_in (&ib);

	  vec_safe_push (info->entry, e);
	}

      p.stream_in (&ib);
      set_hint_predicate (&info->loop_iterations, p);
      p.stream_in (&ib);
      set_hint_predicate (&info->loop_stride, p);
      p.stream_in (&ib);
      set_hint_predicate (&info->array_index, p);
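      /* Call summaries follow in the order they were written: direct
	 callees first, then indirect calls.  */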
      for (e = node->callees; e; e = e->next_callee)
	read_ipa_call_summary (&ib, e);
      for (e = node->indirect_calls; e; e = e->next_callee)
	read_ipa_call_summary (&ib, e);
    }

  lto_free_section_data (file_data, LTO_section_inline_summary, NULL, data,
			 len);
  lto_data_in_delete (data_in);
}


/* Read inline summaries.  Jump functions are shared between ipa-cp and
   the inliner, so when ipa-cp is active they are read there and we do
   not need to read them a second time here.  */

void
inline_read_summary (void)
{
  struct lto_file_decl_data **file_data_vec = lto_get_file_decl_data ();
  struct lto_file_decl_data *file_data;
  unsigned int j = 0;

  inline_summary_alloc ();

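  /* Read the inline-summary section of every input file.  */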
  while ((file_data = file_data_vec[j++]))
    {
      size_t len;
      const char *data = lto_get_section_data (file_data,
					       LTO_section_inline_summary,
					       NULL, &len);
      if (data)
	inline_read_section (file_data, data, len);
      else
	/* Fatal error here.  We do not want to support compiling ltrans
	   units with a different version of the compiler or with different
	   flags than the WPA unit, so this should never happen.  */
	fatal_error (input_location,
		     "ipa inline summary is missing in input file");
    }
  if (optimize)
    {
      ipa_register_cgraph_hooks ();
      if (!flag_ipa_cp)
	ipa_prop_read_jump_functions ();
    }

  gcc_assert (inline_summaries);
  inline_summaries->enable_insertion_hook ();
}


/* Write inline summary for edge E to OB.  */

static void
write_ipa_call_summary (struct output_block *ob, struct cgraph_edge *e)
{
  struct ipa_call_summary *es = ipa_call_summaries->get (e);
  int i;

  streamer_write_uhwi (ob, es->call_stmt_size);
  streamer_write_uhwi (ob, es->call_stmt_time);
  streamer_write_uhwi (ob, es->loop_depth);
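  /* An absent predicate is streamed as a bare zero, which the reading
     side decodes as the always-true predicate.  */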
  if (es->predicate)
    es->predicate->stream_out (ob);
  else
    streamer_write_uhwi (ob, 0);
  streamer_write_uhwi (ob, es->param.length ());
  for (i = 0; i < (int) es->param.length (); i++)
    streamer_write_uhwi (ob, es->param[i].change_prob);
}


/* Write inline summaries for the nodes in SET.
   Jump functions are shared between ipa-cp and the inliner, so when
   ipa-cp is active, we don't need to write them twice.  */

void
inline_write_summary (void)
{
  struct output_block *ob = create_output_block (LTO_section_inline_summary);
  lto_symtab_encoder_t encoder = ob->decl_state->symtab_node_encoder;
  unsigned int count = 0;
  int i;

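  /* The record count is streamed first, so count the summarized nodes
     in one pass over the encoder and emit them in a second.  */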
  for (i = 0; i < lto_symtab_encoder_size (encoder); i++)
    {
      symtab_node *snode = lto_symtab_encoder_deref (encoder, i);
      cgraph_node *cnode = dyn_cast <cgraph_node *> (snode);
      if (cnode && cnode->definition && !cnode->alias)
	count++;
    }
  streamer_write_uhwi (ob, count);

  for (i = 0; i < lto_symtab_encoder_size (encoder); i++)
    {
      symtab_node *snode = lto_symtab_encoder_deref (encoder, i);
      cgraph_node *cnode = dyn_cast <cgraph_node *> (snode);
      if (cnode && cnode->definition && !cnode->alias)
	{
	  struct inline_summary *info = inline_summaries->get (cnode);
	  struct bitpack_d bp;
	  struct cgraph_edge *edge;
	  int i;
	  size_time_entry *e;
	  struct condition *c;

	  streamer_write_uhwi (ob, lto_symtab_encoder_encode (encoder, cnode));
	  streamer_write_hwi (ob, info->estimated_self_stack_size);
	  streamer_write_hwi (ob, info->self_size);
	  info->self_time.stream_out (ob);
	  bp = bitpack_create (ob->main_stream);
	  bp_pack_value (&bp, info->inlinable, 1);
	  bp_pack_value (&bp, info->contains_cilk_spawn, 1);
	  bp_pack_value (&bp, info->fp_expressions, 1);
	  streamer_write_bitpack (&bp);
	  streamer_write_uhwi (ob, vec_safe_length (info->conds));
	  for (i = 0; vec_safe_iterate (info->conds, i, &c); i++)
	    {
	      streamer_write_uhwi (ob, c->operand_num);
	      streamer_write_uhwi (ob, c->size);
	      streamer_write_uhwi (ob, c->code);
	      stream_write_tree (ob, c->val, true);
	      bp = bitpack_create (ob->main_stream);
	      bp_pack_value (&bp, c->agg_contents, 1);
	      bp_pack_value (&bp, c->by_ref, 1);
	      streamer_write_bitpack (&bp);
	      if (c->agg_contents)
		streamer_write_uhwi (ob, c->offset);
	    }
	  streamer_write_uhwi (ob, vec_safe_length (info->entry));
	  for (i = 0; vec_safe_iterate (info->entry, i, &e); i++)
	    {
	      streamer_write_uhwi (ob, e->size);
	      e->time.stream_out (ob);
	      e->exec_predicate.stream_out (ob);
	      e->nonconst_predicate.stream_out (ob);
	    }
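	  /* Hint predicates are optional; a bare zero marks an absent
	     one, matching set_hint_predicate on the read side.  */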
	  if (info->loop_iterations)
	    info->loop_iterations->stream_out (ob);
	  else
	    streamer_write_uhwi (ob, 0);
	  if (info->loop_stride)
	    info->loop_stride->stream_out (ob);
	  else
	    streamer_write_uhwi (ob, 0);
	  if (info->array_index)
	    info->array_index->stream_out (ob);
	  else
	    streamer_write_uhwi (ob, 0);
	  for (edge = cnode->callees; edge; edge = edge->next_callee)
	    write_ipa_call_summary (ob, edge);
	  for (edge = cnode->indirect_calls; edge; edge = edge->next_callee)
	    write_ipa_call_summary (ob, edge);
	}
    }
  streamer_write_char_stream (ob->main_stream, 0);
  produce_asm (ob, NULL);
  destroy_output_block (ob);

  if (optimize && !flag_ipa_cp)
    ipa_prop_write_jump_functions ();
}


/* Release the inline summaries.  */

void
inline_free_summary (void)
{
  struct cgraph_node *node;
  if (!ipa_call_summaries)
    return;
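  /* Reset each per-node summary first so the data it owns is freed
     before the summary containers themselves are released.  */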
  FOR_EACH_DEFINED_FUNCTION (node)
    if (!node->alias)
      reset_inline_summary (node, inline_summaries->get (node));
  inline_summaries->release ();
  inline_summaries = NULL;
  ipa_call_summaries->release ();
  delete ipa_call_summaries;
  ipa_call_summaries = NULL;
  edge_predicate_pool.release ();
}