gcc/ipa-inline-analysis.c
/* Inlining decision heuristics.
   Copyright (C) 2003-2017 Free Software Foundation, Inc.
   Contributed by Jan Hubicka

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* Analysis used by the inliner and other passes limiting code size growth.

   We estimate for each function
     - function body size
     - average function execution time
     - inlining size benefit (that is how much of function body size
       and its call sequence is expected to disappear by inlining)
     - inlining time benefit
     - function frame size
   For each call
     - call statement size and time

   inline_summary data structures store the above information locally (i.e.
   parameters of the function itself) and globally (i.e. parameters of
   the function created by applying all the inline decisions already
   present in the callgraph).

   We provide access to the inline_summary data structure and
   basic logic updating the parameters when inlining is performed.

   The summaries are context sensitive.  Context means
     1) partial assignment of known constant values of operands
     2) whether function is inlined into the call or not.
   It is easy to add more variants.  To represent function size and time
   that depends on context (i.e. it is known to be optimized away when
   the context is known either by inlining or from IP-CP and cloning),
   we use predicates.

   estimate_edge_size and estimate_edge_growth can be used to query
   function size/time in the given context.  inline_merge_summary merges
   properties of caller and callee after inlining.

   Finally pass_inline_parameters is exported.  This is used to drive
   computation of function parameters used by the early inliner.  The IPA
   inliner performs analysis via its analyze_function method.  */
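/* As an illustrative sketch (the function below is hypothetical, not
   part of GCC): for

     int f (int x) { if (x == 4) return 1; return expensive (); }

   the summary records the condition "x == 4" and guards the size/time
   of each branch by a predicate over that condition.  When IP-CP or
   inlining makes x == 4 known, the entries guarded by the opposite
   predicate are dropped from the estimates, so f looks much cheaper
   in that context.  */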

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "alloc-pool.h"
#include "tree-pass.h"
#include "ssa.h"
#include "tree-streamer.h"
#include "cgraph.h"
#include "diagnostic.h"
#include "fold-const.h"
#include "print-tree.h"
#include "tree-inline.h"
#include "gimple-pretty-print.h"
#include "params.h"
#include "cfganal.h"
#include "gimple-iterator.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "symbol-summary.h"
#include "ipa-prop.h"
#include "ipa-inline.h"
#include "cfgloop.h"
#include "tree-scalar-evolution.h"
#include "ipa-utils.h"
#include "cilk.h"
#include "cfgexpand.h"
#include "gimplify.h"

/* Summaries.  */
function_summary <inline_summary *> *inline_summaries;
call_summary <ipa_call_summary *> *ipa_call_summaries;

/* Cached node/edge growths.  */
vec<edge_growth_cache_entry> edge_growth_cache;

/* Edge predicates go here.  */
static object_allocator<predicate> edge_predicate_pool ("edge predicates");


/* Dump inline hints.  */
void
dump_inline_hints (FILE *f, inline_hints hints)
{
  if (!hints)
    return;
  fprintf (f, "inline hints:");
  if (hints & INLINE_HINT_indirect_call)
    {
      hints &= ~INLINE_HINT_indirect_call;
      fprintf (f, " indirect_call");
    }
  if (hints & INLINE_HINT_loop_iterations)
    {
      hints &= ~INLINE_HINT_loop_iterations;
      fprintf (f, " loop_iterations");
    }
  if (hints & INLINE_HINT_loop_stride)
    {
      hints &= ~INLINE_HINT_loop_stride;
      fprintf (f, " loop_stride");
    }
  if (hints & INLINE_HINT_same_scc)
    {
      hints &= ~INLINE_HINT_same_scc;
      fprintf (f, " same_scc");
    }
  if (hints & INLINE_HINT_in_scc)
    {
      hints &= ~INLINE_HINT_in_scc;
      fprintf (f, " in_scc");
    }
  if (hints & INLINE_HINT_cross_module)
    {
      hints &= ~INLINE_HINT_cross_module;
      fprintf (f, " cross_module");
    }
  if (hints & INLINE_HINT_declared_inline)
    {
      hints &= ~INLINE_HINT_declared_inline;
      fprintf (f, " declared_inline");
    }
  if (hints & INLINE_HINT_array_index)
    {
      hints &= ~INLINE_HINT_array_index;
      fprintf (f, " array_index");
    }
  if (hints & INLINE_HINT_known_hot)
    {
      hints &= ~INLINE_HINT_known_hot;
      fprintf (f, " known_hot");
    }
  gcc_assert (!hints);
}


/* Record SIZE and TIME to SUMMARY.
   The accounted code will be executed when EXEC_PRED is true.
   When NONCONST_PRED is false the code will evaluate to a constant and
   will get optimized out in specialized clones of the function.  */

void
inline_summary::account_size_time (int size, sreal time,
                                   const predicate &exec_pred,
                                   const predicate &nonconst_pred_in)
{
  size_time_entry *e;
  bool found = false;
  int i;
  predicate nonconst_pred;

  if (exec_pred == false)
    return;

  nonconst_pred = nonconst_pred_in & exec_pred;

  if (nonconst_pred == false)
    return;

  /* We need to create the initial empty unconditional clause, but otherwise
     we don't need to account empty times and sizes.  */
  if (!size && time == 0 && size_time_table)
    return;

  gcc_assert (time >= 0);

  for (i = 0; vec_safe_iterate (size_time_table, i, &e); i++)
    if (e->exec_predicate == exec_pred
        && e->nonconst_predicate == nonconst_pred)
      {
        found = true;
        break;
      }
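  /* Entry 0 is the unconditional entry created first (see the comment
     above), so once the table fills up to 256 entries below, further
     size/time is merged into it with the predicate ignored; this only
     loses precision while keeping the table bounded.  */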
  if (i == 256)
    {
      i = 0;
      found = true;
      e = &(*size_time_table)[0];
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file,
                 "\t\tReached limit on number of entries, "
                 "ignoring the predicate.");
    }
  if (dump_file && (dump_flags & TDF_DETAILS) && (time != 0 || size))
    {
      fprintf (dump_file,
               "\t\tAccounting size:%3.2f, time:%3.2f on %spredicate exec:",
               ((double) size) / INLINE_SIZE_SCALE,
               (time.to_double ()), found ? "" : "new ");
      exec_pred.dump (dump_file, conds, 0);
      if (exec_pred != nonconst_pred)
        {
          fprintf (dump_file, " nonconst:");
          nonconst_pred.dump (dump_file, conds);
        }
      else
        fprintf (dump_file, "\n");
    }
  if (!found)
    {
      struct size_time_entry new_entry;
      new_entry.size = size;
      new_entry.time = time;
      new_entry.exec_predicate = exec_pred;
      new_entry.nonconst_predicate = nonconst_pred;
      vec_safe_push (size_time_table, new_entry);
    }
  else
    {
      e->size += size;
      e->time += time;
    }
}

/* We proved E to be unreachable, redirect it to __builtin_unreachable.  */

static struct cgraph_edge *
redirect_to_unreachable (struct cgraph_edge *e)
{
  struct cgraph_node *callee = !e->inline_failed ? e->callee : NULL;
  struct cgraph_node *target = cgraph_node::get_create
    (builtin_decl_implicit (BUILT_IN_UNREACHABLE));

  if (e->speculative)
    e = e->resolve_speculation (target->decl);
  else if (!e->callee)
    e->make_direct (target);
  else
    e->redirect_callee (target);
  struct ipa_call_summary *es = ipa_call_summaries->get (e);
  e->inline_failed = CIF_UNREACHABLE;
  e->frequency = 0;
  e->count = 0;
  es->call_stmt_size = 0;
  es->call_stmt_time = 0;
  if (callee)
    callee->remove_symbol_and_inline_clones ();
  return e;
}

/* Set predicate for edge E.  */

static void
edge_set_predicate (struct cgraph_edge *e, predicate *predicate)
{
  /* If the edge is determined to be never executed, redirect it
     to BUILTIN_UNREACHABLE to save inliner from inlining into it.  */
  if (predicate && *predicate == false
      /* When handling speculative edges, we need to do the redirection
         just once.  Do it always on the direct edge, so we do not
         attempt to resolve speculation while duplicating the edge.  */
      && (!e->speculative || e->callee))
    e = redirect_to_unreachable (e);

  struct ipa_call_summary *es = ipa_call_summaries->get (e);
  if (predicate && *predicate != true)
    {
      if (!es->predicate)
        es->predicate = edge_predicate_pool.allocate ();
      *es->predicate = *predicate;
    }
  else
    {
      if (es->predicate)
        edge_predicate_pool.remove (es->predicate);
      es->predicate = NULL;
    }
}

/* Set predicate for hint *P.  */

static void
set_hint_predicate (predicate **p, predicate new_predicate)
{
  if (new_predicate == false || new_predicate == true)
    {
      if (*p)
        edge_predicate_pool.remove (*p);
      *p = NULL;
    }
  else
    {
      if (!*p)
        *p = edge_predicate_pool.allocate ();
      **p = new_predicate;
    }
}


/* Compute what conditions may or may not hold given information about
   parameters.  RET_CLAUSE returns truths that may hold in a specialized copy,
   while RET_NONSPEC_CLAUSE returns truths that may hold in a nonspecialized
   copy when called in a given context.  It is a bitmask of conditions.  Bit
   0 means that condition is known to be false, while bit 1 means that condition
   may or may not be true.  These differ - for example the NOT_INLINED condition
   is always false in the second, and also builtin_constant_p tests cannot use
   the fact that parameter is indeed a constant.

   KNOWN_VALS is a partial mapping of parameters of NODE to constant values.
   KNOWN_AGGS is a vector of aggregate jump functions for each parameter.
   Return clause of possible truths.  When INLINE_P is true, assume that we are
   inlining.

   ERROR_MARK means compile time invariant.  */

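/* A small worked example of the clause encoding (the conditions are
   illustrative): with two conditions, cond 0 "a == 4" known to be
   false and cond 1 "b > 0" unknown, the returned clause has the bit
   (1 << (1 + predicate::first_dynamic_condition)) set but not the bit
   for cond 0, i.e. only "b > 0" may still be true in the specialized
   copy.  The nonspec clause additionally always keeps the
   predicate::not_inlined_condition bit set.  */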
static void
evaluate_conditions_for_known_args (struct cgraph_node *node,
                                    bool inline_p,
                                    vec<tree> known_vals,
                                    vec<ipa_agg_jump_function_p> known_aggs,
                                    clause_t *ret_clause,
                                    clause_t *ret_nonspec_clause)
{
  clause_t clause = inline_p ? 0 : 1 << predicate::not_inlined_condition;
  clause_t nonspec_clause = 1 << predicate::not_inlined_condition;
  struct inline_summary *info = inline_summaries->get (node);
  int i;
  struct condition *c;

  for (i = 0; vec_safe_iterate (info->conds, i, &c); i++)
    {
      tree val;
      tree res;

      /* We allow call stmt to have fewer arguments than the callee function
         (especially for K&R style programs).  So bound check here (we assume
         known_aggs vector, if non-NULL, has the same length as
         known_vals).  */
      gcc_checking_assert (!known_aggs.exists ()
                           || (known_vals.length () == known_aggs.length ()));
      if (c->operand_num >= (int) known_vals.length ())
        {
          clause |= 1 << (i + predicate::first_dynamic_condition);
          nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
          continue;
        }

      if (c->agg_contents)
        {
          struct ipa_agg_jump_function *agg;

          if (c->code == predicate::changed
              && !c->by_ref
              && (known_vals[c->operand_num] == error_mark_node))
            continue;

          if (known_aggs.exists ())
            {
              agg = known_aggs[c->operand_num];
              val = ipa_find_agg_cst_for_param (agg, known_vals[c->operand_num],
                                                c->offset, c->by_ref);
            }
          else
            val = NULL_TREE;
        }
      else
        {
          val = known_vals[c->operand_num];
          if (val == error_mark_node && c->code != predicate::changed)
            val = NULL_TREE;
        }

      if (!val)
        {
          clause |= 1 << (i + predicate::first_dynamic_condition);
          nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
          continue;
        }
      if (c->code == predicate::changed)
        {
          nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
          continue;
        }

      if (tree_to_shwi (TYPE_SIZE (TREE_TYPE (val))) != c->size)
        {
          clause |= 1 << (i + predicate::first_dynamic_condition);
          nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
          continue;
        }
      if (c->code == predicate::is_not_constant)
        {
          nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
          continue;
        }

      val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (c->val), val);
      res = val
        ? fold_binary_to_constant (c->code, boolean_type_node, val, c->val)
        : NULL;

      if (res && integer_zerop (res))
        continue;

      clause |= 1 << (i + predicate::first_dynamic_condition);
      nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
    }
  *ret_clause = clause;
  if (ret_nonspec_clause)
    *ret_nonspec_clause = nonspec_clause;
}


/* Work out what conditions might be true at invocation of E.  */

static void
evaluate_properties_for_edge (struct cgraph_edge *e, bool inline_p,
                              clause_t *clause_ptr, clause_t *nonspec_clause_ptr,
                              vec<tree> *known_vals_ptr,
                              vec<ipa_polymorphic_call_context> *known_contexts_ptr,
                              vec<ipa_agg_jump_function_p> *known_aggs_ptr)
{
  struct cgraph_node *callee = e->callee->ultimate_alias_target ();
  struct inline_summary *info = inline_summaries->get (callee);
  vec<tree> known_vals = vNULL;
  vec<ipa_agg_jump_function_p> known_aggs = vNULL;

  if (clause_ptr)
    *clause_ptr = inline_p ? 0 : 1 << predicate::not_inlined_condition;
  if (known_vals_ptr)
    known_vals_ptr->create (0);
  if (known_contexts_ptr)
    known_contexts_ptr->create (0);

  if (ipa_node_params_sum
      && !e->call_stmt_cannot_inline_p
      && ((clause_ptr && info->conds) || known_vals_ptr || known_contexts_ptr))
    {
      struct ipa_node_params *parms_info;
      struct ipa_edge_args *args = IPA_EDGE_REF (e);
      struct ipa_call_summary *es = ipa_call_summaries->get (e);
      int i, count = ipa_get_cs_argument_count (args);

      if (e->caller->global.inlined_to)
        parms_info = IPA_NODE_REF (e->caller->global.inlined_to);
      else
        parms_info = IPA_NODE_REF (e->caller);

      if (count && (info->conds || known_vals_ptr))
        known_vals.safe_grow_cleared (count);
      if (count && (info->conds || known_aggs_ptr))
        known_aggs.safe_grow_cleared (count);
      if (count && known_contexts_ptr)
        known_contexts_ptr->safe_grow_cleared (count);

      for (i = 0; i < count; i++)
        {
          struct ipa_jump_func *jf = ipa_get_ith_jump_func (args, i);
          tree cst = ipa_value_from_jfunc (parms_info, jf);

          if (!cst && e->call_stmt
              && i < (int) gimple_call_num_args (e->call_stmt))
            {
              cst = gimple_call_arg (e->call_stmt, i);
              if (!is_gimple_min_invariant (cst))
                cst = NULL;
            }
          if (cst)
            {
              gcc_checking_assert (TREE_CODE (cst) != TREE_BINFO);
              if (known_vals.exists ())
                known_vals[i] = cst;
            }
          else if (inline_p && !es->param[i].change_prob)
            known_vals[i] = error_mark_node;

          if (known_contexts_ptr)
            (*known_contexts_ptr)[i] = ipa_context_from_jfunc (parms_info, e,
                                                               i, jf);
          /* TODO: When IPA-CP starts propagating and merging aggregate jump
             functions, use its knowledge of the caller too, just like the
             scalar case above.  */
          known_aggs[i] = &jf->agg;
        }
    }
  else if (e->call_stmt && !e->call_stmt_cannot_inline_p
           && ((clause_ptr && info->conds) || known_vals_ptr))
    {
      int i, count = (int) gimple_call_num_args (e->call_stmt);

      if (count && (info->conds || known_vals_ptr))
        known_vals.safe_grow_cleared (count);
      for (i = 0; i < count; i++)
        {
          tree cst = gimple_call_arg (e->call_stmt, i);
          if (!is_gimple_min_invariant (cst))
            cst = NULL;
          if (cst)
            known_vals[i] = cst;
        }
    }

  evaluate_conditions_for_known_args (callee, inline_p,
                                      known_vals, known_aggs, clause_ptr,
                                      nonspec_clause_ptr);

  if (known_vals_ptr)
    *known_vals_ptr = known_vals;
  else
    known_vals.release ();

  if (known_aggs_ptr)
    *known_aggs_ptr = known_aggs;
  else
    known_aggs.release ();
}


/* Allocate the inline summary vector or resize it to cover all cgraph nodes.  */

static void
inline_summary_alloc (void)
{
  if (!inline_summaries)
    inline_summaries = inline_summary_t::create_ggc (symtab);
  if (!ipa_call_summaries)
    ipa_call_summaries = new ipa_call_summary_t (symtab, false);
}

/* We are called multiple times for a given function; clear
   data from the previous run so they do not accumulate.  */

void
ipa_call_summary::reset ()
{
  call_stmt_size = call_stmt_time = 0;
  if (predicate)
    edge_predicate_pool.remove (predicate);
  predicate = NULL;
  param.release ();
}

/* We are called multiple times for a given function; clear
   data from the previous run so they do not accumulate.  */

void
inline_summary::reset (struct cgraph_node *node)
{
  struct cgraph_edge *e;

  self_size = 0;
  self_time = 0;
  estimated_stack_size = 0;
  estimated_self_stack_size = 0;
  stack_frame_offset = 0;
  size = 0;
  time = 0;
  growth = 0;
  scc_no = 0;
  if (loop_iterations)
    {
      edge_predicate_pool.remove (loop_iterations);
      loop_iterations = NULL;
    }
  if (loop_stride)
    {
      edge_predicate_pool.remove (loop_stride);
      loop_stride = NULL;
    }
  if (array_index)
    {
      edge_predicate_pool.remove (array_index);
      array_index = NULL;
    }
  vec_free (conds);
  vec_free (size_time_table);
  for (e = node->callees; e; e = e->next_callee)
    ipa_call_summaries->get (e)->reset ();
  for (e = node->indirect_calls; e; e = e->next_callee)
    ipa_call_summaries->get (e)->reset ();
  fp_expressions = false;
}

/* Hook that is called by cgraph.c when a node is removed.  */

void
inline_summary_t::remove (cgraph_node *node, inline_summary *info)
{
  info->reset (node);
}

/* Same as remap_predicate_after_duplication but handle hint predicate *P.
   Additionally care about allocating a new memory slot for the updated
   predicate and set it to NULL when it becomes true or false (and thus
   uninteresting).  */

static void
remap_hint_predicate_after_duplication (predicate **p,
                                        clause_t possible_truths)
{
  predicate new_predicate;

  if (!*p)
    return;

  new_predicate = (*p)->remap_after_duplication (possible_truths);
  /* We do not want to free the previous predicate; it is used by node origin.  */
  *p = NULL;
  set_hint_predicate (p, new_predicate);
}


/* Hook that is called by cgraph.c when a node is duplicated.  */
void
inline_summary_t::duplicate (cgraph_node *src,
                             cgraph_node *dst,
                             inline_summary *,
                             inline_summary *info)
{
  inline_summary_alloc ();
  memcpy (info, inline_summaries->get (src), sizeof (inline_summary));
  /* TODO: as an optimization, we may avoid copying conditions
     that are known to be false or true.  */
  info->conds = vec_safe_copy (info->conds);

  /* When there are any replacements in the function body, see if we can figure
     out that something was optimized out.  */
  if (ipa_node_params_sum && dst->clone.tree_map)
    {
      vec<size_time_entry, va_gc> *entry = info->size_time_table;
      /* Use SRC parm info since it may not be copied yet.  */
      struct ipa_node_params *parms_info = IPA_NODE_REF (src);
      vec<tree> known_vals = vNULL;
      int count = ipa_get_param_count (parms_info);
      int i, j;
      clause_t possible_truths;
      predicate true_pred = true;
      size_time_entry *e;
      int optimized_out_size = 0;
      bool inlined_to_p = false;
      struct cgraph_edge *edge, *next;

      info->size_time_table = 0;
      known_vals.safe_grow_cleared (count);
      for (i = 0; i < count; i++)
        {
          struct ipa_replace_map *r;

          for (j = 0; vec_safe_iterate (dst->clone.tree_map, j, &r); j++)
            {
              if (((!r->old_tree && r->parm_num == i)
                   || (r->old_tree && r->old_tree == ipa_get_param (parms_info, i)))
                  && r->replace_p && !r->ref_p)
                {
                  known_vals[i] = r->new_tree;
                  break;
                }
            }
        }
      evaluate_conditions_for_known_args (dst, false,
                                          known_vals,
                                          vNULL,
                                          &possible_truths,
                                          /* We are going to specialize,
                                             so ignore nonspec truths.  */
                                          NULL);
      known_vals.release ();

      info->account_size_time (0, 0, true_pred, true_pred);

      /* Remap size_time vectors.
         Simplify the predicate by pruning out alternatives that are known
         to be false.
         TODO: as an optimization, we can also eliminate conditions known
         to be true.  */
      for (i = 0; vec_safe_iterate (entry, i, &e); i++)
        {
          predicate new_exec_pred;
          predicate new_nonconst_pred;
          new_exec_pred = e->exec_predicate.remap_after_duplication
            (possible_truths);
          new_nonconst_pred = e->nonconst_predicate.remap_after_duplication
            (possible_truths);
          if (new_exec_pred == false || new_nonconst_pred == false)
            optimized_out_size += e->size;
          else
            info->account_size_time (e->size, e->time, new_exec_pred,
                                     new_nonconst_pred);
        }

      /* Remap edge predicates with the same simplification as above.
         Also copy constantness arrays.  */
      for (edge = dst->callees; edge; edge = next)
        {
          predicate new_predicate;
          struct ipa_call_summary *es = ipa_call_summaries->get (edge);
          next = edge->next_callee;

          if (!edge->inline_failed)
            inlined_to_p = true;
          if (!es->predicate)
            continue;
          new_predicate = es->predicate->remap_after_duplication
            (possible_truths);
          if (new_predicate == false && *es->predicate != false)
            optimized_out_size += es->call_stmt_size * INLINE_SIZE_SCALE;
          edge_set_predicate (edge, &new_predicate);
        }

      /* Remap indirect edge predicates with the same simplification as above.
         Also copy constantness arrays.  */
      for (edge = dst->indirect_calls; edge; edge = next)
        {
          predicate new_predicate;
          struct ipa_call_summary *es = ipa_call_summaries->get (edge);
          next = edge->next_callee;

          gcc_checking_assert (edge->inline_failed);
          if (!es->predicate)
            continue;
          new_predicate = es->predicate->remap_after_duplication
            (possible_truths);
          if (new_predicate == false && *es->predicate != false)
            optimized_out_size += es->call_stmt_size * INLINE_SIZE_SCALE;
          edge_set_predicate (edge, &new_predicate);
        }
      remap_hint_predicate_after_duplication (&info->loop_iterations,
                                              possible_truths);
      remap_hint_predicate_after_duplication (&info->loop_stride,
                                              possible_truths);
      remap_hint_predicate_after_duplication (&info->array_index,
                                              possible_truths);

      /* If inliner or someone after inliner will ever start producing
         non-trivial clones, we will get trouble with lack of information
         about updating self sizes, because size vectors already contain
         sizes of the callees.  */
      gcc_assert (!inlined_to_p || !optimized_out_size);
    }
  else
    {
      info->size_time_table = vec_safe_copy (info->size_time_table);
      if (info->loop_iterations)
        {
          predicate p = *info->loop_iterations;
          info->loop_iterations = NULL;
          set_hint_predicate (&info->loop_iterations, p);
        }
      if (info->loop_stride)
        {
          predicate p = *info->loop_stride;
          info->loop_stride = NULL;
          set_hint_predicate (&info->loop_stride, p);
        }
      if (info->array_index)
        {
          predicate p = *info->array_index;
          info->array_index = NULL;
          set_hint_predicate (&info->array_index, p);
        }
    }
  if (!dst->global.inlined_to)
    inline_update_overall_summary (dst);
}


/* Hook that is called by cgraph.c when a node is duplicated.  */

void
ipa_call_summary_t::duplicate (struct cgraph_edge *src,
                               struct cgraph_edge *dst,
                               struct ipa_call_summary *srcinfo,
                               struct ipa_call_summary *info)
{
  *info = *srcinfo;
  info->predicate = NULL;
  edge_set_predicate (dst, srcinfo->predicate);
  info->param = srcinfo->param.copy ();
  if (!dst->indirect_unknown_callee && src->indirect_unknown_callee)
    {
      info->call_stmt_size -= (eni_size_weights.indirect_call_cost
                               - eni_size_weights.call_cost);
      info->call_stmt_time -= (eni_time_weights.indirect_call_cost
                               - eni_time_weights.call_cost);
    }
}

/* Keep edge cache consistent across edge removal.  */

void
ipa_call_summary_t::remove (struct cgraph_edge *edge,
                            struct ipa_call_summary *sum)
{
  if (edge_growth_cache.exists ())
    reset_edge_growth_cache (edge);
  sum->reset ();
}

/* Initialize growth caches.  */

void
initialize_growth_caches (void)
{
  if (symtab->edges_max_uid)
    edge_growth_cache.safe_grow_cleared (symtab->edges_max_uid);
}

/* Free growth caches.  */

void
free_growth_caches (void)
{
  edge_growth_cache.release ();
}


/* Dump edge summaries associated to NODE and recursively to all clones.
   Indent by INDENT.  */

static void
dump_ipa_call_summary (FILE *f, int indent, struct cgraph_node *node,
                       struct inline_summary *info)
{
  struct cgraph_edge *edge;
  for (edge = node->callees; edge; edge = edge->next_callee)
    {
      struct ipa_call_summary *es = ipa_call_summaries->get (edge);
      struct cgraph_node *callee = edge->callee->ultimate_alias_target ();
      int i;

      fprintf (f,
               "%*s%s/%i %s\n%*s loop depth:%2i freq:%4i size:%2i"
               " time: %2i callee size:%2i stack:%2i",
               indent, "", callee->name (), callee->order,
               !edge->inline_failed
               ? "inlined" : cgraph_inline_failed_string (edge->inline_failed),
               indent, "", es->loop_depth, edge->frequency,
               es->call_stmt_size, es->call_stmt_time,
               (int) inline_summaries->get (callee)->size / INLINE_SIZE_SCALE,
               (int) inline_summaries->get (callee)->estimated_stack_size);

      if (es->predicate)
        {
          fprintf (f, " predicate: ");
          es->predicate->dump (f, info->conds);
        }
      else
        fprintf (f, "\n");
      if (es->param.exists ())
        for (i = 0; i < (int) es->param.length (); i++)
          {
            int prob = es->param[i].change_prob;

            if (!prob)
              fprintf (f, "%*s op%i is compile time invariant\n",
                       indent + 2, "", i);
            else if (prob != REG_BR_PROB_BASE)
              fprintf (f, "%*s op%i change %f%% of time\n", indent + 2, "", i,
                       prob * 100.0 / REG_BR_PROB_BASE);
          }
      if (!edge->inline_failed)
        {
          fprintf (f, "%*sStack frame offset %i, callee self size %i,"
                   " callee size %i\n",
                   indent + 2, "",
                   (int) inline_summaries->get (callee)->stack_frame_offset,
                   (int) inline_summaries->get (callee)->estimated_self_stack_size,
                   (int) inline_summaries->get (callee)->estimated_stack_size);
          dump_ipa_call_summary (f, indent + 2, callee, info);
        }
    }
  for (edge = node->indirect_calls; edge; edge = edge->next_callee)
    {
      struct ipa_call_summary *es = ipa_call_summaries->get (edge);
      fprintf (f, "%*sindirect call loop depth:%2i freq:%4i size:%2i"
               " time: %2i",
               indent, "",
               es->loop_depth,
               edge->frequency, es->call_stmt_size, es->call_stmt_time);
      if (es->predicate)
        {
          fprintf (f, "predicate: ");
          es->predicate->dump (f, info->conds);
        }
      else
        fprintf (f, "\n");
    }
}


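/* Dump inline summary of NODE to F.  */
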
void
dump_inline_summary (FILE *f, struct cgraph_node *node)
{
  if (node->definition)
    {
      struct inline_summary *s = inline_summaries->get (node);
      size_time_entry *e;
      int i;
      fprintf (f, "Inline summary for %s/%i", node->name (),
               node->order);
      if (DECL_DISREGARD_INLINE_LIMITS (node->decl))
        fprintf (f, " always_inline");
      if (s->inlinable)
        fprintf (f, " inlinable");
      if (s->contains_cilk_spawn)
        fprintf (f, " contains_cilk_spawn");
      if (s->fp_expressions)
        fprintf (f, " fp_expression");
      fprintf (f, "\n self time: %f\n", s->self_time.to_double ());
      fprintf (f, " global time: %f\n", s->time.to_double ());
      fprintf (f, " self size: %i\n", s->self_size);
      fprintf (f, " global size: %i\n", s->size);
      fprintf (f, " min size: %i\n", s->min_size);
      fprintf (f, " self stack: %i\n",
               (int) s->estimated_self_stack_size);
      fprintf (f, " global stack: %i\n", (int) s->estimated_stack_size);
      if (s->growth)
        fprintf (f, " estimated growth:%i\n", (int) s->growth);
      if (s->scc_no)
        fprintf (f, " In SCC: %i\n", (int) s->scc_no);
      for (i = 0; vec_safe_iterate (s->size_time_table, i, &e); i++)
        {
          fprintf (f, " size:%f, time:%f",
                   (double) e->size / INLINE_SIZE_SCALE,
                   e->time.to_double ());
          if (e->exec_predicate != true)
            {
              fprintf (f, ", executed if:");
              e->exec_predicate.dump (f, s->conds, 0);
            }
          if (e->exec_predicate != e->nonconst_predicate)
            {
              fprintf (f, ", nonconst if:");
              e->nonconst_predicate.dump (f, s->conds, 0);
            }
          fprintf (f, "\n");
        }
      if (s->loop_iterations)
        {
          fprintf (f, " loop iterations:");
          s->loop_iterations->dump (f, s->conds);
        }
      if (s->loop_stride)
        {
          fprintf (f, " loop stride:");
          s->loop_stride->dump (f, s->conds);
        }
      if (s->array_index)
        {
          fprintf (f, " array index:");
          s->array_index->dump (f, s->conds);
        }
      fprintf (f, " calls:\n");
      dump_ipa_call_summary (f, 4, node, s);
      fprintf (f, "\n");
    }
}

DEBUG_FUNCTION void
debug_inline_summary (struct cgraph_node *node)
{
  dump_inline_summary (stderr, node);
}

void
dump_inline_summaries (FILE *f)
{
  struct cgraph_node *node;

  FOR_EACH_DEFINED_FUNCTION (node)
    if (!node->global.inlined_to)
      dump_inline_summary (f, node);
}

/* Give initial reasons why inlining would fail on EDGE.  This gets either
   nullified or usually overwritten by more precise reasons later.  */

void
initialize_inline_failed (struct cgraph_edge *e)
{
  struct cgraph_node *callee = e->callee;

  if (e->inline_failed && e->inline_failed != CIF_BODY_NOT_AVAILABLE
      && cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
    ;
  else if (e->indirect_unknown_callee)
    e->inline_failed = CIF_INDIRECT_UNKNOWN_CALL;
  else if (!callee->definition)
    e->inline_failed = CIF_BODY_NOT_AVAILABLE;
  else if (callee->local.redefined_extern_inline)
    e->inline_failed = CIF_REDEFINED_EXTERN_INLINE;
  else
    e->inline_failed = CIF_FUNCTION_NOT_CONSIDERED;
  gcc_checking_assert (!e->call_stmt_cannot_inline_p
                       || cgraph_inline_failed_type (e->inline_failed)
                          == CIF_FINAL_ERROR);
}

/* Callback of walk_aliased_vdefs.  Flags that it has been invoked to the
   boolean variable pointed to by DATA.  */

static bool
mark_modified (ao_ref *ao ATTRIBUTE_UNUSED, tree vdef ATTRIBUTE_UNUSED,
               void *data)
{
  bool *b = (bool *) data;
  *b = true;
  return true;
}

/* If OP refers to value of function parameter, return the corresponding
   parameter.  If non-NULL, the size of the memory load (or the SSA_NAME of the
   PARM_DECL) will be stored to *SIZE_P in that case too.  */

static tree
unmodified_parm_1 (gimple *stmt, tree op, HOST_WIDE_INT *size_p)
{
  /* SSA_NAME referring to parm default def?  */
  if (TREE_CODE (op) == SSA_NAME
      && SSA_NAME_IS_DEFAULT_DEF (op)
      && TREE_CODE (SSA_NAME_VAR (op)) == PARM_DECL)
    {
      if (size_p)
        *size_p = tree_to_shwi (TYPE_SIZE (TREE_TYPE (op)));
      return SSA_NAME_VAR (op);
    }
  /* Non-SSA parm reference?  */
  if (TREE_CODE (op) == PARM_DECL)
    {
      bool modified = false;

      ao_ref refd;
      ao_ref_init (&refd, op);
      walk_aliased_vdefs (&refd, gimple_vuse (stmt), mark_modified, &modified,
                          NULL);
      if (!modified)
        {
          if (size_p)
            *size_p = tree_to_shwi (TYPE_SIZE (TREE_TYPE (op)));
          return op;
        }
    }
  return NULL_TREE;
}

/* If OP refers to value of function parameter, return the corresponding
   parameter.  Also traverse chains of SSA register assignments.  If non-NULL,
   the size of the memory load (or the SSA_NAME of the PARM_DECL) will be
   stored to *SIZE_P in that case too.  */

static tree
unmodified_parm (gimple *stmt, tree op, HOST_WIDE_INT *size_p)
{
  tree res = unmodified_parm_1 (stmt, op, size_p);
  if (res)
    return res;

  if (TREE_CODE (op) == SSA_NAME
      && !SSA_NAME_IS_DEFAULT_DEF (op)
      && gimple_assign_single_p (SSA_NAME_DEF_STMT (op)))
    return unmodified_parm (SSA_NAME_DEF_STMT (op),
                            gimple_assign_rhs1 (SSA_NAME_DEF_STMT (op)),
                            size_p);
  return NULL_TREE;
}

/* If OP refers to a value of a function parameter or value loaded from an
   aggregate passed to a parameter (either by value or reference), return TRUE
   and store the number of the parameter to *INDEX_P, the access size into
   *SIZE_P, and information whether and how it has been loaded from an
   aggregate into *AGGPOS.  INFO describes the function parameters, STMT is the
   statement in which OP is used or loaded.  */

static bool
unmodified_parm_or_parm_agg_item (struct ipa_func_body_info *fbi,
                                  gimple *stmt, tree op, int *index_p,
                                  HOST_WIDE_INT *size_p,
                                  struct agg_position_info *aggpos)
{
  tree res = unmodified_parm_1 (stmt, op, size_p);

  gcc_checking_assert (aggpos);
  if (res)
    {
      *index_p = ipa_get_param_decl_index (fbi->info, res);
      if (*index_p < 0)
        return false;
      aggpos->agg_contents = false;
      aggpos->by_ref = false;
      return true;
    }

  if (TREE_CODE (op) == SSA_NAME)
    {
      if (SSA_NAME_IS_DEFAULT_DEF (op)
          || !gimple_assign_single_p (SSA_NAME_DEF_STMT (op)))
        return false;
      stmt = SSA_NAME_DEF_STMT (op);
      op = gimple_assign_rhs1 (stmt);
      if (!REFERENCE_CLASS_P (op))
        return unmodified_parm_or_parm_agg_item (fbi, stmt, op, index_p, size_p,
                                                 aggpos);
    }

  aggpos->agg_contents = true;
  return ipa_load_from_parm_agg (fbi, fbi->info->descriptors,
                                 stmt, op, index_p, &aggpos->offset,
                                 size_p, &aggpos->by_ref);
}

/* See if statement might disappear after inlining.
   0 - means not eliminated
   1 - half of the statements go away
   2 - for sure it is eliminated.
   We are not terribly sophisticated, basically looking for simple abstraction
   penalty wrappers.  */

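/* For instance, in a simple getter (illustrative example only):

     int get_x (struct s *p) { return p->x; }

   the load from *p is a read of a parameter passed by reference and
   the return statement is free, so the whole body is expected to
   disappear once the function is inlined and SRA and copy propagation
   clean up.  */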
static int
eliminated_by_inlining_prob (gimple *stmt)
{
  enum gimple_code code = gimple_code (stmt);
  enum tree_code rhs_code;

  if (!optimize)
    return 0;

  switch (code)
    {
    case GIMPLE_RETURN:
      return 2;
    case GIMPLE_ASSIGN:
      if (gimple_num_ops (stmt) != 2)
        return 0;

      rhs_code = gimple_assign_rhs_code (stmt);

      /* Casts of parameters, loads from parameters passed by reference
         and stores to return value or parameters are often free after
         inlining due to SRA and further combining.
         Assume that half of the statements go away.  */
      if (CONVERT_EXPR_CODE_P (rhs_code)
          || rhs_code == VIEW_CONVERT_EXPR
          || rhs_code == ADDR_EXPR
          || gimple_assign_rhs_class (stmt) == GIMPLE_SINGLE_RHS)
        {
          tree rhs = gimple_assign_rhs1 (stmt);
          tree lhs = gimple_assign_lhs (stmt);
          tree inner_rhs = get_base_address (rhs);
          tree inner_lhs = get_base_address (lhs);
          bool rhs_free = false;
          bool lhs_free = false;

          if (!inner_rhs)
            inner_rhs = rhs;
          if (!inner_lhs)
            inner_lhs = lhs;

          /* Reads of parameter are expected to be free.  */
          if (unmodified_parm (stmt, inner_rhs, NULL))
            rhs_free = true;
          /* Match expressions of form &this->field.  Those will most likely
             combine with something upstream after inlining.  */
          else if (TREE_CODE (inner_rhs) == ADDR_EXPR)
            {
              tree op = get_base_address (TREE_OPERAND (inner_rhs, 0));
              if (TREE_CODE (op) == PARM_DECL)
                rhs_free = true;
              else if (TREE_CODE (op) == MEM_REF
                       && unmodified_parm (stmt, TREE_OPERAND (op, 0), NULL))
                rhs_free = true;
            }

          /* When parameter is not SSA register because its address is taken
             and it is just copied into one, the statement will be completely
             free after inlining (we will copy propagate backward).  */
          if (rhs_free && is_gimple_reg (lhs))
            return 2;

          /* Reads of parameters passed by reference
             expected to be free (i.e. optimized out after inlining).  */
          if (TREE_CODE (inner_rhs) == MEM_REF
              && unmodified_parm (stmt, TREE_OPERAND (inner_rhs, 0), NULL))
            rhs_free = true;

          /* Copying parameter passed by reference into gimple register is
             probably also going to copy propagate, but we can't be quite
             sure.  */
          if (rhs_free && is_gimple_reg (lhs))
            lhs_free = true;

          /* Writes to parameters, parameters passed by value and return value
             (either directly or passed via invisible reference) are free.

             TODO: We ought to handle testcase like
             struct a {int a,b;};
             struct a
             returnstruct (void)
             {
               struct a a = {1,2};
               return a;
             }

             This translates into:

             returnstruct ()
             {
               int a$b;
               int a$a;
               struct a a;
               struct a D.2739;

               <bb 2>:
               D.2739.a = 1;
               D.2739.b = 2;
               return D.2739;

             }
             For that we either need to copy ipa-split logic detecting writes
             to return value.  */
          if (TREE_CODE (inner_lhs) == PARM_DECL
              || TREE_CODE (inner_lhs) == RESULT_DECL
              || (TREE_CODE (inner_lhs) == MEM_REF
                  && (unmodified_parm (stmt, TREE_OPERAND (inner_lhs, 0), NULL)
                      || (TREE_CODE (TREE_OPERAND (inner_lhs, 0)) == SSA_NAME
                          && SSA_NAME_VAR (TREE_OPERAND (inner_lhs, 0))
                          && TREE_CODE (SSA_NAME_VAR (TREE_OPERAND
                                                      (inner_lhs,
                                                       0))) == RESULT_DECL))))
            lhs_free = true;
          if (lhs_free
              && (is_gimple_reg (rhs) || is_gimple_min_invariant (rhs)))
            rhs_free = true;
          if (lhs_free && rhs_free)
            return 1;
        }
      return 0;
    default:
      return 0;
    }
}


/* If BB ends with a conditional that we can turn into predicates,
   attach the corresponding predicates to the CFG edges.  */

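/* Illustrative example: for

     if (param < 4)

   where PARAM is a function parameter, the edge taken when the test
   is true gets the predicate "param < 4" and the false edge gets the
   inverted comparison, so blocks reachable only through one of the
   edges inherit the corresponding guard.  */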
static void
set_cond_stmt_execution_predicate (struct ipa_func_body_info *fbi,
                                   struct inline_summary *summary,
                                   basic_block bb)
{
  gimple *last;
  tree op;
  int index;
  HOST_WIDE_INT size;
  struct agg_position_info aggpos;
  enum tree_code code, inverted_code;
  edge e;
  edge_iterator ei;
  gimple *set_stmt;
  tree op2;

  last = last_stmt (bb);
  if (!last || gimple_code (last) != GIMPLE_COND)
    return;
  if (!is_gimple_ip_invariant (gimple_cond_rhs (last)))
    return;
  op = gimple_cond_lhs (last);
  /* TODO: handle conditionals like
       var = op0 < 4;
       if (var != 0).  */
  if (unmodified_parm_or_parm_agg_item (fbi, last, op, &index, &size, &aggpos))
    {
      code = gimple_cond_code (last);
      inverted_code = invert_tree_comparison (code, HONOR_NANS (op));

      FOR_EACH_EDGE (e, ei, bb->succs)
        {
          enum tree_code this_code = (e->flags & EDGE_TRUE_VALUE
                                      ? code : inverted_code);
          /* invert_tree_comparison will return ERROR_MARK on FP
             comparisons that are not EQ/NE instead of returning proper
             unordered one.  Be sure it is not confused with NON_CONSTANT.  */
          if (this_code != ERROR_MARK)
            {
              predicate p
                = add_condition (summary, index, size, &aggpos, this_code,
                                 unshare_expr_without_location
                                 (gimple_cond_rhs (last)));
              e->aux = edge_predicate_pool.allocate ();
              *(predicate *) e->aux = p;
            }
        }
    }

  if (TREE_CODE (op) != SSA_NAME)
    return;
  /* Special case
       if (builtin_constant_p (op))
         constant_code
       else
         nonconstant_code.
     Here we can predicate nonconstant_code.  We can't
     really handle constant_code since we have no predicate
     for this and also the constant code is not known to be
     optimized away when inliner doesn't see the operand is constant.
     Other optimizers might think otherwise.  */
  if (gimple_cond_code (last) != NE_EXPR
      || !integer_zerop (gimple_cond_rhs (last)))
    return;
  set_stmt = SSA_NAME_DEF_STMT (op);
  if (!gimple_call_builtin_p (set_stmt, BUILT_IN_CONSTANT_P)
      || gimple_call_num_args (set_stmt) != 1)
    return;
  op2 = gimple_call_arg (set_stmt, 0);
  if (!unmodified_parm_or_parm_agg_item (fbi, set_stmt, op2, &index, &size,
                                         &aggpos))
    return;
  FOR_EACH_EDGE (e, ei, bb->succs)
    if (e->flags & EDGE_FALSE_VALUE)
      {
        predicate p = add_condition (summary, index, size, &aggpos,
                                     predicate::is_not_constant, NULL_TREE);
        e->aux = edge_predicate_pool.allocate ();
        *(predicate *) e->aux = p;
      }
}


/* If BB ends with a switch that we can turn into predicates, attach
   the corresponding predicates to the CFG edges.  */

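/* Sketch of the resulting predicates: every outgoing edge starts out
   as "false"; each case label then ORs in either "op == constant" for
   a plain case, or "op >= low && op <= high" for a case range, so an
   edge shared by several labels accumulates the union of their tests.
   The default edge ends up with predicate "true".  */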
static void
set_switch_stmt_execution_predicate (struct ipa_func_body_info *fbi,
                                     struct inline_summary *summary,
                                     basic_block bb)
{
  gimple *lastg;
  tree op;
  int index;
  HOST_WIDE_INT size;
  struct agg_position_info aggpos;
  edge e;
  edge_iterator ei;
  size_t n;
  size_t case_idx;

  lastg = last_stmt (bb);
  if (!lastg || gimple_code (lastg) != GIMPLE_SWITCH)
    return;
  gswitch *last = as_a <gswitch *> (lastg);
  op = gimple_switch_index (last);
  if (!unmodified_parm_or_parm_agg_item (fbi, last, op, &index, &size, &aggpos))
    return;

  FOR_EACH_EDGE (e, ei, bb->succs)
    {
      e->aux = edge_predicate_pool.allocate ();
      *(predicate *) e->aux = false;
    }
  n = gimple_switch_num_labels (last);
  for (case_idx = 0; case_idx < n; ++case_idx)
    {
      tree cl = gimple_switch_label (last, case_idx);
      tree min, max;
      predicate p;

      e = find_edge (bb, label_to_block (CASE_LABEL (cl)));
      min = CASE_LOW (cl);
      max = CASE_HIGH (cl);

      /* For default we might want to construct predicate that none
         of cases is met, but it is a bit hard to do not having negations
         of conditionals handy.  */
      if (!min && !max)
        p = true;
      else if (!max)
        p = add_condition (summary, index, size, &aggpos, EQ_EXPR,
                           unshare_expr_without_location (min));
      else
        {
          predicate p1, p2;
          p1 = add_condition (summary, index, size, &aggpos, GE_EXPR,
                              unshare_expr_without_location (min));
          p2 = add_condition (summary, index, size, &aggpos, LE_EXPR,
                              unshare_expr_without_location (max));
          p = p1 & p2;
        }
      *(struct predicate *) e->aux
        = p.or_with (summary->conds, *(struct predicate *) e->aux);
    }
}


/* For each BB in NODE attach to its AUX pointer predicate under
   which it is executable.  */

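/* The propagation below computes, in effect (informal notation):

     pred (ENTRY) = true
     pred (BB) = OR over incoming edges E of
                   (pred (E->src) AND edge predicate of E)

   iterated until a fixed point is reached.  */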
static void
compute_bb_predicates (struct ipa_func_body_info *fbi,
                       struct cgraph_node *node,
                       struct inline_summary *summary)
{
  struct function *my_function = DECL_STRUCT_FUNCTION (node->decl);
  bool done = false;
  basic_block bb;

  FOR_EACH_BB_FN (bb, my_function)
    {
      set_cond_stmt_execution_predicate (fbi, summary, bb);
      set_switch_stmt_execution_predicate (fbi, summary, bb);
    }

  /* Entry block is always executable.  */
  ENTRY_BLOCK_PTR_FOR_FN (my_function)->aux
    = edge_predicate_pool.allocate ();
  *(predicate *) ENTRY_BLOCK_PTR_FOR_FN (my_function)->aux = true;

  /* A simple dataflow propagation of predicates forward in the CFG.
     TODO: work in reverse postorder.  */
  while (!done)
    {
      done = true;
      FOR_EACH_BB_FN (bb, my_function)
        {
          predicate p = false;
          edge e;
          edge_iterator ei;
          FOR_EACH_EDGE (e, ei, bb->preds)
            {
              if (e->src->aux)
                {
                  predicate this_bb_predicate
                    = *(predicate *) e->src->aux;
                  if (e->aux)
                    this_bb_predicate &= (*(struct predicate *) e->aux);
                  p = p.or_with (summary->conds, this_bb_predicate);
                  if (p == true)
                    break;
                }
            }
          if (p == false)
            gcc_checking_assert (!bb->aux);
          else
            {
              if (!bb->aux)
                {
                  done = false;
                  bb->aux = edge_predicate_pool.allocate ();
                  *((predicate *) bb->aux) = p;
                }
              else if (p != *(predicate *) bb->aux)
                {
                  /* This OR operation is needed to ensure monotone data flow
                     in the case we hit the limit on number of clauses and the
                     and/or operations above give approximate answers.  */
                  p = p.or_with (summary->conds, *(predicate *) bb->aux);
                  if (p != *(predicate *) bb->aux)
                    {
                      done = false;
                      *((predicate *) bb->aux) = p;
                    }
                }
            }
        }
    }
}


/* We keep info about constantness of SSA names.  */

typedef predicate predicate_t;

/* Return predicate specifying when the STMT might have result that is not
   a compile time constant.  */

static predicate
will_be_nonconstant_expr_predicate (struct ipa_node_params *info,
                                    struct inline_summary *summary,
                                    tree expr,
                                    vec<predicate_t> nonconstant_names)
{
  tree parm;
  int index;
  HOST_WIDE_INT size;

  while (UNARY_CLASS_P (expr))
    expr = TREE_OPERAND (expr, 0);

  parm = unmodified_parm (NULL, expr, &size);
  if (parm && (index = ipa_get_param_decl_index (info, parm)) >= 0)
    return add_condition (summary, index, size, NULL, predicate::changed,
                          NULL_TREE);
  if (is_gimple_min_invariant (expr))
    return false;
  if (TREE_CODE (expr) == SSA_NAME)
    return nonconstant_names[SSA_NAME_VERSION (expr)];
  if (BINARY_CLASS_P (expr) || COMPARISON_CLASS_P (expr))
    {
      predicate p1 = will_be_nonconstant_expr_predicate
        (info, summary, TREE_OPERAND (expr, 0),
         nonconstant_names);
      if (p1 == true)
        return p1;

      predicate p2;
      p2 = will_be_nonconstant_expr_predicate (info, summary,
                                               TREE_OPERAND (expr, 1),
                                               nonconstant_names);
      return p1.or_with (summary->conds, p2);
    }
  else if (TREE_CODE (expr) == COND_EXPR)
    {
      predicate p1 = will_be_nonconstant_expr_predicate
        (info, summary, TREE_OPERAND (expr, 0),
         nonconstant_names);
      if (p1 == true)
        return p1;

      predicate p2;
      p2 = will_be_nonconstant_expr_predicate (info, summary,
                                               TREE_OPERAND (expr, 1),
                                               nonconstant_names);
      if (p2 == true)
        return p2;
      p1 = p1.or_with (summary->conds, p2);
      p2 = will_be_nonconstant_expr_predicate (info, summary,
                                               TREE_OPERAND (expr, 2),
                                               nonconstant_names);
      return p2.or_with (summary->conds, p1);
    }
  else
    {
      debug_tree (expr);
      gcc_unreachable ();
    }
  return false;
}


/* Return predicate specifying when the STMT might have result that is not
   a compile time constant.  */

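/* Illustrative example: for an assignment

     c = a + b;

   where A and B are both parameters, the resulting predicate is
   roughly "A changed OR B changed"; when IPA-CP knows both values,
   neither condition holds in the specialized clone and the statement
   is assumed to fold to a constant.  */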
static predicate
will_be_nonconstant_predicate (struct ipa_func_body_info *fbi,
                               struct inline_summary *summary,
                               gimple *stmt,
                               vec<predicate_t> nonconstant_names)
{
  predicate p = true;
  ssa_op_iter iter;
  tree use;
  predicate op_non_const;
  bool is_load;
  int base_index;
  HOST_WIDE_INT size;
  struct agg_position_info aggpos;

  /* What statements might be optimized away
     when their arguments are constant.  */
  if (gimple_code (stmt) != GIMPLE_ASSIGN
      && gimple_code (stmt) != GIMPLE_COND
      && gimple_code (stmt) != GIMPLE_SWITCH
      && (gimple_code (stmt) != GIMPLE_CALL
          || !(gimple_call_flags (stmt) & ECF_CONST)))
    return p;

  /* Stores will stay anyway.  */
  if (gimple_store_p (stmt))
    return p;

  is_load = gimple_assign_load_p (stmt);

  /* Loads can be optimized when the value is known.  */
  if (is_load)
    {
      tree op;
      gcc_assert (gimple_assign_single_p (stmt));
      op = gimple_assign_rhs1 (stmt);
      if (!unmodified_parm_or_parm_agg_item (fbi, stmt, op, &base_index, &size,
                                             &aggpos))
        return p;
    }
  else
    base_index = -1;

  /* See if we understand all operands before we start
     adding conditionals.  */
  FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
    {
      tree parm = unmodified_parm (stmt, use, NULL);
      /* For arguments we can build a condition.  */
      if (parm && ipa_get_param_decl_index (fbi->info, parm) >= 0)
        continue;
      if (TREE_CODE (use) != SSA_NAME)
        return p;
      /* If we know when operand is constant,
         we still can say something useful.  */
      if (nonconstant_names[SSA_NAME_VERSION (use)] != true)
        continue;
      return p;
    }

  if (is_load)
    op_non_const =
      add_condition (summary, base_index, size, &aggpos, predicate::changed,
                     NULL);
  else
    op_non_const = false;
  FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
    {
      HOST_WIDE_INT size;
      tree parm = unmodified_parm (stmt, use, &size);
      int index;

      if (parm && (index = ipa_get_param_decl_index (fbi->info, parm)) >= 0)
        {
          if (index != base_index)
            p = add_condition (summary, index, size, NULL, predicate::changed,
                               NULL_TREE);
          else
            continue;
        }
      else
        p = nonconstant_names[SSA_NAME_VERSION (use)];
      op_non_const = p.or_with (summary->conds, op_non_const);
    }
  if ((gimple_code (stmt) == GIMPLE_ASSIGN || gimple_code (stmt) == GIMPLE_CALL)
      && gimple_op (stmt, 0)
      && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
    nonconstant_names[SSA_NAME_VERSION (gimple_op (stmt, 0))]
      = op_non_const;
  return op_non_const;
}

struct record_modified_bb_info
{
  bitmap bb_set;
  gimple *stmt;
};

/* Value is initialized in INIT_BB and used in USE_BB.  We want to compute
   the probability that it changes on the way to USE_BB.
   INIT_BB->frequency/USE_BB->frequency is an estimate, but if INIT_BB
   is in a different loop nest, we can do better.
   This is all just an estimate.  In theory we look for a minimal cut
   separating INIT_BB and USE_BB, but we only want to anticipate loop
   invariant motion anyway.  */

static basic_block
get_minimal_bb (basic_block init_bb, basic_block use_bb)
{
  struct loop *l = find_common_loop (init_bb->loop_father, use_bb->loop_father);
  if (l && l->header->frequency < init_bb->frequency)
    return l->header;
  return init_bb;
}

/* Callback of walk_aliased_vdefs.  Records basic blocks where the value may be
   set except for info->stmt.  */

static bool
record_modified (ao_ref *ao ATTRIBUTE_UNUSED, tree vdef, void *data)
{
  struct record_modified_bb_info *info =
    (struct record_modified_bb_info *) data;
  if (SSA_NAME_DEF_STMT (vdef) == info->stmt)
    return false;
  bitmap_set_bit (info->bb_set,
                  SSA_NAME_IS_DEFAULT_DEF (vdef)
                  ? ENTRY_BLOCK_PTR_FOR_FN (cfun)->index
                  : get_minimal_bb
                      (gimple_bb (SSA_NAME_DEF_STMT (vdef)),
                       gimple_bb (info->stmt))->index);
  return false;
}

/* Return probability (based on REG_BR_PROB_BASE) that I-th parameter of STMT
   will change since last invocation of STMT.

   Value 0 is reserved for compile time invariants.
   For common parameters it is REG_BR_PROB_BASE.  For loop invariants it
   ought to be REG_BR_PROB_BASE / estimated_iters.  */

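/* Worked example of the estimate below: if the value is defined in a
   block with frequency 100 and the call sits in a loop with block
   frequency 10000, the call runs roughly 100 times per definition, so
   the returned probability is about REG_BR_PROB_BASE / 100.  */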
static int
param_change_prob (gimple *stmt, int i)
{
  tree op = gimple_call_arg (stmt, i);
  basic_block bb = gimple_bb (stmt);

  if (TREE_CODE (op) == WITH_SIZE_EXPR)
    op = TREE_OPERAND (op, 0);

  tree base = get_base_address (op);

  /* Global invariants never change.  */
  if (is_gimple_min_invariant (base))
    return 0;

  /* We would have to do non-trivial analysis to really work out what
     is the probability of value to change (i.e. when init statement
     is in a sibling loop of the call).

     We do a conservative estimate: when call is executed N times more often
     than the statement defining value, we take the frequency 1/N.  */
  if (TREE_CODE (base) == SSA_NAME)
    {
      int init_freq;

      if (!bb->frequency)
        return REG_BR_PROB_BASE;

      if (SSA_NAME_IS_DEFAULT_DEF (base))
        init_freq = ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency;
      else
        init_freq = get_minimal_bb
          (gimple_bb (SSA_NAME_DEF_STMT (base)),
           gimple_bb (stmt))->frequency;

      if (!init_freq)
        init_freq = 1;
      if (init_freq < bb->frequency)
        return MAX (GCOV_COMPUTE_SCALE (init_freq, bb->frequency), 1);
      else
        return REG_BR_PROB_BASE;
    }
  else
    {
      ao_ref refd;
      int max;
      struct record_modified_bb_info info;
      bitmap_iterator bi;
      unsigned index;
      tree init = ctor_for_folding (base);

      if (init != error_mark_node)
        return 0;
      if (!bb->frequency)
        return REG_BR_PROB_BASE;
      ao_ref_init (&refd, op);
      info.stmt = stmt;
      info.bb_set = BITMAP_ALLOC (NULL);
      walk_aliased_vdefs (&refd, gimple_vuse (stmt), record_modified, &info,
                          NULL);
      if (bitmap_bit_p (info.bb_set, bb->index))
        {
          BITMAP_FREE (info.bb_set);
          return REG_BR_PROB_BASE;
        }

      /* Assume that every memory is initialized at entry.
         TODO: Can we easily determine if value is always defined
         and thus we may skip entry block?  */
      if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency)
        max = ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency;
      else
        max = 1;

      EXECUTE_IF_SET_IN_BITMAP (info.bb_set, 0, index, bi)
        max = MIN (max, BASIC_BLOCK_FOR_FN (cfun, index)->frequency);

      BITMAP_FREE (info.bb_set);
      if (max < bb->frequency)
        return MAX (GCOV_COMPUTE_SCALE (max, bb->frequency), 1);
      else
        return REG_BR_PROB_BASE;
    }
}

/* Find whether a basic block BB is the final block of a (half) diamond CFG
   sub-graph and if the predicate the condition depends on is known.  If so,
   return true and store the predicate in *P.  */

static bool
phi_result_unknown_predicate (struct ipa_node_params *info,
                              inline_summary *summary, basic_block bb,
                              predicate *p,
                              vec<predicate_t> nonconstant_names)
{
  edge e;
  edge_iterator ei;
  basic_block first_bb = NULL;
  gimple *stmt;

  if (single_pred_p (bb))
    {
      *p = false;
      return true;
    }

  FOR_EACH_EDGE (e, ei, bb->preds)
    {
      if (single_succ_p (e->src))
        {
          if (!single_pred_p (e->src))
            return false;
          if (!first_bb)
            first_bb = single_pred (e->src);
          else if (single_pred (e->src) != first_bb)
            return false;
        }
      else
        {
          if (!first_bb)
            first_bb = e->src;
          else if (e->src != first_bb)
            return false;
        }
    }

  if (!first_bb)
    return false;

  stmt = last_stmt (first_bb);
  if (!stmt
      || gimple_code (stmt) != GIMPLE_COND
      || !is_gimple_ip_invariant (gimple_cond_rhs (stmt)))
    return false;

  *p = will_be_nonconstant_expr_predicate (info, summary,
                                           gimple_cond_lhs (stmt),
                                           nonconstant_names);
  if (*p == true)
    return false;
  else
    return true;
}

/* Given a PHI statement in a function described by inline properties SUMMARY
   and *P being the predicate describing whether the selected PHI argument is
   known, store a predicate for the result of the PHI statement into
   NONCONSTANT_NAMES, if possible.  */

static void
predicate_for_phi_result (struct inline_summary *summary, gphi *phi,
                          predicate *p,
                          vec<predicate_t> nonconstant_names)
{
  unsigned i;

  for (i = 0; i < gimple_phi_num_args (phi); i++)
    {
      tree arg = gimple_phi_arg (phi, i)->def;
      if (!is_gimple_min_invariant (arg))
        {
          gcc_assert (TREE_CODE (arg) == SSA_NAME);
          *p = p->or_with (summary->conds,
                           nonconstant_names[SSA_NAME_VERSION (arg)]);
          if (*p == true)
            return;
        }
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "\t\tphi predicate: ");
      p->dump (dump_file, summary->conds);
    }
  nonconstant_names[SSA_NAME_VERSION (gimple_phi_result (phi))] = *p;
}

/* Return predicate specifying when array index in access OP becomes
   non-constant.  */

static predicate
array_index_predicate (inline_summary *info,
                       vec<predicate_t> nonconstant_names, tree op)
{
  predicate p = false;
  while (handled_component_p (op))
    {
      if (TREE_CODE (op) == ARRAY_REF || TREE_CODE (op) == ARRAY_RANGE_REF)
        {
          if (TREE_CODE (TREE_OPERAND (op, 1)) == SSA_NAME)
            p = p.or_with (info->conds,
                           nonconstant_names[SSA_NAME_VERSION
                                             (TREE_OPERAND (op, 1))]);
        }
      op = TREE_OPERAND (op, 0);
    }
  return p;
}

1893 /* For a typical usage of __builtin_expect (a<b, 1), we
1894 may introduce an extra relation stmt:
1895 With the builtin, we have
1896 t1 = a <= b;
1897 t2 = (long int) t1;
1898 t3 = __builtin_expect (t2, 1);
1899 if (t3 != 0)
1900 goto ...
1901 Without the builtin, we have
1902 if (a<=b)
1903 goto...
 1904    This affects the size/time estimation and may have
 1905    an impact on the early inliner.
 1906    Here we find this pattern and fix up its cost later.  */
1907
1908 static gimple *
1909 find_foldable_builtin_expect (basic_block bb)
1910 {
1911 gimple_stmt_iterator bsi;
1912
1913 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1914 {
1915 gimple *stmt = gsi_stmt (bsi);
1916 if (gimple_call_builtin_p (stmt, BUILT_IN_EXPECT)
1917 || gimple_call_internal_p (stmt, IFN_BUILTIN_EXPECT))
1918 {
1919 tree var = gimple_call_lhs (stmt);
1920 tree arg = gimple_call_arg (stmt, 0);
1921 use_operand_p use_p;
1922 gimple *use_stmt;
1923 bool match = false;
1924 bool done = false;
1925
1926 if (!var || !arg)
1927 continue;
1928 gcc_assert (TREE_CODE (var) == SSA_NAME);
1929
1930 while (TREE_CODE (arg) == SSA_NAME)
1931 {
1932 gimple *stmt_tmp = SSA_NAME_DEF_STMT (arg);
1933 if (!is_gimple_assign (stmt_tmp))
1934 break;
1935 switch (gimple_assign_rhs_code (stmt_tmp))
1936 {
1937 case LT_EXPR:
1938 case LE_EXPR:
1939 case GT_EXPR:
1940 case GE_EXPR:
1941 case EQ_EXPR:
1942 case NE_EXPR:
1943 match = true;
1944 done = true;
1945 break;
1946 CASE_CONVERT:
1947 break;
1948 default:
1949 done = true;
1950 break;
1951 }
1952 if (done)
1953 break;
1954 arg = gimple_assign_rhs1 (stmt_tmp);
1955 }
1956
1957 if (match && single_imm_use (var, &use_p, &use_stmt)
1958 && gimple_code (use_stmt) == GIMPLE_COND)
1959 return use_stmt;
1960 }
1961 }
1962 return NULL;
1963 }
1964
 1965 /* Return true when the basic block contains only clobbers followed by RESX.
 1966    Such BBs are kept around to make removal of dead stores possible with
 1967    presence of EH and will be optimized out by optimize_clobbers later in the
 1968    game.
 1969
 1970    NEED_EH is used to recurse in case the clobber has non-EH predecessors
 1971    that can be clobber-only, too.  When it is false, the RESX is not necessary
 1972    at the end of the basic block.  */
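 /* An instance (illustrative GIMPLE) is an EH landing pad such as

 	<bb 13>:
 	tmp ={v} {CLOBBER};
 	obj ={v} {CLOBBER};
 	resx 1

    that exists only so dead stores can be removed on the EH path.  */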
1973
1974 static bool
1975 clobber_only_eh_bb_p (basic_block bb, bool need_eh = true)
1976 {
1977 gimple_stmt_iterator gsi = gsi_last_bb (bb);
1978 edge_iterator ei;
1979 edge e;
1980
1981 if (need_eh)
1982 {
1983 if (gsi_end_p (gsi))
1984 return false;
1985 if (gimple_code (gsi_stmt (gsi)) != GIMPLE_RESX)
1986 return false;
1987 gsi_prev (&gsi);
1988 }
1989 else if (!single_succ_p (bb))
1990 return false;
1991
1992 for (; !gsi_end_p (gsi); gsi_prev (&gsi))
1993 {
1994 gimple *stmt = gsi_stmt (gsi);
1995 if (is_gimple_debug (stmt))
1996 continue;
1997 if (gimple_clobber_p (stmt))
1998 continue;
1999 if (gimple_code (stmt) == GIMPLE_LABEL)
2000 break;
2001 return false;
2002 }
2003
 2004   /* See if all predecessors are either throws or clobber-only BBs.  */
2005 FOR_EACH_EDGE (e, ei, bb->preds)
2006 if (!(e->flags & EDGE_EH)
2007 && !clobber_only_eh_bb_p (e->src, false))
2008 return false;
2009
2010 return true;
2011 }
2012
 2013 /* Return true if STMT computes a floating point expression that may be
 2014    affected by -ffast-math and similar flags.  */
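 /* E.g. (hypothetical names) t_1 = x_2 * y_3 with operands of type double
    qualifies, while a purely integral statement does not.  */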
2015
2016 static bool
2017 fp_expression_p (gimple *stmt)
2018 {
2019 ssa_op_iter i;
2020 tree op;
2021
2022 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_DEF|SSA_OP_USE)
2023 if (FLOAT_TYPE_P (TREE_TYPE (op)))
2024 return true;
2025 return false;
2026 }
2027
2028 /* Compute function body size parameters for NODE.
2029 When EARLY is true, we compute only simple summaries without
2030 non-trivial predicates to drive the early inliner. */
2031
2032 static void
2033 estimate_function_body_sizes (struct cgraph_node *node, bool early)
2034 {
2035 sreal time = 0;
2036 /* Estimate static overhead for function prologue/epilogue and alignment. */
2037 int size = 2;
 2038   /* Benefits are scaled by the probability of elimination that is in range
 2039      <0,2>; 1 means elimination with probability 50%, 2 means always.  */
2040 basic_block bb;
2041 struct function *my_function = DECL_STRUCT_FUNCTION (node->decl);
2042 int freq;
2043 struct inline_summary *info = inline_summaries->get (node);
2044 predicate bb_predicate;
2045 struct ipa_func_body_info fbi;
2046 vec<predicate_t> nonconstant_names = vNULL;
2047 int nblocks, n;
2048 int *order;
2049 predicate array_index = true;
2050 gimple *fix_builtin_expect_stmt;
2051
2052 gcc_assert (my_function && my_function->cfg);
2053 gcc_assert (cfun == my_function);
2054
 2055   memset (&fbi, 0, sizeof (fbi));
2056 info->conds = NULL;
2057 info->size_time_table = NULL;
2058
2059 /* When optimizing and analyzing for IPA inliner, initialize loop optimizer
2060 so we can produce proper inline hints.
2061
2062 When optimizing and analyzing for early inliner, initialize node params
2063 so we can produce correct BB predicates. */
2064
2065 if (opt_for_fn (node->decl, optimize))
2066 {
2067 calculate_dominance_info (CDI_DOMINATORS);
2068 if (!early)
2069 loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);
2070 else
2071 {
2072 ipa_check_create_node_params ();
2073 ipa_initialize_node_params (node);
2074 }
2075
2076 if (ipa_node_params_sum)
2077 {
2078 fbi.node = node;
2079 fbi.info = IPA_NODE_REF (node);
2080 fbi.bb_infos = vNULL;
2081 fbi.bb_infos.safe_grow_cleared (last_basic_block_for_fn (cfun));
 2082 	  fbi.param_count = count_formal_params (node->decl);
2083 nonconstant_names.safe_grow_cleared
2084 (SSANAMES (my_function)->length ());
2085 }
2086 }
2087
2088 if (dump_file)
2089 fprintf (dump_file, "\nAnalyzing function body size: %s\n",
2090 node->name ());
2091
 2092   /* When we run into the maximal number of entries, we assign everything to
 2093      the constant truth case.  Be sure to have it in the list.  */
2094 bb_predicate = true;
2095 info->account_size_time (0, 0, bb_predicate, bb_predicate);
2096
2097 bb_predicate = predicate::not_inlined ();
2098 info->account_size_time (2 * INLINE_SIZE_SCALE, 0, bb_predicate,
2099 bb_predicate);
2100
2101 if (fbi.info)
2102 compute_bb_predicates (&fbi, node, info);
2103 order = XNEWVEC (int, n_basic_blocks_for_fn (cfun));
2104 nblocks = pre_and_rev_post_order_compute (NULL, order, false);
2105 for (n = 0; n < nblocks; n++)
2106 {
2107 bb = BASIC_BLOCK_FOR_FN (cfun, order[n]);
2108 freq = compute_call_stmt_bb_frequency (node->decl, bb);
2109 if (clobber_only_eh_bb_p (bb))
2110 {
2111 if (dump_file && (dump_flags & TDF_DETAILS))
2112 fprintf (dump_file, "\n Ignoring BB %i;"
2113 " it will be optimized away by cleanup_clobbers\n",
2114 bb->index);
2115 continue;
2116 }
2117
 2118       /* TODO: Obviously predicates can be propagated down across the CFG.  */
2119 if (fbi.info)
2120 {
2121 if (bb->aux)
2122 bb_predicate = *(predicate *) bb->aux;
2123 else
2124 bb_predicate = false;
2125 }
2126 else
2127 bb_predicate = true;
2128
2129 if (dump_file && (dump_flags & TDF_DETAILS))
2130 {
2131 fprintf (dump_file, "\n BB %i predicate:", bb->index);
2132 bb_predicate.dump (dump_file, info->conds);
2133 }
2134
2135 if (fbi.info && nonconstant_names.exists ())
2136 {
2137 predicate phi_predicate;
2138 bool first_phi = true;
2139
2140 for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
2141 gsi_next (&bsi))
2142 {
2143 if (first_phi
2144 && !phi_result_unknown_predicate (fbi.info, info, bb,
2145 &phi_predicate,
2146 nonconstant_names))
2147 break;
2148 first_phi = false;
2149 if (dump_file && (dump_flags & TDF_DETAILS))
2150 {
2151 fprintf (dump_file, " ");
2152 print_gimple_stmt (dump_file, gsi_stmt (bsi), 0);
2153 }
2154 predicate_for_phi_result (info, bsi.phi (), &phi_predicate,
2155 nonconstant_names);
2156 }
2157 }
2158
2159 fix_builtin_expect_stmt = find_foldable_builtin_expect (bb);
2160
2161 for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);
2162 gsi_next (&bsi))
2163 {
2164 gimple *stmt = gsi_stmt (bsi);
2165 int this_size = estimate_num_insns (stmt, &eni_size_weights);
2166 int this_time = estimate_num_insns (stmt, &eni_time_weights);
2167 int prob;
2168 predicate will_be_nonconstant;
2169
 2170 	  /* This relation stmt should be folded after we remove the
 2171 	     builtin_expect call.  Adjust the cost here.  */
2172 if (stmt == fix_builtin_expect_stmt)
2173 {
2174 this_size--;
2175 this_time--;
2176 }
2177
2178 if (dump_file && (dump_flags & TDF_DETAILS))
2179 {
2180 fprintf (dump_file, " ");
2181 print_gimple_stmt (dump_file, stmt, 0);
2182 fprintf (dump_file, "\t\tfreq:%3.2f size:%3i time:%3i\n",
2183 ((double) freq) / CGRAPH_FREQ_BASE, this_size,
2184 this_time);
2185 }
2186
2187 if (gimple_assign_load_p (stmt) && nonconstant_names.exists ())
2188 {
2189 predicate this_array_index;
2190 this_array_index =
2191 array_index_predicate (info, nonconstant_names,
2192 gimple_assign_rhs1 (stmt));
2193 if (this_array_index != false)
2194 array_index &= this_array_index;
2195 }
2196 if (gimple_store_p (stmt) && nonconstant_names.exists ())
2197 {
2198 predicate this_array_index;
2199 this_array_index =
2200 array_index_predicate (info, nonconstant_names,
2201 gimple_get_lhs (stmt));
2202 if (this_array_index != false)
2203 array_index &= this_array_index;
2204 }
2205
2206
2207 if (is_gimple_call (stmt)
2208 && !gimple_call_internal_p (stmt))
2209 {
2210 struct cgraph_edge *edge = node->get_edge (stmt);
2211 struct ipa_call_summary *es = ipa_call_summaries->get (edge);
2212
 2213 	      /* Special case: results of BUILT_IN_CONSTANT_P will always be
 2214 		 resolved as constant.  We however don't want to optimize
 2215 		 out the cgraph edges.  */
2216 if (nonconstant_names.exists ()
2217 && gimple_call_builtin_p (stmt, BUILT_IN_CONSTANT_P)
2218 && gimple_call_lhs (stmt)
2219 && TREE_CODE (gimple_call_lhs (stmt)) == SSA_NAME)
2220 {
2221 predicate false_p = false;
2222 nonconstant_names[SSA_NAME_VERSION (gimple_call_lhs (stmt))]
2223 = false_p;
2224 }
2225 if (ipa_node_params_sum)
2226 {
2227 int count = gimple_call_num_args (stmt);
2228 int i;
2229
2230 if (count)
2231 es->param.safe_grow_cleared (count);
2232 for (i = 0; i < count; i++)
2233 {
2234 int prob = param_change_prob (stmt, i);
2235 gcc_assert (prob >= 0 && prob <= REG_BR_PROB_BASE);
2236 es->param[i].change_prob = prob;
2237 }
2238 }
2239
2240 es->call_stmt_size = this_size;
2241 es->call_stmt_time = this_time;
2242 es->loop_depth = bb_loop_depth (bb);
2243 edge_set_predicate (edge, &bb_predicate);
2244 }
2245
 2246 	  /* TODO: When a conditional jump or switch is known to be constant, but
 2247 	     we did not translate it into the predicates, we really can account
 2248 	     for just the maximum of the possible paths.  */
2249 if (fbi.info)
2250 will_be_nonconstant
2251 = will_be_nonconstant_predicate (&fbi, info,
2252 stmt, nonconstant_names);
2253 else
2254 will_be_nonconstant = true;
2255 if (this_time || this_size)
2256 {
2257 this_time *= freq;
2258
2259 prob = eliminated_by_inlining_prob (stmt);
2260 if (prob == 1 && dump_file && (dump_flags & TDF_DETAILS))
2261 fprintf (dump_file,
2262 "\t\t50%% will be eliminated by inlining\n");
2263 if (prob == 2 && dump_file && (dump_flags & TDF_DETAILS))
2264 fprintf (dump_file, "\t\tWill be eliminated by inlining\n");
2265
2266 struct predicate p = bb_predicate & will_be_nonconstant;
2267
 2268 	      /* We can ignore a statement when we have proved it is never going
 2269 	         to be executed, but we can not do that for call statements
 2270 	         because edges are accounted for specially.  */
2271
2272 if (*(is_gimple_call (stmt) ? &bb_predicate : &p) != false)
2273 {
2274 time += this_time;
2275 size += this_size;
2276 }
2277
2278 /* We account everything but the calls. Calls have their own
2279 size/time info attached to cgraph edges. This is necessary
2280 in order to make the cost disappear after inlining. */
2281 if (!is_gimple_call (stmt))
2282 {
2283 if (prob)
2284 {
2285 predicate ip = bb_predicate & predicate::not_inlined ();
2286 info->account_size_time (this_size * prob,
2287 (sreal)(this_time * prob)
2288 / (CGRAPH_FREQ_BASE * 2), ip,
2289 p);
2290 }
2291 if (prob != 2)
2292 info->account_size_time (this_size * (2 - prob),
2293 (sreal)(this_time * (2 - prob))
2294 / (CGRAPH_FREQ_BASE * 2),
2295 bb_predicate,
2296 p);
2297 }
2298
2299 if (!info->fp_expressions && fp_expression_p (stmt))
2300 {
2301 info->fp_expressions = true;
2302 if (dump_file)
2303 fprintf (dump_file, " fp_expression set\n");
2304 }
2305
2306 gcc_assert (time >= 0);
2307 gcc_assert (size >= 0);
2308 }
2309 }
2310 }
2311 set_hint_predicate (&inline_summaries->get (node)->array_index, array_index);
2312 time = time / CGRAPH_FREQ_BASE;
2313 free (order);
2314
2315 if (nonconstant_names.exists () && !early)
2316 {
2317 struct loop *loop;
2318 predicate loop_iterations = true;
2319 predicate loop_stride = true;
2320
2321 if (dump_file && (dump_flags & TDF_DETAILS))
2322 flow_loops_dump (dump_file, NULL, 0);
2323 scev_initialize ();
2324 FOR_EACH_LOOP (loop, 0)
2325 {
2326 vec<edge> exits;
2327 edge ex;
2328 unsigned int j;
2329 struct tree_niter_desc niter_desc;
2330 bb_predicate = *(predicate *) loop->header->aux;
2331
2332 exits = get_loop_exit_edges (loop);
2333 FOR_EACH_VEC_ELT (exits, j, ex)
2334 if (number_of_iterations_exit (loop, ex, &niter_desc, false)
2335 && !is_gimple_min_invariant (niter_desc.niter))
2336 {
2337 predicate will_be_nonconstant
2338 = will_be_nonconstant_expr_predicate (fbi.info, info,
2339 niter_desc.niter,
2340 nonconstant_names);
2341 if (will_be_nonconstant != true)
2342 will_be_nonconstant = bb_predicate & will_be_nonconstant;
2343 if (will_be_nonconstant != true
2344 && will_be_nonconstant != false)
 2345 	      /* This is slightly imprecise.  We may want to represent each
 2346 	         loop with an independent predicate.  */
2347 loop_iterations &= will_be_nonconstant;
2348 }
2349 exits.release ();
2350 }
2351
2352 /* To avoid quadratic behavior we analyze stride predicates only
2353 with respect to the containing loop. Thus we simply iterate
2354 over all defs in the outermost loop body. */
2355 for (loop = loops_for_fn (cfun)->tree_root->inner;
2356 loop != NULL; loop = loop->next)
2357 {
2358 basic_block *body = get_loop_body (loop);
2359 for (unsigned i = 0; i < loop->num_nodes; i++)
2360 {
2361 gimple_stmt_iterator gsi;
2362 bb_predicate = *(predicate *) body[i]->aux;
2363 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi);
2364 gsi_next (&gsi))
2365 {
2366 gimple *stmt = gsi_stmt (gsi);
2367
2368 if (!is_gimple_assign (stmt))
2369 continue;
2370
2371 tree def = gimple_assign_lhs (stmt);
2372 if (TREE_CODE (def) != SSA_NAME)
2373 continue;
2374
2375 affine_iv iv;
2376 if (!simple_iv (loop_containing_stmt (stmt),
2377 loop_containing_stmt (stmt),
2378 def, &iv, true)
2379 || is_gimple_min_invariant (iv.step))
2380 continue;
2381
2382 predicate will_be_nonconstant
2383 = will_be_nonconstant_expr_predicate (fbi.info, info,
2384 iv.step,
2385 nonconstant_names);
2386 if (will_be_nonconstant != true)
2387 will_be_nonconstant = bb_predicate & will_be_nonconstant;
2388 if (will_be_nonconstant != true
2389 && will_be_nonconstant != false)
 2390 		  /* This is slightly imprecise.  We may want to represent
 2391 		     each loop with an independent predicate.  */
2392 loop_stride = loop_stride & will_be_nonconstant;
2393 }
2394 }
2395 free (body);
2396 }
2397 set_hint_predicate (&inline_summaries->get (node)->loop_iterations,
2398 loop_iterations);
2399 set_hint_predicate (&inline_summaries->get (node)->loop_stride,
2400 loop_stride);
2401 scev_finalize ();
2402 }
2403 FOR_ALL_BB_FN (bb, my_function)
2404 {
2405 edge e;
2406 edge_iterator ei;
2407
2408 if (bb->aux)
2409 edge_predicate_pool.remove ((predicate *)bb->aux);
2410 bb->aux = NULL;
2411 FOR_EACH_EDGE (e, ei, bb->succs)
2412 {
2413 if (e->aux)
2414 edge_predicate_pool.remove ((predicate *) e->aux);
2415 e->aux = NULL;
2416 }
2417 }
2418 inline_summaries->get (node)->self_time = time;
2419 inline_summaries->get (node)->self_size = size;
2420 nonconstant_names.release ();
2421 ipa_release_body_info (&fbi);
2422 if (opt_for_fn (node->decl, optimize))
2423 {
2424 if (!early)
2425 loop_optimizer_finalize ();
2426 else if (!ipa_edge_args_sum)
2427 ipa_free_all_node_params ();
2428 free_dominance_info (CDI_DOMINATORS);
2429 }
2430 if (dump_file)
2431 {
2432 fprintf (dump_file, "\n");
2433 dump_inline_summary (dump_file, node);
2434 }
2435 }
2436
2437
2438 /* Compute parameters of functions used by inliner.
2439 EARLY is true when we compute parameters for the early inliner */
2440
2441 void
2442 compute_inline_parameters (struct cgraph_node *node, bool early)
2443 {
2444 HOST_WIDE_INT self_stack_size;
2445 struct cgraph_edge *e;
2446 struct inline_summary *info;
2447
2448 gcc_assert (!node->global.inlined_to);
2449
2450 inline_summary_alloc ();
2451
2452 info = inline_summaries->get (node);
2453 info->reset (node);
2454
2455 /* Estimate the stack size for the function if we're optimizing. */
2456 self_stack_size = optimize && !node->thunk.thunk_p
2457 ? estimated_stack_frame_size (node) : 0;
2458 info->estimated_self_stack_size = self_stack_size;
2459 info->estimated_stack_size = self_stack_size;
2460 info->stack_frame_offset = 0;
2461
2462 if (node->thunk.thunk_p)
2463 {
2464 struct ipa_call_summary *es = ipa_call_summaries->get (node->callees);
2465 predicate t = true;
2466
2467 node->local.can_change_signature = false;
2468 es->call_stmt_size = eni_size_weights.call_cost;
2469 es->call_stmt_time = eni_time_weights.call_cost;
2470 info->account_size_time (INLINE_SIZE_SCALE * 2, 2, t, t);
2471 t = predicate::not_inlined ();
2472 info->account_size_time (2 * INLINE_SIZE_SCALE, 0, t, t);
2473 inline_update_overall_summary (node);
2474 info->self_size = info->size;
2475 info->self_time = info->time;
2476 /* We can not inline instrumentation clones. */
2477 if (node->thunk.add_pointer_bounds_args)
2478 {
2479 info->inlinable = false;
2480 node->callees->inline_failed = CIF_CHKP;
2481 }
2482 else
2483 info->inlinable = true;
2484 }
2485 else
2486 {
 2487       /* Even is_gimple_min_invariant relies on current_function_decl.  */
2488 push_cfun (DECL_STRUCT_FUNCTION (node->decl));
2489
2490 /* Can this function be inlined at all? */
2491 if (!opt_for_fn (node->decl, optimize)
2492 && !lookup_attribute ("always_inline",
2493 DECL_ATTRIBUTES (node->decl)))
2494 info->inlinable = false;
2495 else
2496 info->inlinable = tree_inlinable_function_p (node->decl);
2497
2498 info->contains_cilk_spawn = fn_contains_cilk_spawn_p (cfun);
2499
2500 /* Type attributes can use parameter indices to describe them. */
2501 if (TYPE_ATTRIBUTES (TREE_TYPE (node->decl)))
2502 node->local.can_change_signature = false;
2503 else
2504 {
2505 /* Otherwise, inlinable functions always can change signature. */
2506 if (info->inlinable)
2507 node->local.can_change_signature = true;
2508 else
2509 {
2510 /* Functions calling builtin_apply can not change signature. */
2511 for (e = node->callees; e; e = e->next_callee)
2512 {
2513 tree cdecl = e->callee->decl;
2514 if (DECL_BUILT_IN (cdecl)
2515 && DECL_BUILT_IN_CLASS (cdecl) == BUILT_IN_NORMAL
2516 && (DECL_FUNCTION_CODE (cdecl) == BUILT_IN_APPLY_ARGS
2517 || DECL_FUNCTION_CODE (cdecl) == BUILT_IN_VA_START))
2518 break;
2519 }
2520 node->local.can_change_signature = !e;
2521 }
2522 }
2523 /* Functions called by instrumentation thunk can't change signature
2524 because instrumentation thunk modification is not supported. */
2525 if (node->local.can_change_signature)
2526 for (e = node->callers; e; e = e->next_caller)
2527 if (e->caller->thunk.thunk_p
2528 && e->caller->thunk.add_pointer_bounds_args)
2529 {
2530 node->local.can_change_signature = false;
2531 break;
2532 }
2533 estimate_function_body_sizes (node, early);
2534 pop_cfun ();
2535 }
2536 for (e = node->callees; e; e = e->next_callee)
2537 if (e->callee->comdat_local_p ())
2538 break;
2539 node->calls_comdat_local = (e != NULL);
2540
2541 /* Inlining characteristics are maintained by the cgraph_mark_inline. */
2542 info->time = info->self_time;
2543 info->size = info->self_size;
2544 info->stack_frame_offset = 0;
2545 info->estimated_stack_size = info->estimated_self_stack_size;
2546
 2547   /* Code above should compute exactly the same result as
 2548      inline_update_overall_summary, but because the computation happens in
 2549      a different order, roundoff errors result in slight changes.  */
2550 inline_update_overall_summary (node);
2551 gcc_assert (!(info->time - info->self_time).to_int ()
2552 && info->size == info->self_size);
2553 }
2554
2555
2556 /* Compute parameters of functions used by inliner using
2557 current_function_decl. */
2558
2559 static unsigned int
2560 compute_inline_parameters_for_current (void)
2561 {
2562 compute_inline_parameters (cgraph_node::get (current_function_decl), true);
2563 return 0;
2564 }
2565
2566 namespace {
2567
2568 const pass_data pass_data_inline_parameters =
2569 {
2570 GIMPLE_PASS, /* type */
2571 "inline_param", /* name */
2572 OPTGROUP_INLINE, /* optinfo_flags */
2573 TV_INLINE_PARAMETERS, /* tv_id */
2574 0, /* properties_required */
2575 0, /* properties_provided */
2576 0, /* properties_destroyed */
2577 0, /* todo_flags_start */
2578 0, /* todo_flags_finish */
2579 };
2580
2581 class pass_inline_parameters : public gimple_opt_pass
2582 {
2583 public:
2584 pass_inline_parameters (gcc::context *ctxt)
2585 : gimple_opt_pass (pass_data_inline_parameters, ctxt)
2586 {}
2587
2588 /* opt_pass methods: */
2589 opt_pass * clone () { return new pass_inline_parameters (m_ctxt); }
2590 virtual unsigned int execute (function *)
2591 {
2592 return compute_inline_parameters_for_current ();
2593 }
2594
2595 }; // class pass_inline_parameters
2596
2597 } // anon namespace
2598
2599 gimple_opt_pass *
2600 make_pass_inline_parameters (gcc::context *ctxt)
2601 {
2602 return new pass_inline_parameters (ctxt);
2603 }
2604
2605
 2606 /* Estimate the benefit of devirtualizing indirect edge IE, given KNOWN_VALS,
 2607    KNOWN_CONTEXTS and KNOWN_AGGS.  */
2608
2609 static bool
2610 estimate_edge_devirt_benefit (struct cgraph_edge *ie,
2611 int *size, int *time,
2612 vec<tree> known_vals,
2613 vec<ipa_polymorphic_call_context> known_contexts,
2614 vec<ipa_agg_jump_function_p> known_aggs)
2615 {
2616 tree target;
2617 struct cgraph_node *callee;
2618 struct inline_summary *isummary;
2619 enum availability avail;
2620 bool speculative;
2621
2622 if (!known_vals.exists () && !known_contexts.exists ())
2623 return false;
2624 if (!opt_for_fn (ie->caller->decl, flag_indirect_inlining))
2625 return false;
2626
2627 target = ipa_get_indirect_edge_target (ie, known_vals, known_contexts,
2628 known_aggs, &speculative);
2629 if (!target || speculative)
2630 return false;
2631
2632 /* Account for difference in cost between indirect and direct calls. */
2633 *size -= (eni_size_weights.indirect_call_cost - eni_size_weights.call_cost);
2634 *time -= (eni_time_weights.indirect_call_cost - eni_time_weights.call_cost);
2635 gcc_checking_assert (*time >= 0);
2636 gcc_checking_assert (*size >= 0);
2637
2638 callee = cgraph_node::get (target);
2639 if (!callee || !callee->definition)
2640 return false;
2641 callee = callee->function_symbol (&avail);
2642 if (avail < AVAIL_AVAILABLE)
2643 return false;
2644 isummary = inline_summaries->get (callee);
2645 return isummary->inlinable;
2646 }
2647
2648 /* Increase SIZE, MIN_SIZE (if non-NULL) and TIME for size and time needed to
2649 handle edge E with probability PROB.
2650 Set HINTS if edge may be devirtualized.
2651 KNOWN_VALS, KNOWN_AGGS and KNOWN_CONTEXTS describe context of the call
2652 site. */
2653
2654 static inline void
2655 estimate_edge_size_and_time (struct cgraph_edge *e, int *size, int *min_size,
2656 sreal *time,
2657 int prob,
2658 vec<tree> known_vals,
2659 vec<ipa_polymorphic_call_context> known_contexts,
2660 vec<ipa_agg_jump_function_p> known_aggs,
2661 inline_hints *hints)
2662 {
2663 struct ipa_call_summary *es = ipa_call_summaries->get (e);
2664 int call_size = es->call_stmt_size;
2665 int call_time = es->call_stmt_time;
2666 int cur_size;
2667 if (!e->callee
2668 && estimate_edge_devirt_benefit (e, &call_size, &call_time,
2669 known_vals, known_contexts, known_aggs)
2670 && hints && e->maybe_hot_p ())
2671 *hints |= INLINE_HINT_indirect_call;
2672 cur_size = call_size * INLINE_SIZE_SCALE;
2673 *size += cur_size;
2674 if (min_size)
2675 *min_size += cur_size;
2676 if (prob == REG_BR_PROB_BASE)
2677 *time += ((sreal)(call_time * e->frequency)) / CGRAPH_FREQ_BASE;
2678 else
2679 *time += ((sreal)call_time) * (prob * e->frequency)
2680 / (CGRAPH_FREQ_BASE * REG_BR_PROB_BASE);
2681 }
2682
2683
2684
2685 /* Increase SIZE, MIN_SIZE and TIME for size and time needed to handle all
2686 calls in NODE. POSSIBLE_TRUTHS, KNOWN_VALS, KNOWN_AGGS and KNOWN_CONTEXTS
2687 describe context of the call site. */
2688
2689 static void
2690 estimate_calls_size_and_time (struct cgraph_node *node, int *size,
2691 int *min_size, sreal *time,
2692 inline_hints *hints,
2693 clause_t possible_truths,
2694 vec<tree> known_vals,
2695 vec<ipa_polymorphic_call_context> known_contexts,
2696 vec<ipa_agg_jump_function_p> known_aggs)
2697 {
2698 struct cgraph_edge *e;
2699 for (e = node->callees; e; e = e->next_callee)
2700 {
2701 struct ipa_call_summary *es = ipa_call_summaries->get (e);
2702
2703 /* Do not care about zero sized builtins. */
2704 if (e->inline_failed && !es->call_stmt_size)
2705 {
2706 gcc_checking_assert (!es->call_stmt_time);
2707 continue;
2708 }
2709 if (!es->predicate
2710 || es->predicate->evaluate (possible_truths))
2711 {
2712 if (e->inline_failed)
2713 {
 2714 	      /* Predicates of calls shall not use NOT_CHANGED codes,
 2715 	         so we do not need to compute probabilities.  */
2716 estimate_edge_size_and_time (e, size,
2717 es->predicate ? NULL : min_size,
2718 time, REG_BR_PROB_BASE,
2719 known_vals, known_contexts,
2720 known_aggs, hints);
2721 }
2722 else
2723 estimate_calls_size_and_time (e->callee, size, min_size, time,
2724 hints,
2725 possible_truths,
2726 known_vals, known_contexts,
2727 known_aggs);
2728 }
2729 }
2730 for (e = node->indirect_calls; e; e = e->next_callee)
2731 {
2732 struct ipa_call_summary *es = ipa_call_summaries->get (e);
2733 if (!es->predicate
2734 || es->predicate->evaluate (possible_truths))
2735 estimate_edge_size_and_time (e, size,
2736 es->predicate ? NULL : min_size,
2737 time, REG_BR_PROB_BASE,
2738 known_vals, known_contexts, known_aggs,
2739 hints);
2740 }
2741 }
2742
2743
 2744 /* Estimate size and time needed to execute NODE assuming
 2745    POSSIBLE_TRUTHS clause, and KNOWN_VALS, KNOWN_AGGS and KNOWN_CONTEXTS
 2746    information about NODE's arguments.  If non-NULL, also use probability
 2747    information present in the INLINE_PARAM_SUMMARY vector.
 2748    Additionally determine the hints implied by the context.  Finally compute
 2749    the minimal size needed for the call that is independent of the call
 2750    context and can be used for fast estimates.  Return the values in RET_SIZE,
 2751    RET_MIN_SIZE, RET_TIME and RET_HINTS.  */
2752
2753 static void
2754 estimate_node_size_and_time (struct cgraph_node *node,
2755 clause_t possible_truths,
2756 clause_t nonspec_possible_truths,
2757 vec<tree> known_vals,
2758 vec<ipa_polymorphic_call_context> known_contexts,
2759 vec<ipa_agg_jump_function_p> known_aggs,
2760 int *ret_size, int *ret_min_size,
2761 sreal *ret_time,
2762 sreal *ret_nonspecialized_time,
2763 inline_hints *ret_hints,
2764 vec<inline_param_summary>
2765 inline_param_summary)
2766 {
2767 struct inline_summary *info = inline_summaries->get (node);
2768 size_time_entry *e;
2769 int size = 0;
2770 sreal time = 0;
2771 int min_size = 0;
2772 inline_hints hints = 0;
2773 int i;
2774
2775 if (dump_file && (dump_flags & TDF_DETAILS))
2776 {
2777 bool found = false;
2778 fprintf (dump_file, " Estimating body: %s/%i\n"
2779 " Known to be false: ", node->name (),
2780 node->order);
2781
2782 for (i = predicate::not_inlined_condition;
2783 i < (predicate::first_dynamic_condition
2784 + (int) vec_safe_length (info->conds)); i++)
2785 if (!(possible_truths & (1 << i)))
2786 {
2787 if (found)
2788 fprintf (dump_file, ", ");
2789 found = true;
2790 dump_condition (dump_file, info->conds, i);
2791 }
2792 }
2793
2794 estimate_calls_size_and_time (node, &size, &min_size, &time, &hints, possible_truths,
2795 known_vals, known_contexts, known_aggs);
2796 sreal nonspecialized_time = time;
2797
2798 for (i = 0; vec_safe_iterate (info->size_time_table, i, &e); i++)
2799 {
2800 bool nonconst = e->nonconst_predicate.evaluate (possible_truths);
2801 bool exec = e->exec_predicate.evaluate (nonspec_possible_truths);
2802 gcc_assert (!nonconst || exec);
2803 if (exec)
2804 {
2805 gcc_checking_assert (e->time >= 0);
2806 gcc_checking_assert (time >= 0);
2807
 2808 	  /* We compute specialized size only because the size of the
 2809 	     nonspecialized copy is context independent.
 2810
 2811 	     The difference between nonspecialized and specialized execution is
 2812 	     that the nonspecialized copy does not get to optimize out
 2813 	     computations known to be constant in the specialized setting.  */
2814 if (nonconst)
2815 size += e->size;
2816 nonspecialized_time += e->time;
2817 if (!nonconst)
2818 ;
2819 else if (!inline_param_summary.exists ())
2820 {
2821 if (nonconst)
2822 time += e->time;
2823 }
2824 else
2825 {
2826 int prob = e->nonconst_predicate.probability
2827 (info->conds, possible_truths,
2828 inline_param_summary);
2829 gcc_checking_assert (prob >= 0);
2830 gcc_checking_assert (prob <= REG_BR_PROB_BASE);
2831 time += e->time * prob / REG_BR_PROB_BASE;
2832 }
2833 gcc_checking_assert (time >= 0);
2834 }
2835 }
2836 gcc_checking_assert ((*info->size_time_table)[0].exec_predicate == true);
2837 gcc_checking_assert ((*info->size_time_table)[0].nonconst_predicate == true);
2838 min_size = (*info->size_time_table)[0].size;
2839 gcc_checking_assert (size >= 0);
2840 gcc_checking_assert (time >= 0);
 2841   /* nonspecialized_time should always be bigger than specialized time.
 2842      Roundoff issues however may get in the way.  */
2843 gcc_checking_assert ((nonspecialized_time - time) >= -1);
2844
 2845   /* Roundoff issues may make specialized time bigger than nonspecialized
 2846      time.  We do not really want that to happen because some heuristics
 2847      may get confused by seeing negative speedups.  */
2848 if (time > nonspecialized_time)
2849 time = nonspecialized_time;
2850
2851 if (info->loop_iterations
2852 && !info->loop_iterations->evaluate (possible_truths))
2853 hints |= INLINE_HINT_loop_iterations;
2854 if (info->loop_stride
2855 && !info->loop_stride->evaluate (possible_truths))
2856 hints |= INLINE_HINT_loop_stride;
2857 if (info->array_index
2858 && !info->array_index->evaluate (possible_truths))
2859 hints |= INLINE_HINT_array_index;
2860 if (info->scc_no)
2861 hints |= INLINE_HINT_in_scc;
2862 if (DECL_DECLARED_INLINE_P (node->decl))
2863 hints |= INLINE_HINT_declared_inline;
2864
2865 size = RDIV (size, INLINE_SIZE_SCALE);
2866 min_size = RDIV (min_size, INLINE_SIZE_SCALE);
2867
2868 if (dump_file && (dump_flags & TDF_DETAILS))
2869 fprintf (dump_file, "\n size:%i time:%f nonspec time:%f\n", (int) size,
2870 time.to_double (), nonspecialized_time.to_double ());
2871 if (ret_time)
2872 *ret_time = time;
2873 if (ret_nonspecialized_time)
2874 *ret_nonspecialized_time = nonspecialized_time;
2875 if (ret_size)
2876 *ret_size = size;
2877 if (ret_min_size)
2878 *ret_min_size = min_size;
2879 if (ret_hints)
2880 *ret_hints = hints;
2881 return;
2882 }
2883
2884
 2885 /* Estimate size and time needed to execute NODE assuming that the
 2886    parameters known to be constant at the call site are propagated.
 2887    KNOWN_VALS and KNOWN_CONTEXTS are vectors of assumed known constant values
 2888    and polymorphic contexts for the parameters.  */
2889
2890 void
2891 estimate_ipcp_clone_size_and_time (struct cgraph_node *node,
2892 vec<tree> known_vals,
2893 vec<ipa_polymorphic_call_context>
2894 known_contexts,
2895 vec<ipa_agg_jump_function_p> known_aggs,
2896 int *ret_size, sreal *ret_time,
2897 sreal *ret_nonspec_time,
2898 inline_hints *hints)
2899 {
2900 clause_t clause, nonspec_clause;
2901
2902 evaluate_conditions_for_known_args (node, false, known_vals, known_aggs,
2903 &clause, &nonspec_clause);
2904 estimate_node_size_and_time (node, clause, nonspec_clause,
2905 known_vals, known_contexts,
2906 known_aggs, ret_size, NULL, ret_time,
2907 ret_nonspec_time, hints, vNULL);
2908 }
2909
2910
2911 /* Update summary information of inline clones after inlining.
2912 Compute peak stack usage. */
2913
2914 static void
2915 inline_update_callee_summaries (struct cgraph_node *node, int depth)
2916 {
2917 struct cgraph_edge *e;
2918 struct inline_summary *callee_info = inline_summaries->get (node);
2919 struct inline_summary *caller_info = inline_summaries->get (node->callers->caller);
2920 HOST_WIDE_INT peak;
2921
2922 callee_info->stack_frame_offset
2923 = caller_info->stack_frame_offset
2924 + caller_info->estimated_self_stack_size;
2925 peak = callee_info->stack_frame_offset
2926 + callee_info->estimated_self_stack_size;
2927 if (inline_summaries->get (node->global.inlined_to)->estimated_stack_size < peak)
2928 inline_summaries->get (node->global.inlined_to)->estimated_stack_size = peak;
2929 ipa_propagate_frequency (node);
2930 for (e = node->callees; e; e = e->next_callee)
2931 {
2932 if (!e->inline_failed)
2933 inline_update_callee_summaries (e->callee, depth);
2934 ipa_call_summaries->get (e)->loop_depth += depth;
2935 }
2936 for (e = node->indirect_calls; e; e = e->next_callee)
2937 ipa_call_summaries->get (e)->loop_depth += depth;
2938 }
2939
 2940 /* Update change_prob of EDGE after INLINED_EDGE has been inlined.
 2941    When function A is inlined in B and A calls C with a parameter that
 2942    changes with probability PROB1, and C is known to be a pass-through
 2943    of an argument of B that changes with probability PROB2, the probability
 2944    of change is now PROB1*PROB2.  */
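 /* E.g. (illustrative numbers) if PROB1 and PROB2 are both
    REG_BR_PROB_BASE / 2, the combined probability is REG_BR_PROB_BASE / 4.
    Note in the code below that a nonzero product that would round down to
    0 is bumped to 1, so a possible change never becomes impossible.  */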
2945
2946 static void
2947 remap_edge_change_prob (struct cgraph_edge *inlined_edge,
2948 struct cgraph_edge *edge)
2949 {
2950 if (ipa_node_params_sum)
2951 {
2952 int i;
2953 struct ipa_edge_args *args = IPA_EDGE_REF (edge);
2954 struct ipa_call_summary *es = ipa_call_summaries->get (edge);
2955 struct ipa_call_summary *inlined_es
2956 = ipa_call_summaries->get (inlined_edge);
2957
2958 for (i = 0; i < ipa_get_cs_argument_count (args); i++)
2959 {
2960 struct ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, i);
2961 if (jfunc->type == IPA_JF_PASS_THROUGH
2962 || jfunc->type == IPA_JF_ANCESTOR)
2963 {
2964 int id = jfunc->type == IPA_JF_PASS_THROUGH
2965 ? ipa_get_jf_pass_through_formal_id (jfunc)
2966 : ipa_get_jf_ancestor_formal_id (jfunc);
2967 if (id < (int) inlined_es->param.length ())
2968 {
2969 int prob1 = es->param[i].change_prob;
2970 int prob2 = inlined_es->param[id].change_prob;
2971 int prob = combine_probabilities (prob1, prob2);
2972
2973 if (prob1 && prob2 && !prob)
2974 prob = 1;
2975
2976 es->param[i].change_prob = prob;
2977 }
2978 }
2979 }
2980 }
2981 }
2982
2983 /* Update edge summaries of NODE after INLINED_EDGE has been inlined.
2984
2985 Remap predicates of callees of NODE. Rest of arguments match
2986 remap_predicate.
2987
2988 Also update change probabilities. */
2989
2990 static void
2991 remap_edge_summaries (struct cgraph_edge *inlined_edge,
2992 struct cgraph_node *node,
2993 struct inline_summary *info,
2994 struct inline_summary *callee_info,
2995 vec<int> operand_map,
2996 vec<int> offset_map,
2997 clause_t possible_truths,
2998 predicate *toplev_predicate)
2999 {
3000 struct cgraph_edge *e, *next;
3001 for (e = node->callees; e; e = next)
3002 {
3003 struct ipa_call_summary *es = ipa_call_summaries->get (e);
3004 predicate p;
3005 next = e->next_callee;
3006
3007 if (e->inline_failed)
3008 {
3009 remap_edge_change_prob (inlined_edge, e);
3010
3011 if (es->predicate)
3012 {
3013 p = es->predicate->remap_after_inlining
3014 (info, callee_info, operand_map,
3015 offset_map, possible_truths,
3016 *toplev_predicate);
3017 edge_set_predicate (e, &p);
3018 }
3019 else
3020 edge_set_predicate (e, toplev_predicate);
3021 }
3022 else
3023 remap_edge_summaries (inlined_edge, e->callee, info, callee_info,
3024 operand_map, offset_map, possible_truths,
3025 toplev_predicate);
3026 }
3027 for (e = node->indirect_calls; e; e = next)
3028 {
3029 struct ipa_call_summary *es = ipa_call_summaries->get (e);
3030 predicate p;
3031 next = e->next_callee;
3032
3033 remap_edge_change_prob (inlined_edge, e);
3034 if (es->predicate)
3035 {
3036 p = es->predicate->remap_after_inlining
3037 (info, callee_info, operand_map, offset_map,
3038 possible_truths, *toplev_predicate);
3039 edge_set_predicate (e, &p);
3040 }
3041 else
3042 edge_set_predicate (e, toplev_predicate);
3043 }
3044 }
3045
 3046 /* Same as remap_predicate, but set the result into hint *HINT.  */
3047
3048 static void
3049 remap_hint_predicate (struct inline_summary *info,
3050 struct inline_summary *callee_info,
3051 predicate **hint,
3052 vec<int> operand_map,
3053 vec<int> offset_map,
3054 clause_t possible_truths,
3055 predicate *toplev_predicate)
3056 {
3057 predicate p;
3058
3059 if (!*hint)
3060 return;
3061 p = (*hint)->remap_after_inlining
3062 (info, callee_info,
3063 operand_map, offset_map,
3064 possible_truths, *toplev_predicate);
3065 if (p != false && p != true)
3066 {
3067 if (!*hint)
3068 set_hint_predicate (hint, p);
3069 else
3070 **hint &= p;
3071 }
3072 }
3073
3074 /* We inlined EDGE. Update summary of the function we inlined into. */
3075
3076 void
3077 inline_merge_summary (struct cgraph_edge *edge)
3078 {
3079 struct inline_summary *callee_info = inline_summaries->get (edge->callee);
3080 struct cgraph_node *to = (edge->caller->global.inlined_to
3081 ? edge->caller->global.inlined_to : edge->caller);
3082 struct inline_summary *info = inline_summaries->get (to);
3083 clause_t clause = 0; /* not_inline is known to be false. */
3084 size_time_entry *e;
3085 vec<int> operand_map = vNULL;
3086 vec<int> offset_map = vNULL;
3087 int i;
3088 predicate toplev_predicate;
3089 predicate true_p = true;
3090 struct ipa_call_summary *es = ipa_call_summaries->get (edge);
3091
3092 if (es->predicate)
3093 toplev_predicate = *es->predicate;
3094 else
3095 toplev_predicate = true;
3096
3097 info->fp_expressions |= callee_info->fp_expressions;
3098
3099 if (callee_info->conds)
3100 evaluate_properties_for_edge (edge, true, &clause, NULL, NULL, NULL, NULL);
3101 if (ipa_node_params_sum && callee_info->conds)
3102 {
3103 struct ipa_edge_args *args = IPA_EDGE_REF (edge);
3104 int count = ipa_get_cs_argument_count (args);
3105 int i;
3106
3107 if (count)
3108 {
3109 operand_map.safe_grow_cleared (count);
3110 offset_map.safe_grow_cleared (count);
3111 }
3112 for (i = 0; i < count; i++)
3113 {
3114 struct ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, i);
3115 int map = -1;
3116
3117 /* TODO: handle non-NOPs when merging. */
3118 if (jfunc->type == IPA_JF_PASS_THROUGH)
3119 {
3120 if (ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
3121 map = ipa_get_jf_pass_through_formal_id (jfunc);
3122 if (!ipa_get_jf_pass_through_agg_preserved (jfunc))
3123 offset_map[i] = -1;
3124 }
3125 else if (jfunc->type == IPA_JF_ANCESTOR)
3126 {
3127 HOST_WIDE_INT offset = ipa_get_jf_ancestor_offset (jfunc);
3128 if (offset >= 0 && offset < INT_MAX)
3129 {
3130 map = ipa_get_jf_ancestor_formal_id (jfunc);
3131 if (!ipa_get_jf_ancestor_agg_preserved (jfunc))
3132 offset = -1;
3133 offset_map[i] = offset;
3134 }
3135 }
3136 operand_map[i] = map;
3137 gcc_assert (map < ipa_get_param_count (IPA_NODE_REF (to)));
3138 }
3139 }
3140 for (i = 0; vec_safe_iterate (callee_info->size_time_table, i, &e); i++)
3141 {
3142 predicate p;
3143 p = e->exec_predicate.remap_after_inlining
3144 (info, callee_info, operand_map,
3145 offset_map, clause,
3146 toplev_predicate);
3147 predicate nonconstp;
3148 nonconstp = e->nonconst_predicate.remap_after_inlining
3149 (info, callee_info, operand_map,
3150 offset_map, clause,
3151 toplev_predicate);
3152 if (p != false && nonconstp != false)
3153 {
3154 sreal add_time = ((sreal)e->time * edge->frequency) / CGRAPH_FREQ_BASE;
3155 int prob = e->nonconst_predicate.probability (callee_info->conds,
3156 clause, es->param);
3157 add_time = add_time * prob / REG_BR_PROB_BASE;
3158 if (prob != REG_BR_PROB_BASE
3159 && dump_file && (dump_flags & TDF_DETAILS))
3160 {
3161 fprintf (dump_file, "\t\tScaling time by probability:%f\n",
3162 (double) prob / REG_BR_PROB_BASE);
3163 }
3164 info->account_size_time (e->size, add_time, p, nonconstp);
3165 }
3166 }
3167 remap_edge_summaries (edge, edge->callee, info, callee_info, operand_map,
3168 offset_map, clause, &toplev_predicate);
3169 remap_hint_predicate (info, callee_info,
3170 &callee_info->loop_iterations,
3171 operand_map, offset_map, clause, &toplev_predicate);
3172 remap_hint_predicate (info, callee_info,
3173 &callee_info->loop_stride,
3174 operand_map, offset_map, clause, &toplev_predicate);
3175 remap_hint_predicate (info, callee_info,
3176 &callee_info->array_index,
3177 operand_map, offset_map, clause, &toplev_predicate);
3178
3179 inline_update_callee_summaries (edge->callee,
3180 ipa_call_summaries->get (edge)->loop_depth);
3181
 3182   /* We do not maintain predicates of inlined edges, so free this one.  */
3183 edge_set_predicate (edge, &true_p);
3184 /* Similarly remove param summaries. */
3185 es->param.release ();
3186 operand_map.release ();
3187 offset_map.release ();
3188 }
3189
 3190 /* For performance reasons inline_merge_summary does not update the overall
 3191    size and time.  Recompute them here.  */
3192
3193 void
3194 inline_update_overall_summary (struct cgraph_node *node)
3195 {
3196 struct inline_summary *info = inline_summaries->get (node);
3197 size_time_entry *e;
3198 int i;
3199
3200 info->size = 0;
3201 info->time = 0;
3202 for (i = 0; vec_safe_iterate (info->size_time_table, i, &e); i++)
3203 {
3204 info->size += e->size;
3205 info->time += e->time;
3206 }
3207 estimate_calls_size_and_time (node, &info->size, &info->min_size,
3208 &info->time, NULL,
3209 ~(clause_t) (1 << predicate::false_condition),
3210 vNULL, vNULL, vNULL);
3211 info->size = (info->size + INLINE_SIZE_SCALE / 2) / INLINE_SIZE_SCALE;
3212 }
3213
 3214 /* Return hints derived from EDGE.  */
3215 int
3216 simple_edge_hints (struct cgraph_edge *edge)
3217 {
3218 int hints = 0;
3219 struct cgraph_node *to = (edge->caller->global.inlined_to
3220 ? edge->caller->global.inlined_to : edge->caller);
3221 struct cgraph_node *callee = edge->callee->ultimate_alias_target ();
3222 if (inline_summaries->get (to)->scc_no
3223 && inline_summaries->get (to)->scc_no
3224 == inline_summaries->get (callee)->scc_no
3225 && !edge->recursive_p ())
3226 hints |= INLINE_HINT_same_scc;
3227
3228 if (callee->lto_file_data && edge->caller->lto_file_data
3229 && edge->caller->lto_file_data != callee->lto_file_data
3230 && !callee->merged_comdat && !callee->icf_merged)
3231 hints |= INLINE_HINT_cross_module;
3232
3233 return hints;
3234 }
3235
3236 /* Estimate the time cost for the caller when inlining EDGE.
 3237    Only to be called via estimate_edge_time, which handles the
3238 caching mechanism.
3239
3240 When caching, also update the cache entry. Compute both time and
3241 size, since we always need both metrics eventually. */
3242
3243 sreal
3244 do_estimate_edge_time (struct cgraph_edge *edge)
3245 {
3246 sreal time, nonspec_time;
3247 int size;
3248 inline_hints hints;
3249 struct cgraph_node *callee;
3250 clause_t clause, nonspec_clause;
3251 vec<tree> known_vals;
3252 vec<ipa_polymorphic_call_context> known_contexts;
3253 vec<ipa_agg_jump_function_p> known_aggs;
3254 struct ipa_call_summary *es = ipa_call_summaries->get (edge);
3255 int min_size;
3256
3257 callee = edge->callee->ultimate_alias_target ();
3258
3259 gcc_checking_assert (edge->inline_failed);
3260 evaluate_properties_for_edge (edge, true,
3261 &clause, &nonspec_clause, &known_vals,
3262 &known_contexts, &known_aggs);
3263 estimate_node_size_and_time (callee, clause, nonspec_clause, known_vals,
3264 known_contexts, known_aggs, &size, &min_size,
3265 &time, &nonspec_time, &hints, es->param);
3266
3267 /* When we have profile feedback, we can quite safely identify hot
3268 edges and for those we disable size limits. Don't do that when
3269 probability that caller will call the callee is low however, since it
3270 may hurt optimization of the caller's hot path. */
3271 if (edge->count && edge->maybe_hot_p ()
3272 && (edge->count * 2
3273 > (edge->caller->global.inlined_to
3274 ? edge->caller->global.inlined_to->count : edge->caller->count)))
3275 hints |= INLINE_HINT_known_hot;
3276
3277 known_vals.release ();
3278 known_contexts.release ();
3279 known_aggs.release ();
3280 gcc_checking_assert (size >= 0);
3281 gcc_checking_assert (time >= 0);
3282
3283 /* When caching, update the cache entry. */
3284 if (edge_growth_cache.exists ())
3285 {
3286 inline_summaries->get (edge->callee)->min_size = min_size;
3287 if ((int) edge_growth_cache.length () <= edge->uid)
3288 edge_growth_cache.safe_grow_cleared (symtab->edges_max_uid);
3289 edge_growth_cache[edge->uid].time = time;
3290 edge_growth_cache[edge->uid].nonspec_time = nonspec_time;
3291
3292 edge_growth_cache[edge->uid].size = size + (size >= 0);
3293 hints |= simple_edge_hints (edge);
3294 edge_growth_cache[edge->uid].hints = hints + 1;
3295 }
3296 return time;
3297 }
3298
3299
3300 /* Return estimated callee growth after inlining EDGE.
3301 Only to be called via estimate_edge_size. */
3302
3303 int
3304 do_estimate_edge_size (struct cgraph_edge *edge)
3305 {
3306 int size;
3307 struct cgraph_node *callee;
3308 clause_t clause, nonspec_clause;
3309 vec<tree> known_vals;
3310 vec<ipa_polymorphic_call_context> known_contexts;
3311 vec<ipa_agg_jump_function_p> known_aggs;
3312
3313 /* When we do caching, use do_estimate_edge_time to populate the entry. */
3314
3315 if (edge_growth_cache.exists ())
3316 {
3317 do_estimate_edge_time (edge);
3318 size = edge_growth_cache[edge->uid].size;
3319 gcc_checking_assert (size);
3320 return size - (size > 0);
3321 }
3322
3323 callee = edge->callee->ultimate_alias_target ();
3324
3325 /* Early inliner runs without caching, go ahead and do the dirty work. */
3326 gcc_checking_assert (edge->inline_failed);
3327 evaluate_properties_for_edge (edge, true,
3328 &clause, &nonspec_clause,
3329 &known_vals, &known_contexts,
3330 &known_aggs);
3331 estimate_node_size_and_time (callee, clause, nonspec_clause, known_vals,
3332 known_contexts, known_aggs, &size, NULL, NULL,
3333 NULL, NULL, vNULL);
3334 known_vals.release ();
3335 known_contexts.release ();
3336 known_aggs.release ();
3337 return size;
3338 }
3339
3340
 3341 /* Estimate the hints for inlining EDGE.
 3342    Only to be called via estimate_edge_hints.  */
3343
3344 inline_hints
3345 do_estimate_edge_hints (struct cgraph_edge *edge)
3346 {
3347 inline_hints hints;
3348 struct cgraph_node *callee;
3349 clause_t clause, nonspec_clause;
3350 vec<tree> known_vals;
3351 vec<ipa_polymorphic_call_context> known_contexts;
3352 vec<ipa_agg_jump_function_p> known_aggs;
3353
3354 /* When we do caching, use do_estimate_edge_time to populate the entry. */
3355
3356 if (edge_growth_cache.exists ())
3357 {
3358 do_estimate_edge_time (edge);
3359 hints = edge_growth_cache[edge->uid].hints;
3360 gcc_checking_assert (hints);
3361 return hints - 1;
3362 }
3363
3364 callee = edge->callee->ultimate_alias_target ();
3365
3366 /* Early inliner runs without caching, go ahead and do the dirty work. */
3367 gcc_checking_assert (edge->inline_failed);
3368 evaluate_properties_for_edge (edge, true,
3369 &clause, &nonspec_clause,
3370 &known_vals, &known_contexts,
3371 &known_aggs);
3372 estimate_node_size_and_time (callee, clause, nonspec_clause, known_vals,
3373 known_contexts, known_aggs, NULL, NULL,
3374 NULL, NULL, &hints, vNULL);
3375 known_vals.release ();
3376 known_contexts.release ();
3377 known_aggs.release ();
3378 hints |= simple_edge_hints (edge);
3379 return hints;
3380 }
3381
3382 /* Estimate the size of NODE after inlining EDGE which should be an
3383 edge to either NODE or a call inlined into NODE. */
3384
3385 int
3386 estimate_size_after_inlining (struct cgraph_node *node,
3387 struct cgraph_edge *edge)
3388 {
3389 struct ipa_call_summary *es = ipa_call_summaries->get (edge);
3390 if (!es->predicate || *es->predicate != false)
3391 {
3392 int size = inline_summaries->get (node)->size + estimate_edge_growth (edge);
3393 gcc_assert (size >= 0);
3394 return size;
3395 }
3396 return inline_summaries->get (node)->size;
3397 }
3398
3399
3400 struct growth_data
3401 {
3402 struct cgraph_node *node;
3403 bool self_recursive;
3404 bool uninlinable;
3405 int growth;
3406 };
3407
3408
3409 /* Worker for do_estimate_growth. Collect growth for all callers. */
3410
3411 static bool
3412 do_estimate_growth_1 (struct cgraph_node *node, void *data)
3413 {
3414 struct cgraph_edge *e;
3415 struct growth_data *d = (struct growth_data *) data;
3416
3417 for (e = node->callers; e; e = e->next_caller)
3418 {
3419 gcc_checking_assert (e->inline_failed);
3420
3421 if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
3422 {
3423 d->uninlinable = true;
3424 continue;
3425 }
3426
3427 if (e->recursive_p ())
3428 {
3429 d->self_recursive = true;
3430 continue;
3431 }
3432 d->growth += estimate_edge_growth (e);
3433 }
3434 return false;
3435 }
3436
3437
 3438 /* Estimate the growth caused by inlining NODE into all callers.  */
3439
3440 int
3441 estimate_growth (struct cgraph_node *node)
3442 {
3443 struct growth_data d = { node, false, false, 0 };
3444 struct inline_summary *info = inline_summaries->get (node);
3445
3446 node->call_for_symbol_and_aliases (do_estimate_growth_1, &d, true);
3447
3448 /* For self recursive functions the growth estimation really should be
3449 infinity. We don't want to return very large values because the growth
 3450      plays various roles in badness computation fractions.  Be sure not to
 3451      return zero or negative growths.  */
3452 if (d.self_recursive)
3453 d.growth = d.growth < info->size ? info->size : d.growth;
3454 else if (DECL_EXTERNAL (node->decl) || d.uninlinable)
3455 ;
3456 else
3457 {
3458 if (node->will_be_removed_from_program_if_no_direct_calls_p ())
3459 d.growth -= info->size;
3460 /* COMDAT functions are very often not shared across multiple units
3461 since they come from various template instantiations.
3462 Take this into account. */
3463 else if (DECL_COMDAT (node->decl)
3464 && node->can_remove_if_no_direct_calls_p ())
3465 d.growth -= (info->size
3466 * (100 - PARAM_VALUE (PARAM_COMDAT_SHARING_PROBABILITY))
3467 + 50) / 100;
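 	/* E.g. (an illustrative computation) with self size 100 and
 	   comdat-sharing-probability at its default of 20, the line above
 	   subtracts (100 * 80 + 50) / 100 == 80 from the growth.  */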
3468 }
3469
3470 return d.growth;
3471 }
3472
 3473 /* Return true unless NODE is removable and has fewer than *MAX_CALLERS callers, all of whose calls can be inlined.  */
3474
3475 static bool
3476 check_callers (cgraph_node *node, int *max_callers)
3477 {
3478 ipa_ref *ref;
3479
3480 if (!node->can_remove_if_no_direct_calls_and_refs_p ())
3481 return true;
3482
3483 for (cgraph_edge *e = node->callers; e; e = e->next_caller)
3484 {
3485 (*max_callers)--;
3486 if (!*max_callers
3487 || cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
3488 return true;
3489 }
3490
3491 FOR_EACH_ALIAS (node, ref)
3492 if (check_callers (dyn_cast <cgraph_node *> (ref->referring), max_callers))
3493 return true;
3494
3495 return false;
3496 }
3497
3498
 3499 /* Make a cheap estimation of whether the growth of NODE is likely positive,
 3500    knowing the EDGE_GROWTH of one particular edge.
 3501    We assume that most of the other edges will have similar growth
 3502    and skip the computation if there are too many callers.  */
3503
3504 bool
3505 growth_likely_positive (struct cgraph_node *node,
3506 int edge_growth)
3507 {
3508 int max_callers;
3509 struct cgraph_edge *e;
3510 gcc_checking_assert (edge_growth > 0);
3511
3512 /* First quickly check if NODE is removable at all. */
3513 if (DECL_EXTERNAL (node->decl))
3514 return true;
3515 if (!node->can_remove_if_no_direct_calls_and_refs_p ()
3516 || node->address_taken)
3517 return true;
3518
3519 max_callers = inline_summaries->get (node)->size * 4 / edge_growth + 2;
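   /* E.g. (illustrative numbers) a node of size 30 with EDGE_GROWTH of 10
      yields a budget of 30 * 4 / 10 + 2 == 14 callers to inspect.  */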
3520
3521 for (e = node->callers; e; e = e->next_caller)
3522 {
3523 max_callers--;
3524 if (!max_callers
3525 || cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
3526 return true;
3527 }
3528
3529 ipa_ref *ref;
3530 FOR_EACH_ALIAS (node, ref)
3531 if (check_callers (dyn_cast <cgraph_node *> (ref->referring), &max_callers))
3532 return true;
3533
 3534   /* Unlike for functions called once, we play unsafe with
 3535      COMDATs.  We can allow that since we know the functions
 3536      in consideration are small (and thus the risk is small) and
 3537      moreover the growth estimates already account for the fact that COMDAT
 3538      functions may or may not disappear when eliminated from the
 3539      current unit.  With good probability making the aggressive
 3540      choice in all units is going to make the overall program
 3541      smaller.  */
3542 if (DECL_COMDAT (node->decl))
3543 {
3544 if (!node->can_remove_if_no_direct_calls_p ())
3545 return true;
3546 }
3547 else if (!node->will_be_removed_from_program_if_no_direct_calls_p ())
3548 return true;
3549
3550 return estimate_growth (node) > 0;
3551 }
3552
3553
3554 /* This function performs intraprocedural analysis in NODE that is required to
3555 inline indirect calls. */
3556
3557 static void
3558 inline_indirect_intraprocedural_analysis (struct cgraph_node *node)
3559 {
3560 ipa_analyze_node (node);
3561 if (dump_file && (dump_flags & TDF_DETAILS))
3562 {
3563 ipa_print_node_params (dump_file, node);
3564 ipa_print_node_jump_functions (dump_file, node);
3565 }
3566 }
3567
3568
 3569 /* Analyze the function body and compute its inline summary.  */
3570
3571 void
3572 inline_analyze_function (struct cgraph_node *node)
3573 {
3574 push_cfun (DECL_STRUCT_FUNCTION (node->decl));
3575
3576 if (dump_file)
3577 fprintf (dump_file, "\nAnalyzing function: %s/%u\n",
3578 node->name (), node->order);
3579 if (opt_for_fn (node->decl, optimize) && !node->thunk.thunk_p)
3580 inline_indirect_intraprocedural_analysis (node);
3581 compute_inline_parameters (node, false);
3582 if (!optimize)
3583 {
3584 struct cgraph_edge *e;
3585 for (e = node->callees; e; e = e->next_callee)
3586 e->inline_failed = CIF_FUNCTION_NOT_OPTIMIZED;
3587 for (e = node->indirect_calls; e; e = e->next_callee)
3588 e->inline_failed = CIF_FUNCTION_NOT_OPTIMIZED;
3589 }
3590
3591 pop_cfun ();
3592 }
3593
3594
3595 /* Called when new function is inserted to callgraph late. */
3596
3597 void
3598 inline_summary_t::insert (struct cgraph_node *node, inline_summary *)
3599 {
3600 inline_analyze_function (node);
3601 }
3602
 3603 /* Compute inline summaries for all functions in the compilation unit.  */
3604
3605 void
3606 inline_generate_summary (void)
3607 {
3608 struct cgraph_node *node;
3609
3610 FOR_EACH_DEFINED_FUNCTION (node)
3611 if (DECL_STRUCT_FUNCTION (node->decl))
3612 node->local.versionable = tree_versionable_function_p (node->decl);
3613
3614 /* When not optimizing, do not bother to analyze. Inlining is still done
3615 because edge redirection needs to happen there. */
3616 if (!optimize && !flag_generate_lto && !flag_generate_offload && !flag_wpa)
3617 return;
3618
3619 if (!inline_summaries)
3620 inline_summaries = (inline_summary_t*) inline_summary_t::create_ggc (symtab);
3621
3622 inline_summaries->enable_insertion_hook ();
3623
3624 ipa_register_cgraph_hooks ();
3625 inline_free_summary ();
3626
3627 FOR_EACH_DEFINED_FUNCTION (node)
3628 if (!node->alias)
3629 inline_analyze_function (node);
3630 }
3631
3632
 3633 /* Read ipa call summary for edge E from IB.  */
3634
3635 static void
3636 read_ipa_call_summary (struct lto_input_block *ib, struct cgraph_edge *e)
3637 {
3638 struct ipa_call_summary *es = ipa_call_summaries->get (e);
3639 predicate p;
3640 int length, i;
3641
3642 es->call_stmt_size = streamer_read_uhwi (ib);
3643 es->call_stmt_time = streamer_read_uhwi (ib);
3644 es->loop_depth = streamer_read_uhwi (ib);
3645 p.stream_in (ib);
3646 edge_set_predicate (e, &p);
3647 length = streamer_read_uhwi (ib);
3648 if (length)
3649 {
3650 es->param.safe_grow_cleared (length);
3651 for (i = 0; i < length; i++)
3652 es->param[i].change_prob = streamer_read_uhwi (ib);
3653 }
3654 }
3655
3656
3657 /* Stream in inline summaries from the section. */
3658
3659 static void
3660 inline_read_section (struct lto_file_decl_data *file_data, const char *data,
3661 size_t len)
3662 {
3663 const struct lto_function_header *header =
3664 (const struct lto_function_header *) data;
3665 const int cfg_offset = sizeof (struct lto_function_header);
3666 const int main_offset = cfg_offset + header->cfg_size;
3667 const int string_offset = main_offset + header->main_size;
3668 struct data_in *data_in;
3669 unsigned int i, count2, j;
3670 unsigned int f_count;
3671
3672 lto_input_block ib ((const char *) data + main_offset, header->main_size,
3673 file_data->mode_table);
3674
3675 data_in =
3676 lto_data_in_create (file_data, (const char *) data + string_offset,
3677 header->string_size, vNULL);
3678 f_count = streamer_read_uhwi (&ib);
3679 for (i = 0; i < f_count; i++)
3680 {
3681 unsigned int index;
3682 struct cgraph_node *node;
3683 struct inline_summary *info;
3684 lto_symtab_encoder_t encoder;
3685 struct bitpack_d bp;
3686 struct cgraph_edge *e;
3687 predicate p;
3688
3689 index = streamer_read_uhwi (&ib);
3690 encoder = file_data->symtab_node_encoder;
3691 node = dyn_cast<cgraph_node *> (lto_symtab_encoder_deref (encoder,
3692 index));
3693 info = inline_summaries->get (node);
3694
3695 info->estimated_stack_size
3696 = info->estimated_self_stack_size = streamer_read_uhwi (&ib);
3697 info->size = info->self_size = streamer_read_uhwi (&ib);
3698 info->time = info->self_time = sreal::stream_in (&ib);
3699
3700 bp = streamer_read_bitpack (&ib);
3701 info->inlinable = bp_unpack_value (&bp, 1);
3702 info->contains_cilk_spawn = bp_unpack_value (&bp, 1);
3703 info->fp_expressions = bp_unpack_value (&bp, 1);
3704
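/* Stream in the table of conditions that the predicates below refer to.  */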
3705 count2 = streamer_read_uhwi (&ib);
3706 gcc_assert (!info->conds);
3707 for (j = 0; j < count2; j++)
3708 {
3709 struct condition c;
3710 c.operand_num = streamer_read_uhwi (&ib);
3711 c.size = streamer_read_uhwi (&ib);
3712 c.code = (enum tree_code) streamer_read_uhwi (&ib);
3713 c.val = stream_read_tree (&ib, data_in);
3714 bp = streamer_read_bitpack (&ib);
3715 c.agg_contents = bp_unpack_value (&bp, 1);
3716 c.by_ref = bp_unpack_value (&bp, 1);
3717 if (c.agg_contents)
3718 c.offset = streamer_read_uhwi (&ib);
3719 vec_safe_push (info->conds, c);
3720 }
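/* Stream in the size/time table; each entry is guarded by an execution
   predicate and a non-constant predicate.  */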
3721 count2 = streamer_read_uhwi (&ib);
3722 gcc_assert (!info->size_time_table);
3723 for (j = 0; j < count2; j++)
3724 {
3725 struct size_time_entry e;
3726
3727 e.size = streamer_read_uhwi (&ib);
3728 e.time = sreal::stream_in (&ib);
3729 e.exec_predicate.stream_in (&ib);
3730 e.nonconst_predicate.stream_in (&ib);
3731
3732 vec_safe_push (info->size_time_table, e);
3733 }
3734
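/* Stream in the inlining hint predicates for loop iteration counts,
   loop strides and array indexing.  */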
3735 p.stream_in (&ib);
3736 set_hint_predicate (&info->loop_iterations, p);
3737 p.stream_in (&ib);
3738 set_hint_predicate (&info->loop_stride, p);
3739 p.stream_in (&ib);
3740 set_hint_predicate (&info->array_index, p);
3741 for (e = node->callees; e; e = e->next_callee)
3742 read_ipa_call_summary (&ib, e);
3743 for (e = node->indirect_calls; e; e = e->next_callee)
3744 read_ipa_call_summary (&ib, e);
3745 }
3746
3747 lto_free_section_data (file_data, LTO_section_inline_summary, NULL, data,
3748 len);
3749 lto_data_in_delete (data_in);
3750 }
3751
3752
3753 /* Read inline summaries. Jump functions are shared between ipa-cp
3754 and the inliner, so when ipa-cp is active we do not need to read
3755 them a second time here. */
3756
3757 void
3758 inline_read_summary (void)
3759 {
3760 struct lto_file_decl_data **file_data_vec = lto_get_file_decl_data ();
3761 struct lto_file_decl_data *file_data;
3762 unsigned int j = 0;
3763
3764 inline_summary_alloc ();
3765
3766 while ((file_data = file_data_vec[j++]))
3767 {
3768 size_t len;
3769 const char *data = lto_get_section_data (file_data,
3770 LTO_section_inline_summary,
3771 NULL, &len);
3772 if (data)
3773 inline_read_section (file_data, data, len);
3774 else
3775 /* Fatal error here. We do not want to support compiling ltrans units
3776 with a different compiler version or different flags than the WPA
3777 unit, so this should never happen. */
3778 fatal_error (input_location,
3779 "ipa inline summary is missing in input file");
3780 }
3781 if (optimize)
3782 {
3783 ipa_register_cgraph_hooks ();
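/* When ipa-cp is not active, the inliner itself must read the jump
   functions it shares with ipa-cp.  */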
3784 if (!flag_ipa_cp)
3785 ipa_prop_read_jump_functions ();
3786 }
3787
3788 gcc_assert (inline_summaries);
3789 inline_summaries->enable_insertion_hook ();
3790 }
3791
3792
3793 /* Write inline summary for edge E to OB. */
3794
3795 static void
3796 write_ipa_call_summary (struct output_block *ob, struct cgraph_edge *e)
3797 {
3798 struct ipa_call_summary *es = ipa_call_summaries->get (e);
3799 int i;
3800
3801 streamer_write_uhwi (ob, es->call_stmt_size);
3802 streamer_write_uhwi (ob, es->call_stmt_time);
3803 streamer_write_uhwi (ob, es->loop_depth);
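/* A NULL predicate is streamed as a lone 0, which predicate::stream_in
   decodes as the default true predicate.  */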
3804 if (es->predicate)
3805 es->predicate->stream_out (ob);
3806 else
3807 streamer_write_uhwi (ob, 0);
3808 streamer_write_uhwi (ob, es->param.length ());
3809 for (i = 0; i < (int) es->param.length (); i++)
3810 streamer_write_uhwi (ob, es->param[i].change_prob);
3811 }
3812
3813
3814 /* Write inline summaries for all nodes in SET.
3815 Jump functions are shared between ipa-cp and the inliner, so when
3816 ipa-cp is active, we do not need to write them twice. */
3817
3818 void
3819 inline_write_summary (void)
3820 {
3821 struct output_block *ob = create_output_block (LTO_section_inline_summary);
3822 lto_symtab_encoder_t encoder = ob->decl_state->symtab_node_encoder;
3823 unsigned int count = 0;
3824 int i;
3825
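/* First pass: count the nodes whose summaries will be streamed, so the
   reader knows how many records to expect.  */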
3826 for (i = 0; i < lto_symtab_encoder_size (encoder); i++)
3827 {
3828 symtab_node *snode = lto_symtab_encoder_deref (encoder, i);
3829 cgraph_node *cnode = dyn_cast <cgraph_node *> (snode);
3830 if (cnode && cnode->definition && !cnode->alias)
3831 count++;
3832 }
3833 streamer_write_uhwi (ob, count);
3834
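/* Second pass: stream out the summary of each such node.  */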
3835 for (i = 0; i < lto_symtab_encoder_size (encoder); i++)
3836 {
3837 symtab_node *snode = lto_symtab_encoder_deref (encoder, i);
3838 cgraph_node *cnode = dyn_cast <cgraph_node *> (snode);
3839 if (cnode && cnode->definition && !cnode->alias)
3840 {
3841 struct inline_summary *info = inline_summaries->get (cnode);
3842 struct bitpack_d bp;
3843 struct cgraph_edge *edge;
3844 int i;
3845 size_time_entry *e;
3846 struct condition *c;
3847
3848 streamer_write_uhwi (ob, lto_symtab_encoder_encode (encoder, cnode));
3849 streamer_write_hwi (ob, info->estimated_self_stack_size);
3850 streamer_write_hwi (ob, info->self_size);
3851 info->self_time.stream_out (ob);
3852 bp = bitpack_create (ob->main_stream);
3853 bp_pack_value (&bp, info->inlinable, 1);
3854 bp_pack_value (&bp, info->contains_cilk_spawn, 1);
3855 bp_pack_value (&bp, info->fp_expressions, 1);
3856 streamer_write_bitpack (&bp);
3857 streamer_write_uhwi (ob, vec_safe_length (info->conds));
3858 for (i = 0; vec_safe_iterate (info->conds, i, &c); i++)
3859 {
3860 streamer_write_uhwi (ob, c->operand_num);
3861 streamer_write_uhwi (ob, c->size);
3862 streamer_write_uhwi (ob, c->code);
3863 stream_write_tree (ob, c->val, true);
3864 bp = bitpack_create (ob->main_stream);
3865 bp_pack_value (&bp, c->agg_contents, 1);
3866 bp_pack_value (&bp, c->by_ref, 1);
3867 streamer_write_bitpack (&bp);
3868 if (c->agg_contents)
3869 streamer_write_uhwi (ob, c->offset);
3870 }
3871 streamer_write_uhwi (ob, vec_safe_length (info->size_time_table));
3872 for (i = 0; vec_safe_iterate (info->size_time_table, i, &e); i++)
3873 {
3874 streamer_write_uhwi (ob, e->size);
3875 e->time.stream_out (ob);
3876 e->exec_predicate.stream_out (ob);
3877 e->nonconst_predicate.stream_out (ob);
3878 }
3879 if (info->loop_iterations)
3880 info->loop_iterations->stream_out (ob);
3881 else
3882 streamer_write_uhwi (ob, 0);
3883 if (info->loop_stride)
3884 info->loop_stride->stream_out (ob);
3885 else
3886 streamer_write_uhwi (ob, 0);
3887 if (info->array_index)
3888 info->array_index->stream_out (ob);
3889 else
3890 streamer_write_uhwi (ob, 0);
3891 for (edge = cnode->callees; edge; edge = edge->next_callee)
3892 write_ipa_call_summary (ob, edge);
3893 for (edge = cnode->indirect_calls; edge; edge = edge->next_callee)
3894 write_ipa_call_summary (ob, edge);
3895 }
3896 }
3897 streamer_write_char_stream (ob->main_stream, 0);
3898 produce_asm (ob, NULL);
3899 destroy_output_block (ob);
3900
3901 if (optimize && !flag_ipa_cp)
3902 ipa_prop_write_jump_functions ();
3903 }
3904
3905
3906 /* Release the inline summaries and the data they own. */
3907
3908 void
3909 inline_free_summary (void)
3910 {
3911 struct cgraph_node *node;
3912 if (!ipa_call_summaries)
3913 return;
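/* Reset each summary first so the per-node data it owns (conditions,
   size/time entries, predicates) is freed before the containers are
   released.  */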
3914 FOR_EACH_DEFINED_FUNCTION (node)
3915 if (!node->alias)
3916 inline_summaries->get (node)->reset (node);
3917 inline_summaries->release ();
3918 inline_summaries = NULL;
3919 ipa_call_summaries->release ();
3920 delete ipa_call_summaries;
3921 ipa_call_summaries = NULL;
3922 edge_predicate_pool.release ();
3923 }