/* Inlining decision heuristics.
   Copyright (C) 2003-2017 Free Software Foundation, Inc.
   Contributed by Jan Hubicka

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* Analysis used by the inliner and other passes limiting code size growth.

   We estimate for each function
   - function body size
   - average function execution time
   - inlining size benefit (that is how much of function body size
     and its call sequence is expected to disappear by inlining)
   - inlining time benefit
   - function frame size
   For each call
   - call statement size and time

   inline_summary data structures store the above information locally (i.e.
   parameters of the function itself) and globally (i.e. parameters of
   the function created by applying all the inline decisions already
   present in the callgraph).

   We provide access to the inline_summary data structure and
   basic logic updating the parameters when inlining is performed.

   The summaries are context sensitive.  Context means
     1) partial assignment of known constant values of operands
     2) whether function is inlined into the call or not.
   It is easy to add more variants.  To represent function size and time
   that depend on context (i.e. it is known to be optimized away when
   context is known either by inlining or from IP-CP and cloning),
   we use predicates.

   estimate_edge_size and estimate_edge_growth can be used to query
   function size/time in the given context.  inline_merge_summary merges
   properties of caller and callee after inlining.

   Finally pass_inline_parameters is exported.  This is used to drive
   computation of function parameters used by the early inliner.  The IPA
   inliner performs analysis via its analyze_function method.  */
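
/* Illustrative sketch (a hypothetical example, not used by the pass): for

     int f (int x)
     {
       if (x == 4)
         return 1;
       return g ();
     }

   the size and time of the "return 1" path are recorded guarded by a
   predicate derived from the condition on X, so when IP-CP or the inliner
   knows the value of X, the specialized copy is estimated to be much
   cheaper than the generic body.  See account_size_time and
   evaluate_conditions_for_known_args below for the actual encoding.  */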

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "alloc-pool.h"
#include "tree-pass.h"
#include "ssa.h"
#include "tree-streamer.h"
#include "cgraph.h"
#include "diagnostic.h"
#include "fold-const.h"
#include "print-tree.h"
#include "tree-inline.h"
#include "gimple-pretty-print.h"
#include "params.h"
#include "cfganal.h"
#include "gimple-iterator.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "symbol-summary.h"
#include "ipa-prop.h"
#include "ipa-inline.h"
#include "cfgloop.h"
#include "tree-scalar-evolution.h"
#include "ipa-utils.h"
#include "cilk.h"
#include "cfgexpand.h"
#include "gimplify.h"

/* Summaries.  */
function_summary <inline_summary *> *inline_summaries;
call_summary <ipa_call_summary *> *ipa_call_summaries;

/* Cached node/edge growths.  */
vec<edge_growth_cache_entry> edge_growth_cache;

/* Edge predicates go here.  */
static object_allocator<predicate> edge_predicate_pool ("edge predicates");


/* Dump inline hints.  */
void
dump_inline_hints (FILE *f, inline_hints hints)
{
  if (!hints)
    return;
  fprintf (f, "inline hints:");
  if (hints & INLINE_HINT_indirect_call)
    {
      hints &= ~INLINE_HINT_indirect_call;
      fprintf (f, " indirect_call");
    }
  if (hints & INLINE_HINT_loop_iterations)
    {
      hints &= ~INLINE_HINT_loop_iterations;
      fprintf (f, " loop_iterations");
    }
  if (hints & INLINE_HINT_loop_stride)
    {
      hints &= ~INLINE_HINT_loop_stride;
      fprintf (f, " loop_stride");
    }
  if (hints & INLINE_HINT_same_scc)
    {
      hints &= ~INLINE_HINT_same_scc;
      fprintf (f, " same_scc");
    }
  if (hints & INLINE_HINT_in_scc)
    {
      hints &= ~INLINE_HINT_in_scc;
      fprintf (f, " in_scc");
    }
  if (hints & INLINE_HINT_cross_module)
    {
      hints &= ~INLINE_HINT_cross_module;
      fprintf (f, " cross_module");
    }
  if (hints & INLINE_HINT_declared_inline)
    {
      hints &= ~INLINE_HINT_declared_inline;
      fprintf (f, " declared_inline");
    }
  if (hints & INLINE_HINT_array_index)
    {
      hints &= ~INLINE_HINT_array_index;
      fprintf (f, " array_index");
    }
  if (hints & INLINE_HINT_known_hot)
    {
      hints &= ~INLINE_HINT_known_hot;
      fprintf (f, " known_hot");
    }
  gcc_assert (!hints);
}


/* Record SIZE and TIME to SUMMARY.
   The accounted code will be executed when EXEC_PRED is true.
   When NONCONST_PRED is false the code will evaluate to a constant and
   will get optimized out in specialized clones of the function.  */

void
inline_summary::account_size_time (int size, sreal time,
				   const predicate &exec_pred,
				   const predicate &nonconst_pred_in)
{
  size_time_entry *e;
  bool found = false;
  int i;
  predicate nonconst_pred;

  if (exec_pred == false)
    return;

  nonconst_pred = nonconst_pred_in & exec_pred;

  if (nonconst_pred == false)
    return;

  /* We need to create the initial empty unconditional clause, but otherwise
     we don't need to account empty times and sizes.  */
  if (!size && time == 0 && size_time_table)
    return;

  gcc_assert (time >= 0);

  for (i = 0; vec_safe_iterate (size_time_table, i, &e); i++)
    if (e->exec_predicate == exec_pred
	&& e->nonconst_predicate == nonconst_pred)
      {
	found = true;
	break;
      }
  if (i == 256)
    {
      i = 0;
      found = true;
      e = &(*size_time_table)[0];
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "\t\tReached limit on number of entries, "
		 "ignoring the predicate.");
    }
  if (dump_file && (dump_flags & TDF_DETAILS) && (time != 0 || size))
    {
      fprintf (dump_file,
	       "\t\tAccounting size:%3.2f, time:%3.2f on %spredicate exec:",
	       ((double) size) / INLINE_SIZE_SCALE,
	       (time.to_double ()), found ? "" : "new ");
      exec_pred.dump (dump_file, conds, 0);
      if (exec_pred != nonconst_pred)
	{
	  fprintf (dump_file, " nonconst:");
	  nonconst_pred.dump (dump_file, conds);
	}
      else
	fprintf (dump_file, "\n");
    }
  if (!found)
    {
      struct size_time_entry new_entry;
      new_entry.size = size;
      new_entry.time = time;
      new_entry.exec_predicate = exec_pred;
      new_entry.nonconst_predicate = nonconst_pred;
      vec_safe_push (size_time_table, new_entry);
    }
  else
    {
      e->size += size;
      e->time += time;
    }
}
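
/* Sketch of how the accounting accumulates (hypothetical values): two calls
   with the same predicate pair merge into one entry,

     s->account_size_time (4, 2, p, p);
     s->account_size_time (6, 1, p, p);  // the entry now has size 10, time 3

   while a different predicate pair pushes a new size_time_table entry,
   capped at 256 entries as enforced above.  */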

/* We proved E to be unreachable, redirect it to __builtin_unreachable.  */

static struct cgraph_edge *
redirect_to_unreachable (struct cgraph_edge *e)
{
  struct cgraph_node *callee = !e->inline_failed ? e->callee : NULL;
  struct cgraph_node *target = cgraph_node::get_create
		  (builtin_decl_implicit (BUILT_IN_UNREACHABLE));

  if (e->speculative)
    e = e->resolve_speculation (target->decl);
  else if (!e->callee)
    e->make_direct (target);
  else
    e->redirect_callee (target);
  struct ipa_call_summary *es = ipa_call_summaries->get (e);
  e->inline_failed = CIF_UNREACHABLE;
  e->frequency = 0;
  e->count = 0;
  es->call_stmt_size = 0;
  es->call_stmt_time = 0;
  if (callee)
    callee->remove_symbol_and_inline_clones ();
  return e;
}

/* Set predicate for edge E.  */

static void
edge_set_predicate (struct cgraph_edge *e, predicate *predicate)
{
  /* If the edge is determined to be never executed, redirect it
     to BUILTIN_UNREACHABLE to save the inliner from inlining into it.  */
  if (predicate && *predicate == false
      /* When handling speculative edges, we need to do the redirection
	 just once.  Do it always on the direct edge, so we do not
	 attempt to resolve speculation while duplicating the edge.  */
      && (!e->speculative || e->callee))
    e = redirect_to_unreachable (e);

  struct ipa_call_summary *es = ipa_call_summaries->get (e);
  if (predicate && *predicate != true)
    {
      if (!es->predicate)
	es->predicate = edge_predicate_pool.allocate ();
      *es->predicate = *predicate;
    }
  else
    {
      if (es->predicate)
	edge_predicate_pool.remove (es->predicate);
      es->predicate = NULL;
    }
}

/* Set predicate for hint *P.  */

static void
set_hint_predicate (predicate **p, predicate new_predicate)
{
  if (new_predicate == false || new_predicate == true)
    {
      if (*p)
	edge_predicate_pool.remove (*p);
      *p = NULL;
    }
  else
    {
      if (!*p)
	*p = edge_predicate_pool.allocate ();
      **p = new_predicate;
    }
}


/* Compute what conditions may or may not hold given information about
   parameters.  RET_CLAUSE returns truths that may hold in a specialized copy,
   while RET_NONSPEC_CLAUSE returns truths that may hold in a nonspecialized
   copy when called in a given context.  It is a bitmask of conditions.  A bit
   value of 0 means that the condition is known to be false, while a bit value
   of 1 means that the condition may or may not be true.  These differ - for
   example the NOT_INLINED condition is always false in the second and also
   builtin_constant_p tests cannot use the fact that a parameter is indeed a
   constant.

   KNOWN_VALS is a partial mapping of parameters of NODE to constant values.
   KNOWN_AGGS is a vector of aggregate jump functions for each parameter.
   Return clause of possible truths.  When INLINE_P is true, assume that we are
   inlining.

   ERROR_MARK means compile time invariant.  */

static void
evaluate_conditions_for_known_args (struct cgraph_node *node,
				    bool inline_p,
				    vec<tree> known_vals,
				    vec<ipa_agg_jump_function_p>
				    known_aggs,
				    clause_t *ret_clause,
				    clause_t *ret_nonspec_clause)
{
  clause_t clause = inline_p ? 0 : 1 << predicate::not_inlined_condition;
  clause_t nonspec_clause = 1 << predicate::not_inlined_condition;
  struct inline_summary *info = inline_summaries->get (node);
  int i;
  struct condition *c;

  for (i = 0; vec_safe_iterate (info->conds, i, &c); i++)
    {
      tree val;
      tree res;

      /* We allow call stmt to have fewer arguments than the callee function
	 (especially for K&R style programs).  So bound check here (we assume
	 known_aggs vector, if non-NULL, has the same length as
	 known_vals).  */
      gcc_checking_assert (!known_aggs.exists ()
			   || (known_vals.length () == known_aggs.length ()));
      if (c->operand_num >= (int) known_vals.length ())
	{
	  clause |= 1 << (i + predicate::first_dynamic_condition);
	  nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
	  continue;
	}

      if (c->agg_contents)
	{
	  struct ipa_agg_jump_function *agg;

	  if (c->code == predicate::changed
	      && !c->by_ref
	      && (known_vals[c->operand_num] == error_mark_node))
	    continue;

	  if (known_aggs.exists ())
	    {
	      agg = known_aggs[c->operand_num];
	      val = ipa_find_agg_cst_for_param (agg, known_vals[c->operand_num],
						c->offset, c->by_ref);
	    }
	  else
	    val = NULL_TREE;
	}
      else
	{
	  val = known_vals[c->operand_num];
	  if (val == error_mark_node && c->code != predicate::changed)
	    val = NULL_TREE;
	}

      if (!val)
	{
	  clause |= 1 << (i + predicate::first_dynamic_condition);
	  nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
	  continue;
	}
      if (c->code == predicate::changed)
	{
	  nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
	  continue;
	}

      if (tree_to_shwi (TYPE_SIZE (TREE_TYPE (val))) != c->size)
	{
	  clause |= 1 << (i + predicate::first_dynamic_condition);
	  nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
	  continue;
	}
      if (c->code == predicate::is_not_constant)
	{
	  nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
	  continue;
	}

      val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (c->val), val);
      res = val
	? fold_binary_to_constant (c->code, boolean_type_node, val, c->val)
	: NULL;

      if (res && integer_zerop (res))
	continue;

      clause |= 1 << (i + predicate::first_dynamic_condition);
      nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
    }
  *ret_clause = clause;
  if (ret_nonspec_clause)
    *ret_nonspec_clause = nonspec_clause;
}
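
/* Worked example (hypothetical): assume a single condition "PARAM_0 == 4"
   sitting at predicate::first_dynamic_condition.  With KNOWN_VALS[0] == 4
   the comparison folds to a nonzero constant and the bit stays set in the
   returned clause; with KNOWN_VALS[0] == 7 it folds to zero and the bit is
   cleared; with no known value the bit is set conservatively, since the
   condition may or may not be true.  The bit for
   predicate::not_inlined_condition is set in the returned specialized
   clause whenever INLINE_P is false.  */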


/* Work out what conditions might be true at invocation of E.  */

static void
evaluate_properties_for_edge (struct cgraph_edge *e, bool inline_p,
			      clause_t *clause_ptr, clause_t *nonspec_clause_ptr,
			      vec<tree> *known_vals_ptr,
			      vec<ipa_polymorphic_call_context>
			      *known_contexts_ptr,
			      vec<ipa_agg_jump_function_p> *known_aggs_ptr)
{
  struct cgraph_node *callee = e->callee->ultimate_alias_target ();
  struct inline_summary *info = inline_summaries->get (callee);
  vec<tree> known_vals = vNULL;
  vec<ipa_agg_jump_function_p> known_aggs = vNULL;

  if (clause_ptr)
    *clause_ptr = inline_p ? 0 : 1 << predicate::not_inlined_condition;
  if (known_vals_ptr)
    known_vals_ptr->create (0);
  if (known_contexts_ptr)
    known_contexts_ptr->create (0);

  if (ipa_node_params_sum
      && !e->call_stmt_cannot_inline_p
      && ((clause_ptr && info->conds) || known_vals_ptr || known_contexts_ptr))
    {
      struct ipa_node_params *parms_info;
      struct ipa_edge_args *args = IPA_EDGE_REF (e);
      struct ipa_call_summary *es = ipa_call_summaries->get (e);
      int i, count = ipa_get_cs_argument_count (args);

      if (e->caller->global.inlined_to)
	parms_info = IPA_NODE_REF (e->caller->global.inlined_to);
      else
	parms_info = IPA_NODE_REF (e->caller);

      if (count && (info->conds || known_vals_ptr))
	known_vals.safe_grow_cleared (count);
      if (count && (info->conds || known_aggs_ptr))
	known_aggs.safe_grow_cleared (count);
      if (count && known_contexts_ptr)
	known_contexts_ptr->safe_grow_cleared (count);

      for (i = 0; i < count; i++)
	{
	  struct ipa_jump_func *jf = ipa_get_ith_jump_func (args, i);
	  tree cst = ipa_value_from_jfunc (parms_info, jf);

	  if (!cst && e->call_stmt
	      && i < (int)gimple_call_num_args (e->call_stmt))
	    {
	      cst = gimple_call_arg (e->call_stmt, i);
	      if (!is_gimple_min_invariant (cst))
		cst = NULL;
	    }
	  if (cst)
	    {
	      gcc_checking_assert (TREE_CODE (cst) != TREE_BINFO);
	      if (known_vals.exists ())
		known_vals[i] = cst;
	    }
	  else if (inline_p && !es->param[i].change_prob)
	    known_vals[i] = error_mark_node;

	  if (known_contexts_ptr)
	    (*known_contexts_ptr)[i] = ipa_context_from_jfunc (parms_info, e,
							       i, jf);
	  /* TODO: When IPA-CP starts propagating and merging aggregate jump
	     functions, use its knowledge of the caller too, just like the
	     scalar case above.  */
	  known_aggs[i] = &jf->agg;
	}
    }
  else if (e->call_stmt && !e->call_stmt_cannot_inline_p
	   && ((clause_ptr && info->conds) || known_vals_ptr))
    {
      int i, count = (int)gimple_call_num_args (e->call_stmt);

      if (count && (info->conds || known_vals_ptr))
	known_vals.safe_grow_cleared (count);
      for (i = 0; i < count; i++)
	{
	  tree cst = gimple_call_arg (e->call_stmt, i);
	  if (!is_gimple_min_invariant (cst))
	    cst = NULL;
	  if (cst)
	    known_vals[i] = cst;
	}
    }

  evaluate_conditions_for_known_args (callee, inline_p,
				      known_vals, known_aggs, clause_ptr,
				      nonspec_clause_ptr);

  if (known_vals_ptr)
    *known_vals_ptr = known_vals;
  else
    known_vals.release ();

  if (known_aggs_ptr)
    *known_aggs_ptr = known_aggs;
  else
    known_aggs.release ();
}


/* Allocate the inline summary vector or resize it to cover all cgraph
   nodes.  */

static void
inline_summary_alloc (void)
{
  if (!inline_summaries)
    inline_summaries = inline_summary_t::create_ggc (symtab);
  if (!ipa_call_summaries)
    ipa_call_summaries = new ipa_call_summary_t (symtab, false);
}

/* We are called multiple times for a given function; clear
   data from the previous run so they do not accumulate.  */

void
ipa_call_summary::reset ()
{
  call_stmt_size = call_stmt_time = 0;
  if (predicate)
    edge_predicate_pool.remove (predicate);
  predicate = NULL;
  param.release ();
}

/* We are called multiple times for a given function; clear
   data from the previous run so they do not accumulate.  */

void
inline_summary::reset (struct cgraph_node *node)
{
  struct cgraph_edge *e;

  self_size = 0;
  estimated_stack_size = 0;
  estimated_self_stack_size = 0;
  stack_frame_offset = 0;
  size = 0;
  time = 0;
  growth = 0;
  scc_no = 0;
  if (loop_iterations)
    {
      edge_predicate_pool.remove (loop_iterations);
      loop_iterations = NULL;
    }
  if (loop_stride)
    {
      edge_predicate_pool.remove (loop_stride);
      loop_stride = NULL;
    }
  if (array_index)
    {
      edge_predicate_pool.remove (array_index);
      array_index = NULL;
    }
  vec_free (conds);
  vec_free (size_time_table);
  for (e = node->callees; e; e = e->next_callee)
    ipa_call_summaries->get (e)->reset ();
  for (e = node->indirect_calls; e; e = e->next_callee)
    ipa_call_summaries->get (e)->reset ();
  fp_expressions = false;
}

/* Hook that is called by cgraph.c when a node is removed.  */

void
inline_summary_t::remove (cgraph_node *node, inline_summary *info)
{
  info->reset (node);
}

/* Same as remap_predicate_after_duplication but handle hint predicate *P.
   Additionally care about allocating a new memory slot for the updated
   predicate and set it to NULL when it becomes true or false (and thus
   uninteresting).  */

static void
remap_hint_predicate_after_duplication (predicate **p,
					clause_t possible_truths)
{
  predicate new_predicate;

  if (!*p)
    return;

  new_predicate = (*p)->remap_after_duplication (possible_truths);
  /* We do not want to free previous predicate; it is used by node origin.  */
  *p = NULL;
  set_hint_predicate (p, new_predicate);
}


/* Hook that is called by cgraph.c when a node is duplicated.  */
void
inline_summary_t::duplicate (cgraph_node *src,
			     cgraph_node *dst,
			     inline_summary *,
			     inline_summary *info)
{
  inline_summary_alloc ();
  memcpy (info, inline_summaries->get (src), sizeof (inline_summary));
  /* TODO: as an optimization, we may avoid copying conditions
     that are known to be false or true.  */
  info->conds = vec_safe_copy (info->conds);

  /* When there are any replacements in the function body, see if we can figure
     out that something was optimized out.  */
  if (ipa_node_params_sum && dst->clone.tree_map)
    {
      vec<size_time_entry, va_gc> *entry = info->size_time_table;
      /* Use SRC parm info since it may not be copied yet.  */
      struct ipa_node_params *parms_info = IPA_NODE_REF (src);
      vec<tree> known_vals = vNULL;
      int count = ipa_get_param_count (parms_info);
      int i, j;
      clause_t possible_truths;
      predicate true_pred = true;
      size_time_entry *e;
      int optimized_out_size = 0;
      bool inlined_to_p = false;
      struct cgraph_edge *edge, *next;

      info->size_time_table = 0;
      known_vals.safe_grow_cleared (count);
      for (i = 0; i < count; i++)
	{
	  struct ipa_replace_map *r;

	  for (j = 0; vec_safe_iterate (dst->clone.tree_map, j, &r); j++)
	    {
	      if (((!r->old_tree && r->parm_num == i)
		   || (r->old_tree && r->old_tree == ipa_get_param (parms_info, i)))
		  && r->replace_p && !r->ref_p)
		{
		  known_vals[i] = r->new_tree;
		  break;
		}
	    }
	}
      evaluate_conditions_for_known_args (dst, false,
					  known_vals,
					  vNULL,
					  &possible_truths,
					  /* We are going to specialize,
					     so ignore nonspec truths.  */
					  NULL);
      known_vals.release ();

      info->account_size_time (0, 0, true_pred, true_pred);

      /* Remap size_time vectors.
	 Simplify the predicate by pruning out alternatives that are known
	 to be false.
	 TODO: as an optimization, we can also eliminate conditions known
	 to be true.  */
      for (i = 0; vec_safe_iterate (entry, i, &e); i++)
	{
	  predicate new_exec_pred;
	  predicate new_nonconst_pred;
	  new_exec_pred = e->exec_predicate.remap_after_duplication
	    (possible_truths);
	  new_nonconst_pred = e->nonconst_predicate.remap_after_duplication
	    (possible_truths);
	  if (new_exec_pred == false || new_nonconst_pred == false)
	    optimized_out_size += e->size;
	  else
	    info->account_size_time (e->size, e->time, new_exec_pred,
				     new_nonconst_pred);
	}

      /* Remap edge predicates with the same simplification as above.
	 Also copy constantness arrays.  */
      for (edge = dst->callees; edge; edge = next)
	{
	  predicate new_predicate;
	  struct ipa_call_summary *es = ipa_call_summaries->get (edge);
	  next = edge->next_callee;

	  if (!edge->inline_failed)
	    inlined_to_p = true;
	  if (!es->predicate)
	    continue;
	  new_predicate = es->predicate->remap_after_duplication
	    (possible_truths);
	  if (new_predicate == false && *es->predicate != false)
	    optimized_out_size += es->call_stmt_size * INLINE_SIZE_SCALE;
	  edge_set_predicate (edge, &new_predicate);
	}

      /* Remap indirect edge predicates with the same simplification as above.
	 Also copy constantness arrays.  */
      for (edge = dst->indirect_calls; edge; edge = next)
	{
	  predicate new_predicate;
	  struct ipa_call_summary *es = ipa_call_summaries->get (edge);
	  next = edge->next_callee;

	  gcc_checking_assert (edge->inline_failed);
	  if (!es->predicate)
	    continue;
	  new_predicate = es->predicate->remap_after_duplication
	    (possible_truths);
	  if (new_predicate == false && *es->predicate != false)
	    optimized_out_size += es->call_stmt_size * INLINE_SIZE_SCALE;
	  edge_set_predicate (edge, &new_predicate);
	}
      remap_hint_predicate_after_duplication (&info->loop_iterations,
					      possible_truths);
      remap_hint_predicate_after_duplication (&info->loop_stride,
					      possible_truths);
      remap_hint_predicate_after_duplication (&info->array_index,
					      possible_truths);

      /* If the inliner or someone after the inliner will ever start producing
	 non-trivial clones, we will get trouble with lack of information
	 about updating self sizes, because size vectors already contain
	 sizes of the callees.  */
      gcc_assert (!inlined_to_p || !optimized_out_size);
    }
  else
    {
      info->size_time_table = vec_safe_copy (info->size_time_table);
      if (info->loop_iterations)
	{
	  predicate p = *info->loop_iterations;
	  info->loop_iterations = NULL;
	  set_hint_predicate (&info->loop_iterations, p);
	}
      if (info->loop_stride)
	{
	  predicate p = *info->loop_stride;
	  info->loop_stride = NULL;
	  set_hint_predicate (&info->loop_stride, p);
	}
      if (info->array_index)
	{
	  predicate p = *info->array_index;
	  info->array_index = NULL;
	  set_hint_predicate (&info->array_index, p);
	}
    }
  if (!dst->global.inlined_to)
    inline_update_overall_summary (dst);
}


/* Hook that is called by cgraph.c when an edge is duplicated.  */

void
ipa_call_summary_t::duplicate (struct cgraph_edge *src,
			       struct cgraph_edge *dst,
			       struct ipa_call_summary *srcinfo,
			       struct ipa_call_summary *info)
{
  *info = *srcinfo;
  info->predicate = NULL;
  edge_set_predicate (dst, srcinfo->predicate);
  info->param = srcinfo->param.copy ();
  if (!dst->indirect_unknown_callee && src->indirect_unknown_callee)
    {
      info->call_stmt_size -= (eni_size_weights.indirect_call_cost
			       - eni_size_weights.call_cost);
      info->call_stmt_time -= (eni_time_weights.indirect_call_cost
			       - eni_time_weights.call_cost);
    }
}


/* Keep edge cache consistent across edge removal.  */

void
ipa_call_summary_t::remove (struct cgraph_edge *edge,
			    struct ipa_call_summary *sum)
{
  if (edge_growth_cache.exists ())
    reset_edge_growth_cache (edge);
  sum->reset ();
}


/* Initialize growth caches.  */

void
initialize_growth_caches (void)
{
  if (symtab->edges_max_uid)
    edge_growth_cache.safe_grow_cleared (symtab->edges_max_uid);
}


/* Free growth caches.  */

void
free_growth_caches (void)
{
  edge_growth_cache.release ();
}


/* Dump edge summaries associated with NODE and recursively with all of its
   clones.  Indent by INDENT.  */

static void
dump_ipa_call_summary (FILE *f, int indent, struct cgraph_node *node,
		       struct inline_summary *info)
{
  struct cgraph_edge *edge;
  for (edge = node->callees; edge; edge = edge->next_callee)
    {
      struct ipa_call_summary *es = ipa_call_summaries->get (edge);
      struct cgraph_node *callee = edge->callee->ultimate_alias_target ();
      int i;

      fprintf (f,
	       "%*s%s/%i %s\n%*s loop depth:%2i freq:%4i size:%2i"
	       " time: %2i callee size:%2i stack:%2i",
	       indent, "", callee->name (), callee->order,
	       !edge->inline_failed
	       ? "inlined" : cgraph_inline_failed_string (edge->inline_failed),
	       indent, "", es->loop_depth, edge->frequency,
	       es->call_stmt_size, es->call_stmt_time,
	       (int) inline_summaries->get (callee)->size / INLINE_SIZE_SCALE,
	       (int) inline_summaries->get (callee)->estimated_stack_size);

      if (es->predicate)
	{
	  fprintf (f, " predicate: ");
	  es->predicate->dump (f, info->conds);
	}
      else
	fprintf (f, "\n");
      if (es->param.exists ())
	for (i = 0; i < (int) es->param.length (); i++)
	  {
	    int prob = es->param[i].change_prob;

	    if (!prob)
	      fprintf (f, "%*s op%i is compile time invariant\n",
		       indent + 2, "", i);
	    else if (prob != REG_BR_PROB_BASE)
	      fprintf (f, "%*s op%i change %f%% of time\n", indent + 2, "", i,
		       prob * 100.0 / REG_BR_PROB_BASE);
	  }
      if (!edge->inline_failed)
	{
	  fprintf (f, "%*sStack frame offset %i, callee self size %i,"
		   " callee size %i\n",
		   indent + 2, "",
		   (int) inline_summaries->get (callee)->stack_frame_offset,
		   (int) inline_summaries->get (callee)->estimated_self_stack_size,
		   (int) inline_summaries->get (callee)->estimated_stack_size);
	  dump_ipa_call_summary (f, indent + 2, callee, info);
	}
    }
  for (edge = node->indirect_calls; edge; edge = edge->next_callee)
    {
      struct ipa_call_summary *es = ipa_call_summaries->get (edge);
      fprintf (f, "%*sindirect call loop depth:%2i freq:%4i size:%2i"
	       " time: %2i",
	       indent, "",
	       es->loop_depth,
	       edge->frequency, es->call_stmt_size, es->call_stmt_time);
      if (es->predicate)
	{
	  fprintf (f, "predicate: ");
	  es->predicate->dump (f, info->conds);
	}
      else
	fprintf (f, "\n");
    }
}


/* Dump inline summary for NODE to F.  */

void
dump_inline_summary (FILE *f, struct cgraph_node *node)
{
  if (node->definition)
    {
      struct inline_summary *s = inline_summaries->get (node);
      size_time_entry *e;
      int i;
      fprintf (f, "Inline summary for %s/%i", node->name (),
	       node->order);
      if (DECL_DISREGARD_INLINE_LIMITS (node->decl))
	fprintf (f, " always_inline");
      if (s->inlinable)
	fprintf (f, " inlinable");
      if (s->contains_cilk_spawn)
	fprintf (f, " contains_cilk_spawn");
      if (s->fp_expressions)
	fprintf (f, " fp_expression");
      fprintf (f, "\n global time: %f\n", s->time.to_double ());
      fprintf (f, " self size: %i\n", s->self_size);
      fprintf (f, " global size: %i\n", s->size);
      fprintf (f, " min size: %i\n", s->min_size);
      fprintf (f, " self stack: %i\n",
	       (int) s->estimated_self_stack_size);
      fprintf (f, " global stack: %i\n", (int) s->estimated_stack_size);
      if (s->growth)
	fprintf (f, " estimated growth:%i\n", (int) s->growth);
      if (s->scc_no)
	fprintf (f, " In SCC: %i\n", (int) s->scc_no);
      for (i = 0; vec_safe_iterate (s->size_time_table, i, &e); i++)
	{
	  fprintf (f, " size:%f, time:%f",
		   (double) e->size / INLINE_SIZE_SCALE,
		   e->time.to_double ());
	  if (e->exec_predicate != true)
	    {
	      fprintf (f, ", executed if:");
	      e->exec_predicate.dump (f, s->conds, 0);
	    }
	  if (e->exec_predicate != e->nonconst_predicate)
	    {
	      fprintf (f, ", nonconst if:");
	      e->nonconst_predicate.dump (f, s->conds, 0);
	    }
	  fprintf (f, "\n");
	}
      if (s->loop_iterations)
	{
	  fprintf (f, " loop iterations:");
	  s->loop_iterations->dump (f, s->conds);
	}
      if (s->loop_stride)
	{
	  fprintf (f, " loop stride:");
	  s->loop_stride->dump (f, s->conds);
	}
      if (s->array_index)
	{
	  fprintf (f, " array index:");
	  s->array_index->dump (f, s->conds);
	}
      fprintf (f, " calls:\n");
      dump_ipa_call_summary (f, 4, node, s);
      fprintf (f, "\n");
    }
}

DEBUG_FUNCTION void
debug_inline_summary (struct cgraph_node *node)
{
  dump_inline_summary (stderr, node);
}

void
dump_inline_summaries (FILE *f)
{
  struct cgraph_node *node;

  FOR_EACH_DEFINED_FUNCTION (node)
    if (!node->global.inlined_to)
      dump_inline_summary (f, node);
}

/* Give initial reasons why inlining would fail on EDGE.  This gets either
   nullified or usually overwritten by more precise reasons later.  */

void
initialize_inline_failed (struct cgraph_edge *e)
{
  struct cgraph_node *callee = e->callee;

  if (e->inline_failed && e->inline_failed != CIF_BODY_NOT_AVAILABLE
      && cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
    ;
  else if (e->indirect_unknown_callee)
    e->inline_failed = CIF_INDIRECT_UNKNOWN_CALL;
  else if (!callee->definition)
    e->inline_failed = CIF_BODY_NOT_AVAILABLE;
  else if (callee->local.redefined_extern_inline)
    e->inline_failed = CIF_REDEFINED_EXTERN_INLINE;
  else
    e->inline_failed = CIF_FUNCTION_NOT_CONSIDERED;
  gcc_checking_assert (!e->call_stmt_cannot_inline_p
		       || cgraph_inline_failed_type (e->inline_failed)
			  == CIF_FINAL_ERROR);
}

/* Callback of walk_aliased_vdefs.  Flags, in the boolean variable pointed to
   by DATA, that it has been invoked.  */

static bool
mark_modified (ao_ref *ao ATTRIBUTE_UNUSED, tree vdef ATTRIBUTE_UNUSED,
	       void *data)
{
  bool *b = (bool *) data;
  *b = true;
  return true;
}

/* If OP refers to value of function parameter, return the corresponding
   parameter.  If non-NULL, the size of the memory load (or the SSA_NAME of
   the PARM_DECL) will be stored to *SIZE_P in that case too.  */

static tree
unmodified_parm_1 (gimple *stmt, tree op, HOST_WIDE_INT *size_p)
{
  /* SSA_NAME referring to parm default def?  */
  if (TREE_CODE (op) == SSA_NAME
      && SSA_NAME_IS_DEFAULT_DEF (op)
      && TREE_CODE (SSA_NAME_VAR (op)) == PARM_DECL)
    {
      if (size_p)
	*size_p = tree_to_shwi (TYPE_SIZE (TREE_TYPE (op)));
      return SSA_NAME_VAR (op);
    }
  /* Non-SSA parm reference?  */
  if (TREE_CODE (op) == PARM_DECL)
    {
      bool modified = false;

      ao_ref refd;
      ao_ref_init (&refd, op);
      walk_aliased_vdefs (&refd, gimple_vuse (stmt), mark_modified, &modified,
			  NULL);
      if (!modified)
	{
	  if (size_p)
	    *size_p = tree_to_shwi (TYPE_SIZE (TREE_TYPE (op)));
	  return op;
	}
    }
  return NULL_TREE;
}

/* If OP refers to value of function parameter, return the corresponding
   parameter.  Also traverse chains of SSA register assignments.  If non-NULL,
   the size of the memory load (or the SSA_NAME of the PARM_DECL) will be
   stored to *SIZE_P in that case too.  */

static tree
unmodified_parm (gimple *stmt, tree op, HOST_WIDE_INT *size_p)
{
  tree res = unmodified_parm_1 (stmt, op, size_p);
  if (res)
    return res;

  if (TREE_CODE (op) == SSA_NAME
      && !SSA_NAME_IS_DEFAULT_DEF (op)
      && gimple_assign_single_p (SSA_NAME_DEF_STMT (op)))
    return unmodified_parm (SSA_NAME_DEF_STMT (op),
			    gimple_assign_rhs1 (SSA_NAME_DEF_STMT (op)),
			    size_p);
  return NULL_TREE;
}

/* If OP refers to a value of a function parameter or value loaded from an
   aggregate passed to a parameter (either by value or reference), return TRUE
   and store the number of the parameter to *INDEX_P, the access size into
   *SIZE_P, and information whether and how it has been loaded from an
   aggregate into *AGGPOS.  INFO describes the function parameters, STMT is the
   statement in which OP is used or loaded.  */

static bool
unmodified_parm_or_parm_agg_item (struct ipa_func_body_info *fbi,
				  gimple *stmt, tree op, int *index_p,
				  HOST_WIDE_INT *size_p,
				  struct agg_position_info *aggpos)
{
  tree res = unmodified_parm_1 (stmt, op, size_p);

  gcc_checking_assert (aggpos);
  if (res)
    {
      *index_p = ipa_get_param_decl_index (fbi->info, res);
      if (*index_p < 0)
	return false;
      aggpos->agg_contents = false;
      aggpos->by_ref = false;
      return true;
    }

  if (TREE_CODE (op) == SSA_NAME)
    {
      if (SSA_NAME_IS_DEFAULT_DEF (op)
	  || !gimple_assign_single_p (SSA_NAME_DEF_STMT (op)))
	return false;
      stmt = SSA_NAME_DEF_STMT (op);
      op = gimple_assign_rhs1 (stmt);
      if (!REFERENCE_CLASS_P (op))
	return unmodified_parm_or_parm_agg_item (fbi, stmt, op, index_p, size_p,
						 aggpos);
    }

  aggpos->agg_contents = true;
  return ipa_load_from_parm_agg (fbi, fbi->info->descriptors,
				 stmt, op, index_p, &aggpos->offset,
				 size_p, &aggpos->by_ref);
}
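
/* Illustration (hypothetical GIMPLE): for a load "tmp_1 = param_2->field"
   where param_2 is the unmodified default definition of the first
   parameter, the function above returns true with *INDEX_P == 0 and
   AGGPOS describing a by-reference load at the offset of "field"; for a
   plain use of param_2 it returns true with aggpos->agg_contents set to
   false.  */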

/* See if statement might disappear after inlining.
   0 - means not eliminated
   1 - half of the statements go away
   2 - for sure it is eliminated.
   We are not terribly sophisticated, basically looking for simple abstraction
   penalty wrappers.  */

static int
eliminated_by_inlining_prob (gimple *stmt)
{
  enum gimple_code code = gimple_code (stmt);
  enum tree_code rhs_code;

  if (!optimize)
    return 0;

  switch (code)
    {
    case GIMPLE_RETURN:
      return 2;
    case GIMPLE_ASSIGN:
      if (gimple_num_ops (stmt) != 2)
	return 0;

      rhs_code = gimple_assign_rhs_code (stmt);

      /* Casts of parameters, loads from parameters passed by reference
	 and stores to return value or parameters are often free after
	 inlining due to SRA and further combining.
	 Assume that half of the statements go away.  */
      if (CONVERT_EXPR_CODE_P (rhs_code)
	  || rhs_code == VIEW_CONVERT_EXPR
	  || rhs_code == ADDR_EXPR
	  || gimple_assign_rhs_class (stmt) == GIMPLE_SINGLE_RHS)
	{
	  tree rhs = gimple_assign_rhs1 (stmt);
	  tree lhs = gimple_assign_lhs (stmt);
	  tree inner_rhs = get_base_address (rhs);
	  tree inner_lhs = get_base_address (lhs);
	  bool rhs_free = false;
	  bool lhs_free = false;

	  if (!inner_rhs)
	    inner_rhs = rhs;
	  if (!inner_lhs)
	    inner_lhs = lhs;

	  /* Reads of parameter are expected to be free.  */
	  if (unmodified_parm (stmt, inner_rhs, NULL))
	    rhs_free = true;
	  /* Match expressions of form &this->field.  Those will most likely
	     combine with something upstream after inlining.  */
	  else if (TREE_CODE (inner_rhs) == ADDR_EXPR)
	    {
	      tree op = get_base_address (TREE_OPERAND (inner_rhs, 0));
	      if (TREE_CODE (op) == PARM_DECL)
		rhs_free = true;
	      else if (TREE_CODE (op) == MEM_REF
		       && unmodified_parm (stmt, TREE_OPERAND (op, 0), NULL))
		rhs_free = true;
	    }

	  /* When a parameter is not an SSA register because its address is
	     taken and it is just copied into one, the statement will be
	     completely free after inlining (we will copy propagate
	     backward).  */
	  if (rhs_free && is_gimple_reg (lhs))
	    return 2;

	  /* Reads of parameters passed by reference are
	     expected to be free (i.e. optimized out after inlining).  */
	  if (TREE_CODE (inner_rhs) == MEM_REF
	      && unmodified_parm (stmt, TREE_OPERAND (inner_rhs, 0), NULL))
	    rhs_free = true;

	  /* Copying parameter passed by reference into gimple register is
	     probably also going to copy propagate, but we can't be quite
	     sure.  */
	  if (rhs_free && is_gimple_reg (lhs))
	    lhs_free = true;

	  /* Writes to parameters, parameters passed by value and return value
	     (either directly or passed via invisible reference) are free.

	     TODO: We ought to handle a testcase like
	     struct a {int a,b;};
	     struct a
	     returnstruct (void)
	     {
	       struct a a = {1,2};
	       return a;
	     }

	     This translates into:

	     returnstruct ()
	     {
	       int a$b;
	       int a$a;
	       struct a a;
	       struct a D.2739;

	       <bb 2>:
	       D.2739.a = 1;
	       D.2739.b = 2;
	       return D.2739;

	     }
	     For that we either need to copy ipa-split logic detecting writes
	     to return value.  */
	  if (TREE_CODE (inner_lhs) == PARM_DECL
	      || TREE_CODE (inner_lhs) == RESULT_DECL
	      || (TREE_CODE (inner_lhs) == MEM_REF
		  && (unmodified_parm (stmt, TREE_OPERAND (inner_lhs, 0), NULL)
		      || (TREE_CODE (TREE_OPERAND (inner_lhs, 0)) == SSA_NAME
			  && SSA_NAME_VAR (TREE_OPERAND (inner_lhs, 0))
			  && TREE_CODE (SSA_NAME_VAR (TREE_OPERAND
						      (inner_lhs,
						       0))) == RESULT_DECL))))
	    lhs_free = true;
	  if (lhs_free
	      && (is_gimple_reg (rhs) || is_gimple_min_invariant (rhs)))
	    rhs_free = true;
	  if (lhs_free && rhs_free)
	    return 1;
	}
      return 0;
    default:
      return 0;
    }
}
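
/* Example of the abstraction-penalty pattern scored above (a hypothetical
   getter):

     static int get_a (const struct a *self) { return self->a; }

   The load "self->a" reads through an unmodified parameter passed by
   reference and the GIMPLE_RETURN always scores 2, so most of the wrapper
   is expected to disappear once it is inlined into its caller.  */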


/* If BB ends by a conditional we can turn into predicates, attach
   corresponding predicates to the CFG edges.  */

static void
set_cond_stmt_execution_predicate (struct ipa_func_body_info *fbi,
				   struct inline_summary *summary,
				   basic_block bb)
{
  gimple *last;
  tree op;
  int index;
  HOST_WIDE_INT size;
  struct agg_position_info aggpos;
  enum tree_code code, inverted_code;
  edge e;
  edge_iterator ei;
  gimple *set_stmt;
  tree op2;

  last = last_stmt (bb);
  if (!last || gimple_code (last) != GIMPLE_COND)
    return;
  if (!is_gimple_ip_invariant (gimple_cond_rhs (last)))
    return;
  op = gimple_cond_lhs (last);
  /* TODO: handle conditionals like
     var = op0 < 4;
     if (var != 0).  */
  if (unmodified_parm_or_parm_agg_item (fbi, last, op, &index, &size, &aggpos))
    {
      code = gimple_cond_code (last);
      inverted_code = invert_tree_comparison (code, HONOR_NANS (op));

      FOR_EACH_EDGE (e, ei, bb->succs)
	{
	  enum tree_code this_code = (e->flags & EDGE_TRUE_VALUE
				      ? code : inverted_code);
	  /* invert_tree_comparison will return ERROR_MARK on FP
	     comparisons that are not EQ/NE instead of returning the proper
	     unordered code.  Be sure it is not confused with NON_CONSTANT.  */
	  if (this_code != ERROR_MARK)
	    {
	      predicate p
		= add_condition (summary, index, size, &aggpos, this_code,
				 unshare_expr_without_location
				 (gimple_cond_rhs (last)));
	      e->aux = edge_predicate_pool.allocate ();
	      *(predicate *) e->aux = p;
	    }
	}
    }

  if (TREE_CODE (op) != SSA_NAME)
    return;
  /* Special case
     if (builtin_constant_p (op))
       constant_code
     else
       nonconstant_code.
     Here we can predicate nonconstant_code.  We can't
     really handle constant_code since we have no predicate
     for this and also the constant code is not known to be
     optimized away when the inliner doesn't see that the operand is
     constant.  Other optimizers might think otherwise.  */
  if (gimple_cond_code (last) != NE_EXPR
      || !integer_zerop (gimple_cond_rhs (last)))
    return;
  set_stmt = SSA_NAME_DEF_STMT (op);
  if (!gimple_call_builtin_p (set_stmt, BUILT_IN_CONSTANT_P)
      || gimple_call_num_args (set_stmt) != 1)
    return;
  op2 = gimple_call_arg (set_stmt, 0);
  if (!unmodified_parm_or_parm_agg_item (fbi, set_stmt, op2, &index, &size,
					 &aggpos))
    return;
  FOR_EACH_EDGE (e, ei, bb->succs)
    if (e->flags & EDGE_FALSE_VALUE)
      {
	predicate p = add_condition (summary, index, size, &aggpos,
				     predicate::is_not_constant, NULL_TREE);
	e->aux = edge_predicate_pool.allocate ();
	*(predicate *) e->aux = p;
      }
}


/* If BB ends by a switch we can turn into predicates, attach corresponding
   predicates to the CFG edges.  */

static void
set_switch_stmt_execution_predicate (struct ipa_func_body_info *fbi,
				     struct inline_summary *summary,
				     basic_block bb)
{
  gimple *lastg;
  tree op;
  int index;
  HOST_WIDE_INT size;
  struct agg_position_info aggpos;
  edge e;
  edge_iterator ei;
  size_t n;
  size_t case_idx;

  lastg = last_stmt (bb);
  if (!lastg || gimple_code (lastg) != GIMPLE_SWITCH)
    return;
  gswitch *last = as_a <gswitch *> (lastg);
  op = gimple_switch_index (last);
  if (!unmodified_parm_or_parm_agg_item (fbi, last, op, &index, &size, &aggpos))
    return;

  FOR_EACH_EDGE (e, ei, bb->succs)
    {
      e->aux = edge_predicate_pool.allocate ();
      *(predicate *) e->aux = false;
    }
  n = gimple_switch_num_labels (last);
  for (case_idx = 0; case_idx < n; ++case_idx)
    {
      tree cl = gimple_switch_label (last, case_idx);
      tree min, max;
      predicate p;

      e = find_edge (bb, label_to_block (CASE_LABEL (cl)));
      min = CASE_LOW (cl);
      max = CASE_HIGH (cl);

      /* For the default case we might want to construct a predicate that
	 none of the cases is met, but it is a bit hard to do without having
	 negations of conditionals handy.  */
      if (!min && !max)
	p = true;
      else if (!max)
	p = add_condition (summary, index, size, &aggpos, EQ_EXPR,
			   unshare_expr_without_location (min));
      else
	{
	  predicate p1, p2;
	  p1 = add_condition (summary, index, size, &aggpos, GE_EXPR,
			      unshare_expr_without_location (min));
	  p2 = add_condition (summary, index, size, &aggpos, LE_EXPR,
			      unshare_expr_without_location (max));
	  p = p1 & p2;
	}
      *(struct predicate *) e->aux
	= p.or_with (summary->conds, *(struct predicate *) e->aux);
    }
}
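
/* Sketch of the resulting edge predicates (hypothetical source):

     switch (param)
       {
       case 0:        // edge predicate: param == 0
       case 4 ... 7:  // edge predicate: param >= 4 && param <= 7
       default:       // edge predicate: true (no negation available)
       }

   Each outgoing CFG edge gets the OR of the conditions of all case labels
   reaching it, as computed above.  */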


/* For each BB in NODE attach to its AUX pointer predicate under
   which it is executable.  */

static void
compute_bb_predicates (struct ipa_func_body_info *fbi,
		       struct cgraph_node *node,
		       struct inline_summary *summary)
{
  struct function *my_function = DECL_STRUCT_FUNCTION (node->decl);
  bool done = false;
  basic_block bb;

  FOR_EACH_BB_FN (bb, my_function)
    {
      set_cond_stmt_execution_predicate (fbi, summary, bb);
      set_switch_stmt_execution_predicate (fbi, summary, bb);
    }

  /* Entry block is always executable.  */
  ENTRY_BLOCK_PTR_FOR_FN (my_function)->aux
    = edge_predicate_pool.allocate ();
  *(predicate *) ENTRY_BLOCK_PTR_FOR_FN (my_function)->aux = true;

  /* A simple dataflow propagation of predicates forward in the CFG.
     TODO: work in reverse postorder.  */
  while (!done)
    {
      done = true;
      FOR_EACH_BB_FN (bb, my_function)
	{
	  predicate p = false;
	  edge e;
	  edge_iterator ei;
	  FOR_EACH_EDGE (e, ei, bb->preds)
	    {
	      if (e->src->aux)
		{
		  predicate this_bb_predicate
		    = *(predicate *) e->src->aux;
		  if (e->aux)
		    this_bb_predicate &= (*(struct predicate *) e->aux);
		  p = p.or_with (summary->conds, this_bb_predicate);
		  if (p == true)
		    break;
		}
	    }
	  if (p == false)
	    gcc_checking_assert (!bb->aux);
	  else
	    {
	      if (!bb->aux)
		{
		  done = false;
		  bb->aux = edge_predicate_pool.allocate ();
		  *((predicate *) bb->aux) = p;
		}
	      else if (p != *(predicate *) bb->aux)
		{
		  /* This OR operation is needed to ensure monotone dataflow
		     in the case we hit the limit on number of clauses and the
		     and/or operations above give approximate answers.  */
		  p = p.or_with (summary->conds, *(predicate *)bb->aux);
		  if (p != *(predicate *) bb->aux)
		    {
		      done = false;
		      *((predicate *) bb->aux) = p;
		    }
		}
	    }
	}
    }
}
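
/* Propagation example (hypothetical, with param an unmodified parameter):

     if (param != 0)  // bb1, predicate: true
       foo ();        // bb2, predicate: param != 0
     bar ();          // bb3

   bb2 receives its predecessor's predicate ANDed with the edge predicate,
   while bb3 receives "param != 0" OR "param == 0", i.e. true again, once
   the fixpoint loop above converges.  */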


/* We keep info about constantness of SSA names.  */

typedef predicate predicate_t;
/* Return predicate specifying when the result of EXPR might not be a
   compile time constant.  */

static predicate
will_be_nonconstant_expr_predicate (struct ipa_node_params *info,
				    struct inline_summary *summary,
				    tree expr,
				    vec<predicate_t> nonconstant_names)
{
  tree parm;
  int index;
  HOST_WIDE_INT size;

  while (UNARY_CLASS_P (expr))
    expr = TREE_OPERAND (expr, 0);

  parm = unmodified_parm (NULL, expr, &size);
  if (parm && (index = ipa_get_param_decl_index (info, parm)) >= 0)
    return add_condition (summary, index, size, NULL, predicate::changed,
			  NULL_TREE);
  if (is_gimple_min_invariant (expr))
    return false;
  if (TREE_CODE (expr) == SSA_NAME)
    return nonconstant_names[SSA_NAME_VERSION (expr)];
  if (BINARY_CLASS_P (expr) || COMPARISON_CLASS_P (expr))
    {
      predicate p1 = will_be_nonconstant_expr_predicate
	(info, summary, TREE_OPERAND (expr, 0),
	 nonconstant_names);
      if (p1 == true)
	return p1;

      predicate p2;
      p2 = will_be_nonconstant_expr_predicate (info, summary,
					       TREE_OPERAND (expr, 1),
					       nonconstant_names);
      return p1.or_with (summary->conds, p2);
    }
  else if (TREE_CODE (expr) == COND_EXPR)
    {
      predicate p1 = will_be_nonconstant_expr_predicate
	(info, summary, TREE_OPERAND (expr, 0),
	 nonconstant_names);
      if (p1 == true)
	return p1;

      predicate p2;
      p2 = will_be_nonconstant_expr_predicate (info, summary,
					       TREE_OPERAND (expr, 1),
					       nonconstant_names);
      if (p2 == true)
	return p2;
      p1 = p1.or_with (summary->conds, p2);
      p2 = will_be_nonconstant_expr_predicate (info, summary,
					       TREE_OPERAND (expr, 2),
					       nonconstant_names);
      return p2.or_with (summary->conds, p1);
    }
  else
    {
      debug_tree (expr);
      gcc_unreachable ();
    }
  return false;
}
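
/* Intuition (hypothetical example): for the expression "param + n_5" the
   result is nonconstant if either operand is, so the returned predicate
   is "param changed" OR'd with whatever nonconstant_names already records
   for n_5; a binary expression over two compile time constants yields the
   false predicate, i.e. "always constant".  */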


/* Return predicate specifying when the STMT might have result that is not
   a compile time constant.  */

static predicate
will_be_nonconstant_predicate (struct ipa_func_body_info *fbi,
			       struct inline_summary *summary,
			       gimple *stmt,
			       vec<predicate_t> nonconstant_names)
{
  predicate p = true;
  ssa_op_iter iter;
  tree use;
  predicate op_non_const;
  bool is_load;
  int base_index;
  HOST_WIDE_INT size;
  struct agg_position_info aggpos;

  /* What statements might be optimized away
     when their arguments are constant.  */
  if (gimple_code (stmt) != GIMPLE_ASSIGN
      && gimple_code (stmt) != GIMPLE_COND
      && gimple_code (stmt) != GIMPLE_SWITCH
      && (gimple_code (stmt) != GIMPLE_CALL
	  || !(gimple_call_flags (stmt) & ECF_CONST)))
    return p;

  /* Stores will stay anyway.  */
  if (gimple_store_p (stmt))
    return p;

  is_load = gimple_assign_load_p (stmt);

  /* Loads can be optimized when the value is known.  */
  if (is_load)
    {
      tree op;
      gcc_assert (gimple_assign_single_p (stmt));
      op = gimple_assign_rhs1 (stmt);
      if (!unmodified_parm_or_parm_agg_item (fbi, stmt, op, &base_index, &size,
					     &aggpos))
	return p;
    }
  else
    base_index = -1;

  /* See if we understand all operands before we start
     adding conditionals.  */
  FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
    {
      tree parm = unmodified_parm (stmt, use, NULL);
      /* For arguments we can build a condition.  */
      if (parm && ipa_get_param_decl_index (fbi->info, parm) >= 0)
	continue;
      if (TREE_CODE (use) != SSA_NAME)
	return p;
      /* If we know when operand is constant,
	 we still can say something useful.  */
      if (nonconstant_names[SSA_NAME_VERSION (use)] != true)
	continue;
      return p;
    }

  if (is_load)
    op_non_const =
      add_condition (summary, base_index, size, &aggpos, predicate::changed,
		     NULL);
  else
    op_non_const = false;
  FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
    {
      HOST_WIDE_INT size;
      tree parm = unmodified_parm (stmt, use, &size);
      int index;

      if (parm && (index = ipa_get_param_decl_index (fbi->info, parm)) >= 0)
	{
	  if (index != base_index)
	    p = add_condition (summary, index, size, NULL, predicate::changed,
			       NULL_TREE);
	  else
	    continue;
	}
      else
	p = nonconstant_names[SSA_NAME_VERSION (use)];
      op_non_const = p.or_with (summary->conds, op_non_const);
    }
  if ((gimple_code (stmt) == GIMPLE_ASSIGN || gimple_code (stmt) == GIMPLE_CALL)
      && gimple_op (stmt, 0)
      && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
    nonconstant_names[SSA_NAME_VERSION (gimple_op (stmt, 0))]
      = op_non_const;
  return op_non_const;
}

struct record_modified_bb_info
{
  bitmap bb_set;
  gimple *stmt;
};

/* Value is initialized in INIT_BB and used in USE_BB.  We want to compute
   the probability of it changing between executions of USE_BB.
   INIT_BB->frequency/USE_BB->frequency is an estimate, but if INIT_BB
   is in a different loop nest, we can do better.
   This is all just an estimate.  In theory we look for a minimal cut
   separating INIT_BB and USE_BB, but we only want to anticipate loop
   invariant motion anyway.  */

static basic_block
get_minimal_bb (basic_block init_bb, basic_block use_bb)
{
  struct loop *l = find_common_loop (init_bb->loop_father, use_bb->loop_father);
  if (l && l->header->frequency < init_bb->frequency)
    return l->header;
  return init_bb;
}

/* Callback of walk_aliased_vdefs.  Records basic blocks where the value may be
   set except for info->stmt.  */

static bool
record_modified (ao_ref *ao ATTRIBUTE_UNUSED, tree vdef, void *data)
{
  struct record_modified_bb_info *info =
    (struct record_modified_bb_info *) data;
  if (SSA_NAME_DEF_STMT (vdef) == info->stmt)
    return false;
  bitmap_set_bit (info->bb_set,
		  SSA_NAME_IS_DEFAULT_DEF (vdef)
		  ? ENTRY_BLOCK_PTR_FOR_FN (cfun)->index
		  : get_minimal_bb
			 (gimple_bb (SSA_NAME_DEF_STMT (vdef)),
			  gimple_bb (info->stmt))->index);
  return false;
}

/* Return probability (based on REG_BR_PROB_BASE) that I-th parameter of STMT
   will change since last invocation of STMT.

   Value 0 is reserved for compile time invariants.
   For common parameters it is REG_BR_PROB_BASE.  For loop invariants it
   ought to be REG_BR_PROB_BASE / estimated_iters.  */

static int
param_change_prob (gimple *stmt, int i)
{
  tree op = gimple_call_arg (stmt, i);
  basic_block bb = gimple_bb (stmt);

  if (TREE_CODE (op) == WITH_SIZE_EXPR)
    op = TREE_OPERAND (op, 0);

  tree base = get_base_address (op);

  /* Global invariants never change.  */
  if (is_gimple_min_invariant (base))
    return 0;

  /* We would have to do non-trivial analysis to really work out what
     is the probability of the value to change (i.e. when the init statement
     is in a sibling loop of the call).

     We make a conservative estimate: when the call is executed N times
     more often than the statement defining the value, we take the
     frequency 1/N.  */
  if (TREE_CODE (base) == SSA_NAME)
    {
      int init_freq;

      if (!bb->frequency)
	return REG_BR_PROB_BASE;

      if (SSA_NAME_IS_DEFAULT_DEF (base))
	init_freq = ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency;
      else
	init_freq = get_minimal_bb
		      (gimple_bb (SSA_NAME_DEF_STMT (base)),
		       gimple_bb (stmt))->frequency;

      if (!init_freq)
	init_freq = 1;
      if (init_freq < bb->frequency)
	return MAX (GCOV_COMPUTE_SCALE (init_freq, bb->frequency), 1);
      else
	return REG_BR_PROB_BASE;
    }
  else
    {
      ao_ref refd;
      int max;
      struct record_modified_bb_info info;
      bitmap_iterator bi;
      unsigned index;
      tree init = ctor_for_folding (base);

      if (init != error_mark_node)
	return 0;
      if (!bb->frequency)
	return REG_BR_PROB_BASE;
      ao_ref_init (&refd, op);
      info.stmt = stmt;
      info.bb_set = BITMAP_ALLOC (NULL);
      walk_aliased_vdefs (&refd, gimple_vuse (stmt), record_modified, &info,
			  NULL);
      if (bitmap_bit_p (info.bb_set, bb->index))
	{
	  BITMAP_FREE (info.bb_set);
	  return REG_BR_PROB_BASE;
	}

      /* Assume that every memory is initialized at entry.
	 TODO: Can we easily determine if value is always defined
	 and thus we may skip entry block?  */
      if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency)
	max = ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency;
      else
	max = 1;

      EXECUTE_IF_SET_IN_BITMAP (info.bb_set, 0, index, bi)
	max = MIN (max, BASIC_BLOCK_FOR_FN (cfun, index)->frequency);

      BITMAP_FREE (info.bb_set);
      if (max < bb->frequency)
	return MAX (GCOV_COMPUTE_SCALE (max, bb->frequency), 1);
      else
	return REG_BR_PROB_BASE;
    }
}
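
/* Numeric sketch (hypothetical frequencies): if the defining statement
   runs with frequency 100 and the call site sits in a loop with frequency
   1000, the argument is assumed to change roughly once per ten
   invocations, so the returned probability is about
   REG_BR_PROB_BASE / 10, computed via GCOV_COMPUTE_SCALE above.  */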

/* Find whether a basic block BB is the final block of a (half) diamond CFG
   sub-graph and if the predicate the condition depends on is known.  If so,
   return true and store the predicate in *P.  */

static bool
phi_result_unknown_predicate (struct ipa_node_params *info,
			      inline_summary *summary, basic_block bb,
			      predicate *p,
			      vec<predicate_t> nonconstant_names)
{
  edge e;
  edge_iterator ei;
  basic_block first_bb = NULL;
  gimple *stmt;

  if (single_pred_p (bb))
    {
      *p = false;
      return true;
    }

  FOR_EACH_EDGE (e, ei, bb->preds)
    {
      if (single_succ_p (e->src))
	{
	  if (!single_pred_p (e->src))
	    return false;
	  if (!first_bb)
	    first_bb = single_pred (e->src);
	  else if (single_pred (e->src) != first_bb)
	    return false;
	}
      else
	{
	  if (!first_bb)
	    first_bb = e->src;
	  else if (e->src != first_bb)
	    return false;
	}
    }

  if (!first_bb)
    return false;

  stmt = last_stmt (first_bb);
  if (!stmt
      || gimple_code (stmt) != GIMPLE_COND
      || !is_gimple_ip_invariant (gimple_cond_rhs (stmt)))
    return false;

  *p = will_be_nonconstant_expr_predicate (info, summary,
					   gimple_cond_lhs (stmt),
					   nonconstant_names);
  if (*p == true)
    return false;
  else
    return true;
}

/* Given a PHI statement in a function described by inline properties SUMMARY
   and *P being the predicate describing whether the selected PHI argument is
   known, store a predicate for the result of the PHI statement into
   NONCONSTANT_NAMES, if possible.  */

static void
predicate_for_phi_result (struct inline_summary *summary, gphi *phi,
			  predicate *p,
			  vec<predicate_t> nonconstant_names)
{
  unsigned i;

  for (i = 0; i < gimple_phi_num_args (phi); i++)
    {
      tree arg = gimple_phi_arg (phi, i)->def;
      if (!is_gimple_min_invariant (arg))
	{
	  gcc_assert (TREE_CODE (arg) == SSA_NAME);
	  *p = p->or_with (summary->conds,
			   nonconstant_names[SSA_NAME_VERSION (arg)]);
	  if (*p == true)
	    return;
	}
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "\t\tphi predicate: ");
      p->dump (dump_file, summary->conds);
    }
  nonconstant_names[SSA_NAME_VERSION (gimple_phi_result (phi))] = *p;
}

/* Return predicate specifying when the array index in access OP becomes
   non-constant.  */

static predicate
array_index_predicate (inline_summary *info,
		       vec< predicate_t> nonconstant_names, tree op)
{
  predicate p = false;
  while (handled_component_p (op))
    {
      if (TREE_CODE (op) == ARRAY_REF || TREE_CODE (op) == ARRAY_RANGE_REF)
	{
	  if (TREE_CODE (TREE_OPERAND (op, 1)) == SSA_NAME)
	    p = p.or_with (info->conds,
			   nonconstant_names[SSA_NAME_VERSION
					     (TREE_OPERAND (op, 1))]);
	}
      op = TREE_OPERAND (op, 0);
    }
  return p;
}
1890
1891 /* For a typical usage of __builtin_expect (a<b, 1), we
1892 may introduce an extra relation stmt:
1893 With the builtin, we have
1894 t1 = a <= b;
1895 t2 = (long int) t1;
1896 t3 = __builtin_expect (t2, 1);
1897 if (t3 != 0)
1898 goto ...
1899 Without the builtin, we have
1900 if (a<=b)
1901 goto...
1902 This affects the size/time estimation and may have
1903 an impact on the earlier inlining.
1904 Here we find this pattern and fix it up later. */
1905
1906 static gimple *
1907 find_foldable_builtin_expect (basic_block bb)
1908 {
1909 gimple_stmt_iterator bsi;
1910
1911 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1912 {
1913 gimple *stmt = gsi_stmt (bsi);
1914 if (gimple_call_builtin_p (stmt, BUILT_IN_EXPECT)
1915 || gimple_call_internal_p (stmt, IFN_BUILTIN_EXPECT))
1916 {
1917 tree var = gimple_call_lhs (stmt);
1918 tree arg = gimple_call_arg (stmt, 0);
1919 use_operand_p use_p;
1920 gimple *use_stmt;
1921 bool match = false;
1922 bool done = false;
1923
1924 if (!var || !arg)
1925 continue;
1926 gcc_assert (TREE_CODE (var) == SSA_NAME);
1927
1928 while (TREE_CODE (arg) == SSA_NAME)
1929 {
1930 gimple *stmt_tmp = SSA_NAME_DEF_STMT (arg);
1931 if (!is_gimple_assign (stmt_tmp))
1932 break;
1933 switch (gimple_assign_rhs_code (stmt_tmp))
1934 {
1935 case LT_EXPR:
1936 case LE_EXPR:
1937 case GT_EXPR:
1938 case GE_EXPR:
1939 case EQ_EXPR:
1940 case NE_EXPR:
1941 match = true;
1942 done = true;
1943 break;
1944 CASE_CONVERT:
1945 break;
1946 default:
1947 done = true;
1948 break;
1949 }
1950 if (done)
1951 break;
1952 arg = gimple_assign_rhs1 (stmt_tmp);
1953 }
1954
1955 if (match && single_imm_use (var, &use_p, &use_stmt)
1956 && gimple_code (use_stmt) == GIMPLE_COND)
1957 return use_stmt;
1958 }
1959 }
1960 return NULL;
1961 }
1962
1963 /* Return true when the basic block contains only clobbers followed by RESX.
1964 Such BBs are kept around to make removal of dead stores possible with
1965 presence of EH and will be optimized out by optimize_clobbers later in the
1966 game.
1967 
1968 NEED_EH is used to recurse in case the clobber has non-EH predecessors
1969 that can be clobber-only, too. When it is false, the RESX is not necessary
1970 at the end of the basic block. */
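
/* A typical instance (sketched, not taken from a real dump) is an EH
   cleanup block of the form

	<bb N>:
	x = {CLOBBER};
	y = {CLOBBER};
	resx 1;

   which optimize_clobbers will remove entirely, so we do not account it.  */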
1971
1972 static bool
1973 clobber_only_eh_bb_p (basic_block bb, bool need_eh = true)
1974 {
1975 gimple_stmt_iterator gsi = gsi_last_bb (bb);
1976 edge_iterator ei;
1977 edge e;
1978
1979 if (need_eh)
1980 {
1981 if (gsi_end_p (gsi))
1982 return false;
1983 if (gimple_code (gsi_stmt (gsi)) != GIMPLE_RESX)
1984 return false;
1985 gsi_prev (&gsi);
1986 }
1987 else if (!single_succ_p (bb))
1988 return false;
1989
1990 for (; !gsi_end_p (gsi); gsi_prev (&gsi))
1991 {
1992 gimple *stmt = gsi_stmt (gsi);
1993 if (is_gimple_debug (stmt))
1994 continue;
1995 if (gimple_clobber_p (stmt))
1996 continue;
1997 if (gimple_code (stmt) == GIMPLE_LABEL)
1998 break;
1999 return false;
2000 }
2001
2002 /* See if all predecessors are either throws or clobber-only BBs. */
2003 FOR_EACH_EDGE (e, ei, bb->preds)
2004 if (!(e->flags & EDGE_EH)
2005 && !clobber_only_eh_bb_p (e->src, false))
2006 return false;
2007
2008 return true;
2009 }
2010
2011 /* Return true if STMT computes a floating-point expression that may be
2012 affected by -ffast-math and similar flags. */
2013
2014 static bool
2015 fp_expression_p (gimple *stmt)
2016 {
2017 ssa_op_iter i;
2018 tree op;
2019
2020 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_DEF|SSA_OP_USE)
2021 if (FLOAT_TYPE_P (TREE_TYPE (op)))
2022 return true;
2023 return false;
2024 }
2025
2026 /* Compute function body size parameters for NODE.
2027 When EARLY is true, we compute only simple summaries without
2028 non-trivial predicates to drive the early inliner. */
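
/* Statement times below are scaled by the basic block frequency and the
   accumulated total is divided by CGRAPH_FREQ_BASE at the end. As a worked
   example (numbers made up): with CGRAPH_FREQ_BASE of 1000, a statement of
   time 2 in a block of frequency 500 (executed on average 0.5 times per
   invocation) contributes 2 * 500 = 1000 to TIME, i.e. 1.0 after the final
   division.  */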
2029
2030 static void
2031 estimate_function_body_sizes (struct cgraph_node *node, bool early)
2032 {
2033 sreal time = 0;
2034 /* Estimate static overhead for function prologue/epilogue and alignment. */
2035 int size = 2;
2036 /* Benefits are scaled by the probability of elimination, which is in the
2037 range <0,2>. */
2038 basic_block bb;
2039 struct function *my_function = DECL_STRUCT_FUNCTION (node->decl);
2040 int freq;
2041 struct inline_summary *info = inline_summaries->get (node);
2042 predicate bb_predicate;
2043 struct ipa_func_body_info fbi;
2044 vec<predicate_t> nonconstant_names = vNULL;
2045 int nblocks, n;
2046 int *order;
2047 predicate array_index = true;
2048 gimple *fix_builtin_expect_stmt;
2049
2050 gcc_assert (my_function && my_function->cfg);
2051 gcc_assert (cfun == my_function);
2052
2053 memset(&fbi, 0, sizeof(fbi));
2054 info->conds = NULL;
2055 info->size_time_table = NULL;
2056
2057 /* When optimizing and analyzing for the IPA inliner, initialize the loop
2058 optimizer so we can produce proper inline hints.
2059 
2060 When optimizing and analyzing for the early inliner, initialize node
2061 params so we can produce correct BB predicates. */
2062
2063 if (opt_for_fn (node->decl, optimize))
2064 {
2065 calculate_dominance_info (CDI_DOMINATORS);
2066 if (!early)
2067 loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);
2068 else
2069 {
2070 ipa_check_create_node_params ();
2071 ipa_initialize_node_params (node);
2072 }
2073
2074 if (ipa_node_params_sum)
2075 {
2076 fbi.node = node;
2077 fbi.info = IPA_NODE_REF (node);
2078 fbi.bb_infos = vNULL;
2079 fbi.bb_infos.safe_grow_cleared (last_basic_block_for_fn (cfun));
2080 fbi.param_count = count_formal_params(node->decl);
2081 nonconstant_names.safe_grow_cleared
2082 (SSANAMES (my_function)->length ());
2083 }
2084 }
2085
2086 if (dump_file)
2087 fprintf (dump_file, "\nAnalyzing function body size: %s\n",
2088 node->name ());
2089
2090 /* When we run into the maximal number of entries, we assign everything to
2091 the constant truth case. Be sure to have it in the list. */
2092 bb_predicate = true;
2093 info->account_size_time (0, 0, bb_predicate, bb_predicate);
2094
2095 bb_predicate = predicate::not_inlined ();
2096 info->account_size_time (2 * INLINE_SIZE_SCALE, 0, bb_predicate,
2097 bb_predicate);
2098
2099 if (fbi.info)
2100 compute_bb_predicates (&fbi, node, info);
2101 order = XNEWVEC (int, n_basic_blocks_for_fn (cfun));
2102 nblocks = pre_and_rev_post_order_compute (NULL, order, false);
2103 for (n = 0; n < nblocks; n++)
2104 {
2105 bb = BASIC_BLOCK_FOR_FN (cfun, order[n]);
2106 freq = compute_call_stmt_bb_frequency (node->decl, bb);
2107 if (clobber_only_eh_bb_p (bb))
2108 {
2109 if (dump_file && (dump_flags & TDF_DETAILS))
2110 fprintf (dump_file, "\n Ignoring BB %i;"
2111 " it will be optimized away by cleanup_clobbers\n",
2112 bb->index);
2113 continue;
2114 }
2115
2116 /* TODO: Obviously predicates can be propagated down across the CFG. */
2117 if (fbi.info)
2118 {
2119 if (bb->aux)
2120 bb_predicate = *(predicate *) bb->aux;
2121 else
2122 bb_predicate = false;
2123 }
2124 else
2125 bb_predicate = true;
2126
2127 if (dump_file && (dump_flags & TDF_DETAILS))
2128 {
2129 fprintf (dump_file, "\n BB %i predicate:", bb->index);
2130 bb_predicate.dump (dump_file, info->conds);
2131 }
2132
2133 if (fbi.info && nonconstant_names.exists ())
2134 {
2135 predicate phi_predicate;
2136 bool first_phi = true;
2137
2138 for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
2139 gsi_next (&bsi))
2140 {
2141 if (first_phi
2142 && !phi_result_unknown_predicate (fbi.info, info, bb,
2143 &phi_predicate,
2144 nonconstant_names))
2145 break;
2146 first_phi = false;
2147 if (dump_file && (dump_flags & TDF_DETAILS))
2148 {
2149 fprintf (dump_file, " ");
2150 print_gimple_stmt (dump_file, gsi_stmt (bsi), 0);
2151 }
2152 predicate_for_phi_result (info, bsi.phi (), &phi_predicate,
2153 nonconstant_names);
2154 }
2155 }
2156
2157 fix_builtin_expect_stmt = find_foldable_builtin_expect (bb);
2158
2159 for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);
2160 gsi_next (&bsi))
2161 {
2162 gimple *stmt = gsi_stmt (bsi);
2163 int this_size = estimate_num_insns (stmt, &eni_size_weights);
2164 int this_time = estimate_num_insns (stmt, &eni_time_weights);
2165 int prob;
2166 predicate will_be_nonconstant;
2167
2168 /* This relation stmt should be folded after we remove the
2169 builtin_expect call. Adjust the cost here. */
2170 if (stmt == fix_builtin_expect_stmt)
2171 {
2172 this_size--;
2173 this_time--;
2174 }
2175
2176 if (dump_file && (dump_flags & TDF_DETAILS))
2177 {
2178 fprintf (dump_file, " ");
2179 print_gimple_stmt (dump_file, stmt, 0);
2180 fprintf (dump_file, "\t\tfreq:%3.2f size:%3i time:%3i\n",
2181 ((double) freq) / CGRAPH_FREQ_BASE, this_size,
2182 this_time);
2183 }
2184
2185 if (gimple_assign_load_p (stmt) && nonconstant_names.exists ())
2186 {
2187 predicate this_array_index;
2188 this_array_index =
2189 array_index_predicate (info, nonconstant_names,
2190 gimple_assign_rhs1 (stmt));
2191 if (this_array_index != false)
2192 array_index &= this_array_index;
2193 }
2194 if (gimple_store_p (stmt) && nonconstant_names.exists ())
2195 {
2196 predicate this_array_index;
2197 this_array_index =
2198 array_index_predicate (info, nonconstant_names,
2199 gimple_get_lhs (stmt));
2200 if (this_array_index != false)
2201 array_index &= this_array_index;
2202 }
2203
2204
2205 if (is_gimple_call (stmt)
2206 && !gimple_call_internal_p (stmt))
2207 {
2208 struct cgraph_edge *edge = node->get_edge (stmt);
2209 struct ipa_call_summary *es = ipa_call_summaries->get (edge);
2210
2211 /* Special case: results of BUILT_IN_CONSTANT_P will always be
2212 resolved as constant. We however don't want to optimize
2213 out the cgraph edges. */
2214 if (nonconstant_names.exists ()
2215 && gimple_call_builtin_p (stmt, BUILT_IN_CONSTANT_P)
2216 && gimple_call_lhs (stmt)
2217 && TREE_CODE (gimple_call_lhs (stmt)) == SSA_NAME)
2218 {
2219 predicate false_p = false;
2220 nonconstant_names[SSA_NAME_VERSION (gimple_call_lhs (stmt))]
2221 = false_p;
2222 }
2223 if (ipa_node_params_sum)
2224 {
2225 int count = gimple_call_num_args (stmt);
2226 int i;
2227
2228 if (count)
2229 es->param.safe_grow_cleared (count);
2230 for (i = 0; i < count; i++)
2231 {
2232 int prob = param_change_prob (stmt, i);
2233 gcc_assert (prob >= 0 && prob <= REG_BR_PROB_BASE);
2234 es->param[i].change_prob = prob;
2235 }
2236 }
2237
2238 es->call_stmt_size = this_size;
2239 es->call_stmt_time = this_time;
2240 es->loop_depth = bb_loop_depth (bb);
2241 edge_set_predicate (edge, &bb_predicate);
2242 }
2243
2244 /* TODO: When a conditional jump or switch is known to be constant, but
2245 we did not translate it into the predicates, we really can account for
2246 just the maximum of the possible paths. */
2247 if (fbi.info)
2248 will_be_nonconstant
2249 = will_be_nonconstant_predicate (&fbi, info,
2250 stmt, nonconstant_names);
2251 else
2252 will_be_nonconstant = true;
2253 if (this_time || this_size)
2254 {
2255 this_time *= freq;
2256
2257 prob = eliminated_by_inlining_prob (stmt);
2258 if (prob == 1 && dump_file && (dump_flags & TDF_DETAILS))
2259 fprintf (dump_file,
2260 "\t\t50%% will be eliminated by inlining\n");
2261 if (prob == 2 && dump_file && (dump_flags & TDF_DETAILS))
2262 fprintf (dump_file, "\t\tWill be eliminated by inlining\n");
2263
2264 struct predicate p = bb_predicate & will_be_nonconstant;
2265
2266 /* We can ignore a statement when we proved it is never going
2267 to happen, but we cannot do that for call statements
2268 because edges are accounted for specially. */
2269
2270 if (*(is_gimple_call (stmt) ? &bb_predicate : &p) != false)
2271 {
2272 time += this_time;
2273 size += this_size;
2274 }
2275
2276 /* We account everything but the calls. Calls have their own
2277 size/time info attached to cgraph edges. This is necessary
2278 in order to make the cost disappear after inlining. */
2279 if (!is_gimple_call (stmt))
2280 {
2281 if (prob)
2282 {
2283 predicate ip = bb_predicate & predicate::not_inlined ();
2284 info->account_size_time (this_size * prob,
2285 (sreal)(this_time * prob)
2286 / (CGRAPH_FREQ_BASE * 2), ip,
2287 p);
2288 }
2289 if (prob != 2)
2290 info->account_size_time (this_size * (2 - prob),
2291 (sreal)(this_time * (2 - prob))
2292 / (CGRAPH_FREQ_BASE * 2),
2293 bb_predicate,
2294 p);
2295 }
2296
2297 if (!info->fp_expressions && fp_expression_p (stmt))
2298 {
2299 info->fp_expressions = true;
2300 if (dump_file)
2301 fprintf (dump_file, " fp_expression set\n");
2302 }
2303
2304 gcc_assert (time >= 0);
2305 gcc_assert (size >= 0);
2306 }
2307 }
2308 }
2309 set_hint_predicate (&inline_summaries->get (node)->array_index, array_index);
2310 time = time / CGRAPH_FREQ_BASE;
2311 free (order);
2312
2313 if (nonconstant_names.exists () && !early)
2314 {
2315 struct loop *loop;
2316 predicate loop_iterations = true;
2317 predicate loop_stride = true;
2318
2319 if (dump_file && (dump_flags & TDF_DETAILS))
2320 flow_loops_dump (dump_file, NULL, 0);
2321 scev_initialize ();
2322 FOR_EACH_LOOP (loop, 0)
2323 {
2324 vec<edge> exits;
2325 edge ex;
2326 unsigned int j;
2327 struct tree_niter_desc niter_desc;
2328 bb_predicate = *(predicate *) loop->header->aux;
2329
2330 exits = get_loop_exit_edges (loop);
2331 FOR_EACH_VEC_ELT (exits, j, ex)
2332 if (number_of_iterations_exit (loop, ex, &niter_desc, false)
2333 && !is_gimple_min_invariant (niter_desc.niter))
2334 {
2335 predicate will_be_nonconstant
2336 = will_be_nonconstant_expr_predicate (fbi.info, info,
2337 niter_desc.niter,
2338 nonconstant_names);
2339 if (will_be_nonconstant != true)
2340 will_be_nonconstant = bb_predicate & will_be_nonconstant;
2341 if (will_be_nonconstant != true
2342 && will_be_nonconstant != false)
2343 /* This is slightly imprecise. We may want to represent each
2344 loop with an independent predicate. */
2345 loop_iterations &= will_be_nonconstant;
2346 }
2347 exits.release ();
2348 }
2349
2350 /* To avoid quadratic behavior we analyze stride predicates only
2351 with respect to the containing loop. Thus we simply iterate
2352 over all defs in the outermost loop body. */
2353 for (loop = loops_for_fn (cfun)->tree_root->inner;
2354 loop != NULL; loop = loop->next)
2355 {
2356 basic_block *body = get_loop_body (loop);
2357 for (unsigned i = 0; i < loop->num_nodes; i++)
2358 {
2359 gimple_stmt_iterator gsi;
2360 bb_predicate = *(predicate *) body[i]->aux;
2361 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi);
2362 gsi_next (&gsi))
2363 {
2364 gimple *stmt = gsi_stmt (gsi);
2365
2366 if (!is_gimple_assign (stmt))
2367 continue;
2368
2369 tree def = gimple_assign_lhs (stmt);
2370 if (TREE_CODE (def) != SSA_NAME)
2371 continue;
2372
2373 affine_iv iv;
2374 if (!simple_iv (loop_containing_stmt (stmt),
2375 loop_containing_stmt (stmt),
2376 def, &iv, true)
2377 || is_gimple_min_invariant (iv.step))
2378 continue;
2379
2380 predicate will_be_nonconstant
2381 = will_be_nonconstant_expr_predicate (fbi.info, info,
2382 iv.step,
2383 nonconstant_names);
2384 if (will_be_nonconstant != true)
2385 will_be_nonconstant = bb_predicate & will_be_nonconstant;
2386 if (will_be_nonconstant != true
2387 && will_be_nonconstant != false)
2388 /* This is slightly imprecise. We may want to represent
2389 each loop with an independent predicate. */
2390 loop_stride = loop_stride & will_be_nonconstant;
2391 }
2392 }
2393 free (body);
2394 }
2395 set_hint_predicate (&inline_summaries->get (node)->loop_iterations,
2396 loop_iterations);
2397 set_hint_predicate (&inline_summaries->get (node)->loop_stride,
2398 loop_stride);
2399 scev_finalize ();
2400 }
2401 FOR_ALL_BB_FN (bb, my_function)
2402 {
2403 edge e;
2404 edge_iterator ei;
2405
2406 if (bb->aux)
2407 edge_predicate_pool.remove ((predicate *)bb->aux);
2408 bb->aux = NULL;
2409 FOR_EACH_EDGE (e, ei, bb->succs)
2410 {
2411 if (e->aux)
2412 edge_predicate_pool.remove ((predicate *) e->aux);
2413 e->aux = NULL;
2414 }
2415 }
2416 inline_summaries->get (node)->time = time;
2417 inline_summaries->get (node)->self_size = size;
2418 nonconstant_names.release ();
2419 ipa_release_body_info (&fbi);
2420 if (opt_for_fn (node->decl, optimize))
2421 {
2422 if (!early)
2423 loop_optimizer_finalize ();
2424 else if (!ipa_edge_args_sum)
2425 ipa_free_all_node_params ();
2426 free_dominance_info (CDI_DOMINATORS);
2427 }
2428 if (dump_file)
2429 {
2430 fprintf (dump_file, "\n");
2431 dump_inline_summary (dump_file, node);
2432 }
2433 }
2434
2435
2436 /* Compute parameters of functions used by inliner.
2437 EARLY is true when we compute parameters for the early inliner. */
2438
2439 void
2440 compute_inline_parameters (struct cgraph_node *node, bool early)
2441 {
2442 HOST_WIDE_INT self_stack_size;
2443 struct cgraph_edge *e;
2444 struct inline_summary *info;
2445
2446 gcc_assert (!node->global.inlined_to);
2447
2448 inline_summary_alloc ();
2449
2450 info = inline_summaries->get (node);
2451 info->reset (node);
2452
2453 /* Estimate the stack size for the function if we're optimizing. */
2454 self_stack_size = optimize && !node->thunk.thunk_p
2455 ? estimated_stack_frame_size (node) : 0;
2456 info->estimated_self_stack_size = self_stack_size;
2457 info->estimated_stack_size = self_stack_size;
2458 info->stack_frame_offset = 0;
2459
2460 if (node->thunk.thunk_p)
2461 {
2462 struct ipa_call_summary *es = ipa_call_summaries->get (node->callees);
2463 predicate t = true;
2464
2465 node->local.can_change_signature = false;
2466 es->call_stmt_size = eni_size_weights.call_cost;
2467 es->call_stmt_time = eni_time_weights.call_cost;
2468 info->account_size_time (INLINE_SIZE_SCALE * 2, 2, t, t);
2469 t = predicate::not_inlined ();
2470 info->account_size_time (2 * INLINE_SIZE_SCALE, 0, t, t);
2471 inline_update_overall_summary (node);
2472 info->self_size = info->size;
2473 /* We can not inline instrumentation clones. */
2474 if (node->thunk.add_pointer_bounds_args)
2475 {
2476 info->inlinable = false;
2477 node->callees->inline_failed = CIF_CHKP;
2478 }
2479 else
2480 info->inlinable = true;
2481 }
2482 else
2483 {
2484 /* Even is_gimple_min_invariant relies on current_function_decl. */
2485 push_cfun (DECL_STRUCT_FUNCTION (node->decl));
2486
2487 /* Can this function be inlined at all? */
2488 if (!opt_for_fn (node->decl, optimize)
2489 && !lookup_attribute ("always_inline",
2490 DECL_ATTRIBUTES (node->decl)))
2491 info->inlinable = false;
2492 else
2493 info->inlinable = tree_inlinable_function_p (node->decl);
2494
2495 info->contains_cilk_spawn = fn_contains_cilk_spawn_p (cfun);
2496
2497 /* Type attributes can use parameter indices to describe them. */
2498 if (TYPE_ATTRIBUTES (TREE_TYPE (node->decl)))
2499 node->local.can_change_signature = false;
2500 else
2501 {
2502 /* Otherwise, inlinable functions can always change their signature. */
2503 if (info->inlinable)
2504 node->local.can_change_signature = true;
2505 else
2506 {
2507 /* Functions calling builtin_apply or va_start cannot change their signature. */
2508 for (e = node->callees; e; e = e->next_callee)
2509 {
2510 tree cdecl = e->callee->decl;
2511 if (DECL_BUILT_IN (cdecl)
2512 && DECL_BUILT_IN_CLASS (cdecl) == BUILT_IN_NORMAL
2513 && (DECL_FUNCTION_CODE (cdecl) == BUILT_IN_APPLY_ARGS
2514 || DECL_FUNCTION_CODE (cdecl) == BUILT_IN_VA_START))
2515 break;
2516 }
2517 node->local.can_change_signature = !e;
2518 }
2519 }
2520 /* Functions called by instrumentation thunk can't change signature
2521 because instrumentation thunk modification is not supported. */
2522 if (node->local.can_change_signature)
2523 for (e = node->callers; e; e = e->next_caller)
2524 if (e->caller->thunk.thunk_p
2525 && e->caller->thunk.add_pointer_bounds_args)
2526 {
2527 node->local.can_change_signature = false;
2528 break;
2529 }
2530 estimate_function_body_sizes (node, early);
2531 pop_cfun ();
2532 }
2533 for (e = node->callees; e; e = e->next_callee)
2534 if (e->callee->comdat_local_p ())
2535 break;
2536 node->calls_comdat_local = (e != NULL);
2537
2538 /* Inlining characteristics are maintained by the cgraph_mark_inline. */
2539 info->size = info->self_size;
2540 info->stack_frame_offset = 0;
2541 info->estimated_stack_size = info->estimated_self_stack_size;
2542
2543 /* The code above should compute exactly the same result as
2544 inline_update_overall_summary, but because the computation happens in a
2545 different order, the roundoff errors result in slight changes. */
2546 inline_update_overall_summary (node);
2547 gcc_assert (info->size == info->self_size);
2548 }
2549
2550
2551 /* Compute parameters of functions used by inliner using
2552 current_function_decl. */
2553
2554 static unsigned int
2555 compute_inline_parameters_for_current (void)
2556 {
2557 compute_inline_parameters (cgraph_node::get (current_function_decl), true);
2558 return 0;
2559 }
2560
2561 namespace {
2562
2563 const pass_data pass_data_inline_parameters =
2564 {
2565 GIMPLE_PASS, /* type */
2566 "inline_param", /* name */
2567 OPTGROUP_INLINE, /* optinfo_flags */
2568 TV_INLINE_PARAMETERS, /* tv_id */
2569 0, /* properties_required */
2570 0, /* properties_provided */
2571 0, /* properties_destroyed */
2572 0, /* todo_flags_start */
2573 0, /* todo_flags_finish */
2574 };
2575
2576 class pass_inline_parameters : public gimple_opt_pass
2577 {
2578 public:
2579 pass_inline_parameters (gcc::context *ctxt)
2580 : gimple_opt_pass (pass_data_inline_parameters, ctxt)
2581 {}
2582
2583 /* opt_pass methods: */
2584 opt_pass * clone () { return new pass_inline_parameters (m_ctxt); }
2585 virtual unsigned int execute (function *)
2586 {
2587 return compute_inline_parameters_for_current ();
2588 }
2589
2590 }; // class pass_inline_parameters
2591
2592 } // anon namespace
2593
2594 gimple_opt_pass *
2595 make_pass_inline_parameters (gcc::context *ctxt)
2596 {
2597 return new pass_inline_parameters (ctxt);
2598 }
2599
2600
2601 /* Estimate the benefit of devirtualizing indirect edge IE, provided
2602 KNOWN_VALS, KNOWN_CONTEXTS and KNOWN_AGGS. */
2603
2604 static bool
2605 estimate_edge_devirt_benefit (struct cgraph_edge *ie,
2606 int *size, int *time,
2607 vec<tree> known_vals,
2608 vec<ipa_polymorphic_call_context> known_contexts,
2609 vec<ipa_agg_jump_function_p> known_aggs)
2610 {
2611 tree target;
2612 struct cgraph_node *callee;
2613 struct inline_summary *isummary;
2614 enum availability avail;
2615 bool speculative;
2616
2617 if (!known_vals.exists () && !known_contexts.exists ())
2618 return false;
2619 if (!opt_for_fn (ie->caller->decl, flag_indirect_inlining))
2620 return false;
2621
2622 target = ipa_get_indirect_edge_target (ie, known_vals, known_contexts,
2623 known_aggs, &speculative);
2624 if (!target || speculative)
2625 return false;
2626
2627 /* Account for difference in cost between indirect and direct calls. */
2628 *size -= (eni_size_weights.indirect_call_cost - eni_size_weights.call_cost);
2629 *time -= (eni_time_weights.indirect_call_cost - eni_time_weights.call_cost);
2630 gcc_checking_assert (*time >= 0);
2631 gcc_checking_assert (*size >= 0);
2632
2633 callee = cgraph_node::get (target);
2634 if (!callee || !callee->definition)
2635 return false;
2636 callee = callee->function_symbol (&avail);
2637 if (avail < AVAIL_AVAILABLE)
2638 return false;
2639 isummary = inline_summaries->get (callee);
2640 return isummary->inlinable;
2641 }
2642
2643 /* Increase SIZE, MIN_SIZE (if non-NULL) and TIME for size and time needed to
2644 handle edge E with probability PROB.
2645 Set HINTS if edge may be devirtualized.
2646 KNOWN_VALS, KNOWN_AGGS and KNOWN_CONTEXTS describe context of the call
2647 site. */
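
/* As a worked example (figures invented for illustration): with
   CGRAPH_FREQ_BASE of 1000 and REG_BR_PROB_BASE of 10000, a call of
   time 10 on an edge of frequency 2000 (two executions per invocation)
   adds 10 * 2000 / 1000 = 20 to TIME when PROB is REG_BR_PROB_BASE,
   and half of that, 10, when PROB is 5000.  */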
2648
2649 static inline void
2650 estimate_edge_size_and_time (struct cgraph_edge *e, int *size, int *min_size,
2651 sreal *time,
2652 int prob,
2653 vec<tree> known_vals,
2654 vec<ipa_polymorphic_call_context> known_contexts,
2655 vec<ipa_agg_jump_function_p> known_aggs,
2656 inline_hints *hints)
2657 {
2658 struct ipa_call_summary *es = ipa_call_summaries->get (e);
2659 int call_size = es->call_stmt_size;
2660 int call_time = es->call_stmt_time;
2661 int cur_size;
2662 if (!e->callee
2663 && estimate_edge_devirt_benefit (e, &call_size, &call_time,
2664 known_vals, known_contexts, known_aggs)
2665 && hints && e->maybe_hot_p ())
2666 *hints |= INLINE_HINT_indirect_call;
2667 cur_size = call_size * INLINE_SIZE_SCALE;
2668 *size += cur_size;
2669 if (min_size)
2670 *min_size += cur_size;
2671 if (prob == REG_BR_PROB_BASE)
2672 *time += ((sreal)(call_time * e->frequency)) / CGRAPH_FREQ_BASE;
2673 else
2674 *time += ((sreal)call_time) * (prob * e->frequency)
2675 / (CGRAPH_FREQ_BASE * REG_BR_PROB_BASE);
2676 }
2677
2678
2679
2680 /* Increase SIZE, MIN_SIZE and TIME for size and time needed to handle all
2681 calls in NODE. POSSIBLE_TRUTHS, KNOWN_VALS, KNOWN_AGGS and KNOWN_CONTEXTS
2682 describe context of the call site. */
2683
2684 static void
2685 estimate_calls_size_and_time (struct cgraph_node *node, int *size,
2686 int *min_size, sreal *time,
2687 inline_hints *hints,
2688 clause_t possible_truths,
2689 vec<tree> known_vals,
2690 vec<ipa_polymorphic_call_context> known_contexts,
2691 vec<ipa_agg_jump_function_p> known_aggs)
2692 {
2693 struct cgraph_edge *e;
2694 for (e = node->callees; e; e = e->next_callee)
2695 {
2696 struct ipa_call_summary *es = ipa_call_summaries->get (e);
2697
2698 /* Do not care about zero-sized builtins. */
2699 if (e->inline_failed && !es->call_stmt_size)
2700 {
2701 gcc_checking_assert (!es->call_stmt_time);
2702 continue;
2703 }
2704 if (!es->predicate
2705 || es->predicate->evaluate (possible_truths))
2706 {
2707 if (e->inline_failed)
2708 {
2709 /* Predicates of calls shall not use NOT_CHANGED codes,
2710 so we do not need to compute probabilities. */
2711 estimate_edge_size_and_time (e, size,
2712 es->predicate ? NULL : min_size,
2713 time, REG_BR_PROB_BASE,
2714 known_vals, known_contexts,
2715 known_aggs, hints);
2716 }
2717 else
2718 estimate_calls_size_and_time (e->callee, size, min_size, time,
2719 hints,
2720 possible_truths,
2721 known_vals, known_contexts,
2722 known_aggs);
2723 }
2724 }
2725 for (e = node->indirect_calls; e; e = e->next_callee)
2726 {
2727 struct ipa_call_summary *es = ipa_call_summaries->get (e);
2728 if (!es->predicate
2729 || es->predicate->evaluate (possible_truths))
2730 estimate_edge_size_and_time (e, size,
2731 es->predicate ? NULL : min_size,
2732 time, REG_BR_PROB_BASE,
2733 known_vals, known_contexts, known_aggs,
2734 hints);
2735 }
2736 }
2737
2738
2739 /* Estimate size and time needed to execute NODE assuming
2740 POSSIBLE_TRUTHS clause, and KNOWN_VALS, KNOWN_AGGS and KNOWN_CONTEXTS
2741 information about NODE's arguments. If non-NULL, also use the probability
2742 information present in the INLINE_PARAM_SUMMARY vector.
2743 Additionally determine the hints implied by the context. Finally compute the
2744 minimal size needed for the call that is independent of the call context and
2745 can be used for fast estimates. Return the values in RET_SIZE,
2746 RET_MIN_SIZE, RET_TIME and RET_HINTS. */
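
/* A hedged sketch of how a size_time_table entry is consumed below: an
   entry {size 4, time 10, exec_predicate P1, nonconst_predicate P2} adds
   its time to the nonspecialized estimate whenever P1 holds under
   NONSPEC_POSSIBLE_TRUTHS, but adds size and (possibly
   probability-scaled) time to the specialized estimate only when P2 also
   holds under POSSIBLE_TRUTHS.  */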
2747
2748 static void
2749 estimate_node_size_and_time (struct cgraph_node *node,
2750 clause_t possible_truths,
2751 clause_t nonspec_possible_truths,
2752 vec<tree> known_vals,
2753 vec<ipa_polymorphic_call_context> known_contexts,
2754 vec<ipa_agg_jump_function_p> known_aggs,
2755 int *ret_size, int *ret_min_size,
2756 sreal *ret_time,
2757 sreal *ret_nonspecialized_time,
2758 inline_hints *ret_hints,
2759 vec<inline_param_summary>
2760 inline_param_summary)
2761 {
2762 struct inline_summary *info = inline_summaries->get (node);
2763 size_time_entry *e;
2764 int size = 0;
2765 sreal time = 0;
2766 int min_size = 0;
2767 inline_hints hints = 0;
2768 int i;
2769
2770 if (dump_file && (dump_flags & TDF_DETAILS))
2771 {
2772 bool found = false;
2773 fprintf (dump_file, " Estimating body: %s/%i\n"
2774 " Known to be false: ", node->name (),
2775 node->order);
2776
2777 for (i = predicate::not_inlined_condition;
2778 i < (predicate::first_dynamic_condition
2779 + (int) vec_safe_length (info->conds)); i++)
2780 if (!(possible_truths & (1 << i)))
2781 {
2782 if (found)
2783 fprintf (dump_file, ", ");
2784 found = true;
2785 dump_condition (dump_file, info->conds, i);
2786 }
2787 }
2788
2789 estimate_calls_size_and_time (node, &size, &min_size, &time, &hints, possible_truths,
2790 known_vals, known_contexts, known_aggs);
2791 sreal nonspecialized_time = time;
2792
2793 for (i = 0; vec_safe_iterate (info->size_time_table, i, &e); i++)
2794 {
2795 bool nonconst = e->nonconst_predicate.evaluate (possible_truths);
2796 bool exec = e->exec_predicate.evaluate (nonspec_possible_truths);
2797 gcc_assert (!nonconst || exec);
2798 if (exec)
2799 {
2800 gcc_checking_assert (e->time >= 0);
2801 gcc_checking_assert (time >= 0);
2802
2803 /* We compute only the specialized size because the size of the
2804 nonspecialized copy is context independent.
2805 
2806 The difference between nonspecialized and specialized execution is
2807 that the nonspecialized one will not have optimized out the
2808 computations known to be constant in the specialized setting. */
2809 if (nonconst)
2810 size += e->size;
2811 nonspecialized_time += e->time;
2812 if (!nonconst)
2813 ;
2814 else if (!inline_param_summary.exists ())
2815 {
2816 if (nonconst)
2817 time += e->time;
2818 }
2819 else
2820 {
2821 int prob = e->nonconst_predicate.probability
2822 (info->conds, possible_truths,
2823 inline_param_summary);
2824 gcc_checking_assert (prob >= 0);
2825 gcc_checking_assert (prob <= REG_BR_PROB_BASE);
2826 time += e->time * prob / REG_BR_PROB_BASE;
2827 }
2828 gcc_checking_assert (time >= 0);
2829 }
2830 }
2831 gcc_checking_assert ((*info->size_time_table)[0].exec_predicate == true);
2832 gcc_checking_assert ((*info->size_time_table)[0].nonconst_predicate == true);
2833 min_size = (*info->size_time_table)[0].size;
2834 gcc_checking_assert (size >= 0);
2835 gcc_checking_assert (time >= 0);
2836 /* nonspecialized_time should always be bigger than specialized time.
2837 Roundoff issues however may get in the way. */
2838 gcc_checking_assert ((nonspecialized_time - time) >= -1);
2839
2840 /* Roundoff issues may make specialized time bigger than nonspecialized
2841 time. We do not really want that to happen because some heuristics
2842 may get confused by seeing negative speedups. */
2843 if (time > nonspecialized_time)
2844 time = nonspecialized_time;
2845
2846 if (info->loop_iterations
2847 && !info->loop_iterations->evaluate (possible_truths))
2848 hints |= INLINE_HINT_loop_iterations;
2849 if (info->loop_stride
2850 && !info->loop_stride->evaluate (possible_truths))
2851 hints |= INLINE_HINT_loop_stride;
2852 if (info->array_index
2853 && !info->array_index->evaluate (possible_truths))
2854 hints |= INLINE_HINT_array_index;
2855 if (info->scc_no)
2856 hints |= INLINE_HINT_in_scc;
2857 if (DECL_DECLARED_INLINE_P (node->decl))
2858 hints |= INLINE_HINT_declared_inline;
2859
2860 size = RDIV (size, INLINE_SIZE_SCALE);
2861 min_size = RDIV (min_size, INLINE_SIZE_SCALE);
2862
2863 if (dump_file && (dump_flags & TDF_DETAILS))
2864 fprintf (dump_file, "\n size:%i time:%f nonspec time:%f\n", (int) size,
2865 time.to_double (), nonspecialized_time.to_double ());
2866 if (ret_time)
2867 *ret_time = time;
2868 if (ret_nonspecialized_time)
2869 *ret_nonspecialized_time = nonspecialized_time;
2870 if (ret_size)
2871 *ret_size = size;
2872 if (ret_min_size)
2873 *ret_min_size = min_size;
2874 if (ret_hints)
2875 *ret_hints = hints;
2876 return;
2877 }
2878
2879
2880 /* Estimate size and time needed to execute NODE assuming that the
2881 parameters known to be constant are propagated.
2882 KNOWN_VALS and KNOWN_CONTEXTS are vectors of assumed known constant values
2883 and types for parameters. */
2884
2885 void
2886 estimate_ipcp_clone_size_and_time (struct cgraph_node *node,
2887 vec<tree> known_vals,
2888 vec<ipa_polymorphic_call_context>
2889 known_contexts,
2890 vec<ipa_agg_jump_function_p> known_aggs,
2891 int *ret_size, sreal *ret_time,
2892 sreal *ret_nonspec_time,
2893 inline_hints *hints)
2894 {
2895 clause_t clause, nonspec_clause;
2896
2897 evaluate_conditions_for_known_args (node, false, known_vals, known_aggs,
2898 &clause, &nonspec_clause);
2899 estimate_node_size_and_time (node, clause, nonspec_clause,
2900 known_vals, known_contexts,
2901 known_aggs, ret_size, NULL, ret_time,
2902 ret_nonspec_time, hints, vNULL);
2903 }
2904
2905
2906 /* Update summary information of inline clones after inlining.
2907 Compute peak stack usage. */
2908
2909 static void
2910 inline_update_callee_summaries (struct cgraph_node *node, int depth)
2911 {
2912 struct cgraph_edge *e;
2913 struct inline_summary *callee_info = inline_summaries->get (node);
2914 struct inline_summary *caller_info = inline_summaries->get (node->callers->caller);
2915 HOST_WIDE_INT peak;
2916
2917 callee_info->stack_frame_offset
2918 = caller_info->stack_frame_offset
2919 + caller_info->estimated_self_stack_size;
2920 peak = callee_info->stack_frame_offset
2921 + callee_info->estimated_self_stack_size;
2922 if (inline_summaries->get (node->global.inlined_to)->estimated_stack_size < peak)
2923 inline_summaries->get (node->global.inlined_to)->estimated_stack_size = peak;
2924 ipa_propagate_frequency (node);
2925 for (e = node->callees; e; e = e->next_callee)
2926 {
2927 if (!e->inline_failed)
2928 inline_update_callee_summaries (e->callee, depth);
2929 ipa_call_summaries->get (e)->loop_depth += depth;
2930 }
2931 for (e = node->indirect_calls; e; e = e->next_callee)
2932 ipa_call_summaries->get (e)->loop_depth += depth;
2933 }
2934
2935 /* Update change_prob of EDGE after INLINED_EDGE has been inlined.
2936 When function A is inlined in B and A calls C with a parameter that
2937 changes with probability PROB1, and C is known to be a passthrough
2938 of an argument of B that changes with probability PROB2, the
2939 probability of change is now PROB1*PROB2. */
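
/* Worked example (made-up numbers, assuming REG_BR_PROB_BASE of 10000):
   PROB1 = 5000 (50%) and PROB2 = 2000 (20%) combine to
   5000 * 2000 / 10000 = 1000, i.e. a 10% probability of change. When
   both inputs are nonzero but the scaled product rounds to zero, the
   code below keeps a minimal probability of 1 rather than claiming the
   parameter never changes.  */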
2940
2941 static void
2942 remap_edge_change_prob (struct cgraph_edge *inlined_edge,
2943 struct cgraph_edge *edge)
2944 {
2945 if (ipa_node_params_sum)
2946 {
2947 int i;
2948 struct ipa_edge_args *args = IPA_EDGE_REF (edge);
2949 struct ipa_call_summary *es = ipa_call_summaries->get (edge);
2950 struct ipa_call_summary *inlined_es
2951 = ipa_call_summaries->get (inlined_edge);
2952
2953 for (i = 0; i < ipa_get_cs_argument_count (args); i++)
2954 {
2955 struct ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, i);
2956 if (jfunc->type == IPA_JF_PASS_THROUGH
2957 || jfunc->type == IPA_JF_ANCESTOR)
2958 {
2959 int id = jfunc->type == IPA_JF_PASS_THROUGH
2960 ? ipa_get_jf_pass_through_formal_id (jfunc)
2961 : ipa_get_jf_ancestor_formal_id (jfunc);
2962 if (id < (int) inlined_es->param.length ())
2963 {
2964 int prob1 = es->param[i].change_prob;
2965 int prob2 = inlined_es->param[id].change_prob;
2966 int prob = combine_probabilities (prob1, prob2);
2967
2968 if (prob1 && prob2 && !prob)
2969 prob = 1;
2970
2971 es->param[i].change_prob = prob;
2972 }
2973 }
2974 }
2975 }
2976 }
2977
2978 /* Update edge summaries of NODE after INLINED_EDGE has been inlined.
2979
2980 Remap predicates of callees of NODE. Rest of arguments match
2981 remap_predicate.
2982
2983 Also update change probabilities. */
2984
2985 static void
2986 remap_edge_summaries (struct cgraph_edge *inlined_edge,
2987 struct cgraph_node *node,
2988 struct inline_summary *info,
2989 struct inline_summary *callee_info,
2990 vec<int> operand_map,
2991 vec<int> offset_map,
2992 clause_t possible_truths,
2993 predicate *toplev_predicate)
2994 {
2995 struct cgraph_edge *e, *next;
2996 for (e = node->callees; e; e = next)
2997 {
2998 struct ipa_call_summary *es = ipa_call_summaries->get (e);
2999 predicate p;
3000 next = e->next_callee;
3001
3002 if (e->inline_failed)
3003 {
3004 remap_edge_change_prob (inlined_edge, e);
3005
3006 if (es->predicate)
3007 {
3008 p = es->predicate->remap_after_inlining
3009 (info, callee_info, operand_map,
3010 offset_map, possible_truths,
3011 *toplev_predicate);
3012 edge_set_predicate (e, &p);
3013 }
3014 else
3015 edge_set_predicate (e, toplev_predicate);
3016 }
3017 else
3018 remap_edge_summaries (inlined_edge, e->callee, info, callee_info,
3019 operand_map, offset_map, possible_truths,
3020 toplev_predicate);
3021 }
3022 for (e = node->indirect_calls; e; e = next)
3023 {
3024 struct ipa_call_summary *es = ipa_call_summaries->get (e);
3025 predicate p;
3026 next = e->next_callee;
3027
3028 remap_edge_change_prob (inlined_edge, e);
3029 if (es->predicate)
3030 {
3031 p = es->predicate->remap_after_inlining
3032 (info, callee_info, operand_map, offset_map,
3033 possible_truths, *toplev_predicate);
3034 edge_set_predicate (e, &p);
3035 }
3036 else
3037 edge_set_predicate (e, toplev_predicate);
3038 }
3039 }
3040
3041 /* Same as remap_predicate, but sets the result into the hint *HINT. */
3042
3043 static void
3044 remap_hint_predicate (struct inline_summary *info,
3045 struct inline_summary *callee_info,
3046 predicate **hint,
3047 vec<int> operand_map,
3048 vec<int> offset_map,
3049 clause_t possible_truths,
3050 predicate *toplev_predicate)
3051 {
3052 predicate p;
3053
3054 if (!*hint)
3055 return;
3056 p = (*hint)->remap_after_inlining
3057 (info, callee_info,
3058 operand_map, offset_map,
3059 possible_truths, *toplev_predicate);
3060 if (p != false && p != true)
3061 {
3062 if (!*hint)
3063 set_hint_predicate (hint, p);
3064 else
3065 **hint &= p;
3066 }
3067 }
3068
3069 /* We inlined EDGE. Update summary of the function we inlined into. */
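
/* A hedged illustration of the remapping done below: if the callee's
   condition tests its parameter 0 and the call passes the caller's
   parameter 2 through unchanged (a NOP_EXPR pass-through jump function),
   then operand_map[0] == 2 and the remapped predicate tests the caller's
   parameter 2 instead.  */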
3070
3071 void
3072 inline_merge_summary (struct cgraph_edge *edge)
3073 {
3074 struct inline_summary *callee_info = inline_summaries->get (edge->callee);
3075 struct cgraph_node *to = (edge->caller->global.inlined_to
3076 ? edge->caller->global.inlined_to : edge->caller);
3077 struct inline_summary *info = inline_summaries->get (to);
3078 clause_t clause = 0; /* not_inline is known to be false. */
3079 size_time_entry *e;
3080 vec<int> operand_map = vNULL;
3081 vec<int> offset_map = vNULL;
3082 int i;
3083 predicate toplev_predicate;
3084 predicate true_p = true;
3085 struct ipa_call_summary *es = ipa_call_summaries->get (edge);
3086
3087 if (es->predicate)
3088 toplev_predicate = *es->predicate;
3089 else
3090 toplev_predicate = true;
3091
3092 info->fp_expressions |= callee_info->fp_expressions;
3093
3094 if (callee_info->conds)
3095 evaluate_properties_for_edge (edge, true, &clause, NULL, NULL, NULL, NULL);
3096 if (ipa_node_params_sum && callee_info->conds)
3097 {
3098 struct ipa_edge_args *args = IPA_EDGE_REF (edge);
3099 int count = ipa_get_cs_argument_count (args);
3100 int i;
3101
3102 if (count)
3103 {
3104 operand_map.safe_grow_cleared (count);
3105 offset_map.safe_grow_cleared (count);
3106 }
3107 for (i = 0; i < count; i++)
3108 {
3109 struct ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, i);
3110 int map = -1;
3111
3112 /* TODO: handle non-NOPs when merging. */
3113 if (jfunc->type == IPA_JF_PASS_THROUGH)
3114 {
3115 if (ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
3116 map = ipa_get_jf_pass_through_formal_id (jfunc);
3117 if (!ipa_get_jf_pass_through_agg_preserved (jfunc))
3118 offset_map[i] = -1;
3119 }
3120 else if (jfunc->type == IPA_JF_ANCESTOR)
3121 {
3122 HOST_WIDE_INT offset = ipa_get_jf_ancestor_offset (jfunc);
3123 if (offset >= 0 && offset < INT_MAX)
3124 {
3125 map = ipa_get_jf_ancestor_formal_id (jfunc);
3126 if (!ipa_get_jf_ancestor_agg_preserved (jfunc))
3127 offset = -1;
3128 offset_map[i] = offset;
3129 }
3130 }
3131 operand_map[i] = map;
3132 gcc_assert (map < ipa_get_param_count (IPA_NODE_REF (to)));
3133 }
3134 }
3135 for (i = 0; vec_safe_iterate (callee_info->size_time_table, i, &e); i++)
3136 {
3137 predicate p;
3138 p = e->exec_predicate.remap_after_inlining
3139 (info, callee_info, operand_map,
3140 offset_map, clause,
3141 toplev_predicate);
3142 predicate nonconstp;
3143 nonconstp = e->nonconst_predicate.remap_after_inlining
3144 (info, callee_info, operand_map,
3145 offset_map, clause,
3146 toplev_predicate);
3147 if (p != false && nonconstp != false)
3148 {
3149 sreal add_time = ((sreal)e->time * edge->frequency) / CGRAPH_FREQ_BASE;
3150 int prob = e->nonconst_predicate.probability (callee_info->conds,
3151 clause, es->param);
3152 add_time = add_time * prob / REG_BR_PROB_BASE;
3153 if (prob != REG_BR_PROB_BASE
3154 && dump_file && (dump_flags & TDF_DETAILS))
3155 {
3156 fprintf (dump_file, "\t\tScaling time by probability:%f\n",
3157 (double) prob / REG_BR_PROB_BASE);
3158 }
3159 info->account_size_time (e->size, add_time, p, nonconstp);
3160 }
3161 }
3162 remap_edge_summaries (edge, edge->callee, info, callee_info, operand_map,
3163 offset_map, clause, &toplev_predicate);
3164 remap_hint_predicate (info, callee_info,
3165 &callee_info->loop_iterations,
3166 operand_map, offset_map, clause, &toplev_predicate);
3167 remap_hint_predicate (info, callee_info,
3168 &callee_info->loop_stride,
3169 operand_map, offset_map, clause, &toplev_predicate);
3170 remap_hint_predicate (info, callee_info,
3171 &callee_info->array_index,
3172 operand_map, offset_map, clause, &toplev_predicate);
3173
3174 inline_update_callee_summaries (edge->callee,
3175 ipa_call_summaries->get (edge)->loop_depth);
3176
3177 /* We do not maintain predicates of inlined edges; free them. */
3178 edge_set_predicate (edge, &true_p);
3179 /* Similarly remove param summaries. */
3180 es->param.release ();
3181 operand_map.release ();
3182 offset_map.release ();
3183 }
3184
3185 /* For performance reasons inline_merge_summary does not update the overall
3186 size and time. Recompute them here. */
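
/* The final division below rounds to the nearest size unit; e.g. assuming,
   purely for illustration, INLINE_SIZE_SCALE of 2, an accumulated size of
   7 becomes (7 + 1) / 2 = 4.  */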
3187
3188 void
3189 inline_update_overall_summary (struct cgraph_node *node)
3190 {
3191 struct inline_summary *info = inline_summaries->get (node);
3192 size_time_entry *e;
3193 int i;
3194
3195 info->size = 0;
3196 info->time = 0;
3197 for (i = 0; vec_safe_iterate (info->size_time_table, i, &e); i++)
3198 {
3199 info->size += e->size;
3200 info->time += e->time;
3201 }
3202 estimate_calls_size_and_time (node, &info->size, &info->min_size,
3203 &info->time, NULL,
3204 ~(clause_t) (1 << predicate::false_condition),
3205 vNULL, vNULL, vNULL);
3206 info->size = (info->size + INLINE_SIZE_SCALE / 2) / INLINE_SIZE_SCALE;
3207 }
3208
3209 /* Return hints derived from EDGE. */
3210 int
3211 simple_edge_hints (struct cgraph_edge *edge)
3212 {
3213 int hints = 0;
3214 struct cgraph_node *to = (edge->caller->global.inlined_to
3215 ? edge->caller->global.inlined_to : edge->caller);
3216 struct cgraph_node *callee = edge->callee->ultimate_alias_target ();
3217 if (inline_summaries->get (to)->scc_no
3218 && inline_summaries->get (to)->scc_no
3219 == inline_summaries->get (callee)->scc_no
3220 && !edge->recursive_p ())
3221 hints |= INLINE_HINT_same_scc;
3222
3223 if (callee->lto_file_data && edge->caller->lto_file_data
3224 && edge->caller->lto_file_data != callee->lto_file_data
3225 && !callee->merged_comdat && !callee->icf_merged)
3226 hints |= INLINE_HINT_cross_module;
3227
3228 return hints;
3229 }
3230
3231 /* Estimate the time cost for the caller when inlining EDGE.
3232 Only to be called via estimate_edge_time, that handles the
3233 caching mechanism.
3234
3235 When caching, also update the cache entry. Compute both time and
3236 size, since we always need both metrics eventually. */
3237
3238 sreal
3239 do_estimate_edge_time (struct cgraph_edge *edge)
3240 {
3241 sreal time, nonspec_time;
3242 int size;
3243 inline_hints hints;
3244 struct cgraph_node *callee;
3245 clause_t clause, nonspec_clause;
3246 vec<tree> known_vals;
3247 vec<ipa_polymorphic_call_context> known_contexts;
3248 vec<ipa_agg_jump_function_p> known_aggs;
3249 struct ipa_call_summary *es = ipa_call_summaries->get (edge);
3250 int min_size;
3251
3252 callee = edge->callee->ultimate_alias_target ();
3253
3254 gcc_checking_assert (edge->inline_failed);
3255 evaluate_properties_for_edge (edge, true,
3256 &clause, &nonspec_clause, &known_vals,
3257 &known_contexts, &known_aggs);
3258 estimate_node_size_and_time (callee, clause, nonspec_clause, known_vals,
3259 known_contexts, known_aggs, &size, &min_size,
3260 &time, &nonspec_time, &hints, es->param);
3261
3262 /* When we have profile feedback, we can quite safely identify hot
3263 edges and for those we disable size limits. Don't do that when the
3264 probability that the caller will call the callee is low, however,
3265 since it may hurt optimization of the caller's hot path. */
3266 if (edge->count && edge->maybe_hot_p ()
3267 && (edge->count * 2
3268 > (edge->caller->global.inlined_to
3269 ? edge->caller->global.inlined_to->count : edge->caller->count)))
3270 hints |= INLINE_HINT_known_hot;
3271
3272 known_vals.release ();
3273 known_contexts.release ();
3274 known_aggs.release ();
3275 gcc_checking_assert (size >= 0);
3276 gcc_checking_assert (time >= 0);
3277
3278 /* When caching, update the cache entry. */
3279 if (edge_growth_cache.exists ())
3280 {
3281 inline_summaries->get (edge->callee)->min_size = min_size;
3282 if ((int) edge_growth_cache.length () <= edge->uid)
3283 edge_growth_cache.safe_grow_cleared (symtab->edges_max_uid);
3284 edge_growth_cache[edge->uid].time = time;
3285 edge_growth_cache[edge->uid].nonspec_time = nonspec_time;
3286
3287 edge_growth_cache[edge->uid].size = size + (size >= 0);
3288 hints |= simple_edge_hints (edge);
3289 edge_growth_cache[edge->uid].hints = hints + 1;
3290 }
3291 return time;
3292 }
3293
3294
3295 /* Return estimated callee growth after inlining EDGE.
3296 Only to be called via estimate_edge_size. */
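
/* The growth cache stores the size biased by one so that a zero entry
   means "not computed yet": a nonnegative size N is cached as N + 1 and
   decoded back to N below, so e.g. a real size of 0 is cached as 1
   (hints are biased the same way in do_estimate_edge_time).  */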
3297
3298 int
3299 do_estimate_edge_size (struct cgraph_edge *edge)
3300 {
3301 int size;
3302 struct cgraph_node *callee;
3303 clause_t clause, nonspec_clause;
3304 vec<tree> known_vals;
3305 vec<ipa_polymorphic_call_context> known_contexts;
3306 vec<ipa_agg_jump_function_p> known_aggs;
3307
3308 /* When we do caching, use do_estimate_edge_time to populate the entry. */
3309
3310 if (edge_growth_cache.exists ())
3311 {
3312 do_estimate_edge_time (edge);
3313 size = edge_growth_cache[edge->uid].size;
3314 gcc_checking_assert (size);
3315 return size - (size > 0);
3316 }
3317
3318 callee = edge->callee->ultimate_alias_target ();
3319
3320 /* Early inliner runs without caching, go ahead and do the dirty work. */
3321 gcc_checking_assert (edge->inline_failed);
3322 evaluate_properties_for_edge (edge, true,
3323 &clause, &nonspec_clause,
3324 &known_vals, &known_contexts,
3325 &known_aggs);
3326 estimate_node_size_and_time (callee, clause, nonspec_clause, known_vals,
3327 known_contexts, known_aggs, &size, NULL, NULL,
3328 NULL, NULL, vNULL);
3329 known_vals.release ();
3330 known_contexts.release ();
3331 known_aggs.release ();
3332 return size;
3333 }
3334
3335
3336 /* Estimate the hints for inlining EDGE.
3337 Only to be called via estimate_edge_hints. */
3338
3339 inline_hints
3340 do_estimate_edge_hints (struct cgraph_edge *edge)
3341 {
3342 inline_hints hints;
3343 struct cgraph_node *callee;
3344 clause_t clause, nonspec_clause;
3345 vec<tree> known_vals;
3346 vec<ipa_polymorphic_call_context> known_contexts;
3347 vec<ipa_agg_jump_function_p> known_aggs;
3348
3349 /* When we do caching, use do_estimate_edge_time to populate the entry. */
3350
3351 if (edge_growth_cache.exists ())
3352 {
3353 do_estimate_edge_time (edge);
3354 hints = edge_growth_cache[edge->uid].hints;
3355 gcc_checking_assert (hints);
3356 return hints - 1;
3357 }
3358
3359 callee = edge->callee->ultimate_alias_target ();
3360
3361 /* Early inliner runs without caching, go ahead and do the dirty work. */
3362 gcc_checking_assert (edge->inline_failed);
3363 evaluate_properties_for_edge (edge, true,
3364 &clause, &nonspec_clause,
3365 &known_vals, &known_contexts,
3366 &known_aggs);
3367 estimate_node_size_and_time (callee, clause, nonspec_clause, known_vals,
3368 known_contexts, known_aggs, NULL, NULL,
3369 NULL, NULL, &hints, vNULL);
3370 known_vals.release ();
3371 known_contexts.release ();
3372 known_aggs.release ();
3373 hints |= simple_edge_hints (edge);
3374 return hints;
3375 }
3376
3377 /* Estimate the size of NODE after inlining EDGE which should be an
3378 edge to either NODE or a call inlined into NODE. */
3379
3380 int
3381 estimate_size_after_inlining (struct cgraph_node *node,
3382 struct cgraph_edge *edge)
3383 {
3384 struct ipa_call_summary *es = ipa_call_summaries->get (edge);
3385 if (!es->predicate || *es->predicate != false)
3386 {
3387 int size = inline_summaries->get (node)->size + estimate_edge_growth (edge);
3388 gcc_assert (size >= 0);
3389 return size;
3390 }
3391 return inline_summaries->get (node)->size;
3392 }
3393
3394
3395 struct growth_data
3396 {
3397 struct cgraph_node *node;
3398 bool self_recursive;
3399 bool uninlinable;
3400 int growth;
3401 };
3402
3403
3404 /* Worker for do_estimate_growth. Collect growth for all callers. */
3405
3406 static bool
3407 do_estimate_growth_1 (struct cgraph_node *node, void *data)
3408 {
3409 struct cgraph_edge *e;
3410 struct growth_data *d = (struct growth_data *) data;
3411
3412 for (e = node->callers; e; e = e->next_caller)
3413 {
3414 gcc_checking_assert (e->inline_failed);
3415
3416 if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
3417 {
3418 d->uninlinable = true;
3419 continue;
3420 }
3421
3422 if (e->recursive_p ())
3423 {
3424 d->self_recursive = true;
3425 continue;
3426 }
3427 d->growth += estimate_edge_growth (e);
3428 }
3429 return false;
3430 }
3431
3432
3433 /* Estimate the growth caused by inlining NODE into all callees. */
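
/* A worked instance of the COMDAT adjustment done below (illustrative
   numbers only): with PARAM_COMDAT_SHARING_PROBABILITY at 20 and a body
   of size 100, we subtract (100 * (100 - 20) + 50) / 100 = 80 units,
   i.e. we expect the body to disappear from this unit with 80%
   probability.  */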
3434
3435 int
3436 estimate_growth (struct cgraph_node *node)
3437 {
3438 struct growth_data d = { node, false, false, 0 };
3439 struct inline_summary *info = inline_summaries->get (node);
3440
3441 node->call_for_symbol_and_aliases (do_estimate_growth_1, &d, true);
3442
3443 /* For self-recursive functions the growth estimation really should be
3444 infinity. We don't want to return very large values because the growth
3445 plays various roles in badness computation fractions. Be sure to not
3446 return zero or negative growths. */
3447 if (d.self_recursive)
3448 d.growth = d.growth < info->size ? info->size : d.growth;
3449 else if (DECL_EXTERNAL (node->decl) || d.uninlinable)
3450 ;
3451 else
3452 {
3453 if (node->will_be_removed_from_program_if_no_direct_calls_p ())
3454 d.growth -= info->size;
3455 /* COMDAT functions are very often not shared across multiple units
3456 since they come from various template instantiations.
3457 Take this into account. */
3458 else if (DECL_COMDAT (node->decl)
3459 && node->can_remove_if_no_direct_calls_p ())
3460 d.growth -= (info->size
3461 * (100 - PARAM_VALUE (PARAM_COMDAT_SHARING_PROBABILITY))
3462 + 50) / 100;
3463 }
3464
3465 return d.growth;
3466 }
3467
3468 /* Return true if NODE is not removable or has at least MAX_CALLERS callers. */
3469
3470 static bool
3471 check_callers (cgraph_node *node, int *max_callers)
3472 {
3473 ipa_ref *ref;
3474
3475 if (!node->can_remove_if_no_direct_calls_and_refs_p ())
3476 return true;
3477
3478 for (cgraph_edge *e = node->callers; e; e = e->next_caller)
3479 {
3480 (*max_callers)--;
3481 if (!*max_callers
3482 || cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
3483 return true;
3484 }
3485
3486 FOR_EACH_ALIAS (node, ref)
3487 if (check_callers (dyn_cast <cgraph_node *> (ref->referring), max_callers))
3488 return true;
3489
3490 return false;
3491 }
3492
3493
3494 /* Make a cheap estimation whether the growth of NODE is likely positive,
3495 knowing EDGE_GROWTH of one particular edge.
3496 We assume that most of the other edges will have similar growth
3497 and skip the computation if there are too many callers. */
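
/* The caller limit computed below is size * 4 / edge_growth + 2; e.g.
   for a body of size 100 and EDGE_GROWTH of 20 we scan at most 22
   callers, since past that point the accumulated growth certainly
   exceeds four times the body size that removal could save (example
   numbers are made up).  */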
3498
3499 bool
3500 growth_likely_positive (struct cgraph_node *node,
3501 int edge_growth)
3502 {
3503 int max_callers;
3504 struct cgraph_edge *e;
3505 gcc_checking_assert (edge_growth > 0);
3506
3507 /* First quickly check if NODE is removable at all. */
3508 if (DECL_EXTERNAL (node->decl))
3509 return true;
3510 if (!node->can_remove_if_no_direct_calls_and_refs_p ()
3511 || node->address_taken)
3512 return true;
3513
3514 max_callers = inline_summaries->get (node)->size * 4 / edge_growth + 2;
3515
3516 for (e = node->callers; e; e = e->next_caller)
3517 {
3518 max_callers--;
3519 if (!max_callers
3520 || cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
3521 return true;
3522 }
3523
3524 ipa_ref *ref;
3525 FOR_EACH_ALIAS (node, ref)
3526 if (check_callers (dyn_cast <cgraph_node *> (ref->referring), &max_callers))
3527 return true;
3528
3529 /* Unlike for functions called once, we play unsafe with
3530 COMDATs. We can allow that since we know the functions
3531 in consideration are small (and thus the risk is small) and
3532 moreover the growth estimates already account for the fact that
3533 COMDAT functions may or may not disappear when eliminated from the
3534 current unit. With good probability, making the aggressive
3535 choice in all units is going to make the overall program
3536 smaller. */
3537 if (DECL_COMDAT (node->decl))
3538 {
3539 if (!node->can_remove_if_no_direct_calls_p ())
3540 return true;
3541 }
3542 else if (!node->will_be_removed_from_program_if_no_direct_calls_p ())
3543 return true;
3544
3545 return estimate_growth (node) > 0;
3546 }
3547
3548
3549 /* This function performs intraprocedural analysis in NODE that is required to
3550 inline indirect calls. */
3551
3552 static void
3553 inline_indirect_intraprocedural_analysis (struct cgraph_node *node)
3554 {
3555 ipa_analyze_node (node);
3556 if (dump_file && (dump_flags & TDF_DETAILS))
3557 {
3558 ipa_print_node_params (dump_file, node);
3559 ipa_print_node_jump_functions (dump_file, node);
3560 }
3561 }
3562
3563
3564 /* Analyze the function body and note its size and time. */
3565
3566 void
3567 inline_analyze_function (struct cgraph_node *node)
3568 {
3569 push_cfun (DECL_STRUCT_FUNCTION (node->decl));
3570
3571 if (dump_file)
3572 fprintf (dump_file, "\nAnalyzing function: %s/%u\n",
3573 node->name (), node->order);
3574 if (opt_for_fn (node->decl, optimize) && !node->thunk.thunk_p)
3575 inline_indirect_intraprocedural_analysis (node);
3576 compute_inline_parameters (node, false);
3577 if (!optimize)
3578 {
3579 struct cgraph_edge *e;
3580 for (e = node->callees; e; e = e->next_callee)
3581 e->inline_failed = CIF_FUNCTION_NOT_OPTIMIZED;
3582 for (e = node->indirect_calls; e; e = e->next_callee)
3583 e->inline_failed = CIF_FUNCTION_NOT_OPTIMIZED;
3584 }
3585
3586 pop_cfun ();
3587 }
3588
3589
3590 /* Called when a new function is inserted into the callgraph late. */
3591
3592 void
3593 inline_summary_t::insert (struct cgraph_node *node, inline_summary *)
3594 {
3595 inline_analyze_function (node);
3596 }
3597
3598 /* Compute inline summaries of all functions. */
3599
3600 void
3601 inline_generate_summary (void)
3602 {
3603 struct cgraph_node *node;
3604
3605 FOR_EACH_DEFINED_FUNCTION (node)
3606 if (DECL_STRUCT_FUNCTION (node->decl))
3607 node->local.versionable = tree_versionable_function_p (node->decl);
3608
3609 /* When not optimizing, do not bother to analyze. Inlining is still done
3610 because edge redirection needs to happen there. */
3611 if (!optimize && !flag_generate_lto && !flag_generate_offload && !flag_wpa)
3612 return;
3613
3614 if (!inline_summaries)
3615 inline_summaries = (inline_summary_t*) inline_summary_t::create_ggc (symtab);
3616
3617 inline_summaries->enable_insertion_hook ();
3618
3619 ipa_register_cgraph_hooks ();
3620 inline_free_summary ();
3621
3622 FOR_EACH_DEFINED_FUNCTION (node)
3623 if (!node->alias)
3624 inline_analyze_function (node);
3625 }
3626
3627
3628 /* Read the ipa_call_summary for edge E from IB. */
3629
3630 static void
3631 read_ipa_call_summary (struct lto_input_block *ib, struct cgraph_edge *e)
3632 {
3633 struct ipa_call_summary *es = ipa_call_summaries->get (e);
3634 predicate p;
3635 int length, i;
3636
3637 es->call_stmt_size = streamer_read_uhwi (ib);
3638 es->call_stmt_time = streamer_read_uhwi (ib);
3639 es->loop_depth = streamer_read_uhwi (ib);
3640 p.stream_in (ib);
3641 edge_set_predicate (e, &p);
3642 length = streamer_read_uhwi (ib);
3643 if (length)
3644 {
3645 es->param.safe_grow_cleared (length);
3646 for (i = 0; i < length; i++)
3647 es->param[i].change_prob = streamer_read_uhwi (ib);
3648 }
3649 }
3650
3651
3652 /* Stream in inline summaries from the section. */

static void
inline_read_section (struct lto_file_decl_data *file_data, const char *data,
		     size_t len)
{
  const struct lto_function_header *header =
    (const struct lto_function_header *) data;
  const int cfg_offset = sizeof (struct lto_function_header);
  const int main_offset = cfg_offset + header->cfg_size;
  const int string_offset = main_offset + header->main_size;
  struct data_in *data_in;
  unsigned int i, count2, j;
  unsigned int f_count;

  lto_input_block ib ((const char *) data + main_offset, header->main_size,
		      file_data->mode_table);

  data_in =
    lto_data_in_create (file_data, (const char *) data + string_offset,
			header->string_size, vNULL);
  f_count = streamer_read_uhwi (&ib);
  for (i = 0; i < f_count; i++)
    {
      unsigned int index;
      struct cgraph_node *node;
      struct inline_summary *info;
      lto_symtab_encoder_t encoder;
      struct bitpack_d bp;
      struct cgraph_edge *e;
      predicate p;

      index = streamer_read_uhwi (&ib);
      encoder = file_data->symtab_node_encoder;
      node = dyn_cast<cgraph_node *> (lto_symtab_encoder_deref (encoder,
								index));
      info = inline_summaries->get (node);

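      /* No inlining has been applied to the freshly read body yet, so the
	 global size and stack estimates start out equal to the self
	 values.  */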
      info->estimated_stack_size
	= info->estimated_self_stack_size = streamer_read_uhwi (&ib);
      info->size = info->self_size = streamer_read_uhwi (&ib);
      info->time = sreal::stream_in (&ib);

      bp = streamer_read_bitpack (&ib);
      info->inlinable = bp_unpack_value (&bp, 1);
      info->contains_cilk_spawn = bp_unpack_value (&bp, 1);
      info->fp_expressions = bp_unpack_value (&bp, 1);

      count2 = streamer_read_uhwi (&ib);
      gcc_assert (!info->conds);
      for (j = 0; j < count2; j++)
	{
	  struct condition c;
	  c.operand_num = streamer_read_uhwi (&ib);
	  c.size = streamer_read_uhwi (&ib);
	  c.code = (enum tree_code) streamer_read_uhwi (&ib);
	  c.val = stream_read_tree (&ib, data_in);
	  bp = streamer_read_bitpack (&ib);
	  c.agg_contents = bp_unpack_value (&bp, 1);
	  c.by_ref = bp_unpack_value (&bp, 1);
	  if (c.agg_contents)
	    c.offset = streamer_read_uhwi (&ib);
	  vec_safe_push (info->conds, c);
	}
      count2 = streamer_read_uhwi (&ib);
      gcc_assert (!info->size_time_table);
      for (j = 0; j < count2; j++)
	{
	  struct size_time_entry e;

	  e.size = streamer_read_uhwi (&ib);
	  e.time = sreal::stream_in (&ib);
	  e.exec_predicate.stream_in (&ib);
	  e.nonconst_predicate.stream_in (&ib);

	  vec_safe_push (info->size_time_table, e);
	}

      p.stream_in (&ib);
      set_hint_predicate (&info->loop_iterations, p);
      p.stream_in (&ib);
      set_hint_predicate (&info->loop_stride, p);
      p.stream_in (&ib);
      set_hint_predicate (&info->array_index, p);
      for (e = node->callees; e; e = e->next_callee)
	read_ipa_call_summary (&ib, e);
      for (e = node->indirect_calls; e; e = e->next_callee)
	read_ipa_call_summary (&ib, e);
    }

  lto_free_section_data (file_data, LTO_section_inline_summary, NULL, data,
			 len);
  lto_data_in_delete (data_in);
}


/* Read inline summary.  Jump functions are shared among ipa-cp
   and inliner, so when ipa-cp is active, we don't need to read them
   twice.  */

void
inline_read_summary (void)
{
  struct lto_file_decl_data **file_data_vec = lto_get_file_decl_data ();
  struct lto_file_decl_data *file_data;
  unsigned int j = 0;

  inline_summary_alloc ();

  while ((file_data = file_data_vec[j++]))
    {
      size_t len;
      const char *data = lto_get_section_data (file_data,
					       LTO_section_inline_summary,
					       NULL, &len);
      if (data)
	inline_read_section (file_data, data, len);
      else
	/* Fatal error here.  We do not want to support compiling ltrans units
	   with a different version of the compiler or with different flags
	   than the WPA unit, so this should never happen.  */
	fatal_error (input_location,
		     "ipa inline summary is missing in input file");
    }
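  /* Jump functions are streamed by the inliner only when ipa-cp is disabled;
     mirror that decision when reading them back.  */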
  if (optimize)
    {
      ipa_register_cgraph_hooks ();
      if (!flag_ipa_cp)
	ipa_prop_read_jump_functions ();
    }

  gcc_assert (inline_summaries);
  inline_summaries->enable_insertion_hook ();
}


/* Write inline summary for edge E to OB.  */

static void
write_ipa_call_summary (struct output_block *ob, struct cgraph_edge *e)
{
  struct ipa_call_summary *es = ipa_call_summaries->get (e);
  int i;

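  /* Keep the field order in sync with read_ipa_call_summary.  */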
  streamer_write_uhwi (ob, es->call_stmt_size);
  streamer_write_uhwi (ob, es->call_stmt_time);
  streamer_write_uhwi (ob, es->loop_depth);
  if (es->predicate)
    es->predicate->stream_out (ob);
  else
    streamer_write_uhwi (ob, 0);
  streamer_write_uhwi (ob, es->param.length ());
  for (i = 0; i < (int) es->param.length (); i++)
    streamer_write_uhwi (ob, es->param[i].change_prob);
}


/* Write inline summaries of all functions in SET.
   Jump functions are shared among ipa-cp and inliner, so when ipa-cp is
   active, we don't need to write them twice.  */

void
inline_write_summary (void)
{
  struct output_block *ob = create_output_block (LTO_section_inline_summary);
  lto_symtab_encoder_t encoder = ob->decl_state->symtab_node_encoder;
  unsigned int count = 0;
  int i;

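  /* Count the function bodies whose summaries will be streamed; aliases do
     not carry a summary of their own and are skipped.  */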
  for (i = 0; i < lto_symtab_encoder_size (encoder); i++)
    {
      symtab_node *snode = lto_symtab_encoder_deref (encoder, i);
      cgraph_node *cnode = dyn_cast <cgraph_node *> (snode);
      if (cnode && cnode->definition && !cnode->alias)
	count++;
    }
  streamer_write_uhwi (ob, count);

  for (i = 0; i < lto_symtab_encoder_size (encoder); i++)
    {
      symtab_node *snode = lto_symtab_encoder_deref (encoder, i);
      cgraph_node *cnode = dyn_cast <cgraph_node *> (snode);
      if (cnode && cnode->definition && !cnode->alias)
	{
	  struct inline_summary *info = inline_summaries->get (cnode);
	  struct bitpack_d bp;
	  struct cgraph_edge *edge;
	  int i;
	  size_time_entry *e;
	  struct condition *c;

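	  /* The writes below define the section format; inline_read_section
	     must read the fields back in the same order.  */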
	  streamer_write_uhwi (ob, lto_symtab_encoder_encode (encoder, cnode));
	  streamer_write_hwi (ob, info->estimated_self_stack_size);
	  streamer_write_hwi (ob, info->self_size);
	  info->time.stream_out (ob);
	  bp = bitpack_create (ob->main_stream);
	  bp_pack_value (&bp, info->inlinable, 1);
	  bp_pack_value (&bp, info->contains_cilk_spawn, 1);
	  bp_pack_value (&bp, info->fp_expressions, 1);
	  streamer_write_bitpack (&bp);
	  streamer_write_uhwi (ob, vec_safe_length (info->conds));
	  for (i = 0; vec_safe_iterate (info->conds, i, &c); i++)
	    {
	      streamer_write_uhwi (ob, c->operand_num);
	      streamer_write_uhwi (ob, c->size);
	      streamer_write_uhwi (ob, c->code);
	      stream_write_tree (ob, c->val, true);
	      bp = bitpack_create (ob->main_stream);
	      bp_pack_value (&bp, c->agg_contents, 1);
	      bp_pack_value (&bp, c->by_ref, 1);
	      streamer_write_bitpack (&bp);
	      if (c->agg_contents)
		streamer_write_uhwi (ob, c->offset);
	    }
	  streamer_write_uhwi (ob, vec_safe_length (info->size_time_table));
	  for (i = 0; vec_safe_iterate (info->size_time_table, i, &e); i++)
	    {
	      streamer_write_uhwi (ob, e->size);
	      e->time.stream_out (ob);
	      e->exec_predicate.stream_out (ob);
	      e->nonconst_predicate.stream_out (ob);
	    }
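	  /* An absent hint predicate is streamed as a bare zero, which reads
	     back as a trivial predicate that set_hint_predicate discards.  */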
	  if (info->loop_iterations)
	    info->loop_iterations->stream_out (ob);
	  else
	    streamer_write_uhwi (ob, 0);
	  if (info->loop_stride)
	    info->loop_stride->stream_out (ob);
	  else
	    streamer_write_uhwi (ob, 0);
	  if (info->array_index)
	    info->array_index->stream_out (ob);
	  else
	    streamer_write_uhwi (ob, 0);
	  for (edge = cnode->callees; edge; edge = edge->next_callee)
	    write_ipa_call_summary (ob, edge);
	  for (edge = cnode->indirect_calls; edge; edge = edge->next_callee)
	    write_ipa_call_summary (ob, edge);
	}
    }
  streamer_write_char_stream (ob->main_stream, 0);
  produce_asm (ob, NULL);
  destroy_output_block (ob);

  if (optimize && !flag_ipa_cp)
    ipa_prop_write_jump_functions ();
}


/* Release the inline summary data structures.  */

void
inline_free_summary (void)
{
  struct cgraph_node *node;
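  /* If call summaries were never allocated, there is nothing to release.  */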
  if (!ipa_call_summaries)
    return;
  FOR_EACH_DEFINED_FUNCTION (node)
    if (!node->alias)
      inline_summaries->get (node)->reset (node);
  inline_summaries->release ();
  inline_summaries = NULL;
  ipa_call_summaries->release ();
  delete ipa_call_summaries;
  ipa_call_summaries = NULL;
  edge_predicate_pool.release ();
}