Remove gcc/params.* files.
[gcc.git] / gcc / ipa-fnsummary.c
1 /* Function summary pass.
2 Copyright (C) 2003-2019 Free Software Foundation, Inc.
3 Contributed by Jan Hubicka
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 /* Analysis of function bodies used by inter-procedural passes
22
23 We estimate for each function
24 - function body size and size after specializing into given context
25 - average function execution time in a given context
26 - function frame size
27 For each call
28 - call statement size, time and how often the parameters change
29
30 ipa_fn_summary data structures store above information locally (i.e.
31 parameters of the function itself) and globally (i.e. parameters of
32 the function created by applying all the inline decisions already
33 present in the callgraph).
34
35 We provide access to the ipa_fn_summary data structure and
36 basic logic updating the parameters when inlining is performed.
37
38 The summaries are context sensitive. Context means
39 1) partial assignment of known constant values of operands
40 2) whether function is inlined into the call or not.
41 It is easy to add more variants. To represent function size and time
42 that depends on context (i.e. it is known to be optimized away when
43 context is known either by inlining or from IP-CP and cloning),
44 we use predicates.
45
46 estimate_edge_size_and_time can be used to query
47 function size/time in the given context. ipa_merge_fn_summary_after_inlining merges
48 properties of caller and callee after inlining.
49
50 Finally pass_inline_parameters is exported. This is used to drive
51 computation of function parameters used by the early inliner. IPA
52 inliner performs analysis via its analyze_function method. */
53
54 #include "config.h"
55 #include "system.h"
56 #include "coretypes.h"
57 #include "backend.h"
58 #include "tree.h"
59 #include "gimple.h"
60 #include "alloc-pool.h"
61 #include "tree-pass.h"
62 #include "ssa.h"
63 #include "tree-streamer.h"
64 #include "cgraph.h"
65 #include "diagnostic.h"
66 #include "fold-const.h"
67 #include "print-tree.h"
68 #include "tree-inline.h"
69 #include "gimple-pretty-print.h"
70 #include "cfganal.h"
71 #include "gimple-iterator.h"
72 #include "tree-cfg.h"
73 #include "tree-ssa-loop-niter.h"
74 #include "tree-ssa-loop.h"
75 #include "symbol-summary.h"
76 #include "ipa-prop.h"
77 #include "ipa-fnsummary.h"
78 #include "cfgloop.h"
79 #include "tree-scalar-evolution.h"
80 #include "ipa-utils.h"
81 #include "cfgexpand.h"
82 #include "gimplify.h"
83 #include "stringpool.h"
84 #include "attribs.h"
85
/* Summaries.  Function and size summaries are indexed by cgraph node,
   call summaries by call graph edge.  Function summaries are allocated
   in GC memory (va_gc); the size and call summaries live on the heap.  */
fast_function_summary <ipa_fn_summary *, va_gc> *ipa_fn_summaries;
fast_function_summary <ipa_size_summary *, va_heap> *ipa_size_summaries;
fast_call_summary <ipa_call_summary *, va_heap> *ipa_call_summaries;

/* Edge predicates go here.  Pool for the predicate objects referenced
   from edge summaries and hint predicates.  */
static object_allocator<predicate> edge_predicate_pool ("edge predicates");
93
94
95 /* Dump IPA hints. */
96 void
97 ipa_dump_hints (FILE *f, ipa_hints hints)
98 {
99 if (!hints)
100 return;
101 fprintf (f, "IPA hints:");
102 if (hints & INLINE_HINT_indirect_call)
103 {
104 hints &= ~INLINE_HINT_indirect_call;
105 fprintf (f, " indirect_call");
106 }
107 if (hints & INLINE_HINT_loop_iterations)
108 {
109 hints &= ~INLINE_HINT_loop_iterations;
110 fprintf (f, " loop_iterations");
111 }
112 if (hints & INLINE_HINT_loop_stride)
113 {
114 hints &= ~INLINE_HINT_loop_stride;
115 fprintf (f, " loop_stride");
116 }
117 if (hints & INLINE_HINT_same_scc)
118 {
119 hints &= ~INLINE_HINT_same_scc;
120 fprintf (f, " same_scc");
121 }
122 if (hints & INLINE_HINT_in_scc)
123 {
124 hints &= ~INLINE_HINT_in_scc;
125 fprintf (f, " in_scc");
126 }
127 if (hints & INLINE_HINT_cross_module)
128 {
129 hints &= ~INLINE_HINT_cross_module;
130 fprintf (f, " cross_module");
131 }
132 if (hints & INLINE_HINT_declared_inline)
133 {
134 hints &= ~INLINE_HINT_declared_inline;
135 fprintf (f, " declared_inline");
136 }
137 if (hints & INLINE_HINT_known_hot)
138 {
139 hints &= ~INLINE_HINT_known_hot;
140 fprintf (f, " known_hot");
141 }
142 gcc_assert (!hints);
143 }
144
145
/* Record SIZE and TIME to SUMMARY.
   The accounted code will be executed when EXEC_PRED is true.
   When NONCONST_PRED is false the code will evaluate to constant and
   will get optimized out in specialized clones of the function.  */

void
ipa_fn_summary::account_size_time (int size, sreal time,
				   const predicate &exec_pred,
				   const predicate &nonconst_pred_in)
{
  size_time_entry *e;
  bool found = false;
  int i;
  predicate nonconst_pred;

  /* Code that is never executed contributes nothing.  */
  if (exec_pred == false)
    return;

  /* Code can only be non-constant on paths where it is executed at all.  */
  nonconst_pred = nonconst_pred_in & exec_pred;

  if (nonconst_pred == false)
    return;

  /* We need to create initial empty unconditional clause, but otherwise
     we don't need to account empty times and sizes.  */
  if (!size && time == 0 && size_time_table)
    return;

  /* Negative time would corrupt the accumulation below.  */
  gcc_assert (time >= 0);

  /* Look for an existing entry with the same predicate pair; if one is
     found, SIZE and TIME are simply added to it.  */
  for (i = 0; vec_safe_iterate (size_time_table, i, &e); i++)
    if (e->exec_predicate == exec_pred
	&& e->nonconst_predicate == nonconst_pred)
      {
	found = true;
	break;
      }
  /* Cap the table at 256 entries; beyond that, fold everything into the
     first (unconditional) entry instead of growing without bound.  */
  if (i == 256)
    {
      i = 0;
      found = true;
      e = &(*size_time_table)[0];
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "\t\tReached limit on number of entries, "
		 "ignoring the predicate.");
    }
  if (dump_file && (dump_flags & TDF_DETAILS) && (time != 0 || size))
    {
      fprintf (dump_file,
	       "\t\tAccounting size:%3.2f, time:%3.2f on %spredicate exec:",
	       ((double) size) / ipa_fn_summary::size_scale,
	       (time.to_double ()), found ? "" : "new ");
      exec_pred.dump (dump_file, conds, 0);
      if (exec_pred != nonconst_pred)
	{
	  fprintf (dump_file, " nonconst:");
	  nonconst_pred.dump (dump_file, conds);
	}
      else
	fprintf (dump_file, "\n");
    }
  if (!found)
    {
      class size_time_entry new_entry;
      new_entry.size = size;
      new_entry.time = time;
      new_entry.exec_predicate = exec_pred;
      new_entry.nonconst_predicate = nonconst_pred;
      vec_safe_push (size_time_table, new_entry);
    }
  else
    {
      e->size += size;
      e->time += time;
    }
}
223
/* We proved E to be unreachable, redirect it to __builtin_unreachable.  */

static struct cgraph_edge *
redirect_to_unreachable (struct cgraph_edge *e)
{
  /* Remember an already-inlined callee so its now-dead clone tree can be
     removed once the edge points elsewhere.  */
  struct cgraph_node *callee = !e->inline_failed ? e->callee : NULL;
  struct cgraph_node *target = cgraph_node::get_create
		  (builtin_decl_implicit (BUILT_IN_UNREACHABLE));

  if (e->speculative)
    e = e->resolve_speculation (target->decl);
  else if (!e->callee)
    e->make_direct (target);
  else
    e->redirect_callee (target);
  class ipa_call_summary *es = ipa_call_summaries->get (e);
  e->inline_failed = CIF_UNREACHABLE;
  e->count = profile_count::zero ();
  /* The call to __builtin_unreachable costs nothing.  */
  es->call_stmt_size = 0;
  es->call_stmt_time = 0;
  if (callee)
    callee->remove_symbol_and_inline_clones ();
  return e;
}
248
249 /* Set predicate for edge E. */
250
251 static void
252 edge_set_predicate (struct cgraph_edge *e, predicate *predicate)
253 {
254 /* If the edge is determined to be never executed, redirect it
255 to BUILTIN_UNREACHABLE to make it clear to IPA passes the call will
256 be optimized out. */
257 if (predicate && *predicate == false
258 /* When handling speculative edges, we need to do the redirection
259 just once. Do it always on the direct edge, so we do not
260 attempt to resolve speculation while duplicating the edge. */
261 && (!e->speculative || e->callee))
262 e = redirect_to_unreachable (e);
263
264 class ipa_call_summary *es = ipa_call_summaries->get (e);
265 if (predicate && *predicate != true)
266 {
267 if (!es->predicate)
268 es->predicate = edge_predicate_pool.allocate ();
269 *es->predicate = *predicate;
270 }
271 else
272 {
273 if (es->predicate)
274 edge_predicate_pool.remove (es->predicate);
275 es->predicate = NULL;
276 }
277 }
278
279 /* Set predicate for hint *P. */
280
281 static void
282 set_hint_predicate (predicate **p, predicate new_predicate)
283 {
284 if (new_predicate == false || new_predicate == true)
285 {
286 if (*p)
287 edge_predicate_pool.remove (*p);
288 *p = NULL;
289 }
290 else
291 {
292 if (!*p)
293 *p = edge_predicate_pool.allocate ();
294 **p = new_predicate;
295 }
296 }
297
298
/* Compute what conditions may or may not hold given information about
   parameters.  RET_CLAUSE returns truths that may hold in a specialized copy,
   while RET_NONSPEC_CLAUSE returns truths that may hold in an nonspecialized
   copy when called in a given context.  It is a bitmask of conditions.  Bit
   0 means that condition is known to be false, while bit 1 means that condition
   may or may not be true.  These differ - for example NOT_INLINED condition
   is always false in the second and also builtin_constant_p tests cannot use
   the fact that parameter is indeed a constant.

   KNOWN_VALS is partial mapping of parameters of NODE to constant values.
   KNOWN_AGGS is a vector of aggregate jump functions for each parameter.
   Return clause of possible truths.  When INLINE_P is true, assume that we are
   inlining.

   ERROR_MARK means compile time invariant.  */

static void
evaluate_conditions_for_known_args (struct cgraph_node *node,
				    bool inline_p,
				    vec<tree> known_vals,
				    vec<ipa_agg_jump_function_p>
				    known_aggs,
				    clause_t *ret_clause,
				    clause_t *ret_nonspec_clause)
{
  clause_t clause = inline_p ? 0 : 1 << predicate::not_inlined_condition;
  clause_t nonspec_clause = 1 << predicate::not_inlined_condition;
  class ipa_fn_summary *info = ipa_fn_summaries->get (node);
  int i;
  struct condition *c;

  for (i = 0; vec_safe_iterate (info->conds, i, &c); i++)
    {
      tree val;
      tree res;
      int j;
      struct expr_eval_op *op;

      /* We allow call stmt to have fewer arguments than the callee function
	 (especially for K&R style programs).  So bound check here (we assume
	 known_aggs vector, if non-NULL, has the same length as
	 known_vals).  */
      gcc_checking_assert (!known_aggs.exists ()
			   || (known_vals.length () == known_aggs.length ()));
      if (c->operand_num >= (int) known_vals.length ())
	{
	  /* Argument of this condition is not passed at all; the condition
	     may or may not hold.  */
	  clause |= 1 << (i + predicate::first_dynamic_condition);
	  nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
	  continue;
	}

      if (c->agg_contents)
	{
	  struct ipa_agg_jump_function *agg;

	  /* ERROR_MARK marks a by-value parameter known not to change; a
	     "changed" condition on its contents then cannot hold.  */
	  if (c->code == predicate::changed
	      && !c->by_ref
	      && (known_vals[c->operand_num] == error_mark_node))
	    continue;

	  if (known_aggs.exists ())
	    {
	      agg = known_aggs[c->operand_num];
	      val = ipa_find_agg_cst_for_param (agg, known_vals[c->operand_num],
						c->offset, c->by_ref);
	    }
	  else
	    val = NULL_TREE;
	}
      else
	{
	  val = known_vals[c->operand_num];
	  if (val == error_mark_node && c->code != predicate::changed)
	    val = NULL_TREE;
	}

      if (!val)
	{
	  /* No known value: conservatively assume the condition may hold.  */
	  clause |= 1 << (i + predicate::first_dynamic_condition);
	  nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
	  continue;
	}
      if (c->code == predicate::changed)
	{
	  /* The value is known, so in the specialized copy the parameter is
	     unchanged; in the generic copy it may still change.  */
	  nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
	  continue;
	}

      /* Mismatched sizes make the VIEW_CONVERT below invalid; give up.  */
      if (TYPE_SIZE (c->type) != TYPE_SIZE (TREE_TYPE (val)))
	{
	  clause |= 1 << (i + predicate::first_dynamic_condition);
	  nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
	  continue;
	}
      if (c->code == predicate::is_not_constant)
	{
	  /* builtin_constant_p cannot use IPA knowledge of constant values
	     in the nonspecialized copy.  */
	  nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
	  continue;
	}

      /* Apply the chain of operations recorded for the condition to the
	 known value; NULL at any step means "cannot fold".  */
      val = fold_unary (VIEW_CONVERT_EXPR, c->type, val);
      for (j = 0; vec_safe_iterate (c->param_ops, j, &op); j++)
	{
	  if (!val)
	    break;
	  if (!op->val[0])
	    val = fold_unary (op->code, op->type, val);
	  else if (!op->val[1])
	    val = fold_binary (op->code, op->type,
			       op->index ? op->val[0] : val,
			       op->index ? val : op->val[0]);
	  else if (op->index == 0)
	    val = fold_ternary (op->code, op->type,
				val, op->val[0], op->val[1]);
	  else if (op->index == 1)
	    val = fold_ternary (op->code, op->type,
				op->val[0], val, op->val[1]);
	  else if (op->index == 2)
	    val = fold_ternary (op->code, op->type,
				op->val[0], op->val[1], val);
	  else
	    val = NULL_TREE;
	}

      res = val
	? fold_binary_to_constant (c->code, boolean_type_node, val, c->val)
	: NULL;

      /* The condition folded to false on the known value; leave both bits
	 clear (i.e. condition known not to hold).  */
      if (res && integer_zerop (res))
	continue;

      clause |= 1 << (i + predicate::first_dynamic_condition);
      nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
    }
  *ret_clause = clause;
  if (ret_nonspec_clause)
    *ret_nonspec_clause = nonspec_clause;
}
437
438
/* Work out what conditions might be true at invocation of E.
   Fills in *CLAUSE_PTR and *NONSPEC_CLAUSE_PTR with bitmasks of possible
   truths and, when the pointers are non-NULL, returns the known constant
   argument values, polymorphic call contexts and aggregate jump functions
   in *KNOWN_VALS_PTR, *KNOWN_CONTEXTS_PTR and *KNOWN_AGGS_PTR.  An entry
   of ERROR_MARK in the values vector marks a parameter that is known not
   to change (compile time invariant).  */

void
evaluate_properties_for_edge (struct cgraph_edge *e, bool inline_p,
			      clause_t *clause_ptr,
			      clause_t *nonspec_clause_ptr,
			      vec<tree> *known_vals_ptr,
			      vec<ipa_polymorphic_call_context>
			      *known_contexts_ptr,
			      vec<ipa_agg_jump_function_p> *known_aggs_ptr)
{
  struct cgraph_node *callee = e->callee->ultimate_alias_target ();
  class ipa_fn_summary *info = ipa_fn_summaries->get (callee);
  vec<tree> known_vals = vNULL;
  vec<ipa_agg_jump_function_p> known_aggs = vNULL;
  class ipa_edge_args *args;

  if (clause_ptr)
    *clause_ptr = inline_p ? 0 : 1 << predicate::not_inlined_condition;
  if (known_vals_ptr)
    known_vals_ptr->create (0);
  if (known_contexts_ptr)
    known_contexts_ptr->create (0);

  /* Preferred path: use the IPA jump functions recorded for the edge.  */
  if (ipa_node_params_sum
      && !e->call_stmt_cannot_inline_p
      && ((clause_ptr && info->conds) || known_vals_ptr || known_contexts_ptr)
      && (args = IPA_EDGE_REF (e)) != NULL)
    {
      class ipa_node_params *caller_parms_info, *callee_pi;
      class ipa_call_summary *es = ipa_call_summaries->get (e);
      int i, count = ipa_get_cs_argument_count (args);

      /* For already-inlined callers, parameter info lives on the node the
	 caller was inlined to.  */
      if (e->caller->inlined_to)
	caller_parms_info = IPA_NODE_REF (e->caller->inlined_to);
      else
	caller_parms_info = IPA_NODE_REF (e->caller);
      callee_pi = IPA_NODE_REF (callee);

      if (count && (info->conds || known_vals_ptr))
	known_vals.safe_grow_cleared (count);
      if (count && (info->conds || known_aggs_ptr))
	known_aggs.safe_grow_cleared (count);
      if (count && known_contexts_ptr)
	known_contexts_ptr->safe_grow_cleared (count);

      if (callee_pi)
	for (i = 0; i < count; i++)
	  {
	    struct ipa_jump_func *jf = ipa_get_ith_jump_func (args, i);
	    tree cst = ipa_value_from_jfunc (caller_parms_info, jf,
					     ipa_get_type (callee_pi, i));

	    /* Fall back to reading the constant directly off the call
	       statement when the jump function yields nothing.  */
	    if (!cst && e->call_stmt
		&& i < (int)gimple_call_num_args (e->call_stmt))
	      {
		cst = gimple_call_arg (e->call_stmt, i);
		if (!is_gimple_min_invariant (cst))
		  cst = NULL;
	      }
	    if (cst)
	      {
		gcc_checking_assert (TREE_CODE (cst) != TREE_BINFO);
		if (known_vals.exists ())
		  known_vals[i] = cst;
	      }
	    else if (inline_p && !es->param[i].change_prob)
	      /* NOTE(review): unlike the constant case just above, this
		 store does not check known_vals.exists (); it appears to
		 rely on known_vals having been grown whenever es->param
		 has been computed — confirm.  */
	      known_vals[i] = error_mark_node;

	    if (known_contexts_ptr)
	      (*known_contexts_ptr)[i]
		= ipa_context_from_jfunc (caller_parms_info, e, i, jf);
	    /* TODO: When IPA-CP starts propagating and merging aggregate jump
	       functions, use its knowledge of the caller too, just like the
	       scalar case above.  */
	    known_aggs[i] = &jf->agg;
	  }
      else
	gcc_assert (callee->thunk.thunk_p);
    }
  /* Fallback: no IPA info, but constants can still be read off the call
     statement itself.  */
  else if (e->call_stmt && !e->call_stmt_cannot_inline_p
	   && ((clause_ptr && info->conds) || known_vals_ptr))
    {
      int i, count = (int)gimple_call_num_args (e->call_stmt);

      if (count && (info->conds || known_vals_ptr))
	known_vals.safe_grow_cleared (count);
      for (i = 0; i < count; i++)
	{
	  tree cst = gimple_call_arg (e->call_stmt, i);
	  if (!is_gimple_min_invariant (cst))
	    cst = NULL;
	  if (cst)
	    known_vals[i] = cst;
	}
    }

  evaluate_conditions_for_known_args (callee, inline_p,
				      known_vals, known_aggs, clause_ptr,
				      nonspec_clause_ptr);

  /* Hand the vectors to the caller or release them.  */
  if (known_vals_ptr)
    *known_vals_ptr = known_vals;
  else
    known_vals.release ();

  if (known_aggs_ptr)
    *known_aggs_ptr = known_aggs;
  else
    known_aggs.release ();
}
550
551
/* Allocate the function summary.  Creates all three global summary maps;
   must be called only once, which the assert below verifies.  */

static void
ipa_fn_summary_alloc (void)
{
  gcc_checking_assert (!ipa_fn_summaries);
  /* Size summaries live on the heap; function summaries are GC
     allocated (create_ggc).  */
  ipa_size_summaries = new fast_function_summary <ipa_size_summary *, va_heap>
			 (symtab);
  ipa_fn_summaries = ipa_fn_summary_t::create_ggc (symtab);
  ipa_call_summaries = new ipa_call_summary_t (symtab);
}
563
/* Destructor: return the predicate (if any) to the shared pool and free
   the per-parameter vector.  */

ipa_call_summary::~ipa_call_summary ()
{
  if (predicate)
    edge_predicate_pool.remove (predicate);

  param.release ();
}
571
/* Destructor: return the hint predicates to the shared pool and free the
   condition and size/time vectors.  */

ipa_fn_summary::~ipa_fn_summary ()
{
  if (loop_iterations)
    edge_predicate_pool.remove (loop_iterations);
  if (loop_stride)
    edge_predicate_pool.remove (loop_stride);
  vec_free (conds);
  vec_free (size_time_table);
}
581
582 void
583 ipa_fn_summary_t::remove_callees (cgraph_node *node)
584 {
585 cgraph_edge *e;
586 for (e = node->callees; e; e = e->next_callee)
587 ipa_call_summaries->remove (e);
588 for (e = node->indirect_calls; e; e = e->next_callee)
589 ipa_call_summaries->remove (e);
590 }
591
592 /* Same as remap_predicate_after_duplication but handle hint predicate *P.
593 Additionally care about allocating new memory slot for updated predicate
594 and set it to NULL when it becomes true or false (and thus uninteresting).
595 */
596
597 static void
598 remap_hint_predicate_after_duplication (predicate **p,
599 clause_t possible_truths)
600 {
601 predicate new_predicate;
602
603 if (!*p)
604 return;
605
606 new_predicate = (*p)->remap_after_duplication (possible_truths);
607 /* We do not want to free previous predicate; it is used by node origin. */
608 *p = NULL;
609 set_hint_predicate (p, new_predicate);
610 }
611
612
/* Hook that is called by cgraph.c when a node is duplicated.
   Copies SRC's summary into INFO (the summary slot of DST) and, when DST
   is a clone with parameter replacements, re-evaluates all predicates so
   that code known to be optimized out in the clone is dropped from the
   size/time table and edge predicates.  */
void
ipa_fn_summary_t::duplicate (cgraph_node *src,
			     cgraph_node *dst,
			     ipa_fn_summary *,
			     ipa_fn_summary *info)
{
  /* Start from a copy of SRC's summary via placement new.  */
  new (info) ipa_fn_summary (*ipa_fn_summaries->get (src));
  /* TODO: as an optimization, we may avoid copying conditions
     that are known to be false or true.  */
  info->conds = vec_safe_copy (info->conds);

  /* When there are any replacements in the function body, see if we can figure
     out that something was optimized out.  */
  if (ipa_node_params_sum && dst->clone.tree_map)
    {
      vec<size_time_entry, va_gc> *entry = info->size_time_table;
      /* Use SRC parm info since it may not be copied yet.  */
      class ipa_node_params *parms_info = IPA_NODE_REF (src);
      vec<tree> known_vals = vNULL;
      int count = ipa_get_param_count (parms_info);
      int i, j;
      clause_t possible_truths;
      predicate true_pred = true;
      size_time_entry *e;
      int optimized_out_size = 0;
      bool inlined_to_p = false;
      struct cgraph_edge *edge, *next;

      info->size_time_table = 0;
      known_vals.safe_grow_cleared (count);
      /* Gather the replacement constant (if any) for each parameter.  */
      for (i = 0; i < count; i++)
	{
	  struct ipa_replace_map *r;

	  for (j = 0; vec_safe_iterate (dst->clone.tree_map, j, &r); j++)
	    {
	      if (r->parm_num == i)
		{
		  known_vals[i] = r->new_tree;
		  break;
		}
	    }
	}
      evaluate_conditions_for_known_args (dst, false,
					  known_vals,
					  vNULL,
					  &possible_truths,
					  /* We are going to specialize,
					     so ignore nonspec truths.  */
					  NULL);
      known_vals.release ();

      /* Re-create the initial unconditional entry.  */
      info->account_size_time (0, 0, true_pred, true_pred);

      /* Remap size_time vectors.
	 Simplify the predicate by pruning out alternatives that are known
	 to be false.
	 TODO: as an optimization, we can also eliminate conditions known
	 to be true.  */
      for (i = 0; vec_safe_iterate (entry, i, &e); i++)
	{
	  predicate new_exec_pred;
	  predicate new_nonconst_pred;
	  new_exec_pred = e->exec_predicate.remap_after_duplication
			  (possible_truths);
	  new_nonconst_pred = e->nonconst_predicate.remap_after_duplication
			      (possible_truths);
	  if (new_exec_pred == false || new_nonconst_pred == false)
	    optimized_out_size += e->size;
	  else
	    info->account_size_time (e->size, e->time, new_exec_pred,
				     new_nonconst_pred);
	}

      /* Remap edge predicates with the same simplification as above.
	 Also copy constantness arrays.  */
      for (edge = dst->callees; edge; edge = next)
	{
	  predicate new_predicate;
	  class ipa_call_summary *es = ipa_call_summaries->get (edge);
	  next = edge->next_callee;

	  if (!edge->inline_failed)
	    inlined_to_p = true;
	  if (!es->predicate)
	    continue;
	  new_predicate = es->predicate->remap_after_duplication
	    (possible_truths);
	  if (new_predicate == false && *es->predicate != false)
	    optimized_out_size += es->call_stmt_size * ipa_fn_summary::size_scale;
	  edge_set_predicate (edge, &new_predicate);
	}

      /* Remap indirect edge predicates with the same simplification as above.
	 Also copy constantness arrays.  */
      for (edge = dst->indirect_calls; edge; edge = next)
	{
	  predicate new_predicate;
	  class ipa_call_summary *es = ipa_call_summaries->get (edge);
	  next = edge->next_callee;

	  gcc_checking_assert (edge->inline_failed);
	  if (!es->predicate)
	    continue;
	  new_predicate = es->predicate->remap_after_duplication
	    (possible_truths);
	  if (new_predicate == false && *es->predicate != false)
	    optimized_out_size += es->call_stmt_size * ipa_fn_summary::size_scale;
	  edge_set_predicate (edge, &new_predicate);
	}
      remap_hint_predicate_after_duplication (&info->loop_iterations,
					      possible_truths);
      remap_hint_predicate_after_duplication (&info->loop_stride,
					      possible_truths);

      /* If inliner or someone after inliner will ever start producing
	 non-trivial clones, we will get trouble with lack of information
	 about updating self sizes, because size vectors already contains
	 sizes of the callees.  */
      gcc_assert (!inlined_to_p || !optimized_out_size);
    }
  else
    {
      /* No replacements: just deep-copy the table and re-allocate the hint
	 predicates (the originals stay owned by SRC).  */
      info->size_time_table = vec_safe_copy (info->size_time_table);
      if (info->loop_iterations)
	{
	  predicate p = *info->loop_iterations;
	  info->loop_iterations = NULL;
	  set_hint_predicate (&info->loop_iterations, p);
	}
      if (info->loop_stride)
	{
	  predicate p = *info->loop_stride;
	  info->loop_stride = NULL;
	  set_hint_predicate (&info->loop_stride, p);
	}
    }
  if (!dst->inlined_to)
    ipa_update_overall_fn_summary (dst);
}
754
755
/* Hook that is called by cgraph.c when an edge is duplicated.
   Copies SRCINFO into INFO (the summary of the new edge DST).  */

void
ipa_call_summary_t::duplicate (struct cgraph_edge *src,
			       struct cgraph_edge *dst,
			       class ipa_call_summary *srcinfo,
			       class ipa_call_summary *info)
{
  new (info) ipa_call_summary (*srcinfo);
  /* The copy constructor above copied SRCINFO's predicate pointer, which
     SRCINFO still owns; clear it and install a freshly allocated copy via
     edge_set_predicate instead.  */
  info->predicate = NULL;
  edge_set_predicate (dst, srcinfo->predicate);
  info->param = srcinfo->param.copy ();
  /* When an indirect edge was resolved to a direct call, drop the cost
     estimates from indirect-call to direct-call weights.  */
  if (!dst->indirect_unknown_callee && src->indirect_unknown_callee)
    {
      info->call_stmt_size -= (eni_size_weights.indirect_call_cost
			       - eni_size_weights.call_cost);
      info->call_stmt_time -= (eni_time_weights.indirect_call_cost
			       - eni_time_weights.call_cost);
    }
}
776
777 /* Dump edge summaries associated to NODE and recursively to all clones.
778 Indent by INDENT. */
779
780 static void
781 dump_ipa_call_summary (FILE *f, int indent, struct cgraph_node *node,
782 class ipa_fn_summary *info)
783 {
784 struct cgraph_edge *edge;
785 for (edge = node->callees; edge; edge = edge->next_callee)
786 {
787 class ipa_call_summary *es = ipa_call_summaries->get (edge);
788 struct cgraph_node *callee = edge->callee->ultimate_alias_target ();
789 int i;
790
791 fprintf (f,
792 "%*s%s/%i %s\n%*s freq:%4.2f",
793 indent, "", callee->name (), callee->order,
794 !edge->inline_failed
795 ? "inlined" : cgraph_inline_failed_string (edge-> inline_failed),
796 indent, "", edge->sreal_frequency ().to_double ());
797
798 if (es)
799 fprintf (f, " loop depth:%2i size:%2i time: %2i",
800 es->loop_depth, es->call_stmt_size, es->call_stmt_time);
801
802 ipa_fn_summary *s = ipa_fn_summaries->get (callee);
803 ipa_size_summary *ss = ipa_size_summaries->get (callee);
804 if (s != NULL)
805 fprintf (f, " callee size:%2i stack:%2i",
806 (int) (ss->size / ipa_fn_summary::size_scale),
807 (int) s->estimated_stack_size);
808
809 if (es && es->predicate)
810 {
811 fprintf (f, " predicate: ");
812 es->predicate->dump (f, info->conds);
813 }
814 else
815 fprintf (f, "\n");
816 if (es && es->param.exists ())
817 for (i = 0; i < (int) es->param.length (); i++)
818 {
819 int prob = es->param[i].change_prob;
820
821 if (!prob)
822 fprintf (f, "%*s op%i is compile time invariant\n",
823 indent + 2, "", i);
824 else if (prob != REG_BR_PROB_BASE)
825 fprintf (f, "%*s op%i change %f%% of time\n", indent + 2, "", i,
826 prob * 100.0 / REG_BR_PROB_BASE);
827 }
828 if (!edge->inline_failed)
829 {
830 ipa_size_summary *ss = ipa_size_summaries->get (callee);
831 fprintf (f, "%*sStack frame offset %i, callee self size %i\n",
832 indent + 2, "",
833 (int) ipa_get_stack_frame_offset (callee),
834 (int) ss->estimated_self_stack_size);
835 dump_ipa_call_summary (f, indent + 2, callee, info);
836 }
837 }
838 for (edge = node->indirect_calls; edge; edge = edge->next_callee)
839 {
840 class ipa_call_summary *es = ipa_call_summaries->get (edge);
841 fprintf (f, "%*sindirect call loop depth:%2i freq:%4.2f size:%2i"
842 " time: %2i",
843 indent, "",
844 es->loop_depth,
845 edge->sreal_frequency ().to_double (), es->call_stmt_size,
846 es->call_stmt_time);
847 if (es->predicate)
848 {
849 fprintf (f, "predicate: ");
850 es->predicate->dump (f, info->conds);
851 }
852 else
853 fprintf (f, "\n");
854 }
855 }
856
857
/* Dump the function summary of NODE to F, including the size/time table,
   hint predicates and summaries of all outgoing call edges.  Nothing is
   printed for nodes without a definition or without a summary.  */

void
ipa_dump_fn_summary (FILE *f, struct cgraph_node *node)
{
  if (node->definition)
    {
      class ipa_fn_summary *s = ipa_fn_summaries->get (node);
      class ipa_size_summary *ss = ipa_size_summaries->get (node);
      if (s != NULL)
	{
	  size_time_entry *e;
	  int i;
	  fprintf (f, "IPA function summary for %s", node->dump_name ());
	  if (DECL_DISREGARD_INLINE_LIMITS (node->decl))
	    fprintf (f, " always_inline");
	  if (s->inlinable)
	    fprintf (f, " inlinable");
	  if (s->fp_expressions)
	    fprintf (f, " fp_expression");
	  fprintf (f, "\n global time: %f\n", s->time.to_double ());
	  fprintf (f, " self size: %i\n", ss->self_size);
	  fprintf (f, " global size: %i\n", ss->size);
	  fprintf (f, " min size: %i\n", s->min_size);
	  fprintf (f, " self stack: %i\n",
		   (int) ss->estimated_self_stack_size);
	  fprintf (f, " global stack: %i\n", (int) s->estimated_stack_size);
	  if (s->growth)
	    fprintf (f, " estimated growth:%i\n", (int) s->growth);
	  if (s->scc_no)
	    fprintf (f, " In SCC: %i\n", (int) s->scc_no);
	  /* Dump every size/time entry with its predicates.  */
	  for (i = 0; vec_safe_iterate (s->size_time_table, i, &e); i++)
	    {
	      fprintf (f, " size:%f, time:%f",
		       (double) e->size / ipa_fn_summary::size_scale,
		       e->time.to_double ());
	      if (e->exec_predicate != true)
		{
		  fprintf (f, ",  executed if:");
		  e->exec_predicate.dump (f, s->conds, 0);
		}
	      if (e->exec_predicate != e->nonconst_predicate)
		{
		  fprintf (f, ",  nonconst if:");
		  e->nonconst_predicate.dump (f, s->conds, 0);
		}
	      fprintf (f, "\n");
	    }
	  if (s->loop_iterations)
	    {
	      fprintf (f, "  loop iterations:");
	      s->loop_iterations->dump (f, s->conds);
	    }
	  if (s->loop_stride)
	    {
	      fprintf (f, "  loop stride:");
	      s->loop_stride->dump (f, s->conds);
	    }
	  fprintf (f, "  calls:\n");
	  dump_ipa_call_summary (f, 4, node, s);
	  fprintf (f, "\n");
	}
      else
	fprintf (f, "IPA summary for %s is missing.\n", node->dump_name ());
    }
}
922
/* Dump the function summary of NODE to stderr; for use from a debugger.  */

DEBUG_FUNCTION void
ipa_debug_fn_summary (struct cgraph_node *node)
{
  ipa_dump_fn_summary (stderr, node);
}
928
/* Dump summaries of all defined functions to F.  Inline clones are
   skipped here; dump_ipa_call_summary recurses into them from the node
   they were inlined to.  */

void
ipa_dump_fn_summaries (FILE *f)
{
  struct cgraph_node *node;

  FOR_EACH_DEFINED_FUNCTION (node)
    if (!node->inlined_to)
      ipa_dump_fn_summary (f, node);
}
938
939 /* Callback of walk_aliased_vdefs. Flags that it has been invoked to the
940 boolean variable pointed to by DATA. */
941
942 static bool
943 mark_modified (ao_ref *ao ATTRIBUTE_UNUSED, tree vdef ATTRIBUTE_UNUSED,
944 void *data)
945 {
946 bool *b = (bool *) data;
947 *b = true;
948 return true;
949 }
950
/* If OP refers to value of function parameter, return the corresponding
   parameter.  If non-NULL, the size of the memory load (or the SSA_NAME of the
   PARM_DECL) will be stored to *SIZE_P in that case too.  */

static tree
unmodified_parm_1 (ipa_func_body_info *fbi, gimple *stmt, tree op,
		   poly_int64 *size_p)
{
  /* SSA_NAME referring to parm default def?  */
  if (TREE_CODE (op) == SSA_NAME
      && SSA_NAME_IS_DEFAULT_DEF (op)
      && TREE_CODE (SSA_NAME_VAR (op)) == PARM_DECL)
    {
      if (size_p)
	*size_p = tree_to_poly_int64 (TYPE_SIZE (TREE_TYPE (op)));
      return SSA_NAME_VAR (op);
    }
  /* Non-SSA parm reference?  */
  if (TREE_CODE (op) == PARM_DECL)
    {
      bool modified = false;

      ao_ref refd;
      ao_ref_init (&refd, op);
      /* Walk the virtual definitions reaching STMT; mark_modified flips
	 MODIFIED if any store may have clobbered the parameter.  The walk
	 is limited to aa_walk_budget + 1 steps.  */
      int walked = walk_aliased_vdefs (&refd, gimple_vuse (stmt),
				       mark_modified, &modified, NULL, NULL,
				       fbi->aa_walk_budget + 1);
      if (walked < 0)
	{
	  /* Out of budget: zero it so later queries give up immediately,
	     and conservatively report "not an unmodified parameter".  */
	  fbi->aa_walk_budget = 0;
	  return NULL_TREE;
	}
      if (!modified)
	{
	  if (size_p)
	    *size_p = tree_to_poly_int64 (TYPE_SIZE (TREE_TYPE (op)));
	  return op;
	}
    }
  return NULL_TREE;
}
992
993 /* If OP refers to value of function parameter, return the corresponding
994 parameter. Also traverse chains of SSA register assignments. If non-NULL,
995 the size of the memory load (or the SSA_NAME of the PARM_DECL) will be
996 stored to *SIZE_P in that case too. */
997
998 static tree
999 unmodified_parm (ipa_func_body_info *fbi, gimple *stmt, tree op,
1000 poly_int64 *size_p)
1001 {
1002 tree res = unmodified_parm_1 (fbi, stmt, op, size_p);
1003 if (res)
1004 return res;
1005
1006 if (TREE_CODE (op) == SSA_NAME
1007 && !SSA_NAME_IS_DEFAULT_DEF (op)
1008 && gimple_assign_single_p (SSA_NAME_DEF_STMT (op)))
1009 return unmodified_parm (fbi, SSA_NAME_DEF_STMT (op),
1010 gimple_assign_rhs1 (SSA_NAME_DEF_STMT (op)),
1011 size_p);
1012 return NULL_TREE;
1013 }
1014
/* If OP refers to a value of a function parameter or value loaded from an
   aggregate passed to a parameter (either by value or reference), return TRUE
   and store the number of the parameter to *INDEX_P, the access size into
   *SIZE_P, and information whether and how it has been loaded from an
   aggregate into *AGGPOS.  INFO describes the function parameters, STMT is the
   statement in which OP is used or loaded.  */

static bool
unmodified_parm_or_parm_agg_item (struct ipa_func_body_info *fbi,
				  gimple *stmt, tree op, int *index_p,
				  poly_int64 *size_p,
				  struct agg_position_info *aggpos)
{
  tree res = unmodified_parm_1 (fbi, stmt, op, size_p);

  gcc_checking_assert (aggpos);
  if (res)
    {
      /* OP is a direct (scalar) use of an unmodified parameter.  */
      *index_p = ipa_get_param_decl_index (fbi->info, res);
      if (*index_p < 0)
	return false;
      aggpos->agg_contents = false;
      aggpos->by_ref = false;
      return true;
    }

  if (TREE_CODE (op) == SSA_NAME)
    {
      /* Follow a single-definition SSA name to the statement computing it;
	 when that RHS is not a memory reference, recurse on it.  */
      if (SSA_NAME_IS_DEFAULT_DEF (op)
	  || !gimple_assign_single_p (SSA_NAME_DEF_STMT (op)))
	return false;
      stmt = SSA_NAME_DEF_STMT (op);
      op = gimple_assign_rhs1 (stmt);
      if (!REFERENCE_CLASS_P (op))
	return unmodified_parm_or_parm_agg_item (fbi, stmt, op, index_p, size_p,
						 aggpos);
    }

  /* OP is a memory reference; check whether it loads from an aggregate
     passed in a parameter.  */
  aggpos->agg_contents = true;
  return ipa_load_from_parm_agg (fbi, fbi->info->descriptors,
				 stmt, op, index_p, &aggpos->offset,
				 size_p, &aggpos->by_ref);
}
1058
/* See if statement might disappear after inlining.
   0 - means not eliminated
   1 - half of statements goes away
   2 - for sure it is eliminated.
   We are not terribly sophisticated, basically looking for simple abstraction
   penalty wrappers.  */

static int
eliminated_by_inlining_prob (ipa_func_body_info *fbi, gimple *stmt)
{
  enum gimple_code code = gimple_code (stmt);
  enum tree_code rhs_code;

  if (!optimize)
    return 0;

  switch (code)
    {
    case GIMPLE_RETURN:
      return 2;
    case GIMPLE_ASSIGN:
      /* Only simple assignments (single rhs operand) are considered.  */
      if (gimple_num_ops (stmt) != 2)
	return 0;

      rhs_code = gimple_assign_rhs_code (stmt);

      /* Casts of parameters, loads from parameters passed by reference
	 and stores to return value or parameters are often free after
	 inlining due to SRA and further combining.
	 Assume that half of statements goes away.  */
      if (CONVERT_EXPR_CODE_P (rhs_code)
	  || rhs_code == VIEW_CONVERT_EXPR
	  || rhs_code == ADDR_EXPR
	  || gimple_assign_rhs_class (stmt) == GIMPLE_SINGLE_RHS)
	{
	  tree rhs = gimple_assign_rhs1 (stmt);
	  tree lhs = gimple_assign_lhs (stmt);
	  tree inner_rhs = get_base_address (rhs);
	  tree inner_lhs = get_base_address (lhs);
	  bool rhs_free = false;
	  bool lhs_free = false;

	  if (!inner_rhs)
	    inner_rhs = rhs;
	  if (!inner_lhs)
	    inner_lhs = lhs;

	  /* Reads of parameter are expected to be free.  */
	  if (unmodified_parm (fbi, stmt, inner_rhs, NULL))
	    rhs_free = true;
	  /* Match expressions of form &this->field.  Those will most likely
	     combine with something upstream after inlining.  */
	  else if (TREE_CODE (inner_rhs) == ADDR_EXPR)
	    {
	      tree op = get_base_address (TREE_OPERAND (inner_rhs, 0));
	      if (TREE_CODE (op) == PARM_DECL)
		rhs_free = true;
	      else if (TREE_CODE (op) == MEM_REF
		       && unmodified_parm (fbi, stmt, TREE_OPERAND (op, 0),
					   NULL))
		rhs_free = true;
	    }

	  /* When parameter is not SSA register because its address is taken
	     and it is just copied into one, the statement will be completely
	     free after inlining (we will copy propagate backward).  */
	  if (rhs_free && is_gimple_reg (lhs))
	    return 2;

	  /* Reads of parameters passed by reference
	     expected to be free (i.e. optimized out after inlining).  */
	  if (TREE_CODE (inner_rhs) == MEM_REF
	      && unmodified_parm (fbi, stmt, TREE_OPERAND (inner_rhs, 0), NULL))
	    rhs_free = true;

	  /* Copying parameter passed by reference into gimple register is
	     probably also going to copy propagate, but we can't be quite
	     sure.  */
	  if (rhs_free && is_gimple_reg (lhs))
	    lhs_free = true;

	  /* Writes to parameters, parameters passed by value and return value
	     (either directly or passed via invisible reference) are free.

	     TODO: We ought to handle testcase like
	     struct a {int a,b;};
	     struct a
	     returnstruct (void)
	     {
	       struct a a ={1,2};
	       return a;
	     }

	     This translates into:

	     returnstruct ()
	     {
	       int a$b;
	       int a$a;
	       struct a a;
	       struct a D.2739;

	       <bb 2>:
	       D.2739.a = 1;
	       D.2739.b = 2;
	       return D.2739;

	     }
	     For that we either need to copy ipa-split logic detecting writes
	     to return value.  */
	  if (TREE_CODE (inner_lhs) == PARM_DECL
	      || TREE_CODE (inner_lhs) == RESULT_DECL
	      || (TREE_CODE (inner_lhs) == MEM_REF
		  && (unmodified_parm (fbi, stmt, TREE_OPERAND (inner_lhs, 0),
				       NULL)
		      || (TREE_CODE (TREE_OPERAND (inner_lhs, 0)) == SSA_NAME
			  && SSA_NAME_VAR (TREE_OPERAND (inner_lhs, 0))
			  && TREE_CODE (SSA_NAME_VAR (TREE_OPERAND
						      (inner_lhs,
						       0))) == RESULT_DECL))))
	    lhs_free = true;
	  /* A free store of a gimple register or invariant leaves nothing
	     on the rhs side either.  */
	  if (lhs_free
	      && (is_gimple_reg (rhs) || is_gimple_min_invariant (rhs)))
	    rhs_free = true;
	  if (lhs_free && rhs_free)
	    return 1;
	}
      return 0;
    default:
      return 0;
    }
}
1191
/* Analyze EXPR if it represents a series of simple operations performed on
   a function parameter and return true if so.  FBI, STMT, EXPR, INDEX_P and
   AGGPOS have the same meaning like in unmodified_parm_or_parm_agg_item.
   Type of the parameter or load from an aggregate via the parameter is
   stored in *TYPE_P.  Operations on the parameter are recorded to
   PARAM_OPS_P if it is not NULL.  */

static bool
decompose_param_expr (struct ipa_func_body_info *fbi,
		      gimple *stmt, tree expr,
		      int *index_p, tree *type_p,
		      struct agg_position_info *aggpos,
		      expr_eval_ops *param_ops_p = NULL)
{
  /* Cap the number of recorded operations so predicates stay cheap.  */
  int op_limit = param_ipa_max_param_expr_ops;
  int op_count = 0;

  if (param_ops_p)
    *param_ops_p = NULL;

  /* Walk backwards through defining statements of EXPR, peeling off one
     operation with at most one non-constant operand per iteration, until
     a parameter (or aggregate load from one) is reached.  */
  while (true)
    {
      expr_eval_op eval_op;
      unsigned rhs_count;
      unsigned cst_count = 0;

      if (unmodified_parm_or_parm_agg_item (fbi, stmt, expr, index_p, NULL,
					    aggpos))
	{
	  tree type = TREE_TYPE (expr);

	  if (aggpos->agg_contents)
	    {
	      /* Stop if containing bit-field.  */
	      if (TREE_CODE (expr) == BIT_FIELD_REF
		  || contains_bitfld_component_ref_p (expr))
		break;
	    }

	  *type_p = type;
	  return true;
	}

      if (TREE_CODE (expr) != SSA_NAME || SSA_NAME_IS_DEFAULT_DEF (expr))
	break;

      if (!is_gimple_assign (stmt = SSA_NAME_DEF_STMT (expr)))
	break;

      switch (gimple_assign_rhs_class (stmt))
	{
	case GIMPLE_SINGLE_RHS:
	  expr = gimple_assign_rhs1 (stmt);
	  continue;

	case GIMPLE_UNARY_RHS:
	  rhs_count = 1;
	  break;

	case GIMPLE_BINARY_RHS:
	  rhs_count = 2;
	  break;

	case GIMPLE_TERNARY_RHS:
	  rhs_count = 3;
	  break;

	default:
	  goto fail;
	}

      /* Stop if expression is too complex.  */
      if (op_count++ == op_limit)
	break;

      if (param_ops_p)
	{
	  eval_op.code = gimple_assign_rhs_code (stmt);
	  eval_op.type = TREE_TYPE (gimple_assign_lhs (stmt));
	  eval_op.val[0] = NULL_TREE;
	  eval_op.val[1] = NULL_TREE;
	}

      expr = NULL_TREE;
      for (unsigned i = 0; i < rhs_count; i++)
	{
	  tree op = gimple_op (stmt, i + 1);

	  gcc_assert (op && !TYPE_P (op));
	  if (is_gimple_ip_invariant (op))
	    {
	      /* If all operands are constants, there is nothing left to
		 trace back to a parameter.  */
	      if (++cst_count == rhs_count)
		goto fail;

	      eval_op.val[cst_count - 1] = op;
	    }
	  else if (!expr)
	    {
	      /* Found a non-constant operand, and record its index in rhs
		 operands.  */
	      eval_op.index = i;
	      expr = op;
	    }
	  else
	    {
	      /* Found more than one non-constant operands.  */
	      goto fail;
	    }
	}

      /* Operations are recorded innermost-first, so prepend.  */
      if (param_ops_p)
	vec_safe_insert (*param_ops_p, 0, eval_op);
    }

  /* Failed to decompose, free resource and return.  */
fail:
  if (param_ops_p)
    vec_free (*param_ops_p);

  return false;
}
1313
/* If BB ends by a conditional we can turn into predicates, attach corresponding
   predicates to the CFG edges.  */

static void
set_cond_stmt_execution_predicate (struct ipa_func_body_info *fbi,
				   class ipa_fn_summary *summary,
				   class ipa_node_params *params_summary,
				   basic_block bb)
{
  gimple *last;
  tree op, op2;
  int index;
  struct agg_position_info aggpos;
  enum tree_code code, inverted_code;
  edge e;
  edge_iterator ei;
  gimple *set_stmt;
  tree param_type;
  expr_eval_ops param_ops;

  last = last_stmt (bb);
  if (!last || gimple_code (last) != GIMPLE_COND)
    return;
  /* Only conditions comparing against a compile-time invariant can become
     predicates.  */
  if (!is_gimple_ip_invariant (gimple_cond_rhs (last)))
    return;
  op = gimple_cond_lhs (last);

  if (decompose_param_expr (fbi, last, op, &index, &param_type, &aggpos,
			    &param_ops))
    {
      code = gimple_cond_code (last);
      inverted_code = invert_tree_comparison (code, HONOR_NANS (op));

      FOR_EACH_EDGE (e, ei, bb->succs)
	{
	  enum tree_code this_code = (e->flags & EDGE_TRUE_VALUE
				      ? code : inverted_code);
	  /* invert_tree_comparison will return ERROR_MARK on FP
	     comparisons that are not EQ/NE instead of returning proper
	     unordered one.  Be sure it is not confused with NON_CONSTANT.

	     And if the edge's target is the final block of diamond CFG graph
	     of this conditional statement, we do not need to compute
	     predicate for the edge because the final block's predicate must
	     be at least as that of the first block of the statement.  */
	  if (this_code != ERROR_MARK
	      && !dominated_by_p (CDI_POST_DOMINATORS, bb, e->dest))
	    {
	      predicate p
		= add_condition (summary, params_summary, index,
		       		 param_type, &aggpos,
				 this_code, gimple_cond_rhs (last), param_ops);
	      e->aux = edge_predicate_pool.allocate ();
	      *(predicate *) e->aux = p;
	    }
	}
      vec_free (param_ops);
    }

  if (TREE_CODE (op) != SSA_NAME)
    return;
  /* Special case
     if (builtin_constant_p (op))
       constant_code
     else
       nonconstant_code.
     Here we can predicate nonconstant_code.  We can't
     really handle constant_code since we have no predicate
     for this and also the constant code is not known to be
     optimized away when inliner doesn't see operand is constant.
     Other optimizers might think otherwise.  */
  if (gimple_cond_code (last) != NE_EXPR
      || !integer_zerop (gimple_cond_rhs (last)))
    return;
  set_stmt = SSA_NAME_DEF_STMT (op);
  if (!gimple_call_builtin_p (set_stmt, BUILT_IN_CONSTANT_P)
      || gimple_call_num_args (set_stmt) != 1)
    return;
  op2 = gimple_call_arg (set_stmt, 0);
  if (!decompose_param_expr (fbi, set_stmt, op2, &index, &param_type, &aggpos))
    return;
  /* Attach is_not_constant predicate to the else (false) edge only.  */
  FOR_EACH_EDGE (e, ei, bb->succs) if (e->flags & EDGE_FALSE_VALUE)
    {
      predicate p = add_condition (summary, params_summary, index,
		     		   param_type, &aggpos,
				   predicate::is_not_constant, NULL_TREE);
      e->aux = edge_predicate_pool.allocate ();
      *(predicate *) e->aux = p;
    }
}
1404
1405
/* If BB ends by a switch we can turn into predicates, attach corresponding
   predicates to the CFG edges.  */

static void
set_switch_stmt_execution_predicate (struct ipa_func_body_info *fbi,
				     class ipa_fn_summary *summary,
				     class ipa_node_params *params_summary,
				     basic_block bb)
{
  gimple *lastg;
  tree op;
  int index;
  struct agg_position_info aggpos;
  edge e;
  edge_iterator ei;
  size_t n;
  size_t case_idx;
  tree param_type;
  expr_eval_ops param_ops;

  lastg = last_stmt (bb);
  if (!lastg || gimple_code (lastg) != GIMPLE_SWITCH)
    return;
  gswitch *last = as_a <gswitch *> (lastg);
  op = gimple_switch_index (last);
  if (!decompose_param_expr (fbi, last, op, &index, &param_type, &aggpos,
			     &param_ops))
    return;

  /* Case ranges collected so far, used to build the default predicate.  */
  auto_vec<std::pair<tree, tree> > ranges;
  tree type = TREE_TYPE (op);
  int bound_limit = param_ipa_max_switch_predicate_bounds;
  int bound_count = 0;
  wide_int vr_wmin, vr_wmax;
  value_range_kind vr_type = get_range_info (op, &vr_wmin, &vr_wmax);

  /* Start every outgoing edge as "never executed"; cases OR into it.  */
  FOR_EACH_EDGE (e, ei, bb->succs)
    {
      e->aux = edge_predicate_pool.allocate ();
      *(predicate *) e->aux = false;
    }

  e = gimple_switch_edge (cfun, last, 0);
  /* Set BOUND_COUNT to maximum count to bypass computing predicate for
     default case if its target basic block is in convergence point of all
     switch cases, which can be determined by checking whether it
     post-dominates the switch statement.  */
  if (dominated_by_p (CDI_POST_DOMINATORS, bb, e->dest))
    bound_count = INT_MAX;

  n = gimple_switch_num_labels (last);
  for (case_idx = 1; case_idx < n; ++case_idx)
    {
      tree cl = gimple_switch_label (last, case_idx);
      tree min = CASE_LOW (cl);
      tree max = CASE_HIGH (cl);
      predicate p;

      e = gimple_switch_edge (cfun, last, case_idx);

      /* The case value might not have same type as switch expression,
	 extend the value based on the expression type.  */
      if (TREE_TYPE (min) != type)
	min = wide_int_to_tree (type, wi::to_wide (min));

      if (!max)
	max = min;
      else if (TREE_TYPE (max) != type)
	max = wide_int_to_tree (type, wi::to_wide (max));

      /* The case's target basic block is in convergence point of all switch
	 cases, its predicate should be at least as that of the switch
	 statement.  */
      if (dominated_by_p (CDI_POST_DOMINATORS, bb, e->dest))
	p = true;
      else if (min == max)
	p = add_condition (summary, params_summary, index, param_type,
		           &aggpos, EQ_EXPR, min, param_ops);
      else
	{
	  predicate p1, p2;
	  p1 = add_condition (summary, params_summary, index, param_type,
			      &aggpos, GE_EXPR, min, param_ops);
	  p2 = add_condition (summary, params_summary,index, param_type,
			      &aggpos, LE_EXPR, max, param_ops);
	  p = p1 & p2;
	}
      *(class predicate *) e->aux
	= p.or_with (summary->conds, *(class predicate *) e->aux);

      /* If there are too many disjoint case ranges, predicate for default
	 case might become too complicated.  So add a limit here.  */
      if (bound_count > bound_limit)
	continue;

      bool new_range = true;

      if (!ranges.is_empty ())
	{
	  wide_int curr_wmin = wi::to_wide (min);
	  wide_int last_wmax = wi::to_wide (ranges.last ().second);

	  /* Merge case ranges if they are continuous.  */
	  if (curr_wmin == last_wmax + 1)
	    new_range = false;
	  else if (vr_type == VR_ANTI_RANGE)
	    {
	      /* If two disjoint case ranges can be connected by anti-range
		 of switch index, combine them to one range.  */
	      if (wi::lt_p (vr_wmax, curr_wmin - 1, TYPE_SIGN (type)))
		vr_type = VR_UNDEFINED;
	      else if (wi::le_p (vr_wmin, last_wmax + 1, TYPE_SIGN (type)))
		new_range = false;
	    }
	}

      /* Create/extend a case range.  And we count endpoints of range set,
	 this number nearly equals to number of conditions that we will create
	 for predicate of default case.  */
      if (new_range)
	{
	  bound_count += (min == max) ? 1 : 2;
	  ranges.safe_push (std::make_pair (min, max));
	}
      else
	{
	  bound_count += (ranges.last ().first == ranges.last ().second);
	  ranges.last ().second = max;
	}
    }

  e = gimple_switch_edge (cfun, last, 0);
  if (bound_count > bound_limit)
    {
      /* Too many bounds: conservatively treat the default case as always
	 executable.  */
      *(class predicate *) e->aux = true;
      vec_free (param_ops);
      return;
    }

  predicate p_seg = true;
  predicate p_all = false;

  if (vr_type != VR_RANGE)
    {
      vr_wmin = wi::to_wide (TYPE_MIN_VALUE (type));
      vr_wmax = wi::to_wide (TYPE_MAX_VALUE (type));
    }

  /* Construct predicate to represent default range set that is negation of
     all case ranges.  Case range is classified as containing single/non-single
     values.  Suppose a piece of case ranges in the following.

                [D1...D2]  [S1] ... [Sn]  [D3...D4]

     To represent default case's range sets between two non-single value
     case ranges (From D2 to D3), we construct predicate as:

              D2 < x < D3 && x != S1 && ... && x != Sn
   */
  for (size_t i = 0; i < ranges.length (); i++)
    {
      tree min = ranges[i].first;
      tree max = ranges[i].second;

      if (min == max)
	p_seg &= add_condition (summary, params_summary, index,
		       		param_type, &aggpos, NE_EXPR,
				min, param_ops);
      else
	{
	  /* Do not create sub-predicate for range that is beyond low bound
	     of switch index.  */
	  if (wi::lt_p (vr_wmin, wi::to_wide (min), TYPE_SIGN (type)))
	    {
	      p_seg &= add_condition (summary, params_summary, index,
			     	      param_type, &aggpos,
				      LT_EXPR, min, param_ops);
	      p_all = p_all.or_with (summary->conds, p_seg);
	    }

	  /* Do not create sub-predicate for range that is beyond up bound
	     of switch index.  */
	  if (wi::le_p (vr_wmax, wi::to_wide (max), TYPE_SIGN (type)))
	    {
	      p_seg = false;
	      break;
	    }

	  p_seg = add_condition (summary, params_summary, index,
				 param_type, &aggpos, GT_EXPR,
				 max, param_ops);
	}
    }

  p_all = p_all.or_with (summary->conds, p_seg);
  *(class predicate *) e->aux
    = p_all.or_with (summary->conds, *(class predicate *) e->aux);

  vec_free (param_ops);
}
1606
1607
/* For each BB in NODE attach to its AUX pointer predicate under
   which it is executable.  */

static void
compute_bb_predicates (struct ipa_func_body_info *fbi,
		       struct cgraph_node *node,
		       class ipa_fn_summary *summary,
		       class ipa_node_params *params_summary)
{
  struct function *my_function = DECL_STRUCT_FUNCTION (node->decl);
  bool done = false;
  basic_block bb;

  /* First attach predicates to CFG edges of conditional and switch
     statements; the fixed-point propagation below combines these into
     per-basic-block predicates.  */
  FOR_EACH_BB_FN (bb, my_function)
    {
      set_cond_stmt_execution_predicate (fbi, summary, params_summary, bb);
      set_switch_stmt_execution_predicate (fbi, summary, params_summary, bb);
    }

  /* Entry block is always executable.  */
  ENTRY_BLOCK_PTR_FOR_FN (my_function)->aux
    = edge_predicate_pool.allocate ();
  *(predicate *) ENTRY_BLOCK_PTR_FOR_FN (my_function)->aux = true;

  /* A simple dataflow propagation of predicates forward in the CFG.
     TODO: work in reverse postorder.  */
  while (!done)
    {
      done = true;
      FOR_EACH_BB_FN (bb, my_function)
	{
	  /* A block is executable when any predecessor is, under the
	     predicate attached to the connecting edge (if any).  */
	  predicate p = false;
	  edge e;
	  edge_iterator ei;
	  FOR_EACH_EDGE (e, ei, bb->preds)
	    {
	      if (e->src->aux)
		{
		  predicate this_bb_predicate
		    = *(predicate *) e->src->aux;
		  if (e->aux)
		    this_bb_predicate &= (*(class predicate *) e->aux);
		  p = p.or_with (summary->conds, this_bb_predicate);
		  /* "Always executable" cannot grow further; stop early.  */
		  if (p == true)
		    break;
		}
	    }
	  if (p != false)
	    {
	      basic_block pdom_bb;

	      if (!bb->aux)
		{
		  done = false;
		  bb->aux = edge_predicate_pool.allocate ();
		  *((predicate *) bb->aux) = p;
		}
	      else if (p != *(predicate *) bb->aux)
		{
		  /* This OR operation is needed to ensure monotone data flow
		     in the case we hit the limit on number of clauses and the
		     and/or operations above give approximate answers.  */
		  p = p.or_with (summary->conds, *(predicate *)bb->aux);
		  if (p != *(predicate *) bb->aux)
		    {
		      done = false;
		      *((predicate *) bb->aux) = p;
		    }
		}

	      /* For switch/if statement, we can OR-combine predicates of all
		 its cases/branches to get predicate for basic block in their
		 convergence point, but sometimes this will generate very
		 complicated predicate.  Actually, we can get simplified
		 predicate in another way by using the fact that predicate
		 for a basic block must also hold true for its post dominators.
		 To be specific, basic block in convergence point of
		 conditional statement should include predicate of the
		 statement.  */
	      pdom_bb = get_immediate_dominator (CDI_POST_DOMINATORS, bb);
	      if (pdom_bb == EXIT_BLOCK_PTR_FOR_FN (my_function) || !pdom_bb)
		;
	      else if (!pdom_bb->aux)
		{
		  done = false;
		  pdom_bb->aux = edge_predicate_pool.allocate ();
		  *((predicate *) pdom_bb->aux) = p;
		}
	      else if (p != *(predicate *) pdom_bb->aux)
		{
		  p = p.or_with (summary->conds, *(predicate *)pdom_bb->aux);
		  if (p != *(predicate *) pdom_bb->aux)
		    {
		      done = false;
		      *((predicate *) pdom_bb->aux) = p;
		    }
		}
	    }
	}
    }
}
1709
1710
/* Return predicate specifying when the expression EXPR might have result that
   is not a compile time constant.  NONCONSTANT_NAMES is indexed by
   SSA_NAME_VERSION and gives predicates for SSA names seen so far.  */

static predicate
will_be_nonconstant_expr_predicate (ipa_func_body_info *fbi,
				    class ipa_fn_summary *summary,
				    class ipa_node_params *params_summary,
				    tree expr,
				    vec<predicate> nonconstant_names)
{
  tree parm;
  int index;

  /* Strip unary wrappers; they do not affect constancy of the operand.  */
  while (UNARY_CLASS_P (expr))
    expr = TREE_OPERAND (expr, 0);

  parm = unmodified_parm (fbi, NULL, expr, NULL);
  if (parm && (index = ipa_get_param_decl_index (fbi->info, parm)) >= 0)
    return add_condition (summary, params_summary, index, TREE_TYPE (parm), NULL,
			  predicate::changed, NULL_TREE);
  if (is_gimple_min_invariant (expr))
    return false;
  if (TREE_CODE (expr) == SSA_NAME)
    return nonconstant_names[SSA_NAME_VERSION (expr)];
  if (BINARY_CLASS_P (expr) || COMPARISON_CLASS_P (expr))
    {
      /* Result is nonconstant whenever either operand is.  */
      predicate p1
	= will_be_nonconstant_expr_predicate (fbi, summary,
					      params_summary,
					      TREE_OPERAND (expr, 0),
					      nonconstant_names);
      if (p1 == true)
	return p1;

      predicate p2
	= will_be_nonconstant_expr_predicate (fbi, summary,
					      params_summary,
					      TREE_OPERAND (expr, 1),
					      nonconstant_names);
      return p1.or_with (summary->conds, p2);
    }
  else if (TREE_CODE (expr) == COND_EXPR)
    {
      predicate p1
	= will_be_nonconstant_expr_predicate (fbi, summary,
					      params_summary,
					      TREE_OPERAND (expr, 0),
					      nonconstant_names);
      if (p1 == true)
	return p1;

      predicate p2
	= will_be_nonconstant_expr_predicate (fbi, summary,
					      params_summary,
					      TREE_OPERAND (expr, 1),
					      nonconstant_names);
      if (p2 == true)
	return p2;
      p1 = p1.or_with (summary->conds, p2);
      p2 = will_be_nonconstant_expr_predicate (fbi, summary,
					       params_summary,
					       TREE_OPERAND (expr, 2),
					       nonconstant_names);
      return p2.or_with (summary->conds, p1);
    }
  else if (TREE_CODE (expr) == CALL_EXPR)
    return true;
  else
    {
      debug_tree (expr);
      gcc_unreachable ();
    }
  return false;
}
1785
1786
/* Return predicate specifying when the STMT might have result that is not
   a compile time constant.  */

static predicate
will_be_nonconstant_predicate (struct ipa_func_body_info *fbi,
			       class ipa_fn_summary *summary,
			       class ipa_node_params *params_summary,
			       gimple *stmt,
			       vec<predicate> nonconstant_names)
{
  predicate p = true;
  ssa_op_iter iter;
  tree use;
  tree param_type = NULL_TREE;
  predicate op_non_const;
  bool is_load;
  int base_index;
  struct agg_position_info aggpos;

  /* What statements might be optimized away
     when their arguments are constant.  */
  if (gimple_code (stmt) != GIMPLE_ASSIGN
      && gimple_code (stmt) != GIMPLE_COND
      && gimple_code (stmt) != GIMPLE_SWITCH
      && (gimple_code (stmt) != GIMPLE_CALL
	  || !(gimple_call_flags (stmt) & ECF_CONST)))
    return p;

  /* Stores will stay anyway.  */
  if (gimple_store_p (stmt))
    return p;

  is_load = gimple_assign_load_p (stmt);

  /* Loads can be optimized when the value is known.  */
  if (is_load)
    {
      tree op = gimple_assign_rhs1 (stmt);
      if (!decompose_param_expr (fbi, stmt, op, &base_index, &param_type,
				 &aggpos))
	return p;
    }
  else
    base_index = -1;

  /* See if we understand all operands before we start
     adding conditionals.  */
  FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
    {
      tree parm = unmodified_parm (fbi, stmt, use, NULL);
      /* For arguments we can build a condition.  */
      if (parm && ipa_get_param_decl_index (fbi->info, parm) >= 0)
	continue;
      if (TREE_CODE (use) != SSA_NAME)
	return p;
      /* If we know when operand is constant,
	 we still can say something useful.  */
      if (nonconstant_names[SSA_NAME_VERSION (use)] != true)
	continue;
      return p;
    }

  /* Seed with the load's own condition (if any), then OR in predicates
     of all SSA uses.  */
  if (is_load)
    op_non_const =
      add_condition (summary, params_summary,
		     base_index, param_type, &aggpos,
		     predicate::changed, NULL_TREE);
  else
    op_non_const = false;
  FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
    {
      tree parm = unmodified_parm (fbi, stmt, use, NULL);
      int index;

      if (parm && (index = ipa_get_param_decl_index (fbi->info, parm)) >= 0)
	{
	  if (index != base_index)
	    p = add_condition (summary, params_summary, index,
			       TREE_TYPE (parm), NULL,
			       predicate::changed, NULL_TREE);
	  else
	    continue;
	}
      else
	p = nonconstant_names[SSA_NAME_VERSION (use)];
      op_non_const = p.or_with (summary->conds, op_non_const);
    }
  /* Record the resulting predicate for the lhs SSA name, if any.  */
  if ((gimple_code (stmt) == GIMPLE_ASSIGN || gimple_code (stmt) == GIMPLE_CALL)
      && gimple_op (stmt, 0)
      && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
    nonconstant_names[SSA_NAME_VERSION (gimple_op (stmt, 0))]
      = op_non_const;
  return op_non_const;
}
1881
struct record_modified_bb_info
{
  /* The tracked parameter value; used for dump output only.  */
  tree op;
  /* Indices of basic blocks where the value may be set.  */
  bitmap bb_set;
  /* The statement whose own virtual definition is to be ignored.  */
  gimple *stmt;
};
1888
1889 /* Value is initialized in INIT_BB and used in USE_BB. We want to copute
1890 probability how often it changes between USE_BB.
1891 INIT_BB->count/USE_BB->count is an estimate, but if INIT_BB
1892 is in different loop nest, we can do better.
1893 This is all just estimate. In theory we look for minimal cut separating
1894 INIT_BB and USE_BB, but we only want to anticipate loop invariant motion
1895 anyway. */
1896
1897 static basic_block
1898 get_minimal_bb (basic_block init_bb, basic_block use_bb)
1899 {
1900 class loop *l = find_common_loop (init_bb->loop_father, use_bb->loop_father);
1901 if (l && l->header->count < init_bb->count)
1902 return l->header;
1903 return init_bb;
1904 }
1905
/* Callback of walk_aliased_vdefs.  Records basic blocks where the value may be
   set except for info->stmt.  Always returns false so the walk continues
   over all virtual definitions.  */

static bool
record_modified (ao_ref *ao ATTRIBUTE_UNUSED, tree vdef, void *data)
{
  struct record_modified_bb_info *info =
    (struct record_modified_bb_info *) data;
  /* Skip the statement we are analyzing itself.  */
  if (SSA_NAME_DEF_STMT (vdef) == info->stmt)
    return false;
  /* Clobbers do not represent real modifications of the value.  */
  if (gimple_clobber_p (SSA_NAME_DEF_STMT (vdef)))
    return false;
  /* Default definitions count as being set at function entry; otherwise
     record the cheapest block representing the modification point.  */
  bitmap_set_bit (info->bb_set,
		  SSA_NAME_IS_DEFAULT_DEF (vdef)
		  ? ENTRY_BLOCK_PTR_FOR_FN (cfun)->index
		  : get_minimal_bb
			 (gimple_bb (SSA_NAME_DEF_STMT (vdef)),
			  gimple_bb (info->stmt))->index);
  if (dump_file)
    {
      fprintf (dump_file, "     Param ");
      print_generic_expr (dump_file, info->op, TDF_SLIM);
      fprintf (dump_file, " changed at bb %i, minimal: %i stmt: ",
	       gimple_bb (SSA_NAME_DEF_STMT (vdef))->index,
	       get_minimal_bb
			 (gimple_bb (SSA_NAME_DEF_STMT (vdef)),
			  gimple_bb (info->stmt))->index);
      print_gimple_stmt (dump_file, SSA_NAME_DEF_STMT (vdef), 0);
    }
  return false;
}
1937
/* Return probability (based on REG_BR_PROB_BASE) that I-th parameter of STMT
   will change since last invocation of STMT.

   Value 0 is reserved for compile time invariants.
   For common parameters it is REG_BR_PROB_BASE.  For loop invariants it
   ought to be REG_BR_PROB_BASE / estimated_iters.  */

static int
param_change_prob (ipa_func_body_info *fbi, gimple *stmt, int i)
{
  tree op = gimple_call_arg (stmt, i);
  basic_block bb = gimple_bb (stmt);

  if (TREE_CODE (op) == WITH_SIZE_EXPR)
    op = TREE_OPERAND (op, 0);

  tree base = get_base_address (op);

  /* Global invariants never change.  */
  if (is_gimple_min_invariant (base))
    return 0;

  /* We would have to do non-trivial analysis to really work out what
     is the probability of value to change (i.e. when init statement
     is in a sibling loop of the call).

     We do a conservative estimate: when call is executed N times more often
     than the statement defining value, we take the frequency 1/N.  */
  if (TREE_CODE (base) == SSA_NAME)
    {
      profile_count init_count;

      if (!bb->count.nonzero_p ())
	return REG_BR_PROB_BASE;

      if (SSA_NAME_IS_DEFAULT_DEF (base))
	init_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
      else
	init_count = get_minimal_bb
		      (gimple_bb (SSA_NAME_DEF_STMT (base)),
		       gimple_bb (stmt))->count;

      if (init_count < bb->count)
        return MAX ((init_count.to_sreal_scale (bb->count)
		     * REG_BR_PROB_BASE).to_int (), 1);
      return REG_BR_PROB_BASE;
    }
  else
    {
      /* Memory operand: walk aliased virtual definitions to find the
	 blocks where the value may be modified.  */
      ao_ref refd;
      profile_count max = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
      struct record_modified_bb_info info;
      tree init = ctor_for_folding (base);

      /* A known constructor means the value is a compile-time invariant.  */
      if (init != error_mark_node)
	return 0;
      if (!bb->count.nonzero_p ())
	return REG_BR_PROB_BASE;
      if (dump_file)
	{
	  fprintf (dump_file, "     Analyzing param change probability of ");
	  print_generic_expr (dump_file, op, TDF_SLIM);
	  fprintf (dump_file, "\n");
	}
      ao_ref_init (&refd, op);
      info.op = op;
      info.stmt = stmt;
      info.bb_set = BITMAP_ALLOC (NULL);
      int walked
	= walk_aliased_vdefs (&refd, gimple_vuse (stmt), record_modified, &info,
			      NULL, NULL, fbi->aa_walk_budget);
      if (walked < 0 || bitmap_bit_p (info.bb_set, bb->index))
	{
	  if (dump_file)
	    {
	      if (walked < 0)
		fprintf (dump_file, "     Ran out of AA walking budget.\n");
	      else
		fprintf (dump_file, "     Set in same BB as used.\n");
	    }
	  BITMAP_FREE (info.bb_set);
	  return REG_BR_PROB_BASE;
	}

      bitmap_iterator bi;
      unsigned index;
      /* Lookup the most frequent update of the value and believe that
	 it dominates all the other; precise analysis here is difficult.  */
      EXECUTE_IF_SET_IN_BITMAP (info.bb_set, 0, index, bi)
	max = max.max (BASIC_BLOCK_FOR_FN (cfun, index)->count);
      if (dump_file)
	{
          fprintf (dump_file, "     Set with count ");
	  max.dump (dump_file);
          fprintf (dump_file, " and used with count ");
	  bb->count.dump (dump_file);
          fprintf (dump_file, " freq %f\n",
		   max.to_sreal_scale (bb->count).to_double ());
	}

      BITMAP_FREE (info.bb_set);
      if (max < bb->count)
        return MAX ((max.to_sreal_scale (bb->count)
		     * REG_BR_PROB_BASE).to_int (), 1);
      return REG_BR_PROB_BASE;
    }
}
2045
/* Find whether a basic block BB is the final block of a (half) diamond CFG
   sub-graph and if the predicate the condition depends on is known.  If so,
   return true and store the pointer the predicate in *P.  */

static bool
phi_result_unknown_predicate (ipa_func_body_info *fbi,
			      ipa_fn_summary *summary,
			      class ipa_node_params *params_summary,
			      basic_block bb,
			      predicate *p,
			      vec<predicate> nonconstant_names)
{
  edge e;
  edge_iterator ei;
  basic_block first_bb = NULL;
  gimple *stmt;

  /* With a single predecessor the PHI is degenerate and its result is
     trivially known.  */
  if (single_pred_p (bb))
    {
      *p = false;
      return true;
    }

  /* Check that all predecessors funnel from one common block FIRST_BB,
     either directly or through a single forwarder block.  */
  FOR_EACH_EDGE (e, ei, bb->preds)
    {
      if (single_succ_p (e->src))
	{
	  if (!single_pred_p (e->src))
	    return false;
	  if (!first_bb)
	    first_bb = single_pred (e->src);
	  else if (single_pred (e->src) != first_bb)
	    return false;
	}
      else
	{
	  if (!first_bb)
	    first_bb = e->src;
	  else if (e->src != first_bb)
	    return false;
	}
    }

  if (!first_bb)
    return false;

  stmt = last_stmt (first_bb);
  if (!stmt
      || gimple_code (stmt) != GIMPLE_COND
      || !is_gimple_ip_invariant (gimple_cond_rhs (stmt)))
    return false;

  *p = will_be_nonconstant_expr_predicate (fbi, summary, params_summary,
					   gimple_cond_lhs (stmt),
					   nonconstant_names);
  if (*p == true)
    return false;
  else
    return true;
}
2106
2107 /* Given a PHI statement in a function described by inline properties SUMMARY
2108 and *P being the predicate describing whether the selected PHI argument is
2109 known, store a predicate for the result of the PHI statement into
2110 NONCONSTANT_NAMES, if possible. */
2111
2112 static void
2113 predicate_for_phi_result (class ipa_fn_summary *summary, gphi *phi,
2114 predicate *p,
2115 vec<predicate> nonconstant_names)
2116 {
2117 unsigned i;
2118
2119 for (i = 0; i < gimple_phi_num_args (phi); i++)
2120 {
2121 tree arg = gimple_phi_arg (phi, i)->def;
2122 if (!is_gimple_min_invariant (arg))
2123 {
2124 gcc_assert (TREE_CODE (arg) == SSA_NAME);
2125 *p = p->or_with (summary->conds,
2126 nonconstant_names[SSA_NAME_VERSION (arg)]);
2127 if (*p == true)
2128 return;
2129 }
2130 }
2131
2132 if (dump_file && (dump_flags & TDF_DETAILS))
2133 {
2134 fprintf (dump_file, "\t\tphi predicate: ");
2135 p->dump (dump_file, summary->conds);
2136 }
2137 nonconstant_names[SSA_NAME_VERSION (gimple_phi_result (phi))] = *p;
2138 }
2139
2140 /* For a typical usage of __builtin_expect (a<b, 1), we
2141 may introduce an extra relation stmt:
2142 With the builtin, we have
2143 t1 = a <= b;
2144 t2 = (long int) t1;
2145 t3 = __builtin_expect (t2, 1);
2146 if (t3 != 0)
2147 goto ...
2148 Without the builtin, we have
2149 if (a<=b)
2150 goto...
2151 This affects the size/time estimation and may have
2152 an impact on the earlier inlining.
2153 Here find this pattern and fix it up later. */
2154
2155 static gimple *
2156 find_foldable_builtin_expect (basic_block bb)
2157 {
2158 gimple_stmt_iterator bsi;
2159
2160 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2161 {
2162 gimple *stmt = gsi_stmt (bsi);
2163 if (gimple_call_builtin_p (stmt, BUILT_IN_EXPECT)
2164 || gimple_call_builtin_p (stmt, BUILT_IN_EXPECT_WITH_PROBABILITY)
2165 || gimple_call_internal_p (stmt, IFN_BUILTIN_EXPECT))
2166 {
2167 tree var = gimple_call_lhs (stmt);
2168 tree arg = gimple_call_arg (stmt, 0);
2169 use_operand_p use_p;
2170 gimple *use_stmt;
2171 bool match = false;
2172 bool done = false;
2173
2174 if (!var || !arg)
2175 continue;
2176 gcc_assert (TREE_CODE (var) == SSA_NAME);
2177
2178 while (TREE_CODE (arg) == SSA_NAME)
2179 {
2180 gimple *stmt_tmp = SSA_NAME_DEF_STMT (arg);
2181 if (!is_gimple_assign (stmt_tmp))
2182 break;
2183 switch (gimple_assign_rhs_code (stmt_tmp))
2184 {
2185 case LT_EXPR:
2186 case LE_EXPR:
2187 case GT_EXPR:
2188 case GE_EXPR:
2189 case EQ_EXPR:
2190 case NE_EXPR:
2191 match = true;
2192 done = true;
2193 break;
2194 CASE_CONVERT:
2195 break;
2196 default:
2197 done = true;
2198 break;
2199 }
2200 if (done)
2201 break;
2202 arg = gimple_assign_rhs1 (stmt_tmp);
2203 }
2204
2205 if (match && single_imm_use (var, &use_p, &use_stmt)
2206 && gimple_code (use_stmt) == GIMPLE_COND)
2207 return use_stmt;
2208 }
2209 }
2210 return NULL;
2211 }
2212
/* Return true when the basic block contains only clobbers followed by RESX.
   Such BBs are kept around to make removal of dead stores possible with
   presence of EH and will be optimized out by optimize_clobbers later in the
   game.

   NEED_EH is used to recurse in case the clobber has non-EH predecessors
   that can be clobber only, too.  When it is false, the RESX is not necessary
   on the end of basic block.  */
2221
2222 static bool
2223 clobber_only_eh_bb_p (basic_block bb, bool need_eh = true)
2224 {
2225 gimple_stmt_iterator gsi = gsi_last_bb (bb);
2226 edge_iterator ei;
2227 edge e;
2228
2229 if (need_eh)
2230 {
2231 if (gsi_end_p (gsi))
2232 return false;
2233 if (gimple_code (gsi_stmt (gsi)) != GIMPLE_RESX)
2234 return false;
2235 gsi_prev (&gsi);
2236 }
2237 else if (!single_succ_p (bb))
2238 return false;
2239
2240 for (; !gsi_end_p (gsi); gsi_prev (&gsi))
2241 {
2242 gimple *stmt = gsi_stmt (gsi);
2243 if (is_gimple_debug (stmt))
2244 continue;
2245 if (gimple_clobber_p (stmt))
2246 continue;
2247 if (gimple_code (stmt) == GIMPLE_LABEL)
2248 break;
2249 return false;
2250 }
2251
2252 /* See if all predecestors are either throws or clobber only BBs. */
2253 FOR_EACH_EDGE (e, ei, bb->preds)
2254 if (!(e->flags & EDGE_EH)
2255 && !clobber_only_eh_bb_p (e->src, false))
2256 return false;
2257
2258 return true;
2259 }
2260
2261 /* Return true if STMT compute a floating point expression that may be affected
2262 by -ffast-math and similar flags. */
2263
2264 static bool
2265 fp_expression_p (gimple *stmt)
2266 {
2267 ssa_op_iter i;
2268 tree op;
2269
2270 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_DEF|SSA_OP_USE)
2271 if (FLOAT_TYPE_P (TREE_TYPE (op)))
2272 return true;
2273 return false;
2274 }
2275
2276 /* Analyze function body for NODE.
2277 EARLY indicates run from early optimization pipeline. */
2278
static void
analyze_function_body (struct cgraph_node *node, bool early)
{
  /* Seed time/size with the static overhead of an uninlined function.  */
  sreal time = param_uninlined_function_time;
  /* Estimate static overhead for function prologue/epilogue and alignment. */
  int size = param_uninlined_function_insns;
  /* Benefits are scaled by probability of elimination that is in range
     <0,2>.  */
  basic_block bb;
  struct function *my_function = DECL_STRUCT_FUNCTION (node->decl);
  sreal freq;
  class ipa_fn_summary *info = ipa_fn_summaries->get_create (node);
  class ipa_node_params *params_summary = early ? NULL : IPA_NODE_REF (node);
  predicate bb_predicate;
  struct ipa_func_body_info fbi;
  vec<predicate> nonconstant_names = vNULL;
  int nblocks, n;
  int *order;
  gimple *fix_builtin_expect_stmt;

  gcc_assert (my_function && my_function->cfg);
  gcc_assert (cfun == my_function);

  memset(&fbi, 0, sizeof(fbi));
  /* Discard any previously computed conditions and size/time entries;
     the summary is rebuilt from scratch below.  */
  vec_free (info->conds);
  info->conds = NULL;
  vec_free (info->size_time_table);
  info->size_time_table = NULL;

  /* When optimizing and analyzing for IPA inliner, initialize loop optimizer
     so we can produce proper inline hints.

     When optimizing and analyzing for early inliner, initialize node params
     so we can produce correct BB predicates.  */

  if (opt_for_fn (node->decl, optimize))
    {
      calculate_dominance_info (CDI_DOMINATORS);
      calculate_dominance_info (CDI_POST_DOMINATORS);
      if (!early)
	loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);
      else
	{
	  ipa_check_create_node_params ();
	  ipa_initialize_node_params (node);
	}

      if (ipa_node_params_sum)
	{
	  fbi.node = node;
	  fbi.info = IPA_NODE_REF (node);
	  fbi.bb_infos = vNULL;
	  fbi.bb_infos.safe_grow_cleared (last_basic_block_for_fn (cfun));
	  fbi.param_count = count_formal_params (node->decl);
	  /* Cap on alias analysis walks done while examining statements.  */
	  fbi.aa_walk_budget = param_ipa_max_aa_steps;

	  nonconstant_names.safe_grow_cleared
	    (SSANAMES (my_function)->length ());
	}
    }

  if (dump_file)
    fprintf (dump_file, "\nAnalyzing function body size: %s\n",
	     node->name ());

  /* When we run into maximal number of entries, we assign everything to the
     constant truth case.  Be sure to have it in list. */
  bb_predicate = true;
  info->account_size_time (0, 0, bb_predicate, bb_predicate);

  /* Account the not-inlined overhead (prologue/epilogue etc.) under the
     "not inlined" predicate so it disappears once the function is inlined.  */
  bb_predicate = predicate::not_inlined ();
  info->account_size_time (param_uninlined_function_insns
			   * ipa_fn_summary::size_scale,
			   param_uninlined_function_time,
			   bb_predicate,
			   bb_predicate);

  if (fbi.info)
    compute_bb_predicates (&fbi, node, info, params_summary);
  /* Walk basic blocks in reverse post-order so predicates of predecessors
     are computed before their successors are visited.  */
  order = XNEWVEC (int, n_basic_blocks_for_fn (cfun));
  nblocks = pre_and_rev_post_order_compute (NULL, order, false);
  for (n = 0; n < nblocks; n++)
    {
      bb = BASIC_BLOCK_FOR_FN (cfun, order[n]);
      freq = bb->count.to_sreal_scale (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count);
      if (clobber_only_eh_bb_p (bb))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "\n Ignoring BB %i;"
		     " it will be optimized away by cleanup_clobbers\n",
		     bb->index);
	  continue;
	}

      /* TODO: Obviously predicates can be propagated down across CFG.  */
      if (fbi.info)
	{
	  if (bb->aux)
	    bb_predicate = *(predicate *) bb->aux;
	  else
	    bb_predicate = false;
	}
      else
	bb_predicate = true;

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "\n BB %i predicate:", bb->index);
	  bb_predicate.dump (dump_file, info->conds);
	}

      /* Compute nonconstant predicates for PHI results; this only works
	 when the block closes a diamond with a known condition.  */
      if (fbi.info && nonconstant_names.exists ())
	{
	  predicate phi_predicate;
	  bool first_phi = true;

	  for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
	       gsi_next (&bsi))
	    {
	      if (first_phi
		  && !phi_result_unknown_predicate (&fbi, info,
						    params_summary,
						    bb,
						    &phi_predicate,
						    nonconstant_names))
		break;
	      first_phi = false;
	      if (dump_file && (dump_flags & TDF_DETAILS))
		{
		  fprintf (dump_file, "  ");
		  print_gimple_stmt (dump_file, gsi_stmt (bsi), 0);
		}
	      predicate_for_phi_result (info, bsi.phi (), &phi_predicate,
					nonconstant_names);
	    }
	}

      fix_builtin_expect_stmt = find_foldable_builtin_expect (bb);

      for (gimple_stmt_iterator bsi = gsi_start_nondebug_bb (bb);
	   !gsi_end_p (bsi); gsi_next_nondebug (&bsi))
	{
	  gimple *stmt = gsi_stmt (bsi);
	  int this_size = estimate_num_insns (stmt, &eni_size_weights);
	  int this_time = estimate_num_insns (stmt, &eni_time_weights);
	  int prob;
	  predicate will_be_nonconstant;

	  /* This relation stmt should be folded after we remove
	     builtin_expect call.  Adjust the cost here.  */
	  if (stmt == fix_builtin_expect_stmt)
	    {
	      this_size--;
	      this_time--;
	    }

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "  ");
	      print_gimple_stmt (dump_file, stmt, 0);
	      fprintf (dump_file, "\t\tfreq:%3.2f size:%3i time:%3i\n",
		       freq.to_double (), this_size,
		       this_time);
	    }

	  if (is_gimple_call (stmt)
	      && !gimple_call_internal_p (stmt))
	    {
	      struct cgraph_edge *edge = node->get_edge (stmt);
	      ipa_call_summary *es = ipa_call_summaries->get_create (edge);

	      /* Special case: results of BUILT_IN_CONSTANT_P will be always
		 resolved as constant.  We however don't want to optimize
		 out the cgraph edges.  */
	      if (nonconstant_names.exists ()
		  && gimple_call_builtin_p (stmt, BUILT_IN_CONSTANT_P)
		  && gimple_call_lhs (stmt)
		  && TREE_CODE (gimple_call_lhs (stmt)) == SSA_NAME)
		{
		  predicate false_p = false;
		  nonconstant_names[SSA_NAME_VERSION (gimple_call_lhs (stmt))]
		    = false_p;
		}
	      if (ipa_node_params_sum)
		{
		  /* Record, per call argument, the probability that the
		     argument changes between invocations.  */
		  int count = gimple_call_num_args (stmt);
		  int i;

		  if (count)
		    es->param.safe_grow_cleared (count);
		  for (i = 0; i < count; i++)
		    {
		      int prob = param_change_prob (&fbi, stmt, i);
		      gcc_assert (prob >= 0 && prob <= REG_BR_PROB_BASE);
		      es->param[i].change_prob = prob;
		    }
		}

	      es->call_stmt_size = this_size;
	      es->call_stmt_time = this_time;
	      es->loop_depth = bb_loop_depth (bb);
	      edge_set_predicate (edge, &bb_predicate);
	      /* Keep the summary of the corresponding indirect edge of a
		 speculative call in sync with the direct one.  */
	      if (edge->speculative)
		{
		  cgraph_edge *direct, *indirect;
		  ipa_ref *ref;
		  edge->speculative_call_info (direct, indirect, ref);
		  gcc_assert (direct == edge);
		  ipa_call_summary *es2
			 = ipa_call_summaries->get_create (indirect);
		  ipa_call_summaries->duplicate (edge, indirect,
						 es, es2);
		}
	    }

	  /* TODO: When conditional jump or switch is known to be constant, but
	     we did not translate it into the predicates, we really can account
	     just maximum of the possible paths.  */
	  if (fbi.info)
	    will_be_nonconstant
	      = will_be_nonconstant_predicate (&fbi, info, params_summary,
					       stmt, nonconstant_names);
	  else
	    will_be_nonconstant = true;
	  if (this_time || this_size)
	    {
	      sreal final_time = (sreal)this_time * freq;

	      /* PROB is in range <0,2>: 0 = never eliminated by inlining,
		 1 = eliminated with 50% probability, 2 = always.  */
	      prob = eliminated_by_inlining_prob (&fbi, stmt);
	      if (prob == 1 && dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file,
			 "\t\t50%% will be eliminated by inlining\n");
	      if (prob == 2 && dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "\t\tWill be eliminated by inlining\n");

	      class predicate p = bb_predicate & will_be_nonconstant;

	      /* We can ignore statement when we proved it is never going
		 to happen, but we cannot do that for call statements
		 because edges are accounted specially.  */

	      if (*(is_gimple_call (stmt) ? &bb_predicate : &p) != false)
		{
		  time += final_time;
		  size += this_size;
		}

	      /* We account everything but the calls.  Calls have their own
		 size/time info attached to cgraph edges.  This is necessary
		 in order to make the cost disappear after inlining.  */
	      if (!is_gimple_call (stmt))
		{
		  if (prob)
		    {
		      /* The eliminated part survives only when the function
			 is not inlined.  */
		      predicate ip = bb_predicate & predicate::not_inlined ();
		      info->account_size_time (this_size * prob,
					       (final_time * prob) / 2, ip,
					       p);
		    }
		  if (prob != 2)
		    info->account_size_time (this_size * (2 - prob),
					     (final_time * (2 - prob) / 2),
					     bb_predicate,
					     p);
		}

	      if (!info->fp_expressions && fp_expression_p (stmt))
		{
		  info->fp_expressions = true;
		  if (dump_file)
		    fprintf (dump_file, "   fp_expression set\n");
		}
	    }

	  /* Account cost of address calculations in the statements.  */
	  for (unsigned int i = 0; i < gimple_num_ops (stmt); i++)
	    {
	      for (tree op = gimple_op (stmt, i);
		   op && handled_component_p (op);
		   op = TREE_OPERAND (op, 0))
		if ((TREE_CODE (op) == ARRAY_REF
		     || TREE_CODE (op) == ARRAY_RANGE_REF)
		    && TREE_CODE (TREE_OPERAND (op, 1)) == SSA_NAME)
		  {
		    predicate p = bb_predicate;
		    if (fbi.info)
		      p = p & will_be_nonconstant_expr_predicate
				 (&fbi, info, params_summary,
				  TREE_OPERAND (op, 1),
				  nonconstant_names);
		    if (p != false)
		      {
			time += freq;
			size += 1;
			if (dump_file)
			  fprintf (dump_file,
				   "\t\tAccounting address calculation.\n");
			info->account_size_time (ipa_fn_summary::size_scale,
						 freq,
						 bb_predicate,
						 p);
		      }
		  }
	    }

	}
    }
  free (order);

  /* Compute loop-related inline hints (known iteration counts / strides).  */
  if (nonconstant_names.exists () && !early)
    {
      class loop *loop;
      predicate loop_iterations = true;
      predicate loop_stride = true;

      if (dump_file && (dump_flags & TDF_DETAILS))
	flow_loops_dump (dump_file, NULL, 0);
      scev_initialize ();
      FOR_EACH_LOOP (loop, 0)
	{
	  vec<edge> exits;
	  edge ex;
	  unsigned int j;
	  class tree_niter_desc niter_desc;
	  bb_predicate = *(predicate *) loop->header->aux;

	  exits = get_loop_exit_edges (loop);
	  FOR_EACH_VEC_ELT (exits, j, ex)
	    if (number_of_iterations_exit (loop, ex, &niter_desc, false)
		&& !is_gimple_min_invariant (niter_desc.niter))
	    {
	      predicate will_be_nonconstant
		= will_be_nonconstant_expr_predicate (&fbi, info,
						      params_summary,
						      niter_desc.niter,
						      nonconstant_names);
	      if (will_be_nonconstant != true)
		will_be_nonconstant = bb_predicate & will_be_nonconstant;
	      if (will_be_nonconstant != true
		  && will_be_nonconstant != false)
		/* This is slightly imprecise.  We may want to represent each
		   loop with independent predicate.  */
		loop_iterations &= will_be_nonconstant;
	    }
	  exits.release ();
	}

      /* To avoid quadratic behavior we analyze stride predicates only
	 with respect to the containing loop.  Thus we simply iterate
	 over all defs in the outermost loop body.  */
      for (loop = loops_for_fn (cfun)->tree_root->inner;
	   loop != NULL; loop = loop->next)
	{
	  basic_block *body = get_loop_body (loop);
	  for (unsigned i = 0; i < loop->num_nodes; i++)
	    {
	      gimple_stmt_iterator gsi;
	      bb_predicate = *(predicate *) body[i]->aux;
	      for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi);
		   gsi_next (&gsi))
		{
		  gimple *stmt = gsi_stmt (gsi);

		  if (!is_gimple_assign (stmt))
		    continue;

		  tree def = gimple_assign_lhs (stmt);
		  if (TREE_CODE (def) != SSA_NAME)
		    continue;

		  /* Only simple induction variables with a non-invariant
		     step are interesting for the stride hint.  */
		  affine_iv iv;
		  if (!simple_iv (loop_containing_stmt (stmt),
				  loop_containing_stmt (stmt),
				  def, &iv, true)
		      || is_gimple_min_invariant (iv.step))
		    continue;

		  predicate will_be_nonconstant
		    = will_be_nonconstant_expr_predicate (&fbi, info,
							  params_summary,
							  iv.step,
							  nonconstant_names);
		  if (will_be_nonconstant != true)
		    will_be_nonconstant = bb_predicate & will_be_nonconstant;
		  if (will_be_nonconstant != true
		      && will_be_nonconstant != false)
		    /* This is slightly imprecise.  We may want to represent
		       each loop with independent predicate.  */
		    loop_stride = loop_stride & will_be_nonconstant;
		}
	    }
	  free (body);
	}
      ipa_fn_summary *s = ipa_fn_summaries->get (node);
      set_hint_predicate (&s->loop_iterations, loop_iterations);
      set_hint_predicate (&s->loop_stride, loop_stride);
      scev_finalize ();
    }
  /* Release the predicates stored in BB and edge aux fields.  */
  FOR_ALL_BB_FN (bb, my_function)
    {
      edge e;
      edge_iterator ei;

      if (bb->aux)
	edge_predicate_pool.remove ((predicate *)bb->aux);
      bb->aux = NULL;
      FOR_EACH_EDGE (e, ei, bb->succs)
	{
	  if (e->aux)
	    edge_predicate_pool.remove ((predicate *) e->aux);
	  e->aux = NULL;
	}
    }
  ipa_fn_summary *s = ipa_fn_summaries->get (node);
  ipa_size_summary *ss = ipa_size_summaries->get (node);
  s->time = time;
  ss->self_size = size;
  nonconstant_names.release ();
  ipa_release_body_info (&fbi);
  if (opt_for_fn (node->decl, optimize))
    {
      if (!early)
	loop_optimizer_finalize ();
      else if (!ipa_edge_args_sum)
	ipa_free_all_node_params ();
      free_dominance_info (CDI_DOMINATORS);
      free_dominance_info (CDI_POST_DOMINATORS);
    }
  if (dump_file)
    {
      fprintf (dump_file, "\n");
      ipa_dump_fn_summary (dump_file, node);
    }
}
2713
2714
2715 /* Compute function summary.
2716 EARLY is true when we compute parameters during early opts. */
2717
void
compute_fn_summary (struct cgraph_node *node, bool early)
{
  HOST_WIDE_INT self_stack_size;
  struct cgraph_edge *e;

  /* Summaries are computed for the offline function body only.  */
  gcc_assert (!node->inlined_to);

  if (!ipa_fn_summaries)
    ipa_fn_summary_alloc ();

  /* Create a new ipa_fn_summary.  */
  ((ipa_fn_summary_t *)ipa_fn_summaries)->remove_callees (node);
  ipa_fn_summaries->remove (node);
  class ipa_fn_summary *info = ipa_fn_summaries->get_create (node);
  class ipa_size_summary *size_info = ipa_size_summaries->get_create (node);

  /* Estimate the stack size for the function if we're optimizing.  */
  self_stack_size = optimize && !node->thunk.thunk_p
		    ? estimated_stack_frame_size (node) : 0;
  size_info->estimated_self_stack_size = self_stack_size;
  info->estimated_stack_size = self_stack_size;

  if (node->thunk.thunk_p)
    {
      /* Thunks have no analyzable body; synthesize a summary describing
	 a single call to the target.  */
      ipa_call_summary *es = ipa_call_summaries->get_create (node->callees);
      predicate t = true;

      node->can_change_signature = false;
      es->call_stmt_size = eni_size_weights.call_cost;
      es->call_stmt_time = eni_time_weights.call_cost;
      info->account_size_time (ipa_fn_summary::size_scale
			       * param_uninlined_function_thunk_insns,
			       param_uninlined_function_thunk_time, t, t);
      t = predicate::not_inlined ();
      info->account_size_time (2 * ipa_fn_summary::size_scale, 0, t, t);
      ipa_update_overall_fn_summary (node);
      size_info->self_size = size_info->size;
      if (stdarg_p (TREE_TYPE (node->decl)))
	{
	  info->inlinable = false;
	  node->callees->inline_failed = CIF_VARIADIC_THUNK;
	}
      else
	info->inlinable = true;
    }
  else
    {
      /* Even is_gimple_min_invariant relies on current_function_decl.  */
      push_cfun (DECL_STRUCT_FUNCTION (node->decl));

      /* Can this function be inlined at all?  */
      if (!opt_for_fn (node->decl, optimize)
	  && !lookup_attribute ("always_inline",
				DECL_ATTRIBUTES (node->decl)))
	info->inlinable = false;
      else
	info->inlinable = tree_inlinable_function_p (node->decl);

      /* Type attributes can use parameter indices to describe them.  */
      if (TYPE_ATTRIBUTES (TREE_TYPE (node->decl))
	  /* Likewise for #pragma omp declare simd functions or functions
	     with simd attribute.  */
	  || lookup_attribute ("omp declare simd",
			       DECL_ATTRIBUTES (node->decl)))
	node->can_change_signature = false;
      else
	{
	  /* Otherwise, inlinable functions always can change signature.  */
	  if (info->inlinable)
	    node->can_change_signature = true;
	  else
	    {
	      /* Functions calling builtin_apply cannot change signature.  */
	      for (e = node->callees; e; e = e->next_callee)
		{
		  tree cdecl = e->callee->decl;
		  if (fndecl_built_in_p (cdecl, BUILT_IN_APPLY_ARGS)
		      || fndecl_built_in_p (cdecl, BUILT_IN_VA_START))
		    break;
		}
	      node->can_change_signature = !e;
	    }
	}
      analyze_function_body (node, early);
      pop_cfun ();
    }
  /* Note whether this node calls any comdat-local symbol; such calls limit
     some interprocedural transformations.  */
  for (e = node->callees; e; e = e->next_callee)
    if (e->callee->comdat_local_p ())
      break;
  node->calls_comdat_local = (e != NULL);

  /* Inlining characteristics are maintained by the cgraph_mark_inline.  */
  size_info->size = size_info->self_size;
  info->estimated_stack_size = size_info->estimated_self_stack_size;

  /* Code above should compute exactly the same result as
     ipa_update_overall_fn_summary but because computation happens in
     different order the roundoff errors result in slight changes.  */
  ipa_update_overall_fn_summary (node);
  /* In LTO mode we may have speculative edges set.  */
  gcc_assert (in_lto_p || size_info->size == size_info->self_size);
}
2821
2822
2823 /* Compute parameters of functions used by inliner using
2824 current_function_decl. */
2825
2826 static unsigned int
2827 compute_fn_summary_for_current (void)
2828 {
2829 compute_fn_summary (cgraph_node::get (current_function_decl), true);
2830 return 0;
2831 }
2832
2833 /* Estimate benefit devirtualizing indirect edge IE, provided KNOWN_VALS,
2834 KNOWN_CONTEXTS and KNOWN_AGGS. */
2835
2836 static bool
2837 estimate_edge_devirt_benefit (struct cgraph_edge *ie,
2838 int *size, int *time,
2839 vec<tree> known_vals,
2840 vec<ipa_polymorphic_call_context> known_contexts,
2841 vec<ipa_agg_jump_function_p> known_aggs)
2842 {
2843 tree target;
2844 struct cgraph_node *callee;
2845 class ipa_fn_summary *isummary;
2846 enum availability avail;
2847 bool speculative;
2848
2849 if (!known_vals.exists () && !known_contexts.exists ())
2850 return false;
2851 if (!opt_for_fn (ie->caller->decl, flag_indirect_inlining))
2852 return false;
2853
2854 target = ipa_get_indirect_edge_target (ie, known_vals, known_contexts,
2855 known_aggs, &speculative);
2856 if (!target || speculative)
2857 return false;
2858
2859 /* Account for difference in cost between indirect and direct calls. */
2860 *size -= (eni_size_weights.indirect_call_cost - eni_size_weights.call_cost);
2861 *time -= (eni_time_weights.indirect_call_cost - eni_time_weights.call_cost);
2862 gcc_checking_assert (*time >= 0);
2863 gcc_checking_assert (*size >= 0);
2864
2865 callee = cgraph_node::get (target);
2866 if (!callee || !callee->definition)
2867 return false;
2868 callee = callee->function_symbol (&avail);
2869 if (avail < AVAIL_AVAILABLE)
2870 return false;
2871 isummary = ipa_fn_summaries->get (callee);
2872 if (isummary == NULL)
2873 return false;
2874
2875 return isummary->inlinable;
2876 }
2877
2878 /* Increase SIZE, MIN_SIZE (if non-NULL) and TIME for size and time needed to
2879 handle edge E with probability PROB.
2880 Set HINTS if edge may be devirtualized.
2881 KNOWN_VALS, KNOWN_AGGS and KNOWN_CONTEXTS describe context of the call
2882 site. */
2883
2884 static inline void
2885 estimate_edge_size_and_time (struct cgraph_edge *e, int *size, int *min_size,
2886 sreal *time,
2887 int prob,
2888 vec<tree> known_vals,
2889 vec<ipa_polymorphic_call_context> known_contexts,
2890 vec<ipa_agg_jump_function_p> known_aggs,
2891 ipa_hints *hints)
2892 {
2893 class ipa_call_summary *es = ipa_call_summaries->get (e);
2894 int call_size = es->call_stmt_size;
2895 int call_time = es->call_stmt_time;
2896 int cur_size;
2897 if (!e->callee && hints && e->maybe_hot_p ()
2898 && estimate_edge_devirt_benefit (e, &call_size, &call_time,
2899 known_vals, known_contexts, known_aggs))
2900 *hints |= INLINE_HINT_indirect_call;
2901 cur_size = call_size * ipa_fn_summary::size_scale;
2902 *size += cur_size;
2903 if (min_size)
2904 *min_size += cur_size;
2905 if (!time)
2906 ;
2907 else if (prob == REG_BR_PROB_BASE)
2908 *time += ((sreal)call_time) * e->sreal_frequency ();
2909 else
2910 *time += ((sreal)call_time * prob) * e->sreal_frequency ();
2911 }
2912
2913
2914
2915 /* Increase SIZE, MIN_SIZE and TIME for size and time needed to handle all
2916 calls in NODE. POSSIBLE_TRUTHS, KNOWN_VALS, KNOWN_AGGS and KNOWN_CONTEXTS
2917 describe context of the call site. */
2918
2919 static void
2920 estimate_calls_size_and_time (struct cgraph_node *node, int *size,
2921 int *min_size, sreal *time,
2922 ipa_hints *hints,
2923 clause_t possible_truths,
2924 vec<tree> known_vals,
2925 vec<ipa_polymorphic_call_context> known_contexts,
2926 vec<ipa_agg_jump_function_p> known_aggs)
2927 {
2928 struct cgraph_edge *e;
2929 for (e = node->callees; e; e = e->next_callee)
2930 {
2931 if (!e->inline_failed)
2932 {
2933 gcc_checking_assert (!ipa_call_summaries->get (e));
2934 estimate_calls_size_and_time (e->callee, size, min_size, time,
2935 hints,
2936 possible_truths,
2937 known_vals, known_contexts,
2938 known_aggs);
2939 continue;
2940 }
2941 class ipa_call_summary *es = ipa_call_summaries->get (e);
2942
2943 /* Do not care about zero sized builtins. */
2944 if (!es->call_stmt_size)
2945 {
2946 gcc_checking_assert (!es->call_stmt_time);
2947 continue;
2948 }
2949 if (!es->predicate
2950 || es->predicate->evaluate (possible_truths))
2951 {
2952 /* Predicates of calls shall not use NOT_CHANGED codes,
2953 sowe do not need to compute probabilities. */
2954 estimate_edge_size_and_time (e, size,
2955 es->predicate ? NULL : min_size,
2956 time, REG_BR_PROB_BASE,
2957 known_vals, known_contexts,
2958 known_aggs, hints);
2959 }
2960 }
2961 for (e = node->indirect_calls; e; e = e->next_callee)
2962 {
2963 class ipa_call_summary *es = ipa_call_summaries->get (e);
2964 if (!es->predicate
2965 || es->predicate->evaluate (possible_truths))
2966 estimate_edge_size_and_time (e, size,
2967 es->predicate ? NULL : min_size,
2968 time, REG_BR_PROB_BASE,
2969 known_vals, known_contexts, known_aggs,
2970 hints);
2971 }
2972 }
2973
/* Default constructor for ipa call context.
   Memory allocation of known_vals, known_contexts
   and known_aggs vectors is owned by the caller, but can
   be released by ipa_call_context::release.

   inline_param_summary is owned by the caller.  */
/* Initialize the context for NODE from the given clauses and
   argument-knowledge vectors.  The vectors are stored by value (shallow);
   ownership stays with the caller (see comment above).  */
ipa_call_context::ipa_call_context (cgraph_node *node,
				    clause_t possible_truths,
				    clause_t nonspec_possible_truths,
				    vec<tree> known_vals,
				    vec<ipa_polymorphic_call_context>
				    known_contexts,
				    vec<ipa_agg_jump_function_p> known_aggs,
				    vec<inline_param_summary>
				    inline_param_summary)
: m_node (node), m_possible_truths (possible_truths),
  m_nonspec_possible_truths (nonspec_possible_truths),
  m_inline_param_summary (inline_param_summary),
  m_known_vals (known_vals),
  m_known_contexts (known_contexts),
  m_known_aggs (known_aggs)
{
}
2997
2998 /* Set THIS to be a duplicate of CTX. Copy all relevant info. */
2999
3000 void
3001 ipa_call_context::duplicate_from (const ipa_call_context &ctx)
3002 {
3003 m_node = ctx.m_node;
3004 m_possible_truths = ctx.m_possible_truths;
3005 m_nonspec_possible_truths = ctx.m_nonspec_possible_truths;
3006 class ipa_node_params *params_summary = IPA_NODE_REF (m_node);
3007 unsigned int nargs = params_summary
3008 ? ipa_get_param_count (params_summary) : 0;
3009
3010 m_inline_param_summary = vNULL;
3011 /* Copy the info only if there is at least one useful entry. */
3012 if (ctx.m_inline_param_summary.exists ())
3013 {
3014 unsigned int n = MIN (ctx.m_inline_param_summary.length (), nargs);
3015
3016 for (unsigned int i = 0; i < n; i++)
3017 if (ipa_is_param_used_by_ipa_predicates (params_summary, i)
3018 && !ctx.m_inline_param_summary[i].useless_p ())
3019 {
3020 m_inline_param_summary
3021 = ctx.m_inline_param_summary.copy ();
3022 break;
3023 }
3024 }
3025 m_known_vals = vNULL;
3026 if (ctx.m_known_vals.exists ())
3027 {
3028 unsigned int n = MIN (ctx.m_known_vals.length (), nargs);
3029
3030 for (unsigned int i = 0; i < n; i++)
3031 if (ipa_is_param_used_by_indirect_call (params_summary, i)
3032 && ctx.m_known_vals[i])
3033 {
3034 m_known_vals = ctx.m_known_vals.copy ();
3035 break;
3036 }
3037 }
3038
3039 m_known_contexts = vNULL;
3040 if (ctx.m_known_contexts.exists ())
3041 {
3042 unsigned int n = MIN (ctx.m_known_contexts.length (), nargs);
3043
3044 for (unsigned int i = 0; i < n; i++)
3045 if (ipa_is_param_used_by_polymorphic_call (params_summary, i)
3046 && !ctx.m_known_contexts[i].useless_p ())
3047 {
3048 m_known_contexts = ctx.m_known_contexts.copy ();
3049 break;
3050 }
3051 }
3052
3053 m_known_aggs = vNULL;
3054 if (ctx.m_known_aggs.exists ())
3055 {
3056 unsigned int n = MIN (ctx.m_known_aggs.length (), nargs);
3057
3058 for (unsigned int i = 0; i < n; i++)
3059 if (ipa_is_param_used_by_indirect_call (params_summary, i)
3060 && ctx.m_known_aggs[i])
3061 {
3062 m_known_aggs = ctx.m_known_aggs.copy ();
3063 break;
3064 }
3065 }
3066 }
3067
/* Release memory used by known_vals/contexts/aggs vectors.
   If ALL is true release also inline_param_summary.
   This happens when context was previously duplicated to be stored
   into cache.  */
3072
3073 void
3074 ipa_call_context::release (bool all)
3075 {
3076 /* See if context is initialized at first place. */
3077 if (!m_node)
3078 return;
3079 m_known_vals.release ();
3080 m_known_contexts.release ();
3081 m_known_aggs.release ();
3082 if (all)
3083 m_inline_param_summary.release ();
3084 }
3085
3086 /* Return true if CTX describes the same call context as THIS. */
3087
bool
ipa_call_context::equal_to (const ipa_call_context &ctx)
{
  /* Fast path: differing node or clauses means different contexts.  */
  if (m_node != ctx.m_node
      || m_possible_truths != ctx.m_possible_truths
      || m_nonspec_possible_truths != ctx.m_nonspec_possible_truths)
    return false;

  class ipa_node_params *params_summary = IPA_NODE_REF (m_node);
  unsigned int nargs = params_summary
		       ? ipa_get_param_count (params_summary) : 0;

  /* For each vector, entries that are out of range or "useless" on one side
     count as equal to missing/useless entries on the other side; only
     parameters actually used by the respective analysis are compared.  */
  if (m_inline_param_summary.exists () || ctx.m_inline_param_summary.exists ())
    {
      for (unsigned int i = 0; i < nargs; i++)
	{
	  if (!ipa_is_param_used_by_ipa_predicates (params_summary, i))
	    continue;
	  if (i >= m_inline_param_summary.length ()
	      || m_inline_param_summary[i].useless_p ())
	    {
	      if (i < ctx.m_inline_param_summary.length ()
		  && !ctx.m_inline_param_summary[i].useless_p ())
		return false;
	      continue;
	    }
	  if (i >= ctx.m_inline_param_summary.length ()
	      || ctx.m_inline_param_summary[i].useless_p ())
	    {
	      if (i < m_inline_param_summary.length ()
		  && !m_inline_param_summary[i].useless_p ())
		return false;
	      continue;
	    }
	  if (!m_inline_param_summary[i].equal_to
	     	 (ctx.m_inline_param_summary[i]))
	    return false;
	}
    }
  /* Known constant values; compared by pointer identity of the trees.  */
  if (m_known_vals.exists () || ctx.m_known_vals.exists ())
    {
      for (unsigned int i = 0; i < nargs; i++)
	{
	  if (!ipa_is_param_used_by_indirect_call (params_summary, i))
	    continue;
	  if (i >= m_known_vals.length () || !m_known_vals[i])
	    {
	      if (i < ctx.m_known_vals.length () && ctx.m_known_vals[i])
		return false;
	      continue;
	    }
	  if (i >= ctx.m_known_vals.length () || !ctx.m_known_vals[i])
	    {
	      if (i < m_known_vals.length () && m_known_vals[i])
		return false;
	      continue;
	    }
	  if (m_known_vals[i] != ctx.m_known_vals[i])
	    return false;
	}
    }
  /* Polymorphic call contexts; compared structurally via equal_to.  */
  if (m_known_contexts.exists () || ctx.m_known_contexts.exists ())
    {
      for (unsigned int i = 0; i < nargs; i++)
	{
	  if (!ipa_is_param_used_by_polymorphic_call (params_summary, i))
	    continue;
	  if (i >= m_known_contexts.length ()
	      || m_known_contexts[i].useless_p ())
	    {
	      if (i < ctx.m_known_contexts.length ()
		  && !ctx.m_known_contexts[i].useless_p ())
		return false;
	      continue;
	    }
	  if (i >= ctx.m_known_contexts.length ()
	      || ctx.m_known_contexts[i].useless_p ())
	    {
	      if (i < m_known_contexts.length ()
		  && !m_known_contexts[i].useless_p ())
		return false;
	      continue;
	    }
	  if (!m_known_contexts[i].equal_to
	     	 (ctx.m_known_contexts[i]))
	    return false;
	}
    }
  /* Aggregate jump functions; compared by pointer identity.  */
  if (m_known_aggs.exists () || ctx.m_known_aggs.exists ())
    {
      for (unsigned int i = 0; i < nargs; i++)
	{
	  if (!ipa_is_param_used_by_indirect_call (params_summary, i))
	    continue;
	  if (i >= m_known_aggs.length () || !m_known_aggs[i])
	    {
	      if (i < ctx.m_known_aggs.length () && ctx.m_known_aggs[i])
		return false;
	      continue;
	    }
	  if (i >= ctx.m_known_aggs.length () || !ctx.m_known_aggs[i])
	    {
	      if (i < m_known_aggs.length () && m_known_aggs[i])
		return false;
	      continue;
	    }
	  if (m_known_aggs[i] != ctx.m_known_aggs[i])
	    return false;
	}
    }
  return true;
}
3200
/* Estimate size and time needed to execute call in the given context.
   Additionally determine hints determined by the context.  Finally compute
   minimal size needed for the call that is independent of the call context
   and can be used for fast estimates.  Return the values in RET_SIZE,
   RET_MIN_SIZE, RET_TIME and RET_HINTS.  Any of the output parameters
   may be NULL when the caller is not interested in that value.  */

void
ipa_call_context::estimate_size_and_time (int *ret_size,
					  int *ret_min_size,
					  sreal *ret_time,
					  sreal *ret_nonspecialized_time,
					  ipa_hints *ret_hints)
{
  class ipa_fn_summary *info = ipa_fn_summaries->get (m_node);
  size_time_entry *e;
  int size = 0;
  sreal time = 0;
  int min_size = 0;
  ipa_hints hints = 0;
  int i;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bool found = false;
      fprintf (dump_file, "   Estimating body: %s/%i\n"
	       "   Known to be false: ", m_node->name (),
	       m_node->order);

      for (i = predicate::not_inlined_condition;
	   i < (predicate::first_dynamic_condition
		+ (int) vec_safe_length (info->conds)); i++)
	if (!(m_possible_truths & (1 << i)))
	  {
	    if (found)
	      fprintf (dump_file, ", ");
	    found = true;
	    dump_condition (dump_file, info->conds, i);
	  }
    }

  /* Account the outgoing edges first; this also fills in the context
     independent part of MIN_SIZE.  */
  estimate_calls_size_and_time (m_node, &size, &min_size,
				ret_time ? &time : NULL,
				ret_hints ? &hints : NULL, m_possible_truths,
				m_known_vals, m_known_contexts, m_known_aggs);

  sreal nonspecialized_time = time;

  /* Entry 0 of size_time_table is the unconditional cost of the function
     (both its predicates are always true; asserted below).  */
  min_size += (*info->size_time_table)[0].size;
  for (i = 0; vec_safe_iterate (info->size_time_table, i, &e); i++)
    {
      bool exec = e->exec_predicate.evaluate (m_nonspec_possible_truths);

      /* Because predicates are conservative, it can happen that nonconst is 1
	 but exec is 0.  */
      if (exec)
        {
          bool nonconst = e->nonconst_predicate.evaluate (m_possible_truths);

          gcc_checking_assert (e->time >= 0);
          gcc_checking_assert (time >= 0);

	  /* We compute specialized size only because size of nonspecialized
	     copy is context independent.

	     The difference between nonspecialized execution and specialized is
	     that nonspecialized is not going to have optimized out computations
	     known to be constant in a specialized setting.  */
	  if (nonconst)
	    size += e->size;
	  if (!ret_time)
	    continue;
	  nonspecialized_time += e->time;
	  if (!nonconst)
	    ;
	  else if (!m_inline_param_summary.exists ())
	    {
	      if (nonconst)
	        time += e->time;
	    }
	  else
	    {
	      /* Scale the cost by the probability that the predicate
		 still holds given how often the parameters change.  */
	      int prob = e->nonconst_predicate.probability 
					       (info->conds, m_possible_truths,
					        m_inline_param_summary);
	      gcc_checking_assert (prob >= 0);
	      gcc_checking_assert (prob <= REG_BR_PROB_BASE);
	      time += e->time * prob / REG_BR_PROB_BASE;
	    }
	  gcc_checking_assert (time >= 0);
        }
     }
  gcc_checking_assert ((*info->size_time_table)[0].exec_predicate == true);
  gcc_checking_assert ((*info->size_time_table)[0].nonconst_predicate == true);
  gcc_checking_assert (min_size >= 0);
  gcc_checking_assert (size >= 0);
  gcc_checking_assert (time >= 0);
  /* nonspecialized_time should be always bigger than specialized time.
     Roundoff issues however may get into the way.  */
  gcc_checking_assert ((nonspecialized_time - time * 99 / 100) >= -1);

  /* Roundoff issues may make specialized time bigger than nonspecialized
     time.  We do not really want that to happen because some heuristics
     may get confused by seeing negative speedups.  */
  if (time > nonspecialized_time)
    time = nonspecialized_time;

  if (ret_hints)
    {
      if (info->loop_iterations
	  && !info->loop_iterations->evaluate (m_possible_truths))
	hints |= INLINE_HINT_loop_iterations;
      if (info->loop_stride
	  && !info->loop_stride->evaluate (m_possible_truths))
	hints |= INLINE_HINT_loop_stride;
      if (info->scc_no)
	hints |= INLINE_HINT_in_scc;
      if (DECL_DECLARED_INLINE_P (m_node->decl))
	hints |= INLINE_HINT_declared_inline;
    }

  /* Sizes are accumulated in size_scale units; scale back down.  */
  size = RDIV (size, ipa_fn_summary::size_scale);
  min_size = RDIV (min_size, ipa_fn_summary::size_scale);

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "\n   size:%i time:%f nonspec time:%f\n", (int) size,
	     time.to_double (), nonspecialized_time.to_double ());
  if (ret_time)
    *ret_time = time;
  if (ret_nonspecialized_time)
    *ret_nonspecialized_time = nonspecialized_time;
  if (ret_size)
    *ret_size = size;
  if (ret_min_size)
    *ret_min_size = min_size;
  if (ret_hints)
    *ret_hints = hints;
  return;
}
3339
3340
3341 /* Estimate size and time needed to execute callee of EDGE assuming that
3342 parameters known to be constant at caller of EDGE are propagated.
3343 KNOWN_VALS and KNOWN_CONTEXTS are vectors of assumed known constant values
3344 and types for parameters. */
3345
3346 void
3347 estimate_ipcp_clone_size_and_time (struct cgraph_node *node,
3348 vec<tree> known_vals,
3349 vec<ipa_polymorphic_call_context>
3350 known_contexts,
3351 vec<ipa_agg_jump_function_p> known_aggs,
3352 int *ret_size, sreal *ret_time,
3353 sreal *ret_nonspec_time,
3354 ipa_hints *hints)
3355 {
3356 clause_t clause, nonspec_clause;
3357
3358 evaluate_conditions_for_known_args (node, false, known_vals, known_aggs,
3359 &clause, &nonspec_clause);
3360 ipa_call_context ctx (node, clause, nonspec_clause,
3361 known_vals, known_contexts,
3362 known_aggs, vNULL);
3363 ctx.estimate_size_and_time (ret_size, NULL, ret_time,
3364 ret_nonspec_time, hints);
3365 }
3366
3367 /* Return stack frame offset where frame of NODE is supposed to start inside
3368 of the function it is inlined to.
3369 Return 0 for functions that are not inlined. */
3370
3371 HOST_WIDE_INT
3372 ipa_get_stack_frame_offset (struct cgraph_node *node)
3373 {
3374 HOST_WIDE_INT offset = 0;
3375 if (!node->inlined_to)
3376 return 0;
3377 node = node->callers->caller;
3378 while (true)
3379 {
3380 offset += ipa_size_summaries->get (node)->estimated_self_stack_size;
3381 if (!node->inlined_to)
3382 return offset;
3383 node = node->callers->caller;
3384 }
3385 }
3386
3387
/* Update summary information of inline clones after inlining.
   Compute peak stack usage.

   DEPTH is the loop depth of the call site being inlined; it is added
   to the loop depth of every call edge that now sits deeper in the
   caller's loop nest.  */

static void
inline_update_callee_summaries (struct cgraph_node *node, int depth)
{
  struct cgraph_edge *e;

  ipa_propagate_frequency (node);
  for (e = node->callees; e; e = e->next_callee)
    {
      /* Recurse into edges that were already inlined; they have no call
	 summary of their own.  */
      if (!e->inline_failed)
	inline_update_callee_summaries (e->callee, depth);
      else
	ipa_call_summaries->get (e)->loop_depth += depth;
    }
  for (e = node->indirect_calls; e; e = e->next_callee)
    ipa_call_summaries->get (e)->loop_depth += depth;
}
3407
/* Update change_prob of EDGE after INLINED_EDGE has been inlined.
   When function A is inlined in B and A calls C with parameter that
   changes with probability PROB1 and C is known to be pass through
   of argument of B that change with probability PROB2, the probability
   of change is now PROB1*PROB2.  */

static void
remap_edge_change_prob (struct cgraph_edge *inlined_edge,
			struct cgraph_edge *edge)
{
  if (ipa_node_params_sum)
    {
      int i;
      class ipa_edge_args *args = IPA_EDGE_REF (edge);
      if (!args)
	return;
      class ipa_call_summary *es = ipa_call_summaries->get (edge);
      class ipa_call_summary *inlined_es
	= ipa_call_summaries->get (inlined_edge);

      if (es->param.length () == 0)
	return;

      for (i = 0; i < ipa_get_cs_argument_count (args); i++)
	{
	  struct ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, i);
	  /* Only pass-through and ancestor jump functions forward a
	     caller argument, so only they can combine probabilities.  */
	  if (jfunc->type == IPA_JF_PASS_THROUGH
	      || jfunc->type == IPA_JF_ANCESTOR)
	    {
	      int id = jfunc->type == IPA_JF_PASS_THROUGH
		       ? ipa_get_jf_pass_through_formal_id (jfunc)
		       : ipa_get_jf_ancestor_formal_id (jfunc);
	      if (id < (int) inlined_es->param.length ())
		{
		  int prob1 = es->param[i].change_prob;
		  int prob2 = inlined_es->param[id].change_prob;
		  int prob = combine_probabilities (prob1, prob2);

		  /* Avoid rounding a nonzero probability down to an
		     "never changes" zero.  */
		  if (prob1 && prob2 && !prob)
		    prob = 1;

		  es->param[i].change_prob = prob;
		}
	    }
	}
    }
}
3455
/* Update edge summaries of NODE after INLINED_EDGE has been inlined.

   Remap predicates of callees of NODE.  Rest of arguments match
   remap_predicate.

   Also update change probabilities.  */

static void
remap_edge_summaries (struct cgraph_edge *inlined_edge,
		      struct cgraph_node *node,
		      class ipa_fn_summary *info,
		      class ipa_node_params *params_summary,
		      class ipa_fn_summary *callee_info,
		      vec<int> operand_map,
		      vec<int> offset_map,
		      clause_t possible_truths,
		      predicate *toplev_predicate)
{
  struct cgraph_edge *e, *next;
  for (e = node->callees; e; e = next)
    {
      predicate p;
      /* Fetch NEXT first: edge_set_predicate may remove E when its
	 predicate becomes false.  */
      next = e->next_callee;

      if (e->inline_failed)
	{
          class ipa_call_summary *es = ipa_call_summaries->get (e);
	  remap_edge_change_prob (inlined_edge, e);

	  if (es->predicate)
	    {
	      p = es->predicate->remap_after_inlining
				     (info, params_summary,
				      callee_info, operand_map,
				      offset_map, possible_truths,
				      *toplev_predicate);
	      edge_set_predicate (e, &p);
	    }
	  else
	    edge_set_predicate (e, toplev_predicate);
	}
      else
	/* Edge was inlined earlier; recurse into its callee's calls.  */
	remap_edge_summaries (inlined_edge, e->callee, info,
			      params_summary, callee_info,
			      operand_map, offset_map, possible_truths,
			      toplev_predicate);
    }
  for (e = node->indirect_calls; e; e = next)
    {
      class ipa_call_summary *es = ipa_call_summaries->get (e);
      predicate p;
      next = e->next_callee;

      remap_edge_change_prob (inlined_edge, e);
      if (es->predicate)
	{
	  p = es->predicate->remap_after_inlining
				 (info, params_summary,
				  callee_info, operand_map, offset_map,
				  possible_truths, *toplev_predicate);
	  edge_set_predicate (e, &p);
	}
      else
	edge_set_predicate (e, toplev_predicate);
    }
}
3522
3523 /* Same as remap_predicate, but set result into hint *HINT. */
3524
3525 static void
3526 remap_hint_predicate (class ipa_fn_summary *info,
3527 class ipa_node_params *params_summary,
3528 class ipa_fn_summary *callee_info,
3529 predicate **hint,
3530 vec<int> operand_map,
3531 vec<int> offset_map,
3532 clause_t possible_truths,
3533 predicate *toplev_predicate)
3534 {
3535 predicate p;
3536
3537 if (!*hint)
3538 return;
3539 p = (*hint)->remap_after_inlining
3540 (info, params_summary, callee_info,
3541 operand_map, offset_map,
3542 possible_truths, *toplev_predicate);
3543 if (p != false && p != true)
3544 {
3545 if (!*hint)
3546 set_hint_predicate (hint, p);
3547 else
3548 **hint &= p;
3549 }
3550 }
3551
/* We inlined EDGE.  Update summary of the function we inlined into.

   This moves the callee's size/time entries, call-edge predicates and
   hint predicates into the caller's summary (remapping their operands
   through EDGE's jump functions), updates peak stack usage, and finally
   releases the summaries that are no longer maintained for inline
   clones.  */

void
ipa_merge_fn_summary_after_inlining (struct cgraph_edge *edge)
{
  ipa_fn_summary *callee_info = ipa_fn_summaries->get (edge->callee);
  struct cgraph_node *to = (edge->caller->inlined_to
			    ? edge->caller->inlined_to : edge->caller);
  class ipa_fn_summary *info = ipa_fn_summaries->get (to);
  clause_t clause = 0;	/* not_inline is known to be false.  */
  size_time_entry *e;
  auto_vec<int, 8> operand_map;
  auto_vec<int, 8> offset_map;
  int i;
  predicate toplev_predicate;
  class ipa_call_summary *es = ipa_call_summaries->get (edge);
  class ipa_node_params *params_summary = (ipa_node_params_sum
					   ? IPA_NODE_REF (to) : NULL);

  /* Everything inlined through EDGE is additionally guarded by the
     predicate of EDGE itself.  */
  if (es->predicate)
    toplev_predicate = *es->predicate;
  else
    toplev_predicate = true;

  info->fp_expressions |= callee_info->fp_expressions;

  if (callee_info->conds)
    evaluate_properties_for_edge (edge, true, &clause, NULL, NULL, NULL, NULL);
  if (ipa_node_params_sum && callee_info->conds)
    {
      class ipa_edge_args *args = IPA_EDGE_REF (edge);
      int count = args ? ipa_get_cs_argument_count (args) : 0;
      int i;

      /* Build maps translating callee parameter indices (and aggregate
	 offsets) to the caller's parameter space.  -1 means "no
	 mapping"; the corresponding conditions become unknown.  */
      if (count)
	{
	  operand_map.safe_grow_cleared (count);
	  offset_map.safe_grow_cleared (count);
	}
      for (i = 0; i < count; i++)
	{
	  struct ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, i);
	  int map = -1;

	  /* TODO: handle non-NOPs when merging.  */
	  if (jfunc->type == IPA_JF_PASS_THROUGH)
	    {
	      if (ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
		map = ipa_get_jf_pass_through_formal_id (jfunc);
	      if (!ipa_get_jf_pass_through_agg_preserved (jfunc))
		offset_map[i] = -1;
	    }
	  else if (jfunc->type == IPA_JF_ANCESTOR)
	    {
	      HOST_WIDE_INT offset = ipa_get_jf_ancestor_offset (jfunc);
	      if (offset >= 0 && offset < INT_MAX)
		{
		  map = ipa_get_jf_ancestor_formal_id (jfunc);
		  if (!ipa_get_jf_ancestor_agg_preserved (jfunc))
		    offset = -1;
		  offset_map[i] = offset;
		}
	    }
	  operand_map[i] = map;
	  gcc_assert (map < ipa_get_param_count (params_summary));
	}
    }
  /* Merge the size/time table, scaling times by the edge frequency and
     by the probability that the nonconst predicate holds.  */
  sreal freq = edge->sreal_frequency ();
  for (i = 0; vec_safe_iterate (callee_info->size_time_table, i, &e); i++)
    {
      predicate p;
      p = e->exec_predicate.remap_after_inlining
			     (info, params_summary,
			      callee_info, operand_map,
			      offset_map, clause,
			      toplev_predicate);
      predicate nonconstp;
      nonconstp = e->nonconst_predicate.remap_after_inlining
		     (info, params_summary,
		      callee_info, operand_map,
		      offset_map, clause,
		      toplev_predicate);
      if (p != false && nonconstp != false)
	{
	  sreal add_time = ((sreal)e->time * freq);
	  int prob = e->nonconst_predicate.probability (callee_info->conds,
							clause, es->param);
	  add_time = add_time * prob / REG_BR_PROB_BASE;
	  if (prob != REG_BR_PROB_BASE
	      && dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "\t\tScaling time by probability:%f\n",
		       (double) prob / REG_BR_PROB_BASE);
	    }
	  info->account_size_time (e->size, add_time, p, nonconstp);
	}
    }
  remap_edge_summaries (edge, edge->callee, info, params_summary,
		 	callee_info, operand_map,
			offset_map, clause, &toplev_predicate);
  remap_hint_predicate (info, params_summary, callee_info,
			&callee_info->loop_iterations,
			operand_map, offset_map, clause, &toplev_predicate);
  remap_hint_predicate (info, params_summary, callee_info,
			&callee_info->loop_stride,
			operand_map, offset_map, clause, &toplev_predicate);

  /* Peak stack usage: the callee's frame now sits on top of the chain
     of frames it was inlined into.  */
  HOST_WIDE_INT stack_frame_offset = ipa_get_stack_frame_offset (edge->callee);
  HOST_WIDE_INT peak = stack_frame_offset + callee_info->estimated_stack_size;

  if (info->estimated_stack_size < peak)
    info->estimated_stack_size = peak;

  inline_update_callee_summaries (edge->callee, es->loop_depth);

  /* Free summaries that are not maintained for inline clones/edges.  */
  ipa_call_summaries->remove (edge);
  ipa_fn_summaries->remove (edge->callee);
  ipa_remove_from_growth_caches (edge);
}
3672
/* For performance reasons ipa_merge_fn_summary_after_inlining is not updating
   overall size and time.  Recompute it.

   Sums all size/time table entries (i.e. assumes all predicates may be
   true) and re-accounts the outgoing calls with only the false
   condition known to not hold.  */

void
ipa_update_overall_fn_summary (struct cgraph_node *node)
{
  class ipa_fn_summary *info = ipa_fn_summaries->get (node);
  class ipa_size_summary *size_info = ipa_size_summaries->get (node);
  size_time_entry *e;
  int i;

  size_info->size = 0;
  info->time = 0;
  for (i = 0; vec_safe_iterate (info->size_time_table, i, &e); i++)
    {
      size_info->size += e->size;
      info->time += e->time;
    }
  /* Entry 0 is the unconditional cost of the body.  */
  info->min_size = (*info->size_time_table)[0].size;
  estimate_calls_size_and_time (node, &size_info->size, &info->min_size,
				&info->time, NULL,
				~(clause_t) (1 << predicate::false_condition),
				vNULL, vNULL, vNULL);
  /* Internal accounting uses size_scale units; scale back down.  */
  size_info->size = RDIV (size_info->size, ipa_fn_summary::size_scale);
  info->min_size = RDIV (info->min_size, ipa_fn_summary::size_scale);
}
3699
3700
/* This function performs intraprocedural analysis in NODE that is required to
   inline indirect calls.  It computes the IPA-prop jump functions and
   parameter analysis via ipa_analyze_node and optionally dumps them.  */

static void
inline_indirect_intraprocedural_analysis (struct cgraph_node *node)
{
  ipa_analyze_node (node);
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      ipa_print_node_params (dump_file, node);
      ipa_print_node_jump_functions (dump_file, node);
    }
}
3714
3715
/* Analyze function NODE for the inliner: run the indirect-call analysis
   when optimizing and compute its function summary.  For functions
   built without optimization, mark all call edges as not inlinable.  */

void
inline_analyze_function (struct cgraph_node *node)
{
  push_cfun (DECL_STRUCT_FUNCTION (node->decl));

  if (dump_file)
    fprintf (dump_file, "\nAnalyzing function: %s/%u\n",
	     node->name (), node->order);
  if (opt_for_fn (node->decl, optimize) && !node->thunk.thunk_p)
    inline_indirect_intraprocedural_analysis (node);
  compute_fn_summary (node, false);
  if (!optimize)
    {
      struct cgraph_edge *e;
      /* Without optimization no inlining will happen through these
	 edges; record the reason explicitly.  */
      for (e = node->callees; e; e = e->next_callee)
	e->inline_failed = CIF_FUNCTION_NOT_OPTIMIZED;
      for (e = node->indirect_calls; e; e = e->next_callee)
	e->inline_failed = CIF_FUNCTION_NOT_OPTIMIZED;
    }

  pop_cfun ();
}
3740
3741
/* Called when new function is inserted to callgraph late.
   Analyze it so it has a summary like every other function.  */

void
ipa_fn_summary_t::insert (struct cgraph_node *node, ipa_fn_summary *)
{
  inline_analyze_function (node);
}
3749
3750 /* Note function body size. */
3751
3752 static void
3753 ipa_fn_summary_generate (void)
3754 {
3755 struct cgraph_node *node;
3756
3757 FOR_EACH_DEFINED_FUNCTION (node)
3758 if (DECL_STRUCT_FUNCTION (node->decl))
3759 node->versionable = tree_versionable_function_p (node->decl);
3760
3761 ipa_fn_summary_alloc ();
3762
3763 ipa_fn_summaries->enable_insertion_hook ();
3764
3765 ipa_register_cgraph_hooks ();
3766
3767 FOR_EACH_DEFINED_FUNCTION (node)
3768 if (!node->alias
3769 && (flag_generate_lto || flag_generate_offload|| flag_wpa
3770 || opt_for_fn (node->decl, optimize)))
3771 inline_analyze_function (node);
3772 }
3773
3774
/* Read inline summary for edge E from IB.  (The old comment said
   "Write ... to OB" — this is the read side.)

   When PREVAILS is false the edge's summary is discarded, but the
   stream must still be fully consumed to keep IB in sync.  */

static void
read_ipa_call_summary (class lto_input_block *ib, struct cgraph_edge *e,
		       bool prevails)
{
  class ipa_call_summary *es = prevails
				? ipa_call_summaries->get_create (e) : NULL;
  predicate p;
  int length, i;

  int size = streamer_read_uhwi (ib);
  int time = streamer_read_uhwi (ib);
  int depth = streamer_read_uhwi (ib);

  if (es)
    {
      es->call_stmt_size = size;
      es->call_stmt_time = time;
      es->loop_depth = depth;
    }

  bitpack_d bp = streamer_read_bitpack (ib);
  if (es)
    es->is_return_callee_uncaptured = bp_unpack_value (&bp, 1);	
  else
    bp_unpack_value (&bp, 1);	

  p.stream_in (ib);
  if (es)
    edge_set_predicate (e, &p);
  length = streamer_read_uhwi (ib);
  if (length && es && e->possibly_call_in_translation_unit_p ())
    {
      es->param.safe_grow_cleared (length);
      for (i = 0; i < length; i++)
	es->param[i].change_prob = streamer_read_uhwi (ib);
    }
  else
    {
      /* Still consume the values even when they are not stored.  */
      for (i = 0; i < length; i++)
	streamer_read_uhwi (ib);
    }
}
3819
3820
/* Stream in inline summaries from the section.

   The read order here must mirror exactly the write order in
   ipa_fn_summary_write / write_ipa_call_summary.  For non-prevailing
   nodes the data is still consumed (but discarded) so the input block
   stays in sync.  */

static void
inline_read_section (struct lto_file_decl_data *file_data, const char *data,
		     size_t len)
{
  const struct lto_function_header *header =
    (const struct lto_function_header *) data;
  const int cfg_offset = sizeof (struct lto_function_header);
  const int main_offset = cfg_offset + header->cfg_size;
  const int string_offset = main_offset + header->main_size;
  class data_in *data_in;
  unsigned int i, count2, j;
  unsigned int f_count;

  lto_input_block ib ((const char *) data + main_offset, header->main_size,
		      file_data->mode_table);

  data_in =
    lto_data_in_create (file_data, (const char *) data + string_offset,
			header->string_size, vNULL);
  f_count = streamer_read_uhwi (&ib);
  for (i = 0; i < f_count; i++)
    {
      unsigned int index;
      struct cgraph_node *node;
      class ipa_fn_summary *info;
      class ipa_node_params *params_summary;
      class ipa_size_summary *size_info;
      lto_symtab_encoder_t encoder;
      struct bitpack_d bp;
      struct cgraph_edge *e;
      predicate p;

      index = streamer_read_uhwi (&ib);
      encoder = file_data->symtab_node_encoder;
      node = dyn_cast<cgraph_node *> (lto_symtab_encoder_deref (encoder,
								index));
      /* NULL summaries below mean "consume the stream but drop data".  */
      info = node->prevailing_p () ? ipa_fn_summaries->get_create (node) : NULL;
      params_summary = node->prevailing_p () ? IPA_NODE_REF (node) : NULL;
      size_info = node->prevailing_p ()
		  ? ipa_size_summaries->get_create (node) : NULL;

      int stack_size = streamer_read_uhwi (&ib);
      int size = streamer_read_uhwi (&ib);
      sreal time = sreal::stream_in (&ib);

      if (info)
	{
	  info->estimated_stack_size
	    = size_info->estimated_self_stack_size = stack_size;
	  size_info->size = size_info->self_size = size;
	  info->time = time;
	}

      bp = streamer_read_bitpack (&ib);
      if (info)
	{
	  info->inlinable = bp_unpack_value (&bp, 1);
	  info->fp_expressions = bp_unpack_value (&bp, 1);
	}
      else
	{
	  bp_unpack_value (&bp, 1);
	  bp_unpack_value (&bp, 1);
	}

      /* Conditions referenced by the predicates.  */
      count2 = streamer_read_uhwi (&ib);
      gcc_assert (!info || !info->conds);
      if (info)
	vec_safe_reserve_exact (info->conds, count2);
      for (j = 0; j < count2; j++)
	{
	  struct condition c;
	  unsigned int k, count3;
	  c.operand_num = streamer_read_uhwi (&ib);
	  c.code = (enum tree_code) streamer_read_uhwi (&ib);
	  c.type = stream_read_tree (&ib, data_in);
	  c.val = stream_read_tree (&ib, data_in);
	  bp = streamer_read_bitpack (&ib);
	  c.agg_contents = bp_unpack_value (&bp, 1);
	  c.by_ref = bp_unpack_value (&bp, 1);
	  if (c.agg_contents)
	    c.offset = streamer_read_uhwi (&ib);
	  count3 = streamer_read_uhwi (&ib);
	  c.param_ops = NULL;
	  if (info)
	    vec_safe_reserve_exact (c.param_ops, count3);
	  if (params_summary)
	    ipa_set_param_used_by_ipa_predicates
		    (params_summary, c.operand_num, true);
	  /* Chain of operations applied to the parameter before the
	     comparison in C is done.  */
	  for (k = 0; k < count3; k++)
	    {
	      struct expr_eval_op op;
	      enum gimple_rhs_class rhs_class;
	      op.code = (enum tree_code) streamer_read_uhwi (&ib);
	      op.type = stream_read_tree (&ib, data_in);
	      switch (rhs_class = get_gimple_rhs_class (op.code))
		{
		case GIMPLE_UNARY_RHS:
		  op.index = 0;
		  op.val[0] = NULL_TREE;
		  op.val[1] = NULL_TREE;
		  break;

		case GIMPLE_BINARY_RHS:
		case GIMPLE_TERNARY_RHS:
		  bp = streamer_read_bitpack (&ib);
		  op.index = bp_unpack_value (&bp, 2);
		  op.val[0] = stream_read_tree (&ib, data_in);
		  if (rhs_class == GIMPLE_BINARY_RHS)
		    op.val[1] = NULL_TREE;
		  else
		    op.val[1] = stream_read_tree (&ib, data_in);
		  break;

		default:
		  fatal_error (UNKNOWN_LOCATION,
			       "invalid fnsummary in LTO stream");
		}
	      if (info)
		c.param_ops->quick_push (op);
	    }
	  if (info)
	    info->conds->quick_push (c);
	}
      /* Size/time table.  */
      count2 = streamer_read_uhwi (&ib);
      gcc_assert (!info || !info->size_time_table);
      if (info && count2)
	vec_safe_reserve_exact (info->size_time_table, count2);
      for (j = 0; j < count2; j++)
	{
	  class size_time_entry e;

	  e.size = streamer_read_uhwi (&ib);
	  e.time = sreal::stream_in (&ib);
	  e.exec_predicate.stream_in (&ib);
	  e.nonconst_predicate.stream_in (&ib);

	  if (info)
	    info->size_time_table->quick_push (e);
	}

      /* Hint predicates.  */
      p.stream_in (&ib);
      if (info)
        set_hint_predicate (&info->loop_iterations, p);
      p.stream_in (&ib);
      if (info)
        set_hint_predicate (&info->loop_stride, p);
      for (e = node->callees; e; e = e->next_callee)
	read_ipa_call_summary (&ib, e, info != NULL);
      for (e = node->indirect_calls; e; e = e->next_callee)
	read_ipa_call_summary (&ib, e, info != NULL);
    }

  lto_free_section_data (file_data, LTO_section_ipa_fn_summary, NULL, data,
			 len);
  lto_data_in_delete (data_in);
}
3980
3981
/* Read inline summary.  Jump functions are shared among ipa-cp
   and inliner, so when ipa-cp is active, we don't need to write them
   twice.  When ipa-cp is disabled the jump functions are streamed
   here instead.  */

static void
ipa_fn_summary_read (void)
{
  struct lto_file_decl_data **file_data_vec = lto_get_file_decl_data ();
  struct lto_file_decl_data *file_data;
  unsigned int j = 0;

  ipa_fn_summary_alloc ();

  while ((file_data = file_data_vec[j++]))
    {
      size_t len;
      const char *data
	= lto_get_summary_section_data (file_data, LTO_section_ipa_fn_summary,
					&len);
      if (data)
	inline_read_section (file_data, data, len);
      else
	/* Fatal error here.  We do not want to support compiling ltrans units
	   with different version of compiler or different flags than the WPA
	   unit, so this should never happen.  */
	fatal_error (input_location,
		     "ipa inline summary is missing in input file");
    }
  ipa_register_cgraph_hooks ();
  if (!flag_ipa_cp)
    ipa_prop_read_jump_functions ();

  gcc_assert (ipa_fn_summaries);
  ipa_fn_summaries->enable_insertion_hook ();
}
4017
4018
/* Write inline summary for edge E to OB.
   Must remain in sync with read_ipa_call_summary.  */

static void
write_ipa_call_summary (struct output_block *ob, struct cgraph_edge *e)
{
  class ipa_call_summary *es = ipa_call_summaries->get (e);
  int i;

  streamer_write_uhwi (ob, es->call_stmt_size);
  streamer_write_uhwi (ob, es->call_stmt_time);
  streamer_write_uhwi (ob, es->loop_depth);

  bitpack_d bp = bitpack_create (ob->main_stream);
  bp_pack_value (&bp, es->is_return_callee_uncaptured, 1);	
  streamer_write_bitpack (&bp);

  if (es->predicate)
    es->predicate->stream_out (ob);
  else
    /* 0 is the stream encoding of "no predicate".  */
    streamer_write_uhwi (ob, 0);
  streamer_write_uhwi (ob, es->param.length ());
  for (i = 0; i < (int) es->param.length (); i++)
    streamer_write_uhwi (ob, es->param[i].change_prob);
}
4043
4044
4045 /* Write inline summary for node in SET.
4046 Jump functions are shared among ipa-cp and inliner, so when ipa-cp is
4047 active, we don't need to write them twice. */
4048
4049 static void
4050 ipa_fn_summary_write (void)
4051 {
4052 struct output_block *ob = create_output_block (LTO_section_ipa_fn_summary);
4053 lto_symtab_encoder_t encoder = ob->decl_state->symtab_node_encoder;
4054 unsigned int count = 0;
4055 int i;
4056
4057 for (i = 0; i < lto_symtab_encoder_size (encoder); i++)
4058 {
4059 symtab_node *snode = lto_symtab_encoder_deref (encoder, i);
4060 cgraph_node *cnode = dyn_cast <cgraph_node *> (snode);
4061 if (cnode && cnode->definition && !cnode->alias)
4062 count++;
4063 }
4064 streamer_write_uhwi (ob, count);
4065
4066 for (i = 0; i < lto_symtab_encoder_size (encoder); i++)
4067 {
4068 symtab_node *snode = lto_symtab_encoder_deref (encoder, i);
4069 cgraph_node *cnode = dyn_cast <cgraph_node *> (snode);
4070 if (cnode && cnode->definition && !cnode->alias)
4071 {
4072 class ipa_fn_summary *info = ipa_fn_summaries->get (cnode);
4073 class ipa_size_summary *size_info = ipa_size_summaries->get (cnode);
4074 struct bitpack_d bp;
4075 struct cgraph_edge *edge;
4076 int i;
4077 size_time_entry *e;
4078 struct condition *c;
4079
4080 streamer_write_uhwi (ob, lto_symtab_encoder_encode (encoder, cnode));
4081 streamer_write_hwi (ob, size_info->estimated_self_stack_size);
4082 streamer_write_hwi (ob, size_info->self_size);
4083 info->time.stream_out (ob);
4084 bp = bitpack_create (ob->main_stream);
4085 bp_pack_value (&bp, info->inlinable, 1);
4086 bp_pack_value (&bp, false, 1);
4087 bp_pack_value (&bp, info->fp_expressions, 1);
4088 streamer_write_bitpack (&bp);
4089 streamer_write_uhwi (ob, vec_safe_length (info->conds));
4090 for (i = 0; vec_safe_iterate (info->conds, i, &c); i++)
4091 {
4092 int j;
4093 struct expr_eval_op *op;
4094
4095 streamer_write_uhwi (ob, c->operand_num);
4096 streamer_write_uhwi (ob, c->code);
4097 stream_write_tree (ob, c->type, true);
4098 stream_write_tree (ob, c->val, true);
4099 bp = bitpack_create (ob->main_stream);
4100 bp_pack_value (&bp, c->agg_contents, 1);
4101 bp_pack_value (&bp, c->by_ref, 1);
4102 streamer_write_bitpack (&bp);
4103 if (c->agg_contents)
4104 streamer_write_uhwi (ob, c->offset);
4105 streamer_write_uhwi (ob, vec_safe_length (c->param_ops));
4106 for (j = 0; vec_safe_iterate (c->param_ops, j, &op); j++)
4107 {
4108 streamer_write_uhwi (ob, op->code);
4109 stream_write_tree (ob, op->type, true);
4110 if (op->val[0])
4111 {
4112 bp = bitpack_create (ob->main_stream);
4113 bp_pack_value (&bp, op->index, 2);
4114 streamer_write_bitpack (&bp);
4115 stream_write_tree (ob, op->val[0], true);
4116 if (op->val[1])
4117 stream_write_tree (ob, op->val[1], true);
4118 }
4119 }
4120 }
4121 streamer_write_uhwi (ob, vec_safe_length (info->size_time_table));
4122 for (i = 0; vec_safe_iterate (info->size_time_table, i, &e); i++)
4123 {
4124 streamer_write_uhwi (ob, e->size);
4125 e->time.stream_out (ob);
4126 e->exec_predicate.stream_out (ob);
4127 e->nonconst_predicate.stream_out (ob);
4128 }
4129 if (info->loop_iterations)
4130 info->loop_iterations->stream_out (ob);
4131 else
4132 streamer_write_uhwi (ob, 0);
4133 if (info->loop_stride)
4134 info->loop_stride->stream_out (ob);
4135 else
4136 streamer_write_uhwi (ob, 0);
4137 for (edge = cnode->callees; edge; edge = edge->next_callee)
4138 write_ipa_call_summary (ob, edge);
4139 for (edge = cnode->indirect_calls; edge; edge = edge->next_callee)
4140 write_ipa_call_summary (ob, edge);
4141 }
4142 }
4143 streamer_write_char_stream (ob->main_stream, 0);
4144 produce_asm (ob, NULL);
4145 destroy_output_block (ob);
4146
4147 if (!flag_ipa_cp)
4148 ipa_prop_write_jump_functions ();
4149 }
4150
4151
/* Release function summary.  Frees the GC-allocated function summaries,
   the heap-allocated call summaries and the edge predicate pool.  Safe to
   call when the summaries were never computed or were already freed.  */

void
ipa_free_fn_summary (void)
{
  /* NOTE(review): the guard tests ipa_call_summaries, yet the first thing
     freed is ipa_fn_summaries — presumably the two are always allocated
     together so either works as the "already freed" check; confirm against
     the allocation site.  */
  if (!ipa_call_summaries)
    return;
  /* ipa_fn_summaries lives in GC memory; ipa_call_summaries is a plain
     heap object.  Null both pointers so a second call is a no-op.  */
  ggc_delete (ipa_fn_summaries);
  ipa_fn_summaries = NULL;
  delete ipa_call_summaries;
  ipa_call_summaries = NULL;
  edge_predicate_pool.release ();
  /* During IPA this is one of largest datastructures to release.  */
  if (flag_wpa)
    ggc_trim ();
}
4168
4169 /* Release function summary. */
4170
4171 void
4172 ipa_free_size_summary (void)
4173 {
4174 if (!ipa_size_summaries)
4175 return;
4176 delete ipa_size_summaries;
4177 ipa_size_summaries = NULL;
4178 }
4179
namespace {

/* Pass descriptor for the per-function summary computation pass.  */

const pass_data pass_data_local_fn_summary =
{
  GIMPLE_PASS, /* type */
  "local-fnsummary", /* name */
  OPTGROUP_INLINE, /* optinfo_flags */
  TV_INLINE_PARAMETERS, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

/* GIMPLE pass computing the summary of the function currently being
   compiled; all the work is delegated to compute_fn_summary_for_current.  */

class pass_local_fn_summary : public gimple_opt_pass
{
public:
  pass_local_fn_summary (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_local_fn_summary, ctxt)
  {}

  /* opt_pass methods: */
  opt_pass * clone () { return new pass_local_fn_summary (m_ctxt); }
  virtual unsigned int execute (function *)
    {
      return compute_fn_summary_for_current ();
    }

}; // class pass_local_fn_summary

} // anon namespace
4212
4213 gimple_opt_pass *
4214 make_pass_local_fn_summary (gcc::context *ctxt)
4215 {
4216 return new pass_local_fn_summary (ctxt);
4217 }
4218
4219
/* Free inline summary.  */

namespace {

/* Pass descriptor for the simple IPA pass that releases function
   summaries once they are no longer needed.  */

const pass_data pass_data_ipa_free_fn_summary =
{
  SIMPLE_IPA_PASS, /* type */
  "free-fnsummary", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_IPA_FREE_INLINE_SUMMARY, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

/* Simple IPA pass releasing the function summaries and, outside of WPA,
   the size summaries as well.  */

class pass_ipa_free_fn_summary : public simple_ipa_opt_pass
{
public:
  pass_ipa_free_fn_summary (gcc::context *ctxt)
    : simple_ipa_opt_pass (pass_data_ipa_free_fn_summary, ctxt),
      small_p (false)
  {}

  /* opt_pass methods: */
  opt_pass *clone () { return new pass_ipa_free_fn_summary (m_ctxt); }
  /* Receive the single boolean pass parameter from passes.def.  */
  void set_pass_param (unsigned int n, bool param)
    {
      gcc_assert (n == 0);
      small_p = param;
    }
  virtual bool gate (function *) { return true; }
  virtual unsigned int execute (function *)
    {
      ipa_free_fn_summary ();
      /* Size summaries are still needed during WPA.  */
      if (!flag_wpa)
	ipa_free_size_summary ();
      return 0;
    }

private:
  /* Pass parameter from passes.def.  NOTE(review): set via set_pass_param
     but not read anywhere in the visible code — presumably it only
     distinguishes the two instantiations of this pass; confirm whether it
     is still needed.  */
  bool small_p;
}; // class pass_ipa_free_fn_summary

} // anon namespace
4266
4267 simple_ipa_opt_pass *
4268 make_pass_ipa_free_fn_summary (gcc::context *ctxt)
4269 {
4270 return new pass_ipa_free_fn_summary (ctxt);
4271 }
4272
namespace {

/* Pass descriptor for the IPA function summary pass.  */

const pass_data pass_data_ipa_fn_summary =
{
  IPA_PASS, /* type */
  "fnsummary", /* name */
  OPTGROUP_INLINE, /* optinfo_flags */
  TV_IPA_FNSUMMARY, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  ( TODO_dump_symtab ), /* todo_flags_finish */
};

/* IPA pass whose work happens entirely in the summary hooks passed to
   ipa_opt_pass_d: generation (ipa_fn_summary_generate) and LTO streaming
   (ipa_fn_summary_write / ipa_fn_summary_read).  execute is a no-op.  */

class pass_ipa_fn_summary : public ipa_opt_pass_d
{
public:
  pass_ipa_fn_summary (gcc::context *ctxt)
    : ipa_opt_pass_d (pass_data_ipa_fn_summary, ctxt,
		      ipa_fn_summary_generate, /* generate_summary */
		      ipa_fn_summary_write, /* write_summary */
		      ipa_fn_summary_read, /* read_summary */
		      NULL, /* write_optimization_summary */
		      NULL, /* read_optimization_summary */
		      NULL, /* stmt_fixup */
		      0, /* function_transform_todo_flags_start */
		      NULL, /* function_transform */
		      NULL) /* variable_transform */
  {}

  /* opt_pass methods: */
  virtual unsigned int execute (function *) { return 0; }

}; // class pass_ipa_fn_summary

} // anon namespace
4310
4311 ipa_opt_pass_d *
4312 make_pass_ipa_fn_summary (gcc::context *ctxt)
4313 {
4314 return new pass_ipa_fn_summary (ctxt);
4315 }
4316
/* Reset all state within ipa-fnsummary.c so that we can rerun the compiler
   within the same process.  For use by toplev::finalize.  */

void
ipa_fnsummary_c_finalize (void)
{
  /* Releases the function summaries, call summaries and the predicate
     pool.  NOTE(review): ipa_free_size_summary is not called here —
     confirm the size summaries need no finalization on rerun.  */
  ipa_free_fn_summary ();
}