Add an "early rematerialisation" pass
[gcc.git] / gcc / ipa-fnsummary.c
1 /* Function summary pass.
2 Copyright (C) 2003-2018 Free Software Foundation, Inc.
3 Contributed by Jan Hubicka
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 /* Analysis of function bodies used by inter-procedural passes
22
23 We estimate for each function
24	 - function body size and size after specializing into a given context
25 - average function execution time in a given context
26 - function frame size
27 For each call
28 - call statement size, time and how often the parameters change
29
30	 ipa_fn_summary data structures store the above information locally (i.e.
31 parameters of the function itself) and globally (i.e. parameters of
32 the function created by applying all the inline decisions already
33 present in the callgraph).
34
35 We provide access to the ipa_fn_summary data structure and
36 basic logic updating the parameters when inlining is performed.
37
38 The summaries are context sensitive. Context means
39 1) partial assignment of known constant values of operands
40	 2) whether the function is inlined into the call or not.
41	 It is easy to add more variants.  To represent function size and time
42	 that depend on context (i.e. are known to be optimized away when the
43	 context is known, either by inlining or from IPA-CP and cloning),
44	 we use predicates.
45
46	 estimate_edge_size_and_time can be used to query function size/time in
47	 the given context.  ipa_merge_fn_summary_after_inlining merges the
48	 properties of caller and callee after inlining.
49	
50	 Finally pass_inline_parameters is exported.  This is used to drive
51	 computation of function parameters used by the early inliner.  The IPA
52	 inliner performs analysis via its analyze_function method.  */
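
/* As an illustration (a hypothetical example, not taken from the passes
   themselves): for a function such as

     int
     f (int x)
     {
       if (x != 4)
         return expensive ();
       return 1;
     }

   the size and time of the call to expensive () can be accounted under the
   predicate "param #0 != 4".  When IPA-CP clones f for x == 4, that entry
   is known to be optimized away and the specialized size/time estimates
   drop accordingly.  */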
53
54 #include "config.h"
55 #include "system.h"
56 #include "coretypes.h"
57 #include "backend.h"
58 #include "tree.h"
59 #include "gimple.h"
60 #include "alloc-pool.h"
61 #include "tree-pass.h"
62 #include "ssa.h"
63 #include "tree-streamer.h"
64 #include "cgraph.h"
65 #include "diagnostic.h"
66 #include "fold-const.h"
67 #include "print-tree.h"
68 #include "tree-inline.h"
69 #include "gimple-pretty-print.h"
70 #include "params.h"
71 #include "cfganal.h"
72 #include "gimple-iterator.h"
73 #include "tree-cfg.h"
74 #include "tree-ssa-loop-niter.h"
75 #include "tree-ssa-loop.h"
76 #include "symbol-summary.h"
77 #include "ipa-prop.h"
78 #include "ipa-fnsummary.h"
79 #include "cfgloop.h"
80 #include "tree-scalar-evolution.h"
81 #include "ipa-utils.h"
82 #include "cfgexpand.h"
83 #include "gimplify.h"
84 #include "stringpool.h"
85 #include "attribs.h"
86
87 /* Summaries. */
88 function_summary <ipa_fn_summary *> *ipa_fn_summaries;
89 call_summary <ipa_call_summary *> *ipa_call_summaries;
90
91	/* Edge predicates go here.  */
92 static object_allocator<predicate> edge_predicate_pool ("edge predicates");
93
94
95 /* Dump IPA hints. */
96 void
97 ipa_dump_hints (FILE *f, ipa_hints hints)
98 {
99 if (!hints)
100 return;
101 fprintf (f, "IPA hints:");
102 if (hints & INLINE_HINT_indirect_call)
103 {
104 hints &= ~INLINE_HINT_indirect_call;
105 fprintf (f, " indirect_call");
106 }
107 if (hints & INLINE_HINT_loop_iterations)
108 {
109 hints &= ~INLINE_HINT_loop_iterations;
110 fprintf (f, " loop_iterations");
111 }
112 if (hints & INLINE_HINT_loop_stride)
113 {
114 hints &= ~INLINE_HINT_loop_stride;
115 fprintf (f, " loop_stride");
116 }
117 if (hints & INLINE_HINT_same_scc)
118 {
119 hints &= ~INLINE_HINT_same_scc;
120 fprintf (f, " same_scc");
121 }
122 if (hints & INLINE_HINT_in_scc)
123 {
124 hints &= ~INLINE_HINT_in_scc;
125 fprintf (f, " in_scc");
126 }
127 if (hints & INLINE_HINT_cross_module)
128 {
129 hints &= ~INLINE_HINT_cross_module;
130 fprintf (f, " cross_module");
131 }
132 if (hints & INLINE_HINT_declared_inline)
133 {
134 hints &= ~INLINE_HINT_declared_inline;
135 fprintf (f, " declared_inline");
136 }
137 if (hints & INLINE_HINT_array_index)
138 {
139 hints &= ~INLINE_HINT_array_index;
140 fprintf (f, " array_index");
141 }
142 if (hints & INLINE_HINT_known_hot)
143 {
144 hints &= ~INLINE_HINT_known_hot;
145 fprintf (f, " known_hot");
146 }
147 gcc_assert (!hints);
148 }
149
150
151 /* Record SIZE and TIME to SUMMARY.
152 The accounted code will be executed when EXEC_PRED is true.
153	 When NONCONST_PRED is false the code will evaluate to a constant and
154	 will get optimized out in specialized clones of the function.  */
155
156 void
157 ipa_fn_summary::account_size_time (int size, sreal time,
158 const predicate &exec_pred,
159 const predicate &nonconst_pred_in)
160 {
161 size_time_entry *e;
162 bool found = false;
163 int i;
164 predicate nonconst_pred;
165
166 if (exec_pred == false)
167 return;
168
169 nonconst_pred = nonconst_pred_in & exec_pred;
170
171 if (nonconst_pred == false)
172 return;
173
174	 /* We need to create the initial empty unconditional clause, but otherwise
175	 we don't need to account empty times and sizes.  */
176 if (!size && time == 0 && size_time_table)
177 return;
178
179 gcc_assert (time >= 0);
180
181 for (i = 0; vec_safe_iterate (size_time_table, i, &e); i++)
182 if (e->exec_predicate == exec_pred
183 && e->nonconst_predicate == nonconst_pred)
184 {
185 found = true;
186 break;
187 }
188 if (i == 256)
189 {
190 i = 0;
191 found = true;
192 e = &(*size_time_table)[0];
193 if (dump_file && (dump_flags & TDF_DETAILS))
194 fprintf (dump_file,
195 "\t\tReached limit on number of entries, "
196 "ignoring the predicate.");
197 }
198 if (dump_file && (dump_flags & TDF_DETAILS) && (time != 0 || size))
199 {
200 fprintf (dump_file,
201 "\t\tAccounting size:%3.2f, time:%3.2f on %spredicate exec:",
202 ((double) size) / ipa_fn_summary::size_scale,
203 (time.to_double ()), found ? "" : "new ");
204 exec_pred.dump (dump_file, conds, 0);
205 if (exec_pred != nonconst_pred)
206 {
207 fprintf (dump_file, " nonconst:");
208 nonconst_pred.dump (dump_file, conds);
209 }
210 else
211 fprintf (dump_file, "\n");
212 }
213 if (!found)
214 {
215 struct size_time_entry new_entry;
216 new_entry.size = size;
217 new_entry.time = time;
218 new_entry.exec_predicate = exec_pred;
219 new_entry.nonconst_predicate = nonconst_pred;
220 vec_safe_push (size_time_table, new_entry);
221 }
222 else
223 {
224 e->size += size;
225 e->time += time;
226 }
227 }
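
/* For instance (hypothetical values): calling account_size_time twice with
   sizes 2 and 3 (in units of ipa_fn_summary::size_scale) and the same
   exec/nonconst predicate pair leaves a single size_time_entry with size 5,
   because entries with identical predicates are merged rather than pushed.  */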
228
229	/* We proved E to be unreachable, redirect it to __builtin_unreachable.  */
230
231 static struct cgraph_edge *
232 redirect_to_unreachable (struct cgraph_edge *e)
233 {
234 struct cgraph_node *callee = !e->inline_failed ? e->callee : NULL;
235 struct cgraph_node *target = cgraph_node::get_create
236 (builtin_decl_implicit (BUILT_IN_UNREACHABLE));
237
238 if (e->speculative)
239 e = e->resolve_speculation (target->decl);
240 else if (!e->callee)
241 e->make_direct (target);
242 else
243 e->redirect_callee (target);
244 struct ipa_call_summary *es = ipa_call_summaries->get (e);
245 e->inline_failed = CIF_UNREACHABLE;
246 e->count = profile_count::zero ();
247 es->call_stmt_size = 0;
248 es->call_stmt_time = 0;
249 if (callee)
250 callee->remove_symbol_and_inline_clones ();
251 return e;
252 }
253
254 /* Set predicate for edge E. */
255
256 static void
257 edge_set_predicate (struct cgraph_edge *e, predicate *predicate)
258 {
259 /* If the edge is determined to be never executed, redirect it
260 to BUILTIN_UNREACHABLE to make it clear to IPA passes the call will
261 be optimized out. */
262 if (predicate && *predicate == false
263 /* When handling speculative edges, we need to do the redirection
264 just once. Do it always on the direct edge, so we do not
265 attempt to resolve speculation while duplicating the edge. */
266 && (!e->speculative || e->callee))
267 e = redirect_to_unreachable (e);
268
269 struct ipa_call_summary *es = ipa_call_summaries->get (e);
270 if (predicate && *predicate != true)
271 {
272 if (!es->predicate)
273 es->predicate = edge_predicate_pool.allocate ();
274 *es->predicate = *predicate;
275 }
276 else
277 {
278 if (es->predicate)
279 edge_predicate_pool.remove (es->predicate);
280 es->predicate = NULL;
281 }
282 }
283
284 /* Set predicate for hint *P. */
285
286 static void
287 set_hint_predicate (predicate **p, predicate new_predicate)
288 {
289 if (new_predicate == false || new_predicate == true)
290 {
291 if (*p)
292 edge_predicate_pool.remove (*p);
293 *p = NULL;
294 }
295 else
296 {
297 if (!*p)
298 *p = edge_predicate_pool.allocate ();
299 **p = new_predicate;
300 }
301 }
302
303
304	/* Compute what conditions may or may not hold given information about
305	 parameters.  RET_CLAUSE returns truths that may hold in a specialized copy,
306	 while RET_NONSPEC_CLAUSE returns truths that may hold in a nonspecialized
307	 copy when called in a given context.  Each is a bitmask of conditions; a
308	 zero bit means the condition is known to be false, while a set bit means it
309	 may or may not be true.  The two differ - for example the NOT_INLINED
310	 condition is always false in the nonspecialized copy and builtin_constant_p
311	 tests cannot use the fact that a parameter is indeed a constant.
312	
313	 KNOWN_VALS is a partial mapping of parameters of NODE to constant values.
314	 KNOWN_AGGS is a vector of aggregate jump functions for each parameter.
315 Return clause of possible truths. When INLINE_P is true, assume that we are
316 inlining.
317
318 ERROR_MARK means compile time invariant. */
319
320 static void
321 evaluate_conditions_for_known_args (struct cgraph_node *node,
322 bool inline_p,
323 vec<tree> known_vals,
324 vec<ipa_agg_jump_function_p>
325 known_aggs,
326 clause_t *ret_clause,
327 clause_t *ret_nonspec_clause)
328 {
329 clause_t clause = inline_p ? 0 : 1 << predicate::not_inlined_condition;
330 clause_t nonspec_clause = 1 << predicate::not_inlined_condition;
331 struct ipa_fn_summary *info = ipa_fn_summaries->get (node);
332 int i;
333 struct condition *c;
334
335 for (i = 0; vec_safe_iterate (info->conds, i, &c); i++)
336 {
337 tree val;
338 tree res;
339
340	 /* We allow the call stmt to have fewer arguments than the callee function
341	 (especially for K&R style programs).  So we bound-check here (we assume
342	 the known_aggs vector, if non-NULL, has the same length as
343	 known_vals).  */
344 gcc_checking_assert (!known_aggs.exists ()
345 || (known_vals.length () == known_aggs.length ()));
346 if (c->operand_num >= (int) known_vals.length ())
347 {
348 clause |= 1 << (i + predicate::first_dynamic_condition);
349 nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
350 continue;
351 }
352
353 if (c->agg_contents)
354 {
355 struct ipa_agg_jump_function *agg;
356
357 if (c->code == predicate::changed
358 && !c->by_ref
359 && (known_vals[c->operand_num] == error_mark_node))
360 continue;
361
362 if (known_aggs.exists ())
363 {
364 agg = known_aggs[c->operand_num];
365 val = ipa_find_agg_cst_for_param (agg, known_vals[c->operand_num],
366 c->offset, c->by_ref);
367 }
368 else
369 val = NULL_TREE;
370 }
371 else
372 {
373 val = known_vals[c->operand_num];
374 if (val == error_mark_node && c->code != predicate::changed)
375 val = NULL_TREE;
376 }
377
378 if (!val)
379 {
380 clause |= 1 << (i + predicate::first_dynamic_condition);
381 nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
382 continue;
383 }
384 if (c->code == predicate::changed)
385 {
386 nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
387 continue;
388 }
389
390 if (tree_to_shwi (TYPE_SIZE (TREE_TYPE (val))) != c->size)
391 {
392 clause |= 1 << (i + predicate::first_dynamic_condition);
393 nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
394 continue;
395 }
396 if (c->code == predicate::is_not_constant)
397 {
398 nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
399 continue;
400 }
401
402 val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (c->val), val);
403 res = val
404 ? fold_binary_to_constant (c->code, boolean_type_node, val, c->val)
405 : NULL;
406
407 if (res && integer_zerop (res))
408 continue;
409
410 clause |= 1 << (i + predicate::first_dynamic_condition);
411 nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
412 }
413 *ret_clause = clause;
414 if (ret_nonspec_clause)
415 *ret_nonspec_clause = nonspec_clause;
416 }
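
/* A worked example (hypothetical): assume info->conds holds a single
   condition "param #0 == 4" at index 0.  With known_vals[0] == 4 the
   comparison folds to true, so bit (0 + predicate::first_dynamic_condition)
   is set in both returned clauses; with known_vals[0] == 5 it folds to
   false and the bit stays clear; with no known value the bit is set
   conservatively, since the condition may or may not be true.  */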
417
418
419 /* Work out what conditions might be true at invocation of E. */
420
421 void
422 evaluate_properties_for_edge (struct cgraph_edge *e, bool inline_p,
423 clause_t *clause_ptr,
424 clause_t *nonspec_clause_ptr,
425 vec<tree> *known_vals_ptr,
426 vec<ipa_polymorphic_call_context>
427 *known_contexts_ptr,
428 vec<ipa_agg_jump_function_p> *known_aggs_ptr)
429 {
430 struct cgraph_node *callee = e->callee->ultimate_alias_target ();
431 struct ipa_fn_summary *info = ipa_fn_summaries->get (callee);
432 vec<tree> known_vals = vNULL;
433 vec<ipa_agg_jump_function_p> known_aggs = vNULL;
434
435 if (clause_ptr)
436 *clause_ptr = inline_p ? 0 : 1 << predicate::not_inlined_condition;
437 if (known_vals_ptr)
438 known_vals_ptr->create (0);
439 if (known_contexts_ptr)
440 known_contexts_ptr->create (0);
441
442 if (ipa_node_params_sum
443 && !e->call_stmt_cannot_inline_p
444 && ((clause_ptr && info->conds) || known_vals_ptr || known_contexts_ptr))
445 {
446 struct ipa_node_params *caller_parms_info, *callee_pi;
447 struct ipa_edge_args *args = IPA_EDGE_REF (e);
448 struct ipa_call_summary *es = ipa_call_summaries->get (e);
449 int i, count = ipa_get_cs_argument_count (args);
450
451 if (e->caller->global.inlined_to)
452 caller_parms_info = IPA_NODE_REF (e->caller->global.inlined_to);
453 else
454 caller_parms_info = IPA_NODE_REF (e->caller);
455 callee_pi = IPA_NODE_REF (e->callee);
456
457 if (count && (info->conds || known_vals_ptr))
458 known_vals.safe_grow_cleared (count);
459 if (count && (info->conds || known_aggs_ptr))
460 known_aggs.safe_grow_cleared (count);
461 if (count && known_contexts_ptr)
462 known_contexts_ptr->safe_grow_cleared (count);
463
464 for (i = 0; i < count; i++)
465 {
466 struct ipa_jump_func *jf = ipa_get_ith_jump_func (args, i);
467 tree cst = ipa_value_from_jfunc (caller_parms_info, jf,
468 ipa_get_type (callee_pi, i));
469
470 if (!cst && e->call_stmt
471 && i < (int)gimple_call_num_args (e->call_stmt))
472 {
473 cst = gimple_call_arg (e->call_stmt, i);
474 if (!is_gimple_min_invariant (cst))
475 cst = NULL;
476 }
477 if (cst)
478 {
479 gcc_checking_assert (TREE_CODE (cst) != TREE_BINFO);
480 if (known_vals.exists ())
481 known_vals[i] = cst;
482 }
483 else if (inline_p && !es->param[i].change_prob)
484 known_vals[i] = error_mark_node;
485
486 if (known_contexts_ptr)
487 (*known_contexts_ptr)[i]
488 = ipa_context_from_jfunc (caller_parms_info, e, i, jf);
489 /* TODO: When IPA-CP starts propagating and merging aggregate jump
490 functions, use its knowledge of the caller too, just like the
491 scalar case above. */
492 known_aggs[i] = &jf->agg;
493 }
494 }
495 else if (e->call_stmt && !e->call_stmt_cannot_inline_p
496 && ((clause_ptr && info->conds) || known_vals_ptr))
497 {
498 int i, count = (int)gimple_call_num_args (e->call_stmt);
499
500 if (count && (info->conds || known_vals_ptr))
501 known_vals.safe_grow_cleared (count);
502 for (i = 0; i < count; i++)
503 {
504 tree cst = gimple_call_arg (e->call_stmt, i);
505 if (!is_gimple_min_invariant (cst))
506 cst = NULL;
507 if (cst)
508 known_vals[i] = cst;
509 }
510 }
511
512 evaluate_conditions_for_known_args (callee, inline_p,
513 known_vals, known_aggs, clause_ptr,
514 nonspec_clause_ptr);
515
516 if (known_vals_ptr)
517 *known_vals_ptr = known_vals;
518 else
519 known_vals.release ();
520
521 if (known_aggs_ptr)
522 *known_aggs_ptr = known_aggs;
523 else
524 known_aggs.release ();
525 }
526
527
528 /* Allocate the function summary. */
529
530 static void
531 ipa_fn_summary_alloc (void)
532 {
533 gcc_checking_assert (!ipa_fn_summaries);
534 ipa_fn_summaries = ipa_fn_summary_t::create_ggc (symtab);
535 ipa_call_summaries = new ipa_call_summary_t (symtab, false);
536 }
537
538	/* We are called multiple times for a given function; clear
539	 data from the previous run so they do not accumulate.  */
540
541 void
542 ipa_call_summary::reset ()
543 {
544 call_stmt_size = call_stmt_time = 0;
545 is_return_callee_uncaptured = false;
546 if (predicate)
547 edge_predicate_pool.remove (predicate);
548 predicate = NULL;
549 param.release ();
550 }
551
552	/* We are called multiple times for a given function; clear
553	 data from the previous run so they do not accumulate.  */
554
555 void
556 ipa_fn_summary::reset (struct cgraph_node *node)
557 {
558 struct cgraph_edge *e;
559
560 self_size = 0;
561 estimated_stack_size = 0;
562 estimated_self_stack_size = 0;
563 stack_frame_offset = 0;
564 size = 0;
565 time = 0;
566 growth = 0;
567 scc_no = 0;
568 if (loop_iterations)
569 {
570 edge_predicate_pool.remove (loop_iterations);
571 loop_iterations = NULL;
572 }
573 if (loop_stride)
574 {
575 edge_predicate_pool.remove (loop_stride);
576 loop_stride = NULL;
577 }
578 if (array_index)
579 {
580 edge_predicate_pool.remove (array_index);
581 array_index = NULL;
582 }
583 vec_free (conds);
584 vec_free (size_time_table);
585 for (e = node->callees; e; e = e->next_callee)
586 ipa_call_summaries->get (e)->reset ();
587 for (e = node->indirect_calls; e; e = e->next_callee)
588 ipa_call_summaries->get (e)->reset ();
589 fp_expressions = false;
590 }
591
592 /* Hook that is called by cgraph.c when a node is removed. */
593
594 void
595 ipa_fn_summary_t::remove (cgraph_node *node, ipa_fn_summary *info)
596 {
597 info->reset (node);
598 }
599
600	/* Same as remap_predicate_after_duplication but handles hint predicate *P.
601	 Additionally it takes care of allocating a new memory slot for the updated
602	 predicate and sets it to NULL when it becomes true or false (and thus
603	 uninteresting).  */
604
605 static void
606 remap_hint_predicate_after_duplication (predicate **p,
607 clause_t possible_truths)
608 {
609 predicate new_predicate;
610
611 if (!*p)
612 return;
613
614 new_predicate = (*p)->remap_after_duplication (possible_truths);
615	 /* We do not want to free the previous predicate; it is used by the node origin.  */
616 *p = NULL;
617 set_hint_predicate (p, new_predicate);
618 }
619
620
621 /* Hook that is called by cgraph.c when a node is duplicated. */
622 void
623 ipa_fn_summary_t::duplicate (cgraph_node *src,
624 cgraph_node *dst,
625 ipa_fn_summary *,
626 ipa_fn_summary *info)
627 {
628 memcpy (info, ipa_fn_summaries->get (src), sizeof (ipa_fn_summary));
629 /* TODO: as an optimization, we may avoid copying conditions
630 that are known to be false or true. */
631 info->conds = vec_safe_copy (info->conds);
632
633 /* When there are any replacements in the function body, see if we can figure
634 out that something was optimized out. */
635 if (ipa_node_params_sum && dst->clone.tree_map)
636 {
637 vec<size_time_entry, va_gc> *entry = info->size_time_table;
638 /* Use SRC parm info since it may not be copied yet. */
639 struct ipa_node_params *parms_info = IPA_NODE_REF (src);
640 vec<tree> known_vals = vNULL;
641 int count = ipa_get_param_count (parms_info);
642 int i, j;
643 clause_t possible_truths;
644 predicate true_pred = true;
645 size_time_entry *e;
646 int optimized_out_size = 0;
647 bool inlined_to_p = false;
648 struct cgraph_edge *edge, *next;
649
650 info->size_time_table = 0;
651 known_vals.safe_grow_cleared (count);
652 for (i = 0; i < count; i++)
653 {
654 struct ipa_replace_map *r;
655
656 for (j = 0; vec_safe_iterate (dst->clone.tree_map, j, &r); j++)
657 {
658 if (((!r->old_tree && r->parm_num == i)
659 || (r->old_tree && r->old_tree == ipa_get_param (parms_info, i)))
660 && r->replace_p && !r->ref_p)
661 {
662 known_vals[i] = r->new_tree;
663 break;
664 }
665 }
666 }
667 evaluate_conditions_for_known_args (dst, false,
668 known_vals,
669 vNULL,
670 &possible_truths,
671 /* We are going to specialize,
672 so ignore nonspec truths. */
673 NULL);
674 known_vals.release ();
675
676 info->account_size_time (0, 0, true_pred, true_pred);
677
678 /* Remap size_time vectors.
679	 Simplify the predicate by pruning out alternatives that are known
680	 to be false.
681	 TODO: as an optimization, we can also eliminate conditions known
682	 to be true.  */
683 for (i = 0; vec_safe_iterate (entry, i, &e); i++)
684 {
685 predicate new_exec_pred;
686 predicate new_nonconst_pred;
687 new_exec_pred = e->exec_predicate.remap_after_duplication
688 (possible_truths);
689 new_nonconst_pred = e->nonconst_predicate.remap_after_duplication
690 (possible_truths);
691 if (new_exec_pred == false || new_nonconst_pred == false)
692 optimized_out_size += e->size;
693 else
694 info->account_size_time (e->size, e->time, new_exec_pred,
695 new_nonconst_pred);
696 }
697
698 /* Remap edge predicates with the same simplification as above.
699 Also copy constantness arrays. */
700 for (edge = dst->callees; edge; edge = next)
701 {
702 predicate new_predicate;
703 struct ipa_call_summary *es = ipa_call_summaries->get (edge);
704 next = edge->next_callee;
705
706 if (!edge->inline_failed)
707 inlined_to_p = true;
708 if (!es->predicate)
709 continue;
710 new_predicate = es->predicate->remap_after_duplication
711 (possible_truths);
712 if (new_predicate == false && *es->predicate != false)
713 optimized_out_size += es->call_stmt_size * ipa_fn_summary::size_scale;
714 edge_set_predicate (edge, &new_predicate);
715 }
716
717	 /* Remap indirect edge predicates with the same simplification as above.
718 Also copy constantness arrays. */
719 for (edge = dst->indirect_calls; edge; edge = next)
720 {
721 predicate new_predicate;
722 struct ipa_call_summary *es = ipa_call_summaries->get (edge);
723 next = edge->next_callee;
724
725 gcc_checking_assert (edge->inline_failed);
726 if (!es->predicate)
727 continue;
728 new_predicate = es->predicate->remap_after_duplication
729 (possible_truths);
730 if (new_predicate == false && *es->predicate != false)
731 optimized_out_size += es->call_stmt_size * ipa_fn_summary::size_scale;
732 edge_set_predicate (edge, &new_predicate);
733 }
734 remap_hint_predicate_after_duplication (&info->loop_iterations,
735 possible_truths);
736 remap_hint_predicate_after_duplication (&info->loop_stride,
737 possible_truths);
738 remap_hint_predicate_after_duplication (&info->array_index,
739 possible_truths);
740
741	 /* If the inliner or someone after the inliner ever starts producing
742	 non-trivial clones, we will get into trouble with the lack of information
743	 about updating self sizes, because the size vectors already contain
744	 sizes of the callees.  */
745 gcc_assert (!inlined_to_p || !optimized_out_size);
746 }
747 else
748 {
749 info->size_time_table = vec_safe_copy (info->size_time_table);
750 if (info->loop_iterations)
751 {
752 predicate p = *info->loop_iterations;
753 info->loop_iterations = NULL;
754 set_hint_predicate (&info->loop_iterations, p);
755 }
756 if (info->loop_stride)
757 {
758 predicate p = *info->loop_stride;
759 info->loop_stride = NULL;
760 set_hint_predicate (&info->loop_stride, p);
761 }
762 if (info->array_index)
763 {
764 predicate p = *info->array_index;
765 info->array_index = NULL;
766 set_hint_predicate (&info->array_index, p);
767 }
768 }
769 if (!dst->global.inlined_to)
770 ipa_update_overall_fn_summary (dst);
771 }
772
773
774 /* Hook that is called by cgraph.c when a node is duplicated. */
775
776 void
777 ipa_call_summary_t::duplicate (struct cgraph_edge *src,
778 struct cgraph_edge *dst,
779 struct ipa_call_summary *srcinfo,
780 struct ipa_call_summary *info)
781 {
782 *info = *srcinfo;
783 info->predicate = NULL;
784 edge_set_predicate (dst, srcinfo->predicate);
785 info->param = srcinfo->param.copy ();
786 if (!dst->indirect_unknown_callee && src->indirect_unknown_callee)
787 {
788 info->call_stmt_size -= (eni_size_weights.indirect_call_cost
789 - eni_size_weights.call_cost);
790 info->call_stmt_time -= (eni_time_weights.indirect_call_cost
791 - eni_time_weights.call_cost);
792 }
793 }
794
795
796 /* Keep edge cache consistent across edge removal. */
797
798 void
799 ipa_call_summary_t::remove (struct cgraph_edge *,
800 struct ipa_call_summary *sum)
801 {
802 sum->reset ();
803 }
804
805
806	/* Dump edge summaries associated with NODE and recursively with all clones.
807 Indent by INDENT. */
808
809 static void
810 dump_ipa_call_summary (FILE *f, int indent, struct cgraph_node *node,
811 struct ipa_fn_summary *info)
812 {
813 struct cgraph_edge *edge;
814 for (edge = node->callees; edge; edge = edge->next_callee)
815 {
816 struct ipa_call_summary *es = ipa_call_summaries->get (edge);
817 struct cgraph_node *callee = edge->callee->ultimate_alias_target ();
818 int i;
819
820 fprintf (f,
821 "%*s%s/%i %s\n%*s loop depth:%2i freq:%4.2f size:%2i"
822 " time: %2i callee size:%2i stack:%2i",
823 indent, "", callee->name (), callee->order,
824 !edge->inline_failed
825 ? "inlined" : cgraph_inline_failed_string (edge-> inline_failed),
826 indent, "", es->loop_depth, edge->sreal_frequency ().to_double (),
827 es->call_stmt_size, es->call_stmt_time,
828 (int) ipa_fn_summaries->get (callee)->size / ipa_fn_summary::size_scale,
829 (int) ipa_fn_summaries->get (callee)->estimated_stack_size);
830
831 if (es->predicate)
832 {
833 fprintf (f, " predicate: ");
834 es->predicate->dump (f, info->conds);
835 }
836 else
837 fprintf (f, "\n");
838 if (es->param.exists ())
839 for (i = 0; i < (int) es->param.length (); i++)
840 {
841 int prob = es->param[i].change_prob;
842
843 if (!prob)
844 fprintf (f, "%*s op%i is compile time invariant\n",
845 indent + 2, "", i);
846 else if (prob != REG_BR_PROB_BASE)
847 fprintf (f, "%*s op%i change %f%% of time\n", indent + 2, "", i,
848 prob * 100.0 / REG_BR_PROB_BASE);
849 }
850 if (!edge->inline_failed)
851 {
852 fprintf (f, "%*sStack frame offset %i, callee self size %i,"
853 " callee size %i\n",
854 indent + 2, "",
855 (int) ipa_fn_summaries->get (callee)->stack_frame_offset,
856 (int) ipa_fn_summaries->get (callee)->estimated_self_stack_size,
857 (int) ipa_fn_summaries->get (callee)->estimated_stack_size);
858 dump_ipa_call_summary (f, indent + 2, callee, info);
859 }
860 }
861 for (edge = node->indirect_calls; edge; edge = edge->next_callee)
862 {
863 struct ipa_call_summary *es = ipa_call_summaries->get (edge);
864 fprintf (f, "%*sindirect call loop depth:%2i freq:%4.2f size:%2i"
865 " time: %2i",
866 indent, "",
867 es->loop_depth,
868 edge->sreal_frequency ().to_double (), es->call_stmt_size,
869 es->call_stmt_time);
870 if (es->predicate)
871 {
872 fprintf (f, "predicate: ");
873 es->predicate->dump (f, info->conds);
874 }
875 else
876 fprintf (f, "\n");
877 }
878 }
879
880
881 void
882 ipa_dump_fn_summary (FILE *f, struct cgraph_node *node)
883 {
884 if (node->definition)
885 {
886 struct ipa_fn_summary *s = ipa_fn_summaries->get (node);
887 size_time_entry *e;
888 int i;
889 fprintf (f, "IPA function summary for %s/%i", node->name (),
890 node->order);
891 if (DECL_DISREGARD_INLINE_LIMITS (node->decl))
892 fprintf (f, " always_inline");
893 if (s->inlinable)
894 fprintf (f, " inlinable");
895 if (s->fp_expressions)
896 fprintf (f, " fp_expression");
897 fprintf (f, "\n global time: %f\n", s->time.to_double ());
898 fprintf (f, " self size: %i\n", s->self_size);
899 fprintf (f, " global size: %i\n", s->size);
900 fprintf (f, " min size: %i\n", s->min_size);
901 fprintf (f, " self stack: %i\n",
902 (int) s->estimated_self_stack_size);
903 fprintf (f, " global stack: %i\n", (int) s->estimated_stack_size);
904 if (s->growth)
905 fprintf (f, " estimated growth:%i\n", (int) s->growth);
906 if (s->scc_no)
907 fprintf (f, " In SCC: %i\n", (int) s->scc_no);
908 for (i = 0; vec_safe_iterate (s->size_time_table, i, &e); i++)
909 {
910 fprintf (f, " size:%f, time:%f",
911 (double) e->size / ipa_fn_summary::size_scale,
912 e->time.to_double ());
913 if (e->exec_predicate != true)
914 {
915 fprintf (f, ", executed if:");
916 e->exec_predicate.dump (f, s->conds, 0);
917 }
918 if (e->exec_predicate != e->nonconst_predicate)
919 {
920 fprintf (f, ", nonconst if:");
921 e->nonconst_predicate.dump (f, s->conds, 0);
922 }
923 fprintf (f, "\n");
924 }
925 if (s->loop_iterations)
926 {
927 fprintf (f, " loop iterations:");
928 s->loop_iterations->dump (f, s->conds);
929 }
930 if (s->loop_stride)
931 {
932 fprintf (f, " loop stride:");
933 s->loop_stride->dump (f, s->conds);
934 }
935 if (s->array_index)
936 {
937 fprintf (f, " array index:");
938 s->array_index->dump (f, s->conds);
939 }
940 fprintf (f, " calls:\n");
941 dump_ipa_call_summary (f, 4, node, s);
942 fprintf (f, "\n");
943 }
944 }
945
946 DEBUG_FUNCTION void
947 ipa_debug_fn_summary (struct cgraph_node *node)
948 {
949 ipa_dump_fn_summary (stderr, node);
950 }
951
952 void
953 ipa_dump_fn_summaries (FILE *f)
954 {
955 struct cgraph_node *node;
956
957 FOR_EACH_DEFINED_FUNCTION (node)
958 if (!node->global.inlined_to)
959 ipa_dump_fn_summary (f, node);
960 }
961
962	/* Callback of walk_aliased_vdefs.  Sets the boolean variable pointed to
963	 by DATA to flag that it has been invoked.  */
964
965 static bool
966 mark_modified (ao_ref *ao ATTRIBUTE_UNUSED, tree vdef ATTRIBUTE_UNUSED,
967 void *data)
968 {
969 bool *b = (bool *) data;
970 *b = true;
971 return true;
972 }
973
974	/* If OP refers to the value of a function parameter, return the corresponding
975	 parameter.  If SIZE_P is non-NULL, the size of the memory load (or of the
976	 SSA_NAME of the PARM_DECL) will be stored to *SIZE_P in that case too.  */
977
978 static tree
979 unmodified_parm_1 (gimple *stmt, tree op, HOST_WIDE_INT *size_p)
980 {
981 /* SSA_NAME referring to parm default def? */
982 if (TREE_CODE (op) == SSA_NAME
983 && SSA_NAME_IS_DEFAULT_DEF (op)
984 && TREE_CODE (SSA_NAME_VAR (op)) == PARM_DECL)
985 {
986 if (size_p)
987 *size_p = tree_to_shwi (TYPE_SIZE (TREE_TYPE (op)));
988 return SSA_NAME_VAR (op);
989 }
990 /* Non-SSA parm reference? */
991 if (TREE_CODE (op) == PARM_DECL)
992 {
993 bool modified = false;
994
995 ao_ref refd;
996 ao_ref_init (&refd, op);
997 walk_aliased_vdefs (&refd, gimple_vuse (stmt), mark_modified, &modified,
998 NULL);
999 if (!modified)
1000 {
1001 if (size_p)
1002 *size_p = tree_to_shwi (TYPE_SIZE (TREE_TYPE (op)));
1003 return op;
1004 }
1005 }
1006 return NULL_TREE;
1007 }
1008
1009	/* If OP refers to the value of a function parameter, return the corresponding
1010	 parameter.  Also traverse chains of SSA register assignments.  If SIZE_P is
1011	 non-NULL, the size of the memory load (or of the SSA_NAME of the PARM_DECL)
1012	 will be stored to *SIZE_P in that case too.  */
1013
1014 static tree
1015 unmodified_parm (gimple *stmt, tree op, HOST_WIDE_INT *size_p)
1016 {
1017 tree res = unmodified_parm_1 (stmt, op, size_p);
1018 if (res)
1019 return res;
1020
1021 if (TREE_CODE (op) == SSA_NAME
1022 && !SSA_NAME_IS_DEFAULT_DEF (op)
1023 && gimple_assign_single_p (SSA_NAME_DEF_STMT (op)))
1024 return unmodified_parm (SSA_NAME_DEF_STMT (op),
1025 gimple_assign_rhs1 (SSA_NAME_DEF_STMT (op)),
1026 size_p);
1027 return NULL_TREE;
1028 }
1029
1030 /* If OP refers to a value of a function parameter or value loaded from an
1031 aggregate passed to a parameter (either by value or reference), return TRUE
1032 and store the number of the parameter to *INDEX_P, the access size into
1033	 *SIZE_P, and information about whether and how it has been loaded from an
1034 aggregate into *AGGPOS. INFO describes the function parameters, STMT is the
1035 statement in which OP is used or loaded. */
1036
1037 static bool
1038 unmodified_parm_or_parm_agg_item (struct ipa_func_body_info *fbi,
1039 gimple *stmt, tree op, int *index_p,
1040 HOST_WIDE_INT *size_p,
1041 struct agg_position_info *aggpos)
1042 {
1043 tree res = unmodified_parm_1 (stmt, op, size_p);
1044
1045 gcc_checking_assert (aggpos);
1046 if (res)
1047 {
1048 *index_p = ipa_get_param_decl_index (fbi->info, res);
1049 if (*index_p < 0)
1050 return false;
1051 aggpos->agg_contents = false;
1052 aggpos->by_ref = false;
1053 return true;
1054 }
1055
1056 if (TREE_CODE (op) == SSA_NAME)
1057 {
1058 if (SSA_NAME_IS_DEFAULT_DEF (op)
1059 || !gimple_assign_single_p (SSA_NAME_DEF_STMT (op)))
1060 return false;
1061 stmt = SSA_NAME_DEF_STMT (op);
1062 op = gimple_assign_rhs1 (stmt);
1063 if (!REFERENCE_CLASS_P (op))
1064 return unmodified_parm_or_parm_agg_item (fbi, stmt, op, index_p, size_p,
1065 aggpos);
1066 }
1067
1068 aggpos->agg_contents = true;
1069 return ipa_load_from_parm_agg (fbi, fbi->info->descriptors,
1070 stmt, op, index_p, &aggpos->offset,
1071 size_p, &aggpos->by_ref);
1072 }
1073
1074 /* See if statement might disappear after inlining.
1075	 0 - means not eliminated,
1076	 1 - half of the statements go away,
1077	 2 - it is eliminated for sure.
1078 We are not terribly sophisticated, basically looking for simple abstraction
1079 penalty wrappers. */
1080
1081 static int
1082 eliminated_by_inlining_prob (gimple *stmt)
1083 {
1084 enum gimple_code code = gimple_code (stmt);
1085 enum tree_code rhs_code;
1086
1087 if (!optimize)
1088 return 0;
1089
1090 switch (code)
1091 {
1092 case GIMPLE_RETURN:
1093 return 2;
1094 case GIMPLE_ASSIGN:
1095 if (gimple_num_ops (stmt) != 2)
1096 return 0;
1097
1098 rhs_code = gimple_assign_rhs_code (stmt);
1099
1100 /* Casts of parameters, loads from parameters passed by reference
1101 and stores to return value or parameters are often free after
1102	 inlining due to SRA and further combining.
1103	 Assume that half of the statements go away.  */
1104 if (CONVERT_EXPR_CODE_P (rhs_code)
1105 || rhs_code == VIEW_CONVERT_EXPR
1106 || rhs_code == ADDR_EXPR
1107 || gimple_assign_rhs_class (stmt) == GIMPLE_SINGLE_RHS)
1108 {
1109 tree rhs = gimple_assign_rhs1 (stmt);
1110 tree lhs = gimple_assign_lhs (stmt);
1111 tree inner_rhs = get_base_address (rhs);
1112 tree inner_lhs = get_base_address (lhs);
1113 bool rhs_free = false;
1114 bool lhs_free = false;
1115
1116 if (!inner_rhs)
1117 inner_rhs = rhs;
1118 if (!inner_lhs)
1119 inner_lhs = lhs;
1120
1121	 /* Reads of a parameter are expected to be free.  */
1122	 if (unmodified_parm (stmt, inner_rhs, NULL))
1123	 rhs_free = true;
1124	 /* Match expressions of the form &this->field.  Those will most likely
1125	 combine with something upstream after inlining.  */
1126 else if (TREE_CODE (inner_rhs) == ADDR_EXPR)
1127 {
1128 tree op = get_base_address (TREE_OPERAND (inner_rhs, 0));
1129 if (TREE_CODE (op) == PARM_DECL)
1130 rhs_free = true;
1131 else if (TREE_CODE (op) == MEM_REF
1132 && unmodified_parm (stmt, TREE_OPERAND (op, 0), NULL))
1133 rhs_free = true;
1134 }
1135
1136	 /* When a parameter is not an SSA register because its address is taken
1137	 and it is just copied into one, the statement will be completely
1138	 free after inlining (we will copy propagate backward).  */
1139 if (rhs_free && is_gimple_reg (lhs))
1140 return 2;
1141
1142	 /* Reads of parameters passed by reference
1143	 are expected to be free (i.e. optimized out after inlining).  */
1144 if (TREE_CODE (inner_rhs) == MEM_REF
1145 && unmodified_parm (stmt, TREE_OPERAND (inner_rhs, 0), NULL))
1146 rhs_free = true;
1147
1148	 /* Copying a parameter passed by reference into a gimple register is
1149	 probably also going to copy propagate, but we can't be quite
1150	 sure.  */
1151 if (rhs_free && is_gimple_reg (lhs))
1152 lhs_free = true;
1153
1154	 /* Writes to parameters, parameters passed by value and the return value
1155	 (either directly or passed via invisible reference) are free.
1156
1157 TODO: We ought to handle testcase like
1158 struct a {int a,b;};
1159 struct a
1160	 returnstruct (void)
1161 {
1162 struct a a ={1,2};
1163 return a;
1164 }
1165
1166	 This translates into:
1167	
1168	 returnstruct ()
1169 {
1170 int a$b;
1171 int a$a;
1172 struct a a;
1173 struct a D.2739;
1174
1175 <bb 2>:
1176 D.2739.a = 1;
1177 D.2739.b = 2;
1178 return D.2739;
1179
1180 }
1181	 For that we would need to copy the ipa-split logic detecting writes
1182	 to the return value.  */
1183 if (TREE_CODE (inner_lhs) == PARM_DECL
1184 || TREE_CODE (inner_lhs) == RESULT_DECL
1185 || (TREE_CODE (inner_lhs) == MEM_REF
1186 && (unmodified_parm (stmt, TREE_OPERAND (inner_lhs, 0), NULL)
1187 || (TREE_CODE (TREE_OPERAND (inner_lhs, 0)) == SSA_NAME
1188 && SSA_NAME_VAR (TREE_OPERAND (inner_lhs, 0))
1189 && TREE_CODE (SSA_NAME_VAR (TREE_OPERAND
1190 (inner_lhs,
1191 0))) == RESULT_DECL))))
1192 lhs_free = true;
1193 if (lhs_free
1194 && (is_gimple_reg (rhs) || is_gimple_min_invariant (rhs)))
1195 rhs_free = true;
1196 if (lhs_free && rhs_free)
1197 return 1;
1198 }
1199 return 0;
1200 default:
1201 return 0;
1202 }
1203 }
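
/* A typical statement scoring 2 above (a hypothetical example): in a getter
   such as

     int
     get_x (struct s *p)
     {
       return p->x;
     }

   the load from the parameter passed by reference feeds a gimple register,
   so both it and the return are expected to disappear once the function is
   inlined and SRA/copy propagation have done their work.  */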
1204
1205
1206	/* If BB ends with a conditional that we can turn into predicates, attach
1207	 the corresponding predicates to the CFG edges.  */
1208
1209 static void
1210 set_cond_stmt_execution_predicate (struct ipa_func_body_info *fbi,
1211 struct ipa_fn_summary *summary,
1212 basic_block bb)
1213 {
1214 gimple *last;
1215 tree op;
1216 int index;
1217 HOST_WIDE_INT size;
1218 struct agg_position_info aggpos;
1219 enum tree_code code, inverted_code;
1220 edge e;
1221 edge_iterator ei;
1222 gimple *set_stmt;
1223 tree op2;
1224
1225 last = last_stmt (bb);
1226 if (!last || gimple_code (last) != GIMPLE_COND)
1227 return;
1228 if (!is_gimple_ip_invariant (gimple_cond_rhs (last)))
1229 return;
1230 op = gimple_cond_lhs (last);
1231 /* TODO: handle conditionals like
1232 var = op0 < 4;
1233 if (var != 0). */
1234 if (unmodified_parm_or_parm_agg_item (fbi, last, op, &index, &size, &aggpos))
1235 {
1236 code = gimple_cond_code (last);
1237 inverted_code = invert_tree_comparison (code, HONOR_NANS (op));
1238
1239 FOR_EACH_EDGE (e, ei, bb->succs)
1240 {
1241 enum tree_code this_code = (e->flags & EDGE_TRUE_VALUE
1242 ? code : inverted_code);
1243	 /* invert_tree_comparison will return ERROR_MARK on FP
1244	 comparisons that are not EQ/NE instead of returning the proper
1245	 unordered one.  Be sure it is not confused with NON_CONSTANT.  */
1246 if (this_code != ERROR_MARK)
1247 {
1248 predicate p
1249 = add_condition (summary, index, size, &aggpos, this_code,
1250 unshare_expr_without_location
1251 (gimple_cond_rhs (last)));
1252 e->aux = edge_predicate_pool.allocate ();
1253 *(predicate *) e->aux = p;
1254 }
1255 }
1256 }
1257
1258 if (TREE_CODE (op) != SSA_NAME)
1259 return;
1260 /* Special case
1261 if (builtin_constant_p (op))
1262 constant_code
1263 else
1264 nonconstant_code.
1265 Here we can predicate nonconstant_code. We can't
1266 really handle constant_code since we have no predicate
1267 for this and also the constant code is not known to be
1268	 optimized away when the inliner doesn't see that the operand is constant.
1269 Other optimizers might think otherwise. */
1270 if (gimple_cond_code (last) != NE_EXPR
1271 || !integer_zerop (gimple_cond_rhs (last)))
1272 return;
1273 set_stmt = SSA_NAME_DEF_STMT (op);
1274 if (!gimple_call_builtin_p (set_stmt, BUILT_IN_CONSTANT_P)
1275 || gimple_call_num_args (set_stmt) != 1)
1276 return;
1277 op2 = gimple_call_arg (set_stmt, 0);
1278 if (!unmodified_parm_or_parm_agg_item (fbi, set_stmt, op2, &index, &size,
1279 &aggpos))
1280 return;
1281 FOR_EACH_EDGE (e, ei, bb->succs) if (e->flags & EDGE_FALSE_VALUE)
1282 {
1283 predicate p = add_condition (summary, index, size, &aggpos,
1284 predicate::is_not_constant, NULL_TREE);
1285 e->aux = edge_predicate_pool.allocate ();
1286 *(predicate *) e->aux = p;
1287 }
1288 }
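
/* For instance (a hypothetical conditional): given

     if (param_1 > 7)
       goto <bb 3>;
     else
       goto <bb 4>;

   the edge to bb 3 receives the predicate "param #1 > 7" and the edge to
   bb 4 its inversion "param #1 <= 7"; compute_bb_predicates below then
   propagates these predicates onto the basic blocks themselves.  */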
1289
1290
1291	/* If BB ends with a switch that we can turn into predicates, attach
1292	 the corresponding predicates to the CFG edges.  */
1293
1294 static void
1295 set_switch_stmt_execution_predicate (struct ipa_func_body_info *fbi,
1296 struct ipa_fn_summary *summary,
1297 basic_block bb)
1298 {
1299 gimple *lastg;
1300 tree op;
1301 int index;
1302 HOST_WIDE_INT size;
1303 struct agg_position_info aggpos;
1304 edge e;
1305 edge_iterator ei;
1306 size_t n;
1307 size_t case_idx;
1308
1309 lastg = last_stmt (bb);
1310 if (!lastg || gimple_code (lastg) != GIMPLE_SWITCH)
1311 return;
1312 gswitch *last = as_a <gswitch *> (lastg);
1313 op = gimple_switch_index (last);
1314 if (!unmodified_parm_or_parm_agg_item (fbi, last, op, &index, &size, &aggpos))
1315 return;
1316
1317 FOR_EACH_EDGE (e, ei, bb->succs)
1318 {
1319 e->aux = edge_predicate_pool.allocate ();
1320 *(predicate *) e->aux = false;
1321 }
1322 n = gimple_switch_num_labels (last);
1323 for (case_idx = 0; case_idx < n; ++case_idx)
1324 {
1325 tree cl = gimple_switch_label (last, case_idx);
1326 tree min, max;
1327 predicate p;
1328
1329 e = find_edge (bb, label_to_block (CASE_LABEL (cl)));
1330 min = CASE_LOW (cl);
1331 max = CASE_HIGH (cl);
1332
1333	 /* For the default case we might want to construct a predicate that none
1334	 of the cases is met, but that is a bit hard to do without having
1335	 negations of the conditionals handy.  */
1336 if (!min && !max)
1337 p = true;
1338 else if (!max)
1339 p = add_condition (summary, index, size, &aggpos, EQ_EXPR,
1340 unshare_expr_without_location (min));
1341 else
1342 {
1343 predicate p1, p2;
1344 p1 = add_condition (summary, index, size, &aggpos, GE_EXPR,
1345 unshare_expr_without_location (min));
1346 p2 = add_condition (summary, index, size, &aggpos, LE_EXPR,
1347 unshare_expr_without_location (max));
1348 p = p1 & p2;
1349 }
1350 *(struct predicate *) e->aux
1351 = p.or_with (summary->conds, *(struct predicate *) e->aux);
1352 }
1353 }
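
/* For example (hypothetical):

     switch (param_2)
       {
       case 1: ...
       case 5 ... 9: ...
       default: ...
       }

   gives the "case 1" edge the predicate "param #2 == 1", the range case the
   predicate "param #2 >= 5 && param #2 <= 9", and leaves the default edge
   with the predicate true, since no negated conditions are available.  */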
1354
1355
1356	/* For each BB in NODE attach to its AUX pointer the predicate under
1357	 which it is executable.  */
1358
1359 static void
1360 compute_bb_predicates (struct ipa_func_body_info *fbi,
1361 struct cgraph_node *node,
1362 struct ipa_fn_summary *summary)
1363 {
1364 struct function *my_function = DECL_STRUCT_FUNCTION (node->decl);
1365 bool done = false;
1366 basic_block bb;
1367
1368 FOR_EACH_BB_FN (bb, my_function)
1369 {
1370 set_cond_stmt_execution_predicate (fbi, summary, bb);
1371 set_switch_stmt_execution_predicate (fbi, summary, bb);
1372 }
1373
1374 /* Entry block is always executable. */
1375 ENTRY_BLOCK_PTR_FOR_FN (my_function)->aux
1376 = edge_predicate_pool.allocate ();
1377 *(predicate *) ENTRY_BLOCK_PTR_FOR_FN (my_function)->aux = true;
1378
1379 /* A simple dataflow propagation of predicates forward in the CFG.
1380 TODO: work in reverse postorder. */
1381 while (!done)
1382 {
1383 done = true;
1384 FOR_EACH_BB_FN (bb, my_function)
1385 {
1386 predicate p = false;
1387 edge e;
1388 edge_iterator ei;
1389 FOR_EACH_EDGE (e, ei, bb->preds)
1390 {
1391 if (e->src->aux)
1392 {
1393 predicate this_bb_predicate
1394 = *(predicate *) e->src->aux;
1395 if (e->aux)
1396 this_bb_predicate &= (*(struct predicate *) e->aux);
1397 p = p.or_with (summary->conds, this_bb_predicate);
1398 if (p == true)
1399 break;
1400 }
1401 }
1402 if (p == false)
1403 gcc_checking_assert (!bb->aux);
1404 else
1405 {
1406 if (!bb->aux)
1407 {
1408 done = false;
1409 bb->aux = edge_predicate_pool.allocate ();
1410 *((predicate *) bb->aux) = p;
1411 }
1412 else if (p != *(predicate *) bb->aux)
1413 {
1414	 /* This OR operation is needed to ensure monotone data flow
1415	 in case we hit the limit on the number of clauses and the
1416	 and/or operations above give approximate answers.  */
1417 p = p.or_with (summary->conds, *(predicate *)bb->aux);
1418 if (p != *(predicate *) bb->aux)
1419 {
1420 done = false;
1421 *((predicate *) bb->aux) = p;
1422 }
1423 }
1424 }
1425 }
1426 }
1427 }
1428
1429
1430	/* Return a predicate specifying when the result of EXPR might not be
1431	 a compile time constant.  */
1432
1433 static predicate
1434 will_be_nonconstant_expr_predicate (struct ipa_node_params *info,
1435 struct ipa_fn_summary *summary,
1436 tree expr,
1437 vec<predicate> nonconstant_names)
1438 {
1439 tree parm;
1440 int index;
1441 HOST_WIDE_INT size;
1442
1443 while (UNARY_CLASS_P (expr))
1444 expr = TREE_OPERAND (expr, 0);
1445
1446 parm = unmodified_parm (NULL, expr, &size);
1447 if (parm && (index = ipa_get_param_decl_index (info, parm)) >= 0)
1448 return add_condition (summary, index, size, NULL, predicate::changed,
1449 NULL_TREE);
1450 if (is_gimple_min_invariant (expr))
1451 return false;
1452 if (TREE_CODE (expr) == SSA_NAME)
1453 return nonconstant_names[SSA_NAME_VERSION (expr)];
1454 if (BINARY_CLASS_P (expr) || COMPARISON_CLASS_P (expr))
1455 {
1456 predicate p1 = will_be_nonconstant_expr_predicate
1457 (info, summary, TREE_OPERAND (expr, 0),
1458 nonconstant_names);
1459 if (p1 == true)
1460 return p1;
1461
1462 predicate p2;
1463 p2 = will_be_nonconstant_expr_predicate (info, summary,
1464 TREE_OPERAND (expr, 1),
1465 nonconstant_names);
1466 return p1.or_with (summary->conds, p2);
1467 }
1468 else if (TREE_CODE (expr) == COND_EXPR)
1469 {
1470 predicate p1 = will_be_nonconstant_expr_predicate
1471 (info, summary, TREE_OPERAND (expr, 0),
1472 nonconstant_names);
1473 if (p1 == true)
1474 return p1;
1475
1476 predicate p2;
1477 p2 = will_be_nonconstant_expr_predicate (info, summary,
1478 TREE_OPERAND (expr, 1),
1479 nonconstant_names);
1480 if (p2 == true)
1481 return p2;
1482 p1 = p1.or_with (summary->conds, p2);
1483 p2 = will_be_nonconstant_expr_predicate (info, summary,
1484 TREE_OPERAND (expr, 2),
1485 nonconstant_names);
1486 return p2.or_with (summary->conds, p1);
1487 }
1488 else
1489 {
1490 debug_tree (expr);
1491 gcc_unreachable ();
1492 }
1493 return false;
1494 }
1495
1496
1497	/* Return a predicate specifying when STMT might have a result that is not
1498	 a compile time constant.  */
1499
1500 static predicate
1501 will_be_nonconstant_predicate (struct ipa_func_body_info *fbi,
1502 struct ipa_fn_summary *summary,
1503 gimple *stmt,
1504 vec<predicate> nonconstant_names)
1505 {
1506 predicate p = true;
1507 ssa_op_iter iter;
1508 tree use;
1509 predicate op_non_const;
1510 bool is_load;
1511 int base_index;
1512 HOST_WIDE_INT size;
1513 struct agg_position_info aggpos;
1514
1515	 /* Which statements might be optimized away
1516	 when their arguments are constant.  */
1517 if (gimple_code (stmt) != GIMPLE_ASSIGN
1518 && gimple_code (stmt) != GIMPLE_COND
1519 && gimple_code (stmt) != GIMPLE_SWITCH
1520 && (gimple_code (stmt) != GIMPLE_CALL
1521 || !(gimple_call_flags (stmt) & ECF_CONST)))
1522 return p;
1523
1524 /* Stores will stay anyway. */
1525 if (gimple_store_p (stmt))
1526 return p;
1527
1528 is_load = gimple_assign_load_p (stmt);
1529
1530 /* Loads can be optimized when the value is known. */
1531 if (is_load)
1532 {
1533 tree op;
1534 gcc_assert (gimple_assign_single_p (stmt));
1535 op = gimple_assign_rhs1 (stmt);
1536 if (!unmodified_parm_or_parm_agg_item (fbi, stmt, op, &base_index, &size,
1537 &aggpos))
1538 return p;
1539 }
1540 else
1541 base_index = -1;
1542
1543 /* See if we understand all operands before we start
1544 adding conditionals. */
1545 FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
1546 {
1547 tree parm = unmodified_parm (stmt, use, NULL);
1548 /* For arguments we can build a condition. */
1549 if (parm && ipa_get_param_decl_index (fbi->info, parm) >= 0)
1550 continue;
1551 if (TREE_CODE (use) != SSA_NAME)
1552 return p;
1553	 /* If we know when the operand is constant,
1554	 we can still say something useful.  */
1555 if (nonconstant_names[SSA_NAME_VERSION (use)] != true)
1556 continue;
1557 return p;
1558 }
1559
1560 if (is_load)
1561 op_non_const =
1562 add_condition (summary, base_index, size, &aggpos, predicate::changed,
1563 NULL);
1564 else
1565 op_non_const = false;
1566 FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
1567 {
1568 HOST_WIDE_INT size;
1569 tree parm = unmodified_parm (stmt, use, &size);
1570 int index;
1571
1572 if (parm && (index = ipa_get_param_decl_index (fbi->info, parm)) >= 0)
1573 {
1574 if (index != base_index)
1575 p = add_condition (summary, index, size, NULL, predicate::changed,
1576 NULL_TREE);
1577 else
1578 continue;
1579 }
1580 else
1581 p = nonconstant_names[SSA_NAME_VERSION (use)];
1582 op_non_const = p.or_with (summary->conds, op_non_const);
1583 }
1584 if ((gimple_code (stmt) == GIMPLE_ASSIGN || gimple_code (stmt) == GIMPLE_CALL)
1585 && gimple_op (stmt, 0)
1586 && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
1587 nonconstant_names[SSA_NAME_VERSION (gimple_op (stmt, 0))]
1588 = op_non_const;
1589 return op_non_const;
1590 }
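
/* As an illustration (hypothetical): for a statement "_3 = param_1 + _2",
   the resulting predicate is "param #1 changed" ORed with the nonconstant
   predicate recorded for _2, so once every operand is known at a given call
   site, the statement is expected to fold away in the specialized copy.  */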
1591
1592 struct record_modified_bb_info
1593 {
1594 tree op;
1595 bitmap bb_set;
1596 gimple *stmt;
1597 };
1598
1599	/* Value is initialized in INIT_BB and used in USE_BB.  We want to compute
1600	 the probability that it changes between invocations of USE_BB.
1601	 INIT_BB->count/USE_BB->count is an estimate, but if INIT_BB
1602	 is in a different loop nest, we can do better.
1603	 This is all just an estimate.  In theory we would look for a minimal cut
1604	 separating INIT_BB and USE_BB, but we only want to anticipate loop
1605	 invariant motion anyway.  */
1606
1607 static basic_block
1608 get_minimal_bb (basic_block init_bb, basic_block use_bb)
1609 {
1610 struct loop *l = find_common_loop (init_bb->loop_father, use_bb->loop_father);
1611 if (l && l->header->count < init_bb->count)
1612 return l->header;
1613 return init_bb;
1614 }
1615
1616 /* Callback of walk_aliased_vdefs. Records basic blocks where the value may be
1617 set except for info->stmt. */
1618
1619 static bool
1620 record_modified (ao_ref *ao ATTRIBUTE_UNUSED, tree vdef, void *data)
1621 {
1622 struct record_modified_bb_info *info =
1623 (struct record_modified_bb_info *) data;
1624 if (SSA_NAME_DEF_STMT (vdef) == info->stmt)
1625 return false;
1626 if (gimple_clobber_p (SSA_NAME_DEF_STMT (vdef)))
1627 return false;
1628 bitmap_set_bit (info->bb_set,
1629 SSA_NAME_IS_DEFAULT_DEF (vdef)
1630 ? ENTRY_BLOCK_PTR_FOR_FN (cfun)->index
1631 : get_minimal_bb
1632 (gimple_bb (SSA_NAME_DEF_STMT (vdef)),
1633 gimple_bb (info->stmt))->index);
1634 if (dump_file)
1635 {
1636 fprintf (dump_file, " Param ");
1637 print_generic_expr (dump_file, info->op, TDF_SLIM);
1638 fprintf (dump_file, " changed at bb %i, minimal: %i stmt: ",
1639 gimple_bb (SSA_NAME_DEF_STMT (vdef))->index,
1640 get_minimal_bb
1641 (gimple_bb (SSA_NAME_DEF_STMT (vdef)),
1642 gimple_bb (info->stmt))->index);
1643 print_gimple_stmt (dump_file, SSA_NAME_DEF_STMT (vdef), 0);
1644 }
1645 return false;
1646 }
1647
1648	/* Return the probability (based on REG_BR_PROB_BASE) that the I-th parameter
1649	 of STMT will have changed since the last invocation of STMT.
1650
1651 Value 0 is reserved for compile time invariants.
1652 For common parameters it is REG_BR_PROB_BASE. For loop invariants it
1653 ought to be REG_BR_PROB_BASE / estimated_iters. */
1654
1655 static int
1656 param_change_prob (gimple *stmt, int i)
1657 {
1658 tree op = gimple_call_arg (stmt, i);
1659 basic_block bb = gimple_bb (stmt);
1660
1661 if (TREE_CODE (op) == WITH_SIZE_EXPR)
1662 op = TREE_OPERAND (op, 0);
1663
1664 tree base = get_base_address (op);
1665
1666 /* Global invariants never change. */
1667 if (is_gimple_min_invariant (base))
1668 return 0;
1669
1670	 /* We would have to do non-trivial analysis to really work out what
1671	 the probability of the value changing is (i.e. when the init statement
1672	 is in a sibling loop of the call).
1673	
1674	 We make a conservative estimate: when the call is executed N times more
1675	 often than the statement defining the value, we take the frequency 1/N.  */
1676 if (TREE_CODE (base) == SSA_NAME)
1677 {
1678 profile_count init_count;
1679
1680 if (!bb->count.nonzero_p ())
1681 return REG_BR_PROB_BASE;
1682
1683 if (SSA_NAME_IS_DEFAULT_DEF (base))
1684 init_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
1685 else
1686 init_count = get_minimal_bb
1687 (gimple_bb (SSA_NAME_DEF_STMT (base)),
1688 gimple_bb (stmt))->count;
1689
1690 if (init_count < bb->count)
1691 return MAX ((init_count.to_sreal_scale (bb->count)
1692 * REG_BR_PROB_BASE).to_int (), 1);
1693 return REG_BR_PROB_BASE;
1694 }
1695 else
1696 {
1697 ao_ref refd;
1698 profile_count max = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
1699 struct record_modified_bb_info info;
1700 tree init = ctor_for_folding (base);
1701
1702 if (init != error_mark_node)
1703 return 0;
1704 if (!bb->count.nonzero_p ())
1705 return REG_BR_PROB_BASE;
1706 if (dump_file)
1707 {
1708	 fprintf (dump_file, " Analyzing param change probability of ");
1709 print_generic_expr (dump_file, op, TDF_SLIM);
1710 fprintf (dump_file, "\n");
1711 }
1712 ao_ref_init (&refd, op);
1713 info.op = op;
1714 info.stmt = stmt;
1715 info.bb_set = BITMAP_ALLOC (NULL);
1716 walk_aliased_vdefs (&refd, gimple_vuse (stmt), record_modified, &info,
1717 NULL);
1718 if (bitmap_bit_p (info.bb_set, bb->index))
1719 {
1720 if (dump_file)
1721 fprintf (dump_file, " Set in same BB as used.\n");
1722 BITMAP_FREE (info.bb_set);
1723 return REG_BR_PROB_BASE;
1724 }
1725
1726 bitmap_iterator bi;
1727 unsigned index;
1728	 /* Look up the most frequent update of the value and assume that
1729	 it dominates all the others; precise analysis here is difficult.  */
1730 EXECUTE_IF_SET_IN_BITMAP (info.bb_set, 0, index, bi)
1731 max = max.max (BASIC_BLOCK_FOR_FN (cfun, index)->count);
1732 if (dump_file)
1733 {
1734 fprintf (dump_file, " Set with count ");
1735 max.dump (dump_file);
1736 fprintf (dump_file, " and used with count ");
1737 bb->count.dump (dump_file);
1738 fprintf (dump_file, " freq %f\n",
1739 max.to_sreal_scale (bb->count).to_double ());
1740 }
1741
1742 BITMAP_FREE (info.bb_set);
1743 if (max < bb->count)
1744 return MAX ((max.to_sreal_scale (bb->count)
1745 * REG_BR_PROB_BASE).to_int (), 1);
1746 return REG_BR_PROB_BASE;
1747 }
1748 }
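
/* As a rough illustration (hypothetical): when the argument is initialized
   once before a loop and the call executes 100 times per initialization,
   this returns about REG_BR_PROB_BASE / 100, anticipating that loop
   invariant motion keeps the value unchanged across most of the calls.  */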
1749
1750	/* Find whether a basic block BB is the final block of a (half) diamond CFG
1751	 sub-graph and whether the predicate the condition depends on is known.  If
1752	 so, return true and store the predicate in *P.  */
1753
1754 static bool
1755 phi_result_unknown_predicate (struct ipa_node_params *info,
1756 ipa_fn_summary *summary, basic_block bb,
1757 predicate *p,
1758 vec<predicate> nonconstant_names)
1759 {
1760 edge e;
1761 edge_iterator ei;
1762 basic_block first_bb = NULL;
1763 gimple *stmt;
1764
1765 if (single_pred_p (bb))
1766 {
1767 *p = false;
1768 return true;
1769 }
1770
1771 FOR_EACH_EDGE (e, ei, bb->preds)
1772 {
1773 if (single_succ_p (e->src))
1774 {
1775 if (!single_pred_p (e->src))
1776 return false;
1777 if (!first_bb)
1778 first_bb = single_pred (e->src);
1779 else if (single_pred (e->src) != first_bb)
1780 return false;
1781 }
1782 else
1783 {
1784 if (!first_bb)
1785 first_bb = e->src;
1786 else if (e->src != first_bb)
1787 return false;
1788 }
1789 }
1790
1791 if (!first_bb)
1792 return false;
1793
1794 stmt = last_stmt (first_bb);
1795 if (!stmt
1796 || gimple_code (stmt) != GIMPLE_COND
1797 || !is_gimple_ip_invariant (gimple_cond_rhs (stmt)))
1798 return false;
1799
1800 *p = will_be_nonconstant_expr_predicate (info, summary,
1801 gimple_cond_lhs (stmt),
1802 nonconstant_names);
1803 if (*p == true)
1804 return false;
1805 else
1806 return true;
1807 }
1808
1809 /* Given a PHI statement in a function described by inline properties SUMMARY
1810 and *P being the predicate describing whether the selected PHI argument is
1811 known, store a predicate for the result of the PHI statement into
1812 NONCONSTANT_NAMES, if possible. */
1813
1814 static void
1815 predicate_for_phi_result (struct ipa_fn_summary *summary, gphi *phi,
1816 predicate *p,
1817 vec<predicate> nonconstant_names)
1818 {
1819 unsigned i;
1820
1821 for (i = 0; i < gimple_phi_num_args (phi); i++)
1822 {
1823 tree arg = gimple_phi_arg (phi, i)->def;
1824 if (!is_gimple_min_invariant (arg))
1825 {
1826 gcc_assert (TREE_CODE (arg) == SSA_NAME);
1827 *p = p->or_with (summary->conds,
1828 nonconstant_names[SSA_NAME_VERSION (arg)]);
1829 if (*p == true)
1830 return;
1831 }
1832 }
1833
1834 if (dump_file && (dump_flags & TDF_DETAILS))
1835 {
1836 fprintf (dump_file, "\t\tphi predicate: ");
1837 p->dump (dump_file, summary->conds);
1838 }
1839 nonconstant_names[SSA_NAME_VERSION (gimple_phi_result (phi))] = *p;
1840 }
1841
1842	/* Return a predicate specifying when the array index in access OP becomes non-constant.  */
1843
1844 static predicate
1845 array_index_predicate (ipa_fn_summary *info,
1846 vec< predicate> nonconstant_names, tree op)
1847 {
1848 predicate p = false;
1849 while (handled_component_p (op))
1850 {
1851 if (TREE_CODE (op) == ARRAY_REF || TREE_CODE (op) == ARRAY_RANGE_REF)
1852 {
1853 if (TREE_CODE (TREE_OPERAND (op, 1)) == SSA_NAME)
1854 p = p.or_with (info->conds,
1855 nonconstant_names[SSA_NAME_VERSION
1856 (TREE_OPERAND (op, 1))]);
1857 }
1858 op = TREE_OPERAND (op, 0);
1859 }
1860 return p;
1861 }
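
/* For an access such as a[i][j].f (hypothetical), the result is the
   nonconstant predicate recorded for i ORed with the one recorded for j;
   the corresponding hint applies only when both indices turn out to be
   compile time constants.  */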
1862
1863	/* For a typical usage of __builtin_expect (a <= b, 1), we
1864 may introduce an extra relation stmt:
1865 With the builtin, we have
1866 t1 = a <= b;
1867 t2 = (long int) t1;
1868 t3 = __builtin_expect (t2, 1);
1869 if (t3 != 0)
1870 goto ...
1871 Without the builtin, we have
1872 if (a<=b)
1873 goto...
1874 This affects the size/time estimation and may have
1875 an impact on the earlier inlining.
1876 Here find this pattern and fix it up later. */
1877
1878 static gimple *
1879 find_foldable_builtin_expect (basic_block bb)
1880 {
1881 gimple_stmt_iterator bsi;
1882
1883 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1884 {
1885 gimple *stmt = gsi_stmt (bsi);
1886 if (gimple_call_builtin_p (stmt, BUILT_IN_EXPECT)
1887 || gimple_call_internal_p (stmt, IFN_BUILTIN_EXPECT))
1888 {
1889 tree var = gimple_call_lhs (stmt);
1890 tree arg = gimple_call_arg (stmt, 0);
1891 use_operand_p use_p;
1892 gimple *use_stmt;
1893 bool match = false;
1894 bool done = false;
1895
1896 if (!var || !arg)
1897 continue;
1898 gcc_assert (TREE_CODE (var) == SSA_NAME);
1899
1900 while (TREE_CODE (arg) == SSA_NAME)
1901 {
1902 gimple *stmt_tmp = SSA_NAME_DEF_STMT (arg);
1903 if (!is_gimple_assign (stmt_tmp))
1904 break;
1905 switch (gimple_assign_rhs_code (stmt_tmp))
1906 {
1907 case LT_EXPR:
1908 case LE_EXPR:
1909 case GT_EXPR:
1910 case GE_EXPR:
1911 case EQ_EXPR:
1912 case NE_EXPR:
1913 match = true;
1914 done = true;
1915 break;
1916 CASE_CONVERT:
1917 break;
1918 default:
1919 done = true;
1920 break;
1921 }
1922 if (done)
1923 break;
1924 arg = gimple_assign_rhs1 (stmt_tmp);
1925 }
1926
1927 if (match && single_imm_use (var, &use_p, &use_stmt)
1928 && gimple_code (use_stmt) == GIMPLE_COND)
1929 return use_stmt;
1930 }
1931 }
1932 return NULL;
1933 }
1934
1935 /* Return true when the basic block contains only clobbers followed by RESX.
1936 Such BBs are kept around to make removal of dead stores possible in the
1937 presence of EH and will be optimized out by optimize_clobbers later in the
1938 game.
1939
1940 NEED_EH is used to recurse in case the clobber has non-EH predecessors
1941 that can be clobber-only, too. When it is false, the RESX is not necessary
1942 at the end of the basic block. */
1943
1944 static bool
1945 clobber_only_eh_bb_p (basic_block bb, bool need_eh = true)
1946 {
1947 gimple_stmt_iterator gsi = gsi_last_bb (bb);
1948 edge_iterator ei;
1949 edge e;
1950
1951 if (need_eh)
1952 {
1953 if (gsi_end_p (gsi))
1954 return false;
1955 if (gimple_code (gsi_stmt (gsi)) != GIMPLE_RESX)
1956 return false;
1957 gsi_prev (&gsi);
1958 }
1959 else if (!single_succ_p (bb))
1960 return false;
1961
1962 for (; !gsi_end_p (gsi); gsi_prev (&gsi))
1963 {
1964 gimple *stmt = gsi_stmt (gsi);
1965 if (is_gimple_debug (stmt))
1966 continue;
1967 if (gimple_clobber_p (stmt))
1968 continue;
1969 if (gimple_code (stmt) == GIMPLE_LABEL)
1970 break;
1971 return false;
1972 }
1973
1974 /* See if all predecessors are either throws or clobber-only BBs. */
1975 FOR_EACH_EDGE (e, ei, bb->preds)
1976 if (!(e->flags & EDGE_EH)
1977 && !clobber_only_eh_bb_p (e->src, false))
1978 return false;
1979
1980 return true;
1981 }
1982
1983 /* Return true if STMT computes a floating point expression that may be
1984 affected by -ffast-math and similar flags. */
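
/* E.g. tmp_1 = x_2 * y_3 with float-typed operands is an FP expression,
   while the same statement on integer operands is not.  */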
1985
1986 static bool
1987 fp_expression_p (gimple *stmt)
1988 {
1989 ssa_op_iter i;
1990 tree op;
1991
1992 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_DEF|SSA_OP_USE)
1993 if (FLOAT_TYPE_P (TREE_TYPE (op)))
1994 return true;
1995 return false;
1996 }
1997
1998 /* Analyze function body for NODE.
1999 EARLY indicates run from early optimization pipeline. */
2000
2001 static void
2002 analyze_function_body (struct cgraph_node *node, bool early)
2003 {
2004 sreal time = 0;
2005 /* Estimate static overhead for function prologue/epilogue and alignment. */
2006 int size = 2;
2007 /* Benefits are scaled by the probability of elimination, which is in the
2008 range <0,2>. */
2009 basic_block bb;
2010 struct function *my_function = DECL_STRUCT_FUNCTION (node->decl);
2011 sreal freq;
2012 struct ipa_fn_summary *info = ipa_fn_summaries->get (node);
2013 predicate bb_predicate;
2014 struct ipa_func_body_info fbi;
2015 vec<predicate> nonconstant_names = vNULL;
2016 int nblocks, n;
2017 int *order;
2018 predicate array_index = true;
2019 gimple *fix_builtin_expect_stmt;
2020
2021 gcc_assert (my_function && my_function->cfg);
2022 gcc_assert (cfun == my_function);
2023
2024 memset (&fbi, 0, sizeof (fbi));
2025 info->conds = NULL;
2026 info->size_time_table = NULL;
2027
2028 /* When optimizing and analyzing for IPA inliner, initialize loop optimizer
2029 so we can produce proper inline hints.
2030
2031 When optimizing and analyzing for early inliner, initialize node params
2032 so we can produce correct BB predicates. */
2033
2034 if (opt_for_fn (node->decl, optimize))
2035 {
2036 calculate_dominance_info (CDI_DOMINATORS);
2037 if (!early)
2038 loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);
2039 else
2040 {
2041 ipa_check_create_node_params ();
2042 ipa_initialize_node_params (node);
2043 }
2044
2045 if (ipa_node_params_sum)
2046 {
2047 fbi.node = node;
2048 fbi.info = IPA_NODE_REF (node);
2049 fbi.bb_infos = vNULL;
2050 fbi.bb_infos.safe_grow_cleared (last_basic_block_for_fn (cfun));
2051 fbi.param_count = count_formal_params (node->decl);
2052 nonconstant_names.safe_grow_cleared
2053 (SSANAMES (my_function)->length ());
2054 }
2055 }
2056
2057 if (dump_file)
2058 fprintf (dump_file, "\nAnalyzing function body size: %s\n",
2059 node->name ());
2060
2061 /* When we run into the maximal number of entries, we assign everything to
2062 the constant truth case. Be sure to have it in the list. */
2063 bb_predicate = true;
2064 info->account_size_time (0, 0, bb_predicate, bb_predicate);
2065
2066 bb_predicate = predicate::not_inlined ();
2067 info->account_size_time (2 * ipa_fn_summary::size_scale, 0, bb_predicate,
2068 bb_predicate);
2069
2070 if (fbi.info)
2071 compute_bb_predicates (&fbi, node, info);
2072 order = XNEWVEC (int, n_basic_blocks_for_fn (cfun));
2073 nblocks = pre_and_rev_post_order_compute (NULL, order, false);
2074 for (n = 0; n < nblocks; n++)
2075 {
2076 bb = BASIC_BLOCK_FOR_FN (cfun, order[n]);
2077 freq = bb->count.to_sreal_scale (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count);
2078 if (clobber_only_eh_bb_p (bb))
2079 {
2080 if (dump_file && (dump_flags & TDF_DETAILS))
2081 fprintf (dump_file, "\n Ignoring BB %i;"
2082 " it will be optimized away by cleanup_clobbers\n",
2083 bb->index);
2084 continue;
2085 }
2086
2087 /* TODO: Obviously predicates can be propagated down across the CFG. */
2088 if (fbi.info)
2089 {
2090 if (bb->aux)
2091 bb_predicate = *(predicate *) bb->aux;
2092 else
2093 bb_predicate = false;
2094 }
2095 else
2096 bb_predicate = true;
2097
2098 if (dump_file && (dump_flags & TDF_DETAILS))
2099 {
2100 fprintf (dump_file, "\n BB %i predicate:", bb->index);
2101 bb_predicate.dump (dump_file, info->conds);
2102 }
2103
2104 if (fbi.info && nonconstant_names.exists ())
2105 {
2106 predicate phi_predicate;
2107 bool first_phi = true;
2108
2109 for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
2110 gsi_next (&bsi))
2111 {
2112 if (first_phi
2113 && !phi_result_unknown_predicate (fbi.info, info, bb,
2114 &phi_predicate,
2115 nonconstant_names))
2116 break;
2117 first_phi = false;
2118 if (dump_file && (dump_flags & TDF_DETAILS))
2119 {
2120 fprintf (dump_file, " ");
2121 print_gimple_stmt (dump_file, gsi_stmt (bsi), 0);
2122 }
2123 predicate_for_phi_result (info, bsi.phi (), &phi_predicate,
2124 nonconstant_names);
2125 }
2126 }
2127
2128 fix_builtin_expect_stmt = find_foldable_builtin_expect (bb);
2129
2130 for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);
2131 gsi_next (&bsi))
2132 {
2133 gimple *stmt = gsi_stmt (bsi);
2134 int this_size = estimate_num_insns (stmt, &eni_size_weights);
2135 int this_time = estimate_num_insns (stmt, &eni_time_weights);
2136 int prob;
2137 predicate will_be_nonconstant;
2138
2139 /* This relation stmt should be folded after we remove the
2140 __builtin_expect call. Adjust the cost here. */
2141 if (stmt == fix_builtin_expect_stmt)
2142 {
2143 this_size--;
2144 this_time--;
2145 }
2146
2147 if (dump_file && (dump_flags & TDF_DETAILS))
2148 {
2149 fprintf (dump_file, " ");
2150 print_gimple_stmt (dump_file, stmt, 0);
2151 fprintf (dump_file, "\t\tfreq:%3.2f size:%3i time:%3i\n",
2152 freq.to_double (), this_size,
2153 this_time);
2154 }
2155
2156 if (gimple_assign_load_p (stmt) && nonconstant_names.exists ())
2157 {
2158 predicate this_array_index;
2159 this_array_index =
2160 array_index_predicate (info, nonconstant_names,
2161 gimple_assign_rhs1 (stmt));
2162 if (this_array_index != false)
2163 array_index &= this_array_index;
2164 }
2165 if (gimple_store_p (stmt) && nonconstant_names.exists ())
2166 {
2167 predicate this_array_index;
2168 this_array_index =
2169 array_index_predicate (info, nonconstant_names,
2170 gimple_get_lhs (stmt));
2171 if (this_array_index != false)
2172 array_index &= this_array_index;
2173 }
2174
2175
2176 if (is_gimple_call (stmt)
2177 && !gimple_call_internal_p (stmt))
2178 {
2179 struct cgraph_edge *edge = node->get_edge (stmt);
2180 struct ipa_call_summary *es = ipa_call_summaries->get (edge);
2181
2182 /* Special case: results of BUILT_IN_CONSTANT_P will always be
2183 resolved as constant. We however don't want to optimize
2184 out the cgraph edges. */
2185 if (nonconstant_names.exists ()
2186 && gimple_call_builtin_p (stmt, BUILT_IN_CONSTANT_P)
2187 && gimple_call_lhs (stmt)
2188 && TREE_CODE (gimple_call_lhs (stmt)) == SSA_NAME)
2189 {
2190 predicate false_p = false;
2191 nonconstant_names[SSA_NAME_VERSION (gimple_call_lhs (stmt))]
2192 = false_p;
2193 }
2194 if (ipa_node_params_sum)
2195 {
2196 int count = gimple_call_num_args (stmt);
2197 int i;
2198
2199 if (count)
2200 es->param.safe_grow_cleared (count);
2201 for (i = 0; i < count; i++)
2202 {
2203 int prob = param_change_prob (stmt, i);
2204 gcc_assert (prob >= 0 && prob <= REG_BR_PROB_BASE);
2205 es->param[i].change_prob = prob;
2206 }
2207 }
2208
2209 es->call_stmt_size = this_size;
2210 es->call_stmt_time = this_time;
2211 es->loop_depth = bb_loop_depth (bb);
2212 edge_set_predicate (edge, &bb_predicate);
2213 }
2214
2215 /* TODO: When a conditional jump or switch is known to be constant, but
2216 we did not translate it into the predicates, we can account
2217 just the maximum of the possible paths. */
2218 if (fbi.info)
2219 will_be_nonconstant
2220 = will_be_nonconstant_predicate (&fbi, info,
2221 stmt, nonconstant_names);
2222 else
2223 will_be_nonconstant = true;
2224 if (this_time || this_size)
2225 {
2226 sreal final_time = (sreal)this_time * freq;
2227
2228 prob = eliminated_by_inlining_prob (stmt);
2229 if (prob == 1 && dump_file && (dump_flags & TDF_DETAILS))
2230 fprintf (dump_file,
2231 "\t\t50%% will be eliminated by inlining\n");
2232 if (prob == 2 && dump_file && (dump_flags & TDF_DETAILS))
2233 fprintf (dump_file, "\t\tWill be eliminated by inlining\n");
2234
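/* PROB is in the range <0,2>: 0 means the statement is never eliminated
by inlining, 2 means it always is and 1 stands for a 50% chance, matching
the dumps above. The accounting below therefore splits size/time between
the not-inlined predicate (weighted by PROB) and the general predicate
(weighted by 2 - PROB). */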
2235 struct predicate p = bb_predicate & will_be_nonconstant;
2236
2237 /* We can ignore the statement when we have proved it is never going
2238 to be executed, but we cannot do that for call statements
2239 because edges are accounted for specially. */
2240
2241 if (*(is_gimple_call (stmt) ? &bb_predicate : &p) != false)
2242 {
2243 time += final_time;
2244 size += this_size;
2245 }
2246
2247 /* We account everything but the calls. Calls have their own
2248 size/time info attached to cgraph edges. This is necessary
2249 in order to make the cost disappear after inlining. */
2250 if (!is_gimple_call (stmt))
2251 {
2252 if (prob)
2253 {
2254 predicate ip = bb_predicate & predicate::not_inlined ();
2255 info->account_size_time (this_size * prob,
2256 (this_time * prob) / 2, ip,
2257 p);
2258 }
2259 if (prob != 2)
2260 info->account_size_time (this_size * (2 - prob),
2261 (this_time * (2 - prob) / 2),
2262 bb_predicate,
2263 p);
2264 }
2265
2266 if (!info->fp_expressions && fp_expression_p (stmt))
2267 {
2268 info->fp_expressions = true;
2269 if (dump_file)
2270 fprintf (dump_file, " fp_expression set\n");
2271 }
2272
2273 gcc_assert (time >= 0);
2274 gcc_assert (size >= 0);
2275 }
2276 }
2277 }
2278 set_hint_predicate (&ipa_fn_summaries->get (node)->array_index, array_index);
2279 free (order);
2280
2281 if (nonconstant_names.exists () && !early)
2282 {
2283 struct loop *loop;
2284 predicate loop_iterations = true;
2285 predicate loop_stride = true;
2286
2287 if (dump_file && (dump_flags & TDF_DETAILS))
2288 flow_loops_dump (dump_file, NULL, 0);
2289 scev_initialize ();
2290 FOR_EACH_LOOP (loop, 0)
2291 {
2292 vec<edge> exits;
2293 edge ex;
2294 unsigned int j;
2295 struct tree_niter_desc niter_desc;
2296 bb_predicate = *(predicate *) loop->header->aux;
2297
2298 exits = get_loop_exit_edges (loop);
2299 FOR_EACH_VEC_ELT (exits, j, ex)
2300 if (number_of_iterations_exit (loop, ex, &niter_desc, false)
2301 && !is_gimple_min_invariant (niter_desc.niter))
2302 {
2303 predicate will_be_nonconstant
2304 = will_be_nonconstant_expr_predicate (fbi.info, info,
2305 niter_desc.niter,
2306 nonconstant_names);
2307 if (will_be_nonconstant != true)
2308 will_be_nonconstant = bb_predicate & will_be_nonconstant;
2309 if (will_be_nonconstant != true
2310 && will_be_nonconstant != false)
2311 /* This is slightly imprecise. We may want to represent each
2312 loop with an independent predicate. */
2313 loop_iterations &= will_be_nonconstant;
2314 }
2315 exits.release ();
2316 }
2317
2318 /* To avoid quadratic behavior we analyze stride predicates only
2319 with respect to the containing loop. Thus we simply iterate
2320 over all defs in the outermost loop body. */
2321 for (loop = loops_for_fn (cfun)->tree_root->inner;
2322 loop != NULL; loop = loop->next)
2323 {
2324 basic_block *body = get_loop_body (loop);
2325 for (unsigned i = 0; i < loop->num_nodes; i++)
2326 {
2327 gimple_stmt_iterator gsi;
2328 bb_predicate = *(predicate *) body[i]->aux;
2329 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi);
2330 gsi_next (&gsi))
2331 {
2332 gimple *stmt = gsi_stmt (gsi);
2333
2334 if (!is_gimple_assign (stmt))
2335 continue;
2336
2337 tree def = gimple_assign_lhs (stmt);
2338 if (TREE_CODE (def) != SSA_NAME)
2339 continue;
2340
2341 affine_iv iv;
2342 if (!simple_iv (loop_containing_stmt (stmt),
2343 loop_containing_stmt (stmt),
2344 def, &iv, true)
2345 || is_gimple_min_invariant (iv.step))
2346 continue;
2347
2348 predicate will_be_nonconstant
2349 = will_be_nonconstant_expr_predicate (fbi.info, info,
2350 iv.step,
2351 nonconstant_names);
2352 if (will_be_nonconstant != true)
2353 will_be_nonconstant = bb_predicate & will_be_nonconstant;
2354 if (will_be_nonconstant != true
2355 && will_be_nonconstant != false)
2356 /* This is slightly imprecise. We may want to represent
2357 each loop with an independent predicate. */
2358 loop_stride = loop_stride & will_be_nonconstant;
2359 }
2360 }
2361 free (body);
2362 }
2363 set_hint_predicate (&ipa_fn_summaries->get (node)->loop_iterations,
2364 loop_iterations);
2365 set_hint_predicate (&ipa_fn_summaries->get (node)->loop_stride,
2366 loop_stride);
2367 scev_finalize ();
2368 }
2369 FOR_ALL_BB_FN (bb, my_function)
2370 {
2371 edge e;
2372 edge_iterator ei;
2373
2374 if (bb->aux)
2375 edge_predicate_pool.remove ((predicate *)bb->aux);
2376 bb->aux = NULL;
2377 FOR_EACH_EDGE (e, ei, bb->succs)
2378 {
2379 if (e->aux)
2380 edge_predicate_pool.remove ((predicate *) e->aux);
2381 e->aux = NULL;
2382 }
2383 }
2384 ipa_fn_summaries->get (node)->time = time;
2385 ipa_fn_summaries->get (node)->self_size = size;
2386 nonconstant_names.release ();
2387 ipa_release_body_info (&fbi);
2388 if (opt_for_fn (node->decl, optimize))
2389 {
2390 if (!early)
2391 loop_optimizer_finalize ();
2392 else if (!ipa_edge_args_sum)
2393 ipa_free_all_node_params ();
2394 free_dominance_info (CDI_DOMINATORS);
2395 }
2396 if (dump_file)
2397 {
2398 fprintf (dump_file, "\n");
2399 ipa_dump_fn_summary (dump_file, node);
2400 }
2401 }
2402
2403
2404 /* Compute function summary.
2405 EARLY is true when we compute parameters during early opts. */
2406
2407 void
2408 compute_fn_summary (struct cgraph_node *node, bool early)
2409 {
2410 HOST_WIDE_INT self_stack_size;
2411 struct cgraph_edge *e;
2412 struct ipa_fn_summary *info;
2413
2414 gcc_assert (!node->global.inlined_to);
2415
2416 if (!ipa_fn_summaries)
2417 ipa_fn_summary_alloc ();
2418
2419 info = ipa_fn_summaries->get (node);
2420 info->reset (node);
2421
2422 /* Estimate the stack size for the function if we're optimizing. */
2423 self_stack_size = optimize && !node->thunk.thunk_p
2424 ? estimated_stack_frame_size (node) : 0;
2425 info->estimated_self_stack_size = self_stack_size;
2426 info->estimated_stack_size = self_stack_size;
2427 info->stack_frame_offset = 0;
2428
2429 if (node->thunk.thunk_p)
2430 {
2431 struct ipa_call_summary *es = ipa_call_summaries->get (node->callees);
2432 predicate t = true;
2433
2434 node->local.can_change_signature = false;
2435 es->call_stmt_size = eni_size_weights.call_cost;
2436 es->call_stmt_time = eni_time_weights.call_cost;
2437 info->account_size_time (ipa_fn_summary::size_scale * 2, 2, t, t);
2438 t = predicate::not_inlined ();
2439 info->account_size_time (2 * ipa_fn_summary::size_scale, 0, t, t);
2440 ipa_update_overall_fn_summary (node);
2441 info->self_size = info->size;
2442 /* We cannot inline instrumentation clones. */
2443 if (node->thunk.add_pointer_bounds_args)
2444 {
2445 info->inlinable = false;
2446 node->callees->inline_failed = CIF_CHKP;
2447 }
2448 else if (stdarg_p (TREE_TYPE (node->decl)))
2449 {
2450 info->inlinable = false;
2451 node->callees->inline_failed = CIF_VARIADIC_THUNK;
2452 }
2453 else
2454 info->inlinable = true;
2455 }
2456 else
2457 {
2458 /* Even is_gimple_min_invariant relies on current_function_decl. */
2459 push_cfun (DECL_STRUCT_FUNCTION (node->decl));
2460
2461 /* Can this function be inlined at all? */
2462 if (!opt_for_fn (node->decl, optimize)
2463 && !lookup_attribute ("always_inline",
2464 DECL_ATTRIBUTES (node->decl)))
2465 info->inlinable = false;
2466 else
2467 info->inlinable = tree_inlinable_function_p (node->decl);
2468
2469 /* Type attributes can use parameter indices to describe them. */
2470 if (TYPE_ATTRIBUTES (TREE_TYPE (node->decl)))
2471 node->local.can_change_signature = false;
2472 else
2473 {
2474 /* Otherwise, inlinable functions can always change their signature. */
2475 if (info->inlinable)
2476 node->local.can_change_signature = true;
2477 else
2478 {
2479 /* Functions calling __builtin_apply_args or va_start cannot change their signature. */
2480 for (e = node->callees; e; e = e->next_callee)
2481 {
2482 tree cdecl = e->callee->decl;
2483 if (DECL_BUILT_IN (cdecl)
2484 && DECL_BUILT_IN_CLASS (cdecl) == BUILT_IN_NORMAL
2485 && (DECL_FUNCTION_CODE (cdecl) == BUILT_IN_APPLY_ARGS
2486 || DECL_FUNCTION_CODE (cdecl) == BUILT_IN_VA_START))
2487 break;
2488 }
2489 node->local.can_change_signature = !e;
2490 }
2491 }
2492 /* Functions called by an instrumentation thunk can't change signature
2493 because instrumentation thunk modification is not supported. */
2494 if (node->local.can_change_signature)
2495 for (e = node->callers; e; e = e->next_caller)
2496 if (e->caller->thunk.thunk_p
2497 && e->caller->thunk.add_pointer_bounds_args)
2498 {
2499 node->local.can_change_signature = false;
2500 break;
2501 }
2502 analyze_function_body (node, early);
2503 pop_cfun ();
2504 }
2505 for (e = node->callees; e; e = e->next_callee)
2506 if (e->callee->comdat_local_p ())
2507 break;
2508 node->calls_comdat_local = (e != NULL);
2509
2510 /* Inlining characteristics are maintained by cgraph_mark_inline. */
2511 info->size = info->self_size;
2512 info->stack_frame_offset = 0;
2513 info->estimated_stack_size = info->estimated_self_stack_size;
2514
2515 /* The code above should compute exactly the same result as
2516 ipa_update_overall_fn_summary, but because the computation happens in a
2517 different order, roundoff errors may result in slight changes. */
2518 ipa_update_overall_fn_summary (node);
2519 gcc_assert (info->size == info->self_size);
2520 }
2521
2522
2523 /* Compute parameters of functions used by inliner using
2524 current_function_decl. */
2525
2526 static unsigned int
2527 compute_fn_summary_for_current (void)
2528 {
2529 compute_fn_summary (cgraph_node::get (current_function_decl), true);
2530 return 0;
2531 }
2532
2533 /* Estimate the benefit of devirtualizing indirect edge IE, provided
2534 KNOWN_VALS, KNOWN_CONTEXTS and KNOWN_AGGS. */
2535
2536 static bool
2537 estimate_edge_devirt_benefit (struct cgraph_edge *ie,
2538 int *size, int *time,
2539 vec<tree> known_vals,
2540 vec<ipa_polymorphic_call_context> known_contexts,
2541 vec<ipa_agg_jump_function_p> known_aggs)
2542 {
2543 tree target;
2544 struct cgraph_node *callee;
2545 struct ipa_fn_summary *isummary;
2546 enum availability avail;
2547 bool speculative;
2548
2549 if (!known_vals.exists () && !known_contexts.exists ())
2550 return false;
2551 if (!opt_for_fn (ie->caller->decl, flag_indirect_inlining))
2552 return false;
2553
2554 target = ipa_get_indirect_edge_target (ie, known_vals, known_contexts,
2555 known_aggs, &speculative);
2556 if (!target || speculative)
2557 return false;
2558
2559 /* Account for difference in cost between indirect and direct calls. */
2560 *size -= (eni_size_weights.indirect_call_cost - eni_size_weights.call_cost);
2561 *time -= (eni_time_weights.indirect_call_cost - eni_time_weights.call_cost);
2562 gcc_checking_assert (*time >= 0);
2563 gcc_checking_assert (*size >= 0);
2564
2565 callee = cgraph_node::get (target);
2566 if (!callee || !callee->definition)
2567 return false;
2568 callee = callee->function_symbol (&avail);
2569 if (avail < AVAIL_AVAILABLE)
2570 return false;
2571 isummary = ipa_fn_summaries->get (callee);
2572 return isummary->inlinable;
2573 }
2574
2575 /* Increase SIZE, MIN_SIZE (if non-NULL) and TIME for size and time needed to
2576 handle edge E with probability PROB.
2577 Set HINTS if edge may be devirtualized.
2578 KNOWN_VALS, KNOWN_AGGS and KNOWN_CONTEXTS describe context of the call
2579 site. */
2580
2581 static inline void
2582 estimate_edge_size_and_time (struct cgraph_edge *e, int *size, int *min_size,
2583 sreal *time,
2584 int prob,
2585 vec<tree> known_vals,
2586 vec<ipa_polymorphic_call_context> known_contexts,
2587 vec<ipa_agg_jump_function_p> known_aggs,
2588 ipa_hints *hints)
2589 {
2590 struct ipa_call_summary *es = ipa_call_summaries->get (e);
2591 int call_size = es->call_stmt_size;
2592 int call_time = es->call_stmt_time;
2593 int cur_size;
2594 if (!e->callee
2595 && estimate_edge_devirt_benefit (e, &call_size, &call_time,
2596 known_vals, known_contexts, known_aggs)
2597 && hints && e->maybe_hot_p ())
2598 *hints |= INLINE_HINT_indirect_call;
2599 cur_size = call_size * ipa_fn_summary::size_scale;
2600 *size += cur_size;
2601 if (min_size)
2602 *min_size += cur_size;
2603 if (prob == REG_BR_PROB_BASE)
2604 *time += ((sreal)call_time) * e->sreal_frequency ();
2605 else
2606 *time += ((sreal)call_time * prob) * e->sreal_frequency ();
2607 }
2608
2609
2610
2611 /* Increase SIZE, MIN_SIZE and TIME for size and time needed to handle all
2612 calls in NODE. POSSIBLE_TRUTHS, KNOWN_VALS, KNOWN_AGGS and KNOWN_CONTEXTS
2613 describe context of the call site. */
2614
2615 static void
2616 estimate_calls_size_and_time (struct cgraph_node *node, int *size,
2617 int *min_size, sreal *time,
2618 ipa_hints *hints,
2619 clause_t possible_truths,
2620 vec<tree> known_vals,
2621 vec<ipa_polymorphic_call_context> known_contexts,
2622 vec<ipa_agg_jump_function_p> known_aggs)
2623 {
2624 struct cgraph_edge *e;
2625 for (e = node->callees; e; e = e->next_callee)
2626 {
2627 struct ipa_call_summary *es = ipa_call_summaries->get (e);
2628
2629 /* Do not care about zero-sized builtins. */
2630 if (e->inline_failed && !es->call_stmt_size)
2631 {
2632 gcc_checking_assert (!es->call_stmt_time);
2633 continue;
2634 }
2635 if (!es->predicate
2636 || es->predicate->evaluate (possible_truths))
2637 {
2638 if (e->inline_failed)
2639 {
2640 /* Predicates of calls shall not use NOT_CHANGED codes,
2641 so we do not need to compute probabilities. */
2642 estimate_edge_size_and_time (e, size,
2643 es->predicate ? NULL : min_size,
2644 time, REG_BR_PROB_BASE,
2645 known_vals, known_contexts,
2646 known_aggs, hints);
2647 }
2648 else
2649 estimate_calls_size_and_time (e->callee, size, min_size, time,
2650 hints,
2651 possible_truths,
2652 known_vals, known_contexts,
2653 known_aggs);
2654 }
2655 }
2656 for (e = node->indirect_calls; e; e = e->next_callee)
2657 {
2658 struct ipa_call_summary *es = ipa_call_summaries->get (e);
2659 if (!es->predicate
2660 || es->predicate->evaluate (possible_truths))
2661 estimate_edge_size_and_time (e, size,
2662 es->predicate ? NULL : min_size,
2663 time, REG_BR_PROB_BASE,
2664 known_vals, known_contexts, known_aggs,
2665 hints);
2666 }
2667 }
2668
2669
2670 /* Estimate size and time needed to execute NODE assuming the
2671 POSSIBLE_TRUTHS clause, and KNOWN_VALS, KNOWN_AGGS and KNOWN_CONTEXTS
2672 information about NODE's arguments. If non-NULL, also use the probability
2673 information present in the INLINE_PARAM_SUMMARY vector.
2674 Additionally determine the hints implied by the context. Finally compute
2675 the minimal size needed for the call that is independent of the call
2676 context and can be used for fast estimates. Return the values in RET_SIZE,
2677 RET_MIN_SIZE, RET_TIME and RET_HINTS. */
2678
2679 void
2680 estimate_node_size_and_time (struct cgraph_node *node,
2681 clause_t possible_truths,
2682 clause_t nonspec_possible_truths,
2683 vec<tree> known_vals,
2684 vec<ipa_polymorphic_call_context> known_contexts,
2685 vec<ipa_agg_jump_function_p> known_aggs,
2686 int *ret_size, int *ret_min_size,
2687 sreal *ret_time,
2688 sreal *ret_nonspecialized_time,
2689 ipa_hints *ret_hints,
2690 vec<inline_param_summary>
2691 inline_param_summary)
2692 {
2693 struct ipa_fn_summary *info = ipa_fn_summaries->get (node);
2694 size_time_entry *e;
2695 int size = 0;
2696 sreal time = 0;
2697 int min_size = 0;
2698 ipa_hints hints = 0;
2699 int i;
2700
2701 if (dump_file && (dump_flags & TDF_DETAILS))
2702 {
2703 bool found = false;
2704 fprintf (dump_file, " Estimating body: %s/%i\n"
2705 " Known to be false: ", node->name (),
2706 node->order);
2707
2708 for (i = predicate::not_inlined_condition;
2709 i < (predicate::first_dynamic_condition
2710 + (int) vec_safe_length (info->conds)); i++)
2711 if (!(possible_truths & (1 << i)))
2712 {
2713 if (found)
2714 fprintf (dump_file, ", ");
2715 found = true;
2716 dump_condition (dump_file, info->conds, i);
2717 }
2718 }
2719
2720 estimate_calls_size_and_time (node, &size, &min_size, &time, &hints, possible_truths,
2721 known_vals, known_contexts, known_aggs);
2722 sreal nonspecialized_time = time;
2723
2724 for (i = 0; vec_safe_iterate (info->size_time_table, i, &e); i++)
2725 {
2726 bool exec = e->exec_predicate.evaluate (nonspec_possible_truths);
2727
2728 /* Because predicates are conservative, it can happen that nonconst is 1
2729 but exec is 0. */
2730 if (exec)
2731 {
2732 bool nonconst = e->nonconst_predicate.evaluate (possible_truths);
2733
2734 gcc_checking_assert (e->time >= 0);
2735 gcc_checking_assert (time >= 0);
2736
2737 /* We compute specialized size only because size of nonspecialized
2738 copy is context independent.
2739
2740 The difference between nonspecialized execution and specialized is
2741 that nonspecialized is not going to have optimized out computations
2742 known to be constant in a specialized setting. */
2743 if (nonconst)
2744 size += e->size;
2745 nonspecialized_time += e->time;
2746 if (!nonconst)
2747 ;
2748 else if (!inline_param_summary.exists ())
2749 time += e->time;
2753 else
2754 {
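/* Weight the entry's time by the probability that its nonconst
predicate holds given INLINE_PARAM_SUMMARY; e.g. a PROB of
REG_BR_PROB_BASE / 2 accounts half of E->time. */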
2755 int prob = e->nonconst_predicate.probability
2756 (info->conds, possible_truths,
2757 inline_param_summary);
2758 gcc_checking_assert (prob >= 0);
2759 gcc_checking_assert (prob <= REG_BR_PROB_BASE);
2760 time += e->time * prob / REG_BR_PROB_BASE;
2761 }
2762 gcc_checking_assert (time >= 0);
2763 }
2764 }
2765 gcc_checking_assert ((*info->size_time_table)[0].exec_predicate == true);
2766 gcc_checking_assert ((*info->size_time_table)[0].nonconst_predicate == true);
2767 min_size = (*info->size_time_table)[0].size;
2768 gcc_checking_assert (size >= 0);
2769 gcc_checking_assert (time >= 0);
2770 /* nonspecialized_time should always be at least as big as the specialized
2771 time. Roundoff issues however may get in the way. */
2772 gcc_checking_assert ((nonspecialized_time - time * 0.99) >= -1);
2773
2774 /* Roundoff issues may make specialized time bigger than nonspecialized
2775 time. We do not really want that to happen because some heuristics
2776 may get confused by seeing negative speedups. */
2777 if (time > nonspecialized_time)
2778 time = nonspecialized_time;
2779
2780 if (info->loop_iterations
2781 && !info->loop_iterations->evaluate (possible_truths))
2782 hints |= INLINE_HINT_loop_iterations;
2783 if (info->loop_stride
2784 && !info->loop_stride->evaluate (possible_truths))
2785 hints |= INLINE_HINT_loop_stride;
2786 if (info->array_index
2787 && !info->array_index->evaluate (possible_truths))
2788 hints |= INLINE_HINT_array_index;
2789 if (info->scc_no)
2790 hints |= INLINE_HINT_in_scc;
2791 if (DECL_DECLARED_INLINE_P (node->decl))
2792 hints |= INLINE_HINT_declared_inline;
2793
2794 size = RDIV (size, ipa_fn_summary::size_scale);
2795 min_size = RDIV (min_size, ipa_fn_summary::size_scale);
2796
2797 if (dump_file && (dump_flags & TDF_DETAILS))
2798 fprintf (dump_file, "\n size:%i time:%f nonspec time:%f\n", (int) size,
2799 time.to_double (), nonspecialized_time.to_double ());
2800 if (ret_time)
2801 *ret_time = time;
2802 if (ret_nonspecialized_time)
2803 *ret_nonspecialized_time = nonspecialized_time;
2804 if (ret_size)
2805 *ret_size = size;
2806 if (ret_min_size)
2807 *ret_min_size = min_size;
2808 if (ret_hints)
2809 *ret_hints = hints;
2810 return;
2811 }
2812
2813
2814 /* Estimate size and time needed to execute callee of EDGE assuming that
2815 parameters known to be constant at caller of EDGE are propagated.
2816 KNOWN_VALS and KNOWN_CONTEXTS are vectors of assumed known constant values
2817 and types for parameters. */
2818
2819 void
2820 estimate_ipcp_clone_size_and_time (struct cgraph_node *node,
2821 vec<tree> known_vals,
2822 vec<ipa_polymorphic_call_context>
2823 known_contexts,
2824 vec<ipa_agg_jump_function_p> known_aggs,
2825 int *ret_size, sreal *ret_time,
2826 sreal *ret_nonspec_time,
2827 ipa_hints *hints)
2828 {
2829 clause_t clause, nonspec_clause;
2830
2831 evaluate_conditions_for_known_args (node, false, known_vals, known_aggs,
2832 &clause, &nonspec_clause);
2833 estimate_node_size_and_time (node, clause, nonspec_clause,
2834 known_vals, known_contexts,
2835 known_aggs, ret_size, NULL, ret_time,
2836 ret_nonspec_time, hints, vNULL);
2837 }
2838
2839
2840 /* Update summary information of inline clones after inlining.
2841 Compute peak stack usage. */
2842
2843 static void
2844 inline_update_callee_summaries (struct cgraph_node *node, int depth)
2845 {
2846 struct cgraph_edge *e;
2847 struct ipa_fn_summary *callee_info = ipa_fn_summaries->get (node);
2848 struct ipa_fn_summary *caller_info = ipa_fn_summaries->get (node->callers->caller);
2849 HOST_WIDE_INT peak;
2850
2851 callee_info->stack_frame_offset
2852 = caller_info->stack_frame_offset
2853 + caller_info->estimated_self_stack_size;
2854 peak = callee_info->stack_frame_offset
2855 + callee_info->estimated_self_stack_size;
2856 if (ipa_fn_summaries->get (node->global.inlined_to)->estimated_stack_size < peak)
2857 ipa_fn_summaries->get (node->global.inlined_to)->estimated_stack_size = peak;
2858 ipa_propagate_frequency (node);
2859 for (e = node->callees; e; e = e->next_callee)
2860 {
2861 if (!e->inline_failed)
2862 inline_update_callee_summaries (e->callee, depth);
2863 ipa_call_summaries->get (e)->loop_depth += depth;
2864 }
2865 for (e = node->indirect_calls; e; e = e->next_callee)
2866 ipa_call_summaries->get (e)->loop_depth += depth;
2867 }
2868
2869 /* Update change_prob of EDGE after INLINED_EDGE has been inlined.
2870 When function A is inlined in B and A calls C with a parameter that
2871 changes with probability PROB1 and C is known to be a passthrough
2872 of an argument of B that changes with probability PROB2, the
2873 probability of change is now PROB1 * PROB2. */
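
/* A worked example (illustrative, assuming combine_probabilities rescales
the product by REG_BR_PROB_BASE == 10000): PROB1 == 5000 (50%) and
PROB2 == 2000 (20%) combine to 5000 * 2000 / 10000 == 1000, i.e. a 10%
probability of change. The code below also makes sure that a product of
two nonzero probabilities is never rounded down to zero. */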
2874
2875 static void
2876 remap_edge_change_prob (struct cgraph_edge *inlined_edge,
2877 struct cgraph_edge *edge)
2878 {
2879 if (ipa_node_params_sum)
2880 {
2881 int i;
2882 struct ipa_edge_args *args = IPA_EDGE_REF (edge);
2883 struct ipa_call_summary *es = ipa_call_summaries->get (edge);
2884 struct ipa_call_summary *inlined_es
2885 = ipa_call_summaries->get (inlined_edge);
2886
2887 for (i = 0; i < ipa_get_cs_argument_count (args); i++)
2888 {
2889 struct ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, i);
2890 if (jfunc->type == IPA_JF_PASS_THROUGH
2891 || jfunc->type == IPA_JF_ANCESTOR)
2892 {
2893 int id = jfunc->type == IPA_JF_PASS_THROUGH
2894 ? ipa_get_jf_pass_through_formal_id (jfunc)
2895 : ipa_get_jf_ancestor_formal_id (jfunc);
2896 if (id < (int) inlined_es->param.length ())
2897 {
2898 int prob1 = es->param[i].change_prob;
2899 int prob2 = inlined_es->param[id].change_prob;
2900 int prob = combine_probabilities (prob1, prob2);
2901
2902 if (prob1 && prob2 && !prob)
2903 prob = 1;
2904
2905 es->param[i].change_prob = prob;
2906 }
2907 }
2908 }
2909 }
2910 }
2911
2912 /* Update edge summaries of NODE after INLINED_EDGE has been inlined.
2913
2914 Remap predicates of callees of NODE. Rest of arguments match
2915 remap_predicate.
2916
2917 Also update change probabilities. */
2918
2919 static void
2920 remap_edge_summaries (struct cgraph_edge *inlined_edge,
2921 struct cgraph_node *node,
2922 struct ipa_fn_summary *info,
2923 struct ipa_fn_summary *callee_info,
2924 vec<int> operand_map,
2925 vec<int> offset_map,
2926 clause_t possible_truths,
2927 predicate *toplev_predicate)
2928 {
2929 struct cgraph_edge *e, *next;
2930 for (e = node->callees; e; e = next)
2931 {
2932 struct ipa_call_summary *es = ipa_call_summaries->get (e);
2933 predicate p;
2934 next = e->next_callee;
2935
2936 if (e->inline_failed)
2937 {
2938 remap_edge_change_prob (inlined_edge, e);
2939
2940 if (es->predicate)
2941 {
2942 p = es->predicate->remap_after_inlining
2943 (info, callee_info, operand_map,
2944 offset_map, possible_truths,
2945 *toplev_predicate);
2946 edge_set_predicate (e, &p);
2947 }
2948 else
2949 edge_set_predicate (e, toplev_predicate);
2950 }
2951 else
2952 remap_edge_summaries (inlined_edge, e->callee, info, callee_info,
2953 operand_map, offset_map, possible_truths,
2954 toplev_predicate);
2955 }
2956 for (e = node->indirect_calls; e; e = next)
2957 {
2958 struct ipa_call_summary *es = ipa_call_summaries->get (e);
2959 predicate p;
2960 next = e->next_callee;
2961
2962 remap_edge_change_prob (inlined_edge, e);
2963 if (es->predicate)
2964 {
2965 p = es->predicate->remap_after_inlining
2966 (info, callee_info, operand_map, offset_map,
2967 possible_truths, *toplev_predicate);
2968 edge_set_predicate (e, &p);
2969 }
2970 else
2971 edge_set_predicate (e, toplev_predicate);
2972 }
2973 }
2974
2975 /* Same as remap_predicate, but set result into hint *HINT. */
2976
2977 static void
2978 remap_hint_predicate (struct ipa_fn_summary *info,
2979 struct ipa_fn_summary *callee_info,
2980 predicate **hint,
2981 vec<int> operand_map,
2982 vec<int> offset_map,
2983 clause_t possible_truths,
2984 predicate *toplev_predicate)
2985 {
2986 predicate p;
2987
2988 if (!*hint)
2989 return;
2990 p = (*hint)->remap_after_inlining
2991 (info, callee_info,
2992 operand_map, offset_map,
2993 possible_truths, *toplev_predicate);
2994 /* *HINT is known to be non-NULL here thanks to the early return above. */
2995 if (p != false && p != true)
2996 **hint &= p;
3001 }
3002
3003 /* We inlined EDGE. Update summary of the function we inlined into. */
3004
3005 void
3006 ipa_merge_fn_summary_after_inlining (struct cgraph_edge *edge)
3007 {
3008 struct ipa_fn_summary *callee_info = ipa_fn_summaries->get (edge->callee);
3009 struct cgraph_node *to = (edge->caller->global.inlined_to
3010 ? edge->caller->global.inlined_to : edge->caller);
3011 struct ipa_fn_summary *info = ipa_fn_summaries->get (to);
3012 clause_t clause = 0; /* not_inline is known to be false. */
3013 size_time_entry *e;
3014 vec<int> operand_map = vNULL;
3015 vec<int> offset_map = vNULL;
3016 int i;
3017 predicate toplev_predicate;
3018 predicate true_p = true;
3019 struct ipa_call_summary *es = ipa_call_summaries->get (edge);
3020
3021 if (es->predicate)
3022 toplev_predicate = *es->predicate;
3023 else
3024 toplev_predicate = true;
3025
3026 info->fp_expressions |= callee_info->fp_expressions;
3027
3028 if (callee_info->conds)
3029 evaluate_properties_for_edge (edge, true, &clause, NULL, NULL, NULL, NULL);
3030 if (ipa_node_params_sum && callee_info->conds)
3031 {
3032 struct ipa_edge_args *args = IPA_EDGE_REF (edge);
3033 int count = ipa_get_cs_argument_count (args);
3034 int i;
3035
3036 if (count)
3037 {
3038 operand_map.safe_grow_cleared (count);
3039 offset_map.safe_grow_cleared (count);
3040 }
3041 for (i = 0; i < count; i++)
3042 {
3043 struct ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, i);
3044 int map = -1;
3045
3046 /* TODO: handle non-NOPs when merging. */
3047 if (jfunc->type == IPA_JF_PASS_THROUGH)
3048 {
3049 if (ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
3050 map = ipa_get_jf_pass_through_formal_id (jfunc);
3051 if (!ipa_get_jf_pass_through_agg_preserved (jfunc))
3052 offset_map[i] = -1;
3053 }
3054 else if (jfunc->type == IPA_JF_ANCESTOR)
3055 {
3056 HOST_WIDE_INT offset = ipa_get_jf_ancestor_offset (jfunc);
3057 if (offset >= 0 && offset < INT_MAX)
3058 {
3059 map = ipa_get_jf_ancestor_formal_id (jfunc);
3060 if (!ipa_get_jf_ancestor_agg_preserved (jfunc))
3061 offset = -1;
3062 offset_map[i] = offset;
3063 }
3064 }
3065 operand_map[i] = map;
3066 gcc_assert (map < ipa_get_param_count (IPA_NODE_REF (to)));
3067 }
3068 }
3069 for (i = 0; vec_safe_iterate (callee_info->size_time_table, i, &e); i++)
3070 {
3071 predicate p;
3072 p = e->exec_predicate.remap_after_inlining
3073 (info, callee_info, operand_map,
3074 offset_map, clause,
3075 toplev_predicate);
3076 predicate nonconstp;
3077 nonconstp = e->nonconst_predicate.remap_after_inlining
3078 (info, callee_info, operand_map,
3079 offset_map, clause,
3080 toplev_predicate);
3081 if (p != false && nonconstp != false)
3082 {
3083 sreal add_time = ((sreal)e->time * edge->sreal_frequency ());
3084 int prob = e->nonconst_predicate.probability (callee_info->conds,
3085 clause, es->param);
3086 add_time = add_time * prob / REG_BR_PROB_BASE;
3087 if (prob != REG_BR_PROB_BASE
3088 && dump_file && (dump_flags & TDF_DETAILS))
3089 {
3090 fprintf (dump_file, "\t\tScaling time by probability:%f\n",
3091 (double) prob / REG_BR_PROB_BASE);
3092 }
3093 info->account_size_time (e->size, add_time, p, nonconstp);
3094 }
3095 }
3096 remap_edge_summaries (edge, edge->callee, info, callee_info, operand_map,
3097 offset_map, clause, &toplev_predicate);
3098 remap_hint_predicate (info, callee_info,
3099 &callee_info->loop_iterations,
3100 operand_map, offset_map, clause, &toplev_predicate);
3101 remap_hint_predicate (info, callee_info,
3102 &callee_info->loop_stride,
3103 operand_map, offset_map, clause, &toplev_predicate);
3104 remap_hint_predicate (info, callee_info,
3105 &callee_info->array_index,
3106 operand_map, offset_map, clause, &toplev_predicate);
3107
3108 inline_update_callee_summaries (edge->callee,
3109 ipa_call_summaries->get (edge)->loop_depth);
3110
3111 /* We do not maintain predicates of inlined edges, so free them. */
3112 edge_set_predicate (edge, &true_p);
3113 /* Similarly remove param summaries. */
3114 es->param.release ();
3115 operand_map.release ();
3116 offset_map.release ();
3117 }
3118
3119 /* For performance reasons ipa_merge_fn_summary_after_inlining does not
3120 update overall size and time. Recompute them here. */
3121
3122 void
3123 ipa_update_overall_fn_summary (struct cgraph_node *node)
3124 {
3125 struct ipa_fn_summary *info = ipa_fn_summaries->get (node);
3126 size_time_entry *e;
3127 int i;
3128
3129 info->size = 0;
3130 info->time = 0;
3131 for (i = 0; vec_safe_iterate (info->size_time_table, i, &e); i++)
3132 {
3133 info->size += e->size;
3134 info->time += e->time;
3135 }
3136 estimate_calls_size_and_time (node, &info->size, &info->min_size,
3137 &info->time, NULL,
3138 ~(clause_t) (1 << predicate::false_condition),
3139 vNULL, vNULL, vNULL);
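/* SIZE is kept internally scaled by ipa_fn_summary::size_scale; adding
size_scale / 2 before the division rounds to the nearest unit instead of
truncating. */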
3140 info->size = (info->size + ipa_fn_summary::size_scale / 2) / ipa_fn_summary::size_scale;
3141 }
3142
3143
3144 /* This function performs intraprocedural analysis in NODE that is required to
3145 inline indirect calls. */
3146
3147 static void
3148 inline_indirect_intraprocedural_analysis (struct cgraph_node *node)
3149 {
3150 ipa_analyze_node (node);
3151 if (dump_file && (dump_flags & TDF_DETAILS))
3152 {
3153 ipa_print_node_params (dump_file, node);
3154 ipa_print_node_jump_functions (dump_file, node);
3155 }
3156 }
3157
3158
3159 /* Analyze the function body of NODE and compute its summary. */
3160
3161 void
3162 inline_analyze_function (struct cgraph_node *node)
3163 {
3164 push_cfun (DECL_STRUCT_FUNCTION (node->decl));
3165
3166 if (dump_file)
3167 fprintf (dump_file, "\nAnalyzing function: %s/%u\n",
3168 node->name (), node->order);
3169 if (opt_for_fn (node->decl, optimize) && !node->thunk.thunk_p)
3170 inline_indirect_intraprocedural_analysis (node);
3171 compute_fn_summary (node, false);
3172 if (!optimize)
3173 {
3174 struct cgraph_edge *e;
3175 for (e = node->callees; e; e = e->next_callee)
3176 e->inline_failed = CIF_FUNCTION_NOT_OPTIMIZED;
3177 for (e = node->indirect_calls; e; e = e->next_callee)
3178 e->inline_failed = CIF_FUNCTION_NOT_OPTIMIZED;
3179 }
3180
3181 pop_cfun ();
3182 }
3183
3184
3185 /* Called when new function is inserted to callgraph late. */
3186
3187 void
3188 ipa_fn_summary_t::insert (struct cgraph_node *node, ipa_fn_summary *)
3189 {
3190 inline_analyze_function (node);
3191 }
3192
3193 /* Compute function summaries for all suitable functions in the callgraph. */
3194
3195 static void
3196 ipa_fn_summary_generate (void)
3197 {
3198 struct cgraph_node *node;
3199
3200 FOR_EACH_DEFINED_FUNCTION (node)
3201 if (DECL_STRUCT_FUNCTION (node->decl))
3202 node->local.versionable = tree_versionable_function_p (node->decl);
3203
3204 ipa_fn_summary_alloc ();
3205
3206 ipa_fn_summaries->enable_insertion_hook ();
3207
3208 ipa_register_cgraph_hooks ();
3209
3210 FOR_EACH_DEFINED_FUNCTION (node)
3211 if (!node->alias
3212 && (flag_generate_lto || flag_generate_offload || flag_wpa
3213 || opt_for_fn (node->decl, optimize)))
3214 inline_analyze_function (node);
3215 }
3216
3217
3218 /* Read the inline summary for edge E from IB. */
3219
3220 static void
3221 read_ipa_call_summary (struct lto_input_block *ib, struct cgraph_edge *e)
3222 {
3223 struct ipa_call_summary *es = ipa_call_summaries->get (e);
3224 predicate p;
3225 int length, i;
3226
3227 es->call_stmt_size = streamer_read_uhwi (ib);
3228 es->call_stmt_time = streamer_read_uhwi (ib);
3229 es->loop_depth = streamer_read_uhwi (ib);
3230
3231 bitpack_d bp = streamer_read_bitpack (ib);
3232 es->is_return_callee_uncaptured = bp_unpack_value (&bp, 1);
3233
3234 p.stream_in (ib);
3235 edge_set_predicate (e, &p);
3236 length = streamer_read_uhwi (ib);
3237 if (length)
3238 {
3239 es->param.safe_grow_cleared (length);
3240 for (i = 0; i < length; i++)
3241 es->param[i].change_prob = streamer_read_uhwi (ib);
3242 }
3243 }
3244
3245
3246 /* Stream in inline summaries from the section. */
3247
3248 static void
3249 inline_read_section (struct lto_file_decl_data *file_data, const char *data,
3250 size_t len)
3251 {
3252 const struct lto_function_header *header =
3253 (const struct lto_function_header *) data;
3254 const int cfg_offset = sizeof (struct lto_function_header);
3255 const int main_offset = cfg_offset + header->cfg_size;
3256 const int string_offset = main_offset + header->main_size;
3257 struct data_in *data_in;
3258 unsigned int i, count2, j;
3259 unsigned int f_count;
3260
3261 lto_input_block ib ((const char *) data + main_offset, header->main_size,
3262 file_data->mode_table);
3263
3264 data_in =
3265 lto_data_in_create (file_data, (const char *) data + string_offset,
3266 header->string_size, vNULL);
3267 f_count = streamer_read_uhwi (&ib);
3268 for (i = 0; i < f_count; i++)
3269 {
3270 unsigned int index;
3271 struct cgraph_node *node;
3272 struct ipa_fn_summary *info;
3273 lto_symtab_encoder_t encoder;
3274 struct bitpack_d bp;
3275 struct cgraph_edge *e;
3276 predicate p;
3277
3278 index = streamer_read_uhwi (&ib);
3279 encoder = file_data->symtab_node_encoder;
3280 node = dyn_cast<cgraph_node *> (lto_symtab_encoder_deref (encoder,
3281 index));
3282 info = ipa_fn_summaries->get (node);
3283
3284 info->estimated_stack_size
3285 = info->estimated_self_stack_size = streamer_read_uhwi (&ib);
3286 info->size = info->self_size = streamer_read_uhwi (&ib);
3287 info->time = sreal::stream_in (&ib);
3288
3289 bp = streamer_read_bitpack (&ib);
3290 info->inlinable = bp_unpack_value (&bp, 1);
3291 info->fp_expressions = bp_unpack_value (&bp, 1);
3292
3293 count2 = streamer_read_uhwi (&ib);
3294 gcc_assert (!info->conds);
3295 for (j = 0; j < count2; j++)
3296 {
3297 struct condition c;
3298 c.operand_num = streamer_read_uhwi (&ib);
3299 c.size = streamer_read_uhwi (&ib);
3300 c.code = (enum tree_code) streamer_read_uhwi (&ib);
3301 c.val = stream_read_tree (&ib, data_in);
3302 bp = streamer_read_bitpack (&ib);
3303 c.agg_contents = bp_unpack_value (&bp, 1);
3304 c.by_ref = bp_unpack_value (&bp, 1);
3305 if (c.agg_contents)
3306 c.offset = streamer_read_uhwi (&ib);
3307 vec_safe_push (info->conds, c);
3308 }
3309 count2 = streamer_read_uhwi (&ib);
3310 gcc_assert (!info->size_time_table);
3311 for (j = 0; j < count2; j++)
3312 {
3313 struct size_time_entry e;
3314
3315 e.size = streamer_read_uhwi (&ib);
3316 e.time = sreal::stream_in (&ib);
3317 e.exec_predicate.stream_in (&ib);
3318 e.nonconst_predicate.stream_in (&ib);
3319
3320 vec_safe_push (info->size_time_table, e);
3321 }
3322
3323 p.stream_in (&ib);
3324 set_hint_predicate (&info->loop_iterations, p);
3325 p.stream_in (&ib);
3326 set_hint_predicate (&info->loop_stride, p);
3327 p.stream_in (&ib);
3328 set_hint_predicate (&info->array_index, p);
3329 for (e = node->callees; e; e = e->next_callee)
3330 read_ipa_call_summary (&ib, e);
3331 for (e = node->indirect_calls; e; e = e->next_callee)
3332 read_ipa_call_summary (&ib, e);
3333 }
3334
3335 lto_free_section_data (file_data, LTO_section_ipa_fn_summary, NULL, data,
3336 len);
3337 lto_data_in_delete (data_in);
3338 }
3339
3340
3341 /* Read inline summary. Jump functions are shared among ipa-cp
3342 and inliner, so when ipa-cp is active, we don't need to stream them
3343 twice. */
3344
3345 static void
3346 ipa_fn_summary_read (void)
3347 {
3348 struct lto_file_decl_data **file_data_vec = lto_get_file_decl_data ();
3349 struct lto_file_decl_data *file_data;
3350 unsigned int j = 0;
3351
3352 ipa_fn_summary_alloc ();
3353
3354 while ((file_data = file_data_vec[j++]))
3355 {
3356 size_t len;
3357 const char *data = lto_get_section_data (file_data,
3358 LTO_section_ipa_fn_summary,
3359 NULL, &len);
3360 if (data)
3361 inline_read_section (file_data, data, len);
3362 else
3363 /* Fatal error here. We do not want to support compiling ltrans units
3364 with a different version of the compiler or different flags than the
3365 WPA unit, so this should never happen. */
3366 fatal_error (input_location,
3367 "ipa inline summary is missing in input file");
3368 }
3369 ipa_register_cgraph_hooks ();
3370 if (!flag_ipa_cp)
3371 ipa_prop_read_jump_functions ();
3372
3373 gcc_assert (ipa_fn_summaries);
3374 ipa_fn_summaries->enable_insertion_hook ();
3375 }
3376
3377
3378 /* Write inline summary for edge E to OB. */
3379
3380 static void
3381 write_ipa_call_summary (struct output_block *ob, struct cgraph_edge *e)
3382 {
3383 struct ipa_call_summary *es = ipa_call_summaries->get (e);
3384 int i;
3385
3386 streamer_write_uhwi (ob, es->call_stmt_size);
3387 streamer_write_uhwi (ob, es->call_stmt_time);
3388 streamer_write_uhwi (ob, es->loop_depth);
3389
3390 bitpack_d bp = bitpack_create (ob->main_stream);
3391 bp_pack_value (&bp, es->is_return_callee_uncaptured, 1);
3392 streamer_write_bitpack (&bp);
3393
3394 if (es->predicate)
3395 es->predicate->stream_out (ob);
3396 else
3397 streamer_write_uhwi (ob, 0);
3398 streamer_write_uhwi (ob, es->param.length ());
3399 for (i = 0; i < (int) es->param.length (); i++)
3400 streamer_write_uhwi (ob, es->param[i].change_prob);
3401 }
3402
3403
3404 /* Write inline summaries for nodes in SET.
3405 Jump functions are shared among ipa-cp and inliner, so when ipa-cp is
3406 active, we don't need to write them twice. */
3407
3408 static void
3409 ipa_fn_summary_write (void)
3410 {
3411 struct output_block *ob = create_output_block (LTO_section_ipa_fn_summary);
3412 lto_symtab_encoder_t encoder = ob->decl_state->symtab_node_encoder;
3413 unsigned int count = 0;
3414 int i;
3415
3416 for (i = 0; i < lto_symtab_encoder_size (encoder); i++)
3417 {
3418 symtab_node *snode = lto_symtab_encoder_deref (encoder, i);
3419 cgraph_node *cnode = dyn_cast <cgraph_node *> (snode);
3420 if (cnode && cnode->definition && !cnode->alias)
3421 count++;
3422 }
3423 streamer_write_uhwi (ob, count);
3424
3425 for (i = 0; i < lto_symtab_encoder_size (encoder); i++)
3426 {
3427 symtab_node *snode = lto_symtab_encoder_deref (encoder, i);
3428 cgraph_node *cnode = dyn_cast <cgraph_node *> (snode);
3429 if (cnode && cnode->definition && !cnode->alias)
3430 {
3431 struct ipa_fn_summary *info = ipa_fn_summaries->get (cnode);
3432 struct bitpack_d bp;
3433 struct cgraph_edge *edge;
3434 int i;
3435 size_time_entry *e;
3436 struct condition *c;
3437
3438 streamer_write_uhwi (ob, lto_symtab_encoder_encode (encoder, cnode));
3439 streamer_write_hwi (ob, info->estimated_self_stack_size);
3440 streamer_write_hwi (ob, info->self_size);
3441 info->time.stream_out (ob);
3442 bp = bitpack_create (ob->main_stream);
3443 bp_pack_value (&bp, info->inlinable, 1);
3444 bp_pack_value (&bp, false, 1);
3445 bp_pack_value (&bp, info->fp_expressions, 1);
3446 streamer_write_bitpack (&bp);
3447 streamer_write_uhwi (ob, vec_safe_length (info->conds));
3448 for (i = 0; vec_safe_iterate (info->conds, i, &c); i++)
3449 {
3450 streamer_write_uhwi (ob, c->operand_num);
3451 streamer_write_uhwi (ob, c->size);
3452 streamer_write_uhwi (ob, c->code);
3453 stream_write_tree (ob, c->val, true);
3454 bp = bitpack_create (ob->main_stream);
3455 bp_pack_value (&bp, c->agg_contents, 1);
3456 bp_pack_value (&bp, c->by_ref, 1);
3457 streamer_write_bitpack (&bp);
3458 if (c->agg_contents)
3459 streamer_write_uhwi (ob, c->offset);
3460 }
3461 streamer_write_uhwi (ob, vec_safe_length (info->size_time_table));
3462 for (i = 0; vec_safe_iterate (info->size_time_table, i, &e); i++)
3463 {
3464 streamer_write_uhwi (ob, e->size);
3465 e->time.stream_out (ob);
3466 e->exec_predicate.stream_out (ob);
3467 e->nonconst_predicate.stream_out (ob);
3468 }
3469 if (info->loop_iterations)
3470 info->loop_iterations->stream_out (ob);
3471 else
3472 streamer_write_uhwi (ob, 0);
3473 if (info->loop_stride)
3474 info->loop_stride->stream_out (ob);
3475 else
3476 streamer_write_uhwi (ob, 0);
3477 if (info->array_index)
3478 info->array_index->stream_out (ob);
3479 else
3480 streamer_write_uhwi (ob, 0);
3481 for (edge = cnode->callees; edge; edge = edge->next_callee)
3482 write_ipa_call_summary (ob, edge);
3483 for (edge = cnode->indirect_calls; edge; edge = edge->next_callee)
3484 write_ipa_call_summary (ob, edge);
3485 }
3486 }
3487 streamer_write_char_stream (ob->main_stream, 0);
3488 produce_asm (ob, NULL);
3489 destroy_output_block (ob);
3490
3491 if (!flag_ipa_cp)
3492 ipa_prop_write_jump_functions ();
3493 }
3494
3495
3496 /* Release inline summary. */
3497
3498 void
3499 ipa_free_fn_summary (void)
3500 {
3501 struct cgraph_node *node;
3502 if (!ipa_call_summaries)
3503 return;
3504 FOR_EACH_DEFINED_FUNCTION (node)
3505 if (!node->alias)
3506 ipa_fn_summaries->get (node)->reset (node);
3507 ipa_fn_summaries->release ();
3508 ipa_fn_summaries = NULL;
3509 ipa_call_summaries->release ();
3510 delete ipa_call_summaries;
3511 ipa_call_summaries = NULL;
3512 edge_predicate_pool.release ();
3513 }
3514
3515 namespace {
3516
3517 const pass_data pass_data_local_fn_summary =
3518 {
3519 GIMPLE_PASS, /* type */
3520 "local-fnsummary", /* name */
3521 OPTGROUP_INLINE, /* optinfo_flags */
3522 TV_INLINE_PARAMETERS, /* tv_id */
3523 0, /* properties_required */
3524 0, /* properties_provided */
3525 0, /* properties_destroyed */
3526 0, /* todo_flags_start */
3527 0, /* todo_flags_finish */
3528 };
3529
3530 class pass_local_fn_summary : public gimple_opt_pass
3531 {
3532 public:
3533 pass_local_fn_summary (gcc::context *ctxt)
3534 : gimple_opt_pass (pass_data_local_fn_summary, ctxt)
3535 {}
3536
3537 /* opt_pass methods: */
3538 opt_pass * clone () { return new pass_local_fn_summary (m_ctxt); }
3539 virtual unsigned int execute (function *)
3540 {
3541 return compute_fn_summary_for_current ();
3542 }
3543
3544 }; // class pass_local_fn_summary
3545
3546 } // anon namespace
3547
3548 gimple_opt_pass *
3549 make_pass_local_fn_summary (gcc::context *ctxt)
3550 {
3551 return new pass_local_fn_summary (ctxt);
3552 }
3553
3554
3555 /* Free inline summary. */
3556
3557 namespace {
3558
3559 const pass_data pass_data_ipa_free_fn_summary =
3560 {
3561 SIMPLE_IPA_PASS, /* type */
3562 "free-fnsummary", /* name */
3563 OPTGROUP_NONE, /* optinfo_flags */
3564 TV_IPA_FREE_INLINE_SUMMARY, /* tv_id */
3565 0, /* properties_required */
3566 0, /* properties_provided */
3567 0, /* properties_destroyed */
3568 0, /* todo_flags_start */
3569 0, /* todo_flags_finish */
3570 };
3571
3572 class pass_ipa_free_fn_summary : public simple_ipa_opt_pass
3573 {
3574 public:
3575 pass_ipa_free_fn_summary (gcc::context *ctxt)
3576 : simple_ipa_opt_pass (pass_data_ipa_free_fn_summary, ctxt),
3577 small_p (false)
3578 {}
3579
3580 /* opt_pass methods: */
3581 opt_pass *clone () { return new pass_ipa_free_fn_summary (m_ctxt); }
3582 void set_pass_param (unsigned int n, bool param)
3583 {
3584 gcc_assert (n == 0);
3585 small_p = param;
3586 }
3587 virtual bool gate (function *) { return small_p || !flag_wpa; }
3588 virtual unsigned int execute (function *)
3589 {
3590 ipa_free_fn_summary ();
3591 /* Early optimizations may make functions unreachable. We cannot
3592 remove unreachable functions as part of the early opts pass because
3593 TODOs are run before subpasses. Do it here. */
3594 return small_p ? TODO_remove_functions | TODO_dump_symtab : 0;
3595 }
3596
3597 private:
3598 bool small_p;
3599 }; // class pass_ipa_free_fn_summary
3600
3601 } // anon namespace
3602
3603 simple_ipa_opt_pass *
3604 make_pass_ipa_free_fn_summary (gcc::context *ctxt)
3605 {
3606 return new pass_ipa_free_fn_summary (ctxt);
3607 }
3608
3609 namespace {
3610
3611 const pass_data pass_data_ipa_fn_summary =
3612 {
3613 IPA_PASS, /* type */
3614 "fnsummary", /* name */
3615 OPTGROUP_INLINE, /* optinfo_flags */
3616 TV_IPA_FNSUMMARY, /* tv_id */
3617 0, /* properties_required */
3618 0, /* properties_provided */
3619 0, /* properties_destroyed */
3620 0, /* todo_flags_start */
3621 ( TODO_dump_symtab ), /* todo_flags_finish */
3622 };
3623
3624 class pass_ipa_fn_summary : public ipa_opt_pass_d
3625 {
3626 public:
3627 pass_ipa_fn_summary (gcc::context *ctxt)
3628 : ipa_opt_pass_d (pass_data_ipa_fn_summary, ctxt,
3629 ipa_fn_summary_generate, /* generate_summary */
3630 ipa_fn_summary_write, /* write_summary */
3631 ipa_fn_summary_read, /* read_summary */
3632 NULL, /* write_optimization_summary */
3633 NULL, /* read_optimization_summary */
3634 NULL, /* stmt_fixup */
3635 0, /* function_transform_todo_flags_start */
3636 NULL, /* function_transform */
3637 NULL) /* variable_transform */
3638 {}
3639
3640 /* opt_pass methods: */
3641 virtual unsigned int execute (function *) { return 0; }
3642
3643 }; // class pass_ipa_fn_summary
3644
3645 } // anon namespace
3646
3647 ipa_opt_pass_d *
3648 make_pass_ipa_fn_summary (gcc::context *ctxt)
3649 {
3650 return new pass_ipa_fn_summary (ctxt);
3651 }
3652
3653 /* Reset all state within ipa-fnsummary.c so that we can rerun the compiler
3654 within the same process. For use by toplev::finalize. */
3655
3656 void
3657 ipa_fnsummary_c_finalize (void)
3658 {
3659 ipa_free_fn_summary ();
3660 }