intrinsic.h (gfc_check_selected_real_kind, [...]): Update prototypes.
[gcc.git] / gcc / ipa-cp.c
1 /* Interprocedural constant propagation
2 Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010
3 Free Software Foundation, Inc.
4 Contributed by Razya Ladelsky <RAZYA@il.ibm.com>
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 /* Interprocedural constant propagation. The aim of interprocedural constant
23 propagation (IPCP) is to find which function's argument has the same
24 constant value in each invocation throughout the whole program. For example,
25 consider the following program:
26
27 int g (int y)
28 {
29 printf ("value is %d",y);
30 }
31
32 int f (int x)
33 {
34 g (x);
35 }
36
37 int h (int y)
38 {
39 g (y);
40 }
41
42 void main (void)
43 {
44 f (3);
45 h (3);
46 }
47
48
49 The IPCP algorithm will find that g's formal argument y is always called
50 with the value 3.
51
52 The algorithm used is based on "Interprocedural Constant Propagation", by
53 Challahan David, Keith D Cooper, Ken Kennedy, Linda Torczon, Comp86, pg
54 152-161
55
56 The optimization is divided into three stages:
57
58 First stage - intraprocedural analysis
59 =======================================
60 This phase computes jump_function and modification flags.
61
62 A jump function for a callsite represents the values passed as an actual
63 arguments of a given callsite. There are three types of values:
64 Pass through - the caller's formal parameter is passed as an actual argument.
65 Constant - a constant is passed as an actual argument.
66 Unknown - neither of the above.
67
68 The jump function info, ipa_jump_func, is stored in ipa_edge_args
69 structure (defined in ipa_prop.h and pointed to by cgraph_node->aux)
70 modified_flags are defined in ipa_node_params structure
71 (defined in ipa_prop.h and pointed to by cgraph_edge->aux).
72
73 -ipcp_init_stage() is the first stage driver.
74
75 Second stage - interprocedural analysis
76 ========================================
77 This phase does the interprocedural constant propagation.
78 It computes lattices for all formal parameters in the program
79 and their value that may be:
80 TOP - unknown.
81 BOTTOM - non constant.
82 CONSTANT - constant value.
83
84 Lattice describing a formal parameter p will have a constant value if all
85 callsites invoking this function have the same constant value passed to p.
86
87 The lattices are stored in ipcp_lattice which is itself in ipa_node_params
88 structure (defined in ipa_prop.h and pointed to by cgraph_edge->aux).
89
90 -ipcp_iterate_stage() is the second stage driver.
91
92 Third phase - transformation of function code
93 ============================================
94 Propagates the constant-valued formals into the function.
95 For each function whose parameters are constants, we create its clone.
96
97 Then we process the clone in two ways:
98 1. We insert an assignment statement 'parameter = const' at the beginning
99 of the cloned function.
100 2. For read-only parameters that do not live in memory, we replace all their
101 uses with the constant.
102
103 We also need to modify some callsites to call the cloned functions instead
104 of the original ones. For a callsite passing an argument found to be a
105 constant by IPCP, there are two different cases to handle:
106 1. A constant is passed as an argument. In this case the callsite in the
107 should be redirected to call the cloned callee.
108 2. A parameter (of the caller) passed as an argument (pass through
109 argument). In such cases both the caller and the callee have clones and
110 only the callsite in the cloned caller is redirected to call to the
111 cloned callee.
112
113 This update is done in two steps: First all cloned functions are created
114 during a traversal of the call graph, during which all callsites are
115 redirected to call the cloned function. Then the callsites are traversed
116 and many calls redirected back to fit the description above.
117
118 -ipcp_insert_stage() is the third phase driver.
119
120 */
121
122 #include "config.h"
123 #include "system.h"
124 #include "coretypes.h"
125 #include "tree.h"
126 #include "target.h"
127 #include "cgraph.h"
128 #include "ipa-prop.h"
129 #include "tree-flow.h"
130 #include "tree-pass.h"
131 #include "flags.h"
132 #include "timevar.h"
133 #include "diagnostic.h"
134 #include "tree-pretty-print.h"
135 #include "tree-dump.h"
136 #include "tree-inline.h"
137 #include "fibheap.h"
138 #include "params.h"
139
140 /* Number of functions identified as candidates for cloning. When not cloning
141 we can simplify iterate stage not forcing it to go through the decision
142 on what is profitable and what not. */
143 static int n_cloning_candidates;
144
145 /* Maximal count found in program. */
146 static gcov_type max_count;
147
148 /* Cgraph nodes that has been completely replaced by cloning during iterate
149 * stage and will be removed after ipcp is finished. */
150 static bitmap dead_nodes;
151
152 static void ipcp_print_profile_data (FILE *);
153 static void ipcp_function_scale_print (FILE *);
154
155 /* Get the original node field of ipa_node_params associated with node NODE. */
156 static inline struct cgraph_node *
157 ipcp_get_orig_node (struct cgraph_node *node)
158 {
159 return IPA_NODE_REF (node)->ipcp_orig_node;
160 }
161
162 /* Return true if NODE describes a cloned/versioned function. */
163 static inline bool
164 ipcp_node_is_clone (struct cgraph_node *node)
165 {
166 return (ipcp_get_orig_node (node) != NULL);
167 }
168
169 /* Create ipa_node_params and its data structures for NEW_NODE. Set ORIG_NODE
170 as the ipcp_orig_node field in ipa_node_params. */
171 static void
172 ipcp_init_cloned_node (struct cgraph_node *orig_node,
173 struct cgraph_node *new_node)
174 {
175 ipa_check_create_node_params ();
176 ipa_initialize_node_params (new_node);
177 IPA_NODE_REF (new_node)->ipcp_orig_node = orig_node;
178 }
179
180 /* Return scale for NODE. */
181 static inline gcov_type
182 ipcp_get_node_scale (struct cgraph_node *node)
183 {
184 return IPA_NODE_REF (node)->count_scale;
185 }
186
187 /* Set COUNT as scale for NODE. */
188 static inline void
189 ipcp_set_node_scale (struct cgraph_node *node, gcov_type count)
190 {
191 IPA_NODE_REF (node)->count_scale = count;
192 }
193
194 /* Return whether LAT is a constant lattice. */
195 static inline bool
196 ipcp_lat_is_const (struct ipcp_lattice *lat)
197 {
198 if (lat->type == IPA_CONST_VALUE)
199 return true;
200 else
201 return false;
202 }
203
204 /* Return whether LAT is a constant lattice that ipa-cp can actually insert
205 into the code (i.e. constants excluding member pointers and pointers). */
206 static inline bool
207 ipcp_lat_is_insertable (struct ipcp_lattice *lat)
208 {
209 return lat->type == IPA_CONST_VALUE;
210 }
211
212 /* Return true if LAT1 and LAT2 are equal. */
213 static inline bool
214 ipcp_lats_are_equal (struct ipcp_lattice *lat1, struct ipcp_lattice *lat2)
215 {
216 gcc_assert (ipcp_lat_is_const (lat1) && ipcp_lat_is_const (lat2));
217 if (lat1->type != lat2->type)
218 return false;
219
220 if (TREE_CODE (lat1->constant) == ADDR_EXPR
221 && TREE_CODE (lat2->constant) == ADDR_EXPR
222 && TREE_CODE (TREE_OPERAND (lat1->constant, 0)) == CONST_DECL
223 && TREE_CODE (TREE_OPERAND (lat2->constant, 0)) == CONST_DECL)
224 return operand_equal_p (DECL_INITIAL (TREE_OPERAND (lat1->constant, 0)),
225 DECL_INITIAL (TREE_OPERAND (lat2->constant, 0)), 0);
226 else
227 return operand_equal_p (lat1->constant, lat2->constant, 0);
228 }
229
230 /* Compute Meet arithmetics:
231 Meet (IPA_BOTTOM, x) = IPA_BOTTOM
232 Meet (IPA_TOP,x) = x
233 Meet (const_a,const_b) = IPA_BOTTOM, if const_a != const_b.
234 MEET (const_a,const_b) = const_a, if const_a == const_b.*/
235 static void
236 ipa_lattice_meet (struct ipcp_lattice *res, struct ipcp_lattice *lat1,
237 struct ipcp_lattice *lat2)
238 {
239 if (lat1->type == IPA_BOTTOM || lat2->type == IPA_BOTTOM)
240 {
241 res->type = IPA_BOTTOM;
242 return;
243 }
244 if (lat1->type == IPA_TOP)
245 {
246 res->type = lat2->type;
247 res->constant = lat2->constant;
248 return;
249 }
250 if (lat2->type == IPA_TOP)
251 {
252 res->type = lat1->type;
253 res->constant = lat1->constant;
254 return;
255 }
256 if (!ipcp_lats_are_equal (lat1, lat2))
257 {
258 res->type = IPA_BOTTOM;
259 return;
260 }
261 res->type = lat1->type;
262 res->constant = lat1->constant;
263 }
264
265 /* Return the lattice corresponding to the Ith formal parameter of the function
266 described by INFO. */
267 static inline struct ipcp_lattice *
268 ipcp_get_lattice (struct ipa_node_params *info, int i)
269 {
270 return &(info->params[i].ipcp_lattice);
271 }
272
273 /* Given the jump function JFUNC, compute the lattice LAT that describes the
274 value coming down the callsite. INFO describes the caller node so that
275 pass-through jump functions can be evaluated. */
276 static void
277 ipcp_lattice_from_jfunc (struct ipa_node_params *info, struct ipcp_lattice *lat,
278 struct ipa_jump_func *jfunc)
279 {
280 if (jfunc->type == IPA_JF_CONST)
281 {
282 lat->type = IPA_CONST_VALUE;
283 lat->constant = jfunc->value.constant;
284 }
285 else if (jfunc->type == IPA_JF_PASS_THROUGH)
286 {
287 struct ipcp_lattice *caller_lat;
288 tree cst;
289
290 caller_lat = ipcp_get_lattice (info, jfunc->value.pass_through.formal_id);
291 lat->type = caller_lat->type;
292 if (caller_lat->type != IPA_CONST_VALUE)
293 return;
294 cst = caller_lat->constant;
295
296 if (jfunc->value.pass_through.operation != NOP_EXPR)
297 {
298 tree restype;
299 if (TREE_CODE_CLASS (jfunc->value.pass_through.operation)
300 == tcc_comparison)
301 restype = boolean_type_node;
302 else
303 restype = TREE_TYPE (cst);
304 cst = fold_binary (jfunc->value.pass_through.operation,
305 restype, cst, jfunc->value.pass_through.operand);
306 }
307 if (!cst || !is_gimple_ip_invariant (cst))
308 lat->type = IPA_BOTTOM;
309 lat->constant = cst;
310 }
311 else if (jfunc->type == IPA_JF_ANCESTOR)
312 {
313 struct ipcp_lattice *caller_lat;
314 tree t;
315 bool ok;
316
317 caller_lat = ipcp_get_lattice (info, jfunc->value.ancestor.formal_id);
318 lat->type = caller_lat->type;
319 if (caller_lat->type != IPA_CONST_VALUE)
320 return;
321 if (TREE_CODE (caller_lat->constant) != ADDR_EXPR)
322 {
323 /* This can happen when the constant is a NULL pointer. */
324 lat->type = IPA_BOTTOM;
325 return;
326 }
327 t = TREE_OPERAND (caller_lat->constant, 0);
328 ok = build_ref_for_offset (&t, TREE_TYPE (t),
329 jfunc->value.ancestor.offset,
330 jfunc->value.ancestor.type, false);
331 if (!ok)
332 {
333 lat->type = IPA_BOTTOM;
334 lat->constant = NULL_TREE;
335 }
336 else
337 lat->constant = build_fold_addr_expr (t);
338 }
339 else
340 lat->type = IPA_BOTTOM;
341 }
342
343 /* True when OLD_LAT and NEW_LAT values are not the same. */
344
345 static bool
346 ipcp_lattice_changed (struct ipcp_lattice *old_lat,
347 struct ipcp_lattice *new_lat)
348 {
349 if (old_lat->type == new_lat->type)
350 {
351 if (!ipcp_lat_is_const (old_lat))
352 return false;
353 if (ipcp_lats_are_equal (old_lat, new_lat))
354 return false;
355 }
356 return true;
357 }
358
359 /* Print all ipcp_lattices of all functions to F. */
360 static void
361 ipcp_print_all_lattices (FILE * f)
362 {
363 struct cgraph_node *node;
364 int i, count;
365
366 fprintf (f, "\nLattice:\n");
367 for (node = cgraph_nodes; node; node = node->next)
368 {
369 struct ipa_node_params *info;
370
371 if (!node->analyzed)
372 continue;
373 info = IPA_NODE_REF (node);
374 fprintf (f, " Node: %s:\n", cgraph_node_name (node));
375 count = ipa_get_param_count (info);
376 for (i = 0; i < count; i++)
377 {
378 struct ipcp_lattice *lat = ipcp_get_lattice (info, i);
379
380 fprintf (f, " param [%d]: ", i);
381 if (lat->type == IPA_CONST_VALUE)
382 {
383 tree cst = lat->constant;
384 fprintf (f, "type is CONST ");
385 print_generic_expr (f, cst, 0);
386 if (TREE_CODE (cst) == ADDR_EXPR
387 && TREE_CODE (TREE_OPERAND (cst, 0)) == CONST_DECL)
388 {
389 fprintf (f, " -> ");
390 print_generic_expr (f, DECL_INITIAL (TREE_OPERAND (cst, 0)),
391 0);
392 }
393 fprintf (f, "\n");
394 }
395 else if (lat->type == IPA_TOP)
396 fprintf (f, "type is TOP\n");
397 else
398 fprintf (f, "type is BOTTOM\n");
399 }
400 }
401 }
402
403 /* Return true if ipcp algorithms would allow cloning NODE. */
404
405 static bool
406 ipcp_versionable_function_p (struct cgraph_node *node)
407 {
408 struct cgraph_edge *edge;
409
410 /* There are a number of generic reasons functions cannot be versioned. */
411 if (!node->local.versionable)
412 return false;
413
414 /* Removing arguments doesn't work if the function takes varargs
415 or use __builtin_apply_args. */
416 for (edge = node->callees; edge; edge = edge->next_callee)
417 {
418 tree t = edge->callee->decl;
419 if (DECL_BUILT_IN_CLASS (t) == BUILT_IN_NORMAL
420 && (DECL_FUNCTION_CODE (t) == BUILT_IN_APPLY_ARGS
421 || DECL_FUNCTION_CODE (t) == BUILT_IN_VA_START))
422 return false;
423 }
424
425 return true;
426 }
427
428 /* Return true if this NODE is viable candidate for cloning. */
429 static bool
430 ipcp_cloning_candidate_p (struct cgraph_node *node)
431 {
432 int n_calls = 0;
433 int n_hot_calls = 0;
434 gcov_type direct_call_sum = 0;
435 struct cgraph_edge *e;
436
437 /* We never clone functions that are not visible from outside.
438 FIXME: in future we should clone such functions when they are called with
439 different constants, but current ipcp implementation is not good on this.
440 */
441 if (cgraph_only_called_directly_p (node) || !node->analyzed)
442 return false;
443
444 if (cgraph_function_body_availability (node) <= AVAIL_OVERWRITABLE)
445 {
446 if (dump_file)
447 fprintf (dump_file, "Not considering %s for cloning; body is overwrittable.\n",
448 cgraph_node_name (node));
449 return false;
450 }
451 if (!ipcp_versionable_function_p (node))
452 {
453 if (dump_file)
454 fprintf (dump_file, "Not considering %s for cloning; body is not versionable.\n",
455 cgraph_node_name (node));
456 return false;
457 }
458 for (e = node->callers; e; e = e->next_caller)
459 {
460 direct_call_sum += e->count;
461 n_calls ++;
462 if (cgraph_maybe_hot_edge_p (e))
463 n_hot_calls ++;
464 }
465
466 if (!n_calls)
467 {
468 if (dump_file)
469 fprintf (dump_file, "Not considering %s for cloning; no direct calls.\n",
470 cgraph_node_name (node));
471 return false;
472 }
473 if (node->local.inline_summary.self_size < n_calls)
474 {
475 if (dump_file)
476 fprintf (dump_file, "Considering %s for cloning; code would shrink.\n",
477 cgraph_node_name (node));
478 return true;
479 }
480
481 if (!flag_ipa_cp_clone)
482 {
483 if (dump_file)
484 fprintf (dump_file, "Not considering %s for cloning; -fipa-cp-clone disabled.\n",
485 cgraph_node_name (node));
486 return false;
487 }
488
489 if (!optimize_function_for_speed_p (DECL_STRUCT_FUNCTION (node->decl)))
490 {
491 if (dump_file)
492 fprintf (dump_file, "Not considering %s for cloning; optimizing it for size.\n",
493 cgraph_node_name (node));
494 return false;
495 }
496
497 /* When profile is available and function is hot, propagate into it even if
498 calls seems cold; constant propagation can improve function's speed
499 significandly. */
500 if (max_count)
501 {
502 if (direct_call_sum > node->count * 90 / 100)
503 {
504 if (dump_file)
505 fprintf (dump_file, "Considering %s for cloning; usually called directly.\n",
506 cgraph_node_name (node));
507 return true;
508 }
509 }
510 if (!n_hot_calls)
511 {
512 if (dump_file)
513 fprintf (dump_file, "Not considering %s for cloning; no hot calls.\n",
514 cgraph_node_name (node));
515 return false;
516 }
517 if (dump_file)
518 fprintf (dump_file, "Considering %s for cloning.\n",
519 cgraph_node_name (node));
520 return true;
521 }
522
523 /* Initialize ipcp_lattices array. The lattices corresponding to supported
524 types (integers, real types and Fortran constants defined as const_decls)
525 are initialized to IPA_TOP, the rest of them to IPA_BOTTOM. */
526 static void
527 ipcp_initialize_node_lattices (struct cgraph_node *node)
528 {
529 int i;
530 struct ipa_node_params *info = IPA_NODE_REF (node);
531 enum ipa_lattice_type type;
532
533 if (ipa_is_called_with_var_arguments (info))
534 type = IPA_BOTTOM;
535 else if (cgraph_only_called_directly_p (node))
536 type = IPA_TOP;
537 /* When cloning is allowed, we can assume that externally visible functions
538 are not called. We will compensate this by cloning later. */
539 else if (ipcp_cloning_candidate_p (node))
540 type = IPA_TOP, n_cloning_candidates ++;
541 else
542 type = IPA_BOTTOM;
543
544 for (i = 0; i < ipa_get_param_count (info) ; i++)
545 ipcp_get_lattice (info, i)->type = type;
546 }
547
548 /* build INTEGER_CST tree with type TREE_TYPE and value according to LAT.
549 Return the tree. */
550 static tree
551 build_const_val (struct ipcp_lattice *lat, tree tree_type)
552 {
553 tree val;
554
555 gcc_assert (ipcp_lat_is_const (lat));
556 val = lat->constant;
557
558 if (!useless_type_conversion_p (tree_type, TREE_TYPE (val)))
559 {
560 if (fold_convertible_p (tree_type, val))
561 return fold_build1 (NOP_EXPR, tree_type, val);
562 else
563 return fold_build1 (VIEW_CONVERT_EXPR, tree_type, val);
564 }
565 return val;
566 }
567
568 /* Compute the proper scale for NODE. It is the ratio between the number of
569 direct calls (represented on the incoming cgraph_edges) and sum of all
570 invocations of NODE (represented as count in cgraph_node).
571
572 FIXME: This code is wrong. Since the callers can be also clones and
573 the clones are not scaled yet, the sums gets unrealistically high.
574 To properly compute the counts, we would need to do propagation across
575 callgraph (as external call to A might imply call to non-clonned B
576 if A's clone calls clonned B). */
577 static void
578 ipcp_compute_node_scale (struct cgraph_node *node)
579 {
580 gcov_type sum;
581 struct cgraph_edge *cs;
582
583 sum = 0;
584 /* Compute sum of all counts of callers. */
585 for (cs = node->callers; cs != NULL; cs = cs->next_caller)
586 sum += cs->count;
587 /* Work around the unrealistically high sum problem. We just don't want
588 the non-cloned body to have negative or very low frequency. Since
589 majority of execution time will be spent in clones anyway, this should
590 give good enough profile. */
591 if (sum > node->count * 9 / 10)
592 sum = node->count * 9 / 10;
593 if (node->count == 0)
594 ipcp_set_node_scale (node, 0);
595 else
596 ipcp_set_node_scale (node, sum * REG_BR_PROB_BASE / node->count);
597 }
598
599 /* Initialization and computation of IPCP data structures. This is the initial
600 intraprocedural analysis of functions, which gathers information to be
601 propagated later on. */
602
603 static void
604 ipcp_init_stage (void)
605 {
606 struct cgraph_node *node;
607
608 for (node = cgraph_nodes; node; node = node->next)
609 if (node->analyzed)
610 {
611 /* Unreachable nodes should have been eliminated before ipcp. */
612 gcc_assert (node->needed || node->reachable);
613
614 node->local.versionable = tree_versionable_function_p (node->decl);
615 ipa_analyze_node (node);
616 }
617 }
618
619 /* Return true if there are some formal parameters whose value is IPA_TOP (in
620 the whole compilation unit). Change their values to IPA_BOTTOM, since they
621 most probably get their values from outside of this compilation unit. */
622 static bool
623 ipcp_change_tops_to_bottom (void)
624 {
625 int i, count;
626 struct cgraph_node *node;
627 bool prop_again;
628
629 prop_again = false;
630 for (node = cgraph_nodes; node; node = node->next)
631 {
632 struct ipa_node_params *info = IPA_NODE_REF (node);
633 count = ipa_get_param_count (info);
634 for (i = 0; i < count; i++)
635 {
636 struct ipcp_lattice *lat = ipcp_get_lattice (info, i);
637 if (lat->type == IPA_TOP)
638 {
639 prop_again = true;
640 if (dump_file)
641 {
642 fprintf (dump_file, "Forcing param ");
643 print_generic_expr (dump_file, ipa_get_param (info, i), 0);
644 fprintf (dump_file, " of node %s to bottom.\n",
645 cgraph_node_name (node));
646 }
647 lat->type = IPA_BOTTOM;
648 }
649 }
650 }
651 return prop_again;
652 }
653
654 /* Interprocedural analysis. The algorithm propagates constants from the
655 caller's parameters to the callee's arguments. */
656 static void
657 ipcp_propagate_stage (void)
658 {
659 int i;
660 struct ipcp_lattice inc_lat = { IPA_BOTTOM, NULL };
661 struct ipcp_lattice new_lat = { IPA_BOTTOM, NULL };
662 struct ipcp_lattice *dest_lat;
663 struct cgraph_edge *cs;
664 struct ipa_jump_func *jump_func;
665 struct ipa_func_list *wl;
666 int count;
667
668 ipa_check_create_node_params ();
669 ipa_check_create_edge_args ();
670
671 /* Initialize worklist to contain all functions. */
672 wl = ipa_init_func_list ();
673 while (wl)
674 {
675 struct cgraph_node *node = ipa_pop_func_from_list (&wl);
676 struct ipa_node_params *info = IPA_NODE_REF (node);
677
678 for (cs = node->callees; cs; cs = cs->next_callee)
679 {
680 struct ipa_node_params *callee_info = IPA_NODE_REF (cs->callee);
681 struct ipa_edge_args *args = IPA_EDGE_REF (cs);
682
683 if (ipa_is_called_with_var_arguments (callee_info)
684 || !cs->callee->analyzed
685 || ipa_is_called_with_var_arguments (callee_info))
686 continue;
687
688 count = ipa_get_cs_argument_count (args);
689 for (i = 0; i < count; i++)
690 {
691 jump_func = ipa_get_ith_jump_func (args, i);
692 ipcp_lattice_from_jfunc (info, &inc_lat, jump_func);
693 dest_lat = ipcp_get_lattice (callee_info, i);
694 ipa_lattice_meet (&new_lat, &inc_lat, dest_lat);
695 if (ipcp_lattice_changed (&new_lat, dest_lat))
696 {
697 dest_lat->type = new_lat.type;
698 dest_lat->constant = new_lat.constant;
699 ipa_push_func_to_list (&wl, cs->callee);
700 }
701 }
702 }
703 }
704 }
705
706 /* Call the constant propagation algorithm and re-call it if necessary
707 (if there are undetermined values left). */
708 static void
709 ipcp_iterate_stage (void)
710 {
711 struct cgraph_node *node;
712 n_cloning_candidates = 0;
713
714 if (dump_file)
715 fprintf (dump_file, "\nIPA iterate stage:\n\n");
716
717 if (in_lto_p)
718 ipa_update_after_lto_read ();
719
720 for (node = cgraph_nodes; node; node = node->next)
721 {
722 ipcp_initialize_node_lattices (node);
723 ipcp_compute_node_scale (node);
724 }
725 if (dump_file && (dump_flags & TDF_DETAILS))
726 {
727 ipcp_print_all_lattices (dump_file);
728 ipcp_function_scale_print (dump_file);
729 }
730
731 ipcp_propagate_stage ();
732 if (ipcp_change_tops_to_bottom ())
733 /* Some lattices have changed from IPA_TOP to IPA_BOTTOM.
734 This change should be propagated. */
735 {
736 gcc_assert (n_cloning_candidates);
737 ipcp_propagate_stage ();
738 }
739 if (dump_file)
740 {
741 fprintf (dump_file, "\nIPA lattices after propagation:\n");
742 ipcp_print_all_lattices (dump_file);
743 if (dump_flags & TDF_DETAILS)
744 ipcp_print_profile_data (dump_file);
745 }
746 }
747
748 /* Check conditions to forbid constant insertion to function described by
749 NODE. */
750 static inline bool
751 ipcp_node_modifiable_p (struct cgraph_node *node)
752 {
753 /* Once we will be able to do in-place replacement, we can be more
754 lax here. */
755 return ipcp_versionable_function_p (node);
756 }
757
758 /* Print count scale data structures. */
759 static void
760 ipcp_function_scale_print (FILE * f)
761 {
762 struct cgraph_node *node;
763
764 for (node = cgraph_nodes; node; node = node->next)
765 {
766 if (!node->analyzed)
767 continue;
768 fprintf (f, "printing scale for %s: ", cgraph_node_name (node));
769 fprintf (f, "value is " HOST_WIDE_INT_PRINT_DEC
770 " \n", (HOST_WIDE_INT) ipcp_get_node_scale (node));
771 }
772 }
773
774 /* Print counts of all cgraph nodes. */
775 static void
776 ipcp_print_func_profile_counts (FILE * f)
777 {
778 struct cgraph_node *node;
779
780 for (node = cgraph_nodes; node; node = node->next)
781 {
782 fprintf (f, "function %s: ", cgraph_node_name (node));
783 fprintf (f, "count is " HOST_WIDE_INT_PRINT_DEC
784 " \n", (HOST_WIDE_INT) node->count);
785 }
786 }
787
788 /* Print counts of all cgraph edges. */
789 static void
790 ipcp_print_call_profile_counts (FILE * f)
791 {
792 struct cgraph_node *node;
793 struct cgraph_edge *cs;
794
795 for (node = cgraph_nodes; node; node = node->next)
796 {
797 for (cs = node->callees; cs; cs = cs->next_callee)
798 {
799 fprintf (f, "%s -> %s ", cgraph_node_name (cs->caller),
800 cgraph_node_name (cs->callee));
801 fprintf (f, "count is " HOST_WIDE_INT_PRINT_DEC " \n",
802 (HOST_WIDE_INT) cs->count);
803 }
804 }
805 }
806
807 /* Print profile info for all functions. */
808 static void
809 ipcp_print_profile_data (FILE * f)
810 {
811 fprintf (f, "\nNODE COUNTS :\n");
812 ipcp_print_func_profile_counts (f);
813 fprintf (f, "\nCS COUNTS stage:\n");
814 ipcp_print_call_profile_counts (f);
815 }
816
817 /* Build and initialize ipa_replace_map struct according to LAT. This struct is
818 processed by versioning, which operates according to the flags set.
819 PARM_TREE is the formal parameter found to be constant. LAT represents the
820 constant. */
821 static struct ipa_replace_map *
822 ipcp_create_replace_map (tree parm_tree, struct ipcp_lattice *lat)
823 {
824 struct ipa_replace_map *replace_map;
825 tree const_val;
826
827 replace_map = ggc_alloc_ipa_replace_map ();
828 const_val = build_const_val (lat, TREE_TYPE (parm_tree));
829 if (dump_file)
830 {
831 fprintf (dump_file, " replacing param ");
832 print_generic_expr (dump_file, parm_tree, 0);
833 fprintf (dump_file, " with const ");
834 print_generic_expr (dump_file, const_val, 0);
835 fprintf (dump_file, "\n");
836 }
837 replace_map->old_tree = parm_tree;
838 replace_map->new_tree = const_val;
839 replace_map->replace_p = true;
840 replace_map->ref_p = false;
841
842 return replace_map;
843 }
844
845 /* Return true if this callsite should be redirected to the original callee
846 (instead of the cloned one). */
847 static bool
848 ipcp_need_redirect_p (struct cgraph_edge *cs)
849 {
850 struct ipa_node_params *orig_callee_info;
851 int i, count;
852 struct ipa_jump_func *jump_func;
853 struct cgraph_node *node = cs->callee, *orig;
854
855 if (!n_cloning_candidates)
856 return false;
857
858 if ((orig = ipcp_get_orig_node (node)) != NULL)
859 node = orig;
860 if (ipcp_get_orig_node (cs->caller))
861 return false;
862
863 orig_callee_info = IPA_NODE_REF (node);
864 count = ipa_get_param_count (orig_callee_info);
865 for (i = 0; i < count; i++)
866 {
867 struct ipcp_lattice *lat = ipcp_get_lattice (orig_callee_info, i);
868 if (ipcp_lat_is_const (lat))
869 {
870 jump_func = ipa_get_ith_jump_func (IPA_EDGE_REF (cs), i);
871 if (jump_func->type != IPA_JF_CONST)
872 return true;
873 }
874 }
875
876 return false;
877 }
878
879 /* Fix the callsites and the call graph after function cloning was done. */
880 static void
881 ipcp_update_callgraph (void)
882 {
883 struct cgraph_node *node;
884
885 for (node = cgraph_nodes; node; node = node->next)
886 if (node->analyzed && ipcp_node_is_clone (node))
887 {
888 bitmap args_to_skip = BITMAP_ALLOC (NULL);
889 struct cgraph_node *orig_node = ipcp_get_orig_node (node);
890 struct ipa_node_params *info = IPA_NODE_REF (orig_node);
891 int i, count = ipa_get_param_count (info);
892 struct cgraph_edge *cs, *next;
893
894 for (i = 0; i < count; i++)
895 {
896 struct ipcp_lattice *lat = ipcp_get_lattice (info, i);
897
898 /* We can proactively remove obviously unused arguments. */
899 if (!ipa_is_param_used (info, i))
900 {
901 bitmap_set_bit (args_to_skip, i);
902 continue;
903 }
904
905 if (lat->type == IPA_CONST_VALUE)
906 bitmap_set_bit (args_to_skip, i);
907 }
908 for (cs = node->callers; cs; cs = next)
909 {
910 next = cs->next_caller;
911 if (!ipcp_node_is_clone (cs->caller) && ipcp_need_redirect_p (cs))
912 cgraph_redirect_edge_callee (cs, orig_node);
913 }
914 }
915 }
916
917 /* Update profiling info for versioned functions and the functions they were
918 versioned from. */
919 static void
920 ipcp_update_profiling (void)
921 {
922 struct cgraph_node *node, *orig_node;
923 gcov_type scale, scale_complement;
924 struct cgraph_edge *cs;
925
926 for (node = cgraph_nodes; node; node = node->next)
927 {
928 if (ipcp_node_is_clone (node))
929 {
930 orig_node = ipcp_get_orig_node (node);
931 scale = ipcp_get_node_scale (orig_node);
932 node->count = orig_node->count * scale / REG_BR_PROB_BASE;
933 scale_complement = REG_BR_PROB_BASE - scale;
934 orig_node->count =
935 orig_node->count * scale_complement / REG_BR_PROB_BASE;
936 for (cs = node->callees; cs; cs = cs->next_callee)
937 cs->count = cs->count * scale / REG_BR_PROB_BASE;
938 for (cs = orig_node->callees; cs; cs = cs->next_callee)
939 cs->count = cs->count * scale_complement / REG_BR_PROB_BASE;
940 }
941 }
942 }
943
944 /* If NODE was cloned, how much would program grow? */
945 static long
946 ipcp_estimate_growth (struct cgraph_node *node)
947 {
948 struct cgraph_edge *cs;
949 int redirectable_node_callers = 0;
950 int removable_args = 0;
951 bool need_original = !cgraph_only_called_directly_p (node);
952 struct ipa_node_params *info;
953 int i, count;
954 int growth;
955
956 for (cs = node->callers; cs != NULL; cs = cs->next_caller)
957 if (cs->caller == node || !ipcp_need_redirect_p (cs))
958 redirectable_node_callers++;
959 else
960 need_original = true;
961
962 /* If we will be able to fully replace orignal node, we never increase
963 program size. */
964 if (!need_original)
965 return 0;
966
967 info = IPA_NODE_REF (node);
968 count = ipa_get_param_count (info);
969 for (i = 0; i < count; i++)
970 {
971 struct ipcp_lattice *lat = ipcp_get_lattice (info, i);
972
973 /* We can proactively remove obviously unused arguments. */
974 if (!ipa_is_param_used (info, i))
975 removable_args++;
976
977 if (lat->type == IPA_CONST_VALUE)
978 removable_args++;
979 }
980
981 /* We make just very simple estimate of savings for removal of operand from
982 call site. Precise cost is dificult to get, as our size metric counts
983 constants and moves as free. Generally we are looking for cases that
984 small function is called very many times. */
985 growth = node->local.inline_summary.self_size
986 - removable_args * redirectable_node_callers;
987 if (growth < 0)
988 return 0;
989 return growth;
990 }
991
992
993 /* Estimate cost of cloning NODE. */
994 static long
995 ipcp_estimate_cloning_cost (struct cgraph_node *node)
996 {
997 int freq_sum = 1;
998 gcov_type count_sum = 1;
999 struct cgraph_edge *e;
1000 int cost;
1001
1002 cost = ipcp_estimate_growth (node) * 1000;
1003 if (!cost)
1004 {
1005 if (dump_file)
1006 fprintf (dump_file, "Versioning of %s will save code size\n",
1007 cgraph_node_name (node));
1008 return 0;
1009 }
1010
1011 for (e = node->callers; e; e = e->next_caller)
1012 if (!bitmap_bit_p (dead_nodes, e->caller->uid)
1013 && !ipcp_need_redirect_p (e))
1014 {
1015 count_sum += e->count;
1016 freq_sum += e->frequency + 1;
1017 }
1018
1019 if (max_count)
1020 cost /= count_sum * 1000 / max_count + 1;
1021 else
1022 cost /= freq_sum * 1000 / REG_BR_PROB_BASE + 1;
1023 if (dump_file)
1024 fprintf (dump_file, "Cost of versioning %s is %i, (size: %i, freq: %i)\n",
1025 cgraph_node_name (node), cost, node->local.inline_summary.self_size,
1026 freq_sum);
1027 return cost + 1;
1028 }
1029
1030 /* Return number of live constant parameters. */
1031 static int
1032 ipcp_const_param_count (struct cgraph_node *node)
1033 {
1034 int const_param = 0;
1035 struct ipa_node_params *info = IPA_NODE_REF (node);
1036 int count = ipa_get_param_count (info);
1037 int i;
1038
1039 for (i = 0; i < count; i++)
1040 {
1041 struct ipcp_lattice *lat = ipcp_get_lattice (info, i);
1042 if (ipcp_lat_is_insertable (lat)
1043 /* Do not count obviously unused arguments. */
1044 && ipa_is_param_used (info, i))
1045 const_param++;
1046 }
1047 return const_param;
1048 }
1049
1050 /* Propagate the constant parameters found by ipcp_iterate_stage()
1051 to the function's code. */
1052 static void
1053 ipcp_insert_stage (void)
1054 {
1055 struct cgraph_node *node, *node1 = NULL;
1056 int i;
1057 VEC (cgraph_edge_p, heap) * redirect_callers;
1058 VEC (ipa_replace_map_p,gc)* replace_trees;
1059 int node_callers, count;
1060 tree parm_tree;
1061 struct ipa_replace_map *replace_param;
1062 fibheap_t heap;
1063 long overall_size = 0, new_size = 0;
1064 long max_new_size;
1065
1066 ipa_check_create_node_params ();
1067 ipa_check_create_edge_args ();
1068 if (dump_file)
1069 fprintf (dump_file, "\nIPA insert stage:\n\n");
1070
1071 dead_nodes = BITMAP_ALLOC (NULL);
1072
1073 for (node = cgraph_nodes; node; node = node->next)
1074 if (node->analyzed)
1075 {
1076 if (node->count > max_count)
1077 max_count = node->count;
1078 overall_size += node->local.inline_summary.self_size;
1079 }
1080
1081 max_new_size = overall_size;
1082 if (max_new_size < PARAM_VALUE (PARAM_LARGE_UNIT_INSNS))
1083 max_new_size = PARAM_VALUE (PARAM_LARGE_UNIT_INSNS);
1084 max_new_size = max_new_size * PARAM_VALUE (PARAM_IPCP_UNIT_GROWTH) / 100 + 1;
1085
1086 /* First collect all functions we proved to have constant arguments to heap. */
1087 heap = fibheap_new ();
1088 for (node = cgraph_nodes; node; node = node->next)
1089 {
1090 struct ipa_node_params *info;
1091 /* Propagation of the constant is forbidden in certain conditions. */
1092 if (!node->analyzed || !ipcp_node_modifiable_p (node))
1093 continue;
1094 info = IPA_NODE_REF (node);
1095 if (ipa_is_called_with_var_arguments (info))
1096 continue;
1097 if (ipcp_const_param_count (node))
1098 node->aux = fibheap_insert (heap, ipcp_estimate_cloning_cost (node), node);
1099 }
1100
1101 /* Now clone in priority order until code size growth limits are met or
1102 heap is emptied. */
1103 while (!fibheap_empty (heap))
1104 {
1105 struct ipa_node_params *info;
1106 int growth = 0;
1107 bitmap args_to_skip;
1108 struct cgraph_edge *cs;
1109
1110 node = (struct cgraph_node *)fibheap_extract_min (heap);
1111 node->aux = NULL;
1112 if (dump_file)
1113 fprintf (dump_file, "considering function %s\n",
1114 cgraph_node_name (node));
1115
1116 growth = ipcp_estimate_growth (node);
1117
1118 if (new_size + growth > max_new_size)
1119 break;
1120 if (growth
1121 && optimize_function_for_size_p (DECL_STRUCT_FUNCTION (node->decl)))
1122 {
1123 if (dump_file)
1124 fprintf (dump_file, "Not versioning, cold code would grow");
1125 continue;
1126 }
1127
1128 new_size += growth;
1129
1130 /* Look if original function becomes dead after clonning. */
1131 for (cs = node->callers; cs != NULL; cs = cs->next_caller)
1132 if (cs->caller == node || ipcp_need_redirect_p (cs))
1133 break;
1134 if (!cs && cgraph_only_called_directly_p (node))
1135 bitmap_set_bit (dead_nodes, node->uid);
1136
1137 info = IPA_NODE_REF (node);
1138 count = ipa_get_param_count (info);
1139
1140 replace_trees = VEC_alloc (ipa_replace_map_p, gc, 1);
1141 args_to_skip = BITMAP_GGC_ALLOC ();
1142 for (i = 0; i < count; i++)
1143 {
1144 struct ipcp_lattice *lat = ipcp_get_lattice (info, i);
1145 parm_tree = ipa_get_param (info, i);
1146
1147 /* We can proactively remove obviously unused arguments. */
1148 if (!ipa_is_param_used (info, i))
1149 {
1150 bitmap_set_bit (args_to_skip, i);
1151 continue;
1152 }
1153
1154 if (lat->type == IPA_CONST_VALUE)
1155 {
1156 replace_param =
1157 ipcp_create_replace_map (parm_tree, lat);
1158 VEC_safe_push (ipa_replace_map_p, gc, replace_trees, replace_param);
1159 bitmap_set_bit (args_to_skip, i);
1160 }
1161 }
1162
1163 /* Compute how many callers node has. */
1164 node_callers = 0;
1165 for (cs = node->callers; cs != NULL; cs = cs->next_caller)
1166 node_callers++;
1167 redirect_callers = VEC_alloc (cgraph_edge_p, heap, node_callers);
1168 for (cs = node->callers; cs != NULL; cs = cs->next_caller)
1169 VEC_quick_push (cgraph_edge_p, redirect_callers, cs);
1170
1171 /* Redirecting all the callers of the node to the
1172 new versioned node. */
1173 node1 =
1174 cgraph_create_virtual_clone (node, redirect_callers, replace_trees,
1175 args_to_skip, "constprop");
1176 args_to_skip = NULL;
1177 VEC_free (cgraph_edge_p, heap, redirect_callers);
1178 replace_trees = NULL;
1179
1180 if (node1 == NULL)
1181 continue;
1182 if (dump_file)
1183 fprintf (dump_file, "versioned function %s with growth %i, overall %i\n",
1184 cgraph_node_name (node), (int)growth, (int)new_size);
1185 ipcp_init_cloned_node (node, node1);
1186
1187 /* TODO: We can use indirect inlning info to produce new calls. */
1188
1189 if (dump_file)
1190 dump_function_to_file (node1->decl, dump_file, dump_flags);
1191
1192 for (cs = node->callees; cs; cs = cs->next_callee)
1193 if (cs->callee->aux)
1194 {
1195 fibheap_delete_node (heap, (fibnode_t) cs->callee->aux);
1196 cs->callee->aux = fibheap_insert (heap,
1197 ipcp_estimate_cloning_cost (cs->callee),
1198 cs->callee);
1199 }
1200 }
1201
1202 while (!fibheap_empty (heap))
1203 {
1204 if (dump_file)
1205 fprintf (dump_file, "skipping function %s\n",
1206 cgraph_node_name (node));
1207 node = (struct cgraph_node *) fibheap_extract_min (heap);
1208 node->aux = NULL;
1209 }
1210 fibheap_delete (heap);
1211 BITMAP_FREE (dead_nodes);
1212 ipcp_update_callgraph ();
1213 ipcp_update_profiling ();
1214 }
1215
1216 /* The IPCP driver. */
1217 static unsigned int
1218 ipcp_driver (void)
1219 {
1220 cgraph_remove_unreachable_nodes (true,dump_file);
1221 if (dump_file)
1222 {
1223 fprintf (dump_file, "\nIPA structures before propagation:\n");
1224 if (dump_flags & TDF_DETAILS)
1225 ipa_print_all_params (dump_file);
1226 ipa_print_all_jump_functions (dump_file);
1227 }
1228 /* 2. Do the interprocedural propagation. */
1229 ipcp_iterate_stage ();
1230 /* 3. Insert the constants found to the functions. */
1231 ipcp_insert_stage ();
1232 if (dump_file && (dump_flags & TDF_DETAILS))
1233 {
1234 fprintf (dump_file, "\nProfiling info after insert stage:\n");
1235 ipcp_print_profile_data (dump_file);
1236 }
1237 /* Free all IPCP structures. */
1238 ipa_free_all_structures_after_ipa_cp ();
1239 if (dump_file)
1240 fprintf (dump_file, "\nIPA constant propagation end\n");
1241 return 0;
1242 }
1243
1244 /* Note function body size. */
1245 static void
1246 ipcp_generate_summary (void)
1247 {
1248 if (dump_file)
1249 fprintf (dump_file, "\nIPA constant propagation start:\n");
1250 ipa_check_create_node_params ();
1251 ipa_check_create_edge_args ();
1252 ipa_register_cgraph_hooks ();
1253 /* 1. Call the init stage to initialize
1254 the ipa_node_params and ipa_edge_args structures. */
1255 ipcp_init_stage ();
1256 }
1257
1258 /* Write ipcp summary for nodes in SET. */
1259 static void
1260 ipcp_write_summary (cgraph_node_set set,
1261 varpool_node_set vset ATTRIBUTE_UNUSED)
1262 {
1263 ipa_prop_write_jump_functions (set);
1264 }
1265
1266 /* Read ipcp summary. */
1267 static void
1268 ipcp_read_summary (void)
1269 {
1270 ipa_prop_read_jump_functions ();
1271 }
1272
1273 /* Gate for IPCP optimization. */
1274 static bool
1275 cgraph_gate_cp (void)
1276 {
1277 return flag_ipa_cp;
1278 }
1279
1280 struct ipa_opt_pass_d pass_ipa_cp =
1281 {
1282 {
1283 IPA_PASS,
1284 "cp", /* name */
1285 cgraph_gate_cp, /* gate */
1286 ipcp_driver, /* execute */
1287 NULL, /* sub */
1288 NULL, /* next */
1289 0, /* static_pass_number */
1290 TV_IPA_CONSTANT_PROP, /* tv_id */
1291 0, /* properties_required */
1292 0, /* properties_provided */
1293 0, /* properties_destroyed */
1294 0, /* todo_flags_start */
1295 TODO_dump_cgraph | TODO_dump_func |
1296 TODO_remove_functions | TODO_ggc_collect /* todo_flags_finish */
1297 },
1298 ipcp_generate_summary, /* generate_summary */
1299 ipcp_write_summary, /* write_summary */
1300 ipcp_read_summary, /* read_summary */
1301 NULL, /* write_optimization_summary */
1302 NULL, /* read_optimization_summary */
1303 NULL, /* stmt_fixup */
1304 0, /* TODOs */
1305 NULL, /* function_transform */
1306 NULL, /* variable_transform */
1307 };