1 /* Interprocedural constant propagation
2 Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010
3 Free Software Foundation, Inc.
4 Contributed by Razya Ladelsky <RAZYA@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 /* Interprocedural constant propagation. The aim of interprocedural constant
23 propagation (IPCP) is to find which function's argument has the same
24 constant value in each invocation throughout the whole program. For example,
25 consider the following program:
29 printf ("value is %d",y);
49 The IPCP algorithm will find that g's formal argument y is always called
52 The algorithm used is based on "Interprocedural Constant Propagation", by
53 David Callahan, Keith D. Cooper, Ken Kennedy, Linda Torczon, Comp86, pg
56 The optimization is divided into three stages:
58 First stage - intraprocedural analysis
59 =======================================
60 This phase computes jump_function and modification flags.
62 A jump function for a callsite represents the values passed as an actual
63 arguments of a given callsite. There are three types of values:
64 Pass through - the caller's formal parameter is passed as an actual argument.
65 Constant - a constant is passed as an actual argument.
66 Unknown - neither of the above.
68 The jump function info, ipa_jump_func, is stored in ipa_edge_args
69 structure (defined in ipa-prop.h and accessed with IPA_EDGE_REF on a cgraph_edge).
70 modified_flags are defined in ipa_node_params structure
71 (defined in ipa-prop.h and accessed with IPA_NODE_REF on a cgraph_node).
73 -ipcp_init_stage() is the first stage driver.
75 Second stage - interprocedural analysis
76 ========================================
77 This phase does the interprocedural constant propagation.
78 It computes lattices for all formal parameters in the program
79 and their value that may be:
81 BOTTOM - non constant.
82 CONSTANT - constant value.
84 Lattice describing a formal parameter p will have a constant value if all
85 callsites invoking this function have the same constant value passed to p.
87 The lattices are stored in ipcp_lattice which is itself in ipa_node_params
88 structure (defined in ipa-prop.h and accessed with IPA_NODE_REF on a cgraph_node).
90 -ipcp_iterate_stage() is the second stage driver.
92 Third stage - transformation of function code
93 ============================================
94 Propagates the constant-valued formals into the function.
95 For each function whose parameters are constants, we create its clone.
97 Then we process the clone in two ways:
98 1. We insert an assignment statement 'parameter = const' at the beginning
99 of the cloned function.
100 2. For read-only parameters that do not live in memory, we replace all their
101 uses with the constant.
103 We also need to modify some callsites to call the cloned functions instead
104 of the original ones. For a callsite passing an argument found to be a
105 constant by IPCP, there are two different cases to handle:
106 1. A constant is passed as an argument. In this case the callsite
107 should be redirected to call the cloned callee.
108 2. A parameter (of the caller) passed as an argument (pass through
109 argument). In such cases both the caller and the callee have clones and
110 only the callsite in the cloned caller is redirected to call to the
113 This update is done in two steps: First all cloned functions are created
114 during a traversal of the call graph, during which all callsites are
115 redirected to call the cloned function. Then the callsites are traversed
116 and many calls redirected back to fit the description above.
118 -ipcp_insert_stage() is the third stage driver.
124 #include "coretypes.h"
128 #include "ipa-prop.h"
129 #include "tree-flow.h"
130 #include "tree-pass.h"
133 #include "diagnostic.h"
134 #include "tree-pretty-print.h"
135 #include "tree-dump.h"
136 #include "tree-inline.h"
140 /* Number of functions identified as candidates for cloning. When not cloning
141 we can simplify iterate stage not forcing it to go through the decision
142 on what is profitable and what not. */
143 static int n_cloning_candidates
;
145 /* Maximal count found in program. */
146 static gcov_type max_count
;
148 /* Cgraph nodes that have been completely replaced by cloning during iterate
149 * stage and will be removed after ipcp is finished. */
150 static bitmap dead_nodes
;
152 static void ipcp_print_profile_data (FILE *);
153 static void ipcp_function_scale_print (FILE *);
155 /* Get the original node field of ipa_node_params associated with node NODE. */
156 static inline struct cgraph_node
*
157 ipcp_get_orig_node (struct cgraph_node
*node
)
159 return IPA_NODE_REF (node
)->ipcp_orig_node
;
/* Return true if NODE describes a cloned/versioned function, i.e. if its
   ipa_node_params records a non-NULL original node.  */
static inline bool
ipcp_node_is_clone (struct cgraph_node *node)
{
  return (ipcp_get_orig_node (node) != NULL);
}
169 /* Create ipa_node_params and its data structures for NEW_NODE. Set ORIG_NODE
170 as the ipcp_orig_node field in ipa_node_params. */
172 ipcp_init_cloned_node (struct cgraph_node
*orig_node
,
173 struct cgraph_node
*new_node
)
175 ipa_check_create_node_params ();
176 ipa_initialize_node_params (new_node
);
177 IPA_NODE_REF (new_node
)->ipcp_orig_node
= orig_node
;
180 /* Return scale for NODE. */
181 static inline gcov_type
182 ipcp_get_node_scale (struct cgraph_node
*node
)
184 return IPA_NODE_REF (node
)->count_scale
;
187 /* Set COUNT as scale for NODE. */
189 ipcp_set_node_scale (struct cgraph_node
*node
, gcov_type count
)
191 IPA_NODE_REF (node
)->count_scale
= count
;
194 /* Return whether LAT is a constant lattice. */
196 ipcp_lat_is_const (struct ipcp_lattice
*lat
)
198 if (lat
->type
== IPA_CONST_VALUE
)
204 /* Return whether LAT is a constant lattice that ipa-cp can actually insert
205 into the code (i.e. constants excluding member pointers and pointers). */
207 ipcp_lat_is_insertable (struct ipcp_lattice
*lat
)
209 return lat
->type
== IPA_CONST_VALUE
;
212 /* Return true if LAT1 and LAT2 are equal. */
214 ipcp_lats_are_equal (struct ipcp_lattice
*lat1
, struct ipcp_lattice
*lat2
)
216 gcc_assert (ipcp_lat_is_const (lat1
) && ipcp_lat_is_const (lat2
));
217 if (lat1
->type
!= lat2
->type
)
220 if (TREE_CODE (lat1
->constant
) == ADDR_EXPR
221 && TREE_CODE (lat2
->constant
) == ADDR_EXPR
222 && TREE_CODE (TREE_OPERAND (lat1
->constant
, 0)) == CONST_DECL
223 && TREE_CODE (TREE_OPERAND (lat2
->constant
, 0)) == CONST_DECL
)
224 return operand_equal_p (DECL_INITIAL (TREE_OPERAND (lat1
->constant
, 0)),
225 DECL_INITIAL (TREE_OPERAND (lat2
->constant
, 0)), 0);
227 return operand_equal_p (lat1
->constant
, lat2
->constant
, 0);
230 /* Compute Meet arithmetics:
231 Meet (IPA_BOTTOM, x) = IPA_BOTTOM
233 Meet (const_a,const_b) = IPA_BOTTOM, if const_a != const_b.
234 MEET (const_a,const_b) = const_a, if const_a == const_b.*/
236 ipa_lattice_meet (struct ipcp_lattice
*res
, struct ipcp_lattice
*lat1
,
237 struct ipcp_lattice
*lat2
)
239 if (lat1
->type
== IPA_BOTTOM
|| lat2
->type
== IPA_BOTTOM
)
241 res
->type
= IPA_BOTTOM
;
244 if (lat1
->type
== IPA_TOP
)
246 res
->type
= lat2
->type
;
247 res
->constant
= lat2
->constant
;
250 if (lat2
->type
== IPA_TOP
)
252 res
->type
= lat1
->type
;
253 res
->constant
= lat1
->constant
;
256 if (!ipcp_lats_are_equal (lat1
, lat2
))
258 res
->type
= IPA_BOTTOM
;
261 res
->type
= lat1
->type
;
262 res
->constant
= lat1
->constant
;
265 /* Return the lattice corresponding to the Ith formal parameter of the function
266 described by INFO. */
267 static inline struct ipcp_lattice
*
268 ipcp_get_lattice (struct ipa_node_params
*info
, int i
)
270 return &(info
->params
[i
].ipcp_lattice
);
273 /* Given the jump function JFUNC, compute the lattice LAT that describes the
274 value coming down the callsite. INFO describes the caller node so that
275 pass-through jump functions can be evaluated. */
277 ipcp_lattice_from_jfunc (struct ipa_node_params
*info
, struct ipcp_lattice
*lat
,
278 struct ipa_jump_func
*jfunc
)
280 if (jfunc
->type
== IPA_JF_CONST
)
282 lat
->type
= IPA_CONST_VALUE
;
283 lat
->constant
= jfunc
->value
.constant
;
285 else if (jfunc
->type
== IPA_JF_PASS_THROUGH
)
287 struct ipcp_lattice
*caller_lat
;
290 caller_lat
= ipcp_get_lattice (info
, jfunc
->value
.pass_through
.formal_id
);
291 lat
->type
= caller_lat
->type
;
292 if (caller_lat
->type
!= IPA_CONST_VALUE
)
294 cst
= caller_lat
->constant
;
296 if (jfunc
->value
.pass_through
.operation
!= NOP_EXPR
)
299 if (TREE_CODE_CLASS (jfunc
->value
.pass_through
.operation
)
301 restype
= boolean_type_node
;
303 restype
= TREE_TYPE (cst
);
304 cst
= fold_binary (jfunc
->value
.pass_through
.operation
,
305 restype
, cst
, jfunc
->value
.pass_through
.operand
);
307 if (!cst
|| !is_gimple_ip_invariant (cst
))
308 lat
->type
= IPA_BOTTOM
;
311 else if (jfunc
->type
== IPA_JF_ANCESTOR
)
313 struct ipcp_lattice
*caller_lat
;
317 caller_lat
= ipcp_get_lattice (info
, jfunc
->value
.ancestor
.formal_id
);
318 lat
->type
= caller_lat
->type
;
319 if (caller_lat
->type
!= IPA_CONST_VALUE
)
321 if (TREE_CODE (caller_lat
->constant
) != ADDR_EXPR
)
323 /* This can happen when the constant is a NULL pointer. */
324 lat
->type
= IPA_BOTTOM
;
327 t
= TREE_OPERAND (caller_lat
->constant
, 0);
328 ok
= build_ref_for_offset (&t
, TREE_TYPE (t
),
329 jfunc
->value
.ancestor
.offset
,
330 jfunc
->value
.ancestor
.type
, false);
333 lat
->type
= IPA_BOTTOM
;
334 lat
->constant
= NULL_TREE
;
337 lat
->constant
= build_fold_addr_expr (t
);
340 lat
->type
= IPA_BOTTOM
;
343 /* True when OLD_LAT and NEW_LAT values are not the same. */
346 ipcp_lattice_changed (struct ipcp_lattice
*old_lat
,
347 struct ipcp_lattice
*new_lat
)
349 if (old_lat
->type
== new_lat
->type
)
351 if (!ipcp_lat_is_const (old_lat
))
353 if (ipcp_lats_are_equal (old_lat
, new_lat
))
359 /* Print all ipcp_lattices of all functions to F. */
361 ipcp_print_all_lattices (FILE * f
)
363 struct cgraph_node
*node
;
366 fprintf (f
, "\nLattice:\n");
367 for (node
= cgraph_nodes
; node
; node
= node
->next
)
369 struct ipa_node_params
*info
;
373 info
= IPA_NODE_REF (node
);
374 fprintf (f
, " Node: %s:\n", cgraph_node_name (node
));
375 count
= ipa_get_param_count (info
);
376 for (i
= 0; i
< count
; i
++)
378 struct ipcp_lattice
*lat
= ipcp_get_lattice (info
, i
);
380 fprintf (f
, " param [%d]: ", i
);
381 if (lat
->type
== IPA_CONST_VALUE
)
383 tree cst
= lat
->constant
;
384 fprintf (f
, "type is CONST ");
385 print_generic_expr (f
, cst
, 0);
386 if (TREE_CODE (cst
) == ADDR_EXPR
387 && TREE_CODE (TREE_OPERAND (cst
, 0)) == CONST_DECL
)
390 print_generic_expr (f
, DECL_INITIAL (TREE_OPERAND (cst
, 0)),
395 else if (lat
->type
== IPA_TOP
)
396 fprintf (f
, "type is TOP\n");
398 fprintf (f
, "type is BOTTOM\n");
403 /* Return true if ipcp algorithms would allow cloning NODE. */
406 ipcp_versionable_function_p (struct cgraph_node
*node
)
408 struct cgraph_edge
*edge
;
410 /* There are a number of generic reasons functions cannot be versioned. */
411 if (!node
->local
.versionable
)
414 /* Removing arguments doesn't work if the function takes varargs
415 or use __builtin_apply_args. */
416 for (edge
= node
->callees
; edge
; edge
= edge
->next_callee
)
418 tree t
= edge
->callee
->decl
;
419 if (DECL_BUILT_IN_CLASS (t
) == BUILT_IN_NORMAL
420 && (DECL_FUNCTION_CODE (t
) == BUILT_IN_APPLY_ARGS
421 || DECL_FUNCTION_CODE (t
) == BUILT_IN_VA_START
))
428 /* Return true if this NODE is viable candidate for cloning. */
430 ipcp_cloning_candidate_p (struct cgraph_node
*node
)
434 gcov_type direct_call_sum
= 0;
435 struct cgraph_edge
*e
;
437 /* We never clone functions that are not visible from outside.
438 FIXME: in future we should clone such functions when they are called with
439 different constants, but current ipcp implementation is not good on this.
441 if (cgraph_only_called_directly_p (node
) || !node
->analyzed
)
444 if (cgraph_function_body_availability (node
) <= AVAIL_OVERWRITABLE
)
447 fprintf (dump_file
, "Not considering %s for cloning; body is overwrittable.\n",
448 cgraph_node_name (node
));
451 if (!ipcp_versionable_function_p (node
))
454 fprintf (dump_file
, "Not considering %s for cloning; body is not versionable.\n",
455 cgraph_node_name (node
));
458 for (e
= node
->callers
; e
; e
= e
->next_caller
)
460 direct_call_sum
+= e
->count
;
462 if (cgraph_maybe_hot_edge_p (e
))
469 fprintf (dump_file
, "Not considering %s for cloning; no direct calls.\n",
470 cgraph_node_name (node
));
473 if (node
->local
.inline_summary
.self_size
< n_calls
)
476 fprintf (dump_file
, "Considering %s for cloning; code would shrink.\n",
477 cgraph_node_name (node
));
481 if (!flag_ipa_cp_clone
)
484 fprintf (dump_file
, "Not considering %s for cloning; -fipa-cp-clone disabled.\n",
485 cgraph_node_name (node
));
489 if (!optimize_function_for_speed_p (DECL_STRUCT_FUNCTION (node
->decl
)))
492 fprintf (dump_file
, "Not considering %s for cloning; optimizing it for size.\n",
493 cgraph_node_name (node
));
497 /* When profile is available and function is hot, propagate into it even if
498 calls seems cold; constant propagation can improve function's speed
502 if (direct_call_sum
> node
->count
* 90 / 100)
505 fprintf (dump_file
, "Considering %s for cloning; usually called directly.\n",
506 cgraph_node_name (node
));
513 fprintf (dump_file
, "Not considering %s for cloning; no hot calls.\n",
514 cgraph_node_name (node
));
518 fprintf (dump_file
, "Considering %s for cloning.\n",
519 cgraph_node_name (node
));
523 /* Initialize ipcp_lattices array. The lattices corresponding to supported
524 types (integers, real types and Fortran constants defined as const_decls)
525 are initialized to IPA_TOP, the rest of them to IPA_BOTTOM. */
527 ipcp_initialize_node_lattices (struct cgraph_node
*node
)
530 struct ipa_node_params
*info
= IPA_NODE_REF (node
);
531 enum ipa_lattice_type type
;
533 if (ipa_is_called_with_var_arguments (info
))
535 else if (cgraph_only_called_directly_p (node
))
537 /* When cloning is allowed, we can assume that externally visible functions
538 are not called. We will compensate this by cloning later. */
539 else if (ipcp_cloning_candidate_p (node
))
540 type
= IPA_TOP
, n_cloning_candidates
++;
544 for (i
= 0; i
< ipa_get_param_count (info
) ; i
++)
545 ipcp_get_lattice (info
, i
)->type
= type
;
548 /* build INTEGER_CST tree with type TREE_TYPE and value according to LAT.
551 build_const_val (struct ipcp_lattice
*lat
, tree tree_type
)
555 gcc_assert (ipcp_lat_is_const (lat
));
558 if (!useless_type_conversion_p (tree_type
, TREE_TYPE (val
)))
560 if (fold_convertible_p (tree_type
, val
))
561 return fold_build1 (NOP_EXPR
, tree_type
, val
);
563 return fold_build1 (VIEW_CONVERT_EXPR
, tree_type
, val
);
568 /* Compute the proper scale for NODE. It is the ratio between the number of
569 direct calls (represented on the incoming cgraph_edges) and sum of all
570 invocations of NODE (represented as count in cgraph_node).
572 FIXME: This code is wrong. Since the callers can be also clones and
573 the clones are not scaled yet, the sums get unrealistically high.
574 To properly compute the counts, we would need to do propagation across
575 callgraph (as external call to A might imply call to non-cloned B
576 if A's clone calls cloned B). */
578 ipcp_compute_node_scale (struct cgraph_node
*node
)
581 struct cgraph_edge
*cs
;
584 /* Compute sum of all counts of callers. */
585 for (cs
= node
->callers
; cs
!= NULL
; cs
= cs
->next_caller
)
587 /* Work around the unrealistically high sum problem. We just don't want
588 the non-cloned body to have negative or very low frequency. Since
589 majority of execution time will be spent in clones anyway, this should
590 give good enough profile. */
591 if (sum
> node
->count
* 9 / 10)
592 sum
= node
->count
* 9 / 10;
593 if (node
->count
== 0)
594 ipcp_set_node_scale (node
, 0);
596 ipcp_set_node_scale (node
, sum
* REG_BR_PROB_BASE
/ node
->count
);
599 /* Initialization and computation of IPCP data structures. This is the initial
600 intraprocedural analysis of functions, which gathers information to be
601 propagated later on. */
604 ipcp_init_stage (void)
606 struct cgraph_node
*node
;
608 for (node
= cgraph_nodes
; node
; node
= node
->next
)
611 /* Unreachable nodes should have been eliminated before ipcp. */
612 gcc_assert (node
->needed
|| node
->reachable
);
614 node
->local
.versionable
= tree_versionable_function_p (node
->decl
);
615 ipa_analyze_node (node
);
619 /* Return true if there are some formal parameters whose value is IPA_TOP (in
620 the whole compilation unit). Change their values to IPA_BOTTOM, since they
621 most probably get their values from outside of this compilation unit. */
623 ipcp_change_tops_to_bottom (void)
626 struct cgraph_node
*node
;
630 for (node
= cgraph_nodes
; node
; node
= node
->next
)
632 struct ipa_node_params
*info
= IPA_NODE_REF (node
);
633 count
= ipa_get_param_count (info
);
634 for (i
= 0; i
< count
; i
++)
636 struct ipcp_lattice
*lat
= ipcp_get_lattice (info
, i
);
637 if (lat
->type
== IPA_TOP
)
642 fprintf (dump_file
, "Forcing param ");
643 print_generic_expr (dump_file
, ipa_get_param (info
, i
), 0);
644 fprintf (dump_file
, " of node %s to bottom.\n",
645 cgraph_node_name (node
));
647 lat
->type
= IPA_BOTTOM
;
654 /* Interprocedural analysis. The algorithm propagates constants from the
655 caller's parameters to the callee's arguments. */
657 ipcp_propagate_stage (void)
660 struct ipcp_lattice inc_lat
= { IPA_BOTTOM
, NULL
};
661 struct ipcp_lattice new_lat
= { IPA_BOTTOM
, NULL
};
662 struct ipcp_lattice
*dest_lat
;
663 struct cgraph_edge
*cs
;
664 struct ipa_jump_func
*jump_func
;
665 struct ipa_func_list
*wl
;
668 ipa_check_create_node_params ();
669 ipa_check_create_edge_args ();
671 /* Initialize worklist to contain all functions. */
672 wl
= ipa_init_func_list ();
675 struct cgraph_node
*node
= ipa_pop_func_from_list (&wl
);
676 struct ipa_node_params
*info
= IPA_NODE_REF (node
);
678 for (cs
= node
->callees
; cs
; cs
= cs
->next_callee
)
680 struct ipa_node_params
*callee_info
= IPA_NODE_REF (cs
->callee
);
681 struct ipa_edge_args
*args
= IPA_EDGE_REF (cs
);
683 if (ipa_is_called_with_var_arguments (callee_info
)
684 || !cs
->callee
->analyzed
685 || ipa_is_called_with_var_arguments (callee_info
))
688 count
= ipa_get_cs_argument_count (args
);
689 for (i
= 0; i
< count
; i
++)
691 jump_func
= ipa_get_ith_jump_func (args
, i
);
692 ipcp_lattice_from_jfunc (info
, &inc_lat
, jump_func
);
693 dest_lat
= ipcp_get_lattice (callee_info
, i
);
694 ipa_lattice_meet (&new_lat
, &inc_lat
, dest_lat
);
695 if (ipcp_lattice_changed (&new_lat
, dest_lat
))
697 dest_lat
->type
= new_lat
.type
;
698 dest_lat
->constant
= new_lat
.constant
;
699 ipa_push_func_to_list (&wl
, cs
->callee
);
706 /* Call the constant propagation algorithm and re-call it if necessary
707 (if there are undetermined values left). */
709 ipcp_iterate_stage (void)
711 struct cgraph_node
*node
;
712 n_cloning_candidates
= 0;
715 fprintf (dump_file
, "\nIPA iterate stage:\n\n");
718 ipa_update_after_lto_read ();
720 for (node
= cgraph_nodes
; node
; node
= node
->next
)
722 ipcp_initialize_node_lattices (node
);
723 ipcp_compute_node_scale (node
);
725 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
727 ipcp_print_all_lattices (dump_file
);
728 ipcp_function_scale_print (dump_file
);
731 ipcp_propagate_stage ();
732 if (ipcp_change_tops_to_bottom ())
733 /* Some lattices have changed from IPA_TOP to IPA_BOTTOM.
734 This change should be propagated. */
736 gcc_assert (n_cloning_candidates
);
737 ipcp_propagate_stage ();
741 fprintf (dump_file
, "\nIPA lattices after propagation:\n");
742 ipcp_print_all_lattices (dump_file
);
743 if (dump_flags
& TDF_DETAILS
)
744 ipcp_print_profile_data (dump_file
);
/* Check conditions to forbid constant insertion to function described by
   NODE.  Return true if insertion is allowed; currently that is exactly
   when ipcp_versionable_function_p accepts NODE.  */
static inline bool
ipcp_node_modifiable_p (struct cgraph_node *node)
{
  /* Once we will be able to do in-place replacement, this check could
     presumably be relaxed.  */
  return ipcp_versionable_function_p (node);
}
758 /* Print count scale data structures. */
760 ipcp_function_scale_print (FILE * f
)
762 struct cgraph_node
*node
;
764 for (node
= cgraph_nodes
; node
; node
= node
->next
)
768 fprintf (f
, "printing scale for %s: ", cgraph_node_name (node
));
769 fprintf (f
, "value is " HOST_WIDE_INT_PRINT_DEC
770 " \n", (HOST_WIDE_INT
) ipcp_get_node_scale (node
));
774 /* Print counts of all cgraph nodes. */
776 ipcp_print_func_profile_counts (FILE * f
)
778 struct cgraph_node
*node
;
780 for (node
= cgraph_nodes
; node
; node
= node
->next
)
782 fprintf (f
, "function %s: ", cgraph_node_name (node
));
783 fprintf (f
, "count is " HOST_WIDE_INT_PRINT_DEC
784 " \n", (HOST_WIDE_INT
) node
->count
);
788 /* Print counts of all cgraph edges. */
790 ipcp_print_call_profile_counts (FILE * f
)
792 struct cgraph_node
*node
;
793 struct cgraph_edge
*cs
;
795 for (node
= cgraph_nodes
; node
; node
= node
->next
)
797 for (cs
= node
->callees
; cs
; cs
= cs
->next_callee
)
799 fprintf (f
, "%s -> %s ", cgraph_node_name (cs
->caller
),
800 cgraph_node_name (cs
->callee
));
801 fprintf (f
, "count is " HOST_WIDE_INT_PRINT_DEC
" \n",
802 (HOST_WIDE_INT
) cs
->count
);
/* Print profile info for all functions to F: first the per-node counts,
   then the per-call-site counts.  */
static void
ipcp_print_profile_data (FILE * f)
{
  fprintf (f, "\nNODE COUNTS :\n");
  ipcp_print_func_profile_counts (f);
  fprintf (f, "\nCS COUNTS stage:\n");
  ipcp_print_call_profile_counts (f);
}
817 /* Build and initialize ipa_replace_map struct according to LAT. This struct is
818 processed by versioning, which operates according to the flags set.
819 PARM_TREE is the formal parameter found to be constant. LAT represents the
821 static struct ipa_replace_map
*
822 ipcp_create_replace_map (tree parm_tree
, struct ipcp_lattice
*lat
)
824 struct ipa_replace_map
*replace_map
;
827 replace_map
= ggc_alloc_ipa_replace_map ();
828 const_val
= build_const_val (lat
, TREE_TYPE (parm_tree
));
831 fprintf (dump_file
, " replacing param ");
832 print_generic_expr (dump_file
, parm_tree
, 0);
833 fprintf (dump_file
, " with const ");
834 print_generic_expr (dump_file
, const_val
, 0);
835 fprintf (dump_file
, "\n");
837 replace_map
->old_tree
= parm_tree
;
838 replace_map
->new_tree
= const_val
;
839 replace_map
->replace_p
= true;
840 replace_map
->ref_p
= false;
845 /* Return true if this callsite should be redirected to the original callee
846 (instead of the cloned one). */
848 ipcp_need_redirect_p (struct cgraph_edge
*cs
)
850 struct ipa_node_params
*orig_callee_info
;
852 struct ipa_jump_func
*jump_func
;
853 struct cgraph_node
*node
= cs
->callee
, *orig
;
855 if (!n_cloning_candidates
)
858 if ((orig
= ipcp_get_orig_node (node
)) != NULL
)
860 if (ipcp_get_orig_node (cs
->caller
))
863 orig_callee_info
= IPA_NODE_REF (node
);
864 count
= ipa_get_param_count (orig_callee_info
);
865 for (i
= 0; i
< count
; i
++)
867 struct ipcp_lattice
*lat
= ipcp_get_lattice (orig_callee_info
, i
);
868 if (ipcp_lat_is_const (lat
))
870 jump_func
= ipa_get_ith_jump_func (IPA_EDGE_REF (cs
), i
);
871 if (jump_func
->type
!= IPA_JF_CONST
)
879 /* Fix the callsites and the call graph after function cloning was done. */
881 ipcp_update_callgraph (void)
883 struct cgraph_node
*node
;
885 for (node
= cgraph_nodes
; node
; node
= node
->next
)
886 if (node
->analyzed
&& ipcp_node_is_clone (node
))
888 bitmap args_to_skip
= BITMAP_ALLOC (NULL
);
889 struct cgraph_node
*orig_node
= ipcp_get_orig_node (node
);
890 struct ipa_node_params
*info
= IPA_NODE_REF (orig_node
);
891 int i
, count
= ipa_get_param_count (info
);
892 struct cgraph_edge
*cs
, *next
;
894 for (i
= 0; i
< count
; i
++)
896 struct ipcp_lattice
*lat
= ipcp_get_lattice (info
, i
);
898 /* We can proactively remove obviously unused arguments. */
899 if (!ipa_is_param_used (info
, i
))
901 bitmap_set_bit (args_to_skip
, i
);
905 if (lat
->type
== IPA_CONST_VALUE
)
906 bitmap_set_bit (args_to_skip
, i
);
908 for (cs
= node
->callers
; cs
; cs
= next
)
910 next
= cs
->next_caller
;
911 if (!ipcp_node_is_clone (cs
->caller
) && ipcp_need_redirect_p (cs
))
912 cgraph_redirect_edge_callee (cs
, orig_node
);
917 /* Update profiling info for versioned functions and the functions they were
920 ipcp_update_profiling (void)
922 struct cgraph_node
*node
, *orig_node
;
923 gcov_type scale
, scale_complement
;
924 struct cgraph_edge
*cs
;
926 for (node
= cgraph_nodes
; node
; node
= node
->next
)
928 if (ipcp_node_is_clone (node
))
930 orig_node
= ipcp_get_orig_node (node
);
931 scale
= ipcp_get_node_scale (orig_node
);
932 node
->count
= orig_node
->count
* scale
/ REG_BR_PROB_BASE
;
933 scale_complement
= REG_BR_PROB_BASE
- scale
;
935 orig_node
->count
* scale_complement
/ REG_BR_PROB_BASE
;
936 for (cs
= node
->callees
; cs
; cs
= cs
->next_callee
)
937 cs
->count
= cs
->count
* scale
/ REG_BR_PROB_BASE
;
938 for (cs
= orig_node
->callees
; cs
; cs
= cs
->next_callee
)
939 cs
->count
= cs
->count
* scale_complement
/ REG_BR_PROB_BASE
;
944 /* If NODE was cloned, how much would program grow? */
946 ipcp_estimate_growth (struct cgraph_node
*node
)
948 struct cgraph_edge
*cs
;
949 int redirectable_node_callers
= 0;
950 int removable_args
= 0;
951 bool need_original
= !cgraph_only_called_directly_p (node
);
952 struct ipa_node_params
*info
;
956 for (cs
= node
->callers
; cs
!= NULL
; cs
= cs
->next_caller
)
957 if (cs
->caller
== node
|| !ipcp_need_redirect_p (cs
))
958 redirectable_node_callers
++;
960 need_original
= true;
962 /* If we will be able to fully replace original node, we never increase
967 info
= IPA_NODE_REF (node
);
968 count
= ipa_get_param_count (info
);
969 for (i
= 0; i
< count
; i
++)
971 struct ipcp_lattice
*lat
= ipcp_get_lattice (info
, i
);
973 /* We can proactively remove obviously unused arguments. */
974 if (!ipa_is_param_used (info
, i
))
977 if (lat
->type
== IPA_CONST_VALUE
)
981 /* We make just a very simple estimate of savings for removal of operand from
982 call site. Precise cost is difficult to get, as our size metric counts
983 constants and moves as free. Generally we are looking for cases where a
984 small function is called very many times. */
985 growth
= node
->local
.inline_summary
.self_size
986 - removable_args
* redirectable_node_callers
;
993 /* Estimate cost of cloning NODE. */
995 ipcp_estimate_cloning_cost (struct cgraph_node
*node
)
998 gcov_type count_sum
= 1;
999 struct cgraph_edge
*e
;
1002 cost
= ipcp_estimate_growth (node
) * 1000;
1006 fprintf (dump_file
, "Versioning of %s will save code size\n",
1007 cgraph_node_name (node
));
1011 for (e
= node
->callers
; e
; e
= e
->next_caller
)
1012 if (!bitmap_bit_p (dead_nodes
, e
->caller
->uid
)
1013 && !ipcp_need_redirect_p (e
))
1015 count_sum
+= e
->count
;
1016 freq_sum
+= e
->frequency
+ 1;
1020 cost
/= count_sum
* 1000 / max_count
+ 1;
1022 cost
/= freq_sum
* 1000 / REG_BR_PROB_BASE
+ 1;
1024 fprintf (dump_file
, "Cost of versioning %s is %i, (size: %i, freq: %i)\n",
1025 cgraph_node_name (node
), cost
, node
->local
.inline_summary
.self_size
,
1030 /* Return number of live constant parameters. */
1032 ipcp_const_param_count (struct cgraph_node
*node
)
1034 int const_param
= 0;
1035 struct ipa_node_params
*info
= IPA_NODE_REF (node
);
1036 int count
= ipa_get_param_count (info
);
1039 for (i
= 0; i
< count
; i
++)
1041 struct ipcp_lattice
*lat
= ipcp_get_lattice (info
, i
);
1042 if (ipcp_lat_is_insertable (lat
)
1043 /* Do not count obviously unused arguments. */
1044 && ipa_is_param_used (info
, i
))
1050 /* Propagate the constant parameters found by ipcp_iterate_stage()
1051 to the function's code. */
1053 ipcp_insert_stage (void)
1055 struct cgraph_node
*node
, *node1
= NULL
;
1057 VEC (cgraph_edge_p
, heap
) * redirect_callers
;
1058 VEC (ipa_replace_map_p
,gc
)* replace_trees
;
1059 int node_callers
, count
;
1061 struct ipa_replace_map
*replace_param
;
1063 long overall_size
= 0, new_size
= 0;
1066 ipa_check_create_node_params ();
1067 ipa_check_create_edge_args ();
1069 fprintf (dump_file
, "\nIPA insert stage:\n\n");
1071 dead_nodes
= BITMAP_ALLOC (NULL
);
1073 for (node
= cgraph_nodes
; node
; node
= node
->next
)
1076 if (node
->count
> max_count
)
1077 max_count
= node
->count
;
1078 overall_size
+= node
->local
.inline_summary
.self_size
;
1081 max_new_size
= overall_size
;
1082 if (max_new_size
< PARAM_VALUE (PARAM_LARGE_UNIT_INSNS
))
1083 max_new_size
= PARAM_VALUE (PARAM_LARGE_UNIT_INSNS
);
1084 max_new_size
= max_new_size
* PARAM_VALUE (PARAM_IPCP_UNIT_GROWTH
) / 100 + 1;
1086 /* First collect all functions we proved to have constant arguments to heap. */
1087 heap
= fibheap_new ();
1088 for (node
= cgraph_nodes
; node
; node
= node
->next
)
1090 struct ipa_node_params
*info
;
1091 /* Propagation of the constant is forbidden in certain conditions. */
1092 if (!node
->analyzed
|| !ipcp_node_modifiable_p (node
))
1094 info
= IPA_NODE_REF (node
);
1095 if (ipa_is_called_with_var_arguments (info
))
1097 if (ipcp_const_param_count (node
))
1098 node
->aux
= fibheap_insert (heap
, ipcp_estimate_cloning_cost (node
), node
);
1101 /* Now clone in priority order until code size growth limits are met or
1103 while (!fibheap_empty (heap
))
1105 struct ipa_node_params
*info
;
1107 bitmap args_to_skip
;
1108 struct cgraph_edge
*cs
;
1110 node
= (struct cgraph_node
*)fibheap_extract_min (heap
);
1113 fprintf (dump_file
, "considering function %s\n",
1114 cgraph_node_name (node
));
1116 growth
= ipcp_estimate_growth (node
);
1118 if (new_size
+ growth
> max_new_size
)
1121 && optimize_function_for_size_p (DECL_STRUCT_FUNCTION (node
->decl
)))
1124 fprintf (dump_file
, "Not versioning, cold code would grow");
1130 /* Look if original function becomes dead after cloning. */
1131 for (cs
= node
->callers
; cs
!= NULL
; cs
= cs
->next_caller
)
1132 if (cs
->caller
== node
|| ipcp_need_redirect_p (cs
))
1134 if (!cs
&& cgraph_only_called_directly_p (node
))
1135 bitmap_set_bit (dead_nodes
, node
->uid
);
1137 info
= IPA_NODE_REF (node
);
1138 count
= ipa_get_param_count (info
);
1140 replace_trees
= VEC_alloc (ipa_replace_map_p
, gc
, 1);
1141 args_to_skip
= BITMAP_GGC_ALLOC ();
1142 for (i
= 0; i
< count
; i
++)
1144 struct ipcp_lattice
*lat
= ipcp_get_lattice (info
, i
);
1145 parm_tree
= ipa_get_param (info
, i
);
1147 /* We can proactively remove obviously unused arguments. */
1148 if (!ipa_is_param_used (info
, i
))
1150 bitmap_set_bit (args_to_skip
, i
);
1154 if (lat
->type
== IPA_CONST_VALUE
)
1157 ipcp_create_replace_map (parm_tree
, lat
);
1158 VEC_safe_push (ipa_replace_map_p
, gc
, replace_trees
, replace_param
);
1159 bitmap_set_bit (args_to_skip
, i
);
1163 /* Compute how many callers node has. */
1165 for (cs
= node
->callers
; cs
!= NULL
; cs
= cs
->next_caller
)
1167 redirect_callers
= VEC_alloc (cgraph_edge_p
, heap
, node_callers
);
1168 for (cs
= node
->callers
; cs
!= NULL
; cs
= cs
->next_caller
)
1169 VEC_quick_push (cgraph_edge_p
, redirect_callers
, cs
);
1171 /* Redirecting all the callers of the node to the
1172 new versioned node. */
1174 cgraph_create_virtual_clone (node
, redirect_callers
, replace_trees
,
1175 args_to_skip
, "constprop");
1176 args_to_skip
= NULL
;
1177 VEC_free (cgraph_edge_p
, heap
, redirect_callers
);
1178 replace_trees
= NULL
;
1183 fprintf (dump_file
, "versioned function %s with growth %i, overall %i\n",
1184 cgraph_node_name (node
), (int)growth
, (int)new_size
);
1185 ipcp_init_cloned_node (node
, node1
);
1187 /* TODO: We can use indirect inlining info to produce new calls. */
1190 dump_function_to_file (node1
->decl
, dump_file
, dump_flags
);
1192 for (cs
= node
->callees
; cs
; cs
= cs
->next_callee
)
1193 if (cs
->callee
->aux
)
1195 fibheap_delete_node (heap
, (fibnode_t
) cs
->callee
->aux
);
1196 cs
->callee
->aux
= fibheap_insert (heap
,
1197 ipcp_estimate_cloning_cost (cs
->callee
),
1202 while (!fibheap_empty (heap
))
1205 fprintf (dump_file
, "skipping function %s\n",
1206 cgraph_node_name (node
));
1207 node
= (struct cgraph_node
*) fibheap_extract_min (heap
);
1210 fibheap_delete (heap
);
1211 BITMAP_FREE (dead_nodes
);
1212 ipcp_update_callgraph ();
1213 ipcp_update_profiling ();
1216 /* The IPCP driver. */
1220 cgraph_remove_unreachable_nodes (true,dump_file
);
1223 fprintf (dump_file
, "\nIPA structures before propagation:\n");
1224 if (dump_flags
& TDF_DETAILS
)
1225 ipa_print_all_params (dump_file
);
1226 ipa_print_all_jump_functions (dump_file
);
1228 /* 2. Do the interprocedural propagation. */
1229 ipcp_iterate_stage ();
1230 /* 3. Insert the constants found to the functions. */
1231 ipcp_insert_stage ();
1232 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1234 fprintf (dump_file
, "\nProfiling info after insert stage:\n");
1235 ipcp_print_profile_data (dump_file
);
1237 /* Free all IPCP structures. */
1238 ipa_free_all_structures_after_ipa_cp ();
1240 fprintf (dump_file
, "\nIPA constant propagation end\n");
1244 /* Note function body size. */
1246 ipcp_generate_summary (void)
1249 fprintf (dump_file
, "\nIPA constant propagation start:\n");
1250 ipa_check_create_node_params ();
1251 ipa_check_create_edge_args ();
1252 ipa_register_cgraph_hooks ();
1253 /* 1. Call the init stage to initialize
1254 the ipa_node_params and ipa_edge_args structures. */
1258 /* Write ipcp summary for nodes in SET. */
1260 ipcp_write_summary (cgraph_node_set set
,
1261 varpool_node_set vset ATTRIBUTE_UNUSED
)
1263 ipa_prop_write_jump_functions (set
);
/* Read ipcp summary by delegating to ipa_prop_read_jump_functions.  */
static void
ipcp_read_summary (void)
{
  ipa_prop_read_jump_functions ();
}
1273 /* Gate for IPCP optimization. */
1275 cgraph_gate_cp (void)
1280 struct ipa_opt_pass_d pass_ipa_cp
=
1285 cgraph_gate_cp
, /* gate */
1286 ipcp_driver
, /* execute */
1289 0, /* static_pass_number */
1290 TV_IPA_CONSTANT_PROP
, /* tv_id */
1291 0, /* properties_required */
1292 0, /* properties_provided */
1293 0, /* properties_destroyed */
1294 0, /* todo_flags_start */
1295 TODO_dump_cgraph
| TODO_dump_func
|
1296 TODO_remove_functions
| TODO_ggc_collect
/* todo_flags_finish */
1298 ipcp_generate_summary
, /* generate_summary */
1299 ipcp_write_summary
, /* write_summary */
1300 ipcp_read_summary
, /* read_summary */
1301 NULL
, /* write_optimization_summary */
1302 NULL
, /* read_optimization_summary */
1303 NULL
, /* stmt_fixup */
1305 NULL
, /* function_transform */
1306 NULL
, /* variable_transform */