ipa-prop.c (ipa_print_node_jump_functions): Print jump functions also for indirect...
[gcc.git] / gcc / ipa-cp.c
1 /* Interprocedural constant propagation
2 Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010
3 Free Software Foundation, Inc.
4 Contributed by Razya Ladelsky <RAZYA@il.ibm.com>
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 /* Interprocedural constant propagation. The aim of interprocedural constant
23 propagation (IPCP) is to find which function's argument has the same
24 constant value in each invocation throughout the whole program. For example,
25 consider the following program:
26
27 int g (int y)
28 {
29 printf ("value is %d",y);
30 }
31
32 int f (int x)
33 {
34 g (x);
35 }
36
37 int h (int y)
38 {
39 g (y);
40 }
41
42 void main (void)
43 {
44 f (3);
45 h (3);
46 }
47
48
49 The IPCP algorithm will find that g's formal argument y is always called
50 with the value 3.
51
52 The algorithm used is based on "Interprocedural Constant Propagation", by
53 Challahan David, Keith D Cooper, Ken Kennedy, Linda Torczon, Comp86, pg
54 152-161
55
56 The optimization is divided into three stages:
57
58 First stage - intraprocedural analysis
59 =======================================
60 This phase computes jump_function and modification flags.
61
62 A jump function for a callsite represents the values passed as an actual
63 arguments of a given callsite. There are three types of values:
64 Pass through - the caller's formal parameter is passed as an actual argument.
65 Constant - a constant is passed as an actual argument.
66 Unknown - neither of the above.
67
68 The jump function info, ipa_jump_func, is stored in ipa_edge_args
69 structure (defined in ipa_prop.h and pointed to by cgraph_node->aux)
70 modified_flags are defined in ipa_node_params structure
71 (defined in ipa_prop.h and pointed to by cgraph_edge->aux).
72
73 -ipcp_init_stage() is the first stage driver.
74
75 Second stage - interprocedural analysis
76 ========================================
77 This phase does the interprocedural constant propagation.
78 It computes lattices for all formal parameters in the program
79 and their value that may be:
80 TOP - unknown.
81 BOTTOM - non constant.
82 CONSTANT - constant value.
83
84 Lattice describing a formal parameter p will have a constant value if all
85 callsites invoking this function have the same constant value passed to p.
86
87 The lattices are stored in ipcp_lattice which is itself in ipa_node_params
88 structure (defined in ipa_prop.h and pointed to by cgraph_edge->aux).
89
90 -ipcp_iterate_stage() is the second stage driver.
91
92 Third phase - transformation of function code
93 ============================================
94 Propagates the constant-valued formals into the function.
95 For each function whose parameters are constants, we create its clone.
96
97 Then we process the clone in two ways:
98 1. We insert an assignment statement 'parameter = const' at the beginning
99 of the cloned function.
100 2. For read-only parameters that do not live in memory, we replace all their
101 uses with the constant.
102
103 We also need to modify some callsites to call the cloned functions instead
104 of the original ones. For a callsite passing an argument found to be a
105 constant by IPCP, there are two different cases to handle:
106 1. A constant is passed as an argument. In this case the callsite in the
107 should be redirected to call the cloned callee.
108 2. A parameter (of the caller) passed as an argument (pass through
109 argument). In such cases both the caller and the callee have clones and
110 only the callsite in the cloned caller is redirected to call to the
111 cloned callee.
112
113 This update is done in two steps: First all cloned functions are created
114 during a traversal of the call graph, during which all callsites are
115 redirected to call the cloned function. Then the callsites are traversed
116 and many calls redirected back to fit the description above.
117
118 -ipcp_insert_stage() is the third phase driver.
119
120 */
121
122 #include "config.h"
123 #include "system.h"
124 #include "coretypes.h"
125 #include "tree.h"
126 #include "target.h"
127 #include "cgraph.h"
128 #include "ipa-prop.h"
129 #include "tree-flow.h"
130 #include "tree-pass.h"
131 #include "flags.h"
132 #include "timevar.h"
133 #include "diagnostic.h"
134 #include "tree-dump.h"
135 #include "tree-inline.h"
136 #include "fibheap.h"
137 #include "params.h"
138
139 /* Number of functions identified as candidates for cloning. When not cloning
140 we can simplify iterate stage not forcing it to go through the decision
141 on what is profitable and what not. */
142 static int n_cloning_candidates;
143
144 /* Maximal count found in program. */
145 static gcov_type max_count;
146
147 /* Cgraph nodes that has been completely replaced by cloning during iterate
148 * stage and will be removed after ipcp is finished. */
149 static bitmap dead_nodes;
150
151 static void ipcp_print_profile_data (FILE *);
152 static void ipcp_function_scale_print (FILE *);
153
154 /* Get the original node field of ipa_node_params associated with node NODE. */
155 static inline struct cgraph_node *
156 ipcp_get_orig_node (struct cgraph_node *node)
157 {
158 return IPA_NODE_REF (node)->ipcp_orig_node;
159 }
160
161 /* Return true if NODE describes a cloned/versioned function. */
162 static inline bool
163 ipcp_node_is_clone (struct cgraph_node *node)
164 {
165 return (ipcp_get_orig_node (node) != NULL);
166 }
167
168 /* Create ipa_node_params and its data structures for NEW_NODE. Set ORIG_NODE
169 as the ipcp_orig_node field in ipa_node_params. */
170 static void
171 ipcp_init_cloned_node (struct cgraph_node *orig_node,
172 struct cgraph_node *new_node)
173 {
174 ipa_check_create_node_params ();
175 ipa_initialize_node_params (new_node);
176 IPA_NODE_REF (new_node)->ipcp_orig_node = orig_node;
177 }
178
179 /* Perform intraprocedrual analysis needed for ipcp. */
180 static void
181 ipcp_analyze_node (struct cgraph_node *node)
182 {
183 /* Unreachable nodes should have been eliminated before ipcp. */
184 gcc_assert (node->needed || node->reachable);
185
186 node->local.versionable = tree_versionable_function_p (node->decl);
187 ipa_initialize_node_params (node);
188 ipa_detect_param_modifications (node);
189 }
190
191 /* Return scale for NODE. */
192 static inline gcov_type
193 ipcp_get_node_scale (struct cgraph_node *node)
194 {
195 return IPA_NODE_REF (node)->count_scale;
196 }
197
198 /* Set COUNT as scale for NODE. */
199 static inline void
200 ipcp_set_node_scale (struct cgraph_node *node, gcov_type count)
201 {
202 IPA_NODE_REF (node)->count_scale = count;
203 }
204
205 /* Return whether LAT is a constant lattice. */
206 static inline bool
207 ipcp_lat_is_const (struct ipcp_lattice *lat)
208 {
209 if (lat->type == IPA_CONST_VALUE)
210 return true;
211 else
212 return false;
213 }
214
215 /* Return whether LAT is a constant lattice that ipa-cp can actually insert
216 into the code (i.e. constants excluding member pointers and pointers). */
217 static inline bool
218 ipcp_lat_is_insertable (struct ipcp_lattice *lat)
219 {
220 return lat->type == IPA_CONST_VALUE;
221 }
222
223 /* Return true if LAT1 and LAT2 are equal. */
224 static inline bool
225 ipcp_lats_are_equal (struct ipcp_lattice *lat1, struct ipcp_lattice *lat2)
226 {
227 gcc_assert (ipcp_lat_is_const (lat1) && ipcp_lat_is_const (lat2));
228 if (lat1->type != lat2->type)
229 return false;
230
231 if (TREE_CODE (lat1->constant) == ADDR_EXPR
232 && TREE_CODE (lat2->constant) == ADDR_EXPR
233 && TREE_CODE (TREE_OPERAND (lat1->constant, 0)) == CONST_DECL
234 && TREE_CODE (TREE_OPERAND (lat2->constant, 0)) == CONST_DECL)
235 return operand_equal_p (DECL_INITIAL (TREE_OPERAND (lat1->constant, 0)),
236 DECL_INITIAL (TREE_OPERAND (lat2->constant, 0)), 0);
237 else
238 return operand_equal_p (lat1->constant, lat2->constant, 0);
239 }
240
241 /* Compute Meet arithmetics:
242 Meet (IPA_BOTTOM, x) = IPA_BOTTOM
243 Meet (IPA_TOP,x) = x
244 Meet (const_a,const_b) = IPA_BOTTOM, if const_a != const_b.
245 MEET (const_a,const_b) = const_a, if const_a == const_b.*/
246 static void
247 ipa_lattice_meet (struct ipcp_lattice *res, struct ipcp_lattice *lat1,
248 struct ipcp_lattice *lat2)
249 {
250 if (lat1->type == IPA_BOTTOM || lat2->type == IPA_BOTTOM)
251 {
252 res->type = IPA_BOTTOM;
253 return;
254 }
255 if (lat1->type == IPA_TOP)
256 {
257 res->type = lat2->type;
258 res->constant = lat2->constant;
259 return;
260 }
261 if (lat2->type == IPA_TOP)
262 {
263 res->type = lat1->type;
264 res->constant = lat1->constant;
265 return;
266 }
267 if (!ipcp_lats_are_equal (lat1, lat2))
268 {
269 res->type = IPA_BOTTOM;
270 return;
271 }
272 res->type = lat1->type;
273 res->constant = lat1->constant;
274 }
275
276 /* Return the lattice corresponding to the Ith formal parameter of the function
277 described by INFO. */
278 static inline struct ipcp_lattice *
279 ipcp_get_lattice (struct ipa_node_params *info, int i)
280 {
281 return &(info->params[i].ipcp_lattice);
282 }
283
284 /* Given the jump function JFUNC, compute the lattice LAT that describes the
285 value coming down the callsite. INFO describes the caller node so that
286 pass-through jump functions can be evaluated. */
287 static void
288 ipcp_lattice_from_jfunc (struct ipa_node_params *info, struct ipcp_lattice *lat,
289 struct ipa_jump_func *jfunc)
290 {
291 if (jfunc->type == IPA_JF_CONST)
292 {
293 lat->type = IPA_CONST_VALUE;
294 lat->constant = jfunc->value.constant;
295 }
296 else if (jfunc->type == IPA_JF_PASS_THROUGH)
297 {
298 struct ipcp_lattice *caller_lat;
299 tree cst;
300
301 caller_lat = ipcp_get_lattice (info, jfunc->value.pass_through.formal_id);
302 lat->type = caller_lat->type;
303 if (caller_lat->type != IPA_CONST_VALUE)
304 return;
305 cst = caller_lat->constant;
306
307 if (jfunc->value.pass_through.operation != NOP_EXPR)
308 {
309 tree restype;
310 if (TREE_CODE_CLASS (jfunc->value.pass_through.operation)
311 == tcc_comparison)
312 restype = boolean_type_node;
313 else
314 restype = TREE_TYPE (cst);
315 cst = fold_binary (jfunc->value.pass_through.operation,
316 restype, cst, jfunc->value.pass_through.operand);
317 }
318 if (!cst || !is_gimple_ip_invariant (cst))
319 lat->type = IPA_BOTTOM;
320 lat->constant = cst;
321 }
322 else if (jfunc->type == IPA_JF_ANCESTOR)
323 {
324 struct ipcp_lattice *caller_lat;
325 tree t;
326 bool ok;
327
328 caller_lat = ipcp_get_lattice (info, jfunc->value.ancestor.formal_id);
329 lat->type = caller_lat->type;
330 if (caller_lat->type != IPA_CONST_VALUE)
331 return;
332 if (TREE_CODE (caller_lat->constant) != ADDR_EXPR)
333 {
334 /* This can happen when the constant is a NULL pointer. */
335 lat->type = IPA_BOTTOM;
336 return;
337 }
338 t = TREE_OPERAND (caller_lat->constant, 0);
339 ok = build_ref_for_offset (&t, TREE_TYPE (t),
340 jfunc->value.ancestor.offset,
341 jfunc->value.ancestor.type, false);
342 if (!ok)
343 {
344 lat->type = IPA_BOTTOM;
345 lat->constant = NULL_TREE;
346 }
347 else
348 lat->constant = build_fold_addr_expr (t);
349 }
350 else
351 lat->type = IPA_BOTTOM;
352 }
353
354 /* True when OLD_LAT and NEW_LAT values are not the same. */
355
356 static bool
357 ipcp_lattice_changed (struct ipcp_lattice *old_lat,
358 struct ipcp_lattice *new_lat)
359 {
360 if (old_lat->type == new_lat->type)
361 {
362 if (!ipcp_lat_is_const (old_lat))
363 return false;
364 if (ipcp_lats_are_equal (old_lat, new_lat))
365 return false;
366 }
367 return true;
368 }
369
370 /* Print all ipcp_lattices of all functions to F. */
371 static void
372 ipcp_print_all_lattices (FILE * f)
373 {
374 struct cgraph_node *node;
375 int i, count;
376
377 fprintf (f, "\nLattice:\n");
378 for (node = cgraph_nodes; node; node = node->next)
379 {
380 struct ipa_node_params *info;
381
382 if (!node->analyzed)
383 continue;
384 info = IPA_NODE_REF (node);
385 fprintf (f, " Node: %s:\n", cgraph_node_name (node));
386 count = ipa_get_param_count (info);
387 for (i = 0; i < count; i++)
388 {
389 struct ipcp_lattice *lat = ipcp_get_lattice (info, i);
390
391 fprintf (f, " param [%d]: ", i);
392 if (lat->type == IPA_CONST_VALUE)
393 {
394 tree cst = lat->constant;
395 fprintf (f, "type is CONST ");
396 print_generic_expr (f, cst, 0);
397 if (TREE_CODE (cst) == ADDR_EXPR
398 && TREE_CODE (TREE_OPERAND (cst, 0)) == CONST_DECL)
399 {
400 fprintf (f, " -> ");
401 print_generic_expr (f, DECL_INITIAL (TREE_OPERAND (cst, 0)),
402 0);
403 }
404 fprintf (f, "\n");
405 }
406 else if (lat->type == IPA_TOP)
407 fprintf (f, "type is TOP\n");
408 else
409 fprintf (f, "type is BOTTOM\n");
410 }
411 }
412 }
413
414 /* Return true if ipcp algorithms would allow cloning NODE. */
415
416 static bool
417 ipcp_versionable_function_p (struct cgraph_node *node)
418 {
419 struct cgraph_edge *edge;
420
421 /* There are a number of generic reasons functions cannot be versioned. */
422 if (!node->local.versionable)
423 return false;
424
425 /* Removing arguments doesn't work if the function takes varargs
426 or use __builtin_apply_args. */
427 for (edge = node->callees; edge; edge = edge->next_callee)
428 {
429 tree t = edge->callee->decl;
430 if (DECL_BUILT_IN_CLASS (t) == BUILT_IN_NORMAL
431 && (DECL_FUNCTION_CODE (t) == BUILT_IN_APPLY_ARGS
432 || DECL_FUNCTION_CODE (t) == BUILT_IN_VA_START))
433 return false;
434 }
435
436 return true;
437 }
438
439 /* Return true if this NODE is viable candidate for cloning. */
440 static bool
441 ipcp_cloning_candidate_p (struct cgraph_node *node)
442 {
443 int n_calls = 0;
444 int n_hot_calls = 0;
445 gcov_type direct_call_sum = 0;
446 struct cgraph_edge *e;
447
448 /* We never clone functions that are not visible from outside.
449 FIXME: in future we should clone such functions when they are called with
450 different constants, but current ipcp implementation is not good on this.
451 */
452 if (cgraph_only_called_directly_p (node) || !node->analyzed)
453 return false;
454
455 if (cgraph_function_body_availability (node) <= AVAIL_OVERWRITABLE)
456 {
457 if (dump_file)
458 fprintf (dump_file, "Not considering %s for cloning; body is overwrittable.\n",
459 cgraph_node_name (node));
460 return false;
461 }
462 if (!ipcp_versionable_function_p (node))
463 {
464 if (dump_file)
465 fprintf (dump_file, "Not considering %s for cloning; body is not versionable.\n",
466 cgraph_node_name (node));
467 return false;
468 }
469 for (e = node->callers; e; e = e->next_caller)
470 {
471 direct_call_sum += e->count;
472 n_calls ++;
473 if (cgraph_maybe_hot_edge_p (e))
474 n_hot_calls ++;
475 }
476
477 if (!n_calls)
478 {
479 if (dump_file)
480 fprintf (dump_file, "Not considering %s for cloning; no direct calls.\n",
481 cgraph_node_name (node));
482 return false;
483 }
484 if (node->local.inline_summary.self_size < n_calls)
485 {
486 if (dump_file)
487 fprintf (dump_file, "Considering %s for cloning; code would shrink.\n",
488 cgraph_node_name (node));
489 return true;
490 }
491
492 if (!flag_ipa_cp_clone)
493 {
494 if (dump_file)
495 fprintf (dump_file, "Not considering %s for cloning; -fipa-cp-clone disabled.\n",
496 cgraph_node_name (node));
497 return false;
498 }
499
500 if (!optimize_function_for_speed_p (DECL_STRUCT_FUNCTION (node->decl)))
501 {
502 if (dump_file)
503 fprintf (dump_file, "Not considering %s for cloning; optimizing it for size.\n",
504 cgraph_node_name (node));
505 return false;
506 }
507
508 /* When profile is available and function is hot, propagate into it even if
509 calls seems cold; constant propagation can improve function's speed
510 significandly. */
511 if (max_count)
512 {
513 if (direct_call_sum > node->count * 90 / 100)
514 {
515 if (dump_file)
516 fprintf (dump_file, "Considering %s for cloning; usually called directly.\n",
517 cgraph_node_name (node));
518 return true;
519 }
520 }
521 if (!n_hot_calls)
522 {
523 if (dump_file)
524 fprintf (dump_file, "Not considering %s for cloning; no hot calls.\n",
525 cgraph_node_name (node));
526 return false;
527 }
528 if (dump_file)
529 fprintf (dump_file, "Considering %s for cloning.\n",
530 cgraph_node_name (node));
531 return true;
532 }
533
534 /* Initialize ipcp_lattices array. The lattices corresponding to supported
535 types (integers, real types and Fortran constants defined as const_decls)
536 are initialized to IPA_TOP, the rest of them to IPA_BOTTOM. */
537 static void
538 ipcp_initialize_node_lattices (struct cgraph_node *node)
539 {
540 int i;
541 struct ipa_node_params *info = IPA_NODE_REF (node);
542 enum ipa_lattice_type type;
543
544 if (ipa_is_called_with_var_arguments (info))
545 type = IPA_BOTTOM;
546 else if (cgraph_only_called_directly_p (node))
547 type = IPA_TOP;
548 /* When cloning is allowed, we can assume that externally visible functions
549 are not called. We will compensate this by cloning later. */
550 else if (ipcp_cloning_candidate_p (node))
551 type = IPA_TOP, n_cloning_candidates ++;
552 else
553 type = IPA_BOTTOM;
554
555 for (i = 0; i < ipa_get_param_count (info) ; i++)
556 ipcp_get_lattice (info, i)->type = type;
557 }
558
559 /* build INTEGER_CST tree with type TREE_TYPE and value according to LAT.
560 Return the tree. */
561 static tree
562 build_const_val (struct ipcp_lattice *lat, tree tree_type)
563 {
564 tree val;
565
566 gcc_assert (ipcp_lat_is_const (lat));
567 val = lat->constant;
568
569 if (!useless_type_conversion_p (tree_type, TREE_TYPE (val)))
570 {
571 if (fold_convertible_p (tree_type, val))
572 return fold_build1 (NOP_EXPR, tree_type, val);
573 else
574 return fold_build1 (VIEW_CONVERT_EXPR, tree_type, val);
575 }
576 return val;
577 }
578
579 /* Compute the proper scale for NODE. It is the ratio between the number of
580 direct calls (represented on the incoming cgraph_edges) and sum of all
581 invocations of NODE (represented as count in cgraph_node).
582
583 FIXME: This code is wrong. Since the callers can be also clones and
584 the clones are not scaled yet, the sums gets unrealistically high.
585 To properly compute the counts, we would need to do propagation across
586 callgraph (as external call to A might imply call to non-clonned B
587 if A's clone calls clonned B). */
588 static void
589 ipcp_compute_node_scale (struct cgraph_node *node)
590 {
591 gcov_type sum;
592 struct cgraph_edge *cs;
593
594 sum = 0;
595 /* Compute sum of all counts of callers. */
596 for (cs = node->callers; cs != NULL; cs = cs->next_caller)
597 sum += cs->count;
598 /* Work around the unrealistically high sum problem. We just don't want
599 the non-cloned body to have negative or very low frequency. Since
600 majority of execution time will be spent in clones anyway, this should
601 give good enough profile. */
602 if (sum > node->count * 9 / 10)
603 sum = node->count * 9 / 10;
604 if (node->count == 0)
605 ipcp_set_node_scale (node, 0);
606 else
607 ipcp_set_node_scale (node, sum * REG_BR_PROB_BASE / node->count);
608 }
609
610 /* Initialization and computation of IPCP data structures. This is the initial
611 intraprocedural analysis of functions, which gathers information to be
612 propagated later on. */
613 static void
614 ipcp_init_stage (void)
615 {
616 struct cgraph_node *node;
617
618 for (node = cgraph_nodes; node; node = node->next)
619 if (node->analyzed)
620 ipcp_analyze_node (node);
621 for (node = cgraph_nodes; node; node = node->next)
622 {
623 if (!node->analyzed)
624 continue;
625
626 ipa_analyze_params_uses (node);
627 /* building jump functions */
628 ipa_compute_jump_functions (node);
629 }
630 }
631
632 /* Return true if there are some formal parameters whose value is IPA_TOP (in
633 the whole compilation unit). Change their values to IPA_BOTTOM, since they
634 most probably get their values from outside of this compilation unit. */
635 static bool
636 ipcp_change_tops_to_bottom (void)
637 {
638 int i, count;
639 struct cgraph_node *node;
640 bool prop_again;
641
642 prop_again = false;
643 for (node = cgraph_nodes; node; node = node->next)
644 {
645 struct ipa_node_params *info = IPA_NODE_REF (node);
646 count = ipa_get_param_count (info);
647 for (i = 0; i < count; i++)
648 {
649 struct ipcp_lattice *lat = ipcp_get_lattice (info, i);
650 if (lat->type == IPA_TOP)
651 {
652 prop_again = true;
653 if (dump_file)
654 {
655 fprintf (dump_file, "Forcing param ");
656 print_generic_expr (dump_file, ipa_get_param (info, i), 0);
657 fprintf (dump_file, " of node %s to bottom.\n",
658 cgraph_node_name (node));
659 }
660 lat->type = IPA_BOTTOM;
661 }
662 }
663 }
664 return prop_again;
665 }
666
667 /* Interprocedural analysis. The algorithm propagates constants from the
668 caller's parameters to the callee's arguments. */
669 static void
670 ipcp_propagate_stage (void)
671 {
672 int i;
673 struct ipcp_lattice inc_lat = { IPA_BOTTOM, NULL };
674 struct ipcp_lattice new_lat = { IPA_BOTTOM, NULL };
675 struct ipcp_lattice *dest_lat;
676 struct cgraph_edge *cs;
677 struct ipa_jump_func *jump_func;
678 struct ipa_func_list *wl;
679 int count;
680
681 ipa_check_create_node_params ();
682 ipa_check_create_edge_args ();
683
684 /* Initialize worklist to contain all functions. */
685 wl = ipa_init_func_list ();
686 while (wl)
687 {
688 struct cgraph_node *node = ipa_pop_func_from_list (&wl);
689 struct ipa_node_params *info = IPA_NODE_REF (node);
690
691 for (cs = node->callees; cs; cs = cs->next_callee)
692 {
693 struct ipa_node_params *callee_info = IPA_NODE_REF (cs->callee);
694 struct ipa_edge_args *args = IPA_EDGE_REF (cs);
695
696 if (ipa_is_called_with_var_arguments (callee_info)
697 || !cs->callee->analyzed
698 || ipa_is_called_with_var_arguments (callee_info))
699 continue;
700
701 count = ipa_get_cs_argument_count (args);
702 for (i = 0; i < count; i++)
703 {
704 jump_func = ipa_get_ith_jump_func (args, i);
705 ipcp_lattice_from_jfunc (info, &inc_lat, jump_func);
706 dest_lat = ipcp_get_lattice (callee_info, i);
707 ipa_lattice_meet (&new_lat, &inc_lat, dest_lat);
708 if (ipcp_lattice_changed (&new_lat, dest_lat))
709 {
710 dest_lat->type = new_lat.type;
711 dest_lat->constant = new_lat.constant;
712 ipa_push_func_to_list (&wl, cs->callee);
713 }
714 }
715 }
716 }
717 }
718
719 /* Call the constant propagation algorithm and re-call it if necessary
720 (if there are undetermined values left). */
721 static void
722 ipcp_iterate_stage (void)
723 {
724 struct cgraph_node *node;
725 n_cloning_candidates = 0;
726
727 if (dump_file)
728 fprintf (dump_file, "\nIPA iterate stage:\n\n");
729
730 if (in_lto_p)
731 ipa_update_after_lto_read ();
732
733 for (node = cgraph_nodes; node; node = node->next)
734 {
735 ipcp_initialize_node_lattices (node);
736 ipcp_compute_node_scale (node);
737 }
738 if (dump_file && (dump_flags & TDF_DETAILS))
739 {
740 ipcp_print_all_lattices (dump_file);
741 ipcp_function_scale_print (dump_file);
742 }
743
744 ipcp_propagate_stage ();
745 if (ipcp_change_tops_to_bottom ())
746 /* Some lattices have changed from IPA_TOP to IPA_BOTTOM.
747 This change should be propagated. */
748 {
749 gcc_assert (n_cloning_candidates);
750 ipcp_propagate_stage ();
751 }
752 if (dump_file)
753 {
754 fprintf (dump_file, "\nIPA lattices after propagation:\n");
755 ipcp_print_all_lattices (dump_file);
756 if (dump_flags & TDF_DETAILS)
757 ipcp_print_profile_data (dump_file);
758 }
759 }
760
761 /* Check conditions to forbid constant insertion to function described by
762 NODE. */
763 static inline bool
764 ipcp_node_modifiable_p (struct cgraph_node *node)
765 {
766 /* Once we will be able to do in-place replacement, we can be more
767 lax here. */
768 return ipcp_versionable_function_p (node);
769 }
770
771 /* Print count scale data structures. */
772 static void
773 ipcp_function_scale_print (FILE * f)
774 {
775 struct cgraph_node *node;
776
777 for (node = cgraph_nodes; node; node = node->next)
778 {
779 if (!node->analyzed)
780 continue;
781 fprintf (f, "printing scale for %s: ", cgraph_node_name (node));
782 fprintf (f, "value is " HOST_WIDE_INT_PRINT_DEC
783 " \n", (HOST_WIDE_INT) ipcp_get_node_scale (node));
784 }
785 }
786
787 /* Print counts of all cgraph nodes. */
788 static void
789 ipcp_print_func_profile_counts (FILE * f)
790 {
791 struct cgraph_node *node;
792
793 for (node = cgraph_nodes; node; node = node->next)
794 {
795 fprintf (f, "function %s: ", cgraph_node_name (node));
796 fprintf (f, "count is " HOST_WIDE_INT_PRINT_DEC
797 " \n", (HOST_WIDE_INT) node->count);
798 }
799 }
800
801 /* Print counts of all cgraph edges. */
802 static void
803 ipcp_print_call_profile_counts (FILE * f)
804 {
805 struct cgraph_node *node;
806 struct cgraph_edge *cs;
807
808 for (node = cgraph_nodes; node; node = node->next)
809 {
810 for (cs = node->callees; cs; cs = cs->next_callee)
811 {
812 fprintf (f, "%s -> %s ", cgraph_node_name (cs->caller),
813 cgraph_node_name (cs->callee));
814 fprintf (f, "count is " HOST_WIDE_INT_PRINT_DEC " \n",
815 (HOST_WIDE_INT) cs->count);
816 }
817 }
818 }
819
820 /* Print profile info for all functions. */
821 static void
822 ipcp_print_profile_data (FILE * f)
823 {
824 fprintf (f, "\nNODE COUNTS :\n");
825 ipcp_print_func_profile_counts (f);
826 fprintf (f, "\nCS COUNTS stage:\n");
827 ipcp_print_call_profile_counts (f);
828 }
829
830 /* Build and initialize ipa_replace_map struct according to LAT. This struct is
831 processed by versioning, which operates according to the flags set.
832 PARM_TREE is the formal parameter found to be constant. LAT represents the
833 constant. */
834 static struct ipa_replace_map *
835 ipcp_create_replace_map (tree parm_tree, struct ipcp_lattice *lat)
836 {
837 struct ipa_replace_map *replace_map;
838 tree const_val;
839
840 replace_map = GGC_NEW (struct ipa_replace_map);
841 const_val = build_const_val (lat, TREE_TYPE (parm_tree));
842 if (dump_file)
843 {
844 fprintf (dump_file, " replacing param ");
845 print_generic_expr (dump_file, parm_tree, 0);
846 fprintf (dump_file, " with const ");
847 print_generic_expr (dump_file, const_val, 0);
848 fprintf (dump_file, "\n");
849 }
850 replace_map->old_tree = parm_tree;
851 replace_map->new_tree = const_val;
852 replace_map->replace_p = true;
853 replace_map->ref_p = false;
854
855 return replace_map;
856 }
857
858 /* Return true if this callsite should be redirected to the original callee
859 (instead of the cloned one). */
860 static bool
861 ipcp_need_redirect_p (struct cgraph_edge *cs)
862 {
863 struct ipa_node_params *orig_callee_info;
864 int i, count;
865 struct ipa_jump_func *jump_func;
866 struct cgraph_node *node = cs->callee, *orig;
867
868 if (!n_cloning_candidates)
869 return false;
870
871 if ((orig = ipcp_get_orig_node (node)) != NULL)
872 node = orig;
873 if (ipcp_get_orig_node (cs->caller))
874 return false;
875
876 orig_callee_info = IPA_NODE_REF (node);
877 count = ipa_get_param_count (orig_callee_info);
878 for (i = 0; i < count; i++)
879 {
880 struct ipcp_lattice *lat = ipcp_get_lattice (orig_callee_info, i);
881 if (ipcp_lat_is_const (lat))
882 {
883 jump_func = ipa_get_ith_jump_func (IPA_EDGE_REF (cs), i);
884 if (jump_func->type != IPA_JF_CONST)
885 return true;
886 }
887 }
888
889 return false;
890 }
891
892 /* Fix the callsites and the call graph after function cloning was done. */
893 static void
894 ipcp_update_callgraph (void)
895 {
896 struct cgraph_node *node;
897
898 for (node = cgraph_nodes; node; node = node->next)
899 if (node->analyzed && ipcp_node_is_clone (node))
900 {
901 bitmap args_to_skip = BITMAP_ALLOC (NULL);
902 struct cgraph_node *orig_node = ipcp_get_orig_node (node);
903 struct ipa_node_params *info = IPA_NODE_REF (orig_node);
904 int i, count = ipa_get_param_count (info);
905 struct cgraph_edge *cs, *next;
906
907 for (i = 0; i < count; i++)
908 {
909 struct ipcp_lattice *lat = ipcp_get_lattice (info, i);
910
911 /* We can proactively remove obviously unused arguments. */
912 if (!ipa_is_param_used (info, i))
913 {
914 bitmap_set_bit (args_to_skip, i);
915 continue;
916 }
917
918 if (lat->type == IPA_CONST_VALUE)
919 bitmap_set_bit (args_to_skip, i);
920 }
921 for (cs = node->callers; cs; cs = next)
922 {
923 next = cs->next_caller;
924 if (!ipcp_node_is_clone (cs->caller) && ipcp_need_redirect_p (cs))
925 cgraph_redirect_edge_callee (cs, orig_node);
926 }
927 }
928 }
929
930 /* Update profiling info for versioned functions and the functions they were
931 versioned from. */
932 static void
933 ipcp_update_profiling (void)
934 {
935 struct cgraph_node *node, *orig_node;
936 gcov_type scale, scale_complement;
937 struct cgraph_edge *cs;
938
939 for (node = cgraph_nodes; node; node = node->next)
940 {
941 if (ipcp_node_is_clone (node))
942 {
943 orig_node = ipcp_get_orig_node (node);
944 scale = ipcp_get_node_scale (orig_node);
945 node->count = orig_node->count * scale / REG_BR_PROB_BASE;
946 scale_complement = REG_BR_PROB_BASE - scale;
947 orig_node->count =
948 orig_node->count * scale_complement / REG_BR_PROB_BASE;
949 for (cs = node->callees; cs; cs = cs->next_callee)
950 cs->count = cs->count * scale / REG_BR_PROB_BASE;
951 for (cs = orig_node->callees; cs; cs = cs->next_callee)
952 cs->count = cs->count * scale_complement / REG_BR_PROB_BASE;
953 }
954 }
955 }
956
957 /* If NODE was cloned, how much would program grow? */
958 static long
959 ipcp_estimate_growth (struct cgraph_node *node)
960 {
961 struct cgraph_edge *cs;
962 int redirectable_node_callers = 0;
963 int removable_args = 0;
964 bool need_original = !cgraph_only_called_directly_p (node);
965 struct ipa_node_params *info;
966 int i, count;
967 int growth;
968
969 for (cs = node->callers; cs != NULL; cs = cs->next_caller)
970 if (cs->caller == node || !ipcp_need_redirect_p (cs))
971 redirectable_node_callers++;
972 else
973 need_original = true;
974
975 /* If we will be able to fully replace orignal node, we never increase
976 program size. */
977 if (!need_original)
978 return 0;
979
980 info = IPA_NODE_REF (node);
981 count = ipa_get_param_count (info);
982 for (i = 0; i < count; i++)
983 {
984 struct ipcp_lattice *lat = ipcp_get_lattice (info, i);
985
986 /* We can proactively remove obviously unused arguments. */
987 if (!ipa_is_param_used (info, i))
988 removable_args++;
989
990 if (lat->type == IPA_CONST_VALUE)
991 removable_args++;
992 }
993
994 /* We make just very simple estimate of savings for removal of operand from
995 call site. Precise cost is dificult to get, as our size metric counts
996 constants and moves as free. Generally we are looking for cases that
997 small function is called very many times. */
998 growth = node->local.inline_summary.self_size
999 - removable_args * redirectable_node_callers;
1000 if (growth < 0)
1001 return 0;
1002 return growth;
1003 }
1004
1005
1006 /* Estimate cost of cloning NODE. */
1007 static long
1008 ipcp_estimate_cloning_cost (struct cgraph_node *node)
1009 {
1010 int freq_sum = 1;
1011 gcov_type count_sum = 1;
1012 struct cgraph_edge *e;
1013 int cost;
1014
1015 cost = ipcp_estimate_growth (node) * 1000;
1016 if (!cost)
1017 {
1018 if (dump_file)
1019 fprintf (dump_file, "Versioning of %s will save code size\n",
1020 cgraph_node_name (node));
1021 return 0;
1022 }
1023
1024 for (e = node->callers; e; e = e->next_caller)
1025 if (!bitmap_bit_p (dead_nodes, e->caller->uid)
1026 && !ipcp_need_redirect_p (e))
1027 {
1028 count_sum += e->count;
1029 freq_sum += e->frequency + 1;
1030 }
1031
1032 if (max_count)
1033 cost /= count_sum * 1000 / max_count + 1;
1034 else
1035 cost /= freq_sum * 1000 / REG_BR_PROB_BASE + 1;
1036 if (dump_file)
1037 fprintf (dump_file, "Cost of versioning %s is %i, (size: %i, freq: %i)\n",
1038 cgraph_node_name (node), cost, node->local.inline_summary.self_size,
1039 freq_sum);
1040 return cost + 1;
1041 }
1042
1043 /* Return number of live constant parameters. */
1044 static int
1045 ipcp_const_param_count (struct cgraph_node *node)
1046 {
1047 int const_param = 0;
1048 struct ipa_node_params *info = IPA_NODE_REF (node);
1049 int count = ipa_get_param_count (info);
1050 int i;
1051
1052 for (i = 0; i < count; i++)
1053 {
1054 struct ipcp_lattice *lat = ipcp_get_lattice (info, i);
1055 if (ipcp_lat_is_insertable (lat)
1056 /* Do not count obviously unused arguments. */
1057 && ipa_is_param_used (info, i))
1058 const_param++;
1059 }
1060 return const_param;
1061 }
1062
1063 /* Propagate the constant parameters found by ipcp_iterate_stage()
1064 to the function's code. */
1065 static void
1066 ipcp_insert_stage (void)
1067 {
1068 struct cgraph_node *node, *node1 = NULL;
1069 int i;
1070 VEC (cgraph_edge_p, heap) * redirect_callers;
1071 VEC (ipa_replace_map_p,gc)* replace_trees;
1072 int node_callers, count;
1073 tree parm_tree;
1074 struct ipa_replace_map *replace_param;
1075 fibheap_t heap;
1076 long overall_size = 0, new_size = 0;
1077 long max_new_size;
1078
1079 ipa_check_create_node_params ();
1080 ipa_check_create_edge_args ();
1081 if (dump_file)
1082 fprintf (dump_file, "\nIPA insert stage:\n\n");
1083
1084 dead_nodes = BITMAP_ALLOC (NULL);
1085
1086 for (node = cgraph_nodes; node; node = node->next)
1087 if (node->analyzed)
1088 {
1089 if (node->count > max_count)
1090 max_count = node->count;
1091 overall_size += node->local.inline_summary.self_size;
1092 }
1093
1094 max_new_size = overall_size;
1095 if (max_new_size < PARAM_VALUE (PARAM_LARGE_UNIT_INSNS))
1096 max_new_size = PARAM_VALUE (PARAM_LARGE_UNIT_INSNS);
1097 max_new_size = max_new_size * PARAM_VALUE (PARAM_IPCP_UNIT_GROWTH) / 100 + 1;
1098
1099 /* First collect all functions we proved to have constant arguments to heap. */
1100 heap = fibheap_new ();
1101 for (node = cgraph_nodes; node; node = node->next)
1102 {
1103 struct ipa_node_params *info;
1104 /* Propagation of the constant is forbidden in certain conditions. */
1105 if (!node->analyzed || !ipcp_node_modifiable_p (node))
1106 continue;
1107 info = IPA_NODE_REF (node);
1108 if (ipa_is_called_with_var_arguments (info))
1109 continue;
1110 if (ipcp_const_param_count (node))
1111 node->aux = fibheap_insert (heap, ipcp_estimate_cloning_cost (node), node);
1112 }
1113
1114 /* Now clone in priority order until code size growth limits are met or
1115 heap is emptied. */
1116 while (!fibheap_empty (heap))
1117 {
1118 struct ipa_node_params *info;
1119 int growth = 0;
1120 bitmap args_to_skip;
1121 struct cgraph_edge *cs;
1122
1123 node = (struct cgraph_node *)fibheap_extract_min (heap);
1124 node->aux = NULL;
1125 if (dump_file)
1126 fprintf (dump_file, "considering function %s\n",
1127 cgraph_node_name (node));
1128
1129 growth = ipcp_estimate_growth (node);
1130
1131 if (new_size + growth > max_new_size)
1132 break;
1133 if (growth
1134 && optimize_function_for_size_p (DECL_STRUCT_FUNCTION (node->decl)))
1135 {
1136 if (dump_file)
1137 fprintf (dump_file, "Not versioning, cold code would grow");
1138 continue;
1139 }
1140
1141 new_size += growth;
1142
1143 /* Look if original function becomes dead after clonning. */
1144 for (cs = node->callers; cs != NULL; cs = cs->next_caller)
1145 if (cs->caller == node || ipcp_need_redirect_p (cs))
1146 break;
1147 if (!cs && cgraph_only_called_directly_p (node))
1148 bitmap_set_bit (dead_nodes, node->uid);
1149
1150 info = IPA_NODE_REF (node);
1151 count = ipa_get_param_count (info);
1152
1153 replace_trees = VEC_alloc (ipa_replace_map_p, gc, 1);
1154 args_to_skip = BITMAP_GGC_ALLOC ();
1155 for (i = 0; i < count; i++)
1156 {
1157 struct ipcp_lattice *lat = ipcp_get_lattice (info, i);
1158 parm_tree = ipa_get_param (info, i);
1159
1160 /* We can proactively remove obviously unused arguments. */
1161 if (!ipa_is_param_used (info, i))
1162 {
1163 bitmap_set_bit (args_to_skip, i);
1164 continue;
1165 }
1166
1167 if (lat->type == IPA_CONST_VALUE)
1168 {
1169 replace_param =
1170 ipcp_create_replace_map (parm_tree, lat);
1171 VEC_safe_push (ipa_replace_map_p, gc, replace_trees, replace_param);
1172 bitmap_set_bit (args_to_skip, i);
1173 }
1174 }
1175
1176 /* Compute how many callers node has. */
1177 node_callers = 0;
1178 for (cs = node->callers; cs != NULL; cs = cs->next_caller)
1179 node_callers++;
1180 redirect_callers = VEC_alloc (cgraph_edge_p, heap, node_callers);
1181 for (cs = node->callers; cs != NULL; cs = cs->next_caller)
1182 VEC_quick_push (cgraph_edge_p, redirect_callers, cs);
1183
1184 /* Redirecting all the callers of the node to the
1185 new versioned node. */
1186 node1 =
1187 cgraph_create_virtual_clone (node, redirect_callers, replace_trees,
1188 args_to_skip);
1189 args_to_skip = NULL;
1190 VEC_free (cgraph_edge_p, heap, redirect_callers);
1191 replace_trees = NULL;
1192
1193 if (node1 == NULL)
1194 continue;
1195 if (dump_file)
1196 fprintf (dump_file, "versioned function %s with growth %i, overall %i\n",
1197 cgraph_node_name (node), (int)growth, (int)new_size);
1198 ipcp_init_cloned_node (node, node1);
1199
1200 /* TODO: We can use indirect inlning info to produce new calls. */
1201
1202 if (dump_file)
1203 dump_function_to_file (node1->decl, dump_file, dump_flags);
1204
1205 for (cs = node->callees; cs; cs = cs->next_callee)
1206 if (cs->callee->aux)
1207 {
1208 fibheap_delete_node (heap, (fibnode_t) cs->callee->aux);
1209 cs->callee->aux = fibheap_insert (heap,
1210 ipcp_estimate_cloning_cost (cs->callee),
1211 cs->callee);
1212 }
1213 }
1214
1215 while (!fibheap_empty (heap))
1216 {
1217 if (dump_file)
1218 fprintf (dump_file, "skipping function %s\n",
1219 cgraph_node_name (node));
1220 node = (struct cgraph_node *) fibheap_extract_min (heap);
1221 node->aux = NULL;
1222 }
1223 fibheap_delete (heap);
1224 BITMAP_FREE (dead_nodes);
1225 ipcp_update_callgraph ();
1226 ipcp_update_profiling ();
1227 }
1228
1229 /* The IPCP driver. */
1230 static unsigned int
1231 ipcp_driver (void)
1232 {
1233 cgraph_remove_unreachable_nodes (true,dump_file);
1234 if (dump_file)
1235 {
1236 fprintf (dump_file, "\nIPA structures before propagation:\n");
1237 if (dump_flags & TDF_DETAILS)
1238 ipa_print_all_params (dump_file);
1239 ipa_print_all_jump_functions (dump_file);
1240 }
1241 /* 2. Do the interprocedural propagation. */
1242 ipcp_iterate_stage ();
1243 /* 3. Insert the constants found to the functions. */
1244 ipcp_insert_stage ();
1245 if (dump_file && (dump_flags & TDF_DETAILS))
1246 {
1247 fprintf (dump_file, "\nProfiling info after insert stage:\n");
1248 ipcp_print_profile_data (dump_file);
1249 }
1250 /* Free all IPCP structures. */
1251 ipa_free_all_structures_after_ipa_cp ();
1252 if (dump_file)
1253 fprintf (dump_file, "\nIPA constant propagation end\n");
1254 return 0;
1255 }
1256
1257 /* Note function body size. */
1258 static void
1259 ipcp_generate_summary (void)
1260 {
1261 if (dump_file)
1262 fprintf (dump_file, "\nIPA constant propagation start:\n");
1263 ipa_check_create_node_params ();
1264 ipa_check_create_edge_args ();
1265 ipa_register_cgraph_hooks ();
1266 /* 1. Call the init stage to initialize
1267 the ipa_node_params and ipa_edge_args structures. */
1268 ipcp_init_stage ();
1269 }
1270
1271 /* Write ipcp summary for nodes in SET. */
1272 static void
1273 ipcp_write_summary (cgraph_node_set set,
1274 varpool_node_set vset ATTRIBUTE_UNUSED)
1275 {
1276 ipa_prop_write_jump_functions (set);
1277 }
1278
1279 /* Read ipcp summary. */
1280 static void
1281 ipcp_read_summary (void)
1282 {
1283 ipa_prop_read_jump_functions ();
1284 }
1285
1286 /* Gate for IPCP optimization. */
1287 static bool
1288 cgraph_gate_cp (void)
1289 {
1290 return flag_ipa_cp;
1291 }
1292
1293 struct ipa_opt_pass_d pass_ipa_cp =
1294 {
1295 {
1296 IPA_PASS,
1297 "cp", /* name */
1298 cgraph_gate_cp, /* gate */
1299 ipcp_driver, /* execute */
1300 NULL, /* sub */
1301 NULL, /* next */
1302 0, /* static_pass_number */
1303 TV_IPA_CONSTANT_PROP, /* tv_id */
1304 0, /* properties_required */
1305 0, /* properties_provided */
1306 0, /* properties_destroyed */
1307 0, /* todo_flags_start */
1308 TODO_dump_cgraph | TODO_dump_func |
1309 TODO_remove_functions | TODO_ggc_collect /* todo_flags_finish */
1310 },
1311 ipcp_generate_summary, /* generate_summary */
1312 ipcp_write_summary, /* write_summary */
1313 ipcp_read_summary, /* read_summary */
1314 NULL, /* write_optimization_summary */
1315 NULL, /* read_optimization_summary */
1316 NULL, /* stmt_fixup */
1317 0, /* TODOs */
1318 NULL, /* function_transform */
1319 NULL, /* variable_transform */
1320 };