From 74605a11f3c482656558d332058870d15cc718d5 Mon Sep 17 00:00:00 2001 From: Jan Hubicka Date: Sun, 8 May 2011 21:14:24 +0200 Subject: [PATCH] cgraph.c (cgraph_clone_node): Add call_duplication_hook parameter. * cgraph.c (cgraph_clone_node): Add call_duplication_hook parameter. (cgraph_create_virtual_clone): Call hooks once virtual clone is finished. * cgraph.h (cgraph_clone_node): Update prototype. * ipa-cp.c (ipcp_estimate_growth): Use estimate_ipcp_clone_size_and_time. * ipa-inline-transform.c (clone_inlined_nodes): Update. * lto-cgraph.c (input_node): Update. * ipa-inline.c (recursive_inlining): Update. * ipa-inline.h (estimate_ipcp_clone_size_and_time): New function. (evaluate_conditions_for_known_args): Break out from ... (evaluate_conditions_for_edge): ... here. (evaluate_conditions_for_ipcp_clone): New function. (inline_node_duplication_hook): Update clone summary based on parameter map. (estimate_callee_size_and_time): Rename to ... (estimate_node_size_and_time): take NODE instead of EDGE; take POSSIBLE_TRUTHS as argument. (estimate_callee_size_and_time): Update. (estimate_ipcp_clone_size_and_time): New function. (do_estimate_edge_time): Update. From-SVN: r173551 --- gcc/ChangeLog | 22 +++ gcc/cgraph.c | 17 ++- gcc/cgraph.h | 3 +- gcc/cgraphunit.c | 5 + gcc/ipa-cp.c | 3 +- gcc/ipa-inline-analysis.c | 292 ++++++++++++++++++++++++++++++++----- gcc/ipa-inline-transform.c | 2 +- gcc/ipa-inline.c | 2 +- gcc/ipa-inline.h | 1 + gcc/lto-cgraph.c | 2 +- 10 files changed, 303 insertions(+), 46 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 5842b6ab758..fba19cc3a8e 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,25 @@ +2011-05-08 Jan Hubicka + + * cgraph.c (cgraph_clone_node): Add call_duplication_hook parameter. + (cgraph_create_virtual_clone): Call hooks once virtual clone is finished. + * cgraph.h (cgraph_clone_node): Update prototype. + * ipa-cp.c (ipcp_estimate_growth): Use estimate_ipcp_clone_size_and_time. + * ipa-inline-transform.c (clone_inlined_nodes): Update. + * lto-cgraph.c (input_node): Update. + * ipa-inline.c (recursive_inlining): Update. + * ipa-inline.h (estimate_ipcp_clone_size_and_time): New function. + (evaluate_conditions_for_known_args): Break out from ... + (evaluate_conditions_for_edge): ... here. + (evaluate_conditions_for_ipcp_clone): New function. + (inline_node_duplication_hook): Update clone summary based + on parameter map. + (estimate_callee_size_and_time): Rename to ... + (estimate_node_size_and_time): take NODE instead of EDGE; + take POSSIBLE_TRUTHS as argument. + (estimate_callee_size_and_time): Update. + (estimate_ipcp_clone_size_and_time): New function. + (do_estimate_edge_time): Update. + 2011-05-08 Richard Guenther PR middle-end/48908 diff --git a/gcc/cgraph.c b/gcc/cgraph.c index 1f6f89f508f..a6d54823950 100644 --- a/gcc/cgraph.c +++ b/gcc/cgraph.c @@ -2128,6 +2128,7 @@ cgraph_clone_edge (struct cgraph_edge *e, struct cgraph_node *n, return new_edge; } + /* Create node representing clone of N executed COUNT times. Decrease the execution counts from original node too. The new clone will have decl set to DECL that may or may not be the same @@ -2135,11 +2136,15 @@ cgraph_clone_edge (struct cgraph_edge *e, struct cgraph_node *n, When UPDATE_ORIGINAL is true, the counts are subtracted from the original function's profile to reflect the fact that part of execution is handled - by node. */ + by node. + When CALL_DUPLICATOIN_HOOK is true, the ipa passes are acknowledged about + the new clone. Otherwise the caller is responsible for doing so later. */ + struct cgraph_node * cgraph_clone_node (struct cgraph_node *n, tree decl, gcov_type count, int freq, bool update_original, - VEC(cgraph_edge_p,heap) *redirect_callers) + VEC(cgraph_edge_p,heap) *redirect_callers, + bool call_duplication_hook) { struct cgraph_node *new_node = cgraph_create_node_1 (); struct cgraph_edge *e; @@ -2202,7 +2207,6 @@ cgraph_clone_node (struct cgraph_node *n, tree decl, gcov_type count, int freq, n->clones = new_node; new_node->clone_of = n; - cgraph_call_node_duplication_hooks (n, new_node); if (n->decl != decl) { struct cgraph_node **slot; @@ -2221,6 +2225,9 @@ cgraph_clone_node (struct cgraph_node *n, tree decl, gcov_type count, int freq, *aslot = new_node; } } + + if (call_duplication_hook) + cgraph_call_node_duplication_hooks (n, new_node); return new_node; } @@ -2287,7 +2294,7 @@ cgraph_create_virtual_clone (struct cgraph_node *old_node, new_node = cgraph_clone_node (old_node, new_decl, old_node->count, CGRAPH_FREQ_BASE, false, - redirect_callers); + redirect_callers, false); /* Update the properties. Make clone visible only within this translation unit. Make sure that is not weak also. @@ -2358,6 +2365,8 @@ cgraph_create_virtual_clone (struct cgraph_node *old_node, new_node->lowered = true; new_node->reachable = true; + cgraph_call_node_duplication_hooks (old_node, new_node); + return new_node; } diff --git a/gcc/cgraph.h b/gcc/cgraph.h index e250ecd40e7..8be0354c2e2 100644 --- a/gcc/cgraph.h +++ b/gcc/cgraph.h @@ -506,7 +506,8 @@ struct cgraph_edge * cgraph_clone_edge (struct cgraph_edge *, struct cgraph_node *, gimple, unsigned, gcov_type, int, bool); struct cgraph_node * cgraph_clone_node (struct cgraph_node *, tree, gcov_type, - int, bool, VEC(cgraph_edge_p,heap) *); + int, bool, VEC(cgraph_edge_p,heap) *, + bool); void cgraph_redirect_edge_callee (struct cgraph_edge *, struct cgraph_node *); void cgraph_make_edge_direct (struct cgraph_edge *, struct cgraph_node *, diff --git a/gcc/cgraphunit.c b/gcc/cgraphunit.c index 68eb91d35e8..834acb101e5 100644 --- a/gcc/cgraphunit.c +++ b/gcc/cgraphunit.c @@ -430,6 +430,11 @@ verify_edge_count_and_frequency (struct cgraph_edge *e) } if (gimple_has_body_p (e->caller->decl) && !e->caller->global.inlined_to + /* FIXME: Inline-analysis sets frequency to 0 when edge is optimized out. + Remove this once edges are actualy removed from the function at that time. */ + && (e->frequency + || (inline_edge_summary_vec + && !inline_edge_summary (e)->predicate)) && (e->frequency != compute_call_stmt_bb_frequency (e->caller->decl, gimple_bb (e->call_stmt)))) diff --git a/gcc/ipa-cp.c b/gcc/ipa-cp.c index d22e709e871..41046d117ca 100644 --- a/gcc/ipa-cp.c +++ b/gcc/ipa-cp.c @@ -1102,7 +1102,8 @@ ipcp_estimate_growth (struct cgraph_node *node) call site. Precise cost is difficult to get, as our size metric counts constants and moves as free. Generally we are looking for cases that small function is called very many times. */ - growth = inline_summary (node)->self_size + estimate_ipcp_clone_size_and_time (node, &growth, NULL); + growth = growth - removable_args * redirectable_node_callers; if (growth < 0) return 0; diff --git a/gcc/ipa-inline-analysis.c b/gcc/ipa-inline-analysis.c index 9021fa2b929..86f5f12cc93 100644 --- a/gcc/ipa-inline-analysis.c +++ b/gcc/ipa-inline-analysis.c @@ -473,7 +473,7 @@ account_size_time (struct inline_summary *summary, int size, int time, struct pr /* We need to create initial empty unconitional clause, but otherwie we don't need to account empty times and sizes. */ - if (!size && !time && summary->conds) + if (!size && !time && summary->entry) return; /* Watch overflow that might result from insane profiles. */ @@ -539,6 +539,42 @@ edge_set_predicate (struct cgraph_edge *e, struct predicate *predicate) } +/* KNOWN_VALS is partial mapping of parameters of NODE to constant values. + Return clause of possible truths. When INLINE_P is true, assume that + we are inlining. */ + +static clause_t +evaluate_conditions_for_known_args (struct cgraph_node *node, + bool inline_p, + VEC (tree, heap) *known_vals) +{ + clause_t clause = inline_p ? 0 : 1 << predicate_not_inlined_condition; + struct inline_summary *info = inline_summary (node); + int i; + struct condition *c; + + for (i = 0; VEC_iterate (condition, info->conds, i, c); i++) + { + tree val = VEC_index (tree, known_vals, c->operand_num); + tree res; + + if (!val) + { + clause |= 1 << (i + predicate_first_dynamic_condition); + continue; + } + if (c->code == IS_NOT_CONSTANT) + continue; + res = fold_binary_to_constant (c->code, boolean_type_node, val, c->val); + if (res + && integer_zerop (res)) + continue; + clause |= 1 << (i + predicate_first_dynamic_condition); + } + return clause; +} + + /* Work out what conditions might be true at invocation of E. */ static clause_t @@ -557,7 +593,6 @@ evaluate_conditions_for_edge (struct cgraph_edge *e, bool inline_p) struct ipa_edge_args *args = IPA_EDGE_REF (e); int i, count = ipa_get_cs_argument_count (args); struct ipcp_lattice lat; - struct condition *c; VEC (tree, heap) *known_vals = NULL; if (e->caller->global.inlined_to) @@ -572,24 +607,8 @@ evaluate_conditions_for_edge (struct cgraph_edge *e, bool inline_p) if (lat.type == IPA_CONST_VALUE) VEC_replace (tree, known_vals, i, lat.constant); } - for (i = 0; VEC_iterate (condition, info->conds, i, c); i++) - { - tree val = VEC_index (tree, known_vals, c->operand_num); - tree res; - - if (!val) - { - clause |= 1 << (i + predicate_first_dynamic_condition); - continue; - } - if (c->code == IS_NOT_CONSTANT) - continue; - res = fold_binary_to_constant (c->code, boolean_type_node, val, c->val); - if (res - && integer_zerop (res)) - continue; - clause |= 1 << (i + predicate_first_dynamic_condition); - } + clause = evaluate_conditions_for_known_args (e->callee, + inline_p, known_vals); VEC_free (tree, heap, known_vals); } else @@ -600,6 +619,31 @@ evaluate_conditions_for_edge (struct cgraph_edge *e, bool inline_p) } +/* Work out what conditions might be true at invocation of NODE + that is (future) ipa-cp clone. */ + +static clause_t +evaluate_conditions_for_ipcp_clone (struct cgraph_node *node) +{ + struct ipa_node_params *parms_info = IPA_NODE_REF (node); + int i, count = ipa_get_param_count (parms_info); + struct ipcp_lattice *lat; + VEC (tree, heap) *known_vals = NULL; + clause_t clause; + + VEC_safe_grow_cleared (tree, heap, known_vals, count); + for (i = 0; i < count; i++) + { + lat = ipa_get_lattice (parms_info, i); + if (lat->type == IPA_CONST_VALUE) + VEC_replace (tree, known_vals, i, lat->constant); + } + clause = evaluate_conditions_for_known_args (node, false, known_vals); + VEC_free (tree, heap, known_vals); + return clause; +} + + /* Allocate the inline summary vector or resize it to cover all cgraph nodes. */ static void @@ -661,8 +705,165 @@ inline_node_duplication_hook (struct cgraph_node *src, struct cgraph_node *dst, info = inline_summary (dst); memcpy (info, inline_summary (src), sizeof (struct inline_summary)); + /* TODO: as an optimization, we may avoid copying conditions + that are known to be false or true. */ info->conds = VEC_copy (condition, gc, info->conds); - info->entry = VEC_copy (size_time_entry, gc, info->entry); + + /* When there are any replacements in the function body, see if we can figure + out that something was optimized out. */ + if (ipa_node_params_vector && dst->clone.tree_map) + { + VEC(size_time_entry,gc) *entry = info->entry; + /* Use SRC parm info since it may not be copied yet. */ + struct ipa_node_params *parms_info = IPA_NODE_REF (src); + VEC (tree, heap) *known_vals = NULL; + int count = ipa_get_param_count (parms_info); + int i,j; + clause_t possible_truths; + struct predicate true_pred = true_predicate (); + size_time_entry *e; + int optimized_out_size = 0; + gcov_type optimized_out_time = 0; + bool inlined_to_p = false; + struct cgraph_edge *edge; + + info->entry = false; + VEC_safe_grow_cleared (tree, heap, known_vals, count); + for (i = 0; i < count; i++) + { + tree t = ipa_get_param (parms_info, i); + struct ipa_replace_map *r; + + for (j = 0; + VEC_iterate (ipa_replace_map_p, dst->clone.tree_map, j, r); + j++) + { + if (r->old_tree == t + && r->replace_p + && !r->ref_p) + { + VEC_replace (tree, known_vals, i, r->new_tree); + break; + } + } + } + possible_truths = evaluate_conditions_for_known_args (dst, + false, known_vals); + VEC_free (tree, heap, known_vals); + + account_size_time (info, 0, 0, &true_pred); + + /* Remap size_time vectors. + Simplify the predicate by prunning out alternatives that are known + to be false. + TODO: as on optimization, we can also eliminate conditions known to be true. */ + for (i = 0; VEC_iterate (size_time_entry, entry, i, e); i++) + { + struct predicate new_predicate = true_predicate (); + for (j = 0; e->predicate.clause[j]; j++) + if (!(possible_truths & e->predicate.clause[j])) + { + new_predicate = false_predicate (); + break; + } + else + add_clause (&new_predicate, + possible_truths & e->predicate.clause[j]); + if (false_predicate_p (&new_predicate)) + { + optimized_out_size += e->size; + optimized_out_time += e->time; + } + else + account_size_time (info, e->size, e->time, &new_predicate); + } + + /* Remap edge predicates with the same simplificaiton as above. */ + for (edge = dst->callees; edge; edge = edge->next_callee) + { + struct predicate new_predicate = true_predicate (); + struct inline_edge_summary *es = inline_edge_summary (edge); + + if (!edge->inline_failed) + inlined_to_p = true; + if (!es->predicate) + continue; + for (j = 0; es->predicate->clause[j]; j++) + if (!(possible_truths & es->predicate->clause[j])) + { + new_predicate = false_predicate (); + break; + } + else + add_clause (&new_predicate, + possible_truths & es->predicate->clause[j]); + if (false_predicate_p (&new_predicate) + && !false_predicate_p (es->predicate)) + { + optimized_out_size += es->call_stmt_size * INLINE_SIZE_SCALE; + optimized_out_time += (es->call_stmt_time + * (INLINE_TIME_SCALE / CGRAPH_FREQ_BASE) + * edge->frequency); + edge->frequency = 0; + } + *es->predicate = new_predicate; + } + + /* Remap indirect edge predicates with the same simplificaiton as above. */ + for (edge = dst->indirect_calls; edge; edge = edge->next_callee) + { + struct predicate new_predicate = true_predicate (); + struct inline_edge_summary *es = inline_edge_summary (edge); + + if (!edge->inline_failed) + inlined_to_p = true; + if (!es->predicate) + continue; + for (j = 0; es->predicate->clause[j]; j++) + if (!(possible_truths & es->predicate->clause[j])) + { + new_predicate = false_predicate (); + break; + } + else + add_clause (&new_predicate, + possible_truths & es->predicate->clause[j]); + if (false_predicate_p (&new_predicate) + && !false_predicate_p (es->predicate)) + { + optimized_out_size += es->call_stmt_size * INLINE_SIZE_SCALE; + optimized_out_time += (es->call_stmt_time + * (INLINE_TIME_SCALE / CGRAPH_FREQ_BASE) + * edge->frequency); + edge->frequency = 0; + } + *es->predicate = new_predicate; + } + + /* If inliner or someone after inliner will ever start producing + non-trivial clones, we will get trouble with lack of information + about updating self sizes, because size vectors already contains + sizes of the calees. */ + gcc_assert (!inlined_to_p + || (!optimized_out_size && !optimized_out_time)); + + info->size -= optimized_out_size / INLINE_SIZE_SCALE; + info->self_size -= optimized_out_size / INLINE_SIZE_SCALE; + gcc_assert (info->size > 0); + gcc_assert (info->self_size > 0); + + optimized_out_time /= INLINE_TIME_SCALE; + if (optimized_out_time > MAX_TIME) + optimized_out_time = MAX_TIME; + info->time -= optimized_out_time; + info->self_time -= optimized_out_time; + if (info->time < 0) + info->time = 0; + if (info->self_time < 0) + info->self_time = 0; + } + else + info->entry = VEC_copy (size_time_entry, gc, info->entry); } @@ -1565,17 +1766,15 @@ estimate_calls_size_and_time (struct cgraph_node *node, int *size, int *time, } -/* Estimate size and time needed to execute callee of EDGE assuming - that parameters known to be constant at caller of EDGE are - propagated. If INLINE_P is true, it is assumed that call will - be inlined. */ +/* Estimate size and time needed to execute NODE assuming + POSSIBLE_TRUTHS clause. */ static void -estimate_callee_size_and_time (struct cgraph_edge *edge, bool inline_p, - int *ret_size, int *ret_time) +estimate_node_size_and_time (struct cgraph_node *node, + clause_t possible_truths, + int *ret_size, int *ret_time) { - struct inline_summary *info = inline_summary (edge->callee); - clause_t clause = evaluate_conditions_for_edge (edge, inline_p); + struct inline_summary *info = inline_summary (node); size_time_entry *e; int size = 0, time = 0; int i; @@ -1584,15 +1783,15 @@ estimate_callee_size_and_time (struct cgraph_edge *edge, bool inline_p, && (dump_flags & TDF_DETAILS)) { bool found = false; - fprintf (dump_file, " Estimating callee body: %s/%i\n" + fprintf (dump_file, " Estimating body: %s/%i\n" " Known to be false: ", - cgraph_node_name (edge->callee), - edge->callee->uid); + cgraph_node_name (node), + node->uid); for (i = predicate_not_inlined_condition; i < (predicate_first_dynamic_condition + (int)VEC_length (condition, info->conds)); i++) - if (!(clause & (1 << i))) + if (!(possible_truths & (1 << i))) { if (found) fprintf (dump_file, ", "); @@ -1602,13 +1801,13 @@ estimate_callee_size_and_time (struct cgraph_edge *edge, bool inline_p, } for (i = 0; VEC_iterate (size_time_entry, info->entry, i, e); i++) - if (evaluate_predicate (&e->predicate, clause)) + if (evaluate_predicate (&e->predicate, possible_truths)) time += e->time, size += e->size; if (time > MAX_TIME * INLINE_TIME_SCALE) time = MAX_TIME * INLINE_TIME_SCALE; - estimate_calls_size_and_time (edge->callee, &size, &time, clause); + estimate_calls_size_and_time (node, &size, &time, possible_truths); time = (time + INLINE_TIME_SCALE / 2) / INLINE_TIME_SCALE; size = (size + INLINE_SIZE_SCALE / 2) / INLINE_SIZE_SCALE; @@ -1624,6 +1823,21 @@ estimate_callee_size_and_time (struct cgraph_edge *edge, bool inline_p, } +/* Estimate size and time needed to execute callee of EDGE assuming + that parameters known to be constant at caller of EDGE are + propagated. If INLINE_P is true, it is assumed that call will + be inlined. */ + +void +estimate_ipcp_clone_size_and_time (struct cgraph_node *node, + int *ret_size, int *ret_time) +{ + estimate_node_size_and_time (node, + evaluate_conditions_for_ipcp_clone (node), + ret_size, ret_time); +} + + /* Translate all conditions from callee representation into caller representation and symbolically evaluate predicate P into new predicate. @@ -1872,7 +2086,9 @@ do_estimate_edge_time (struct cgraph_edge *edge) struct inline_edge_summary *es = inline_edge_summary (edge); gcc_checking_assert (edge->inline_failed); - estimate_callee_size_and_time (edge, true, &size, &time); + estimate_node_size_and_time (edge->callee, + evaluate_conditions_for_edge (edge, true), + &size, &time); ret = (((gcov_type)time - es->call_stmt_time) * edge->frequency + CGRAPH_FREQ_BASE / 2) / CGRAPH_FREQ_BASE; @@ -1921,7 +2137,9 @@ do_estimate_edge_growth (struct cgraph_edge *edge) /* Early inliner runs without caching, go ahead and do the dirty work. */ gcc_checking_assert (edge->inline_failed); - estimate_callee_size_and_time (edge, true, &size, NULL); + estimate_node_size_and_time (edge->callee, + evaluate_conditions_for_edge (edge, true), + &size, NULL); gcc_checking_assert (inline_edge_summary (edge)->call_stmt_size); return size - inline_edge_summary (edge)->call_stmt_size; } diff --git a/gcc/ipa-inline-transform.c b/gcc/ipa-inline-transform.c index cf24a62caf7..85a47f8a018 100644 --- a/gcc/ipa-inline-transform.c +++ b/gcc/ipa-inline-transform.c @@ -133,7 +133,7 @@ clone_inlined_nodes (struct cgraph_edge *e, bool duplicate, struct cgraph_node *n; n = cgraph_clone_node (e->callee, e->callee->decl, e->count, e->frequency, - update_original, NULL); + update_original, NULL, true); cgraph_redirect_edge_callee (e, n); } } diff --git a/gcc/ipa-inline.c b/gcc/ipa-inline.c index fbc6918c12b..0718334e84e 100644 --- a/gcc/ipa-inline.c +++ b/gcc/ipa-inline.c @@ -1174,7 +1174,7 @@ recursive_inlining (struct cgraph_edge *edge, /* We need original clone to copy around. */ master_clone = cgraph_clone_node (node, node->decl, node->count, CGRAPH_FREQ_BASE, - false, NULL); + false, NULL, true); for (e = master_clone->callees; e; e = e->next_callee) if (!e->inline_failed) clone_inlined_nodes (e, true, false, NULL); diff --git a/gcc/ipa-inline.h b/gcc/ipa-inline.h index 11dd15e42b9..c9f881ecea7 100644 --- a/gcc/ipa-inline.h +++ b/gcc/ipa-inline.h @@ -149,6 +149,7 @@ void inline_free_summary (void); void initialize_inline_failed (struct cgraph_edge *); int estimate_time_after_inlining (struct cgraph_node *, struct cgraph_edge *); int estimate_size_after_inlining (struct cgraph_node *, struct cgraph_edge *); +void estimate_ipcp_clone_size_and_time (struct cgraph_node *, int *, int *); int do_estimate_growth (struct cgraph_node *); void inline_merge_summary (struct cgraph_edge *edge); int do_estimate_edge_growth (struct cgraph_edge *edge); diff --git a/gcc/lto-cgraph.c b/gcc/lto-cgraph.c index f4fd2a3b7c4..22f44ad9783 100644 --- a/gcc/lto-cgraph.c +++ b/gcc/lto-cgraph.c @@ -999,7 +999,7 @@ input_node (struct lto_file_decl_data *file_data, if (clone_ref != LCC_NOT_FOUND) { node = cgraph_clone_node (VEC_index (cgraph_node_ptr, nodes, clone_ref), fn_decl, - 0, CGRAPH_FREQ_BASE, false, NULL); + 0, CGRAPH_FREQ_BASE, false, NULL, false); } else node = cgraph_get_create_node (fn_decl); -- 2.30.2