From 0e590b68fa3743656f40aee8374b788b108350c7 Mon Sep 17 00:00:00 2001 From: Jan Hubicka Date: Thu, 22 Oct 2020 17:32:32 +0200 Subject: [PATCH] Materialize clones on demand This patch removes the pass that materializes all clones; materialization is now done on demand instead. The motivation is to reduce the lifetime of function bodies in ltrans, which should noticeably reduce memory use for highly parallel compilations of large programs (like Martin does) or with partitioning reduced/disabled. For cc1 with one partition the memory use seems to go down from 4GB to about 1.5GB (as observed in top, so this is not particularly accurate). gcc/ChangeLog: 2020-10-22 Jan Hubicka * cgraph.c (cgraph_node::get_untransformed_body): Perform lazy clone materialization. * cgraph.h (cgraph_node::materialize_clone): Declare. (symbol_table::materialize_all_clones): Remove. * cgraphclones.c (cgraph_materialize_clone): Turn to ... (cgraph_node::materialize_clone): ... this one; move here dumping from symbol_table::materialize_all_clones. (symbol_table::materialize_all_clones): Remove. * cgraphunit.c (mark_functions_to_output): Clear stmt references. (cgraph_node::expand): Initialize bitmaps early; do not call execute_all_ipa_transforms if there are no transforms. * ipa-inline-transform.c (save_inline_function_body): Fix formatting. (inline_transform): Materialize all clones before function is modified. * ipa-param-manipulation.c (ipa_param_adjustments::modify_call): Materialize clone if needed. * ipa.c (class pass_materialize_all_clones): Remove. (make_pass_materialize_all_clones): Remove. * passes.c (execute_all_ipa_transforms): Materialize all clones. * passes.def: Remove pass_materialize_all_clones. * tree-pass.h (make_pass_materialize_all_clones): Remove. * tree-ssa-structalias.c (ipa_pta_execute): Clear refs. 
--- gcc/cgraph.c | 15 +++- gcc/cgraph.h | 13 +--- gcc/cgraphclones.c | 137 ++++++++++------------------------- gcc/cgraphunit.c | 10 ++- gcc/ipa-inline-transform.c | 16 +++- gcc/ipa-param-manipulation.c | 7 ++ gcc/ipa.c | 40 ---------- gcc/passes.c | 8 ++ gcc/passes.def | 1 - gcc/tree-pass.h | 2 - gcc/tree-ssa-structalias.c | 4 + 11 files changed, 94 insertions(+), 159 deletions(-) diff --git a/gcc/cgraph.c b/gcc/cgraph.c index 9480935ff84..067984d773c 100644 --- a/gcc/cgraph.c +++ b/gcc/cgraph.c @@ -3872,16 +3872,27 @@ cgraph_node::function_or_virtual_thunk_symbol } /* When doing LTO, read cgraph_node's body from disk if it is not already - present. */ + present. Also perform any necessary clone materializations. */ bool -cgraph_node::get_untransformed_body (void) +cgraph_node::get_untransformed_body () { lto_file_decl_data *file_data; const char *data, *name; size_t len; tree decl = this->decl; + /* See if there is clone to be materialized. + (inline clones does not need materialization, but we can be seeing + an inline clone of real clone). */ + cgraph_node *p = this; + for (cgraph_node *c = clone_of; c; c = c->clone_of) + { + if (c->decl != decl) + p->materialize_clone (); + p = c; + } + /* Check if body is already there. Either we have gimple body or the function is thunk and in that case we set DECL_ARGUMENTS. */ if (DECL_ARGUMENTS (decl) || gimple_has_body_p (decl)) diff --git a/gcc/cgraph.h b/gcc/cgraph.h index c953a1b6711..6c450342764 100644 --- a/gcc/cgraph.h +++ b/gcc/cgraph.h @@ -1145,12 +1145,14 @@ struct GTY((tag ("SYMTAB_FUNCTION"))) cgraph_node : public symtab_node /* When doing LTO, read cgraph_node's body from disk if it is not already present. */ - bool get_untransformed_body (void); + bool get_untransformed_body (); /* Prepare function body. When doing LTO, read cgraph_node's body from disk if it is not already present. When some IPA transformations are scheduled, apply them. 
*/ - bool get_body (void); + bool get_body (); + + void materialize_clone (void); /* Release memory used to represent body of function. Use this only for functions that are released before being translated to @@ -2286,13 +2288,6 @@ public: functions inserted into callgraph already at construction time. */ void process_new_functions (void); - /* Once all functions from compilation unit are in memory, produce all clones - and update all calls. We might also do this on demand if we don't want to - bring all functions to memory prior compilation, but current WHOPR - implementation does that and it is bit easier to keep everything right - in this order. */ - void materialize_all_clones (void); - /* Register a symbol NODE. */ inline void register_symbol (symtab_node *node); diff --git a/gcc/cgraphclones.c b/gcc/cgraphclones.c index f920dcb4c29..07a51a58aef 100644 --- a/gcc/cgraphclones.c +++ b/gcc/cgraphclones.c @@ -1083,114 +1083,57 @@ void cgraph_node::remove_from_clone_tree () /* Given virtual clone, turn it into actual clone. */ -static void -cgraph_materialize_clone (cgraph_node *node) -{ - bitmap_obstack_initialize (NULL); - node->former_clone_of = node->clone_of->decl; - if (node->clone_of->former_clone_of) - node->former_clone_of = node->clone_of->former_clone_of; - /* Copy the OLD_VERSION_NODE function tree to the new version. */ - tree_function_versioning (node->clone_of->decl, node->decl, - node->clone.tree_map, node->clone.param_adjustments, - true, NULL, NULL); - if (symtab->dump_file) - { - dump_function_to_file (node->clone_of->decl, symtab->dump_file, - dump_flags); - dump_function_to_file (node->decl, symtab->dump_file, dump_flags); - } - - cgraph_node *clone_of = node->clone_of; - /* Function is no longer clone. 
*/ - node->remove_from_clone_tree (); - if (!clone_of->analyzed && !clone_of->clones) - { - clone_of->release_body (); - clone_of->remove_callees (); - clone_of->remove_all_references (); - } - bitmap_obstack_release (NULL); -} - -/* Once all functions from compilation unit are in memory, produce all clones - and update all calls. We might also do this on demand if we don't want to - bring all functions to memory prior compilation, but current WHOPR - implementation does that and it is a bit easier to keep everything right in - this order. */ - void -symbol_table::materialize_all_clones (void) +cgraph_node::materialize_clone () { - cgraph_node *node; - bool stabilized = false; - - + clone_of->get_untransformed_body (); + former_clone_of = clone_of->decl; + if (clone_of->former_clone_of) + former_clone_of = clone_of->former_clone_of; if (symtab->dump_file) - fprintf (symtab->dump_file, "Materializing clones\n"); - - cgraph_node::checking_verify_cgraph_nodes (); - - /* We can also do topological order, but number of iterations should be - bounded by number of IPA passes since single IPA pass is probably not - going to create clones of clones it created itself. 
*/ - while (!stabilized) { - stabilized = true; - FOR_EACH_FUNCTION (node) + fprintf (symtab->dump_file, "cloning %s to %s\n", + clone_of->dump_name (), + dump_name ()); + if (clone.tree_map) { - if (node->clone_of && node->decl != node->clone_of->decl - && !gimple_has_body_p (node->decl)) + fprintf (symtab->dump_file, " replace map:"); + for (unsigned int i = 0; + i < vec_safe_length (clone.tree_map); + i++) { - if (!node->clone_of->clone_of) - node->clone_of->get_untransformed_body (); - if (gimple_has_body_p (node->clone_of->decl)) - { - if (symtab->dump_file) - { - fprintf (symtab->dump_file, "cloning %s to %s\n", - node->clone_of->dump_name (), - node->dump_name ()); - if (node->clone.tree_map) - { - unsigned int i; - fprintf (symtab->dump_file, " replace map:"); - for (i = 0; - i < vec_safe_length (node->clone.tree_map); - i++) - { - ipa_replace_map *replace_info; - replace_info = (*node->clone.tree_map)[i]; - fprintf (symtab->dump_file, "%s %i -> ", - i ? "," : "", replace_info->parm_num); - print_generic_expr (symtab->dump_file, - replace_info->new_tree); - } - fprintf (symtab->dump_file, "\n"); - } - if (node->clone.param_adjustments) - node->clone.param_adjustments->dump (symtab->dump_file); - } - cgraph_materialize_clone (node); - stabilized = false; - } + ipa_replace_map *replace_info; + replace_info = (*clone.tree_map)[i]; + fprintf (symtab->dump_file, "%s %i -> ", + i ? "," : "", replace_info->parm_num); + print_generic_expr (symtab->dump_file, + replace_info->new_tree); } + fprintf (symtab->dump_file, "\n"); } + if (clone.param_adjustments) + clone.param_adjustments->dump (symtab->dump_file); } - FOR_EACH_FUNCTION (node) - if (!node->analyzed && node->callees) - { - node->remove_callees (); - node->remove_all_references (); - } - else - node->clear_stmts_in_references (); + /* Copy the OLD_VERSION_NODE function tree to the new version. 
*/ + tree_function_versioning (clone_of->decl, decl, + clone.tree_map, clone.param_adjustments, + true, NULL, NULL); if (symtab->dump_file) - fprintf (symtab->dump_file, "Materialization Call site updates done.\n"); - - cgraph_node::checking_verify_cgraph_nodes (); + { + dump_function_to_file (clone_of->decl, symtab->dump_file, + dump_flags); + dump_function_to_file (decl, symtab->dump_file, dump_flags); + } - symtab->remove_unreachable_nodes (symtab->dump_file); + cgraph_node *this_clone_of = clone_of; + /* Function is no longer clone. */ + remove_from_clone_tree (); + if (!this_clone_of->analyzed && !this_clone_of->clones) + { + this_clone_of->release_body (); + this_clone_of->remove_callees (); + this_clone_of->remove_all_references (); + } } #include "gt-cgraphclones.h" diff --git a/gcc/cgraphunit.c b/gcc/cgraphunit.c index 05713c28cf0..1e2262789dd 100644 --- a/gcc/cgraphunit.c +++ b/gcc/cgraphunit.c @@ -1601,6 +1601,7 @@ mark_functions_to_output (void) FOR_EACH_FUNCTION (node) { tree decl = node->decl; + node->clear_stmts_in_references (); gcc_assert (!node->process || node->same_comdat_group); if (node->process) @@ -2274,6 +2275,9 @@ cgraph_node::expand (void) announce_function (decl); process = 0; gcc_assert (lowered); + + /* Initialize the default bitmap obstack. */ + bitmap_obstack_initialize (NULL); get_untransformed_body (); /* Generate RTL for the body of DECL. */ @@ -2282,9 +2286,6 @@ cgraph_node::expand (void) gcc_assert (symtab->global_info_ready); - /* Initialize the default bitmap obstack. */ - bitmap_obstack_initialize (NULL); - /* Initialize the RTL code for the function. 
*/ saved_loc = input_location; input_location = DECL_SOURCE_LOCATION (decl); @@ -2298,7 +2299,8 @@ cgraph_node::expand (void) bitmap_obstack_initialize (&reg_obstack); /* FIXME, only at RTL generation*/ update_ssa (TODO_update_ssa_only_virtuals); - execute_all_ipa_transforms (false); + if (ipa_transforms_to_apply.exists ()) + execute_all_ipa_transforms (false); /* Perform all tree transforms and optimizations. */ diff --git a/gcc/ipa-inline-transform.c b/gcc/ipa-inline-transform.c index af2c2856aaa..f419df04961 100644 --- a/gcc/ipa-inline-transform.c +++ b/gcc/ipa-inline-transform.c @@ -644,16 +644,16 @@ save_inline_function_body (struct cgraph_node *node) tree_function_versioning (node->decl, first_clone->decl, NULL, NULL, true, NULL, NULL); - /* The function will be short lived and removed after we inline all the clones, - but make it internal so we won't confuse ourself. */ + /* The function will be short lived and removed after we inline all the + clones, but make it internal so we won't confuse ourself. */ DECL_EXTERNAL (first_clone->decl) = 0; TREE_PUBLIC (first_clone->decl) = 0; DECL_COMDAT (first_clone->decl) = 0; first_clone->ipa_transforms_to_apply.release (); /* When doing recursive inlining, the clone may become unnecessary. - This is possible i.e. in the case when the recursive function is proved to be - non-throwing and the recursion happens only in the EH landing pad. + This is possible i.e. in the case when the recursive function is proved to + be non-throwing and the recursion happens only in the EH landing pad. We cannot remove the clone until we are done with saving the body. Remove it now. 
*/ if (!first_clone->callers) @@ -696,6 +696,14 @@ inline_transform (struct cgraph_node *node) if (cfun->after_inlining) return 0; + cgraph_node *next_clone; + for (cgraph_node *n = node->clones; n; n = next_clone) + { + next_clone = n->next_sibling_clone; + if (n->decl != node->decl) + n->materialize_clone (); + } + /* We might need the body of this function so that we can expand it inline somewhere else. */ if (preserve_function_body_p (node)) diff --git a/gcc/ipa-param-manipulation.c b/gcc/ipa-param-manipulation.c index 5fc0de56556..438f4bd5a68 100644 --- a/gcc/ipa-param-manipulation.c +++ b/gcc/ipa-param-manipulation.c @@ -783,6 +783,13 @@ ipa_param_adjustments::modify_call (gcall *stmt, { vec<tree, va_gc> **debug_args = NULL; unsigned i = 0; + cgraph_node *callee_node = cgraph_node::get (callee_decl); + + /* FIXME: we don't seem to be able to insert debug args before clone + is materialized. Materializing them early leads to extra memory + use. */ + if (callee_node->clone_of) + callee_node->get_untransformed_body (); for (tree old_parm = DECL_ARGUMENTS (old_decl); old_parm && i < old_nargs && ((int) i) < m_always_copy_start; old_parm = DECL_CHAIN (old_parm), i++) diff --git a/gcc/ipa.c b/gcc/ipa.c index 288b58cf73d..ab7256d857f 100644 --- a/gcc/ipa.c +++ b/gcc/ipa.c @@ -1386,43 +1386,3 @@ make_pass_ipa_single_use (gcc::context *ctxt) return new pass_ipa_single_use (ctxt); } -/* Materialize all clones. 
*/ - -namespace { - -const pass_data pass_data_materialize_all_clones = -{ - SIMPLE_IPA_PASS, /* type */ - "materialize-all-clones", /* name */ - OPTGROUP_NONE, /* optinfo_flags */ - TV_IPA_OPT, /* tv_id */ - 0, /* properties_required */ - 0, /* properties_provided */ - 0, /* properties_destroyed */ - 0, /* todo_flags_start */ - 0, /* todo_flags_finish */ -}; - -class pass_materialize_all_clones : public simple_ipa_opt_pass -{ -public: - pass_materialize_all_clones (gcc::context *ctxt) - : simple_ipa_opt_pass (pass_data_materialize_all_clones, ctxt) - {} - - /* opt_pass methods: */ - virtual unsigned int execute (function *) - { - symtab->materialize_all_clones (); - return 0; - } - -}; // class pass_materialize_all_clones - -} // anon namespace - -simple_ipa_opt_pass * -make_pass_materialize_all_clones (gcc::context *ctxt) -{ - return new pass_materialize_all_clones (ctxt); -} diff --git a/gcc/passes.c b/gcc/passes.c index 6ff31ec37d7..1942b7cd1c3 100644 --- a/gcc/passes.c +++ b/gcc/passes.c @@ -2271,6 +2271,14 @@ execute_all_ipa_transforms (bool do_not_collect) return; node = cgraph_node::get (current_function_decl); + cgraph_node *next_clone; + for (cgraph_node *n = node->clones; n; n = next_clone) + { + next_clone = n->next_sibling_clone; + if (n->decl != node->decl) + n->materialize_clone (); + } + if (node->ipa_transforms_to_apply.exists ()) { unsigned int i; diff --git a/gcc/passes.def b/gcc/passes.def index f865bdc19ac..cf15d8eafca 100644 --- a/gcc/passes.def +++ b/gcc/passes.def @@ -172,7 +172,6 @@ along with GCC; see the file COPYING3. If not see passes are executed after partitioning and thus see just parts of the compiled unit. 
*/ INSERT_PASSES_AFTER (all_late_ipa_passes) - NEXT_PASS (pass_materialize_all_clones); NEXT_PASS (pass_ipa_pta); NEXT_PASS (pass_omp_simd_clone); TERMINATE_PASS_LIST (all_late_ipa_passes) diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h index 62e5b696cab..1e8badfe4be 100644 --- a/gcc/tree-pass.h +++ b/gcc/tree-pass.h @@ -519,8 +519,6 @@ extern ipa_opt_pass_d *make_pass_ipa_cdtor_merge (gcc::context *ctxt); extern ipa_opt_pass_d *make_pass_ipa_single_use (gcc::context *ctxt); extern ipa_opt_pass_d *make_pass_ipa_comdats (gcc::context *ctxt); extern ipa_opt_pass_d *make_pass_ipa_modref (gcc::context *ctxt); -extern simple_ipa_opt_pass *make_pass_materialize_all_clones (gcc::context * - ctxt); extern gimple_opt_pass *make_pass_cleanup_cfg_post_optimizing (gcc::context *ctxt); diff --git a/gcc/tree-ssa-structalias.c b/gcc/tree-ssa-structalias.c index 30a8c93b4ff..ac29365e809 100644 --- a/gcc/tree-ssa-structalias.c +++ b/gcc/tree-ssa-structalias.c @@ -8138,6 +8138,10 @@ ipa_pta_execute (void) from = constraints.length (); } + /* FIXME: Clone materialization is not preserving stmt references. */ + FOR_EACH_DEFINED_FUNCTION (node) + node->clear_stmts_in_references (); + /* Build the constraints. */ FOR_EACH_DEFINED_FUNCTION (node) { -- 2.30.2