From 366099ff0823c3c9929dd7fb911f4af04e63baef Mon Sep 17 00:00:00 2001 From: Jan Hubicka Date: Fri, 6 Nov 2020 10:05:11 +0100 Subject: [PATCH] Move size time tables from GGC to heap this patch moves size time tables out of ggc allocated memory. This makes sources bit cleaner and saves about 60MB of GGC memory that turns to about 45MB of heap memory for cc1plus LTO build. * ipa-fnsummary.h (class size_time_entry): Do not GTY annotate. (class ipa_fnsummary): Turn size_time_table to auto_vec and call_size_time_table to effecient vec; update constructors. * ipa-fnsummary.c (ipa_fn_summary::account_size_time): Update. (ipa_fn_summary::~ipa_fn_summary): Update. (ipa_fn_summary_t::duplicate): Update. (ipa_dump_fn_summary): Update. (set_switch_stmt_execution_predicate): Update. (analyze_function_body): Update. (estimate_calls_size_and_time): Update. (ipa_call_context::estimate_size_and_time): Update. (ipa_merge_fn_summary_after_inlining): Update. (ipa_update_overall_fn_summary): Update. (inline_read_section): Update. (ipa_fn_summary_write): Update. --- gcc/ipa-fnsummary.c | 71 ++++++++++++++++++++++----------------------- gcc/ipa-fnsummary.h | 17 ++++++----- 2 files changed, 44 insertions(+), 44 deletions(-) diff --git a/gcc/ipa-fnsummary.c b/gcc/ipa-fnsummary.c index 0393f2cad11..f2781d041b9 100644 --- a/gcc/ipa-fnsummary.c +++ b/gcc/ipa-fnsummary.c @@ -168,8 +168,7 @@ ipa_fn_summary::account_size_time (int size, sreal time, bool found = false; int i; predicate nonconst_pred; - vec *table = call - ? call_size_time_table : size_time_table; + vec *table = call ? &call_size_time_table : &size_time_table; if (exec_pred == false) return; @@ -181,13 +180,13 @@ ipa_fn_summary::account_size_time (int size, sreal time, /* We need to create initial empty unconditional clause, but otherwise we don't need to account empty times and sizes. */ - if (!size && time == 0 && table) + if (!size && time == 0 && table->length ()) return; /* Only for calls we are unaccounting what we previously recorded. */ gcc_checking_assert (time >= 0 || call); - for (i = 0; vec_safe_iterate (table, i, &e); i++) + for (i = 0; table->iterate (i, &e); i++) if (e->exec_predicate == exec_pred && e->nonconst_predicate == nonconst_pred) { @@ -227,9 +226,9 @@ ipa_fn_summary::account_size_time (int size, sreal time, new_entry.exec_predicate = exec_pred; new_entry.nonconst_predicate = nonconst_pred; if (call) - vec_safe_push (call_size_time_table, new_entry); + call_size_time_table.safe_push (new_entry); else - vec_safe_push (size_time_table, new_entry); + size_time_table.safe_push (new_entry); } else { @@ -753,8 +752,7 @@ ipa_fn_summary::~ipa_fn_summary () for (unsigned i = 0; i < len; i++) edge_predicate_pool.remove ((*loop_strides)[i].predicate); vec_free (conds); - vec_free (size_time_table); - vec_free (call_size_time_table); + call_size_time_table.release (); vec_free (loop_iterations); vec_free (loop_strides); builtin_constant_p_parms.release (); @@ -804,10 +802,10 @@ remap_freqcounting_preds_after_dup (vec *v, void ipa_fn_summary_t::duplicate (cgraph_node *src, cgraph_node *dst, - ipa_fn_summary *, + ipa_fn_summary *src_info, ipa_fn_summary *info) { - new (info) ipa_fn_summary (*ipa_fn_summaries->get (src)); + new (info) ipa_fn_summary (*src_info); /* TODO: as an optimization, we may avoid copying conditions that are known to be false or true. */ info->conds = vec_safe_copy (info->conds); @@ -817,7 +815,6 @@ ipa_fn_summary_t::duplicate (cgraph_node *src, out that something was optimized out. */ if (ipa_node_params_sum && cinfo && cinfo->tree_map) { - vec *entry = info->size_time_table; /* Use SRC parm info since it may not be copied yet. */ class ipa_node_params *parms_info = IPA_NODE_REF (src); ipa_auto_call_arg_values avals; @@ -830,7 +827,7 @@ ipa_fn_summary_t::duplicate (cgraph_node *src, bool inlined_to_p = false; struct cgraph_edge *edge, *next; - info->size_time_table = 0; + info->size_time_table.release (); avals.m_known_vals.safe_grow_cleared (count, true); for (i = 0; i < count; i++) { @@ -859,7 +856,7 @@ ipa_fn_summary_t::duplicate (cgraph_node *src, to be false. TODO: as on optimization, we can also eliminate conditions known to be true. */ - for (i = 0; vec_safe_iterate (entry, i, &e); i++) + for (i = 0; src_info->size_time_table.iterate (i, &e); i++) { predicate new_exec_pred; predicate new_nonconst_pred; @@ -935,8 +932,8 @@ ipa_fn_summary_t::duplicate (cgraph_node *src, } else { - info->size_time_table = vec_safe_copy (info->size_time_table); - info->loop_iterations = vec_safe_copy (info->loop_iterations); + info->size_time_table = src_info->size_time_table.copy (); + info->loop_iterations = vec_safe_copy (src_info->loop_iterations); info->loop_strides = vec_safe_copy (info->loop_strides); info->builtin_constant_p_parms @@ -1105,7 +1102,7 @@ ipa_dump_fn_summary (FILE *f, struct cgraph_node *node) fprintf (f, " estimated growth:%i\n", (int) s->growth); if (s->scc_no) fprintf (f, " In SCC: %i\n", (int) s->scc_no); - for (i = 0; vec_safe_iterate (s->size_time_table, i, &e); i++) + for (i = 0; s->size_time_table.iterate (i, &e); i++) { fprintf (f, " size:%f, time:%f", (double) e->size / ipa_fn_summary::size_scale, @@ -1844,7 +1841,7 @@ set_switch_stmt_execution_predicate (struct ipa_func_body_info *fbi, } p_seg = add_condition (summary, params_summary, index, - param_type, &aggpos, GT_EXPR, + param_type, &aggpos, GT_EXPR, max, param_ops); } } @@ -2595,8 +2592,8 @@ analyze_function_body (struct cgraph_node *node, bool early) memset(&fbi, 0, sizeof(fbi)); vec_free (info->conds); info->conds = NULL; - vec_free (info->size_time_table); - info->size_time_table = NULL; + info->size_time_table.release (); + info->call_size_time_table.release (); /* When optimizing and analyzing for IPA inliner, initialize loop optimizer so we can produce proper inline hints. @@ -3374,7 +3371,7 @@ estimate_calls_size_and_time (struct cgraph_node *node, int *size, if (use_table) { /* Build summary if it is absent. */ - if (!sum->call_size_time_table) + if (!sum->call_size_time_table.length ()) { predicate true_pred = true; sum->account_size_time (0, 0, true_pred, true_pred, true); @@ -3385,13 +3382,13 @@ estimate_calls_size_and_time (struct cgraph_node *node, int *size, sreal old_time = time ? *time : 0; if (min_size) - *min_size += (*sum->call_size_time_table)[0].size; + *min_size += sum->call_size_time_table[0].size; unsigned int i; size_time_entry *e; /* Walk the table and account sizes and times. */ - for (i = 0; vec_safe_iterate (sum->call_size_time_table, i, &e); + for (i = 0; sum->call_size_time_table.iterate (i, &e); i++) if (e->exec_predicate.evaluate (possible_truths)) { @@ -3404,7 +3401,7 @@ estimate_calls_size_and_time (struct cgraph_node *node, int *size, if ((flag_checking || dump_file) /* Do not try to sanity check when we know we lost some precision. */ - && sum->call_size_time_table->length () + && sum->call_size_time_table.length () < ipa_fn_summary::max_size_time_table_size) { estimate_calls_size_and_time_1 (node, &old_size, NULL, &old_time, NULL, @@ -3694,8 +3691,8 @@ ipa_call_context::estimate_size_and_time (ipa_call_estimates *estimates, sreal nonspecialized_time = time; - min_size += (*info->size_time_table)[0].size; - for (i = 0; vec_safe_iterate (info->size_time_table, i, &e); i++) + min_size += info->size_time_table[0].size; + for (i = 0; info->size_time_table.iterate (i, &e); i++) { bool exec = e->exec_predicate.evaluate (m_nonspec_possible_truths); @@ -3741,8 +3738,8 @@ ipa_call_context::estimate_size_and_time (ipa_call_estimates *estimates, gcc_checking_assert (time >= 0); } } - gcc_checking_assert ((*info->size_time_table)[0].exec_predicate == true); - gcc_checking_assert ((*info->size_time_table)[0].nonconst_predicate == true); + gcc_checking_assert (info->size_time_table[0].exec_predicate == true); + gcc_checking_assert (info->size_time_table[0].nonconst_predicate == true); gcc_checking_assert (min_size >= 0); gcc_checking_assert (size >= 0); gcc_checking_assert (time >= 0); @@ -4099,7 +4096,7 @@ ipa_merge_fn_summary_after_inlining (struct cgraph_edge *edge) add_builtin_constant_p_parm (info, operand_map[ip]); } sreal freq = edge->sreal_frequency (); - for (i = 0; vec_safe_iterate (callee_info->size_time_table, i, &e); i++) + for (i = 0; callee_info->size_time_table.iterate (i, &e); i++) { predicate p; p = e->exec_predicate.remap_after_inlining @@ -4146,7 +4143,7 @@ ipa_merge_fn_summary_after_inlining (struct cgraph_edge *edge) info->estimated_stack_size = peak; inline_update_callee_summaries (edge->callee, es->loop_depth); - if (info->call_size_time_table) + if (info->call_size_time_table.length ()) { int edge_size = 0; sreal edge_time = 0; @@ -4181,14 +4178,14 @@ ipa_update_overall_fn_summary (struct cgraph_node *node, bool reset) size_info->size = 0; info->time = 0; - for (i = 0; vec_safe_iterate (info->size_time_table, i, &e); i++) + for (i = 0; info->size_time_table.iterate (i, &e); i++) { size_info->size += e->size; info->time += e->time; } - info->min_size = (*info->size_time_table)[0].size; + info->min_size = info->size_time_table[0].size; if (reset) - vec_free (info->call_size_time_table); + info->call_size_time_table.release (); if (node->callees || node->indirect_calls) estimate_calls_size_and_time (node, &size_info->size, &info->min_size, &info->time, NULL, @@ -4452,9 +4449,9 @@ inline_read_section (struct lto_file_decl_data *file_data, const char *data, info->conds->quick_push (c); } count2 = streamer_read_uhwi (&ib); - gcc_assert (!info || !info->size_time_table); + gcc_assert (!info || !info->size_time_table.length ()); if (info && count2) - vec_safe_reserve_exact (info->size_time_table, count2); + info->size_time_table.reserve_exact (count2); for (j = 0; j < count2; j++) { class size_time_entry e; @@ -4465,7 +4462,7 @@ inline_read_section (struct lto_file_decl_data *file_data, const char *data, e.nonconst_predicate.stream_in (&ib); if (info) - info->size_time_table->quick_push (e); + info->size_time_table.quick_push (e); } count2 = streamer_read_uhwi (&ib); @@ -4658,8 +4655,8 @@ ipa_fn_summary_write (void) } } } - streamer_write_uhwi (ob, vec_safe_length (info->size_time_table)); - for (i = 0; vec_safe_iterate (info->size_time_table, i, &e); i++) + streamer_write_uhwi (ob, info->size_time_table.length ()); + for (i = 0; info->size_time_table.iterate (i, &e); i++) { streamer_write_uhwi (ob, e->size); e->time.stream_out (ob); diff --git a/gcc/ipa-fnsummary.h b/gcc/ipa-fnsummary.h index 3ecedb5125f..66984a96482 100644 --- a/gcc/ipa-fnsummary.h +++ b/gcc/ipa-fnsummary.h @@ -72,7 +72,7 @@ struct agg_position_info /* Representation of function body size and time depending on the call context. We keep simple array of record, every containing of predicate and time/size to account. */ -class GTY(()) size_time_entry +class size_time_entry { public: /* Predicate for code to be executed. */ @@ -82,7 +82,7 @@ public: the executed code paths will simplify. */ predicate nonconst_predicate; int size; - sreal GTY((skip)) time; + sreal time; }; /* Summary about function and stack frame sizes. We keep this info @@ -129,7 +129,7 @@ public: fp_expressions (false), estimated_stack_size (false), time (0), conds (NULL), - size_time_table (NULL), call_size_time_table (NULL), + size_time_table (), call_size_time_table (vNULL), loop_iterations (NULL), loop_strides (NULL), builtin_constant_p_parms (vNULL), growth (0), scc_no (0) @@ -142,8 +142,8 @@ public: inlinable (s.inlinable), single_caller (s.single_caller), fp_expressions (s.fp_expressions), estimated_stack_size (s.estimated_stack_size), - time (s.time), conds (s.conds), size_time_table (s.size_time_table), - call_size_time_table (NULL), + time (s.time), conds (s.conds), size_time_table (), + call_size_time_table (vNULL), loop_iterations (s.loop_iterations), loop_strides (s.loop_strides), builtin_constant_p_parms (s.builtin_constant_p_parms), growth (s.growth), scc_no (s.scc_no) @@ -181,8 +181,11 @@ public: accounted in call_size_time_table. This is because calls are often adjusted by IPA optimizations and thus this summary is generated from call summary information when needed. */ - vec *size_time_table; - vec *call_size_time_table; + auto_vec GTY((skip)) size_time_table; + /* Unlike size_time_table that is initialized for all summaries + call_size_time_table is allocated only for functions with + many calls. Use effecient vl_ptr storage. */ + vec GTY((skip)) call_size_time_table; /* Predicates on when some loops in the function can have known bounds. */ vec *loop_iterations; -- 2.30.2