From 517048cef0edf7ec35d7091ef4fea232edd48a53 Mon Sep 17 00:00:00 2001
From: Jan Hubicka
Date: Sat, 18 Nov 2017 23:55:56 +0100
Subject: [PATCH] cgraphclones.c (cgraph_edge::clone): Rename gcov_count to prof_count.

* cgraphclones.c (cgraph_edge::clone): Rename gcov_count to prof_count.
(cgraph_edge::clone): Cleanup updating of profile.
* ipa-cp.c (update_profiling_info): Likewise.
* ipa-inline-transform.c (inline_transform): Likewise.
* ipa-inline.c (inline_small_functions): Add missing space to dump.
* ipa-split.c (execute_split_functions): Do not split when function
is cold.
* predict.c (estimate_bb_frequencies): Cleanup updating of profile.
* profile-count.c (profile_count::dump): Add global0.
(profile_count::to_cgraph_frequency): Do not ICE when entry is
undefined.
(profile_count::to_sreal_scale): Likewise.
(profile_count::adjust_for_ipa_scaling): Fix typo in comment.
(profile_count::combine_with_ipa_count): New function.
* profile-count.h (profile_guessed_global0adjusted): New.
(profile_count::adjusted_zero): New.
(profile_count::global0adjusted): New.
(profile_count::combine_with_ipa_count): New.
* tree-inline.c (copy_edges_for_bb): Add NUM/DEN argument;
correct profile of return block of split functions.
(copy_cfg_body): Remove unused profile_count.
(copy_body): Likewise.
(expand_call_inline): Update.
(tree_function_versioning): Update.

From-SVN: r254919
---
 gcc/ChangeLog | 27 ++++++++++++++++++
 gcc/cgraphclones.c | 33 +++++++---------------
 gcc/ipa-cp.c | 15 ++--------
 gcc/ipa-inline-transform.c | 12 ++++----
 gcc/ipa-inline.c | 5 ++--
 gcc/ipa-split.c | 6 ++++
 gcc/predict.c | 10 ++-----
 gcc/profile-count.c | 27 +++++++++++++++---
 gcc/profile-count.h | 41 ++++++++++++++++++++++++----
 gcc/tree-inline.c | 56 +++++++++++++++++++-------------------
 10 files changed, 144 insertions(+), 88 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 2c3f733b6bb..60689054d05 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,30 @@
+2017-11-18 Jan Hubicka
+
+ * cgraphclones.c (cgraph_edge::clone): Rename gcov_count to prof_count.
+ (cgraph_edge::clone): Cleanup updating of profile.
+ * ipa-cp.c (update_profiling_info): Likewise.
+ * ipa-inline-transform.c (inline_transform): Likewise.
+ * ipa-inline.c (inline_small_functions): Add missing space to dump.
+ * ipa-split.c (execute_split_functions): Do not split when function
+ is cold.
+ * predict.c (estimate_bb_frequencies): Cleanup updating of profile.
+ * profile-count.c (profile_count::dump): Add global0.
+ (profile_count::to_cgraph_frequency): Do not ICE when entry is
+ undefined.
+ (profile_count::to_sreal_scale): Likewise.
+ (profile_count::adjust_for_ipa_scaling): Fix typo in comment.
+ (profile_count::combine_with_ipa_count): New function.
+ * profile-count.h (profile_guessed_global0adjusted): New.
+ (profile_count::adjusted_zero): New.
+ (profile_count::global0adjusted): New.
+ (profile_count::combine_with_ipa_count): New.
+ * tree-inline.c (copy_edges_for_bb): Add NUM/DEN argument;
+ correct profile of return block of split functions.
+ (copy_cfg_body): Remove unused profile_count.
+ (copy_body): Likewise.
+ (expand_call_inline): Update.
+ (tree_function_versioning): Update.
+
 2017-11-18 Aldy Hernandez

 * hash-set.h (hash_set::empty): New.
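
The centerpiece of the patch is the new profile_count::combine_with_ipa_count helper: the ad-hoc global0 ()/subtraction sequences in cgraph_edge::clone, cgraph_node::create_clone, update_profiling_info and estimate_bb_frequencies are all replaced by one rule. The sketch below is a minimal standalone model of that rule, not GCC's real profile_count class; the struct layout, the enum values and main () are invented for illustration, and only the decision logic mirrors the patch: a nonzero IPA count wins, an uninitialized IPA count leaves the local count alone, a precise IPA zero demotes the local count to "guessed, globally 0", and any other IPA zero demotes it to the new "guessed, globally 0 adjusted" quality.

/* Hypothetical, simplified model of combine_with_ipa_count; the names mirror
   the patch, but this is NOT GCC's profile_count implementation.  */

#include <cstdint>
#include <cstdio>

enum quality
{
  GUESSED_GLOBAL0 = 1,      /* local estimate; known not to run globally  */
  GUESSED_GLOBAL0ADJUSTED,  /* same, but the global zero is itself adjusted  */
  GUESSED,
  AFDO,
  ADJUSTED,
  PRECISE
};

struct count
{
  bool initialized;
  uint64_t val;
  quality q;

  static count uninitialized () { return {false, 0, GUESSED}; }
  static count zero () { return {true, 0, PRECISE}; }
  static count adjusted_zero () { return {true, 0, ADJUSTED}; }

  bool nonzero_p () const { return initialized && val != 0; }

  /* Quality-only demotions, as in the patch.  */
  count global0 () const { count c = *this; c.q = GUESSED_GLOBAL0; return c; }
  count global0adjusted () const
  { count c = *this; c.q = GUESSED_GLOBAL0ADJUSTED; return c; }

  /* IPA view: only counts comparable across functions survive.  */
  count ipa () const
  {
    if (q > GUESSED_GLOBAL0ADJUSTED)
      return *this;
    if (q == GUESSED_GLOBAL0)
      return zero ();
    if (q == GUESSED_GLOBAL0ADJUSTED)
      return adjusted_zero ();
    return uninitialized ();
  }

  /* The new combining rule: trust a real IPA count, keep the local count
     when nothing is known globally, otherwise demote the quality.  */
  count combine_with_ipa_count (count ipa_cnt) const
  {
    ipa_cnt = ipa_cnt.ipa ();
    if (ipa_cnt.nonzero_p ())
      return ipa_cnt;
    if (!ipa_cnt.initialized)
      return *this;
    if (ipa_cnt.q == PRECISE)   /* i.e. ipa_cnt is the precise zero  */
      return global0 ();
    return global0adjusted ();
  }
};

int main ()
{
  count local = {true, 100, GUESSED};  /* locally estimated profile  */
  count ipa0 = count::zero ();         /* feedback: function never ran  */
  count combined = local.combine_with_ipa_count (ipa0);
  /* Prints "val 100 quality 1": the local value is kept, the quality is
     demoted to GUESSED_GLOBAL0.  */
  std::printf ("val %llu quality %d\n",
               (unsigned long long) combined.val, (int) combined.q);
  return 0;
}

Because the local value is preserved and only the quality is demoted, the per-function shape of the profile survives even when interprocedural information says the function never runs.
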
diff --git a/gcc/cgraphclones.c b/gcc/cgraphclones.c index a575a34b0c6..ef39d85e88c 100644 --- a/gcc/cgraphclones.c +++ b/gcc/cgraphclones.c @@ -91,7 +91,7 @@ cgraph_edge::clone (cgraph_node *n, gcall *call_stmt, unsigned stmt_uid, { cgraph_edge *new_edge; profile_count::adjust_for_ipa_scaling (&num, &den); - profile_count gcov_count = count.apply_scale (num, den); + profile_count prof_count = count.apply_scale (num, den); if (indirect_unknown_callee) { @@ -104,19 +104,19 @@ cgraph_edge::clone (cgraph_node *n, gcall *call_stmt, unsigned stmt_uid, { cgraph_node *callee = cgraph_node::get (decl); gcc_checking_assert (callee); - new_edge = n->create_edge (callee, call_stmt, gcov_count); + new_edge = n->create_edge (callee, call_stmt, prof_count); } else { new_edge = n->create_indirect_edge (call_stmt, indirect_info->ecf_flags, - gcov_count, false); + prof_count, false); *new_edge->indirect_info = *indirect_info; } } else { - new_edge = n->create_edge (callee, call_stmt, gcov_count); + new_edge = n->create_edge (callee, call_stmt, prof_count); if (indirect_info) { new_edge->indirect_info @@ -135,12 +135,9 @@ cgraph_edge::clone (cgraph_node *n, gcall *call_stmt, unsigned stmt_uid, new_edge->in_polymorphic_cdtor = in_polymorphic_cdtor; /* Update IPA profile. Local profiles need no updating in original. */ - if (update_original - && count.ipa () == count && new_edge->count.ipa () == new_edge->count) - count -= new_edge->count; - else if (caller->count.global0 () == caller->count - && !(count == profile_count::zero ())) - count = count.global0 (); + if (update_original) + count = count.combine_with_ipa_count (count.ipa () + - new_edge->count.ipa ()); symtab->call_edge_duplication_hooks (this, new_edge); return new_edge; } @@ -431,22 +428,12 @@ cgraph_node::create_clone (tree new_decl, profile_count prof_count, if (new_inlined_to) dump_callgraph_transformation (this, new_inlined_to, "inlining to"); - if (prof_count == profile_count::zero () - && !(count == profile_count::zero ())) - prof_count = count.global0 (); - + prof_count = count.combine_with_ipa_count (prof_count); new_node->count = prof_count; /* Update IPA profile. Local profiles need no updating in original. 
*/ - if (update_original && !(count == profile_count::zero ()) - && count.ipa () == count && prof_count.ipa () == prof_count) - { - if (count.nonzero_p () - && !(count - prof_count).nonzero_p ()) - count = count.global0 (); - else - count -= prof_count; - } + if (update_original) + count = count.combine_with_ipa_count (count.ipa () - prof_count.ipa ()); new_node->decl = new_decl; new_node->register_symbol (); new_node->origin = origin; diff --git a/gcc/ipa-cp.c b/gcc/ipa-cp.c index 4421abf0b33..05228d0582d 100644 --- a/gcc/ipa-cp.c +++ b/gcc/ipa-cp.c @@ -3695,18 +3695,9 @@ update_profiling_info (struct cgraph_node *orig_node, } } - if (!new_sum.nonzero_p ()) - { - new_sum = new_sum.global0 (); - new_node->count = new_sum; - remainder = orig_node->count; - } - else - { - remainder = orig_node_count - new_sum; - if (!remainder.nonzero_p ()) - remainder = orig_node_count.global0 (); - } + remainder = orig_node_count.combine_with_ipa_count (orig_node_count.ipa () + - new_sum.ipa ()); + new_sum = orig_node_count.combine_with_ipa_count (new_sum); orig_node->count = remainder; for (cs = new_node->callees; cs; cs = cs->next_callee) diff --git a/gcc/ipa-inline-transform.c b/gcc/ipa-inline-transform.c index 8e66483016a..0ed3fbd2525 100644 --- a/gcc/ipa-inline-transform.c +++ b/gcc/ipa-inline-transform.c @@ -657,11 +657,10 @@ inline_transform (struct cgraph_node *node) { profile_count num = node->count; profile_count den = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count; - bool scale = num.initialized_p () && den.ipa_p () - && (den.nonzero_p () || num == profile_count::zero ()) - && !(num == den.ipa ()); + bool scale = num.initialized_p () && !(num == den); if (scale) { + profile_count::adjust_for_ipa_scaling (&num, &den); if (dump_file) { fprintf (dump_file, "Applying count scale "); @@ -672,11 +671,12 @@ inline_transform (struct cgraph_node *node) } basic_block bb; + cfun->cfg->count_max = profile_count::uninitialized (); FOR_ALL_BB_FN (bb, cfun) - if (num == profile_count::zero ()) - bb->count = bb->count.global0 (); - else + { bb->count = bb->count.apply_scale (num, den); + cfun->cfg->count_max = cfun->cfg->count_max.max (bb->count); + } ENTRY_BLOCK_PTR_FOR_FN (cfun)->count = node->count; } todo = optimize_inline_calls (current_function_decl); diff --git a/gcc/ipa-inline.c b/gcc/ipa-inline.c index 8d9ecb26d23..f4ce80ae2c6 100644 --- a/gcc/ipa-inline.c +++ b/gcc/ipa-inline.c @@ -1873,7 +1873,8 @@ inline_small_functions (void) /* Be sure that caches are maintained consistent. This check is affected by scaling roundoff errors when compiling for IPA this we skip it in that case. 
*/ - if (!edge->callee->count.ipa_p ()) + if (!edge->callee->count.ipa_p () + && (!max_count.initialized_p () || !max_count.nonzero_p ())) { sreal cached_badness = edge_badness (edge, false); @@ -1951,7 +1952,7 @@ inline_small_functions (void) { fprintf (dump_file, " Called "); edge->count.ipa ().dump (dump_file); - fprintf (dump_file, "times\n"); + fprintf (dump_file, " times\n"); } if (dump_flags & TDF_DETAILS) edge_badness (edge, true); diff --git a/gcc/ipa-split.c b/gcc/ipa-split.c index a5b4c41b32b..7ebd51554e5 100644 --- a/gcc/ipa-split.c +++ b/gcc/ipa-split.c @@ -1752,6 +1752,12 @@ execute_split_functions (void) fprintf (dump_file, "Not splitting: main function.\n"); return 0; } + if (node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED) + { + if (dump_file) + fprintf (dump_file, "Not splitting: function is unlikely executed.\n"); + return 0; + } /* This can be relaxed; function might become inlinable after splitting away the uninlinable part. */ if (ipa_fn_summaries diff --git a/gcc/predict.c b/gcc/predict.c index 7e40f7773f1..0f34956f699 100644 --- a/gcc/predict.c +++ b/gcc/predict.c @@ -3607,9 +3607,6 @@ estimate_bb_frequencies (bool force) to outermost to examine frequencies for back edges. */ estimate_loops (); - bool global0 = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.initialized_p () - && ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.ipa_p (); - freq_max = 0; FOR_EACH_BB_FN (bb, cfun) if (freq_max < BLOCK_INFO (bb)->frequency) @@ -3618,6 +3615,7 @@ estimate_bb_frequencies (bool force) freq_max = real_bb_freq_max / freq_max; if (freq_max < 16) freq_max = 16; + profile_count ipa_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.ipa (); cfun->cfg->count_max = profile_count::uninitialized (); FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb) { @@ -3626,10 +3624,8 @@ estimate_bb_frequencies (bool force) /* If we have profile feedback in which this function was never executed, then preserve this info. 
*/ - if (global0) - bb->count = count.global0 (); - else if (!(bb->count == profile_count::zero ())) - bb->count = count.guessed_local (); + if (!(bb->count == profile_count::zero ())) + bb->count = count.guessed_local ().combine_with_ipa_count (ipa_count); cfun->cfg->count_max = cfun->cfg->count_max.max (bb->count); } diff --git a/gcc/profile-count.c b/gcc/profile-count.c index 51c3b74fefa..e5d9faefc33 100644 --- a/gcc/profile-count.c +++ b/gcc/profile-count.c @@ -47,6 +47,8 @@ profile_count::dump (FILE *f) const fprintf (f, " (estimated locally)"); else if (m_quality == profile_guessed_global0) fprintf (f, " (estimated locally, globally 0)"); + else if (m_quality == profile_guessed_global0adjusted) + fprintf (f, " (estimated locally, globally 0 adjusted)"); else if (m_quality == profile_adjusted) fprintf (f, " (adjusted)"); else if (m_quality == profile_afdo) @@ -245,7 +247,7 @@ profile_count::to_frequency (struct function *fun) const int profile_count::to_cgraph_frequency (profile_count entry_bb_count) const { - if (!initialized_p ()) + if (!initialized_p () || !entry_bb_count.initialized_p ()) return CGRAPH_FREQ_BASE; if (*this == profile_count::zero ()) return 0; @@ -262,7 +264,7 @@ profile_count::to_cgraph_frequency (profile_count entry_bb_count) const sreal profile_count::to_sreal_scale (profile_count in, bool *known) const { - if (!initialized_p ()) + if (!initialized_p () || !in.initialized_p ()) { if (known) *known = false; @@ -272,7 +274,6 @@ profile_count::to_sreal_scale (profile_count in, bool *known) const *known = true; if (*this == profile_count::zero ()) return 0; - gcc_checking_assert (in.initialized_p ()); if (!in.m_val) { @@ -297,7 +298,7 @@ profile_count::adjust_for_ipa_scaling (profile_count *num, /* Scaling is no-op if NUM and DEN are the same. */ if (*num == *den) return; - /* Scaling to zero is always zeor. */ + /* Scaling to zero is always zero. */ if (*num == profile_count::zero ()) return; /* If den is non-zero we are safe. */ @@ -308,3 +309,21 @@ profile_count::adjust_for_ipa_scaling (profile_count *num, *den = den->force_nonzero (); *num = num->force_nonzero (); } + +/* THIS is a count of bb which is known to be executed IPA times. + Combine this information into bb counter. This means returning IPA + if it is nonzero, not changing anything if IPA is uninitialized + and if IPA is zero, turning THIS into corresponding local profile with + global0. */ +profile_count +profile_count::combine_with_ipa_count (profile_count ipa) +{ + ipa = ipa.ipa (); + if (ipa.nonzero_p ()) + return ipa; + if (!ipa.initialized_p ()) + return *this; + if (ipa == profile_count::zero ()) + return this->global0 (); + return this->global0adjusted (); +} diff --git a/gcc/profile-count.h b/gcc/profile-count.h index 66a217d39fd..2ffa33f639c 100644 --- a/gcc/profile-count.h +++ b/gcc/profile-count.h @@ -36,22 +36,24 @@ enum profile_quality { Never used by probabilities. */ profile_guessed_global0 = 1, + /* Same as profile_guessed_global0 but global count is adjusted 0. */ + profile_guessed_global0adjusted = 2, /* Profile is based on static branch prediction heuristics. It may or may not reflect the reality but it can be compared interprocedurally (for example, we inlined function w/o profile feedback into function with feedback and propagated from that). Never used by probablities. */ - profile_guessed = 2, + profile_guessed = 3, /* Profile was determined by autofdo. 
*/ - profile_afdo = 3, + profile_afdo = 4, /* Profile was originally based on feedback but it was adjusted by code duplicating optimization. It may not precisely reflect the particular code path. */ - profile_adjusted = 4, + profile_adjusted = 5, /* Profile was read from profile feedback or determined by accurate static method. */ - profile_precise = 5 + profile_precise = 7 }; /* The base value for branch probability notes and edge probabilities. */ @@ -637,6 +639,13 @@ public: { return from_gcov_type (0); } + static profile_count adjusted_zero () + { + profile_count c; + c.m_val = 0; + c.m_quality = profile_adjusted; + return c; + } static profile_count guessed_zero () { profile_count c; @@ -978,7 +987,7 @@ public: return ret; } - /* We know that profile is globally0 but keep local profile if present. */ + /* We know that profile is globally 0 but keep local profile if present. */ profile_count global0 () const { profile_count ret = *this; @@ -988,6 +997,17 @@ public: return ret; } + /* We know that profile is globally adjusted 0 but keep local profile + if present. */ + profile_count global0adjusted () const + { + profile_count ret = *this; + if (!initialized_p ()) + return *this; + ret.m_quality = profile_guessed_global0adjusted; + return ret; + } + /* Return THIS with quality dropped to GUESSED. */ profile_count guessed () const { @@ -1000,10 +1020,12 @@ public: acorss functions. */ profile_count ipa () const { - if (m_quality > profile_guessed_global0) + if (m_quality > profile_guessed_global0adjusted) return *this; if (m_quality == profile_guessed_global0) return profile_count::zero (); + if (m_quality == profile_guessed_global0adjusted) + return profile_count::adjusted_zero (); return profile_count::uninitialized (); } @@ -1054,6 +1076,13 @@ public: kinds. */ static void adjust_for_ipa_scaling (profile_count *num, profile_count *den); + /* THIS is a count of bb which is known to be executed IPA times. + Combine this information into bb counter. This means returning IPA + if it is nonzero, not changing anything if IPA is uninitialized + and if IPA is zero, turning THIS into corresponding local profile with + global0. */ + profile_count combine_with_ipa_count (profile_count ipa); + /* LTO streaming support. */ static profile_count stream_in (struct lto_input_block *); void stream_out (struct output_block *); diff --git a/gcc/tree-inline.c b/gcc/tree-inline.c index d4aa5bed739..6af2079b837 100644 --- a/gcc/tree-inline.c +++ b/gcc/tree-inline.c @@ -2173,7 +2173,7 @@ update_ssa_across_abnormal_edges (basic_block bb, basic_block ret_bb, debug stmts are left after a statement that must end the basic block. */ static bool -copy_edges_for_bb (basic_block bb, +copy_edges_for_bb (basic_block bb, profile_count num, profile_count den, basic_block ret_bb, basic_block abnormal_goto_dest) { basic_block new_bb = (basic_block) bb->aux; @@ -2204,6 +2204,14 @@ copy_edges_for_bb (basic_block bb, if (bb->index == ENTRY_BLOCK || bb->index == EXIT_BLOCK) return false; + /* When doing function splitting, we must decreate count of the return block + which was previously reachable by block we did not copy. 
*/ + if (single_succ_p (bb) && single_succ_edge (bb)->dest->index == EXIT_BLOCK) + FOR_EACH_EDGE (old_edge, ei, bb->preds) + if (old_edge->src->index != ENTRY_BLOCK + && !old_edge->src->aux) + new_bb->count -= old_edge->count ().apply_scale (num, den); + for (si = gsi_start_bb (new_bb); !gsi_end_p (si);) { gimple *copy_stmt; @@ -2465,23 +2473,16 @@ initialize_cfun (tree new_fndecl, tree callee_fndecl, profile_count count) profile_status_for_fn (cfun) = profile_status_for_fn (src_cfun); - if (ENTRY_BLOCK_PTR_FOR_FN (src_cfun)->count.initialized_p () - && count.ipa ().initialized_p ()) - { - ENTRY_BLOCK_PTR_FOR_FN (cfun)->count = - ENTRY_BLOCK_PTR_FOR_FN (src_cfun)->count.apply_scale (count, - ENTRY_BLOCK_PTR_FOR_FN (src_cfun)->count); - EXIT_BLOCK_PTR_FOR_FN (cfun)->count = - EXIT_BLOCK_PTR_FOR_FN (src_cfun)->count.apply_scale (count, - ENTRY_BLOCK_PTR_FOR_FN (src_cfun)->count); - } - else - { - ENTRY_BLOCK_PTR_FOR_FN (cfun)->count - = ENTRY_BLOCK_PTR_FOR_FN (src_cfun)->count; - EXIT_BLOCK_PTR_FOR_FN (cfun)->count - = EXIT_BLOCK_PTR_FOR_FN (src_cfun)->count; - } + profile_count num = count; + profile_count den = ENTRY_BLOCK_PTR_FOR_FN (src_cfun)->count; + profile_count::adjust_for_ipa_scaling (&num, &den); + + ENTRY_BLOCK_PTR_FOR_FN (cfun)->count = + ENTRY_BLOCK_PTR_FOR_FN (src_cfun)->count.apply_scale (count, + ENTRY_BLOCK_PTR_FOR_FN (src_cfun)->count); + EXIT_BLOCK_PTR_FOR_FN (cfun)->count = + EXIT_BLOCK_PTR_FOR_FN (src_cfun)->count.apply_scale (count, + ENTRY_BLOCK_PTR_FOR_FN (src_cfun)->count); if (src_cfun->eh) init_eh_for_function (); @@ -2642,7 +2643,7 @@ redirect_all_calls (copy_body_data * id, basic_block bb) another function. Walks FN via CFG, returns new fndecl. */ static tree -copy_cfg_body (copy_body_data * id, profile_count, +copy_cfg_body (copy_body_data * id, basic_block entry_block_map, basic_block exit_block_map, basic_block new_entry) { @@ -2723,12 +2724,13 @@ copy_cfg_body (copy_body_data * id, profile_count, FOR_ALL_BB_FN (bb, cfun_to_copy) if (!id->blocks_to_copy || (bb->index > 0 && bitmap_bit_p (id->blocks_to_copy, bb->index))) - need_debug_cleanup |= copy_edges_for_bb (bb, exit_block_map, + need_debug_cleanup |= copy_edges_for_bb (bb, num, den, exit_block_map, abnormal_goto_dest); if (new_entry) { - edge e = make_edge (entry_block_map, (basic_block)new_entry->aux, EDGE_FALLTHRU); + edge e = make_edge (entry_block_map, (basic_block)new_entry->aux, + EDGE_FALLTHRU); e->probability = profile_probability::always (); } @@ -2927,7 +2929,7 @@ copy_tree_body (copy_body_data *id) another function. */ static tree -copy_body (copy_body_data *id, profile_count count, +copy_body (copy_body_data *id, basic_block entry_block_map, basic_block exit_block_map, basic_block new_entry) { @@ -2936,7 +2938,7 @@ copy_body (copy_body_data *id, profile_count count, /* If this body has a CFG, walk CFG and copy. */ gcc_assert (ENTRY_BLOCK_PTR_FOR_FN (DECL_STRUCT_FUNCTION (fndecl))); - body = copy_cfg_body (id, count, entry_block_map, exit_block_map, + body = copy_cfg_body (id, entry_block_map, exit_block_map, new_entry); copy_debug_stmts (id); @@ -4684,8 +4686,7 @@ expand_call_inline (basic_block bb, gimple *stmt, copy_body_data *id) function in any way before this point, as this CALL_EXPR may be a self-referential call; if we're calling ourselves, we need to duplicate our body before altering anything. 
*/ - copy_body (id, cg_edge->callee->count, - bb, return_block, NULL); + copy_body (id, bb, return_block, NULL); reset_debug_bindings (id, stmt_gsi); @@ -5857,7 +5858,7 @@ tree_function_versioning (tree old_decl, tree new_decl, DECL_RESULT (new_decl) = DECL_RESULT (old_decl); DECL_ARGUMENTS (new_decl) = DECL_ARGUMENTS (old_decl); initialize_cfun (new_decl, old_decl, - old_entry_block->count); + new_entry ? new_entry->count : old_entry_block->count); if (DECL_STRUCT_FUNCTION (new_decl)->gimple_df) DECL_STRUCT_FUNCTION (new_decl)->gimple_df->ipa_pta = id.src_cfun->gimple_df->ipa_pta; @@ -6004,8 +6005,7 @@ tree_function_versioning (tree old_decl, tree new_decl, } /* Copy the Function's body. */ - copy_body (&id, old_entry_block->count, - ENTRY_BLOCK_PTR_FOR_FN (cfun), EXIT_BLOCK_PTR_FOR_FN (cfun), + copy_body (&id, ENTRY_BLOCK_PTR_FOR_FN (cfun), EXIT_BLOCK_PTR_FOR_FN (cfun), new_entry); /* Renumber the lexical scoping (non-code) blocks consecutively. */ -- 2.30.2
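
Both the inline_transform hunk and the rewritten initialize_cfun now funnel their NUM/DEN scaling through profile_count::adjust_for_ipa_scaling before calling apply_scale. The snippet below is a rough standalone illustration of why that pre-step matters; the cnt struct, the free functions and the numbers are made up for this example, and the quality tracking of the real profile_count is ignored: when the denominator (typically the entry count of the body being scaled) is zero while the numerator is not, the denominator is forced to a nonzero value so that the NUM/DEN scale stays well defined instead of dividing by zero or wiping out every block count.

/* Bare-integer stand-ins for NUM/DEN scaling; not GCC's real API.  */

#include <cstdint>
#include <cstdio>

struct cnt
{
  uint64_t val;
  bool zero_p () const { return val == 0; }
};

/* Mirrors the shape of profile_count::adjust_for_ipa_scaling: a no-op when
   NUM == DEN, nothing to do when NUM is zero or DEN is already nonzero;
   otherwise DEN is forced to be nonzero so NUM/DEN stays well defined.
   (The real version forces both NUM and DEN via force_nonzero.)  */
static void
adjust_for_ipa_scaling (cnt *num, cnt *den)
{
  if (num->val == den->val)
    return;
  if (num->zero_p ())
    return;
  if (!den->zero_p ())
    return;
  den->val = 1;
}

/* Scale C by NUM/DEN; the guards keep the sketch total even if a caller
   forgets to adjust first.  */
static cnt
apply_scale (cnt c, cnt num, cnt den)
{
  if (num.val == den.val)
    return c;
  if (num.zero_p () || den.zero_p ())
    return {0};
  return {c.val * num.val / den.val};
}

int main ()
{
  cnt num = {4};   /* target count we are scaling towards  */
  cnt den = {0};   /* current entry count of the body: zero  */
  adjust_for_ipa_scaling (&num, &den);

  cnt bb = {8};    /* some basic block count in the body  */
  cnt scaled = apply_scale (bb, num, den);
  /* Without the adjustment the scale 4/0 would be undefined; with it DEN
     becomes 1 and the block count becomes 8 * 4 / 1 = 32.  */
  std::printf ("den %llu, scaled bb count %llu\n",
               (unsigned long long) den.val, (unsigned long long) scaled.val);
  return 0;
}
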