From b49d29d73ac1e25e1ec7c5279d7493f9be6961bb Mon Sep 17 00:00:00 2001 From: Jan Hubicka Date: Thu, 28 Nov 2019 15:44:08 +0100 Subject: [PATCH] Fix profile adjustments while cloning This patch fixes profile updates while cloning. When a new clone is produced its global profile is subtracted from the original function. If the original function profile drops to 0 we want to switch from global profiles to global0 profiles which is implemented by combine_with_ipa_count. However this is done on all edges independently and it may happen that we end up combining global and global0 profiles in one function which is not a good idea. This implements profile_count::combine_with_ipa_count_within which is able to take into account that the counter is inside a function with a given count. * profile-count.h (profile_count::combine_with_ipa_count_within): Declare. * profile-count.c (profile_count::combine_with_ipa_count_within): New. * cgraphclones.c (cgraph_edge::clone, cgraph_node::create_clone): Use it. From-SVN: r278810 --- gcc/ChangeLog | 9 +++++ gcc/cgraphclones.c | 21 ++++++++++-- gcc/profile-count.c | 84 +++++++++++++++++++++++++++++++++++++++++++++ gcc/profile-count.h | 35 ++++++++++++++++--- 4 files changed, 141 insertions(+), 8 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index aecc351e0fa..064dd8ef83e 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,12 @@ +2019-11-28 Jan Hubicka + + * profile-count.h (profile_count::combine_with_ipa_count_within): + Declare. + * profile-count.c (profile_count::combine_with_ipa_count_within): + New. + * cgraphclones.c (cgraph_edge::clone, cgraph_node::create_clone): Use + it. + 2019-11-28 Jan Hubicka * ipa-utils.c (ipa_merge_profiles): Be sure that all type transtions diff --git a/gcc/cgraphclones.c b/gcc/cgraphclones.c index ac5c57a47aa..64ff1c0654b 100644 --- a/gcc/cgraphclones.c +++ b/gcc/cgraphclones.c @@ -80,6 +80,11 @@ along with GCC; see the file COPYING3. 
If not see #include "tree-inline.h" #include "dumpfile.h" #include "gimple-pretty-print.h" +#include "alloc-pool.h" +#include "symbol-summary.h" +#include "tree-vrp.h" +#include "ipa-prop.h" +#include "ipa-fnsummary.h" /* Create clone of edge in the node N represented by CALL_EXPR the callgraph. */ @@ -136,8 +141,9 @@ cgraph_edge::clone (cgraph_node *n, gcall *call_stmt, unsigned stmt_uid, /* Update IPA profile. Local profiles need no updating in original. */ if (update_original) - count = count.combine_with_ipa_count (count.ipa () - - new_edge->count.ipa ()); + count = count.combine_with_ipa_count_within (count.ipa () + - new_edge->count.ipa (), + caller->count); symtab->call_edge_duplication_hooks (this, new_edge); return new_edge; } @@ -268,6 +274,8 @@ cgraph_node::expand_all_artificial_thunks () thunk->thunk.thunk_p = false; thunk->analyze (); } + ipa_analyze_node (thunk); + inline_analyze_function (thunk); thunk->expand_all_artificial_thunks (); } else @@ -341,7 +349,14 @@ cgraph_node::create_clone (tree new_decl, profile_count prof_count, /* Update IPA profile. Local profiles need no updating in original. */ if (update_original) - count = count.combine_with_ipa_count (count.ipa () - prof_count.ipa ()); + { + if (inlined_to) + count = count.combine_with_ipa_count_within (count.ipa () + - prof_count.ipa (), + inlined_to->count); + else + count = count.combine_with_ipa_count (count.ipa () - prof_count.ipa ()); + } new_node->decl = new_decl; new_node->register_symbol (); new_node->origin = origin; diff --git a/gcc/profile-count.c b/gcc/profile-count.c index e8602a025f0..d0800b29d70 100644 --- a/gcc/profile-count.c +++ b/gcc/profile-count.c @@ -32,6 +32,7 @@ along with GCC; see the file COPYING3. If not see #include "cgraph.h" #include "wide-int.h" #include "sreal.h" +#include "selftest.h" /* Names from profile_quality enum values. 
*/ @@ -291,6 +292,7 @@ profile_count::to_cgraph_frequency (profile_count entry_bb_count) const return 0; gcc_checking_assert (entry_bb_count.initialized_p ()); uint64_t scale; + gcc_checking_assert (compatible_p (entry_bb_count)); if (!safe_scale_64bit (!entry_bb_count.m_val ? m_val + 1 : m_val, CGRAPH_FREQ_BASE, MAX (1, entry_bb_count.m_val), &scale)) return CGRAPH_FREQ_MAX; @@ -328,6 +330,7 @@ profile_count::to_sreal_scale (profile_count in, bool *known) const return 0; if (m_val == in.m_val) return 1; + gcc_checking_assert (compatible_p (in)); if (!in.m_val) { @@ -373,6 +376,8 @@ profile_count::adjust_for_ipa_scaling (profile_count *num, profile_count profile_count::combine_with_ipa_count (profile_count ipa) { + if (!initialized_p ()) + return *this; ipa = ipa.ipa (); if (ipa.nonzero_p ()) return ipa; @@ -383,6 +388,23 @@ profile_count::combine_with_ipa_count (profile_count ipa) return this->global0adjusted (); } +/* Same as profile_count::combine_with_ipa_count but within function with count + IPA2. */ +profile_count +profile_count::combine_with_ipa_count_within (profile_count ipa, + profile_count ipa2) +{ + profile_count ret; + if (!initialized_p ()) + return *this; + if (ipa2.ipa () == ipa2 && ipa.initialized_p ()) + ret = ipa; + else + ret = combine_with_ipa_count (ipa); + gcc_checking_assert (ret.compatible_p (ipa2)); + return ret; +} + /* The profiling runtime uses gcov_type, which is usually 64bit integer. Conversions back and forth are used to read the coverage and get it into internal representation. */ @@ -425,3 +447,65 @@ profile_probability::combine_with_count (profile_count count1, else return *this * even () + other * even (); } + +#if CHECKING_P +namespace selftest { + +/* Verify non-trivial type conversions for IPA scaling. This happens often + during inlining. 
*/ + +static void +profile_count_verify_ipa_scaling (void) +{ + profile_count cnt1 = profile_count::from_gcov_type (4).global0 (); + profile_count cnt2 = profile_count::from_gcov_type (2); + profile_count cnt3 = profile_count::from_gcov_type (8); + profile_count cnt4 = cnt3.apply_scale (cnt1, cnt2); + + /* Result should be 16 with GUESSED_GLOBAL0. */ + ASSERT_EQ (cnt4.ipa (), profile_count::zero ()); + ASSERT_EQ (cnt4.to_gcov_type (), 16); + + cnt1 = profile_count::from_gcov_type (4).global0adjusted (); + cnt4 = cnt3.apply_scale (cnt1, cnt2); + /* Result should be 16 with GUESSED_GLOBAL0_ADJUSTED. */ + ASSERT_EQ (cnt4.ipa (), profile_count::adjusted_zero ()); + ASSERT_EQ (cnt4.to_gcov_type (), 16); +} + +/* Verify non-trivial cases of sreal scale calculations. */ + +static void +profile_count_verify_to_sreal_scale (void) +{ + profile_count cnt1 = profile_count::from_gcov_type (4).global0 (); + profile_count cnt2 = profile_count::from_gcov_type (8); + + /* If count is globally 0 it should have 0 scale in non-zero global count. */ + ASSERT_EQ (cnt1.to_sreal_scale (cnt2), 0); +} + +/* Verify non-trivial cases of probability_in calculations. */ + +static void +profile_count_verify_probability_in (void) +{ + /*profile_count cnt1 = profile_count::from_gcov_type (4).global0 (); + profile_count cnt2 = profile_count::from_gcov_type (8);*/ + + /* If count is globally 0 it should have 0 probability in non-zero global + count. */ + /*ASSERT_EQ (cnt1.probability_in (cnt2), profile_probability::never ());*/ +} + +/* Run all of the selftests within this file. 
*/ + +void profile_count_c_tests (void) +{ + profile_count_verify_ipa_scaling (); + profile_count_verify_to_sreal_scale (); + profile_count_verify_probability_in (); +} + +} +#endif diff --git a/gcc/profile-count.h b/gcc/profile-count.h index 3d6e388f7fe..9af8d606bc4 100644 --- a/gcc/profile-count.h +++ b/gcc/profile-count.h @@ -700,6 +700,7 @@ private: uint64_t UINT64_BIT_FIELD_ALIGN m_val : n_bits; #undef UINT64_BIT_FIELD_ALIGN enum profile_quality m_quality : 3; +public: /* Return true if both values can meaningfully appear in single function body. We have either all counters in function local or global, otherwise @@ -711,9 +712,18 @@ private: if (*this == zero () || other == zero ()) return true; + /* Do not allow nonzero global profile together with local guesses + that are globally0. */ + if (ipa ().nonzero_p () + && !(other.ipa () == other)) + return false; + if (other.ipa ().nonzero_p () + && !(ipa () == *this)) + return false; + return ipa_p () == other.ipa_p (); } -public: + /* Used for counters which are expected to be never executed. */ static profile_count zero () { @@ -992,6 +1002,14 @@ public: profile_count max (profile_count other) const { + profile_count val = *this; + + /* Always prefer nonzero IPA counts over local counts. 
*/ + if (ipa ().nonzero_p () || other.ipa ().nonzero_p ()) + { + val = ipa (); + other = other.ipa (); + } if (!initialized_p ()) return other; if (!other.initialized_p ()) @@ -1001,8 +1019,8 @@ public: if (other == zero ()) return *this; gcc_checking_assert (compatible_p (other)); - if (m_val < other.m_val || (m_val == other.m_val - && m_quality < other.m_quality)) + if (val.m_val < other.m_val || (m_val == other.m_val + && val.m_quality < other.m_quality)) return other; return *this; } @@ -1061,6 +1079,7 @@ public: { if (*this == zero ()) return *this; + if (num == zero ()) return num; if (!initialized_p () || !num.initialized_p () || !den.initialized_p ()) @@ -1075,7 +1094,9 @@ public: ret.m_val = MIN (val, max_count); ret.m_quality = MIN (MIN (MIN (m_quality, ADJUSTED), num.m_quality), den.m_quality); - if (num.ipa_p () && !ret.ipa_p ()) + /* Be sure that ret is not local or global0 type + if num is global. */ + if (num.ipa_p () && (!ret.ipa_p () || !(ret.ipa () == ret))) ret.m_quality = MIN (num.m_quality, GUESSED); return ret; } @@ -1153,8 +1174,8 @@ public: if (*this == overall && m_quality == PRECISE) return profile_probability::always (); profile_probability ret; - gcc_checking_assert (compatible_p (overall)); + gcc_checking_assert (compatible_p (overall)); if (overall.m_val < m_val) { ret.m_val = profile_probability::max_probability; @@ -1194,6 +1215,10 @@ public: global0. */ profile_count combine_with_ipa_count (profile_count ipa); + /* Same as combine_with_ipa_count but inside function with count IPA2. */ + profile_count combine_with_ipa_count_within + (profile_count ipa, profile_count ipa2); + /* The profiling runtime uses gcov_type, which is usually 64bit integer. Conversions back and forth are used to read the coverage and get it into internal representation. */ -- 2.30.2