From: Jan Hubicka Date: Wed, 21 Oct 2020 18:00:22 +0000 (+0200) Subject: Inline functions with builtin_constant_p more aggressively. X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=caaa218f912ccf932fdb79243ded68bb462bbe63;p=gcc.git Inline functions with builtin_constant_p more aggressively. This patch implements a heuristic that increases inline limits (by the hints mechanism) for inline functions that use builtin_constant_p on a parameter. Those are very likely intended to be always inlined and to simplify after inlining. The PR is about a function that we used to inline with --param inline-insns-single=200 but with the new default of 70 for -O2 we no longer do so. Hints are currently configured to double the bound, so we get a limit of 140, which is still not enough to inline the particular testcase, but it should help in general. I can implement a stronger bump if that seems useful (maybe it is). The example is a bit operation written as a decision chain with 64 conditions. This blows up the limit on the number of conditions we track per function (which is 30) and thus the size/time estimates are not working that well. gcc/ChangeLog: PR ipa/97445 * ipa-fnsummary.c (ipa_dump_hints): Add INLINE_HINT_builtin_constant_p. (ipa_fn_summary::~ipa_fn_summary): Free builtin_constant_p_parms. (ipa_fn_summary_t::duplicate): Duplicate builtin_constant_p_parms. (ipa_dump_fn_summary): Dump builtin_constant_p_parms. (add_builtin_constant_p_parm): New function. (set_cond_stmt_execution_predicate): Update builtin_constant_p_parms. (ipa_call_context::estimate_size_and_time): Set INLINE_HINT_builtin_constant_p. (ipa_merge_fn_summary_after_inlining): Merge builtin_constant_p_parms. (inline_read_section): Read builtin_constant_p_parms. (ipa_fn_summary_write): Write builtin_constant_p_parms. * ipa-fnsummary.h (enum ipa_hints_vals): Add INLINE_HINT_builtin_constant_p. * ipa-inline.c (want_inline_small_function_p): Use INLINE_HINT_builtin_constant_p. 
(edge_badness): Use INLINE_HINT_builtin_constant_p. gcc/testsuite/ChangeLog: PR ipa/97445 * gcc.dg/ipa/inlinehint-5.c: New test. --- diff --git a/gcc/ipa-fnsummary.c b/gcc/ipa-fnsummary.c index 9e3eda4d3cb..f680e4221e7 100644 --- a/gcc/ipa-fnsummary.c +++ b/gcc/ipa-fnsummary.c @@ -141,6 +141,11 @@ ipa_dump_hints (FILE *f, ipa_hints hints) hints &= ~INLINE_HINT_known_hot; fprintf (f, " known_hot"); } + if (hints & INLINE_HINT_builtin_constant_p) + { + hints &= ~INLINE_HINT_builtin_constant_p; + fprintf (f, " builtin_constant_p"); + } gcc_assert (!hints); } @@ -751,6 +756,7 @@ ipa_fn_summary::~ipa_fn_summary () vec_free (call_size_time_table); vec_free (loop_iterations); vec_free (loop_strides); + builtin_constant_p_parms.release (); } void @@ -899,7 +905,8 @@ ipa_fn_summary_t::duplicate (cgraph_node *src, new_predicate = es->predicate->remap_after_duplication (possible_truths); if (new_predicate == false && *es->predicate != false) - optimized_out_size += es->call_stmt_size * ipa_fn_summary::size_scale; + optimized_out_size + += es->call_stmt_size * ipa_fn_summary::size_scale; edge_set_predicate (edge, &new_predicate); } info->loop_iterations @@ -908,6 +915,15 @@ ipa_fn_summary_t::duplicate (cgraph_node *src, info->loop_strides = remap_freqcounting_preds_after_dup (info->loop_strides, possible_truths); + if (info->builtin_constant_p_parms.length()) + { + vec parms = info->builtin_constant_p_parms; + int ip; + info->builtin_constant_p_parms = vNULL; + for (i = 0; parms.iterate (i, &ip); i++) + if (!avals.m_known_vals[ip]) + info->builtin_constant_p_parms.safe_push (ip); + } /* If inliner or someone after inliner will ever start producing non-trivial clones, we will get trouble with lack of information @@ -921,6 +937,9 @@ ipa_fn_summary_t::duplicate (cgraph_node *src, info->loop_iterations = vec_safe_copy (info->loop_iterations); info->loop_strides = vec_safe_copy (info->loop_strides); + info->builtin_constant_p_parms + = info->builtin_constant_p_parms.copy (); + 
ipa_freqcounting_predicate *f; for (int i = 0; vec_safe_iterate (info->loop_iterations, i, &f); i++) { @@ -1066,6 +1085,13 @@ ipa_dump_fn_summary (FILE *f, struct cgraph_node *node) fprintf (f, " inlinable"); if (s->fp_expressions) fprintf (f, " fp_expression"); + if (s->builtin_constant_p_parms.length ()) + { + fprintf (f, " builtin_constant_p_parms"); + for (unsigned int i = 0; + i < s->builtin_constant_p_parms.length (); i++) + fprintf (f, " %i", s->builtin_constant_p_parms[i]); + } fprintf (f, "\n global time: %f\n", s->time.to_double ()); fprintf (f, " self size: %i\n", ss->self_size); fprintf (f, " global size: %i\n", ss->size); @@ -1517,6 +1543,21 @@ fail: return false; } +/* Record to SUMMARY that PARM is used by builtin_constant_p. */ + +static void +add_builtin_constant_p_parm (class ipa_fn_summary *summary, int parm) +{ + int ip; + + /* Avoid duplicates. */ + for (unsigned int i = 0; + summary->builtin_constant_p_parms.iterate (i, &ip); i++) + if (ip == parm) + return; + summary->builtin_constant_p_parms.safe_push (parm); +} + /* If BB ends by a conditional we can turn into predicates, attach corresponding predicates to the CFG edges. 
*/ @@ -1598,6 +1639,8 @@ set_cond_stmt_execution_predicate (struct ipa_func_body_info *fbi, op2 = gimple_call_arg (set_stmt, 0); if (!decompose_param_expr (fbi, set_stmt, op2, &index, &param_type, &aggpos)) return; + if (!aggpos.by_ref) + add_builtin_constant_p_parm (summary, index); FOR_EACH_EDGE (e, ei, bb->succs) if (e->flags & EDGE_FALSE_VALUE) { predicate p = add_condition (summary, params_summary, index, @@ -3717,6 +3760,9 @@ ipa_call_context::estimate_size_and_time (ipa_call_estimates *estimates, hints |= INLINE_HINT_in_scc; if (DECL_DECLARED_INLINE_P (m_node->decl)) hints |= INLINE_HINT_declared_inline; + if (info->builtin_constant_p_parms.length () + && DECL_DECLARED_INLINE_P (m_node->decl)) + hints |= INLINE_HINT_builtin_constant_p; ipa_freqcounting_predicate *fcp; for (i = 0; vec_safe_iterate (info->loop_iterations, i, &fcp); i++) @@ -4044,8 +4090,13 @@ ipa_merge_fn_summary_after_inlining (struct cgraph_edge *edge) operand_map[i] = map; gcc_assert (map < ipa_get_param_count (params_summary)); } + + int ip; + for (i = 0; callee_info->builtin_constant_p_parms.iterate (i, &ip); i++) + if (ip < count && operand_map[ip] >= 0) + add_builtin_constant_p_parm (info, operand_map[ip]); } - sreal freq = edge->sreal_frequency (); + sreal freq = edge->sreal_frequency (); for (i = 0; vec_safe_iterate (callee_info->size_time_table, i, &e); i++) { predicate p; @@ -4443,6 +4494,15 @@ inline_read_section (struct lto_file_decl_data *file_data, const char *data, vec_safe_push (info->loop_strides, fcp); } } + count2 = streamer_read_uhwi (&ib); + if (info && count2) + info->builtin_constant_p_parms.reserve_exact (count2); + for (j = 0; j < count2; j++) + { + int parm = streamer_read_uhwi (&ib); + if (info) + info->builtin_constant_p_parms.quick_push (parm); + } for (e = node->callees; e; e = e->next_callee) read_ipa_call_summary (&ib, e, info != NULL); for (e = node->indirect_calls; e; e = e->next_callee) @@ -4618,6 +4678,11 @@ ipa_fn_summary_write (void) 
fcp->predicate->stream_out (ob); fcp->freq.stream_out (ob); } + streamer_write_uhwi (ob, info->builtin_constant_p_parms.length ()); + int ip; + for (i = 0; info->builtin_constant_p_parms.iterate (i, &ip); + i++) + streamer_write_uhwi (ob, ip); for (edge = cnode->callees; edge; edge = edge->next_callee) write_ipa_call_summary (ob, edge); for (edge = cnode->indirect_calls; edge; edge = edge->next_callee) diff --git a/gcc/ipa-fnsummary.h b/gcc/ipa-fnsummary.h index f4dd5b85ab9..3ecedb5125f 100644 --- a/gcc/ipa-fnsummary.h +++ b/gcc/ipa-fnsummary.h @@ -49,7 +49,10 @@ enum ipa_hints_vals { Set by simple_edge_hints in ipa-inline-analysis.c. */ INLINE_HINT_cross_module = 64, /* We know that the callee is hot by profile. */ - INLINE_HINT_known_hot = 128 + INLINE_HINT_known_hot = 128, + /* There is builtin_constant_p dependent on parameter which is usually + a strong hint to inline. */ + INLINE_HINT_builtin_constant_p = 256 }; typedef int ipa_hints; @@ -123,10 +126,12 @@ public: ipa_fn_summary () : min_size (0), inlinable (false), single_caller (false), - fp_expressions (false), estimated_stack_size (false), + fp_expressions (false), + estimated_stack_size (false), time (0), conds (NULL), size_time_table (NULL), call_size_time_table (NULL), loop_iterations (NULL), loop_strides (NULL), + builtin_constant_p_parms (vNULL), growth (0), scc_no (0) { } @@ -140,6 +145,7 @@ public: time (s.time), conds (s.conds), size_time_table (s.size_time_table), call_size_time_table (NULL), loop_iterations (s.loop_iterations), loop_strides (s.loop_strides), + builtin_constant_p_parms (s.builtin_constant_p_parms), growth (s.growth), scc_no (s.scc_no) {} @@ -182,6 +188,8 @@ public: vec *loop_iterations; /* Predicates on when some loops in the function can have known strides. */ vec *loop_strides; + /* Parameters tested by builtin_constant_p. */ + vec GTY((skip)) builtin_constant_p_parms; /* Estimated growth for inlining all copies of the function before start of small functions inlining. 
This value will get out of date as the callers are duplicated, but diff --git a/gcc/ipa-inline.c b/gcc/ipa-inline.c index 225a0140725..bc846eabb58 100644 --- a/gcc/ipa-inline.c +++ b/gcc/ipa-inline.c @@ -878,7 +878,8 @@ want_inline_small_function_p (struct cgraph_edge *e, bool report) bool apply_hints = (hints & (INLINE_HINT_indirect_call | INLINE_HINT_known_hot | INLINE_HINT_loop_iterations - | INLINE_HINT_loop_stride)); + | INLINE_HINT_loop_stride + | INLINE_HINT_builtin_constant_p)); if (growth <= opt_for_fn (to->decl, param_max_inline_insns_size)) @@ -1317,6 +1318,8 @@ edge_badness (struct cgraph_edge *edge, bool dump) | INLINE_HINT_loop_stride)) || callee_info->growth <= 0) badness = badness.shift (badness > 0 ? -2 : 2); + if (hints & INLINE_HINT_builtin_constant_p) + badness = badness.shift (badness > 0 ? -4 : 4); if (hints & (INLINE_HINT_same_scc)) badness = badness.shift (badness > 0 ? 3 : -3); else if (hints & (INLINE_HINT_in_scc)) diff --git a/gcc/testsuite/gcc.dg/ipa/inlinehint-5.c b/gcc/testsuite/gcc.dg/ipa/inlinehint-5.c new file mode 100644 index 00000000000..218f80573c0 --- /dev/null +++ b/gcc/testsuite/gcc.dg/ipa/inlinehint-5.c @@ -0,0 +1,36 @@ +/* { dg-options "-O2 -fdump-ipa-inline-details -fno-early-inlining " } */ +/* { dg-add-options bind_pic_locally } */ +int j,k,l; +int test3(int); +int test4(int); + +static inline int +test2(int i) +{ + if (__builtin_constant_p (i)) + { + switch (i) + { + case 1: return j; + case 2: return k; + case 3: return l; + } + } + else return test3(i)+test4(i); +} + +static inline int +test (int i) +{ + return test2(i) + test2(i+1) + test3 (i) + test3(i) + test3(i) + test3 (i); +} + +int +run (int i) +{ + return test (i) + test (i); +} +/* The test should work by first inlining test2->test and then test to run + Both are called twice, so 4 hints (the second make sure that we propagate + to callers. */ +/* { dg-final { scan-ipa-dump-times "hints: declared_inline builtin_constant_p" 4 "inline" } } */