From 9b14fc3326e087975653b1af8ac54114041cde51 Mon Sep 17 00:00:00 2001 From: Feng Xue Date: Mon, 2 Dec 2019 06:37:30 +0000 Subject: [PATCH] Enable recursive function versioning 2019-12-02 Feng Xue PR ipa/92133 * doc/invoke.texi (ipa-cp-max-recursive-depth): Document new option. (ipa-cp-min-recursive-probability): Likewise. * params.opt (ipa-cp-max-recursive-depth): New. (ipa-cp-min-recursive-probability): Likewise. * ipa-cp.c (ipcp_lattice::add_value): Add two new parameters val_p and unlimited. (self_recursively_generated_p): New function. (get_val_across_arith_op): Likewise. (propagate_vals_across_arith_jfunc): Add constant propagation for self-recursive function. (incorporate_penalties): Do not penalize pure self-recursive function. (good_cloning_opportunity_p): Dump node_is_self_scc flag. (propagate_constants_topo): Set node_is_self_scc flag for cgraph node. (get_info_about_necessary_edges): Relax hotness check for edge to self-recursive function. * ipa-prop.h (ipa_node_params): Add new field node_is_self_scc. 2019-12-02 Feng Xue PR ipa/92133 * gcc.dg/ipa/ipa-clone-2.c: New test. From-SVN: r278893 --- gcc/ChangeLog | 20 +++ gcc/doc/invoke.texi | 7 + gcc/ipa-cp.c | 221 ++++++++++++++++++++++--- gcc/ipa-prop.h | 2 + gcc/params.opt | 8 + gcc/testsuite/ChangeLog | 5 + gcc/testsuite/gcc.dg/ipa/ipa-clone-2.c | 47 ++++++ 7 files changed, 283 insertions(+), 27 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/ipa/ipa-clone-2.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 9a807baa38e..0765ee2be12 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,23 @@ +2019-12-02 Feng Xue + + PR ipa/92133 + * doc/invoke.texi (ipa-cp-max-recursive-depth): Document new option. + (ipa-cp-min-recursive-probability): Likewise. + * params.opt (ipa-cp-max-recursive-depth): New. + (ipa-cp-min-recursive-probability): Likewise. + * ipa-cp.c (ipcp_lattice::add_value): Add two new parameters + val_p and unlimited. + (self_recursively_generated_p): New function. + (get_val_across_arith_op): Likewise. + (propagate_vals_across_arith_jfunc): Add constant propagation for + self-recursive function. + (incorporate_penalties): Do not penalize pure self-recursive function. + (good_cloning_opportunity_p): Dump node_is_self_scc flag. + (propagate_constants_topo): Set node_is_self_scc flag for cgraph node. + (get_info_about_necessary_edges): Relax hotness check for edge to + self-recursive function. + * ipa-prop.h (ipa_node_params): Add new field node_is_self_scc. + 2019-12-01 Sandra Loosemore PR target/92499 diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 403be47d893..d165f31a865 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -12035,6 +12035,13 @@ IPA-CP calculates its own score of cloning profitability heuristics and performs those cloning opportunities with scores that exceed @option{ipa-cp-eval-threshold}. +@item ipa-cp-max-recursive-depth +Maximum depth of recursive cloning for self-recursive function. + +@item ipa-cp-min-recursive-probability +Recursive cloning only when the probability of call being executed exceeds +the parameter. + @item ipa-cp-recursion-penalty Percentage penalty the recursive functions will receive when they are evaluated for cloning. diff --git a/gcc/ipa-cp.c b/gcc/ipa-cp.c index d0c6e91edd4..693c7a2fdc5 100644 --- a/gcc/ipa-cp.c +++ b/gcc/ipa-cp.c @@ -228,7 +228,9 @@ public: inline bool set_contains_variable (); bool add_value (valtype newval, cgraph_edge *cs, ipcp_value *src_val = NULL, - int src_idx = 0, HOST_WIDE_INT offset = -1); + int src_idx = 0, HOST_WIDE_INT offset = -1, + ipcp_value **val_p = NULL, + bool unlimited = false); void print (FILE * f, bool dump_sources, bool dump_benefits); }; @@ -1817,22 +1819,32 @@ allocate_and_init_ipcp_value (ipa_polymorphic_call_context source) /* Try to add NEWVAL to LAT, potentially creating a new ipcp_value for it. CS, SRC_VAL SRC_INDEX and OFFSET are meant for add_source and have the same meaning. OFFSET -1 means the source is scalar and not a part of an - aggregate. */ + aggregate. If non-NULL, VAL_P records address of existing or newly added + ipcp_value. UNLIMITED means whether value count should not exceed the limit + given by PARAM_IPA_CP_VALUE_LIST_SIZE. */ template bool ipcp_lattice::add_value (valtype newval, cgraph_edge *cs, ipcp_value *src_val, - int src_idx, HOST_WIDE_INT offset) + int src_idx, HOST_WIDE_INT offset, + ipcp_value **val_p, + bool unlimited) { - ipcp_value *val; + ipcp_value *val, *last_val = NULL; + + if (val_p) + *val_p = NULL; if (bottom) return false; - for (val = values; val; val = val->next) + for (val = values; val; last_val = val, val = val->next) if (values_equal_for_ipcp_p (val->value, newval)) { + if (val_p) + *val_p = val; + if (ipa_edge_within_scc (cs)) { ipcp_value_source *s; @@ -1847,7 +1859,7 @@ ipcp_lattice::add_value (valtype newval, cgraph_edge *cs, return false; } - if (values_count == param_ipa_cp_value_list_size) + if (!unlimited && values_count == param_ipa_cp_value_list_size) { /* We can only free sources, not the values themselves, because sources of other values in this SCC might point to them. */ @@ -1860,7 +1872,6 @@ ipcp_lattice::add_value (valtype newval, cgraph_edge *cs, ipcp_sources_pool.remove ((ipcp_value_source*)src); } } - values = NULL; return set_to_bottom (); } @@ -1868,11 +1879,81 @@ ipcp_lattice::add_value (valtype newval, cgraph_edge *cs, values_count++; val = allocate_and_init_ipcp_value (newval); val->add_source (cs, src_val, src_idx, offset); - val->next = values; - values = val; + val->next = NULL; + + /* Add the new value to end of value list, which can reduce iterations + of propagation stage for recursive function. */ + if (last_val) + last_val->next = val; + else + values = val; + + if (val_p) + *val_p = val; + + return true; +} + +/* Return true, if a ipcp_value VAL is orginated from parameter value of + self-feeding recursive function by applying non-passthrough arithmetic + transformation. */ + +static bool +self_recursively_generated_p (ipcp_value *val) +{ + class ipa_node_params *info = NULL; + + for (ipcp_value_source *src = val->sources; src; src = src->next) + { + cgraph_edge *cs = src->cs; + + if (!src->val || cs->caller != cs->callee->function_symbol () + || src->val == val) + return false; + + if (!info) + info = IPA_NODE_REF (cs->caller); + + class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, + src->index); + ipcp_lattice *src_lat = src->offset == -1 ? &plats->itself + : plats->aggs; + ipcp_value *src_val; + + for (src_val = src_lat->values; src_val; src_val = src_val->next) + if (src_val == val) + break; + + if (!src_val) + return false; + } + return true; } +/* A helper function that returns result of operation specified by OPCODE on + the value of SRC_VAL. If non-NULL, OPND1_TYPE is expected type for the + value of SRC_VAL. If the operation is binary, OPND2 is a constant value + acting as its second operand. If non-NULL, RES_TYPE is expected type of + the result. */ + +static tree +get_val_across_arith_op (enum tree_code opcode, + tree opnd1_type, + tree opnd2, + ipcp_value *src_val, + tree res_type) +{ + tree opnd1 = src_val->value; + + /* Skip source values that is incompatible with specified type. */ + if (opnd1_type + && !useless_type_conversion_p (opnd1_type, TREE_TYPE (opnd1))) + return NULL_TREE; + + return ipa_get_jf_arith_result (opcode, opnd1, opnd2, res_type); +} + /* Propagate values through an arithmetic transformation described by a jump function associated with edge CS, taking values from SRC_LAT and putting them into DEST_LAT. OPND1_TYPE is expected type for the values in SRC_LAT. @@ -1896,24 +1977,76 @@ propagate_vals_across_arith_jfunc (cgraph_edge *cs, ipcp_value *src_val; bool ret = false; - /* Do not create new values when propagating within an SCC because if there - are arithmetic functions with circular dependencies, there is infinite - number of them and we would just make lattices bottom. If this condition - is ever relaxed we have to detect self-feeding recursive calls in - cgraph_edge_brings_value_p in a smarter way. */ + /* Due to circular dependencies, propagating within an SCC through arithmetic + transformation would create infinite number of values. But for + self-feeding recursive function, we could allow propagation in a limited + count, and this can enable a simple kind of recursive function versioning. + For other scenario, we would just make lattices bottom. */ if (opcode != NOP_EXPR && ipa_edge_within_scc (cs)) - ret = dest_lat->set_contains_variable (); + { + int i; + + if (src_lat != dest_lat || param_ipa_cp_max_recursive_depth < 1) + return dest_lat->set_contains_variable (); + + /* No benefit if recursive execution is in low probability. */ + if (cs->sreal_frequency () * 100 + <= ((sreal) 1) * param_ipa_cp_min_recursive_probability) + return dest_lat->set_contains_variable (); + + auto_vec *, 8> val_seeds; + + for (src_val = src_lat->values; src_val; src_val = src_val->next) + { + /* Now we do not use self-recursively generated value as propagation + source, this is absolutely conservative, but could avoid explosion + of lattice's value space, especially when one recursive function + calls another recursive. */ + if (self_recursively_generated_p (src_val)) + { + ipcp_value_source *s; + + /* If the lattice has already been propagated for the call site, + no need to do that again. */ + for (s = src_val->sources; s; s = s->next) + if (s->cs == cs) + return dest_lat->set_contains_variable (); + } + else + val_seeds.safe_push (src_val); + } + + /* Recursively generate lattice values with a limited count. */ + FOR_EACH_VEC_ELT (val_seeds, i, src_val) + { + for (int j = 1; j < param_ipa_cp_max_recursive_depth; j++) + { + tree cstval = get_val_across_arith_op (opcode, opnd1_type, opnd2, + src_val, res_type); + if (!cstval) + break; + + ret |= dest_lat->add_value (cstval, cs, src_val, src_idx, + src_offset, &src_val, true); + gcc_checking_assert (src_val); + } + } + ret |= dest_lat->set_contains_variable (); + } else for (src_val = src_lat->values; src_val; src_val = src_val->next) { - tree opnd1 = src_val->value; - tree cstval = NULL_TREE; - - /* Skip source values that is incompatible with specified type. */ - if (!opnd1_type - || useless_type_conversion_p (opnd1_type, TREE_TYPE (opnd1))) - cstval = ipa_get_jf_arith_result (opcode, opnd1, opnd2, res_type); + /* Now we do not use self-recursively generated value as propagation + source, otherwise it is easy to make value space of normal lattice + overflow. */ + if (self_recursively_generated_p (src_val)) + { + ret |= dest_lat->set_contains_variable (); + continue; + } + tree cstval = get_val_across_arith_op (opcode, opnd1_type, opnd2, + src_val, res_type); if (cstval) ret |= dest_lat->add_value (cstval, cs, src_val, src_idx, src_offset); @@ -3038,7 +3171,7 @@ hint_time_bonus (ipa_hints hints) static inline int64_t incorporate_penalties (ipa_node_params *info, int64_t evaluation) { - if (info->node_within_scc) + if (info->node_within_scc && !info->node_is_self_scc) evaluation = (evaluation * (100 - param_ipa_cp_recursion_penalty)) / 100; @@ -3082,7 +3215,8 @@ good_cloning_opportunity_p (struct cgraph_node *node, int time_benefit, count_sum.dump (dump_file); fprintf (dump_file, "%s%s) -> evaluation: " "%" PRId64 ", threshold: %i\n", - info->node_within_scc ? ", scc" : "", + info->node_within_scc + ? (info->node_is_self_scc ? ", self_scc" : ", scc") : "", info->node_calling_single_call ? ", single_call" : "", evaluation, param_ipa_cp_eval_threshold); } @@ -3100,7 +3234,8 @@ good_cloning_opportunity_p (struct cgraph_node *node, int time_benefit, "size: %i, freq_sum: %i%s%s) -> evaluation: " "%" PRId64 ", threshold: %i\n", time_benefit, size_cost, freq_sum, - info->node_within_scc ? ", scc" : "", + info->node_within_scc + ? (info->node_is_self_scc ? ", self_scc" : ", scc") : "", info->node_calling_single_call ? ", single_call" : "", evaluation, param_ipa_cp_eval_threshold); @@ -3594,14 +3729,30 @@ propagate_constants_topo (class ipa_topo_info *topo) while (v) { struct cgraph_edge *cs; + class ipa_node_params *info = NULL; + bool self_scc = true; for (cs = v->callees; cs; cs = cs->next_callee) if (ipa_edge_within_scc (cs)) { - IPA_NODE_REF (v)->node_within_scc = true; + cgraph_node *callee = cs->callee->function_symbol (); + + if (v != callee) + self_scc = false; + + if (!info) + { + info = IPA_NODE_REF (v); + info->node_within_scc = true; + } + if (propagate_constants_across_call (cs)) - push_node_to_stack (topo, cs->callee->function_symbol ()); + push_node_to_stack (topo, callee); } + + if (info) + info->node_is_self_scc = self_scc; + v = pop_node_from_stack (topo); } @@ -3983,6 +4134,9 @@ get_info_about_necessary_edges (ipcp_value *val, cgraph_node *dest, hot |= cs->maybe_hot_p (); if (cs->caller != dest) non_self_recursive = true; + else if (src->val) + gcc_assert (values_equal_for_ipcp_p (src->val->value, + val->value)); } cs = get_next_cgraph_edge_clone (cs); } @@ -3996,6 +4150,19 @@ get_info_about_necessary_edges (ipcp_value *val, cgraph_node *dest, *freq_sum = freq; *count_sum = cnt; *caller_count = count; + + if (!hot && IPA_NODE_REF (dest)->node_within_scc) + { + struct cgraph_edge *cs; + + /* Cold non-SCC source edge could trigger hot recursive execution of + function. Consider the case as hot and rely on following cost model + computation to further select right one. */ + for (cs = dest->callers; cs; cs = cs->next_caller) + if (cs->caller == dest && cs->maybe_hot_p ()) + return true; + } + return hot; } diff --git a/gcc/ipa-prop.h b/gcc/ipa-prop.h index e9d6a5e8305..1958e1ee9cc 100644 --- a/gcc/ipa-prop.h +++ b/gcc/ipa-prop.h @@ -497,6 +497,8 @@ public: unsigned node_dead : 1; /* Node is involved in a recursion, potentionally indirect. */ unsigned node_within_scc : 1; + /* Node contains only direct recursion. */ + unsigned node_is_self_scc : 1; /* Node is calling a private function called only once. */ unsigned node_calling_single_call : 1; /* False when there is something makes versioning impossible. */ diff --git a/gcc/params.opt b/gcc/params.opt index 586b539ec5f..d88ae0c468b 100644 --- a/gcc/params.opt +++ b/gcc/params.opt @@ -198,6 +198,14 @@ Threshold ipa-cp opportunity evaluation that is still considered beneficial to c Common Joined UInteger Var(param_ipa_cp_loop_hint_bonus) Init(64) Param Compile-time bonus IPA-CP assigns to candidates which make loop bounds or strides known. +-param=ipa-cp-max-recursive-depth= +Common Joined UInteger Var(param_ipa_cp_max_recursive_depth) Init(8) Param +Maximum depth of recursive cloning for self-recursive function. + +-param=ipa-cp-min-recursive-probability= +Common Joined UInteger Var(param_ipa_cp_min_recursive_probability) Init(2) Param +Recursive cloning only when the probability of call being executed exceeds the parameter. + -param=ipa-cp-recursion-penalty= Common Joined UInteger Var(param_ipa_cp_recursion_penalty) Init(40) IntegerRange(0, 100) Param Percentage penalty the recursive functions will receive when they are evaluated for cloning. diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index edf8eadae03..bb36544cfe4 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2019-12-02 Feng Xue + + PR ipa/92133 + * gcc.dg/ipa/ipa-clone-2.c: New test. + 2019-12-01 Sandra Loosemore PR target/92499 diff --git a/gcc/testsuite/gcc.dg/ipa/ipa-clone-2.c b/gcc/testsuite/gcc.dg/ipa/ipa-clone-2.c new file mode 100644 index 00000000000..d513020ee8b --- /dev/null +++ b/gcc/testsuite/gcc.dg/ipa/ipa-clone-2.c @@ -0,0 +1,47 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -fdump-ipa-cp-details -fno-early-inlining --param ipa-cp-max-recursive-depth=8" } */ + +int fn(); + +int data[100]; + +int recur_fn (int i) +{ + int j; + + if (i == 6) + { + fn(); + fn(); + fn(); + fn(); + fn(); + fn(); + fn(); + fn(); + fn(); + fn(); + fn(); + fn(); + return 10; + } + + data[i] = i; + + for (j = 0; j < 100; j++) + recur_fn (i + 1); + + return i; +} + +int main () +{ + int i; + + for (i = 0; i < 100; i++) + recur_fn (1) + recur_fn (-5); + + return 1; +} + +/* { dg-final { scan-ipa-dump-times "Creating a specialized node of recur_fn/\[0-9\]*\\." 12 "cp" } } */ -- 2.30.2