Enable recursive function versioning
authorFeng Xue <fxue@os.amperecomputing.com>
Mon, 2 Dec 2019 06:37:30 +0000 (06:37 +0000)
committerFeng Xue <fxue@gcc.gnu.org>
Mon, 2 Dec 2019 06:37:30 +0000 (06:37 +0000)
2019-12-02  Feng Xue <fxue@os.amperecomputing.com>

        PR ipa/92133
        * doc/invoke.texi (ipa-cp-max-recursive-depth): Document new option.
        (ipa-cp-min-recursive-probability): Likewise.
        * params.opt (ipa-cp-max-recursive-depth): New.
        (ipa-cp-min-recursive-probability): Likewise.
        * ipa-cp.c (ipcp_lattice<valtype>::add_value): Add two new parameters
        val_p and unlimited.
        (self_recursively_generated_p): New function.
        (get_val_across_arith_op): Likewise.
        (propagate_vals_across_arith_jfunc): Add constant propagation for
        self-recursive function.
        (incorporate_penalties): Do not penalize pure self-recursive function.
        (good_cloning_opportunity_p): Dump node_is_self_scc flag.
        (propagate_constants_topo): Set node_is_self_scc flag for cgraph node.
        (get_info_about_necessary_edges): Relax hotness check for edge to
        self-recursive function.
        * ipa-prop.h (ipa_node_params): Add new field node_is_self_scc.

2019-12-02  Feng Xue  <fxue@os.amperecomputing.com>

        PR ipa/92133
        * gcc.dg/ipa/ipa-clone-2.c: New test.

From-SVN: r278893

gcc/ChangeLog
gcc/doc/invoke.texi
gcc/ipa-cp.c
gcc/ipa-prop.h
gcc/params.opt
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.dg/ipa/ipa-clone-2.c [new file with mode: 0644]

index 9a807baa38e8e31c8198c1609fb008a898c0d4a0..0765ee2be1201be8d72fd7079cea43591a1ea4c1 100644 (file)
@@ -1,3 +1,23 @@
+2019-12-02  Feng Xue <fxue@os.amperecomputing.com>
+
+       PR ipa/92133
+       * doc/invoke.texi (ipa-cp-max-recursive-depth): Document new option.
+       (ipa-cp-min-recursive-probability): Likewise.
+       * params.opt (ipa-cp-max-recursive-depth): New.
+       (ipa-cp-min-recursive-probability): Likewise.
+       * ipa-cp.c (ipcp_lattice<valtype>::add_value): Add two new parameters
+       val_p and unlimited.
+       (self_recursively_generated_p): New function.
+       (get_val_across_arith_op): Likewise.
+       (propagate_vals_across_arith_jfunc): Add constant propagation for
+       self-recursive function.
+       (incorporate_penalties): Do not penalize pure self-recursive function.
+       (good_cloning_opportunity_p): Dump node_is_self_scc flag.
+       (propagate_constants_topo): Set node_is_self_scc flag for cgraph node.
+       (get_info_about_necessary_edges): Relax hotness check for edge to
+       self-recursive function.
+       * ipa-prop.h (ipa_node_params): Add new field node_is_self_scc.
+
 2019-12-01  Sandra Loosemore  <sandra@codesourcery.com>
 
        PR target/92499
index 403be47d893d5d630ca213e46feea4f1c04413e2..d165f31a865245770c324ef84671612471de0d48 100644 (file)
@@ -12035,6 +12035,13 @@ IPA-CP calculates its own score of cloning profitability heuristics
 and performs those cloning opportunities with scores that exceed
 @option{ipa-cp-eval-threshold}.
 
+@item ipa-cp-max-recursive-depth
+Maximum depth of recursive cloning for self-recursive function.
+
+@item ipa-cp-min-recursive-probability
+Recursive cloning only when the probability of call being executed exceeds
+the parameter.
+
 @item ipa-cp-recursion-penalty
 Percentage penalty the recursive functions will receive when they
 are evaluated for cloning.
index d0c6e91edd4c177b2455edd6d2945ef5569e3c8b..693c7a2fdc51b1e1688e422d12b0e00f0f0071d0 100644 (file)
@@ -228,7 +228,9 @@ public:
   inline bool set_contains_variable ();
   bool add_value (valtype newval, cgraph_edge *cs,
                  ipcp_value<valtype> *src_val = NULL,
-                 int src_idx = 0, HOST_WIDE_INT offset = -1);
+                 int src_idx = 0, HOST_WIDE_INT offset = -1,
+                 ipcp_value<valtype> **val_p = NULL,
+                 bool unlimited = false);
   void print (FILE * f, bool dump_sources, bool dump_benefits);
 };
 
@@ -1817,22 +1819,32 @@ allocate_and_init_ipcp_value (ipa_polymorphic_call_context source)
 /* Try to add NEWVAL to LAT, potentially creating a new ipcp_value for it.  CS,
    SRC_VAL SRC_INDEX and OFFSET are meant for add_source and have the same
    meaning.  OFFSET -1 means the source is scalar and not a part of an
-   aggregate.  */
+   aggregate.  If non-NULL, VAL_P records address of existing or newly added
+   ipcp_value.  UNLIMITED means whether value count should not exceed the limit
+   given by PARAM_IPA_CP_VALUE_LIST_SIZE.  */
 
 template <typename valtype>
 bool
 ipcp_lattice<valtype>::add_value (valtype newval, cgraph_edge *cs,
                                  ipcp_value<valtype> *src_val,
-                                 int src_idx, HOST_WIDE_INT offset)
+                                 int src_idx, HOST_WIDE_INT offset,
+                                 ipcp_value<valtype> **val_p,
+                                 bool unlimited)
 {
-  ipcp_value<valtype> *val;
+  ipcp_value<valtype> *val, *last_val = NULL;
+
+  if (val_p)
+    *val_p = NULL;
 
   if (bottom)
     return false;
 
-  for (val = values; val; val = val->next)
+  for (val = values; val; last_val = val, val = val->next)
     if (values_equal_for_ipcp_p (val->value, newval))
       {
+       if (val_p)
+         *val_p = val;
+
        if (ipa_edge_within_scc (cs))
          {
            ipcp_value_source<valtype> *s;
@@ -1847,7 +1859,7 @@ ipcp_lattice<valtype>::add_value (valtype newval, cgraph_edge *cs,
        return false;
       }
 
-  if (values_count == param_ipa_cp_value_list_size)
+  if (!unlimited && values_count == param_ipa_cp_value_list_size)
     {
       /* We can only free sources, not the values themselves, because sources
         of other values in this SCC might point to them.   */
@@ -1860,7 +1872,6 @@ ipcp_lattice<valtype>::add_value (valtype newval, cgraph_edge *cs,
              ipcp_sources_pool.remove ((ipcp_value_source<tree>*)src);
            }
        }
-
       values = NULL;
       return set_to_bottom ();
     }
@@ -1868,11 +1879,81 @@ ipcp_lattice<valtype>::add_value (valtype newval, cgraph_edge *cs,
   values_count++;
   val = allocate_and_init_ipcp_value (newval);
   val->add_source (cs, src_val, src_idx, offset);
-  val->next = values;
-  values = val;
+  val->next = NULL;
+
+  /* Add the new value to end of value list, which can reduce iterations
+     of propagation stage for recursive function.  */
+  if (last_val)
+    last_val->next = val;
+  else
+    values = val;
+
+  if (val_p)
+    *val_p = val;
+
+  return true;
+}
+
+/* Return true, if a ipcp_value VAL is orginated from parameter value of
+   self-feeding recursive function by applying non-passthrough arithmetic
+   transformation.  */
+
+static bool
+self_recursively_generated_p (ipcp_value<tree> *val)
+{
+  class ipa_node_params *info = NULL;
+
+  for (ipcp_value_source<tree> *src = val->sources; src; src = src->next)
+    {
+      cgraph_edge *cs = src->cs;
+
+      if (!src->val || cs->caller != cs->callee->function_symbol ()
+         || src->val == val)
+       return false;
+
+      if (!info)
+       info = IPA_NODE_REF (cs->caller);
+
+      class ipcp_param_lattices *plats = ipa_get_parm_lattices (info,
+                                                               src->index);
+      ipcp_lattice<tree> *src_lat = src->offset == -1 ? &plats->itself
+                                                     : plats->aggs;
+      ipcp_value<tree> *src_val;
+
+      for (src_val = src_lat->values; src_val; src_val = src_val->next)
+       if (src_val == val)
+         break;
+
+      if (!src_val)
+       return false;
+    }
+
   return true;
 }
 
+/* A helper function that returns result of operation specified by OPCODE on
+   the value of SRC_VAL.  If non-NULL, OPND1_TYPE is expected type for the
+   value of SRC_VAL.  If the operation is binary, OPND2 is a constant value
+   acting as its second operand.  If non-NULL, RES_TYPE is expected type of
+   the result.  */
+
+static tree
+get_val_across_arith_op (enum tree_code opcode,
+                        tree opnd1_type,
+                        tree opnd2,
+                        ipcp_value<tree> *src_val,
+                        tree res_type)
+{
+  tree opnd1 = src_val->value;
+
+  /* Skip source values that is incompatible with specified type.  */
+  if (opnd1_type
+      && !useless_type_conversion_p (opnd1_type, TREE_TYPE (opnd1)))
+    return NULL_TREE;
+
+  return ipa_get_jf_arith_result (opcode, opnd1, opnd2, res_type);
+}
+
 /* Propagate values through an arithmetic transformation described by a jump
    function associated with edge CS, taking values from SRC_LAT and putting
    them into DEST_LAT.  OPND1_TYPE is expected type for the values in SRC_LAT.
@@ -1896,24 +1977,76 @@ propagate_vals_across_arith_jfunc (cgraph_edge *cs,
   ipcp_value<tree> *src_val;
   bool ret = false;
 
-  /* Do not create new values when propagating within an SCC because if there
-     are arithmetic functions with circular dependencies, there is infinite
-     number of them and we would just make lattices bottom.  If this condition
-     is ever relaxed we have to detect self-feeding recursive calls in
-     cgraph_edge_brings_value_p in a smarter way.  */
+  /* Due to circular dependencies, propagating within an SCC through arithmetic
+     transformation would create infinite number of values.  But for
+     self-feeding recursive function, we could allow propagation in a limited
+     count, and this can enable a simple kind of recursive function versioning.
+     For other scenario, we would just make lattices bottom.  */
   if (opcode != NOP_EXPR && ipa_edge_within_scc (cs))
-    ret = dest_lat->set_contains_variable ();
+    {
+      int i;
+
+      if (src_lat != dest_lat || param_ipa_cp_max_recursive_depth < 1)
+       return dest_lat->set_contains_variable ();
+
+      /* No benefit if recursive execution is in low probability.  */
+      if (cs->sreal_frequency () * 100
+         <= ((sreal) 1) * param_ipa_cp_min_recursive_probability)
+       return dest_lat->set_contains_variable ();
+
+      auto_vec<ipcp_value<tree> *, 8> val_seeds;
+
+      for (src_val = src_lat->values; src_val; src_val = src_val->next)
+       {
+         /* Now we do not use self-recursively generated value as propagation
+            source, this is absolutely conservative, but could avoid explosion
+            of lattice's value space, especially when one recursive function
+            calls another recursive.  */
+         if (self_recursively_generated_p (src_val))
+           {
+             ipcp_value_source<tree> *s;
+
+             /* If the lattice has already been propagated for the call site,
+                no need to do that again.  */
+             for (s = src_val->sources; s; s = s->next)
+               if (s->cs == cs)
+                 return dest_lat->set_contains_variable ();
+           }
+         else
+           val_seeds.safe_push (src_val);
+       }
+
+      /* Recursively generate lattice values with a limited count.  */
+      FOR_EACH_VEC_ELT (val_seeds, i, src_val)
+       {
+         for (int j = 1; j < param_ipa_cp_max_recursive_depth; j++)
+           {
+             tree cstval = get_val_across_arith_op (opcode, opnd1_type, opnd2,
+                                                    src_val, res_type);
+             if (!cstval)
+               break;
+
+             ret |= dest_lat->add_value (cstval, cs, src_val, src_idx,
+                                         src_offset, &src_val, true);
+             gcc_checking_assert (src_val);
+           }
+       }
+      ret |= dest_lat->set_contains_variable ();
+    }
   else
     for (src_val = src_lat->values; src_val; src_val = src_val->next)
       {
-       tree opnd1 = src_val->value;
-       tree cstval = NULL_TREE;
-
-       /* Skip source values that is incompatible with specified type.  */
-       if (!opnd1_type
-           || useless_type_conversion_p (opnd1_type, TREE_TYPE (opnd1)))
-         cstval = ipa_get_jf_arith_result (opcode, opnd1, opnd2, res_type);
+       /* Now we do not use self-recursively generated value as propagation
+          source, otherwise it is easy to make value space of normal lattice
+          overflow.  */
+       if (self_recursively_generated_p (src_val))
+         {
+           ret |= dest_lat->set_contains_variable ();
+           continue;
+         }
 
+       tree cstval = get_val_across_arith_op (opcode, opnd1_type, opnd2,
+                                              src_val, res_type);
        if (cstval)
          ret |= dest_lat->add_value (cstval, cs, src_val, src_idx,
                                      src_offset);
@@ -3038,7 +3171,7 @@ hint_time_bonus (ipa_hints hints)
 static inline int64_t
 incorporate_penalties (ipa_node_params *info, int64_t evaluation)
 {
-  if (info->node_within_scc)
+  if (info->node_within_scc && !info->node_is_self_scc)
     evaluation = (evaluation
                  * (100 - param_ipa_cp_recursion_penalty)) / 100;
 
@@ -3082,7 +3215,8 @@ good_cloning_opportunity_p (struct cgraph_node *node, int time_benefit,
          count_sum.dump (dump_file);
          fprintf (dump_file, "%s%s) -> evaluation: " "%" PRId64
                 ", threshold: %i\n",
-                info->node_within_scc ? ", scc" : "",
+                info->node_within_scc
+                  ? (info->node_is_self_scc ? ", self_scc" : ", scc") : "",
                 info->node_calling_single_call ? ", single_call" : "",
                 evaluation, param_ipa_cp_eval_threshold);
        }
@@ -3100,7 +3234,8 @@ good_cloning_opportunity_p (struct cgraph_node *node, int time_benefit,
                 "size: %i, freq_sum: %i%s%s) -> evaluation: "
                 "%" PRId64 ", threshold: %i\n",
                 time_benefit, size_cost, freq_sum,
-                info->node_within_scc ? ", scc" : "",
+                info->node_within_scc
+                  ? (info->node_is_self_scc ? ", self_scc" : ", scc") : "",
                 info->node_calling_single_call ? ", single_call" : "",
                 evaluation, param_ipa_cp_eval_threshold);
 
@@ -3594,14 +3729,30 @@ propagate_constants_topo (class ipa_topo_info *topo)
       while (v)
        {
          struct cgraph_edge *cs;
+         class ipa_node_params *info = NULL;
+         bool self_scc = true;
 
          for (cs = v->callees; cs; cs = cs->next_callee)
            if (ipa_edge_within_scc (cs))
              {
-               IPA_NODE_REF (v)->node_within_scc = true;
+               cgraph_node *callee = cs->callee->function_symbol ();
+
+               if (v != callee)
+                 self_scc = false;
+
+               if (!info)
+                 {
+                   info = IPA_NODE_REF (v);
+                   info->node_within_scc = true;
+                 }
+
                if (propagate_constants_across_call (cs))
-                 push_node_to_stack (topo, cs->callee->function_symbol ());
+                 push_node_to_stack (topo, callee);
              }
+
+         if (info)
+           info->node_is_self_scc = self_scc;
+
          v = pop_node_from_stack (topo);
        }
 
@@ -3983,6 +4134,9 @@ get_info_about_necessary_edges (ipcp_value<valtype> *val, cgraph_node *dest,
              hot |= cs->maybe_hot_p ();
              if (cs->caller != dest)
                non_self_recursive = true;
+             else if (src->val)
+               gcc_assert (values_equal_for_ipcp_p (src->val->value,
+                                                    val->value));
            }
          cs = get_next_cgraph_edge_clone (cs);
        }
@@ -3996,6 +4150,19 @@ get_info_about_necessary_edges (ipcp_value<valtype> *val, cgraph_node *dest,
   *freq_sum = freq;
   *count_sum = cnt;
   *caller_count = count;
+
+  if (!hot && IPA_NODE_REF (dest)->node_within_scc)
+    {
+      struct cgraph_edge *cs;
+
+      /* Cold non-SCC source edge could trigger hot recursive execution of
+        function. Consider the case as hot and rely on following cost model
+        computation to further select right one.  */
+      for (cs = dest->callers; cs; cs = cs->next_caller)
+       if (cs->caller == dest && cs->maybe_hot_p ())
+         return true;
+    }
+
   return hot;
 }
 
index e9d6a5e8305f1145876bc8a8795a751da38a84cf..1958e1ee9cc37490f4871802ca6aff5846be2624 100644 (file)
@@ -497,6 +497,8 @@ public:
   unsigned node_dead : 1;
   /* Node is involved in a recursion, potentionally indirect.  */
   unsigned node_within_scc : 1;
+  /* Node contains only direct recursion.  */
+  unsigned node_is_self_scc : 1;
   /* Node is calling a private function called only once.  */
   unsigned node_calling_single_call : 1;
   /* False when there is something makes versioning impossible.  */
index 586b539ec5f24512a34caa1e2156aaf40c832a06..d88ae0c468b57975f4ed348243bfc64b63912542 100644 (file)
@@ -198,6 +198,14 @@ Threshold ipa-cp opportunity evaluation that is still considered beneficial to c
 Common Joined UInteger Var(param_ipa_cp_loop_hint_bonus) Init(64) Param
 Compile-time bonus IPA-CP assigns to candidates which make loop bounds or strides known.
 
+-param=ipa-cp-max-recursive-depth=
+Common Joined UInteger Var(param_ipa_cp_max_recursive_depth) Init(8) Param
+Maximum depth of recursive cloning for self-recursive function.
+
+-param=ipa-cp-min-recursive-probability=
+Common Joined UInteger Var(param_ipa_cp_min_recursive_probability) Init(2) Param
+Recursive cloning only when the probability of call being executed exceeds the parameter.
+
 -param=ipa-cp-recursion-penalty=
 Common Joined UInteger Var(param_ipa_cp_recursion_penalty) Init(40) IntegerRange(0, 100) Param
 Percentage penalty the recursive functions will receive when they are evaluated for cloning.
index edf8eadae037175157bf84c9f2d70ba0a30d6e58..bb36544cfe4383d0f3cb16997190e71600a847db 100644 (file)
@@ -1,3 +1,8 @@
+2019-12-02  Feng Xue  <fxue@os.amperecomputing.com>
+
+       PR ipa/92133
+       * gcc.dg/ipa/ipa-clone-2.c: New test.
+
 2019-12-01  Sandra Loosemore  <sandra@codesourcery.com>
 
        PR target/92499
diff --git a/gcc/testsuite/gcc.dg/ipa/ipa-clone-2.c b/gcc/testsuite/gcc.dg/ipa/ipa-clone-2.c
new file mode 100644 (file)
index 0000000..d513020
--- /dev/null
@@ -0,0 +1,47 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-ipa-cp-details -fno-early-inlining --param ipa-cp-max-recursive-depth=8" } */
+
+int fn();
+
+int data[100];
+
+int recur_fn (int i)
+{
+  int j;
+   
+  if (i == 6)
+    {
+      fn();
+      fn();
+      fn();
+      fn();
+      fn();
+      fn();
+      fn();
+      fn();
+      fn();
+      fn();
+      fn();
+      fn();
+      return 10;
+    }
+
+  data[i] = i; 
+
+  for (j = 0; j < 100; j++)
+    recur_fn (i + 1);
+
+  return i; 
+}
+
+int main ()
+{
+  int i;
+
+  for (i = 0; i < 100; i++)
+    recur_fn (1) + recur_fn (-5);
+
+  return 1;
+}
+
+/* { dg-final { scan-ipa-dump-times "Creating a specialized node of recur_fn/\[0-9\]*\\." 12 "cp" } } */