Implement inline call summaries.
author Jan Hubicka <jh@suse.cz>
Wed, 20 Nov 2019 17:03:43 +0000 (18:03 +0100)
committer Jan Hubicka <hubicka@gcc.gnu.org>
Wed, 20 Nov 2019 17:03:43 +0000 (17:03 +0000)
* ipa-fnsummary.c (ipa_fn_summary::account_size_time): Add CALL
parameter and update call_size_time_table.
(ipa_fn_summary::max_size_time_table_size): New constant.
(estimate_calls_size_and_time_1): Break out from ...
(estimate_calls_size_and_time): ... here; implement summary production.
(summarize_calls_size_and_time): New function.
(ipa_call_context::estimate_size_and_time): Bypass
estimate_calls_size_and_time for leaf functions.
(ipa_update_overall_fn_summary): Likewise.
* ipa-fnsummary.h (call_size_time_table): New.
(ipa_fn_summary::account_size_time): Update prototype.

From-SVN: r278513

gcc/ChangeLog
gcc/ipa-fnsummary.c
gcc/ipa-fnsummary.h

index f5045b602b94a71215486d78a38c1d8d4c0a945b..0f526b1a9f7f8ac36878524f4737db05d4bad23a 100644 (file)
@@ -1,3 +1,17 @@
+2019-11-20  Jan Hubicka  <jh@suse.cz>
+
+       * ipa-fnsummary.c (ipa_fn_summary::account_size_time): Add CALL
+       parameter and update call_size_time_table.
+       (ipa_fn_summary::max_size_time_table_size): New constant.
+       (estimate_calls_size_and_time_1): Break out from ...
+       (estimate_calls_size_and_time): ... here; implement summary production.
+       (summarize_calls_size_and_time): New function.
+       (ipa_call_context::estimate_size_and_time): Bypass
+       estimate_calls_size_and_time for leaf functions.
+       (ipa_update_overall_fn_summary): Likewise.
+       * ipa-fnsummary.h (call_size_time_table): New.
+       (ipa_fn_summary::account_size_time): Update prototype.
+
 2019-11-20  Joseph Myers  <joseph@codesourcery.com>
 
        * doc/invoke.texi (-Wc11-c2x-compat): Document.
index a883325d7632736838be1ec182ed7b552efab5f5..0652207220a66f4fc89d3a5081a6a317a6244fc3 100644 (file)
@@ -146,17 +146,22 @@ ipa_dump_hints (FILE *f, ipa_hints hints)
 /* Record SIZE and TIME to SUMMARY.
    The accounted code will be executed when EXEC_PRED is true.
    When NONCONST_PRED is false the code will evaulate to constant and
-   will get optimized out in specialized clones of the function.   */
+   will get optimized out in specialized clones of the function.
+   If CALL is true account to call_size_time_table rather than
+   size_time_table.   */
 
 void
 ipa_fn_summary::account_size_time (int size, sreal time,
                                   const predicate &exec_pred,
-                                  const predicate &nonconst_pred_in)
+                                  const predicate &nonconst_pred_in,
+                                  bool call)
 {
   size_time_entry *e;
   bool found = false;
   int i;
   predicate nonconst_pred;
+  vec<size_time_entry, va_gc> *table = call
+                                      ? call_size_time_table : size_time_table;
 
   if (exec_pred == false)
     return;
@@ -168,23 +173,23 @@ ipa_fn_summary::account_size_time (int size, sreal time,
 
   /* We need to create initial empty unconitional clause, but otherwie
      we don't need to account empty times and sizes.  */
-  if (!size && time == 0 && size_time_table)
+  if (!size && time == 0 && table)
     return;
 
   gcc_assert (time >= 0);
 
-  for (i = 0; vec_safe_iterate (size_time_table, i, &e); i++)
+  for (i = 0; vec_safe_iterate (table, i, &e); i++)
     if (e->exec_predicate == exec_pred
        && e->nonconst_predicate == nonconst_pred)
       {
        found = true;
        break;
       }
-  if (i == 256)
+  if (i == max_size_time_table_size)
     {
       i = 0;
       found = true;
-      e = &(*size_time_table)[0];
+      e = &(*table)[0];
       if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file,
                 "\t\tReached limit on number of entries, "
@@ -212,7 +217,10 @@ ipa_fn_summary::account_size_time (int size, sreal time,
       new_entry.time = time;
       new_entry.exec_predicate = exec_pred;
       new_entry.nonconst_predicate = nonconst_pred;
-      vec_safe_push (size_time_table, new_entry);
+      if (call)
+        vec_safe_push (call_size_time_table, new_entry);
+      else
+        vec_safe_push (size_time_table, new_entry);
     }
   else
     {
@@ -642,6 +650,7 @@ ipa_fn_summary::~ipa_fn_summary ()
     edge_predicate_pool.remove (loop_stride);
   vec_free (conds);
   vec_free (size_time_table);
+  vec_free (call_size_time_table);
 }
 
 void
@@ -2973,19 +2982,21 @@ estimate_edge_size_and_time (struct cgraph_edge *e, int *size, int *min_size,
 }
 
 
-
 /* Increase SIZE, MIN_SIZE and TIME for size and time needed to handle all
    calls in NODE.  POSSIBLE_TRUTHS, KNOWN_VALS, KNOWN_AGGS and KNOWN_CONTEXTS
-   describe context of the call site.  */
+   describe context of the call site.
+   Helper for estimate_calls_size_and_time which does the same but
+   (in most cases) faster.  */
 
 static void
-estimate_calls_size_and_time (struct cgraph_node *node, int *size,
-                             int *min_size, sreal *time,
-                             ipa_hints *hints,
-                             clause_t possible_truths,
-                             vec<tree> known_vals,
-                             vec<ipa_polymorphic_call_context> known_contexts,
-                             vec<ipa_agg_value_set> known_aggs)
+estimate_calls_size_and_time_1 (struct cgraph_node *node, int *size,
+                               int *min_size, sreal *time,
+                               ipa_hints *hints,
+                               clause_t possible_truths,
+                               vec<tree> known_vals,
+                               vec<ipa_polymorphic_call_context> known_contexts,
+                               vec<ipa_agg_value_set> known_aggs)
 {
   struct cgraph_edge *e;
   for (e = node->callees; e; e = e->next_callee)
@@ -2993,11 +3004,11 @@ estimate_calls_size_and_time (struct cgraph_node *node, int *size,
       if (!e->inline_failed)
        {
          gcc_checking_assert (!ipa_call_summaries->get (e));
-         estimate_calls_size_and_time (e->callee, size, min_size, time,
-                                       hints,
-                                       possible_truths,
-                                       known_vals, known_contexts,
-                                       known_aggs);
+         estimate_calls_size_and_time_1 (e->callee, size, min_size, time,
+                                         hints,
+                                         possible_truths,
+                                         known_vals, known_contexts,
+                                         known_aggs);
          continue;
        }
       class ipa_call_summary *es = ipa_call_summaries->get (e);
@@ -3033,6 +3044,157 @@ estimate_calls_size_and_time (struct cgraph_node *node, int *size,
     }
 }
 
+/* Populate sum->call_size_time_table for edges from NODE.
+   The direct callees of NODE are walked first, then its indirect calls.
+   A direct edge whose inline_failed is clear is not accounted itself;
+   instead the walk recurses into its callee, still accumulating into SUM.
+   For every other edge the size and time reported by
+   estimate_edge_size_and_time are recorded through account_size_time with
+   CALL set to true.  The entry is recorded under the predicate stored in
+   the edge's call summary when there is one and under the true predicate
+   otherwise; the same predicate is passed as both the exec and the
+   nonconst predicate.  */
+
+static void
+summarize_calls_size_and_time (struct cgraph_node *node,
+                              ipa_fn_summary *sum)
+{
+  struct cgraph_edge *e;
+  for (e = node->callees; e; e = e->next_callee)
+    {
+      if (!e->inline_failed)
+       {
+         gcc_checking_assert (!ipa_call_summaries->get (e));
+         summarize_calls_size_and_time (e->callee, sum);
+         continue;
+       }
+      int size = 0;
+      sreal time = 0;
+
+      estimate_edge_size_and_time (e, &size, NULL, &time,
+                                  vNULL, vNULL, vNULL, NULL);
+
+      struct predicate pred = true;
+      class ipa_call_summary *es = ipa_call_summaries->get (e);
+
+      if (es->predicate)
+       pred = *es->predicate;
+      sum->account_size_time (size, time, pred, pred, true);
+    }
+  for (e = node->indirect_calls; e; e = e->next_callee)
+    {
+      int size = 0;
+      sreal time = 0;
+
+      estimate_edge_size_and_time (e, &size, NULL, &time,
+                                  vNULL, vNULL, vNULL, NULL);
+      struct predicate pred = true;
+      class ipa_call_summary *es = ipa_call_summaries->get (e);
+
+      if (es->predicate)
+       pred = *es->predicate;
+      sum->account_size_time (size, time, pred, pred, true);
+    }
+}
+
+/* Increase SIZE, MIN_SIZE and TIME for size and time needed to handle all
+   calls in NODE.  POSSIBLE_TRUTHS, KNOWN_VALS, KNOWN_AGGS and KNOWN_CONTEXTS
+   describe context of the call site.  */
+
+static void
+estimate_calls_size_and_time (struct cgraph_node *node, int *size,
+                             int *min_size, sreal *time,
+                             ipa_hints *hints,
+                             clause_t possible_truths,
+                             vec<tree> known_vals,
+                             vec<ipa_polymorphic_call_context> known_contexts,
+                             vec<ipa_agg_value_set> known_aggs)
+{
+  class ipa_fn_summary *sum = ipa_fn_summaries->get (node);
+  bool use_table = true;
+
+  gcc_assert (node->callees || node->indirect_calls);
+
+  /* During early inlining we do not calculate info for very
+     large functions and thus there is no need for producing
+     summaries.  */
+  if (!ipa_node_params_sum)
+    use_table = false;
+  /* Do not calculate summaries for simple wrappers; it is waste
+     of memory.  */
+  else if (node->callees && node->indirect_calls
+           && node->callees->inline_failed && !node->callees->next_callee)
+    use_table = false;
+  /* If there is an indirect edge that may be optimized, we need
+     to go the slow way.  */
+  else if ((known_vals.length ()
+           || known_contexts.length ()
+           || known_aggs.length ()) && hints)
+    {
+      class ipa_node_params *params_summary = IPA_NODE_REF (node);
+      unsigned int nargs = params_summary
+                          ? ipa_get_param_count (params_summary) : 0;
+
+      for (unsigned int i = 0; i < nargs && use_table; i++)
+       {
+         if (ipa_is_param_used_by_indirect_call (params_summary, i)
+             && ((known_vals.length () > i && known_vals[i])
+                 || (known_aggs.length () > i
+                     && known_aggs[i].items.length ())))
+           use_table = false;
+         else if (ipa_is_param_used_by_polymorphic_call (params_summary, i)
+                  && (known_contexts.length () > i
+                      && !known_contexts[i].useless_p ()))
+           use_table = false;
+       }
+    }
+
+  /* Fast path is via the call size time table.  */
+  if (use_table)
+    {
+      /* Build summary if it is absent.  */
+      if (!sum->call_size_time_table)
+       {
+         predicate true_pred = true;
+         sum->account_size_time (0, 0, true_pred, true_pred, true);
+         summarize_calls_size_and_time (node, sum);
+       }
+
+      int old_size = *size;
+      sreal old_time = time ? *time : 0;
+
+      if (min_size)
+       *min_size += (*sum->call_size_time_table)[0].size;
+
+      unsigned int i;
+      size_time_entry *e;
+
+      /* Walk the table and account sizes and times.  */
+      for (i = 0; vec_safe_iterate (sum->call_size_time_table, i, &e);
+          i++)
+       if (e->exec_predicate.evaluate (possible_truths))
+         {
+           *size += e->size;
+           if (time)
+             *time += e->time;
+         }
+
+      /* Be careful and see if both methods agree.  */
+      if ((flag_checking || dump_file)
+         /* Do not try to sanity check when we know we lost some
+            precision.  */
+         && sum->call_size_time_table->length ()
+            < ipa_fn_summary::max_size_time_table_size)
+       {
+         estimate_calls_size_and_time_1 (node, &old_size, NULL, &old_time, NULL,
+                                         possible_truths, known_vals,
+                                         known_contexts, known_aggs);
+         gcc_assert (*size == old_size);
+         if (time && (*time - old_time > 1 || *time - old_time < -1)
+             && dump_file)
+           fprintf (dump_file, "Time mismatch in call summary %f!=%f",
+                    old_time.to_double (),
+                    time->to_double ());
+       }
+    }
+  /* Slow path by walking all edges.  */
+  else
+    estimate_calls_size_and_time_1 (node, size, min_size, time, hints,
+                                   possible_truths, known_vals, known_contexts,
+                                   known_aggs);
+}
+
 /* Default constructor for ipa call context.
    Memory alloction of known_vals, known_contexts
    and known_aggs vectors is owned by the caller, but can
@@ -3303,10 +3465,11 @@ ipa_call_context::estimate_size_and_time (int *ret_size,
          }
     }
 
-  estimate_calls_size_and_time (m_node, &size, &min_size,
-                               ret_time ? &time : NULL,
-                               ret_hints ? &hints : NULL, m_possible_truths,
-                               m_known_vals, m_known_contexts, m_known_aggs);
+  if (m_node->callees || m_node->indirect_calls)
+    estimate_calls_size_and_time (m_node, &size, &min_size,
+                                 ret_time ? &time : NULL,
+                                 ret_hints ? &hints : NULL, m_possible_truths,
+                                 m_known_vals, m_known_contexts, m_known_aggs);
 
   sreal nonspecialized_time = time;
 
@@ -3760,10 +3923,12 @@ ipa_update_overall_fn_summary (struct cgraph_node *node)
       info->time += e->time;
     }
   info->min_size = (*info->size_time_table)[0].size;
-  estimate_calls_size_and_time (node, &size_info->size, &info->min_size,
-                               &info->time, NULL,
-                               ~(clause_t) (1 << predicate::false_condition),
-                               vNULL, vNULL, vNULL);
+  vec_free (info->call_size_time_table);
+  if (node->callees || node->indirect_calls)
+    estimate_calls_size_and_time (node, &size_info->size, &info->min_size,
+                                 &info->time, NULL,
+                                 ~(clause_t) (1 << predicate::false_condition),
+                                 vNULL, vNULL, vNULL);
   size_info->size = RDIV (size_info->size, ipa_fn_summary::size_scale);
   info->min_size = RDIV (info->min_size, ipa_fn_summary::size_scale);
 }
index 0d7467c01af051f49c19cc5040552999adde2e85..5822adbce203816039290e50cd58ea272eaa5589 100644 (file)
@@ -117,8 +117,8 @@ public:
       inlinable (false), single_caller (false),
       fp_expressions (false), estimated_stack_size (false),
       time (0), conds (NULL),
-      size_time_table (NULL), loop_iterations (NULL), loop_stride (NULL),
-      growth (0), scc_no (0)
+      size_time_table (NULL), call_size_time_table (NULL), loop_iterations (NULL),
+      loop_stride (NULL), growth (0), scc_no (0)
   {
   }
 
@@ -129,6 +129,7 @@ public:
     fp_expressions (s.fp_expressions),
     estimated_stack_size (s.estimated_stack_size),
     time (s.time), conds (s.conds), size_time_table (s.size_time_table),
+    call_size_time_table (NULL),
     loop_iterations (s.loop_iterations), loop_stride (s.loop_stride),
     growth (s.growth), scc_no (s.scc_no)
   {}
@@ -161,7 +162,12 @@ public:
   /* Conditional size/time information.  The summaries are being
      merged during inlining.  */
   conditions conds;
+  /* Normal code is accounted in size_time_table, while calls are
+     accounted in call_size_time_table.  This is because calls
+     are often adjusted by IPA optimizations and thus this summary
+     is generated from call summary information when needed.  */
   vec<size_time_entry, va_gc> *size_time_table;
+  vec<size_time_entry, va_gc> *call_size_time_table;
 
   /* Predicate on when some loop in the function becomes to have known
      bounds.   */
@@ -179,10 +185,13 @@ public:
   int scc_no;
 
   /* Record time and size under given predicates.  */
-  void account_size_time (int, sreal, const predicate &, const predicate &);
+  void account_size_time (int, sreal, const predicate &, const predicate &,
+                         bool call = false);
 
   /* We keep values scaled up, so fractional sizes can be accounted.  */
   static const int size_scale = 2;
+  /* Maximal size of size_time_table before we start to be conservative.  */
+  static const int max_size_time_table_size = 256;
 };
 
 class GTY((user)) ipa_fn_summary_t: