ipa-inline.c (compute_uninlined_call_time, [...]): New functions.
authorJan Hubicka <jh@suse.cz>
Mon, 5 Nov 2012 14:00:46 +0000 (15:00 +0100)
committerJan Hubicka <hubicka@gcc.gnu.org>
Mon, 5 Nov 2012 14:00:46 +0000 (14:00 +0000)
* ipa-inline.c (compute_uninlined_call_time,
compute_inlined_call_time): New functions.
(RELATIVE_TIME_BENEFIT_RANGE): New macro.
(relative_time_benefit): Rewrite.
(edge_badness): Rewrite path with guessed profile and estimated profile.
* ipa-inline.h (INLINE_HINT_declared_inline, INLINE_HINT_cross_module):
New hints.
(struct inline_summary): Add GROWTH filed.
* ipa-inline-analysis.c (dump_inline_hints): Update.
(reset_inline_summary): Update.
(dump_inline_summary): Update.
(will_be_nonconstant_predicate): Cleanup to use gimple_store_p and
gimple_assign_load_p predicates.
(estimate_node_size_and_time): Drop INLINE_HINT_declared_inline hint.
(simple_edge_hints): New function.
(do_estimate_edge_time): Return time of invocation of callee rather
than the time scaled by edge frequency; update hints code.
(do_estimate_edge_hints): Update.
(do_estimate_growth): Cleanup.

From-SVN: r193161

gcc/ChangeLog
gcc/ipa-inline-analysis.c
gcc/ipa-inline.c
gcc/ipa-inline.h

index be526da445b2d2b220400ed6def37f5be8edf170..50148685ef90b5d08ffc8b5d5fb23d8ab5d5d5a2 100644 (file)
@@ -1,3 +1,25 @@
+2012-11-05  Jan Hubicka  <jh@suse.cz>
+
+       * ipa-inline.c (compute_uninlined_call_time,
+       compute_inlined_call_time): New functions.
+       (RELATIVE_TIME_BENEFIT_RANGE): New macro.
+       (relative_time_benefit): Rewrite.
+       (edge_badness): Rewrite path with guessed profile and estimated profile.
+       * ipa-inline.h (INLINE_HINT_declared_inline, INLINE_HINT_cross_module):
+       New hints.
+       (struct inline_summary): Add GROWTH filed.
+       * ipa-inline-analysis.c (dump_inline_hints): Update.
+       (reset_inline_summary): Update.
+       (dump_inline_summary): Update.
+       (will_be_nonconstant_predicate): Cleanup to use gimple_store_p and
+       gimple_assign_load_p predicates.
+       (estimate_node_size_and_time): Drop INLINE_HINT_declared_inline hint.
+       (simple_edge_hints): New function.
+       (do_estimate_edge_time): Return time of invocation of callee rather
+       than the time scaled by edge frequency; update hints code.
+       (do_estimate_edge_hints): Update.
+       (do_estimate_growth): Cleanup.
+
 2012-11-05  Jakub Jelinek  <jakub@redhat.com>
 
        PR target/55194
index 9db7f1c0958e782ec54995b3606de7d477d6de9f..595cb682a5b7445f1ca015da22c8eef0620143fe 100644 (file)
@@ -649,6 +649,16 @@ dump_inline_hints (FILE *f, inline_hints hints)
       hints &= ~INLINE_HINT_in_scc;
       fprintf (f, " in_scc");
     }
+  if (hints & INLINE_HINT_cross_module)
+    {
+      hints &= ~INLINE_HINT_cross_module;
+      fprintf (f, " cross_module");
+    }
+  if (hints & INLINE_HINT_declared_inline)
+    {
+      hints &= ~INLINE_HINT_declared_inline;
+      fprintf (f, " declared_inline");
+    }
   gcc_assert (!hints);
 }
 
@@ -983,6 +993,7 @@ reset_inline_summary (struct cgraph_node *node)
   info->stack_frame_offset = 0;
   info->size = 0;
   info->time = 0;
+  info->growth = 0;
   info->scc_no = 0;
   if (info->loop_iterations)
     {
@@ -1375,6 +1386,9 @@ dump_inline_summary (FILE * f, struct cgraph_node *node)
               (int) s->estimated_self_stack_size);
       fprintf (f, "  global stack:    %i\n",
               (int) s->estimated_stack_size);
+      if (s->growth)
+        fprintf (f, "  estimated growth:%i\n",
+                (int) s->growth);
       if (s->scc_no)
         fprintf (f, "  In SCC:          %i\n",
                 (int) s->scc_no);
@@ -1977,10 +1991,11 @@ will_be_nonconstant_predicate (struct ipa_node_params *info,
     return p;
 
   /* Stores will stay anyway.  */
-  if (gimple_vdef (stmt))
+  if (gimple_store_p (stmt))
     return p;
 
-  is_load = gimple_vuse (stmt) != NULL;
+  is_load = gimple_assign_load_p (stmt);
+
   /* Loads can be optimized when the value is known.  */
   if (is_load)
     {
@@ -2857,6 +2872,8 @@ estimate_node_size_and_time (struct cgraph_node *node,
     hints |=INLINE_HINT_loop_stride;
   if (info->scc_no)
     hints |= INLINE_HINT_in_scc;
+  if (DECL_DECLARED_INLINE_P (node->symbol.decl))
+    hints |= INLINE_HINT_declared_inline;
 
   estimate_calls_size_and_time (node, &size, &time, &hints, possible_truths,
                                known_vals, known_binfos, known_aggs);
@@ -2865,7 +2882,6 @@ estimate_node_size_and_time (struct cgraph_node *node,
   time = RDIV (time, INLINE_TIME_SCALE);
   size = RDIV (size, INLINE_SIZE_SCALE);
 
-
   if (dump_file
       && (dump_flags & TDF_DETAILS))
     fprintf (dump_file, "\n   size:%i time:%i\n", (int)size, (int)time);
@@ -3315,6 +3331,26 @@ inline_update_overall_summary (struct cgraph_node *node)
   info->size = (info->size + INLINE_SIZE_SCALE / 2) / INLINE_SIZE_SCALE;
 }
 
+/* Return hints derrived from EDGE.   */
+int
+simple_edge_hints (struct cgraph_edge *edge)
+{
+  int hints = 0;
+  struct cgraph_node *to = (edge->caller->global.inlined_to
+                           ? edge->caller->global.inlined_to
+                           : edge->caller);
+  if (inline_summary (to)->scc_no
+      && inline_summary (to)->scc_no == inline_summary (edge->callee)->scc_no
+      && !cgraph_edge_recursive_p (edge))
+    hints |= INLINE_HINT_same_scc;
+
+  if (to->symbol.lto_file_data && edge->callee->symbol.lto_file_data
+      && to->symbol.lto_file_data != edge->callee->symbol.lto_file_data)
+    hints |= INLINE_HINT_cross_module;
+
+  return hints;
+}
+
 /* Estimate the time cost for the caller when inlining EDGE.
    Only to be called via estimate_edge_time, that handles the
    caching mechanism.
@@ -3328,7 +3364,6 @@ do_estimate_edge_time (struct cgraph_edge *edge)
   int time;
   int size;
   inline_hints hints;
-  gcov_type ret;
   struct cgraph_node *callee;
   clause_t clause;
   VEC (tree, heap) *known_vals;
@@ -3347,33 +3382,26 @@ do_estimate_edge_time (struct cgraph_edge *edge)
   VEC_free (tree, heap, known_vals);
   VEC_free (tree, heap, known_binfos);
   VEC_free (ipa_agg_jump_function_p, heap, known_aggs);
-
-  ret = RDIV ((gcov_type)time * edge->frequency,
-             CGRAPH_FREQ_BASE);
+  gcc_checking_assert (size >= 0);
+  gcc_checking_assert (time >= 0);
 
   /* When caching, update the cache entry.  */
   if (edge_growth_cache)
     {
-      struct cgraph_node *to = (edge->caller->global.inlined_to
-                               ? edge->caller->global.inlined_to
-                               : edge->caller);
       if ((int)VEC_length (edge_growth_cache_entry, edge_growth_cache)
          <= edge->uid)
        VEC_safe_grow_cleared (edge_growth_cache_entry, heap, edge_growth_cache,
                               cgraph_edge_max_uid);
       VEC_index (edge_growth_cache_entry, edge_growth_cache, edge->uid).time
-       = ret + (ret >= 0);
+       = time + (time >= 0);
 
       VEC_index (edge_growth_cache_entry, edge_growth_cache, edge->uid).size
        = size + (size >= 0);
-      if (inline_summary (to)->scc_no
-         && inline_summary (to)->scc_no == inline_summary (callee)->scc_no
-         && !cgraph_edge_recursive_p (edge))
-       hints |= INLINE_HINT_same_scc;
+      hints |= simple_edge_hints (edge);
       VEC_index (edge_growth_cache_entry, edge_growth_cache, edge->uid).hints
        = hints + 1;
     }
-  return ret;
+  return time;
 }
 
 
@@ -3430,9 +3458,6 @@ do_estimate_edge_hints (struct cgraph_edge *edge)
   VEC (tree, heap) *known_vals;
   VEC (tree, heap) *known_binfos;
   VEC (ipa_agg_jump_function_p, heap) *known_aggs;
-  struct cgraph_node *to = (edge->caller->global.inlined_to
-                           ? edge->caller->global.inlined_to
-                           : edge->caller);
 
   /* When we do caching, use do_estimate_edge_time to populate the entry.  */
 
@@ -3458,10 +3483,7 @@ do_estimate_edge_hints (struct cgraph_edge *edge)
   VEC_free (tree, heap, known_vals);
   VEC_free (tree, heap, known_binfos);
   VEC_free (ipa_agg_jump_function_p, heap, known_aggs);
-  if (inline_summary (to)->scc_no
-      && inline_summary (to)->scc_no == inline_summary (callee)->scc_no
-      && !cgraph_edge_recursive_p (edge))
-    hints |= INLINE_HINT_same_scc;
+  hints |= simple_edge_hints (edge);
   return hints;
 }
 
@@ -3549,10 +3571,11 @@ do_estimate_growth (struct cgraph_node *node)
      return zero or negative growths. */
   if (d.self_recursive)
     d.growth = d.growth < info->size ? info->size : d.growth;
+  else if (DECL_EXTERNAL (node->symbol.decl))
+    ;
   else
     {
-      if (!DECL_EXTERNAL (node->symbol.decl)
-         && cgraph_will_be_removed_from_program_if_no_direct_calls (node))
+      if (cgraph_will_be_removed_from_program_if_no_direct_calls (node))
        d.growth -= info->size;
       /* COMDAT functions are very often not shared across multiple units
         since they come from various template instantiations.
index 2bca2c5fa97e33ed44d2cbd544972df00bfaef7f..b6a69cbbc7c14bf886b2e63d5baebe4f89396a0d 100644 (file)
@@ -456,6 +456,42 @@ want_early_inline_function_p (struct cgraph_edge *e)
   return want_inline;
 }
 
+/* Compute time of the edge->caller + edge->callee execution when inlining
+   does not happen.  */
+
+inline int
+compute_uninlined_call_time (struct inline_summary *callee_info,
+                            struct cgraph_edge *edge)
+{
+  int uninlined_call_time =
+    RDIV ((gcov_type)callee_info->time * MAX (edge->frequency, 1),
+         CGRAPH_FREQ_BASE);
+  int caller_time = inline_summary (edge->caller->global.inlined_to
+                                   ? edge->caller->global.inlined_to
+                                   : edge->caller)->time;
+  return uninlined_call_time + caller_time;
+}
+
+/* Same as compute_uinlined_call_time but compute time when inlining
+   does happen.  */
+
+inline gcov_type
+compute_inlined_call_time (struct cgraph_edge *edge,
+                          int edge_time)
+{
+  int caller_time = inline_summary (edge->caller->global.inlined_to
+                                   ? edge->caller->global.inlined_to
+                                   : edge->caller)->time;
+  int time = caller_time + RDIV ((edge_time - inline_edge_summary (edge)->call_stmt_time)
+                                * MAX (edge->frequency, 1),
+                                CGRAPH_FREQ_BASE);
+  /* Possible one roundoff error, but watch for overflows.  */
+  gcc_checking_assert (time >= INT_MIN / 2);
+  if (time < 0)
+    time = 0;
+  return time;
+}
+
 /* Return true if we are interested in inlining small function.
    When REPORT is true, report reason to dump file.  */
 
@@ -724,31 +760,41 @@ want_inline_function_to_all_callers_p (struct cgraph_node *node, bool cold)
    return true;
 }
 
+#define RELATIVE_TIME_BENEFIT_RANGE (INT_MAX / 64)
 
 /* Return relative time improvement for inlining EDGE in range
-   1...2^9.  */
+   1...RELATIVE_TIME_BENEFIT_RANGE  */
 
 static inline int
 relative_time_benefit (struct inline_summary *callee_info,
                       struct cgraph_edge *edge,
-                      int time_growth)
+                      int edge_time)
 {
   int relbenefit;
-  gcov_type uninlined_call_time;
+  int uninlined_call_time = compute_uninlined_call_time (callee_info, edge);
+  int inlined_call_time = compute_inlined_call_time (edge, edge_time);
+
+  /* Inlining into extern inline function is not a win.  */
+  if (DECL_EXTERNAL (edge->caller->global.inlined_to
+                    ? edge->caller->global.inlined_to->symbol.decl
+                    : edge->caller->symbol.decl))
+    return 1;
+
+  /* Watch overflows.  */
+  gcc_checking_assert (uninlined_call_time >= 0);
+  gcc_checking_assert (inlined_call_time >= 0);
+  gcc_checking_assert (uninlined_call_time >= inlined_call_time);
 
-  uninlined_call_time =
-    ((gcov_type)
-     (callee_info->time
-      + inline_edge_summary (edge)->call_stmt_time) * edge->frequency
-     + CGRAPH_FREQ_BASE / 2) / CGRAPH_FREQ_BASE;
   /* Compute relative time benefit, i.e. how much the call becomes faster.
      ??? perhaps computing how much the caller+calle together become faster
      would lead to more realistic results.  */
   if (!uninlined_call_time)
     uninlined_call_time = 1;
   relbenefit =
-    (uninlined_call_time - time_growth) * 256 / (uninlined_call_time);
-  relbenefit = MIN (relbenefit, 512);
+    RDIV (((gcov_type)uninlined_call_time - inlined_call_time) * RELATIVE_TIME_BENEFIT_RANGE,
+         uninlined_call_time);
+  relbenefit = MIN (relbenefit, RELATIVE_TIME_BENEFIT_RANGE);
+  gcc_checking_assert (relbenefit >= 0);
   relbenefit = MAX (relbenefit, 1);
   return relbenefit;
 }
@@ -764,7 +810,7 @@ static int
 edge_badness (struct cgraph_edge *edge, bool dump)
 {
   gcov_type badness;
-  int growth, time_growth;
+  int growth, edge_time;
   struct cgraph_node *callee = cgraph_function_or_thunk_node (edge->callee,
                                                              NULL);
   struct inline_summary *callee_info = inline_summary (callee);
@@ -774,17 +820,20 @@ edge_badness (struct cgraph_edge *edge, bool dump)
     return INT_MIN;
 
   growth = estimate_edge_growth (edge);
-  time_growth = estimate_edge_time (edge);
+  edge_time = estimate_edge_time (edge);
   hints = estimate_edge_hints (edge);
+  gcc_checking_assert (edge_time >= 0);
+  gcc_checking_assert (edge_time <= callee_info->time);
+  gcc_checking_assert (growth <= callee_info->size);
 
   if (dump)
     {
       fprintf (dump_file, "    Badness calculation for %s -> %s\n",
               xstrdup (cgraph_node_name (edge->caller)),
               xstrdup (cgraph_node_name (callee)));
-      fprintf (dump_file, "      size growth %i, time growth %i ",
+      fprintf (dump_file, "      size growth %i, time %i ",
               growth,
-              time_growth);
+              edge_time);
       dump_inline_hints (dump_file, hints);
       fprintf (dump_file, "\n");
     }
@@ -802,7 +851,7 @@ edge_badness (struct cgraph_edge *edge, bool dump)
 
                relative_edge_count * relative_time_benefit
      goodness = -------------------------------------------
-               edge_growth
+               growth_f_caller
      badness = -goodness  
 
     The fraction is upside down, because on edge counts and time beneits
@@ -810,11 +859,11 @@ edge_badness (struct cgraph_edge *edge, bool dump)
 
   else if (max_count)
     {
-      int relbenefit = relative_time_benefit (callee_info, edge, time_growth);
+      int relbenefit = relative_time_benefit (callee_info, edge, edge_time);
       badness =
        ((int)
-        ((double) edge->count * INT_MIN / 2 / max_count / 512) *
-        relative_time_benefit (callee_info, edge, time_growth)) / growth;
+        ((double) edge->count * INT_MIN / 2 / max_count / RELATIVE_TIME_BENEFIT_RANGE) *
+        relbenefit) / growth;
       
       /* Be sure that insanity of the profile won't lead to increasing counts
         in the scalling and thus to overflow in the computation above.  */
@@ -826,73 +875,53 @@ edge_badness (struct cgraph_edge *edge, bool dump)
                   " * Relative benefit %f\n",
                   (int) badness, (double) badness / INT_MIN,
                   (double) edge->count / max_count,
-                  relbenefit * 100 / 256.0);
+                  relbenefit * 100.0 / RELATIVE_TIME_BENEFIT_RANGE);
        }
     }
 
   /* When function local profile is available. Compute badness as:
-
      
-               growth_of_callee
-     badness = -------------------------------------- + growth_for-all
-              relative_time_benefit * edge_frequency
+                 relative_time_benefit
+     goodness =  ---------------------------------
+                growth_of_caller * overall_growth
 
+     badness = - goodness
+
+     compensated by the inline hints.
   */
   else if (flag_guess_branch_prob)
     {
-      int div = edge->frequency * (1<<10) / CGRAPH_FREQ_MAX;
-
-      div = MAX (div, 1);
-      gcc_checking_assert (edge->frequency <= CGRAPH_FREQ_MAX);
-      div *= relative_time_benefit (callee_info, edge, time_growth);
-
-      /* frequency is normalized in range 1...2^10.
-         relbenefit in range 1...2^9
-        DIV should be in range 1....2^19.  */
-      gcc_checking_assert (div >= 1 && div <= (1<<19));
-
-      /* Result must be integer in range 0...INT_MAX.
-        Set the base of fixed point calculation so we don't lose much of
-        precision for small bandesses (those are interesting) yet we don't
-        overflow for growths that are still in interesting range.
-
-        Fixed point arithmetic with point at 6th bit. */
-      badness = ((gcov_type)growth) * (1<<(19+6));
-      badness = (badness + div / 2) / div;
-
-      /* Overall growth of inlining all calls of function matters: we want to
-        inline so offline copy of function is no longer needed.
-
-        Additionally functions that can be fully inlined without much of
-        effort are better inline candidates than functions that can be fully
-        inlined only after noticeable overall unit growths. The latter
-        are better in a sense compressing of code size by factoring out common
-        code into separate function shared by multiple code paths.
-
-        We might mix the valud into the fraction by taking into account
-        relative growth of the unit, but for now just add the number
-        into resulting fraction.  */
-      if (badness > INT_MAX / 8)
-       {
-         badness = INT_MAX / 8;
-         if (dump)
-           fprintf (dump_file, "Badness overflow\n");
-       }
-      if (hints & (INLINE_HINT_indirect_call
-                  | INLINE_HINT_loop_iterations
-                  | INLINE_HINT_loop_stride))
-       badness /= 8;
+      badness = (relative_time_benefit (callee_info, edge, edge_time)
+                * (INT_MIN / 16 / RELATIVE_TIME_BENEFIT_RANGE));
+      badness /= (growth * MAX (1, callee_info->growth));
+      gcc_checking_assert (badness <=0 && badness >= INT_MIN / 16);
+      if ((hints & (INLINE_HINT_indirect_call
+                   | INLINE_HINT_loop_iterations
+                   | INLINE_HINT_loop_stride))
+         || callee_info->growth <= 0)
+       badness *= 8;
       if (hints & (INLINE_HINT_same_scc))
-       badness *= 4;
-      if (hints & (INLINE_HINT_in_scc))
-       badness *= 2;
+       badness /= 16;
+      else if (hints & (INLINE_HINT_in_scc))
+       badness /= 8;
+      else if (hints & (INLINE_HINT_cross_module))
+       badness /= 2;
+      gcc_checking_assert (badness <= 0 && badness >= INT_MIN / 2);
+      if ((hints & INLINE_HINT_declared_inline) && badness >= INT_MIN / 32)
+       badness *= 16;
       if (dump)
        {
          fprintf (dump_file,
                   "      %i: guessed profile. frequency %f,"
-                  " benefit %f%%, divisor %i\n",
+                  " benefit %f%%, time w/o inlining %i, time w inlining %i"
+                  " overall growth %i (current) %i (original)\n",
                   (int) badness, (double)edge->frequency / CGRAPH_FREQ_BASE,
-                  relative_time_benefit (callee_info, edge, time_growth) * 100 / 256.0, div);
+                  relative_time_benefit (callee_info, edge, edge_time) * 100.0
+                  / RELATIVE_TIME_BENEFIT_RANGE, 
+                  compute_uninlined_call_time (callee_info, edge),
+                  (int)compute_inlined_call_time (edge, edge_time),
+                  estimate_growth (callee),
+                  callee_info->growth);
        }
     }
   /* When function local profile is not available or it does not give
@@ -1371,6 +1400,7 @@ inline_small_functions (void)
 
            if (!DECL_EXTERNAL (node->symbol.decl))
              initial_size += info->size;
+           info->growth = estimate_growth (node);
            if (dfs && dfs->next_cycle)
              {
                struct cgraph_node *n2;
index ca59a7f70998ebb7258178e1257c4af7acdb0aa7..fb055c49c5fbb3138835d6b68af6c3c37d89a99a 100644 (file)
@@ -49,7 +49,9 @@ enum inline_hints_vals {
   INLINE_HINT_loop_iterations = 2,
   INLINE_HINT_loop_stride = 4,
   INLINE_HINT_same_scc = 8,
-  INLINE_HINT_in_scc = 16
+  INLINE_HINT_in_scc = 16,
+  INLINE_HINT_declared_inline = 32,
+  INLINE_HINT_cross_module = 64
 };
 typedef int inline_hints;
 
@@ -129,6 +131,12 @@ struct GTY(()) inline_summary
   /* Predicate on when some loop in the function becomes to have known
      stride.   */
   struct predicate * GTY((skip)) loop_stride;
+  /* Estimated growth for inlining all copies of the function before start
+     of small functions inlining.
+     This value will get out of date as the callers are duplicated, but
+     using up-to-date value in the badness metric mean a lot of extra
+     expenses.  */
+  int growth;
   /* Number of SCC on the beggining of inlining process.  */
   int scc_no;
 };