omp-builtins.def (BUILT_IN_GOMP_LOOP_NONMONOTONIC_RUNTIME_START, [...]): Fix up funct...
diff --git a/gcc/ipa-inline-transform.c b/gcc/ipa-inline-transform.c
index 52493cc4d0017f782a7ce24913e0d80a5b973b38..0e7499852911e43794c6562a3f99b22b8f2a0aec 100644
@@ -1,5 +1,5 @@
 /* Callgraph transformations to handle inlining
-   Copyright (C) 2003-2015 Free Software Foundation, Inc.
+   Copyright (C) 2003-2018 Free Software Foundation, Inc.
    Contributed by Jan Hubicka
 
 This file is part of GCC.
@@ -32,65 +32,44 @@ along with GCC; see the file COPYING3.  If not see
 #include "system.h"
 #include "coretypes.h"
 #include "tm.h"
-#include "hash-set.h"
-#include "machmode.h"
-#include "vec.h"
-#include "double-int.h"
-#include "input.h"
-#include "alias.h"
-#include "symtab.h"
-#include "wide-int.h"
-#include "inchash.h"
-#include "tree.h"
-#include "langhooks.h"
-#include "intl.h"
-#include "coverage.h"
-#include "ggc.h"
-#include "tree-cfg.h"
-#include "hash-map.h"
-#include "is-a.h"
-#include "plugin-api.h"
-#include "hard-reg-set.h"
-#include "input.h"
 #include "function.h"
-#include "ipa-ref.h"
-#include "cgraph.h"
+#include "tree.h"
 #include "alloc-pool.h"
+#include "tree-pass.h"
+#include "cgraph.h"
+#include "tree-cfg.h"
 #include "symbol-summary.h"
+#include "tree-vrp.h"
 #include "ipa-prop.h"
+#include "ipa-fnsummary.h"
 #include "ipa-inline.h"
 #include "tree-inline.h"
-#include "tree-pass.h"
+#include "function.h"
+#include "cfg.h"
+#include "basic-block.h"
 
 int ncalls_inlined;
 int nfunctions_inlined;
-bool speculation_removed;
 
-/* Scale frequency of NODE edges by FREQ_SCALE.  */
+/* Scale counts of NODE edges by NUM/DEN.  */
 
 static void
-update_noncloned_frequencies (struct cgraph_node *node,
-                             int freq_scale)
+update_noncloned_counts (struct cgraph_node *node,
+                        profile_count num, profile_count den)
 {
   struct cgraph_edge *e;
 
-  /* We do not want to ignore high loop nest after freq drops to 0.  */
-  if (!freq_scale)
-    freq_scale = 1;
+  profile_count::adjust_for_ipa_scaling (&num, &den);
+
   for (e = node->callees; e; e = e->next_callee)
     {
-      e->frequency = e->frequency * (gcov_type) freq_scale / CGRAPH_FREQ_BASE;
-      if (e->frequency > CGRAPH_FREQ_MAX)
-        e->frequency = CGRAPH_FREQ_MAX;
       if (!e->inline_failed)
-        update_noncloned_frequencies (e->callee, freq_scale);
+        update_noncloned_counts (e->callee, num, den);
+      e->count = e->count.apply_scale (num, den);
     }
   for (e = node->indirect_calls; e; e = e->next_callee)
-    {
-      e->frequency = e->frequency * (gcov_type) freq_scale / CGRAPH_FREQ_BASE;
-      if (e->frequency > CGRAPH_FREQ_MAX)
-        e->frequency = CGRAPH_FREQ_MAX;
-    }
+    e->count = e->count.apply_scale (num, den);
+  node->count = node->count.apply_scale (num, den);
 }
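
The new update_noncloned_counts walks the inline-clone tree and rescales
every outgoing edge count, and the node count itself, by NUM/DEN.  A
minimal standalone sketch of that scaling, using a hypothetical pcount
type rather than GCC's profile_count (which additionally tracks profile
quality):

    #include <cstdint>
    #include <cstdio>

    struct pcount
    {
      uint64_t value;
      bool initialized;

      /* Mirror of the apply_scale idea: scale by num/den, guarding
         uninitialized values and zero denominators.  */
      pcount apply_scale (pcount num, pcount den) const
      {
        if (!initialized || !num.initialized || !den.initialized
            || den.value == 0)
          return *this;
        return pcount { value * num.value / den.value, true };
      }
    };

    /* Analogue of profile_count::adjust_for_ipa_scaling in this sketch:
       degrade the ratio to 1/1 when the denominator cannot be trusted,
       so a bogus profile never zeroes out callee counts.  */
    static void
    adjust_for_ipa_scaling (pcount *num, pcount *den)
    {
      if (!den->initialized || den->value == 0)
        {
          *num = pcount { 1, true };
          *den = pcount { 1, true };
        }
    }

    int
    main ()
    {
      pcount num { 50, true }, den { 100, true };
      adjust_for_ipa_scaling (&num, &den);
      pcount edge { 400, true };
      /* Edge inlined into a caller entered half as often: prints 200.  */
      printf ("%u\n", (unsigned) edge.apply_scale (num, den).value);
      return 0;
    }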
 
 /* We removed or are going to remove the last call to NODE.
@@ -112,9 +91,12 @@ can_remove_node_now_p_1 (struct cgraph_node *node, struct cgraph_edge *e)
     }
   /* FIXME: When address is taken of DECL_EXTERNAL function we still
      can remove its offline copy, but we would need to keep unanalyzed node in
-     the callgraph so references can point to it.  */
+     the callgraph so references can point to it.
+
+     Also, for a comdat group we can ignore references within the group,
+     as we want to prove the group as a whole to be dead.  */
   return (!node->address_taken
-         && node->can_remove_if_no_direct_calls_p ()
+         && node->can_remove_if_no_direct_calls_and_refs_p ()
          /* Inlining might enable more devirtualizing, so we want to remove
             those only after all devirtualizable virtual calls are processed.
             Lacking may edges in callgraph we just preserve them post
@@ -175,12 +157,11 @@ master_clone_with_noninline_clones_p (struct cgraph_node *node)
    By default the offline copy is removed, when it appears dead after inlining.
    UPDATE_ORIGINAL prevents this transformation.
    If OVERALL_SIZE is non-NULL, the size is updated to reflect the
-   transformation.
-   FREQ_SCALE specify the scaling of frequencies of call sites.  */
+   transformation.  */
 
 void
 clone_inlined_nodes (struct cgraph_edge *e, bool duplicate,
-                    bool update_original, int *overall_size, int freq_scale)
+                    bool update_original, int *overall_size)
 {
   struct cgraph_node *inlining_into;
   struct cgraph_edge *next;
@@ -213,28 +194,28 @@ clone_inlined_nodes (struct cgraph_edge *e, bool duplicate,
             For now we keep the other functions in the group in the program until
             cgraph_remove_unreachable_functions gets rid of them.  */
          gcc_assert (!e->callee->global.inlined_to);
-         e->callee->dissolve_same_comdat_group_list ();
+         e->callee->remove_from_same_comdat_group ();
          if (e->callee->definition
              && inline_account_function_p (e->callee))
            {
              gcc_assert (!e->callee->alias);
              if (overall_size)
-               *overall_size -= inline_summaries->get (e->callee)->size;
+               *overall_size -= ipa_fn_summaries->get (e->callee)->size;
              nfunctions_inlined++;
            }
          duplicate = false;
          e->callee->externally_visible = false;
-          update_noncloned_frequencies (e->callee, e->frequency);
+          update_noncloned_counts (e->callee, e->count, e->callee->count);
+
+         dump_callgraph_transformation (e->callee, inlining_into,
+                                        "inlining to");
        }
       else
        {
          struct cgraph_node *n;
 
-         if (freq_scale == -1)
-           freq_scale = e->frequency;
          n = e->callee->create_clone (e->callee->decl,
-                                      MIN (e->count, e->callee->count),
-                                      freq_scale,
+                                      e->count,
                                       update_original, vNULL, true,
                                       inlining_into,
                                       NULL);
@@ -243,7 +224,7 @@ clone_inlined_nodes (struct cgraph_edge *e, bool duplicate,
        }
     }
   else
-    e->callee->dissolve_same_comdat_group_list ();
+    e->callee->remove_from_same_comdat_group ();
 
   e->callee->global.inlined_to = inlining_into;
 
@@ -252,13 +233,30 @@ clone_inlined_nodes (struct cgraph_edge *e, bool duplicate,
     {
       next = e->next_callee;
       if (!e->inline_failed)
-        clone_inlined_nodes (e, duplicate, update_original, overall_size, freq_scale);
+        clone_inlined_nodes (e, duplicate, update_original, overall_size);
+    }
+}
+
+/* Check all speculations in N and resolve them if they seem useless.  */
+
+static bool
+check_speculations (cgraph_node *n)
+{
+  bool speculation_removed = false;
+  cgraph_edge *next;
+
+  for (cgraph_edge *e = n->callees; e; e = next)
+    {
+      next = e->next_callee;
       if (e->speculative && !speculation_useful_p (e, true))
        {
          e->resolve_speculation (NULL);
          speculation_removed = true;
        }
+      else if (!e->inline_failed)
+       speculation_removed |= check_speculations (e->callee);
     }
+  return speculation_removed;
 }
 
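
check_speculations caches e->next_callee before resolving, because
resolve_speculation can rewrite the edge list under the iteration.  A toy
illustration of the pattern; edge and resolve_speculation here are
simplified stand-ins, not the cgraph API:

    #include <cstdio>

    struct edge
    {
      edge *next_callee;
      bool speculative;
    };

    /* Stand-in for cgraph_edge::resolve_speculation: pretend the edge
       is rewritten in place and loses its speculative flag.  */
    static void
    resolve_speculation (edge *e)
    {
      e->speculative = false;
    }

    static bool
    check_edges (edge *callees)
    {
      bool removed = false;
      edge *next;
      for (edge *e = callees; e; e = next)
        {
          next = e->next_callee;   /* cache: resolving may relink E */
          if (e->speculative)
            {
              resolve_speculation (e);
              removed = true;
            }
        }
      return removed;
    }

    int
    main ()
    {
      edge b = { nullptr, true };
      edge a = { &b, false };
      printf ("%d\n", check_edges (&a)); /* prints 1 */
      return 0;
    }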
 /* Mark all call graph edges coming out of NODE and all nodes that have been
@@ -271,7 +269,7 @@ mark_all_inlined_calls_cdtor (cgraph_node *node)
     {
       cs->in_polymorphic_cdtor = true;
       if (!cs->inline_failed)
-    mark_all_inlined_calls_cdtor (cs->callee);
+       mark_all_inlined_calls_cdtor (cs->callee);
     }
   for (cgraph_edge *cs = node->indirect_calls; cs; cs = cs->next_callee)
     cs->in_polymorphic_cdtor = true;
@@ -283,7 +281,7 @@ mark_all_inlined_calls_cdtor (cgraph_node *node)
    indirect edges are discovered in the process, add them to NEW_EDGES, unless
    it is NULL. If UPDATE_OVERALL_SUMMARY is false, do not bother to recompute overall
    size of caller after inlining. Caller is required to eventually do it via
-   inline_update_overall_summary.
+   ipa_update_overall_fn_summary.
    If callee_removed is non-NULL, set it to true if we removed callee node.
 
    Return true iff any new callgraph edges were discovered as a
@@ -301,23 +299,123 @@ inline_call (struct cgraph_edge *e, bool update_original,
   struct cgraph_node *callee = e->callee->ultimate_alias_target ();
   bool new_edges_found = false;
 
-#ifdef ENABLE_CHECKING
-  int estimated_growth = estimate_edge_growth (e);
+  int estimated_growth = 0;
+  if (! update_overall_summary)
+    estimated_growth = estimate_edge_growth (e);
+  /* This is used only for the assert below.  */
+#if 0
   bool predicated = inline_edge_summary (e)->predicate != NULL;
 #endif
 
-  speculation_removed = false;
   /* Don't inline inlined edges.  */
   gcc_assert (e->inline_failed);
   /* Don't even think of inlining inline clone.  */
   gcc_assert (!callee->global.inlined_to);
 
-  e->inline_failed = CIF_OK;
-  DECL_POSSIBLY_INLINED (callee->decl) = true;
-
   to = e->caller;
   if (to->global.inlined_to)
     to = to->global.inlined_to;
+  if (to->thunk.thunk_p)
+    {
+      struct cgraph_node *target = to->callees->callee;
+      if (in_lto_p)
+       to->get_untransformed_body ();
+      to->expand_thunk (false, true);
+      /* When the thunk is instrumented we may have multiple callees.  */
+      for (e = to->callees; e && e->callee != target; e = e->next_callee)
+       ;
+      gcc_assert (e);
+    }
+
+  e->inline_failed = CIF_OK;
+  DECL_POSSIBLY_INLINED (callee->decl) = true;
+
+  if (DECL_FUNCTION_PERSONALITY (callee->decl))
+    DECL_FUNCTION_PERSONALITY (to->decl)
+      = DECL_FUNCTION_PERSONALITY (callee->decl);
+
+  bool reload_optimization_node = false;
+  if (!opt_for_fn (callee->decl, flag_strict_aliasing)
+      && opt_for_fn (to->decl, flag_strict_aliasing))
+    {
+      struct gcc_options opts = global_options;
+
+      cl_optimization_restore (&opts, opts_for_fn (to->decl));
+      opts.x_flag_strict_aliasing = false;
+      if (dump_file)
+       fprintf (dump_file, "Dropping flag_strict_aliasing on %s\n",
+                to->dump_name ());
+      DECL_FUNCTION_SPECIFIC_OPTIMIZATION (to->decl)
+        = build_optimization_node (&opts);
+      reload_optimization_node = true;
+    }
+
+  ipa_fn_summary *caller_info = ipa_fn_summaries->get (to);
+  ipa_fn_summary *callee_info = ipa_fn_summaries->get (callee);
+  if (!caller_info->fp_expressions && callee_info->fp_expressions)
+    {
+      caller_info->fp_expressions = true;
+      if (opt_for_fn (callee->decl, flag_rounding_math)
+         != opt_for_fn (to->decl, flag_rounding_math)
+         || opt_for_fn (callee->decl, flag_trapping_math)
+            != opt_for_fn (to->decl, flag_trapping_math)
+         || opt_for_fn (callee->decl, flag_unsafe_math_optimizations)
+            != opt_for_fn (to->decl, flag_unsafe_math_optimizations)
+         || opt_for_fn (callee->decl, flag_finite_math_only)
+            != opt_for_fn (to->decl, flag_finite_math_only)
+         || opt_for_fn (callee->decl, flag_signaling_nans)
+            != opt_for_fn (to->decl, flag_signaling_nans)
+         || opt_for_fn (callee->decl, flag_cx_limited_range)
+            != opt_for_fn (to->decl, flag_cx_limited_range)
+         || opt_for_fn (callee->decl, flag_signed_zeros)
+            != opt_for_fn (to->decl, flag_signed_zeros)
+         || opt_for_fn (callee->decl, flag_associative_math)
+            != opt_for_fn (to->decl, flag_associative_math)
+         || opt_for_fn (callee->decl, flag_reciprocal_math)
+            != opt_for_fn (to->decl, flag_reciprocal_math)
+         || opt_for_fn (callee->decl, flag_fp_int_builtin_inexact)
+            != opt_for_fn (to->decl, flag_fp_int_builtin_inexact)
+         || opt_for_fn (callee->decl, flag_errno_math)
+            != opt_for_fn (to->decl, flag_errno_math))
+       {
+         struct gcc_options opts = global_options;
+
+         cl_optimization_restore (&opts, opts_for_fn (to->decl));
+         opts.x_flag_rounding_math
+           = opt_for_fn (callee->decl, flag_rounding_math);
+         opts.x_flag_trapping_math
+           = opt_for_fn (callee->decl, flag_trapping_math);
+         opts.x_flag_unsafe_math_optimizations
+           = opt_for_fn (callee->decl, flag_unsafe_math_optimizations);
+         opts.x_flag_finite_math_only
+           = opt_for_fn (callee->decl, flag_finite_math_only);
+         opts.x_flag_signaling_nans
+           = opt_for_fn (callee->decl, flag_signaling_nans);
+         opts.x_flag_cx_limited_range
+           = opt_for_fn (callee->decl, flag_cx_limited_range);
+         opts.x_flag_signed_zeros
+           = opt_for_fn (callee->decl, flag_signed_zeros);
+         opts.x_flag_associative_math
+           = opt_for_fn (callee->decl, flag_associative_math);
+         opts.x_flag_reciprocal_math
+           = opt_for_fn (callee->decl, flag_reciprocal_math);
+         opts.x_flag_fp_int_builtin_inexact
+           = opt_for_fn (callee->decl, flag_fp_int_builtin_inexact);
+         opts.x_flag_errno_math
+           = opt_for_fn (callee->decl, flag_errno_math);
+         if (dump_file)
+           fprintf (dump_file, "Copying FP flags from %s to %s\n",
+                    callee->dump_name (), to->dump_name ());
+         DECL_FUNCTION_SPECIFIC_OPTIMIZATION (to->decl)
+            = build_optimization_node (&opts);
+         reload_optimization_node = true;
+       }
+    }
+
+  /* Reload global optimization flags.  */
+  if (reload_optimization_node && DECL_STRUCT_FUNCTION (to->decl) == cfun)
+    set_cfun (cfun, true);
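
Both blocks above rebuild the caller's optimization node when the
callee's semantics must survive inlining: strict aliasing is dropped, and
the callee's FP flags are copied wholesale once fp_expressions is set.  A
simplified analogue using a hypothetical fn_opts struct; GCC's real
mechanism round-trips through cl_optimization_restore and
build_optimization_node instead:

    #include <cstdio>

    /* Hypothetical per-function option set, just for illustration.  */
    struct fn_opts
    {
      bool strict_aliasing;
      bool trapping_math;
      bool unsafe_math_optimizations;
    };

    /* After inlining CALLEE into CALLER, weaken CALLER's options
       wherever keeping them would miscompile the inlined body.  Returns
       true when the caller's options changed and must be
       re-materialized, like reload_optimization_node above.  */
    static bool
    merge_opts_after_inlining (fn_opts *caller, const fn_opts *callee)
    {
      bool changed = false;
      if (caller->strict_aliasing && !callee->strict_aliasing)
        {
          caller->strict_aliasing = false;  /* callee may violate TBAA */
          changed = true;
        }
      if (caller->trapping_math != callee->trapping_math
          || caller->unsafe_math_optimizations
             != callee->unsafe_math_optimizations)
        {
          caller->trapping_math = callee->trapping_math;
          caller->unsafe_math_optimizations
            = callee->unsafe_math_optimizations;
          changed = true;
        }
      return changed;
    }

    int
    main ()
    {
      fn_opts caller = { true, true, false };
      fn_opts callee = { false, true, false };
      printf ("%d %d\n", merge_opts_after_inlining (&caller, &callee),
              (int) caller.strict_aliasing); /* prints 1 0 */
      return 0;
    }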
 
   /* If aliases are involved, redirect edge to the actual destination and
      possibly remove the aliases.  */
@@ -342,19 +440,26 @@ inline_call (struct cgraph_edge *e, bool update_original,
        }
     }
 
-  clone_inlined_nodes (e, true, update_original, overall_size, e->frequency);
+  clone_inlined_nodes (e, true, update_original, overall_size);
 
   gcc_assert (curr->callee->global.inlined_to == to);
 
-  old_size = inline_summaries->get (to)->size;
-  inline_merge_summary (e);
+  old_size = ipa_fn_summaries->get (to)->size;
+  ipa_merge_fn_summary_after_inlining (e);
   if (e->in_polymorphic_cdtor)
     mark_all_inlined_calls_cdtor (e->callee);
   if (opt_for_fn (e->caller->decl, optimize))
     new_edges_found = ipa_propagate_indirect_call_infos (curr, new_edges);
+  check_speculations (e->callee);
   if (update_overall_summary)
-   inline_update_overall_summary (to);
-  new_size = inline_summaries->get (to)->size;
+    ipa_update_overall_fn_summary (to);
+  else
+    /* Update self size by the estimate so that overall function growth
+       limits keep working for further inlining into this function.
+       Before we inline into this function again, our caller is expected
+       to update the overall summary.  */
+    ipa_fn_summaries->get (to)->size += estimated_growth;
+  new_size = ipa_fn_summaries->get (to)->size;
 
   if (callee->calls_comdat_local)
     to->calls_comdat_local = true;
@@ -368,9 +473,12 @@ inline_call (struct cgraph_edge *e, bool update_original,
        to->calls_comdat_local = false;
     }
 
-#ifdef ENABLE_CHECKING
+  /* FIXME: This assert suffers from roundoff errors, disable it for GCC 5
+     and revisit it after conversion to sreals in GCC 6.
+     See PR 65654.  */
+#if 0
   /* Verify that estimated growth matches real growth.  Allow off-by-one
-     error due to INLINE_SIZE_SCALE roudoff errors.  */
+     error due to ipa_fn_summary::size_scale roundoff errors.  */
   gcc_assert (!update_overall_summary || !overall_size || new_edges_found
              || abs (estimated_growth - (new_size - old_size)) <= 1
              || speculation_removed
@@ -385,7 +493,7 @@ inline_call (struct cgraph_edge *e, bool update_original,
     *overall_size += new_size - old_size;
   ncalls_inlined++;
 
-  /* This must happen after inline_merge_summary that rely on jump
+  /* This must happen after ipa_merge_fn_summary_after_inlining which relies on jump
      functions of callee to not be updated.  */
   return new_edges_found;
 }
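
When update_overall_summary is false, the caller's recorded size is
bumped by the growth estimated before inlining, so that overall-growth
limits still bind during further decisions; the exact recompute is
deferred to inline_call's caller.  A toy of that bookkeeping, with
made-up numbers:

    #include <cstdio>

    int
    main ()
    {
      int caller_size = 120;       /* ipa_fn_summaries->get (to)->size */
      int estimated_growth = 15;   /* estimate_edge_growth (e) */
      bool update_overall_summary = false;

      int old_size = caller_size;
      if (update_overall_summary)
        caller_size = 137;         /* stands in for the full recompute */
      else
        caller_size += estimated_growth;  /* cheap estimate keeps
                                             growth limits working */

      int overall_size = 1000;
      overall_size += caller_size - old_size;
      printf ("%d %d\n", caller_size, overall_size); /* prints 135 1015 */
      return 0;
    }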
@@ -411,6 +519,22 @@ save_inline_function_body (struct cgraph_node *node)
 
   /* first_clone will be turned into real function.  */
   first_clone = node->clones;
+
+  /* Arrange for the first clone not to be a thunk, as thunks do not
+     have bodies.  */
+  if (first_clone->thunk.thunk_p)
+    {
+      while (first_clone->thunk.thunk_p)
+        first_clone = first_clone->next_sibling_clone;
+      first_clone->prev_sibling_clone->next_sibling_clone
+       = first_clone->next_sibling_clone;
+      if (first_clone->next_sibling_clone)
+       first_clone->next_sibling_clone->prev_sibling_clone
+          = first_clone->prev_sibling_clone;
+      first_clone->next_sibling_clone = node->clones;
+      first_clone->prev_sibling_clone = NULL;
+      node->clones->prev_sibling_clone = first_clone;
+      node->clones = first_clone;
+    }
   first_clone->decl = copy_node (node->decl);
   first_clone->decl->decl_with_vis.symtab_node = first_clone;
   gcc_assert (first_clone == cgraph_node::get (first_clone->decl));
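
The thunk-skipping block above splices the first non-thunk clone out of
the sibling list and pushes it to the head via the prev/next sibling
pointers.  The same pointer surgery on a generic doubly linked list;
clone here is a hypothetical three-field node, not cgraph_node:

    #include <cstdio>

    struct clone
    {
      clone *prev_sibling_clone;
      clone *next_sibling_clone;
      bool thunk_p;
    };

    /* Move the first non-thunk node to the head of *HEAD, mirroring the
       splice in save_inline_function_body.  Like the GCC code, assumes
       a non-thunk node exists somewhere past the head.  */
    static void
    move_first_non_thunk_to_head (clone **head)
    {
      clone *c = *head;
      if (!c->thunk_p)
        return;
      while (c->thunk_p)
        c = c->next_sibling_clone;
      /* Unlink C from its current position...  */
      c->prev_sibling_clone->next_sibling_clone = c->next_sibling_clone;
      if (c->next_sibling_clone)
        c->next_sibling_clone->prev_sibling_clone = c->prev_sibling_clone;
      /* ...and push it onto the front.  */
      c->next_sibling_clone = *head;
      c->prev_sibling_clone = nullptr;
      (*head)->prev_sibling_clone = c;
      *head = c;
    }

    int
    main ()
    {
      clone body = { nullptr, nullptr, false };
      clone thunk = { nullptr, &body, true };
      body.prev_sibling_clone = &thunk;
      clone *head = &thunk;
      move_first_non_thunk_to_head (&head);
      printf ("%d\n", (int) head->thunk_p); /* prints 0 */
      return 0;
    }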
@@ -419,7 +543,8 @@ save_inline_function_body (struct cgraph_node *node)
      first_clone.  */
   if (first_clone->next_sibling_clone)
     {
-      for (n = first_clone->next_sibling_clone; n->next_sibling_clone; n = n->next_sibling_clone)
+      for (n = first_clone->next_sibling_clone; n->next_sibling_clone;
+          n = n->next_sibling_clone)
         n->clone_of = first_clone;
       n->clone_of = first_clone;
       n->next_sibling_clone = first_clone->clones;
@@ -478,10 +603,9 @@ save_inline_function_body (struct cgraph_node *node)
       first_clone->remove_symbol_and_inline_clones ();
       first_clone = NULL;
     }
-#ifdef ENABLE_CHECKING
-  else
+  else if (flag_checking)
     first_clone->verify ();
-#endif
+
   return first_clone;
 }
 
@@ -493,9 +617,10 @@ preserve_function_body_p (struct cgraph_node *node)
   gcc_assert (symtab->global_info_ready);
   gcc_assert (!node->alias && !node->thunk.thunk_p);
 
-  /* Look if there is any clone around.  */
-  if (node->clones)
-    return true;
+  /* Look if there is any non-thunk clone around.  */
+  for (node = node->clones; node; node = node->next_sibling_clone)
+    if (!node->thunk.thunk_p)
+      return true;
   return false;
 }
 
@@ -529,7 +654,33 @@ inline_transform (struct cgraph_node *node)
 
   timevar_push (TV_INTEGRATION);
   if (node->callees && (opt_for_fn (node->decl, optimize) || has_inline))
-    todo = optimize_inline_calls (current_function_decl);
+    {
+      profile_count num = node->count;
+      profile_count den = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
+      bool scale = num.initialized_p () && !(num == den);
+      if (scale)
+       {
+         profile_count::adjust_for_ipa_scaling (&num, &den);
+         if (dump_file)
+           {
+             fprintf (dump_file, "Applying count scale ");
+             num.dump (dump_file);
+             fprintf (dump_file, "/");
+             den.dump (dump_file);
+             fprintf (dump_file, "\n");
+           }
+
+         basic_block bb;
+         cfun->cfg->count_max = profile_count::uninitialized ();
+         FOR_ALL_BB_FN (bb, cfun)
+           {
+             bb->count = bb->count.apply_scale (num, den);
+             cfun->cfg->count_max = cfun->cfg->count_max.max (bb->count);
+           }
+         ENTRY_BLOCK_PTR_FOR_FN (cfun)->count = node->count;
+       }
+      todo = optimize_inline_calls (current_function_decl);
+    }
   timevar_pop (TV_INTEGRATION);
 
   cfun->always_inline_functions_inlined = true;
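
inline_transform now rescales each basic-block count by node->count over
the entry-block count, tracking cfg->count_max along the way, before
letting optimize_inline_calls run on the adjusted body.  A toy version
over a plain array of counts; all names and numbers are illustrative:

    #include <cstdint>
    #include <cstdio>

    int
    main ()
    {
      /* Hypothetical block counts; the entry block is bbs[0].  */
      uint64_t bbs[] = { 100, 40, 60, 100 };
      uint64_t num = 50;       /* node->count after IPA decisions */
      uint64_t den = bbs[0];   /* ENTRY_BLOCK_PTR_FOR_FN (cfun)->count */

      if (den != 0 && num != den)  /* mirrors the `scale' guard */
        {
          uint64_t count_max = 0;  /* mirrors cfun->cfg->count_max */
          for (uint64_t &c : bbs)
            {
              c = c * num / den;   /* bb->count.apply_scale (num, den) */
              if (c > count_max)
                count_max = c;
            }
          bbs[0] = num;            /* entry gets the exact node count */
          printf ("count_max = %llu\n",
                  (unsigned long long) count_max); /* prints 50 */
        }
      return 0;
    }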