re PR target/65697 (__atomic memory barriers not strong enough for __sync builtins)
[gcc.git] / gcc / ipa-cp.c
index 0529f174615e3404e0f14ac49de636d2a077ad09..02b027d0c52796934d8b6caffaa3e9d2b25aad54 100644 (file)
@@ -1,5 +1,5 @@
 /* Interprocedural constant propagation
-   Copyright (C) 2005-2014 Free Software Foundation, Inc.
+   Copyright (C) 2005-2015 Free Software Foundation, Inc.
 
    Contributed by Razya Ladelsky <RAZYA@il.ibm.com> and Martin Jambor
    <mjambor@suse.cz>
@@ -103,26 +103,22 @@ along with GCC; see the file COPYING3.  If not see
 #include "config.h"
 #include "system.h"
 #include "coretypes.h"
+#include "alias.h"
+#include "symtab.h"
+#include "options.h"
 #include "tree.h"
+#include "fold-const.h"
 #include "gimple-fold.h"
 #include "gimple-expr.h"
 #include "target.h"
 #include "predict.h"
 #include "basic-block.h"
-#include "vec.h"
-#include "hash-map.h"
-#include "is-a.h"
-#include "plugin-api.h"
-#include "hashtab.h"
-#include "hash-set.h"
-#include "machmode.h"
 #include "tm.h"
 #include "hard-reg-set.h"
-#include "input.h"
 #include "function.h"
-#include "ipa-ref.h"
 #include "cgraph.h"
 #include "alloc-pool.h"
+#include "symbol-summary.h"
 #include "ipa-prop.h"
 #include "bitmap.h"
 #include "tree-pass.h"
@@ -262,6 +258,9 @@ public:
   ipcp_lattice<ipa_polymorphic_call_context> ctxlat;
   /* Lattices describing aggregate parts.  */
   ipcp_agg_lattice *aggs;
+  /* Alignment information.  A very basic one-value lattice in which !known
+     means TOP and a known zero alignment means BOTTOM.  */
+  ipa_alignment alignment;
   /* Number of aggregate lattices */
   int aggs_count;
   /* True if aggregate data were passed by reference (as opposed to by
@@ -280,10 +279,17 @@ public:
 
 /* Allocation pools for values and their sources in ipa-cp.  */
 
-alloc_pool ipcp_cst_values_pool;
-alloc_pool ipcp_poly_ctx_values_pool;
-alloc_pool ipcp_sources_pool;
-alloc_pool ipcp_agg_lattice_pool;
+pool_allocator<ipcp_value<tree> > ipcp_cst_values_pool
+  ("IPA-CP constant values", 32);
+
+pool_allocator<ipcp_value<ipa_polymorphic_call_context> >
+  ipcp_poly_ctx_values_pool ("IPA-CP polymorphic contexts", 32);
+
+pool_allocator<ipcp_value_source<tree> > ipcp_sources_pool
+  ("IPA-CP value sources", 64);
+
+pool_allocator<ipcp_agg_lattice> ipcp_agg_lattice_pool
+  ("IPA_CP aggregate lattices", 32);
 
 /* Maximal count found in program.  */
 
@@ -444,6 +450,13 @@ print_all_lattices (FILE * f, bool dump_sources, bool dump_benefits)
          plats->itself.print (f, dump_sources, dump_benefits);
          fprintf (f, "         ctxs: ");
          plats->ctxlat.print (f, dump_sources, dump_benefits);
+         if (plats->alignment.known && plats->alignment.align > 0)
+           fprintf (f, "         Alignment %u, misalignment %u\n",
+                    plats->alignment.align, plats->alignment.misalign);
+         else if (plats->alignment.known)
+           fprintf (f, "         Alignment unusable\n");
+         else
+           fprintf (f, "         Alignment unknown\n");
          if (plats->virt_call)
            fprintf (f, "        virt_call flag set\n");
 
@@ -542,10 +555,7 @@ gather_caller_stats (struct cgraph_node *node, void *data)
   struct cgraph_edge *cs;
 
   for (cs = node->callers; cs; cs = cs->next_caller)
-    if (cs->caller->thunk.thunk_p)
-      cs->caller->call_for_symbol_thunks_and_aliases (gather_caller_stats,
-                                                   stats, false);
-    else
+    if (!cs->caller->thunk.thunk_p)
       {
        stats->count_sum += cs->count;
        stats->freq_sum += cs->frequency;
@@ -566,7 +576,7 @@ ipcp_cloning_candidate_p (struct cgraph_node *node)
 
   gcc_checking_assert (node->has_gimple_body_p ());
 
-  if (!flag_ipa_cp_clone)
+  if (!opt_for_fn (node->decl, flag_ipa_cp_clone))
     {
       if (dump_file)
         fprintf (dump_file, "Not considering %s for cloning; "
@@ -587,7 +597,7 @@ ipcp_cloning_candidate_p (struct cgraph_node *node)
   init_caller_stats (&stats);
   node->call_for_symbol_thunks_and_aliases (gather_caller_stats, &stats, false);
 
-  if (inline_summary (node)->self_size < stats.n_calls)
+  if (inline_summaries->get (node)->self_size < stats.n_calls)
     {
       if (dump_file)
         fprintf (dump_file, "Considering %s for cloning; code might shrink.\n",
@@ -761,6 +771,27 @@ set_agg_lats_contain_variable (struct ipcp_param_lattices *plats)
   return ret;
 }
 
+/* Return true iff PLATS' alignment lattice is bottom (known, zero align).  */
+
+static inline bool
+alignment_bottom_p (ipcp_param_lattices *plats)
+{
+  return plats->alignment.known && !plats->alignment.align;
+}
+
+/* Drop the alignment lattice of PLATS to bottom (known, zero alignment).
+   Return true iff the lattice was not already at bottom.  */
+
+static inline bool
+set_alignment_to_bottom (ipcp_param_lattices *plats)
+{
+  /* Storing the bottom state is idempotent, so do it unconditionally.  */
+  const bool was_bottom = alignment_bottom_p (plats);
+  plats->alignment.known = true;
+  plats->alignment.align = 0;
+  return !was_bottom;
+}
+
 /* Mark bot aggregate and scalar lattices as containing an unknown variable,
    return true is any of them has not been marked as such so far.  */
 
@@ -771,9 +802,44 @@ set_all_contains_variable (struct ipcp_param_lattices *plats)
   ret = plats->itself.set_contains_variable ();
   ret |= plats->ctxlat.set_contains_variable ();
   ret |= set_agg_lats_contain_variable (plats);
+  ret |= set_alignment_to_bottom (plats);
   return ret;
 }
 
+/* Callback for call_for_symbol_thunks_and_aliases.  Add the number of callers
+   of NODE that count as real uses to the int DATA points to; return false.  */
+
+static bool
+count_callers (cgraph_node *node, void *data)
+{
+  int *total = (int *) data;
+
+  for (cgraph_edge *e = node->callers; e; e = e->next_caller)
+    /* Only a caller that is both a thunk and local is handled transparently
+       and skipped; any other caller, even a non-local thunk, is counted.  */
+    if (!(e->caller->thunk.thunk_p && e->caller->local.local))
+      (*total)++;
+  return false;
+}
+
+/* Callback for call_for_symbol_thunks_and_aliases.  Find the first caller of
+   NODE that is not a local thunk, set its node_calling_single_call flag and
+   return true.  Return false if NODE has no such caller.  */
+
+static bool
+set_single_call_flag (cgraph_node *node, void *)
+{
+  for (cgraph_edge *e = node->callers; e; e = e->next_caller)
+    {
+      /* Local thunks can be handled transparently, skip them.  */
+      if (e->caller->thunk.thunk_p && e->caller->local.local)
+       continue;
+      IPA_NODE_REF (e->caller)->node_calling_single_call = true;
+      return true;
+    }
+  return false;
+}
+
 /* Initialize ipcp_lattices.  */
 
 static void
@@ -785,7 +851,17 @@ initialize_node_lattices (struct cgraph_node *node)
   int i;
 
   gcc_checking_assert (node->has_gimple_body_p ());
-  if (!cgraph_local_p (node))
+  if (cgraph_local_p (node))
+    {
+      int caller_count = 0;
+      node->call_for_symbol_thunks_and_aliases (count_callers, &caller_count,
+                                               true);
+      gcc_checking_assert (caller_count > 0);
+      if (caller_count == 1)
+       node->call_for_symbol_thunks_and_aliases (set_single_call_flag,
+                                                 NULL, true);
+    }
+  else
     {
       /* When cloning is allowed, we can assume that externally visible
         functions are not called.  We will compensate this by cloning
@@ -807,6 +883,7 @@ initialize_node_lattices (struct cgraph_node *node)
              plats->itself.set_to_bottom ();
              plats->ctxlat.set_to_bottom ();
              set_agg_lats_to_bottom (plats);
+             set_alignment_to_bottom (plats);
            }
          else
            set_all_contains_variable (plats);
@@ -901,11 +978,9 @@ ipa_value_from_jfunc (struct ipa_node_params *info, struct ipa_jump_func *jfunc)
        {
          ipcp_lattice<tree> *lat;
 
-         if (!info->lattices)
-           {
-             gcc_checking_assert (!flag_ipa_cp);
-             return NULL_TREE;
-           }
+         if (!info->lattices
+             || idx >= ipa_get_param_count (info))
+           return NULL_TREE;
          lat = ipa_get_scalar_lat (info, idx);
          if (!lat->is_single_const ())
            return NULL_TREE;
@@ -946,17 +1021,17 @@ ipa_context_from_jfunc (ipa_node_params *info, cgraph_edge *cs, int csidx,
     {
       ipa_polymorphic_call_context srcctx;
       int srcidx;
+      bool type_preserved = true;
       if (jfunc->type == IPA_JF_PASS_THROUGH)
        {
-         if (ipa_get_jf_pass_through_operation (jfunc) != NOP_EXPR
-             || !ipa_get_jf_pass_through_type_preserved (jfunc))
+         if (ipa_get_jf_pass_through_operation (jfunc) != NOP_EXPR)
            return ctx;
+         type_preserved = ipa_get_jf_pass_through_type_preserved (jfunc);
          srcidx = ipa_get_jf_pass_through_formal_id (jfunc);
        }
       else
        {
-         if (!ipa_get_jf_ancestor_type_preserved (jfunc))
-           return ctx;
+         type_preserved = ipa_get_jf_ancestor_type_preserved (jfunc);
          srcidx = ipa_get_jf_ancestor_formal_id (jfunc);
        }
       if (info->ipcp_orig_node)
@@ -966,11 +1041,9 @@ ipa_context_from_jfunc (ipa_node_params *info, cgraph_edge *cs, int csidx,
        }
       else
        {
-         if (!info->lattices)
-           {
-             gcc_checking_assert (!flag_ipa_cp);
-             return ctx;
-           }
+         if (!info->lattices
+             || srcidx >= ipa_get_param_count (info))
+           return ctx;
          ipcp_lattice<ipa_polymorphic_call_context> *lat;
          lat = ipa_get_poly_ctx_lat (info, srcidx);
          if (!lat->is_single_const ())
@@ -981,7 +1054,10 @@ ipa_context_from_jfunc (ipa_node_params *info, cgraph_edge *cs, int csidx,
        return ctx;
       if (jfunc->type == IPA_JF_ANCESTOR)
        srcctx.offset_by (ipa_get_jf_ancestor_offset (jfunc));
-      ctx.combine_with (srcctx);
+      if (!type_preserved)
+       srcctx.possible_dynamic_type_change (cs->in_polymorphic_cdtor);
+      srcctx.combine_with (ctx);
+      return srcctx;
     }
 
   return ctx;
@@ -1066,7 +1142,7 @@ ipcp_value<valtype>::add_source (cgraph_edge *cs, ipcp_value *src_val,
 {
   ipcp_value_source<valtype> *src;
 
-  src = new (pool_alloc (ipcp_sources_pool)) ipcp_value_source<valtype>;
+  src = new (ipcp_sources_pool.allocate ()) ipcp_value_source<valtype>;
   src->offset = offset;
   src->cs = cs;
   src->val = src_val;
@@ -1084,7 +1160,7 @@ allocate_and_init_ipcp_value (tree source)
 {
   ipcp_value<tree> *val;
 
-  val = new (pool_alloc (ipcp_cst_values_pool)) ipcp_value<tree>;
+  val = ipcp_cst_values_pool.allocate ();
   memset (val, 0, sizeof (*val));
   val->value = source;
   return val;
@@ -1098,8 +1174,8 @@ allocate_and_init_ipcp_value (ipa_polymorphic_call_context source)
 {
   ipcp_value<ipa_polymorphic_call_context> *val;
 
-  val = new (pool_alloc (ipcp_poly_ctx_values_pool))
-    ipcp_value<ipa_polymorphic_call_context>;
+  // TODO
+  val = ipcp_poly_ctx_values_pool.allocate ();
   memset (val, 0, sizeof (*val));
   val->value = source;
   return val;
@@ -1148,7 +1224,7 @@ ipcp_lattice<valtype>::add_value (valtype newval, cgraph_edge *cs,
            {
              ipcp_value_source<valtype> *src = val->sources;
              val->sources = src->next;
-             pool_free (ipcp_sources_pool, src);
+             ipcp_sources_pool.remove ((ipcp_value_source<tree>*)src);
            }
        }
 
@@ -1298,15 +1374,13 @@ propagate_context_accross_jump_function (cgraph_edge *cs,
     return false;
   bool ret = false;
   bool added_sth = false;
+  bool type_preserved = true;
 
   ipa_polymorphic_call_context edge_ctx, *edge_ctx_ptr
     = ipa_get_ith_polymorhic_call_context (args, idx);
 
   if (edge_ctx_ptr)
-    {
-      edge_ctx = *edge_ctx_ptr;
-      edge_ctx.clear_speculation ();
-    }
+    edge_ctx = *edge_ctx_ptr;
 
   if (jfunc->type == IPA_JF_PASS_THROUGH
       || jfunc->type == IPA_JF_ANCESTOR)
@@ -1320,15 +1394,14 @@ propagate_context_accross_jump_function (cgraph_edge *cs,
         not set instead of punting.  */
       if (jfunc->type == IPA_JF_PASS_THROUGH)
        {
-         if (ipa_get_jf_pass_through_operation (jfunc) != NOP_EXPR
-             || !ipa_get_jf_pass_through_type_preserved (jfunc))
+         if (ipa_get_jf_pass_through_operation (jfunc) != NOP_EXPR)
            goto prop_fail;
+         type_preserved = ipa_get_jf_pass_through_type_preserved (jfunc);
          src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
        }
       else
        {
-         if (!ipa_get_jf_ancestor_type_preserved (jfunc))
-           goto prop_fail;
+         type_preserved = ipa_get_jf_ancestor_type_preserved (jfunc);
          src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
        }
 
@@ -1338,21 +1411,24 @@ propagate_context_accross_jump_function (cgraph_edge *cs,
          && (src_lat->contains_variable
              || (src_lat->values_count > 1)))
        goto prop_fail;
-      if (src_lat->contains_variable)
-         ret |= dest_lat->set_contains_variable ();
 
       ipcp_value<ipa_polymorphic_call_context> *src_val;
       for (src_val = src_lat->values; src_val; src_val = src_val->next)
        {
          ipa_polymorphic_call_context cur = src_val->value;
+
+         if (!type_preserved)
+           cur.possible_dynamic_type_change (cs->in_polymorphic_cdtor);
          if (jfunc->type == IPA_JF_ANCESTOR)
            cur.offset_by (ipa_get_jf_ancestor_offset (jfunc));
-         /* TODO: Perhaps attempt to look up some used OTR type? */
-         cur.clear_speculation ();
-         if (!edge_ctx.useless_p ())
-           cur.combine_with (edge_ctx);
+         /* TODO: In cases we know how the context is going to be used,
+            we can improve the result by passing proper OTR_TYPE.  */
+         cur.combine_with (edge_ctx);
          if (!cur.useless_p ())
            {
+             if (src_lat->contains_variable
+                 && !edge_ctx.equal_to (cur))
+               ret |= dest_lat->set_contains_variable ();
              ret |= dest_lat->add_value (cur, cs, src_val, src_idx);
              added_sth = true;
            }
@@ -1372,6 +1448,76 @@ propagate_context_accross_jump_function (cgraph_edge *cs,
   return ret;
 }
 
+/* Propagate alignments across jump function JFUNC that is associated with
+   edge CS and update DEST_LAT accordingly.  Return true iff it changed.  */
+
+static bool
+propagate_alignment_accross_jump_function (struct cgraph_edge *cs,
+                                          struct ipa_jump_func *jfunc,
+                                          struct ipcp_param_lattices *dest_lat)
+{
+  if (alignment_bottom_p (dest_lat))
+    return false;
+
+  ipa_alignment cur;
+  cur.known = false;
+  if (jfunc->alignment.known)
+    cur = jfunc->alignment;
+  else if (jfunc->type == IPA_JF_PASS_THROUGH
+          || jfunc->type == IPA_JF_ANCESTOR)
+    {
+      struct ipa_node_params *caller_info = IPA_NODE_REF (cs->caller);
+      struct ipcp_param_lattices *src_lats;
+      HOST_WIDE_INT offset = 0;
+      int src_idx;
+
+      if (jfunc->type == IPA_JF_PASS_THROUGH)
+       {
+         enum tree_code op = ipa_get_jf_pass_through_operation (jfunc);
+         if (op != NOP_EXPR)
+           {
+             if (op != POINTER_PLUS_EXPR && op != PLUS_EXPR)
+               goto prop_fail;
+             tree operand = ipa_get_jf_pass_through_operand (jfunc);
+             if (!tree_fits_shwi_p (operand))
+               goto prop_fail;
+             offset = tree_to_shwi (operand);
+           }
+         src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
+       }
+      else
+       {
+         src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
+         offset = ipa_get_jf_ancestor_offset (jfunc) / BITS_PER_UNIT;
+       }
+
+      src_lats = ipa_get_parm_lattices (caller_info, src_idx);
+      if (!src_lats->alignment.known || alignment_bottom_p (src_lats))
+       goto prop_fail;
+
+      cur = src_lats->alignment;
+      /* Normalize: '%' yields a negative remainder for a negative sum.  */
+      HOST_WIDE_INT misalign = (cur.misalign + offset) % cur.align;
+      cur.misalign = misalign < 0 ? misalign + cur.align : misalign;
+    }
+
+  if (cur.known)
+    {
+      if (!dest_lat->alignment.known)
+       {
+         dest_lat->alignment = cur;
+         return true;
+       }
+      else if (dest_lat->alignment.align == cur.align
+              && dest_lat->alignment.misalign == cur.misalign)
+       return false;
+    }
+
+ prop_fail:
+  set_alignment_to_bottom (dest_lat);
+  return true;
+}
+
 /* If DEST_PLATS already has aggregate items, check that aggs_by_ref matches
    NEW_AGGS_BY_REF and if not, mark all aggs as bottoms and return true (in all
    other cases, return false).  If there are no aggregate items, set
@@ -1448,7 +1594,7 @@ merge_agg_lats_step (struct ipcp_param_lattices *dest_plats,
       if (dest_plats->aggs_count == PARAM_VALUE (PARAM_IPA_MAX_AGG_ITEMS))
        return false;
       dest_plats->aggs_count++;
-      new_al = (struct ipcp_agg_lattice *) pool_alloc (ipcp_agg_lattice_pool);
+      new_al = ipcp_agg_lattice_pool.allocate ();
       memset (new_al, 0, sizeof (*new_al));
 
       new_al->offset = offset;
@@ -1708,6 +1854,8 @@ propagate_constants_accross_call (struct cgraph_edge *cs)
                                                         &dest_plats->itself);
          ret |= propagate_context_accross_jump_function (cs, jump_func, i,
                                                          &dest_plats->ctxlat);
+         ret |= propagate_alignment_accross_jump_function (cs, jump_func,
+                                                           dest_plats);
          ret |= propagate_aggs_accross_jump_function (cs, jump_func,
                                                       dest_plats);
        }
@@ -1727,13 +1875,16 @@ ipa_get_indirect_edge_target_1 (struct cgraph_edge *ie,
                                vec<tree> known_csts,
                                vec<ipa_polymorphic_call_context> known_contexts,
                                vec<ipa_agg_jump_function_p> known_aggs,
-                               struct ipa_agg_replacement_value *agg_reps)
+                               struct ipa_agg_replacement_value *agg_reps,
+                               bool *speculative)
 {
   int param_index = ie->indirect_info->param_index;
   HOST_WIDE_INT anc_offset;
   tree t;
   tree target = NULL;
 
+  *speculative = false;
+
   if (param_index == -1
       || known_csts.length () <= (unsigned int) param_index)
     return NULL_TREE;
@@ -1780,7 +1931,7 @@ ipa_get_indirect_edge_target_1 (struct cgraph_edge *ie,
        return NULL_TREE;
     }
 
-  if (!flag_devirtualize)
+  if (!opt_for_fn (ie->caller->decl, flag_devirtualize))
     return NULL_TREE;
 
   gcc_assert (!ie->indirect_info->agg_contents);
@@ -1789,8 +1940,7 @@ ipa_get_indirect_edge_target_1 (struct cgraph_edge *ie,
   t = NULL;
 
   /* Try to work out value of virtual table pointer value in replacemnets.  */
-  if (!t && agg_reps && !ie->indirect_info->by_ref
-      && !ie->indirect_info->vptr_changed)
+  if (!t && agg_reps && !ie->indirect_info->by_ref)
     {
       while (agg_reps)
        {
@@ -1808,8 +1958,7 @@ ipa_get_indirect_edge_target_1 (struct cgraph_edge *ie,
   /* Try to work out value of virtual table pointer value in known
      aggregate values.  */
   if (!t && known_aggs.length () > (unsigned int) param_index
-      && !ie->indirect_info->by_ref
-      && !ie->indirect_info->vptr_changed)
+      && !ie->indirect_info->by_ref)
     {
        struct ipa_agg_jump_function *agg;
        agg = known_aggs[param_index];
@@ -1833,7 +1982,9 @@ ipa_get_indirect_edge_target_1 (struct cgraph_edge *ie,
                  || !possible_polymorphic_call_target_p
                       (ie, cgraph_node::get (target)))
                target = ipa_impossible_devirt_target (ie, target);
-             return target;
+              *speculative = ie->indirect_info->vptr_changed;
+             if (!*speculative)
+               return target;
            }
        }
     }
@@ -1848,6 +1999,10 @@ ipa_get_indirect_edge_target_1 (struct cgraph_edge *ie,
   if (known_contexts.length () > (unsigned int) param_index)
     {
       context = known_contexts[param_index];
+      context.offset_by (anc_offset);
+      if (ie->indirect_info->vptr_changed)
+       context.possible_dynamic_type_change (ie->in_polymorphic_cdtor,
+                                             ie->indirect_info->otr_type);
       if (t)
        {
          ipa_polymorphic_call_context ctx2 = ipa_polymorphic_call_context
@@ -1857,8 +2012,13 @@ ipa_get_indirect_edge_target_1 (struct cgraph_edge *ie,
        }
     }
   else if (t)
-    context = ipa_polymorphic_call_context (t, ie->indirect_info->otr_type,
-                                           anc_offset);
+    {
+      context = ipa_polymorphic_call_context (t, ie->indirect_info->otr_type,
+                                             anc_offset);
+      if (ie->indirect_info->vptr_changed)
+       context.possible_dynamic_type_change (ie->in_polymorphic_cdtor,
+                                             ie->indirect_info->otr_type);
+    }
   else
     return NULL_TREE;
 
@@ -1870,11 +2030,32 @@ ipa_get_indirect_edge_target_1 (struct cgraph_edge *ie,
      ie->indirect_info->otr_token,
      context, &final);
   if (!final || targets.length () > 1)
-    return NULL_TREE;
-  if (targets.length () == 1)
-    target = targets[0]->decl;
+    {
+      struct cgraph_node *node;
+      if (*speculative)
+       return target;
+      if (!opt_for_fn (ie->caller->decl, flag_devirtualize_speculatively)
+         || ie->speculative || !ie->maybe_hot_p ())
+       return NULL;
+      node = try_speculative_devirtualization (ie->indirect_info->otr_type,
+                                              ie->indirect_info->otr_token,
+                                              context);
+      if (node)
+       {
+         *speculative = true;
+         target = node->decl;
+       }
+      else
+       return NULL;
+    }
   else
-    target = ipa_impossible_devirt_target (ie, NULL_TREE);
+    {
+      *speculative = false;
+      if (targets.length () == 1)
+       target = targets[0]->decl;
+      else
+       target = ipa_impossible_devirt_target (ie, NULL_TREE);
+    }
 
   if (target && !possible_polymorphic_call_target_p (ie,
                                                     cgraph_node::get (target)))
@@ -1892,10 +2073,11 @@ tree
 ipa_get_indirect_edge_target (struct cgraph_edge *ie,
                              vec<tree> known_csts,
                              vec<ipa_polymorphic_call_context> known_contexts,
-                             vec<ipa_agg_jump_function_p> known_aggs)
+                             vec<ipa_agg_jump_function_p> known_aggs,
+                             bool *speculative)
 {
   return ipa_get_indirect_edge_target_1 (ie, known_csts, known_contexts,
-                                        known_aggs, NULL);
+                                        known_aggs, NULL, speculative);
 }
 
 /* Calculate devirtualization time bonus for NODE, assuming we know KNOWN_CSTS
@@ -1916,9 +2098,10 @@ devirtualization_time_bonus (struct cgraph_node *node,
       struct inline_summary *isummary;
       enum availability avail;
       tree target;
+      bool speculative;
 
       target = ipa_get_indirect_edge_target (ie, known_csts, known_contexts,
-                                            known_aggs);
+                                            known_aggs, &speculative);
       if (!target)
        continue;
 
@@ -1930,19 +2113,19 @@ devirtualization_time_bonus (struct cgraph_node *node,
       callee = callee->function_symbol (&avail);
       if (avail < AVAIL_AVAILABLE)
        continue;
-      isummary = inline_summary (callee);
+      isummary = inline_summaries->get (callee);
       if (!isummary->inlinable)
        continue;
 
       /* FIXME: The values below need re-considering and perhaps also
         integrating into the cost metrics, at lest in some very basic way.  */
       if (isummary->size <= MAX_INLINE_INSNS_AUTO / 4)
-       res += 31;
+       res += 31 / ((int)speculative + 1);
       else if (isummary->size <= MAX_INLINE_INSNS_AUTO / 2)
-       res += 15;
+       res += 15 / ((int)speculative + 1);
       else if (isummary->size <= MAX_INLINE_INSNS_AUTO
               || DECL_DECLARED_INLINE_P (callee->decl))
-       res += 7;
+       res += 7 / ((int)speculative + 1);
     }
 
   return res;
@@ -1961,6 +2144,24 @@ hint_time_bonus (inline_hints hints)
   return result;
 }
 
+/* Apply cloning penalties to EVALUATION for the function described by INFO:
+   the recursion penalty if its node is within an SCC and the single-call
+   penalty if it calls a node with a single caller.  Return the result.  */
+
+static inline int64_t
+incorporate_penalties (ipa_node_params *info, int64_t evaluation)
+{
+  const int scc_pct = 100 - PARAM_VALUE (PARAM_IPA_CP_RECURSION_PENALTY);
+  const int single_pct = 100 - PARAM_VALUE (PARAM_IPA_CP_SINGLE_CALL_PENALTY);
+
+  if (info->node_within_scc)
+    evaluation = evaluation * scc_pct / 100;
+  if (info->node_calling_single_call)
+    evaluation = evaluation * single_pct / 100;
+
+  return evaluation;
+}
+
 /* Return true if cloning NODE is a good idea, given the estimated TIME_BENEFIT
    and SIZE_COST and with the sum of frequencies of incoming edges to the
    potential new clone in FREQUENCIES.  */
@@ -1970,24 +2171,28 @@ good_cloning_opportunity_p (struct cgraph_node *node, int time_benefit,
                            int freq_sum, gcov_type count_sum, int size_cost)
 {
   if (time_benefit == 0
-      || !flag_ipa_cp_clone
+      || !opt_for_fn (node->decl, flag_ipa_cp_clone)
       || !optimize_function_for_speed_p (DECL_STRUCT_FUNCTION (node->decl)))
     return false;
 
   gcc_assert (size_cost > 0);
 
+  struct ipa_node_params *info = IPA_NODE_REF (node);
   if (max_count)
     {
       int factor = (count_sum * 1000) / max_count;
       int64_t evaluation = (((int64_t) time_benefit * factor)
                                    / size_cost);
+      evaluation = incorporate_penalties (info, evaluation);
 
       if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "     good_cloning_opportunity_p (time: %i, "
                 "size: %i, count_sum: " HOST_WIDE_INT_PRINT_DEC
-                ") -> evaluation: " "%"PRId64
+                "%s%s) -> evaluation: " "%" PRId64
                 ", threshold: %i\n",
                 time_benefit, size_cost, (HOST_WIDE_INT) count_sum,
+                info->node_within_scc ? ", scc" : "",
+                info->node_calling_single_call ? ", single_call" : "",
                 evaluation, PARAM_VALUE (PARAM_IPA_CP_EVAL_THRESHOLD));
 
       return evaluation >= PARAM_VALUE (PARAM_IPA_CP_EVAL_THRESHOLD);
@@ -1996,13 +2201,16 @@ good_cloning_opportunity_p (struct cgraph_node *node, int time_benefit,
     {
       int64_t evaluation = (((int64_t) time_benefit * freq_sum)
                                    / size_cost);
+      evaluation = incorporate_penalties (info, evaluation);
 
       if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "     good_cloning_opportunity_p (time: %i, "
-                "size: %i, freq_sum: %i) -> evaluation: "
-                "%"PRId64 ", threshold: %i\n",
-                time_benefit, size_cost, freq_sum, evaluation,
-                PARAM_VALUE (PARAM_IPA_CP_EVAL_THRESHOLD));
+                "size: %i, freq_sum: %i%s%s) -> evaluation: "
+                "%" PRId64 ", threshold: %i\n",
+                time_benefit, size_cost, freq_sum,
+                info->node_within_scc ? ", scc" : "",
+                info->node_calling_single_call ? ", single_call" : "",
+                evaluation, PARAM_VALUE (PARAM_IPA_CP_EVAL_THRESHOLD));
 
       return evaluation >= PARAM_VALUE (PARAM_IPA_CP_EVAL_THRESHOLD);
     }
@@ -2177,7 +2385,7 @@ estimate_local_effects (struct cgraph_node *node)
   vec<ipa_agg_jump_function> known_aggs;
   vec<ipa_agg_jump_function_p> known_aggs_ptrs;
   bool always_const;
-  int base_time = inline_summary (node)->time;
+  int base_time = inline_summaries->get (node)->time;
   int removable_params_cost;
 
   if (!count || !ipcp_versionable_function_p (node))
@@ -2493,9 +2701,12 @@ propagate_constants_topo (struct ipa_topo_info *topo)
          struct cgraph_edge *cs;
 
          for (cs = v->callees; cs; cs = cs->next_callee)
-           if (ipa_edge_within_scc (cs)
-               && propagate_constants_accross_call (cs))
-             push_node_to_stack (topo, cs->callee);
+           if (ipa_edge_within_scc (cs))
+             {
+               IPA_NODE_REF (v)->node_within_scc = true;
+               if (propagate_constants_accross_call (cs))
+                 push_node_to_stack (topo, cs->callee->function_symbol ());
+             }
          v = pop_node_from_stack (topo);
        }
 
@@ -2594,7 +2805,7 @@ ipcp_propagate_stage (struct ipa_topo_info *topo)
        initialize_node_lattices (node);
       }
     if (node->definition && !node->alias)
-      overall_size += inline_summary (node)->self_size;
+      overall_size += inline_summaries->get (node)->self_size;
     if (node->count > max_count)
       max_count = node->count;
   }
@@ -2638,16 +2849,18 @@ ipcp_discover_new_direct_edges (struct cgraph_node *node,
   for (ie = node->indirect_calls; ie; ie = next_ie)
     {
       tree target;
+      bool speculative;
 
       next_ie = ie->next_callee;
       target = ipa_get_indirect_edge_target_1 (ie, known_csts, known_contexts,
-                                              vNULL, aggvals);
+                                              vNULL, aggvals, &speculative);
       if (target)
        {
          bool agg_contents = ie->indirect_info->agg_contents;
          bool polymorphic = ie->indirect_info->polymorphic;
          int param_index = ie->indirect_info->param_index;
-         struct cgraph_edge *cs = ipa_make_edge_direct_to_target (ie, target);
+         struct cgraph_edge *cs = ipa_make_edge_direct_to_target (ie, target,
+                                                                  speculative);
          found = true;
 
          if (cs && !agg_contents && !polymorphic)
@@ -2750,17 +2963,31 @@ get_clone_agg_value (struct cgraph_node *node, HOST_WIDE_INT offset,
   return NULL_TREE;
 }
 
-/* Return true if edge CS does bring about the value described by SRC.  */
+/* Return true if NODE is DEST itself or DEST's clone for all contexts.  */
+
+static bool
+same_node_or_its_all_contexts_clone_p (cgraph_node *node, cgraph_node *dest)
+{
+  if (node != dest)
+    {
+      ipa_node_params *np = IPA_NODE_REF (node);
+      return np->is_all_contexts_clone && np->ipcp_orig_node == dest;
+    }
+  return true;
+}
+
+/* Return true if edge CS does bring about the value described by SRC to node
+   DEST or its clone for all contexts.  */
 
 static bool
-cgraph_edge_brings_value_p (struct cgraph_edge *cs,
-                           ipcp_value_source<tree> *src)
+cgraph_edge_brings_value_p (cgraph_edge *cs, ipcp_value_source<tree> *src,
+                           cgraph_node *dest)
 {
   struct ipa_node_params *caller_info = IPA_NODE_REF (cs->caller);
-  cgraph_node *real_dest = cs->callee->function_symbol ();
-  struct ipa_node_params *dst_info = IPA_NODE_REF (real_dest);
+  enum availability availability;
+  cgraph_node *real_dest = cs->callee->function_symbol (&availability);
 
-  if ((dst_info->ipcp_orig_node && !dst_info->is_all_contexts_clone)
+  if (!same_node_or_its_all_contexts_clone_p (real_dest, dest)
+      || availability <= AVAIL_INTERPOSABLE
       || caller_info->node_dead)
     return false;
   if (!src->val)
@@ -2799,18 +3026,18 @@ cgraph_edge_brings_value_p (struct cgraph_edge *cs,
     }
 }
 
-/* Return true if edge CS does bring about the value described by SRC.  */
+/* Return true if edge CS does bring about the value described by SRC to node
+   DEST or its clone for all contexts.  */
 
 static bool
-cgraph_edge_brings_value_p (struct cgraph_edge *cs,
-                           ipcp_value_source<ipa_polymorphic_call_context>
-                           *src)
+cgraph_edge_brings_value_p (cgraph_edge *cs,
+                           ipcp_value_source<ipa_polymorphic_call_context> *src,
+                           cgraph_node *dest)
 {
   struct ipa_node_params *caller_info = IPA_NODE_REF (cs->caller);
   cgraph_node *real_dest = cs->callee->function_symbol ();
-  struct ipa_node_params *dst_info = IPA_NODE_REF (real_dest);
 
-  if ((dst_info->ipcp_orig_node && !dst_info->is_all_contexts_clone)
+  if (!same_node_or_its_all_contexts_clone_p (real_dest, dest)
       || caller_info->node_dead)
     return false;
   if (!src->val)
@@ -2836,13 +3063,14 @@ get_next_cgraph_edge_clone (struct cgraph_edge *cs)
   return next_edge_clone[cs->uid];
 }
 
-/* Given VAL, iterate over all its sources and if they still hold, add their
-   edge frequency and their number into *FREQUENCY and *CALLER_COUNT
-   respectively.  */
+/* Given VAL that is intended for DEST, iterate over all its sources and if
+   they still hold, add their edge frequency and their number into *FREQ_SUM
+   and *CALLER_COUNT respectively.  */
 
 template <typename valtype>
 static bool
-get_info_about_necessary_edges (ipcp_value<valtype> *val, int *freq_sum,
+get_info_about_necessary_edges (ipcp_value<valtype> *val, cgraph_node *dest,
+                               int *freq_sum,
                                gcov_type *count_sum, int *caller_count)
 {
   ipcp_value_source<valtype> *src;
@@ -2855,7 +3083,7 @@ get_info_about_necessary_edges (ipcp_value<valtype> *val, int *freq_sum,
       struct cgraph_edge *cs = src->cs;
       while (cs)
        {
-         if (cgraph_edge_brings_value_p (cs, src))
+         if (cgraph_edge_brings_value_p (cs, src, dest))
            {
              count++;
              freq += cs->frequency;
@@ -2872,12 +3100,13 @@ get_info_about_necessary_edges (ipcp_value<valtype> *val, int *freq_sum,
   return hot;
 }
 
-/* Return a vector of incoming edges that do bring value VAL.  It is assumed
-   their number is known and equal to CALLER_COUNT.  */
+/* Return a vector of incoming edges that do bring value VAL to node DEST.  It
+   is assumed their number is known and equal to CALLER_COUNT.  */
 
 template <typename valtype>
 static vec<cgraph_edge *>
-gather_edges_for_value (ipcp_value<valtype> *val, int caller_count)
+gather_edges_for_value (ipcp_value<valtype> *val, cgraph_node *dest,
+                       int caller_count)
 {
   ipcp_value_source<valtype> *src;
   vec<cgraph_edge *> ret;
@@ -2888,7 +3117,7 @@ gather_edges_for_value (ipcp_value<valtype> *val, int caller_count)
       struct cgraph_edge *cs = src->cs;
       while (cs)
        {
-         if (cgraph_edge_brings_value_p (cs, src))
+         if (cgraph_edge_brings_value_p (cs, src, dest))
            ret.quick_push (cs);
          cs = get_next_cgraph_edge_clone (cs);
        }
@@ -3160,6 +3389,7 @@ find_more_scalar_values_for_callers_subset (struct cgraph_node *node,
       struct cgraph_edge *cs;
       tree newval = NULL_TREE;
       int j;
+      bool first = true;
 
       if (ipa_get_scalar_lat (info, i)->bottom || known_csts[i])
        continue;
@@ -3178,13 +3408,15 @@ find_more_scalar_values_for_callers_subset (struct cgraph_node *node,
          t = ipa_value_from_jfunc (IPA_NODE_REF (cs->caller), jump_func);
          if (!t
              || (newval
-                 && !values_equal_for_ipcp_p (t, newval)))
+                 && !values_equal_for_ipcp_p (t, newval))
+             || (!first && !newval))
            {
              newval = NULL_TREE;
              break;
            }
          else
            newval = t;
+         first = false;
        }
 
       if (newval)
@@ -3226,7 +3458,7 @@ find_more_contexts_for_caller_subset (cgraph_node *node,
        continue;
 
       ipa_polymorphic_call_context newval;
-      bool found = false;
+      bool first = true;
       int j;
 
       FOR_EACH_VEC_ELT (callers, j, cs)
@@ -3238,21 +3470,18 @@ find_more_contexts_for_caller_subset (cgraph_node *node,
          ipa_polymorphic_call_context ctx;
          ctx = ipa_context_from_jfunc (IPA_NODE_REF (cs->caller), cs, i,
                                        jfunc);
-         ctx.clear_speculation ();
-         if (ctx.useless_p ()
-             || (found && !values_equal_for_ipcp_p (newval, ctx)))
+         if (first)
            {
-             found = false;
-             break;
-           }
-         else if (!found)
-           {
-             found = true;
              newval = ctx;
+             first = false;
            }
+         else
+           newval.meet_with (ctx);
+         if (newval.useless_p ())
+           break;
        }
 
-      if (found)
+      if (!newval.useless_p ())
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
@@ -3749,27 +3978,21 @@ perhaps_add_new_callers (cgraph_node *node, ipcp_value<valtype> *val)
       struct cgraph_edge *cs = src->cs;
       while (cs)
        {
-         enum availability availability;
-         struct cgraph_node *dst = cs->callee->function_symbol (&availability);
-         if ((dst == node || IPA_NODE_REF (dst)->is_all_contexts_clone)
-             && availability > AVAIL_INTERPOSABLE
-             && cgraph_edge_brings_value_p (cs, src))
+         if (cgraph_edge_brings_value_p (cs, src, node)
+             && cgraph_edge_brings_all_scalars_for_node (cs, val->spec_node)
+             && cgraph_edge_brings_all_agg_vals_for_node (cs, val->spec_node))
            {
-             if (cgraph_edge_brings_all_scalars_for_node (cs, val->spec_node)
-                 && cgraph_edge_brings_all_agg_vals_for_node (cs,
-                                                              val->spec_node))
-               {
-                 if (dump_file)
-                   fprintf (dump_file, " - adding an extra caller %s/%i"
-                            " of %s/%i\n",
-                            xstrdup (cs->caller->name ()),
-                            cs->caller->order,
-                            xstrdup (val->spec_node->name ()),
-                            val->spec_node->order);
-
-                 cs->redirect_callee (val->spec_node);
-                 redirected_sum += cs->count;
-               }
+             if (dump_file)
+               fprintf (dump_file, " - adding an extra caller %s/%i"
+                        " of %s/%i\n",
+                        xstrdup_for_dump (cs->caller->name ()),
+                        cs->caller->order,
+                        xstrdup_for_dump (val->spec_node->name ()),
+                        val->spec_node->order);
+
+             cs->redirect_callee_duplicating_thunks (val->spec_node);
+             val->spec_node->expand_all_artificial_thunks ();
+             redirected_sum += cs->count;
            }
          cs = get_next_cgraph_edge_clone (cs);
        }
@@ -3894,7 +4117,7 @@ decide_about_value (struct cgraph_node *node, int index, HOST_WIDE_INT offset,
                 val->local_size_cost + overall_size);
       return false;
     }
-  else if (!get_info_about_necessary_edges (val, &freq_sum, &count_sum,
+  else if (!get_info_about_necessary_edges (val, node, &freq_sum, &count_sum,
                                            &caller_count))
     return false;
 
@@ -3924,7 +4147,7 @@ decide_about_value (struct cgraph_node *node, int index, HOST_WIDE_INT offset,
     fprintf (dump_file, "  Creating a specialized node of %s/%i.\n",
             node->name (), node->order);
 
-  callers = gather_edges_for_value (val, caller_count);
+  callers = gather_edges_for_value (val, node, caller_count);
   if (offset == -1)
     modify_known_vectors_with_val (&known_csts, &known_contexts, val, index);
   else
@@ -4152,6 +4375,72 @@ ipcp_decision_stage (struct ipa_topo_info *topo)
     }
 }
 
+/* Copy the parameter alignment information discovered by propagation into
+   the transformation summary of every function with a gimple body.  A
+   function gets entries only if at least one parameter has a known non-zero
+   alignment; zero alignments are stored as unknown.  */
+
+static void
+ipcp_store_alignment_results (void)
+{
+  cgraph_node *node;
+
+  FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
+    {
+      ipa_node_params *info = IPA_NODE_REF (node);
+      bool dumped_sth = false;
+      bool found_useful_result = false;
+
+      if (!opt_for_fn (node->decl, flag_ipa_cp_alignment))
+        {
+          if (dump_file)
+            fprintf (dump_file, "Not considering %s for alignment discovery "
+                     "and propagate; -fipa-cp-alignment: disabled.\n",
+                     node->name ());
+          continue;
+        }
+
+      /* For an IPA-CP clone, use the parameter info of its original node.  */
+      if (info->ipcp_orig_node)
+        info = IPA_NODE_REF (info->ipcp_orig_node);
+
+      unsigned count = ipa_get_param_count (info);
+      for (unsigned i = 0; i < count; i++)
+        {
+          ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
+          if (plats->alignment.known && plats->alignment.align > 0)
+            {
+              found_useful_result = true;
+              break;
+            }
+        }
+      if (!found_useful_result)
+        continue;
+
+      ipcp_grow_transformations_if_necessary ();
+      ipcp_transformation_summary *ts = ipcp_get_transformation_summary (node);
+      vec_safe_reserve_exact (ts->alignments, count);
+
+      for (unsigned i = 0; i < count; i++)
+        {
+          ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
+          /* Zero alignment is the lattice bottom; record it as unknown.  */
+          if (plats->alignment.align == 0)
+            plats->alignment.known = false;
+
+          ts->alignments->quick_push (plats->alignment);
+          if (!dump_file || !plats->alignment.known)
+            continue;
+          if (!dumped_sth)
+            fprintf (dump_file, "Propagated alignment info for function "
+                     "%s/%i:\n", node->name (), node->order);
+          dumped_sth = true;
+          fprintf (dump_file, "  param %u: align: %u, misalign: %u\n",
+                   i, plats->alignment.align, plats->alignment.misalign);
+        }
+    }
+}
+
 /* The IPCP driver.  */
 
 static unsigned int
@@ -4169,16 +4458,6 @@ ipcp_driver (void)
   edge_removal_hook_holder =
     symtab->add_edge_removal_hook (&ipcp_edge_removal_hook, NULL);
 
-  ipcp_cst_values_pool = create_alloc_pool ("IPA-CP constant values",
-                                           sizeof (ipcp_value<tree>), 32);
-  ipcp_poly_ctx_values_pool = create_alloc_pool
-    ("IPA-CP polymorphic contexts",
-     sizeof (ipcp_value<ipa_polymorphic_call_context>), 32);
-  ipcp_sources_pool = create_alloc_pool ("IPA-CP value sources",
-                                        sizeof (ipcp_value_source<tree>), 64);
-  ipcp_agg_lattice_pool = create_alloc_pool ("IPA_CP aggregate lattices",
-                                            sizeof (struct ipcp_agg_lattice),
-                                            32);
   if (dump_file)
     {
       fprintf (dump_file, "\nIPA structures before propagation:\n");
@@ -4193,10 +4472,13 @@ ipcp_driver (void)
   ipcp_propagate_stage (&topo);
   /* Decide what constant propagation and cloning should be performed.  */
   ipcp_decision_stage (&topo);
+  /* Store results of alignment propagation.  */
+  ipcp_store_alignment_results ();
 
   /* Free all IPCP structures.  */
   free_toporder_info (&topo);
   next_edge_clone.release ();
+  prev_edge_clone.release ();
   symtab->remove_edge_removal_hook (edge_removal_hook_holder);
   symtab->remove_edge_duplication_hook (edge_duplication_hook_holder);
   ipa_free_all_structures_after_ipa_cp ();
@@ -4265,9 +4547,9 @@ public:
                      ipcp_generate_summary, /* generate_summary */
                      ipcp_write_summary, /* write_summary */
                      ipcp_read_summary, /* read_summary */
-                     ipa_prop_write_all_agg_replacement, /*
+                     ipcp_write_transformation_summaries, /*
                      write_optimization_summary */
-                     ipa_prop_read_all_agg_replacement, /*
+                     ipcp_read_transformation_summaries, /*
                      read_optimization_summary */
                      NULL, /* stmt_fixup */
                      0, /* function_transform_todo_flags_start */
@@ -4280,7 +4562,7 @@ public:
     {
       /* FIXME: We should remove the optimize check after we ensure we never run
         IPA passes when not optimizing.  */
-      return flag_ipa_cp && optimize;
+      return (flag_ipa_cp && optimize) || in_lto_p;
     }
 
   virtual unsigned int execute (function *) { return ipcp_driver (); }