sem_aux.adb, [...] (Get_Low_Bound): Use Type_Low_Bound.

[gcc.git] / gcc / tree-ssa-loop-ivopts.c
diff --git a/gcc/tree-ssa-loop-ivopts.c b/gcc/tree-ssa-loop-ivopts.c

index c45f3167f2f4e3b295effc98f5a110fa40c14bc7..854d7baf10ae55746f3fd77bbb595d519c3e686c 100644 (file)
--- a/gcc/tree-ssa-loop-ivopts.c
+++ b/gcc/tree-ssa-loop-ivopts.c
@@ -1,5 +1,5 @@
  /* Induction variable optimizations.
-   Copyright (C) 2003-2013 Free Software Foundation, Inc.
+   Copyright (C) 2003-2015 Free Software Foundation, Inc.
  
  This file is part of GCC.
  
@@ -65,32 +65,84 @@ along with GCC; see the file COPYING3.  If not see
  #include "system.h"
  #include "coretypes.h"
  #include "tm.h"
+#include "hash-set.h"
+#include "machmode.h"
+#include "vec.h"
+#include "double-int.h"
+#include "input.h"
+#include "alias.h"
+#include "symtab.h"
+#include "wide-int.h"
+#include "inchash.h"
  #include "tree.h"
+#include "fold-const.h"
+#include "stor-layout.h"
  #include "tm_p.h"
+#include "predict.h"
+#include "hard-reg-set.h"
+#include "function.h"
+#include "dominance.h"
+#include "cfg.h"
  #include "basic-block.h"
  #include "gimple-pretty-print.h"
-#include "tree-flow.h"
+#include "hash-map.h"
+#include "hash-table.h"
+#include "tree-ssa-alias.h"
+#include "internal-fn.h"
+#include "tree-eh.h"
+#include "gimple-expr.h"
+#include "is-a.h"
+#include "gimple.h"
+#include "gimplify.h"
+#include "gimple-iterator.h"
+#include "gimplify-me.h"
+#include "gimple-ssa.h"
+#include "plugin-api.h"
+#include "ipa-ref.h"
+#include "cgraph.h"
+#include "tree-cfg.h"
+#include "tree-phinodes.h"
+#include "ssa-iterators.h"
+#include "stringpool.h"
+#include "tree-ssanames.h"
+#include "tree-ssa-loop-ivopts.h"
+#include "tree-ssa-loop-manip.h"
+#include "tree-ssa-loop-niter.h"
+#include "tree-ssa-loop.h"
+#include "hashtab.h"
+#include "rtl.h"
+#include "flags.h"
+#include "statistics.h"
+#include "real.h"
+#include "fixed-value.h"
+#include "insn-config.h"
+#include "expmed.h"
+#include "dojump.h"
+#include "explow.h"
+#include "calls.h"
+#include "emit-rtl.h"
+#include "varasm.h"
+#include "stmt.h"
+#include "expr.h"
+#include "tree-dfa.h"
+#include "tree-ssa.h"
  #include "cfgloop.h"
  #include "tree-pass.h"
-#include "ggc.h"
-#include "insn-config.h"
-#include "pointer-set.h"
-#include "hash-table.h"
  #include "tree-chrec.h"
  #include "tree-scalar-evolution.h"
-#include "cfgloop.h"
  #include "params.h"
  #include "langhooks.h"
  #include "tree-affine.h"
  #include "target.h"
  #include "tree-inline.h"
  #include "tree-ssa-propagate.h"
-#include "expmed.h"
+#include "tree-ssa-address.h"
+#include "builtins.h"
+#include "tree-vectorizer.h"
  
  /* FIXME: Expressions are expanded to RTL in this pass to determine the
     cost of different addressing modes.  This should be moved to a TBD
     interface between the GIMPLE and RTL worlds.  */
-#include "expr.h"
  #include "recog.h"
  
  /* The infinite cost.  */
@@ -174,6 +226,7 @@ struct cost_pair
  struct iv_use
  {
    unsigned id;         /* The id of the use.  */
+  unsigned sub_id;     /* The id of the sub use.  */
    enum use_type type;  /* Type of the use.  */
    struct iv *iv;       /* The induction variable it is based on.  */
    gimple stmt;         /* Statement in that it occurs.  */
@@ -187,6 +240,11 @@ struct iv_use
  
    struct iv_cand *selected;
                         /* The selected candidate.  */
+
+  struct iv_use *next; /* The next sub use.  */
+  tree addr_base;      /* Base address with const offset stripped.  */
+  unsigned HOST_WIDE_INT addr_offset;
+                       /* Const offset stripped from base address.  */
  };
  
  /* The position where the iv is computed.  */
@@ -240,16 +298,16 @@ typedef struct iv_cand *iv_cand_p;
  
  struct iv_inv_expr_hasher : typed_free_remove <iv_inv_expr_ent>
  {
-  typedef iv_inv_expr_ent value_type;
-  typedef iv_inv_expr_ent compare_type;
-  static inline hashval_t hash (const value_type *);
-  static inline bool equal (const value_type *, const compare_type *);
+  typedef iv_inv_expr_ent *value_type;
+  typedef iv_inv_expr_ent *compare_type;
+  static inline hashval_t hash (const iv_inv_expr_ent *);
+  static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
  };
  
  /* Hash function for loop invariant expressions.  */
  
  inline hashval_t
-iv_inv_expr_hasher::hash (const value_type *expr)
+iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
  {
    return expr->hash;
  }
@@ -257,7 +315,8 @@ iv_inv_expr_hasher::hash (const value_type *expr)
  /* Hash table equality function for expressions.  */
  
  inline bool
-iv_inv_expr_hasher::equal (const value_type *expr1, const compare_type *expr2)
+iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
+                          const iv_inv_expr_ent *expr2)
  {
    return expr1->hash == expr2->hash
          && operand_equal_p (expr1->expr, expr2->expr, 0);
@@ -267,9 +326,10 @@ struct ivopts_data
  {
    /* The currently optimized loop.  */
    struct loop *current_loop;
+  source_location loop_loc;
  
    /* Numbers of iterations for all exits of the current loop.  */
-  struct pointer_map_t *niters;
+  hash_map<edge, tree_niter_desc *> *niters;
  
    /* Number of registers used in it.  */
    unsigned regs_used;
@@ -282,7 +342,7 @@ struct ivopts_data
  
    /* The hashtable of loop invariant expressions created
       by ivopt.  */
-  hash_table <iv_inv_expr_hasher> inv_expr_tab;
+  hash_table<iv_inv_expr_hasher> *inv_expr_tab;
  
    /* Loop invariant expression id.  */
    int inv_expr_id;
@@ -299,6 +359,9 @@ struct ivopts_data
    /* A bitmap of important candidates.  */
    bitmap important_candidates;
  
+  /* Cache used by tree_to_aff_combination_expand.  */
+  hash_map<tree, name_expansion *> *name_expansion_cache;
+
    /* The maximum invariant id.  */
    unsigned max_inv_id;
  
@@ -452,7 +515,6 @@ single_dom_exit (struct loop *loop)
  
  /* Dumps information about the induction variable IV to FILE.  */
  
-extern void dump_iv (FILE *, struct iv *);
  void
  dump_iv (FILE *file, struct iv *iv)
  {
@@ -497,11 +559,14 @@ dump_iv (FILE *file, struct iv *iv)
  
  /* Dumps information about the USE to FILE.  */
  
-extern void dump_use (FILE *, struct iv_use *);
  void
  dump_use (FILE *file, struct iv_use *use)
  {
-  fprintf (file, "use %d\n", use->id);
+  fprintf (file, "use %d", use->id);
+  if (use->sub_id)
+    fprintf (file, ".%d", use->sub_id);
+
+  fprintf (file, "\n");
  
    switch (use->type)
      {
@@ -541,7 +606,6 @@ dump_use (FILE *file, struct iv_use *use)
  
  /* Dumps information about the uses to FILE.  */
  
-extern void dump_uses (FILE *, struct ivopts_data *);
  void
  dump_uses (FILE *file, struct ivopts_data *data)
  {
@@ -551,15 +615,18 @@ dump_uses (FILE *file, struct ivopts_data *data)
    for (i = 0; i < n_iv_uses (data); i++)
      {
        use = iv_use (data, i);
-
-      dump_use (file, use);
+      do
+       {
+         dump_use (file, use);
+         use = use->next;
+       }
+      while (use);
        fprintf (file, "\n");
      }
  }
  
  /* Dumps information about induction variable candidate CAND to FILE.  */
  
-extern void dump_cand (FILE *, struct iv_cand *);
  void
  dump_cand (FILE *file, struct iv_cand *cand)
  {
@@ -794,15 +861,15 @@ static struct tree_niter_desc *
  niter_for_exit (struct ivopts_data *data, edge exit)
  {
    struct tree_niter_desc *desc;
-  void **slot;
+  tree_niter_desc **slot;
  
    if (!data->niters)
      {
-      data->niters = pointer_map_create ();
+      data->niters = new hash_map<edge, tree_niter_desc *>;
        slot = NULL;
      }
    else
-    slot = pointer_map_contains (data->niters, exit);
+    slot = data->niters->get (exit);
  
    if (!slot)
      {
@@ -817,11 +884,10 @@ niter_for_exit (struct ivopts_data *data, edge exit)
           XDELETE (desc);
           desc = NULL;
         }
-      slot = pointer_map_insert (data->niters, exit);
-      *slot = desc;
+      data->niters->put (exit, desc);
      }
    else
-    desc = (struct tree_niter_desc *) *slot;
+    desc = *slot;
  
    return desc;
  }
@@ -855,8 +921,9 @@ tree_ssa_iv_optimize_init (struct ivopts_data *data)
    data->niters = NULL;
    data->iv_uses.create (20);
    data->iv_candidates.create (20);
-  data->inv_expr_tab.create (10);
+  data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
    data->inv_expr_id = 0;
+  data->name_expansion_cache = NULL;
    decl_rtl_to_reset.create (20);
  }
  
@@ -909,15 +976,58 @@ determine_base_object (tree expr)
      }
  }
  
+/* Return true if address expression with non-DECL_P operand appears
+   in EXPR.  */
+
+static bool
+contain_complex_addr_expr (tree expr)
+{
+  bool res = false;
+
+  STRIP_NOPS (expr);
+  switch (TREE_CODE (expr))
+    {
+    case POINTER_PLUS_EXPR:
+    case PLUS_EXPR:
+    case MINUS_EXPR:
+      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
+      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
+      break;
+
+    case ADDR_EXPR:
+      return (!DECL_P (TREE_OPERAND (expr, 0)));
+
+    default:
+      return false;
+    }
+
+  return res;
+}
+
  /* Allocates an induction variable with given initial value BASE and step STEP
     for loop LOOP.  */
  
  static struct iv *
  alloc_iv (tree base, tree step)
  {
+  tree expr = base;
    struct iv *iv = XCNEW (struct iv);
    gcc_assert (step != NULL_TREE);
  
+  /* Lower address expression in base except ones with DECL_P as operand.
+     By doing this:
+       1) More accurate cost can be computed for address expressions;
+       2) Duplicate candidates won't be created for bases in different
+          forms, like &a[0] and &a.  */
+  STRIP_NOPS (expr);
+  if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
+      || contain_complex_addr_expr (expr))
+    {
+      aff_tree comb;
+      tree_to_aff_combination (expr, TREE_TYPE (base), &comb);
+      base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
+    }
+
    iv->base = base;
    iv->base_object = determine_base_object (base);
    iv->step = step;
@@ -971,7 +1081,7 @@ get_iv (struct ivopts_data *data, tree var)
     not define a simple affine biv with nonzero step.  */
  
  static tree
-determine_biv_step (gimple phi)
+determine_biv_step (gphi *phi)
  {
    struct loop *loop = gimple_bb (phi)->loop_father;
    tree name = PHI_RESULT (phi);
@@ -986,20 +1096,47 @@ determine_biv_step (gimple phi)
    return integer_zerop (iv.step) ? NULL_TREE : iv.step;
  }
  
+/* Return the first non-invariant ssa var found in EXPR.  */
+
+static tree
+extract_single_var_from_expr (tree expr)
+{
+  int i, n;
+  tree tmp;
+  enum tree_code code;
+
+  if (!expr || is_gimple_min_invariant (expr))
+    return NULL;
+
+  code = TREE_CODE (expr);
+  if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
+    {
+      n = TREE_OPERAND_LENGTH (expr);
+      for (i = 0; i < n; i++)
+       {
+         tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));
+
+         if (tmp)
+           return tmp;
+       }
+    }
+  return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
+}
+
  /* Finds basic ivs.  */
  
  static bool
  find_bivs (struct ivopts_data *data)
  {
-  gimple phi;
-  tree step, type, base;
+  gphi *phi;
+  tree step, type, base, stop;
    bool found = false;
    struct loop *loop = data->current_loop;
-  gimple_stmt_iterator psi;
+  gphi_iterator psi;
  
    for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
      {
-      phi = gsi_stmt (psi);
+      phi = psi.phi ();
  
        if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
         continue;
@@ -1009,7 +1146,13 @@ find_bivs (struct ivopts_data *data)
         continue;
  
        base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
-      base = expand_simple_operations (base);
+      /* Stop expanding iv base at the first ssa var referred to by iv step.
+        Ideally we should stop at any ssa var, but because that's expensive
+        and unusual to happen, we just do it on the first one.
+
+        See PR64705 for the rationale.  */
+      stop = extract_single_var_from_expr (step);
+      base = expand_simple_operations (base, stop);
        if (contains_abnormal_ssa_name_p (base)
           || contains_abnormal_ssa_name_p (step))
         continue;
@@ -1036,22 +1179,30 @@ find_bivs (struct ivopts_data *data)
  static void
  mark_bivs (struct ivopts_data *data)
  {
-  gimple phi;
+  gphi *phi;
+  gimple def;
    tree var;
    struct iv *iv, *incr_iv;
    struct loop *loop = data->current_loop;
    basic_block incr_bb;
-  gimple_stmt_iterator psi;
+  gphi_iterator psi;
  
    for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
      {
-      phi = gsi_stmt (psi);
+      phi = psi.phi ();
  
        iv = get_iv (data, PHI_RESULT (phi));
        if (!iv)
         continue;
  
        var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
+      def = SSA_NAME_DEF_STMT (var);
+      /* Don't mark an iv peeled from another one as a biv.  */
+      if (def
+         && gimple_code (def) == GIMPLE_PHI
+         && gimple_bb (def) == loop->header)
+       continue;
+
        incr_iv = get_iv (data, var);
        if (!incr_iv)
         continue;
@@ -1073,7 +1224,7 @@ mark_bivs (struct ivopts_data *data)
  static bool
  find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
  {
-  tree lhs;
+  tree lhs, stop;
    struct loop *loop = data->current_loop;
  
    iv->base = NULL_TREE;
@@ -1088,13 +1239,19 @@ find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
  
    if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
      return false;
-  iv->base = expand_simple_operations (iv->base);
  
+  /* Stop expanding iv base at the first ssa var referred to by iv step.
+     Ideally we should stop at any ssa var, but because that's expensive
+     and unusual to happen, we just do it on the first one.
+
+     See PR64705 for the rationale.  */
+  stop = extract_single_var_from_expr (iv->step);
+  iv->base = expand_simple_operations (iv->base, stop);
    if (contains_abnormal_ssa_name_p (iv->base)
        || contains_abnormal_ssa_name_p (iv->step))
      return false;
  
-  /* If STMT could throw, then do not consider STMT as defining a GIV.  
+  /* If STMT could throw, then do not consider STMT as defining a GIV.
       While this will suppress optimizations, we can not safely delete this
       GIV and associated statements, even if it appears it is not used.  */
    if (stmt_could_throw_p (stmt))
@@ -1184,33 +1341,88 @@ find_induction_variables (struct ivopts_data *data)
    return true;
  }
  
-/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */
+/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.
+   For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
+   is the const offset stripped from IV base.  For uses of other types,
+   ADDR_BASE and ADDR_OFFSET are zero by default.  */
  
  static struct iv_use *
  record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
-           gimple stmt, enum use_type use_type)
+           gimple stmt, enum use_type use_type, tree addr_base = NULL,
+           unsigned HOST_WIDE_INT addr_offset = 0)
  {
    struct iv_use *use = XCNEW (struct iv_use);
  
    use->id = n_iv_uses (data);
+  use->sub_id = 0;
    use->type = use_type;
    use->iv = iv;
    use->stmt = stmt;
    use->op_p = use_p;
    use->related_cands = BITMAP_ALLOC (NULL);
+  use->next = NULL;
+  use->addr_base = addr_base;
+  use->addr_offset = addr_offset;
  
    /* To avoid showing ssa name in the dumps, if it was not reset by the
       caller.  */
    iv->ssa_name = NULL_TREE;
  
-  if (dump_file && (dump_flags & TDF_DETAILS))
-    dump_use (dump_file, use);
-
    data->iv_uses.safe_push (use);
  
    return use;
  }
  
+/* Records a sub use of type USE_TYPE at *USE_P in STMT whose value is IV.
+   The sub use is recorded under the one whose use id is ID_GROUP.  */
+
+static struct iv_use *
+record_sub_use (struct ivopts_data *data, tree *use_p,
+                   struct iv *iv, gimple stmt, enum use_type use_type,
+                   tree addr_base, unsigned HOST_WIDE_INT addr_offset,
+                   unsigned int id_group)
+{
+  struct iv_use *use = XCNEW (struct iv_use);
+  struct iv_use *group = iv_use (data, id_group);
+
+  use->id = group->id;
+  use->sub_id = 0;
+  use->type = use_type;
+  use->iv = iv;
+  use->stmt = stmt;
+  use->op_p = use_p;
+  use->related_cands = NULL;
+  use->addr_base = addr_base;
+  use->addr_offset = addr_offset;
+
+  /* Sub use list is maintained in offset ascending order.  */
+  if (addr_offset <= group->addr_offset)
+    {
+      use->related_cands = group->related_cands;
+      group->related_cands = NULL;
+      use->next = group;
+      data->iv_uses[id_group] = use;
+    }
+  else
+    {
+      struct iv_use *pre;
+      do
+       {
+         pre = group;
+         group = group->next;
+       }
+      while (group && addr_offset > group->addr_offset);
+      use->next = pre->next;
+      pre->next = use;
+    }
+
+  /* To avoid showing ssa name in the dumps, if it was not reset by the
+     caller.  */
+  iv->ssa_name = NULL_TREE;
+
+  return use;
+}
+
  /* Checks whether OP is a loop-level invariant and if so, records it.
     NONLINEAR_USE is true if the invariant is used in a way we do not
     handle specially.  */
@@ -1305,8 +1517,9 @@ extract_cond_operands (struct ivopts_data *data, gimple stmt,
  
    if (gimple_code (stmt) == GIMPLE_COND)
      {
-      op0 = gimple_cond_lhs_ptr (stmt);
-      op1 = gimple_cond_rhs_ptr (stmt);
+      gcond *cond_stmt = as_a <gcond *> (stmt);
+      op0 = gimple_cond_lhs_ptr (cond_stmt);
+      op1 = gimple_cond_rhs_ptr (cond_stmt);
      }
    else
      {
@@ -1337,9 +1550,9 @@ extract_cond_operands (struct ivopts_data *data, gimple stmt,
  
  end:
    if (control_var)
-    *control_var = op0;;
+    *control_var = op0;
    if (iv_var)
-    *iv_var = iv0;;
+    *iv_var = iv0;
    if (bound)
      *bound = op1;
    if (iv_bound)
@@ -1454,29 +1667,6 @@ expr_invariant_in_loop_p (struct loop *loop, tree expr)
    return true;
  }
  
-/* Returns true if statement STMT is obviously invariant in LOOP,
-   i.e. if all its operands on the RHS are defined outside of the LOOP.
-   LOOP should not be the function body.  */
-
-bool
-stmt_invariant_in_loop_p (struct loop *loop, gimple stmt)
-{
-  unsigned i;
-  tree lhs;
-
-  gcc_assert (loop_depth (loop) > 0);
-
-  lhs = gimple_get_lhs (stmt);
-  for (i = 0; i < gimple_num_ops (stmt); i++)
-    {
-      tree op = gimple_op (stmt, i);
-      if (op != lhs && !expr_invariant_in_loop_p (loop, op))
-       return false;
-    }
-
-  return true;
-}
-
  /* Cumulates the steps of indices into DATA and replaces their values with the
     initial ones.  Returns false when the value of the index cannot be determined.
     Callback for for_each_index.  */
@@ -1589,19 +1779,19 @@ idx_record_use (tree base, tree *idx,
     signedness of TOP and BOT.  */
  
  static bool
-constant_multiple_of (tree top, tree bot, double_int *mul)
+constant_multiple_of (tree top, tree bot, widest_int *mul)
  {
    tree mby;
    enum tree_code code;
-  double_int res, p0, p1;
    unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
+  widest_int res, p0, p1;
  
    STRIP_NOPS (top);
    STRIP_NOPS (bot);
  
    if (operand_equal_p (top, bot, 0))
      {
-      *mul = double_int_one;
+      *mul = 1;
        return true;
      }
  
@@ -1616,7 +1806,7 @@ constant_multiple_of (tree top, tree bot, double_int *mul)
        if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
         return false;
  
-      *mul = (res * tree_to_double_int (mby)).sext (precision);
+      *mul = wi::sext (res * wi::to_widest (mby), precision);
        return true;
  
      case PLUS_EXPR:
@@ -1627,69 +1817,51 @@ constant_multiple_of (tree top, tree bot, double_int *mul)
  
        if (code == MINUS_EXPR)
         p1 = -p1;
-      *mul = (p0 + p1).sext (precision);
+      *mul = wi::sext (p0 + p1, precision);
        return true;
  
      case INTEGER_CST:
        if (TREE_CODE (bot) != INTEGER_CST)
         return false;
  
-      p0 = tree_to_double_int (top).sext (precision);
-      p1 = tree_to_double_int (bot).sext (precision);
-      if (p1.is_zero ())
+      p0 = widest_int::from (top, SIGNED);
+      p1 = widest_int::from (bot, SIGNED);
+      if (p1 == 0)
         return false;
-      *mul = p0.sdivmod (p1, FLOOR_DIV_EXPR, &res).sext (precision);
-      return res.is_zero ();
+      *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
+      return res == 0;
  
      default:
        return false;
      }
  }
  
-/* Returns true if memory reference REF with step STEP may be unaligned.  */
+/* Return true if memory reference REF with step STEP may be unaligned.  */
  
  static bool
  may_be_unaligned_p (tree ref, tree step)
  {
-  tree base;
-  tree base_type;
-  HOST_WIDE_INT bitsize;
-  HOST_WIDE_INT bitpos;
-  tree toffset;
-  enum machine_mode mode;
-  int unsignedp, volatilep;
-  unsigned base_align;
-
    /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
       thus they are not misaligned.  */
    if (TREE_CODE (ref) == TARGET_MEM_REF)
      return false;
  
-  /* The test below is basically copy of what expr.c:normal_inner_ref
-     does to check whether the object must be loaded by parts when
-     STRICT_ALIGNMENT is true.  */
-  base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
-                             &unsignedp, &volatilep, true);
-  base_type = TREE_TYPE (base);
-  base_align = get_object_alignment (base);
-  base_align = MAX (base_align, TYPE_ALIGN (base_type));
-
-  if (mode != BLKmode)
-    {
-      unsigned mode_align = GET_MODE_ALIGNMENT (mode);
+  unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
+  if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
+    align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
  
-      if (base_align < mode_align
-         || (bitpos % mode_align) != 0
-         || (bitpos % BITS_PER_UNIT) != 0)
-       return true;
-
-      if (toffset
-         && (highest_pow2_factor (toffset) * BITS_PER_UNIT) < mode_align)
-       return true;
+  unsigned HOST_WIDE_INT bitpos;
+  unsigned int ref_align;
+  get_object_alignment_1 (ref, &ref_align, &bitpos);
+  if (ref_align < align
+      || (bitpos % align) != 0
+      || (bitpos % BITS_PER_UNIT) != 0)
+    return true;
  
-      if ((highest_pow2_factor (step) * BITS_PER_UNIT) < mode_align)
-       return true;
-    }
+  unsigned int trailing_zeros = tree_ctz (step);
+  if (trailing_zeros < HOST_BITS_PER_INT
+      && (1U << trailing_zeros) * BITS_PER_UNIT < align)
+    return true;
  
    return false;
  }
@@ -1735,6 +1907,50 @@ may_be_nonaddressable_p (tree expr)
    return false;
  }
  
+static tree
+strip_offset (tree expr, unsigned HOST_WIDE_INT *offset);
+
+/* Record a use of type USE_TYPE at *USE_P in STMT whose value is IV.
+   If there is an existing use which has same stripped iv base and step,
+   this function records this one as a sub use to that; otherwise records
+   it as a normal one.  */
+
+static struct iv_use *
+record_group_use (struct ivopts_data *data, tree *use_p,
+                 struct iv *iv, gimple stmt, enum use_type use_type)
+{
+  unsigned int i;
+  struct iv_use *use;
+  tree addr_base;
+  unsigned HOST_WIDE_INT addr_offset;
+
+  /* Only support sub use for address type uses, that is, with base
+     object.  */
+  if (!iv->base_object)
+    return record_use (data, use_p, iv, stmt, use_type);
+
+  addr_base = strip_offset (iv->base, &addr_offset);
+  for (i = 0; i < n_iv_uses (data); i++)
+    {
+      use = iv_use (data, i);
+      if (use->type != USE_ADDRESS || !use->iv->base_object)
+       continue;
+
+      /* Check if it has the same stripped base and step.  */
+      if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
+         && operand_equal_p (iv->step, use->iv->step, 0)
+         && operand_equal_p (addr_base, use->addr_base, 0))
+       break;
+    }
+
+  if (i == n_iv_uses (data))
+    return record_use (data, use_p, iv, stmt,
+                      use_type, addr_base, addr_offset);
+  else
+    return record_sub_use (data, use_p, iv, stmt,
+                          use_type, addr_base, addr_offset, i);
+}
+
  /* Finds addresses in *OP_P inside STMT.  */
  
  static void
@@ -1845,7 +2061,7 @@ find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p
      }
  
    civ = alloc_iv (base, step);
-  record_use (data, op_p, civ, stmt, USE_ADDRESS);
+  record_group_use (data, op_p, civ, stmt, USE_ADDRESS);
    return;
  
  fail:
@@ -1962,13 +2178,13 @@ find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
  static void
  find_interesting_uses_outside (struct ivopts_data *data, edge exit)
  {
-  gimple phi;
-  gimple_stmt_iterator psi;
+  gphi *phi;
+  gphi_iterator psi;
    tree def;
  
    for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
      {
-      phi = gsi_stmt (psi);
+      phi = psi.phi ();
        def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
        if (!virtual_operand_p (def))
          find_interesting_uses_op (data, def);
@@ -1996,7 +2212,7 @@ find_interesting_uses (struct ivopts_data *data)
        bb = body[i];
  
        FOR_EACH_EDGE (e, ei, bb->succs)
-       if (e->dest != EXIT_BLOCK_PTR
+       if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
             && !flow_bb_inside_loop_p (data->current_loop, e->dest))
           find_interesting_uses_outside (data, e);
  
@@ -2031,18 +2247,184 @@ find_interesting_uses (struct ivopts_data *data)
    free (body);
  }
  
+/* Compute maximum offset of [base + offset] addressing mode
+   for memory reference represented by USE.  */
+
+static HOST_WIDE_INT
+compute_max_addr_offset (struct iv_use *use)
+{
+  int width;
+  rtx reg, addr;
+  HOST_WIDE_INT i, off;
+  unsigned list_index, num;
+  addr_space_t as;
+  machine_mode mem_mode, addr_mode;
+  static vec<HOST_WIDE_INT> max_offset_list;
+
+  as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
+  mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
+
+  num = max_offset_list.length ();
+  list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
+  if (list_index >= num)
+    {
+      max_offset_list.safe_grow (list_index + MAX_MACHINE_MODE);
+      for (; num < max_offset_list.length (); num++)
+       max_offset_list[num] = -1;
+    }
+
+  off = max_offset_list[list_index];
+  if (off != -1)
+    return off;
+
+  addr_mode = targetm.addr_space.address_mode (as);
+  reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
+  addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
+
+  width = GET_MODE_BITSIZE (addr_mode) - 1;
+  if (width > (HOST_BITS_PER_WIDE_INT - 1))
+    width = HOST_BITS_PER_WIDE_INT - 1;
+
+  for (i = width; i > 0; i--)
+    {
+      off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
+      XEXP (addr, 1) = gen_int_mode (off, addr_mode);
+      if (memory_address_addr_space_p (mem_mode, addr, as))
+       break;
+
+      /* For some strict-alignment targets, the offset must be naturally
+        aligned.  Try an aligned offset if mem_mode is not QImode.  */
+      off = ((unsigned HOST_WIDE_INT) 1 << i);
+      if (off > GET_MODE_SIZE (mem_mode) && mem_mode != QImode)
+       {
+         off -= GET_MODE_SIZE (mem_mode);
+         XEXP (addr, 1) = gen_int_mode (off, addr_mode);
+         if (memory_address_addr_space_p (mem_mode, addr, as))
+           break;
+       }
+    }
+  if (i == 0)
+    off = 0;
+
+  max_offset_list[list_index] = off;
+  return off;
+}
+
+/* Check if all small groups should be split.  Return true if and
+   only if:
+
+     1) At least one group contains two uses with different offsets.
+     2) No group contains more than two uses with different offsets.
+
+   Return false otherwise.  We want to split such groups because:
+
+     1) Small groups don't have much benefit and may interfere with
+       general candidate selection.
+     2) A problem with only small groups is usually small in size, and
+       the general algorithm can handle it well.
+
+   TODO -- Above claim may not hold when auto increment is supported.  */
+
+static bool
+split_all_small_groups (struct ivopts_data *data)
+{
+  bool split_p = false;
+  unsigned int i, n, distinct;
+  struct iv_use *pre, *use;
+
+  n = n_iv_uses (data);
+  for (i = 0; i < n; i++)
+    {
+      use = iv_use (data, i);
+      if (!use->next)
+       continue;
+
+      distinct = 1;
+      gcc_assert (use->type == USE_ADDRESS);
+      for (pre = use, use = use->next; use; pre = use, use = use->next)
+       {
+         if (pre->addr_offset != use->addr_offset)
+           distinct++;
+
+         if (distinct > 2)
+           return false;
+       }
+      if (distinct == 2)
+       split_p = true;
+    }
+
+  return split_p;
+}
+
+/* For each group of address type uses, this function further groups
+   these uses according to the maximum offset supported by target's
+   [base + offset] addressing mode.  */
+
+static void
+group_address_uses (struct ivopts_data *data)
+{
+  HOST_WIDE_INT max_offset = -1;
+  unsigned int i, n, sub_id;
+  struct iv_use *pre, *use;
+  unsigned HOST_WIDE_INT addr_offset_first;
+
+  /* Reset max offset to split all small groups.  */
+  if (split_all_small_groups (data))
+    max_offset = 0;
+
+  n = n_iv_uses (data);
+  for (i = 0; i < n; i++)
+    {
+      use = iv_use (data, i);
+      if (!use->next)
+       continue;
+
+      gcc_assert (use->type == USE_ADDRESS);
+      if (max_offset != 0)
+       max_offset = compute_max_addr_offset (use);
+
+      while (use)
+       {
+         sub_id = 0;
+         addr_offset_first = use->addr_offset;
+         /* Only uses whose offset relative to the first use fits in the
+            offset part of the address can be grouped together.  */
+         for (pre = use, use = use->next;
+              use && (use->addr_offset - addr_offset_first
+                      <= (unsigned HOST_WIDE_INT) max_offset);
+              pre = use, use = use->next)
+           {
+             use->id = pre->id;
+             use->sub_id = ++sub_id;
+           }
+
+         /* Break the list and create new group.  */
+         if (use)
+           {
+             pre->next = NULL;
+             use->id = n_iv_uses (data);
+             use->related_cands = BITMAP_ALLOC (NULL);
+             data->iv_uses.safe_push (use);
+           }
+       }
+    }
+
+  if (dump_file && (dump_flags & TDF_DETAILS))
+    dump_uses (dump_file, data);
+}
+
  /* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
     is true, assume we are inside an address.  If TOP_COMPREF is true, assume
     we are at the top-level of the processed address.  */
  
  static tree
  strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
-               unsigned HOST_WIDE_INT *offset)
+               HOST_WIDE_INT *offset)
  {
    tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
    enum tree_code code;
    tree type, orig_type = TREE_TYPE (expr);
-  unsigned HOST_WIDE_INT off0, off1, st;
+  HOST_WIDE_INT off0, off1, st;
    tree orig_expr = expr;
  
    STRIP_NOPS (expr);
@@ -2133,19 +2515,32 @@ strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
        break;
  
      case COMPONENT_REF:
-      if (!inside_addr)
-       return orig_expr;
+      {
+       tree field;
  
-      tmp = component_ref_field_offset (expr);
-      if (top_compref
-         && cst_and_fits_in_hwi (tmp))
-       {
-         /* Strip the component reference completely.  */
-         op0 = TREE_OPERAND (expr, 0);
-         op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
-         *offset = off0 + int_cst_value (tmp);
-         return op0;
-       }
+       if (!inside_addr)
+         return orig_expr;
+
+       tmp = component_ref_field_offset (expr);
+       field = TREE_OPERAND (expr, 1);
+       if (top_compref
+           && cst_and_fits_in_hwi (tmp)
+           && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
+         {
+           HOST_WIDE_INT boffset, abs_off;
+
+           /* Strip the component reference completely.  */
+           op0 = TREE_OPERAND (expr, 0);
+           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
+           boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
+           abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
+           if (boffset < 0)
+             abs_off = -abs_off;
+
+           *offset = off0 + int_cst_value (tmp) + abs_off;
+           return op0;
+         }
+      }
        break;
  
      case ADDR_EXPR:
@@ -2196,7 +2591,10 @@ strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
  static tree
  strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
  {
-  return strip_offset_1 (expr, false, false, offset);
+  HOST_WIDE_INT off;   /* strip_offset_1 reports the offset as a signed value.  */
+  tree core = strip_offset_1 (expr, false, false, &off);
+  *offset = off;       /* Hand it back in the unsigned type of the interface.  */
+  return core;
  }
  
  /* Returns variant of TYPE that can be used as base for different uses.
@@ -2380,7 +2778,7 @@ add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
                         bool important, struct iv_use *use)
  {
    basic_block use_bb = gimple_bb (use->stmt);
-  enum machine_mode mem_mode;
+  machine_mode mem_mode;
    unsigned HOST_WIDE_INT cstepi;
  
    /* If we insert the increment in any position other than the standard
@@ -2438,6 +2836,8 @@ static void
  add_candidate (struct ivopts_data *data,
                tree base, tree step, bool important, struct iv_use *use)
  {
+  gcc_assert (use == NULL || use->sub_id == 0);
+
    if (ip_normal_pos (data->current_loop))
      add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
    if (ip_end_pos (data->current_loop)
@@ -2495,11 +2895,19 @@ add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
        /* Additionally record the possibility of leaving the original iv
          untouched.  */
        def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
-      cand = add_candidate_1 (data,
-                             iv->base, iv->step, true, IP_ORIGINAL, NULL,
-                             SSA_NAME_DEF_STMT (def));
-      cand->var_before = iv->ssa_name;
-      cand->var_after = def;
+      /* Don't add candidate if it's from another PHI node because
+        it's an affine iv appearing in the form of PEELED_CHREC.  */
+      phi = SSA_NAME_DEF_STMT (def);
+      if (gimple_code (phi) != GIMPLE_PHI)
+       {
+         cand = add_candidate_1 (data,
+                                 iv->base, iv->step, true, IP_ORIGINAL, NULL,
+                                 SSA_NAME_DEF_STMT (def));
+         cand->var_before = iv->ssa_name;
+         cand->var_after = def;
+       }
+      else
+       gcc_assert (gimple_bb (phi) == data->current_loop->header);
      }
  }
  
@@ -2659,11 +3067,22 @@ new_cost (unsigned runtime, unsigned complexity)
    return cost;
  }
  
+/* Returns true if COST is infinite, i.e. COST.cost equals INFTY.  */
+
+static bool
+infinite_cost_p (comp_cost cost)
+{
+  return cost.cost == INFTY;
+}
+
  /* Adds costs COST1 and COST2.  */
  
  static comp_cost
  add_costs (comp_cost cost1, comp_cost cost2)
  {
+  if (infinite_cost_p (cost1) || infinite_cost_p (cost2))
+    return infinite_cost;
+
    cost1.cost += cost2.cost;
    cost1.complexity += cost2.complexity;
  
@@ -2692,14 +3111,6 @@ compare_costs (comp_cost cost1, comp_cost cost2)
    return cost1.cost - cost2.cost;
  }
  
-/* Returns true if COST is infinite.  */
-
-static bool
-infinite_cost_p (comp_cost cost)
-{
-  return cost.cost == INFTY;
-}
-
  /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
     on invariants DEPENDS_ON and that the value used in expressing it
     is VALUE, and in case of iv elimination the comparison operator is COMP.  */
@@ -2786,32 +3197,12 @@ get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
    return NULL;
  }
  
-/* Returns estimate on cost of computing SEQ.  */
-
-static unsigned
-seq_cost (rtx seq, bool speed)
-{
-  unsigned cost = 0;
-  rtx set;
-
-  for (; seq; seq = NEXT_INSN (seq))
-    {
-      set = single_set (seq);
-      if (set)
-       cost += set_src_cost (SET_SRC (set), speed);
-      else
-       cost++;
-    }
-
-  return cost;
-}
-
  /* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */
  static rtx
  produce_memory_decl_rtl (tree obj, int *regno)
  {
    addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
-  enum machine_mode address_mode = targetm.addr_space.address_mode (as);
+  machine_mode address_mode = targetm.addr_space.address_mode (as);
    rtx x;
  
    gcc_assert (obj);
@@ -2900,12 +3291,13 @@ prepare_decl_rtl (tree *expr_p, int *ws, void *data)
  static unsigned
  computation_cost (tree expr, bool speed)
  {
-  rtx seq, rslt;
+  rtx_insn *seq;
+  rtx rslt;
    tree type = TREE_TYPE (expr);
    unsigned cost;
    /* Avoid using hard regs in ways which may be unsupported.  */
    int regno = LAST_VIRTUAL_REGISTER + 1;
-  struct cgraph_node *node = cgraph_get_node (current_function_decl);
+  struct cgraph_node *node = cgraph_node::get (current_function_decl);
    enum node_frequency real_frequency = node->frequency;
  
    node->frequency = NODE_FREQUENCY_NORMAL;
@@ -2982,7 +3374,7 @@ determine_common_wider_type (tree *a, tree *b)
  static bool
  get_computation_aff (struct loop *loop,
                      struct iv_use *use, struct iv_cand *cand, gimple at,
-                    struct affine_tree_combination *aff)
+                    struct aff_tree *aff)
  {
    tree ubase = use->iv->base;
    tree ustep = use->iv->step;
@@ -2992,7 +3384,7 @@ get_computation_aff (struct loop *loop,
    tree common_type, var;
    tree uutype;
    aff_tree cbase_aff, var_aff;
-  double_int rat;
+  widest_int rat;
  
    if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
      {
@@ -3118,7 +3510,7 @@ adjust_setup_cost (struct ivopts_data *data, unsigned cost)
  
  
  bool
-multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
+multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, machine_mode mode,
                                  addr_space_t as)
  {
  #define MAX_RATIO 128
@@ -3132,18 +3524,21 @@ multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
    valid_mult = valid_mult_list[data_index];
    if (!valid_mult)
      {
-      enum machine_mode address_mode = targetm.addr_space.address_mode (as);
+      machine_mode address_mode = targetm.addr_space.address_mode (as);
        rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
-      rtx addr;
+      rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
+      rtx addr, scaled;
        HOST_WIDE_INT i;
  
        valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
        bitmap_clear (valid_mult);
-      addr = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
+      scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
+      addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
        for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
         {
-         XEXP (addr, 1) = gen_int_mode (i, address_mode);
-         if (memory_address_addr_space_p (mode, addr, as))
+         XEXP (scaled, 1) = gen_int_mode (i, address_mode);
+         if (memory_address_addr_space_p (mode, addr, as)
+             || memory_address_addr_space_p (mode, scaled, as))
             bitmap_set_bit (valid_mult, i + MAX_RATIO);
         }
  
@@ -3180,27 +3575,38 @@ multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
  
     TODO -- there must be some better way.  This all is quite crude.  */
  
+enum ainc_type
+{
+  AINC_PRE_INC,                /* Pre increment.  */
+  AINC_PRE_DEC,                /* Pre decrement.  */
+  AINC_POST_INC,       /* Post increment.  */
+  AINC_POST_DEC,       /* Post decrement.  */
+  AINC_NONE            /* No auto increment; also the number of types above.  */
+};
+
  typedef struct address_cost_data_s
  {
    HOST_WIDE_INT min_offset, max_offset;
    unsigned costs[2][2][2][2];   /* [symbol_present][var_present][offset_p][ratio_p].  */
+  unsigned ainc_costs[AINC_NONE];   /* Address cost per auto increment form, indexed by enum ainc_type.  */
  } *address_cost_data;
  
  
  static comp_cost
  get_address_cost (bool symbol_present, bool var_present,
                   unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
-                 HOST_WIDE_INT cstep, enum machine_mode mem_mode,
+                 HOST_WIDE_INT cstep, machine_mode mem_mode,
                   addr_space_t as, bool speed,
                   bool stmt_after_inc, bool *may_autoinc)
  {
-  enum machine_mode address_mode = targetm.addr_space.address_mode (as);
+  machine_mode address_mode = targetm.addr_space.address_mode (as);
    static vec<address_cost_data> address_cost_data_list;
    unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
    address_cost_data data;
    static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
    static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
    unsigned cost, acost, complexity;
+  enum ainc_type autoinc_type;
    bool offset_p, ratio_p, autoinc;
    HOST_WIDE_INT s_offset, autoinc_offset, msize;
    unsigned HOST_WIDE_INT mask;
@@ -3216,7 +3622,8 @@ get_address_cost (bool symbol_present, bool var_present,
        HOST_WIDE_INT rat, off = 0;
        int old_cse_not_expected, width;
        unsigned sym_p, var_p, off_p, rat_p, add_c;
-      rtx seq, addr, base;
+      rtx_insn *seq;
+      rtx addr, base;
        rtx reg0, reg1;
  
        data = (address_cost_data) xcalloc (1, sizeof (*data));
@@ -3243,6 +3650,18 @@ get_address_cost (bool symbol_present, bool var_present,
           XEXP (addr, 1) = gen_int_mode (off, address_mode);
           if (memory_address_addr_space_p (mem_mode, addr, as))
             break;
+         /* For some strict-alignment targets, the offset must be naturally
+            aligned.  Try an aligned offset if mem_mode is not QImode.  */
+         off = mem_mode != QImode
+               ? ((unsigned HOST_WIDE_INT) 1 << i)
+                   - GET_MODE_SIZE (mem_mode)
+               : 0;
+         if (off > 0)
+           {
+             XEXP (addr, 1) = gen_int_mode (off, address_mode);
+             if (memory_address_addr_space_p (mem_mode, addr, as))
+               break;
+           }
         }
        if (i == -1)
          off = 0;
@@ -3272,33 +3691,49 @@ get_address_cost (bool symbol_present, bool var_present,
        reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
        reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
  
-      if (USE_LOAD_PRE_DECREMENT (mem_mode) 
+      if (USE_LOAD_PRE_DECREMENT (mem_mode)
           || USE_STORE_PRE_DECREMENT (mem_mode))
         {
           addr = gen_rtx_PRE_DEC (address_mode, reg0);
           has_predec[mem_mode]
             = memory_address_addr_space_p (mem_mode, addr, as);
+
+         if (has_predec[mem_mode])
+           data->ainc_costs[AINC_PRE_DEC]
+             = address_cost (addr, mem_mode, as, speed);
         }
-      if (USE_LOAD_POST_DECREMENT (mem_mode) 
+      if (USE_LOAD_POST_DECREMENT (mem_mode)
           || USE_STORE_POST_DECREMENT (mem_mode))
         {
           addr = gen_rtx_POST_DEC (address_mode, reg0);
           has_postdec[mem_mode]
             = memory_address_addr_space_p (mem_mode, addr, as);
+
+         if (has_postdec[mem_mode])
+           data->ainc_costs[AINC_POST_DEC]
+             = address_cost (addr, mem_mode, as, speed);
         }
-      if (USE_LOAD_PRE_INCREMENT (mem_mode) 
+      if (USE_LOAD_PRE_INCREMENT (mem_mode)
           || USE_STORE_PRE_DECREMENT (mem_mode))
         {
           addr = gen_rtx_PRE_INC (address_mode, reg0);
           has_preinc[mem_mode]
             = memory_address_addr_space_p (mem_mode, addr, as);
+
+         if (has_preinc[mem_mode])
+           data->ainc_costs[AINC_PRE_INC]
+             = address_cost (addr, mem_mode, as, speed);
         }
-      if (USE_LOAD_POST_INCREMENT (mem_mode) 
+      if (USE_LOAD_POST_INCREMENT (mem_mode)
           || USE_STORE_POST_INCREMENT (mem_mode))
         {
           addr = gen_rtx_POST_INC (address_mode, reg0);
           has_postinc[mem_mode]
             = memory_address_addr_space_p (mem_mode, addr, as);
+
+         if (has_postinc[mem_mode])
+           data->ainc_costs[AINC_POST_INC]
+             = address_cost (addr, mem_mode, as, speed);
         }
        for (i = 0; i < 16; i++)
         {
@@ -3424,21 +3859,31 @@ get_address_cost (bool symbol_present, bool var_present,
    s_offset = offset;
  
    autoinc = false;
+  autoinc_type = AINC_NONE;
    msize = GET_MODE_SIZE (mem_mode);
    autoinc_offset = offset;
    if (stmt_after_inc)
      autoinc_offset += ratio * cstep;
    if (symbol_present || var_present || ratio != 1)
      autoinc = false;
-  else if ((has_postinc[mem_mode] && autoinc_offset == 0
-              && msize == cstep)
-          || (has_postdec[mem_mode] && autoinc_offset == 0
+  else
+    {
+      if (has_postinc[mem_mode] && autoinc_offset == 0
+         && msize == cstep)
+       autoinc_type = AINC_POST_INC;
+      else if (has_postdec[mem_mode] && autoinc_offset == 0
                && msize == -cstep)
-          || (has_preinc[mem_mode] && autoinc_offset == msize
+       autoinc_type = AINC_POST_DEC;
+      else if (has_preinc[mem_mode] && autoinc_offset == msize
                && msize == cstep)
-          || (has_predec[mem_mode] && autoinc_offset == -msize
-              && msize == -cstep))
-    autoinc = true;
+       autoinc_type = AINC_PRE_INC;
+      else if (has_predec[mem_mode] && autoinc_offset == -msize
+              && msize == -cstep)
+       autoinc_type = AINC_PRE_DEC;
+
+      if (autoinc_type != AINC_NONE)
+       autoinc = true;
+    }
  
    cost = 0;
    offset_p = (s_offset != 0
@@ -3455,7 +3900,10 @@ get_address_cost (bool symbol_present, bool var_present,
  
    if (may_autoinc)
      *may_autoinc = autoinc;
-  acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
+  if (autoinc)
+    acost = data->ainc_costs[autoinc_type];
+  else
+    acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
    complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
    return new_cost (cost + acost, complexity);
  }
@@ -3466,7 +3914,7 @@ get_address_cost (bool symbol_present, bool var_present,
      the cost in COST.  */
  
  static bool
-get_shiftadd_cost (tree expr, enum machine_mode mode, comp_cost cost0,
+get_shiftadd_cost (tree expr, machine_mode mode, comp_cost cost0,
                     comp_cost cost1, tree mult, bool speed, comp_cost *cost)
  {
    comp_cost res;
@@ -3475,18 +3923,26 @@ get_shiftadd_cost (tree expr, enum machine_mode mode, comp_cost cost0,
    tree multop = TREE_OPERAND (mult, 0);
    int m = exact_log2 (int_cst_value (cst));
    int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
-  int sa_cost;
+  int as_cost, sa_cost;
+  bool mult_in_op1;
  
    if (!(m >= 0 && m < maxm))
      return false;
  
+  mult_in_op1 = operand_equal_p (op1, mult, 0);
+
+  as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
+
+  /* If the target has a cheap shift-and-add or shift-and-sub instruction,
+     use that in preference to a shift insn followed by an add insn.  */
    sa_cost = (TREE_CODE (expr) != MINUS_EXPR
               ? shiftadd_cost (speed, mode, m)
-             : (mult == op1
+             : (mult_in_op1
                  ? shiftsub1_cost (speed, mode, m)
                  : shiftsub0_cost (speed, mode, m)));
-  res = new_cost (sa_cost, 0);
-  res = add_costs (res, mult == op1 ? cost0 : cost1);
+
+  res = new_cost (MIN (as_cost, sa_cost), 0);
+  res = add_costs (res, mult_in_op1 ? cost0 : cost1);
  
    STRIP_NOPS (multop);
    if (!is_gimple_val (multop))
@@ -3507,7 +3963,7 @@ force_expr_to_var_cost (tree expr, bool speed)
    static unsigned address_cost [2];
    tree op0, op1;
    comp_cost cost0, cost1, cost;
-  enum machine_mode mode;
+  machine_mode mode;
  
    if (!costs_initialized)
      {
@@ -3580,30 +4036,13 @@ force_expr_to_var_cost (tree expr, bool speed)
        op1 = TREE_OPERAND (expr, 1);
        STRIP_NOPS (op0);
        STRIP_NOPS (op1);
-
-      if (is_gimple_val (op0))
-       cost0 = no_cost;
-      else
-       cost0 = force_expr_to_var_cost (op0, speed);
-
-      if (is_gimple_val (op1))
-       cost1 = no_cost;
-      else
-       cost1 = force_expr_to_var_cost (op1, speed);
-
        break;
  
+    CASE_CONVERT:
      case NEGATE_EXPR:
        op0 = TREE_OPERAND (expr, 0);
        STRIP_NOPS (op0);
        op1 = NULL_TREE;
-
-      if (is_gimple_val (op0))
-       cost0 = no_cost;
-      else
-       cost0 = force_expr_to_var_cost (op0, speed);
-
-      cost1 = no_cost;
        break;
  
      default:
@@ -3611,6 +4050,18 @@ force_expr_to_var_cost (tree expr, bool speed)
        return new_cost (target_spill_cost[speed], 0);
      }
  
+  if (op0 == NULL_TREE
+      || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
+    cost0 = no_cost;
+  else
+    cost0 = force_expr_to_var_cost (op0, speed);
+
+  if (op1 == NULL_TREE
+      || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
+    cost1 = no_cost;
+  else
+    cost1 = force_expr_to_var_cost (op1, speed);
+
    mode = TYPE_MODE (TREE_TYPE (expr));
    switch (TREE_CODE (expr))
      {
@@ -3636,6 +4087,16 @@ force_expr_to_var_cost (tree expr, bool speed)
          }
        break;
  
+    CASE_CONVERT:
+      {
+       tree inner_mode, outer_mode;
+       outer_mode = TREE_TYPE (expr);
+       inner_mode = TREE_TYPE (op0);
+       cost = new_cost (convert_cost (TYPE_MODE (outer_mode),
+                                      TYPE_MODE (inner_mode), speed), 0);
+      }
+      break;
+
      case MULT_EXPR:
        if (cst_and_fits_in_hwi (op0))
         cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
@@ -3694,7 +4155,7 @@ split_address_cost (struct ivopts_data *data,
    HOST_WIDE_INT bitsize;
    HOST_WIDE_INT bitpos;
    tree toffset;
-  enum machine_mode mode;
+  machine_mode mode;
    int unsignedp, volatilep;
  
    core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
@@ -3760,7 +4221,7 @@ ptr_difference_cost (struct ivopts_data *data,
    type = signed_type_for (TREE_TYPE (e1));
    tree_to_aff_combination (e1, type, &aff_e1);
    tree_to_aff_combination (e2, type, &aff_e2);
-  aff_combination_scale (&aff_e2, double_int_minus_one);
+  aff_combination_scale (&aff_e2, -1);
    aff_combination_add (&aff_e1, &aff_e2);
  
    return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
@@ -3777,7 +4238,7 @@ difference_cost (struct ivopts_data *data,
                  tree e1, tree e2, bool *symbol_present, bool *var_present,
                  unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
  {
-  enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
+  machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
    unsigned HOST_WIDE_INT off1, off2;
    aff_tree aff_e1, aff_e2;
    tree type;
@@ -3815,7 +4276,7 @@ difference_cost (struct ivopts_data *data,
    type = signed_type_for (TREE_TYPE (e1));
    tree_to_aff_combination (e1, type, &aff_e1);
    tree_to_aff_combination (e2, type, &aff_e2);
-  aff_combination_scale (&aff_e2, double_int_minus_one);
+  aff_combination_scale (&aff_e2, -1);
    aff_combination_add (&aff_e1, &aff_e2);
  
    return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
@@ -3852,7 +4313,7 @@ get_expr_id (struct ivopts_data *data, tree expr)
  
    ent.expr = expr;
    ent.hash = iterative_hash_expr (expr, 0);
-  slot = data->inv_expr_tab.find_slot (&ent, INSERT);
+  slot = data->inv_expr_tab->find_slot (&ent, INSERT);
    if (*slot)
      return (*slot)->id;
  
@@ -3920,7 +4381,7 @@ get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
  
    if (ratio == 1)
      {
-      if(operand_equal_p (ubase, cbase, 0))
+      if (operand_equal_p (ubase, cbase, 0))
          return -1;
  
        if (TREE_CODE (ubase) == ADDR_EXPR
@@ -3934,16 +4395,16 @@ get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
              {
                tree ind = TREE_OPERAND (usym, 1);
                if (TREE_CODE (ind) == INTEGER_CST
-                  && host_integerp (ind, 0)
-                  && TREE_INT_CST_LOW (ind) == 0)
+                  && tree_fits_shwi_p (ind)
+                  && tree_to_shwi (ind) == 0)
                  usym = TREE_OPERAND (usym, 0);
              }
            if (TREE_CODE (csym) == ARRAY_REF)
              {
                tree ind = TREE_OPERAND (csym, 1);
                if (TREE_CODE (ind) == INTEGER_CST
-                  && host_integerp (ind, 0)
-                  && TREE_INT_CST_LOW (ind) == 0)
+                  && tree_fits_shwi_p (ind)
+                  && tree_to_shwi (ind) == 0)
                  csym = TREE_OPERAND (csym, 0);
              }
            if (operand_equal_p (usym, csym, 0))
@@ -3959,7 +4420,7 @@ get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
    tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
    tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
  
-  aff_combination_scale (&cbase_aff, double_int::from_shwi (-1 * ratio));
+  aff_combination_scale (&cbase_aff, -1 * ratio);
    aff_combination_add (&ubase_aff, &cbase_aff);
    expr = aff_combination_to_tree (&ubase_aff);
    return get_expr_id (data, expr);
@@ -3989,9 +4450,9 @@ get_computation_cost_at (struct ivopts_data *data,
    HOST_WIDE_INT ratio, aratio;
    bool var_present, symbol_present, stmt_is_after_inc;
    comp_cost cost;
-  double_int rat;
+  widest_int rat;
    bool speed = optimize_bb_for_speed_p (gimple_bb (at));
-  enum machine_mode mem_mode = (address_p
+  machine_mode mem_mode = (address_p
                                 ? TYPE_MODE (TREE_TYPE (*use->op_p))
                                 : VOIDmode);
  
@@ -4048,7 +4509,7 @@ get_computation_cost_at (struct ivopts_data *data,
    if (!constant_multiple_of (ustep, cstep, &rat))
      return infinite_cost;
  
-  if (rat.fits_shwi ())
+  if (wi::fits_shwi_p (rat))
      ratio = rat.to_shwi ();
    else
      return infinite_cost;
@@ -4067,7 +4528,7 @@ get_computation_cost_at (struct ivopts_data *data,
  
    if (cst_and_fits_in_hwi (cbase))
      {
-      offset = - ratio * int_cst_value (cbase);
+      offset = - ratio * (unsigned HOST_WIDE_INT) int_cst_value (cbase);
        cost = difference_cost (data,
                               ubase, build_int_cst (utype, 0),
                               &symbol_present, &var_present, &offset,
@@ -4124,7 +4585,15 @@ get_computation_cost_at (struct ivopts_data *data,
        cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
      }
  
-  if (inv_expr_id)
+  /* Set of invariants depended on by sub use has already been computed
+     for the first use in the group.  */
+  if (use->sub_id)
+    {
+      cost.cost = 0;
+      if (depends_on && *depends_on)
+       bitmap_clear (*depends_on);
+    }
+  else if (inv_expr_id)
      {
        *inv_expr_id =
            get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
@@ -4253,6 +4722,8 @@ determine_use_iv_cost_address (struct ivopts_data *data,
    bitmap depends_on;
    bool can_autoinc;
    int inv_expr_id = -1;
+  struct iv_use *sub_use;
+  comp_cost sub_cost;
    comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
                                          &can_autoinc, &inv_expr_id);
  
@@ -4266,6 +4737,15 @@ determine_use_iv_cost_address (struct ivopts_data *data,
        else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
         cost = infinite_cost;
      }
+  for (sub_use = use->next;
+       sub_use && !infinite_cost_p (cost);
+       sub_use = sub_use->next)
+    {
+       sub_cost = get_computation_cost (data, sub_use, cand, true, &depends_on,
+                                       &can_autoinc, &inv_expr_id);
+       cost = add_costs (cost, sub_cost);
+    }
+
    set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
                     inv_expr_id);
  
@@ -4285,8 +4765,10 @@ cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
    tree steptype = type;
    if (POINTER_TYPE_P (type))
      steptype = sizetype;
+  steptype = unsigned_type_for (type);
  
-  tree_to_aff_combination (iv->step, steptype, &step);
+  tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
+  aff_combination_convert (&step, steptype);
    tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
    aff_combination_convert (&nit, steptype);
    aff_combination_mult (&nit, &step, &delta);
@@ -4294,6 +4776,8 @@ cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
      aff_combination_add (&delta, &step);
  
    tree_to_aff_combination (iv->base, type, val);
+  if (!POINTER_TYPE_P (type))
+    aff_combination_convert (val, steptype);
    aff_combination_add (val, &delta);
  }
  
@@ -4320,7 +4804,7 @@ iv_period (struct iv *iv)
  
    period = build_low_bits_mask (type,
                                  (TYPE_PRECISION (type)
-                                 - tree_low_cst (pow2div, 1)));
+                                 - tree_to_uhwi (pow2div)));
  
    return period;
  }
@@ -4342,75 +4826,20 @@ iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
    return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
  }
  
-static tree
-strip_wrap_conserving_type_conversions (tree exp)
-{
-  while (tree_ssa_useless_type_conversion (exp)
-        && (nowrap_type_p (TREE_TYPE (exp))
-            == nowrap_type_p (TREE_TYPE (TREE_OPERAND (exp, 0)))))
-    exp = TREE_OPERAND (exp, 0);
-  return exp;
-}
-
-/* Walk the SSA form and check whether E == WHAT.  Fairly simplistic, we
-   check for an exact match.  */
-
-static bool
-expr_equal_p (tree e, tree what)
-{
-  gimple stmt;
-  enum tree_code code;
-
-  e = strip_wrap_conserving_type_conversions (e);
-  what = strip_wrap_conserving_type_conversions (what);
-
-  code = TREE_CODE (what);
-  if (TREE_TYPE (e) != TREE_TYPE (what))
-    return false;
-
-  if (operand_equal_p (e, what, 0))
-    return true;
-
-  if (TREE_CODE (e) != SSA_NAME)
-    return false;
-
-  stmt = SSA_NAME_DEF_STMT (e);
-  if (gimple_code (stmt) != GIMPLE_ASSIGN
-      || gimple_assign_rhs_code (stmt) != code)
-    return false;
-
-  switch (get_gimple_rhs_class (code))
-    {
-    case GIMPLE_BINARY_RHS:
-      if (!expr_equal_p (gimple_assign_rhs2 (stmt), TREE_OPERAND (what, 1)))
-       return false;
-      /* Fallthru.  */
-
-    case GIMPLE_UNARY_RHS:
-    case GIMPLE_SINGLE_RHS:
-      return expr_equal_p (gimple_assign_rhs1 (stmt), TREE_OPERAND (what, 0));
-    default:
-      return false;
-    }
-}
-
  /* Returns true if we can prove that BASE - OFFSET does not overflow.  For now,
     we only detect the situation that BASE = SOMETHING + OFFSET, where the
     calculation is performed in non-wrapping type.
  
     TODO: More generally, we could test for the situation that
          BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
-        This would require knowing the sign of OFFSET.
-
-        Also, we only look for the first addition in the computation of BASE.
-        More complex analysis would be better, but introducing it just for
-        this optimization seems like an overkill.  */
+        This would require knowing the sign of OFFSET.  */
  
  static bool
-difference_cannot_overflow_p (tree base, tree offset)
+difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
  {
    enum tree_code code;
    tree e1, e2;
+  aff_tree aff_e1, aff_e2, aff_offset;
  
    if (!nowrap_type_p (TREE_TYPE (base)))
      return false;
@@ -4440,13 +4869,27 @@ difference_cannot_overflow_p (tree base, tree offset)
        e2 = TREE_OPERAND (base, 1);
      }
  
-  /* TODO: deeper inspection may be necessary to prove the equality.  */
+  /* Use affine expansion as deeper inspection to prove the equality.  */
+  tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
+                                 &aff_e2, &data->name_expansion_cache);
+  tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
+                                 &aff_offset, &data->name_expansion_cache);
+  aff_combination_scale (&aff_offset, -1);
    switch (code)
      {
      case PLUS_EXPR:
-      return expr_equal_p (e1, offset) || expr_equal_p (e2, offset);
+      aff_combination_add (&aff_e2, &aff_offset);
+      if (aff_combination_zero_p (&aff_e2))
+       return true;
+
+      tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
+                                     &aff_e1, &data->name_expansion_cache);
+      aff_combination_add (&aff_e1, &aff_offset);
+      return aff_combination_zero_p (&aff_e1);
+
      case POINTER_PLUS_EXPR:
-      return expr_equal_p (e2, offset);
+      aff_combination_add (&aff_e2, &aff_offset);
+      return aff_combination_zero_p (&aff_e2);
  
      default:
        return false;
@@ -4498,7 +4941,7 @@ iv_elimination_compare_lt (struct ivopts_data *data,
                            struct tree_niter_desc *niter)
  {
    tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
-  struct affine_tree_combination nit, tmpa, tmpb;
+  struct aff_tree nit, tmpa, tmpb;
    enum tree_code comp;
    HOST_WIDE_INT step;
  
@@ -4558,11 +5001,11 @@ iv_elimination_compare_lt (struct ivopts_data *data,
    tree_to_aff_combination (niter->niter, nit_type, &nit);
    tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
    tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
-  aff_combination_scale (&nit, double_int_minus_one);
-  aff_combination_scale (&tmpa, double_int_minus_one);
+  aff_combination_scale (&nit, -1);
+  aff_combination_scale (&tmpa, -1);
    aff_combination_add (&tmpb, &tmpa);
    aff_combination_add (&tmpb, &nit);
-  if (tmpb.n != 0 || tmpb.offset != double_int_one)
+  if (tmpb.n != 0 || tmpb.offset != 1)
      return false;
  
    /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
@@ -4570,7 +5013,7 @@ iv_elimination_compare_lt (struct ivopts_data *data,
    offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
                         cand->iv->step,
                         fold_convert (TREE_TYPE (cand->iv->step), a));
-  if (!difference_cannot_overflow_p (cand->iv->base, offset))
+  if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
      return false;
  
    /* Determine the new comparison operator.  */
@@ -4648,13 +5091,13 @@ may_eliminate_iv (struct ivopts_data *data,
       entire loop and compare against that instead.  */
    else
      {
-      double_int period_value, max_niter;
+      widest_int period_value, max_niter;
  
        max_niter = desc->max;
        if (stmt_after_increment (loop, cand, use->stmt))
-        max_niter += double_int_one;
-      period_value = tree_to_double_int (period);
-      if (max_niter.ugt (period_value))
+        max_niter += 1;
+      period_value = wi::to_widest (period);
+      if (wi::gtu_p (max_niter, period_value))
          {
            /* See if we can take advantage of inferred loop bound information.  */
            if (data->loop_single_exit_p)
@@ -4662,7 +5105,7 @@ may_eliminate_iv (struct ivopts_data *data,
                if (!max_loop_iterations (loop, &max_niter))
                  return false;
                /* The loop bound is already adjusted by adding 1.  */
-              if (max_niter.ugt (period_value))
+              if (wi::gtu_p (max_niter, period_value))
                  return false;
              }
            else
@@ -4672,7 +5115,8 @@ may_eliminate_iv (struct ivopts_data *data,
  
    cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
  
-  *bound = aff_combination_to_tree (&bnd);
+  *bound = fold_convert (TREE_TYPE (cand->iv->base),
+                        aff_combination_to_tree (&bnd));
    *comp = iv_elimination_compare (data, use);
  
    /* It is unlikely that computing the number of iterations using division
@@ -5099,8 +5543,8 @@ static void
  determine_set_costs (struct ivopts_data *data)
  {
    unsigned j, n;
-  gimple phi;
-  gimple_stmt_iterator psi;
+  gphi *phi;
+  gphi_iterator psi;
    tree op;
    struct loop *loop = data->current_loop;
    bitmap_iterator bi;
@@ -5117,7 +5561,7 @@ determine_set_costs (struct ivopts_data *data)
    n = 0;
    for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
      {
-      phi = gsi_stmt (psi);
+      phi = psi.phi ();
        op = PHI_RESULT (phi);
  
        if (virtual_operand_p (op))
@@ -5331,36 +5775,40 @@ iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
  }
  
  /* Extend set IVS by expressing USE by some of the candidates in it
-   if possible. All important candidates will be considered
-   if IMPORTANT_CANDIDATES is true.  */
+   if possible.  Consider all important candidates if candidates in
+   set IVS don't give any result.  */
  
  static void
  iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
-              struct iv_use *use, bool important_candidates)
+              struct iv_use *use)
  {
    struct cost_pair *best_cp = NULL, *cp;
    bitmap_iterator bi;
-  bitmap cands;
    unsigned i;
+  struct iv_cand *cand;
  
    gcc_assert (ivs->upto >= use->id);
+  ivs->upto++;
+  ivs->bad_uses++;
  
-  if (ivs->upto == use->id)
-    {
-      ivs->upto++;
-      ivs->bad_uses++;
-    }
-
-  cands = (important_candidates ? data->important_candidates : ivs->cands);
-  EXECUTE_IF_SET_IN_BITMAP (cands, 0, i, bi)
+  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
      {
-      struct iv_cand *cand = iv_cand (data, i);
-
+      cand = iv_cand (data, i);
        cp = get_use_iv_cost (data, use, cand);
-
        if (cheaper_cost_pair (cp, best_cp))
         best_cp = cp;
      }
+   
+  if (best_cp == NULL)
+    {
+      EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
+       {
+         cand = iv_cand (data, i);
+         cp = get_use_iv_cost (data, use, cand);
+         if (cheaper_cost_pair (cp, best_cp))
+           best_cp = cp;
+       }
+    }
  
    iv_ca_set_cp (data, ivs, use, best_cp);
  }
@@ -5442,7 +5890,6 @@ static struct iv_ca_delta *
  iv_ca_delta_reverse (struct iv_ca_delta *delta)
  {
    struct iv_ca_delta *act, *next, *prev = NULL;
-  struct cost_pair *tmp;
  
    for (act = delta; act; act = next)
      {
@@ -5450,9 +5897,7 @@ iv_ca_delta_reverse (struct iv_ca_delta *delta)
        act->next_change = prev;
        prev = act;
  
-      tmp = act->old_cp;
-      act->old_cp = act->new_cp;
-      act->new_cp = tmp;
+      std::swap (act->old_cp, act->new_cp);
      }
  
    return prev;
@@ -5635,18 +6080,20 @@ iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
  }
  
  /* Try narrowing set IVS by removing CAND.  Return the cost of
-   the new set and store the differences in DELTA.  */
+   the new set and store the differences in DELTA.  START is
+   the candidate with which we start narrowing.  */
  
  static comp_cost
  iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
-             struct iv_cand *cand, struct iv_ca_delta **delta)
+             struct iv_cand *cand, struct iv_cand *start,
+             struct iv_ca_delta **delta)
  {
    unsigned i, ci;
    struct iv_use *use;
    struct cost_pair *old_cp, *new_cp, *cp;
    bitmap_iterator bi;
    struct iv_cand *cnd;
-  comp_cost cost;
+  comp_cost cost, best_cost, acost;
  
    *delta = NULL;
    for (i = 0; i < n_iv_uses (data); i++)
@@ -5657,13 +6104,15 @@ iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
        if (old_cp->cand != cand)
         continue;
  
-      new_cp = NULL;
+      best_cost = iv_ca_cost (ivs);
+      /* Start narrowing with START.  */
+      new_cp = get_use_iv_cost (data, use, start);
  
        if (data->consider_all_candidates)
         {
           EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
             {
-             if (ci == cand->id)
+             if (ci == cand->id || (start && ci == start->id))
                 continue;
  
               cnd = iv_cand (data, ci);
@@ -5672,20 +6121,21 @@ iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
               if (!cp)
                 continue;
  
-             if (!iv_ca_has_deps (ivs, cp))
-                continue; 
-
-             if (!cheaper_cost_pair (cp, new_cp))
-               continue;
+             iv_ca_set_cp (data, ivs, use, cp);
+             acost = iv_ca_cost (ivs);
  
-             new_cp = cp;
+             if (compare_costs (acost, best_cost) < 0)
+               {
+                 best_cost = acost;
+                 new_cp = cp;
+               }
             }
         }
        else
         {
           EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
             {
-             if (ci == cand->id)
+             if (ci == cand->id || (start && ci == start->id))
                 continue;
  
               cnd = iv_cand (data, ci);
@@ -5693,15 +6143,19 @@ iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
               cp = get_use_iv_cost (data, use, cnd);
               if (!cp)
                 continue;
-             if (!iv_ca_has_deps (ivs, cp))
-               continue;
  
-             if (!cheaper_cost_pair (cp, new_cp))
-               continue;
+             iv_ca_set_cp (data, ivs, use, cp);
+             acost = iv_ca_cost (ivs);
  
-             new_cp = cp;
+             if (compare_costs (acost, best_cost) < 0)
+               {
+                 best_cost = acost;
+                 new_cp = cp;
+               }
             }
         }
+      /* Restore to old cp for use.  */
+      iv_ca_set_cp (data, ivs, use, old_cp);
  
        if (!new_cp)
         {
@@ -5743,7 +6197,7 @@ iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
        if (cand == except_cand)
         continue;
  
-      acost = iv_ca_narrow (data, ivs, cand, &act_delta);
+      acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
  
        if (compare_costs (acost, best_cost) < 0)
         {
@@ -5769,6 +6223,108 @@ iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
    return best_cost;
  }
  
+/* Check if CAND_IDX is a candidate other than OLD_CAND and has
+   cheaper local cost for USE than BEST_CP.  Return pointer to
+   the corresponding cost_pair, otherwise just return BEST_CP.
+
+   DATA is the ivopts state used to look up the candidate and its cost
+   for USE.  OLD_CAND and BEST_CP must both be non-NULL (asserted).
+   BEST_CP is also returned when CAND_IDX equals OLD_CAND's id or when
+   get_use_iv_cost returns NULL for USE and that candidate.  */
+
+static struct cost_pair*
+cheaper_cost_with_cand (struct ivopts_data *data, struct iv_use *use,
+                       unsigned int cand_idx, struct iv_cand *old_cand,
+                       struct cost_pair *best_cp)
+{
+  struct iv_cand *cand;
+  struct cost_pair *cp;
+
+  gcc_assert (old_cand != NULL && best_cp != NULL);
+  if (cand_idx == old_cand->id)
+    return best_cp;
+
+  cand = iv_cand (data, cand_idx);
+  cp = get_use_iv_cost (data, use, cand);
+  if (cp != NULL && cheaper_cost_pair (cp, best_cp))
+    return cp;
+
+  return best_cp;
+}
+
+/* Try breaking local optimal fixed-point for IVS by replacing candidates
+   which are used by more than one iv uses.  For each of those candidates,
+   this function tries to represent iv uses under that candidate using
+   other ones with lower local cost, then tries to prune the new set.
+   If the new set has lower cost, it returns the new cost after recording
+   candidate replacement in list DELTA.  Otherwise *DELTA is left NULL and
+   the original cost of IVS is returned.  Candidates with exactly one use,
+   or with more than ALWAYS_PRUNE_CAND_SET_BOUND uses, are skipped.  */
+
+static comp_cost
+iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
+              struct iv_ca_delta **delta)
+{
+  bitmap_iterator bi, bj;
+  unsigned int i, j, k;
+  struct iv_use *use;
+  struct iv_cand *cand;
+  comp_cost orig_cost, acost;
+  struct iv_ca_delta *act_delta, *tmp_delta;
+  struct cost_pair *old_cp, *best_cp = NULL;
+
+  *delta = NULL;
+  orig_cost = iv_ca_cost (ivs);
+
+  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
+    {
+      if (ivs->n_cand_uses[i] == 1
+         || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
+       continue;
+
+      cand = iv_cand (data, i);
+  
+      act_delta = NULL;
+      /* Represent uses under current candidate using other ones with
+         lower local cost.  Only uses with index below IVS->upto are
+         visited.  */
+      for (j = 0; j < ivs->upto; j++)
+       {
+         use = iv_use (data, j);
+         old_cp = iv_ca_cand_for_use (ivs, use);
+
+         if (old_cp->cand != cand)
+           continue;
+
+         best_cp = old_cp;
+         if (data->consider_all_candidates)
+           for (k = 0; k < n_iv_cands (data); k++)
+             best_cp = cheaper_cost_with_cand (data, use, k,
+                                               old_cp->cand, best_cp);
+         else
+           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, k, bj)
+             best_cp = cheaper_cost_with_cand (data, use, k,
+                                               old_cp->cand, best_cp);
+
+         if (best_cp == old_cp)
+           continue;
+
+         act_delta = iv_ca_delta_add (use, old_cp, best_cp, act_delta);
+       }
+      /* No use got a cheaper candidate, so there is no need for
+         further prune.  */
+      if (!act_delta)
+       continue;
+
+      /* Prune the new candidate set.  ACT_DELTA is committed with `true'
+         before pruning and with `false' afterwards, presumably applying
+         and then reverting it on IVS (confirm against
+         iv_ca_delta_commit); the pruning delta is then joined onto
+         ACT_DELTA.  */
+      iv_ca_delta_commit (data, ivs, act_delta, true);
+      acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
+      iv_ca_delta_commit (data, ivs, act_delta, false);
+      act_delta = iv_ca_delta_join (act_delta, tmp_delta);
+
+      if (compare_costs (acost, orig_cost) < 0)
+       {
+         *delta = act_delta;
+         return acost;
+       }
+      else
+       iv_ca_delta_free (&act_delta);
+    }
+
+  return orig_cost;
+}
+
  /* Tries to extend the sets IVS in the best possible way in order
     to express the USE.  If ORIGINALP is true, prefer candidates from
     the original set of IVs, otherwise favor important candidates not
@@ -5785,18 +6341,9 @@ try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
    struct iv_ca_delta *best_delta = NULL, *act_delta;
    struct cost_pair *cp;
  
-  iv_ca_add_use (data, ivs, use, false);
+  iv_ca_add_use (data, ivs, use);
    best_cost = iv_ca_cost (ivs);
-
    cp = iv_ca_cand_for_use (ivs, use);
-  if (!cp)
-    {
-      ivs->upto--;
-      ivs->bad_uses--;
-      iv_ca_add_use (data, ivs, use, true);
-      best_cost = iv_ca_cost (ivs);
-      cp = iv_ca_cand_for_use (ivs, use);
-    }
    if (cp)
      {
        best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
@@ -5911,10 +6458,13 @@ get_initial_solution (struct ivopts_data *data, bool originalp)
    return ivs;
  }
  
-/* Tries to improve set of induction variables IVS.  */
+/* Tries to improve set of induction variables IVS.  If *TRY_REPLACE_P is
+   true, also tries to break a local optimal fixed-point by replacing
+   candidates in IVS; *TRY_REPLACE_P is cleared once that is attempted.  */
  
  static bool
-try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
+try_improve_iv_set (struct ivopts_data *data,
+                   struct iv_ca *ivs, bool *try_replace_p)
  {
    unsigned i, n_ivs;
    comp_cost acost, best_cost = iv_ca_cost (ivs);
@@ -5958,7 +6508,20 @@ try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
        /* Try removing the candidates from the set instead.  */
        best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
  
-      /* Nothing more we can do.  */
+      if (!best_delta && *try_replace_p)
+       {
+         *try_replace_p = false;
+         /* So far the candidate selection algorithm tends to choose fewer
+            IVs, so it handles cases in which loops have many variables but
+            the best choice is often to use only one general biv.  One
+            weakness is that it can't handle the opposite cases, in which
+            different candidates should be chosen for each use.  To solve
+            the problem, we replace candidates in the manner described in
+            the comment of iv_ca_replace, thus giving the general algorithm
+            a chance to break the local optimal fixed-point in these cases.  */
+         best_cost = iv_ca_replace (data, ivs, &best_delta);
+       }
+
        if (!best_delta)
         return false;
      }
@@ -5977,6 +6540,7 @@ static struct iv_ca *
  find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
  {
    struct iv_ca *set;
+  bool try_replace_p = true;
  
    /* Get the initial solution.  */
    set = get_initial_solution (data, originalp);
@@ -5993,7 +6557,7 @@ find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
        iv_ca_dump (data, dump_file, set);
      }
  
-  while (try_improve_iv_set (data, set))
+  while (try_improve_iv_set (data, set, &try_replace_p))
      {
        if (dump_file && (dump_flags & TDF_DETAILS))
         {
@@ -6118,7 +6682,12 @@ create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
  
    if (dump_file && (dump_flags & TDF_DETAILS))
      {
-      fprintf (dump_file, "\nSelected IV set: \n");
+      fprintf (dump_file, "Selected IV set for loop %d",
+              data->current_loop->num);
+      if (data->loop_loc != UNKNOWN_LOCATION)
+       fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
+                LOCATION_LINE (data->loop_loc));
+      fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
        EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
          {
            cand = iv_cand (data, i);
@@ -6137,7 +6706,7 @@ rewrite_use_nonlinear_expr (struct ivopts_data *data,
  {
    tree comp;
    tree op, tgt;
-  gimple ass;
+  gassign *ass;
    gimple_stmt_iterator bsi;
  
    /* An important special case -- if we are asked to express value of
@@ -6320,8 +6889,8 @@ adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
  /* Rewrites USE (address that is an iv) using candidate CAND.  */
  
  static void
-rewrite_use_address (struct ivopts_data *data,
-                    struct iv_use *use, struct iv_cand *cand)
+rewrite_use_address_1 (struct ivopts_data *data,
+                      struct iv_use *use, struct iv_cand *cand)
  {
    aff_tree aff;
    gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
@@ -6356,6 +6925,28 @@ rewrite_use_address (struct ivopts_data *data,
    *use->op_p = ref;
  }
  
+/* Rewrites USE (address that is an iv) using candidate CAND.  USE must be
+   the first use of its group (sub_id 0, asserted); every sub use chained
+   through USE->next is rewritten with the same CAND as well, and
+   update_stmt is called on each rewritten statement.  */
+
+static void
+rewrite_use_address (struct ivopts_data *data,
+                     struct iv_use *use, struct iv_cand *cand)
+{
+  struct iv_use *next;
+
+  gcc_assert (use->sub_id == 0);
+  rewrite_use_address_1 (data, use, cand);
+  update_stmt (use->stmt);
+
+  for (next = use->next; next != NULL; next = next->next)
+    {
+      rewrite_use_address_1 (data, next, cand);
+      update_stmt (next->stmt);
+    }
+
+  return;
+}
+
  /* Rewrites USE (the condition such that one of the arguments is an iv) using
     candidate CAND.  */
  
@@ -6389,9 +6980,10 @@ rewrite_use_compare (struct ivopts_data *data,
                 loop_preheader_edge (data->current_loop),
                 stmts);
  
-      gimple_cond_set_lhs (use->stmt, var);
-      gimple_cond_set_code (use->stmt, compare);
-      gimple_cond_set_rhs (use->stmt, op);
+      gcond *cond_stmt = as_a <gcond *> (use->stmt);
+      gimple_cond_set_lhs (cond_stmt, var);
+      gimple_cond_set_code (cond_stmt, compare);
+      gimple_cond_set_rhs (cond_stmt, op);
        return;
      }
  
@@ -6554,7 +7146,8 @@ remove_unused_ivs (struct ivopts_data *data)
                     DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
                   else
                     DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
-                 gimple def_temp = gimple_build_debug_bind (vexpr, comp, NULL);
+                 gdebug *def_temp
+                   = gimple_build_debug_bind (vexpr, comp, NULL);
                   gimple_stmt_iterator gsi;
  
                   if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
@@ -6587,15 +7180,12 @@ remove_unused_ivs (struct ivopts_data *data)
  }
  
  /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
-   for pointer_map_traverse.  */
+   for hash_map::traverse.  */
  
-static bool
-free_tree_niter_desc (const void *key ATTRIBUTE_UNUSED, void **value,
-                      void *data ATTRIBUTE_UNUSED)
+bool
+free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
  {
-  struct tree_niter_desc *const niter = (struct tree_niter_desc *) *value;
-
-  free (niter);
+  free (value);
    return true;
  }
  
@@ -6610,8 +7200,8 @@ free_loop_data (struct ivopts_data *data)
  
    if (data->niters)
      {
-      pointer_map_traverse (data->niters, free_tree_niter_desc, NULL);
-      pointer_map_destroy (data->niters);
+      data->niters->traverse<void *, free_tree_niter_desc> (NULL);
+      delete data->niters;
        data->niters = NULL;
      }
  
@@ -6632,6 +7222,18 @@ free_loop_data (struct ivopts_data *data)
    for (i = 0; i < n_iv_uses (data); i++)
      {
        struct iv_use *use = iv_use (data, i);
+      struct iv_use *pre = use, *sub = use->next;
+
+      while (sub)
+       {
+         gcc_assert (sub->related_cands == NULL);
+         gcc_assert (sub->n_map_members == 0 && sub->cost_map == NULL);
+
+         free (sub->iv);
+         pre = sub;
+         sub = sub->next;
+         free (pre);
+       }
  
        free (use->iv);
        BITMAP_FREE (use->related_cands);
@@ -6668,7 +7270,7 @@ free_loop_data (struct ivopts_data *data)
  
    decl_rtl_to_reset.truncate (0);
  
-  data->inv_expr_tab.empty ();
+  data->inv_expr_tab->empty ();
    data->inv_expr_id = 0;
  }
  
@@ -6686,7 +7288,9 @@ tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
    decl_rtl_to_reset.release ();
    data->iv_uses.release ();
    data->iv_candidates.release ();
-  data->inv_expr_tab.dispose ();
+  delete data->inv_expr_tab;
+  data->inv_expr_tab = NULL;
+  free_affine_expand_cache (&data->name_expansion_cache);
  }
  
  /* Returns true if the loop body BODY includes any function calls.  */
@@ -6720,11 +7324,16 @@ tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
  
    gcc_assert (!data->niters);
    data->current_loop = loop;
+  data->loop_loc = find_loop_location (loop);
    data->speed = optimize_loop_for_speed_p (loop);
  
    if (dump_file && (dump_flags & TDF_DETAILS))
      {
-      fprintf (dump_file, "Processing loop %d\n", loop->num);
+      fprintf (dump_file, "Processing loop %d", loop->num);
+      if (data->loop_loc != UNKNOWN_LOCATION)
+       fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
+                LOCATION_LINE (data->loop_loc));
+      fprintf (dump_file, "\n");
  
        if (exit)
         {
@@ -6751,6 +7360,7 @@ tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
  
    /* Finds interesting uses (item 1).  */
    find_interesting_uses (data);
+  group_address_uses (data);
    if (n_iv_uses (data) > MAX_CONSIDERED_USES)
      goto finish;
  
@@ -6796,12 +7406,11 @@ tree_ssa_iv_optimize (void)
  {
    struct loop *loop;
    struct ivopts_data data;
-  loop_iterator li;
  
    tree_ssa_iv_optimize_init (&data);
  
    /* Optimize the loops starting with the innermost ones.  */
-  FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
+  FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
      {
        if (dump_file && (dump_flags & TDF_DETAILS))
         flow_loop_dump (loop, dump_file, NULL, 1);