i386.c (legitimize_tls_address): Generate tls_initial_exec_64_sun only when !TARGET_X32.
[gcc.git] / gcc / tree-vect-loop.c
index fb15d6e3d03ded602c4cd655a2fbdc67c493b459..305ea7ea5a88c4459e08237afa9a411120973aed 100644 (file)
@@ -1,5 +1,5 @@
 /* Loop Vectorization
-   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
+   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
    Free Software Foundation, Inc.
    Contributed by Dorit Naishlos <dorit@il.ibm.com> and
    Ira Rosen <irar@il.ibm.com>
@@ -181,8 +181,10 @@ vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
   stmt_vec_info stmt_info;
   int i;
   HOST_WIDE_INT dummy;
-  gimple stmt, pattern_stmt = NULL, pattern_def_stmt = NULL;
-  bool analyze_pattern_stmt = false, pattern_def = false;
+  gimple stmt, pattern_stmt = NULL;
+  gimple_seq pattern_def_seq = NULL;
+  gimple_stmt_iterator pattern_def_si = gsi_none ();
+  bool analyze_pattern_stmt = false;
 
   if (vect_print_dump_info (REPORT_DETAILS))
     fprintf (vect_dump, "=== vect_determine_vectorization_factor ===");
@@ -248,10 +250,7 @@ vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
           tree vf_vectype;
 
           if (analyze_pattern_stmt)
-            {
-              stmt = pattern_stmt;
-              analyze_pattern_stmt = false;
-            }
+           stmt = pattern_stmt;
           else
             stmt = gsi_stmt (si);
 
@@ -296,28 +295,54 @@ vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
                        || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
             analyze_pattern_stmt = true;
 
-          /* If a pattern statement has a def stmt, analyze it too.  */
-          if (is_pattern_stmt_p (stmt_info)
-              && (pattern_def_stmt = STMT_VINFO_PATTERN_DEF_STMT (stmt_info))
-              && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
-                  || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt))))
-            {
-              if (pattern_def)
-                pattern_def = false;
-              else
-                {
-                  if (vect_print_dump_info (REPORT_DETAILS))
-                    {
-                      fprintf (vect_dump, "==> examining pattern def stmt: ");
-                      print_gimple_stmt (vect_dump, pattern_def_stmt, 0,
-                                         TDF_SLIM);
-                    }
+         /* If a pattern statement has def stmts, analyze them too.  */
+         if (is_pattern_stmt_p (stmt_info))
+           {
+             if (pattern_def_seq == NULL)
+               {
+                 pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info);
+                 pattern_def_si = gsi_start (pattern_def_seq);
+               }
+             else if (!gsi_end_p (pattern_def_si))
+               gsi_next (&pattern_def_si);
+             if (pattern_def_seq != NULL)
+               {
+                 gimple pattern_def_stmt = NULL;
+                 stmt_vec_info pattern_def_stmt_info = NULL;
 
-                  pattern_def = true;
-                  stmt = pattern_def_stmt;
-                  stmt_info = vinfo_for_stmt (stmt);
-                }
-            }
+                 while (!gsi_end_p (pattern_def_si))
+                   {
+                     pattern_def_stmt = gsi_stmt (pattern_def_si);
+                     pattern_def_stmt_info
+                       = vinfo_for_stmt (pattern_def_stmt);
+                     if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
+                         || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
+                       break;
+                     gsi_next (&pattern_def_si);
+                   }
+
+                 if (!gsi_end_p (pattern_def_si))
+                   {
+                     if (vect_print_dump_info (REPORT_DETAILS))
+                       {
+                         fprintf (vect_dump,
+                                  "==> examining pattern def stmt: ");
+                         print_gimple_stmt (vect_dump, pattern_def_stmt, 0,
+                                            TDF_SLIM);
+                       }
+
+                     stmt = pattern_def_stmt;
+                     stmt_info = pattern_def_stmt_info;
+                   }
+                 else
+                   {
+                     pattern_def_si = gsi_none ();
+                     analyze_pattern_stmt = false;
+                   }
+               }
+             else
+               analyze_pattern_stmt = false;
+           }
 
          if (gimple_get_lhs (stmt) == NULL_TREE)
            {
@@ -347,7 +372,7 @@ vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
                 idiom).  */
              gcc_assert (STMT_VINFO_DATA_REF (stmt_info)
                          || is_pattern_stmt_p (stmt_info)
-                         || pattern_def);
+                         || !gsi_end_p (pattern_def_si));
              vectype = STMT_VINFO_VECTYPE (stmt_info);
            }
          else
@@ -425,8 +450,11 @@ vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
              || (nunits > vectorization_factor))
            vectorization_factor = nunits;
 
-          if (!analyze_pattern_stmt && !pattern_def)
-            gsi_next (&si);
+         if (!analyze_pattern_stmt && gsi_end_p (pattern_def_si))
+           {
+             pattern_def_seq = NULL;
+             gsi_next (&si);
+           }
         }
     }
 
@@ -537,11 +565,15 @@ vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, struct loop *loop)
       /* Analyze the evolution function.  */
       access_fn = analyze_scalar_evolution (loop, def);
       if (access_fn)
-       STRIP_NOPS (access_fn);
-      if (access_fn && vect_print_dump_info (REPORT_DETAILS))
        {
-         fprintf (vect_dump, "Access function of PHI: ");
-         print_generic_expr (vect_dump, access_fn, TDF_SLIM);
+         STRIP_NOPS (access_fn);
+         if (vect_print_dump_info (REPORT_DETAILS))
+           {
+             fprintf (vect_dump, "Access function of PHI: ");
+             print_generic_expr (vect_dump, access_fn, TDF_SLIM);
+           }
+         STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo)
+           = evolution_part_in_loop_num (access_fn, loop->num);
        }
 
       if (!access_fn
@@ -551,6 +583,8 @@ vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, struct loop *loop)
          continue;
        }
 
+      gcc_assert (STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo) != NULL_TREE);
+
       if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Detected induction.");
       STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_induction_def;
@@ -813,7 +847,7 @@ new_loop_vec_info (struct loop *loop)
   LOOP_VINFO_MAY_ALIAS_DDRS (res) =
     VEC_alloc (ddr_p, heap,
                PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS));
-  LOOP_VINFO_STRIDED_STORES (res) = VEC_alloc (gimple, heap, 10);
+  LOOP_VINFO_GROUPED_STORES (res) = VEC_alloc (gimple, heap, 10);
   LOOP_VINFO_REDUCTIONS (res) = VEC_alloc (gimple, heap, 10);
   LOOP_VINFO_REDUCTION_CHAINS (res) = VEC_alloc (gimple, heap, 10);
   LOOP_VINFO_SLP_INSTANCES (res) = VEC_alloc (slp_instance, heap, 10);
@@ -889,7 +923,7 @@ destroy_loop_vec_info (loop_vec_info loop_vinfo, bool clean_stmts)
     vect_free_slp_instance (instance);
 
   VEC_free (slp_instance, heap, LOOP_VINFO_SLP_INSTANCES (loop_vinfo));
-  VEC_free (gimple, heap, LOOP_VINFO_STRIDED_STORES (loop_vinfo));
+  VEC_free (gimple, heap, LOOP_VINFO_GROUPED_STORES (loop_vinfo));
   VEC_free (gimple, heap, LOOP_VINFO_REDUCTIONS (loop_vinfo));
   VEC_free (gimple, heap, LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo));
 
@@ -1201,6 +1235,7 @@ vect_analyze_loop_operations (loop_vec_info loop_vinfo, bool slp)
   int min_scalar_loop_bound;
   unsigned int th;
   bool only_slp_in_loop = true, ok;
+  HOST_WIDE_INT max_niter;
 
   if (vect_print_dump_info (REPORT_DETAILS))
     fprintf (vect_dump, "=== vect_analyze_loop_operations ===");
@@ -1292,7 +1327,9 @@ vect_analyze_loop_operations (loop_vec_info loop_vinfo, bool slp)
                     return false;
 
                   op_def_stmt = SSA_NAME_DEF_STMT (phi_op);
-                  if (!op_def_stmt || !vinfo_for_stmt (op_def_stmt))
+                 if (!op_def_stmt
+                     || !flow_bb_inside_loop_p (loop, gimple_bb (op_def_stmt))
+                     || !vinfo_for_stmt (op_def_stmt))
                     return false;
 
                   if (STMT_VINFO_RELEVANT (vinfo_for_stmt (op_def_stmt))
@@ -1373,8 +1410,10 @@ vect_analyze_loop_operations (loop_vec_info loop_vinfo, bool slp)
         "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
         vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));
 
-  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
-      && (LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor))
+  if ((LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
+       && (LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor))
+      || ((max_niter = max_stmt_executions_int (loop)) != -1
+         && (unsigned HOST_WIDE_INT) max_niter < vectorization_factor))
     {
       if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
         fprintf (vect_dump, "not vectorized: iteration count too small.");
@@ -1486,7 +1525,7 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo)
 
   vect_analyze_scalar_cycles (loop_vinfo);
 
-  vect_pattern_recog (loop_vinfo);
+  vect_pattern_recog (loop_vinfo, NULL);
 
   /* Data-flow analysis to detect stmts that do not need to be vectorized.  */
 
@@ -1892,7 +1931,7 @@ vect_is_slp_reduction (loop_vec_info loop_info, gimple phi, gimple first_stmt)
              swap_tree_operands (next_stmt,
                                  gimple_assign_rhs1_ptr (next_stmt),
                                   gimple_assign_rhs2_ptr (next_stmt));
-             mark_symbols_for_renaming (next_stmt);
+             update_stmt (next_stmt);
            }
          else
            return false;
@@ -2389,7 +2428,8 @@ vect_get_single_scalar_iteraion_cost (loop_vec_info loop_vinfo)
           if (stmt_info
               && !STMT_VINFO_RELEVANT_P (stmt_info)
               && (!STMT_VINFO_LIVE_P (stmt_info)
-                  || STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def))
+                  || !VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
+             && !STMT_VINFO_IN_PATTERN_P (stmt_info))
             continue;
 
           if (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)))
@@ -2536,15 +2576,46 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
        {
          gimple stmt = gsi_stmt (si);
          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+
+         if (STMT_VINFO_IN_PATTERN_P (stmt_info))
+           {
+             stmt = STMT_VINFO_RELATED_STMT (stmt_info);
+             stmt_info = vinfo_for_stmt (stmt);
+           }
+
          /* Skip stmts that are not vectorized inside the loop.  */
          if (!STMT_VINFO_RELEVANT_P (stmt_info)
              && (!STMT_VINFO_LIVE_P (stmt_info)
-                 || STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def))
+                 || !VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info))))
            continue;
+
          vec_inside_cost += STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) * factor;
          /* FIXME: for stmts in the inner-loop in outer-loop vectorization,
             some of the "outside" costs are generated inside the outer-loop.  */
          vec_outside_cost += STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info);
+          if (is_pattern_stmt_p (stmt_info)
+             && STMT_VINFO_PATTERN_DEF_SEQ (stmt_info))
+            {
+             gimple_stmt_iterator gsi;
+             
+             for (gsi = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
+                  !gsi_end_p (gsi); gsi_next (&gsi))
+                {
+                  gimple pattern_def_stmt = gsi_stmt (gsi);
+                  stmt_vec_info pattern_def_stmt_info
+                   = vinfo_for_stmt (pattern_def_stmt);
+                  if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
+                      || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
+                   {
+                      vec_inside_cost
+                       += STMT_VINFO_INSIDE_OF_LOOP_COST
+                          (pattern_def_stmt_info) * factor;
+                      vec_outside_cost
+                       += STMT_VINFO_OUTSIDE_OF_LOOP_COST
+                          (pattern_def_stmt_info);
+                    }
+               }
+           }
        }
     }
 
@@ -2981,6 +3052,8 @@ get_initial_def_for_induction (gimple iv_phi)
     }
   else
     {
+      VEC(constructor_elt,gc) *v;
+
       /* iv_loop is the loop to be vectorized. Create:
         vec_init = [X, X+S, X+2*S, X+3*S] (S = step_expr, X = init_expr)  */
       new_var = vect_get_new_vect_var (scalar_type, vect_scalar_var, "var_");
@@ -2993,8 +3066,8 @@ get_initial_def_for_induction (gimple iv_phi)
          gcc_assert (!new_bb);
        }
 
-      t = NULL_TREE;
-      t = tree_cons (NULL_TREE, new_name, t);
+      v = VEC_alloc (constructor_elt, gc, nunits);
+      CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, new_name);
       for (i = 1; i < nunits; i++)
        {
          /* Create: new_name_i = new_name + step_expr  */
@@ -3013,10 +3086,10 @@ get_initial_def_for_induction (gimple iv_phi)
              fprintf (vect_dump, "created new init_stmt: ");
              print_gimple_stmt (vect_dump, init_stmt, 0, TDF_SLIM);
            }
-         t = tree_cons (NULL_TREE, new_name, t);
+         CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, new_name);
        }
       /* Create a vector from [new_name_0, new_name_1, ..., new_name_nunits-1]  */
-      vec = build_constructor_from_list (vectype, nreverse (t));
+      vec = build_constructor (vectype, v);
       vec_init = vect_init_vector (iv_phi, vec, vectype, NULL);
     }
 
@@ -3245,7 +3318,7 @@ get_initial_def_for_reduction (gimple stmt, tree init_val,
   enum tree_code code = gimple_assign_rhs_code (stmt);
   tree def_for_init;
   tree init_def;
-  tree t = NULL_TREE;
+  tree *elts;
   int i;
   bool nested_in_vect_loop = false;
   tree init_value;
@@ -3326,23 +3399,31 @@ get_initial_def_for_reduction (gimple stmt, tree init_val,
           def_for_init = build_int_cst (scalar_type, int_init_val);
 
         /* Create a vector of '0' or '1' except the first element.  */
+       elts = XALLOCAVEC (tree, nunits);
         for (i = nunits - 2; i >= 0; --i)
-          t = tree_cons (NULL_TREE, def_for_init, t);
+         elts[i + 1] = def_for_init;
 
         /* Option1: the first element is '0' or '1' as well.  */
         if (adjustment_def)
           {
-            t = tree_cons (NULL_TREE, def_for_init, t);
-            init_def = build_vector (vectype, t);
+           elts[0] = def_for_init;
+            init_def = build_vector (vectype, elts);
             break;
           }
 
         /* Option2: the first element is INIT_VAL.  */
-        t = tree_cons (NULL_TREE, init_value, t);
+       elts[0] = init_val;
         if (TREE_CONSTANT (init_val))
-          init_def = build_vector (vectype, t);
+          init_def = build_vector (vectype, elts);
         else
-          init_def = build_constructor_from_list (vectype, t);
+         {
+           VEC(constructor_elt,gc) *v;
+           v = VEC_alloc (constructor_elt, gc, nunits);
+           CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, init_val);
+           for (i = 1; i < nunits; ++i)
+             CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[i]);
+           init_def = build_constructor (vectype, v);
+         }
 
         break;
 
@@ -3462,6 +3543,7 @@ vect_create_epilog_for_reduction (VEC (tree, heap) *vect_defs, gimple stmt,
   gimple use_stmt, orig_stmt, reduction_phi = NULL;
   bool nested_in_vect_loop = false;
   VEC (gimple, heap) *new_phis = NULL;
+  VEC (gimple, heap) *inner_phis = NULL;
   enum vect_def_type dt = vect_unknown_def_type;
   int j, i;
   VEC (tree, heap) *scalar_results = NULL;
@@ -3470,6 +3552,7 @@ vect_create_epilog_for_reduction (VEC (tree, heap) *vect_defs, gimple stmt,
   VEC (gimple, heap) *phis;
   bool slp_reduc = false;
   tree new_phi_result;
+  gimple inner_phi = NULL;
 
   if (slp_node)
     group_size = VEC_length (gimple, SLP_TREE_SCALAR_STMTS (slp_node)); 
@@ -3626,11 +3709,36 @@ vect_create_epilog_for_reduction (VEC (tree, heap) *vect_defs, gimple stmt,
     }
 
   /* The epilogue is created for the outer-loop, i.e., for the loop being
-     vectorized.  */
+     vectorized.  Create exit phis for the outer loop.  */
   if (double_reduc)
     {
       loop = outer_loop;
       exit_bb = single_exit (loop)->dest;
+      inner_phis = VEC_alloc (gimple, heap, VEC_length (tree, vect_defs));
+      FOR_EACH_VEC_ELT (gimple, new_phis, i, phi)
+       {
+         gimple outer_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (phi)),
+                                             exit_bb);
+         SET_PHI_ARG_DEF (outer_phi, single_exit (loop)->dest_idx,
+                          PHI_RESULT (phi));
+         set_vinfo_for_stmt (outer_phi, new_stmt_vec_info (outer_phi,
+                                                           loop_vinfo, NULL));
+         VEC_quick_push (gimple, inner_phis, phi);
+         VEC_replace (gimple, new_phis, i, outer_phi);
+         prev_phi_info = vinfo_for_stmt (outer_phi);
+          while (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi)))
+            {
+             phi = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi));
+             outer_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (phi)),
+                                          exit_bb);
+             SET_PHI_ARG_DEF (outer_phi, single_exit (loop)->dest_idx,
+                              PHI_RESULT (phi));
+             set_vinfo_for_stmt (outer_phi, new_stmt_vec_info (outer_phi,
+                                                       loop_vinfo, NULL));
+             STMT_VINFO_RELATED_STMT (prev_phi_info) = outer_phi;
+             prev_phi_info = vinfo_for_stmt (outer_phi);
+           }
+       }
     }
 
   exit_gsi = gsi_after_labels (exit_bb);
@@ -4040,6 +4148,8 @@ vect_finalize_reduction:
         {
           epilog_stmt = VEC_index (gimple, new_phis, k / ratio);
           reduction_phi = VEC_index (gimple, reduction_phis, k / ratio);
+         if (double_reduc)
+           inner_phi = VEC_index (gimple, inner_phis, k / ratio);
         }
 
       if (slp_reduc)
@@ -4123,7 +4233,7 @@ vect_finalize_reduction:
                      vs1 was created previously in this function by a call to
                        vect_get_vec_def_for_operand and is stored in
                        vec_initial_def;
-                     vs2 is defined by EPILOG_STMT, the vectorized EXIT_PHI;
+                     vs2 is defined by INNER_PHI, the vectorized EXIT_PHI;
                      vs0 is created here.  */
 
                   /* Create vector phi node.  */
@@ -4144,7 +4254,7 @@ vect_finalize_reduction:
                   add_phi_arg (vect_phi, vect_phi_init,
                                loop_preheader_edge (outer_loop),
                                UNKNOWN_LOCATION);
-                  add_phi_arg (vect_phi, PHI_RESULT (epilog_stmt),
+                  add_phi_arg (vect_phi, PHI_RESULT (inner_phi),
                                loop_latch_edge (outer_loop), UNKNOWN_LOCATION);
                   if (vect_print_dump_info (REPORT_DETAILS))
                     {
@@ -4429,7 +4539,7 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
       if (i == 0 && code == COND_EXPR)
         continue;
 
-      is_simple_use = vect_is_simple_use_1 (ops[i], loop_vinfo, NULL,
+      is_simple_use = vect_is_simple_use_1 (ops[i], stmt, loop_vinfo, NULL,
                                            &def_stmt, &def, &dt, &tem);
       if (!vectype_in)
        vectype_in = tem;
@@ -4450,8 +4560,8 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
         }
     }
 
-  is_simple_use = vect_is_simple_use_1 (ops[i], loop_vinfo, NULL, &def_stmt,
-                                       &def, &dt, &tem);
+  is_simple_use = vect_is_simple_use_1 (ops[i], stmt, loop_vinfo, NULL,
+                                       &def_stmt, &def, &dt, &tem);
   if (!vectype_in)
     vectype_in = tem;
   gcc_assert (is_simple_use);
@@ -4807,14 +4917,14 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
               gimple dummy_stmt;
               tree dummy;
 
-              vect_is_simple_use (ops[!reduc_index], loop_vinfo, NULL,
+              vect_is_simple_use (ops[!reduc_index], stmt, loop_vinfo, NULL,
                                   &dummy_stmt, &dummy, &dt);
               loop_vec_def0 = vect_get_vec_def_for_stmt_copy (dt,
                                                               loop_vec_def0);
               VEC_replace (tree, vec_oprnds0, 0, loop_vec_def0);
               if (op_type == ternary_op)
                 {
-                  vect_is_simple_use (op1, loop_vinfo, NULL, &dummy_stmt,
+                  vect_is_simple_use (op1, stmt, loop_vinfo, NULL, &dummy_stmt,
                                       &dummy, &dt);
                   loop_vec_def1 = vect_get_vec_def_for_stmt_copy (dt,
                                                                 loop_vec_def1);
@@ -4953,12 +5063,46 @@ vectorizable_induction (gimple phi, gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED,
   tree vec_def;
 
   gcc_assert (ncopies >= 1);
-  /* FORNOW. This restriction should be relaxed.  */
-  if (nested_in_vect_loop_p (loop, phi) && ncopies > 1)
+  /* FORNOW. These restrictions should be relaxed.  */
+  if (nested_in_vect_loop_p (loop, phi))
     {
-      if (vect_print_dump_info (REPORT_DETAILS))
-        fprintf (vect_dump, "multiple types in nested loop.");
-      return false;
+      imm_use_iterator imm_iter;
+      use_operand_p use_p;
+      gimple exit_phi;
+      edge latch_e;
+      tree loop_arg;
+
+      if (ncopies > 1)
+       {
+         if (vect_print_dump_info (REPORT_DETAILS))
+           fprintf (vect_dump, "multiple types in nested loop.");
+         return false;
+       }
+
+      exit_phi = NULL;
+      latch_e = loop_latch_edge (loop->inner);
+      loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
+      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, loop_arg)
+       {
+         if (!flow_bb_inside_loop_p (loop->inner,
+                                     gimple_bb (USE_STMT (use_p))))
+           {
+             exit_phi = USE_STMT (use_p);
+             break;
+           }
+       }
+      if (exit_phi)
+       {
+         stmt_vec_info exit_phi_vinfo  = vinfo_for_stmt (exit_phi);
+         if (!(STMT_VINFO_RELEVANT_P (exit_phi_vinfo)
+               && !STMT_VINFO_LIVE_P (exit_phi_vinfo)))
+           {
+             if (vect_print_dump_info (REPORT_DETAILS))
+               fprintf (vect_dump, "inner-loop induction only used outside "
+                        "of the outer vectorized loop.");
+             return false;
+           }
+       }
     }
 
   if (!STMT_VINFO_RELEVANT_P (stmt_info))
@@ -5046,7 +5190,8 @@ vectorizable_live_operation (gimple stmt,
       else
        op = gimple_op (stmt, i + 1);
       if (op
-          && !vect_is_simple_use (op, loop_vinfo, NULL, &def_stmt, &def, &dt))
+          && !vect_is_simple_use (op, stmt, loop_vinfo, NULL, &def_stmt, &def,
+                                 &dt))
         {
           if (vect_print_dump_info (REPORT_DETAILS))
             fprintf (vect_dump, "use not simple.");
@@ -5115,35 +5260,51 @@ vect_transform_loop (loop_vec_info loop_vinfo)
   int i;
   tree ratio = NULL;
   int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
-  bool strided_store;
+  bool grouped_store;
   bool slp_scheduled = false;
   unsigned int nunits;
-  tree cond_expr = NULL_TREE;
-  gimple_seq cond_expr_stmt_list = NULL;
-  bool do_peeling_for_loop_bound;
-  gimple stmt, pattern_stmt, pattern_def_stmt;
-  bool transform_pattern_stmt = false, pattern_def = false;
+  gimple stmt, pattern_stmt;
+  gimple_seq pattern_def_seq = NULL;
+  gimple_stmt_iterator pattern_def_si = gsi_none ();
+  bool transform_pattern_stmt = false;
+  bool check_profitability;
+  int th;
 
   if (vect_print_dump_info (REPORT_DETAILS))
     fprintf (vect_dump, "=== vec_transform_loop ===");
 
+  /* Use the more conservative vectorization threshold.  If the number
+     of iterations is constant, assume the cost check has been performed
+     by our caller.  If the threshold makes all loops profitable that
+     run at least the vectorization factor number of times, checking
+     is pointless, too.  */
+  th = ((PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)
+        * LOOP_VINFO_VECT_FACTOR (loop_vinfo)) - 1);
+  th = MAX (th, LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo));
+  if (th >= LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 1
+      && !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
+    {
+      if (vect_print_dump_info (REPORT_COST))
+       fprintf (vect_dump,
+                "Profitability threshold is %d loop iterations.", th);
+      check_profitability = true;
+    }
+
   /* Peel the loop if there are data refs with unknown alignment.
      Only one data ref with unknown store is allowed.  */
 
   if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
-    vect_do_peeling_for_alignment (loop_vinfo);
-
-  do_peeling_for_loop_bound
-    = (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
-       || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
-          && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0)
-       || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo));
+    {
+      vect_do_peeling_for_alignment (loop_vinfo, th, check_profitability);
+      check_profitability = false;
+    }
 
   if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
       || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
-    vect_loop_versioning (loop_vinfo,
-                         !do_peeling_for_loop_bound,
-                         &cond_expr, &cond_expr_stmt_list);
+    {
+      vect_loop_versioning (loop_vinfo, th, check_profitability);
+      check_profitability = false;
+    }
 
   /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
      compile time constant), or it is a constant that doesn't divide by the
@@ -5153,9 +5314,12 @@ vect_transform_loop (loop_vec_info loop_vinfo)
      will remain scalar and will compute the remaining (n%VF) iterations.
      (VF is the vectorization factor).  */
 
-  if (do_peeling_for_loop_bound)
+  if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
+       || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
+          && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0)
+       || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
     vect_do_peeling_for_loop_bound (loop_vinfo, &ratio,
-                                   cond_expr, cond_expr_stmt_list);
+                                   th, check_profitability);
   else
     ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
                LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);
@@ -5216,10 +5380,7 @@ vect_transform_loop (loop_vec_info loop_vinfo)
          bool is_store;
 
           if (transform_pattern_stmt)
-            {
-              stmt = pattern_stmt;
-              transform_pattern_stmt = false;
-            }
+           stmt = pattern_stmt;
           else
             stmt = gsi_stmt (si);
 
@@ -5266,28 +5427,53 @@ vect_transform_loop (loop_vec_info loop_vinfo)
                        || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
             transform_pattern_stmt = true;
 
-          /* If pattern statement has a def stmt, vectorize it too.  */
-          if (is_pattern_stmt_p (stmt_info)
-              && (pattern_def_stmt = STMT_VINFO_PATTERN_DEF_STMT (stmt_info))
-              && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
-                  || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt))))
-            {
-              if (pattern_def)
-                pattern_def = false;
-              else
-                {
-                  if (vect_print_dump_info (REPORT_DETAILS))
-                    {
-                      fprintf (vect_dump, "==> vectorizing pattern def"
-                                          " stmt: ");
-                      print_gimple_stmt (vect_dump, pattern_def_stmt, 0,
-                                         TDF_SLIM);
-                    }
+         /* If pattern statement has def stmts, vectorize them too.  */
+         if (is_pattern_stmt_p (stmt_info))
+           {
+             if (pattern_def_seq == NULL)
+               {
+                 pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info);
+                 pattern_def_si = gsi_start (pattern_def_seq);
+               }
+             else if (!gsi_end_p (pattern_def_si))
+               gsi_next (&pattern_def_si);
+             if (pattern_def_seq != NULL)
+               {
+                 gimple pattern_def_stmt = NULL;
+                 stmt_vec_info pattern_def_stmt_info = NULL;
 
-                  pattern_def = true;
-                  stmt = pattern_def_stmt;
-                  stmt_info = vinfo_for_stmt (stmt);
-                }
+                 while (!gsi_end_p (pattern_def_si))
+                   {
+                     pattern_def_stmt = gsi_stmt (pattern_def_si);
+                     pattern_def_stmt_info
+                       = vinfo_for_stmt (pattern_def_stmt);
+                     if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
+                         || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
+                       break;
+                     gsi_next (&pattern_def_si);
+                   }
+
+                 if (!gsi_end_p (pattern_def_si))
+                   {
+                     if (vect_print_dump_info (REPORT_DETAILS))
+                       {
+                         fprintf (vect_dump, "==> vectorizing pattern def"
+                                             " stmt: ");
+                         print_gimple_stmt (vect_dump, pattern_def_stmt, 0,
+                                            TDF_SLIM);
+                       }
+
+                     stmt = pattern_def_stmt;
+                     stmt_info = pattern_def_stmt_info;
+                   }
+                 else
+                   {
+                     pattern_def_si = gsi_none ();
+                     transform_pattern_stmt = false;
+                   }
+               }
+             else
+               transform_pattern_stmt = false;
             }
 
          gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
@@ -5317,9 +5503,12 @@ vect_transform_loop (loop_vec_info loop_vinfo)
              /* Hybrid SLP stmts must be vectorized in addition to SLP.  */
              if (!vinfo_for_stmt (stmt) || PURE_SLP_STMT (stmt_info))
                {
-                  if (!transform_pattern_stmt && !pattern_def)
-                   gsi_next (&si);
-                 continue;
+                 if (!transform_pattern_stmt && gsi_end_p (pattern_def_si))
+                   {
+                     pattern_def_seq = NULL;
+                     gsi_next (&si);
+                   }
+                 continue;
                }
            }
 
@@ -5327,11 +5516,11 @@ vect_transform_loop (loop_vec_info loop_vinfo)
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "transform statement.");
 
-         strided_store = false;
-         is_store = vect_transform_stmt (stmt, &si, &strided_store, NULL, NULL);
+         grouped_store = false;
+         is_store = vect_transform_stmt (stmt, &si, &grouped_store, NULL, NULL);
           if (is_store)
             {
-             if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
+             if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
                {
                  /* Interleaving. If IS_STORE is TRUE, the vectorization of the
                     interleaving chain was completed - free all the stores in
@@ -5343,14 +5532,20 @@ vect_transform_loop (loop_vec_info loop_vinfo)
              else
                {
                  /* Free the attached stmt_vec_info and remove the stmt.  */
-                 free_stmt_vec_info (gsi_stmt (si));
+                 gimple store = gsi_stmt (si);
+                 free_stmt_vec_info (store);
+                 unlink_stmt_vdef (store);
                  gsi_remove (&si, true);
+                 release_defs (store);
                  continue;
                }
            }
 
-          if (!transform_pattern_stmt && !pattern_def)
-           gsi_next (&si);
+         if (!transform_pattern_stmt && gsi_end_p (pattern_def_si))
+           {
+             pattern_def_seq = NULL;
+             gsi_next (&si);
+           }
        }                       /* stmts in BB */
     }                          /* BBs in loop */