re PR fortran/62283 (basic-block vectorization fails)
author Richard Biener <rguenther@suse.de>
Tue, 28 Apr 2015 08:30:44 +0000 (08:30 +0000)
committer Richard Biener <rguenth@gcc.gnu.org>
Tue, 28 Apr 2015 08:30:44 +0000 (08:30 +0000)
2015-04-28  Richard Biener  <rguenther@suse.de>

PR tree-optimization/62283
* tree-vect-slp.c (vect_build_slp_tree): When the SLP build
fails fatally and we are vectorizing a basic-block simply
cause the child to be constructed piecewise.
(vect_analyze_slp_cost_1): Adjust.
(vect_detect_hybrid_slp_stmts): Likewise.
(vect_bb_slp_scalar_cost): Likewise.
(vect_get_constant_vectors): For piecewise constructed
constants place them after the last def.
(vect_get_slp_defs): Adjust.
* tree-vect-stmts.c (vect_is_simple_use): Detect in-BB
externals for basic-block vectorization.

* gfortran.dg/vect/pr62283-2.f: New testcase.
* gcc.dg/vect/bb-slp-14.c: Adjust.

From-SVN: r222514

gcc/ChangeLog
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.dg/vect/bb-slp-14.c
gcc/testsuite/gfortran.dg/vect/pr62283-2.f [new file with mode: 0644]
gcc/tree-vect-slp.c
gcc/tree-vect-stmts.c

index 070a150b31d525eb7d616de6e787507fc8fbd269..8efe3d87e442717605a8bc5b41ad097034f7f844 100644 (file)
@@ -1,3 +1,18 @@
+2015-04-28  Richard Biener  <rguenther@suse.de>
+
+       PR tree-optimization/62283
+       * tree-vect-slp.c (vect_build_slp_tree): When the SLP build
+       fails fatally and we are vectorizing a basic-block simply
+       cause the child to be constructed piecewise.
+       (vect_analyze_slp_cost_1): Adjust.
+       (vect_detect_hybrid_slp_stmts): Likewise.
+       (vect_bb_slp_scalar_cost): Likewise.
+       (vect_get_constant_vectors): For piecewise constructed
+       constants place them after the last def.
+       (vect_get_slp_defs): Adjust.
+       * tree-vect-stmts.c (vect_is_simple_use): Detect in-BB
+       externals for basic-block vectorization.
+
 2015-04-28  Thomas Preud'homme  <thomas.preudhomme@arm.com>
 
        PR target/63503
index f9cce49d453d8f12dfb94434ebb848516224d2ef..703bc3aa19e4fcc3467437f92df701eec6c7b9f3 100644 (file)
@@ -1,3 +1,9 @@
+2015-04-28  Richard Biener  <rguenther@suse.de>
+
+       PR tree-optimization/62283
+       * gfortran.dg/vect/pr62283-2.f: New testcase.
+       * gcc.dg/vect/bb-slp-14.c: Adjust.
+
 2015-04-28  Richard Biener  <rguenther@suse.de>
 
        PR tree-optimization/65851
index a55c48eb09bb51ce5e9913ccfbea082bc67926e2..d8ba10b974fe88871d72556473816f2c39695919 100644 (file)
@@ -14,7 +14,8 @@ main1 (unsigned int x, unsigned int y)
   int i;
   unsigned int a0, a1, a2, a3;
 
-  /* Not consecutive load with permutation - not supported.  */
+  /* Not consecutive load with permutation - supported with building up
+     the vector from scalars.  */
   a0 = in[0] + 23;
   a1 = in[1] + 142;
   a2 = in[1] + 2;
@@ -47,6 +48,6 @@ int main (void)
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "basic block vectorized" 0 "slp2"  } } */
+/* { dg-final { scan-tree-dump-times "basic block vectorized" 1 "slp2"  } } */
 /* { dg-final { cleanup-tree-dump "slp2" } } */
   
diff --git a/gcc/testsuite/gfortran.dg/vect/pr62283-2.f b/gcc/testsuite/gfortran.dg/vect/pr62283-2.f
new file mode 100644 (file)
index 0000000..b71ac3e
--- /dev/null
@@ -0,0 +1,13 @@
+! { dg-do compile }
+! { dg-require-effective-target vect_float }
+! { dg-additional-options "-fdump-tree-slp2-details" }
+      subroutine saxpy(alpha,x,y)
+      real x(4),y(4),alpha
+      y(1)=y(1)+alpha*x(1)
+      y(2)=y(2)+alpha*x(2)
+      y(3)=y(3)+alpha*x(3)
+      y(4)=y(4)+alpha*x(4)
+      end
+! { dg-final { scan-tree-dump "basic block vectorized" "slp2" } }
+! { dg-final { cleanup-tree-dump "slp2" } }
+! { dg-final { cleanup-tree-dump "vect" } }
index d82df3e5daa44c04135fa49a58661ada66c06ae0..b066763bec78780a4169b8ca70618a3823a9ac4b 100644 (file)
@@ -1017,6 +1017,29 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
          continue;
        }
 
+      /* If the SLP build failed fatally and we analyze a basic-block
+         simply treat nodes we fail to build as externally defined
+        (and thus build vectors from the scalar defs).
+        The cost model will reject outright expensive cases.
+        ???  This doesn't treat cases where permutation ultimately
+        fails (or we don't try permutation below).  Ideally we'd
+        even compute a permutation that will end up with the maximum
+        SLP tree size...  */
+      if (bb_vinfo
+         && !matches[0]
+         /* ???  Rejecting patterns this way doesn't work.  We'd have to
+            do extra work to cancel the pattern so the uses see the
+            scalar version.  */
+         && !is_pattern_stmt_p (vinfo_for_stmt (stmt)))
+       {
+         dump_printf_loc (MSG_NOTE, vect_location,
+                          "Building vector operands from scalars\n");
+         oprnd_info->def_stmts = vNULL;
+         vect_free_slp_tree (child);
+         SLP_TREE_CHILDREN (*node).quick_push (NULL);
+         continue;
+       }
+
       /* If the SLP build for operand zero failed and operand zero
         and one can be commutated try that for the scalar stmts
         that failed the match.  */
@@ -1417,9 +1440,10 @@ vect_analyze_slp_cost_1 (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
 
   /* Recurse down the SLP tree.  */
   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
-    vect_analyze_slp_cost_1 (loop_vinfo, bb_vinfo,
-                            instance, child, prologue_cost_vec,
-                            ncopies_for_cost);
+    if (child)
+      vect_analyze_slp_cost_1 (loop_vinfo, bb_vinfo,
+                              instance, child, prologue_cost_vec,
+                              ncopies_for_cost);
 
   /* Look at the first scalar stmt to determine the cost.  */
   stmt = SLP_TREE_SCALAR_STMTS (node)[0];
@@ -1885,7 +1909,8 @@ vect_detect_hybrid_slp_stmts (slp_tree node, unsigned i, slp_vect_type stype)
     STMT_SLP_TYPE (stmt_vinfo) = hybrid;
 
   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
-    vect_detect_hybrid_slp_stmts (child, i, stype);
+    if (child)
+      vect_detect_hybrid_slp_stmts (child, i, stype);
 }
 
 /* Helpers for vect_detect_hybrid_slp walking pattern stmt uses.  */
@@ -2162,7 +2187,8 @@ vect_bb_slp_scalar_cost (basic_block bb,
     }
 
   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
-    scalar_cost += vect_bb_slp_scalar_cost (bb, child, life);
+    if (child)
+      scalar_cost += vect_bb_slp_scalar_cost (bb, child, life);
 
   return scalar_cost;
 }
@@ -2612,6 +2638,7 @@ vect_get_constant_vectors (tree op, slp_tree slp_node,
 
   number_of_places_left_in_vector = nunits;
   elts = XALLOCAVEC (tree, nunits);
+  bool place_after_defs = false;
   for (j = 0; j < number_of_copies; j++)
     {
       for (i = group_size - 1; stmts.iterate (i, &stmt); i--)
@@ -2682,6 +2709,7 @@ vect_get_constant_vectors (tree op, slp_tree slp_node,
 
           /* Create 'vect_ = {op0,op1,...,opn}'.  */
           number_of_places_left_in_vector--;
+         tree orig_op = op;
          if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
            {
              if (CONSTANT_CLASS_P (op))
@@ -2704,6 +2732,12 @@ vect_get_constant_vectors (tree op, slp_tree slp_node,
          elts[number_of_places_left_in_vector] = op;
          if (!CONSTANT_CLASS_P (op))
            constant_p = false;
+         if (TREE_CODE (orig_op) == SSA_NAME
+             && !SSA_NAME_IS_DEFAULT_DEF (orig_op)
+             && STMT_VINFO_BB_VINFO (stmt_vinfo)
+             && (STMT_VINFO_BB_VINFO (stmt_vinfo)->bb
+                 == gimple_bb (SSA_NAME_DEF_STMT (orig_op))))
+           place_after_defs = true;
 
           if (number_of_places_left_in_vector == 0)
             {
@@ -2720,16 +2754,25 @@ vect_get_constant_vectors (tree op, slp_tree slp_node,
                    CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[k]);
                  vec_cst = build_constructor (vector_type, v);
                }
-              voprnds.quick_push (vect_init_vector (stmt, vec_cst,
-                                                   vector_type, NULL));
+             tree init;
+             gimple_stmt_iterator gsi;
+             if (place_after_defs)
+               {
+                 gsi = gsi_for_stmt
+                         (vect_find_last_scalar_stmt_in_slp (slp_node));
+                 init = vect_init_vector (stmt, vec_cst, vector_type, &gsi);
+               }
+             else
+               init = vect_init_vector (stmt, vec_cst, vector_type, NULL);
              if (ctor_seq != NULL)
                {
-                 gimple init_stmt = SSA_NAME_DEF_STMT (voprnds.last ());
-                 gimple_stmt_iterator gsi = gsi_for_stmt (init_stmt);
+                 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (init));
                  gsi_insert_seq_before_without_update (&gsi, ctor_seq,
                                                        GSI_SAME_STMT);
                  ctor_seq = NULL;
                }
+             voprnds.quick_push (init);
+             place_after_defs = false;
             }
         }
     }
@@ -2825,20 +2868,26 @@ vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
           child = SLP_TREE_CHILDREN (slp_node)[child_index];
 
          /* We have to check both pattern and original def, if available.  */
-         gimple first_def = SLP_TREE_SCALAR_STMTS (child)[0];
-         gimple related = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def));
-
-         if (operand_equal_p (oprnd, gimple_get_lhs (first_def), 0)
-             || (related
-                 && operand_equal_p (oprnd, gimple_get_lhs (related), 0)))
+         if (child)
            {
-             /* The number of vector defs is determined by the number of
-                vector statements in the node from which we get those
-                statements.  */
-             number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
-             vectorized_defs = true;
-             child_index++;
+             gimple first_def = SLP_TREE_SCALAR_STMTS (child)[0];
+             gimple related
+               = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def));
+
+             if (operand_equal_p (oprnd, gimple_get_lhs (first_def), 0)
+                 || (related
+                     && operand_equal_p (oprnd, gimple_get_lhs (related), 0)))
+               {
+                 /* The number of vector defs is determined by the number of
+                    vector statements in the node from which we get those
+                    statements.  */
+                 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
+                 vectorized_defs = true;
+                 child_index++;
+               }
            }
+         else
+           child_index++;
         }
 
       if (!vectorized_defs)
index 4496293fb4616a27f02953e97ec5ad08e0460f5a..2ce6d4d7c2510e7a5cfe2207e1ae883b8a42a681 100644 (file)
@@ -7752,7 +7752,10 @@ vect_is_simple_use (tree operand, gimple stmt, loop_vec_info loop_vinfo,
   else
     {
       stmt_vinfo = vinfo_for_stmt (*def_stmt);
-      *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
+      if (!loop && !STMT_VINFO_VECTORIZABLE (stmt_vinfo))
+       *dt = vect_external_def;
+      else
+       *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
     }
 
   if (dump_enabled_p ())