tree-vect-loop.c (vect_fixup_reduc_chain): New function.
authorRichard Biener <rguenther@suse.de>
Thu, 28 May 2015 13:29:41 +0000 (13:29 +0000)
committerRichard Biener <rguenth@gcc.gnu.org>
Thu, 28 May 2015 13:29:41 +0000 (13:29 +0000)
2015-05-28  Richard Biener  <rguenther@suse.de>

* tree-vect-loop.c (vect_fixup_reduc_chain): New function.
(vect_fixup_scalar_cycles_with_patterns): Likewise.
(vect_analyze_loop_2): Call vect_fixup_scalar_cycles_with_patterns
after pattern recog.
(vect_create_epilog_for_reduction): Properly handle reductions
with patterns.
(vectorizable_reduction): Likewise.
* tree-vect-slp.c (vect_analyze_slp_instance): Properly mark
reduction chains.
(vect_get_constant_vectors): Create the correct number of
initial values for reductions.
(vect_schedule_slp_instance): Handle reduction chains that are
type changing properly.
* tree-vect-stmts.c (vect_analyze_stmt): Adjust.

* gcc.dg/vect/slp-reduc-sad.c: New testcase.

From-SVN: r223818

gcc/ChangeLog
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.dg/vect/slp-reduc-sad.c [new file with mode: 0644]
gcc/tree-vect-loop.c
gcc/tree-vect-slp.c
gcc/tree-vect-stmts.c

index 9e8110443aa8ada0df86345b7e5745c270b0e8bf..2a2829dad3418f7cb629ba9b104b1e655d57e648 100644 (file)
@@ -1,3 +1,20 @@
+2015-05-28  Richard Biener  <rguenther@suse.de>
+
+       * tree-vect-loop.c (vect_fixup_reduc_chain): New function.
+       (vect_fixup_scalar_cycles_with_patterns): Likewise.
+       (vect_analyze_loop_2): Call vect_fixup_scalar_cycles_with_patterns
+       after pattern recog.
+       (vect_create_epilog_for_reduction): Properly handle reductions
+       with patterns.
+       (vectorizable_reduction): Likewise.
+       * tree-vect-slp.c (vect_analyze_slp_instance): Properly mark
+       reduction chains.
+       (vect_get_constant_vectors): Create the correct number of
+       initial values for reductions.
+       (vect_schedule_slp_instance): Handle reduction chains that are
+       type changing properly.
+       * tree-vect-stmts.c (vect_analyze_stmt): Adjust.
+
 2015-05-28  Richard Biener  <rguenther@suse.de>
 
        PR tree-optimization/66142
index ddc74fc274c4bdae50a0afc998d28d6b97111757..1a2b185eff0d21507c9a3d7120462b31124b37d1 100644 (file)
@@ -1,3 +1,7 @@
+2015-05-28  Richard Biener  <rguenther@suse.de>
+
+       * gcc.dg/vect/slp-reduc-sad.c: New testcase.
+
 2015-05-28  Richard Biener  <rguenther@suse.de>
 
        PR tree-optimization/66142
diff --git a/gcc/testsuite/gcc.dg/vect/slp-reduc-sad.c b/gcc/testsuite/gcc.dg/vect/slp-reduc-sad.c
new file mode 100644 (file)
index 0000000..472544f
--- /dev/null
@@ -0,0 +1,64 @@
+/* { dg-require-effective-target vect_usad_char } */
+
+#include "tree-vect.h"
+
+typedef unsigned int uint32_t;
+typedef unsigned short uint16_t;
+typedef unsigned char uint8_t;
+
+extern int abs (int);
+extern void abort (void);
+
+int __attribute__((noinline,noclone))
+foo (uint8_t *pix1, uint8_t *pix2, int i_stride_pix2)
+{
+  int i_sum = 0;
+  for( int y = 0; y < 16; y++ )
+    {
+      i_sum += abs ( pix1[0] - pix2[0] );
+      i_sum += abs ( pix1[1] - pix2[1] );
+      i_sum += abs ( pix1[2] - pix2[2] );
+      i_sum += abs ( pix1[3] - pix2[3] );
+      i_sum += abs ( pix1[4] - pix2[4] );
+      i_sum += abs ( pix1[5] - pix2[5] );
+      i_sum += abs ( pix1[6] - pix2[6] );
+      i_sum += abs ( pix1[7] - pix2[7] );
+      i_sum += abs ( pix1[8] - pix2[8] );
+      i_sum += abs ( pix1[9] - pix2[9] );
+      i_sum += abs ( pix1[10] - pix2[10] );
+      i_sum += abs ( pix1[11] - pix2[11] );
+      i_sum += abs ( pix1[12] - pix2[12] );
+      i_sum += abs ( pix1[13] - pix2[13] );
+      i_sum += abs ( pix1[14] - pix2[14] );
+      i_sum += abs ( pix1[15] - pix2[15] );
+      pix1 += 16;
+      pix2 += i_stride_pix2;
+    }
+  return i_sum; 
+}
+
+int
+main ()
+{
+  check_vect ();
+
+  uint8_t X[16*16];
+  uint8_t Y[16*16];
+
+  for (int i = 0; i < 16*16; ++i)
+    {
+      X[i] = i;
+      Y[i] = 16*16 - i;
+      __asm__ volatile ("");
+    }
+
+  if (foo (X, Y, 16) != 32512)
+    abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump "vect_recog_sad_pattern: detected" "vect" } } */
+/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
index 8fe4dc6ad30995685065e74d2b3fe71077b4c50e..71df11c0f6f7069ac4e7f2efc2085bbb21847839 100644 (file)
@@ -828,6 +828,45 @@ vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
     vect_analyze_scalar_cycles_1 (loop_vinfo, loop->inner);
 }
 
+/* Transfer group and reduction information from STMT to its pattern stmt.  */
+
+static void
+vect_fixup_reduc_chain (gimple stmt)
+{
+  gimple firstp = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt));
+  gimple stmtp;
+  gcc_assert (!GROUP_FIRST_ELEMENT (vinfo_for_stmt (firstp))
+             && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)));
+  GROUP_SIZE (vinfo_for_stmt (firstp)) = GROUP_SIZE (vinfo_for_stmt (stmt));
+  do
+    {
+      stmtp = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt));
+      GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmtp)) = firstp;
+      stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
+      if (stmt)
+       GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmtp))
+         = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt));
+    }
+  while (stmt);
+  STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmtp)) = vect_reduction_def;
+}
+
+/* Fixup scalar cycles that now have their stmts detected as patterns.  */
+
+static void
+vect_fixup_scalar_cycles_with_patterns (loop_vec_info loop_vinfo)
+{
+  gimple first;
+  unsigned i;
+
+  FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo), i, first)
+    if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (first)))
+      {
+       vect_fixup_reduc_chain (first);
+       LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo)[i]
+         = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first));
+      }
+}
 
 /* Function vect_get_loop_niters.
 
@@ -1708,6 +1747,8 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo)
 
   vect_pattern_recog (loop_vinfo, NULL);
 
+  vect_fixup_scalar_cycles_with_patterns (loop_vinfo);
+
   /* Analyze the access patterns of the data-refs in the loop (consecutive,
      complex, etc.). FORNOW: Only handle consecutive access pattern.  */
 
@@ -4573,8 +4614,12 @@ vect_finalize_reduction:
      exit phi node.  */
   if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
     {
-      scalar_dest = gimple_assign_lhs (
-                       SLP_TREE_SCALAR_STMTS (slp_node)[group_size - 1]);
+      gimple dest_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[group_size - 1];
+      /* Handle reduction patterns.  */
+      if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (dest_stmt)))
+       dest_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (dest_stmt));
+
+      scalar_dest = gimple_assign_lhs (dest_stmt);
       group_size = 1;
     }
 
@@ -4875,12 +4920,17 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
   auto_vec<gimple> phis;
   int vec_num;
   tree def0, def1, tem, op0, op1 = NULL_TREE;
+  bool first_p = true;
 
   /* In case of reduction chain we switch to the first stmt in the chain, but
      we don't update STMT_INFO, since only the last stmt is marked as reduction
      and has reduction properties.  */
-  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
-    stmt = GROUP_FIRST_ELEMENT (stmt_info);
+  if (GROUP_FIRST_ELEMENT (stmt_info)
+      && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
+    {
+      stmt = GROUP_FIRST_ELEMENT (stmt_info);
+      first_p = false;
+    }
 
   if (nested_in_vect_loop_p (loop, stmt))
     {
@@ -4903,8 +4953,8 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
     return false;
 
   /* Make sure it was already recognized as a reduction computation.  */
-  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def
-      && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle)
+  if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) != vect_reduction_def
+      && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) != vect_nested_cycle)
     return false;
 
   /* 2. Has this been recognized as a reduction pattern?
@@ -4914,7 +4964,7 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
      the STMT_VINFO_RELATED_STMT field records the last stmt in
      the original sequence that constitutes the pattern.  */
 
-  orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
+  orig_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt));
   if (orig_stmt)
     {
       orig_stmt_info = vinfo_for_stmt (orig_stmt);
@@ -5040,20 +5090,16 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
       return false;
     }
 
+  gimple tmp = vect_is_simple_reduction (loop_vinfo, reduc_def_stmt,
+                                        !nested_cycle, &dummy);
   if (orig_stmt)
-    gcc_assert (orig_stmt == vect_is_simple_reduction (loop_vinfo,
-                                                       reduc_def_stmt,
-                                                       !nested_cycle,
-                                                       &dummy));
+    gcc_assert (tmp == orig_stmt
+               || GROUP_FIRST_ELEMENT (vinfo_for_stmt (tmp)) == orig_stmt);
   else
-    {
-      gimple tmp = vect_is_simple_reduction (loop_vinfo, reduc_def_stmt,
-                                             !nested_cycle, &dummy);
-      /* We changed STMT to be the first stmt in reduction chain, hence we
-         check that in this case the first element in the chain is STMT.  */
-      gcc_assert (stmt == tmp
-                  || GROUP_FIRST_ELEMENT (vinfo_for_stmt (tmp)) == stmt);
-    }
+    /* We changed STMT to be the first stmt in reduction chain, hence we
+       check that in this case the first element in the chain is STMT.  */
+    gcc_assert (stmt == tmp
+               || GROUP_FIRST_ELEMENT (vinfo_for_stmt (tmp)) == stmt);
 
   if (STMT_VINFO_LIVE_P (vinfo_for_stmt (reduc_def_stmt)))
     return false;
@@ -5267,8 +5313,9 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
 
   if (!vec_stmt) /* transformation not required.  */
     {
-      if (!vect_model_reduction_cost (stmt_info, epilog_reduc_code, ncopies,
-                                     reduc_index))
+      if (first_p
+         && !vect_model_reduction_cost (stmt_info, epilog_reduc_code, ncopies,
+                                        reduc_index))
         return false;
       STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
       return true;
@@ -5324,11 +5371,7 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
   prev_stmt_info = NULL;
   prev_phi_info = NULL;
   if (slp_node)
-    {
-      vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
-      gcc_assert (TYPE_VECTOR_SUBPARTS (vectype_out) 
-                  == TYPE_VECTOR_SUBPARTS (vectype_in));
-    }
+    vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
   else
     {
       vec_num = 1;
index 9137144e67a8b86636484cc329f55a49055e708f..f38191d9884565aa7c19ce6644ae16c6261c50f9 100644 (file)
@@ -1793,6 +1793,11 @@ vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
             scalar_stmts.safe_push (next);
           next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
         }
+      /* Mark the first element of the reduction chain as reduction to properly
+        transform the node.  In the reduction analysis phase only the last
+        element of the chain is marked as reduction.  */
+      if (!STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
+       STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_reduction_def;
     }
   else
     {
@@ -2738,7 +2743,7 @@ vect_get_constant_vectors (tree op, slp_tree slp_node,
      (s1, s2, ..., s8).  We will create two vectors {s1, s2, s3, s4} and
      {s5, s6, s7, s8}.  */
 
-  number_of_copies = least_common_multiple (nunits, group_size) / group_size;
+  number_of_copies = nunits * number_of_vectors / group_size;
 
   number_of_places_left_in_vector = nunits;
   elts = XALLOCAVEC (tree, nunits);
@@ -3383,8 +3388,14 @@ vect_schedule_slp_instance (slp_tree node, slp_instance instance,
      for the scalar stmts in each node of the SLP tree.  Number of vector
      elements in one vector iteration is the number of scalar elements in
      one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
-     size.  */
-  vec_stmts_size = (vectorization_factor * group_size) / nunits;
+     size.
+     Unless this is a SLP reduction in which case the number of vector
+     stmts is equal to the number of vector stmts of the children.  */
+  if (GROUP_FIRST_ELEMENT (stmt_info)
+      && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
+    vec_stmts_size = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node)[0]);
+  else
+    vec_stmts_size = (vectorization_factor * group_size) / nunits;
 
   if (!SLP_TREE_VEC_STMTS (node).exists ())
     {
index 06a59cb39959e73cce68344c2822d18f47cfc17b..6b018e53aadef6d7e68746a1f6196ad3489bfdae 100644 (file)
@@ -7310,9 +7310,11 @@ vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
 
       case vect_reduction_def:
       case vect_nested_cycle:
-         gcc_assert (!bb_vinfo && (relevance == vect_used_in_outer
-                     || relevance == vect_used_in_outer_by_reduction
-                     || relevance == vect_unused_in_scope));
+         gcc_assert (!bb_vinfo
+                    && (relevance == vect_used_in_outer
+                        || relevance == vect_used_in_outer_by_reduction
+                        || relevance == vect_used_by_reduction
+                        || relevance == vect_unused_in_scope));
          break;
 
       case vect_induction_def: