re PR tree-optimization/58135 ([x86] Missed opportunities for partial SLP)

author Venkataramanan Kumar <venkataramanan.kumar@amd.com>

Mon, 23 May 2016 09:48:54 +0000 (09:48 +0000)

committer Venkataramanan Kumar <vekumar@gcc.gnu.org>

Mon, 23 May 2016 09:48:54 +0000 (09:48 +0000)
author Venkataramanan Kumar <venkataramanan.kumar@amd.com>
Mon, 23 May 2016 09:48:54 +0000 (09:48 +0000)
committer Venkataramanan Kumar <vekumar@gcc.gnu.org>
Mon, 23 May 2016 09:48:54 +0000 (09:48 +0000)
diff --git a/gcc/ChangeLog b/gcc/ChangeLog

index 27f6c665aeb318100e22d9e007f40ed69c81c4b0..56ccb0ef7c9e27a6543732c0cd325799be1a3eac 100644 (file)
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,10 @@
+2016-05-23  Venkataramanan Kumar  <venkataramanan.kumar@amd.com>
+
+       PR tree-optimization/58135
+       * tree-vect-slp.c (vect_analyze_slp_instance): When group
+       size is not a multiple of vector size, allow splitting of
+       store group at vector boundary.
+
  2016-05-23  Christophe Lyon  <christophe.lyon@linaro.org>
  
         * config/arm/arm_neon.h (vtst_p16, vtstq_p16): New.
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog

index 215de618ca14c2db0eb4cddcd922f4ce6a94acaa..b0ba70f85c79ad72caec2e8df90cbe5d74f4fd45 100644 (file)
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,9 @@
+2016-05-23  Venkataramanan Kumar  <venkataramanan.kumar@amd.com>
+
+       * gcc.dg/vect/bb-slp-19.c: Remove XFAIL.
+       * gcc.dg/vect/pr58135.c: New test.
+       * gfortran.dg/pr46519-1.f: Adjust test case.
+
  2016-05-23  Paolo Carlini  <paolo.carlini@oracle.com>
  
         PR c++/53401
diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-19.c b/gcc/testsuite/gcc.dg/vect/bb-slp-19.c

index 42cd294957588fa00e8c5353b41c8b95446aaf95..c2821551c8628ea70baa8389518c6a0e27bf6744 100644 (file)
--- a/gcc/testsuite/gcc.dg/vect/bb-slp-19.c
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-19.c
@@ -53,5 +53,5 @@ int main (void)
    return 0;
  }
  
-/* { dg-final { scan-tree-dump-times "basic block vectorized" 1 "slp2"  { xfail *-*-* }  } } */
+/* { dg-final { scan-tree-dump-times "basic block vectorized" 1 "slp2" } } */
    
diff --git a/gcc/testsuite/gcc.dg/vect/pr58135.c b/gcc/testsuite/gcc.dg/vect/pr58135.c

new file mode 100644 (file)

index 0000000..ca25000
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr58135.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_int } */
+
+int a[100];
+void foo ()
+{
+  a[0] = a[1] = a[2] = a[3] = a[4]= 0;
+}
+
+/* { dg-final { scan-tree-dump-times "basic block vectorized" 1 "slp2" } } */
diff --git a/gcc/testsuite/gfortran.dg/pr46519-1.f b/gcc/testsuite/gfortran.dg/pr46519-1.f

index 51c64b87d281e40b145897152626ce837b169fc7..46be9f590ddec7d865422ed8847bf6a2cf267930 100644 (file)
--- a/gcc/testsuite/gfortran.dg/pr46519-1.f
+++ b/gcc/testsuite/gfortran.dg/pr46519-1.f
@@ -1,5 +1,5 @@
  ! { dg-do compile { target i?86-*-* x86_64-*-* } }
-! { dg-options "-O3 -mavx -mvzeroupper -mtune=generic -dp" }
+! { dg-options "-O3 -mavx -mvzeroupper -fno-tree-slp-vectorize -mtune=generic -dp" }
  
        PROGRAM MG3XDEMO 
        INTEGER LM, NM, NV, NR, NIT
diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c

index 66db7d5c01e994ed18acd0c9c0a302c6e96ab83e..3de53d1188c690cddc2a95ee4d0dde7628fde004 100644 (file)
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -1757,18 +1757,6 @@ vect_analyze_slp_instance (vec_info *vinfo,
      }
    nunits = TYPE_VECTOR_SUBPARTS (vectype);
  
-  /* Calculate the unrolling factor.  */
-  unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
-  if (unrolling_factor != 1 && is_a <bb_vec_info> (vinfo))
-    {
-      if (dump_enabled_p ())
-        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                        "Build SLP failed: unrolling required in basic"
-                        " block SLP\n");
-
-      return false;
-    }
-
    /* Create a node (a root of the SLP tree) for the packed grouped stores.  */
    scalar_stmts.create (group_size);
    next = stmt;
@@ -1804,26 +1792,36 @@ vect_analyze_slp_instance (vec_info *vinfo,
    /* Build the tree for the SLP instance.  */
    bool *matches = XALLOCAVEC (bool, group_size);
    unsigned npermutes = 0;
-  if ((node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
+  node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
                                    &max_nunits, &loads, matches, &npermutes,
-                                  NULL, max_tree_size)) != NULL)
+                             NULL, max_tree_size);
+  if (node != NULL)
      {
        /* Calculate the unrolling factor based on the smallest type.  */
-      if (max_nunits > nunits)
-        unrolling_factor = least_common_multiple (max_nunits, group_size)
-                           / group_size;
+      unrolling_factor
+       = least_common_multiple (max_nunits, group_size) / group_size;
+
+      if (unrolling_factor != 1
+         && is_a <bb_vec_info> (vinfo))
+       {
  
-      if (unrolling_factor != 1 && is_a <bb_vec_info> (vinfo))
+         if (max_nunits > group_size)
          {
-          if (dump_enabled_p ())
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                            "Build SLP failed: unrolling required in basic"
-                            " block SLP\n");
+                              "Build SLP failed: store group "
+                              "size not a multiple of the vector size "
+                              "in basic block SLP\n");
           vect_free_slp_tree (node);
           loads.release ();
            return false;
          }
-
+         /* Fatal mismatch.  */
+         matches[group_size/max_nunits * max_nunits] = false;
+         vect_free_slp_tree (node);
+         loads.release ();
+       }
+      else
+       {
        /* Create a new SLP instance.  */
        new_instance = XNEW (struct _slp_instance);
        SLP_INSTANCE_TREE (new_instance) = node;
@@ -1845,8 +1843,8 @@ vect_analyze_slp_instance (vec_info *vinfo,
               (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
           FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load)
             {
-             int load_place
-               = vect_get_place_in_interleaving_chain (load, first_stmt);
+                 int load_place = vect_get_place_in_interleaving_chain
+                                    (load, first_stmt);
               gcc_assert (load_place != -1);
               if (load_place != j)
                 this_load_permuted = true;
@@ -1876,7 +1874,8 @@ vect_analyze_slp_instance (vec_info *vinfo,
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                    "Build SLP failed: unsupported load "
                                    "permutation ");
-                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
+                     dump_gimple_stmt (MSG_MISSED_OPTIMIZATION,
+                                       TDF_SLIM, stmt, 0);
                    dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                  }
                vect_free_slp_instance (new_instance);
@@ -1884,7 +1883,7 @@ vect_analyze_slp_instance (vec_info *vinfo,
              }
          }
  
-      /* If the loads and stores can be handled with load/store-lane
+         /* If the loads and stores can be handled with load/store-lane
          instructions do not generate this SLP instance.  */
        if (is_a <loop_vec_info> (vinfo)
           && loads_permuted
@@ -1896,7 +1895,8 @@ vect_analyze_slp_instance (vec_info *vinfo,
               gimple *first_stmt = GROUP_FIRST_ELEMENT
                   (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
               stmt_vec_info stmt_vinfo = vinfo_for_stmt (first_stmt);
-             /* Use SLP for strided accesses (or if we can't load-lanes).  */
+                 /* Use SLP for strided accesses (or if we
+                    can't load-lanes).  */
               if (STMT_VINFO_STRIDED_P (stmt_vinfo)
                   || ! vect_load_lanes_supported
                         (STMT_VINFO_VECTYPE (stmt_vinfo),
@@ -1925,11 +1925,14 @@ vect_analyze_slp_instance (vec_info *vinfo,
  
        return true;
      }
-
+    }
+  else
+    {
    /* Failed to SLP.  */
    /* Free the allocated memory.  */
    scalar_stmts.release ();
    loads.release ();
+    }
  
    /* For basic block SLP, try to break the group up into multiples of the
       vector size.  */
author	Venkataramanan Kumar <venkataramanan.kumar@amd.com>
	Mon, 23 May 2016 09:48:54 +0000 (09:48 +0000)
committer	Venkataramanan Kumar <vekumar@gcc.gnu.org>
	Mon, 23 May 2016 09:48:54 +0000 (09:48 +0000)
gcc/ChangeLog		patch \| blob \| history
gcc/testsuite/ChangeLog		patch \| blob \| history
gcc/testsuite/gcc.dg/vect/bb-slp-19.c		patch \| blob \| history
gcc/testsuite/gcc.dg/vect/pr58135.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gfortran.dg/pr46519-1.f		patch \| blob \| history
gcc/tree-vect-slp.c		patch \| blob \| history