re PR tree-optimization/87105 (Autovectorization [X86, SSE2, AVX2, DoublePrecision])
author     Richard Biener <rguenther@suse.de>
           Wed, 24 Oct 2018 11:46:58 +0000 (11:46 +0000)
committer  Richard Biener <rguenth@gcc.gnu.org>
           Wed, 24 Oct 2018 11:46:58 +0000 (11:46 +0000)
2018-10-24  Richard Biener  <rguenther@suse.de>

PR tree-optimization/87105
* tree-vect-data-refs.c (vect_analyze_group_access_1): Adjust
dump classification.
(vect_analyze_data_ref_accesses): Handle duplicate loads and
stores by splitting the affected group after the fact.
* tree-vect-slp.c (vect_build_slp_tree_2): Dump when we
fail the SLP build because of size constraints.

* gcc.dg/vect/bb-slp-39.c: New testcase.
* gfortran.dg/vect/pr83232.f90: Un-XFAIL.

From-SVN: r265457

gcc/ChangeLog
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.dg/vect/bb-slp-39.c [new file with mode: 0644]
gcc/testsuite/gfortran.dg/vect/pr83232.f90
gcc/tree-vect-data-refs.c
gcc/tree-vect-slp.c

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 55ac3b99e2c5da9a6fc3d6928ec37e0953662d0a..bd70bab9265426ed675bb412e4062d4b7273b1e0 100644
@@ -1,3 +1,13 @@
+2018-10-24  Richard Biener  <rguenther@suse.de>
+
+       PR tree-optimization/87105
+       * tree-vect-data-refs.c (vect_analyze_group_access_1): Adjust
+       dump classification.
+       (vect_analyze_data_ref_accesses): Handle duplicate loads and
+       stores by splitting the affected group after the fact.
+       * tree-vect-slp.c (vect_build_slp_tree_2): Dump when we
+       fail the SLP build because of size constraints.
+
 2018-10-24  Rainer Orth  <ro@CeBiTec.Uni-Bielefeld.DE>
 
        * configure.ac (gcc_cv_ld_aligned_shf_merge): New test.
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index c17488a87c0e7c54a0456c08acfcf57339d8e683..df476ec2eb98c097f7d6c54dbaccf48ca7f20f55 100644
@@ -1,3 +1,9 @@
+2018-10-24  Richard Biener  <rguenther@suse.de>
+
+       PR tree-optimization/87105
+       * gcc.dg/vect/bb-slp-39.c: New testcase.
+       * gfortran.dg/vect/pr83232.f90: Un-XFAIL.
+
 2018-10-24  Richard Biener  <rguenther@suse.de>
 
        PR tree-optimization/84013
diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-39.c b/gcc/testsuite/gcc.dg/vect/bb-slp-39.c
new file mode 100644 (file)
index 0000000..255bb10
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-39.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_double } */
+
+double x[1024];
+
+void foo (double *p)
+{
+  x[0] = 1.;
+  x[1] = 2.;
+  *p = 7.; // aliasing store
+  x[0] = x[0] + 1;
+  x[1] = x[1] + 1;
+  *p = 8.; // aliasing store
+  x[1] = x[1] + 1;
+  x[0] = x[0] + 1;
+}
+
+/* See that we vectorize three SLP instances.  */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "slp2" } } */
diff --git a/gcc/testsuite/gfortran.dg/vect/pr83232.f90 b/gcc/testsuite/gfortran.dg/vect/pr83232.f90
index 8fd9f0373ebe2e33a8c81c206d736ea1e8db45de..a35357839fedc3695aa753090a459e07dc7dd5c5 100644
@@ -27,7 +27,5 @@
       call foo (Einc)
       END SUBROUTINE
 
-! We should vectorize (1) and (2)
-! { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "slp1" } }
-! We fail to vectorize at (3), this can be fixed in the future
-! { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "slp1" { xfail *-*-* } } }
+! We should vectorize (1), (2) and (3)
+! { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "slp1" } }
diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c
index a24e1853e038325e35ededa2feb5d9d40d17931f..9185b1bd1c0ccba6664f2c84063a1226f6ac3d28 100644
@@ -2472,7 +2472,7 @@ vect_analyze_group_access_1 (dr_vec_info *dr_info)
                 }
 
              if (dump_enabled_p ())
-               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+               dump_printf_loc (MSG_NOTE, vect_location,
                                 "Two or more load stmts share the same dr.\n");
 
              /* For load use the same data-ref load.  */
@@ -2838,6 +2838,7 @@ vect_analyze_data_ref_accesses (vec_info *vinfo)
      determining what dependencies are reversed.  */
   vec<data_reference_p> datarefs_copy = datarefs.copy ();
   datarefs_copy.qsort (dr_group_sort_cmp);
+  hash_set<stmt_vec_info> to_fixup;
 
   /* Build the interleaving chains.  */
   for (i = 0; i < datarefs_copy.length () - 1;)
@@ -2920,36 +2921,32 @@ vect_analyze_data_ref_accesses (vec_info *vinfo)
            {
              gcc_assert (gimple_uid (DR_STMT (datarefs_copy[i-1]))
                          < gimple_uid (DR_STMT (drb)));
-             /* ???  For now we simply "drop" the later reference which is
-                otherwise the same rather than finishing off this group.
-                In the end we'd want to re-process duplicates forming
-                multiple groups from the refs, likely by just collecting
-                all candidates (including duplicates and split points
-                below) in a vector and then process them together.  */
-             continue;
+             /* Simply link in duplicates and fix up the chain below.  */
            }
-
-         /* If init_b == init_a + the size of the type * k, we have an
-            interleaving, and DRA is accessed before DRB.  */
-         HOST_WIDE_INT type_size_a = tree_to_uhwi (sza);
-         if (type_size_a == 0
-             || (init_b - init_a) % type_size_a != 0)
-           break;
-
-         /* If we have a store, the accesses are adjacent.  This splits
-            groups into chunks we support (we don't support vectorization
-            of stores with gaps).  */
-         if (!DR_IS_READ (dra) && init_b - init_prev != type_size_a)
-           break;
-
-         /* If the step (if not zero or non-constant) is greater than the
-            difference between data-refs' inits this splits groups into
-            suitable sizes.  */
-         if (tree_fits_shwi_p (DR_STEP (dra)))
+         else
            {
-             HOST_WIDE_INT step = tree_to_shwi (DR_STEP (dra));
-             if (step != 0 && step <= (init_b - init_a))
+             /* If init_b == init_a + the size of the type * k, we have an
+                interleaving, and DRA is accessed before DRB.  */
+             HOST_WIDE_INT type_size_a = tree_to_uhwi (sza);
+             if (type_size_a == 0
+                 || (init_b - init_a) % type_size_a != 0)
                break;
+
+             /* If we have a store, the accesses are adjacent.  This splits
+                groups into chunks we support (we don't support vectorization
+                of stores with gaps).  */
+             if (!DR_IS_READ (dra) && init_b - init_prev != type_size_a)
+               break;
+
+             /* If the step (if not zero or non-constant) is greater than the
+                difference between data-refs' inits this splits groups into
+                suitable sizes.  */
+             if (tree_fits_shwi_p (DR_STEP (dra)))
+               {
+                 HOST_WIDE_INT step = tree_to_shwi (DR_STEP (dra));
+                 if (step != 0 && step <= (init_b - init_a))
+                   break;
+               }
            }
 
          if (dump_enabled_p ())
@@ -2968,9 +2965,64 @@ vect_analyze_data_ref_accesses (vec_info *vinfo)
          DR_GROUP_FIRST_ELEMENT (stmtinfo_b) = stmtinfo_a;
          DR_GROUP_NEXT_ELEMENT (lastinfo) = stmtinfo_b;
          lastinfo = stmtinfo_b;
+
+         if (init_b == init_prev
+             && !to_fixup.add (DR_GROUP_FIRST_ELEMENT (stmtinfo_a))
+             && dump_enabled_p ())
+           dump_printf_loc (MSG_NOTE, vect_location,
+                            "Queuing group with duplicate access for fixup\n");
        }
     }
 
+  /* Fixup groups with duplicate entries by splitting it.  */
+  while (1)
+    {
+      hash_set<stmt_vec_info>::iterator it = to_fixup.begin ();
+      if (!(it != to_fixup.end ()))
+       break;
+      stmt_vec_info grp = *it;
+      to_fixup.remove (grp);
+
+      /* Find the earliest duplicate group member.  */
+      unsigned first_duplicate = -1u;
+      stmt_vec_info next, g = grp;
+      while ((next = DR_GROUP_NEXT_ELEMENT (g)))
+       {
+         if ((DR_INIT (STMT_VINFO_DR_INFO (next)->dr)
+              == DR_INIT (STMT_VINFO_DR_INFO (g)->dr))
+             && gimple_uid (STMT_VINFO_STMT (next)) < first_duplicate)
+           first_duplicate = gimple_uid (STMT_VINFO_STMT (next));
+         g = next;
+       }
+      if (first_duplicate == -1U)
+       continue;
+
+      /* Then move all stmts after the first duplicate to a new group.
+         Note this is a heuristic but one with the property that *it
+        is fixed up completely.  */
+      g = grp;
+      stmt_vec_info newgroup = NULL, ng;
+      while ((next = DR_GROUP_NEXT_ELEMENT (g)))
+       {
+         if (gimple_uid (STMT_VINFO_STMT (next)) >= first_duplicate)
+           {
+             DR_GROUP_NEXT_ELEMENT (g) = DR_GROUP_NEXT_ELEMENT (next);
+             if (!newgroup)
+               newgroup = next;
+             else
+               DR_GROUP_NEXT_ELEMENT (ng) = next;
+             ng = next;
+             DR_GROUP_FIRST_ELEMENT (ng) = newgroup;
+           }
+         else
+           g = DR_GROUP_NEXT_ELEMENT (g);
+       }
+      DR_GROUP_NEXT_ELEMENT (ng) = NULL;
+
+      /* Fixup the new group which still may contain duplicates.  */
+      to_fixup.add (newgroup);
+    }
+
   FOR_EACH_VEC_ELT (datarefs_copy, i, dr)
     {
       dr_vec_info *dr_info = vinfo->lookup_dr (dr);
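
For readers who do not have the interleaving-chain data structures in their head, below is a minimal, self-contained sketch of the split heuristic implemented by the fixup loop above.  struct elem, split_group and the uid/init/next fields are made-up stand-ins for stmt_vec_info, gimple_uid (), DR_INIT () and DR_GROUP_NEXT_ELEMENT (); this illustrates the idea only and is not GCC API.

#include <stdio.h>

/* Sketch of the group-splitting fixup; not GCC code.  uid mimics
   gimple_uid (), init mimics DR_INIT () and next mimics
   DR_GROUP_NEXT_ELEMENT ().  */
struct elem
{
  unsigned uid;
  long init;
  struct elem *next;
};

/* Split GRP after its earliest duplicate access and return the moved tail
   as a new group (NULL when there is no duplicate).  The chain is assumed
   to be sorted by init and then by uid, as dr_group_sort_cmp arranges.  */
static struct elem *
split_group (struct elem *grp)
{
  /* Find the earliest duplicate group member.  */
  unsigned first_duplicate = -1u;
  for (struct elem *g = grp; g->next; g = g->next)
    if (g->next->init == g->init && g->next->uid < first_duplicate)
      first_duplicate = g->next->uid;
  if (first_duplicate == -1u)
    return NULL;

  /* Move every member at or after the first duplicate into a new chain,
     preserving the relative order within both chains.  */
  struct elem *newgroup = NULL, *ng = NULL;
  for (struct elem *g = grp; g->next;)
    if (g->next->uid >= first_duplicate)
      {
        struct elem *moved = g->next;
        g->next = moved->next;
        moved->next = NULL;
        if (!newgroup)
          newgroup = moved;
        else
          ng->next = moved;
        ng = moved;
      }
    else
      g = g->next;
  return newgroup;
}

int
main (void)
{
  /* Chain for four stores, two to x[0] (init 0) and two to x[1] (init 8),
     already sorted by init and then uid; uids 3 and 4 are the duplicates.  */
  struct elem e3 = { 4, 8, NULL };
  struct elem e2 = { 2, 8, &e3 };
  struct elem e1 = { 3, 0, &e2 };
  struct elem e0 = { 1, 0, &e1 };
  struct elem *rest = split_group (&e0);
  for (struct elem *g = &e0; g; g = g->next)
    printf ("group 1: uid %u, init %ld\n", g->uid, g->init);
  for (struct elem *g = rest; g; g = g->next)
    printf ("group 2: uid %u, init %ld\n", g->uid, g->init);
  return 0;
}

As in the patch, the split-off group may itself still contain duplicates; the real code therefore adds it back to to_fixup so it is reprocessed until no group has duplicate members.
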
diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
index f60fea0a581e40c8b099a3ae0f3f740c5dc57158..3aae1776ef91b27ea98d4e68e6e11cf566dfcbd6 100644
@@ -1191,6 +1191,10 @@ vect_build_slp_tree_2 (vec_info *vinfo,
 
       if (++this_tree_size > max_tree_size)
        {
+         if (dump_enabled_p ())
+           dump_printf_loc (MSG_MISSED_OPTIMIZATION,
+                            vect_location,
+                            "Build SLP failed: SLP tree too large\n");
          FOR_EACH_VEC_ELT (children, j, child)
            vect_free_slp_tree (child, false);
          vect_free_oprnd_info (oprnds_info);