re PR tree-optimization/91178 (Infinite recursion in split_constant_offset in slp...
author Richard Biener <rguenther@suse.de>
Wed, 17 Jul 2019 10:26:25 +0000 (10:26 +0000)
committer Richard Biener <rguenth@gcc.gnu.org>
Wed, 17 Jul 2019 10:26:25 +0000 (10:26 +0000)
2019-07-17  Richard Biener  <rguenther@suse.de>

PR tree-optimization/91178
* tree-vect-stmts.c (get_group_load_store_type): For SLP
loads with a gap larger than the vector size always use
VMAT_STRIDED_SLP.
(vectorizable_load): For VMAT_STRIDED_SLP with a permutation
avoid loading vectors that are only contained in the gap
and thus are not needed.

* gcc.dg/torture/pr91178.c: New testcase.

From-SVN: r273549

gcc/ChangeLog
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.dg/torture/pr91178.c [new file with mode: 0644]
gcc/tree-vect-stmts.c

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 877de1982b86e864499605e0694cb930af88c30f..b15db2d089e83a89dcb1d2f3fdb2366a1305fdd3 100644 (file)
@@ -1,3 +1,13 @@
+2019-07-17  Richard Biener  <rguenther@suse.de>
+
+       PR tree-optimization/91178
+       * tree-vect-stmts.c (get_group_load_store_type): For SLP
+       loads with a gap larger than the vector size always use
+       VMAT_STRIDED_SLP.
+       (vectorizable_load): For VMAT_STRIDED_SLP with a permutation
+       avoid loading vectors that are only contained in the gap
+       and thus are not needed.
+
 2019-07-17  Richard Biener  <rguenther@suse.de>
 
        PR tree-optimization/91180
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index f9ee4825f17b80cc379a8bfda528aa8282760461..2d7a6bfc6541f1848085e5e3a1af5668ccf5cdd9 100644 (file)
@@ -1,3 +1,8 @@
+2019-07-17  Richard Biener  <rguenther@suse.de>
+
+       PR tree-optimization/91178
+       * gcc.dg/torture/pr91178.c: New testcase.
+
 2019-07-17  Richard Biener  <rguenther@suse.de>
 
        PR tree-optimization/91180
diff --git a/gcc/testsuite/gcc.dg/torture/pr91178.c b/gcc/testsuite/gcc.dg/torture/pr91178.c
new file mode 100644 (file)
index 0000000..b7a2dbe
--- /dev/null
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+
+int a;
+extern int f[10][91125];
+int b[50];
+void c()
+{
+  for (int d = 6; d <= a; d++)
+    for (int e = 16; e <= 24; e++)
+      b[e] -= f[d][d];
+}
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 601a6f55fbff388c89f88d994e790aebf2bf960e..5d05e108ede61fc3558f94ec92beabc3858ee328 100644 (file)
@@ -2267,6 +2267,14 @@ get_group_load_store_type (stmt_vec_info stmt_info, tree vectype, bool slp,
                        / vect_get_scalar_dr_size (first_dr_info)))
            overrun_p = false;
 
+         /* If the gap at the end of the group exceeds a whole vector
+            in size use the strided SLP code which can skip code-generation
+            for the gap.  */
+         if (vls_type == VLS_LOAD && known_gt (gap, nunits))
+           *memory_access_type = VMAT_STRIDED_SLP;
+         else
+           *memory_access_type = VMAT_CONTIGUOUS;
+
          /* If the gap splits the vector in half and the target
             can do half-vector operations avoid the epilogue peeling
             by simply loading half of the vector only.  Usually
@@ -2274,7 +2282,8 @@ get_group_load_store_type (stmt_vec_info stmt_info, tree vectype, bool slp,
          dr_alignment_support alignment_support_scheme;
          scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
          machine_mode vmode;
-         if (overrun_p
+         if (*memory_access_type == VMAT_CONTIGUOUS
+             && overrun_p
              && !masked_p
              && (((alignment_support_scheme
                      = vect_supportable_dr_alignment (first_dr_info, false)))
@@ -2297,7 +2306,6 @@ get_group_load_store_type (stmt_vec_info stmt_info, tree vectype, bool slp,
                                 "Peeling for outer loop is not supported\n");
              return false;
            }
-         *memory_access_type = VMAT_CONTIGUOUS;
        }
     }
   else
@@ -8732,6 +8740,7 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
       /* Checked by get_load_store_type.  */
       unsigned int const_nunits = nunits.to_constant ();
       unsigned HOST_WIDE_INT cst_offset = 0;
+      unsigned int group_gap = 0;
 
       gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
       gcc_assert (!nested_in_vect_loop);
@@ -8749,6 +8758,7 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
       if (slp && grouped_load)
        {
          group_size = DR_GROUP_SIZE (first_stmt_info);
+         group_gap = DR_GROUP_GAP (first_stmt_info);
          ref_type = get_group_alias_ptr_type (first_stmt_info);
        }
       else
@@ -8892,6 +8902,14 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
          if (nloads > 1)
            vec_alloc (v, nloads);
          stmt_vec_info new_stmt_info = NULL;
+         if (slp && slp_perm
+             && (group_el % group_size) > group_size - group_gap
+             && (group_el % group_size) + nloads * lnel < group_size)
+           {
+             dr_chain.quick_push (NULL_TREE);
+             group_el += nloads * lnel;
+             continue;
+           }
          for (i = 0; i < nloads; i++)
            {
              tree this_off = build_int_cst (TREE_TYPE (alias_off),