Record the loop masks needed for EXTRACT_LAST_REDUCTIONs
author Richard Sandiford <richard.sandiford@arm.com>
Tue, 10 Dec 2019 12:20:07 +0000 (12:20 +0000)
committer Richard Sandiford <rsandifo@gcc.gnu.org>
Tue, 10 Dec 2019 12:20:07 +0000 (12:20 +0000)
The analysis phase of vectorizable_condition wasn't recording the
loop masks needed by the transform phase.  This meant that the masks
wouldn't be created in the (rare) case that no other statement needed
them.

2019-12-10  Richard Sandiford  <richard.sandiford@arm.com>

gcc/
* tree-vect-stmts.c (vectorizable_condition): Record the loop
masks required for extract-last reductions.

gcc/testsuite/
* gcc.target/aarch64/sve/clastb_9.c: New test.

From-SVN: r279163

gcc/ChangeLog
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/aarch64/sve/clastb_9.c [new file with mode: 0644]
gcc/tree-vect-stmts.c

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 1c1501eba5b8eadbe9820949aff9bdf9ccdcdf1f..0ca3e15f4c9f41461bf05c65991150d114d7cc34 100644 (file)
@@ -1,3 +1,8 @@
+2019-12-10  Richard Sandiford  <richard.sandiford@arm.com>
+
+       * tree-vect-stmts.c (vectorizable_condition): Record the loop
+       masks required for extract-last reductions.
+
 2019-12-10  Richard Sandiford  <richard.sandiford@arm.com>
 
        * tree-vect-stmts.c (vect_finish_replace_stmt): Always use the
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index b43497cc2b3af8c304b25f666742a9df060a6d16..5020620fc4dc6fd9189ff407061fa55a7f691368 100644 (file)
@@ -1,3 +1,7 @@
+2019-12-10  Richard Sandiford  <richard.sandiford@arm.com>
+
+       * gcc.target/aarch64/sve/clastb_9.c: New test.
+
 2019-12-10  Tobias Burnus  <tobias@codesourcery.com>
 
        PR fortran/92872
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/clastb_9.c b/gcc/testsuite/gcc.target/aarch64/sve/clastb_9.c
new file mode 100644 (file)
index 0000000..e9db935
--- /dev/null
@@ -0,0 +1,21 @@
+/* Originally gcc.dg/vect/O1-pr41008.c.  */
+/* { dg-options "-O1 -ftree-vectorize -fno-vect-cost-model -msve-vector-bits=256" } */
+
+double heating[2][2];
+
+void foo (int, int);
+
+void map_do()
+{
+  int jsav, ksav, k, j;
+
+  for(k = 0; k < 2; k++)
+    for(j = 0; j < 2; j++)
+      if (heating[k][j] > 0.)
+        {
+          jsav = j;
+          ksav = k;
+        }
+
+  foo (jsav, ksav);
+}
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 08ffb72fe2d5f7fa1a33a0813e578d34be073df1..706da5b4e92008fc8566b5aa3a551dd5a3cbdcfe 100644 (file)
@@ -9912,6 +9912,7 @@ vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
        vect_unknown_def_type, vect_unknown_def_type};
   int ndts = 4;
   int ncopies;
+  int vec_num;
   enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
   stmt_vec_info prev_stmt_info = NULL;
   int i, j;
@@ -9969,9 +9970,15 @@ vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
   tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
 
   if (slp_node)
-    ncopies = 1;
+    {
+      ncopies = 1;
+      vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
+    }
   else
-    ncopies = vect_get_num_copies (loop_vinfo, vectype);
+    {
+      ncopies = vect_get_num_copies (loop_vinfo, vectype);
+      vec_num = 1;
+    }
 
   gcc_assert (ncopies >= 1);
   if (for_reduction && ncopies > 1)
@@ -10094,6 +10101,12 @@ vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
            }
        }
 
+      if (loop_vinfo
+         && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo)
+         && reduction_type == EXTRACT_LAST_REDUCTION)
+       vect_record_loop_mask (loop_vinfo, &LOOP_VINFO_MASKS (loop_vinfo),
+                              ncopies * vec_num, vectype, NULL);
+
       vect_cost_for_stmt kind = vector_stmt;
       if (reduction_type == EXTRACT_LAST_REDUCTION)
        /* Count one reduction-like operation per vector.  */