tree-vect-loop.c (vect_model_reduction_cost): Handle COND_REDUCTION and INTEGER_INDUC_COND_REDUCTION without REDUC_MAX_EXPR support.
author Richard Biener <rguenther@suse.de>
Thu, 22 Jun 2017 12:07:07 +0000 (12:07 +0000)
committer Richard Biener <rguenth@gcc.gnu.org>
Thu, 22 Jun 2017 12:07:07 +0000 (12:07 +0000)
2017-06-22  Richard Biener  <rguenther@suse.de>

* tree-vect-loop.c (vect_model_reduction_cost): Handle
COND_REDUCTION and INTEGER_INDUC_COND_REDUCTION without
REDUC_MAX_EXPR support.
(vectorizable_reduction): Likewise.
(vect_create_epilog_for_reduction): Likewise.

* gcc.dg/vect/pr65947-1.c: Remove xfail.
* gcc.dg/vect/pr65947-2.c: Likewise.
* gcc.dg/vect/pr65947-3.c: Likewise.
* gcc.dg/vect/pr65947-4.c: Likewise.
* gcc.dg/vect/pr65947-5.c: Likewise.
* gcc.dg/vect/pr65947-6.c: Likewise.
* gcc.dg/vect/pr65947-8.c: Likewise.
* gcc.dg/vect/pr65947-9.c: Likewise.
* gcc.dg/vect/pr65947-10.c: Likewise.
* gcc.dg/vect/pr65947-12.c: Likewise.
* gcc.dg/vect/pr65947-13.c: Likewise.
* gcc.dg/vect/pr65947-14.c: Likewise.
* gcc.dg/vect/vect-cond-2.c: Likewise.
* gcc.dg/vect/vect-pr69848.c: Likewise.

From-SVN: r249553

17 files changed:
gcc/ChangeLog
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.dg/vect/pr65947-1.c
gcc/testsuite/gcc.dg/vect/pr65947-10.c
gcc/testsuite/gcc.dg/vect/pr65947-12.c
gcc/testsuite/gcc.dg/vect/pr65947-13.c
gcc/testsuite/gcc.dg/vect/pr65947-14.c
gcc/testsuite/gcc.dg/vect/pr65947-2.c
gcc/testsuite/gcc.dg/vect/pr65947-3.c
gcc/testsuite/gcc.dg/vect/pr65947-4.c
gcc/testsuite/gcc.dg/vect/pr65947-5.c
gcc/testsuite/gcc.dg/vect/pr65947-6.c
gcc/testsuite/gcc.dg/vect/pr65947-8.c
gcc/testsuite/gcc.dg/vect/pr65947-9.c
gcc/testsuite/gcc.dg/vect/vect-cond-2.c
gcc/testsuite/gcc.dg/vect/vect-pr69848.c
gcc/tree-vect-loop.c

index 9b150a486453b98c1a7f0c4e9f50cb89e586d0e4..4b78674ee3fc80539b5a3e944e152270b39d780a 100644 (file)
@@ -1,3 +1,11 @@
+2017-06-22  Richard Biener  <rguenther@suse.de>
+
+       * tree-vect-loop.c (vect_model_reduction_cost): Handle
+       COND_REDUCTION and INTEGER_INDUC_COND_REDUCTION without
+       REDUC_MAX_EXPR support.
+       (vectorizable_reduction): Likewise.
+       (vect_create_epilog_for_reduction): Likewise.
+
 2017-06-22  James Greenhalgh  <james.greenhalgh@arm.com>
 
        * match.pd (A / (1 << B) -> A >> B): New.
index af774e433f7529e48e7a445ba3b0157eae5ab26e..641e4124e377cec2d1341937263b41d2e8b28ab7 100644 (file)
@@ -1,3 +1,20 @@
+2017-06-22  Richard Biener  <rguenther@suse.de>
+
+       * gcc.dg/vect/pr65947-1.c: Remove xfail.
+       * gcc.dg/vect/pr65947-2.c: Likewise.
+       * gcc.dg/vect/pr65947-3.c: Likewise.
+       * gcc.dg/vect/pr65947-4.c: Likewise.
+       * gcc.dg/vect/pr65947-5.c: Likewise.
+       * gcc.dg/vect/pr65947-6.c: Likewise.
+       * gcc.dg/vect/pr65947-8.c: Likewise.
+       * gcc.dg/vect/pr65947-9.c: Likewise.
+       * gcc.dg/vect/pr65947-10.c: Likewise.
+       * gcc.dg/vect/pr65947-12.c: Likewise.
+       * gcc.dg/vect/pr65947-13.c: Likewise.
+       * gcc.dg/vect/pr65947-14.c: Likewise.
+       * gcc.dg/vect/vect-cond-2.c: Likewise.
+       * gcc.dg/vect/vect-pr69848.c: Likewise.
+
 2017-06-22  Martin Liska  <mliska@suse.cz>
 
        * gcc.dg/tree-ssa/ipa-split-5.c: Make function bigger in order
index 93ca4dbcc21df67c5bd1b1a31a379dd5442aa58b..9072f11a104cf9bef736c9c54afaa36e9a5cc61c 100644 (file)
@@ -40,5 +40,5 @@ main (void)
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" { xfail { ! vect_max_reduc } } } } */
-/* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 4 "vect" { xfail { ! vect_max_reduc } } } } */
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */
+/* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 4 "vect" } } */
index 9bdfd6db518455eb42fc39f475ab30b9313e43f5..a8a674f40f0d404b9f8718656b34d4b50bdb1e4e 100644 (file)
@@ -40,6 +40,6 @@ main (void)
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" { xfail { ! vect_max_reduc } } } } */
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */
 /* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */
 
index 2f37aaf01ecd6d7603372914cda2fee24f215f2d..8e2c46f1a6b75c838d0f6b9fdad813d3c34e6902 100644 (file)
@@ -41,5 +41,5 @@ main (void)
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" { xfail { ! vect_max_reduc } } } } */
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */
 /* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */
index e1b626e45a376a12c482f9c12cab1c7678b73211..061777af34c2945b3c4732a18280257092e7c41f 100644 (file)
@@ -41,5 +41,5 @@ main (void)
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" { xfail { ! vect_max_reduc } } } } */
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */
 /* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */
index 0d47a7da18dbe33ef276685e2e7019e478bd9a29..a28e80bb9fcfa881b559886e4322aab05ed8d317 100644 (file)
@@ -40,5 +40,5 @@ main (void)
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" { xfail { ! vect_max_reduc } } } } */
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */
 /* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 4 "vect" } } */
index 6a36db131e4600ae5613b39f0d23da297bc7b0e3..d72fffa6720a08682641efc9a131142634243d5a 100644 (file)
@@ -41,5 +41,5 @@ main (void)
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" { xfail { ! vect_max_reduc } } } } */
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */
 /* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */
index 1323ed07c805383991783f12bd0907b675997b19..98945ba505d3dd8fdbd8fb33962b1801ebcf097f 100644 (file)
@@ -51,5 +51,5 @@ main (void)
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" { xfail { ! vect_max_reduc } } } } */
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */
 /* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */
index d754b8d3ccb54a08078e3751ff8abf823e526523..695889d743b422c599a6b7b6576e6384259bd56e 100644 (file)
@@ -40,6 +40,6 @@ main (void)
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" { xfail { ! vect_max_reduc } } } } */
-/* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 4 "vect" { xfail { ! vect_max_reduc } } } } */
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */
+/* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 4 "vect" } } */
 
index b2af501f40d13681f56828bbe35ec57a5d0e6191..04d9b38d58aa678b90c10b6ab75d63b7896e52fc 100644 (file)
@@ -41,6 +41,6 @@ main (void)
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 1 "vect" { xfail { ! vect_max_reduc } } } } */
-/* { dg-final { scan-tree-dump "loop size is greater than data size" "vect" { xfail { ! vect_max_reduc } } } } */
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 1 "vect" } } */
+/* { dg-final { scan-tree-dump "loop size is greater than data size" "vect" } } */
 /* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */
index 12dc852a32ff92c35a26c5258887e8cdd100e90c..caa4a14120ad6c49bf16368d5dfbc7792d04feb9 100644 (file)
@@ -40,5 +40,5 @@ main (void)
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" { xfail { ! vect_max_reduc } } } } */
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */
 /* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */
index 293118582aa5a01dbf47c17a8585d877bbd92455..f0f1ac29699fabb01582b9ee10705e17ee3b802d 100644 (file)
@@ -42,4 +42,4 @@ main (void)
 }
 
 /* { dg-final { scan-tree-dump-not "LOOP VECTORIZED" "vect" } } */
-/* { dg-final { scan-tree-dump "multiple types in double reduction or condition reduction" "vect" { xfail { ! vect_max_reduc } } } } */
+/* { dg-final { scan-tree-dump "multiple types in double reduction or condition reduction" "vect" } } */
index a43560b68701a296a4ffd935830109eb56127909..d769af9ec732fd00e483636cee66652f0d1cebd9 100644 (file)
@@ -46,4 +46,4 @@ main ()
 }
 
 /* { dg-final { scan-tree-dump-not "LOOP VECTORIZED" "vect" } } */
-/* { dg-final { scan-tree-dump "loop size is greater than data size" "vect" { xfail { ! vect_max_reduc } } } } */
+/* { dg-final { scan-tree-dump "loop size is greater than data size" "vect" } } */
index 646eac12a383dbea158f36886eabe18d4de73cc4..094cfe76701be295891c30b3253c06dcfe23ab1b 100644 (file)
@@ -39,6 +39,4 @@ int main (void)
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail { ! vect_max_reduc } } } } */
-
-
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
index 779a657b34e3c7e18cfbaf1f1979ed1e4483e04b..c08f1e2f5c0a14b55de7cd070c8c94fb1d872e06 100644 (file)
@@ -34,4 +34,4 @@ int main (void)
 
   return 0;
 }
-/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" { xfail { ! vect_max_reduc } } } } */
+/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" } } */
index d601296ab3e4280773184ce25cd249bf7c424957..a7c3d3d7e29752f4a4b9b2658c5da6d02e49d91b 100644 (file)
@@ -3772,6 +3772,18 @@ vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code,
                                              vect_epilogue);
            }
        }
+      else if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION)
+       {
+         unsigned nunits = TYPE_VECTOR_SUBPARTS (vectype);
+         /* Extraction of scalar elements.  */
+         epilogue_cost += add_stmt_cost (target_cost_data, 2 * nunits,
+                                         vec_to_scalar, stmt_info, 0,
+                                         vect_epilogue);
+         /* Scalar max reductions via COND_EXPR / MAX_EXPR.  */
+         epilogue_cost += add_stmt_cost (target_cost_data, 2 * nunits - 3,
+                                         scalar_stmt, stmt_info, 0,
+                                         vect_epilogue);
+       }
       else
        {
          int vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype));
@@ -3780,10 +3792,14 @@ vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code,
          int element_bitsize = tree_to_uhwi (bitsize);
          int nelements = vec_size_in_bits / element_bitsize;
 
+         if (code == COND_EXPR)
+           code = MAX_EXPR;
+
          optab = optab_for_tree_code (code, vectype, optab_default);
 
          /* We have a whole vector shift available.  */
-         if (VECTOR_MODE_P (mode)
+         if (optab != unknown_optab
+             && VECTOR_MODE_P (mode)
              && optab_handler (optab, mode) != CODE_FOR_nothing
              && have_whole_vector_shift (mode))
            {
@@ -4424,7 +4440,8 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs, gimple *stmt,
   else
     new_phi_result = PHI_RESULT (new_phis[0]);
 
-  if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION)
+  if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION
+      && reduc_code != ERROR_MARK)
     {
       /* For condition reductions, we have a vector (NEW_PHI_RESULT) containing
         various data values where the condition matched and another vector
@@ -4536,6 +4553,70 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs, gimple *stmt,
       gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
       scalar_results.safe_push (new_temp);
     }
+  else if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION
+          && reduc_code == ERROR_MARK)
+    {
+      /* Condition reduction without supported REDUC_MAX_EXPR.  Generate
+        the following, fully unrolled over the vector elements:
+         idx_val = induction_index[0];
+        val = data_reduc[0];
+         for (i = 1; i < nelts; ++i)
+          if (induction_index[i] > idx_val)
+            val = data_reduc[i], idx_val = induction_index[i];
+        return val;  */
+
+      tree data_eltype = TREE_TYPE (TREE_TYPE (new_phi_result));
+      tree idx_eltype = TREE_TYPE (TREE_TYPE (induction_index));
+      unsigned HOST_WIDE_INT el_size = tree_to_uhwi (TYPE_SIZE (idx_eltype));
+      unsigned HOST_WIDE_INT v_size
+       = el_size * TYPE_VECTOR_SUBPARTS (TREE_TYPE (induction_index));
+      tree idx_val = NULL_TREE, val = NULL_TREE;
+      for (unsigned HOST_WIDE_INT off = 0; off < v_size; off += el_size)
+       {
+         tree old_idx_val = idx_val;
+         tree old_val = val;
+         idx_val = make_ssa_name (idx_eltype);
+         epilog_stmt = gimple_build_assign (idx_val, BIT_FIELD_REF,
+                                            build3 (BIT_FIELD_REF, idx_eltype,
+                                                    induction_index,
+                                                    bitsize_int (el_size),
+                                                    bitsize_int (off)));
+         gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
+         val = make_ssa_name (data_eltype);
+         epilog_stmt = gimple_build_assign (val, BIT_FIELD_REF,
+                                            build3 (BIT_FIELD_REF,
+                                                    data_eltype,
+                                                    new_phi_result,
+                                                    bitsize_int (el_size),
+                                                    bitsize_int (off)));
+         gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
+         if (off != 0)
+           {
+             tree new_idx_val = idx_val;
+             tree new_val = val;
+             if (off != v_size - el_size)
+               {
+                 new_idx_val = make_ssa_name (idx_eltype);
+                 epilog_stmt = gimple_build_assign (new_idx_val,
+                                                    MAX_EXPR, idx_val,
+                                                    old_idx_val);
+                 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
+               }
+             new_val = make_ssa_name (data_eltype);
+             epilog_stmt = gimple_build_assign (new_val,
+                                                COND_EXPR,
+                                                build2 (GT_EXPR,
+                                                        boolean_type_node,
+                                                        idx_val,
+                                                        old_idx_val),
+                                                val, old_val);
+             gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
+             idx_val = new_idx_val;
+             val = new_val;
+           }
+       }
+      scalar_results.safe_push (val);
+    }
 
   /* 2.3 Create the reduction code, using one of the three schemes described
          above. In SLP we simply need to extract all the elements from the 
@@ -4598,6 +4679,10 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs, gimple *stmt,
       int vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype));
       tree vec_temp;
 
+      /* COND reductions all do the final reduction with MAX_EXPR.  */
+      if (code == COND_EXPR)
+       code = MAX_EXPR;
+
       /* Regardless of whether we have a whole vector shift, if we're
          emulating the operation via tree-vect-generic, we don't want
          to use it.  Only the first round of the reduction is likely
@@ -4763,6 +4848,22 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs, gimple *stmt,
             /* Not SLP - we have one scalar to keep in SCALAR_RESULTS.  */
             scalar_results.safe_push (new_temp);
         }
+
+      if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
+         == INTEGER_INDUC_COND_REDUCTION)
+       {
+         /* Earlier we set the initial value to be zero.  Check the result
+            and if it is zero then replace with the original initial
+            value.  */
+         tree zero = build_zero_cst (scalar_type);
+         tree zcompare = build2 (EQ_EXPR, boolean_type_node, new_temp, zero);
+
+         tree tmp = make_ssa_name (new_scalar_dest);
+         epilog_stmt = gimple_build_assign (tmp, COND_EXPR, zcompare,
+                                            initial_def, new_temp);
+         gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
+         scalar_results[0] = tmp;
+       }
     }
   
 vect_finalize_reduction:
@@ -5639,21 +5740,6 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
 
              epilog_reduc_code = ERROR_MARK;
            }
-
-         /* When epilog_reduc_code is ERROR_MARK then a reduction will be
-            generated in the epilog using multiple expressions.  This does not
-            work for condition reductions.  */
-         if (epilog_reduc_code == ERROR_MARK
-             && (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
-                       == INTEGER_INDUC_COND_REDUCTION
-                 || STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
-                       == CONST_COND_REDUCTION))
-           {
-             if (dump_enabled_p ())
-               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                                "no reduc code for scalar code.\n");
-             return false;
-           }
        }
       else
        {
@@ -5674,17 +5760,11 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
       cr_index_vector_type = build_vector_type
        (cr_index_scalar_type, TYPE_VECTOR_SUBPARTS (vectype_out));
 
-      epilog_reduc_code = REDUC_MAX_EXPR;
       optab = optab_for_tree_code (REDUC_MAX_EXPR, cr_index_vector_type,
                                   optab_default);
       if (optab_handler (optab, TYPE_MODE (cr_index_vector_type))
-         == CODE_FOR_nothing)
-       {
-         if (dump_enabled_p ())
-           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                            "reduc max op not supported by target.\n");
-         return false;
-       }
+         != CODE_FOR_nothing)
+       epilog_reduc_code = REDUC_MAX_EXPR;
     }
 
   if ((double_reduc