re PR tree-optimization/80631 (Compiling with -O3 -mavx2 gives wrong code)
authorJakub Jelinek <jakub@redhat.com>
Tue, 12 Dec 2017 08:55:02 +0000 (09:55 +0100)
committerJakub Jelinek <jakub@gcc.gnu.org>
Tue, 12 Dec 2017 08:55:02 +0000 (09:55 +0100)
PR tree-optimization/80631
* tree-vect-loop.c (get_initial_def_for_reduction): Fix comment typo.
(vect_create_epilog_for_reduction): Add INDUC_VAL and INDUC_CODE
arguments, for INTEGER_INDUC_COND_REDUCTION use INDUC_VAL instead of
hardcoding zero as the value if COND_EXPR is never true.  For
INTEGER_INDUC_COND_REDUCTION don't emit the final COND_EXPR if
INDUC_VAL is equal to INITIAL_DEF, and use INDUC_CODE instead of
hardcoding MAX_EXPR as the reduction operation.
(is_nonwrapping_integer_induction): Allow negative step.
(vectorizable_reduction): Compute INDUC_VAL and INDUC_CODE for
vect_create_epilog_for_reduction, if no value is suitable, don't
use INTEGER_INDUC_COND_REDUCTION for now.  Formatting fixes.

* gcc.dg/vect/pr80631-1.c: New test.
* gcc.dg/vect/pr80631-2.c: New test.
* gcc.dg/vect/pr65947-13.c: Expect integer induc cond reduction
vectorization.

From-SVN: r255574

gcc/ChangeLog
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.dg/vect/pr65947-13.c
gcc/testsuite/gcc.dg/vect/pr80631-1.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/vect/pr80631-2.c [new file with mode: 0644]
gcc/tree-vect-loop.c

index 6ab0e47c2d055c0c47dc87171187c1ed868be802..1e26d7b9747d80d5eee91e9a4ec3b5874c0aad71 100644 (file)
@@ -1,3 +1,18 @@
+2017-12-12  Jakub Jelinek  <jakub@redhat.com>
+
+       PR tree-optimization/80631
+       * tree-vect-loop.c (get_initial_def_for_reduction): Fix comment typo.
+       (vect_create_epilog_for_reduction): Add INDUC_VAL and INDUC_CODE
+       arguments, for INTEGER_INDUC_COND_REDUCTION use INDUC_VAL instead of
+       hardcoding zero as the value if COND_EXPR is never true.  For
+       INTEGER_INDUC_COND_REDUCTION don't emit the final COND_EXPR if
+       INDUC_VAL is equal to INITIAL_DEF, and use INDUC_CODE instead of
+       hardcoding MAX_EXPR as the reduction operation.
+       (is_nonwrapping_integer_induction): Allow negative step.
+       (vectorizable_reduction): Compute INDUC_VAL and INDUC_CODE for
+       vect_create_epilog_for_reduction, if no value is suitable, don't
+       use INTEGER_INDUC_COND_REDUCTION for now.  Formatting fixes.
+
 2017-12-12  Richard Biener  <rguenther@suse.de>
 
        PR tree-optimization/81889
index dfc5ed854ca240f45aee199b76f5729723533921..fcb4c6d8323491a2aae42965610f631f1674e292 100644 (file)
@@ -1,3 +1,11 @@
+2017-12-12  Jakub Jelinek  <jakub@redhat.com>
+
+       PR tree-optimization/80631
+       * gcc.dg/vect/pr80631-1.c: New test.
+       * gcc.dg/vect/pr80631-2.c: New test.
+       * gcc.dg/vect/pr65947-13.c: Expect integer induc cond reduction
+       vectorization.
+
 2017-12-12  Richard Biener  <rguenther@suse.de>
 
        PR tree-optimization/81889
index 061777af34c2945b3c4732a18280257092e7c41f..ce290459c50943b1a62a1de8eaf0b2b44d6fe7fd 100644 (file)
@@ -6,8 +6,7 @@ extern void abort (void) __attribute__ ((noreturn));
 
 #define N 32
 
-/* Simple condition reduction with a reversed loop.
-   Will fail to vectorize to a simple case.  */
+/* Simple condition reduction with a reversed loop.  */
 
 int
 condition_reduction (int *a, int min_v)
@@ -42,4 +41,4 @@ main (void)
 }
 
 /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */
-/* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */
+/* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 4 "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr80631-1.c b/gcc/testsuite/gcc.dg/vect/pr80631-1.c
new file mode 100644 (file)
index 0000000..4af741d
--- /dev/null
@@ -0,0 +1,76 @@
+/* PR tree-optimization/80631 */
+/* { dg-do run } */
+
+#include "tree-vect.h"
+
+int v[8] = { 77, 1, 79, 3, 4, 3, 6, 7 };
+
+__attribute__((noipa)) void
+f1 (void)
+{
+  int k, r = -1;
+  for (k = 0; k < 8; k++)
+    if (v[k] == 77)
+      r = k;
+  if (r != 0)
+    abort ();
+}
+
+__attribute__((noipa)) void
+f2 (void)
+{
+  int k, r = 4;
+  for (k = 0; k < 8; k++)
+    if (v[k] == 79)
+      r = k;
+  if (r != 2)
+    abort ();
+}
+
+__attribute__((noipa)) void
+f3 (void)
+{
+  int k, r = -17;
+  for (k = 0; k < 8; k++)
+    if (v[k] == 78)
+      r = k;
+  if (r != -17)
+    abort ();
+}
+
+__attribute__((noipa)) void
+f4 (void)
+{
+  int k, r = 7;
+  for (k = 0; k < 8; k++)
+    if (v[k] == 78)
+      r = k;
+  if (r != 7)
+    abort ();
+}
+
+__attribute__((noipa)) void
+f5 (void)
+{
+  int k, r = -1;
+  for (k = 0; k < 8; k++)
+    if (v[k] == 3)
+      r = k;
+  if (r != 5)
+    abort ();
+}
+
+int
+main ()
+{
+  check_vect ();
+  f1 ();
+  f2 ();
+  f3 ();
+  f4 ();
+  f5 ();
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 5 "vect" { target vect_condition } } } */
+/* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 10 "vect" { target vect_condition } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr80631-2.c b/gcc/testsuite/gcc.dg/vect/pr80631-2.c
new file mode 100644 (file)
index 0000000..777738a
--- /dev/null
@@ -0,0 +1,76 @@
+/* PR tree-optimization/80631 */
+/* { dg-do run } */
+
+#include "tree-vect.h"
+
+int v[8] = { 77, 1, 79, 3, 4, 3, 6, 7 };
+
+__attribute__((noipa)) void
+f1 (void)
+{
+  int k, r = -1;
+  for (k = 7; k >= 0; k--)
+    if (v[k] == 77)
+      r = k;
+  if (r != 0)
+    abort ();
+}
+
+__attribute__((noipa)) void
+f2 (void)
+{
+  int k, r = 4;
+  for (k = 7; k >= 0; k--)
+    if (v[k] == 79)
+      r = k;
+  if (r != 2)
+    abort ();
+}
+
+__attribute__((noipa)) void
+f3 (void)
+{
+  int k, r = -17;
+  for (k = 7; k >= 0; k--)
+    if (v[k] == 78)
+      r = k;
+  if (r != -17)
+    abort ();
+}
+
+__attribute__((noipa)) void
+f4 (void)
+{
+  int k, r = 7;
+  for (k = 7; k >= 0; k--)
+    if (v[k] == 78)
+      r = k;
+  if (r != 7)
+    abort ();
+}
+
+__attribute__((noipa)) void
+f5 (void)
+{
+  int k, r = -1;
+  for (k = 7; k >= 0; k--)
+    if (v[k] == 3)
+      r = k;
+  if (r != 3)
+    abort ();
+}
+
+int
+main ()
+{
+  check_vect ();
+  f1 ();
+  f2 ();
+  f3 ();
+  f4 ();
+  f5 ();
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 5 "vect" { target vect_condition } } } */
+/* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 10 "vect" { target vect_condition } } } */
index 3e4703a360049119deafe1c9fdf4317bd506a9dc..a66c8cfccad3c010aaa9be804a0ce4b7fb62ec8d 100644 (file)
@@ -4034,7 +4034,7 @@ get_initial_def_for_reduction (gimple *stmt, tree init_val,
     case MULT_EXPR:
     case BIT_AND_EXPR:
       {
-        /* ADJUSMENT_DEF is NULL when called from
+        /* ADJUSTMENT_DEF is NULL when called from
            vect_create_epilog_for_reduction to vectorize double reduction.  */
         if (adjustment_def)
          *adjustment_def = init_val;
@@ -4283,6 +4283,11 @@ get_initial_defs_for_reduction (slp_tree slp_node,
    DOUBLE_REDUC is TRUE if double reduction phi nodes should be handled.
    SLP_NODE is an SLP node containing a group of reduction statements. The 
      first one in this group is STMT.
+   INDUC_VAL is for INTEGER_INDUC_COND_REDUCTION the value to use for the case
+     when the COND_EXPR is never true in the loop.  For MAX_EXPR, it needs to
+     be smaller than any value of the IV in the loop, for MIN_EXPR larger than
+     any value of the IV in the loop.
+   INDUC_CODE is the code for epilog reduction if INTEGER_INDUC_COND_REDUCTION.
 
    This function:
    1. Creates the reduction def-use cycles: sets the arguments for 
@@ -4330,7 +4335,8 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs, gimple *stmt,
                                  vec<gimple *> reduction_phis,
                                   bool double_reduc, 
                                  slp_tree slp_node,
-                                 slp_instance slp_node_instance)
+                                 slp_instance slp_node_instance,
+                                 tree induc_val, enum tree_code induc_code)
 {
   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
   stmt_vec_info prev_phi_info;
@@ -4419,6 +4425,18 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs, gimple *stmt,
       gimple *def_stmt;
       initial_def = PHI_ARG_DEF_FROM_EDGE (reduc_def_stmt,
                                           loop_preheader_edge (loop));
+      /* Optimize: if initial_def is for REDUC_MAX smaller than the base
+        and we can't use zero for induc_val, use initial_def.  Similarly
+        for REDUC_MIN and initial_def larger than the base.  */
+      if (TREE_CODE (initial_def) == INTEGER_CST
+         && (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
+             == INTEGER_INDUC_COND_REDUCTION)
+         && !integer_zerop (induc_val)
+         && ((reduc_fn == IFN_REDUC_MAX
+              && tree_int_cst_lt (initial_def, induc_val))
+             || (reduc_fn == IFN_REDUC_MIN
+                 && tree_int_cst_lt (induc_val, initial_def))))
+       induc_val = initial_def;
       vect_is_simple_use (initial_def, loop_vinfo, &def_stmt, &initial_def_dt);
       vec_initial_def = get_initial_def_for_reduction (stmt, initial_def,
                                                       &adjustment_def);
@@ -4453,9 +4471,10 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs, gimple *stmt,
              gcc_assert (i == 0);
 
              tree vec_init_def_type = TREE_TYPE (vec_init_def);
-             tree zero_vec = build_zero_cst (vec_init_def_type);
+             tree induc_val_vec
+               = build_vector_from_val (vec_init_def_type, induc_val);
 
-             add_phi_arg (as_a <gphi *> (phi), zero_vec,
+             add_phi_arg (as_a <gphi *> (phi), induc_val_vec,
                           loop_preheader_edge (loop), UNKNOWN_LOCATION);
            }
          else
@@ -4983,14 +5002,16 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs, gimple *stmt,
       gimple_set_lhs (epilog_stmt, new_temp);
       gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
 
-      if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
-         == INTEGER_INDUC_COND_REDUCTION)
+      if ((STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
+          == INTEGER_INDUC_COND_REDUCTION)
+         && !operand_equal_p (initial_def, induc_val, 0))
        {
-         /* Earlier we set the initial value to be zero.  Check the result
-            and if it is zero then replace with the original initial
-            value.  */
-         tree zero = build_zero_cst (scalar_type);
-         tree zcompare = build2 (EQ_EXPR, boolean_type_node, new_temp, zero);
+         /* Earlier we set the initial value to be a vector if induc_val
+            values.  Check the result and if it is induc_val then replace
+            with the original initial value, unless induc_val is
+            the same as initial_def already.  */
+         tree zcompare = build2 (EQ_EXPR, boolean_type_node, new_temp,
+                                 induc_val);
 
          tmp = make_ssa_name (new_scalar_dest);
          epilog_stmt = gimple_build_assign (tmp, COND_EXPR, zcompare,
@@ -5008,9 +5029,16 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs, gimple *stmt,
       int vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype));
       tree vec_temp;
 
-      /* COND reductions all do the final reduction with MAX_EXPR.  */
+      /* COND reductions all do the final reduction with MAX_EXPR
+        or MIN_EXPR.  */
       if (code == COND_EXPR)
-       code = MAX_EXPR;
+       {
+         if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
+             == INTEGER_INDUC_COND_REDUCTION)
+           code = induc_code;
+         else
+           code = MAX_EXPR;
+       }
 
       /* Regardless of whether we have a whole vector shift, if we're
          emulating the operation via tree-vect-generic, we don't want
@@ -5110,7 +5138,7 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs, gimple *stmt,
               else
                 vec_temp = gimple_assign_lhs (new_phi);
               tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
-                            bitsize_zero_node);
+                                bitsize_zero_node);
               epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
               new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
               gimple_assign_set_lhs (epilog_stmt, new_temp);
@@ -5179,14 +5207,16 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs, gimple *stmt,
             scalar_results.safe_push (new_temp);
         }
 
-      if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
-         == INTEGER_INDUC_COND_REDUCTION)
+      if ((STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
+          == INTEGER_INDUC_COND_REDUCTION)
+         && !operand_equal_p (initial_def, induc_val, 0))
        {
-         /* Earlier we set the initial value to be zero.  Check the result
-            and if it is zero then replace with the original initial
-            value.  */
-         tree zero = build_zero_cst (scalar_type);
-         tree zcompare = build2 (EQ_EXPR, boolean_type_node, new_temp, zero);
+         /* Earlier we set the initial value to be a vector if induc_val
+            values.  Check the result and if it is induc_val then replace
+            with the original initial value, unless induc_val is
+            the same as initial_def already.  */
+         tree zcompare = build2 (EQ_EXPR, boolean_type_node, new_temp,
+                                 induc_val);
 
          tree tmp = make_ssa_name (new_scalar_dest);
          epilog_stmt = gimple_build_assign (tmp, COND_EXPR, zcompare,
@@ -5513,10 +5543,6 @@ is_nonwrapping_integer_induction (gimple *stmt, struct loop *loop)
       || TREE_CODE (step) != INTEGER_CST)
     return false;
 
-  /* Check that the induction increments.  */
-  if (tree_int_cst_sgn (step) == -1)
-    return false;
-
   /* Check that the max size of the loop will not wrap.  */
 
   if (TYPE_OVERFLOW_UNDEFINED (lhs_type))
@@ -5608,6 +5634,8 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
   tree new_temp = NULL_TREE;
   gimple *def_stmt;
   enum vect_def_type dt, cond_reduc_dt = vect_unknown_def_type;
+  gimple *cond_reduc_def_stmt = NULL;
+  enum tree_code cond_reduc_op_code = ERROR_MARK;
   tree scalar_type;
   bool is_simple_use;
   gimple *orig_stmt;
@@ -5879,9 +5907,13 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
              cond_reduc_dt = dt;
              cond_reduc_val = ops[i];
            }
-         if (dt == vect_induction_def && def_stmt != NULL
+         if (dt == vect_induction_def
+             && def_stmt != NULL
              && is_nonwrapping_integer_induction (def_stmt, loop))
-           cond_reduc_dt = dt;
+           {
+             cond_reduc_dt = dt;
+             cond_reduc_def_stmt = def_stmt;
+           }
        }
     }
 
@@ -5928,12 +5960,46 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
     {
       if (cond_reduc_dt == vect_induction_def)
        {
-         if (dump_enabled_p ())
-           dump_printf_loc (MSG_NOTE, vect_location,
-                            "condition expression based on "
-                            "integer induction.\n");
-         STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
-           = INTEGER_INDUC_COND_REDUCTION;
+         stmt_vec_info cond_stmt_vinfo = vinfo_for_stmt (cond_reduc_def_stmt);
+         tree base
+           = STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED (cond_stmt_vinfo);
+         tree step = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (cond_stmt_vinfo);
+
+         gcc_assert (TREE_CODE (base) == INTEGER_CST
+                     && TREE_CODE (step) == INTEGER_CST);
+         cond_reduc_val = NULL_TREE;
+         /* Find a suitable value, for MAX_EXPR below base, for MIN_EXPR
+            above base; punt if base is the minimum value of the type for
+            MAX_EXPR or maximum value of the type for MIN_EXPR for now.  */
+         if (tree_int_cst_sgn (step) == -1)
+           {
+             cond_reduc_op_code = MIN_EXPR;
+             if (tree_int_cst_sgn (base) == -1)
+               cond_reduc_val = build_int_cst (TREE_TYPE (base), 0);
+             else if (tree_int_cst_lt (base,
+                                       TYPE_MAX_VALUE (TREE_TYPE (base))))
+               cond_reduc_val
+                 = int_const_binop (PLUS_EXPR, base, integer_one_node);
+           }
+         else
+           {
+             cond_reduc_op_code = MAX_EXPR;
+             if (tree_int_cst_sgn (base) == 1)
+               cond_reduc_val = build_int_cst (TREE_TYPE (base), 0);
+             else if (tree_int_cst_lt (TYPE_MIN_VALUE (TREE_TYPE (base)),
+                                       base))
+               cond_reduc_val
+                 = int_const_binop (MINUS_EXPR, base, integer_one_node);
+           }
+         if (cond_reduc_val)
+           {
+             if (dump_enabled_p ())
+               dump_printf_loc (MSG_NOTE, vect_location,
+                                "condition expression based on "
+                                "integer induction.\n");
+             STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
+               = INTEGER_INDUC_COND_REDUCTION;
+           }
        }
 
       /* Loop peeling modifies initial value of reduction PHI, which
@@ -6127,8 +6193,8 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
          gcc_assert (orig_code == MAX_EXPR || orig_code == MIN_EXPR);
        }
       else if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
-                == INTEGER_INDUC_COND_REDUCTION)
-       orig_code = MAX_EXPR;
+              == INTEGER_INDUC_COND_REDUCTION)
+       orig_code = cond_reduc_op_code;
     }
 
   if (nested_cycle)
@@ -6464,7 +6530,8 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
 
   vect_create_epilog_for_reduction (vect_defs, stmt, reduc_def_stmt,
                                    epilog_copies, reduc_fn, phis,
-                                   double_reduc, slp_node, slp_node_instance);
+                                   double_reduc, slp_node, slp_node_instance,
+                                   cond_reduc_val, cond_reduc_op_code);
 
   return true;
 }