tree-optimization/97678 - fix SLP induction epilogue vectorization
author Richard Biener <rguenther@suse.de>
Tue, 3 Nov 2020 08:53:11 +0000 (09:53 +0100)
committer Richard Biener <rguenther@suse.de>
Tue, 3 Nov 2020 08:56:40 +0000 (09:56 +0100)
This restores the previous behavior of not tracking SLP nodes for the
initial values of inductions in a non-nested context, because tracking
them interferes with peeling and epilogue vectorization.

2020-11-03  Richard Biener  <rguenther@suse.de>

PR tree-optimization/97678
* tree-vect-slp.c (vect_build_slp_tree_2): Do not track
the initial values of inductions when not nested.
* tree-vect-loop.c (vectorizable_induction): Look at
PHI node initial values again for SLP and not nested
inductions.  Handle LOOP_VINFO_MASK_SKIP_NITERS and cost
invariants.

* gcc.dg/vect/pr97678.c: New testcase.

gcc/testsuite/gcc.dg/vect/pr97678.c [new file with mode: 0644]
gcc/tree-vect-loop.c
gcc/tree-vect-slp.c

diff --git a/gcc/testsuite/gcc.dg/vect/pr97678.c b/gcc/testsuite/gcc.dg/vect/pr97678.c
new file mode 100644 (file)
index 0000000..ebe4a35
--- /dev/null
@@ -0,0 +1,29 @@
+/* { dg-require-effective-target vect_int } */
+/* { dg-additional-options "-mavx2" { target avx2_runtime } } */
+
+#include "tree-vect.h"
+
+int
+main ()  /* Fills B with two interleaved inductions, then verifies every element.  */
+{
+  unsigned int i = 0;
+  unsigned short b[158 * 2];  /* Two elements (one per lane) for each of the 158 iterations.  */
+
+  check_vect ();  /* Helper from tree-vect.h (not shown here); presumably checks for runtime vector support -- confirm.  */
+
+  for (i = 0; i < 158; i++)  /* Init loop: the one expected to be SLP-vectorized.  */
+    {
+      b[i * 2] = i * 7;  /* Even lane: step 7, converted to unsigned short.  */
+      b[i * 2 + 1] = i * 8;  /* Odd lane: step 8, converted to unsigned short.  */
+    }
+
+  for (i = 0; i < 158; ++i)  /* Check loop: recompute each value and compare.  */
+    if (b[i*2] != (unsigned short)(i*7)
+        || b[i*2+1] != (unsigned short)(i*8))
+      abort ();  /* Any mismatch means the init loop stored a wrong value.  */
+
+  return 0;
+}
+
+/* The init loop should be vectorized with SLP.  */
+/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "vect" } } */
index fcea28935bc6d0daac8f03ed0f82b04a41798f00..6fa185daa2836062814f9c9a6659011a3153c6a2 100644 (file)
@@ -7800,6 +7800,10 @@ vectorizable_induction (loop_vec_info loop_vinfo,
            = record_stmt_cost (cost_vec,
                                SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node),
                                vector_stmt, stmt_info, 0, vect_body);
+         /* prologue cost for vec_init (if not nested) and step.  */
+         prologue_cost = record_stmt_cost (cost_vec, 1 + !nested_in_vect_loop,
+                                           scalar_to_vec,
+                                           stmt_info, 0, vect_prologue);
        }
       else /* if (!slp_node) */
        {
@@ -7858,9 +7862,15 @@ vectorizable_induction (loop_vec_info loop_vinfo,
         cycles we have to reconstruct the step from SCEV data.  */
       unsigned group_size = SLP_TREE_LANES (slp_node);
       tree *steps = XALLOCAVEC (tree, group_size);
+      tree *inits = XALLOCAVEC (tree, group_size);
       stmt_vec_info phi_info;
       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, phi_info)
-       steps[i] = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (phi_info);
+       {
+         steps[i] = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (phi_info);
+         if (!init_node)
+           inits[i] = gimple_phi_arg_def (as_a<gphi *> (phi_info->stmt),
+                                          pe->dest_idx);
+       }
 
       /* Now generate the IVs.  */
       unsigned nvects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
@@ -7875,16 +7885,39 @@ vectorizable_induction (loop_vec_info loop_vinfo,
                                 ? build_real_from_wide (stept, lup_mul,
                                                         UNSIGNED)
                                 : build_int_cstu (stept, lup_mul));
+      tree peel_mul = NULL_TREE;
+      if (LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo))
+       {
+         stmts = NULL;
+         if (SCALAR_FLOAT_TYPE_P (stept))
+           peel_mul = gimple_build (&stmts, FLOAT_EXPR, stept,
+                                    LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo));
+         else
+           peel_mul = gimple_convert (&stmts, stept,
+                                      LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo));
+         peel_mul = gimple_build_vector_from_val (&stmts, step_vectype, peel_mul);
+         if (stmts)
+           {
+             new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
+             gcc_assert (!new_bb);
+           }
+       }
       unsigned ivn;
       auto_vec<tree> vec_steps;
       for (ivn = 0; ivn < nivs; ++ivn)
        {
-         tree_vector_builder elts (step_vectype, const_nunits, 1);
+         tree_vector_builder step_elts (step_vectype, const_nunits, 1);
+         tree_vector_builder init_elts (vectype, const_nunits, 1);
          tree_vector_builder mul_elts (step_vectype, const_nunits, 1);
          for (unsigned eltn = 0; eltn < const_nunits; ++eltn)
            {
              tree elt = steps[(ivn*const_nunits + eltn) % group_size];
-             elts.quick_push (elt);
+             step_elts.quick_push (elt);
+             if (!init_node)
+               {
+                 elt = inits[(ivn*const_nunits + eltn) % group_size];
+                 init_elts.quick_push (elt);
+               }
              unsigned mul_elt = (ivn*const_nunits + eltn) / group_size;
              mul_elts.quick_push (SCALAR_FLOAT_TYPE_P (stept)
                                   ? build_real_from_wide (stept,
@@ -7892,10 +7925,15 @@ vectorizable_induction (loop_vec_info loop_vinfo,
                                   : build_int_cstu (stept, mul_elt));
            }
          stmts = NULL;
-         vec_step = gimple_build_vector (&stmts, &elts);
+         vec_step = gimple_build_vector (&stmts, &step_elts);
          vec_step = gimple_convert (&stmts, step_vectype, vec_step);
          vec_steps.safe_push (vec_step);
          tree step_mul = gimple_build_vector (&stmts, &mul_elts);
+         if (peel_mul)
+           step_mul = gimple_build (&stmts, PLUS_EXPR, step_vectype,
+                                    step_mul, peel_mul);
+         if (!init_node)
+           vec_init = gimple_build_vector (&stmts, &init_elts);
          if (stmts)
            {
              new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
@@ -7926,7 +7964,8 @@ vectorizable_induction (loop_vec_info loop_vinfo,
          add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop),
                       UNKNOWN_LOCATION);
 
-         vec_init = vect_get_slp_vect_def (init_node, ivn);
+         if (init_node)
+           vec_init = vect_get_slp_vect_def (init_node, ivn);
          if (!integer_zerop (step_mul))
            {
              stmts = NULL;
index 63a59c0c8ed388f24d782048fa0187283403ba59..e97fbe897a76008d50ee94c3b1b009344cc37d4a 100644 (file)
@@ -1444,9 +1444,13 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
        if (def_type == vect_induction_def)
          {
            /* Induction PHIs are not cycles but walk the initial
-              value.  */
+              value.  Only for inner loops though, for outer loops
+              we need to pick up the value from the actual PHIs
+              to more easily support peeling and epilogue vectorization.  */
            class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
-           if (nested_in_vect_loop_p (loop, stmt_info))
+           if (!nested_in_vect_loop_p (loop, stmt_info))
+             skip_args[loop_preheader_edge (loop)->dest_idx] = true;
+           else
              loop = loop->inner;
            skip_args[loop_latch_edge (loop)->dest_idx] = true;
          }