tree-vectorizer.h (vect_loop_vectorized_call): Declare.
author: Richard Biener <rguenther@suse.de>
Thu, 13 Jun 2019 10:10:05 +0000 (10:10 +0000)
committer: Richard Biener <rguenth@gcc.gnu.org>
Thu, 13 Jun 2019 10:10:05 +0000 (10:10 +0000)
2019-06-13  Richard Biener  <rguenther@suse.de>

* tree-vectorizer.h (vect_loop_vectorized_call): Declare.
* tree-vectorizer.c (vect_loop_vectorized_call): Export and
also return the condition stmt.
* tree-vect-loop-manip.c (vect_loop_versioning): Compute outermost
loop we can version and version that, reusing the loop version
created by if-conversion instead of versioning again.

* gcc.dg/vect/vect-version-1.c: New testcase.
* gcc.dg/vect/vect-version-2.c: Likewise.

From-SVN: r272239

gcc/ChangeLog
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.dg/vect/vect-version-1.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/vect/vect-version-2.c [new file with mode: 0644]
gcc/tree-vect-loop-manip.c
gcc/tree-vectorizer.c
gcc/tree-vectorizer.h

index f97ee51b5232071b8a26e45bcd8af624ede2afa5..7fe06d26f49b49d0a1a59f902e762d89f4d7dce8 100644 (file)
@@ -1,3 +1,12 @@
+2019-06-13  Richard Biener  <rguenther@suse.de>
+
+       * tree-vectorizer.h (vect_loop_vectorized_call): Declare.
+       * tree-vectorizer.c (vect_loop_vectorized_call): Export and
+       also return the condition stmt.
+       * tree-vect-loop-manip.c (vect_loop_versioning): Compute outermost
+       loop we can version and version that, reusing the loop version
+       created by if-conversion instead of versioning again.
+
 2019-06-13  Aldy Hernandez  <aldyh@redhat.com>
 
        * gimple-loop-versioning.cc (prune_loop_conditions): Use
index 865408b76dd35bafd93af8dd3cc5da2a24076204..ac0290971eb6929319fc90681a4559433889be44 100644 (file)
@@ -1,3 +1,8 @@
+2019-06-13  Richard Biener  <rguenther@suse.de>
+
+       * gcc.dg/vect/vect-version-1.c: New testcase.
+       * gcc.dg/vect/vect-version-2.c: Likewise.
+
 2019-06-13  Paolo Carlini  <paolo.carlini@oracle.com>
 
        * g++.dg/diagnostic/variably-modified-type-1.C: New.
diff --git a/gcc/testsuite/gcc.dg/vect/vect-version-1.c b/gcc/testsuite/gcc.dg/vect/vect-version-1.c
new file mode 100644 (file)
index 0000000..4540a11
--- /dev/null
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_double } */
+/* { dg-require-effective-target vect_condition } */
+
+void foo (double *x, double *y, int m, int n, int o, int p)
+{
+  for (int i = 0; i < m; ++i)
+    for (int j = 0; j < n; ++j)
+      for (int k = 0; k < o; ++k)
+       for (int l = 0; l < p; ++l)
+         {
+           double tem = x[l] + y[l];
+           if (tem != 0.)
+             y[l] = x[l];
+           else
+             y[l] = 0.;
+         }
+}
+
+/* { dg-final { scan-tree-dump "applying loop versioning to outer loop 1" "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-version-2.c b/gcc/testsuite/gcc.dg/vect/vect-version-2.c
new file mode 100644 (file)
index 0000000..0ea39e3
--- /dev/null
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_double } */
+/* { dg-require-effective-target vect_condition } */
+
+void foo (double *x, double *y, int m, int n, int o, int p)
+{
+  for (int i = 0; i < m; ++i)
+    for (int j = 0; j < n; ++j)
+      for (int k = 0; k < o; ++k)
+       for (int l = 0; l < k; ++l)
+         {
+           double tem = x[l] + y[l];
+           if (tem != 0.)
+             y[l] = x[l];
+           else
+             y[l] = 0.;
+         }
+}
+
+/* { dg-final { scan-tree-dump "reusing loop version created by if conversion" "vect" } } */
index a0a1bee9408e4f4483ecea6d4a17c4677c5042e5..ee0d92a625a5e6c8289126354a525c2b16aa65e5 100644 (file)
@@ -3016,7 +3016,8 @@ vect_loop_versioning (loop_vec_info loop_vinfo,
     vect_create_cond_for_niters_checks (loop_vinfo, &cond_expr);
 
   if (cond_expr)
-    cond_expr = force_gimple_operand_1 (cond_expr, &cond_expr_stmt_list,
+    cond_expr = force_gimple_operand_1 (unshare_expr (cond_expr),
+                                       &cond_expr_stmt_list,
                                        is_gimple_condexpr, NULL_TREE);
 
   if (version_align)
@@ -3060,45 +3061,136 @@ vect_loop_versioning (loop_vec_info loop_vinfo,
                                      is_gimple_condexpr, NULL_TREE);
   gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
 
-  initialize_original_copy_tables ();
-  if (scalar_loop)
+  /* Compute the outermost loop cond_expr and cond_expr_stmt_list are
+     invariant in.  */
+  struct loop *outermost = outermost_invariant_loop_for_expr (loop, cond_expr);
+  for (gimple_stmt_iterator gsi = gsi_start (cond_expr_stmt_list);
+       !gsi_end_p (gsi); gsi_next (&gsi))
     {
-      edge scalar_e;
-      basic_block preheader, scalar_preheader;
+      gimple *stmt = gsi_stmt (gsi);
+      update_stmt (stmt);
+      ssa_op_iter iter;
+      use_operand_p use_p;
+      basic_block def_bb;
+      FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE)
+       if ((def_bb = gimple_bb (SSA_NAME_DEF_STMT (USE_FROM_PTR (use_p))))
+           && flow_bb_inside_loop_p (outermost, def_bb))
+         outermost = superloop_at_depth (loop, bb_loop_depth (def_bb) + 1);
+    }
 
-      /* We don't want to scale SCALAR_LOOP's frequencies, we need to
-        scale LOOP's frequencies instead.  */
-      nloop = loop_version (scalar_loop, cond_expr, &condition_bb,
-                           prob, prob.invert (), prob, prob.invert (), true);
-      scale_loop_frequencies (loop, prob);
-      /* CONDITION_BB was created above SCALAR_LOOP's preheader,
-        while we need to move it above LOOP's preheader.  */
-      e = loop_preheader_edge (loop);
-      scalar_e = loop_preheader_edge (scalar_loop);
-      /* The vector loop preheader might not be empty, since new
-        invariants could have been created while analyzing the loop.  */
-      gcc_assert (single_pred_p (e->src));
-      gcc_assert (empty_block_p (scalar_e->src)
-                 && single_pred_p (scalar_e->src));
-      gcc_assert (single_pred_p (condition_bb));
-      preheader = e->src;
-      scalar_preheader = scalar_e->src;
-      scalar_e = find_edge (condition_bb, scalar_preheader);
-      e = single_pred_edge (preheader);
-      redirect_edge_and_branch_force (single_pred_edge (condition_bb),
-                                     scalar_preheader);
-      redirect_edge_and_branch_force (scalar_e, preheader);
-      redirect_edge_and_branch_force (e, condition_bb);
-      set_immediate_dominator (CDI_DOMINATORS, condition_bb,
-                              single_pred (condition_bb));
-      set_immediate_dominator (CDI_DOMINATORS, scalar_preheader,
-                              single_pred (scalar_preheader));
-      set_immediate_dominator (CDI_DOMINATORS, preheader,
-                              condition_bb);
+  /* Search for the outermost loop we can version.  Avoid versioning of
+     non-perfect nests but allow if-conversion versioned loops inside.  */
+  struct loop *loop_to_version = loop;
+  if (flow_loop_nested_p (outermost, loop))
+    { 
+      if (dump_enabled_p ())
+       dump_printf_loc (MSG_NOTE, vect_location,
+                        "trying to apply versioning to outer loop %d\n",
+                        outermost->num);
+      if (outermost->num == 0)
+       outermost = superloop_at_depth (loop, 1);
+      /* And avoid applying versioning on non-perfect nests.  */
+      while (loop_to_version != outermost
+            && single_exit (loop_outer (loop_to_version))
+            && (!loop_outer (loop_to_version)->inner->next
+                || vect_loop_vectorized_call (loop_to_version))
+            && (!loop_outer (loop_to_version)->inner->next
+                || !loop_outer (loop_to_version)->inner->next->next))
+       loop_to_version = loop_outer (loop_to_version);
+    }
+
+  /* Apply versioning.  If there is already a scalar version created by
+     if-conversion re-use that.  */
+  gcond *cond;
+  if (gimple *call = vect_loop_vectorized_call (loop_to_version, &cond))
+    {
+      gcc_assert (scalar_loop);
+      condition_bb = gimple_bb (cond);
+      gimple_cond_set_condition_from_tree (cond, cond_expr);
+      update_stmt (cond);
+
+      if (cond_expr_stmt_list)
+       {
+         cond_exp_gsi = gsi_for_stmt (call);
+         gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list,
+                                GSI_SAME_STMT);
+       }
+
+      /* ???  if-conversion uses profile_probability::always () but
+         prob below is profile_probability::likely ().  */
+      nloop = scalar_loop;
+      if (dump_enabled_p ())
+       dump_printf_loc (MSG_NOTE, vect_location,
+                        "reusing %sloop version created by if conversion\n",
+                        loop_to_version != loop ? "outer " : "");
     }
   else
-    nloop = loop_version (loop, cond_expr, &condition_bb,
-                         prob, prob.invert (), prob, prob.invert (), true);
+    {
+      if (loop_to_version != loop
+         && dump_enabled_p ())
+       dump_printf_loc (MSG_NOTE, vect_location,
+                        "applying loop versioning to outer loop %d\n",
+                        loop_to_version->num);
+
+      initialize_original_copy_tables ();
+      nloop = loop_version (loop_to_version, cond_expr, &condition_bb,
+                           prob, prob.invert (), prob, prob.invert (), true);
+      gcc_assert (nloop);
+      nloop = get_loop_copy (loop);
+
+      /* Kill off IFN_LOOP_VECTORIZED_CALL in the copy, nobody will
+         reap those otherwise;  they also refer to the original
+        loops.  */
+      struct loop *l = loop;
+      while (gimple *call = vect_loop_vectorized_call (l))
+       {
+         call = SSA_NAME_DEF_STMT (get_current_def (gimple_call_lhs (call)));
+         fold_loop_internal_call (call, boolean_false_node);
+         l = loop_outer (l);
+       }
+      free_original_copy_tables ();
+
+      if (cond_expr_stmt_list)
+       {
+         cond_exp_gsi = gsi_last_bb (condition_bb);
+         gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list,
+                                GSI_SAME_STMT);
+       }
+
+      /* Loop versioning violates an assumption we try to maintain during
+        vectorization - that the loop exit block has a single predecessor.
+        After versioning, the exit block of both loop versions is the same
+        basic block (i.e. it has two predecessors). Just in order to simplify
+        following transformations in the vectorizer, we fix this situation
+        here by adding a new (empty) block on the exit-edge of the loop,
+        with the proper loop-exit phis to maintain loop-closed-form.
+        If loop versioning wasn't done from loop, but scalar_loop instead,
+        merge_bb will have already just a single successor.  */
+
+      merge_bb = single_exit (loop_to_version)->dest;
+      if (EDGE_COUNT (merge_bb->preds) >= 2)
+       {
+         gcc_assert (EDGE_COUNT (merge_bb->preds) >= 2);
+         new_exit_bb = split_edge (single_exit (loop_to_version));
+         new_exit_e = single_exit (loop_to_version);
+         e = EDGE_SUCC (new_exit_bb, 0);
+
+         for (gsi = gsi_start_phis (merge_bb); !gsi_end_p (gsi);
+              gsi_next (&gsi))
+           {
+             tree new_res;
+             orig_phi = gsi.phi ();
+             new_res = copy_ssa_name (PHI_RESULT (orig_phi));
+             new_phi = create_phi_node (new_res, new_exit_bb);
+             arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
+             add_phi_arg (new_phi, arg, new_exit_e,
+                          gimple_phi_arg_location_from_edge (orig_phi, e));
+             adjust_phi_and_debug_stmts (orig_phi, e, PHI_RESULT (new_phi));
+           }
+       }
+
+      update_ssa (TODO_update_ssa);
+    }
 
   if (version_niter)
     {
@@ -3125,48 +3217,6 @@ vect_loop_versioning (loop_vec_info loop_vinfo,
                         "alignment\n");
 
     }
-  free_original_copy_tables ();
-
-  /* Loop versioning violates an assumption we try to maintain during
-     vectorization - that the loop exit block has a single predecessor.
-     After versioning, the exit block of both loop versions is the same
-     basic block (i.e. it has two predecessors). Just in order to simplify
-     following transformations in the vectorizer, we fix this situation
-     here by adding a new (empty) block on the exit-edge of the loop,
-     with the proper loop-exit phis to maintain loop-closed-form.
-     If loop versioning wasn't done from loop, but scalar_loop instead,
-     merge_bb will have already just a single successor.  */
-
-  merge_bb = single_exit (loop)->dest;
-  if (scalar_loop == NULL || EDGE_COUNT (merge_bb->preds) >= 2)
-    {
-      gcc_assert (EDGE_COUNT (merge_bb->preds) >= 2);
-      new_exit_bb = split_edge (single_exit (loop));
-      new_exit_e = single_exit (loop);
-      e = EDGE_SUCC (new_exit_bb, 0);
-
-      for (gsi = gsi_start_phis (merge_bb); !gsi_end_p (gsi); gsi_next (&gsi))
-       {
-         tree new_res;
-         orig_phi = gsi.phi ();
-         new_res = copy_ssa_name (PHI_RESULT (orig_phi));
-         new_phi = create_phi_node (new_res, new_exit_bb);
-         arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
-         add_phi_arg (new_phi, arg, new_exit_e,
-                      gimple_phi_arg_location_from_edge (orig_phi, e));
-         adjust_phi_and_debug_stmts (orig_phi, e, PHI_RESULT (new_phi));
-       }
-    }
-
-  /* End loop-exit-fixes after versioning.  */
-
-  if (cond_expr_stmt_list)
-    {
-      cond_exp_gsi = gsi_last_bb (condition_bb);
-      gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list,
-                            GSI_SAME_STMT);
-    }
-  update_ssa (TODO_update_ssa);
 
   return nloop;
 }
index 4f6c65faf640acfb79ce40d8b75fff041566c396..325ef58722d21a65ab896a9358677b07111b060b 100644 (file)
@@ -727,8 +727,8 @@ vect_free_loop_info_assumptions (struct loop *loop)
 /* If LOOP has been versioned during ifcvt, return the internal call
    guarding it.  */
 
-static gimple *
-vect_loop_vectorized_call (struct loop *loop)
+gimple *
+vect_loop_vectorized_call (struct loop *loop, gcond **cond)
 {
   basic_block bb = loop_preheader_edge (loop)->src;
   gimple *g;
@@ -744,6 +744,8 @@ vect_loop_vectorized_call (struct loop *loop)
   while (1);
   if (g && gimple_code (g) == GIMPLE_COND)
     {
+      if (cond)
+       *cond = as_a <gcond *> (g);
       gimple_stmt_iterator gsi = gsi_for_stmt (g);
       gsi_prev (&gsi);
       if (!gsi_end_p (gsi))
index eb0f21f84fb4d0dca777e6724e8795227c2633f5..19a8af090a64f0f9936e19f3b17203366f42f989 100644 (file)
@@ -1656,5 +1656,7 @@ void vect_pattern_recog (vec_info *);
 /* In tree-vectorizer.c.  */
 unsigned vectorize_loops (void);
 void vect_free_loop_info_assumptions (struct loop *);
+gimple *vect_loop_vectorized_call (struct loop *, gcond **cond = NULL);
+
 
 #endif  /* GCC_TREE_VECTORIZER_H  */