emit SLP vectorized loads earlier
authorRichard Biener <rguenther@suse.de>
Tue, 23 Jun 2020 12:47:47 +0000 (14:47 +0200)
committerRichard Biener <rguenther@suse.de>
Wed, 24 Jun 2020 11:00:02 +0000 (13:00 +0200)
This makes sure to emit SLP vectorized loads where the first scalar
load is.  This makes SLP dependence checking more powerful because
hoisting loads can use TBAA and it increases the freedom for
vector placement when there are constraints from live lanes.

Vectorized shifts block inserting vectorized stmts always after
vectorized defs because it ends up using the original scalar
operand even when the SLP graph indicates the shift operand
is vectorized (and we actually emit and cost those stmts).

vect_slp_analyze_and_verify_node_alignment shows we need alignment
for too many places, this is a temporary solution and my plan
is to have a single meta-info for a dataref group instead
(also getting rid of DR_GROUP_FIRST/NEXT_ELEMENT).

2020-06-24  Richard Biener  <rguenther@suse.de>

* tree-vectorizer.h (vect_find_first_scalar_stmt_in_slp):
Declare.
* tree-vect-data-refs.c (vect_preserves_scalar_order_p):
Simplify for new position of vectorized SLP loads.
(vect_slp_analyze_node_dependences): Adjust for it.
(vect_slp_analyze_and_verify_node_alignment): Compute alignment
for the first stmts dataref.
* tree-vect-slp.c (vect_find_first_scalar_stmt_in_slp): New.
(vect_schedule_slp_instance): Emit loads before the
first scalar stmt.
* tree-vect-stmts.c (vectorizable_load): Do what the comment
says and use vect_find_first_scalar_stmt_in_slp.

gcc/tree-vect-data-refs.c
gcc/tree-vect-slp.c
gcc/tree-vect-stmts.c
gcc/tree-vectorizer.h

index 2365a3925bb815fc864822456811d561d62d3bb7..eb8288e7a851b6388172725e7aaf08b3e0b41a5c 100644 (file)
@@ -232,61 +232,43 @@ vect_preserves_scalar_order_p (dr_vec_info *dr_info_a, dr_vec_info *dr_info_b)
       && !STMT_VINFO_GROUPED_ACCESS (stmtinfo_b))
     return true;
 
-  /* STMT_A and STMT_B belong to overlapping groups.  All loads in a
-     SLP group are emitted at the position of the last scalar load and
-     all loads in an interleaving group are emitted at the position
-     of the first scalar load.
+  /* STMT_A and STMT_B belong to overlapping groups.  All loads are
+     emitted at the position of the first scalar load.
      Stores in a group are emitted at the position of the last scalar store.
      Compute that position and check whether the resulting order matches
-     the current one.
-     We have not yet decided between SLP and interleaving so we have
-     to conservatively assume both.  */
-  stmt_vec_info il_a;
-  stmt_vec_info last_a = il_a = DR_GROUP_FIRST_ELEMENT (stmtinfo_a);
-  if (last_a)
+     the current one.  */
+  stmt_vec_info il_a = DR_GROUP_FIRST_ELEMENT (stmtinfo_a);
+  if (il_a)
     {
-      for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (last_a); s;
-          s = DR_GROUP_NEXT_ELEMENT (s))
-       last_a = get_later_stmt (last_a, s);
-      if (!DR_IS_WRITE (STMT_VINFO_DATA_REF (stmtinfo_a)))
-       {
-         for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (il_a); s;
-              s = DR_GROUP_NEXT_ELEMENT (s))
-           if (get_later_stmt (il_a, s) == il_a)
-             il_a = s;
-       }
-      else
-       il_a = last_a;
+      if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmtinfo_a)))
+       for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (il_a); s;
+            s = DR_GROUP_NEXT_ELEMENT (s))
+         il_a = get_later_stmt (il_a, s);
+      else /* DR_IS_READ */
+       for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (il_a); s;
+            s = DR_GROUP_NEXT_ELEMENT (s))
+         if (get_later_stmt (il_a, s) == il_a)
+           il_a = s;
     }
   else
-    last_a = il_a = stmtinfo_a;
-  stmt_vec_info il_b;
-  stmt_vec_info last_b = il_b = DR_GROUP_FIRST_ELEMENT (stmtinfo_b);
-  if (last_b)
+    il_a = stmtinfo_a;
+  stmt_vec_info il_b = DR_GROUP_FIRST_ELEMENT (stmtinfo_b);
+  if (il_b)
     {
-      for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (last_b); s;
-          s = DR_GROUP_NEXT_ELEMENT (s))
-       last_b = get_later_stmt (last_b, s);
-      if (!DR_IS_WRITE (STMT_VINFO_DATA_REF (stmtinfo_b)))
-       {
-         for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (il_b); s;
-              s = DR_GROUP_NEXT_ELEMENT (s))
-           if (get_later_stmt (il_b, s) == il_b)
-             il_b = s;
-       }
-      else
-       il_b = last_b;
+      if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmtinfo_b)))
+       for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (il_b); s;
+            s = DR_GROUP_NEXT_ELEMENT (s))
+         il_b = get_later_stmt (il_b, s);
+      else /* DR_IS_READ */
+       for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (il_b); s;
+            s = DR_GROUP_NEXT_ELEMENT (s))
+         if (get_later_stmt (il_b, s) == il_b)
+           il_b = s;
     }
   else
-    last_b = il_b = stmtinfo_b;
+    il_b = stmtinfo_b;
   bool a_after_b = (get_later_stmt (stmtinfo_a, stmtinfo_b) == stmtinfo_a);
-  return (/* SLP */
-         (get_later_stmt (last_a, last_b) == last_a) == a_after_b
-         /* Interleaving */
-         && (get_later_stmt (il_a, il_b) == il_a) == a_after_b
-         /* Mixed */
-         && (get_later_stmt (il_a, last_b) == il_a) == a_after_b
-         && (get_later_stmt (last_a, il_b) == last_a) == a_after_b);
+  return (get_later_stmt (il_a, il_b) == il_a) == a_after_b;
 }
 
 /* A subroutine of vect_analyze_data_ref_dependence.  Handle
@@ -702,71 +684,144 @@ vect_slp_analyze_node_dependences (vec_info *vinfo, slp_tree node,
   /* This walks over all stmts involved in the SLP load/store done
      in NODE verifying we can sink them up to the last stmt in the
      group.  */
-  stmt_vec_info last_access_info = vect_find_last_scalar_stmt_in_slp (node);
-  for (unsigned k = 0; k < SLP_TREE_SCALAR_STMTS (node).length (); ++k)
+  if (DR_IS_WRITE (STMT_VINFO_DATA_REF (SLP_TREE_REPRESENTATIVE (node))))
     {
-      stmt_vec_info access_info = SLP_TREE_SCALAR_STMTS (node)[k];
-      if (access_info == last_access_info)
-       continue;
-      data_reference *dr_a = STMT_VINFO_DATA_REF (access_info);
-      ao_ref ref;
-      bool ref_initialized_p = false;
-      for (gimple_stmt_iterator gsi = gsi_for_stmt (access_info->stmt);
-          gsi_stmt (gsi) != last_access_info->stmt; gsi_next (&gsi))
+      stmt_vec_info last_access_info = vect_find_last_scalar_stmt_in_slp (node);
+      for (unsigned k = 0; k < SLP_TREE_SCALAR_STMTS (node).length (); ++k)
        {
-         gimple *stmt = gsi_stmt (gsi);
-         if (! gimple_vuse (stmt)
-             || (DR_IS_READ (dr_a) && ! gimple_vdef (stmt)))
+         stmt_vec_info access_info = SLP_TREE_SCALAR_STMTS (node)[k];
+         if (access_info == last_access_info)
            continue;
-
-         /* If we couldn't record a (single) data reference for this
-            stmt we have to resort to the alias oracle.  */
-         stmt_vec_info stmt_info = vinfo->lookup_stmt (stmt);
-         data_reference *dr_b = STMT_VINFO_DATA_REF (stmt_info);
-         if (!dr_b)
+         data_reference *dr_a = STMT_VINFO_DATA_REF (access_info);
+         ao_ref ref;
+         bool ref_initialized_p = false;
+         for (gimple_stmt_iterator gsi = gsi_for_stmt (access_info->stmt);
+              gsi_stmt (gsi) != last_access_info->stmt; gsi_next (&gsi))
            {
-             /* We are moving a store or sinking a load - this means
-                we cannot use TBAA for disambiguation.  */
-             if (!ref_initialized_p)
-               ao_ref_init (&ref, DR_REF (dr_a));
-             if (stmt_may_clobber_ref_p_1 (stmt, &ref, false)
-                 || ref_maybe_used_by_stmt_p (stmt, &ref, false))
+             gimple *stmt = gsi_stmt (gsi);
+             if (! gimple_vuse (stmt))
+               continue;
+
+             /* If we couldn't record a (single) data reference for this
+                stmt we have to resort to the alias oracle.  */
+             stmt_vec_info stmt_info = vinfo->lookup_stmt (stmt);
+             data_reference *dr_b = STMT_VINFO_DATA_REF (stmt_info);
+             if (!dr_b)
+               {
+                 /* We are moving a store - this means
+                    we cannot use TBAA for disambiguation.  */
+                 if (!ref_initialized_p)
+                   ao_ref_init (&ref, DR_REF (dr_a));
+                 if (stmt_may_clobber_ref_p_1 (stmt, &ref, false)
+                     || ref_maybe_used_by_stmt_p (stmt, &ref, false))
+                   return false;
+                 continue;
+               }
+
+             bool dependent = false;
+             /* If we run into a store of this same instance (we've just
+                marked those) then delay dependence checking until we run
+                into the last store because this is where it will have
+                been sunk to (and we verify if we can do that as well).  */
+             if (gimple_visited_p (stmt))
+               {
+                 if (stmt_info != last_store_info)
+                   continue;
+                 unsigned i;
+                 stmt_vec_info store_info;
+                 FOR_EACH_VEC_ELT (stores, i, store_info)
+                   {
+                     data_reference *store_dr
+                       = STMT_VINFO_DATA_REF (store_info);
+                     ddr_p ddr = initialize_data_dependence_relation
+                                   (dr_a, store_dr, vNULL);
+                     dependent
+                       = vect_slp_analyze_data_ref_dependence (vinfo, ddr);
+                     free_dependence_relation (ddr);
+                     if (dependent)
+                       break;
+                   }
+               }
+             else
+               {
+                 ddr_p ddr = initialize_data_dependence_relation (dr_a,
+                                                                  dr_b, vNULL);
+                 dependent = vect_slp_analyze_data_ref_dependence (vinfo, ddr);
+                 free_dependence_relation (ddr);
+               }
+             if (dependent)
                return false;
-             continue;
            }
-
-         bool dependent = false;
-         /* If we run into a store of this same instance (we've just
-            marked those) then delay dependence checking until we run
-            into the last store because this is where it will have
-            been sunk to (and we verify if we can do that as well).  */
-         if (gimple_visited_p (stmt))
+       }
+    }
+  else /* DR_IS_READ */
+    {
+      stmt_vec_info first_access_info
+       = vect_find_first_scalar_stmt_in_slp (node);
+      for (unsigned k = 0; k < SLP_TREE_SCALAR_STMTS (node).length (); ++k)
+       {
+         stmt_vec_info access_info = SLP_TREE_SCALAR_STMTS (node)[k];
+         if (access_info == first_access_info)
+           continue;
+         data_reference *dr_a = STMT_VINFO_DATA_REF (access_info);
+         ao_ref ref;
+         bool ref_initialized_p = false;
+         for (gimple_stmt_iterator gsi = gsi_for_stmt (access_info->stmt);
+              gsi_stmt (gsi) != first_access_info->stmt; gsi_prev (&gsi))
            {
-             if (stmt_info != last_store_info)
+             gimple *stmt = gsi_stmt (gsi);
+             if (! gimple_vdef (stmt))
                continue;
-             unsigned i;
-             stmt_vec_info store_info;
-             FOR_EACH_VEC_ELT (stores, i, store_info)
+
+             /* If we couldn't record a (single) data reference for this
+                stmt we have to resort to the alias oracle.  */
+             stmt_vec_info stmt_info = vinfo->lookup_stmt (stmt);
+             data_reference *dr_b = STMT_VINFO_DATA_REF (stmt_info);
+             if (!dr_b)
+               {
+                 /* We are hoisting a load - this means we can use
+                    TBAA for disambiguation.  */
+                 if (!ref_initialized_p)
+                   ao_ref_init (&ref, DR_REF (dr_a));
+                 if (stmt_may_clobber_ref_p_1 (stmt, &ref, true))
+                   return false;
+                 continue;
+               }
+
+             bool dependent = false;
+             /* If we run into a store of this same instance (we've just
+                marked those) then delay dependence checking until we run
+                into the last store because this is where it will have
+                been sunk to (and we verify if we can do that as well).  */
+             if (gimple_visited_p (stmt))
+               {
+                 if (stmt_info != last_store_info)
+                   continue;
+                 unsigned i;
+                 stmt_vec_info store_info;
+                 FOR_EACH_VEC_ELT (stores, i, store_info)
+                   {
+                     data_reference *store_dr
+                       = STMT_VINFO_DATA_REF (store_info);
+                     ddr_p ddr = initialize_data_dependence_relation
+                                   (dr_a, store_dr, vNULL);
+                     dependent
+                       = vect_slp_analyze_data_ref_dependence (vinfo, ddr);
+                     free_dependence_relation (ddr);
+                     if (dependent)
+                       break;
+                   }
+               }
+             else
                {
-                 data_reference *store_dr = STMT_VINFO_DATA_REF (store_info);
-                 ddr_p ddr = initialize_data_dependence_relation
-                               (dr_a, store_dr, vNULL);
-                 dependent
-                   = vect_slp_analyze_data_ref_dependence (vinfo, ddr);
+                 ddr_p ddr = initialize_data_dependence_relation (dr_a,
+                                                                  dr_b, vNULL);
+                 dependent = vect_slp_analyze_data_ref_dependence (vinfo, ddr);
                  free_dependence_relation (ddr);
-                 if (dependent)
-                   break;
                }
+             if (dependent)
+               return false;
            }
-         else
-           {
-             ddr_p ddr = initialize_data_dependence_relation (dr_a,
-                                                              dr_b, vNULL);
-             dependent = vect_slp_analyze_data_ref_dependence (vinfo, ddr);
-             free_dependence_relation (ddr);
-           }
-         if (dependent)
-           return false;
        }
     }
   return true;
@@ -2399,10 +2454,20 @@ vect_slp_analyze_and_verify_node_alignment (vec_info *vinfo, slp_tree node)
 
   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
   vect_compute_data_ref_alignment (vinfo, dr_info);
-  /* For creating the data-ref pointer we need alignment of the
-     first element anyway.  */
+  /* In several places we need alignment of the first element anyway.  */
   if (dr_info != first_dr_info)
     vect_compute_data_ref_alignment (vinfo, first_dr_info);
+
+  /* For creating the data-ref pointer we need alignment of the
+     first element as well.  */
+  first_stmt_info = vect_find_first_scalar_stmt_in_slp (node);
+  if (first_stmt_info != SLP_TREE_SCALAR_STMTS (node)[0])
+    {
+      first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
+      if (dr_info != first_dr_info)
+       vect_compute_data_ref_alignment (vinfo, first_dr_info);
+    }
+
   if (! verify_data_ref_alignment (vinfo, dr_info))
     {
       if (dump_enabled_p ())
index 4031db4a9c6f91cf89849f2e29e8d08750cbeeca..e7a260877a9172e4ac8960e440aa13e1a1d75a55 100644 (file)
@@ -1915,6 +1915,25 @@ vect_find_last_scalar_stmt_in_slp (slp_tree node)
   return last;
 }
 
+/* Find the first stmt in NODE.  */
+
+stmt_vec_info
+vect_find_first_scalar_stmt_in_slp (slp_tree node)
+{
+  stmt_vec_info first = NULL;
+  stmt_vec_info stmt_vinfo;
+
+  for (int i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &stmt_vinfo); i++)
+    {
+      stmt_vinfo = vect_orig_stmt (stmt_vinfo);
+      if (!first
+         || get_later_stmt (stmt_vinfo, first) == first)
+       first = stmt_vinfo;
+    }
+
+  return first;
+}
+
 /* Splits a group of stores, currently beginning at FIRST_VINFO, into
    two groups: one (still beginning at FIRST_VINFO) of size GROUP1_SIZE
    (also containing the first GROUP1_SIZE stmts, since stores are
@@ -4205,15 +4224,33 @@ vect_schedule_slp_instance (vec_info *vinfo,
                     "------>vectorizing SLP node starting from: %G",
                     stmt_info->stmt);
 
-  /* Vectorized stmts go before the last scalar stmt which is where
-     all uses are ready.  */
-  stmt_vec_info last_stmt_info = vect_find_last_scalar_stmt_in_slp (node);
-  if (last_stmt_info)
-    si = gsi_for_stmt (last_stmt_info->stmt);
+  if (STMT_VINFO_DATA_REF (stmt_info)
+      && SLP_TREE_CODE (node) != VEC_PERM_EXPR)
+    {
+      /* Vectorized loads go before the first scalar load to make it
+        ready early, vectorized stores go before the last scalar
+        stmt which is where all uses are ready.  */
+      stmt_vec_info last_stmt_info = NULL;
+      if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
+       last_stmt_info = vect_find_first_scalar_stmt_in_slp (node);
+      else /* DR_IS_WRITE */
+       last_stmt_info = vect_find_last_scalar_stmt_in_slp (node);
+      si = gsi_for_stmt (last_stmt_info->stmt);
+    }
+  else if (SLP_TREE_CHILDREN (node).is_empty ())
+    /* This happens for reduction PHIs.  */
+    si = gsi_for_stmt (vect_find_last_scalar_stmt_in_slp (node)->stmt);
+  else if (stmt_vec_info first_stmt_info
+            = vect_find_last_scalar_stmt_in_slp (node))
+    /* ???  Shifts by scalars hit us here again, we end up vectorizing
+       the shift operand but end up using the scalar operand anyway.
+       This needs to be better reflected in the SLP tree.  For now
+       use the last position if available.  */
+    si = gsi_for_stmt (first_stmt_info->stmt);
   else
     {
-      /* Or if we do not have 1:1 matching scalar stmts emit after the
-        children vectorized defs.  */
+      /* Emit other stmts after the children vectorized defs which is
+        earliest possible.  */
       gimple *last_stmt = NULL;
       FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
        if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
index 23799f0ac187a65d548e6539bd8ab67e0245a8c6..de7d77f3872b33c5621830fd2738f8d3925a1a4a 100644 (file)
@@ -8728,7 +8728,8 @@ vectorizable_load (vec_info *vinfo,
       /* For BB vectorization always use the first stmt to base
         the data ref pointer on.  */
       if (bb_vinfo)
-       first_stmt_info_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0];
+       first_stmt_info_for_drptr
+         = vect_find_first_scalar_stmt_in_slp (slp_node);
 
       /* Check if the chain of loads is already vectorized.  */
       if (STMT_VINFO_VEC_STMTS (first_stmt_info).exists ()
index 32feec3e24b45c821d81beb70ed150deefcf4744..e4d132493ca24c9babd89aedb3156c456218659a 100644 (file)
@@ -2001,6 +2001,7 @@ extern void vect_get_slp_defs (vec_info *, slp_tree, vec<vec<tree> > *,
                               unsigned n = -1U);
 extern bool vect_slp_bb (basic_block);
 extern stmt_vec_info vect_find_last_scalar_stmt_in_slp (slp_tree);
+extern stmt_vec_info vect_find_first_scalar_stmt_in_slp (slp_tree);
 extern bool is_simple_and_all_uses_invariant (stmt_vec_info, loop_vec_info);
 extern bool can_duplicate_and_interleave_p (vec_info *, unsigned int, tree,
                                            unsigned int * = NULL,