Remove SLP_INSTANCE_GROUP_SIZE
author Richard Biener <rguenther@suse.de>
Fri, 21 Feb 2020 09:40:25 +0000 (10:40 +0100)
committer Richard Biener <rguenther@suse.de>
Wed, 13 May 2020 13:26:45 +0000 (15:26 +0200)
This removes the SLP_INSTANCE_GROUP_SIZE member, since the number of
lanes throughout an SLP subgraph is not necessarily constant.

2020-05-13  Richard Biener  <rguenther@suse.de>

* tree-vectorizer.h (SLP_INSTANCE_GROUP_SIZE): Remove.
(_slp_instance::group_size): Likewise.
* tree-vect-loop.c (vectorizable_reduction): The group size
is the number of lanes in the node.
* tree-vect-slp.c (vect_attempt_slp_rearrange_stmts): Likewise.
(vect_analyze_slp_instance): Do not set SLP_INSTANCE_GROUP_SIZE,
verify it matches the instance tree's number of lanes.
(vect_slp_analyze_node_operations_1): Use the number of lanes
in the node as group size.
(vect_bb_vectorization_profitable_p): Use the instance root
number of lanes for the size of life.
(vect_schedule_slp_instance): Use the number of lanes as
group_size.
* tree-vect-stmts.c (vectorizable_load): Remove SLP instance
parameter.  Use the number of lanes of the load for the group
size in the gap adjustment code.
(vect_analyze_stmt): Adjust.
(vect_transform_stmt): Likewise.

gcc/ChangeLog
gcc/tree-vect-loop.c
gcc/tree-vect-slp.c
gcc/tree-vect-stmts.c
gcc/tree-vectorizer.h

index 7fe1c6c93ee6276075c90ee4912ac0ff082d6590..ed6e1025efd5106edeb496569c45d1d9148df4bf 100644 (file)
@@ -1,3 +1,24 @@
+2020-05-13  Richard Biener  <rguenther@suse.de>
+
+       * tree-vectorizer.h (SLP_INSTANCE_GROUP_SIZE): Remove.
+       (_slp_instance::group_size): Likewise.
+       * tree-vect-loop.c (vectorizable_reduction): The group size
+       is the number of lanes in the node.
+       * tree-vect-slp.c (vect_attempt_slp_rearrange_stmts): Likewise.
+       (vect_analyze_slp_instance): Do not set SLP_INSTANCE_GROUP_SIZE,
+       verify it matches the instance tree's number of lanes.
+       (vect_slp_analyze_node_operations_1): Use the number of lanes
+       in the node as group size.
+       (vect_bb_vectorization_profitable_p): Use the instance root
+       number of lanes for the size of life.
+       (vect_schedule_slp_instance): Use the number of lanes as
+       group_size.
+       * tree-vect-stmts.c (vectorizable_load): Remove SLP instance
+       parameter.  Use the number of lanes of the load for the group
+       size in the gap adjustment code.
+       (vect_analyze_stmt): Adjust.
+       (vect_transform_stmt): Likewise.
+
 2020-05-13  Jakub Jelinek  <jakub@redhat.com>
 
        PR debug/95080
index 180790abf421ceffe5af9a53f4847e37ba776b6f..a1f52dcc2ad8b792fcf8b79951a8b0ccad4f014d 100644 (file)
@@ -6574,7 +6574,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
         which each SLP statement has its own initial value and in which
         that value needs to be repeated for every instance of the
         statement within the initial vector.  */
-      unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
+      unsigned int group_size = SLP_TREE_SCALAR_STMTS (slp_node).length ();
       if (!neutral_op
          && !can_duplicate_and_interleave_p (loop_vinfo, group_size,
                                              TREE_TYPE (vectype_out)))
index f9ad0821fa0e570e3fd6947f3350b389c49a21ec..6f623955ce5191edfe81efed519f9f8cf26b4fcb 100644 (file)
@@ -1810,7 +1810,6 @@ vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
 static bool
 vect_attempt_slp_rearrange_stmts (slp_instance slp_instn)
 {
-  unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
   unsigned int i, j;
   unsigned int lidx;
   slp_tree node, load;
@@ -1821,14 +1820,16 @@ vect_attempt_slp_rearrange_stmts (slp_instance slp_instn)
   /* Compare all the permutation sequences to the first one.  We know
      that at least one load is permuted.  */
   node = SLP_INSTANCE_LOADS (slp_instn)[0];
-  if (!node->load_permutation.exists ())
+  if (!SLP_TREE_LOAD_PERMUTATION (node).exists ())
     return false;
+  unsigned int group_size = SLP_TREE_LOAD_PERMUTATION (node).length ();
   for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
     {
-      if (!load->load_permutation.exists ())
+      if (!SLP_TREE_LOAD_PERMUTATION (load).exists ()
+         || SLP_TREE_LOAD_PERMUTATION (load).length () != group_size)
        return false;
-      FOR_EACH_VEC_ELT (load->load_permutation, j, lidx)
-       if (lidx != node->load_permutation[j])
+      FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (load), j, lidx)
+       if (lidx != SLP_TREE_LOAD_PERMUTATION (node)[j])
          return false;
     }
 
@@ -2151,7 +2152,6 @@ vect_analyze_slp_instance (vec_info *vinfo,
          /* Create a new SLP instance.  */
          new_instance = XNEW (class _slp_instance);
          SLP_INSTANCE_TREE (new_instance) = node;
-         SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
          SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
          SLP_INSTANCE_LOADS (new_instance) = vNULL;
          SLP_INSTANCE_ROOT_STMT (new_instance) = constructor ? stmt_info : NULL;
@@ -2240,6 +2240,12 @@ vect_analyze_slp_instance (vec_info *vinfo,
 
          vinfo->slp_instances.safe_push (new_instance);
 
+         /* ???  We've replaced the old SLP_INSTANCE_GROUP_SIZE with
+            the number of scalar stmts in the root in a few places.
+            Verify that assumption holds.  */
+         gcc_assert (SLP_TREE_SCALAR_STMTS (SLP_INSTANCE_TREE (new_instance))
+                       .length () == group_size);
+
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location,
@@ -2670,7 +2676,7 @@ vect_slp_analyze_node_operations_1 (vec_info *vinfo, slp_tree node,
        vf = loop_vinfo->vectorization_factor;
       else
        vf = 1;
-      unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (node_instance);
+      unsigned int group_size = SLP_TREE_SCALAR_STMTS (node).length ();
       tree vectype = STMT_VINFO_VECTYPE (stmt_info);
       SLP_TREE_NUMBER_OF_VEC_STMTS (node)
        = vect_get_num_vectors (vf * group_size, vectype);
@@ -2956,7 +2962,8 @@ vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
   FOR_EACH_VEC_ELT (slp_instances, i, instance)
     {
       auto_vec<bool, 20> life;
-      life.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance));
+      life.safe_grow_cleared
+       (SLP_TREE_SCALAR_STMTS (SLP_INSTANCE_TREE (instance)).length ());
       vect_bb_slp_scalar_cost (bb_vinfo,
                               SLP_INSTANCE_TREE (instance),
                               &life, &scalar_costs, visited);
@@ -3791,8 +3798,7 @@ vect_get_slp_defs (vec_info *vinfo,
 
 /* Generate vector permute statements from a list of loads in DR_CHAIN.
    If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
-   permute statements for the SLP node NODE of the SLP instance
-   SLP_NODE_INSTANCE.  */
+   permute statements for the SLP node NODE.  */
 
 bool
 vect_transform_slp_perm_load (vec_info *vinfo,
@@ -4040,7 +4046,7 @@ vect_schedule_slp_instance (vec_info *vinfo,
   /* VECTYPE is the type of the destination.  */
   vectype = STMT_VINFO_VECTYPE (stmt_info);
   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
-  group_size = SLP_INSTANCE_GROUP_SIZE (instance);
+  group_size = SLP_TREE_SCALAR_STMTS (node).length ();
 
   gcc_assert (SLP_TREE_NUMBER_OF_VEC_STMTS (node) != 0);
   SLP_TREE_VEC_STMTS (node).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node));
index 9a715b82f1acb54e6df768fb3c94c06973e3e2d2..61b1ac3531807e0c622d7aba7dfd3f074c8835e7 100644 (file)
@@ -8606,7 +8606,6 @@ static bool
 vectorizable_load (vec_info *vinfo,
                   stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                   stmt_vec_info *vec_stmt, slp_tree slp_node,
-                  slp_instance slp_node_instance,
                   stmt_vector_for_cost *cost_vec)
 {
   tree scalar_dest;
@@ -9221,8 +9220,9 @@ vectorizable_load (vec_info *vinfo,
             unpermuted sequence.  In other cases we need to load the
             whole group, not only the number of vector stmts the
             permutation result fits in.  */
+         unsigned scalar_lanes = SLP_TREE_SCALAR_STMTS (slp_node).length ();
          if (slp_perm
-             && (group_size != SLP_INSTANCE_GROUP_SIZE (slp_node_instance)
+             && (group_size != scalar_lanes 
                  || !multiple_p (nunits, group_size)))
            {
              /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
@@ -9236,7 +9236,7 @@ vectorizable_load (vec_info *vinfo,
            {
              vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
              group_gap_adj
-               = group_size - SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
+               = group_size - scalar_lanes;
            }
        }
       else
@@ -11062,8 +11062,7 @@ vect_analyze_stmt (vec_info *vinfo,
                                     NULL, NULL, node, cost_vec)
          || vectorizable_assignment (vinfo, stmt_info,
                                      NULL, NULL, node, cost_vec)
-         || vectorizable_load (vinfo, stmt_info,
-                               NULL, NULL, node, node_instance, cost_vec)
+         || vectorizable_load (vinfo, stmt_info, NULL, NULL, node, cost_vec)
          || vectorizable_store (vinfo, stmt_info, NULL, NULL, node, cost_vec)
          || vectorizable_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
                                     node, node_instance, cost_vec)
@@ -11091,7 +11090,7 @@ vect_analyze_stmt (vec_info *vinfo,
              || vectorizable_assignment (vinfo, stmt_info, NULL, NULL, node,
                                          cost_vec)
              || vectorizable_load (vinfo, stmt_info,
-                                   NULL, NULL, node, node_instance, cost_vec)
+                                   NULL, NULL, node, cost_vec)
              || vectorizable_store (vinfo, stmt_info,
                                     NULL, NULL, node, cost_vec)
              || vectorizable_condition (vinfo, stmt_info,
@@ -11182,7 +11181,7 @@ vect_transform_stmt (vec_info *vinfo,
 
     case load_vec_info_type:
       done = vectorizable_load (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
-                                slp_node_instance, NULL);
+                               NULL);
       gcc_assert (done);
       break;
 
index aa8bd33b9d150e2bca791e6253c41b485288ed15..20784251b6fbfc927a55273a8092eef5f7502a42 100644 (file)
@@ -119,13 +119,16 @@ typedef struct _slp_tree *slp_tree;
 struct _slp_tree {
   /* Nodes that contain def-stmts of this node statements operands.  */
   vec<slp_tree> children;
+
   /* A group of scalar stmts to be vectorized together.  */
   vec<stmt_vec_info> stmts;
   /* A group of scalar operands to be vectorized together.  */
   vec<tree> ops;
+
   /* Load permutation relative to the stores, NULL if there is no
      permutation.  */
   vec<unsigned> load_permutation;
+
   /* Vectorized stmt/s.  */
   vec<stmt_vec_info> vec_stmts;
   /* Number of vector stmts that are created to replace the group of scalar
@@ -133,6 +136,7 @@ struct _slp_tree {
      scalar elements in one scalar iteration (GROUP_SIZE) multiplied by VF
      divided by vector size.  */
   unsigned int vec_stmts_size;
+
   /* Reference count in the SLP graph.  */
   unsigned int refcnt;
   /* The maximum number of vector elements for the subtree rooted
@@ -156,9 +160,6 @@ public:
      from, NULL otherwise.  */
   stmt_vec_info root_stmt;
 
-  /* Size of groups of scalar stmts that will be replaced by SIMD stmt/s.  */
-  unsigned int group_size;
-
   /* The unrolling factor required to vectorized this SLP instance.  */
   poly_uint64 unrolling_factor;
 
@@ -172,7 +173,6 @@ public:
 
 /* Access Functions.  */
 #define SLP_INSTANCE_TREE(S)                     (S)->root
-#define SLP_INSTANCE_GROUP_SIZE(S)               (S)->group_size
 #define SLP_INSTANCE_UNROLLING_FACTOR(S)         (S)->unrolling_factor
 #define SLP_INSTANCE_LOADS(S)                    (S)->loads
 #define SLP_INSTANCE_ROOT_STMT(S)                (S)->root_stmt