poly_int: vect_nunits_for_cost
author Richard Sandiford <richard.sandiford@linaro.org>
Wed, 3 Jan 2018 07:14:24 +0000 (07:14 +0000)
committer Richard Sandiford <rsandifo@gcc.gnu.org>
Wed, 3 Jan 2018 07:14:24 +0000 (07:14 +0000)
This patch adds a function for getting the number of elements in
a vector for cost purposes, which is always constant.  It makes
it possible for a later patch to change GET_MODE_NUNITS and
TYPE_VECTOR_SUBPARTS to a poly_int.

2018-01-03  Richard Sandiford  <richard.sandiford@linaro.org>
    Alan Hayward  <alan.hayward@arm.com>
    David Sherwood  <david.sherwood@arm.com>

gcc/
* tree-vectorizer.h (vect_nunits_for_cost): New function.
* tree-vect-loop.c (vect_model_reduction_cost): Use it.
* tree-vect-slp.c (vect_analyze_slp_cost_1): Likewise.
(vect_analyze_slp_cost): Likewise.
* tree-vect-stmts.c (vect_model_store_cost): Likewise.
(vect_model_load_cost): Likewise.

Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r256128

gcc/ChangeLog
gcc/tree-vect-loop.c
gcc/tree-vect-slp.c
gcc/tree-vect-stmts.c
gcc/tree-vectorizer.h

index 45579c740bc324e6bed0bdd47621c10b11fb68a5..52a10ad2305a10629d0f0d25fb37cd5c389cd527 100644 (file)
@@ -1,3 +1,14 @@
+2018-01-03  Richard Sandiford  <richard.sandiford@linaro.org>
+           Alan Hayward  <alan.hayward@arm.com>
+           David Sherwood  <david.sherwood@arm.com>
+
+       * tree-vectorizer.h (vect_nunits_for_cost): New function.
+       * tree-vect-loop.c (vect_model_reduction_cost): Use it.
+       * tree-vect-slp.c (vect_analyze_slp_cost_1): Likewise.
+       (vect_analyze_slp_cost): Likewise.
+       * tree-vect-stmts.c (vect_model_store_cost): Likewise.
+       (vect_model_load_cost): Likewise.
+
 2018-01-03  Richard Sandiford  <richard.sandiford@linaro.org>
            Alan Hayward  <alan.hayward@arm.com>
            David Sherwood  <david.sherwood@arm.com>
index 4c5729796ef583b793763863e27d2d74760fce03..c58a08d8d383873d565ffdf12e88e7a20ec3d679 100644 (file)
@@ -3854,13 +3854,15 @@ vect_model_reduction_cost (stmt_vec_info stmt_info, internal_fn reduc_fn,
        }
       else if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION)
        {
-         unsigned nunits = TYPE_VECTOR_SUBPARTS (vectype);
+         unsigned estimated_nunits = vect_nunits_for_cost (vectype);
          /* Extraction of scalar elements.  */
-         epilogue_cost += add_stmt_cost (target_cost_data, 2 * nunits,
+         epilogue_cost += add_stmt_cost (target_cost_data,
+                                         2 * estimated_nunits,
                                          vec_to_scalar, stmt_info, 0,
                                          vect_epilogue);
          /* Scalar max reductions via COND_EXPR / MAX_EXPR.  */
-         epilogue_cost += add_stmt_cost (target_cost_data, 2 * nunits - 3,
+         epilogue_cost += add_stmt_cost (target_cost_data,
+                                         2 * estimated_nunits - 3,
                                          scalar_stmt, stmt_info, 0,
                                          vect_epilogue);
        }
index 26e7d652f44cea3562409b35ac2fe8659fa46faf..d41056932da79aa35999b676092956351fa9a0a9 100644 (file)
@@ -1730,8 +1730,8 @@ vect_analyze_slp_cost_1 (slp_instance instance, slp_tree node,
                                            &n_perms);
              record_stmt_cost (body_cost_vec, n_perms, vec_perm,
                                stmt_info, 0, vect_body);
-             unsigned nunits
-               = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
+             unsigned assumed_nunits
+               = vect_nunits_for_cost (STMT_VINFO_VECTYPE (stmt_info));
              /* And adjust the number of loads performed.  This handles
                 redundancies as well as loads that are later dead.  */
              auto_sbitmap perm (GROUP_SIZE (stmt_info));
@@ -1742,7 +1742,7 @@ vect_analyze_slp_cost_1 (slp_instance instance, slp_tree node,
              bool load_seen = false;
              for (i = 0; i < GROUP_SIZE (stmt_info); ++i)
                {
-                 if (i % nunits == 0)
+                 if (i % assumed_nunits == 0)
                    {
                      if (load_seen)
                        ncopies_for_cost++;
@@ -1755,7 +1755,7 @@ vect_analyze_slp_cost_1 (slp_instance instance, slp_tree node,
                ncopies_for_cost++;
              gcc_assert (ncopies_for_cost
                          <= (GROUP_SIZE (stmt_info) - GROUP_GAP (stmt_info)
-                             + nunits - 1) / nunits);
+                             + assumed_nunits - 1) / assumed_nunits);
              poly_uint64 uf = SLP_INSTANCE_UNROLLING_FACTOR (instance);
              ncopies_for_cost *= estimated_poly_value (uf);
            }
@@ -1868,9 +1868,9 @@ vect_analyze_slp_cost (slp_instance instance, void *data)
     assumed_vf = vect_vf_for_cost (STMT_VINFO_LOOP_VINFO (stmt_info));
   else
     assumed_vf = 1;
-  unsigned nunits = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
   /* For reductions look at a reduction operand in case the reduction
      operation is widening like DOT_PROD or SAD.  */
+  tree vectype_for_cost = STMT_VINFO_VECTYPE (stmt_info);
   if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
     {
       gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
@@ -1878,14 +1878,16 @@ vect_analyze_slp_cost (slp_instance instance, void *data)
        {
        case DOT_PROD_EXPR:
        case SAD_EXPR:
-         nunits = TYPE_VECTOR_SUBPARTS (get_vectype_for_scalar_type
-                               (TREE_TYPE (gimple_assign_rhs1 (stmt))));
+         vectype_for_cost = get_vectype_for_scalar_type
+           (TREE_TYPE (gimple_assign_rhs1 (stmt)));
          break;
        default:;
        }
     }
-  ncopies_for_cost = least_common_multiple (nunits,
-                                           group_size * assumed_vf) / nunits;
+  unsigned int assumed_nunits = vect_nunits_for_cost (vectype_for_cost);
+  ncopies_for_cost = (least_common_multiple (assumed_nunits,
+                                            group_size * assumed_vf)
+                     / assumed_nunits);
 
   prologue_cost_vec.create (10);
   body_cost_vec.create (10);
index bae72d098549e318a4df6fba6ebbd5aea9748a56..6ca3a16c4fcea3b790475213f23e643ba66e84ea 100644 (file)
@@ -958,18 +958,25 @@ vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
   /* Costs of the stores.  */
   if (memory_access_type == VMAT_ELEMENTWISE
       || memory_access_type == VMAT_GATHER_SCATTER)
-    /* N scalar stores plus extracting the elements.  */
-    inside_cost += record_stmt_cost (body_cost_vec,
-                                    ncopies * TYPE_VECTOR_SUBPARTS (vectype),
-                                    scalar_store, stmt_info, 0, vect_body);
+    {
+      /* N scalar stores plus extracting the elements.  */
+      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
+      inside_cost += record_stmt_cost (body_cost_vec,
+                                      ncopies * assumed_nunits,
+                                      scalar_store, stmt_info, 0, vect_body);
+    }
   else
     vect_get_store_cost (dr, ncopies, &inside_cost, body_cost_vec);
 
   if (memory_access_type == VMAT_ELEMENTWISE
       || memory_access_type == VMAT_STRIDED_SLP)
-    inside_cost += record_stmt_cost (body_cost_vec,
-                                    ncopies * TYPE_VECTOR_SUBPARTS (vectype),
-                                    vec_to_scalar, stmt_info, 0, vect_body);
+    {
+      /* Extracting the elements to be stored.  */
+      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
+      inside_cost += record_stmt_cost (body_cost_vec,
+                                      ncopies * assumed_nunits,
+                                      vec_to_scalar, stmt_info, 0, vect_body);
+    }
 
   if (dump_enabled_p ())
     dump_printf_loc (MSG_NOTE, vect_location,
@@ -1089,8 +1096,9 @@ vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
     {
       /* N scalar loads plus gathering them into a vector.  */
       tree vectype = STMT_VINFO_VECTYPE (stmt_info);
+      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
       inside_cost += record_stmt_cost (body_cost_vec,
-                                      ncopies * TYPE_VECTOR_SUBPARTS (vectype),
+                                      ncopies * assumed_nunits,
                                       scalar_load, stmt_info, 0, vect_body);
     }
   else
index 9619286ed34b94798539fd733eabd3876e4eef91..f6938e4f60f64ffb82f13ddcc0c3a9f9be1a44fc 100644 (file)
@@ -1154,6 +1154,16 @@ vect_vf_for_cost (loop_vec_info loop_vinfo)
   return estimated_poly_value (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
 }
 
+/* Estimate the number of elements in VEC_TYPE for costing purposes.
+   Pick a reasonable estimate if the exact number isn't known at
+   compile time.  */
+
+static inline unsigned int
+vect_nunits_for_cost (tree vec_type)
+{
+  return estimated_poly_value (TYPE_VECTOR_SUBPARTS (vec_type));
+}
+
 /* Return the size of the value accessed by unvectorized data reference DR.
    This is only valid once STMT_VINFO_VECTYPE has been calculated for the
    associated gimple statement, since that guarantees that DR accesses