Fix vectorizable_conversion costs
author Richard Sandiford <richard.sandiford@arm.com>
Wed, 13 Nov 2019 09:00:53 +0000 (09:00 +0000)
committer Richard Sandiford <rsandifo@gcc.gnu.org>
Wed, 13 Nov 2019 09:00:53 +0000 (09:00 +0000)
This patch makes two tweaks to vectorizable_conversion.  The first
is to use "modifier" to distinguish between promotion, demotion,
and neither promotion nor demotion, rather than using a code for
some cases and "modifier" for others.  The second is to take ncopies
into account for the promotion and demotion costs; previously we gave
multiple copies the same cost as a single copy.

Later patches test this, but it seemed worth splitting out.

2019-11-13  Richard Sandiford  <richard.sandiford@arm.com>

gcc/
* tree-vect-stmts.c (vect_model_promotion_demotion_cost): Take the
number of ncopies as an additional argument.
(vectorizable_conversion): Update call accordingly.  Use "modifier"
to check whether a conversion is between vectors with the same
numbers of units.

From-SVN: r278121

gcc/ChangeLog
gcc/tree-vect-stmts.c

index e569d176e556651653308a357434b9fe6b551851..cb34395694785b2a24dca5456e701b85e6eb6e3b 100644 (file)
@@ -1,3 +1,11 @@
+2019-11-13  Richard Sandiford  <richard.sandiford@arm.com>
+
+       * tree-vect-stmts.c (vect_model_promotion_demotion_cost): Take the
+       number of ncopies as an additional argument.
+       (vectorizable_conversion): Update call accordingly.  Use "modifier"
+       to check whether a conversion is between vectors with the same
+       numbers of units.
+
 2019-11-13  Richard Sandiford  <richard.sandiford@arm.com>
 
        * config/aarch64/aarch64-sve-builtins-functions.h
index 750af2e3dd0175a1f2b4d4c9cb9e8afd9749fcc8..53f4189d939fa276e0cd3ec0f2d3ad012be051fe 100644 (file)
@@ -917,26 +917,27 @@ vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
 }
 
 
-/* Model cost for type demotion and promotion operations.  PWR is normally
-   zero for single-step promotions and demotions.  It will be one if 
-   two-step promotion/demotion is required, and so on.  Each additional
+/* Model cost for type demotion and promotion operations.  PWR is
+   normally zero for single-step promotions and demotions.  It will be
+   one if two-step promotion/demotion is required, and so on.  NCOPIES
+   is the number of vector results (and thus number of instructions)
+   for the narrowest end of the operation chain.  Each additional
    step doubles the number of instructions required.  */
 
 static void
 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
-                                   enum vect_def_type *dt, int pwr,
+                                   enum vect_def_type *dt,
+                                   unsigned int ncopies, int pwr,
                                    stmt_vector_for_cost *cost_vec)
 {
-  int i, tmp;
+  int i;
   int inside_cost = 0, prologue_cost = 0;
 
   for (i = 0; i < pwr + 1; i++)
     {
-      tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
-       (i + 1) : i;
-      inside_cost += record_stmt_cost (cost_vec, vect_pow2 (tmp),
-                                      vec_promote_demote, stmt_info, 0,
-                                      vect_body);
+      inside_cost += record_stmt_cost (cost_vec, ncopies, vec_promote_demote,
+                                      stmt_info, 0, vect_body);
+      ncopies *= 2;
     }
 
   /* FORNOW: Assuming maximum 2 args per stmts.  */
@@ -4961,7 +4962,7 @@ vectorizable_conversion (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
   if (!vec_stmt)               /* transformation not required.  */
     {
       DUMP_VECT_SCOPE ("vectorizable_conversion");
-      if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
+      if (modifier == NONE)
         {
          STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
          vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node,
@@ -4970,14 +4971,24 @@ vectorizable_conversion (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
       else if (modifier == NARROW)
        {
          STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
-         vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt,
-                                             cost_vec);
+         /* The final packing step produces one vector result per copy.  */
+         unsigned int nvectors
+           = (slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies);
+         vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
+                                             multi_step_cvt, cost_vec);
        }
       else
        {
          STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
-         vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt,
-                                             cost_vec);
+         /* The initial unpacking step produces two vector results
+            per copy.  MULTI_STEP_CVT is 0 for a single conversion,
+            so >> MULTI_STEP_CVT divides by 2^(number of steps - 1).  */
+         unsigned int nvectors
+           = (slp_node
+              ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) >> multi_step_cvt
+              : ncopies * 2);
+         vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
+                                             multi_step_cvt, cost_vec);
        }
       interm_types.release ();
       return true;