vect: Add a “very cheap” cost model

author Richard Sandiford <richard.sandiford@arm.com>

Thu, 19 Nov 2020 16:49:37 +0000 (16:49 +0000)

committer Richard Sandiford <richard.sandiford@arm.com>

Thu, 19 Nov 2020 16:49:37 +0000 (16:49 +0000)
author Richard Sandiford <richard.sandiford@arm.com>
Thu, 19 Nov 2020 16:49:37 +0000 (16:49 +0000)
committer Richard Sandiford <richard.sandiford@arm.com>
Thu, 19 Nov 2020 16:49:37 +0000 (16:49 +0000)
diff --git a/gcc/common.opt b/gcc/common.opt

index fe39b3dee9f270dd39b3f69ff6a0e2e854058703..ca8a26907997f1c27dfbf2795e19a486dd1c8c75 100644 (file)
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -3020,11 +3020,11 @@ Enable basic block vectorization (SLP) on trees.
  
  fvect-cost-model=
  Common Joined RejectNegative Enum(vect_cost_model) Var(flag_vect_cost_model) Init(VECT_COST_MODEL_DEFAULT) Optimization
--fvect-cost-model=[unlimited|dynamic|cheap]    Specifies the cost model for vectorization.
+-fvect-cost-model=[unlimited|dynamic|cheap|very-cheap] Specifies the cost model for vectorization.
  
  fsimd-cost-model=
  Common Joined RejectNegative Enum(vect_cost_model) Var(flag_simd_cost_model) Init(VECT_COST_MODEL_UNLIMITED) Optimization
--fsimd-cost-model=[unlimited|dynamic|cheap]    Specifies the vectorization cost model for code marked with a simd directive.
+-fsimd-cost-model=[unlimited|dynamic|cheap|very-cheap] Specifies the vectorization cost model for code marked with a simd directive.
  
  Enum
  Name(vect_cost_model) Type(enum vect_cost_model) UnknownError(unknown vectorizer cost model %qs)
@@ -3038,6 +3038,9 @@ Enum(vect_cost_model) String(dynamic) Value(VECT_COST_MODEL_DYNAMIC)
  EnumValue
  Enum(vect_cost_model) String(cheap) Value(VECT_COST_MODEL_CHEAP)
  
+EnumValue
+Enum(vect_cost_model) String(very-cheap) Value(VECT_COST_MODEL_VERY_CHEAP)
+
  fvect-cost-model
  Common Alias(fvect-cost-model=,dynamic,unlimited)
  Enables the dynamic vectorizer cost model.  Preserved for backward compatibility.
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi

index 3510a54c6c46080c388d056e0cb9c70bfc737ba9..07232c6b33dcbbbfb953098bcce1d2cfc8a984a6 100644 (file)
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -11440,7 +11440,8 @@ and @option{-fauto-profile}.
  @item -fvect-cost-model=@var{model}
  @opindex fvect-cost-model
  Alter the cost model used for vectorization.  The @var{model} argument
-should be one of @samp{unlimited}, @samp{dynamic} or @samp{cheap}.
+should be one of @samp{unlimited}, @samp{dynamic}, @samp{cheap} or
+@samp{very-cheap}.
  With the @samp{unlimited} model the vectorized code-path is assumed
  to be profitable while with the @samp{dynamic} model a runtime check
  guards the vectorized code-path to enable it only for iteration
@@ -11448,7 +11449,14 @@ counts that will likely execute faster than when executing the original
  scalar loop.  The @samp{cheap} model disables vectorization of
  loops where doing so would be cost prohibitive for example due to
  required runtime checks for data dependence or alignment but otherwise
-is equal to the @samp{dynamic} model.
+is equal to the @samp{dynamic} model.  The @samp{very-cheap} model only
+allows vectorization if the vector code would entirely replace the
+scalar code that is being vectorized.  For example, if each iteration
+of a vectorized loop would only be able to handle exactly four iterations
+of the scalar loop, the @samp{very-cheap} model would only allow
+vectorization if the scalar iteration count is known to be a multiple
+of four.
+
  The default cost model depends on other optimization flags and is
  either @samp{dynamic} or @samp{cheap}.
  
diff --git a/gcc/flag-types.h b/gcc/flag-types.h

index 648ed096e30c89e5eca0caeaced7ba3ff57b5666..0dbab19943c66cdaa9192793586472f9ae378acc 100644 (file)
--- a/gcc/flag-types.h
+++ b/gcc/flag-types.h
@@ -232,12 +232,14 @@ enum scalar_storage_order_kind {
    SSO_LITTLE_ENDIAN
  };
  
-/* Vectorizer cost-model.  */
+/* Vectorizer cost-model.  Except for DEFAULT, the values are ordered from
+   the most conservative to the least conservative.  */
  enum vect_cost_model {
+  VECT_COST_MODEL_VERY_CHEAP = -3,
+  VECT_COST_MODEL_CHEAP = -2,
+  VECT_COST_MODEL_DYNAMIC = -1,
    VECT_COST_MODEL_UNLIMITED = 0,
-  VECT_COST_MODEL_CHEAP = 1,
-  VECT_COST_MODEL_DYNAMIC = 2,
-  VECT_COST_MODEL_DEFAULT = 3
+  VECT_COST_MODEL_DEFAULT = 1
  };
  
  /* Different instrumentation modes.  */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-cost-model-1.c b/gcc/testsuite/gcc.dg/vect/vect-cost-model-1.c

new file mode 100644 (file)

index 0000000..0737da5
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-cost-model-1.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O2 -ftree-vectorize -fvect-cost-model=cheap" } */
+
+void
+f (int *x, int *y)
+{
+  for (unsigned int i = 0; i < 1024; ++i)
+    x[i] += y[i];
+}
+
+/* { dg-final { scan-tree-dump {LOOP VECTORIZED} vect { target vect_int } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-cost-model-2.c b/gcc/testsuite/gcc.dg/vect/vect-cost-model-2.c

new file mode 100644 (file)

index 0000000..fa9bdb6
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-cost-model-2.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O2 -ftree-vectorize -fvect-cost-model=very-cheap" } */
+
+void
+f (int *x, int *y)
+{
+  for (unsigned int i = 0; i < 1024; ++i)
+    x[i] += y[i];
+}
+
+/* { dg-final { scan-tree-dump-not {LOOP VECTORIZED} vect { target vect_int } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-cost-model-3.c b/gcc/testsuite/gcc.dg/vect/vect-cost-model-3.c

new file mode 100644 (file)

index 0000000..d7c6cfd
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-cost-model-3.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O2 -ftree-vectorize -fvect-cost-model=cheap" } */
+
+void
+f (int *restrict x, int *restrict y)
+{
+  for (unsigned int i = 0; i < 1024; ++i)
+    x[i] += y[i];
+}
+
+/* { dg-final { scan-tree-dump {LOOP VECTORIZED} vect { target vect_int } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-cost-model-4.c b/gcc/testsuite/gcc.dg/vect/vect-cost-model-4.c

new file mode 100644 (file)

index 0000000..bb018ad
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-cost-model-4.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O2 -ftree-vectorize -fvect-cost-model=very-cheap" } */
+
+int x[1024], y[1024];
+
+void
+f (void)
+{
+  for (unsigned int i = 0; i < 1024; ++i)
+    x[i] += y[i];
+}
+
+/* { dg-final { scan-tree-dump {LOOP VECTORIZED} vect { target vect_int } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-cost-model-5.c b/gcc/testsuite/gcc.dg/vect/vect-cost-model-5.c

new file mode 100644 (file)

index 0000000..536ec0a
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-cost-model-5.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O2 -ftree-vectorize -fvect-cost-model=cheap" } */
+
+void
+f (int *restrict x, int *restrict y)
+{
+  for (unsigned int i = 0; i < 1023; ++i)
+    x[i] += y[i];
+}
+
+/* { dg-final { scan-tree-dump {LOOP VECTORIZED} vect { target vect_int } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-cost-model-6.c b/gcc/testsuite/gcc.dg/vect/vect-cost-model-6.c

new file mode 100644 (file)

index 0000000..552febb
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-cost-model-6.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O2 -ftree-vectorize -fvect-cost-model=very-cheap" } */
+
+void
+f (int *restrict x, int *restrict y)
+{
+  for (unsigned int i = 0; i < 1023; ++i)
+    x[i] += y[i];
+}
+
+/* { dg-final { scan-tree-dump {LOOP VECTORIZED} vect { target { vect_int && vect_partial_vectors_usage_2 } } } } */
+/* { dg-final { scan-tree-dump-not {LOOP VECTORIZED} vect { target { vect_int && { ! vect_partial_vectors_usage_2 } } } } } */
diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c

index 0efab495407cb8b4611d89baf2d0236309bb8587..18e36c89d1475dbe1fe92ff757f21839e5db9d59 100644 (file)
--- a/gcc/tree-vect-data-refs.c
+++ b/gcc/tree-vect-data-refs.c
@@ -2161,7 +2161,7 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
          {
            unsigned max_allowed_peel
             = param_vect_max_peeling_for_alignment;
-         if (flag_vect_cost_model == VECT_COST_MODEL_CHEAP)
+         if (flag_vect_cost_model <= VECT_COST_MODEL_CHEAP)
             max_allowed_peel = 0;
            if (max_allowed_peel != (unsigned)-1)
              {
@@ -2259,7 +2259,7 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
    do_versioning
      = (optimize_loop_nest_for_speed_p (loop)
         && !loop->inner /* FORNOW */
-       && flag_vect_cost_model != VECT_COST_MODEL_CHEAP);
+       && flag_vect_cost_model > VECT_COST_MODEL_CHEAP);
  
    if (do_versioning)
      {
@@ -3682,6 +3682,10 @@ vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
    unsigned int count = (comp_alias_ddrs.length ()
                         + check_unequal_addrs.length ());
  
+  if (count && flag_vect_cost_model == VECT_COST_MODEL_VERY_CHEAP)
+    return opt_result::failure_at
+      (vect_location, "would need a runtime alias check\n");
+
    if (dump_enabled_p ())
      dump_printf_loc (MSG_NOTE, vect_location,
                      "improved number of alias checks from %d to %d\n",
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c

index 856bbfebf7ca2345b8983cb26b303a859bff113c..48dfb4df00e338c036c4f440658c5be9638566f6 100644 (file)
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -1827,6 +1827,19 @@ vect_analyze_loop_costing (loop_vec_info loop_vinfo)
         }
      }
  
+  /* If using the "very cheap" model, reject cases in which we'd keep
+     a copy of the scalar code (even if we might be able to vectorize it).  */
+  if (flag_vect_cost_model == VECT_COST_MODEL_VERY_CHEAP
+      && (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)
+         || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
+         || LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)))
+    {
+      if (dump_enabled_p ())
+       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                        "some scalar iterations would need to be peeled\n");
+      return 0;
+    }
+
    int min_profitable_iters, min_profitable_estimate;
    vect_estimate_min_profitable_iters (loop_vinfo, &min_profitable_iters,
                                       &min_profitable_estimate);
@@ -1885,6 +1898,20 @@ vect_analyze_loop_costing (loop_vec_info loop_vinfo)
        min_profitable_estimate = min_profitable_iters;
      }
  
+  /* If the vector loop needs multiple iterations to be beneficial then
+     things are probably too close to call, and the conservative thing
+     would be to stick with the scalar code.  */
+  if (flag_vect_cost_model == VECT_COST_MODEL_VERY_CHEAP
+      && min_profitable_estimate > (int) vect_vf_for_cost (loop_vinfo))
+    {
+      if (dump_enabled_p ())
+       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                        "one iteration of the vector loop would be"
+                        " more expensive than the equivalent number of"
+                        " iterations of the scalar loop\n");
+      return 0;
+    }
+
    HOST_WIDE_INT estimated_niter;
  
    /* If we are vectorizing an epilogue then we know the maximum number of
author	Richard Sandiford <richard.sandiford@arm.com>
	Thu, 19 Nov 2020 16:49:37 +0000 (16:49 +0000)
committer	Richard Sandiford <richard.sandiford@arm.com>
	Thu, 19 Nov 2020 16:49:37 +0000 (16:49 +0000)
gcc/common.opt		patch | blob | history
gcc/doc/invoke.texi		patch | blob | history
gcc/flag-types.h		patch | blob | history
gcc/testsuite/gcc.dg/vect/vect-cost-model-1.c	[new file with mode: 0644]	patch | blob
gcc/testsuite/gcc.dg/vect/vect-cost-model-2.c	[new file with mode: 0644]	patch | blob
gcc/testsuite/gcc.dg/vect/vect-cost-model-3.c	[new file with mode: 0644]	patch | blob
gcc/testsuite/gcc.dg/vect/vect-cost-model-4.c	[new file with mode: 0644]	patch | blob
gcc/testsuite/gcc.dg/vect/vect-cost-model-5.c	[new file with mode: 0644]	patch | blob
gcc/testsuite/gcc.dg/vect/vect-cost-model-6.c	[new file with mode: 0644]	patch | blob
gcc/tree-vect-data-refs.c		patch | blob | history
gcc/tree-vect-loop.c		patch | blob | history