fvect-cost-model=
Common Joined RejectNegative Enum(vect_cost_model) Var(flag_vect_cost_model) Init(VECT_COST_MODEL_DEFAULT) Optimization
--fvect-cost-model=[unlimited|dynamic|cheap] Specifies the cost model for vectorization.
+-fvect-cost-model=[unlimited|dynamic|cheap|very-cheap] Specifies the cost model for vectorization.
fsimd-cost-model=
Common Joined RejectNegative Enum(vect_cost_model) Var(flag_simd_cost_model) Init(VECT_COST_MODEL_UNLIMITED) Optimization
--fsimd-cost-model=[unlimited|dynamic|cheap] Specifies the vectorization cost model for code marked with a simd directive.
+-fsimd-cost-model=[unlimited|dynamic|cheap|very-cheap] Specifies the vectorization cost model for code marked with a simd directive.
Enum
Name(vect_cost_model) Type(enum vect_cost_model) UnknownError(unknown vectorizer cost model %qs)
EnumValue
Enum(vect_cost_model) String(cheap) Value(VECT_COST_MODEL_CHEAP)
+EnumValue
+Enum(vect_cost_model) String(very-cheap) Value(VECT_COST_MODEL_VERY_CHEAP)
+
fvect-cost-model
Common Alias(fvect-cost-model=,dynamic,unlimited)
Enables the dynamic vectorizer cost model. Preserved for backward compatibility.
@item -fvect-cost-model=@var{model}
@opindex fvect-cost-model
Alter the cost model used for vectorization. The @var{model} argument
-should be one of @samp{unlimited}, @samp{dynamic} or @samp{cheap}.
+should be one of @samp{unlimited}, @samp{dynamic}, @samp{cheap} or
+@samp{very-cheap}.
With the @samp{unlimited} model the vectorized code-path is assumed
to be profitable while with the @samp{dynamic} model a runtime check
guards the vectorized code-path to enable it only for iteration
counts that will likely execute faster than when executing the original
scalar loop. The @samp{cheap} model disables vectorization of
loops where doing so would be cost prohibitive for example due to
required runtime checks for data dependence or alignment but otherwise
-is equal to the @samp{dynamic} model.
+is equal to the @samp{dynamic} model. The @samp{very-cheap} model only
+allows vectorization if the vector code would entirely replace the
+scalar code that is being vectorized. For example, if each iteration
+of a vectorized loop would only be able to handle exactly four iterations
+of the scalar loop, the @samp{very-cheap} model would only allow
+vectorization if the scalar iteration count is known to be a multiple
+of four.
+
The default cost model depends on other optimization flags and is
either @samp{dynamic} or @samp{cheap}.
SSO_LITTLE_ENDIAN
};
-/* Vectorizer cost-model. */
+/* Vectorizer cost-model. Except for DEFAULT, the values are ordered from
+ the most conservative to the least conservative. */
enum vect_cost_model {
+ VECT_COST_MODEL_VERY_CHEAP = -3,
+ VECT_COST_MODEL_CHEAP = -2,
+ VECT_COST_MODEL_DYNAMIC = -1,
VECT_COST_MODEL_UNLIMITED = 0,
- VECT_COST_MODEL_CHEAP = 1,
- VECT_COST_MODEL_DYNAMIC = 2,
- VECT_COST_MODEL_DEFAULT = 3
+ VECT_COST_MODEL_DEFAULT = 1
};
/* Different instrumentation modes. */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-additional-options "-O2 -ftree-vectorize -fvect-cost-model=cheap" } */
+
+void
+f (int *x, int *y) /* No restrict qualifiers: x and y may overlap.  */
+{
+ for (unsigned int i = 0; i < 1024; ++i) /* Compile-time constant trip count of 1024.  */
+ x[i] += y[i]; /* Element-wise accumulate y into x.  */
+}
+
+/* { dg-final { scan-tree-dump {LOOP VECTORIZED} vect { target vect_int } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-additional-options "-O2 -ftree-vectorize -fvect-cost-model=very-cheap" } */
+
+void
+f (int *x, int *y) /* No restrict qualifiers: x and y may overlap.  */
+{
+ for (unsigned int i = 0; i < 1024; ++i) /* Compile-time constant trip count of 1024.  */
+ x[i] += y[i]; /* Element-wise accumulate y into x.  */
+}
+
+/* { dg-final { scan-tree-dump-not {LOOP VECTORIZED} vect { target vect_int } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-additional-options "-O2 -ftree-vectorize -fvect-cost-model=cheap" } */
+
+void
+f (int *restrict x, int *restrict y) /* restrict: x and y are promised not to overlap.  */
+{
+ for (unsigned int i = 0; i < 1024; ++i) /* Compile-time constant trip count of 1024.  */
+ x[i] += y[i]; /* Element-wise accumulate y into x.  */
+}
+
+/* { dg-final { scan-tree-dump {LOOP VECTORIZED} vect { target vect_int } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-additional-options "-O2 -ftree-vectorize -fvect-cost-model=very-cheap" } */
+
+int x[1024], y[1024];
+
+void
+f (void) /* Works on the file-scope arrays x and y, which are distinct objects.  */
+{
+ for (unsigned int i = 0; i < 1024; ++i) /* Trip count equals the declared array length.  */
+ x[i] += y[i]; /* Element-wise accumulate y into x.  */
+}
+
+/* { dg-final { scan-tree-dump {LOOP VECTORIZED} vect { target vect_int } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-additional-options "-O2 -ftree-vectorize -fvect-cost-model=cheap" } */
+
+void
+f (int *restrict x, int *restrict y) /* restrict: x and y are promised not to overlap.  */
+{
+ for (unsigned int i = 0; i < 1023; ++i) /* Odd trip count: not an exact multiple of any even vectorization factor.  */
+ x[i] += y[i]; /* Element-wise accumulate y into x.  */
+}
+
+/* { dg-final { scan-tree-dump {LOOP VECTORIZED} vect { target vect_int } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-additional-options "-O2 -ftree-vectorize -fvect-cost-model=very-cheap" } */
+
+void
+f (int *restrict x, int *restrict y) /* restrict: x and y are promised not to overlap.  */
+{
+ for (unsigned int i = 0; i < 1023; ++i) /* Odd trip count: not an exact multiple of any even vectorization factor.  */
+ x[i] += y[i]; /* Element-wise accumulate y into x.  */
+}
+
+/* { dg-final { scan-tree-dump {LOOP VECTORIZED} vect { target { vect_int && vect_partial_vectors_usage_2 } } } } */
+/* { dg-final { scan-tree-dump-not {LOOP VECTORIZED} vect { target { vect_int && { ! vect_partial_vectors_usage_2 } } } } } */
{
unsigned max_allowed_peel
= param_vect_max_peeling_for_alignment;
- if (flag_vect_cost_model == VECT_COST_MODEL_CHEAP)
+ if (flag_vect_cost_model <= VECT_COST_MODEL_CHEAP)
max_allowed_peel = 0;
if (max_allowed_peel != (unsigned)-1)
{
do_versioning
= (optimize_loop_nest_for_speed_p (loop)
&& !loop->inner /* FORNOW */
- && flag_vect_cost_model != VECT_COST_MODEL_CHEAP);
+ && flag_vect_cost_model > VECT_COST_MODEL_CHEAP);
if (do_versioning)
{
unsigned int count = (comp_alias_ddrs.length ()
+ check_unequal_addrs.length ());
+ if (count && flag_vect_cost_model == VECT_COST_MODEL_VERY_CHEAP)
+ return opt_result::failure_at
+ (vect_location, "would need a runtime alias check\n");
+
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
"improved number of alias checks from %d to %d\n",
}
}
+ /* If using the "very cheap" model, reject cases in which we'd keep
+ a copy of the scalar code (even if we might be able to vectorize it). */
+ if (flag_vect_cost_model == VECT_COST_MODEL_VERY_CHEAP
+ && (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)
+ || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
+ || LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "some scalar iterations would need to be peeled\n");
+ return 0;
+ }
+
int min_profitable_iters, min_profitable_estimate;
vect_estimate_min_profitable_iters (loop_vinfo, &min_profitable_iters,
&min_profitable_estimate);
min_profitable_estimate = min_profitable_iters;
}
+ /* If the vector loop needs multiple iterations to be beneficial then
+ things are probably too close to call, and the conservative thing
+ would be to stick with the scalar code. */
+ if (flag_vect_cost_model == VECT_COST_MODEL_VERY_CHEAP
+ && min_profitable_estimate > (int) vect_vf_for_cost (loop_vinfo))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "one iteration of the vector loop would be"
+ " more expensive than the equivalent number of"
+ " iterations of the scalar loop\n");
+ return 0;
+ }
+
HOST_WIDE_INT estimated_niter;
/* If we are vectorizing an epilogue then we know the maximum number of