Power supports vector memory access instructions that take an operation
length in bytes.  Like the existing fully-masked loop support for SVE,
this is another approach to vectorizing a loop with partially-populated
vectors.
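As a rough scalar sketch of what either style does for the final,
partial iteration (illustration only, not part of the patch; VF, the
int element type and the use of memcpy are arbitrary stand-ins):

#include <stddef.h>
#include <string.h>

#define VF 4  /* stand-in vectorization factor (elements per vector) */

/* Process N ints in chunks of at most VF, the way a loop vectorized
   with partial vectors would: the last chunk is handled by limiting
   the operation itself rather than by a scalar epilogue.  */
void
copy_with_partial_vectors (int *dst, const int *src, size_t n)
{
  for (size_t i = 0; i < n; i += VF)
    {
      size_t active = n - i < VF ? n - i : VF;
      /* Length-based style (Power): tell the memory access how many
         bytes to touch.  */
      memcpy (dst + i, src + i, active * sizeof (int));
      /* Mask-based style (SVE) would instead build a predicate with
         ACTIVE leading true lanes and perform the accesses under it.  */
    }
}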
As Richard Sandiford suggested, we should share the code between the
partial-vector approaches where possible.  This patch is to:
1) factor out two functions (see the worked example after this list):
- vect_min_prec_for_max_niters
- vect_known_niters_smaller_than_vf.
2) rename four functions:
- vect_iv_limit_for_full_masking
- check_load_store_masking
- vect_set_loop_condition_masked
- vect_set_loop_condition_unmasked
3) rename macros LOOP_VINFO_MASK_COMPARE_TYPE and LOOP_VINFO_MASK_IV_TYPE.
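For reference, the factored-out vect_min_prec_for_max_niters just
computes wi::min_precision (max_ni * factor, UNSIGNED).  With purely
illustrative numbers: a loop known to run at most 1000 iterations, with
at most 3 scalars per iteration per rgroup control, gives 1000 * 3 =
3000, which needs 12 bits, so any compare type of at least 12 bits is
wide enough.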
Bootstrapped/regtested on aarch64-linux-gnu.
gcc/ChangeLog:
* tree-vect-loop-manip.c (vect_set_loop_controls_directly): Rename
LOOP_VINFO_MASK_COMPARE_TYPE to LOOP_VINFO_RGROUP_COMPARE_TYPE. Rename
LOOP_VINFO_MASK_IV_TYPE to LOOP_VINFO_RGROUP_IV_TYPE.
(vect_set_loop_condition_masked): Renamed to ...
(vect_set_loop_condition_partial_vectors): ... this. Rename
LOOP_VINFO_MASK_COMPARE_TYPE to LOOP_VINFO_RGROUP_COMPARE_TYPE. Rename
vect_iv_limit_for_full_masking to vect_iv_limit_for_partial_vectors.
(vect_set_loop_condition_unmasked): Renamed to ...
(vect_set_loop_condition_normal): ... this.
(vect_set_loop_condition): Rename vect_set_loop_condition_unmasked to
vect_set_loop_condition_normal. Rename vect_set_loop_condition_masked
to vect_set_loop_condition_partial_vectors.
(vect_prepare_for_masked_peels): Rename LOOP_VINFO_MASK_COMPARE_TYPE
to LOOP_VINFO_RGROUP_COMPARE_TYPE.
* tree-vect-loop.c (vect_known_niters_smaller_than_vf): New, factored
out from ...
(vect_analyze_loop_costing): ... this.
(_loop_vec_info::_loop_vec_info): Rename mask_compare_type to
rgroup_compare_type.
(vect_min_prec_for_max_niters): New, factored out from ...
(vect_verify_full_masking): ... this. Rename
vect_iv_limit_for_full_masking to vect_iv_limit_for_partial_vectors.
Rename LOOP_VINFO_MASK_COMPARE_TYPE to LOOP_VINFO_RGROUP_COMPARE_TYPE.
Rename LOOP_VINFO_MASK_IV_TYPE to LOOP_VINFO_RGROUP_IV_TYPE.
(vectorizable_reduction): Update some dump messages to mention
partial vectors instead of fully-masked loops.
(vectorizable_live_operation): Likewise.
(vect_iv_limit_for_full_masking): Renamed to ...
(vect_iv_limit_for_partial_vectors): ... this.
* tree-vect-stmts.c (check_load_store_masking): Renamed to ...
(check_load_store_for_partial_vectors): ... this. Update some dump
messages to mention partial vectors instead of fully-masked loops.
(vectorizable_store): Rename check_load_store_masking to
check_load_store_for_partial_vectors.
(vectorizable_load): Likewise.
* tree-vectorizer.h (LOOP_VINFO_MASK_COMPARE_TYPE): Renamed to ...
(LOOP_VINFO_RGROUP_COMPARE_TYPE): ... this.
(LOOP_VINFO_MASK_IV_TYPE): Renamed to ...
(LOOP_VINFO_RGROUP_IV_TYPE): ... this.
(vect_iv_limit_for_full_masking): Renamed to ...
(vect_iv_limit_for_partial_vectors): ... this.
(_loop_vec_info): Rename mask_compare_type to rgroup_compare_type.
Rename iv_type to rgroup_iv_type.
rgroup_controls *rgc, tree niters,
tree niters_skip, bool might_wrap_p)
{
- tree compare_type = LOOP_VINFO_MASK_COMPARE_TYPE (loop_vinfo);
- tree iv_type = LOOP_VINFO_MASK_IV_TYPE (loop_vinfo);
+ tree compare_type = LOOP_VINFO_RGROUP_COMPARE_TYPE (loop_vinfo);
+ tree iv_type = LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo);
tree ctrl_type = rgc->type;
unsigned int nscalars_per_iter = rgc->max_nscalars_per_iter;
poly_uint64 nscalars_per_ctrl = TYPE_VECTOR_SUBPARTS (ctrl_type);
final gcond. */
static gcond *
-vect_set_loop_condition_masked (class loop *loop, loop_vec_info loop_vinfo,
- tree niters, tree final_iv,
- bool niters_maybe_zero,
- gimple_stmt_iterator loop_cond_gsi)
+vect_set_loop_condition_partial_vectors (class loop *loop,
+ loop_vec_info loop_vinfo, tree niters,
+ tree final_iv, bool niters_maybe_zero,
+ gimple_stmt_iterator loop_cond_gsi)
{
gimple_seq preheader_seq = NULL;
gimple_seq header_seq = NULL;
- tree compare_type = LOOP_VINFO_MASK_COMPARE_TYPE (loop_vinfo);
+ tree compare_type = LOOP_VINFO_RGROUP_COMPARE_TYPE (loop_vinfo);
unsigned int compare_precision = TYPE_PRECISION (compare_type);
tree orig_niters = niters;
else
niters = gimple_convert (&preheader_seq, compare_type, niters);
- widest_int iv_limit = vect_iv_limit_for_full_masking (loop_vinfo);
+ widest_int iv_limit = vect_iv_limit_for_partial_vectors (loop_vinfo);
/* Iterate over all the rgroups and fill in their controls. We could use
the first control from any rgroup for the loop condition; here we
return cond_stmt;
}
-/* Like vect_set_loop_condition, but handle the case in which there
- are no loop masks. */
+/* Like vect_set_loop_condition, but handle the case in which the vector
+ loop handles exactly VF scalars per iteration. */
static gcond *
-vect_set_loop_condition_unmasked (class loop *loop, tree niters,
- tree step, tree final_iv,
- bool niters_maybe_zero,
- gimple_stmt_iterator loop_cond_gsi)
+vect_set_loop_condition_normal (class loop *loop, tree niters, tree step,
+ tree final_iv, bool niters_maybe_zero,
+ gimple_stmt_iterator loop_cond_gsi)
{
tree indx_before_incr, indx_after_incr;
gcond *cond_stmt;
gimple_stmt_iterator loop_cond_gsi = gsi_for_stmt (orig_cond);
if (loop_vinfo && LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo))
- cond_stmt = vect_set_loop_condition_masked (loop, loop_vinfo, niters,
- final_iv, niters_maybe_zero,
- loop_cond_gsi);
+ cond_stmt = vect_set_loop_condition_partial_vectors (loop, loop_vinfo,
+ niters, final_iv,
+ niters_maybe_zero,
+ loop_cond_gsi);
else
- cond_stmt = vect_set_loop_condition_unmasked (loop, niters, step,
- final_iv, niters_maybe_zero,
- loop_cond_gsi);
+ cond_stmt = vect_set_loop_condition_normal (loop, niters, step, final_iv,
+ niters_maybe_zero,
+ loop_cond_gsi);
/* Remove old loop exit test. */
stmt_vec_info orig_cond_info;
vect_prepare_for_masked_peels (loop_vec_info loop_vinfo)
{
tree misalign_in_elems;
- tree type = LOOP_VINFO_MASK_COMPARE_TYPE (loop_vinfo);
+ tree type = LOOP_VINFO_RGROUP_COMPARE_TYPE (loop_vinfo);
gcc_assert (vect_use_loop_mask_for_alignment_p (loop_vinfo));
vectorization_factor (0),
max_vectorization_factor (0),
mask_skip_niters (NULL_TREE),
- mask_compare_type (NULL_TREE),
+ rgroup_compare_type (NULL_TREE),
simd_if_cond (NULL_TREE),
unaligned_dr (NULL),
peeling_for_alignment (0),
return res;
}
+/* Calculate the minimum precision necessary to represent:
+
+ MAX_NITERS * FACTOR
+
+ as an unsigned integer, where MAX_NITERS is the maximum number of
+ loop header iterations for the original scalar form of LOOP_VINFO. */
+
+static unsigned
+vect_min_prec_for_max_niters (loop_vec_info loop_vinfo, unsigned int factor)
+{
+ class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+
+ /* Get the maximum number of iterations that is representable
+ in the counter type. */
+ tree ni_type = TREE_TYPE (LOOP_VINFO_NITERSM1 (loop_vinfo));
+ widest_int max_ni = wi::to_widest (TYPE_MAX_VALUE (ni_type)) + 1;
+
+ /* Get a more refined estimate for the number of iterations. */
+ widest_int max_back_edges;
+ if (max_loop_iterations (loop, &max_back_edges))
+ max_ni = wi::smin (max_ni, max_back_edges + 1);
+
+ /* Work out how many bits we need to represent the limit. */
+ return wi::min_precision (max_ni * factor, UNSIGNED);
+}
+
/* Each statement in LOOP_VINFO can be masked where necessary. Check
whether we can actually generate the masks required. Return true if so,
- storing the type of the scalar IV in LOOP_VINFO_MASK_COMPARE_TYPE. */
+ storing the type of the scalar IV in LOOP_VINFO_RGROUP_COMPARE_TYPE. */
static bool
vect_verify_full_masking (loop_vec_info loop_vinfo)
{
- class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
unsigned int min_ni_width;
unsigned int max_nscalars_per_iter
= vect_get_max_nscalars_per_iter (loop_vinfo);
if (LOOP_VINFO_MASKS (loop_vinfo).is_empty ())
return false;
- /* Get the maximum number of iterations that is representable
- in the counter type. */
- tree ni_type = TREE_TYPE (LOOP_VINFO_NITERSM1 (loop_vinfo));
- widest_int max_ni = wi::to_widest (TYPE_MAX_VALUE (ni_type)) + 1;
-
- /* Get a more refined estimate for the number of iterations. */
- widest_int max_back_edges;
- if (max_loop_iterations (loop, &max_back_edges))
- max_ni = wi::smin (max_ni, max_back_edges + 1);
-
- /* Account for rgroup masks, in which each bit is replicated N times. */
- max_ni *= max_nscalars_per_iter;
-
/* Work out how many bits we need to represent the limit. */
- min_ni_width = wi::min_precision (max_ni, UNSIGNED);
+ min_ni_width
+ = vect_min_prec_for_max_niters (loop_vinfo, max_nscalars_per_iter);
/* Find a scalar mode for which WHILE_ULT is supported. */
opt_scalar_int_mode cmp_mode_iter;
tree cmp_type = NULL_TREE;
tree iv_type = NULL_TREE;
- widest_int iv_limit = vect_iv_limit_for_full_masking (loop_vinfo);
+ widest_int iv_limit = vect_iv_limit_for_partial_vectors (loop_vinfo);
unsigned int iv_precision = UINT_MAX;
if (iv_limit != -1)
if (!cmp_type)
return false;
- LOOP_VINFO_MASK_COMPARE_TYPE (loop_vinfo) = cmp_type;
- LOOP_VINFO_MASK_IV_TYPE (loop_vinfo) = iv_type;
+ LOOP_VINFO_RGROUP_COMPARE_TYPE (loop_vinfo) = cmp_type;
+ LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo) = iv_type;
return true;
}
return opt_result::success ();
}
+/* Return true if we know that the iteration count is smaller than the
+ vectorization factor. Return false if it isn't, or if we can't be sure
+ either way. */
+
+static bool
+vect_known_niters_smaller_than_vf (loop_vec_info loop_vinfo)
+{
+ unsigned int assumed_vf = vect_vf_for_cost (loop_vinfo);
+
+ HOST_WIDE_INT max_niter;
+ if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
+ max_niter = LOOP_VINFO_INT_NITERS (loop_vinfo);
+ else
+ max_niter = max_stmt_executions_int (LOOP_VINFO_LOOP (loop_vinfo));
+
+ if (max_niter != -1 && (unsigned HOST_WIDE_INT) max_niter < assumed_vf)
+ return true;
+
+ return false;
+}
+
/* Analyze the cost of the loop described by LOOP_VINFO. Decide if it
is worthwhile to vectorize. Return 1 if definitely yes, 0 if
definitely no, or -1 if it's worth retrying. */
counts less than the vectorization factor. */
if (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo))
{
- HOST_WIDE_INT max_niter;
-
- if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
- max_niter = LOOP_VINFO_INT_NITERS (loop_vinfo);
- else
- max_niter = max_stmt_executions_int (loop);
-
- if (max_niter != -1
- && (unsigned HOST_WIDE_INT) max_niter < assumed_vf)
+ if (vect_known_niters_smaller_than_vf (loop_vinfo))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "can't use a fully-masked loop because no"
- " conditional operation is available.\n");
+ "can't operate on partial vectors because"
+ " no conditional operation is available.\n");
LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
}
else if (reduction_type == FOLD_LEFT_REDUCTION
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "can't use a fully-masked loop because no"
- " conditional operation is available.\n");
+ "can't operate on partial vectors because"
+ " no conditional operation is available.\n");
LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
}
else
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "can't use a fully-masked loop because "
- "the target doesn't support extract last "
- "reduction.\n");
+ "can't operate on partial vectors "
+ "because the target doesn't support extract "
+ "last reduction.\n");
LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
}
else if (slp_node)
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "can't use a fully-masked loop because an "
- "SLP statement is live after the loop.\n");
+ "can't operate on partial vectors "
+ "because an SLP statement is live after "
+ "the loop.\n");
LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
}
else if (ncopies > 1)
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "can't use a fully-masked loop because"
- " ncopies is greater than 1.\n");
+ "can't operate on partial vectors "
+ "because ncopies is greater than 1.\n");
LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
}
else
}
/* Decide whether it is possible to use a zero-based induction variable
- when vectorizing LOOP_VINFO with a fully-masked loop. If it is,
- return the value that the induction variable must be able to hold
- in order to ensure that the loop ends with an all-false mask.
+ when vectorizing LOOP_VINFO with partial vectors. If it is, return
+ the value that the induction variable must be able to hold in order
+ to ensure that the rgroups eventually have no active vector elements.
Return -1 otherwise. */
+
widest_int
-vect_iv_limit_for_full_masking (loop_vec_info loop_vinfo)
+vect_iv_limit_for_partial_vectors (loop_vec_info loop_vinfo)
{
tree niters_skip = LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo);
class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
gimple_stmt_iterator *);
/* Check whether a load or store statement in the loop described by
- LOOP_VINFO is possible in a fully-masked loop. This is testing
- whether the vectorizer pass has the appropriate support, as well as
- whether the target does.
+ LOOP_VINFO is possible in a loop using partial vectors. This is
+ testing whether the vectorizer pass has the appropriate support,
+ as well as whether the target does.
VLS_TYPE says whether the statement is a load or store and VECTYPE
is the type of the vector being loaded or stored. MEMORY_ACCESS_TYPE
its arguments. If the load or store is conditional, SCALAR_MASK is the
condition under which it occurs.
- Clear LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P if a fully-masked loop is not
- supported, otherwise record the required mask types. */
+ Clear LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P if a loop using partial
+ vectors is not supported, otherwise record the required rgroup control
+ types. */
static void
-check_load_store_masking (loop_vec_info loop_vinfo, tree vectype,
- vec_load_store_type vls_type, int group_size,
- vect_memory_access_type memory_access_type,
- gather_scatter_info *gs_info, tree scalar_mask)
+check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
+ vec_load_store_type vls_type,
+ int group_size,
+ vect_memory_access_type
+ memory_access_type,
+ gather_scatter_info *gs_info,
+ tree scalar_mask)
{
/* Invariant loads need no special support. */
if (memory_access_type == VMAT_INVARIANT)
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "can't use a fully-masked loop because the"
- " target doesn't have an appropriate masked"
+ "can't operate on partial vectors because"
+ " the target doesn't have an appropriate"
" load/store-lanes instruction.\n");
LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
return;
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "can't use a fully-masked loop because the"
- " target doesn't have an appropriate masked"
+ "can't operate on partial vectors because"
+ " the target doesn't have an appropriate"
" gather load or scatter store instruction.\n");
LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
return;
scalar loop. We need more work to support other mappings. */
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "can't use a fully-masked loop because an access"
- " isn't contiguous.\n");
+ "can't operate on partial vectors because an"
+ " access isn't contiguous.\n");
LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
return;
}
if (loop_vinfo
&& LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
- check_load_store_masking (loop_vinfo, vectype, vls_type, group_size,
- memory_access_type, &gs_info, mask);
+ check_load_store_for_partial_vectors (loop_vinfo, vectype, vls_type,
+ group_size, memory_access_type,
+ &gs_info, mask);
if (slp_node
&& !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0],
if (loop_vinfo
&& LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
- check_load_store_masking (loop_vinfo, vectype, VLS_LOAD, group_size,
- memory_access_type, &gs_info, mask);
+ check_load_store_for_partial_vectors (loop_vinfo, vectype, VLS_LOAD,
+ group_size, memory_access_type,
+ &gs_info, mask);
STMT_VINFO_TYPE (orig_stmt_info) = load_vec_info_type;
vect_model_load_cost (vinfo, stmt_info, ncopies, vf, memory_access_type,
elements that should be false in the first mask). */
tree mask_skip_niters;
- /* Type of the variables to use in the WHILE_ULT call for fully-masked
- loops. */
- tree mask_compare_type;
+ /* The type that the loop control IV should be converted to before
+ testing which of the VF scalars are active and inactive.
+ Only meaningful if LOOP_VINFO_USING_PARTIAL_VECTORS_P. */
+ tree rgroup_compare_type;
/* For #pragma omp simd if (x) loops the x expression. If constant 0,
the loop should not be vectorized, if constant non-zero, simd_if_cond
is false and vectorized loop otherwise. */
tree simd_if_cond;
- /* Type of the IV to use in the WHILE_ULT call for fully-masked
- loops. */
- tree iv_type;
+ /* The type that the vector loop control IV should have when
+ LOOP_VINFO_USING_PARTIAL_VECTORS_P is true. */
+ tree rgroup_iv_type;
/* Unknown DRs according to which loop was peeled. */
class dr_vec_info *unaligned_dr;
#define LOOP_VINFO_MAX_VECT_FACTOR(L) (L)->max_vectorization_factor
#define LOOP_VINFO_MASKS(L) (L)->masks
#define LOOP_VINFO_MASK_SKIP_NITERS(L) (L)->mask_skip_niters
-#define LOOP_VINFO_MASK_COMPARE_TYPE(L) (L)->mask_compare_type
-#define LOOP_VINFO_MASK_IV_TYPE(L) (L)->iv_type
+#define LOOP_VINFO_RGROUP_COMPARE_TYPE(L) (L)->rgroup_compare_type
+#define LOOP_VINFO_RGROUP_IV_TYPE(L) (L)->rgroup_iv_type
#define LOOP_VINFO_PTR_MASK(L) (L)->ptr_mask
#define LOOP_VINFO_LOOP_NEST(L) (L)->shared->loop_nest
#define LOOP_VINFO_DATAREFS(L) (L)->shared->datarefs
tree, tree = NULL_TREE);
/* In tree-vect-loop.c. */
-extern widest_int vect_iv_limit_for_full_masking (loop_vec_info loop_vinfo);
+extern widest_int vect_iv_limit_for_partial_vectors (loop_vec_info loop_vinfo);
/* Used in tree-vect-loop-manip.c */
extern void determine_peel_for_niter (loop_vec_info);
/* Used in gimple-loop-interchange.c and tree-parloops.c. */