Power supports vector memory access instructions that take a length
operand (in bytes). Like the existing full-masking support for SVE,
this is another approach to vectorizing a loop using
partially-populated vectors.
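For example (a hypothetical loop, not part of the patch), consider
vectorizing the following with a vectorization factor of 4:

    void
    foo (int *restrict a, int *restrict b, int n)
    {
      for (int i = 0; i < n; i++)
        a[i] = b[i] + 1;
    }

With n == 10, the last vector iteration covers only two of the four
lanes. A fully-masked loop (as on SVE) predicates the inactive lanes,
whereas a length-based loop (using Power's lxvl/stxvl) limits the
final load/store to the remaining 8 bytes; both are ways of operating
on partially-populated vectors.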
As Richard Sandiford pointed out, we should extend the existing flag
can_fully_mask_p to be more generic, so that it indicates whether we
have any chance of using partial vectors for this loop. This patch
therefore renames the flag to can_use_partial_vectors_p to make it
more meaningful, and renames the macro LOOP_VINFO_CAN_FULLY_MASK_P
to LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P accordingly.
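With the flag generalized, a future length-based code path can veto
partial vectors in the same way the masking checks in this patch do.
A minimal sketch (the helper name below is made up for illustration):

    /* In a hypothetical length-based analysis: if the target lacks
       the required load/store with length, give up on partial
       vectors, mirroring the existing masking checks.  */
    if (!target_supports_len_load_store_p (vectype)) /* made-up helper */
      LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;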
Bootstrapped/regtested on aarch64-linux-gnu.
gcc/ChangeLog:
* tree-vect-loop.c (_loop_vec_info::_loop_vec_info): Rename
can_fully_mask_p to can_use_partial_vectors_p.
(vect_analyze_loop_2): Rename LOOP_VINFO_CAN_FULLY_MASK_P to
LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P. Rename saved_can_fully_mask_p
to saved_can_use_partial_vectors_p.
(vectorizable_reduction): Rename LOOP_VINFO_CAN_FULLY_MASK_P to
LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P.
(vectorizable_live_operation): Likewise.
* tree-vect-stmts.c (check_load_store_masking): Likewise.
(vectorizable_operation): Likewise.
(vectorizable_store): Likewise.
(vectorizable_load): Likewise.
(vectorizable_condition): Likewise.
* tree-vectorizer.h (LOOP_VINFO_CAN_FULLY_MASK_P): Renamed to ...
(LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P): ... this.
(_loop_vec_info): Rename can_fully_mask_p to can_use_partial_vectors_p.
vec_outside_cost (0),
vec_inside_cost (0),
vectorizable (false),
- can_fully_mask_p (true),
+ can_use_partial_vectors_p (true),
fully_masked_p (false),
peeling_for_gaps (false),
peeling_for_niter (false),
vect_optimize_slp (loop_vinfo);
}
- bool saved_can_fully_mask_p = LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo);
+ bool saved_can_use_partial_vectors_p
+ = LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo);
/* We don't expect to have to roll back to anything other than an empty
set of rgroups. */
/* Decide whether to use a fully-masked loop for this vectorization
factor. */
LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
- = (LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo)
+ = (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
&& vect_verify_full_masking (loop_vinfo));
if (dump_enabled_p ())
{
LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = false;
LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo) = 0;
LOOP_VINFO_VERSIONING_THRESHOLD (loop_vinfo) = 0;
- LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = saved_can_fully_mask_p;
+ LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
+ = saved_can_use_partial_vectors_p;
goto start_over;
}
STMT_VINFO_DEF_TYPE (vect_orig_stmt (tem)) = vect_internal_def;
STMT_VINFO_DEF_TYPE (tem) = vect_internal_def;
}
- else if (loop_vinfo && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
+ else if (loop_vinfo && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
{
vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
internal_fn cond_fn = get_conditional_internal_fn (code);
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"can't use a fully-masked loop because no"
" conditional operation is available.\n");
- LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
+ LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
}
else if (reduction_type == FOLD_LEFT_REDUCTION
&& reduc_fn == IFN_LAST
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"can't use a fully-masked loop because no"
" conditional operation is available.\n");
- LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
+ LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
}
else
vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
if (!vec_stmt_p)
{
/* No transformation required. */
- if (LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
+ if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
{
if (!direct_internal_fn_supported_p (IFN_EXTRACT_LAST, vectype,
OPTIMIZE_FOR_SPEED))
"can't use a fully-masked loop because "
"the target doesn't support extract last "
"reduction.\n");
- LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
+ LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
}
else if (slp_node)
{
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"can't use a fully-masked loop because an "
"SLP statement is live after the loop.\n");
- LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
+ LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
}
else if (ncopies > 1)
{
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"can't use a fully-masked loop because"
" ncopies is greater than 1.\n");
- LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
+ LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
}
else
{
its arguments. If the load or store is conditional, SCALAR_MASK is the
condition under which it occurs.
- Clear LOOP_VINFO_CAN_FULLY_MASK_P if a fully-masked loop is not
+ Clear LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P if a fully-masked loop is not
supported, otherwise record the required mask types. */
static void
"can't use a fully-masked loop because the"
" target doesn't have an appropriate masked"
" load/store-lanes instruction.\n");
- LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
+ LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
return;
}
unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
"can't use a fully-masked loop because the"
" target doesn't have an appropriate masked"
" gather load or scatter store instruction.\n");
- LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
+ LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
return;
}
unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"can't use a fully-masked loop because an access"
" isn't contiguous.\n");
- LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
+ LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
return;
}
"can't use a fully-masked loop because the target"
" doesn't have the appropriate masked load or"
" store.\n");
- LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
+ LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
return;
}
/* We might load more scalars than we need for permuting SLP loads.
should only change the active lanes of the reduction chain,
keeping the inactive lanes as-is. */
if (loop_vinfo
- && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo)
+ && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
&& reduc_idx >= 0)
{
if (cond_fn == IFN_LAST
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"can't use a fully-masked loop because no"
" conditional operation is available.\n");
- LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
+ LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
}
else
vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
if (loop_vinfo
- && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
+ && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
check_load_store_masking (loop_vinfo, vectype, vls_type, group_size,
memory_access_type, &gs_info, mask);
STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
if (loop_vinfo
- && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
+ && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
check_load_store_masking (loop_vinfo, vectype, VLS_LOAD, group_size,
memory_access_type, &gs_info, mask);
}
if (loop_vinfo
- && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo)
+ && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
&& reduction_type == EXTRACT_LAST_REDUCTION)
vect_record_loop_mask (loop_vinfo, &LOOP_VINFO_MASKS (loop_vinfo),
ncopies * vec_num, vectype, NULL);
/* Is the loop vectorizable? */
bool vectorizable;
- /* Records whether we still have the option of using a fully-masked loop. */
- bool can_fully_mask_p;
+ /* Records whether we still have the option of vectorizing this loop
+ using partially-populated vectors; in other words, whether it is
+ still possible for one iteration of the vector loop to handle
+ fewer than VF scalars. */
+ bool can_use_partial_vectors_p;
/* True if have decided to use a fully-masked loop. */
bool fully_masked_p;
#define LOOP_VINFO_COST_MODEL_THRESHOLD(L) (L)->th
#define LOOP_VINFO_VERSIONING_THRESHOLD(L) (L)->versioning_threshold
#define LOOP_VINFO_VECTORIZABLE_P(L) (L)->vectorizable
-#define LOOP_VINFO_CAN_FULLY_MASK_P(L) (L)->can_fully_mask_p
+#define LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P(L) (L)->can_use_partial_vectors_p
#define LOOP_VINFO_FULLY_MASKED_P(L) (L)->fully_masked_p
#define LOOP_VINFO_VECT_FACTOR(L) (L)->vectorization_factor
#define LOOP_VINFO_MAX_VECT_FACTOR(L) (L)->max_vectorization_factor