gimple_bb (update_phi));
}
-/* Define one loop mask MASK from loop LOOP. INIT_MASK is the value that
- the mask should have during the first iteration and NEXT_MASK is the
+/* Define one loop rgroup control CTRL from loop LOOP. INIT_CTRL is the value
+ that the control should have during the first iteration and NEXT_CTRL is the
value that it should have on subsequent iterations. */
static void
-vect_set_loop_mask (class loop *loop, tree mask, tree init_mask,
- tree next_mask)
+vect_set_loop_control (class loop *loop, tree ctrl, tree init_ctrl,
+ tree next_ctrl)
{
- gphi *phi = create_phi_node (mask, loop->header);
- add_phi_arg (phi, init_mask, loop_preheader_edge (loop), UNKNOWN_LOCATION);
- add_phi_arg (phi, next_mask, loop_latch_edge (loop), UNKNOWN_LOCATION);
+ gphi *phi = create_phi_node (ctrl, loop->header);
+ add_phi_arg (phi, init_ctrl, loop_preheader_edge (loop), UNKNOWN_LOCATION);
+ add_phi_arg (phi, next_ctrl, loop_latch_edge (loop), UNKNOWN_LOCATION);
}
/* Add SEQ to the end of LOOP's preheader block. */
latter. Return true on success, adding any new statements to SEQ. */
static bool
-vect_maybe_permute_loop_masks (gimple_seq *seq, rgroup_masks *dest_rgm,
- rgroup_masks *src_rgm)
+vect_maybe_permute_loop_masks (gimple_seq *seq, rgroup_controls *dest_rgm,
+ rgroup_controls *src_rgm)
{
- tree src_masktype = src_rgm->mask_type;
- tree dest_masktype = dest_rgm->mask_type;
+ tree src_masktype = src_rgm->type;
+ tree dest_masktype = dest_rgm->type;
machine_mode src_mode = TYPE_MODE (src_masktype);
insn_code icode1, icode2;
if (dest_rgm->max_nscalars_per_iter <= src_rgm->max_nscalars_per_iter
machine_mode dest_mode = insn_data[icode1].operand[0].mode;
gcc_assert (dest_mode == insn_data[icode2].operand[0].mode);
tree unpack_masktype = vect_halve_mask_nunits (src_masktype, dest_mode);
- for (unsigned int i = 0; i < dest_rgm->masks.length (); ++i)
+ for (unsigned int i = 0; i < dest_rgm->controls.length (); ++i)
{
- tree src = src_rgm->masks[i / 2];
- tree dest = dest_rgm->masks[i];
+ tree src = src_rgm->controls[i / 2];
+ tree dest = dest_rgm->controls[i];
tree_code code = ((i & 1) == (BYTES_BIG_ENDIAN ? 0 : 1)
? VEC_UNPACK_HI_EXPR
: VEC_UNPACK_LO_EXPR);
tree masks[2];
for (unsigned int i = 0; i < 2; ++i)
masks[i] = vect_gen_perm_mask_checked (src_masktype, indices[i]);
- for (unsigned int i = 0; i < dest_rgm->masks.length (); ++i)
+ for (unsigned int i = 0; i < dest_rgm->controls.length (); ++i)
{
- tree src = src_rgm->masks[i / 2];
- tree dest = dest_rgm->masks[i];
+ tree src = src_rgm->controls[i / 2];
+ tree dest = dest_rgm->controls[i];
gimple *stmt = gimple_build_assign (dest, VEC_PERM_EXPR,
src, src, masks[i & 1]);
gimple_seq_add_stmt (seq, stmt);
return false;
}
-/* Helper for vect_set_loop_condition_masked. Generate definitions for
- all the masks in RGM and return a mask that is nonzero when the loop
- needs to iterate. Add any new preheader statements to PREHEADER_SEQ.
- Use LOOP_COND_GSI to insert code before the exit gcond.
+/* Helper for vect_set_loop_condition_partial_vectors. Generate definitions
+ for all the rgroup controls in RGC and return a control that is nonzero
+ when the loop needs to iterate. Add any new preheader statements to
+ PREHEADER_SEQ. Use LOOP_COND_GSI to insert code before the exit gcond.
- RGM belongs to loop LOOP. The loop originally iterated NITERS
+ RGC belongs to loop LOOP. The loop originally iterated NITERS
times and has been vectorized according to LOOP_VINFO.
If NITERS_SKIP is nonnull, the first iteration of the vectorized loop
It is known that:
- NITERS * RGM->max_nscalars_per_iter
+ NITERS * RGC->max_nscalars_per_iter
does not overflow. However, MIGHT_WRAP_P says whether an induction
variable that starts at 0 and has step:
- VF * RGM->max_nscalars_per_iter
+ VF * RGC->max_nscalars_per_iter
might overflow before hitting a value above:
- (NITERS + NITERS_SKIP) * RGM->max_nscalars_per_iter
+ (NITERS + NITERS_SKIP) * RGC->max_nscalars_per_iter
This means that we cannot guarantee that such an induction variable
- would ever hit a value that produces a set of all-false masks for RGM. */
+ would ever hit a value that produces a set of all-false masks for RGC. */
static tree
-vect_set_loop_masks_directly (class loop *loop, loop_vec_info loop_vinfo,
- gimple_seq *preheader_seq,
- gimple_stmt_iterator loop_cond_gsi,
- rgroup_masks *rgm, tree niters, tree niters_skip,
- bool might_wrap_p)
+vect_set_loop_controls_directly (class loop *loop, loop_vec_info loop_vinfo,
+ gimple_seq *preheader_seq,
+ gimple_stmt_iterator loop_cond_gsi,
+ rgroup_controls *rgc, tree niters,
+ tree niters_skip, bool might_wrap_p)
{
tree compare_type = LOOP_VINFO_MASK_COMPARE_TYPE (loop_vinfo);
tree iv_type = LOOP_VINFO_MASK_IV_TYPE (loop_vinfo);
- tree mask_type = rgm->mask_type;
- unsigned int nscalars_per_iter = rgm->max_nscalars_per_iter;
- poly_uint64 nscalars_per_mask = TYPE_VECTOR_SUBPARTS (mask_type);
+ tree ctrl_type = rgc->type;
+ unsigned int nscalars_per_iter = rgc->max_nscalars_per_iter;
+ poly_uint64 nscalars_per_ctrl = TYPE_VECTOR_SUBPARTS (ctrl_type);
poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
/* Calculate the maximum number of scalar values that the rgroup
tree nscalars_skip = niters_skip;
if (nscalars_per_iter != 1)
{
- /* We checked before choosing to use a fully-masked loop that these
- multiplications don't overflow. */
+ /* We checked before setting LOOP_VINFO_USING_PARTIAL_VECTORS_P that
+ these multiplications don't overflow. */
tree compare_factor = build_int_cst (compare_type, nscalars_per_iter);
tree iv_factor = build_int_cst (iv_type, nscalars_per_iter);
nscalars_total = gimple_build (preheader_seq, MULT_EXPR, compare_type,
test_index = gimple_convert (&test_seq, compare_type, test_index);
gsi_insert_seq_before (test_gsi, test_seq, GSI_SAME_STMT);
- /* Provide a definition of each mask in the group. */
- tree next_mask = NULL_TREE;
- tree mask;
+ /* Provide a definition of each control in the group. */
+ tree next_ctrl = NULL_TREE;
+ tree ctrl;
unsigned int i;
- FOR_EACH_VEC_ELT_REVERSE (rgm->masks, i, mask)
+ FOR_EACH_VEC_ELT_REVERSE (rgc->controls, i, ctrl)
{
- /* Previous masks will cover BIAS scalars. This mask covers the
+ /* Previous controls will cover BIAS scalars. This control covers the
next batch. */
- poly_uint64 bias = nscalars_per_mask * i;
+ poly_uint64 bias = nscalars_per_ctrl * i;
tree bias_tree = build_int_cst (compare_type, bias);
gimple *tmp_stmt;
/* See whether the first iteration of the vector loop is known
- to have a full mask. */
+ to have a full control. */
poly_uint64 const_limit;
bool first_iteration_full
= (poly_int_tree_p (first_limit, &const_limit)
- && known_ge (const_limit, (i + 1) * nscalars_per_mask));
+ && known_ge (const_limit, (i + 1) * nscalars_per_ctrl));
/* Rather than have a new IV that starts at BIAS and goes up to
- TEST_LIMIT, prefer to use the same 0-based IV for each mask
+ TEST_LIMIT, prefer to use the same 0-based IV for each control
and adjust the bound down by BIAS. */
tree this_test_limit = test_limit;
if (i != 0)
bias_tree);
}
- /* Create the initial mask. First include all scalars that
+ /* Create the initial control. First include all scalars that
are within the loop limit. */
- tree init_mask = NULL_TREE;
+ tree init_ctrl = NULL_TREE;
if (!first_iteration_full)
{
tree start, end;
{
/* FIRST_LIMIT is the maximum number of scalars handled by the
first iteration of the vector loop. Test the portion
- associated with this mask. */
+ associated with this control. */
start = bias_tree;
end = first_limit;
}
- init_mask = make_temp_ssa_name (mask_type, NULL, "max_mask");
- tmp_stmt = vect_gen_while (init_mask, start, end);
+ init_ctrl = make_temp_ssa_name (ctrl_type, NULL, "max_mask");
+ tmp_stmt = vect_gen_while (init_ctrl, start, end);
gimple_seq_add_stmt (preheader_seq, tmp_stmt);
}
&& !(poly_int_tree_p (nscalars_skip, &const_skip)
&& known_le (const_skip, bias)))
{
- tree unskipped_mask = vect_gen_while_not (preheader_seq, mask_type,
+ tree unskipped_mask = vect_gen_while_not (preheader_seq, ctrl_type,
bias_tree, nscalars_skip);
- if (init_mask)
- init_mask = gimple_build (preheader_seq, BIT_AND_EXPR, mask_type,
- init_mask, unskipped_mask);
+ if (init_ctrl)
+ init_ctrl = gimple_build (preheader_seq, BIT_AND_EXPR, ctrl_type,
+ init_ctrl, unskipped_mask);
else
- init_mask = unskipped_mask;
+ init_ctrl = unskipped_mask;
}
- if (!init_mask)
+ if (!init_ctrl)
/* First iteration is full. */
- init_mask = build_minus_one_cst (mask_type);
+ init_ctrl = build_minus_one_cst (ctrl_type);
- /* Get the mask value for the next iteration of the loop. */
- next_mask = make_temp_ssa_name (mask_type, NULL, "next_mask");
- gcall *call = vect_gen_while (next_mask, test_index, this_test_limit);
+ /* Get the control value for the next iteration of the loop. */
+ next_ctrl = make_temp_ssa_name (ctrl_type, NULL, "next_mask");
+ gcall *call = vect_gen_while (next_ctrl, test_index, this_test_limit);
gsi_insert_before (test_gsi, call, GSI_SAME_STMT);
- vect_set_loop_mask (loop, mask, init_mask, next_mask);
+ vect_set_loop_control (loop, ctrl, init_ctrl, next_ctrl);
}
- return next_mask;
+ return next_ctrl;
}
-/* Make LOOP iterate NITERS times using masking and WHILE_ULT calls.
- LOOP_VINFO describes the vectorization of LOOP. NITERS is the
- number of iterations of the original scalar loop that should be
- handled by the vector loop. NITERS_MAYBE_ZERO and FINAL_IV are
- as for vect_set_loop_condition.
+/* Set up the iteration condition and rgroup controls for LOOP, given
+ that LOOP_VINFO_USING_PARTIAL_VECTORS_P is true for the vectorized
+ loop. LOOP_VINFO describes the vectorization of LOOP. NITERS is
+ the number of iterations of the original scalar loop that should be
+ handled by the vector loop. NITERS_MAYBE_ZERO and FINAL_IV are as
+ for vect_set_loop_condition.
Insert the branch-back condition before LOOP_COND_GSI and return the
final gcond. */
widest_int iv_limit = vect_iv_limit_for_full_masking (loop_vinfo);
- /* Iterate over all the rgroups and fill in their masks. We could use
- the first mask from any rgroup for the loop condition; here we
+ /* Iterate over all the rgroups and fill in their controls. We could use
+ the first control from any rgroup for the loop condition; here we
arbitrarily pick the last. */
- tree test_mask = NULL_TREE;
- rgroup_masks *rgm;
+ tree test_ctrl = NULL_TREE;
+ rgroup_controls *rgc;
unsigned int i;
vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
- FOR_EACH_VEC_ELT (*masks, i, rgm)
- if (!rgm->masks.is_empty ())
+ FOR_EACH_VEC_ELT (*masks, i, rgc)
+ if (!rgc->controls.is_empty ())
{
/* First try using permutes. This adds a single vector
instruction to the loop for each mask, but needs no extra
unsigned int nmasks = i + 1;
if ((nmasks & 1) == 0)
{
- rgroup_masks *half_rgm = &(*masks)[nmasks / 2 - 1];
- if (!half_rgm->masks.is_empty ()
- && vect_maybe_permute_loop_masks (&header_seq, rgm, half_rgm))
+ rgroup_controls *half_rgc = &(*masks)[nmasks / 2 - 1];
+ if (!half_rgc->controls.is_empty ()
+ && vect_maybe_permute_loop_masks (&header_seq, rgc, half_rgc))
continue;
}
before wrapping around. */
bool might_wrap_p
= (iv_limit == -1
- || (wi::min_precision (iv_limit * rgm->max_nscalars_per_iter,
+ || (wi::min_precision (iv_limit * rgc->max_nscalars_per_iter,
UNSIGNED)
> compare_precision));
- /* Set up all masks for this group. */
- test_mask = vect_set_loop_masks_directly (loop, loop_vinfo,
- &preheader_seq,
- loop_cond_gsi, rgm,
- niters, niters_skip,
- might_wrap_p);
+ /* Set up all controls for this group. */
+ test_ctrl = vect_set_loop_controls_directly (loop, loop_vinfo,
+ &preheader_seq,
+ loop_cond_gsi, rgc,
+ niters, niters_skip,
+ might_wrap_p);
}
/* Emit all accumulated statements. */
/* Get a boolean result that tells us whether to iterate. */
edge exit_edge = single_exit (loop);
tree_code code = (exit_edge->flags & EDGE_TRUE_VALUE) ? EQ_EXPR : NE_EXPR;
- tree zero_mask = build_zero_cst (TREE_TYPE (test_mask));
- gcond *cond_stmt = gimple_build_cond (code, test_mask, zero_mask,
+ tree zero_ctrl = build_zero_cst (TREE_TYPE (test_ctrl));
+ gcond *cond_stmt = gimple_build_cond (code, test_ctrl, zero_ctrl,
NULL_TREE, NULL_TREE);
gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);
epilogue_vinfos.create (6);
}
-/* Free all levels of MASKS. */
+/* Free all levels of rgroup CONTROLS. */
void
-release_vec_loop_masks (vec_loop_masks *masks)
+release_vec_loop_controls (vec<rgroup_controls> *controls)
{
- rgroup_masks *rgm;
+ rgroup_controls *rgc;
unsigned int i;
- FOR_EACH_VEC_ELT (*masks, i, rgm)
- rgm->masks.release ();
- masks->release ();
+ FOR_EACH_VEC_ELT (*controls, i, rgc)
+ rgc->controls.release ();
+ controls->release ();
}
/* Free all memory used by the _loop_vec_info, as well as all the
{
free (bbs);
- release_vec_loop_masks (&masks);
+ release_vec_loop_controls (&masks);
delete ivexpr_map;
delete scan_map;
epilogue_vinfos.release ();
static bool
can_produce_all_loop_masks_p (loop_vec_info loop_vinfo, tree cmp_type)
{
- rgroup_masks *rgm;
+ rgroup_controls *rgm;
unsigned int i;
FOR_EACH_VEC_ELT (LOOP_VINFO_MASKS (loop_vinfo), i, rgm)
- if (rgm->mask_type != NULL_TREE
+ if (rgm->type != NULL_TREE
&& !direct_internal_fn_supported_p (IFN_WHILE_ULT,
- cmp_type, rgm->mask_type,
+ cmp_type, rgm->type,
OPTIMIZE_FOR_SPEED))
return false;
return true;
{
unsigned int res = 1;
unsigned int i;
- rgroup_masks *rgm;
+ rgroup_controls *rgm;
FOR_EACH_VEC_ELT (LOOP_VINFO_MASKS (loop_vinfo), i, rgm)
res = MAX (res, rgm->max_nscalars_per_iter);
return res;
LOOP_VINFO_TARGET_COST_DATA (loop_vinfo)
= init_cost (LOOP_VINFO_LOOP (loop_vinfo));
/* Reset accumulated rgroup information. */
- release_vec_loop_masks (&LOOP_VINFO_MASKS (loop_vinfo));
+ release_vec_loop_controls (&LOOP_VINFO_MASKS (loop_vinfo));
/* Reset assorted flags. */
LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = false;
LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = false;
/* Calculate how many masks we need to generate. */
unsigned int num_masks = 0;
- rgroup_masks *rgm;
+ rgroup_controls *rgm;
unsigned int num_vectors_m1;
FOR_EACH_VEC_ELT (LOOP_VINFO_MASKS (loop_vinfo), num_vectors_m1, rgm)
- if (rgm->mask_type)
+ if (rgm->type)
num_masks += num_vectors_m1 + 1;
gcc_assert (num_masks > 0);
gcc_assert (nvectors != 0);
if (masks->length () < nvectors)
masks->safe_grow_cleared (nvectors);
- rgroup_masks *rgm = &(*masks)[nvectors - 1];
+ rgroup_controls *rgm = &(*masks)[nvectors - 1];
/* The number of scalars per iteration and the number of vectors are
both compile-time constants. */
unsigned int nscalars_per_iter
if (rgm->max_nscalars_per_iter < nscalars_per_iter)
{
rgm->max_nscalars_per_iter = nscalars_per_iter;
- rgm->mask_type = truth_type_for (vectype);
+ rgm->type = truth_type_for (vectype);
}
}
vect_get_loop_mask (gimple_stmt_iterator *gsi, vec_loop_masks *masks,
unsigned int nvectors, tree vectype, unsigned int index)
{
- rgroup_masks *rgm = &(*masks)[nvectors - 1];
- tree mask_type = rgm->mask_type;
+ rgroup_controls *rgm = &(*masks)[nvectors - 1];
+ tree mask_type = rgm->type;
/* Populate the rgroup's mask array, if this is the first time we've
used it. */
- if (rgm->masks.is_empty ())
+ if (rgm->controls.is_empty ())
{
- rgm->masks.safe_grow_cleared (nvectors);
+ rgm->controls.safe_grow_cleared (nvectors);
for (unsigned int i = 0; i < nvectors; ++i)
{
tree mask = make_temp_ssa_name (mask_type, NULL, "loop_mask");
/* Provide a dummy definition until the real one is available. */
SSA_NAME_DEF_STMT (mask) = gimple_build_nop ();
- rgm->masks[i] = mask;
+ rgm->controls[i] = mask;
}
}
- tree mask = rgm->masks[index];
+ tree mask = rgm->controls[index];
if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_type),
TYPE_VECTOR_SUBPARTS (vectype)))
{
return i->kind == vec_info::bb;
}
-
/* In general, we can divide the vector statements in a vectorized loop
into related groups ("rgroups") and say that for each rgroup there is
some nS such that the rgroup operates on nS values from one scalar
In classical vectorization, each iteration of the vector loop would
handle exactly VF iterations of the original scalar loop. However,
- in a fully-masked loop, a particular iteration of the vector loop
- might handle fewer than VF iterations of the scalar loop. The vector
- lanes that correspond to iterations of the scalar loop are said to be
- "active" and the other lanes are said to be "inactive".
-
- In a fully-masked loop, many rgroups need to be masked to ensure that
- they have no effect for the inactive lanes. Each such rgroup needs a
- sequence of booleans in the same order as above, but with each (i,j)
- replaced by a boolean that indicates whether iteration i is active.
- This sequence occupies nV vector masks that again have nL lanes each.
- Thus the mask sequence as a whole consists of VF independent booleans
- that are each repeated nS times.
-
+ in vector loops that are able to operate on partial vectors, a
+ particular iteration of the vector loop might handle fewer than VF
+ iterations of the scalar loop. The vector lanes that correspond to
+ iterations of the scalar loop are said to be "active" and the other
+ lanes are said to be "inactive".
+
+ In such vector loops, many rgroups need to be controlled to ensure
+ that they have no effect for the inactive lanes. Conceptually, each
+ such rgroup needs a sequence of booleans in the same order as above,
+ but with each (i,j) replaced by a boolean that indicates whether
+ iteration i is active. This sequence occupies nV vector controls
+ that again have nL lanes each. Thus the control sequence as a whole
+ consists of VF independent booleans that are each repeated nS times.
+
+ Take the mask-based approach to partially-populated vectors as an example.
We make the simplifying assumption that if a sequence of nV masks is
suitable for one (nS,nL) pair, we can reuse it for (nS/2,nL/2) by
VIEW_CONVERTing it. This holds for all current targets that support
first level being indexed by nV - 1 (since nV == 0 doesn't exist) and
the second being indexed by the mask index 0 <= i < nV. */
-/* The masks needed by rgroups with nV vectors, according to the
- description above. */
-struct rgroup_masks {
- /* The largest nS for all rgroups that use these masks. */
+/* The controls (like masks) needed by rgroups with nV vectors,
+ according to the description above. */
+struct rgroup_controls {
+ /* The largest nS for all rgroups that use these controls. */
unsigned int max_nscalars_per_iter;
- /* The type of mask to use, based on the highest nS recorded above. */
- tree mask_type;
+ /* The type of control to use, based on the highest nS recorded above.
+ For the mask-based approach, this is the mask type. */
+ tree type;
- /* A vector of nV masks, in iteration order. */
- vec<tree> masks;
+ /* A vector of nV controls, in iteration order. */
+ vec<tree> controls;
};
-typedef auto_vec<rgroup_masks> vec_loop_masks;
+typedef auto_vec<rgroup_controls> vec_loop_masks;
typedef auto_vec<std::pair<data_reference*, tree> > drs_init_vec;