may already be set for general statements (not just data refs). */
static opt_result
-vect_determine_vf_for_stmt_1 (stmt_vec_info stmt_info,
+vect_determine_vf_for_stmt_1 (vec_info *vinfo, stmt_vec_info stmt_info,
bool vectype_maybe_set_p,
poly_uint64 *vf)
{
}
tree stmt_vectype, nunits_vectype;
- opt_result res = vect_get_vector_types_for_stmt (stmt_info, &stmt_vectype,
+ opt_result res = vect_get_vector_types_for_stmt (vinfo, stmt_info,
+ &stmt_vectype,
&nunits_vectype);
if (!res)
return res;
or false if something prevented vectorization. */
static opt_result
-vect_determine_vf_for_stmt (stmt_vec_info stmt_info, poly_uint64 *vf)
+vect_determine_vf_for_stmt (vec_info *vinfo,
+ stmt_vec_info stmt_info, poly_uint64 *vf)
{
- vec_info *vinfo = stmt_info->vinfo;
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G",
stmt_info->stmt);
- opt_result res = vect_determine_vf_for_stmt_1 (stmt_info, false, vf);
+ opt_result res = vect_determine_vf_for_stmt_1 (vinfo, stmt_info, false, vf);
if (!res)
return res;
dump_printf_loc (MSG_NOTE, vect_location,
"==> examining pattern def stmt: %G",
def_stmt_info->stmt);
- res = vect_determine_vf_for_stmt_1 (def_stmt_info, true, vf);
+ res = vect_determine_vf_for_stmt_1 (vinfo, def_stmt_info, true, vf);
if (!res)
return res;
}
dump_printf_loc (MSG_NOTE, vect_location,
"==> examining pattern statement: %G",
stmt_info->stmt);
- res = vect_determine_vf_for_stmt_1 (stmt_info, true, vf);
+ res = vect_determine_vf_for_stmt_1 (vinfo, stmt_info, true, vf);
if (!res)
return res;
}
for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si);
gsi_next (&si))
{
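+	  /* Skip debug stmts; they do not participate in vectorization.  */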
+ if (is_gimple_debug (gsi_stmt (si)))
+ continue;
stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
opt_result res
- = vect_determine_vf_for_stmt (stmt_info, &vectorization_factor);
+ = vect_determine_vf_for_stmt (loop_vinfo,
+ stmt_info, &vectorization_factor);
if (!res)
return res;
}
this function would then return true for x_2. */
static bool
-vect_inner_phi_in_double_reduction_p (stmt_vec_info stmt_info, gphi *phi)
+vect_inner_phi_in_double_reduction_p (loop_vec_info loop_vinfo, gphi *phi)
{
- loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
use_operand_p use_p;
ssa_op_iter op_iter;
FOR_EACH_PHI_ARG (use_p, phi, op_iter, SSA_OP_USE)
}
if (!access_fn
- || vect_inner_phi_in_double_reduction_p (stmt_vinfo, phi)
+ || vect_inner_phi_in_double_reduction_p (loop_vinfo, phi)
|| !vect_is_simple_iv_evolution (loop->num, access_fn, &init, &step)
|| (LOOP_VINFO_LOOP (loop_vinfo) != loop
&& TREE_CODE (step) != INTEGER_CST))
unsigned i;
FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo), i, first)
- if (STMT_VINFO_IN_PATTERN_P (first))
- {
- stmt_vec_info next = REDUC_GROUP_NEXT_ELEMENT (first);
- while (next)
- {
- if (! STMT_VINFO_IN_PATTERN_P (next)
- || STMT_VINFO_REDUC_IDX (STMT_VINFO_RELATED_STMT (next)) == -1)
- break;
- next = REDUC_GROUP_NEXT_ELEMENT (next);
- }
- /* If not all stmt in the chain are patterns or if we failed
- to update STMT_VINFO_REDUC_IDX try to handle the chain
- without patterns. */
- if (! next
- && STMT_VINFO_REDUC_IDX (STMT_VINFO_RELATED_STMT (first)) != -1)
- {
- vect_fixup_reduc_chain (first);
- LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo)[i]
- = STMT_VINFO_RELATED_STMT (first);
- }
- }
+ {
+ stmt_vec_info next = REDUC_GROUP_NEXT_ELEMENT (first);
+ while (next)
+ {
+ if ((STMT_VINFO_IN_PATTERN_P (next)
+ != STMT_VINFO_IN_PATTERN_P (first))
+ || STMT_VINFO_REDUC_IDX (vect_stmt_to_vectorize (next)) == -1)
+ break;
+ next = REDUC_GROUP_NEXT_ELEMENT (next);
+ }
+	  /* If all reduction chain members are well-formed patterns, adjust
+	     the group so that it groups the pattern stmts instead.  */
+ if (! next
+ && STMT_VINFO_REDUC_IDX (vect_stmt_to_vectorize (first)) != -1)
+ {
+ if (STMT_VINFO_IN_PATTERN_P (first))
+ {
+ vect_fixup_reduc_chain (first);
+ LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo)[i]
+ = STMT_VINFO_RELATED_STMT (first);
+ }
+ }
+	  /* If not all stmts in the chain are patterns, or if we failed
+	     to update STMT_VINFO_REDUC_IDX, dissolve the chain and handle
+	     it as a regular reduction instead.  */
+ else
+ {
+ stmt_vec_info vinfo = first;
+ stmt_vec_info last = NULL;
+ while (vinfo)
+ {
+ next = REDUC_GROUP_NEXT_ELEMENT (vinfo);
+ REDUC_GROUP_FIRST_ELEMENT (vinfo) = NULL;
+ REDUC_GROUP_NEXT_ELEMENT (vinfo) = NULL;
+ last = vinfo;
+ vinfo = next;
+ }
+ STMT_VINFO_DEF_TYPE (vect_stmt_to_vectorize (first))
+ = vect_internal_def;
+ loop_vinfo->reductions.safe_push (vect_stmt_to_vectorize (last));
+ LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo).unordered_remove (i);
+ --i;
+ }
+ }
}
/* Function vect_get_loop_niters.
vectorization_factor (0),
max_vectorization_factor (0),
mask_skip_niters (NULL_TREE),
- mask_compare_type (NULL_TREE),
+ rgroup_compare_type (NULL_TREE),
simd_if_cond (NULL_TREE),
unaligned_dr (NULL),
peeling_for_alignment (0),
vec_outside_cost (0),
vec_inside_cost (0),
vectorizable (false),
- can_fully_mask_p (true),
- fully_masked_p (false),
+ can_use_partial_vectors_p (param_vect_partial_vector_usage != 0),
+ using_partial_vectors_p (false),
+ epil_using_partial_vectors_p (false),
peeling_for_gaps (false),
peeling_for_niter (false),
no_data_dependencies (false),
{
gimple *stmt = gsi_stmt (si);
gimple_set_uid (stmt, 0);
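+	  /* Do not create stmt_vec_infos for debug stmts; the vectorizer
+	     ignores them.  */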
+ if (is_gimple_debug (stmt))
+ continue;
add_stmt (stmt);
/* If .GOMP_SIMD_LANE call for the current loop has 3 arguments, the
third argument is the #pragma omp simd if (x) condition, when 0,
epilogue_vinfos.create (6);
}
-/* Free all levels of MASKS. */
+/* Free all levels of rgroup CONTROLS. */
void
-release_vec_loop_masks (vec_loop_masks *masks)
+release_vec_loop_controls (vec<rgroup_controls> *controls)
{
- rgroup_masks *rgm;
+ rgroup_controls *rgc;
unsigned int i;
- FOR_EACH_VEC_ELT (*masks, i, rgm)
- rgm->masks.release ();
- masks->release ();
+ FOR_EACH_VEC_ELT (*controls, i, rgc)
+ rgc->controls.release ();
+ controls->release ();
}
/* Free all memory used by the _loop_vec_info, as well as all the
{
free (bbs);
- release_vec_loop_masks (&masks);
+ release_vec_loop_controls (&masks);
+ release_vec_loop_controls (&lens);
delete ivexpr_map;
delete scan_map;
epilogue_vinfos.release ();
static bool
can_produce_all_loop_masks_p (loop_vec_info loop_vinfo, tree cmp_type)
{
- rgroup_masks *rgm;
+ rgroup_controls *rgm;
unsigned int i;
FOR_EACH_VEC_ELT (LOOP_VINFO_MASKS (loop_vinfo), i, rgm)
- if (rgm->mask_type != NULL_TREE
+ if (rgm->type != NULL_TREE
&& !direct_internal_fn_supported_p (IFN_WHILE_ULT,
- cmp_type, rgm->mask_type,
+ cmp_type, rgm->type,
OPTIMIZE_FOR_SPEED))
return false;
return true;
{
unsigned int res = 1;
unsigned int i;
- rgroup_masks *rgm;
+ rgroup_controls *rgm;
FOR_EACH_VEC_ELT (LOOP_VINFO_MASKS (loop_vinfo), i, rgm)
res = MAX (res, rgm->max_nscalars_per_iter);
return res;
}
+/* Calculate the minimum precision necessary to represent:
+
+ MAX_NITERS * FACTOR
+
+ as an unsigned integer, where MAX_NITERS is the maximum number of
+ loop header iterations for the original scalar form of LOOP_VINFO. */
+
+static unsigned
+vect_min_prec_for_max_niters (loop_vec_info loop_vinfo, unsigned int factor)
+{
+ class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+
+ /* Get the maximum number of iterations that is representable
+ in the counter type. */
+ tree ni_type = TREE_TYPE (LOOP_VINFO_NITERSM1 (loop_vinfo));
+ widest_int max_ni = wi::to_widest (TYPE_MAX_VALUE (ni_type)) + 1;
+
+ /* Get a more refined estimate for the number of iterations. */
+ widest_int max_back_edges;
+ if (max_loop_iterations (loop, &max_back_edges))
+ max_ni = wi::smin (max_ni, max_back_edges + 1);
+
+ /* Work out how many bits we need to represent the limit. */
+ return wi::min_precision (max_ni * factor, UNSIGNED);
+}
+
+/* True if the loop needs peeling or partial vectors when vectorized. */
+
+static bool
+vect_need_peeling_or_partial_vectors_p (loop_vec_info loop_vinfo)
+{
+ unsigned HOST_WIDE_INT const_vf;
+ HOST_WIDE_INT max_niter
+ = likely_max_stmt_executions_int (LOOP_VINFO_LOOP (loop_vinfo));
+
+ unsigned th = LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo);
+ if (!th && LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo))
+ th = LOOP_VINFO_COST_MODEL_THRESHOLD (LOOP_VINFO_ORIG_LOOP_INFO
+ (loop_vinfo));
+
+ if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
+ && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) >= 0)
+ {
+ /* Work out the (constant) number of iterations that need to be
+ peeled for reasons other than niters. */
+ unsigned int peel_niter = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
+ if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
+ peel_niter += 1;
+ if (!multiple_p (LOOP_VINFO_INT_NITERS (loop_vinfo) - peel_niter,
+ LOOP_VINFO_VECT_FACTOR (loop_vinfo)))
+ return true;
+ }
+ else if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)
+ /* ??? When peeling for gaps but not alignment, we could
+ try to check whether the (variable) niters is known to be
+ VF * N + 1. That's something of a niche case though. */
+ || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
+ || !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&const_vf)
+ || ((tree_ctz (LOOP_VINFO_NITERS (loop_vinfo))
+ < (unsigned) exact_log2 (const_vf))
+ /* In case of versioning, check if the maximum number of
+ iterations is greater than th. If they are identical,
+ the epilogue is unnecessary. */
+ && (!LOOP_REQUIRES_VERSIONING (loop_vinfo)
+ || ((unsigned HOST_WIDE_INT) max_niter
+ > (th / const_vf) * const_vf))))
+ return true;
+
+ return false;
+}
+
/* Each statement in LOOP_VINFO can be masked where necessary. Check
whether we can actually generate the masks required. Return true if so,
- storing the type of the scalar IV in LOOP_VINFO_MASK_COMPARE_TYPE. */
+ storing the type of the scalar IV in LOOP_VINFO_RGROUP_COMPARE_TYPE. */
static bool
vect_verify_full_masking (loop_vec_info loop_vinfo)
{
- class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
unsigned int min_ni_width;
unsigned int max_nscalars_per_iter
= vect_get_max_nscalars_per_iter (loop_vinfo);
if (LOOP_VINFO_MASKS (loop_vinfo).is_empty ())
return false;
- /* Get the maximum number of iterations that is representable
- in the counter type. */
- tree ni_type = TREE_TYPE (LOOP_VINFO_NITERSM1 (loop_vinfo));
- widest_int max_ni = wi::to_widest (TYPE_MAX_VALUE (ni_type)) + 1;
-
- /* Get a more refined estimate for the number of iterations. */
- widest_int max_back_edges;
- if (max_loop_iterations (loop, &max_back_edges))
- max_ni = wi::smin (max_ni, max_back_edges + 1);
-
- /* Account for rgroup masks, in which each bit is replicated N times. */
- max_ni *= max_nscalars_per_iter;
-
/* Work out how many bits we need to represent the limit. */
- min_ni_width = wi::min_precision (max_ni, UNSIGNED);
+ min_ni_width
+ = vect_min_prec_for_max_niters (loop_vinfo, max_nscalars_per_iter);
/* Find a scalar mode for which WHILE_ULT is supported. */
opt_scalar_int_mode cmp_mode_iter;
tree cmp_type = NULL_TREE;
tree iv_type = NULL_TREE;
- widest_int iv_limit = vect_iv_limit_for_full_masking (loop_vinfo);
+ widest_int iv_limit = vect_iv_limit_for_partial_vectors (loop_vinfo);
unsigned int iv_precision = UINT_MAX;
if (iv_limit != -1)
if (!cmp_type)
return false;
- LOOP_VINFO_MASK_COMPARE_TYPE (loop_vinfo) = cmp_type;
- LOOP_VINFO_MASK_IV_TYPE (loop_vinfo) = iv_type;
+ LOOP_VINFO_RGROUP_COMPARE_TYPE (loop_vinfo) = cmp_type;
+ LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo) = iv_type;
+ return true;
+}
+
+/* Check whether we can use vector accesses with length, based on a precision
+   comparison.  So far, to keep it simple, we only allow the case in which the
+   precision of the target-supported length is larger than the precision
+   required by the loop niters.  */
+
+static bool
+vect_verify_loop_lens (loop_vec_info loop_vinfo)
+{
+ if (LOOP_VINFO_LENS (loop_vinfo).is_empty ())
+ return false;
+
+ unsigned int max_nitems_per_iter = 1;
+ unsigned int i;
+ rgroup_controls *rgl;
+ /* Find the maximum number of items per iteration for every rgroup. */
+ FOR_EACH_VEC_ELT (LOOP_VINFO_LENS (loop_vinfo), i, rgl)
+ {
+ unsigned nitems_per_iter = rgl->max_nscalars_per_iter * rgl->factor;
+ max_nitems_per_iter = MAX (max_nitems_per_iter, nitems_per_iter);
+ }
+
+ /* Work out how many bits we need to represent the length limit. */
+ unsigned int min_ni_prec
+ = vect_min_prec_for_max_niters (loop_vinfo, max_nitems_per_iter);
+
+  /* Now use the maximum of the precisions below for one suitable IV type:
+     - the IV's natural precision
+     - the precision needed to hold the maximum number of scalar
+       iterations multiplied by the scale factor (min_ni_prec above)
+     - the Pmode precision
+
+     If min_ni_prec is less than the precision of the current niters,
+     we prefer to still use the niters type.  Prefer to use Pmode and
+     a wider IV to avoid narrow conversions.  */
+
+ unsigned int ni_prec
+ = TYPE_PRECISION (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)));
+ min_ni_prec = MAX (min_ni_prec, ni_prec);
+ min_ni_prec = MAX (min_ni_prec, GET_MODE_BITSIZE (Pmode));
+
+ tree iv_type = NULL_TREE;
+ opt_scalar_int_mode tmode_iter;
+ FOR_EACH_MODE_IN_CLASS (tmode_iter, MODE_INT)
+ {
+ scalar_mode tmode = tmode_iter.require ();
+ unsigned int tbits = GET_MODE_BITSIZE (tmode);
+
+ /* ??? Do we really want to construct one IV whose precision exceeds
+ BITS_PER_WORD? */
+ if (tbits > BITS_PER_WORD)
+ break;
+
+ /* Find the first available standard integral type. */
+ if (tbits >= min_ni_prec && targetm.scalar_mode_supported_p (tmode))
+ {
+ iv_type = build_nonstandard_integer_type (tbits, true);
+ break;
+ }
+ }
+
+ if (!iv_type)
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "can't vectorize with length-based partial vectors"
+ " because there is no suitable iv type.\n");
+ return false;
+ }
+
+ LOOP_VINFO_RGROUP_COMPARE_TYPE (loop_vinfo) = iv_type;
+ LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo) = iv_type;
+
return true;
}
int j;
FOR_EACH_VEC_ELT (LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo),
j, si)
- (void) add_stmt_cost (target_cost_data, si->count,
- si->kind, si->stmt_info, si->misalign,
- vect_body);
+ (void) add_stmt_cost (loop_vinfo, target_cost_data, si->count,
+ si->kind, si->stmt_info, si->vectype,
+ si->misalign, vect_body);
unsigned dummy, body_cost = 0;
finish_cost (target_cost_data, &dummy, &body_cost, &dummy);
destroy_cost_data (target_cost_data);
for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si);
gsi_next (&si))
{
+ if (is_gimple_debug (gsi_stmt (si)))
+ continue;
stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
stmt_info = vect_stmt_to_vectorize (stmt_info);
if ((STMT_VINFO_RELEVANT_P (stmt_info)
if ((STMT_VINFO_DEF_TYPE (stmt_info) == vect_internal_def
|| (STMT_VINFO_DEF_TYPE (stmt_info)
== vect_double_reduction_def))
- && !vectorizable_lc_phi (stmt_info, NULL, NULL))
+ && !vectorizable_lc_phi (loop_vinfo,
+ stmt_info, NULL, NULL))
return opt_result::failure_at (phi, "unsupported phi\n");
}
need_to_vectorize = true;
if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def
&& ! PURE_SLP_STMT (stmt_info))
- ok = vectorizable_induction (stmt_info, NULL, NULL, NULL,
+ ok = vectorizable_induction (loop_vinfo,
+ stmt_info, NULL, NULL,
&cost_vec);
else if ((STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
|| (STMT_VINFO_DEF_TYPE (stmt_info)
== vect_double_reduction_def)
|| STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle)
&& ! PURE_SLP_STMT (stmt_info))
- ok = vectorizable_reduction (stmt_info, NULL, NULL, &cost_vec);
+ ok = vectorizable_reduction (loop_vinfo,
+ stmt_info, NULL, NULL, &cost_vec);
}
/* SLP PHIs are tested by vect_slp_analyze_node_operations. */
if (ok
&& STMT_VINFO_LIVE_P (stmt_info)
&& !PURE_SLP_STMT (stmt_info))
- ok = vectorizable_live_operation (stmt_info, NULL, NULL, NULL,
+ ok = vectorizable_live_operation (loop_vinfo,
+ stmt_info, NULL, NULL, NULL,
-1, false, &cost_vec);
if (!ok)
gsi_next (&si))
{
gimple *stmt = gsi_stmt (si);
- if (!gimple_clobber_p (stmt))
+ if (!gimple_clobber_p (stmt)
+ && !is_gimple_debug (stmt))
{
opt_result res
- = vect_analyze_stmt (loop_vinfo->lookup_stmt (stmt),
+ = vect_analyze_stmt (loop_vinfo,
+ loop_vinfo->lookup_stmt (stmt),
&need_to_vectorize,
NULL, NULL, &cost_vec);
if (!res)
}
} /* bbs */
- add_stmt_costs (loop_vinfo->target_cost_data, &cost_vec);
+ add_stmt_costs (loop_vinfo, loop_vinfo->target_cost_data, &cost_vec);
/* All operations in the loop are either irrelevant (deal with loop
control, or dead), or only used outside the loop and can be moved
return opt_result::success ();
}
+/* Return true if we know that the iteration count is smaller than the
+ vectorization factor. Return false if it isn't, or if we can't be sure
+ either way. */
+
+static bool
+vect_known_niters_smaller_than_vf (loop_vec_info loop_vinfo)
+{
+ unsigned int assumed_vf = vect_vf_for_cost (loop_vinfo);
+
+ HOST_WIDE_INT max_niter;
+ if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
+ max_niter = LOOP_VINFO_INT_NITERS (loop_vinfo);
+ else
+ max_niter = max_stmt_executions_int (LOOP_VINFO_LOOP (loop_vinfo));
+
+ if (max_niter != -1 && (unsigned HOST_WIDE_INT) max_niter < assumed_vf)
+ return true;
+
+ return false;
+}
+
/* Analyze the cost of the loop described by LOOP_VINFO. Decide if it
is worthwhile to vectorize. Return 1 if definitely yes, 0 if
definitely no, or -1 if it's worth retrying. */
class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
unsigned int assumed_vf = vect_vf_for_cost (loop_vinfo);
- /* Only fully-masked loops can have iteration counts less than the
- vectorization factor. */
- if (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
+ /* Only loops that can handle partially-populated vectors can have iteration
+ counts less than the vectorization factor. */
+ if (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo))
{
- HOST_WIDE_INT max_niter;
-
- if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
- max_niter = LOOP_VINFO_INT_NITERS (loop_vinfo);
- else
- max_niter = max_stmt_executions_int (loop);
-
- if (max_niter != -1
- && (unsigned HOST_WIDE_INT) max_niter < assumed_vf)
+ if (vect_known_niters_smaller_than_vf (loop_vinfo))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
}
}
+  /* If using the "very cheap" model, reject cases in which we'd keep
+     a copy of the scalar code (even if we might be able to vectorize it).  */
+ if (flag_vect_cost_model == VECT_COST_MODEL_VERY_CHEAP
+ && (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)
+ || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
+ || LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "some scalar iterations would need to be peeled\n");
+ return 0;
+ }
+
int min_profitable_iters, min_profitable_estimate;
vect_estimate_min_profitable_iters (loop_vinfo, &min_profitable_iters,
&min_profitable_estimate);
min_profitable_estimate = min_profitable_iters;
}
+ /* If the vector loop needs multiple iterations to be beneficial then
+ things are probably too close to call, and the conservative thing
+ would be to stick with the scalar code. */
+ if (flag_vect_cost_model == VECT_COST_MODEL_VERY_CHEAP
+ && min_profitable_estimate > (int) vect_vf_for_cost (loop_vinfo))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "one iteration of the vector loop would be"
+ " more expensive than the equivalent number of"
+ " iterations of the scalar loop\n");
+ return 0;
+ }
+
HOST_WIDE_INT estimated_niter;
/* If we are vectorizing an epilogue then we know the maximum number of
if (is_gimple_debug (stmt))
continue;
++(*n_stmts);
- opt_result res = vect_find_stmt_data_reference (loop, stmt, datarefs);
+ opt_result res = vect_find_stmt_data_reference (loop, stmt, datarefs,
+ NULL, 0);
if (!res)
{
if (is_gimple_call (stmt) && loop->safelen)
DUMP_VECT_SCOPE ("vect_dissolve_slp_only_groups");
- vec<data_reference_p> datarefs = loop_vinfo->shared->datarefs;
+ vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
FOR_EACH_VEC_ELT (datarefs, i, dr)
{
gcc_assert (DR_REF (dr));
}
}
+/* Determine if operating on full vectors for LOOP_VINFO might leave
+ some scalar iterations still to do. If so, decide how we should
+ handle those scalar iterations. The possibilities are:
-/* Decides whether we need to create an epilogue loop to handle
- remaining scalar iterations and sets PEELING_FOR_NITERS accordingly. */
+ (1) Make LOOP_VINFO operate on partial vectors instead of full vectors.
+ In this case:
-void
-determine_peel_for_niter (loop_vec_info loop_vinfo)
+ LOOP_VINFO_USING_PARTIAL_VECTORS_P == true
+ LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P == false
+ LOOP_VINFO_PEELING_FOR_NITER == false
+
+ (2) Make LOOP_VINFO operate on full vectors and use an epilogue loop
+ to handle the remaining scalar iterations. In this case:
+
+ LOOP_VINFO_USING_PARTIAL_VECTORS_P == false
+ LOOP_VINFO_PEELING_FOR_NITER == true
+
+ There are two choices:
+
+ (2a) Consider vectorizing the epilogue loop at the same VF as the
+ main loop, but using partial vectors instead of full vectors.
+ In this case:
+
+ LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P == true
+
+ (2b) Consider vectorizing the epilogue loop at lower VFs only.
+ In this case:
+
+ LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P == false
+
+ When FOR_EPILOGUE_P is true, make this determination based on the
+ assumption that LOOP_VINFO is an epilogue loop, otherwise make it
+ based on the assumption that LOOP_VINFO is the main loop. The caller
+ has made sure that the number of iterations is set appropriately for
+ this value of FOR_EPILOGUE_P. */
+
+opt_result
+vect_determine_partial_vectors_and_peeling (loop_vec_info loop_vinfo,
+ bool for_epilogue_p)
{
- LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = false;
+ /* Determine whether there would be any scalar iterations left over. */
+ bool need_peeling_or_partial_vectors_p
+ = vect_need_peeling_or_partial_vectors_p (loop_vinfo);
+
+ /* Decide whether to vectorize the loop with partial vectors. */
+ LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo) = false;
+ LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P (loop_vinfo) = false;
+ if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
+ && need_peeling_or_partial_vectors_p)
+ {
+ /* For partial-vector-usage=1, try to push the handling of partial
+ vectors to the epilogue, with the main loop continuing to operate
+ on full vectors.
+
+ ??? We could then end up failing to use partial vectors if we
+ decide to peel iterations into a prologue, and if the main loop
+ then ends up processing fewer than VF iterations. */
+ if (param_vect_partial_vector_usage == 1
+ && !LOOP_VINFO_EPILOGUE_P (loop_vinfo)
+ && !vect_known_niters_smaller_than_vf (loop_vinfo))
+ LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P (loop_vinfo) = true;
+ else
+ LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo) = true;
+ }
- unsigned HOST_WIDE_INT const_vf;
- HOST_WIDE_INT max_niter
- = likely_max_stmt_executions_int (LOOP_VINFO_LOOP (loop_vinfo));
+ if (dump_enabled_p ())
+ {
+ if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo))
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "operating on partial vectors%s.\n",
+ for_epilogue_p ? " for epilogue loop" : "");
+ else
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "operating only on full vectors%s.\n",
+ for_epilogue_p ? " for epilogue loop" : "");
+ }
- unsigned th = LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo);
- if (!th && LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo))
- th = LOOP_VINFO_COST_MODEL_THRESHOLD (LOOP_VINFO_ORIG_LOOP_INFO
- (loop_vinfo));
+ if (for_epilogue_p)
+ {
+ loop_vec_info orig_loop_vinfo = LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo);
+ gcc_assert (orig_loop_vinfo);
+ if (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo))
+ gcc_assert (known_lt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
+ LOOP_VINFO_VECT_FACTOR (orig_loop_vinfo)));
+ }
- if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
- /* The main loop handles all iterations. */
- LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = false;
- else if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
- && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) >= 0)
+ if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
+ && !LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo))
{
- /* Work out the (constant) number of iterations that need to be
- peeled for reasons other than niters. */
- unsigned int peel_niter = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
- if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
- peel_niter += 1;
- if (!multiple_p (LOOP_VINFO_INT_NITERS (loop_vinfo) - peel_niter,
- LOOP_VINFO_VECT_FACTOR (loop_vinfo)))
- LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = true;
+ /* Check that the loop processes at least one full vector. */
+ poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+ tree scalar_niters = LOOP_VINFO_NITERS (loop_vinfo);
+ if (known_lt (wi::to_widest (scalar_niters), vf))
+ return opt_result::failure_at (vect_location,
+ "loop does not have enough iterations"
+ " to support vectorization.\n");
+
+ /* If we need to peel an extra epilogue iteration to handle data
+ accesses with gaps, check that there are enough scalar iterations
+ available.
+
+ The check above is redundant with this one when peeling for gaps,
+ but the distinction is useful for diagnostics. */
+ tree scalar_nitersm1 = LOOP_VINFO_NITERSM1 (loop_vinfo);
+ if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
+ && known_lt (wi::to_widest (scalar_nitersm1), vf))
+ return opt_result::failure_at (vect_location,
+ "loop does not have enough iterations"
+ " to support peeling for gaps.\n");
}
- else if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)
- /* ??? When peeling for gaps but not alignment, we could
- try to check whether the (variable) niters is known to be
- VF * N + 1. That's something of a niche case though. */
- || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
- || !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&const_vf)
- || ((tree_ctz (LOOP_VINFO_NITERS (loop_vinfo))
- < (unsigned) exact_log2 (const_vf))
- /* In case of versioning, check if the maximum number of
- iterations is greater than th. If they are identical,
- the epilogue is unnecessary. */
- && (!LOOP_REQUIRES_VERSIONING (loop_vinfo)
- || ((unsigned HOST_WIDE_INT) max_niter
- > (th / const_vf) * const_vf))))
- LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = true;
-}
+ LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)
+ = (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)
+ && need_peeling_or_partial_vectors_p);
+
+ return opt_result::success ();
+}
/* Function vect_analyze_loop_2.
/* Analyze the access patterns of the data-refs in the loop (consecutive,
complex, etc.). FORNOW: Only handle consecutive access pattern. */
- ok = vect_analyze_data_ref_accesses (loop_vinfo);
+ ok = vect_analyze_data_ref_accesses (loop_vinfo, NULL);
if (!ok)
{
if (dump_enabled_p ())
/* Update the vectorization factor based on the SLP decision. */
vect_update_vf_for_slp (loop_vinfo);
+
+ /* Optimize the SLP graph with the vectorization factor fixed. */
+ vect_optimize_slp (loop_vinfo);
+
+ /* Gather the loads reachable from the SLP graph entries. */
+ vect_gather_slp_loads (loop_vinfo);
}
- bool saved_can_fully_mask_p = LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo);
+ bool saved_can_use_partial_vectors_p
+ = LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo);
/* We don't expect to have to roll back to anything other than an empty
set of rgroups. */
/* This pass will decide on using loop versioning and/or loop peeling in
order to enhance the alignment of data references in the loop. */
ok = vect_enhance_data_refs_alignment (loop_vinfo);
- else
- ok = vect_verify_datarefs_alignment (loop_vinfo);
if (!ok)
return ok;
"unsupported SLP instances\n");
goto again;
}
+
+ /* Check whether any load in ALL SLP instances is possibly permuted. */
+ slp_tree load_node, slp_root;
+ unsigned i, x;
+ slp_instance instance;
+ bool can_use_lanes = true;
+ FOR_EACH_VEC_ELT (LOOP_VINFO_SLP_INSTANCES (loop_vinfo), x, instance)
+ {
+ slp_root = SLP_INSTANCE_TREE (instance);
+ int group_size = SLP_TREE_LANES (slp_root);
+ tree vectype = SLP_TREE_VECTYPE (slp_root);
+ bool loads_permuted = false;
+ FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, load_node)
+ {
+ if (!SLP_TREE_LOAD_PERMUTATION (load_node).exists ())
+ continue;
+ unsigned j;
+ stmt_vec_info load_info;
+ FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load_info)
+ if (SLP_TREE_LOAD_PERMUTATION (load_node)[j] != j)
+ {
+ loads_permuted = true;
+ break;
+ }
+ }
+
+ /* If the loads and stores can be handled with load/store-lane
+ instructions record it and move on to the next instance. */
+ if (loads_permuted
+ && SLP_INSTANCE_KIND (instance) == slp_inst_kind_store
+ && vect_store_lanes_supported (vectype, group_size, false))
+ {
+ FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, load_node)
+ {
+ stmt_vec_info stmt_vinfo = DR_GROUP_FIRST_ELEMENT
+ (SLP_TREE_SCALAR_STMTS (load_node)[0]);
+ /* Use SLP for strided accesses (or if we can't
+ load-lanes). */
+ if (STMT_VINFO_STRIDED_P (stmt_vinfo)
+ || ! vect_load_lanes_supported
+ (STMT_VINFO_VECTYPE (stmt_vinfo),
+ DR_GROUP_SIZE (stmt_vinfo), false))
+ break;
+ }
+
+ can_use_lanes
+ = can_use_lanes && i == SLP_INSTANCE_LOADS (instance).length ();
+
+ if (can_use_lanes && dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "SLP instance %p can use load/store-lanes\n",
+ instance);
+ }
+ else
+ {
+ can_use_lanes = false;
+ break;
+ }
+ }
+
+      /* If all SLP instances can use load/store-lanes, abort SLP and try again
+	 with SLP disabled.  */
+ if (can_use_lanes)
+ {
+ ok = opt_result::failure_at (vect_location,
+ "Built SLP cancelled: can use "
+ "load/store-lanes\n");
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "Built SLP cancelled: all SLP instances support "
+ "load/store-lanes\n");
+ goto again;
+ }
}
/* Dissolve SLP-only groups. */
return ok;
}
- /* Decide whether to use a fully-masked loop for this vectorization
- factor. */
- LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
- = (LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo)
- && vect_verify_full_masking (loop_vinfo));
- if (dump_enabled_p ())
- {
- if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
- dump_printf_loc (MSG_NOTE, vect_location,
- "using a fully-masked loop.\n");
- else
- dump_printf_loc (MSG_NOTE, vect_location,
- "not using a fully-masked loop.\n");
- }
-
- /* If epilog loop is required because of data accesses with gaps,
- one additional iteration needs to be peeled. Check if there is
- enough iterations for vectorization. */
- if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
- && LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
- && !LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
+  /* For now, we don't expect to mix both masking and length approaches for one
+     loop; disable the use of partial vectors if both are recorded.  */
+ if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
+ && !LOOP_VINFO_MASKS (loop_vinfo).is_empty ()
+ && !LOOP_VINFO_LENS (loop_vinfo).is_empty ())
{
- poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
- tree scalar_niters = LOOP_VINFO_NITERSM1 (loop_vinfo);
-
- if (known_lt (wi::to_widest (scalar_niters), vf))
- return opt_result::failure_at (vect_location,
- "loop has no enough iterations to"
- " support peeling for gaps.\n");
- }
-
- /* If we're vectorizing an epilogue loop, we either need a fully-masked
- loop or a loop that has a lower VF than the main loop. */
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "can't vectorize a loop with partial vectors"
+ " because we don't expect to mix different"
+ " approaches with partial vectors for the"
+ " same loop.\n");
+ LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
+ }
+
+ /* If we still have the option of using partial vectors,
+ check whether we can generate the necessary loop controls. */
+ if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
+ && !vect_verify_full_masking (loop_vinfo)
+ && !vect_verify_loop_lens (loop_vinfo))
+ LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
+
+ /* If we're vectorizing an epilogue loop, the vectorized loop either needs
+ to be able to handle fewer than VF scalars, or needs to have a lower VF
+ than the main loop. */
if (LOOP_VINFO_EPILOGUE_P (loop_vinfo)
- && !LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
+ && !LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
&& maybe_ge (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
LOOP_VINFO_VECT_FACTOR (orig_loop_vinfo)))
return opt_result::failure_at (vect_location,
"Vectorization factor too high for"
" epilogue loop.\n");
+ /* Decide whether this loop_vinfo should use partial vectors or peeling,
+ assuming that the loop will be used as a main loop. We will redo
+ this analysis later if we instead decide to use the loop as an
+ epilogue loop. */
+ ok = vect_determine_partial_vectors_and_peeling (loop_vinfo, false);
+ if (!ok)
+ return ok;
+
/* Check the costings of the loop make vectorizing worthwhile. */
res = vect_analyze_loop_costing (loop_vinfo);
if (res < 0)
return opt_result::failure_at (vect_location,
"Loop costings not worthwhile.\n");
- determine_peel_for_niter (loop_vinfo);
/* If an epilogue loop is required make sure we can create one. */
if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
|| LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo))
}
/* Niters for at least one iteration of vectorized loop. */
- if (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
+ if (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo))
niters_th += LOOP_VINFO_VECT_FACTOR (loop_vinfo);
/* One additional iteration because of peeling for gap. */
if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
LOOP_VINFO_VECT_FACTOR (loop_vinfo) = saved_vectorization_factor;
/* Free the SLP instances. */
FOR_EACH_VEC_ELT (LOOP_VINFO_SLP_INSTANCES (loop_vinfo), j, instance)
- vect_free_slp_instance (instance, false);
+ vect_free_slp_instance (instance);
LOOP_VINFO_SLP_INSTANCES (loop_vinfo).release ();
/* Reset SLP type to loop_vect on all stmts. */
for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i)
for (gimple_stmt_iterator si = gsi_start_bb (bb);
!gsi_end_p (si); gsi_next (&si))
{
+ if (is_gimple_debug (gsi_stmt (si)))
+ continue;
stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
STMT_SLP_TYPE (stmt_info) = loop_vect;
if (STMT_VINFO_IN_PATTERN_P (stmt_info))
LOOP_VINFO_TARGET_COST_DATA (loop_vinfo)
= init_cost (LOOP_VINFO_LOOP (loop_vinfo));
/* Reset accumulated rgroup information. */
- release_vec_loop_masks (&LOOP_VINFO_MASKS (loop_vinfo));
+ release_vec_loop_controls (&LOOP_VINFO_MASKS (loop_vinfo));
+ release_vec_loop_controls (&LOOP_VINFO_LENS (loop_vinfo));
/* Reset assorted flags. */
LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = false;
LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = false;
LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo) = 0;
LOOP_VINFO_VERSIONING_THRESHOLD (loop_vinfo) = 0;
- LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = saved_can_fully_mask_p;
+ LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
+ = saved_can_use_partial_vectors_p;
goto start_over;
}
poly_widest_int rel_old = (old_loop_vinfo->vec_inside_cost
* poly_widest_int (new_vf));
if (maybe_lt (rel_old, rel_new))
- return false;
+ {
+ /* When old_loop_vinfo uses a variable vectorization factor,
+ we know that it has a lower cost for at least one runtime VF.
+ However, we don't know how likely that VF is.
+
+ One option would be to compare the costs for the estimated VFs.
+ The problem is that that can put too much pressure on the cost
+ model. E.g. if the estimated VF is also the lowest possible VF,
+ and if old_loop_vinfo is 1 unit worse than new_loop_vinfo
+ for the estimated VF, we'd then choose new_loop_vinfo even
+ though (a) new_loop_vinfo might not actually be better than
+ old_loop_vinfo for that VF and (b) it would be significantly
+ worse at larger VFs.
+
+ Here we go for a hacky compromise: pick new_loop_vinfo if it is
+ no more expensive than old_loop_vinfo even after doubling the
+ estimated old_loop_vinfo VF. For all but trivial loops, this
+ ensures that we only pick new_loop_vinfo if it is significantly
+ better than old_loop_vinfo at the estimated VF. */
+ if (rel_new.is_constant ())
+ return false;
+
+ HOST_WIDE_INT new_estimated_vf = estimated_poly_value (new_vf);
+ HOST_WIDE_INT old_estimated_vf = estimated_poly_value (old_vf);
+ widest_int estimated_rel_new = (new_loop_vinfo->vec_inside_cost
+ * widest_int (old_estimated_vf));
+ widest_int estimated_rel_old = (old_loop_vinfo->vec_inside_cost
+ * widest_int (new_estimated_vf));
+ return estimated_rel_new * 2 <= estimated_rel_old;
+ }
if (known_lt (rel_new, rel_old))
return true;
lowest_th = ordered_min (lowest_th, th);
}
else
- delete loop_vinfo;
+ {
+ delete loop_vinfo;
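+	  /* Reset the wrapper so later checks do not see the freed
+	     loop_vinfo.  */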
+ loop_vinfo = opt_loop_vec_info::success (NULL);
+ }
/* Only vectorize epilogues if PARAM_VECT_EPILOGUES_NOMASK is
enabled, SIMDUID is not set, it is the innermost loop and we have
else
{
delete loop_vinfo;
+ loop_vinfo = opt_loop_vec_info::success (NULL);
if (fatal)
{
gcc_checking_assert (first_loop_vinfo == NULL);
}
}
+      /* Handle the case in which the original loop can use partial
+	 vectorization, but we only want to adopt it for the epilogue.
+	 The retry should be in the same mode as the original loop.  */
+ if (vect_epilogues
+ && loop_vinfo
+ && LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P (loop_vinfo))
+ {
+ gcc_assert (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
+ && !LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo));
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "***** Re-trying analysis with same vector mode"
+ " %s for epilogue with partial vectors.\n",
+ GET_MODE_NAME (loop_vinfo->vector_mode));
+ continue;
+ }
+
if (mode_i < vector_modes.length ()
&& VECTOR_MODE_P (autodetected_vector_mode)
&& (related_vector_mode (vector_modes[mode_i],
fail = true;
break;
}
- /* Check there's only a single stmt the op is used on inside
- of the loop. */
+      /* Check there's only a single stmt the op is used on.  For the
+	 non-value-changing tail and for the last stmt, allow out-of-loop uses.
+	 ??? We could relax this and handle arbitrary live stmts by
+	 forcing a scalar epilogue, for example.  */
imm_use_iterator imm_iter;
gimple *op_use_stmt;
unsigned cnt = 0;
FOR_EACH_IMM_USE_STMT (op_use_stmt, imm_iter, op)
if (!is_gimple_debug (op_use_stmt)
- && flow_bb_inside_loop_p (loop, gimple_bb (op_use_stmt)))
+ && (*code != ERROR_MARK
+ || flow_bb_inside_loop_p (loop, gimple_bb (op_use_stmt))))
{
/* We want to allow x + x but not x < 1 ? x : 2. */
if (is_gimple_assign (op_use_stmt)
return NULL;
}
-/* Calculate cost of peeling the loop PEEL_ITERS_PROLOGUE times. */
-int
-vect_get_known_peeling_cost (loop_vec_info loop_vinfo, int peel_iters_prologue,
- int *peel_iters_epilogue,
- stmt_vector_for_cost *scalar_cost_vec,
- stmt_vector_for_cost *prologue_cost_vec,
- stmt_vector_for_cost *epilogue_cost_vec)
+/* Estimate the number of peeled epilogue iterations for LOOP_VINFO.
+ PEEL_ITERS_PROLOGUE is the number of peeled prologue iterations,
+ or -1 if not known. */
+
+static int
+vect_get_peel_iters_epilogue (loop_vec_info loop_vinfo, int peel_iters_prologue)
{
- int retval = 0;
int assumed_vf = vect_vf_for_cost (loop_vinfo);
-
- if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
+ if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) || peel_iters_prologue == -1)
{
- *peel_iters_epilogue = assumed_vf / 2;
if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location,
+ dump_printf_loc (MSG_NOTE, vect_location,
"cost model: epilogue peel iters set to vf/2 "
"because loop iterations are unknown .\n");
-
- /* If peeled iterations are known but number of scalar loop
- iterations are unknown, count a taken branch per peeled loop. */
- retval = record_stmt_cost (prologue_cost_vec, 1, cond_branch_taken,
- NULL, 0, vect_prologue);
- retval += record_stmt_cost (epilogue_cost_vec, 1, cond_branch_taken,
- NULL, 0, vect_epilogue);
+ return assumed_vf / 2;
}
else
{
int niters = LOOP_VINFO_INT_NITERS (loop_vinfo);
- peel_iters_prologue = niters < peel_iters_prologue ?
- niters : peel_iters_prologue;
- *peel_iters_epilogue = (niters - peel_iters_prologue) % assumed_vf;
+ peel_iters_prologue = MIN (niters, peel_iters_prologue);
+ int peel_iters_epilogue = (niters - peel_iters_prologue) % assumed_vf;
/* If we need to peel for gaps, but no peeling is required, we have to
peel VF iterations. */
- if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) && !*peel_iters_epilogue)
- *peel_iters_epilogue = assumed_vf;
+ if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) && !peel_iters_epilogue)
+ peel_iters_epilogue = assumed_vf;
+ return peel_iters_epilogue;
+ }
+}
+
+/* Calculate cost of peeling the loop PEEL_ITERS_PROLOGUE times. */
+int
+vect_get_known_peeling_cost (loop_vec_info loop_vinfo, int peel_iters_prologue,
+ int *peel_iters_epilogue,
+ stmt_vector_for_cost *scalar_cost_vec,
+ stmt_vector_for_cost *prologue_cost_vec,
+ stmt_vector_for_cost *epilogue_cost_vec)
+{
+ int retval = 0;
+
+ *peel_iters_epilogue
+ = vect_get_peel_iters_epilogue (loop_vinfo, peel_iters_prologue);
+
+ if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
+ {
+      /* If peeled iterations are known but the number of scalar loop
+	 iterations is unknown, count a taken branch per peeled loop.  */
+ if (peel_iters_prologue > 0)
+ retval = record_stmt_cost (prologue_cost_vec, 1, cond_branch_taken,
+ NULL, NULL_TREE, 0, vect_prologue);
+ if (*peel_iters_epilogue > 0)
+ retval += record_stmt_cost (epilogue_cost_vec, 1, cond_branch_taken,
+ NULL, NULL_TREE, 0, vect_epilogue);
}
stmt_info_for_cost *si;
{
/* FIXME: Make cost depend on complexity of individual check. */
unsigned len = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).length ();
- (void) add_stmt_cost (target_cost_data, len, vector_stmt, NULL, 0,
- vect_prologue);
+ (void) add_stmt_cost (loop_vinfo, target_cost_data, len, vector_stmt,
+ NULL, NULL_TREE, 0, vect_prologue);
if (dump_enabled_p ())
dump_printf (MSG_NOTE,
"cost model: Adding cost of checks for loop "
{
/* FIXME: Make cost depend on complexity of individual check. */
unsigned len = LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo).length ();
- (void) add_stmt_cost (target_cost_data, len, vector_stmt, NULL, 0,
- vect_prologue);
+ (void) add_stmt_cost (loop_vinfo, target_cost_data, len, vector_stmt,
+ NULL, NULL_TREE, 0, vect_prologue);
len = LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo).length ();
if (len)
/* Count LEN - 1 ANDs and LEN comparisons. */
- (void) add_stmt_cost (target_cost_data, len * 2 - 1, scalar_stmt,
- NULL, 0, vect_prologue);
+ (void) add_stmt_cost (loop_vinfo, target_cost_data, len * 2 - 1,
+ scalar_stmt, NULL, NULL_TREE, 0, vect_prologue);
len = LOOP_VINFO_LOWER_BOUNDS (loop_vinfo).length ();
if (len)
{
for (unsigned int i = 0; i < len; ++i)
if (!LOOP_VINFO_LOWER_BOUNDS (loop_vinfo)[i].unsigned_p)
nstmts += 1;
- (void) add_stmt_cost (target_cost_data, nstmts, scalar_stmt,
- NULL, 0, vect_prologue);
+ (void) add_stmt_cost (loop_vinfo, target_cost_data, nstmts,
+ scalar_stmt, NULL, NULL_TREE, 0, vect_prologue);
}
if (dump_enabled_p ())
dump_printf (MSG_NOTE,
if (LOOP_REQUIRES_VERSIONING_FOR_NITERS (loop_vinfo))
{
/* FIXME: Make cost depend on complexity of individual check. */
- (void) add_stmt_cost (target_cost_data, 1, vector_stmt, NULL, 0,
- vect_prologue);
+ (void) add_stmt_cost (loop_vinfo, target_cost_data, 1, vector_stmt,
+ NULL, NULL_TREE, 0, vect_prologue);
if (dump_enabled_p ())
dump_printf (MSG_NOTE,
"cost model: Adding cost of checks for loop "
}
if (LOOP_REQUIRES_VERSIONING (loop_vinfo))
- (void) add_stmt_cost (target_cost_data, 1, cond_branch_taken, NULL, 0,
- vect_prologue);
+ (void) add_stmt_cost (loop_vinfo, target_cost_data, 1, cond_branch_taken,
+ NULL, NULL_TREE, 0, vect_prologue);
/* Count statements in scalar loop. Using this as scalar cost for a single
iteration for now.
TODO: Build an expression that represents peel_iters for prologue and
epilogue to be used in a run-time test. */
- if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
+ bool prologue_need_br_taken_cost = false;
+ bool prologue_need_br_not_taken_cost = false;
+
+ /* Calculate peel_iters_prologue. */
+ if (vect_use_loop_mask_for_alignment_p (loop_vinfo))
+ peel_iters_prologue = 0;
+ else if (npeel < 0)
{
- peel_iters_prologue = 0;
- peel_iters_epilogue = 0;
+ peel_iters_prologue = assumed_vf / 2;
+ if (dump_enabled_p ())
+ dump_printf (MSG_NOTE, "cost model: "
+ "prologue peel iters set to vf/2.\n");
- if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
- {
- /* We need to peel exactly one iteration. */
- peel_iters_epilogue += 1;
- stmt_info_for_cost *si;
- int j;
- FOR_EACH_VEC_ELT (LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo),
- j, si)
- (void) add_stmt_cost (target_cost_data, si->count,
- si->kind, si->stmt_info, si->misalign,
- vect_epilogue);
- }
+ /* If peeled iterations are unknown, count a taken branch and a not taken
+ branch per peeled loop. Even if scalar loop iterations are known,
+ vector iterations are not known since peeled prologue iterations are
+ not known. Hence guards remain the same. */
+ prologue_need_br_taken_cost = true;
+ prologue_need_br_not_taken_cost = true;
+ }
+ else
+ {
+ peel_iters_prologue = npeel;
+ if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) && peel_iters_prologue > 0)
+	/* If peeled iterations are known but the number of scalar loop
+	   iterations is unknown, count a taken branch per peeled loop.  */
+ prologue_need_br_taken_cost = true;
+ }
+
+ bool epilogue_need_br_taken_cost = false;
+ bool epilogue_need_br_not_taken_cost = false;
+
+ /* Calculate peel_iters_epilogue. */
+ if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo))
+ /* We need to peel exactly one iteration for gaps. */
+ peel_iters_epilogue = LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) ? 1 : 0;
+ else if (npeel < 0)
+ {
+      /* If peeling for alignment is unknown, the loop bound of the main
+	 loop becomes unknown.  */
+ peel_iters_epilogue = assumed_vf / 2;
+ if (dump_enabled_p ())
+ dump_printf (MSG_NOTE, "cost model: "
+ "epilogue peel iters set to vf/2 because "
+ "peeling for alignment is unknown.\n");
+
+ /* See the same reason above in peel_iters_prologue calculation. */
+ epilogue_need_br_taken_cost = true;
+ epilogue_need_br_not_taken_cost = true;
+ }
+ else
+ {
+ peel_iters_epilogue = vect_get_peel_iters_epilogue (loop_vinfo, npeel);
+ if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) && peel_iters_epilogue > 0)
+	/* If peeled iterations are known but the number of scalar loop
+	   iterations is unknown, count a taken branch per peeled loop.  */
+ epilogue_need_br_taken_cost = true;
+ }
+
+ stmt_info_for_cost *si;
+ int j;
+ /* Add costs associated with peel_iters_prologue. */
+ if (peel_iters_prologue)
+ FOR_EACH_VEC_ELT (LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo), j, si)
+ {
+ (void) add_stmt_cost (loop_vinfo, target_cost_data,
+ si->count * peel_iters_prologue, si->kind,
+ si->stmt_info, si->vectype, si->misalign,
+ vect_prologue);
+ }
+
+ /* Add costs associated with peel_iters_epilogue. */
+ if (peel_iters_epilogue)
+ FOR_EACH_VEC_ELT (LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo), j, si)
+ {
+ (void) add_stmt_cost (loop_vinfo, target_cost_data,
+ si->count * peel_iters_epilogue, si->kind,
+ si->stmt_info, si->vectype, si->misalign,
+ vect_epilogue);
+ }
+
+ /* Add possible cond_branch_taken/cond_branch_not_taken cost. */
+ if (prologue_need_br_taken_cost)
+ (void) add_stmt_cost (loop_vinfo, target_cost_data, 1, cond_branch_taken,
+ NULL, NULL_TREE, 0, vect_prologue);
+
+ if (prologue_need_br_not_taken_cost)
+ (void) add_stmt_cost (loop_vinfo, target_cost_data, 1,
+ cond_branch_not_taken, NULL, NULL_TREE, 0,
+ vect_prologue);
+
+ if (epilogue_need_br_taken_cost)
+ (void) add_stmt_cost (loop_vinfo, target_cost_data, 1, cond_branch_taken,
+ NULL, NULL_TREE, 0, vect_epilogue);
+
+ if (epilogue_need_br_not_taken_cost)
+ (void) add_stmt_cost (loop_vinfo, target_cost_data, 1,
+ cond_branch_not_taken, NULL, NULL_TREE, 0,
+ vect_epilogue);
+
+ /* Take care of special costs for rgroup controls of partial vectors. */
+ if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
+ {
/* Calculate how many masks we need to generate. */
unsigned int num_masks = 0;
- rgroup_masks *rgm;
+ rgroup_controls *rgm;
unsigned int num_vectors_m1;
FOR_EACH_VEC_ELT (LOOP_VINFO_MASKS (loop_vinfo), num_vectors_m1, rgm)
- if (rgm->mask_type)
+ if (rgm->type)
num_masks += num_vectors_m1 + 1;
gcc_assert (num_masks > 0);
simpler and safer to use the worst-case cost; if this ends up
being the tie-breaker between vectorizing or not, then it's
probably better not to vectorize. */
- (void) add_stmt_cost (target_cost_data, num_masks, vector_stmt,
- NULL, 0, vect_prologue);
- (void) add_stmt_cost (target_cost_data, num_masks - 1, vector_stmt,
- NULL, 0, vect_body);
+ (void) add_stmt_cost (loop_vinfo, target_cost_data, num_masks,
+ vector_stmt, NULL, NULL_TREE, 0, vect_prologue);
+ (void) add_stmt_cost (loop_vinfo, target_cost_data, num_masks - 1,
+ vector_stmt, NULL, NULL_TREE, 0, vect_body);
}
- else if (npeel < 0)
+ else if (LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))
{
- peel_iters_prologue = assumed_vf / 2;
- if (dump_enabled_p ())
- dump_printf (MSG_NOTE, "cost model: "
- "prologue peel iters set to vf/2.\n");
+ /* Referring to the functions vect_set_loop_condition_partial_vectors
+ and vect_set_loop_controls_directly, we need to generate each
+ length in the prologue and in the loop body if required. Although
+ there are some possible optimizations, we consider the worst case
+ here. */
- /* If peeling for alignment is unknown, loop bound of main loop becomes
- unknown. */
- peel_iters_epilogue = assumed_vf / 2;
- if (dump_enabled_p ())
- dump_printf (MSG_NOTE, "cost model: "
- "epilogue peel iters set to vf/2 because "
- "peeling for alignment is unknown.\n");
+ bool niters_known_p = LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo);
+ bool need_iterate_p
+ = (!LOOP_VINFO_EPILOGUE_P (loop_vinfo)
+ && !vect_known_niters_smaller_than_vf (loop_vinfo));
- /* If peeled iterations are unknown, count a taken branch and a not taken
- branch per peeled loop. Even if scalar loop iterations are known,
- vector iterations are not known since peeled prologue iterations are
- not known. Hence guards remain the same. */
- (void) add_stmt_cost (target_cost_data, 1, cond_branch_taken,
- NULL, 0, vect_prologue);
- (void) add_stmt_cost (target_cost_data, 1, cond_branch_not_taken,
- NULL, 0, vect_prologue);
- (void) add_stmt_cost (target_cost_data, 1, cond_branch_taken,
- NULL, 0, vect_epilogue);
- (void) add_stmt_cost (target_cost_data, 1, cond_branch_not_taken,
- NULL, 0, vect_epilogue);
- stmt_info_for_cost *si;
- int j;
- FOR_EACH_VEC_ELT (LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo), j, si)
- {
- (void) add_stmt_cost (target_cost_data,
- si->count * peel_iters_prologue,
- si->kind, si->stmt_info, si->misalign,
- vect_prologue);
- (void) add_stmt_cost (target_cost_data,
- si->count * peel_iters_epilogue,
- si->kind, si->stmt_info, si->misalign,
- vect_epilogue);
- }
- }
- else
- {
- stmt_vector_for_cost prologue_cost_vec, epilogue_cost_vec;
- stmt_info_for_cost *si;
- int j;
- void *data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
+ /* Calculate how many statements to be added. */
+ unsigned int prologue_stmts = 0;
+ unsigned int body_stmts = 0;
- prologue_cost_vec.create (2);
- epilogue_cost_vec.create (2);
- peel_iters_prologue = npeel;
+ rgroup_controls *rgc;
+ unsigned int num_vectors_m1;
+ FOR_EACH_VEC_ELT (LOOP_VINFO_LENS (loop_vinfo), num_vectors_m1, rgc)
+ if (rgc->type)
+ {
+ /* May need one SHIFT for nitems_total computation. */
+ unsigned nitems = rgc->max_nscalars_per_iter * rgc->factor;
+ if (nitems != 1 && !niters_known_p)
+ prologue_stmts += 1;
+
+ /* May need one MAX and one MINUS for wrap around. */
+ if (vect_rgroup_iv_might_wrap_p (loop_vinfo, rgc))
+ prologue_stmts += 2;
- (void) vect_get_known_peeling_cost (loop_vinfo, peel_iters_prologue,
- &peel_iters_epilogue,
- &LOOP_VINFO_SCALAR_ITERATION_COST
- (loop_vinfo),
- &prologue_cost_vec,
- &epilogue_cost_vec);
+	    /* Need one MAX and one MINUS for each batch limit except for
+	       the first one.  */
+ prologue_stmts += num_vectors_m1 * 2;
- FOR_EACH_VEC_ELT (prologue_cost_vec, j, si)
- (void) add_stmt_cost (data, si->count, si->kind, si->stmt_info,
- si->misalign, vect_prologue);
+ unsigned int num_vectors = num_vectors_m1 + 1;
- FOR_EACH_VEC_ELT (epilogue_cost_vec, j, si)
- (void) add_stmt_cost (data, si->count, si->kind, si->stmt_info,
- si->misalign, vect_epilogue);
+	    /* Need to set up lengths in the prologue; only one MIN is
+	       required for each since the start index is zero.  */
+ prologue_stmts += num_vectors;
+
+	    /* Each may need two MINs and one MINUS to update lengths in the
+	       body for the next iteration.  */
+ if (need_iterate_p)
+ body_stmts += 3 * num_vectors;
+ }
- prologue_cost_vec.release ();
- epilogue_cost_vec.release ();
+ (void) add_stmt_cost (loop_vinfo, target_cost_data, prologue_stmts,
+ scalar_stmt, NULL, NULL_TREE, 0, vect_prologue);
+ (void) add_stmt_cost (loop_vinfo, target_cost_data, body_stmts,
+ scalar_stmt, NULL, NULL_TREE, 0, vect_body);
}
/* FORNOW: The scalar outside cost is incremented in one of the
}
/* ??? The "if" arm is written to handle all cases; see below for what
- we would do for !LOOP_VINFO_FULLY_MASKED_P. */
- if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
+ we would do for !LOOP_VINFO_USING_PARTIAL_VECTORS_P. */
+ if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo))
{
/* Rewriting the condition above in terms of the number of
vector iterations (vniters) rather than the number of
dump_printf (MSG_NOTE, " Minimum number of vector iterations: %d\n",
min_vec_niters);
- if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
+ if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo))
{
/* Now that we know the minimum number of vector iterations,
find the minimum niters for which the scalar cost is larger:
" Calculated minimum iters for profitability: %d\n",
min_profitable_iters);
- if (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
+ if (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)
&& min_profitable_iters < (assumed_vf + peel_iters_prologue))
/* We want the vectorized loop to execute at least once. */
min_profitable_iters = assumed_vf + peel_iters_prologue;
+ else if (min_profitable_iters < peel_iters_prologue)
+ /* For LOOP_VINFO_USING_PARTIAL_VECTORS_P, we need to ensure the
+ vectorized loop executes at least once. */
+ min_profitable_iters = peel_iters_prologue;
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
if (vec_outside_cost <= 0)
min_profitable_estimate = 0;
- else if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
+ /* ??? This "else if" arm is written to handle all cases; see below for
+ what we would do for !LOOP_VINFO_USING_PARTIAL_VECTORS_P. */
+ else if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo))
{
/* This is a repeat of the code above, but with + SOC rather
than - SOC. */
if (outside_overhead > 0)
min_vec_niters = outside_overhead / saving_per_viter + 1;
- if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
+ if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo))
{
int threshold = (vec_inside_cost * min_vec_niters
+ vec_outside_cost
the loop, and the epilogue code that must be generated. */
static void
-vect_model_reduction_cost (stmt_vec_info stmt_info, internal_fn reduc_fn,
+vect_model_reduction_cost (loop_vec_info loop_vinfo,
+ stmt_vec_info stmt_info, internal_fn reduc_fn,
vect_reduction_type reduction_type,
int ncopies, stmt_vector_for_cost *cost_vec)
{
optab optab;
tree vectype;
machine_mode mode;
- loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
class loop *loop = NULL;
if (loop_vinfo)
}
-/* Function vect_model_induction_cost.
-
- Models cost for induction operations. */
-
-static void
-vect_model_induction_cost (stmt_vec_info stmt_info, int ncopies,
- stmt_vector_for_cost *cost_vec)
-{
- unsigned inside_cost, prologue_cost;
-
- if (PURE_SLP_STMT (stmt_info))
- return;
-
- /* loop cost for vec_loop. */
- inside_cost = record_stmt_cost (cost_vec, ncopies, vector_stmt,
- stmt_info, 0, vect_body);
-
- /* prologue cost for vec_init and vec_step. */
- prologue_cost = record_stmt_cost (cost_vec, 2, scalar_to_vec,
- stmt_info, 0, vect_prologue);
-
- if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location,
- "vect_model_induction_cost: inside_cost = %d, "
- "prologue_cost = %d .\n", inside_cost, prologue_cost);
-}
-
-
/* Function get_initial_def_for_reduction
A cost model should help decide between these two schemes. */
static tree
-get_initial_def_for_reduction (stmt_vec_info stmt_vinfo,
+get_initial_def_for_reduction (loop_vec_info loop_vinfo,
+ stmt_vec_info stmt_vinfo,
enum tree_code code, tree init_val,
tree *adjustment_def)
{
- loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
tree scalar_type = TREE_TYPE (init_val);
tree vectype = get_vectype_for_scalar_type (loop_vinfo, scalar_type);
value will not change the result. */
static void
-get_initial_defs_for_reduction (slp_tree slp_node,
+get_initial_defs_for_reduction (vec_info *vinfo,
+ slp_tree slp_node,
vec<tree> *vec_oprnds,
unsigned int number_of_vectors,
bool reduc_chain, tree neutral_op)
{
vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
stmt_vec_info stmt_vinfo = stmts[0];
- vec_info *vinfo = stmt_vinfo->vinfo;
unsigned HOST_WIDE_INT nunits;
unsigned j, number_of_places_left_in_vector;
tree vector_type;
the stmt_vec_info the meta information is stored on. */
stmt_vec_info
-info_for_reduction (stmt_vec_info stmt_info)
+info_for_reduction (vec_info *vinfo, stmt_vec_info stmt_info)
{
stmt_info = vect_orig_stmt (stmt_info);
gcc_assert (STMT_VINFO_REDUC_DEF (stmt_info));
- if (!is_a <gphi *> (stmt_info->stmt))
+ if (!is_a <gphi *> (stmt_info->stmt)
+ || !VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
stmt_info = STMT_VINFO_REDUC_DEF (stmt_info);
gphi *phi = as_a <gphi *> (stmt_info->stmt);
if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def)
{
edge pe = loop_preheader_edge (gimple_bb (phi)->loop_father);
stmt_vec_info info
- = stmt_info->vinfo->lookup_def (PHI_ARG_DEF_FROM_EDGE (phi, pe));
+ = vinfo->lookup_def (PHI_ARG_DEF_FROM_EDGE (phi, pe));
if (info && STMT_VINFO_DEF_TYPE (info) == vect_double_reduction_def)
stmt_info = info;
}
*/
static void
-vect_create_epilog_for_reduction (stmt_vec_info stmt_info,
+vect_create_epilog_for_reduction (loop_vec_info loop_vinfo,
+ stmt_vec_info stmt_info,
slp_tree slp_node,
slp_instance slp_node_instance)
{
- stmt_vec_info reduc_info = info_for_reduction (stmt_info);
+ stmt_vec_info reduc_info = info_for_reduction (loop_vinfo, stmt_info);
gcc_assert (reduc_info->is_reduc_info);
- loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
/* For double reductions we need to get at the inner loop reduction
stmt which has the meta info attached. Our stmt_info is that of the
loop-closed PHI of the inner loop which we remember as
= as_a <gphi *> (STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info))->stmt);
enum tree_code code = STMT_VINFO_REDUC_CODE (reduc_info);
internal_fn reduc_fn = STMT_VINFO_REDUC_FN (reduc_info);
- stmt_vec_info prev_phi_info;
tree vectype;
machine_mode mode;
class loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *outer_loop = NULL;
tree scalar_dest;
tree scalar_type;
gimple *new_phi = NULL, *phi;
- stmt_vec_info phi_info;
gimple_stmt_iterator exit_gsi;
tree new_temp = NULL_TREE, new_name, new_scalar_dest;
gimple *epilog_stmt = NULL;
tree induction_index = NULL_TREE;
if (slp_node)
- group_size = SLP_TREE_SCALAR_STMTS (slp_node).length ();
+ group_size = SLP_TREE_LANES (slp_node);
if (nested_in_vect_loop_p (loop, stmt_info))
{
}
else
{
+ stmt_vec_info reduc_info = loop_vinfo->lookup_stmt (reduc_def_stmt);
vec_num = 1;
- ncopies = 0;
- phi_info = STMT_VINFO_VEC_STMT (loop_vinfo->lookup_stmt (reduc_def_stmt));
- do
- {
- ncopies++;
- phi_info = STMT_VINFO_RELATED_STMT (phi_info);
- }
- while (phi_info);
+ ncopies = STMT_VINFO_VEC_STMTS (reduc_info).length ();
}
/* For cond reductions we want to create a new vector (INDEX_COND_EXPR)
{
if (gimple_assign_rhs_code (cond_info->stmt) == COND_EXPR)
{
- gimple *vec_stmt = STMT_VINFO_VEC_STMT (cond_info)->stmt;
+ gimple *vec_stmt = STMT_VINFO_VEC_STMTS (cond_info)[0];
gcc_assert (gimple_assign_rhs_code (vec_stmt) == VEC_COND_EXPR);
ccompares.safe_push
(std::make_pair (unshare_expr (gimple_assign_rhs1 (vec_stmt)),
/* Create a vector phi node. */
tree new_phi_tree = make_ssa_name (cr_index_vector_type);
new_phi = create_phi_node (new_phi_tree, loop->header);
- loop_vinfo->add_stmt (new_phi);
add_phi_arg (as_a <gphi *> (new_phi), vec_zero,
loop_preheader_edge (loop), UNKNOWN_LOCATION);
new_phi_tree, indx_before_incr);
}
gsi_insert_seq_before (&incr_gsi, stmts, GSI_SAME_STMT);
- stmt_vec_info index_vec_info
- = loop_vinfo->add_stmt (SSA_NAME_DEF_STMT (new_phi_tree));
- STMT_VINFO_VECTYPE (index_vec_info) = cr_index_vector_type;
/* Update the phi with the vec cond. */
induction_index = new_phi_tree;
if (double_reduc)
loop = outer_loop;
exit_bb = single_exit (loop)->dest;
- prev_phi_info = NULL;
new_phis.create (slp_node ? vec_num : ncopies);
for (unsigned i = 0; i < vec_num; i++)
{
if (slp_node)
- def = gimple_get_lhs (SLP_TREE_VEC_STMTS (slp_node)[i]->stmt);
+ def = vect_get_slp_vect_def (slp_node, i);
else
- def = gimple_get_lhs (STMT_VINFO_VEC_STMT (rdef_info)->stmt);
+ def = gimple_get_lhs (STMT_VINFO_VEC_STMTS (rdef_info)[0]);
for (j = 0; j < ncopies; j++)
{
tree new_def = copy_ssa_name (def);
phi = create_phi_node (new_def, exit_bb);
- stmt_vec_info phi_info = loop_vinfo->add_stmt (phi);
if (j == 0)
new_phis.quick_push (phi);
else
{
- def = vect_get_vec_def_for_stmt_copy (loop_vinfo, def);
- STMT_VINFO_RELATED_STMT (prev_phi_info) = phi_info;
+ def = gimple_get_lhs (STMT_VINFO_VEC_STMTS (rdef_info)[j]);
+ new_phis.quick_push (phi);
}
SET_PHI_ARG_DEF (phi, single_exit (loop)->dest_idx, def);
- prev_phi_info = phi_info;
}
}
/* Likewise if we couldn't use a single defuse cycle. */
else if (ncopies > 1)
{
- gcc_assert (new_phis.length () == 1);
gimple_seq stmts = NULL;
tree first_vect = PHI_RESULT (new_phis[0]);
first_vect = gimple_convert (&stmts, vectype, first_vect);
- stmt_vec_info next_phi_info = loop_vinfo->lookup_stmt (new_phis[0]);
for (int k = 1; k < ncopies; ++k)
{
- next_phi_info = STMT_VINFO_RELATED_STMT (next_phi_info);
- tree second_vect = PHI_RESULT (next_phi_info->stmt);
+ tree second_vect = PHI_RESULT (new_phis[k]);
second_vect = gimple_convert (&stmts, vectype, second_vect);
first_vect = gimple_build (&stmts, code, vectype,
first_vect, second_vect);
gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT);
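The preceding loop folds the partial accumulators of the ncopies vector copies into a single vector with the reduction operation before the epilogue reduces that vector to a scalar. A rough standalone illustration of that combining step, with lane-wise addition standing in for the reduction code:

#include <vector>

/* Illustrative sketch only: combine the partial accumulator vectors of
   all copies into one by applying the reduction operation lane-wise.  */
static std::vector<int>
combine_copies (const std::vector<std::vector<int> > &copies)
{
  std::vector<int> acc = copies[0];
  for (unsigned k = 1; k < copies.size (); ++k)
    for (unsigned lane = 0; lane < acc.size (); ++lane)
      acc[lane] += copies[k][lane];   /* '+' stands in for CODE.  */
  return acc;
}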
if (nested_in_vect_loop)
{
- stmt_vec_info epilog_stmt_info = loop_vinfo->add_stmt (epilog_stmt);
- STMT_VINFO_RELATED_STMT (epilog_stmt_info)
- = STMT_VINFO_RELATED_STMT (loop_vinfo->lookup_stmt (new_phi));
-
if (!double_reduc)
scalar_results.quick_push (new_temp);
else
that should be used to control the operation in a fully-masked loop. */
static bool
-vectorize_fold_left_reduction (stmt_vec_info stmt_info,
+vectorize_fold_left_reduction (loop_vec_info loop_vinfo,
+ stmt_vec_info stmt_info,
gimple_stmt_iterator *gsi,
- stmt_vec_info *vec_stmt, slp_tree slp_node,
+ gimple **vec_stmt, slp_tree slp_node,
gimple *reduc_def_stmt,
tree_code code, internal_fn reduc_fn,
tree ops[3], tree vectype_in,
int reduc_index, vec_loop_masks *masks)
{
- loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
tree vectype_out = STMT_VINFO_VECTYPE (stmt_info);
- stmt_vec_info new_stmt_info = NULL;
internal_fn mask_reduc_fn = get_masked_reduction_fn (reduc_fn, vectype_in);
int ncopies;
if (slp_node)
{
auto_vec<vec<tree> > vec_defs (2);
- vect_get_slp_defs (slp_node, &vec_defs);
+ vect_get_slp_defs (loop_vinfo, slp_node, &vec_defs);
vec_oprnds0.safe_splice (vec_defs[1 - reduc_index]);
vec_defs[0].release ();
vec_defs[1].release ();
}
else
{
- tree loop_vec_def0 = vect_get_vec_def_for_operand (op0, stmt_info);
- vec_oprnds0.create (1);
- vec_oprnds0.quick_push (loop_vec_def0);
+ vect_get_vec_defs_for_operand (loop_vinfo, stmt_info, 1,
+ op0, &vec_oprnds0);
scalar_dest_def_info = stmt_info;
}
if (i == vec_num - 1)
{
gimple_set_lhs (new_stmt, scalar_dest);
- new_stmt_info = vect_finish_replace_stmt (scalar_dest_def_info,
- new_stmt);
+ vect_finish_replace_stmt (loop_vinfo,
+ scalar_dest_def_info,
+ new_stmt);
}
else
- new_stmt_info = vect_finish_stmt_generation (scalar_dest_def_info,
- new_stmt, gsi);
+ vect_finish_stmt_generation (loop_vinfo,
+ scalar_dest_def_info,
+ new_stmt, gsi);
if (slp_node)
- SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
+ SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
+ else
+ {
+ STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
+ *vec_stmt = new_stmt;
+ }
}
- if (!slp_node)
- STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
-
return true;
}
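vectorize_fold_left_reduction preserves the strict in-order semantics of the scalar loop: each vector of elements is folded into the scalar accumulator left to right, so the result matches the sequential reduction exactly (which matters for floating point). A minimal standalone sketch of those semantics, assuming plain C++ in place of the masked fold-left internal functions:

#include <vector>

/* Illustrative sketch only: an in-order (fold-left) reduction over a
   sequence of vectors; lanes are consumed strictly left to right.  */
static double
fold_left_reduce (double acc, const std::vector<std::vector<double> > &vecs)
{
  for (const std::vector<double> &v : vecs)
    for (double lane : v)
      acc = acc + lane;   /* '+' stands in for the reduction CODE.  */
  return acc;
}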
does *NOT* necessarily hold for reduction patterns. */
bool
-vectorizable_reduction (stmt_vec_info stmt_info, slp_tree slp_node,
+vectorizable_reduction (loop_vec_info loop_vinfo,
+ stmt_vec_info stmt_info, slp_tree slp_node,
slp_instance slp_node_instance,
stmt_vector_for_cost *cost_vec)
{
tree scalar_dest;
tree vectype_in = NULL_TREE;
- loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
enum vect_def_type cond_reduc_dt = vect_unknown_def_type;
stmt_vec_info cond_stmt_vinfo = NULL;
return false;
/* The stmt we store reduction analysis meta on. */
- stmt_vec_info reduc_info = info_for_reduction (stmt_info);
+ stmt_vec_info reduc_info = info_for_reduction (loop_vinfo, stmt_info);
reduc_info->is_reduc_info = true;
if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle)
{
if (is_a <gphi *> (stmt_info->stmt))
- /* Analysis for double-reduction is done on the outer
- loop PHI, nested cycles have no further restrictions. */
- STMT_VINFO_TYPE (stmt_info) = cycle_phi_info_type;
+ {
+ if (slp_node)
+ {
+ /* We eventually need to set a vector type on invariant
+ arguments. */
+ unsigned j;
+ slp_tree child;
+ FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (slp_node), j, child)
+ if (!vect_maybe_update_slp_op_vectype
+ (child, SLP_TREE_VECTYPE (slp_node)))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "incompatible vector types for "
+ "invariants\n");
+ return false;
+ }
+ }
+ /* Analysis for double-reduction is done on the outer
+ loop PHI, nested cycles have no further restrictions. */
+ STMT_VINFO_TYPE (stmt_info) = cycle_phi_info_type;
+ }
else
STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
return true;
gphi *reduc_def_phi = as_a <gphi *> (phi_info->stmt);
/* Verify following REDUC_IDX from the latch def leads us back to the PHI
- and compute the reduction chain length. */
- tree reduc_def = PHI_ARG_DEF_FROM_EDGE (reduc_def_phi,
- loop_latch_edge (loop));
+ and compute the reduction chain length. Discover the real
+ reduction operation stmt on the way (stmt_info and slp_for_stmt_info). */
+ tree reduc_def
+ = PHI_ARG_DEF_FROM_EDGE (reduc_def_phi,
+ loop_latch_edge
+ (gimple_bb (reduc_def_phi)->loop_father));
unsigned reduc_chain_length = 0;
bool only_slp_reduc_chain = true;
stmt_info = NULL;
+ slp_tree slp_for_stmt_info = slp_node ? slp_node_instance->root : NULL;
while (reduc_def != PHI_RESULT (reduc_def_phi))
{
stmt_vec_info def = loop_vinfo->lookup_def (reduc_def);
stmt_info = vdef;
reduc_def = gimple_op (vdef->stmt, 1 + STMT_VINFO_REDUC_IDX (vdef));
reduc_chain_length++;
+ if (!stmt_info && slp_node)
+ slp_for_stmt_info = SLP_TREE_CHILDREN (slp_for_stmt_info)[0];
}
/* PHIs should not participate in patterns. */
gcc_assert (!STMT_VINFO_RELATED_STMT (phi_info));
The last use is the reduction variable. In case of nested cycle this
assumption is not true: we use reduc_index to record the index of the
reduction variable. */
- reduc_def = PHI_RESULT (reduc_def_phi);
+ slp_tree *slp_op = XALLOCAVEC (slp_tree, op_type);
+ /* We need to skip an extra operand for COND_EXPRs with embedded
+ comparison. */
+ unsigned opno_adjust = 0;
+ if (code == COND_EXPR
+ && COMPARISON_CLASS_P (gimple_assign_rhs1 (stmt)))
+ opno_adjust = 1;
for (i = 0; i < op_type; i++)
{
- tree op = gimple_op (stmt, i + 1);
/* The condition of COND_EXPR is checked in vectorizable_condition(). */
if (i == 0 && code == COND_EXPR)
continue;
stmt_vec_info def_stmt_info;
enum vect_def_type dt;
- if (!vect_is_simple_use (op, loop_vinfo, &dt, &tem,
+ tree op;
+ if (!vect_is_simple_use (loop_vinfo, stmt_info, slp_for_stmt_info,
+ i + opno_adjust, &op, &slp_op[i], &dt, &tem,
&def_stmt_info))
{
if (dump_enabled_p ())
which each SLP statement has its own initial value and in which
that value needs to be repeated for every instance of the
statement within the initial vector. */
- unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
+ unsigned int group_size = SLP_TREE_LANES (slp_node);
if (!neutral_op
&& !can_duplicate_and_interleave_p (loop_vinfo, group_size,
TREE_TYPE (vectype_out)))
return false;
}
+ if (slp_node
+ && !(!single_defuse_cycle
+ && code != DOT_PROD_EXPR
+ && code != WIDEN_SUM_EXPR
+ && code != SAD_EXPR
+ && reduction_type != FOLD_LEFT_REDUCTION))
+ for (i = 0; i < op_type; i++)
+ if (!vect_maybe_update_slp_op_vectype (slp_op[i], vectype_in))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "incompatible vector types for invariants\n");
+ return false;
+ }
+
if (slp_node)
vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
else
vec_num = 1;
- vect_model_reduction_cost (stmt_info, reduc_fn, reduction_type, ncopies,
- cost_vec);
+ vect_model_reduction_cost (loop_vinfo, stmt_info, reduc_fn,
+ reduction_type, ncopies, cost_vec);
if (dump_enabled_p ()
&& reduction_type == FOLD_LEFT_REDUCTION)
dump_printf_loc (MSG_NOTE, vect_location,
STMT_VINFO_DEF_TYPE (vect_orig_stmt (tem)) = vect_internal_def;
STMT_VINFO_DEF_TYPE (tem) = vect_internal_def;
}
- else if (loop_vinfo && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
+ else if (loop_vinfo && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
{
vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
internal_fn cond_fn = get_conditional_internal_fn (code);
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "can't use a fully-masked loop because no"
- " conditional operation is available.\n");
- LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
+ "can't operate on partial vectors because"
+ " no conditional operation is available.\n");
+ LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
}
else if (reduction_type == FOLD_LEFT_REDUCTION
&& reduc_fn == IFN_LAST
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "can't use a fully-masked loop because no"
- " conditional operation is available.\n");
- LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
+ "can't operate on partial vectors because"
+ " no conditional operation is available.\n");
+ LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
}
else
vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
value. */
bool
-vect_transform_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
- stmt_vec_info *vec_stmt, slp_tree slp_node)
+vect_transform_reduction (loop_vec_info loop_vinfo,
+ stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
+ gimple **vec_stmt, slp_tree slp_node)
{
tree vectype_out = STMT_VINFO_VECTYPE (stmt_info);
- loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
int i;
int ncopies;
- int j;
int vec_num;
- stmt_vec_info reduc_info = info_for_reduction (stmt_info);
+ stmt_vec_info reduc_info = info_for_reduction (loop_vinfo, stmt_info);
gcc_assert (reduc_info->is_reduc_info);
if (nested_in_vect_loop_p (loop, stmt_info))
bool mask_by_cond_expr = use_mask_by_cond_expr_p (code, cond_fn, vectype_in);
/* Transform. */
- stmt_vec_info new_stmt_info = NULL;
- stmt_vec_info prev_stmt_info;
tree new_temp = NULL_TREE;
auto_vec<tree> vec_oprnds0;
auto_vec<tree> vec_oprnds1;
{
internal_fn reduc_fn = STMT_VINFO_REDUC_FN (reduc_info);
return vectorize_fold_left_reduction
- (stmt_info, gsi, vec_stmt, slp_node, reduc_def_phi, code,
+ (loop_vinfo, stmt_info, gsi, vec_stmt, slp_node, reduc_def_phi, code,
reduc_fn, ops, vectype_in, reduc_index, masks);
}
tree scalar_dest = gimple_assign_lhs (stmt);
tree vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
- prev_stmt_info = NULL;
- if (!slp_node)
+ vect_get_vec_defs (loop_vinfo, stmt_info, slp_node, ncopies,
+ single_defuse_cycle && reduc_index == 0
+ ? NULL_TREE : ops[0], &vec_oprnds0,
+ single_defuse_cycle && reduc_index == 1
+ ? NULL_TREE : ops[1], &vec_oprnds1,
+ op_type == ternary_op
+ && !(single_defuse_cycle && reduc_index == 2)
+ ? ops[2] : NULL_TREE, &vec_oprnds2);
+ if (single_defuse_cycle)
{
- vec_oprnds0.create (1);
- vec_oprnds1.create (1);
- if (op_type == ternary_op)
- vec_oprnds2.create (1);
+ gcc_assert (!slp_node);
+ vect_get_vec_defs_for_operand (loop_vinfo, stmt_info, 1,
+ ops[reduc_index],
+ reduc_index == 0 ? &vec_oprnds0
+ : (reduc_index == 1 ? &vec_oprnds1
+ : &vec_oprnds2));
}
- for (j = 0; j < ncopies; j++)
+ FOR_EACH_VEC_ELT (vec_oprnds0, i, def0)
{
- /* Handle uses. */
- if (j == 0)
- {
- if (slp_node)
- {
- /* Get vec defs for all the operands except the reduction index,
- ensuring the ordering of the ops in the vector is kept. */
- auto_vec<vec<tree>, 3> vec_defs;
- vect_get_slp_defs (slp_node, &vec_defs);
- vec_oprnds0.safe_splice (vec_defs[0]);
- vec_defs[0].release ();
- vec_oprnds1.safe_splice (vec_defs[1]);
- vec_defs[1].release ();
- if (op_type == ternary_op)
- {
- vec_oprnds2.safe_splice (vec_defs[2]);
- vec_defs[2].release ();
- }
- }
- else
+ gimple *new_stmt;
+ tree vop[3] = { def0, vec_oprnds1[i], NULL_TREE };
+ if (masked_loop_p && !mask_by_cond_expr)
+ {
+ /* Make sure that the reduction accumulator is vop[0]. */
+ if (reduc_index == 1)
{
- vec_oprnds0.quick_push
- (vect_get_vec_def_for_operand (ops[0], stmt_info));
- vec_oprnds1.quick_push
- (vect_get_vec_def_for_operand (ops[1], stmt_info));
- if (op_type == ternary_op)
- vec_oprnds2.quick_push
- (vect_get_vec_def_for_operand (ops[2], stmt_info));
+ gcc_assert (commutative_tree_code (code));
+ std::swap (vop[0], vop[1]);
}
- }
+ tree mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
+ vectype_in, i);
+ gcall *call = gimple_build_call_internal (cond_fn, 4, mask,
+ vop[0], vop[1], vop[0]);
+ new_temp = make_ssa_name (vec_dest, call);
+ gimple_call_set_lhs (call, new_temp);
+ gimple_call_set_nothrow (call, true);
+ vect_finish_stmt_generation (loop_vinfo, stmt_info, call, gsi);
+ new_stmt = call;
+ }
else
- {
- if (!slp_node)
- {
- gcc_assert (reduc_index != -1 || ! single_defuse_cycle);
-
- if (single_defuse_cycle && reduc_index == 0)
- vec_oprnds0[0] = gimple_get_lhs (new_stmt_info->stmt);
- else
- vec_oprnds0[0]
- = vect_get_vec_def_for_stmt_copy (loop_vinfo,
- vec_oprnds0[0]);
- if (single_defuse_cycle && reduc_index == 1)
- vec_oprnds1[0] = gimple_get_lhs (new_stmt_info->stmt);
- else
- vec_oprnds1[0]
- = vect_get_vec_def_for_stmt_copy (loop_vinfo,
- vec_oprnds1[0]);
- if (op_type == ternary_op)
- {
- if (single_defuse_cycle && reduc_index == 2)
- vec_oprnds2[0] = gimple_get_lhs (new_stmt_info->stmt);
- else
- vec_oprnds2[0]
- = vect_get_vec_def_for_stmt_copy (loop_vinfo,
- vec_oprnds2[0]);
- }
- }
- }
+ {
+ if (op_type == ternary_op)
+ vop[2] = vec_oprnds2[i];
- FOR_EACH_VEC_ELT (vec_oprnds0, i, def0)
- {
- tree vop[3] = { def0, vec_oprnds1[i], NULL_TREE };
- if (masked_loop_p && !mask_by_cond_expr)
+ if (masked_loop_p && mask_by_cond_expr)
{
- /* Make sure that the reduction accumulator is vop[0]. */
- if (reduc_index == 1)
- {
- gcc_assert (commutative_tree_code (code));
- std::swap (vop[0], vop[1]);
- }
tree mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
- vectype_in, i * ncopies + j);
- gcall *call = gimple_build_call_internal (cond_fn, 4, mask,
- vop[0], vop[1],
- vop[0]);
- new_temp = make_ssa_name (vec_dest, call);
- gimple_call_set_lhs (call, new_temp);
- gimple_call_set_nothrow (call, true);
- new_stmt_info
- = vect_finish_stmt_generation (stmt_info, call, gsi);
- }
- else
- {
- if (op_type == ternary_op)
- vop[2] = vec_oprnds2[i];
-
- if (masked_loop_p && mask_by_cond_expr)
- {
- tree mask = vect_get_loop_mask (gsi, masks,
- vec_num * ncopies,
- vectype_in, i * ncopies + j);
- build_vect_cond_expr (code, vop, mask, gsi);
- }
-
- gassign *new_stmt = gimple_build_assign (vec_dest, code,
- vop[0], vop[1], vop[2]);
- new_temp = make_ssa_name (vec_dest, new_stmt);
- gimple_assign_set_lhs (new_stmt, new_temp);
- new_stmt_info
- = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
+ vectype_in, i);
+ build_vect_cond_expr (code, vop, mask, gsi);
}
- if (slp_node)
- SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
- }
-
- if (slp_node || single_defuse_cycle)
- continue;
+ new_stmt = gimple_build_assign (vec_dest, code,
+ vop[0], vop[1], vop[2]);
+ new_temp = make_ssa_name (vec_dest, new_stmt);
+ gimple_assign_set_lhs (new_stmt, new_temp);
+ vect_finish_stmt_generation (loop_vinfo, stmt_info, new_stmt, gsi);
+ }
- if (j == 0)
- STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
+ if (slp_node)
+ SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
+ else if (single_defuse_cycle
+ && i < ncopies - 1)
+ {
+ if (reduc_index == 0)
+ vec_oprnds0.safe_push (gimple_get_lhs (new_stmt));
+ else if (reduc_index == 1)
+ vec_oprnds1.safe_push (gimple_get_lhs (new_stmt));
+ else if (reduc_index == 2)
+ vec_oprnds2.safe_push (gimple_get_lhs (new_stmt));
+ }
else
- STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
-
- prev_stmt_info = new_stmt_info;
+ STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
}
- if (single_defuse_cycle && !slp_node)
- STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
+ if (!slp_node)
+ *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
return true;
}
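In the single-def-use-cycle case the accumulator produced by copy i is fed back as the reduction operand of copy i + 1, which is why the transform loop above pushes gimple_get_lhs (new_stmt) onto the operand vector that holds the reduction input. A rough standalone sketch of that chaining, with integer addition standing in for the vector statement:

#include <vector>

/* Illustrative sketch only: chain the vector copies so that each copy's
   result becomes the accumulator operand of the next one.  */
static std::vector<int>
single_defuse_cycle_chain (std::vector<int> acc,
                           const std::vector<std::vector<int> > &other_ops)
{
  for (unsigned copy = 0; copy < other_ops.size (); ++copy)
    for (unsigned lane = 0; lane < acc.size (); ++lane)
      acc[lane] += other_ops[copy][lane];   /* feeds the next copy */
  return acc;                               /* final accumulator */
}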
/* Transform phase of a cycle PHI. */
bool
-vect_transform_cycle_phi (stmt_vec_info stmt_info, stmt_vec_info *vec_stmt,
+vect_transform_cycle_phi (loop_vec_info loop_vinfo,
+ stmt_vec_info stmt_info, gimple **vec_stmt,
slp_tree slp_node, slp_instance slp_node_instance)
{
tree vectype_out = STMT_VINFO_VECTYPE (stmt_info);
- loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
int i;
int ncopies;
- stmt_vec_info prev_phi_info;
int j;
bool nested_cycle = false;
int vec_num;
stmt_vec_info reduc_stmt_info = STMT_VINFO_REDUC_DEF (stmt_info);
reduc_stmt_info = vect_stmt_to_vectorize (reduc_stmt_info);
- stmt_vec_info reduc_info = info_for_reduction (stmt_info);
+ stmt_vec_info reduc_info = info_for_reduction (loop_vinfo, stmt_info);
gcc_assert (reduc_info->is_reduc_info);
if (STMT_VINFO_REDUC_TYPE (reduc_info) == EXTRACT_LAST_REDUCTION
if (slp_node)
{
/* The size vect_schedule_slp_instance computes is off for us. */
- vec_num = vect_get_num_vectors
- (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
- * SLP_TREE_SCALAR_STMTS (slp_node).length (), vectype_in);
+ vec_num = vect_get_num_vectors (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
+ * SLP_TREE_LANES (slp_node), vectype_in);
ncopies = 1;
}
else
if (slp_node)
{
vec_initial_defs.reserve (vec_num);
- gcc_assert (slp_node == slp_node_instance->reduc_phis);
- stmt_vec_info first = REDUC_GROUP_FIRST_ELEMENT (reduc_stmt_info);
- tree neutral_op
- = neutral_op_for_slp_reduction (slp_node, vectype_out,
- STMT_VINFO_REDUC_CODE (reduc_info),
- first != NULL);
- get_initial_defs_for_reduction (slp_node_instance->reduc_phis,
- &vec_initial_defs, vec_num,
- first != NULL, neutral_op);
+ if (nested_cycle)
+ {
+ unsigned phi_idx = loop_preheader_edge (loop)->dest_idx;
+ vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[phi_idx],
+ &vec_initial_defs);
+ }
+ else
+ {
+ gcc_assert (slp_node == slp_node_instance->reduc_phis);
+ stmt_vec_info first = REDUC_GROUP_FIRST_ELEMENT (reduc_stmt_info);
+ tree neutral_op
+ = neutral_op_for_slp_reduction (slp_node, vectype_out,
+ STMT_VINFO_REDUC_CODE (reduc_info),
+ first != NULL);
+ get_initial_defs_for_reduction (loop_vinfo, slp_node_instance->reduc_phis,
+ &vec_initial_defs, vec_num,
+ first != NULL, neutral_op);
+ }
}
else
{
STMT_VINFO_VEC_INDUC_COND_INITIAL_VAL (reduc_info) = NULL_TREE;
}
vec_initial_def = build_vector_from_val (vectype_out, induc_val);
+ vec_initial_defs.create (ncopies);
+ for (i = 0; i < ncopies; ++i)
+ vec_initial_defs.quick_push (vec_initial_def);
}
else if (nested_cycle)
{
/* Do not use an adjustment def as that case is not supported
correctly if ncopies is not one. */
- vec_initial_def = vect_get_vec_def_for_operand (initial_def,
- reduc_stmt_info);
+ vect_get_vec_defs_for_operand (loop_vinfo, reduc_stmt_info,
+ ncopies, initial_def,
+ &vec_initial_defs);
}
else
{
if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def)
adjustment_defp = NULL;
vec_initial_def
- = get_initial_def_for_reduction (reduc_stmt_info, code,
+ = get_initial_def_for_reduction (loop_vinfo, reduc_stmt_info, code,
initial_def, adjustment_defp);
STMT_VINFO_REDUC_EPILOGUE_ADJUSTMENT (reduc_info) = adjustment_def;
+ vec_initial_defs.create (ncopies);
+ for (i = 0; i < ncopies; ++i)
+ vec_initial_defs.quick_push (vec_initial_def);
}
- vec_initial_defs.create (1);
- vec_initial_defs.quick_push (vec_initial_def);
}
/* Generate the reduction PHIs upfront. */
- prev_phi_info = NULL;
for (i = 0; i < vec_num; i++)
{
tree vec_init_def = vec_initial_defs[i];
/* Create the reduction-phi that defines the reduction
operand. */
gphi *new_phi = create_phi_node (vec_dest, loop->header);
- stmt_vec_info new_phi_info = loop_vinfo->add_stmt (new_phi);
/* Set the loop-entry arg of the reduction-phi. */
if (j != 0 && nested_cycle)
- vec_init_def = vect_get_vec_def_for_stmt_copy (loop_vinfo,
- vec_init_def);
+ vec_init_def = vec_initial_defs[j];
add_phi_arg (new_phi, vec_init_def, loop_preheader_edge (loop),
UNKNOWN_LOCATION);
/* The loop-latch arg is set in epilogue processing. */
if (slp_node)
- SLP_TREE_VEC_STMTS (slp_node).quick_push (new_phi_info);
+ SLP_TREE_VEC_STMTS (slp_node).quick_push (new_phi);
else
{
if (j == 0)
- STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_phi_info;
- else
- STMT_VINFO_RELATED_STMT (prev_phi_info) = new_phi_info;
- prev_phi_info = new_phi_info;
+ *vec_stmt = new_phi;
+ STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_phi);
}
}
}
/* Vectorizes LC PHIs. */
bool
-vectorizable_lc_phi (stmt_vec_info stmt_info, stmt_vec_info *vec_stmt,
+vectorizable_lc_phi (loop_vec_info loop_vinfo,
+ stmt_vec_info stmt_info, gimple **vec_stmt,
slp_tree slp_node)
{
- loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
if (!loop_vinfo
|| !is_a <gphi *> (stmt_info->stmt)
|| gimple_phi_num_args (stmt_info->stmt) != 1)
if (!vec_stmt) /* transformation not required. */
{
+ /* Deal with copies from externs or constants that are disguised as
+ loop-closed PHI nodes (PR97886). */

+ if (slp_node
+ && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0],
+ SLP_TREE_VECTYPE (slp_node)))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "incompatible vector types for invariants\n");
+ return false;
+ }
STMT_VINFO_TYPE (stmt_info) = lc_phi_info_type;
return true;
}
basic_block bb = gimple_bb (stmt_info->stmt);
edge e = single_pred_edge (bb);
tree vec_dest = vect_create_destination_var (scalar_dest, vectype);
- vec<tree> vec_oprnds = vNULL;
- vect_get_vec_defs (gimple_phi_arg_def (stmt_info->stmt, 0), NULL_TREE,
- stmt_info, &vec_oprnds, NULL, slp_node);
- if (slp_node)
+ auto_vec<tree> vec_oprnds;
+ vect_get_vec_defs (loop_vinfo, stmt_info, slp_node,
+ !slp_node ? vect_get_num_copies (loop_vinfo, vectype) : 1,
+ gimple_phi_arg_def (stmt_info->stmt, 0), &vec_oprnds);
+ for (unsigned i = 0; i < vec_oprnds.length (); i++)
+ {
+ /* Create the vectorized LC PHI node. */
+ gphi *new_phi = create_phi_node (vec_dest, bb);
+ add_phi_arg (new_phi, vec_oprnds[i], e, UNKNOWN_LOCATION);
+ if (slp_node)
+ SLP_TREE_VEC_STMTS (slp_node).quick_push (new_phi);
+ else
+ STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_phi);
+ }
+ if (!slp_node)
+ *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
+
+ return true;
+}
+
+/* Vectorizes PHIs. */
+
+bool
+vectorizable_phi (vec_info *,
+ stmt_vec_info stmt_info, gimple **vec_stmt,
+ slp_tree slp_node, stmt_vector_for_cost *cost_vec)
+{
+ if (!is_a <gphi *> (stmt_info->stmt) || !slp_node)
+ return false;
+
+ if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
+ return false;
+
+ tree vectype = SLP_TREE_VECTYPE (slp_node);
+
+ if (!vec_stmt) /* transformation not required. */
{
- unsigned vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
- gcc_assert (vec_oprnds.length () == vec_num);
- for (unsigned i = 0; i < vec_num; i++)
- {
- /* Create the vectorized LC PHI node. */
- gphi *new_phi = create_phi_node (vec_dest, bb);
- add_phi_arg (new_phi, vec_oprnds[i], e, UNKNOWN_LOCATION);
- stmt_vec_info new_phi_info = loop_vinfo->add_stmt (new_phi);
- SLP_TREE_VEC_STMTS (slp_node).quick_push (new_phi_info);
- }
+ slp_tree child;
+ unsigned i;
+ FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (slp_node), i, child)
+ if (!child)
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "PHI node with unvectorized backedge def\n");
+ return false;
+ }
+ else if (!vect_maybe_update_slp_op_vectype (child, vectype))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "incompatible vector types for invariants\n");
+ return false;
+ }
+ record_stmt_cost (cost_vec, SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node),
+ vector_stmt, stmt_info, vectype, 0, vect_body);
+ STMT_VINFO_TYPE (stmt_info) = phi_info_type;
+ return true;
}
- else
+
+ tree scalar_dest = gimple_phi_result (stmt_info->stmt);
+ basic_block bb = gimple_bb (stmt_info->stmt);
+ tree vec_dest = vect_create_destination_var (scalar_dest, vectype);
+ auto_vec<gphi *> new_phis;
+ for (unsigned i = 0; i < gimple_phi_num_args (stmt_info->stmt); ++i)
{
- unsigned ncopies = vect_get_num_copies (loop_vinfo, vectype);
- stmt_vec_info prev_phi_info = NULL;
- for (unsigned i = 0; i < ncopies; i++)
- {
- if (i != 0)
- vect_get_vec_defs_for_stmt_copy (loop_vinfo, &vec_oprnds, NULL);
- /* Create the vectorized LC PHI node. */
- gphi *new_phi = create_phi_node (vec_dest, bb);
- add_phi_arg (new_phi, vec_oprnds[0], e, UNKNOWN_LOCATION);
- stmt_vec_info new_phi_info = loop_vinfo->add_stmt (new_phi);
- if (i == 0)
- STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_phi_info;
- else
- STMT_VINFO_RELATED_STMT (prev_phi_info) = new_phi_info;
- prev_phi_info = new_phi_info;
+ slp_tree child = SLP_TREE_CHILDREN (slp_node)[i];
+
+ /* Skip not yet vectorized defs. */
+ if (SLP_TREE_DEF_TYPE (child) == vect_internal_def
+ && SLP_TREE_VEC_STMTS (child).is_empty ())
+ continue;
+
+ auto_vec<tree> vec_oprnds;
+ vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[i], &vec_oprnds);
+ if (!new_phis.exists ())
+ {
+ new_phis.create (vec_oprnds.length ());
+ for (unsigned j = 0; j < vec_oprnds.length (); j++)
+ {
+ /* Create the vectorized PHI node. */
+ new_phis.quick_push (create_phi_node (vec_dest, bb));
+ SLP_TREE_VEC_STMTS (slp_node).quick_push (new_phis[j]);
+ }
}
+ edge e = gimple_phi_arg_edge (as_a <gphi *> (stmt_info->stmt), i);
+ for (unsigned j = 0; j < vec_oprnds.length (); j++)
+ add_phi_arg (new_phis[j], vec_oprnds[j], e, UNKNOWN_LOCATION);
}
- vec_oprnds.release ();
+ /* We should have at least one already vectorized child. */
+ gcc_assert (new_phis.exists ());
return true;
}
Return true if STMT_INFO is vectorizable in this way. */
bool
-vectorizable_induction (stmt_vec_info stmt_info,
- gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED,
- stmt_vec_info *vec_stmt, slp_tree slp_node,
+vectorizable_induction (loop_vec_info loop_vinfo,
+ stmt_vec_info stmt_info,
+ gimple **vec_stmt, slp_tree slp_node,
stmt_vector_for_cost *cost_vec)
{
- loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
unsigned ncopies;
bool nested_in_vect_loop = false;
poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
unsigned i;
tree expr;
- gimple_seq stmts;
- imm_use_iterator imm_iter;
- use_operand_p use_p;
- gimple *exit_phi;
- edge latch_e;
- tree loop_arg;
gimple_stmt_iterator si;
gphi *phi = dyn_cast <gphi *> (stmt_info->stmt);
return false;
}
- /* FORNOW: outer loop induction with SLP not supported. */
- if (STMT_SLP_TYPE (stmt_info))
- return false;
-
exit_phi = NULL;
latch_e = loop_latch_edge (loop->inner);
loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
if (slp_node && !nunits.is_constant ())
{
- /* The current SLP code creates the initial value element-by-element. */
+ /* The current SLP code creates the step value element-by-element. */
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"SLP induction not supported for variable-length"
if (!vec_stmt) /* transformation not required. */
{
+ unsigned inside_cost = 0, prologue_cost = 0;
+ if (slp_node)
+ {
+ /* We eventually need to set a vector type on invariant
+ arguments. */
+ unsigned j;
+ slp_tree child;
+ FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (slp_node), j, child)
+ if (!vect_maybe_update_slp_op_vectype
+ (child, SLP_TREE_VECTYPE (slp_node)))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "incompatible vector types for "
+ "invariants\n");
+ return false;
+ }
+ /* loop cost for vec_loop. */
+ inside_cost
+ = record_stmt_cost (cost_vec,
+ SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node),
+ vector_stmt, stmt_info, 0, vect_body);
+ /* prologue cost for vec_init (if not nested) and step. */
+ prologue_cost = record_stmt_cost (cost_vec, 1 + !nested_in_vect_loop,
+ scalar_to_vec,
+ stmt_info, 0, vect_prologue);
+ }
+ else /* if (!slp_node) */
+ {
+ /* loop cost for vec_loop. */
+ inside_cost = record_stmt_cost (cost_vec, ncopies, vector_stmt,
+ stmt_info, 0, vect_body);
+ /* prologue cost for vec_init and vec_step. */
+ prologue_cost = record_stmt_cost (cost_vec, 2, scalar_to_vec,
+ stmt_info, 0, vect_prologue);
+ }
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "vect_model_induction_cost: inside_cost = %d, "
+ "prologue_cost = %d .\n", inside_cost,
+ prologue_cost);
+
STMT_VINFO_TYPE (stmt_info) = induc_vec_info_type;
DUMP_VECT_SCOPE ("vectorizable_induction");
- vect_model_induction_cost (stmt_info, ncopies, cost_vec);
return true;
}
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location, "transform induction phi.\n");
- latch_e = loop_latch_edge (iv_loop);
- loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
-
step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info);
gcc_assert (step_expr != NULL_TREE);
tree step_vectype = get_same_sized_vectype (TREE_TYPE (step_expr), vectype);
pe = loop_preheader_edge (iv_loop);
- init_expr = PHI_ARG_DEF_FROM_EDGE (phi,
- loop_preheader_edge (iv_loop));
-
- stmts = NULL;
- if (!nested_in_vect_loop)
- {
- /* Convert the initial value to the IV update type. */
- tree new_type = TREE_TYPE (step_expr);
- init_expr = gimple_convert (&stmts, new_type, init_expr);
-
- /* If we are using the loop mask to "peel" for alignment then we need
- to adjust the start value here. */
- tree skip_niters = LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo);
- if (skip_niters != NULL_TREE)
- {
- if (FLOAT_TYPE_P (vectype))
- skip_niters = gimple_build (&stmts, FLOAT_EXPR, new_type,
- skip_niters);
- else
- skip_niters = gimple_convert (&stmts, new_type, skip_niters);
- tree skip_step = gimple_build (&stmts, MULT_EXPR, new_type,
- skip_niters, step_expr);
- init_expr = gimple_build (&stmts, MINUS_EXPR, new_type,
- init_expr, skip_step);
- }
- }
-
- if (stmts)
- {
- new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
- gcc_assert (!new_bb);
- }
-
/* Find the first insertion point in the BB. */
basic_block bb = gimple_bb (phi);
si = gsi_after_labels (bb);
/* For SLP induction we have to generate several IVs as for example
- with group size 3 we need [i, i, i, i + S] [i + S, i + S, i + 2*S, i + 2*S]
- [i + 2*S, i + 3*S, i + 3*S, i + 3*S]. The step is the same uniform
- [VF*S, VF*S, VF*S, VF*S] for all. */
+ with group size 3 we need
+ [i0, i1, i2, i0 + S0] [i1 + S1, i2 + S2, i0 + 2*S0, i1 + 2*S1]
+ [i2 + 2*S2, i0 + 3*S0, i1 + 3*S1, i2 + 3*S2]. */
if (slp_node)
{
/* Enforced above. */
unsigned int const_nunits = nunits.to_constant ();
- /* Generate [VF*S, VF*S, ... ]. */
- if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)))
+ /* The initial values are vectorized, but any lanes > group_size
+ need adjustment. */
+ slp_tree init_node
+ = SLP_TREE_CHILDREN (slp_node)[pe->dest_idx];
+
+ /* Gather steps. Since we do not vectorize inductions as
+ cycles we have to reconstruct the step from SCEV data. */
+ unsigned group_size = SLP_TREE_LANES (slp_node);
+ tree *steps = XALLOCAVEC (tree, group_size);
+ tree *inits = XALLOCAVEC (tree, group_size);
+ stmt_vec_info phi_info;
+ FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, phi_info)
{
- expr = build_int_cst (integer_type_node, vf);
- expr = fold_convert (TREE_TYPE (step_expr), expr);
+ steps[i] = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (phi_info);
+ if (!init_node)
+ inits[i] = gimple_phi_arg_def (as_a<gphi *> (phi_info->stmt),
+ pe->dest_idx);
}
- else
- expr = build_int_cst (TREE_TYPE (step_expr), vf);
- new_name = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
- expr, step_expr);
- if (! CONSTANT_CLASS_P (new_name))
- new_name = vect_init_vector (stmt_info, new_name,
- TREE_TYPE (step_expr), NULL);
- new_vec = build_vector_from_val (step_vectype, new_name);
- vec_step = vect_init_vector (stmt_info, new_vec, step_vectype, NULL);
/* Now generate the IVs. */
- unsigned group_size = SLP_TREE_SCALAR_STMTS (slp_node).length ();
unsigned nvects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
- unsigned elts = const_nunits * nvects;
- unsigned nivs = least_common_multiple (group_size,
- const_nunits) / const_nunits;
- gcc_assert (elts % group_size == 0);
- tree elt = init_expr;
+ gcc_assert ((const_nunits * nvects) % group_size == 0);
+ unsigned nivs;
+ if (nested_in_vect_loop)
+ nivs = nvects;
+ else
+ {
+ /* Compute the number of distinct IVs we need. First reduce
+ group_size if it is a multiple of const_nunits so we get
+ one IV for a group_size of 4 but const_nunits 2. */
+ unsigned group_sizep = group_size;
+ if (group_sizep % const_nunits == 0)
+ group_sizep = group_sizep / const_nunits;
+ nivs = least_common_multiple (group_sizep,
+ const_nunits) / const_nunits;
+ }
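The builders below realize the lane layout described in the earlier comment: element e = ivn * const_nunits + eltn of the ivn-th initial vector gets init[e % group_size] advanced by e / group_size steps of step[e % group_size], with peel_mul and lupdate_mul later adding the mask-peeling offset and the per-iteration VF' * step update. A small standalone sketch of that indexing, in plain C++ rather than the tree builders used here:

#include <vector>

/* Illustrative sketch only: initial lane values of the SLP induction
   vectors for GROUP_SIZE inductions with per-induction INITS and STEPS,
   NUNITS lanes per vector and NIVS vectors.  */
static std::vector<std::vector<double> >
slp_iv_initial_vectors (const std::vector<double> &inits,
                        const std::vector<double> &steps,
                        unsigned nunits, unsigned nivs)
{
  unsigned group_size = inits.size ();
  std::vector<std::vector<double> > ivs (nivs,
                                         std::vector<double> (nunits));
  for (unsigned ivn = 0; ivn < nivs; ++ivn)
    for (unsigned eltn = 0; eltn < nunits; ++eltn)
      {
        unsigned e = ivn * nunits + eltn;
        /* Init of the induction this lane belongs to, advanced by the
           number of whole group traversals before it.  */
        ivs[ivn][eltn] = inits[e % group_size]
                         + (e / group_size) * steps[e % group_size];
      }
  return ivs;
}

With group size 3 and four lanes per vector this reproduces the [i0, i1, i2, i0 + S0] [i1 + S1, i2 + S2, i0 + 2*S0, i1 + 2*S1] ... layout quoted above.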
+ tree stept = TREE_TYPE (step_vectype);
+ tree lupdate_mul = NULL_TREE;
+ if (!nested_in_vect_loop)
+ {
+ /* The number of iterations covered in one vector iteration. */
+ unsigned lup_mul = (nvects * const_nunits) / group_size;
+ lupdate_mul
+ = build_vector_from_val (step_vectype,
+ SCALAR_FLOAT_TYPE_P (stept)
+ ? build_real_from_wide (stept, lup_mul,
+ UNSIGNED)
+ : build_int_cstu (stept, lup_mul));
+ }
+ tree peel_mul = NULL_TREE;
+ gimple_seq init_stmts = NULL;
+ if (LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo))
+ {
+ if (SCALAR_FLOAT_TYPE_P (stept))
+ peel_mul = gimple_build (&init_stmts, FLOAT_EXPR, stept,
+ LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo));
+ else
+ peel_mul = gimple_convert (&init_stmts, stept,
+ LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo));
+ peel_mul = gimple_build_vector_from_val (&init_stmts,
+ step_vectype, peel_mul);
+ }
unsigned ivn;
+ auto_vec<tree> vec_steps;
for (ivn = 0; ivn < nivs; ++ivn)
{
- tree_vector_builder elts (step_vectype, const_nunits, 1);
- stmts = NULL;
+ tree_vector_builder step_elts (step_vectype, const_nunits, 1);
+ tree_vector_builder init_elts (vectype, const_nunits, 1);
+ tree_vector_builder mul_elts (step_vectype, const_nunits, 1);
for (unsigned eltn = 0; eltn < const_nunits; ++eltn)
{
- if (ivn*const_nunits + eltn >= group_size
- && (ivn * const_nunits + eltn) % group_size == 0)
- elt = gimple_build (&stmts, PLUS_EXPR, TREE_TYPE (elt),
- elt, step_expr);
- elts.quick_push (elt);
- }
- vec_init = gimple_build_vector (&stmts, &elts);
- vec_init = gimple_convert (&stmts, vectype, vec_init);
- if (stmts)
- {
- new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
- gcc_assert (!new_bb);
+ /* The scalar steps of the IVs. */
+ tree elt = steps[(ivn*const_nunits + eltn) % group_size];
+ elt = gimple_convert (&init_stmts, TREE_TYPE (step_vectype), elt);
+ step_elts.quick_push (elt);
+ if (!init_node)
+ {
+ /* The scalar inits of the IVs if not vectorized. */
+ elt = inits[(ivn*const_nunits + eltn) % group_size];
+ if (!useless_type_conversion_p (TREE_TYPE (vectype),
+ TREE_TYPE (elt)))
+ elt = gimple_build (&init_stmts, VIEW_CONVERT_EXPR,
+ TREE_TYPE (vectype), elt);
+ init_elts.quick_push (elt);
+ }
+ /* The number of steps to add to the initial values. */
+ unsigned mul_elt = (ivn*const_nunits + eltn) / group_size;
+ mul_elts.quick_push (SCALAR_FLOAT_TYPE_P (stept)
+ ? build_real_from_wide (stept,
+ mul_elt, UNSIGNED)
+ : build_int_cstu (stept, mul_elt));
}
+ vec_step = gimple_build_vector (&init_stmts, &step_elts);
+ vec_steps.safe_push (vec_step);
+ tree step_mul = gimple_build_vector (&init_stmts, &mul_elts);
+ if (peel_mul)
+ step_mul = gimple_build (&init_stmts, PLUS_EXPR, step_vectype,
+ step_mul, peel_mul);
+ if (!init_node)
+ vec_init = gimple_build_vector (&init_stmts, &init_elts);
/* Create the induction-phi that defines the induction-operand. */
- vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, "vec_iv_");
+ vec_dest = vect_get_new_vect_var (vectype, vect_simple_var,
+ "vec_iv_");
induction_phi = create_phi_node (vec_dest, iv_loop->header);
- stmt_vec_info induction_phi_info
- = loop_vinfo->add_stmt (induction_phi);
induc_def = PHI_RESULT (induction_phi);
/* Create the iv update inside the loop */
+ tree up = vec_step;
+ if (lupdate_mul)
+ up = gimple_build (&init_stmts, MULT_EXPR, step_vectype,
+ vec_step, lupdate_mul);
gimple_seq stmts = NULL;
vec_def = gimple_convert (&stmts, step_vectype, induc_def);
vec_def = gimple_build (&stmts,
- PLUS_EXPR, step_vectype, vec_def, vec_step);
+ PLUS_EXPR, step_vectype, vec_def, up);
vec_def = gimple_convert (&stmts, vectype, vec_def);
- loop_vinfo->add_stmt (SSA_NAME_DEF_STMT (vec_def));
gsi_insert_seq_before (&si, stmts, GSI_SAME_STMT);
+ add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop),
+ UNKNOWN_LOCATION);
+
+ if (init_node)
+ vec_init = vect_get_slp_vect_def (init_node, ivn);
+ if (!nested_in_vect_loop
+ && !integer_zerop (step_mul))
+ {
+ vec_def = gimple_convert (&init_stmts, step_vectype, vec_init);
+ up = gimple_build (&init_stmts, MULT_EXPR, step_vectype,
+ vec_step, step_mul);
+ vec_def = gimple_build (&init_stmts, PLUS_EXPR, step_vectype,
+ vec_def, up);
+ vec_init = gimple_convert (&init_stmts, vectype, vec_def);
+ }
/* Set the arguments of the phi node: */
add_phi_arg (induction_phi, vec_init, pe, UNKNOWN_LOCATION);
- add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop),
- UNKNOWN_LOCATION);
- SLP_TREE_VEC_STMTS (slp_node).quick_push (induction_phi_info);
+ SLP_TREE_VEC_STMTS (slp_node).quick_push (induction_phi);
+ }
+ if (!nested_in_vect_loop)
+ {
+ /* Fill up to the number of vectors we need for the whole group. */
+ nivs = least_common_multiple (group_size,
+ const_nunits) / const_nunits;
+ for (; ivn < nivs; ++ivn)
+ {
+ SLP_TREE_VEC_STMTS (slp_node)
+ .quick_push (SLP_TREE_VEC_STMTS (slp_node)[0]);
+ vec_steps.safe_push (vec_steps[0]);
+ }
}
- /* Re-use IVs when we can. */
+ /* Re-use IVs when we can. We are generating further vector
+ stmts by adding VF' * stride to the IVs generated above. */
if (ivn < nvects)
{
unsigned vfp
= least_common_multiple (group_size, const_nunits) / group_size;
- /* Generate [VF'*S, VF'*S, ... ]. */
- if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)))
- {
- expr = build_int_cst (integer_type_node, vfp);
- expr = fold_convert (TREE_TYPE (step_expr), expr);
- }
- else
- expr = build_int_cst (TREE_TYPE (step_expr), vfp);
- new_name = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
- expr, step_expr);
- if (! CONSTANT_CLASS_P (new_name))
- new_name = vect_init_vector (stmt_info, new_name,
- TREE_TYPE (step_expr), NULL);
- new_vec = build_vector_from_val (step_vectype, new_name);
- vec_step = vect_init_vector (stmt_info, new_vec, step_vectype, NULL);
+ tree lupdate_mul
+ = build_vector_from_val (step_vectype,
+ SCALAR_FLOAT_TYPE_P (stept)
+ ? build_real_from_wide (stept,
+ vfp, UNSIGNED)
+ : build_int_cstu (stept, vfp));
for (; ivn < nvects; ++ivn)
{
- gimple *iv = SLP_TREE_VEC_STMTS (slp_node)[ivn - nivs]->stmt;
- tree def;
- if (gimple_code (iv) == GIMPLE_PHI)
- def = gimple_phi_result (iv);
- else
- def = gimple_assign_lhs (iv);
+ gimple *iv = SLP_TREE_VEC_STMTS (slp_node)[ivn - nivs];
+ tree def = gimple_get_lhs (iv);
+ if (ivn < 2*nivs)
+ vec_steps[ivn - nivs]
+ = gimple_build (&init_stmts, MULT_EXPR, step_vectype,
+ vec_steps[ivn - nivs], lupdate_mul);
gimple_seq stmts = NULL;
def = gimple_convert (&stmts, step_vectype, def);
- def = gimple_build (&stmts,
- PLUS_EXPR, step_vectype, def, vec_step);
+ def = gimple_build (&stmts, PLUS_EXPR, step_vectype,
+ def, vec_steps[ivn % nivs]);
def = gimple_convert (&stmts, vectype, def);
if (gimple_code (iv) == GIMPLE_PHI)
gsi_insert_seq_before (&si, stmts, GSI_SAME_STMT);
gimple_stmt_iterator tgsi = gsi_for_stmt (iv);
gsi_insert_seq_after (&tgsi, stmts, GSI_CONTINUE_LINKING);
}
- SLP_TREE_VEC_STMTS (slp_node).quick_push
- (loop_vinfo->add_stmt (SSA_NAME_DEF_STMT (def)));
+ SLP_TREE_VEC_STMTS (slp_node)
+ .quick_push (SSA_NAME_DEF_STMT (def));
}
}
+ new_bb = gsi_insert_seq_on_edge_immediate (pe, init_stmts);
+ gcc_assert (!new_bb);
+
return true;
}
+ init_expr = PHI_ARG_DEF_FROM_EDGE (phi,
+ loop_preheader_edge (iv_loop));
+
+ gimple_seq stmts = NULL;
+ if (!nested_in_vect_loop)
+ {
+ /* Convert the initial value to the IV update type. */
+ tree new_type = TREE_TYPE (step_expr);
+ init_expr = gimple_convert (&stmts, new_type, init_expr);
+
+ /* If we are using the loop mask to "peel" for alignment then we need
+ to adjust the start value here. */
+ tree skip_niters = LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo);
+ if (skip_niters != NULL_TREE)
+ {
+ if (FLOAT_TYPE_P (vectype))
+ skip_niters = gimple_build (&stmts, FLOAT_EXPR, new_type,
+ skip_niters);
+ else
+ skip_niters = gimple_convert (&stmts, new_type, skip_niters);
+ tree skip_step = gimple_build (&stmts, MULT_EXPR, new_type,
+ skip_niters, step_expr);
+ init_expr = gimple_build (&stmts, MINUS_EXPR, new_type,
+ init_expr, skip_step);
+ }
+ }
+
+ if (stmts)
+ {
+ new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
+ gcc_assert (!new_bb);
+ }
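When the loop mask is used to "peel" for alignment, the first masked vector iteration skips skip_niters scalar iterations, so the start value computed above is moved back by skip_niters * step before the IV is materialized. A minimal standalone sketch of that adjustment, using plain C++ in place of the gimple builders:

/* Illustrative sketch only: adjust an induction start value when the
   first masked vector iteration skips SKIP_NITERS scalar iterations.  */
static double
adjust_init_for_mask_peeling (double init, double step, double skip_niters)
{
  return init - skip_niters * step;
}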
+
/* Create the vector that holds the initial_value of the induction. */
if (nested_in_vect_loop)
{
/* iv_loop is nested in the loop to be vectorized. init_expr had already
been created during vectorization of previous stmts. We obtain it
from the STMT_VINFO_VEC_STMT of the defining stmt. */
- vec_init = vect_get_vec_def_for_operand (init_expr, stmt_info);
+ auto_vec<tree> vec_inits;
+ vect_get_vec_defs_for_operand (loop_vinfo, stmt_info, 1,
+ init_expr, &vec_inits);
+ vec_init = vec_inits[0];
/* If the initial value is not of proper type, convert it. */
if (!useless_type_conversion_p (vectype, TREE_TYPE (vec_init)))
{
new_bb = gsi_insert_on_edge_immediate (loop_preheader_edge (iv_loop),
new_stmt);
gcc_assert (!new_bb);
- loop_vinfo->add_stmt (new_stmt);
}
}
else
gcc_assert (CONSTANT_CLASS_P (new_name)
|| TREE_CODE (new_name) == SSA_NAME);
new_vec = build_vector_from_val (step_vectype, t);
- vec_step = vect_init_vector (stmt_info, new_vec, step_vectype, NULL);
+ vec_step = vect_init_vector (loop_vinfo, stmt_info,
+ new_vec, step_vectype, NULL);
/* Create the following def-use cycle:
/* Create the induction-phi that defines the induction-operand. */
vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, "vec_iv_");
induction_phi = create_phi_node (vec_dest, iv_loop->header);
- stmt_vec_info induction_phi_info = loop_vinfo->add_stmt (induction_phi);
induc_def = PHI_RESULT (induction_phi);
/* Create the iv update inside the loop */
vec_def = gimple_convert (&stmts, vectype, vec_def);
gsi_insert_seq_before (&si, stmts, GSI_SAME_STMT);
new_stmt = SSA_NAME_DEF_STMT (vec_def);
- stmt_vec_info new_stmt_info = loop_vinfo->add_stmt (new_stmt);
/* Set the arguments of the phi node: */
add_phi_arg (induction_phi, vec_init, pe, UNKNOWN_LOCATION);
add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop),
UNKNOWN_LOCATION);
- STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = induction_phi_info;
+ STMT_VINFO_VEC_STMTS (stmt_info).safe_push (induction_phi);
+ *vec_stmt = induction_phi;
/* In case that vectorization factor (VF) is bigger than the number
of elements that we can fit in a vectype (nunits), we have to generate
if (ncopies > 1)
{
gimple_seq seq = NULL;
- stmt_vec_info prev_stmt_vinfo;
/* FORNOW. This restriction should be relaxed. */
gcc_assert (!nested_in_vect_loop);
gcc_assert (CONSTANT_CLASS_P (new_name)
|| TREE_CODE (new_name) == SSA_NAME);
new_vec = build_vector_from_val (step_vectype, t);
- vec_step = vect_init_vector (stmt_info, new_vec, step_vectype, NULL);
+ vec_step = vect_init_vector (loop_vinfo, stmt_info,
+ new_vec, step_vectype, NULL);
vec_def = induc_def;
- prev_stmt_vinfo = induction_phi_info;
for (i = 1; i < ncopies; i++)
{
/* vec_i = vec_prev + vec_step */
gsi_insert_seq_before (&si, stmts, GSI_SAME_STMT);
new_stmt = SSA_NAME_DEF_STMT (vec_def);
- new_stmt_info = loop_vinfo->add_stmt (new_stmt);
- STMT_VINFO_RELATED_STMT (prev_stmt_vinfo) = new_stmt_info;
- prev_stmt_vinfo = new_stmt_info;
+ STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
}
}
- if (nested_in_vect_loop)
- {
- /* Find the loop-closed exit-phi of the induction, and record
- the final vector of induction results: */
- exit_phi = NULL;
- FOR_EACH_IMM_USE_FAST (use_p, imm_iter, loop_arg)
- {
- gimple *use_stmt = USE_STMT (use_p);
- if (is_gimple_debug (use_stmt))
- continue;
-
- if (!flow_bb_inside_loop_p (iv_loop, gimple_bb (use_stmt)))
- {
- exit_phi = use_stmt;
- break;
- }
- }
- if (exit_phi)
- {
- stmt_vec_info stmt_vinfo = loop_vinfo->lookup_stmt (exit_phi);
- /* FORNOW. Currently not supporting the case that an inner-loop induction
- is not used in the outer-loop (i.e. only outside the outer-loop). */
- gcc_assert (STMT_VINFO_RELEVANT_P (stmt_vinfo)
- && !STMT_VINFO_LIVE_P (stmt_vinfo));
-
- STMT_VINFO_VEC_STMT (stmt_vinfo) = new_stmt_info;
- if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location,
- "vector of inductions after inner-loop:%G",
- new_stmt);
- }
- }
-
-
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
"transform induction: created def-use cycle: %G%G",
it can be supported. */
bool
-vectorizable_live_operation (stmt_vec_info stmt_info,
+vectorizable_live_operation (vec_info *vinfo,
+ stmt_vec_info stmt_info,
gimple_stmt_iterator *gsi,
slp_tree slp_node, slp_instance slp_node_instance,
int slp_index, bool vec_stmt_p,
- stmt_vector_for_cost *)
+ stmt_vector_for_cost *cost_vec)
{
- loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
- class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+ loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
imm_use_iterator imm_iter;
tree lhs, lhs_type, bitsize, vec_bitsize;
- tree vectype = STMT_VINFO_VECTYPE (stmt_info);
+ tree vectype = (slp_node
+ ? SLP_TREE_VECTYPE (slp_node)
+ : STMT_VINFO_VECTYPE (stmt_info));
poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
int ncopies;
gimple *use_stmt;
all involved stmts together. */
else if (slp_index != 0)
return true;
+ else
+ /* For SLP reductions the meta-info is attached to
+ the representative. */
+ stmt_info = SLP_TREE_REPRESENTATIVE (slp_node);
}
- stmt_vec_info reduc_info = info_for_reduction (stmt_info);
+ stmt_vec_info reduc_info = info_for_reduction (loop_vinfo, stmt_info);
gcc_assert (reduc_info->is_reduc_info);
if (STMT_VINFO_REDUC_TYPE (reduc_info) == FOLD_LEFT_REDUCTION
|| STMT_VINFO_REDUC_TYPE (reduc_info) == EXTRACT_LAST_REDUCTION)
return true;
- vect_create_epilog_for_reduction (stmt_info, slp_node,
+ vect_create_epilog_for_reduction (loop_vinfo, stmt_info, slp_node,
slp_node_instance);
return true;
}
- /* FORNOW. CHECKME. */
- if (nested_in_vect_loop_p (loop, stmt_info))
- return false;
-
/* If STMT is not relevant and it is a simple assignment and its inputs are
invariant then it can remain in place, unvectorized. The original last
scalar value that it computes will be used. */
{
gcc_assert (slp_index >= 0);
- int num_scalar = SLP_TREE_SCALAR_STMTS (slp_node).length ();
- int num_vec = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
-
/* Get the last occurrence of the scalar index from the concatenation of
all the slp vectors. Calculate which slp vector it is and the index
within. */
+ int num_scalar = SLP_TREE_LANES (slp_node);
+ int num_vec = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
poly_uint64 pos = (num_vec * nunits) - num_scalar + slp_index;
/* Calculate which vector contains the result, and which lane of
if (!vec_stmt_p)
{
/* No transformation required. */
- if (LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
+ if (loop_vinfo && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
{
if (!direct_internal_fn_supported_p (IFN_EXTRACT_LAST, vectype,
OPTIMIZE_FOR_SPEED))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "can't use a fully-masked loop because "
- "the target doesn't support extract last "
- "reduction.\n");
- LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
+ "can't operate on partial vectors "
+ "because the target doesn't support extract "
+ "last reduction.\n");
+ LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
}
else if (slp_node)
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "can't use a fully-masked loop because an "
- "SLP statement is live after the loop.\n");
- LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
+ "can't operate on partial vectors "
+ "because an SLP statement is live after "
+ "the loop.\n");
+ LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
}
else if (ncopies > 1)
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "can't use a fully-masked loop because"
- " ncopies is greater than 1.\n");
- LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
+ "can't operate on partial vectors "
+ "because ncopies is greater than 1.\n");
+ LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
}
else
{
1, vectype, NULL);
}
}
+ /* ??? Enable for loop costing as well. */
+ if (!loop_vinfo)
+ record_stmt_cost (cost_vec, 1, vec_to_scalar, stmt_info, NULL_TREE,
+ 0, vect_epilogue);
return true;
}
/* Use the lhs of the original scalar statement. */
gimple *stmt = vect_orig_stmt (stmt_info)->stmt;
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location, "extracting lane for live "
+ "stmt %G", stmt);
- lhs = (is_a <gphi *> (stmt)) ? gimple_phi_result (stmt)
- : gimple_get_lhs (stmt);
+ lhs = gimple_get_lhs (stmt);
lhs_type = TREE_TYPE (lhs);
- bitsize = (VECTOR_BOOLEAN_TYPE_P (vectype)
- ? bitsize_int (TYPE_PRECISION (TREE_TYPE (vectype)))
- : TYPE_SIZE (TREE_TYPE (vectype)));
+ bitsize = vector_element_bits_tree (vectype);
vec_bitsize = TYPE_SIZE (vectype);
/* Get the vectorized lhs of STMT and the lane to use (counted in bits). */
tree vec_lhs, bitstart;
+ gimple *vec_stmt;
if (slp_node)
{
- gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
+ gcc_assert (!loop_vinfo || !LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
/* Get the correct slp vectorized stmt. */
- gimple *vec_stmt = SLP_TREE_VEC_STMTS (slp_node)[vec_entry]->stmt;
- if (gphi *phi = dyn_cast <gphi *> (vec_stmt))
- vec_lhs = gimple_phi_result (phi);
- else
- vec_lhs = gimple_get_lhs (vec_stmt);
+ vec_stmt = SLP_TREE_VEC_STMTS (slp_node)[vec_entry];
+ vec_lhs = gimple_get_lhs (vec_stmt);
/* Get entry to use. */
bitstart = bitsize_int (vec_index);
}
else
{
- enum vect_def_type dt = STMT_VINFO_DEF_TYPE (stmt_info);
- vec_lhs = vect_get_vec_def_for_operand_1 (stmt_info, dt);
- gcc_checking_assert (ncopies == 1
- || !LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
-
/* For multiple copies, get the last copy. */
- for (int i = 1; i < ncopies; ++i)
- vec_lhs = vect_get_vec_def_for_stmt_copy (loop_vinfo, vec_lhs);
+ vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info).last ();
+ vec_lhs = gimple_get_lhs (vec_stmt);
/* Get the last lane in the vector. */
bitstart = int_const_binop (MINUS_EXPR, vec_bitsize, bitsize);
}
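For the non-SLP case the live value comes from the last vector copy, and bitstart addresses its last lane: the bit offset is the vector width minus one element. A quick standalone illustration of that offset arithmetic, with hypothetical plain C++ in place of the tree constants:

/* Illustrative sketch only: bit offset of the last lane of a vector with
   NUNITS lanes of ELEM_BITS bits each.  */
static unsigned
last_lane_bit_offset (unsigned nunits, unsigned elem_bits)
{
  unsigned vec_bits = nunits * elem_bits;   /* TYPE_SIZE (vectype) */
  return vec_bits - elem_bits;              /* bitstart */
}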
- /* Ensure the VEC_LHS for lane extraction stmts satisfy loop-closed PHI
- requirement, insert one phi node for it. It looks like:
- loop;
- BB:
- # lhs' = PHI <lhs>
- ==>
- loop;
- BB:
- # vec_lhs' = PHI <vec_lhs>
- new_tree = lane_extract <vec_lhs', ...>;
- lhs' = new_tree; */
+ if (loop_vinfo)
+ {
+ /* Ensure the VEC_LHS for lane extraction stmts satisfy loop-closed PHI
+ requirement, insert one phi node for it. It looks like:
+ loop;
+ BB:
+ # lhs' = PHI <lhs>
+ ==>
+ loop;
+ BB:
+ # vec_lhs' = PHI <vec_lhs>
+ new_tree = lane_extract <vec_lhs', ...>;
+ lhs' = new_tree; */
+
+ class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+ basic_block exit_bb = single_exit (loop)->dest;
+ gcc_assert (single_pred_p (exit_bb));
+
+ tree vec_lhs_phi = copy_ssa_name (vec_lhs);
+ gimple *phi = create_phi_node (vec_lhs_phi, exit_bb);
+ SET_PHI_ARG_DEF (phi, single_exit (loop)->dest_idx, vec_lhs);
+
+ gimple_seq stmts = NULL;
+ tree new_tree;
+ if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
+ {
+ /* Emit:
- basic_block exit_bb = single_exit (loop)->dest;
- gcc_assert (single_pred_p (exit_bb));
+ SCALAR_RES = EXTRACT_LAST <VEC_LHS, MASK>
- tree vec_lhs_phi = copy_ssa_name (vec_lhs);
- gimple *phi = create_phi_node (vec_lhs_phi, exit_bb);
- SET_PHI_ARG_DEF (phi, single_exit (loop)->dest_idx, vec_lhs);
+ where VEC_LHS is the vectorized live-out result and MASK is
+ the loop mask for the final iteration. */
+ gcc_assert (ncopies == 1 && !slp_node);
+ tree scalar_type = TREE_TYPE (STMT_VINFO_VECTYPE (stmt_info));
+ tree mask = vect_get_loop_mask (gsi, &LOOP_VINFO_MASKS (loop_vinfo),
+ 1, vectype, 0);
+ tree scalar_res = gimple_build (&stmts, CFN_EXTRACT_LAST, scalar_type,
+ mask, vec_lhs_phi);
- gimple_seq stmts = NULL;
- tree new_tree;
- if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
- {
- /* Emit:
+ /* Convert the extracted vector element to the scalar type. */
+ new_tree = gimple_convert (&stmts, lhs_type, scalar_res);
+ }
+ else
+ {
+ tree bftype = TREE_TYPE (vectype);
+ if (VECTOR_BOOLEAN_TYPE_P (vectype))
+ bftype = build_nonstandard_integer_type (tree_to_uhwi (bitsize), 1);
+ new_tree = build3 (BIT_FIELD_REF, bftype,
+ vec_lhs_phi, bitsize, bitstart);
+ new_tree = force_gimple_operand (fold_convert (lhs_type, new_tree),
+ &stmts, true, NULL_TREE);
+ }
- SCALAR_RES = EXTRACT_LAST <VEC_LHS, MASK>
+ if (stmts)
+ {
+ gimple_stmt_iterator exit_gsi = gsi_after_labels (exit_bb);
+ gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT);
- where VEC_LHS is the vectorized live-out result and MASK is
- the loop mask for the final iteration. */
- gcc_assert (ncopies == 1 && !slp_node);
- tree scalar_type = TREE_TYPE (STMT_VINFO_VECTYPE (stmt_info));
- tree mask = vect_get_loop_mask (gsi, &LOOP_VINFO_MASKS (loop_vinfo), 1,
- vectype, 0);
- tree scalar_res = gimple_build (&stmts, CFN_EXTRACT_LAST, scalar_type,
- mask, vec_lhs_phi);
+ /* Remove the existing phi defining lhs and create one copy from new_tree. */
+ tree lhs_phi = NULL_TREE;
+ gimple_stmt_iterator gsi;
+ for (gsi = gsi_start_phis (exit_bb);
+ !gsi_end_p (gsi); gsi_next (&gsi))
+ {
+ gimple *phi = gsi_stmt (gsi);
+ if ((gimple_phi_arg_def (phi, 0) == lhs))
+ {
+ remove_phi_node (&gsi, false);
+ lhs_phi = gimple_phi_result (phi);
+ gimple *copy = gimple_build_assign (lhs_phi, new_tree);
+ gsi_insert_before (&exit_gsi, copy, GSI_SAME_STMT);
+ break;
+ }
+ }
+ }
- /* Convert the extracted vector element to the required scalar type. */
- new_tree = gimple_convert (&stmts, lhs_type, scalar_res);
+ /* Replace use of lhs with the newly computed result. If the use stmt is a
+ single arg PHI, just replace all uses of the PHI result. It's necessary
+ because the lcssa PHI defining lhs may appear before the newly inserted
+ stmt. */
+ use_operand_p use_p;
+ FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, lhs)
+ if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt))
+ && !is_gimple_debug (use_stmt))
+ {
+ if (gimple_code (use_stmt) == GIMPLE_PHI
+ && gimple_phi_num_args (use_stmt) == 1)
+ {
+ replace_uses_by (gimple_phi_result (use_stmt), new_tree);
+ }
+ else
+ {
+ FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
+ SET_USE (use_p, new_tree);
+ }
+ update_stmt (use_stmt);
+ }
}
else
{
+ /* For basic-block vectorization simply insert the lane-extraction. */
tree bftype = TREE_TYPE (vectype);
if (VECTOR_BOOLEAN_TYPE_P (vectype))
bftype = build_nonstandard_integer_type (tree_to_uhwi (bitsize), 1);
- new_tree = build3 (BIT_FIELD_REF, bftype, vec_lhs_phi, bitsize, bitstart);
+ tree new_tree = build3 (BIT_FIELD_REF, bftype,
+ vec_lhs, bitsize, bitstart);
+ gimple_seq stmts = NULL;
new_tree = force_gimple_operand (fold_convert (lhs_type, new_tree),
&stmts, true, NULL_TREE);
- }
-
- if (stmts)
- {
- gimple_stmt_iterator exit_gsi = gsi_after_labels (exit_bb);
- gsi_insert_before (&exit_gsi, stmts, GSI_CONTINUE_LINKING);
-
- /* Remove existing phi from lhs and create one copy from new_tree. */
- tree lhs_phi = NULL_TREE;
- gimple_stmt_iterator gsi;
- for (gsi = gsi_start_phis (exit_bb); !gsi_end_p (gsi); gsi_next (&gsi))
- {
- gimple *phi = gsi_stmt (gsi);
- if ((gimple_phi_arg_def (phi, 0) == lhs))
- {
- remove_phi_node (&gsi, false);
- lhs_phi = gimple_phi_result (phi);
- gimple *copy = gimple_build_assign (lhs_phi, new_tree);
- gsi_insert_after (&exit_gsi, copy, GSI_CONTINUE_LINKING);
- break;
- }
- }
- }
-
- /* Replace use of lhs with newly computed result. If the use stmt is a
- single arg PHI, just replace all uses of PHI result. It's necessary
- because lcssa PHI defining lhs may be before newly inserted stmt. */
- use_operand_p use_p;
- FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, lhs)
- if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt))
- && !is_gimple_debug (use_stmt))
- {
- if (gimple_code (use_stmt) == GIMPLE_PHI
- && gimple_phi_num_args (use_stmt) == 1)
+ if (TREE_CODE (new_tree) == SSA_NAME
+ && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
+ SSA_NAME_OCCURS_IN_ABNORMAL_PHI (new_tree) = 1;
+ if (is_a <gphi *> (vec_stmt))
{
- replace_uses_by (gimple_phi_result (use_stmt), new_tree);
+ gimple_stmt_iterator si = gsi_after_labels (gimple_bb (vec_stmt));
+ gsi_insert_seq_before (&si, stmts, GSI_SAME_STMT);
}
else
{
- FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
- SET_USE (use_p, new_tree);
+ gimple_stmt_iterator si = gsi_for_stmt (vec_stmt);
+ gsi_insert_seq_after (&si, stmts, GSI_SAME_STMT);
}
- update_stmt (use_stmt);
+
+ /* Replace use of lhs with the newly computed result. If the use stmt is a
+ single arg PHI, just replace all uses of the PHI result. It's necessary
+ because the lcssa PHI defining lhs may appear before the newly inserted
+ stmt. */
+ use_operand_p use_p;
+ stmt_vec_info use_stmt_info;
+ FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, lhs)
+ if (!is_gimple_debug (use_stmt)
+ && (!(use_stmt_info = vinfo->lookup_stmt (use_stmt))
+ || !PURE_SLP_STMT (vect_stmt_to_vectorize (use_stmt_info))))
+ {
+ /* ??? This can happen when the live lane ends up being
+ used in a vector construction code-generated by an
+ external SLP node (and code-generation for that already
+ happened). See gcc.dg/vect/bb-slp-47.c.
+ Keeping the original scalar computation is what would happen
+ if that vector CTOR were not code-generated yet, so it is not
+ too bad.
+ ??? In fact we'd likely want to avoid this situation
+ in the first place. */
+ if (TREE_CODE (new_tree) == SSA_NAME
+ && !SSA_NAME_IS_DEFAULT_DEF (new_tree)
+ && gimple_code (use_stmt) != GIMPLE_PHI
+ && !vect_stmt_dominates_stmt_p (SSA_NAME_DEF_STMT (new_tree),
+ use_stmt))
+ {
+ enum tree_code code = gimple_assign_rhs_code (use_stmt);
+ gcc_assert (code == CONSTRUCTOR
+ || code == VIEW_CONVERT_EXPR
+ || CONVERT_EXPR_CODE_P (code));
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "Using original scalar computation for "
+ "live lane because use preceeds vector "
+ "def\n");
+ continue;
+ }
+ /* ??? It can also happen that we end up pulling a def into
+ a loop where replacing out-of-loop uses would require
+ a new LC SSA PHI node. Retain the original scalar in
+ those cases as well. PR98064. */
+ if (TREE_CODE (new_tree) == SSA_NAME
+ && !SSA_NAME_IS_DEFAULT_DEF (new_tree)
+ && (gimple_bb (use_stmt)->loop_father
+ != gimple_bb (vec_stmt)->loop_father)
+ && !flow_loop_nested_p (gimple_bb (vec_stmt)->loop_father,
+ gimple_bb (use_stmt)->loop_father))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "Using original scalar computation for "
+ "live lane because there is an out-of-loop "
+ "definition for it\n");
+ continue;
+ }
+ FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
+ SET_USE (use_p, new_tree);
+ update_stmt (use_stmt);
+ }
}
return true;
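
For reference, the two live-lane extraction strategies above boil down to:
the fully-masked path asks for the element in the last active lane
(CFN_EXTRACT_LAST on the final-iteration loop mask), while the unmasked path
reads the last lane outright via a BIT_FIELD_REF at bit offset
vec_bitsize - bitsize.  A minimal standalone sketch of those semantics,
using a fixed lane count and plain arrays rather than the internal
GIMPLE/tree API (all names here are illustrative only):

#include <assert.h>

#define NLANES 8

/* Fully-masked case: value of the highest-numbered active lane.  A live
   operation is guaranteed to have at least one active lane.  */
static int
extract_last (const _Bool mask[NLANES], const int vec[NLANES])
{
  for (int i = NLANES - 1; i >= 0; --i)
    if (mask[i])
      return vec[i];
  assert (0 && "no active lane");
  return 0;
}

/* Unmasked case: simply the last lane, i.e. the subobject starting at
   bit offset (NLANES - 1) * element_bits in BIT_FIELD_REF terms.  */
static int
extract_last_lane (const int vec[NLANES])
{
  return vec[NLANES - 1];
}
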
{
gcc_assert (nvectors != 0);
if (masks->length () < nvectors)
- masks->safe_grow_cleared (nvectors);
- rgroup_masks *rgm = &(*masks)[nvectors - 1];
+ masks->safe_grow_cleared (nvectors, true);
+ rgroup_controls *rgm = &(*masks)[nvectors - 1];
/* The number of scalars per iteration and the number of vectors are
both compile-time constants. */
unsigned int nscalars_per_iter
if (rgm->max_nscalars_per_iter < nscalars_per_iter)
{
rgm->max_nscalars_per_iter = nscalars_per_iter;
- rgm->mask_type = truth_type_for (vectype);
+ rgm->type = truth_type_for (vectype);
+ rgm->factor = 1;
}
}
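
The rgroup bookkeeping in vect_record_loop_mask above and vect_get_loop_mask
below keys everything off the number of vectors a statement needs per
iteration: the controls for statements needing NVECTORS vectors live in slot
NVECTORS - 1.  A worked example with assumed numbers (VF = 16, so a V16QI
access needs one vector per iteration and a V8HI access needs two), sketched
as plain C rather than the internal rgroup_controls type:

#include <stdio.h>

int
main (void)
{
  unsigned vf = 16;   /* scalar iterations per vector iteration (assumed) */
  struct { const char *name; unsigned nunits; } refs[] =
    { { "V16QI", 16 }, { "V8HI", 8 } };

  for (unsigned i = 0; i < 2; ++i)
    {
      unsigned nvectors = vf / refs[i].nunits;
      unsigned nscalars_per_iter = nvectors * refs[i].nunits / vf;
      printf ("%-5s: nvectors=%u rgroup slot=%u nscalars_per_iter=%u\n",
              refs[i].name, nvectors, nvectors - 1, nscalars_per_iter);
    }
  return 0;
}

Both accesses cover one scalar per iteration per control, but they land in
different rgroups because they need different numbers of masks.
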
vect_get_loop_mask (gimple_stmt_iterator *gsi, vec_loop_masks *masks,
unsigned int nvectors, tree vectype, unsigned int index)
{
- rgroup_masks *rgm = &(*masks)[nvectors - 1];
- tree mask_type = rgm->mask_type;
+ rgroup_controls *rgm = &(*masks)[nvectors - 1];
+ tree mask_type = rgm->type;
/* Populate the rgroup's mask array, if this is the first time we've
used it. */
- if (rgm->masks.is_empty ())
+ if (rgm->controls.is_empty ())
{
- rgm->masks.safe_grow_cleared (nvectors);
+ rgm->controls.safe_grow_cleared (nvectors, true);
for (unsigned int i = 0; i < nvectors; ++i)
{
tree mask = make_temp_ssa_name (mask_type, NULL, "loop_mask");
/* Provide a dummy definition until the real one is available. */
SSA_NAME_DEF_STMT (mask) = gimple_build_nop ();
- rgm->masks[i] = mask;
+ rgm->controls[i] = mask;
}
}
- tree mask = rgm->masks[index];
+ tree mask = rgm->controls[index];
if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_type),
TYPE_VECTOR_SUBPARTS (vectype)))
{
return mask;
}
+/* Record that LOOP_VINFO would need LENS to contain a sequence of NVECTORS
+ lengths for controlling an operation on VECTYPE. The operation splits
+ each element of VECTYPE into FACTOR separate subelements, measuring the
+ length as a number of these subelements. */
+
+void
+vect_record_loop_len (loop_vec_info loop_vinfo, vec_loop_lens *lens,
+ unsigned int nvectors, tree vectype, unsigned int factor)
+{
+ gcc_assert (nvectors != 0);
+ if (lens->length () < nvectors)
+ lens->safe_grow_cleared (nvectors, true);
+ rgroup_controls *rgl = &(*lens)[nvectors - 1];
+
+ /* The number of scalars per iteration, the number of bytes occupied by
+ each scalar and the number of vectors are all compile-time constants. */
+ unsigned int nscalars_per_iter
+ = exact_div (nvectors * TYPE_VECTOR_SUBPARTS (vectype),
+ LOOP_VINFO_VECT_FACTOR (loop_vinfo)).to_constant ();
+
+ if (rgl->max_nscalars_per_iter < nscalars_per_iter)
+ {
+ /* For now, we only support cases in which all loads and stores fall back
+ to VnQI or none do. */
+ gcc_assert (!rgl->max_nscalars_per_iter
+ || (rgl->factor == 1 && factor == 1)
+ || (rgl->max_nscalars_per_iter * rgl->factor
+ == nscalars_per_iter * factor));
+ rgl->max_nscalars_per_iter = nscalars_per_iter;
+ rgl->type = vectype;
+ rgl->factor = factor;
+ }
+}
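
To make the FACTOR bookkeeping concrete, here is a hedged worked example
(the concrete VF, vector type and the byte-measured fallback are assumed for
illustration): with VF = 4 and a single V4SI vector per iteration,
nscalars_per_iter is 1; a length measured in SI elements uses factor 1,
while a fallback to byte-granular (VnQI-style) lengths uses factor 4, so the
same one element of work per scalar iteration is expressed as four
byte-sized subelements.

#include <stdio.h>

int
main (void)
{
  unsigned vf = 4, nunits = 4, nvectors = 1;   /* one V4SI per iteration */
  unsigned nscalars_per_iter = nvectors * nunits / vf;   /* = 1 */

  unsigned factor_elements = 1;   /* lengths counted in SI elements */
  unsigned factor_bytes = 4;      /* lengths counted in QI subelements */

  printf ("element-measured units per scalar iteration: %u\n",
          nscalars_per_iter * factor_elements);
  printf ("byte-measured units per scalar iteration:    %u\n",
          nscalars_per_iter * factor_bytes);
  return 0;
}

The assert above then requires that, within an rgroup, either everything is
element-measured (factor 1) or the products nscalars_per_iter * factor agree.
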
+
+/* Given a complete set of lengths LENS, extract length number INDEX for an
+ rgroup that operates on NVECTORS vectors, where 0 <= INDEX < NVECTORS. */
+
+tree
+vect_get_loop_len (loop_vec_info loop_vinfo, vec_loop_lens *lens,
+ unsigned int nvectors, unsigned int index)
+{
+ rgroup_controls *rgl = &(*lens)[nvectors - 1];
+
+ /* Populate the rgroup's len array, if this is the first time we've
+ used it. */
+ if (rgl->controls.is_empty ())
+ {
+ rgl->controls.safe_grow_cleared (nvectors, true);
+ for (unsigned int i = 0; i < nvectors; ++i)
+ {
+ tree len_type = LOOP_VINFO_RGROUP_COMPARE_TYPE (loop_vinfo);
+ gcc_assert (len_type != NULL_TREE);
+ tree len = make_temp_ssa_name (len_type, NULL, "loop_len");
+
+ /* Provide a dummy definition until the real one is available. */
+ SSA_NAME_DEF_STMT (len) = gimple_build_nop ();
+ rgl->controls[i] = len;
+ }
+ }
+
+ return rgl->controls[index];
+}
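
vect_get_loop_len follows the same pattern as vect_get_loop_mask: the control
vector is grown lazily to NVECTORS entries on first use, each entry gets a
placeholder definition, and the requested INDEX is returned.  A minimal
standalone sketch of that grow-on-first-use pattern (plain C, with an assumed
container type standing in for the vec of SSA names):

#include <stdio.h>
#include <stdlib.h>

struct rgroup
{
  unsigned ncontrols;
  int *controls;   /* placeholders until the real definitions exist */
};

static int
get_control (struct rgroup *rg, unsigned nvectors, unsigned index)
{
  if (rg->ncontrols == 0)
    {
      rg->controls = calloc (nvectors, sizeof *rg->controls);
      rg->ncontrols = nvectors;
      for (unsigned i = 0; i < nvectors; ++i)
        rg->controls[i] = -1;   /* dummy, like the gimple_build_nop def */
    }
  return rg->controls[index];
}

int
main (void)
{
  struct rgroup rg = { 0, 0 };
  printf ("%d\n", get_control (&rg, 2, 1));   /* -1: placeholder */
  free (rg.controls);
  return 0;
}
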
+
/* Scale profiling counters by estimation for LOOP which is vectorized
by factor VF. */
scale_bbs_frequencies (&loop->latch, 1, exit_l->probability / prob);
}
+/* For a vectorized stmt DEF_STMT_INFO, fill in the latch edge values of all
+ vectorized PHIs whose scalar latch value was originally defined by it. */
+
+static void
+maybe_set_vectorized_backedge_value (loop_vec_info loop_vinfo,
+ stmt_vec_info def_stmt_info)
+{
+ tree def = gimple_get_lhs (vect_orig_stmt (def_stmt_info)->stmt);
+ if (!def || TREE_CODE (def) != SSA_NAME)
+ return;
+ stmt_vec_info phi_info;
+ imm_use_iterator iter;
+ use_operand_p use_p;
+ FOR_EACH_IMM_USE_FAST (use_p, iter, def)
+ if (gphi *phi = dyn_cast <gphi *> (USE_STMT (use_p)))
+ if (gimple_bb (phi)->loop_father->header == gimple_bb (phi)
+ && (phi_info = loop_vinfo->lookup_stmt (phi))
+ && VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (phi_info))
+ && STMT_VINFO_REDUC_TYPE (phi_info) != FOLD_LEFT_REDUCTION
+ && STMT_VINFO_REDUC_TYPE (phi_info) != EXTRACT_LAST_REDUCTION)
+ {
+ loop_p loop = gimple_bb (phi)->loop_father;
+ edge e = loop_latch_edge (loop);
+ if (PHI_ARG_DEF_FROM_EDGE (phi, e) == def)
+ {
+ vec<gimple *> &phi_defs = STMT_VINFO_VEC_STMTS (phi_info);
+ vec<gimple *> &latch_defs = STMT_VINFO_VEC_STMTS (def_stmt_info);
+ gcc_assert (phi_defs.length () == latch_defs.length ());
+ for (unsigned i = 0; i < phi_defs.length (); ++i)
+ add_phi_arg (as_a <gphi *> (phi_defs[i]),
+ gimple_get_lhs (latch_defs[i]), e,
+ gimple_phi_arg_location (phi, e->dest_idx));
+ }
+ }
+}
+
/* Vectorize STMT_INFO if relevant, inserting any new instructions before GSI.
When vectorizing STMT_INFO as a store, set *SEEN_STORE to its
stmt_vec_info. */
-static void
+static bool
vect_transform_loop_stmt (loop_vec_info loop_vinfo, stmt_vec_info stmt_info,
gimple_stmt_iterator *gsi, stmt_vec_info *seen_store)
{
if (!STMT_VINFO_RELEVANT_P (stmt_info)
&& !STMT_VINFO_LIVE_P (stmt_info))
- return;
+ return false;
if (STMT_VINFO_VECTYPE (stmt_info))
{
/* Pure SLP statements have already been vectorized. We still need
to apply loop vectorization to hybrid SLP statements. */
if (PURE_SLP_STMT (stmt_info))
- return;
+ return false;
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location, "transform statement.\n");
- if (vect_transform_stmt (stmt_info, gsi, NULL, NULL))
+ if (vect_transform_stmt (loop_vinfo, stmt_info, gsi, NULL, NULL))
*seen_store = stmt_info;
+
+ return true;
}
/* Helper function to pass to simplify_replace_tree to enable replacing tree's
basic_block *epilogue_bbs = get_loop_body (epilogue);
unsigned i;
+ free (LOOP_VINFO_BBS (epilogue_vinfo));
LOOP_VINFO_BBS (epilogue_vinfo) = epilogue_bbs;
/* Advance data_reference's with the number of iterations of the previous
!gsi_end_p (epilogue_gsi); gsi_next (&epilogue_gsi))
{
new_stmt = gsi_stmt (epilogue_gsi);
+ if (is_gimple_debug (new_stmt))
+ continue;
gcc_assert (gimple_uid (new_stmt) > 0);
stmt_vinfo
}
struct data_reference *dr;
- vec<data_reference_p> datarefs = epilogue_vinfo->shared->datarefs;
+ vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (epilogue_vinfo);
FOR_EACH_VEC_ELT (datarefs, i, dr)
{
orig_stmt = DR_STMT (dr);
if (niters_vector == NULL_TREE)
{
if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
- && !LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
+ && !LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)
&& known_eq (lowest_vf, vf))
{
niters_vector
LOOP_VINFO_INT_NITERS (loop_vinfo) / lowest_vf);
step_vector = build_one_cst (TREE_TYPE (niters));
}
- else
+ else if (vect_use_loop_mask_for_alignment_p (loop_vinfo))
vect_gen_vector_loop_niters (loop_vinfo, niters, &niters_vector,
&step_vector, niters_no_overflow);
+ else
+ /* vect_do_peeling subtracted the number of peeled prologue
+ iterations from LOOP_VINFO_NITERS. */
+ vect_gen_vector_loop_niters (loop_vinfo, LOOP_VINFO_NITERS (loop_vinfo),
+ &niters_vector, &step_vector,
+ niters_no_overflow);
}
/* 1) Make sure the loop header has exactly two entries
split_edge (loop_preheader_edge (loop));
- if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
- && vect_use_loop_mask_for_alignment_p (loop_vinfo))
+ if (vect_use_loop_mask_for_alignment_p (loop_vinfo))
/* This will deal with any possible peeling. */
vect_prepare_for_masked_peels (loop_vinfo);
if (!loop_vinfo->slp_instances.is_empty ())
{
DUMP_VECT_SCOPE ("scheduling SLP instances");
- vect_schedule_slp (loop_vinfo);
+ vect_schedule_slp (loop_vinfo, LOOP_VINFO_SLP_INSTANCES (loop_vinfo));
}
/* FORNOW: the vectorizer supports only loops which body consist
for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si);
gsi_next (&si))
- {
+ {
gphi *phi = si.phi ();
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
{
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location, "transform phi.\n");
- vect_transform_stmt (stmt_info, NULL, NULL, NULL);
+ vect_transform_stmt (loop_vinfo, stmt_info, NULL, NULL, NULL);
}
}
+ for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si);
+ gsi_next (&si))
+ {
+ gphi *phi = si.phi ();
+ stmt_info = loop_vinfo->lookup_stmt (phi);
+ if (!stmt_info)
+ continue;
+
+ if (!STMT_VINFO_RELEVANT_P (stmt_info)
+ && !STMT_VINFO_LIVE_P (stmt_info))
+ continue;
+
+ if ((STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def
+ || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
+ || STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def
+ || STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
+ || STMT_VINFO_DEF_TYPE (stmt_info) == vect_internal_def)
+ && ! PURE_SLP_STMT (stmt_info))
+ maybe_set_vectorized_backedge_value (loop_vinfo, stmt_info);
+ }
+
for (gimple_stmt_iterator si = gsi_start_bb (bb);
!gsi_end_p (si);)
{
}
else
{
+ /* Ignore vector stmts created in the outer loop. */
stmt_info = loop_vinfo->lookup_stmt (stmt);
/* vector stmts created in the outer-loop during vectorization of
}
stmt_vec_info pat_stmt_info
= STMT_VINFO_RELATED_STMT (stmt_info);
- vect_transform_loop_stmt (loop_vinfo, pat_stmt_info, &si,
- &seen_store);
+ if (vect_transform_loop_stmt (loop_vinfo, pat_stmt_info,
+ &si, &seen_store))
+ maybe_set_vectorized_backedge_value (loop_vinfo,
+ pat_stmt_info);
+ }
+ else
+ {
+ if (vect_transform_loop_stmt (loop_vinfo, stmt_info, &si,
+ &seen_store))
+ maybe_set_vectorized_backedge_value (loop_vinfo,
+ stmt_info);
}
- vect_transform_loop_stmt (loop_vinfo, stmt_info, &si,
- &seen_store);
}
gsi_next (&si);
if (seen_store)
/* Interleaving. If IS_STORE is TRUE, the
vectorization of the interleaving chain was
completed - free all the stores in the chain. */
- vect_remove_stores (DR_GROUP_FIRST_ELEMENT (seen_store));
+ vect_remove_stores (loop_vinfo,
+ DR_GROUP_FIRST_ELEMENT (seen_store));
else
/* Free the attached stmt_vec_info and remove the stmt. */
loop_vinfo->remove_stmt (stmt_info);
/* True if the final iteration might not handle a full vector's
worth of scalar iterations. */
- bool final_iter_may_be_partial = LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
+ bool final_iter_may_be_partial
+ = LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo);
/* The minimum number of iterations performed by the epilogue. This
is 1 when peeling for gaps because we always need a final scalar
iteration. */
int bias_for_lowest = 1 - min_epilogue_iters;
int bias_for_assumed = bias_for_lowest;
int alignment_npeels = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
- if (alignment_npeels && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
+ if (alignment_npeels && LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo))
{
/* When the amount of peeling is known at compile time, the first
iteration will have exactly alignment_npeels active elements.
won't work. */
slp_instance instance;
FOR_EACH_VEC_ELT (LOOP_VINFO_SLP_INSTANCES (loop_vinfo), i, instance)
- vect_free_slp_instance (instance, true);
+ vect_free_slp_instance (instance);
LOOP_VINFO_SLP_INSTANCES (loop_vinfo).release ();
/* Clear-up safelen field since its value is invalid after vectorization
since vectorized loop can have loop-carried dependencies. */
}
/* Decide whether it is possible to use a zero-based induction variable
- when vectorizing LOOP_VINFO with a fully-masked loop. If it is,
- return the value that the induction variable must be able to hold
- in order to ensure that the loop ends with an all-false mask.
+ when vectorizing LOOP_VINFO with partial vectors. If it is, return
+ the value that the induction variable must be able to hold in order
+ to ensure that the rgroups eventually have no active vector elements.
Return -1 otherwise. */
+
widest_int
-vect_iv_limit_for_full_masking (loop_vec_info loop_vinfo)
+vect_iv_limit_for_partial_vectors (loop_vec_info loop_vinfo)
{
tree niters_skip = LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo);
class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
return iv_limit;
}
+/* For the given rgroup_controls RGC, check whether an induction variable
+ would ever hit a value that produces a set of all-false masks or zero
+ lengths before wrapping around. Return true if the IV might wrap around
+ before reaching such a value, otherwise return false. */
+
+bool
+vect_rgroup_iv_might_wrap_p (loop_vec_info loop_vinfo, rgroup_controls *rgc)
+{
+ widest_int iv_limit = vect_iv_limit_for_partial_vectors (loop_vinfo);
+
+ if (iv_limit == -1)
+ return true;
+
+ tree compare_type = LOOP_VINFO_RGROUP_COMPARE_TYPE (loop_vinfo);
+ unsigned int compare_precision = TYPE_PRECISION (compare_type);
+ unsigned nitems = rgc->max_nscalars_per_iter * rgc->factor;
+
+ if (wi::min_precision (iv_limit * nitems, UNSIGNED) > compare_precision)
+ return true;
+
+ return false;
+}
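
A worked instance of the precision test in vect_rgroup_iv_might_wrap_p
(assumed numbers): with a 32-bit compare type, an IV limit of 2^32 - 1 and
nitems = 2, the product needs 33 bits, so the IV could wrap before all
controls go inactive and the function returns true; with nitems = 1 the
product still fits in 32 bits and it returns false.  A plain-integer
stand-in for the wi::min_precision check:

#include <stdio.h>
#include <stdint.h>

static int
might_wrap_p (uint64_t iv_limit, unsigned nitems, unsigned compare_precision)
{
  /* Bits needed to represent iv_limit * nitems as an unsigned value
     (the example values keep the product well within 64 bits).  */
  uint64_t product = iv_limit * nitems;
  unsigned needed = 0;
  while (product)
    {
      ++needed;
      product >>= 1;
    }
  return needed > compare_precision;
}

int
main (void)
{
  printf ("%d\n", might_wrap_p (UINT32_MAX, 2, 32));   /* 1: might wrap */
  printf ("%d\n", might_wrap_p (UINT32_MAX, 1, 32));   /* 0: fits */
  return 0;
}
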