unsigned i;
FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo), i, first)
- if (STMT_VINFO_IN_PATTERN_P (first))
- {
- stmt_vec_info next = REDUC_GROUP_NEXT_ELEMENT (first);
- while (next)
- {
- if (! STMT_VINFO_IN_PATTERN_P (next)
- || STMT_VINFO_REDUC_IDX (STMT_VINFO_RELATED_STMT (next)) == -1)
- break;
- next = REDUC_GROUP_NEXT_ELEMENT (next);
- }
- /* If not all stmt in the chain are patterns or if we failed
- to update STMT_VINFO_REDUC_IDX try to handle the chain
- without patterns. */
- if (! next
- && STMT_VINFO_REDUC_IDX (STMT_VINFO_RELATED_STMT (first)) != -1)
- {
- vect_fixup_reduc_chain (first);
- LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo)[i]
- = STMT_VINFO_RELATED_STMT (first);
- }
- }
+ {
+ stmt_vec_info next = REDUC_GROUP_NEXT_ELEMENT (first);
+ while (next)
+ {
+ if ((STMT_VINFO_IN_PATTERN_P (next)
+ != STMT_VINFO_IN_PATTERN_P (first))
+ || STMT_VINFO_REDUC_IDX (vect_stmt_to_vectorize (next)) == -1)
+ break;
+ next = REDUC_GROUP_NEXT_ELEMENT (next);
+ }
+ /* If all reduction chain members are well-formed patterns, adjust
+ the group to group the pattern stmts instead. */
+ if (! next
+ && STMT_VINFO_REDUC_IDX (vect_stmt_to_vectorize (first)) != -1)
+ {
+ if (STMT_VINFO_IN_PATTERN_P (first))
+ {
+ vect_fixup_reduc_chain (first);
+ LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo)[i]
+ = STMT_VINFO_RELATED_STMT (first);
+ }
+ }
+ /* If not all stmts in the chain are patterns or if we failed
+ to update STMT_VINFO_REDUC_IDX, dissolve the chain and handle
+ it as a regular reduction instead. */
+ else
+ {
+ stmt_vec_info vinfo = first;
+ stmt_vec_info last = NULL;
+ while (vinfo)
+ {
+ next = REDUC_GROUP_NEXT_ELEMENT (vinfo);
+ REDUC_GROUP_FIRST_ELEMENT (vinfo) = NULL;
+ REDUC_GROUP_NEXT_ELEMENT (vinfo) = NULL;
+ last = vinfo;
+ vinfo = next;
+ }
+ STMT_VINFO_DEF_TYPE (vect_stmt_to_vectorize (first))
+ = vect_internal_def;
+ loop_vinfo->reductions.safe_push (vect_stmt_to_vectorize (last));
+ LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo).unordered_remove (i);
+ --i;
+ }
+ }
}
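For context, a minimal example of the kind of reduction chain the hunk above adjusts (illustrative only; the function name is made up and not part of the patch). The four accumulations into sum form one chain; with a narrower element type, pattern recognition may rewrite only some of its members, in which case the chain is now dissolved and handled as a regular reduction.

int
sum4 (const short *a, int n)
{
  int sum = 0;
  for (int i = 0; i < n; ++i)
    {
      /* These four statements form a single reduction chain on sum.  */
      sum += a[4 * i + 0];
      sum += a[4 * i + 1];
      sum += a[4 * i + 2];
      sum += a[4 * i + 3];
    }
  return sum;
}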
/* Function vect_get_loop_niters.
vec_outside_cost (0),
vec_inside_cost (0),
vectorizable (false),
- can_use_partial_vectors_p (true),
+ can_use_partial_vectors_p (param_vect_partial_vector_usage != 0),
using_partial_vectors_p (false),
epil_using_partial_vectors_p (false),
peeling_for_gaps (false),
}
}
+ /* If using the "very cheap" model, reject cases in which we'd keep
+ a copy of the scalar code (even if we might be able to vectorize it). */
+ if (flag_vect_cost_model == VECT_COST_MODEL_VERY_CHEAP
+ && (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)
+ || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
+ || LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "some scalar iterations would need to be peeled\n");
+ return 0;
+ }
+
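As an illustration (not taken from the patch; the function name is invented), a loop like the following would typically require an epilogue because n is not known to be a multiple of the vectorization factor; with -fvect-cost-model=very-cheap the check above now refuses to vectorize it rather than keep a scalar copy, unless the target can handle the tail with partial vectors.

void
add_arrays (int *restrict a, const int *restrict b, int n)
{
  /* Unknown trip count: peeling for niters would be required.  */
  for (int i = 0; i < n; ++i)
    a[i] += b[i];
}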
int min_profitable_iters, min_profitable_estimate;
vect_estimate_min_profitable_iters (loop_vinfo, &min_profitable_iters,
&min_profitable_estimate);
min_profitable_estimate = min_profitable_iters;
}
+ /* If the vector loop needs multiple iterations to be beneficial then
+ things are probably too close to call, and the conservative thing
+ would be to stick with the scalar code. */
+ if (flag_vect_cost_model == VECT_COST_MODEL_VERY_CHEAP
+ && min_profitable_estimate > (int) vect_vf_for_cost (loop_vinfo))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "one iteration of the vector loop would be"
+ " more expensive than the equivalent number of"
+ " iterations of the scalar loop\n");
+ return 0;
+ }
+
HOST_WIDE_INT estimated_niter;
/* If we are vectorizing an epilogue then we know the maximum number of
}
}
+/* Determine if operating on full vectors for LOOP_VINFO might leave
+ some scalar iterations still to do. If so, decide how we should
+ handle those scalar iterations. The possibilities are:
-/* Decides whether we need to create an epilogue loop to handle
- remaining scalar iterations and sets PEELING_FOR_NITERS accordingly. */
+ (1) Make LOOP_VINFO operate on partial vectors instead of full vectors.
+ In this case:
-void
-determine_peel_for_niter (loop_vec_info loop_vinfo)
+ LOOP_VINFO_USING_PARTIAL_VECTORS_P == true
+ LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P == false
+ LOOP_VINFO_PEELING_FOR_NITER == false
+
+ (2) Make LOOP_VINFO operate on full vectors and use an epilogue loop
+ to handle the remaining scalar iterations. In this case:
+
+ LOOP_VINFO_USING_PARTIAL_VECTORS_P == false
+ LOOP_VINFO_PEELING_FOR_NITER == true
+
+ There are two choices:
+
+ (2a) Consider vectorizing the epilogue loop at the same VF as the
+ main loop, but using partial vectors instead of full vectors.
+ In this case:
+
+ LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P == true
+
+ (2b) Consider vectorizing the epilogue loop at lower VFs only.
+ In this case:
+
+ LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P == false
+
+ When FOR_EPILOGUE_P is true, make this determination based on the
+ assumption that LOOP_VINFO is an epilogue loop, otherwise make it
+ based on the assumption that LOOP_VINFO is the main loop. The caller
+ has made sure that the number of iterations is set appropriately for
+ this value of FOR_EPILOGUE_P. */
+
+opt_result
+vect_determine_partial_vectors_and_peeling (loop_vec_info loop_vinfo,
+ bool for_epilogue_p)
{
- LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = false;
+ /* Determine whether there would be any scalar iterations left over. */
+ bool need_peeling_or_partial_vectors_p
+ = vect_need_peeling_or_partial_vectors_p (loop_vinfo);
- if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo))
- /* The main loop handles all iterations. */
- LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = false;
- else if (vect_need_peeling_or_partial_vectors_p (loop_vinfo))
- LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = true;
-}
+ /* Decide whether to vectorize the loop with partial vectors. */
+ LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo) = false;
+ LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P (loop_vinfo) = false;
+ if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
+ && need_peeling_or_partial_vectors_p)
+ {
+ /* For partial-vector-usage=1, try to push the handling of partial
+ vectors to the epilogue, with the main loop continuing to operate
+ on full vectors.
+
+ ??? We could then end up failing to use partial vectors if we
+ decide to peel iterations into a prologue, and if the main loop
+ then ends up processing fewer than VF iterations. */
+ if (param_vect_partial_vector_usage == 1
+ && !LOOP_VINFO_EPILOGUE_P (loop_vinfo)
+ && !vect_known_niters_smaller_than_vf (loop_vinfo))
+ LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P (loop_vinfo) = true;
+ else
+ LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo) = true;
+ }
+
+ if (dump_enabled_p ())
+ {
+ if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo))
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "operating on partial vectors%s.\n",
+ for_epilogue_p ? " for epilogue loop" : "");
+ else
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "operating only on full vectors%s.\n",
+ for_epilogue_p ? " for epilogue loop" : "");
+ }
+ if (for_epilogue_p)
+ {
+ loop_vec_info orig_loop_vinfo = LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo);
+ gcc_assert (orig_loop_vinfo);
+ if (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo))
+ gcc_assert (known_lt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
+ LOOP_VINFO_VECT_FACTOR (orig_loop_vinfo)));
+ }
+
+ if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
+ && !LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo))
+ {
+ /* Check that the loop processes at least one full vector. */
+ poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+ tree scalar_niters = LOOP_VINFO_NITERS (loop_vinfo);
+ if (known_lt (wi::to_widest (scalar_niters), vf))
+ return opt_result::failure_at (vect_location,
+ "loop does not have enough iterations"
+ " to support vectorization.\n");
+
+ /* If we need to peel an extra epilogue iteration to handle data
+ accesses with gaps, check that there are enough scalar iterations
+ available.
+
+ The check above is redundant with this one when peeling for gaps,
+ but the distinction is useful for diagnostics. */
+ tree scalar_nitersm1 = LOOP_VINFO_NITERSM1 (loop_vinfo);
+ if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
+ && known_lt (wi::to_widest (scalar_nitersm1), vf))
+ return opt_result::failure_at (vect_location,
+ "loop does not have enough iterations"
+ " to support peeling for gaps.\n");
+ }
+
+ LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)
+ = (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)
+ && need_peeling_or_partial_vectors_p);
+
+ return opt_result::success ();
+}
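To make the cases documented above concrete, here is a hand-written C sketch (illustrative only; vector length, function names and the loop shape are assumptions) contrasting strategy (1), where the main loop handles the tail through a length or mask control, with strategy (2), where a full-vector main loop is followed by an epilogue for the leftover iterations.

#define VL 4  /* assumed vector length for illustration */

/* (1) Partial vectors: every iteration, including the last, processes
   a possibly partial vector under a length (or mask) control.  */
static void
with_partial_vectors (int *a, const int *b, int n)
{
  for (int i = 0; i < n; i += VL)
    {
      int len = n - i < VL ? n - i : VL;  /* loop control for this iteration */
      for (int j = 0; j < len; ++j)       /* stands in for one masked vector op */
        a[i + j] += b[i + j];
    }
}

/* (2) Full vectors plus epilogue: the main loop covers n / VL * VL
   iterations, the epilogue the remaining n % VL ones, either with
   partial vectors at the same VF (2a) or at a lower VF / scalar (2b).  */
static void
with_epilogue (int *a, const int *b, int n)
{
  int i = 0;
  for (; i + VL <= n; i += VL)
    for (int j = 0; j < VL; ++j)          /* full-vector body */
      a[i + j] += b[i + j];
  for (; i < n; ++i)                      /* scalar epilogue */
    a[i] += b[i];
}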
/* Function vect_analyze_loop_2.
/* Optimize the SLP graph with the vectorization factor fixed. */
vect_optimize_slp (loop_vinfo);
+
+ /* Gather the loads reachable from the SLP graph entries. */
+ vect_gather_slp_loads (loop_vinfo);
}
bool saved_can_use_partial_vectors_p
"unsupported SLP instances\n");
goto again;
}
+
+ /* Check whether any load in ALL SLP instances is possibly permuted. */
+ slp_tree load_node, slp_root;
+ unsigned i, x;
+ slp_instance instance;
+ bool can_use_lanes = true;
+ FOR_EACH_VEC_ELT (LOOP_VINFO_SLP_INSTANCES (loop_vinfo), x, instance)
+ {
+ slp_root = SLP_INSTANCE_TREE (instance);
+ int group_size = SLP_TREE_LANES (slp_root);
+ tree vectype = SLP_TREE_VECTYPE (slp_root);
+ bool loads_permuted = false;
+ FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, load_node)
+ {
+ if (!SLP_TREE_LOAD_PERMUTATION (load_node).exists ())
+ continue;
+ unsigned j;
+ stmt_vec_info load_info;
+ FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load_info)
+ if (SLP_TREE_LOAD_PERMUTATION (load_node)[j] != j)
+ {
+ loads_permuted = true;
+ break;
+ }
+ }
+
+ /* If the loads and stores can be handled with load/store-lane
+ instructions, record it and move on to the next instance. */
+ if (loads_permuted
+ && SLP_INSTANCE_KIND (instance) == slp_inst_kind_store
+ && vect_store_lanes_supported (vectype, group_size, false))
+ {
+ FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, load_node)
+ {
+ stmt_vec_info stmt_vinfo = DR_GROUP_FIRST_ELEMENT
+ (SLP_TREE_SCALAR_STMTS (load_node)[0]);
+ /* Use SLP for strided accesses (or if we can't
+ use load-lanes). */
+ if (STMT_VINFO_STRIDED_P (stmt_vinfo)
+ || ! vect_load_lanes_supported
+ (STMT_VINFO_VECTYPE (stmt_vinfo),
+ DR_GROUP_SIZE (stmt_vinfo), false))
+ break;
+ }
+
+ can_use_lanes
+ = can_use_lanes && i == SLP_INSTANCE_LOADS (instance).length ();
+
+ if (can_use_lanes && dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "SLP instance %p can use load/store-lanes\n",
+ instance);
+ }
+ else
+ {
+ can_use_lanes = false;
+ break;
+ }
+ }
+
+ /* If all SLP instances can use load/store-lanes, abort SLP and try again
+ with SLP disabled. */
+ if (can_use_lanes)
+ {
+ ok = opt_result::failure_at (vect_location,
+ "Built SLP cancelled: can use "
+ "load/store-lanes\n");
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "Built SLP cancelled: all SLP instances support "
+ "load/store-lanes\n");
+ goto again;
+ }
}
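For reference, an illustrative loop of the kind this logic targets (not part of the patch; the function name is invented): the interleaved loads and stores would need load permutations under SLP, so on targets that provide load/store-lane instructions (for example AArch64 ld2/st2) the SLP build is cancelled in favour of the lane-based scheme.

void
swap_pairs (int *restrict out, const int *restrict in, int n)
{
  for (int i = 0; i < n; ++i)
    {
      /* Grouped, permuted accesses with a group size of two.  */
      out[2 * i + 0] = in[2 * i + 1];
      out[2 * i + 1] = in[2 * i + 0];
    }
}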
/* Dissolve SLP-only groups. */
LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
}
- /* Decide whether to vectorize a loop with partial vectors for
- this vectorization factor. */
- if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
- {
- /* Don't use partial vectors if we don't need to peel the loop. */
- if (param_vect_partial_vector_usage == 0
- || !vect_need_peeling_or_partial_vectors_p (loop_vinfo))
- LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo) = false;
- else if (vect_verify_full_masking (loop_vinfo)
- || vect_verify_loop_lens (loop_vinfo))
- {
- /* The epilogue and other known niters less than VF
- cases can still use vector access with length fully. */
- if (param_vect_partial_vector_usage == 1
- && !LOOP_VINFO_EPILOGUE_P (loop_vinfo)
- && !vect_known_niters_smaller_than_vf (loop_vinfo))
- {
- LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo) = false;
- LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P (loop_vinfo) = true;
- }
- else
- LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo) = true;
- }
- else
- LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo) = false;
- }
- else
- LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo) = false;
-
- if (dump_enabled_p ())
- {
- if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo))
- dump_printf_loc (MSG_NOTE, vect_location,
- "operating on partial vectors.\n");
- else
- dump_printf_loc (MSG_NOTE, vect_location,
- "operating only on full vectors.\n");
- }
-
- /* If epilog loop is required because of data accesses with gaps,
- one additional iteration needs to be peeled. Check if there is
- enough iterations for vectorization. */
- if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
- && LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
- && !LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo))
- {
- poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
- tree scalar_niters = LOOP_VINFO_NITERSM1 (loop_vinfo);
-
- if (known_lt (wi::to_widest (scalar_niters), vf))
- return opt_result::failure_at (vect_location,
- "loop has no enough iterations to"
- " support peeling for gaps.\n");
- }
+ /* If we still have the option of using partial vectors,
+ check whether we can generate the necessary loop controls. */
+ if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
+ && !vect_verify_full_masking (loop_vinfo)
+ && !vect_verify_loop_lens (loop_vinfo))
+ LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
/* If we're vectorizing an epilogue loop, the vectorized loop either needs
to be able to handle fewer than VF scalars, or needs to have a lower VF
than the main loop. */
if (LOOP_VINFO_EPILOGUE_P (loop_vinfo)
- && !LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)
+ && !LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
&& maybe_ge (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
LOOP_VINFO_VECT_FACTOR (orig_loop_vinfo)))
return opt_result::failure_at (vect_location,
"Vectorization factor too high for"
" epilogue loop.\n");
+ /* Decide whether this loop_vinfo should use partial vectors or peeling,
+ assuming that the loop will be used as a main loop. We will redo
+ this analysis later if we instead decide to use the loop as an
+ epilogue loop. */
+ ok = vect_determine_partial_vectors_and_peeling (loop_vinfo, false);
+ if (!ok)
+ return ok;
+
/* Check the costings of the loop make vectorizing worthwhile. */
res = vect_analyze_loop_costing (loop_vinfo);
if (res < 0)
return opt_result::failure_at (vect_location,
"Loop costings not worthwhile.\n");
- determine_peel_for_niter (loop_vinfo);
/* If an epilogue loop is required make sure we can create one. */
if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
|| LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo))
fail = true;
break;
}
- /* Check there's only a single stmt the op is used on inside
- of the loop. */
+ /* Check there's only a single stmt the op is used on. For the
+ non-value-changing tail and the last stmt allow out-of-loop uses.
+ ??? We could relax this and handle arbitrary live stmts by
+ forcing a scalar epilogue, for example. */
imm_use_iterator imm_iter;
gimple *op_use_stmt;
unsigned cnt = 0;
FOR_EACH_IMM_USE_STMT (op_use_stmt, imm_iter, op)
if (!is_gimple_debug (op_use_stmt)
- && flow_bb_inside_loop_p (loop, gimple_bb (op_use_stmt)))
+ && (*code != ERROR_MARK
+ || flow_bb_inside_loop_p (loop, gimple_bb (op_use_stmt))))
{
/* We want to allow x + x but not x < 1 ? x : 2. */
if (is_gimple_assign (op_use_stmt)
}
-/* Function vect_model_induction_cost.
-
- Models cost for induction operations. */
-
-static void
-vect_model_induction_cost (stmt_vec_info stmt_info, int ncopies,
- stmt_vector_for_cost *cost_vec)
-{
- unsigned inside_cost, prologue_cost;
-
- if (PURE_SLP_STMT (stmt_info))
- return;
-
- /* loop cost for vec_loop. */
- inside_cost = record_stmt_cost (cost_vec, ncopies, vector_stmt,
- stmt_info, 0, vect_body);
-
- /* prologue cost for vec_init and vec_step. */
- prologue_cost = record_stmt_cost (cost_vec, 2, scalar_to_vec,
- stmt_info, 0, vect_prologue);
-
- if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location,
- "vect_model_induction_cost: inside_cost = %d, "
- "prologue_cost = %d .\n", inside_cost, prologue_cost);
-}
-
-
/* Function get_initial_def_for_reduction
if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle)
{
if (is_a <gphi *> (stmt_info->stmt))
- /* Analysis for double-reduction is done on the outer
- loop PHI, nested cycles have no further restrictions. */
- STMT_VINFO_TYPE (stmt_info) = cycle_phi_info_type;
+ {
+ if (slp_node)
+ {
+ /* We eventually need to set a vector type on invariant
+ arguments. */
+ unsigned j;
+ slp_tree child;
+ FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (slp_node), j, child)
+ if (!vect_maybe_update_slp_op_vectype
+ (child, SLP_TREE_VECTYPE (slp_node)))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "incompatible vector types for "
+ "invariants\n");
+ return false;
+ }
+ }
+ /* Analysis for double-reduction is done on the outer
+ loop PHI, nested cycles have no further restrictions. */
+ STMT_VINFO_TYPE (stmt_info) = cycle_phi_info_type;
+ }
else
STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
return true;
gphi *reduc_def_phi = as_a <gphi *> (phi_info->stmt);
/* Verify following REDUC_IDX from the latch def leads us back to the PHI
- and compute the reduction chain length. */
- tree reduc_def = PHI_ARG_DEF_FROM_EDGE (reduc_def_phi,
- loop_latch_edge (loop));
+ and compute the reduction chain length. Discover the real
+ reduction operation stmt on the way (stmt_info and slp_for_stmt_info). */
+ tree reduc_def
+ = PHI_ARG_DEF_FROM_EDGE (reduc_def_phi,
+ loop_latch_edge
+ (gimple_bb (reduc_def_phi)->loop_father));
unsigned reduc_chain_length = 0;
bool only_slp_reduc_chain = true;
stmt_info = NULL;
+ slp_tree slp_for_stmt_info = slp_node ? slp_node_instance->root : NULL;
while (reduc_def != PHI_RESULT (reduc_def_phi))
{
stmt_vec_info def = loop_vinfo->lookup_def (reduc_def);
stmt_info = vdef;
reduc_def = gimple_op (vdef->stmt, 1 + STMT_VINFO_REDUC_IDX (vdef));
reduc_chain_length++;
+ if (!stmt_info && slp_node)
+ slp_for_stmt_info = SLP_TREE_CHILDREN (slp_for_stmt_info)[0];
}
/* PHIs should not participate in patterns. */
gcc_assert (!STMT_VINFO_RELATED_STMT (phi_info));
The last use is the reduction variable. In case of nested cycle this
assumption is not true: we use reduc_index to record the index of the
reduction variable. */
- /* ??? To get at invariant/constant uses on the SLP node we have to
- get to it here, slp_node is still the reduction PHI. */
- slp_tree slp_for_stmt_info = NULL;
- if (slp_node)
- {
- slp_for_stmt_info = slp_node_instance->root;
- /* And then there's reduction chain with a conversion ... */
- if (SLP_TREE_REPRESENTATIVE (slp_for_stmt_info) != stmt_info)
- slp_for_stmt_info = SLP_TREE_CHILDREN (slp_for_stmt_info)[0];
- gcc_assert (SLP_TREE_REPRESENTATIVE (slp_for_stmt_info) == stmt_info);
- }
slp_tree *slp_op = XALLOCAVEC (slp_tree, op_type);
/* We need to skip an extra operand for COND_EXPRs with embedded
comparison. */
if (slp_node)
{
vec_initial_defs.reserve (vec_num);
- gcc_assert (slp_node == slp_node_instance->reduc_phis);
- stmt_vec_info first = REDUC_GROUP_FIRST_ELEMENT (reduc_stmt_info);
- tree neutral_op
- = neutral_op_for_slp_reduction (slp_node, vectype_out,
- STMT_VINFO_REDUC_CODE (reduc_info),
- first != NULL);
- get_initial_defs_for_reduction (loop_vinfo, slp_node_instance->reduc_phis,
- &vec_initial_defs, vec_num,
- first != NULL, neutral_op);
+ if (nested_cycle)
+ {
+ unsigned phi_idx = loop_preheader_edge (loop)->dest_idx;
+ vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[phi_idx],
+ &vec_initial_defs);
+ }
+ else
+ {
+ gcc_assert (slp_node == slp_node_instance->reduc_phis);
+ stmt_vec_info first = REDUC_GROUP_FIRST_ELEMENT (reduc_stmt_info);
+ tree neutral_op
+ = neutral_op_for_slp_reduction (slp_node, vectype_out,
+ STMT_VINFO_REDUC_CODE (reduc_info),
+ first != NULL);
+ get_initial_defs_for_reduction (loop_vinfo, slp_node_instance->reduc_phis,
+ &vec_initial_defs, vec_num,
+ first != NULL, neutral_op);
+ }
}
else
{
if (!vec_stmt) /* transformation not required. */
{
+ /* Deal with copies from externs or constants that are disguised as
+ loop-closed PHI nodes (PR97886). */
+ if (slp_node
+ && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0],
+ SLP_TREE_VECTYPE (slp_node)))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "incompatible vector types for invariants\n");
+ return false;
+ }
STMT_VINFO_TYPE (stmt_info) = lc_phi_info_type;
return true;
}
return true;
}
+/* Vectorizes PHIs. */
+
+bool
+vectorizable_phi (vec_info *,
+ stmt_vec_info stmt_info, gimple **vec_stmt,
+ slp_tree slp_node, stmt_vector_for_cost *cost_vec)
+{
+ if (!is_a <gphi *> (stmt_info->stmt) || !slp_node)
+ return false;
+
+ if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
+ return false;
+
+ tree vectype = SLP_TREE_VECTYPE (slp_node);
+
+ if (!vec_stmt) /* transformation not required. */
+ {
+ slp_tree child;
+ unsigned i;
+ FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (slp_node), i, child)
+ if (!child)
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "PHI node with unvectorized backedge def\n");
+ return false;
+ }
+ else if (!vect_maybe_update_slp_op_vectype (child, vectype))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "incompatible vector types for invariants\n");
+ return false;
+ }
+ record_stmt_cost (cost_vec, SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node),
+ vector_stmt, stmt_info, vectype, 0, vect_body);
+ STMT_VINFO_TYPE (stmt_info) = phi_info_type;
+ return true;
+ }
+
+ tree scalar_dest = gimple_phi_result (stmt_info->stmt);
+ basic_block bb = gimple_bb (stmt_info->stmt);
+ tree vec_dest = vect_create_destination_var (scalar_dest, vectype);
+ auto_vec<gphi *> new_phis;
+ for (unsigned i = 0; i < gimple_phi_num_args (stmt_info->stmt); ++i)
+ {
+ slp_tree child = SLP_TREE_CHILDREN (slp_node)[i];
+
+ /* Skip not yet vectorized defs. */
+ if (SLP_TREE_DEF_TYPE (child) == vect_internal_def
+ && SLP_TREE_VEC_STMTS (child).is_empty ())
+ continue;
+
+ auto_vec<tree> vec_oprnds;
+ vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[i], &vec_oprnds);
+ if (!new_phis.exists ())
+ {
+ new_phis.create (vec_oprnds.length ());
+ for (unsigned j = 0; j < vec_oprnds.length (); j++)
+ {
+ /* Create the vectorized LC PHI node. */
+ new_phis.quick_push (create_phi_node (vec_dest, bb));
+ SLP_TREE_VEC_STMTS (slp_node).quick_push (new_phis[j]);
+ }
+ }
+ edge e = gimple_phi_arg_edge (as_a <gphi *> (stmt_info->stmt), i);
+ for (unsigned j = 0; j < vec_oprnds.length (); j++)
+ add_phi_arg (new_phis[j], vec_oprnds[j], e, UNKNOWN_LOCATION);
+ }
+ /* We should have at least one already vectorized child. */
+ gcc_assert (new_phis.exists ());
+
+ return true;
+}
+
/* Function vect_min_worthwhile_factor.
poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
unsigned i;
tree expr;
- gimple_seq stmts;
gimple_stmt_iterator si;
gphi *phi = dyn_cast <gphi *> (stmt_info->stmt);
return false;
}
- /* FORNOW: outer loop induction with SLP not supported. */
- if (STMT_SLP_TYPE (stmt_info))
- return false;
-
exit_phi = NULL;
latch_e = loop_latch_edge (loop->inner);
loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
if (slp_node && !nunits.is_constant ())
{
- /* The current SLP code creates the initial value element-by-element. */
+ /* The current SLP code creates the step value element-by-element. */
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"SLP induction not supported for variable-length"
if (!vec_stmt) /* transformation not required. */
{
+ unsigned inside_cost = 0, prologue_cost = 0;
+ if (slp_node)
+ {
+ /* We eventually need to set a vector type on invariant
+ arguments. */
+ unsigned j;
+ slp_tree child;
+ FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (slp_node), j, child)
+ if (!vect_maybe_update_slp_op_vectype
+ (child, SLP_TREE_VECTYPE (slp_node)))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "incompatible vector types for "
+ "invariants\n");
+ return false;
+ }
+ /* loop cost for vec_loop. */
+ inside_cost
+ = record_stmt_cost (cost_vec,
+ SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node),
+ vector_stmt, stmt_info, 0, vect_body);
+ /* prologue cost for vec_init (if not nested) and step. */
+ prologue_cost = record_stmt_cost (cost_vec, 1 + !nested_in_vect_loop,
+ scalar_to_vec,
+ stmt_info, 0, vect_prologue);
+ }
+ else /* if (!slp_node) */
+ {
+ /* loop cost for vec_loop. */
+ inside_cost = record_stmt_cost (cost_vec, ncopies, vector_stmt,
+ stmt_info, 0, vect_body);
+ /* prologue cost for vec_init and vec_step. */
+ prologue_cost = record_stmt_cost (cost_vec, 2, scalar_to_vec,
+ stmt_info, 0, vect_prologue);
+ }
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "vect_model_induction_cost: inside_cost = %d, "
+ "prologue_cost = %d .\n", inside_cost,
+ prologue_cost);
+
STMT_VINFO_TYPE (stmt_info) = induc_vec_info_type;
DUMP_VECT_SCOPE ("vectorizable_induction");
- vect_model_induction_cost (stmt_info, ncopies, cost_vec);
return true;
}
tree step_vectype = get_same_sized_vectype (TREE_TYPE (step_expr), vectype);
pe = loop_preheader_edge (iv_loop);
- init_expr = PHI_ARG_DEF_FROM_EDGE (phi,
- loop_preheader_edge (iv_loop));
-
- stmts = NULL;
- if (!nested_in_vect_loop)
- {
- /* Convert the initial value to the IV update type. */
- tree new_type = TREE_TYPE (step_expr);
- init_expr = gimple_convert (&stmts, new_type, init_expr);
-
- /* If we are using the loop mask to "peel" for alignment then we need
- to adjust the start value here. */
- tree skip_niters = LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo);
- if (skip_niters != NULL_TREE)
- {
- if (FLOAT_TYPE_P (vectype))
- skip_niters = gimple_build (&stmts, FLOAT_EXPR, new_type,
- skip_niters);
- else
- skip_niters = gimple_convert (&stmts, new_type, skip_niters);
- tree skip_step = gimple_build (&stmts, MULT_EXPR, new_type,
- skip_niters, step_expr);
- init_expr = gimple_build (&stmts, MINUS_EXPR, new_type,
- init_expr, skip_step);
- }
- }
-
- if (stmts)
- {
- new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
- gcc_assert (!new_bb);
- }
-
/* Find the first insertion point in the BB. */
basic_block bb = gimple_bb (phi);
si = gsi_after_labels (bb);
/* For SLP induction we have to generate several IVs as for example
- with group size 3 we need [i, i, i, i + S] [i + S, i + S, i + 2*S, i + 2*S]
- [i + 2*S, i + 3*S, i + 3*S, i + 3*S]. The step is the same uniform
- [VF*S, VF*S, VF*S, VF*S] for all. */
+ with group size 3 we need
+ [i0, i1, i2, i0 + S0] [i1 + S1, i2 + S2, i0 + 2*S0, i1 + 2*S1]
+ [i2 + 2*S2, i0 + 3*S0, i1 + 3*S1, i2 + 3*S2]. */
if (slp_node)
{
/* Enforced above. */
unsigned int const_nunits = nunits.to_constant ();
- /* Generate [VF*S, VF*S, ... ]. */
- if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)))
+ /* The initial values are vectorized, but any lanes > group_size
+ need adjustment. */
+ slp_tree init_node
+ = SLP_TREE_CHILDREN (slp_node)[pe->dest_idx];
+
+ /* Gather steps. Since we do not vectorize inductions as
+ cycles we have to reconstruct the step from SCEV data. */
+ unsigned group_size = SLP_TREE_LANES (slp_node);
+ tree *steps = XALLOCAVEC (tree, group_size);
+ tree *inits = XALLOCAVEC (tree, group_size);
+ stmt_vec_info phi_info;
+ FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, phi_info)
{
- expr = build_int_cst (integer_type_node, vf);
- expr = fold_convert (TREE_TYPE (step_expr), expr);
+ steps[i] = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (phi_info);
+ if (!init_node)
+ inits[i] = gimple_phi_arg_def (as_a<gphi *> (phi_info->stmt),
+ pe->dest_idx);
}
- else
- expr = build_int_cst (TREE_TYPE (step_expr), vf);
- new_name = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
- expr, step_expr);
- if (! CONSTANT_CLASS_P (new_name))
- new_name = vect_init_vector (loop_vinfo, stmt_info, new_name,
- TREE_TYPE (step_expr), NULL);
- new_vec = build_vector_from_val (step_vectype, new_name);
- vec_step = vect_init_vector (loop_vinfo, stmt_info,
- new_vec, step_vectype, NULL);
/* Now generate the IVs. */
- unsigned group_size = SLP_TREE_LANES (slp_node);
unsigned nvects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
- unsigned elts = const_nunits * nvects;
- /* Compute the number of distinct IVs we need. First reduce
- group_size if it is a multiple of const_nunits so we get
- one IV for a group_size of 4 but const_nunits 2. */
- unsigned group_sizep = group_size;
- if (group_sizep % const_nunits == 0)
- group_sizep = group_sizep / const_nunits;
- unsigned nivs = least_common_multiple (group_sizep,
- const_nunits) / const_nunits;
- gcc_assert (elts % group_size == 0);
- tree elt = init_expr;
+ gcc_assert ((const_nunits * nvects) % group_size == 0);
+ unsigned nivs;
+ if (nested_in_vect_loop)
+ nivs = nvects;
+ else
+ {
+ /* Compute the number of distinct IVs we need. First reduce
+ group_size if it is a multiple of const_nunits so we get
+ one IV for a group_size of 4 but const_nunits 2. */
+ unsigned group_sizep = group_size;
+ if (group_sizep % const_nunits == 0)
+ group_sizep = group_sizep / const_nunits;
+ nivs = least_common_multiple (group_sizep,
+ const_nunits) / const_nunits;
+ }
+ tree stept = TREE_TYPE (step_vectype);
+ tree lupdate_mul = NULL_TREE;
+ if (!nested_in_vect_loop)
+ {
+ /* The number of iterations covered in one vector iteration. */
+ unsigned lup_mul = (nvects * const_nunits) / group_size;
+ lupdate_mul
+ = build_vector_from_val (step_vectype,
+ SCALAR_FLOAT_TYPE_P (stept)
+ ? build_real_from_wide (stept, lup_mul,
+ UNSIGNED)
+ : build_int_cstu (stept, lup_mul));
+ }
+ tree peel_mul = NULL_TREE;
+ gimple_seq init_stmts = NULL;
+ if (LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo))
+ {
+ if (SCALAR_FLOAT_TYPE_P (stept))
+ peel_mul = gimple_build (&init_stmts, FLOAT_EXPR, stept,
+ LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo));
+ else
+ peel_mul = gimple_convert (&init_stmts, stept,
+ LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo));
+ peel_mul = gimple_build_vector_from_val (&init_stmts,
+ step_vectype, peel_mul);
+ }
unsigned ivn;
+ auto_vec<tree> vec_steps;
for (ivn = 0; ivn < nivs; ++ivn)
{
- tree_vector_builder elts (step_vectype, const_nunits, 1);
- stmts = NULL;
+ tree_vector_builder step_elts (step_vectype, const_nunits, 1);
+ tree_vector_builder init_elts (vectype, const_nunits, 1);
+ tree_vector_builder mul_elts (step_vectype, const_nunits, 1);
for (unsigned eltn = 0; eltn < const_nunits; ++eltn)
{
- if (ivn*const_nunits + eltn >= group_size
- && (ivn * const_nunits + eltn) % group_size == 0)
- elt = gimple_build (&stmts, PLUS_EXPR, TREE_TYPE (elt),
- elt, step_expr);
- elts.quick_push (elt);
- }
- vec_init = gimple_build_vector (&stmts, &elts);
- vec_init = gimple_convert (&stmts, vectype, vec_init);
- if (stmts)
- {
- new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
- gcc_assert (!new_bb);
+ /* The scalar steps of the IVs. */
+ tree elt = steps[(ivn*const_nunits + eltn) % group_size];
+ elt = gimple_convert (&init_stmts, TREE_TYPE (step_vectype), elt);
+ step_elts.quick_push (elt);
+ if (!init_node)
+ {
+ /* The scalar inits of the IVs if not vectorized. */
+ elt = inits[(ivn*const_nunits + eltn) % group_size];
+ if (!useless_type_conversion_p (TREE_TYPE (vectype),
+ TREE_TYPE (elt)))
+ elt = gimple_build (&init_stmts, VIEW_CONVERT_EXPR,
+ TREE_TYPE (vectype), elt);
+ init_elts.quick_push (elt);
+ }
+ /* The number of steps to add to the initial values. */
+ unsigned mul_elt = (ivn*const_nunits + eltn) / group_size;
+ mul_elts.quick_push (SCALAR_FLOAT_TYPE_P (stept)
+ ? build_real_from_wide (stept,
+ mul_elt, UNSIGNED)
+ : build_int_cstu (stept, mul_elt));
}
+ vec_step = gimple_build_vector (&init_stmts, &step_elts);
+ vec_steps.safe_push (vec_step);
+ tree step_mul = gimple_build_vector (&init_stmts, &mul_elts);
+ if (peel_mul)
+ step_mul = gimple_build (&init_stmts, PLUS_EXPR, step_vectype,
+ step_mul, peel_mul);
+ if (!init_node)
+ vec_init = gimple_build_vector (&init_stmts, &init_elts);
/* Create the induction-phi that defines the induction-operand. */
- vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, "vec_iv_");
+ vec_dest = vect_get_new_vect_var (vectype, vect_simple_var,
+ "vec_iv_");
induction_phi = create_phi_node (vec_dest, iv_loop->header);
induc_def = PHI_RESULT (induction_phi);
/* Create the iv update inside the loop */
+ tree up = vec_step;
+ if (lupdate_mul)
+ up = gimple_build (&init_stmts, MULT_EXPR, step_vectype,
+ vec_step, lupdate_mul);
gimple_seq stmts = NULL;
vec_def = gimple_convert (&stmts, step_vectype, induc_def);
vec_def = gimple_build (&stmts,
- PLUS_EXPR, step_vectype, vec_def, vec_step);
+ PLUS_EXPR, step_vectype, vec_def, up);
vec_def = gimple_convert (&stmts, vectype, vec_def);
gsi_insert_seq_before (&si, stmts, GSI_SAME_STMT);
+ add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop),
+ UNKNOWN_LOCATION);
+
+ if (init_node)
+ vec_init = vect_get_slp_vect_def (init_node, ivn);
+ if (!nested_in_vect_loop
+ && !integer_zerop (step_mul))
+ {
+ vec_def = gimple_convert (&init_stmts, step_vectype, vec_init);
+ up = gimple_build (&init_stmts, MULT_EXPR, step_vectype,
+ vec_step, step_mul);
+ vec_def = gimple_build (&init_stmts, PLUS_EXPR, step_vectype,
+ vec_def, up);
+ vec_init = gimple_convert (&init_stmts, vectype, vec_def);
+ }
/* Set the arguments of the phi node: */
add_phi_arg (induction_phi, vec_init, pe, UNKNOWN_LOCATION);
- add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop),
- UNKNOWN_LOCATION);
SLP_TREE_VEC_STMTS (slp_node).quick_push (induction_phi);
}
- /* Fill up to the number of vectors we need for the whole group. */
- nivs = least_common_multiple (group_size,
- const_nunits) / const_nunits;
- for (; ivn < nivs; ++ivn)
- SLP_TREE_VEC_STMTS (slp_node)
- .quick_push (SLP_TREE_VEC_STMTS (slp_node)[0]);
+ if (!nested_in_vect_loop)
+ {
+ /* Fill up to the number of vectors we need for the whole group. */
+ nivs = least_common_multiple (group_size,
+ const_nunits) / const_nunits;
+ for (; ivn < nivs; ++ivn)
+ {
+ SLP_TREE_VEC_STMTS (slp_node)
+ .quick_push (SLP_TREE_VEC_STMTS (slp_node)[0]);
+ vec_steps.safe_push (vec_steps[0]);
+ }
+ }
- /* Re-use IVs when we can. */
+ /* Re-use IVs when we can. We are generating further vector
+ stmts by adding VF' * stride to the IVs generated above. */
if (ivn < nvects)
{
unsigned vfp
= least_common_multiple (group_size, const_nunits) / group_size;
- /* Generate [VF'*S, VF'*S, ... ]. */
- if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)))
- {
- expr = build_int_cst (integer_type_node, vfp);
- expr = fold_convert (TREE_TYPE (step_expr), expr);
- }
- else
- expr = build_int_cst (TREE_TYPE (step_expr), vfp);
- new_name = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
- expr, step_expr);
- if (! CONSTANT_CLASS_P (new_name))
- new_name = vect_init_vector (loop_vinfo, stmt_info, new_name,
- TREE_TYPE (step_expr), NULL);
- new_vec = build_vector_from_val (step_vectype, new_name);
- vec_step = vect_init_vector (loop_vinfo, stmt_info, new_vec,
- step_vectype, NULL);
+ tree lupdate_mul
+ = build_vector_from_val (step_vectype,
+ SCALAR_FLOAT_TYPE_P (stept)
+ ? build_real_from_wide (stept,
+ vfp, UNSIGNED)
+ : build_int_cstu (stept, vfp));
for (; ivn < nvects; ++ivn)
{
gimple *iv = SLP_TREE_VEC_STMTS (slp_node)[ivn - nivs];
- tree def;
- if (gimple_code (iv) == GIMPLE_PHI)
- def = gimple_phi_result (iv);
- else
- def = gimple_assign_lhs (iv);
+ tree def = gimple_get_lhs (iv);
+ if (ivn < 2*nivs)
+ vec_steps[ivn - nivs]
+ = gimple_build (&init_stmts, MULT_EXPR, step_vectype,
+ vec_steps[ivn - nivs], lupdate_mul);
gimple_seq stmts = NULL;
def = gimple_convert (&stmts, step_vectype, def);
- def = gimple_build (&stmts,
- PLUS_EXPR, step_vectype, def, vec_step);
+ def = gimple_build (&stmts, PLUS_EXPR, step_vectype,
+ def, vec_steps[ivn % nivs]);
def = gimple_convert (&stmts, vectype, def);
if (gimple_code (iv) == GIMPLE_PHI)
gsi_insert_seq_before (&si, stmts, GSI_SAME_STMT);
}
}
+ new_bb = gsi_insert_seq_on_edge_immediate (pe, init_stmts);
+ gcc_assert (!new_bb);
+
return true;
}
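As a concrete illustration of the group-size-3 case described in the comment above (an example written for exposition, not from the patch; names and steps are invented), three interleaved induction variables with distinct starts and steps form one SLP induction group:

void
fill3 (int *restrict a, int n)
{
  int i0 = 0, i1 = 10, i2 = 20;  /* three IVs with inits i0, i1, i2 */
  for (int i = 0; i < n; ++i)
    {
      a[3 * i + 0] = i0; i0 += 1;  /* step S0 */
      a[3 * i + 1] = i1; i1 += 2;  /* step S1 */
      a[3 * i + 2] = i2; i2 += 3;  /* step S2 */
    }
}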
+ init_expr = PHI_ARG_DEF_FROM_EDGE (phi,
+ loop_preheader_edge (iv_loop));
+
+ gimple_seq stmts = NULL;
+ if (!nested_in_vect_loop)
+ {
+ /* Convert the initial value to the IV update type. */
+ tree new_type = TREE_TYPE (step_expr);
+ init_expr = gimple_convert (&stmts, new_type, init_expr);
+
+ /* If we are using the loop mask to "peel" for alignment then we need
+ to adjust the start value here. */
+ tree skip_niters = LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo);
+ if (skip_niters != NULL_TREE)
+ {
+ if (FLOAT_TYPE_P (vectype))
+ skip_niters = gimple_build (&stmts, FLOAT_EXPR, new_type,
+ skip_niters);
+ else
+ skip_niters = gimple_convert (&stmts, new_type, skip_niters);
+ tree skip_step = gimple_build (&stmts, MULT_EXPR, new_type,
+ skip_niters, step_expr);
+ init_expr = gimple_build (&stmts, MINUS_EXPR, new_type,
+ init_expr, skip_step);
+ }
+ }
+
+ if (stmts)
+ {
+ new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
+ gcc_assert (!new_bb);
+ }
+
/* Create the vector that holds the initial_value of the induction. */
if (nested_in_vect_loop)
{
imm_use_iterator imm_iter;
tree lhs, lhs_type, bitsize, vec_bitsize;
tree vectype = (slp_node
- ? SLP_TREE_VECTYPE (SLP_TREE_REPRESENTATIVE (slp_node))
+ ? SLP_TREE_VECTYPE (slp_node)
: STMT_VINFO_VECTYPE (stmt_info));
poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
int ncopies;
gimple_seq stmts = NULL;
new_tree = force_gimple_operand (fold_convert (lhs_type, new_tree),
&stmts, true, NULL_TREE);
-
- gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
+ if (TREE_CODE (new_tree) == SSA_NAME
+ && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
+ SSA_NAME_OCCURS_IN_ABNORMAL_PHI (new_tree) = 1;
+ if (is_a <gphi *> (vec_stmt))
+ {
+ gimple_stmt_iterator si = gsi_after_labels (gimple_bb (vec_stmt));
+ gsi_insert_seq_before (&si, stmts, GSI_SAME_STMT);
+ }
+ else
+ {
+ gimple_stmt_iterator si = gsi_for_stmt (vec_stmt);
+ gsi_insert_seq_after (&si, stmts, GSI_SAME_STMT);
+ }
/* Replace use of lhs with newly computed result. If the use stmt is a
single arg PHI, just replace all uses of PHI result. It's necessary
were not code-generated yet so it is not too bad.
??? In fact we'd likely want to avoid this situation
in the first place. */
- if (gimple_code (use_stmt) != GIMPLE_PHI
- && !vect_stmt_dominates_stmt_p (gsi_stmt (*gsi), use_stmt))
+ if (TREE_CODE (new_tree) == SSA_NAME
+ && !SSA_NAME_IS_DEFAULT_DEF (new_tree)
+ && gimple_code (use_stmt) != GIMPLE_PHI
+ && !vect_stmt_dominates_stmt_p (SSA_NAME_DEF_STMT (new_tree),
+ use_stmt))
{
- gcc_assert (is_gimple_assign (use_stmt)
- && gimple_assign_rhs_code (use_stmt) == CONSTRUCTOR);
+ enum tree_code code = gimple_assign_rhs_code (use_stmt);
+ gcc_assert (code == CONSTRUCTOR
+ || code == VIEW_CONVERT_EXPR
+ || CONVERT_EXPR_CODE_P (code));
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"Using original scalar computation for "
"def\n");
continue;
}
+ /* ??? It can also happen that we end up pulling a def into
+ a loop where replacing out-of-loop uses would require
+ a new LC SSA PHI node. Retain the original scalar in
+ those cases as well. PR98064. */
+ if (TREE_CODE (new_tree) == SSA_NAME
+ && !SSA_NAME_IS_DEFAULT_DEF (new_tree)
+ && (gimple_bb (use_stmt)->loop_father
+ != gimple_bb (vec_stmt)->loop_father)
+ && !flow_loop_nested_p (gimple_bb (vec_stmt)->loop_father,
+ gimple_bb (use_stmt)->loop_father))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "Using original scalar computation for "
+ "live lane because there is an out-of-loop "
+ "definition for it\n");
+ continue;
+ }
FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
SET_USE (use_p, new_tree);
update_stmt (use_stmt);
When vectorizing STMT_INFO as a store, set *SEEN_STORE to its
stmt_vec_info. */
-static void
+static bool
vect_transform_loop_stmt (loop_vec_info loop_vinfo, stmt_vec_info stmt_info,
gimple_stmt_iterator *gsi, stmt_vec_info *seen_store)
{
if (!STMT_VINFO_RELEVANT_P (stmt_info)
&& !STMT_VINFO_LIVE_P (stmt_info))
- return;
+ return false;
if (STMT_VINFO_VECTYPE (stmt_info))
{
/* Pure SLP statements have already been vectorized. We still need
to apply loop vectorization to hybrid SLP statements. */
if (PURE_SLP_STMT (stmt_info))
- return;
+ return false;
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location, "transform statement.\n");
if (vect_transform_stmt (loop_vinfo, stmt_info, gsi, NULL, NULL))
*seen_store = stmt_info;
+
+ return true;
}
/* Helper function to pass to simplify_replace_tree to enable replacing tree's
basic_block *epilogue_bbs = get_loop_body (epilogue);
unsigned i;
+ free (LOOP_VINFO_BBS (epilogue_vinfo));
LOOP_VINFO_BBS (epilogue_vinfo) = epilogue_bbs;
/* Advance data_reference's with the number of iterations of the previous
split_edge (loop_preheader_edge (loop));
- if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
- && vect_use_loop_mask_for_alignment_p (loop_vinfo))
+ if (vect_use_loop_mask_for_alignment_p (loop_vinfo))
/* This will deal with any possible peeling. */
vect_prepare_for_masked_peels (loop_vinfo);
}
stmt_vec_info pat_stmt_info
= STMT_VINFO_RELATED_STMT (stmt_info);
- vect_transform_loop_stmt (loop_vinfo, pat_stmt_info, &si,
- &seen_store);
- maybe_set_vectorized_backedge_value (loop_vinfo,
- pat_stmt_info);
+ if (vect_transform_loop_stmt (loop_vinfo, pat_stmt_info,
+ &si, &seen_store))
+ maybe_set_vectorized_backedge_value (loop_vinfo,
+ pat_stmt_info);
}
else
{
- vect_transform_loop_stmt (loop_vinfo, stmt_info, &si,
- &seen_store);
- maybe_set_vectorized_backedge_value (loop_vinfo,
- stmt_info);
+ if (vect_transform_loop_stmt (loop_vinfo, stmt_info, &si,
+ &seen_store))
+ maybe_set_vectorized_backedge_value (loop_vinfo,
+ stmt_info);
}
}
gsi_next (&si);