/* Statement Analysis and Transformation for Vectorization
- Copyright (C) 2003-2019 Free Software Foundation, Inc.
+ Copyright (C) 2003-2020 Free Software Foundation, Inc.
Contributed by Dorit Naishlos <dorit@il.ibm.com>
and Ira Rosen <irar@il.ibm.com>
/* Return TRUE iff the given statement is in an inner loop relative to
the loop being vectorized. */
bool
-stmt_in_inner_loop_p (class _stmt_vec_info *stmt_info)
+stmt_in_inner_loop_p (vec_info *vinfo, class _stmt_vec_info *stmt_info)
{
gimple *stmt = STMT_VINFO_STMT (stmt_info);
basic_block bb = gimple_bb (stmt);
- loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
+ loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
class loop* loop;
if (!loop_vinfo)
unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
- int misalign, enum vect_cost_model_location where)
+ tree vectype, int misalign,
+ enum vect_cost_model_location where)
{
if ((kind == vector_load || kind == unaligned_load)
- && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
+ && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
kind = vector_gather_load;
if ((kind == vector_store || kind == unaligned_store)
- && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
+ && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
kind = vector_scatter_store;
- stmt_info_for_cost si = { count, kind, where, stmt_info, misalign };
+ stmt_info_for_cost si = { count, kind, where, stmt_info, vectype, misalign };
body_cost_vec->safe_push (si);
- tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
return (unsigned)
(builtin_vectorization_cost (kind, vectype, misalign) * count);
}
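/* Illustrative sketch, not part of the patch: with the explicit VECTYPE
   parameter STMT_INFO may now be null, which is why the gather/scatter
   checks above guard it. A call now takes the form

     record_stmt_cost (cost_vec, ncopies, vector_stmt,
                       stmt_info, vectype, 0, vect_body);

   Call sites in this file that still pass only STMT_INFO and MISALIGN
   presumably go through a forwarding overload that derives VECTYPE from
   STMT_VINFO_VECTYPE (STMT_INFO). */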
with scalar destination SCALAR_DEST. */
static tree
-read_vector_array (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
+read_vector_array (vec_info *vinfo,
+ stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
tree scalar_dest, tree array, unsigned HOST_WIDE_INT n)
{
tree vect_type, vect, vect_name, array_ref;
new_stmt = gimple_build_assign (vect, array_ref);
vect_name = make_ssa_name (vect, new_stmt);
gimple_assign_set_lhs (new_stmt, vect_name);
- vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
+ vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
return vect_name;
}
The store is part of the vectorization of STMT_INFO. */
static void
-write_vector_array (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
+write_vector_array (vec_info *vinfo,
+ stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
tree vect, tree array, unsigned HOST_WIDE_INT n)
{
tree array_ref;
NULL_TREE, NULL_TREE);
new_stmt = gimple_build_assign (array_ref, vect);
- vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
+ vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
}
/* PTR is a pointer to an array of type TYPE. Return a representation
Emit the clobber before *GSI. */
static void
-vect_clobber_variable (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
- tree var)
+vect_clobber_variable (vec_info *vinfo, stmt_vec_info stmt_info,
+ gimple_stmt_iterator *gsi, tree var)
{
tree clobber = build_clobber (TREE_TYPE (var));
gimple *new_stmt = gimple_build_assign (var, clobber);
- vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
+ vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
}
/* Utility functions used by vect_mark_stmts_to_be_vectorized. */
}
for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
{
+ if (is_gimple_debug (gsi_stmt (si)))
+ continue;
stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
return opt_result::success ();
}
-/* Compute the prologue cost for invariant or constant operands. */
-
-static unsigned
-vect_prologue_cost_for_slp_op (slp_tree node, stmt_vec_info stmt_info,
- unsigned opno, enum vect_def_type dt,
- stmt_vector_for_cost *cost_vec)
-{
- vec_info *vinfo = stmt_info->vinfo;
- gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0]->stmt;
- tree op = gimple_op (stmt, opno);
- unsigned prologue_cost = 0;
-
- /* Without looking at the actual initializer a vector of
- constants can be implemented as load from the constant pool.
- When all elements are the same we can use a splat. */
- tree vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op));
- unsigned group_size = SLP_TREE_SCALAR_STMTS (node).length ();
- unsigned num_vects_to_check;
- unsigned HOST_WIDE_INT const_nunits;
- unsigned nelt_limit;
- if (TYPE_VECTOR_SUBPARTS (vectype).is_constant (&const_nunits)
- && ! multiple_p (const_nunits, group_size))
- {
- num_vects_to_check = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
- nelt_limit = const_nunits;
- }
- else
- {
- /* If either the vector has variable length or the vectors
- are composed of repeated whole groups we only need to
- cost construction once. All vectors will be the same. */
- num_vects_to_check = 1;
- nelt_limit = group_size;
- }
- tree elt = NULL_TREE;
- unsigned nelt = 0;
- for (unsigned j = 0; j < num_vects_to_check * nelt_limit; ++j)
- {
- unsigned si = j % group_size;
- if (nelt == 0)
- elt = gimple_op (SLP_TREE_SCALAR_STMTS (node)[si]->stmt, opno);
- /* ??? We're just tracking whether all operands of a single
- vector initializer are the same, ideally we'd check if
- we emitted the same one already. */
- else if (elt != gimple_op (SLP_TREE_SCALAR_STMTS (node)[si]->stmt,
- opno))
- elt = NULL_TREE;
- nelt++;
- if (nelt == nelt_limit)
- {
- /* ??? We need to pass down stmt_info for a vector type
- even if it points to the wrong stmt. */
- prologue_cost += record_stmt_cost
- (cost_vec, 1,
- dt == vect_external_def
- ? (elt ? scalar_to_vec : vec_construct)
- : vector_load,
- stmt_info, 0, vect_prologue);
- nelt = 0;
- }
- }
-
- return prologue_cost;
-}
-
/* Function vect_model_simple_cost.
Models cost for simple operations, i.e. those that only emit ncopies of a
be generated for the single vector op. We will handle that shortly. */
static void
-vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
+vect_model_simple_cost (vec_info *,
+ stmt_vec_info stmt_info, int ncopies,
enum vect_def_type *dt,
int ndts,
slp_tree node,
- stmt_vector_for_cost *cost_vec)
+ stmt_vector_for_cost *cost_vec,
+ vect_cost_for_stmt kind = vector_stmt)
{
int inside_cost = 0, prologue_cost = 0;
if (node)
ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
- if (node)
- {
- /* Scan operands and account for prologue cost of constants/externals.
- ??? This over-estimates cost for multiple uses and should be
- re-engineered. */
- gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0]->stmt;
- tree lhs = gimple_get_lhs (stmt);
- for (unsigned i = 0; i < gimple_num_ops (stmt); ++i)
- {
- tree op = gimple_op (stmt, i);
- enum vect_def_type dt;
- if (!op || op == lhs)
- continue;
- if (vect_is_simple_use (op, stmt_info->vinfo, &dt)
- && (dt == vect_constant_def || dt == vect_external_def))
- prologue_cost += vect_prologue_cost_for_slp_op (node, stmt_info,
- i, dt, cost_vec);
- }
- }
- else
+ if (!node)
/* Cost the "broadcast" of a scalar operand in to a vector operand.
Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
cost model. */
}
/* Pass the inside-of-loop statements to the target-specific cost model. */
- inside_cost += record_stmt_cost (cost_vec, ncopies, vector_stmt,
+ inside_cost += record_stmt_cost (cost_vec, ncopies, kind,
stmt_info, 0, vect_body);
if (dump_enabled_p ())
}
-/* Model cost for type demotion and promotion operations. PWR is normally
- zero for single-step promotions and demotions. It will be one if
- two-step promotion/demotion is required, and so on. Each additional
+/* Model cost for type demotion and promotion operations. PWR is
+ normally zero for single-step promotions and demotions. It will be
+ one if two-step promotion/demotion is required, and so on. NCOPIES
+ is the number of vector results (and thus number of instructions)
+ for the narrowest end of the operation chain. Each additional
step doubles the number of instructions required. */
static void
vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
- enum vect_def_type *dt, int pwr,
+ enum vect_def_type *dt,
+ unsigned int ncopies, int pwr,
stmt_vector_for_cost *cost_vec)
{
- int i, tmp;
+ int i;
int inside_cost = 0, prologue_cost = 0;
for (i = 0; i < pwr + 1; i++)
{
- tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
- (i + 1) : i;
- inside_cost += record_stmt_cost (cost_vec, vect_pow2 (tmp),
- vec_promote_demote, stmt_info, 0,
- vect_body);
+ inside_cost += record_stmt_cost (cost_vec, ncopies, vec_promote_demote,
+ stmt_info, 0, vect_body);
+ ncopies *= 2;
}
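  /* Worked example for the loop above: a two-step operation (PWR == 1)
     whose narrowest end needs NCOPIES == 2 vector results records 2
     vec_promote_demote stmts on the first iteration and 4 on the second,
     6 in total; each further step would double the count again. */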
/* FORNOW: Assuming maximum 2 args per stmts. */
has the overhead of the grouped access attributed to it. */
static void
-vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
- enum vect_def_type dt,
+vect_model_store_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
vect_memory_access_type memory_access_type,
vec_load_store_type vls_type, slp_tree slp_node,
stmt_vector_for_cost *cost_vec)
if (vls_type == VLS_STORE_INVARIANT)
{
- if (slp_node)
- prologue_cost += vect_prologue_cost_for_slp_op (slp_node, stmt_info,
- 1, dt, cost_vec);
- else
+ if (!slp_node)
prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
stmt_info, 0, vect_prologue);
}
scalar_store, stmt_info, 0, vect_body);
}
else
- vect_get_store_cost (stmt_info, ncopies, &inside_cost, cost_vec);
+ vect_get_store_cost (vinfo, stmt_info, ncopies, &inside_cost, cost_vec);
if (memory_access_type == VMAT_ELEMENTWISE
|| memory_access_type == VMAT_STRIDED_SLP)
/* Calculate cost of DR's memory access. */
void
-vect_get_store_cost (stmt_vec_info stmt_info, int ncopies,
+vect_get_store_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
unsigned int *inside_cost,
stmt_vector_for_cost *body_cost_vec)
{
dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
int alignment_support_scheme
- = vect_supportable_dr_alignment (dr_info, false);
+ = vect_supportable_dr_alignment (vinfo, dr_info, false);
switch (alignment_support_scheme)
{
access scheme chosen. */
static void
-vect_model_load_cost (stmt_vec_info stmt_info, unsigned ncopies,
+vect_model_load_cost (vec_info *vinfo,
+ stmt_vec_info stmt_info, unsigned ncopies, poly_uint64 vf,
vect_memory_access_type memory_access_type,
- slp_instance instance,
slp_tree slp_node,
stmt_vector_for_cost *cost_vec)
{
unsigned n_perms;
unsigned assumed_nunits
= vect_nunits_for_cost (STMT_VINFO_VECTYPE (first_stmt_info));
- unsigned slp_vf = (ncopies * assumed_nunits) / instance->group_size;
- vect_transform_slp_perm_load (slp_node, vNULL, NULL,
- slp_vf, instance, true,
- &n_perms);
+ vect_transform_slp_perm_load (vinfo, slp_node, vNULL, NULL,
+ vf, true, &n_perms);
inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
first_stmt_info, 0, vect_body);
/* And adjust the number of loads performed. This handles
scalar_load, stmt_info, 0, vect_body);
}
else
- vect_get_load_cost (stmt_info, ncopies, first_stmt_p,
+ vect_get_load_cost (vinfo, stmt_info, ncopies, first_stmt_p,
&inside_cost, &prologue_cost,
cost_vec, cost_vec, true);
if (memory_access_type == VMAT_ELEMENTWISE
/* Calculate cost of DR's memory access. */
void
-vect_get_load_cost (stmt_vec_info stmt_info, int ncopies,
+vect_get_load_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
bool add_realign_cost, unsigned int *inside_cost,
unsigned int *prologue_cost,
stmt_vector_for_cost *prologue_cost_vec,
{
dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
int alignment_support_scheme
- = vect_supportable_dr_alignment (dr_info, false);
+ = vect_supportable_dr_alignment (vinfo, dr_info, false);
switch (alignment_support_scheme)
{
the loop preheader for the vectorized stmt STMT_VINFO. */
static void
-vect_init_vector_1 (stmt_vec_info stmt_vinfo, gimple *new_stmt,
+vect_init_vector_1 (vec_info *vinfo, stmt_vec_info stmt_vinfo, gimple *new_stmt,
gimple_stmt_iterator *gsi)
{
if (gsi)
- vect_finish_stmt_generation (stmt_vinfo, new_stmt, gsi);
+ vect_finish_stmt_generation (vinfo, stmt_vinfo, new_stmt, gsi);
else
{
- loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
+ loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
if (loop_vinfo)
{
basic_block new_bb;
edge pe;
- if (nested_in_vect_loop_p (loop, stmt_vinfo))
+ if (stmt_vinfo && nested_in_vect_loop_p (loop, stmt_vinfo))
loop = loop->inner;
pe = loop_preheader_edge (loop);
}
else
{
- bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
- basic_block bb;
- gimple_stmt_iterator gsi_bb_start;
-
- gcc_assert (bb_vinfo);
- bb = BB_VINFO_BB (bb_vinfo);
- gsi_bb_start = gsi_after_labels (bb);
- gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
+ bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
+ gimple_stmt_iterator gsi_region_begin = bb_vinfo->region_begin;
+ gsi_insert_before (&gsi_region_begin, new_stmt, GSI_SAME_STMT);
}
}
It will be used in the vectorization of STMT_INFO. */
tree
-vect_init_vector (stmt_vec_info stmt_info, tree val, tree type,
+vect_init_vector (vec_info *vinfo, stmt_vec_info stmt_info, tree val, tree type,
gimple_stmt_iterator *gsi)
{
gimple *init_stmt;
new_temp = make_ssa_name (TREE_TYPE (type));
init_stmt = gimple_build_assign (new_temp, COND_EXPR,
val, true_val, false_val);
- vect_init_vector_1 (stmt_info, init_stmt, gsi);
+ vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
val = new_temp;
}
}
{
init_stmt = gsi_stmt (gsi2);
gsi_remove (&gsi2, false);
- vect_init_vector_1 (stmt_info, init_stmt, gsi);
+ vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
}
}
}
new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
init_stmt = gimple_build_assign (new_temp, val);
- vect_init_vector_1 (stmt_info, init_stmt, gsi);
+ vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
return new_temp;
}
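/* Illustrative sketch, not part of the patch: a caller passing a null
   GSI, e.g.

     tree vec_cst = vect_init_vector (vinfo, stmt_info, op,
                                      vector_type, NULL);

   gets the initialization statement hoisted onto the loop preheader
   edge for a loop_vec_info, or to the region begin for a bb_vec_info,
   as the two branches of vect_init_vector_1 above show. */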
-/* Function vect_get_vec_def_for_operand_1.
-
- For a defining stmt DEF_STMT_INFO of a scalar stmt, return a vector def
- with type DT that will be used in the vectorized stmt. */
-
-tree
-vect_get_vec_def_for_operand_1 (stmt_vec_info def_stmt_info,
- enum vect_def_type dt)
-{
- tree vec_oprnd;
- stmt_vec_info vec_stmt_info;
-
- switch (dt)
- {
- /* operand is a constant or a loop invariant. */
- case vect_constant_def:
- case vect_external_def:
- /* Code should use vect_get_vec_def_for_operand. */
- gcc_unreachable ();
-
- /* Operand is defined by a loop header phi. In case of nested
- cycles we also may have uses of the backedge def. */
- case vect_reduction_def:
- case vect_double_reduction_def:
- case vect_nested_cycle:
- case vect_induction_def:
- gcc_assert (gimple_code (def_stmt_info->stmt) == GIMPLE_PHI
- || dt == vect_nested_cycle);
- /* Fallthru. */
-
- /* operand is defined inside the loop. */
- case vect_internal_def:
- {
- /* Get the def from the vectorized stmt. */
- vec_stmt_info = STMT_VINFO_VEC_STMT (def_stmt_info);
- /* Get vectorized pattern statement. */
- if (!vec_stmt_info
- && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
- && !STMT_VINFO_RELEVANT (def_stmt_info))
- vec_stmt_info = (STMT_VINFO_VEC_STMT
- (STMT_VINFO_RELATED_STMT (def_stmt_info)));
- gcc_assert (vec_stmt_info);
- if (gphi *phi = dyn_cast <gphi *> (vec_stmt_info->stmt))
- vec_oprnd = PHI_RESULT (phi);
- else
- vec_oprnd = gimple_get_lhs (vec_stmt_info->stmt);
- return vec_oprnd;
- }
-
- default:
- gcc_unreachable ();
- }
-}
-
-/* Function vect_get_vec_def_for_operand.
+/* Function vect_get_vec_defs_for_operand.
- OP is an operand in STMT_VINFO. This function returns a (vector) def
- that will be used in the vectorized stmt for STMT_VINFO.
+ OP is an operand in STMT_VINFO. This function returns a vector of
+ NCOPIES defs that will be used in the vectorized stmts for STMT_VINFO.
In the case that OP is an SSA_NAME which is defined in the loop, then
- STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
+ STMT_VINFO_VEC_STMTS of the defining stmt holds the relevant defs.
In case OP is an invariant or constant, a new stmt that creates a vector def
needs to be introduced. VECTYPE may be used to specify a required type for
vector invariant. */
-tree
-vect_get_vec_def_for_operand (tree op, stmt_vec_info stmt_vinfo, tree vectype)
+void
+vect_get_vec_defs_for_operand (vec_info *vinfo, stmt_vec_info stmt_vinfo,
+ unsigned ncopies,
+ tree op, vec<tree> *vec_oprnds, tree vectype)
{
gimple *def_stmt;
enum vect_def_type dt;
bool is_simple_use;
- loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
+ loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
- "vect_get_vec_def_for_operand: %T\n", op);
+ "vect_get_vec_defs_for_operand: %T\n", op);
stmt_vec_info def_stmt_info;
is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt,
if (def_stmt && dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = %G", def_stmt);
+ vec_oprnds->create (ncopies);
if (dt == vect_constant_def || dt == vect_external_def)
{
tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
vector_type = vectype;
else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
&& VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
- vector_type = build_same_sized_truth_vector_type (stmt_vectype);
+ vector_type = truth_type_for (stmt_vectype);
else
vector_type = get_vectype_for_scalar_type (loop_vinfo, TREE_TYPE (op));
gcc_assert (vector_type);
- return vect_init_vector (stmt_vinfo, op, vector_type, NULL);
+ tree vop = vect_init_vector (vinfo, stmt_vinfo, op, vector_type, NULL);
+ while (ncopies--)
+ vec_oprnds->quick_push (vop);
}
else
- return vect_get_vec_def_for_operand_1 (def_stmt_info, dt);
-}
-
-
-/* Function vect_get_vec_def_for_stmt_copy
-
- Return a vector-def for an operand. This function is used when the
- vectorized stmt to be created (by the caller to this function) is a "copy"
- created in case the vectorized result cannot fit in one vector, and several
- copies of the vector-stmt are required. In this case the vector-def is
- retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
- of the stmt that defines VEC_OPRND. VINFO describes the vectorization.
-
- Context:
- In case the vectorization factor (VF) is bigger than the number
- of elements that can fit in a vectype (nunits), we have to generate
- more than one vector stmt to vectorize the scalar stmt. This situation
- arises when there are multiple data-types operated upon in the loop; the
- smallest data-type determines the VF, and as a result, when vectorizing
- stmts operating on wider types we need to create 'VF/nunits' "copies" of the
- vector stmt (each computing a vector of 'nunits' results, and together
- computing 'VF' results in each iteration). This function is called when
- vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
- which VF=16 and nunits=4, so the number of copies required is 4):
-
- scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
-
- S1: x = load VS1.0: vx.0 = memref0 VS1.1
- VS1.1: vx.1 = memref1 VS1.2
- VS1.2: vx.2 = memref2 VS1.3
- VS1.3: vx.3 = memref3
-
- S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
- VSnew.1: vz1 = vx.1 + ... VSnew.2
- VSnew.2: vz2 = vx.2 + ... VSnew.3
- VSnew.3: vz3 = vx.3 + ...
-
- The vectorization of S1 is explained in vectorizable_load.
- The vectorization of S2:
- To create the first vector-stmt out of the 4 copies - VSnew.0 -
- the function 'vect_get_vec_def_for_operand' is called to
- get the relevant vector-def for each operand of S2. For operand x it
- returns the vector-def 'vx.0'.
-
- To create the remaining copies of the vector-stmt (VSnew.j), this
- function is called to get the relevant vector-def for each operand. It is
- obtained from the respective VS1.j stmt, which is recorded in the
- STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
-
- For example, to obtain the vector-def 'vx.1' in order to create the
- vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
- Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
- STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
- and return its def ('vx.1').
- Overall, to create the above sequence this function will be called 3 times:
- vx.1 = vect_get_vec_def_for_stmt_copy (vinfo, vx.0);
- vx.2 = vect_get_vec_def_for_stmt_copy (vinfo, vx.1);
- vx.3 = vect_get_vec_def_for_stmt_copy (vinfo, vx.2); */
-
-tree
-vect_get_vec_def_for_stmt_copy (vec_info *vinfo, tree vec_oprnd)
-{
- stmt_vec_info def_stmt_info = vinfo->lookup_def (vec_oprnd);
- if (!def_stmt_info)
- /* Do nothing; can reuse same def. */
- return vec_oprnd;
-
- def_stmt_info = STMT_VINFO_RELATED_STMT (def_stmt_info);
- gcc_assert (def_stmt_info);
- if (gphi *phi = dyn_cast <gphi *> (def_stmt_info->stmt))
- vec_oprnd = PHI_RESULT (phi);
- else
- vec_oprnd = gimple_get_lhs (def_stmt_info->stmt);
- return vec_oprnd;
-}
-
-
-/* Get vectorized definitions for the operands to create a copy of an original
- stmt. See vect_get_vec_def_for_stmt_copy () for details. */
-
-void
-vect_get_vec_defs_for_stmt_copy (vec_info *vinfo,
- vec<tree> *vec_oprnds0,
- vec<tree> *vec_oprnds1)
-{
- tree vec_oprnd = vec_oprnds0->pop ();
-
- vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd);
- vec_oprnds0->quick_push (vec_oprnd);
-
- if (vec_oprnds1 && vec_oprnds1->length ())
{
- vec_oprnd = vec_oprnds1->pop ();
- vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd);
- vec_oprnds1->quick_push (vec_oprnd);
+ def_stmt_info = vect_stmt_to_vectorize (def_stmt_info);
+ gcc_assert (STMT_VINFO_VEC_STMTS (def_stmt_info).length () == ncopies);
+ for (unsigned i = 0; i < ncopies; ++i)
+ vec_oprnds->quick_push (gimple_get_lhs
+ (STMT_VINFO_VEC_STMTS (def_stmt_info)[i]));
}
}
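/* Illustrative sketch, not part of the patch: a caller that previously
   chained vect_get_vec_def_for_operand and vect_get_vec_def_for_stmt_copy
   per copy now collects all defs up front:

     auto_vec<tree> vec_oprnds;
     vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
                                    op, &vec_oprnds);
     for (unsigned j = 0; j < ncopies; ++j)
       ... use vec_oprnds[j] in the J'th copy ...

   For invariants the same def is simply pushed NCOPIES times. */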
-/* Get vectorized definitions for OP0 and OP1. */
+/* Get vectorized definitions for OP0, and for OP1, OP2 and OP3 when they
+   are non-null, with VECTYPE0 .. VECTYPE3 giving the desired vector types
+   of invariant operands. */
void
-vect_get_vec_defs (tree op0, tree op1, stmt_vec_info stmt_info,
- vec<tree> *vec_oprnds0,
- vec<tree> *vec_oprnds1,
- slp_tree slp_node)
+vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
+ unsigned ncopies,
+ tree op0, vec<tree> *vec_oprnds0, tree vectype0,
+ tree op1, vec<tree> *vec_oprnds1, tree vectype1,
+ tree op2, vec<tree> *vec_oprnds2, tree vectype2,
+ tree op3, vec<tree> *vec_oprnds3, tree vectype3)
{
if (slp_node)
{
- auto_vec<vec<tree> > vec_defs (SLP_TREE_CHILDREN (slp_node).length ());
- vect_get_slp_defs (slp_node, &vec_defs, op1 ? 2 : 1);
- *vec_oprnds0 = vec_defs[0];
+ if (op0)
+ vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_oprnds0);
if (op1)
- *vec_oprnds1 = vec_defs[1];
+ vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[1], vec_oprnds1);
+ if (op2)
+ vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[2], vec_oprnds2);
+ if (op3)
+ vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[3], vec_oprnds3);
}
else
{
- tree vec_oprnd;
-
- vec_oprnds0->create (1);
- vec_oprnd = vect_get_vec_def_for_operand (op0, stmt_info);
- vec_oprnds0->quick_push (vec_oprnd);
-
+ if (op0)
+ vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
+ op0, vec_oprnds0, vectype0);
if (op1)
- {
- vec_oprnds1->create (1);
- vec_oprnd = vect_get_vec_def_for_operand (op1, stmt_info);
- vec_oprnds1->quick_push (vec_oprnd);
- }
+ vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
+ op1, vec_oprnds1, vectype1);
+ if (op2)
+ vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
+ op2, vec_oprnds2, vectype2);
+ if (op3)
+ vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
+ op3, vec_oprnds3, vectype3);
}
}
+void
+vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
+ unsigned ncopies,
+ tree op0, vec<tree> *vec_oprnds0,
+ tree op1, vec<tree> *vec_oprnds1,
+ tree op2, vec<tree> *vec_oprnds2,
+ tree op3, vec<tree> *vec_oprnds3)
+{
+ vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
+ op0, vec_oprnds0, NULL_TREE,
+ op1, vec_oprnds1, NULL_TREE,
+ op2, vec_oprnds2, NULL_TREE,
+ op3, vec_oprnds3, NULL_TREE);
+}
+
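/* Illustrative sketch, not part of the patch: assuming the declaration
   gives the trailing operand/def pairs null defaults (the call in
   vectorizable_bswap below passes only OP0), a two-operand statement
   is handled as

     auto_vec<tree> vec_oprnds0, vec_oprnds1;
     vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
                        op0, &vec_oprnds0, op1, &vec_oprnds1);  */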
/* Helper function called by vect_finish_replace_stmt and
-   vect_finish_stmt_generation. Set the location of the new
-   statement and create and return a stmt_vec_info for it. */
+   vect_finish_stmt_generation. Set the location of the new
+   statement and propagate the exception-handling region of the
+   scalar statement, if any, to it. */
-static stmt_vec_info
-vect_finish_stmt_generation_1 (stmt_vec_info stmt_info, gimple *vec_stmt)
+static void
+vect_finish_stmt_generation_1 (vec_info *,
+ stmt_vec_info stmt_info, gimple *vec_stmt)
{
- vec_info *vinfo = stmt_info->vinfo;
-
- stmt_vec_info vec_stmt_info = vinfo->add_stmt (vec_stmt);
-
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: %G", vec_stmt);
- gimple_set_location (vec_stmt, gimple_location (stmt_info->stmt));
-
- /* While EH edges will generally prevent vectorization, stmt might
- e.g. be in a must-not-throw region. Ensure newly created stmts
- that could throw are part of the same region. */
- int lp_nr = lookup_stmt_eh_lp (stmt_info->stmt);
- if (lp_nr != 0 && stmt_could_throw_p (cfun, vec_stmt))
- add_stmt_to_eh_lp (vec_stmt, lp_nr);
+ if (stmt_info)
+ {
+ gimple_set_location (vec_stmt, gimple_location (stmt_info->stmt));
- return vec_stmt_info;
+ /* While EH edges will generally prevent vectorization, stmt might
+ e.g. be in a must-not-throw region. Ensure newly created stmts
+ that could throw are part of the same region. */
+ int lp_nr = lookup_stmt_eh_lp (stmt_info->stmt);
+ if (lp_nr != 0 && stmt_could_throw_p (cfun, vec_stmt))
+ add_stmt_to_eh_lp (vec_stmt, lp_nr);
+ }
+ else
+ gcc_assert (!stmt_could_throw_p (cfun, vec_stmt));
}
/* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
-   which sets the same scalar result as STMT_INFO did. Create and return a
-   stmt_vec_info for VEC_STMT. */
+   which sets the same scalar result as STMT_INFO did. */
-stmt_vec_info
-vect_finish_replace_stmt (stmt_vec_info stmt_info, gimple *vec_stmt)
+void
+vect_finish_replace_stmt (vec_info *vinfo,
+ stmt_vec_info stmt_info, gimple *vec_stmt)
{
- gcc_assert (gimple_get_lhs (stmt_info->stmt) == gimple_get_lhs (vec_stmt));
+ gimple *scalar_stmt = vect_orig_stmt (stmt_info)->stmt;
+ gcc_assert (gimple_get_lhs (scalar_stmt) == gimple_get_lhs (vec_stmt));
- gimple_stmt_iterator gsi = gsi_for_stmt (stmt_info->stmt);
+ gimple_stmt_iterator gsi = gsi_for_stmt (scalar_stmt);
gsi_replace (&gsi, vec_stmt, true);
- return vect_finish_stmt_generation_1 (stmt_info, vec_stmt);
+ vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
}
/* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
-   before *GSI. Create and return a stmt_vec_info for VEC_STMT. */
+   before *GSI. */
-stmt_vec_info
-vect_finish_stmt_generation (stmt_vec_info stmt_info, gimple *vec_stmt,
+void
+vect_finish_stmt_generation (vec_info *vinfo,
+ stmt_vec_info stmt_info, gimple *vec_stmt,
gimple_stmt_iterator *gsi)
{
- gcc_assert (gimple_code (stmt_info->stmt) != GIMPLE_LABEL);
+ gcc_assert (!stmt_info || gimple_code (stmt_info->stmt) != GIMPLE_LABEL);
if (!gsi_end_p (*gsi)
&& gimple_has_mem_ops (vec_stmt))
}
}
gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
- return vect_finish_stmt_generation_1 (stmt_info, vec_stmt);
+ vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
}
/* We want to vectorize a call to combined function CFN with function
}
-static tree permute_vec_elements (tree, tree, tree, stmt_vec_info,
+static tree permute_vec_elements (vec_info *, tree, tree, tree, stmt_vec_info,
gimple_stmt_iterator *);
/* Check whether a load or store statement in the loop described by
- LOOP_VINFO is possible in a fully-masked loop. This is testing
- whether the vectorizer pass has the appropriate support, as well as
- whether the target does.
+ LOOP_VINFO is possible in a loop using partial vectors. This is
+ testing whether the vectorizer pass has the appropriate support,
+ as well as whether the target does.
VLS_TYPE says whether the statement is a load or store and VECTYPE
is the type of the vector being loaded or stored. MEMORY_ACCESS_TYPE
its arguments. If the load or store is conditional, SCALAR_MASK is the
condition under which it occurs.
- Clear LOOP_VINFO_CAN_FULLY_MASK_P if a fully-masked loop is not
- supported, otherwise record the required mask types. */
+ Clear LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P if a loop using partial
+ vectors is not supported, otherwise record the required rgroup control
+ types. */
static void
-check_load_store_masking (loop_vec_info loop_vinfo, tree vectype,
- vec_load_store_type vls_type, int group_size,
- vect_memory_access_type memory_access_type,
- gather_scatter_info *gs_info, tree scalar_mask)
+check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
+ vec_load_store_type vls_type,
+ int group_size,
+ vect_memory_access_type
+ memory_access_type,
+ gather_scatter_info *gs_info,
+ tree scalar_mask)
{
/* Invariant loads need no special support. */
if (memory_access_type == VMAT_INVARIANT)
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "can't use a fully-masked loop because the"
- " target doesn't have an appropriate masked"
+ "can't operate on partial vectors because"
+ " the target doesn't have an appropriate"
" load/store-lanes instruction.\n");
- LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
+ LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
return;
}
unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "can't use a fully-masked loop because the"
- " target doesn't have an appropriate masked"
+ "can't operate on partial vectors because"
+ " the target doesn't have an appropriate"
" gather load or scatter store instruction.\n");
- LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
+ LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
return;
}
unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
scalar loop. We need more work to support other mappings. */
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "can't use a fully-masked loop because an access"
- " isn't contiguous.\n");
- LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
+ "can't operate on partial vectors because an"
+ " access isn't contiguous.\n");
+ LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
return;
}
machine_mode mask_mode;
- if (!(targetm.vectorize.get_mask_mode
- (GET_MODE_NUNITS (vecmode),
- GET_MODE_SIZE (vecmode)).exists (&mask_mode))
+ if (!VECTOR_MODE_P (vecmode)
+ || !targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode)
|| !can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
{
if (dump_enabled_p ())
"can't use a fully-masked loop because the target"
" doesn't have the appropriate masked load or"
" store.\n");
- LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
+ LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
return;
}
/* We might load more scalars than we need for permuting SLP loads.
is negative, 0 if it is zero, and 1 if it is greater than zero. */
static int
-compare_step_with_zero (stmt_vec_info stmt_info)
+compare_step_with_zero (vec_info *vinfo, stmt_vec_info stmt_info)
{
dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
- return tree_int_cst_compare (vect_dr_behavior (dr_info)->step,
+ return tree_int_cst_compare (vect_dr_behavior (vinfo, dr_info)->step,
size_zero_node);
}
accesses consecutive elements with a negative step. */
static vect_memory_access_type
-get_negative_load_store_type (stmt_vec_info stmt_info, tree vectype,
+get_negative_load_store_type (vec_info *vinfo,
+ stmt_vec_info stmt_info, tree vectype,
vec_load_store_type vls_type,
unsigned int ncopies)
{
return VMAT_ELEMENTWISE;
}
- alignment_support_scheme = vect_supportable_dr_alignment (dr_info, false);
+ alignment_support_scheme = vect_supportable_dr_alignment (vinfo,
+ dr_info, false);
if (alignment_support_scheme != dr_aligned
&& alignment_support_scheme != dr_unaligned_supported)
{
gcc_unreachable ();
}
+/* Function VECTOR_VECTOR_COMPOSITION_TYPE
+
+   This function returns a vector type which can be composed with NELTS
+   pieces, whose type is recorded in PTYPE. VTYPE should be a vector type
+   with the same vector size as the returned vector. It first checks
+   whether the target supports constructing the vector from piece-sized
+   vector modes; if that fails, it checks whether a piece-sized scalar
+   integer mode can be used for the construction instead. It returns
+   NULL_TREE if no suitable composition can be found.
+
+   For example, for (vtype=V16QI, nelts=4), we can probably get:
+     - V16QI with PTYPE V4QI.
+     - V4SI with PTYPE SI.
+     - NULL_TREE. */
+
+static tree
+vector_vector_composition_type (tree vtype, poly_uint64 nelts, tree *ptype)
+{
+ gcc_assert (VECTOR_TYPE_P (vtype));
+ gcc_assert (known_gt (nelts, 0U));
+
+ machine_mode vmode = TYPE_MODE (vtype);
+ if (!VECTOR_MODE_P (vmode))
+ return NULL_TREE;
+
+ poly_uint64 vbsize = GET_MODE_BITSIZE (vmode);
+ unsigned int pbsize;
+ if (constant_multiple_p (vbsize, nelts, &pbsize))
+ {
+ /* First check if vec_init optab supports construction from
+ vector pieces directly. */
+ scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vtype));
+ poly_uint64 inelts = pbsize / GET_MODE_BITSIZE (elmode);
+ machine_mode rmode;
+ if (related_vector_mode (vmode, elmode, inelts).exists (&rmode)
+ && (convert_optab_handler (vec_init_optab, vmode, rmode)
+ != CODE_FOR_nothing))
+ {
+ *ptype = build_vector_type (TREE_TYPE (vtype), inelts);
+ return vtype;
+ }
+
+      /* Otherwise check whether an integer type of the same piece size
+	 exists and whether the vec_init optab supports construction from
+	 it directly. */
+ if (int_mode_for_size (pbsize, 0).exists (&elmode)
+ && related_vector_mode (vmode, elmode, nelts).exists (&rmode)
+ && (convert_optab_handler (vec_init_optab, rmode, elmode)
+ != CODE_FOR_nothing))
+ {
+ *ptype = build_nonstandard_integer_type (pbsize, 1);
+ return build_vector_type (*ptype, nelts);
+ }
+ }
+
+ return NULL_TREE;
+}
+
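/* Illustrative sketch, not part of the patch: get_group_load_store_type
   below uses this helper to replace the open-coded mode_for_vector test
   when checking whether an over-long group can be loaded as two halves:

     tree half_vtype;
     if (vector_vector_composition_type (vectype, 2, &half_vtype)
         != NULL_TREE)
       ... load the GROUP_SIZE - GAP elements as one HALF_VTYPE piece ...  */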
/* A subroutine of get_load_store_type, with a subset of the same
arguments. Handle the case where STMT_INFO is part of a grouped load
or store.
as well as at the end. */
static bool
-get_group_load_store_type (stmt_vec_info stmt_info, tree vectype, bool slp,
+get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
+ tree vectype, bool slp,
bool masked_p, vec_load_store_type vls_type,
vect_memory_access_type *memory_access_type,
gather_scatter_info *gs_info)
{
- vec_info *vinfo = stmt_info->vinfo;
- loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
+ loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
dr_vec_info *first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
by simply loading half of the vector only. Usually
the construction with an upper zero half will be elided. */
dr_alignment_support alignment_support_scheme;
- scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
- machine_mode vmode;
+ tree half_vtype;
if (overrun_p
&& !masked_p
&& (((alignment_support_scheme
- = vect_supportable_dr_alignment (first_dr_info, false)))
+ = vect_supportable_dr_alignment (vinfo,
+ first_dr_info, false)))
== dr_aligned
|| alignment_support_scheme == dr_unaligned_supported)
&& known_eq (nunits, (group_size - gap) * 2)
&& known_eq (nunits, group_size)
- && mode_for_vector (elmode, (group_size - gap)).exists (&vmode)
- && VECTOR_MODE_P (vmode)
- && targetm.vector_mode_supported_p (vmode)
- && (convert_optab_handler (vec_init_optab,
- TYPE_MODE (vectype), vmode)
- != CODE_FOR_nothing))
+ && (vector_vector_composition_type (vectype, 2, &half_vtype)
+ != NULL_TREE))
overrun_p = false;
if (overrun_p && !can_overrun_p)
"Peeling for outer loop is not supported\n");
return false;
}
- int cmp = compare_step_with_zero (stmt_info);
+ int cmp = compare_step_with_zero (vinfo, stmt_info);
if (cmp < 0)
*memory_access_type = get_negative_load_store_type
- (stmt_info, vectype, vls_type, 1);
+ (vinfo, stmt_info, vectype, vls_type, 1);
else
{
gcc_assert (!loop_vinfo || cmp > 0);
if (!STMT_VINFO_STRIDED_P (first_stmt_info)
&& (can_overrun_p || !would_overrun_p)
- && compare_step_with_zero (stmt_info) > 0)
+ && compare_step_with_zero (vinfo, stmt_info) > 0)
{
/* First cope with the degenerate case of a single-element
vector. */
NCOPIES is the number of vector statements that will be needed. */
static bool
-get_load_store_type (stmt_vec_info stmt_info, tree vectype, bool slp,
+get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
+ tree vectype, bool slp,
bool masked_p, vec_load_store_type vls_type,
unsigned int ncopies,
vect_memory_access_type *memory_access_type,
gather_scatter_info *gs_info)
{
- vec_info *vinfo = stmt_info->vinfo;
- loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
+ loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
{
}
else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
{
- if (!get_group_load_store_type (stmt_info, vectype, slp, masked_p,
+ if (!get_group_load_store_type (vinfo, stmt_info, vectype, slp, masked_p,
vls_type, memory_access_type, gs_info))
return false;
}
}
else
{
- int cmp = compare_step_with_zero (stmt_info);
+ int cmp = compare_step_with_zero (vinfo, stmt_info);
if (cmp < 0)
*memory_access_type = get_negative_load_store_type
- (stmt_info, vectype, vls_type, ncopies);
+ (vinfo, stmt_info, vectype, vls_type, ncopies);
else if (cmp == 0)
{
gcc_assert (vls_type == VLS_LOAD);
}
/* Return true if boolean argument MASK is suitable for vectorizing
- conditional load or store STMT_INFO. When returning true, store the type
+ conditional operation STMT_INFO. When returning true, store the type
of the definition in *MASK_DT_OUT and the type of the vectorized mask
in *MASK_VECTYPE_OUT. */
static bool
-vect_check_load_store_mask (stmt_vec_info stmt_info, tree mask,
- vect_def_type *mask_dt_out,
- tree *mask_vectype_out)
+vect_check_scalar_mask (vec_info *vinfo, stmt_vec_info stmt_info, tree mask,
+ vect_def_type *mask_dt_out,
+ tree *mask_vectype_out)
{
- vec_info *vinfo = stmt_info->vinfo;
if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask)))
{
if (dump_enabled_p ())
enum vect_def_type mask_dt;
tree mask_vectype;
- if (!vect_is_simple_use (mask, stmt_info->vinfo, &mask_dt, &mask_vectype))
+ if (!vect_is_simple_use (mask, vinfo, &mask_dt, &mask_vectype))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
*RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
static bool
-vect_check_store_rhs (stmt_vec_info stmt_info, tree rhs,
+vect_check_store_rhs (vec_info *vinfo, stmt_vec_info stmt_info,
+ slp_tree slp_node, tree rhs,
vect_def_type *rhs_dt_out, tree *rhs_vectype_out,
vec_load_store_type *vls_type_out)
{
enum vect_def_type rhs_dt;
tree rhs_vectype;
- if (!vect_is_simple_use (rhs, stmt_info->vinfo, &rhs_dt, &rhs_vectype))
+ slp_tree slp_op;
+ if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0,
+ &rhs, &slp_op, &rhs_dt, &rhs_vectype))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
floats are interpreted as a bitmask. */
static tree
-vect_build_all_ones_mask (stmt_vec_info stmt_info, tree masktype)
+vect_build_all_ones_mask (vec_info *vinfo,
+ stmt_vec_info stmt_info, tree masktype)
{
if (TREE_CODE (masktype) == INTEGER_TYPE)
return build_int_cst (masktype, -1);
{
tree mask = build_int_cst (TREE_TYPE (masktype), -1);
mask = build_vector_from_val (masktype, mask);
- return vect_init_vector (stmt_info, mask, masktype, NULL);
+ return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
}
else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
{
real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
tree mask = build_real (TREE_TYPE (masktype), r);
mask = build_vector_from_val (masktype, mask);
- return vect_init_vector (stmt_info, mask, masktype, NULL);
+ return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
}
gcc_unreachable ();
}
STMT_INFO as a gather load. */
static tree
-vect_build_zero_merge_argument (stmt_vec_info stmt_info, tree vectype)
+vect_build_zero_merge_argument (vec_info *vinfo,
+ stmt_vec_info stmt_info, tree vectype)
{
tree merge;
if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
else
gcc_unreachable ();
merge = build_vector_from_val (vectype, merge);
- return vect_init_vector (stmt_info, merge, vectype, NULL);
+ return vect_init_vector (vinfo, stmt_info, merge, vectype, NULL);
}
/* Build a gather load call while vectorizing STMT_INFO. Insert new
MASK is null. */
static void
-vect_build_gather_load_calls (stmt_vec_info stmt_info,
+vect_build_gather_load_calls (vec_info *vinfo, stmt_vec_info stmt_info,
gimple_stmt_iterator *gsi,
- stmt_vec_info *vec_stmt,
+ gimple **vec_stmt,
gather_scatter_info *gs_info,
tree mask)
{
- loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
+ loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
tree vectype = STMT_VINFO_VECTYPE (stmt_info);
poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
|| TREE_CODE (masktype) == INTEGER_TYPE
|| types_compatible_p (srctype, masktype)));
if (mask && TREE_CODE (masktype) == INTEGER_TYPE)
- masktype = build_same_sized_truth_vector_type (srctype);
+ masktype = truth_type_for (srctype);
tree mask_halftype = masktype;
tree perm_mask = NULL_TREE;
mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
}
else if (mask)
- mask_halftype
- = build_same_sized_truth_vector_type (gs_info->offset_vectype);
+ mask_halftype = truth_type_for (gs_info->offset_vectype);
}
else
gcc_unreachable ();
tree src_op = NULL_TREE;
tree mask_op = NULL_TREE;
tree prev_res = NULL_TREE;
- stmt_vec_info prev_stmt_info = NULL;
if (!mask)
{
- src_op = vect_build_zero_merge_argument (stmt_info, rettype);
- mask_op = vect_build_all_ones_mask (stmt_info, masktype);
+ src_op = vect_build_zero_merge_argument (vinfo, stmt_info, rettype);
+ mask_op = vect_build_all_ones_mask (vinfo, stmt_info, masktype);
}
+ auto_vec<tree> vec_oprnds0;
+ auto_vec<tree> vec_masks;
+ vect_get_vec_defs_for_operand (vinfo, stmt_info,
+ modifier == WIDEN ? ncopies / 2 : ncopies,
+ gs_info->offset, &vec_oprnds0);
+ if (mask)
+ vect_get_vec_defs_for_operand (vinfo, stmt_info,
+ modifier == NARROW ? ncopies / 2 : ncopies,
+ mask, &vec_masks);
for (int j = 0; j < ncopies; ++j)
{
tree op, var;
if (modifier == WIDEN && (j & 1))
- op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
+ op = permute_vec_elements (vinfo, vec_oprnd0, vec_oprnd0,
perm_mask, stmt_info, gsi);
- else if (j == 0)
- op = vec_oprnd0
- = vect_get_vec_def_for_operand (gs_info->offset, stmt_info);
else
- op = vec_oprnd0 = vect_get_vec_def_for_stmt_copy (loop_vinfo,
- vec_oprnd0);
+ op = vec_oprnd0 = vec_oprnds0[modifier == WIDEN ? j / 2 : j];
if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
{
var = vect_get_new_ssa_name (idxtype, vect_simple_var);
op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
- vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
+ vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
op = var;
}
if (mask)
{
if (mask_perm_mask && (j & 1))
- mask_op = permute_vec_elements (mask_op, mask_op,
+ mask_op = permute_vec_elements (vinfo, mask_op, mask_op,
mask_perm_mask, stmt_info, gsi);
else
{
- if (j == 0)
- vec_mask = vect_get_vec_def_for_operand (mask, stmt_info);
- else if (modifier != NARROW || (j & 1) == 0)
- vec_mask = vect_get_vec_def_for_stmt_copy (loop_vinfo,
- vec_mask);
+ if (modifier == NARROW)
+ {
+ if ((j & 1) == 0)
+ vec_mask = vec_masks[j / 2];
+ }
+ else
+ vec_mask = vec_masks[j];
mask_op = vec_mask;
if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
gassign *new_stmt
= gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
- vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
+ vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
mask_op = var;
}
}
= gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
: VEC_UNPACK_LO_EXPR,
mask_op);
- vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
+ vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
mask_op = var;
}
src_op = mask_op;
mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_op);
gassign *new_stmt
= gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
- vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
+ vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
mask_arg = var;
if (!useless_type_conversion_p (real_masktype, utype))
{
<= TYPE_PRECISION (real_masktype));
var = vect_get_new_ssa_name (real_masktype, vect_scalar_var);
new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
- vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
+ vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
mask_arg = var;
}
src_op = build_zero_cst (srctype);
}
- gcall *new_call = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
- mask_arg, scale);
+ gimple *new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
+ mask_arg, scale);
- stmt_vec_info new_stmt_info;
if (!useless_type_conversion_p (vectype, rettype))
{
gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
TYPE_VECTOR_SUBPARTS (rettype)));
op = vect_get_new_ssa_name (rettype, vect_simple_var);
- gimple_call_set_lhs (new_call, op);
- vect_finish_stmt_generation (stmt_info, new_call, gsi);
+ gimple_call_set_lhs (new_stmt, op);
+ vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
var = make_ssa_name (vec_dest);
op = build1 (VIEW_CONVERT_EXPR, vectype, op);
- gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
- new_stmt_info
- = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
+ new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
+ vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
}
else
{
- var = make_ssa_name (vec_dest, new_call);
- gimple_call_set_lhs (new_call, var);
- new_stmt_info
- = vect_finish_stmt_generation (stmt_info, new_call, gsi);
+ var = make_ssa_name (vec_dest, new_stmt);
+ gimple_call_set_lhs (new_stmt, var);
+ vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
}
if (modifier == NARROW)
prev_res = var;
continue;
}
- var = permute_vec_elements (prev_res, var, perm_mask,
+ var = permute_vec_elements (vinfo, prev_res, var, perm_mask,
stmt_info, gsi);
- new_stmt_info = loop_vinfo->lookup_def (var);
+ new_stmt = SSA_NAME_DEF_STMT (var);
}
- if (prev_stmt_info == NULL)
- STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
- else
- STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
- prev_stmt_info = new_stmt_info;
+ STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
}
+ *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
}
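/* Note on the loop above: the defs are now precomputed, so copy J simply
   indexes them instead of chaining vect_get_vec_def_for_stmt_copy: for
   WIDEN it uses vec_oprnds0[j / 2] and permutes the high half on odd J;
   for NARROW the mask def vec_masks[j / 2] is refreshed on even J only. */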
/* Prepare the base and offset in GS_INFO for vectorization.
containing loop. */
static void
-vect_get_gather_scatter_ops (class loop *loop, stmt_vec_info stmt_info,
+vect_get_gather_scatter_ops (vec_info *vinfo,
+ class loop *loop, stmt_vec_info stmt_info,
gather_scatter_info *gs_info,
- tree *dataref_ptr, tree *vec_offset)
+ tree *dataref_ptr, vec<tree> *vec_offset,
+ unsigned ncopies)
{
gimple_seq stmts = NULL;
*dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
gcc_assert (!new_bb);
}
- *vec_offset = vect_get_vec_def_for_operand (gs_info->offset, stmt_info,
- gs_info->offset_vectype);
+ vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies, gs_info->offset,
+ vec_offset, gs_info->offset_vectype);
}
/* Prepare to implement a grouped or strided load or store using
gimple_seq stmts;
tree bump = size_binop (MULT_EXPR,
- fold_convert (sizetype, DR_STEP (dr)),
+ fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
size_int (TYPE_VECTOR_SUBPARTS (vectype)));
*dataref_bump = force_gimple_operand (bump, &stmts, true, NULL_TREE);
if (stmts)
offset_type = TREE_TYPE (gs_info->offset_vectype);
/* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
- tree step = size_binop (EXACT_DIV_EXPR, DR_STEP (dr),
+ tree step = size_binop (EXACT_DIV_EXPR, unshare_expr (DR_STEP (dr)),
ssize_int (gs_info->scale));
step = fold_convert (offset_type, step);
step = force_gimple_operand (step, &stmts, true, NULL_TREE);
vectorization. */
static tree
-vect_get_data_ptr_increment (dr_vec_info *dr_info, tree aggr_type,
+vect_get_data_ptr_increment (vec_info *vinfo,
+ dr_vec_info *dr_info, tree aggr_type,
vect_memory_access_type memory_access_type)
{
if (memory_access_type == VMAT_INVARIANT)
return size_zero_node;
tree iv_step = TYPE_SIZE_UNIT (aggr_type);
- tree step = vect_dr_behavior (dr_info)->step;
+ tree step = vect_dr_behavior (vinfo, dr_info)->step;
if (tree_int_cst_sgn (step) == -1)
iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
return iv_step;
}
-/* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}. */
+/* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64,128}. */
static bool
-vectorizable_bswap (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
- stmt_vec_info *vec_stmt, slp_tree slp_node,
+vectorizable_bswap (vec_info *vinfo,
+ stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
+ gimple **vec_stmt, slp_tree slp_node,
+ slp_tree *slp_op,
tree vectype_in, stmt_vector_for_cost *cost_vec)
{
tree op, vectype;
gcall *stmt = as_a <gcall *> (stmt_info->stmt);
- vec_info *vinfo = stmt_info->vinfo;
- loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
+ loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
unsigned ncopies;
op = gimple_call_arg (stmt, 0);
if (! vec_stmt)
{
+ if (slp_node
+ && !vect_maybe_update_slp_op_vectype (slp_op[0], vectype_in))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "incompatible vector types for invariants\n");
+ return false;
+ }
+
STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
DUMP_VECT_SCOPE ("vectorizable_bswap");
if (! slp_node)
/* Transform. */
vec<tree> vec_oprnds = vNULL;
- stmt_vec_info new_stmt_info = NULL;
- stmt_vec_info prev_stmt_info = NULL;
- for (unsigned j = 0; j < ncopies; j++)
- {
- /* Handle uses. */
- if (j == 0)
- vect_get_vec_defs (op, NULL, stmt_info, &vec_oprnds, NULL, slp_node);
- else
- vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds, NULL);
-
- /* Arguments are ready. create the new vector stmt. */
- unsigned i;
- tree vop;
- FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
- {
- gimple *new_stmt;
- tree tem = make_ssa_name (char_vectype);
- new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
- char_vectype, vop));
- vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
- tree tem2 = make_ssa_name (char_vectype);
- new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
- tem, tem, bswap_vconst);
- vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
- tem = make_ssa_name (vectype);
- new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
- vectype, tem2));
- new_stmt_info
- = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
- if (slp_node)
- SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
- }
-
+ vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
+ op, &vec_oprnds);
+  /* Arguments are ready. Create the new vector stmt. */
+ unsigned i;
+ tree vop;
+ FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
+ {
+ gimple *new_stmt;
+ tree tem = make_ssa_name (char_vectype);
+ new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
+ char_vectype, vop));
+ vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+ tree tem2 = make_ssa_name (char_vectype);
+ new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
+ tem, tem, bswap_vconst);
+ vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+ tem = make_ssa_name (vectype);
+ new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
+ vectype, tem2));
+ vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
if (slp_node)
- continue;
-
- if (j == 0)
- STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
+ SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
else
- STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
-
- prev_stmt_info = new_stmt_info;
+ STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
}
+ if (!slp_node)
+ *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
+
vec_oprnds.release ();
return true;
}
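/* Illustrative sketch of the statements generated above for one vector
   operand VOP (names and types illustrative, shown for a 4 x 32-bit case):

     tem  = VIEW_CONVERT_EXPR<V16QI> (vop);
     tem2 = VEC_PERM_EXPR <tem, tem, bswap_vconst>;
     res  = VIEW_CONVERT_EXPR<V4SI> (tem2);

   i.e. the byte swap is done as a byte permutation in a same-sized
   vector of chars. */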
*CONVERT_CODE. */
static bool
-simple_integer_narrowing (vec_info *vinfo, tree vectype_out, tree vectype_in,
+simple_integer_narrowing (tree vectype_out, tree vectype_in,
tree_code *convert_code)
{
if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
tree_code code;
int multi_step_cvt = 0;
auto_vec <tree, 8> interm_types;
- if (!supportable_narrowing_operation (vinfo, NOP_EXPR, vectype_out,
- vectype_in, &code, &multi_step_cvt,
- &interm_types)
+ if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
+ &code, &multi_step_cvt, &interm_types)
|| multi_step_cvt)
return false;
Return true if STMT_INFO is vectorizable in this way. */
static bool
-vectorizable_call (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
- stmt_vec_info *vec_stmt, slp_tree slp_node,
+vectorizable_call (vec_info *vinfo,
+ stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
+ gimple **vec_stmt, slp_tree slp_node,
stmt_vector_for_cost *cost_vec)
{
gcall *stmt;
tree scalar_dest;
tree op;
tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
- stmt_vec_info prev_stmt_info;
tree vectype_out, vectype_in;
poly_uint64 nunits_in;
poly_uint64 nunits_out;
- loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
- bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
- vec_info *vinfo = stmt_info->vinfo;
+ loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
+ bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
tree fndecl, new_temp, rhs_type;
enum vect_def_type dt[4]
= { vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type,
vect_unknown_def_type };
tree vectypes[ARRAY_SIZE (dt)] = {};
+ slp_tree slp_op[ARRAY_SIZE (dt)] = {};
int ndts = ARRAY_SIZE (dt);
int ncopies, j;
auto_vec<tree, 8> vargs;
vectype_in = NULL_TREE;
nargs = gimple_call_num_args (stmt);
- /* Bail out if the function has more than three arguments, we do not have
+  /* Bail out if the function has more than four arguments; we do not have
interesting builtin functions to vectorize with more than two arguments
except for fma. No arguments is also not good. */
if (nargs == 0 || nargs > 4)
for (i = 0; i < nargs; i++)
{
- op = gimple_call_arg (stmt, i);
- if (!vect_is_simple_use (op, vinfo, &dt[i], &vectypes[i]))
+ if ((int) i == mask_opno)
+ {
+ op = gimple_call_arg (stmt, i);
+ if (!vect_check_scalar_mask (vinfo,
+ stmt_info, op, &dt[i], &vectypes[i]))
+ return false;
+ continue;
+ }
+
+ if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
+ i, &op, &slp_op[i], &dt[i], &vectypes[i]))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
return false;
}
- /* Skip the mask argument to an internal function. This operand
- has been converted via a pattern if necessary. */
- if ((int) i == mask_opno)
- continue;
-
/* We can only handle calls with arguments of the same type. */
if (rhs_type
&& !types_compatible_p (rhs_type, TREE_TYPE (op)))
return false;
}
}
- /* If all arguments are external or constant defs use a vector type with
- the same size as the output vector type. */
+ /* If all arguments are external or constant defs, infer the vector type
+ from the scalar type. */
if (!vectype_in)
- vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
+ vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
if (vec_stmt)
gcc_assert (vectype_in);
if (!vectype_in)
return false;
}
+ /* FORNOW: we don't yet support mixtures of vector sizes for calls,
+ just mixtures of nunits. E.g. DI->SI versions of __builtin_ctz*
+ are traditionally vectorized as two VnDI->VnDI IFN_CTZs followed
+ by a pack of the two vectors into an SI vector. We would need
+ separate code to handle direct VnDI->VnSI IFN_CTZs. */
+ if (TYPE_SIZE (vectype_in) != TYPE_SIZE (vectype_out))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "mismatched vector sizes %T and %T\n",
+ vectype_in, vectype_out);
+ return false;
+ }
+
+ if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
+ != VECTOR_BOOLEAN_TYPE_P (vectype_in))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "mixed mask and nonmask vector types\n");
+ return false;
+ }
/* FORNOW */
nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
if (cfn != CFN_LAST
&& (modifier == NONE
|| (modifier == NARROW
- && simple_integer_narrowing (vinfo, vectype_out, vectype_in,
+ && simple_integer_narrowing (vectype_out, vectype_in,
&convert_code))))
ifn = vectorizable_internal_function (cfn, callee, vectype_out,
vectype_in);
else if (modifier == NONE
&& (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
|| gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
- || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)))
- return vectorizable_bswap (stmt_info, gsi, vec_stmt, slp_node,
- vectype_in, cost_vec);
+ || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)
+ || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP128)))
+ return vectorizable_bswap (vinfo, stmt_info, gsi, vec_stmt, slp_node,
+ slp_op, vectype_in, cost_vec);
else
{
if (dump_enabled_p ())
vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
if (!vec_stmt) /* transformation not required. */
{
+ if (slp_node)
+ for (i = 0; i < nargs; ++i)
+ if (!vect_maybe_update_slp_op_vectype (slp_op[i], vectype_in))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "incompatible vector types for invariants\n");
+ return false;
+ }
STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
DUMP_VECT_SCOPE ("vectorizable_call");
- vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
+ vect_model_simple_cost (vinfo, stmt_info,
+ ncopies, dt, ndts, slp_node, cost_vec);
if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
record_stmt_cost (cost_vec, ncopies / 2,
vec_promote_demote, stmt_info, 0, vect_body);
bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
- stmt_vec_info new_stmt_info = NULL;
- prev_stmt_info = NULL;
if (modifier == NONE || ifn != IFN_LAST)
{
tree prev_res = NULL_TREE;
vargs.safe_grow (nargs);
orig_vargs.safe_grow (nargs);
+ auto_vec<vec<tree> > vec_defs (nargs);
for (j = 0; j < ncopies; ++j)
{
/* Build argument list for the vectorized call. */
if (slp_node)
{
- auto_vec<vec<tree> > vec_defs (nargs);
vec<tree> vec_oprnds0;
- vect_get_slp_defs (slp_node, &vec_defs);
+ vect_get_slp_defs (vinfo, slp_node, &vec_defs);
vec_oprnds0 = vec_defs[0];
/* Arguments are ready. Create the new vector stmt. */
vec<tree> vec_oprndsk = vec_defs[k];
vargs[k] = vec_oprndsk[i];
}
+ gimple *new_stmt;
if (modifier == NARROW)
{
/* We don't define any narrowing conditional functions
= gimple_build_call_internal_vec (ifn, vargs);
gimple_call_set_lhs (call, half_res);
gimple_call_set_nothrow (call, true);
- vect_finish_stmt_generation (stmt_info, call, gsi);
+ vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
if ((i & 1) == 0)
{
prev_res = half_res;
continue;
}
new_temp = make_ssa_name (vec_dest);
- gimple *new_stmt
- = gimple_build_assign (new_temp, convert_code,
- prev_res, half_res);
- new_stmt_info
- = vect_finish_stmt_generation (stmt_info, new_stmt,
- gsi);
+ new_stmt = gimple_build_assign (new_temp, convert_code,
+ prev_res, half_res);
+ vect_finish_stmt_generation (vinfo, stmt_info,
+ new_stmt, gsi);
}
else
{
new_temp = make_ssa_name (vec_dest, call);
gimple_call_set_lhs (call, new_temp);
gimple_call_set_nothrow (call, true);
- new_stmt_info
- = vect_finish_stmt_generation (stmt_info, call, gsi);
+ vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
+ new_stmt = call;
}
- SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
- }
-
- for (i = 0; i < nargs; i++)
- {
- vec<tree> vec_oprndsi = vec_defs[i];
- vec_oprndsi.release ();
+ SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
}
continue;
}
- if (mask_opno >= 0 && !vectypes[mask_opno])
- {
- gcc_assert (modifier != WIDEN);
- vectypes[mask_opno]
- = build_same_sized_truth_vector_type (vectype_in);
- }
-
for (i = 0; i < nargs; i++)
{
op = gimple_call_arg (stmt, i);
if (j == 0)
- vec_oprnd0
- = vect_get_vec_def_for_operand (op, stmt_info, vectypes[i]);
- else
- vec_oprnd0
- = vect_get_vec_def_for_stmt_copy (vinfo, orig_vargs[i]);
-
- orig_vargs[i] = vargs[i] = vec_oprnd0;
+ {
+ vec_defs.quick_push (vNULL);
+ vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
+ op, &vec_defs[i]);
+ }
+ orig_vargs[i] = vargs[i] = vec_defs[i][j];
}
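/* A sketch of the def-gathering model used above: instead of chaining
   copies through STMT_VINFO_RELATED_STMT and fetching each copy's def
   with vect_get_vec_def_for_stmt_copy, all NCOPIES defs of an operand
   are obtained up front on the first iteration,

     vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
                                    op, &vec_defs[i]);

   and copy J of the call simply reads vec_defs[i][j]. The vectors are
   released once the copy loop is done. */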
if (mask_opno >= 0 && masked_loop_p)
vargs[mask_opno], gsi);
}
+ gimple *new_stmt;
if (cfn == CFN_GOMP_SIMD_LANE)
{
tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
tree new_var
= vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
gimple *init_stmt = gimple_build_assign (new_var, cst);
- vect_init_vector_1 (stmt_info, init_stmt, NULL);
+ vect_init_vector_1 (vinfo, stmt_info, init_stmt, NULL);
new_temp = make_ssa_name (vec_dest);
- gimple *new_stmt = gimple_build_assign (new_temp, new_var);
- new_stmt_info
- = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
+ new_stmt = gimple_build_assign (new_temp, new_var);
+ vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
}
else if (modifier == NARROW)
{
gcall *call = gimple_build_call_internal_vec (ifn, vargs);
gimple_call_set_lhs (call, half_res);
gimple_call_set_nothrow (call, true);
- vect_finish_stmt_generation (stmt_info, call, gsi);
+ vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
if ((j & 1) == 0)
{
prev_res = half_res;
continue;
}
new_temp = make_ssa_name (vec_dest);
- gassign *new_stmt = gimple_build_assign (new_temp, convert_code,
- prev_res, half_res);
- new_stmt_info
- = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
+ new_stmt = gimple_build_assign (new_temp, convert_code,
+ prev_res, half_res);
+ vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
}
else
{
new_temp = make_ssa_name (vec_dest, call);
gimple_call_set_lhs (call, new_temp);
gimple_call_set_nothrow (call, true);
- new_stmt_info
- = vect_finish_stmt_generation (stmt_info, call, gsi);
+ vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
+ new_stmt = call;
}
if (j == (modifier == NARROW ? 1 : 0))
- STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
- else
- STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
-
- prev_stmt_info = new_stmt_info;
+ *vec_stmt = new_stmt;
+ STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
+ }
+ for (i = 0; i < nargs; i++)
+ {
+ vec<tree> vec_oprndsi = vec_defs[i];
+ vec_oprndsi.release ();
}
}
else if (modifier == NARROW)
{
+ auto_vec<vec<tree> > vec_defs (nargs);
/* We don't define any narrowing conditional functions at present. */
gcc_assert (mask_opno < 0);
for (j = 0; j < ncopies; ++j)
if (slp_node)
{
- auto_vec<vec<tree> > vec_defs (nargs);
vec<tree> vec_oprnds0;
- vect_get_slp_defs (slp_node, &vec_defs);
+ vect_get_slp_defs (vinfo, slp_node, &vec_defs);
vec_oprnds0 = vec_defs[0];
/* Arguments are ready. Create the new vector stmt. */
new_temp = make_ssa_name (vec_dest, call);
gimple_call_set_lhs (call, new_temp);
gimple_call_set_nothrow (call, true);
- new_stmt_info
- = vect_finish_stmt_generation (stmt_info, call, gsi);
- SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
- }
-
- for (i = 0; i < nargs; i++)
- {
- vec<tree> vec_oprndsi = vec_defs[i];
- vec_oprndsi.release ();
+ vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
+ SLP_TREE_VEC_STMTS (slp_node).quick_push (call);
}
continue;
}
op = gimple_call_arg (stmt, i);
if (j == 0)
{
- vec_oprnd0
- = vect_get_vec_def_for_operand (op, stmt_info,
- vectypes[i]);
- vec_oprnd1
- = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
- }
- else
- {
- vec_oprnd1 = gimple_call_arg (new_stmt_info->stmt,
- 2 * i + 1);
- vec_oprnd0
- = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd1);
- vec_oprnd1
- = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
+ vec_defs.quick_push (vNULL);
+ vect_get_vec_defs_for_operand (vinfo, stmt_info, 2 * ncopies,
+ op, &vec_defs[i], vectypes[i]);
}
+ vec_oprnd0 = vec_defs[i][2 * j];
+ vec_oprnd1 = vec_defs[i][2 * j + 1];
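/* Each narrowing copy consumes two input defs, hence the request for
   2 * ncopies defs per argument above: with ncopies == 2, copy j == 0
   reads defs 0 and 1 and copy j == 1 reads defs 2 and 3. */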
vargs.quick_push (vec_oprnd0);
vargs.quick_push (vec_oprnd1);
gcall *new_stmt = gimple_build_call_vec (fndecl, vargs);
new_temp = make_ssa_name (vec_dest, new_stmt);
gimple_call_set_lhs (new_stmt, new_temp);
- new_stmt_info
- = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
-
- if (j == 0)
- STMT_VINFO_VEC_STMT (stmt_info) = new_stmt_info;
- else
- STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
+ vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
- prev_stmt_info = new_stmt_info;
+ STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
}
- *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
+ if (!slp_node)
+ *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
+
+ for (i = 0; i < nargs; i++)
+ {
+ vec<tree> vec_oprndsi = vec_defs[i];
+ vec_oprndsi.release ();
+ }
}
else
/* No current target implements this case. */
Return true if STMT_INFO is vectorizable in this way. */
static bool
-vectorizable_simd_clone_call (stmt_vec_info stmt_info,
+vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
gimple_stmt_iterator *gsi,
- stmt_vec_info *vec_stmt, slp_tree slp_node,
+ gimple **vec_stmt, slp_tree slp_node,
stmt_vector_for_cost *)
{
tree vec_dest;
tree scalar_dest;
tree op, type;
tree vec_oprnd0 = NULL_TREE;
- stmt_vec_info prev_stmt_info;
tree vectype;
unsigned int nunits;
- loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
- bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
- vec_info *vinfo = stmt_info->vinfo;
+ loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
+ bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
tree fndecl, new_temp;
int ncopies, j;
|| thisarginfo.dt == vect_external_def)
gcc_assert (thisarginfo.vectype == NULL_TREE);
else
- gcc_assert (thisarginfo.vectype != NULL_TREE);
+ {
+ gcc_assert (thisarginfo.vectype != NULL_TREE);
+ if (VECTOR_BOOLEAN_TYPE_P (thisarginfo.vectype))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "vector mask arguments are not supported\n");
+ return false;
+ }
+ }
/* For linear arguments, the analyze phase should have saved
the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
&& bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
{
tree arg_type = TREE_TYPE (gimple_call_arg (stmt, i));
- arginfo[i].vectype = get_vectype_for_scalar_type (vinfo, arg_type);
+ arginfo[i].vectype = get_vectype_for_scalar_type (vinfo, arg_type,
+ slp_node);
if (arginfo[i].vectype == NULL
|| (simd_clone_subparts (arginfo[i].vectype)
> bestn->simdclone->simdlen))
}
STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
-/* vect_model_simple_cost (stmt_info, ncopies, dt, slp_node, cost_vec); */
+/* vect_model_simple_cost (vinfo, stmt_info, ncopies,
+ dt, slp_node, cost_vec); */
return true;
}
}
}
- prev_stmt_info = NULL;
+ auto_vec<vec<tree> > vec_oprnds;
+ auto_vec<unsigned> vec_oprnds_i;
+ vec_oprnds.safe_grow_cleared (nargs);
+ vec_oprnds_i.safe_grow_cleared (nargs);
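/* Sketch of the cursor scheme set up here: VEC_OPRNDS[i] caches all
   vector defs of argument I on first use and VEC_OPRNDS_I[i] is a
   running index into it, advanced each time the clone body needs the
   next def of that argument, replacing the old
   vect_get_vec_def_for_stmt_copy chaining. */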
for (j = 0; j < ncopies; ++j)
{
/* Build argument list for the vectorized call. */
/ simd_clone_subparts (atype));
gcc_assert ((k & (k - 1)) == 0);
if (m == 0)
- vec_oprnd0
- = vect_get_vec_def_for_operand (op, stmt_info);
+ {
+ vect_get_vec_defs_for_operand (vinfo, stmt_info,
+ ncopies * o / k, op,
+ &vec_oprnds[i]);
+ vec_oprnds_i[i] = 0;
+ vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
+ }
else
{
vec_oprnd0 = arginfo[i].op;
if ((m & (k - 1)) == 0)
- vec_oprnd0
- = vect_get_vec_def_for_stmt_copy (vinfo,
- vec_oprnd0);
+ vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
}
arginfo[i].op = vec_oprnd0;
vec_oprnd0
gassign *new_stmt
= gimple_build_assign (make_ssa_name (atype),
vec_oprnd0);
- vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
+ vect_finish_stmt_generation (vinfo, stmt_info,
+ new_stmt, gsi);
vargs.safe_push (gimple_assign_lhs (new_stmt));
}
else
for (l = 0; l < k; l++)
{
if (m == 0 && l == 0)
- vec_oprnd0
- = vect_get_vec_def_for_operand (op, stmt_info);
+ {
+ vect_get_vec_defs_for_operand (vinfo, stmt_info,
+ k * o * ncopies,
+ op,
+ &vec_oprnds[i]);
+ vec_oprnds_i[i] = 0;
+ vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
+ }
else
- vec_oprnd0
- = vect_get_vec_def_for_stmt_copy (vinfo,
- arginfo[i].op);
+ vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
arginfo[i].op = vec_oprnd0;
if (k == 1)
break;
gassign *new_stmt
= gimple_build_assign (make_ssa_name (atype),
vec_oprnd0);
- vect_finish_stmt_generation (stmt_info, new_stmt,
- gsi);
+ vect_finish_stmt_generation (vinfo, stmt_info,
+ new_stmt, gsi);
vargs.safe_push (gimple_assign_lhs (new_stmt));
}
}
}
tree phi_res = copy_ssa_name (op);
gphi *new_phi = create_phi_node (phi_res, loop->header);
- loop_vinfo->add_stmt (new_phi);
add_phi_arg (new_phi, arginfo[i].op,
loop_preheader_edge (loop), UNKNOWN_LOCATION);
enum tree_code code
= gimple_build_assign (phi_arg, code, phi_res, tcst);
gimple_stmt_iterator si = gsi_after_labels (loop->header);
gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
- loop_vinfo->add_stmt (new_stmt);
add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
UNKNOWN_LOCATION);
arginfo[i].op = phi_res;
gassign *new_stmt
= gimple_build_assign (new_temp, code,
arginfo[i].op, tcst);
- vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
+ vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
vargs.safe_push (new_temp);
}
break;
new_temp = make_ssa_name (rtype, new_call);
gimple_call_set_lhs (new_call, new_temp);
}
- stmt_vec_info new_stmt_info
- = vect_finish_stmt_generation (stmt_info, new_call, gsi);
+ vect_finish_stmt_generation (vinfo, stmt_info, new_call, gsi);
+ gimple *new_stmt = new_call;
if (vec_dest)
{
else
t = build3 (BIT_FIELD_REF, vectype, new_temp,
bitsize_int (prec), bitsize_int (l * prec));
- gimple *new_stmt
- = gimple_build_assign (make_ssa_name (vectype), t);
- new_stmt_info
- = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
+ new_stmt = gimple_build_assign (make_ssa_name (vectype), t);
+ vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
if (j == 0 && l == 0)
- STMT_VINFO_VEC_STMT (stmt_info)
- = *vec_stmt = new_stmt_info;
- else
- STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
-
- prev_stmt_info = new_stmt_info;
+ *vec_stmt = new_stmt;
+ STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
}
if (ratype)
- vect_clobber_variable (stmt_info, gsi, new_temp);
+ vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
continue;
}
else if (simd_clone_subparts (vectype) > nunits)
{
tree tem = build4 (ARRAY_REF, rtype, new_temp,
size_int (m), NULL_TREE, NULL_TREE);
- gimple *new_stmt
- = gimple_build_assign (make_ssa_name (rtype), tem);
- new_stmt_info
- = vect_finish_stmt_generation (stmt_info, new_stmt,
- gsi);
+ new_stmt = gimple_build_assign (make_ssa_name (rtype),
+ tem);
+ vect_finish_stmt_generation (vinfo, stmt_info,
+ new_stmt, gsi);
CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
gimple_assign_lhs (new_stmt));
}
- vect_clobber_variable (stmt_info, gsi, new_temp);
+ vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
}
else
CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
if ((j & (k - 1)) != k - 1)
continue;
vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
- gimple *new_stmt
+ new_stmt
= gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
- new_stmt_info
- = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
+ vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
if ((unsigned) j == k - 1)
- STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
- else
- STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
-
- prev_stmt_info = new_stmt_info;
+ *vec_stmt = new_stmt;
+ STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
continue;
}
else if (ratype)
tree t = build_fold_addr_expr (new_temp);
t = build2 (MEM_REF, vectype, t,
build_int_cst (TREE_TYPE (t), 0));
- gimple *new_stmt
- = gimple_build_assign (make_ssa_name (vec_dest), t);
- new_stmt_info
- = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
- vect_clobber_variable (stmt_info, gsi, new_temp);
+ new_stmt = gimple_build_assign (make_ssa_name (vec_dest), t);
+ vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+ vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
}
}
if (j == 0)
- STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
- else
- STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
-
- prev_stmt_info = new_stmt_info;
+ *vec_stmt = new_stmt;
+ STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
}
+ for (i = 0; i < nargs; ++i)
+ {
+ vec<tree> oprndsi = vec_oprnds[i];
+ oprndsi.release ();
+ }
vargs.release ();
/* The call in STMT might prevent it from being removed in dce.
STMT_INFO is the original scalar stmt that we are vectorizing. */
static gimple *
-vect_gen_widened_results_half (enum tree_code code,
- tree decl,
+vect_gen_widened_results_half (vec_info *vinfo, enum tree_code code,
tree vec_oprnd0, tree vec_oprnd1, int op_type,
tree vec_dest, gimple_stmt_iterator *gsi,
stmt_vec_info stmt_info)
tree new_temp;
/* Generate half of the widened result: */
- if (code == CALL_EXPR)
- {
- /* Target specific support */
- if (op_type == binary_op)
- new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
- else
- new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
- new_temp = make_ssa_name (vec_dest, new_stmt);
- gimple_call_set_lhs (new_stmt, new_temp);
- }
- else
- {
- /* Generic support */
- gcc_assert (op_type == TREE_CODE_LENGTH (code));
- if (op_type != binary_op)
- vec_oprnd1 = NULL;
- new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
- new_temp = make_ssa_name (vec_dest, new_stmt);
- gimple_assign_set_lhs (new_stmt, new_temp);
- }
- vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
+ gcc_assert (op_type == TREE_CODE_LENGTH (code));
+ if (op_type != binary_op)
+ vec_oprnd1 = NULL;
+ new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
+ new_temp = make_ssa_name (vec_dest, new_stmt);
+ gimple_assign_set_lhs (new_stmt, new_temp);
+ vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
return new_stmt;
}
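/* Illustrative use (the loop and codes are assumptions): a widening
   multiply such as

     short a[1024], b[1024];
     int c[1024];
     for (int i = 0; i < 1024; i++)
       c[i] = a[i] * b[i];

   is handled by calling this function twice per input vector pair,
   once with the lo code and once with the hi code (e.g.
   VEC_WIDEN_MULT_LO_EXPR and VEC_WIDEN_MULT_HI_EXPR), each call
   producing one vector of the widened result. */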
-/* Get vectorized definitions for loop-based vectorization of STMT_INFO.
- For the first operand we call vect_get_vec_def_for_operand (with OPRND
- containing scalar operand), and for the rest we get a copy with
- vect_get_vec_def_for_stmt_copy() using the previous vector definition
- (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
- The vectors are collected into VEC_OPRNDS. */
-
-static void
-vect_get_loop_based_defs (tree *oprnd, stmt_vec_info stmt_info,
- vec<tree> *vec_oprnds, int multi_step_cvt)
-{
- vec_info *vinfo = stmt_info->vinfo;
- tree vec_oprnd;
-
- /* Get first vector operand. */
- /* All the vector operands except the very first one (that is scalar oprnd)
- are stmt copies. */
- if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
- vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt_info);
- else
- vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, *oprnd);
-
- vec_oprnds->quick_push (vec_oprnd);
-
- /* Get second vector operand. */
- vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd);
- vec_oprnds->quick_push (vec_oprnd);
-
- *oprnd = vec_oprnd;
-
- /* For conversion in multiple steps, continue to get operands
- recursively. */
- if (multi_step_cvt)
- vect_get_loop_based_defs (oprnd, stmt_info, vec_oprnds,
- multi_step_cvt - 1);
-}
-
-
/* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
For multi-step conversions store the resulting vectors and call the function
recursively. */
static void
-vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
+vect_create_vectorized_demotion_stmts (vec_info *vinfo, vec<tree> *vec_oprnds,
int multi_step_cvt,
stmt_vec_info stmt_info,
vec<tree> vec_dsts,
gimple_stmt_iterator *gsi,
- slp_tree slp_node, enum tree_code code,
- stmt_vec_info *prev_stmt_info)
+ slp_tree slp_node, enum tree_code code)
{
unsigned int i;
tree vop0, vop1, new_tmp, vec_dest;
gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
new_tmp = make_ssa_name (vec_dest, new_stmt);
gimple_assign_set_lhs (new_stmt, new_tmp);
- stmt_vec_info new_stmt_info
- = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
+ vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
if (multi_step_cvt)
/* Store the resulting vector for next recursive call. */
vectors in SLP_NODE or in vector info of the scalar statement
(or in STMT_VINFO_RELATED_STMT chain). */
if (slp_node)
- SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
+ SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
else
- {
- if (!*prev_stmt_info)
- STMT_VINFO_VEC_STMT (stmt_info) = new_stmt_info;
- else
- STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt_info;
-
- *prev_stmt_info = new_stmt_info;
- }
+ STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
}
}
/* At each level of recursion we have half of the operands we had at the
previous level. */
vec_oprnds->truncate ((i+1)/2);
- vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
+ vect_create_vectorized_demotion_stmts (vinfo, vec_oprnds,
+ multi_step_cvt - 1,
stmt_info, vec_dsts, gsi,
- slp_node, VEC_PACK_TRUNC_EXPR,
- prev_stmt_info);
+ slp_node, VEC_PACK_TRUNC_EXPR);
}
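/* Worked example of the halving recursion (sizes illustrative):
   narrowing four V4SI operands to one V16QI result with
   multi_step_cvt == 1 first packs them pairwise into two V8HI vectors,
   truncates VEC_OPRNDS to those two, and recurses with
   VEC_PACK_TRUNC_EXPR to emit the final V16QI vector. */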
vec_dsts.quick_push (vec_dest);
call the function recursively. */
static void
-vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
+vect_create_vectorized_promotion_stmts (vec_info *vinfo,
+ vec<tree> *vec_oprnds0,
vec<tree> *vec_oprnds1,
stmt_vec_info stmt_info, tree vec_dest,
gimple_stmt_iterator *gsi,
enum tree_code code1,
- enum tree_code code2, tree decl1,
- tree decl2, int op_type)
+ enum tree_code code2, int op_type)
{
int i;
tree vop0, vop1, new_tmp1, new_tmp2;
vop1 = NULL_TREE;
/* Generate the two halves of promotion operation. */
- new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
+ new_stmt1 = vect_gen_widened_results_half (vinfo, code1, vop0, vop1,
op_type, vec_dest, gsi,
stmt_info);
- new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
+ new_stmt2 = vect_gen_widened_results_half (vinfo, code2, vop0, vop1,
op_type, vec_dest, gsi,
stmt_info);
if (is_gimple_call (new_stmt1))
Return true if STMT_INFO is vectorizable in this way. */
static bool
-vectorizable_conversion (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
- stmt_vec_info *vec_stmt, slp_tree slp_node,
+vectorizable_conversion (vec_info *vinfo,
+ stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
+ gimple **vec_stmt, slp_tree slp_node,
stmt_vector_for_cost *cost_vec)
{
tree vec_dest;
tree scalar_dest;
tree op0, op1 = NULL_TREE;
- tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
- loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
+ loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
- tree decl1 = NULL_TREE, decl2 = NULL_TREE;
tree new_temp;
enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
int ndts = 2;
- stmt_vec_info prev_stmt_info;
poly_uint64 nunits_in;
poly_uint64 nunits_out;
tree vectype_out, vectype_in;
- int ncopies, i, j;
+ int ncopies, i;
tree lhs_type, rhs_type;
enum { NARROW, NONE, WIDEN } modifier;
vec<tree> vec_oprnds0 = vNULL;
vec<tree> vec_oprnds1 = vNULL;
tree vop0;
- bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
- vec_info *vinfo = stmt_info->vinfo;
+ bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
int multi_step_cvt = 0;
vec<tree> interm_types = vNULL;
- tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
+ tree intermediate_type, cvt_type = NULL_TREE;
int op_type;
unsigned short fltsz;
lhs_type = TREE_TYPE (scalar_dest);
vectype_out = STMT_VINFO_VECTYPE (stmt_info);
- op0 = gimple_assign_rhs1 (stmt);
- rhs_type = TREE_TYPE (op0);
+ /* Check the operands of the operation. */
+ slp_tree slp_op0, slp_op1 = NULL;
+ if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
+ 0, &op0, &slp_op0, &dt[0], &vectype_in))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "use not simple.\n");
+ return false;
+ }
+ rhs_type = TREE_TYPE (op0);
if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
&& !((INTEGRAL_TYPE_P (lhs_type)
&& INTEGRAL_TYPE_P (rhs_type))
return false;
}
- /* Check the operands of the operation. */
- if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype_in))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "use not simple.\n");
- return false;
- }
if (op_type == binary_op)
{
- bool ok;
-
- op1 = gimple_assign_rhs2 (stmt);
gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
- /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
- OP1. */
- if (CONSTANT_CLASS_P (op0))
- ok = vect_is_simple_use (op1, vinfo, &dt[1], &vectype_in);
- else
- ok = vect_is_simple_use (op1, vinfo, &dt[1]);
- if (!ok)
+ op1 = gimple_assign_rhs2 (stmt);
+ tree vectype1_in;
+ if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
+ &op1, &slp_op1, &dt[1], &vectype1_in))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"use not simple.\n");
return false;
}
+ /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
+ OP1. */
+ if (!vectype_in)
+ vectype_in = vectype1_in;
}
- /* If op0 is an external or constant defs use a vector type of
- the same size as the output vector type. */
+ /* If op0 is an external or constant def, infer the vector type
+ from the scalar type. */
if (!vectype_in)
- vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
+ vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
if (vec_stmt)
gcc_assert (vectype_in);
if (!vectype_in)
switch (modifier)
{
case NONE:
- if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
+ if (code != FIX_TRUNC_EXPR
+ && code != FLOAT_EXPR
+ && !CONVERT_EXPR_CODE_P (code))
return false;
- if (supportable_convert_operation (code, vectype_out, vectype_in,
- &decl1, &code1))
+ if (supportable_convert_operation (code, vectype_out, vectype_in, &code1))
break;
/* FALLTHRU */
unsupported:
return false;
case WIDEN:
- if (supportable_widening_operation (code, stmt_info, vectype_out,
+ if (supportable_widening_operation (vinfo, code, stmt_info, vectype_out,
vectype_in, &code1, &code2,
&multi_step_cvt, &interm_types))
{
if (GET_MODE_SIZE (rhs_mode) == fltsz)
{
if (!supportable_convert_operation (code, vectype_out,
- cvt_type, &decl1, &codecvt1))
+ cvt_type, &codecvt1))
goto unsupported;
}
- else if (!supportable_widening_operation (code, stmt_info,
+ else if (!supportable_widening_operation (vinfo, code, stmt_info,
vectype_out, cvt_type,
&codecvt1, &codecvt2,
&multi_step_cvt,
else
gcc_assert (multi_step_cvt == 0);
- if (supportable_widening_operation (NOP_EXPR, stmt_info, cvt_type,
+ if (supportable_widening_operation (vinfo, NOP_EXPR, stmt_info,
+ cvt_type,
vectype_in, &code1, &code2,
&multi_step_cvt, &interm_types))
{
case NARROW:
gcc_assert (op_type == unary_op);
- if (supportable_narrowing_operation (vinfo, code, vectype_out,
- vectype_in, &code1, &multi_step_cvt,
+ if (supportable_narrowing_operation (code, vectype_out, vectype_in,
+ &code1, &multi_step_cvt,
&interm_types))
break;
if (cvt_type == NULL_TREE)
goto unsupported;
if (!supportable_convert_operation (code, cvt_type, vectype_in,
- &decl1, &codecvt1))
+ &codecvt1))
goto unsupported;
- if (supportable_narrowing_operation (vinfo, NOP_EXPR, vectype_out,
- cvt_type, &code1, &multi_step_cvt,
+ if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
+ &code1, &multi_step_cvt,
&interm_types))
break;
goto unsupported;
if (!vec_stmt) /* transformation not required. */
{
+ if (slp_node
+ && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype_in)
+ || !vect_maybe_update_slp_op_vectype (slp_op1, vectype_in)))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "incompatible vector types for invariants\n");
+ return false;
+ }
DUMP_VECT_SCOPE ("vectorizable_conversion");
- if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
+ if (modifier == NONE)
{
STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
- vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node,
+ vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node,
cost_vec);
}
else if (modifier == NARROW)
{
STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
- vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt,
- cost_vec);
+ /* The final packing step produces one vector result per copy. */
+ unsigned int nvectors
+ = (slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies);
+ vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
+ multi_step_cvt, cost_vec);
}
else
{
STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
- vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt,
- cost_vec);
+ /* The initial unpacking step produces two vector results
+ per copy. MULTI_STEP_CVT is 0 for a single conversion,
+ so >> MULTI_STEP_CVT divides by 2^(number of steps - 1). */
+ unsigned int nvectors
+ = (slp_node
+ ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) >> multi_step_cvt
+ : ncopies * 2);
+ vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
+ multi_step_cvt, cost_vec);
}
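/* Worked example for the NVECTORS computations above (counts are
   illustrative): a two-step promotion (multi_step_cvt == 1) of an SLP
   node with SLP_TREE_NUMBER_OF_VEC_STMTS == 8 costs its initial unpack
   on 8 >> 1 == 4 vectors; without SLP, ncopies == 2 gives 2 * 2 == 4.
   For a demotion the final pack emits one vector per copy, so nvectors
   is just the vector-stmt count. */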
interm_types.release ();
return true;
modifier == WIDEN
? vectype_out : cvt_type);
+ int ninputs = 1;
if (!slp_node)
{
if (modifier == WIDEN)
+ ;
+ else if (modifier == NARROW)
{
- vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
- if (op_type == binary_op)
- vec_oprnds1.create (1);
+ if (multi_step_cvt)
+ ninputs = vect_pow2 (multi_step_cvt);
+ ninputs *= 2;
}
- else if (modifier == NARROW)
- vec_oprnds0.create (
- 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
}
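/* Worked example for NINPUTS (values illustrative): a narrowing with
   multi_step_cvt == 2 packs in three steps, so each final vector
   consumes vect_pow2 (2) * 2 == 8 input vectors and ncopies * 8 defs
   are fetched for the NARROW case below; a single-step narrowing
   (multi_step_cvt == 0) needs just 2. */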
- else if (code == WIDEN_LSHIFT_EXPR)
- vec_oprnds1.create (slp_node->vec_stmts_size);
- last_oprnd = op0;
- prev_stmt_info = NULL;
switch (modifier)
{
case NONE:
- for (j = 0; j < ncopies; j++)
+ vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
+ op0, &vec_oprnds0);
+ FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
{
- if (j == 0)
- vect_get_vec_defs (op0, NULL, stmt_info, &vec_oprnds0,
- NULL, slp_node);
- else
- vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds0, NULL);
-
- FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
- {
- stmt_vec_info new_stmt_info;
- /* Arguments are ready, create the new vector stmt. */
- if (code1 == CALL_EXPR)
- {
- gcall *new_stmt = gimple_build_call (decl1, 1, vop0);
- new_temp = make_ssa_name (vec_dest, new_stmt);
- gimple_call_set_lhs (new_stmt, new_temp);
- new_stmt_info
- = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
- }
- else
- {
- gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
- gassign *new_stmt
- = gimple_build_assign (vec_dest, code1, vop0);
- new_temp = make_ssa_name (vec_dest, new_stmt);
- gimple_assign_set_lhs (new_stmt, new_temp);
- new_stmt_info
- = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
- }
+ /* Arguments are ready, create the new vector stmt. */
+ gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
+ gassign *new_stmt = gimple_build_assign (vec_dest, code1, vop0);
+ new_temp = make_ssa_name (vec_dest, new_stmt);
+ gimple_assign_set_lhs (new_stmt, new_temp);
+ vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
- if (slp_node)
- SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
- else
- {
- if (!prev_stmt_info)
- STMT_VINFO_VEC_STMT (stmt_info)
- = *vec_stmt = new_stmt_info;
- else
- STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
- prev_stmt_info = new_stmt_info;
- }
- }
+ if (slp_node)
+ SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
+ else
+ STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
}
break;
of elements that we can fit in a vectype (nunits), we have to
generate more than one vector stmt - i.e. - we need to "unroll"
the vector stmt by a factor VF/nunits. */
- for (j = 0; j < ncopies; j++)
- {
- /* Handle uses. */
- if (j == 0)
- {
- if (slp_node)
- {
- if (code == WIDEN_LSHIFT_EXPR)
- {
- unsigned int k;
-
- vec_oprnd1 = op1;
- /* Store vec_oprnd1 for every vector stmt to be created
- for SLP_NODE. We check during the analysis that all
- the shift arguments are the same. */
- for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
- vec_oprnds1.quick_push (vec_oprnd1);
-
- vect_get_vec_defs (op0, NULL_TREE, stmt_info,
- &vec_oprnds0, NULL, slp_node);
- }
- else
- vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0,
- &vec_oprnds1, slp_node);
- }
- else
- {
- vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt_info);
- vec_oprnds0.quick_push (vec_oprnd0);
- if (op_type == binary_op)
- {
- if (code == WIDEN_LSHIFT_EXPR)
- vec_oprnd1 = op1;
- else
- vec_oprnd1
- = vect_get_vec_def_for_operand (op1, stmt_info);
- vec_oprnds1.quick_push (vec_oprnd1);
- }
- }
- }
- else
+ vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
+ op0, &vec_oprnds0,
+ code == WIDEN_LSHIFT_EXPR ? NULL_TREE : op1,
+ &vec_oprnds1);
+ if (code == WIDEN_LSHIFT_EXPR)
+ {
+ vec_oprnds1.create (ncopies * ninputs);
+ for (i = 0; i < ncopies * ninputs; ++i)
+ vec_oprnds1.quick_push (op1);
+ }
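/* WIDEN_LSHIFT_EXPR keeps its shift amount scalar: OP1 is replicated
   once per generated vector stmt rather than fetched as vector defs,
   relying on the analysis-time check that all lanes use the same
   shift amount. */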
+ /* Arguments are ready. Create the new vector stmts. */
+ for (i = multi_step_cvt; i >= 0; i--)
+ {
+ tree this_dest = vec_dsts[i];
+ enum tree_code c1 = code1, c2 = code2;
+ if (i == 0 && codecvt2 != ERROR_MARK)
{
- vec_oprnd0 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
- vec_oprnds0.truncate (0);
- vec_oprnds0.quick_push (vec_oprnd0);
- if (op_type == binary_op)
- {
- if (code == WIDEN_LSHIFT_EXPR)
- vec_oprnd1 = op1;
- else
- vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo,
- vec_oprnd1);
- vec_oprnds1.truncate (0);
- vec_oprnds1.quick_push (vec_oprnd1);
- }
+ c1 = codecvt1;
+ c2 = codecvt2;
}
+ vect_create_vectorized_promotion_stmts (vinfo, &vec_oprnds0,
+ &vec_oprnds1, stmt_info,
+ this_dest, gsi,
+ c1, c2, op_type);
+ }
- /* Arguments are ready. Create the new vector stmts. */
- for (i = multi_step_cvt; i >= 0; i--)
+ FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
+ {
+ gimple *new_stmt;
+ if (cvt_type)
{
- tree this_dest = vec_dsts[i];
- enum tree_code c1 = code1, c2 = code2;
- if (i == 0 && codecvt2 != ERROR_MARK)
- {
- c1 = codecvt1;
- c2 = codecvt2;
- }
- vect_create_vectorized_promotion_stmts (&vec_oprnds0,
- &vec_oprnds1, stmt_info,
- this_dest, gsi,
- c1, c2, decl1, decl2,
- op_type);
+ gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
+ new_temp = make_ssa_name (vec_dest);
+ new_stmt = gimple_build_assign (new_temp, codecvt1, vop0);
+ vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
}
+ else
+ new_stmt = SSA_NAME_DEF_STMT (vop0);
- FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
- {
- stmt_vec_info new_stmt_info;
- if (cvt_type)
- {
- if (codecvt1 == CALL_EXPR)
- {
- gcall *new_stmt = gimple_build_call (decl1, 1, vop0);
- new_temp = make_ssa_name (vec_dest, new_stmt);
- gimple_call_set_lhs (new_stmt, new_temp);
- new_stmt_info
- = vect_finish_stmt_generation (stmt_info, new_stmt,
- gsi);
- }
- else
- {
- gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
- new_temp = make_ssa_name (vec_dest);
- gassign *new_stmt
- = gimple_build_assign (new_temp, codecvt1, vop0);
- new_stmt_info
- = vect_finish_stmt_generation (stmt_info, new_stmt,
- gsi);
- }
- }
- else
- new_stmt_info = vinfo->lookup_def (vop0);
-
- if (slp_node)
- SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
- else
- {
- if (!prev_stmt_info)
- STMT_VINFO_VEC_STMT (stmt_info) = new_stmt_info;
- else
- STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
- prev_stmt_info = new_stmt_info;
- }
- }
+ if (slp_node)
+ SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
+ else
+ STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
}
-
- *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
break;
case NARROW:
of elements that we can fit in a vectype (nunits), we have to
generate more than one vector stmt - i.e. - we need to "unroll"
the vector stmt by a factor VF/nunits. */
- for (j = 0; j < ncopies; j++)
- {
- /* Handle uses. */
- if (slp_node)
- vect_get_vec_defs (op0, NULL_TREE, stmt_info, &vec_oprnds0, NULL,
- slp_node);
- else
- {
- vec_oprnds0.truncate (0);
- vect_get_loop_based_defs (&last_oprnd, stmt_info, &vec_oprnds0,
- vect_pow2 (multi_step_cvt) - 1);
- }
-
- /* Arguments are ready. Create the new vector stmts. */
- if (cvt_type)
- FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
- {
- if (codecvt1 == CALL_EXPR)
- {
- gcall *new_stmt = gimple_build_call (decl1, 1, vop0);
- new_temp = make_ssa_name (vec_dest, new_stmt);
- gimple_call_set_lhs (new_stmt, new_temp);
- vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
- }
- else
- {
- gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
- new_temp = make_ssa_name (vec_dest);
- gassign *new_stmt
- = gimple_build_assign (new_temp, codecvt1, vop0);
- vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
- }
-
- vec_oprnds0[i] = new_temp;
- }
-
- vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
- stmt_info, vec_dsts, gsi,
- slp_node, code1,
- &prev_stmt_info);
- }
+ vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
+ op0, &vec_oprnds0);
+ /* Arguments are ready. Create the new vector stmts. */
+ if (cvt_type)
+ FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
+ {
+ gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
+ new_temp = make_ssa_name (vec_dest);
+ gassign *new_stmt
+ = gimple_build_assign (new_temp, codecvt1, vop0);
+ vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+ vec_oprnds0[i] = new_temp;
+ }
- *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
+ vect_create_vectorized_demotion_stmts (vinfo, &vec_oprnds0,
+ multi_step_cvt,
+ stmt_info, vec_dsts, gsi,
+ slp_node, code1);
break;
}
+ if (!slp_node)
+ *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
vec_oprnds0.release ();
vec_oprnds1.release ();
return true;
}
+/* Return true if we can assume from the scalar form of STMT_INFO that
+ neither the scalar nor the vector forms will generate code. STMT_INFO
+ is known not to involve a data reference. */
+
+bool
+vect_nop_conversion_p (stmt_vec_info stmt_info)
+{
+ gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
+ if (!stmt)
+ return false;
+
+ tree lhs = gimple_assign_lhs (stmt);
+ tree_code code = gimple_assign_rhs_code (stmt);
+ tree rhs = gimple_assign_rhs1 (stmt);
+
+ if (code == SSA_NAME || code == VIEW_CONVERT_EXPR)
+ return true;
+
+ if (CONVERT_EXPR_CODE_P (code))
+ return tree_nop_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs));
+
+ return false;
+}
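/* Examples (the variables are assumptions): with int x,
   "unsigned int u = (unsigned int) x;" preserves mode and precision,
   so tree_nop_conversion_p makes this return true and the copy can be
   treated as free; "long l = (long) x;" changes the mode and yields
   false. A VIEW_CONVERT_EXPR or a plain SSA_NAME copy is always a
   nop. */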
/* Function vectorizable_assignment.
Return true if STMT_INFO is vectorizable in this way. */
static bool
-vectorizable_assignment (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
- stmt_vec_info *vec_stmt, slp_tree slp_node,
+vectorizable_assignment (vec_info *vinfo,
+ stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
+ gimple **vec_stmt, slp_tree slp_node,
stmt_vector_for_cost *cost_vec)
{
tree vec_dest;
tree scalar_dest;
tree op;
- loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
+ loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
tree new_temp;
enum vect_def_type dt[1] = {vect_unknown_def_type};
int ndts = 1;
int ncopies;
- int i, j;
+ int i;
vec<tree> vec_oprnds = vNULL;
tree vop;
- bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
- vec_info *vinfo = stmt_info->vinfo;
- stmt_vec_info prev_stmt_info = NULL;
+ bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
enum tree_code code;
tree vectype_in;
if (TREE_CODE (scalar_dest) != SSA_NAME)
return false;
- code = gimple_assign_rhs_code (stmt);
- if (gimple_assign_single_p (stmt)
- || code == PAREN_EXPR
- || CONVERT_EXPR_CODE_P (code))
- op = gimple_assign_rhs1 (stmt);
- else
+ if (STMT_VINFO_DATA_REF (stmt_info))
return false;
- if (code == VIEW_CONVERT_EXPR)
- op = TREE_OPERAND (op, 0);
+ code = gimple_assign_rhs_code (stmt);
+ if (!(gimple_assign_single_p (stmt)
+ || code == PAREN_EXPR
+ || CONVERT_EXPR_CODE_P (code)))
+ return false;
tree vectype = STMT_VINFO_VECTYPE (stmt_info);
poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
gcc_assert (ncopies >= 1);
- if (!vect_is_simple_use (op, vinfo, &dt[0], &vectype_in))
+ slp_tree slp_op;
+ if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &op, &slp_op,
+ &dt[0], &vectype_in))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"use not simple.\n");
return false;
}
+ if (!vectype_in)
+ vectype_in = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), slp_node);
/* We can handle NOP_EXPR conversions that do not change the number
of elements or the vector size. */
if (!vec_stmt) /* transformation not required. */
{
+ if (slp_node
+ && !vect_maybe_update_slp_op_vectype (slp_op, vectype_in))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "incompatible vector types for invariants\n");
+ return false;
+ }
STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
DUMP_VECT_SCOPE ("vectorizable_assignment");
- vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
+ if (!vect_nop_conversion_p (stmt_info))
+ vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node,
+ cost_vec);
return true;
}
vec_dest = vect_create_destination_var (scalar_dest, vectype);
/* Handle use. */
- for (j = 0; j < ncopies; j++)
- {
- /* Handle uses. */
- if (j == 0)
- vect_get_vec_defs (op, NULL, stmt_info, &vec_oprnds, NULL, slp_node);
- else
- vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds, NULL);
-
- /* Arguments are ready. create the new vector stmt. */
- stmt_vec_info new_stmt_info = NULL;
- FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
- {
- if (CONVERT_EXPR_CODE_P (code)
- || code == VIEW_CONVERT_EXPR)
- vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
- gassign *new_stmt = gimple_build_assign (vec_dest, vop);
- new_temp = make_ssa_name (vec_dest, new_stmt);
- gimple_assign_set_lhs (new_stmt, new_temp);
- new_stmt_info
- = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
- if (slp_node)
- SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
- }
+ vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies, op, &vec_oprnds);
+ /* Arguments are ready. Create the new vector stmt. */
+ FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
+ {
+ if (CONVERT_EXPR_CODE_P (code)
+ || code == VIEW_CONVERT_EXPR)
+ vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
+ gassign *new_stmt = gimple_build_assign (vec_dest, vop);
+ new_temp = make_ssa_name (vec_dest, new_stmt);
+ gimple_assign_set_lhs (new_stmt, new_temp);
+ vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
if (slp_node)
- continue;
-
- if (j == 0)
- STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
+ SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
else
- STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
-
- prev_stmt_info = new_stmt_info;
+ STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
}
+ if (!slp_node)
+ *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
vec_oprnds.release ();
return true;
Return true if STMT_INFO is vectorizable in this way. */
static bool
-vectorizable_shift (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
- stmt_vec_info *vec_stmt, slp_tree slp_node,
+vectorizable_shift (vec_info *vinfo,
+ stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
+ gimple **vec_stmt, slp_tree slp_node,
stmt_vector_for_cost *cost_vec)
{
tree vec_dest;
tree op0, op1 = NULL;
tree vec_oprnd1 = NULL_TREE;
tree vectype;
- loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
+ loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
enum tree_code code;
machine_mode vec_mode;
tree new_temp;
machine_mode optab_op2_mode;
enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
int ndts = 2;
- stmt_vec_info prev_stmt_info;
poly_uint64 nunits_in;
poly_uint64 nunits_out;
tree vectype_out;
tree op1_vectype;
int ncopies;
- int j, i;
+ int i;
vec<tree> vec_oprnds0 = vNULL;
vec<tree> vec_oprnds1 = vNULL;
tree vop0, vop1;
unsigned int k;
bool scalar_shift_arg = true;
- bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
- vec_info *vinfo = stmt_info->vinfo;
+ bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
+ bool incompatible_op1_vectype_p = false;
if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
return false;
return false;
}
- op0 = gimple_assign_rhs1 (stmt);
- if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype))
+ slp_tree slp_op0;
+ if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
+ 0, &op0, &slp_op0, &dt[0], &vectype))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"use not simple.\n");
return false;
}
- /* If op0 is an external or constant def use a vector type with
- the same size as the output vector type. */
+ /* If op0 is an external or constant def, infer the vector type
+ from the scalar type. */
if (!vectype)
- vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
+ vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0), slp_node);
if (vec_stmt)
gcc_assert (vectype);
if (!vectype)
if (maybe_ne (nunits_out, nunits_in))
return false;
- op1 = gimple_assign_rhs2 (stmt);
stmt_vec_info op1_def_stmt_info;
- if (!vect_is_simple_use (op1, vinfo, &dt[1], &op1_vectype,
- &op1_def_stmt_info))
+ slp_tree slp_op1;
+ if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1, &op1, &slp_op1,
+ &dt[1], &op1_vectype, &op1_def_stmt_info))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"vector/vector shift/rotate found.\n");
if (!op1_vectype)
- op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
- if ((op1_vectype == NULL_TREE
- || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
+ op1_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op1),
+ slp_op1);
+ incompatible_op1_vectype_p
+ = (op1_vectype == NULL_TREE
+ || maybe_ne (TYPE_VECTOR_SUBPARTS (op1_vectype),
+ TYPE_VECTOR_SUBPARTS (vectype))
+ || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype));
+ if (incompatible_op1_vectype_p
&& (!slp_node
- || SLP_TREE_DEF_TYPE
- (SLP_TREE_CHILDREN (slp_node)[1]) != vect_constant_def))
+ || SLP_TREE_DEF_TYPE (slp_op1) != vect_constant_def
+ || slp_op1->refcnt != 1))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
dump_printf_loc (MSG_NOTE, vect_location,
"vector/vector shift/rotate found.\n");
+ if (!op1_vectype)
+ op1_vectype = get_vectype_for_scalar_type (vinfo,
+ TREE_TYPE (op1),
+ slp_node);
+
/* Unlike the other binary operators, shifts/rotates have
the rhs being int, instead of the same type as the lhs,
so make sure the scalar is the right type if we are
dealing with vectors of long long/long/short/char. */
- if (dt[1] == vect_constant_def)
- {
- if (!slp_node)
- op1 = fold_convert (TREE_TYPE (vectype), op1);
- }
- else if (!useless_type_conversion_p (TREE_TYPE (vectype),
- TREE_TYPE (op1)))
- {
- if (vec_stmt && !slp_node)
- {
- op1 = fold_convert (TREE_TYPE (vectype), op1);
- op1 = vect_init_vector (stmt_info, op1,
- TREE_TYPE (vectype), NULL);
- }
- }
+ incompatible_op1_vectype_p
+ = (!op1_vectype
+ || !tree_nop_conversion_p (TREE_TYPE (vectype),
+ TREE_TYPE (op1)));
}
}
}
if (!vec_stmt) /* transformation not required. */
{
+ if (slp_node
+ && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
+ || (!scalar_shift_arg
+ && (!incompatible_op1_vectype_p
+ || dt[1] == vect_constant_def)
+ && !vect_maybe_update_slp_op_vectype
+ (slp_op1,
+ incompatible_op1_vectype_p ? vectype : op1_vectype))))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "incompatible vector types for invariants\n");
+ return false;
+ }
+ /* Now adjust the constant shift amount in place. */
+ if (slp_node
+ && incompatible_op1_vectype_p
+ && dt[1] == vect_constant_def)
+ {
+ for (unsigned i = 0;
+ i < SLP_TREE_SCALAR_OPS (slp_op1).length (); ++i)
+ {
+ SLP_TREE_SCALAR_OPS (slp_op1)[i]
+ = fold_convert (TREE_TYPE (vectype),
+ SLP_TREE_SCALAR_OPS (slp_op1)[i]);
+ gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (slp_op1)[i])
+ == INTEGER_CST));
+ }
+ }
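/* Illustrative effect of the in-place adjustment above: for an SLP
   shift of V8HI vectors whose amounts were parsed as int constants,
   each SLP_TREE_SCALAR_OPS entry is folded from the int constant
   (e.g. 3) to the element type short, so the constant vector built
   later has lanes of the right type. */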
STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
DUMP_VECT_SCOPE ("vectorizable_shift");
- vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
+ vect_model_simple_cost (vinfo, stmt_info, ncopies, dt,
+ scalar_shift_arg ? 1 : ndts, slp_node, cost_vec);
return true;
}
dump_printf_loc (MSG_NOTE, vect_location,
"transform binary/unary operation.\n");
+ if (incompatible_op1_vectype_p && !slp_node)
+ {
+ op1 = fold_convert (TREE_TYPE (vectype), op1);
+ if (dt[1] != vect_constant_def)
+ op1 = vect_init_vector (vinfo, stmt_info, op1,
+ TREE_TYPE (vectype), NULL);
+ }
+
/* Handle def. */
vec_dest = vect_create_destination_var (scalar_dest, vectype);
- prev_stmt_info = NULL;
- for (j = 0; j < ncopies; j++)
+ if (scalar_shift_arg)
{
- /* Handle uses. */
- if (j == 0)
- {
- if (scalar_shift_arg)
- {
- /* Vector shl and shr insn patterns can be defined with scalar
- operand 2 (shift operand). In this case, use constant or loop
- invariant op1 directly, without extending it to vector mode
- first. */
- optab_op2_mode = insn_data[icode].operand[2].mode;
- if (!VECTOR_MODE_P (optab_op2_mode))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location,
- "operand 1 using scalar mode.\n");
- vec_oprnd1 = op1;
- vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
- vec_oprnds1.quick_push (vec_oprnd1);
- if (slp_node)
- {
- /* Store vec_oprnd1 for every vector stmt to be created
- for SLP_NODE. We check during the analysis that all
- the shift arguments are the same.
- TODO: Allow different constants for different vector
- stmts generated for an SLP instance. */
- for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
- vec_oprnds1.quick_push (vec_oprnd1);
- }
- }
- }
- else if (slp_node
- && !useless_type_conversion_p (TREE_TYPE (vectype),
- TREE_TYPE (op1)))
- {
- if (was_scalar_shift_arg)
- {
- /* If the argument was the same in all lanes create
- the correctly typed vector shift amount directly. */
- op1 = fold_convert (TREE_TYPE (vectype), op1);
- op1 = vect_init_vector (stmt_info, op1, TREE_TYPE (vectype),
- !loop_vinfo ? gsi : NULL);
- vec_oprnd1 = vect_init_vector (stmt_info, op1, vectype,
- !loop_vinfo ? gsi : NULL);
- vec_oprnds1.create (slp_node->vec_stmts_size);
- for (k = 0; k < slp_node->vec_stmts_size; k++)
- vec_oprnds1.quick_push (vec_oprnd1);
- }
- else if (dt[1] == vect_constant_def)
- {
- /* Convert the scalar constant shift amounts in-place. */
- slp_tree shift = SLP_TREE_CHILDREN (slp_node)[1];
- gcc_assert (SLP_TREE_DEF_TYPE (shift) == vect_constant_def);
- for (unsigned i = 0;
- i < SLP_TREE_SCALAR_OPS (shift).length (); ++i)
- {
- SLP_TREE_SCALAR_OPS (shift)[i]
- = fold_convert (TREE_TYPE (vectype),
- SLP_TREE_SCALAR_OPS (shift)[i]);
- gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (shift)[i])
- == INTEGER_CST));
- }
- }
- else
- gcc_assert (TYPE_MODE (op1_vectype) == TYPE_MODE (vectype));
- }
-
- /* vec_oprnd1 is available if operand 1 should be of a scalar-type
- (a special case for certain kind of vector shifts); otherwise,
- operand 1 should be of a vector type (the usual case). */
- if (vec_oprnd1)
- vect_get_vec_defs (op0, NULL_TREE, stmt_info, &vec_oprnds0, NULL,
- slp_node);
- else
- vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0, &vec_oprnds1,
- slp_node);
- }
+ /* Vector shl and shr insn patterns can be defined with scalar
+ operand 2 (shift operand). In this case, use constant or loop
+ invariant op1 directly, without extending it to vector mode
+ first. */
+ optab_op2_mode = insn_data[icode].operand[2].mode;
+ if (!VECTOR_MODE_P (optab_op2_mode))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "operand 1 using scalar mode.\n");
+ vec_oprnd1 = op1;
+ vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : ncopies);
+ vec_oprnds1.quick_push (vec_oprnd1);
+ /* Store vec_oprnd1 for every vector stmt to be created.
+ We check during the analysis that all the shift arguments
+ are the same.
+ TODO: Allow different constants for different vector
+ stmts generated for an SLP instance. */
+ for (k = 0;
+ k < (slp_node ? slp_node->vec_stmts_size - 1 : ncopies - 1); k++)
+ vec_oprnds1.quick_push (vec_oprnd1);
+ }
+ }
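/* Source-level sketch for the scalar-mode path above (the loop is an
   assumption):

     int a[1024];
     for (int i = 0; i < 1024; i++)
       a[i] <<= 3;

   on targets whose vector shift pattern takes a scalar operand 2,
   op1 == 3 is pushed once per vector stmt without first being
   broadcast to a vector. */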
+ else if (slp_node && incompatible_op1_vectype_p)
+ {
+ if (was_scalar_shift_arg)
+ {
+ /* If the argument was the same in all lanes create
+ the correctly typed vector shift amount directly. */
+ op1 = fold_convert (TREE_TYPE (vectype), op1);
+ op1 = vect_init_vector (vinfo, stmt_info, op1, TREE_TYPE (vectype),
+ !loop_vinfo ? gsi : NULL);
+ vec_oprnd1 = vect_init_vector (vinfo, stmt_info, op1, vectype,
+ !loop_vinfo ? gsi : NULL);
+ vec_oprnds1.create (slp_node->vec_stmts_size);
+ for (k = 0; k < slp_node->vec_stmts_size; k++)
+ vec_oprnds1.quick_push (vec_oprnd1);
+ }
+ else if (dt[1] == vect_constant_def)
+ /* The constant shift amount has been adjusted in place. */
+ ;
else
- vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds0, &vec_oprnds1);
+ gcc_assert (TYPE_MODE (op1_vectype) == TYPE_MODE (vectype));
+ }
- /* Arguments are ready. Create the new vector stmt. */
- stmt_vec_info new_stmt_info = NULL;
- FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
- {
- vop1 = vec_oprnds1[i];
- gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
- new_temp = make_ssa_name (vec_dest, new_stmt);
- gimple_assign_set_lhs (new_stmt, new_temp);
- new_stmt_info
- = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
- if (slp_node)
- SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
- }
+ /* vec_oprnd1 is available if operand 1 should be of a scalar type
+ (a special case for certain kinds of vector shifts); otherwise,
+ operand 1 should be of a vector type (the usual case). */
+ vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
+ op0, &vec_oprnds0,
+ vec_oprnd1 ? NULL_TREE : op1, &vec_oprnds1);
+ /* Arguments are ready. Create the new vector stmt. */
+ FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
+ {
+ vop1 = vec_oprnds1[i];
+ gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
+ new_temp = make_ssa_name (vec_dest, new_stmt);
+ gimple_assign_set_lhs (new_stmt, new_temp);
+ vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
if (slp_node)
- continue;
-
- if (j == 0)
- STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
+ SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
else
- STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
- prev_stmt_info = new_stmt_info;
+ STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
}
+ if (!slp_node)
+ *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
+
vec_oprnds0.release ();
vec_oprnds1.release ();
Return true if STMT_INFO is vectorizable in this way. */
static bool
-vectorizable_operation (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
- stmt_vec_info *vec_stmt, slp_tree slp_node,
+vectorizable_operation (vec_info *vinfo,
+ stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
+ gimple **vec_stmt, slp_tree slp_node,
stmt_vector_for_cost *cost_vec)
{
tree vec_dest;
tree scalar_dest;
tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
tree vectype;
- loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
+ loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
enum tree_code code, orig_code;
machine_mode vec_mode;
tree new_temp;
enum vect_def_type dt[3]
= {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
int ndts = 3;
- stmt_vec_info prev_stmt_info;
poly_uint64 nunits_in;
poly_uint64 nunits_out;
tree vectype_out;
int ncopies, vec_num;
- int j, i;
+ int i;
vec<tree> vec_oprnds0 = vNULL;
vec<tree> vec_oprnds1 = vNULL;
vec<tree> vec_oprnds2 = vNULL;
tree vop0, vop1, vop2;
- bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
- vec_info *vinfo = stmt_info->vinfo;
+ bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
return false;
if (!stmt)
return false;
- if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
+ /* Loads and stores are handled in vectorizable_{load,store}. */
+ if (STMT_VINFO_DATA_REF (stmt_info))
return false;
orig_code = code = gimple_assign_rhs_code (stmt);
+ /* Shifts are handled in vectorizable_shift. */
+ if (code == LSHIFT_EXPR
+ || code == RSHIFT_EXPR
+ || code == LROTATE_EXPR
+ || code == RROTATE_EXPR)
+ return false;
+
+ /* Comparisons are handled in vectorizable_comparison. */
+ if (TREE_CODE_CLASS (code) == tcc_comparison)
+ return false;
+
+ /* Conditions are handled in vectorizable_condition. */
+ if (code == COND_EXPR)
+ return false;
+
/* For pointer addition and subtraction, we should use the normal
plus and minus for the vector operation. */
if (code == POINTER_PLUS_EXPR)
/* Most operations cannot handle bit-precision types without extra
truncations. */
- if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
+ bool mask_op_p = VECTOR_BOOLEAN_TYPE_P (vectype_out);
+ if (!mask_op_p
&& !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
/* Exception are bitwise binary operations. */
&& code != BIT_IOR_EXPR
return false;
}
- op0 = gimple_assign_rhs1 (stmt);
- if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype))
+ slp_tree slp_op0;
+ if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
+ 0, &op0, &slp_op0, &dt[0], &vectype))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"use not simple.\n");
return false;
}
- /* If op0 is an external or constant def use a vector type with
- the same size as the output vector type. */
+ /* If op0 is an external or constant def, infer the vector type
+ from the scalar type. */
if (!vectype)
{
/* For boolean type we cannot determine vectype by
vectype = vectype_out;
}
else
- vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
+ vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0),
+ slp_node);
}
if (vec_stmt)
gcc_assert (vectype);
if (maybe_ne (nunits_out, nunits_in))
return false;
+ tree vectype2 = NULL_TREE, vectype3 = NULL_TREE;
+ slp_tree slp_op1 = NULL, slp_op2 = NULL;
if (op_type == binary_op || op_type == ternary_op)
{
- op1 = gimple_assign_rhs2 (stmt);
- if (!vect_is_simple_use (op1, vinfo, &dt[1]))
+ if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
+ 1, &op1, &slp_op1, &dt[1], &vectype2))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
}
if (op_type == ternary_op)
{
- op2 = gimple_assign_rhs3 (stmt);
- if (!vect_is_simple_use (op2, vinfo, &dt[2]))
+ if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
+ 2, &op2, &slp_op2, &dt[2], &vectype3))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
gcc_assert (ncopies >= 1);
- /* Shifts are handled in vectorizable_shift (). */
- if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
- || code == RROTATE_EXPR)
- return false;
+ /* Reject attempts to combine mask types with nonmask types, e.g. if
+ we have an AND between a (nonmask) boolean loaded from memory and
+ a (mask) boolean result of a comparison.
+
+ TODO: We could easily fix these cases up using pattern statements. */
+ if (VECTOR_BOOLEAN_TYPE_P (vectype) != mask_op_p
+ || (vectype2 && VECTOR_BOOLEAN_TYPE_P (vectype2) != mask_op_p)
+ || (vectype3 && VECTOR_BOOLEAN_TYPE_P (vectype3) != mask_op_p))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "mixed mask and nonmask vector types\n");
+ return false;
+ }
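+  /* A hypothetical example of what is rejected here:
+       _Bool *p;  ...  tem = p[i] & (a[i] < b[i]);
+     p[i] is loaded as a data (nonmask) boolean vector while the
+     comparison produces a mask vector, and the two cannot be combined
+     without an intermediate conversion.  */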
/* Supportable by target? */
should only change the active lanes of the reduction chain,
keeping the inactive lanes as-is. */
if (loop_vinfo
- && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo)
+ && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
&& reduc_idx >= 0)
{
if (cond_fn == IFN_LAST
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"can't use a fully-masked loop because no"
" conditional operation is available.\n");
- LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
+ LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
}
else
vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
vectype, NULL);
}
+ /* Put types on constant and invariant SLP children. */
+ if (slp_node
+ && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
+ || !vect_maybe_update_slp_op_vectype (slp_op1, vectype)
+ || !vect_maybe_update_slp_op_vectype (slp_op2, vectype)))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "incompatible vector types for invariants\n");
+ return false;
+ }
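+  /* For instance (a sketch), in an SLP tree for 'x[i] = y[i] + 1' the
+     constant 1 is an invariant child with no vector type of its own;
+     the calls above record VECTYPE for such children and fail if a
+     conflicting vector type was already assigned by another use.  */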
+
STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
DUMP_VECT_SCOPE ("vectorizable_operation");
- vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
+ vect_model_simple_cost (vinfo, stmt_info,
+ ncopies, dt, ndts, slp_node, cost_vec);
return true;
}
VS2_3: vz3 = vx3 + v1 - -
S2: z = x + 1 - VS2_0 */
- prev_stmt_info = NULL;
- for (j = 0; j < ncopies; j++)
- {
- /* Handle uses. */
- if (j == 0)
- {
- if (op_type == binary_op)
- vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0, &vec_oprnds1,
- slp_node);
- else if (op_type == ternary_op)
- {
- if (slp_node)
- {
- auto_vec<vec<tree> > vec_defs(3);
- vect_get_slp_defs (slp_node, &vec_defs);
- vec_oprnds0 = vec_defs[0];
- vec_oprnds1 = vec_defs[1];
- vec_oprnds2 = vec_defs[2];
- }
- else
- {
- vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0,
- &vec_oprnds1, NULL);
- vect_get_vec_defs (op2, NULL_TREE, stmt_info, &vec_oprnds2,
- NULL, NULL);
- }
- }
- else
- vect_get_vec_defs (op0, NULL_TREE, stmt_info, &vec_oprnds0, NULL,
- slp_node);
+ vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
+ op0, &vec_oprnds0, op1, &vec_oprnds1, op2, &vec_oprnds2);
+ /* Arguments are ready. Create the new vector stmt. */
+ FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
+ {
+ gimple *new_stmt = NULL;
+ vop1 = ((op_type == binary_op || op_type == ternary_op)
+ ? vec_oprnds1[i] : NULL_TREE);
+ vop2 = ((op_type == ternary_op) ? vec_oprnds2[i] : NULL_TREE);
+ if (masked_loop_p && reduc_idx >= 0)
+ {
+ /* Perform the operation on active elements only and take
+ inactive elements from the reduction chain input. */
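+	  /* As an illustration, for a masked sum reduction cond_fn would
+	     be IFN_COND_ADD and the call built below computes, per lane,
+	       new_temp = mask ? vop0 + vop1 : vop2,
+	     so inactive lanes simply pass the accumulator through.  */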
+ gcc_assert (!vop2);
+ vop2 = reduc_idx == 1 ? vop1 : vop0;
+ tree mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
+ vectype, i);
+ gcall *call = gimple_build_call_internal (cond_fn, 4, mask,
+ vop0, vop1, vop2);
+ new_temp = make_ssa_name (vec_dest, call);
+ gimple_call_set_lhs (call, new_temp);
+ gimple_call_set_nothrow (call, true);
+ vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
+ new_stmt = call;
}
else
{
- vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds0, &vec_oprnds1);
- if (op_type == ternary_op)
- {
- tree vec_oprnd = vec_oprnds2.pop ();
- vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (vinfo,
- vec_oprnd));
- }
- }
-
- /* Arguments are ready. Create the new vector stmt. */
- stmt_vec_info new_stmt_info = NULL;
- FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
- {
- vop1 = ((op_type == binary_op || op_type == ternary_op)
- ? vec_oprnds1[i] : NULL_TREE);
- vop2 = ((op_type == ternary_op)
- ? vec_oprnds2[i] : NULL_TREE);
- if (masked_loop_p && reduc_idx >= 0)
- {
- /* Perform the operation on active elements only and take
- inactive elements from the reduction chain input. */
- gcc_assert (!vop2);
- vop2 = reduc_idx == 1 ? vop1 : vop0;
- tree mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
- vectype, i * ncopies + j);
- gcall *call = gimple_build_call_internal (cond_fn, 4, mask,
- vop0, vop1, vop2);
- new_temp = make_ssa_name (vec_dest, call);
- gimple_call_set_lhs (call, new_temp);
- gimple_call_set_nothrow (call, true);
- new_stmt_info
- = vect_finish_stmt_generation (stmt_info, call, gsi);
- }
- else
+ new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
+ new_temp = make_ssa_name (vec_dest, new_stmt);
+ gimple_assign_set_lhs (new_stmt, new_temp);
+ vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+ if (vec_cvt_dest)
{
- gassign *new_stmt = gimple_build_assign (vec_dest, code,
- vop0, vop1, vop2);
- new_temp = make_ssa_name (vec_dest, new_stmt);
+ new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
+ new_stmt = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
+ new_temp);
+ new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
gimple_assign_set_lhs (new_stmt, new_temp);
- new_stmt_info
- = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
- if (vec_cvt_dest)
- {
- new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
- gassign *new_stmt
- = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
- new_temp);
- new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
- gimple_assign_set_lhs (new_stmt, new_temp);
- new_stmt_info
- = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
- }
+ vect_finish_stmt_generation (vinfo, stmt_info,
+ new_stmt, gsi);
}
- if (slp_node)
- SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
- }
-
+ }
if (slp_node)
- continue;
-
- if (j == 0)
- STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
+ SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
else
- STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
- prev_stmt_info = new_stmt_info;
+ STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
}
+ if (!slp_node)
+ *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
+
vec_oprnds0.release ();
vec_oprnds1.release ();
vec_oprnds2.release ();
&& TREE_CODE (init) != REAL_CST)
|| !initializer_zerop (init))
{
- tree masktype = build_same_sized_truth_vector_type (vectype);
+ tree masktype = truth_type_for (vectype);
if (!expand_vec_cond_expr_p (vectype, masktype, VECTOR_CST))
return -1;
whole_vector_shift_kind = scan_store_kind_lshift_cond;
Check magic stores for #pragma omp scan {in,ex}clusive reductions. */
static bool
-check_scan_store (stmt_vec_info stmt_info, tree vectype,
+check_scan_store (vec_info *vinfo, stmt_vec_info stmt_info, tree vectype,
enum vect_def_type rhs_dt, bool slp, tree mask,
vect_memory_access_type memory_access_type)
{
- loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
+ loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
tree ref_type;
|| loop_vinfo == NULL
|| LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
|| STMT_VINFO_GROUPED_ACCESS (stmt_info)
- || !integer_zerop (DR_OFFSET (dr_info->dr))
+ || !integer_zerop (get_dr_vinfo_offset (vinfo, dr_info))
|| !integer_zerop (DR_INIT (dr_info->dr))
|| !(ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr)))
|| !alias_sets_conflict_p (get_alias_set (vectype),
Handle only the transformation; checking is done in check_scan_store. */
static bool
-vectorizable_scan_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
- stmt_vec_info *vec_stmt, int ncopies)
+vectorizable_scan_store (vec_info *vinfo,
+ stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
+ gimple **vec_stmt, int ncopies)
{
- loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
+ loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
tree ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
- vec_info *vinfo = stmt_info->vinfo;
tree vectype = STMT_VINFO_VECTYPE (stmt_info);
if (dump_enabled_p ())
zero_vec = build_zero_cst (vectype);
if (masktype == NULL_TREE
&& use_whole_vector[i] == scan_store_kind_lshift_cond)
- masktype = build_same_sized_truth_vector_type (vectype);
+ masktype = truth_type_for (vectype);
perms[i] = vect_gen_perm_mask_any (vectype, indices);
}
else
perms[i] = vect_gen_perm_mask_checked (vectype, indices);
}
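+  /* The perms built above implement a log2 scan scheme; as a sketch for
+     an 8-lane sum scan (values hypothetical):
+       v1 = v  + (v  shifted up by 1 lane)
+       v2 = v1 + (v1 shifted up by 2 lanes)
+       v3 = v2 + (v2 shifted up by 4 lanes)
+     after which each lane holds the inclusive prefix result.  */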
- stmt_vec_info prev_stmt_info = NULL;
tree vec_oprnd1 = NULL_TREE;
tree vec_oprnd2 = NULL_TREE;
tree vec_oprnd3 = NULL_TREE;
tree dataref_ptr = DR_BASE_ADDRESS (dr_info->dr);
tree dataref_offset = build_int_cst (ref_type, 0);
- tree bump = vect_get_data_ptr_increment (dr_info, vectype, VMAT_CONTIGUOUS);
+ tree bump = vect_get_data_ptr_increment (vinfo, dr_info,
+ vectype, VMAT_CONTIGUOUS);
tree ldataref_ptr = NULL_TREE;
tree orig = NULL_TREE;
if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
ldataref_ptr = DR_BASE_ADDRESS (load1_dr_info->dr);
+ auto_vec<tree> vec_oprnds1;
+ auto_vec<tree> vec_oprnds2;
+ auto_vec<tree> vec_oprnds3;
+ vect_get_vec_defs (vinfo, stmt_info, NULL, ncopies,
+ *init, &vec_oprnds1,
+ ldataref_ptr == NULL ? rhs1 : NULL, &vec_oprnds2,
+ rhs2, &vec_oprnds3);
for (int j = 0; j < ncopies; j++)
{
- stmt_vec_info new_stmt_info;
+ vec_oprnd1 = vec_oprnds1[j];
+ if (ldataref_ptr == NULL)
+ vec_oprnd2 = vec_oprnds2[j];
+ vec_oprnd3 = vec_oprnds3[j];
if (j == 0)
- {
- vec_oprnd1 = vect_get_vec_def_for_operand (*init, stmt_info);
- if (ldataref_ptr == NULL)
- vec_oprnd2 = vect_get_vec_def_for_operand (rhs1, stmt_info);
- vec_oprnd3 = vect_get_vec_def_for_operand (rhs2, stmt_info);
- orig = vec_oprnd3;
- }
- else
- {
- vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd1);
- if (ldataref_ptr == NULL)
- vec_oprnd2 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd2);
- vec_oprnd3 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd3);
- if (!inscan_var_store)
- dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
- }
+ orig = vec_oprnd3;
+ else if (!inscan_var_store)
+ dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
if (ldataref_ptr)
{
dataref_offset);
vect_copy_ref_info (data_ref, DR_REF (load1_dr_info->dr));
gimple *g = gimple_build_assign (vec_oprnd2, data_ref);
- new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
- if (prev_stmt_info == NULL)
- STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
- else
- STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
- prev_stmt_info = new_stmt_info;
+ vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
+ STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
+ *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
}
tree v = vec_oprnd2;
!= scan_store_kind_perm))
? zero_vec : vec_oprnd1, v,
perms[i]);
- new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
- if (prev_stmt_info == NULL)
- STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
- else
- STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
- prev_stmt_info = new_stmt_info;
+ vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
+ STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
+ *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
if (zero_vec && use_whole_vector[i] == scan_store_kind_lshift_cond)
{
tree new_temp2 = make_ssa_name (vectype);
g = gimple_build_assign (new_temp2, VEC_COND_EXPR, vb.build (),
new_temp, vec_oprnd1);
- new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
- STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
- prev_stmt_info = new_stmt_info;
+ vect_finish_stmt_generation (vinfo, stmt_info,
+ g, gsi);
+ STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
new_temp = new_temp2;
}
tree new_temp2 = make_ssa_name (vectype);
g = gimple_build_assign (new_temp2, code, v, new_temp);
- new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
- STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
- prev_stmt_info = new_stmt_info;
+ vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
+ STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
v = new_temp2;
}
tree new_temp = make_ssa_name (vectype);
gimple *g = gimple_build_assign (new_temp, code, orig, v);
- new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
- STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
- prev_stmt_info = new_stmt_info;
+ vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
+ STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
tree last_perm_arg = new_temp;
/* For exclusive scan, new_temp computed above is the exclusive scan
{
last_perm_arg = make_ssa_name (vectype);
g = gimple_build_assign (last_perm_arg, code, new_temp, vec_oprnd2);
- new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
- STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
- prev_stmt_info = new_stmt_info;
+ vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
+ STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
}
orig = make_ssa_name (vectype);
g = gimple_build_assign (orig, VEC_PERM_EXPR, last_perm_arg,
last_perm_arg, perms[units_log2]);
- new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
- STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
- prev_stmt_info = new_stmt_info;
+ vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
+ STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
if (!inscan_var_store)
{
dataref_offset);
vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
g = gimple_build_assign (data_ref, new_temp);
- new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
- STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
- prev_stmt_info = new_stmt_info;
+ vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
+ STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
}
}
dataref_offset);
vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
gimple *g = gimple_build_assign (data_ref, orig);
- stmt_vec_info new_stmt_info
- = vect_finish_stmt_generation (stmt_info, g, gsi);
- STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
- prev_stmt_info = new_stmt_info;
+ vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
+ STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
}
return true;
}
Return true if STMT_INFO is vectorizable in this way. */
static bool
-vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
- stmt_vec_info *vec_stmt, slp_tree slp_node,
+vectorizable_store (vec_info *vinfo,
+ stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
+ gimple **vec_stmt, slp_tree slp_node,
stmt_vector_for_cost *cost_vec)
{
tree data_ref;
tree op;
tree vec_oprnd = NULL_TREE;
tree elem_type;
- loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
+ loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
class loop *loop = NULL;
machine_mode vec_mode;
tree dummy;
enum dr_alignment_support alignment_support_scheme;
enum vect_def_type rhs_dt = vect_unknown_def_type;
enum vect_def_type mask_dt = vect_unknown_def_type;
- stmt_vec_info prev_stmt_info = NULL;
tree dataref_ptr = NULL_TREE;
tree dataref_offset = NULL_TREE;
gimple *ptr_incr = NULL;
vec<tree> vec_oprnds = vNULL;
bool slp = (slp_node != NULL);
unsigned int vec_num;
- bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
- vec_info *vinfo = stmt_info->vinfo;
+ bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
tree aggr_type;
gather_scatter_info gs_info;
poly_uint64 vf;
if (mask_index >= 0)
{
mask = gimple_call_arg (call, mask_index);
- if (!vect_check_load_store_mask (stmt_info, mask, &mask_dt,
- &mask_vectype))
+ if (!vect_check_scalar_mask (vinfo, stmt_info, mask, &mask_dt,
+ &mask_vectype))
return false;
}
}
return false;
}
- if (!vect_check_store_rhs (stmt_info, op, &rhs_dt, &rhs_vectype, &vls_type))
+ if (!vect_check_store_rhs (vinfo, stmt_info, slp_node,
+ op, &rhs_dt, &rhs_vectype, &vls_type))
return false;
elem_type = TREE_TYPE (vectype);
return false;
vect_memory_access_type memory_access_type;
- if (!get_load_store_type (stmt_info, vectype, slp, mask, vls_type, ncopies,
- &memory_access_type, &gs_info))
+ if (!get_load_store_type (vinfo, stmt_info, vectype, slp, mask, vls_type,
+ ncopies, &memory_access_type, &gs_info))
return false;
if (mask)
if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1 && !vec_stmt)
{
- if (!check_scan_store (stmt_info, vectype, rhs_dt, slp, mask,
+ if (!check_scan_store (vinfo, stmt_info, vectype, rhs_dt, slp, mask,
memory_access_type))
return false;
}
STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
if (loop_vinfo
- && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
- check_load_store_masking (loop_vinfo, vectype, vls_type, group_size,
- memory_access_type, &gs_info, mask);
+ && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
+ check_load_store_for_partial_vectors (loop_vinfo, vectype, vls_type,
+ group_size, memory_access_type,
+ &gs_info, mask);
+
+ if (slp_node
+ && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0],
+ vectype))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "incompatible vector types for invariants\n");
+ return false;
+ }
STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
- vect_model_store_cost (stmt_info, ncopies, rhs_dt, memory_access_type,
- vls_type, slp_node, cost_vec);
+ vect_model_store_cost (vinfo, stmt_info, ncopies,
+ memory_access_type, vls_type, slp_node, cost_vec);
return true;
}
gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
ncopies *= 2;
if (mask)
- mask_halfvectype
- = build_same_sized_truth_vector_type (gs_info.offset_vectype);
+ mask_halfvectype = truth_type_for (gs_info.offset_vectype);
}
else
gcc_unreachable ();
if (mask == NULL_TREE)
{
mask_arg = build_int_cst (masktype, -1);
- mask_arg = vect_init_vector (stmt_info, mask_arg, masktype, NULL);
+ mask_arg = vect_init_vector (vinfo, stmt_info,
+ mask_arg, masktype, NULL);
}
scale = build_int_cst (scaletype, gs_info.scale);
- prev_stmt_info = NULL;
+ auto_vec<tree> vec_oprnds0;
+ auto_vec<tree> vec_oprnds1;
+ auto_vec<tree> vec_masks;
+ if (mask)
+ {
+ tree mask_vectype = truth_type_for (vectype);
+ vect_get_vec_defs_for_operand (vinfo, stmt_info,
+ modifier == NARROW
+ ? ncopies / 2 : ncopies,
+ mask, &vec_masks, mask_vectype);
+ }
+ vect_get_vec_defs_for_operand (vinfo, stmt_info,
+ modifier == WIDEN
+ ? ncopies / 2 : ncopies,
+ gs_info.offset, &vec_oprnds0);
+ vect_get_vec_defs_for_operand (vinfo, stmt_info,
+ modifier == NARROW
+ ? ncopies / 2 : ncopies,
+ op, &vec_oprnds1);
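+  /* A sketch of the def counts (types hypothetical): with 32-bit offsets
+     and 64-bit data, modifier == WIDEN and one offset vector serves two
+     data vectors, so ncopies / 2 offset defs suffice; with 64-bit
+     offsets and 32-bit data, modifier == NARROW and the same holds for
+     the rhs and mask defs instead.  */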
for (j = 0; j < ncopies; ++j)
{
- if (j == 0)
+ if (modifier == WIDEN)
{
- src = vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt_info);
- op = vec_oprnd0 = vect_get_vec_def_for_operand (gs_info.offset,
- stmt_info);
+ if (j & 1)
+ op = permute_vec_elements (vinfo, vec_oprnd0, vec_oprnd0,
+ perm_mask, stmt_info, gsi);
+ else
+ op = vec_oprnd0 = vec_oprnds0[j / 2];
+ src = vec_oprnd1 = vec_oprnds1[j];
if (mask)
- mask_op = vec_mask = vect_get_vec_def_for_operand (mask,
- stmt_info);
+ mask_op = vec_mask = vec_masks[j];
}
- else if (modifier != NONE && (j & 1))
+ else if (modifier == NARROW)
{
- if (modifier == WIDEN)
- {
- src
- = vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo,
- vec_oprnd1);
- op = permute_vec_elements (vec_oprnd0, vec_oprnd0, perm_mask,
- stmt_info, gsi);
- if (mask)
- mask_op
- = vec_mask = vect_get_vec_def_for_stmt_copy (vinfo,
- vec_mask);
- }
- else if (modifier == NARROW)
- {
- src = permute_vec_elements (vec_oprnd1, vec_oprnd1, perm_mask,
- stmt_info, gsi);
- op = vec_oprnd0 = vect_get_vec_def_for_stmt_copy (vinfo,
- vec_oprnd0);
- }
+ if (j & 1)
+ src = permute_vec_elements (vinfo, vec_oprnd1, vec_oprnd1,
+ perm_mask, stmt_info, gsi);
else
- gcc_unreachable ();
+ src = vec_oprnd1 = vec_oprnds1[j / 2];
+ op = vec_oprnd0 = vec_oprnds0[j];
+ if (mask)
+ mask_op = vec_mask = vec_masks[j / 2];
}
else
{
- src = vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo,
- vec_oprnd1);
- op = vec_oprnd0 = vect_get_vec_def_for_stmt_copy (vinfo,
- vec_oprnd0);
+ op = vec_oprnd0 = vec_oprnds0[j];
+ src = vec_oprnd1 = vec_oprnds1[j];
if (mask)
- mask_op = vec_mask = vect_get_vec_def_for_stmt_copy (vinfo,
- vec_mask);
+ mask_op = vec_mask = vec_masks[j];
}
if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
src = build1 (VIEW_CONVERT_EXPR, srctype, src);
gassign *new_stmt
= gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
- vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
+ vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
src = var;
}
op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
gassign *new_stmt
= gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
- vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
+ vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
op = var;
}
= gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
: VEC_UNPACK_LO_EXPR,
mask_op);
- vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
+ vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
mask_arg = var;
}
tree optype = TREE_TYPE (mask_arg);
mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_arg);
gassign *new_stmt
= gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
- vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
+ vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
mask_arg = var;
if (!useless_type_conversion_p (masktype, utype))
{
<= TYPE_PRECISION (masktype));
var = vect_get_new_ssa_name (masktype, vect_scalar_var);
new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
- vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
+ vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
mask_arg = var;
}
}
gcall *new_stmt
= gimple_build_call (gs_info.decl, 5, ptr, mask_arg, op, src, scale);
- stmt_vec_info new_stmt_info
- = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
+ vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
- if (prev_stmt_info == NULL)
- STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
- else
- STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
- prev_stmt_info = new_stmt_info;
+ STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
}
+ *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
return true;
}
else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) >= 3)
- return vectorizable_scan_store (stmt_info, gsi, vec_stmt, ncopies);
+ return vectorizable_scan_store (vinfo, stmt_info, gsi, vec_stmt, ncopies);
if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info))++;
tree running_off;
tree stride_base, stride_step, alias_off;
tree vec_oprnd;
+ tree dr_offset;
unsigned int g;
/* Checked by get_load_store_type. */
unsigned int const_nunits = nunits.to_constant ();
gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
gcc_assert (!nested_in_vect_loop_p (loop, stmt_info));
+ dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
stride_base
= fold_build_pointer_plus
(DR_BASE_ADDRESS (first_dr_info->dr),
size_binop (PLUS_EXPR,
- convert_to_ptrofftype (DR_OFFSET (first_dr_info->dr)),
+ convert_to_ptrofftype (dr_offset),
convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
of vector elts directly. */
scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
machine_mode vmode;
- if (!mode_for_vector (elmode, group_size).exists (&vmode)
- || !VECTOR_MODE_P (vmode)
- || !targetm.vector_mode_supported_p (vmode)
+ if (!VECTOR_MODE_P (TYPE_MODE (vectype))
+ || !related_vector_mode (TYPE_MODE (vectype), elmode,
+ group_size).exists (&vmode)
|| (convert_optab_handler (vec_extract_optab,
TYPE_MODE (vectype), vmode)
== CODE_FOR_nothing))
element extracts from the original vector type and
element size stores. */
if (int_mode_for_size (lsize, 0).exists (&elmode)
- && mode_for_vector (elmode, lnunits).exists (&vmode)
- && VECTOR_MODE_P (vmode)
- && targetm.vector_mode_supported_p (vmode)
+ && VECTOR_MODE_P (TYPE_MODE (vectype))
+ && related_vector_mode (TYPE_MODE (vectype), elmode,
+ lnunits).exists (&vmode)
&& (convert_optab_handler (vec_extract_optab,
vmode, elmode)
!= CODE_FOR_nothing))
loop, &incr_gsi, insert_after,
&offvar, NULL);
incr = gsi_stmt (incr_gsi);
- loop_vinfo->add_stmt (incr);
stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
- prev_stmt_info = NULL;
alias_off = build_int_cst (ref_type, 0);
stmt_vec_info next_stmt_info = first_stmt_info;
for (g = 0; g < group_size; g++)
tree newoff = copy_ssa_name (running_off, NULL);
incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
running_off, pos);
- vect_finish_stmt_generation (stmt_info, incr, gsi);
+ vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
running_off = newoff;
}
+ if (!slp)
+ op = vect_get_store_rhs (next_stmt_info);
+ vect_get_vec_defs (vinfo, next_stmt_info, slp_node, ncopies,
+ op, &vec_oprnds);
unsigned int group_el = 0;
unsigned HOST_WIDE_INT
elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
for (j = 0; j < ncopies; j++)
{
- /* We've set op and dt above, from vect_get_store_rhs,
- and first_stmt_info == stmt_info. */
- if (j == 0)
- {
- if (slp)
- {
- vect_get_vec_defs (op, NULL_TREE, stmt_info,
- &vec_oprnds, NULL, slp_node);
- vec_oprnd = vec_oprnds[0];
- }
- else
- {
- op = vect_get_store_rhs (next_stmt_info);
- vec_oprnd = vect_get_vec_def_for_operand
- (op, next_stmt_info);
- }
- }
- else
- {
- if (slp)
- vec_oprnd = vec_oprnds[j];
- else
- vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo,
- vec_oprnd);
- }
+ vec_oprnd = vec_oprnds[j];
/* Pun the vector to extract from if necessary. */
if (lvectype != vectype)
{
gimple *pun
= gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
lvectype, vec_oprnd));
- vect_finish_stmt_generation (stmt_info, pun, gsi);
+ vect_finish_stmt_generation (vinfo, stmt_info, pun, gsi);
vec_oprnd = tem;
}
for (i = 0; i < nstores; i++)
/* And store it to *running_off. */
assign = gimple_build_assign (newref, elem);
- stmt_vec_info assign_info
- = vect_finish_stmt_generation (stmt_info, assign, gsi);
+ vect_finish_stmt_generation (vinfo, stmt_info, assign, gsi);
group_el += lnel;
if (! slp
newoff = copy_ssa_name (running_off, NULL);
incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
running_off, stride_step);
- vect_finish_stmt_generation (stmt_info, incr, gsi);
+ vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
running_off = newoff;
group_el = 0;
&& !slp)
{
if (j == 0 && i == 0)
- STMT_VINFO_VEC_STMT (stmt_info)
- = *vec_stmt = assign_info;
- else
- STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign_info;
- prev_stmt_info = assign_info;
+ *vec_stmt = assign;
+ STMT_VINFO_VEC_STMTS (stmt_info).safe_push (assign);
}
}
}
auto_vec<tree> dr_chain (group_size);
oprnds.create (group_size);
- alignment_support_scheme
- = vect_supportable_dr_alignment (first_dr_info, false);
+  /* Gather-scatter accesses perform only component accesses, so alignment
+     is irrelevant for them. */
+ if (memory_access_type == VMAT_GATHER_SCATTER)
+ alignment_support_scheme = dr_unaligned_supported;
+ else
+ alignment_support_scheme
+ = vect_supportable_dr_alignment (vinfo, first_dr_info, false);
+
gcc_assert (alignment_support_scheme);
vec_loop_masks *loop_masks
= (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
else
aggr_type = vectype;
- bump = vect_get_data_ptr_increment (dr_info, aggr_type,
+ bump = vect_get_data_ptr_increment (vinfo, dr_info, aggr_type,
memory_access_type);
}
/* In case the vectorization factor (VF) is bigger than the number
of elements that we can fit in a vectype (nunits), we have to generate
more than one vector stmt, i.e., we need to "unroll" the
- vector stmt by a factor VF/nunits. For more details see documentation in
- vect_get_vec_def_for_copy_stmt. */
+ vector stmt by a factor VF/nunits. */
/* In case of interleaving (non-unit grouped access):
STMT_VINFO_RELATED_STMT for the next copies.
*/
- prev_stmt_info = NULL;
- tree vec_mask = NULL_TREE;
+ auto_vec<tree> vec_masks;
+ tree vec_mask = NULL;
+ auto_vec<tree> vec_offsets;
+ auto_vec<vec<tree> > gvec_oprnds;
+ gvec_oprnds.safe_grow_cleared (group_size);
for (j = 0; j < ncopies; j++)
{
- stmt_vec_info new_stmt_info;
+ gimple *new_stmt;
if (j == 0)
{
if (slp)
{
/* Get vectorized arguments for SLP_NODE. */
- vect_get_vec_defs (op, NULL_TREE, stmt_info, &vec_oprnds,
- NULL, slp_node);
-
+ vect_get_vec_defs (vinfo, stmt_info, slp_node, 1,
+ op, &vec_oprnds);
vec_oprnd = vec_oprnds[0];
}
else
{
/* For interleaved stores we collect vectorized defs for all the
stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
- used as an input to vect_permute_store_chain(), and OPRNDS as
- an input to vect_get_vec_def_for_stmt_copy() for the next copy.
+ used as an input to vect_permute_store_chain().
- If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
- OPRNDS are of size 1. */
+ If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN
+ and OPRNDS are of size 1. */
stmt_vec_info next_stmt_info = first_stmt_info;
for (i = 0; i < group_size; i++)
{
that there is no interleaving, DR_GROUP_SIZE is 1,
and only one iteration of the loop will be executed. */
op = vect_get_store_rhs (next_stmt_info);
- vec_oprnd = vect_get_vec_def_for_operand
- (op, next_stmt_info);
- dr_chain.quick_push (vec_oprnd);
- oprnds.quick_push (vec_oprnd);
+ vect_get_vec_defs_for_operand (vinfo, next_stmt_info,
+ ncopies, op, &gvec_oprnds[i]);
+ vec_oprnd = gvec_oprnds[i][0];
+ dr_chain.quick_push (gvec_oprnds[i][0]);
+ oprnds.quick_push (gvec_oprnds[i][0]);
next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
}
if (mask)
- vec_mask = vect_get_vec_def_for_operand (mask, stmt_info,
- mask_vectype);
+ {
+ vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
+ mask, &vec_masks, mask_vectype);
+ vec_mask = vec_masks[0];
+ }
}
/* We should have caught mismatched types earlier. */
&& !loop_masks
&& TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
&& VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
- && integer_zerop (DR_OFFSET (first_dr_info->dr))
+ && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
&& integer_zerop (DR_INIT (first_dr_info->dr))
&& alias_sets_conflict_p (get_alias_set (aggr_type),
get_alias_set (TREE_TYPE (ref_type))))
dataref_offset = build_int_cst (ref_type, 0);
}
else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
- vect_get_gather_scatter_ops (loop, stmt_info, &gs_info,
- &dataref_ptr, &vec_offset);
+ {
+ vect_get_gather_scatter_ops (vinfo, loop, stmt_info, &gs_info,
+ &dataref_ptr, &vec_offsets, ncopies);
+ vec_offset = vec_offsets[0];
+ }
else
dataref_ptr
- = vect_create_data_ref_ptr (first_stmt_info, aggr_type,
+ = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
simd_lane_access_p ? loop : NULL,
offset, &dummy, gsi, &ptr_incr,
simd_lane_access_p, NULL_TREE, bump);
{
/* For interleaved stores we created vectorized defs for all the
defs stored in OPRNDS in the previous iteration (previous copy).
- DR_CHAIN is then used as an input to vect_permute_store_chain(),
- and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
- next copy.
+ DR_CHAIN is then used as an input to vect_permute_store_chain().
If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
OPRNDS are of size 1. */
for (i = 0; i < group_size; i++)
{
- op = oprnds[i];
- vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, op);
- dr_chain[i] = vec_oprnd;
- oprnds[i] = vec_oprnd;
+ vec_oprnd = gvec_oprnds[i][j];
+ dr_chain[i] = gvec_oprnds[i][j];
+ oprnds[i] = gvec_oprnds[i][j];
}
if (mask)
- vec_mask = vect_get_vec_def_for_stmt_copy (vinfo, vec_mask);
+ vec_mask = vec_masks[j];
if (dataref_offset)
dataref_offset
= int_const_binop (PLUS_EXPR, dataref_offset, bump);
else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
- vec_offset = vect_get_vec_def_for_stmt_copy (vinfo, vec_offset);
+ vec_offset = vec_offsets[j];
else
- dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
+ dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
stmt_info, bump);
}
/* Invalidate the current contents of VEC_ARRAY. This should
become an RTL clobber too, which prevents the vector registers
from being upward-exposed. */
- vect_clobber_variable (stmt_info, gsi, vec_array);
+ vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
/* Store the individual vectors into the array. */
for (i = 0; i < vec_num; i++)
{
vec_oprnd = dr_chain[i];
- write_vector_array (stmt_info, gsi, vec_oprnd, vec_array, i);
+ write_vector_array (vinfo, stmt_info,
+ gsi, vec_oprnd, vec_array, i);
}
tree final_mask = NULL;
gimple_call_set_lhs (call, data_ref);
}
gimple_call_set_nothrow (call, true);
- new_stmt_info = vect_finish_stmt_generation (stmt_info, call, gsi);
+ vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
+ new_stmt = call;
/* Record that VEC_ARRAY is now dead. */
- vect_clobber_variable (stmt_info, gsi, vec_array);
+ vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
}
else
{
- new_stmt_info = NULL;
+ new_stmt = NULL;
if (grouped_store)
{
if (j == 0)
result_chain.create (group_size);
/* Permute. */
- vect_permute_store_chain (dr_chain, group_size, stmt_info, gsi,
- &result_chain);
+ vect_permute_store_chain (vinfo, dr_chain, group_size, stmt_info,
+ gsi, &result_chain);
}
stmt_vec_info next_stmt_info = first_stmt_info;
(IFN_SCATTER_STORE, 4, dataref_ptr, vec_offset,
scale, vec_oprnd);
gimple_call_set_nothrow (call, true);
- new_stmt_info
- = vect_finish_stmt_generation (stmt_info, call, gsi);
+ vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
+ new_stmt = call;
break;
}
if (i > 0)
/* Bump the vector pointer. */
- dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
- stmt_info, bump);
+ dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
+ gsi, stmt_info, bump);
if (slp)
vec_oprnd = vec_oprnds[i];
misalign = 0;
else if (DR_MISALIGNMENT (first_dr_info) == -1)
{
- align = dr_alignment (vect_dr_behavior (first_dr_info));
+ align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
misalign = 0;
}
else
gimple *perm_stmt
= gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
vec_oprnd, perm_mask);
- vect_finish_stmt_generation (stmt_info, perm_stmt, gsi);
+ vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
perm_stmt = SSA_NAME_DEF_STMT (new_temp);
vec_oprnd = new_temp;
dataref_ptr, ptr,
final_mask, vec_oprnd);
gimple_call_set_nothrow (call, true);
- new_stmt_info
- = vect_finish_stmt_generation (stmt_info, call, gsi);
+ vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
+ new_stmt = call;
}
else
{
= build_aligned_type (TREE_TYPE (data_ref),
TYPE_ALIGN (elem_type));
vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
- gassign *new_stmt
- = gimple_build_assign (data_ref, vec_oprnd);
- new_stmt_info
- = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
+ new_stmt = gimple_build_assign (data_ref, vec_oprnd);
+ vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
}
if (slp)
if (!slp)
{
if (j == 0)
- STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
- else
- STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
- prev_stmt_info = new_stmt_info;
+ *vec_stmt = new_stmt;
+ STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
}
}
+ for (i = 0; i < group_size; ++i)
+ {
+ vec<tree> oprndsi = gvec_oprnds[i];
+ oprndsi.release ();
+ }
oprnds.release ();
result_chain.release ();
vec_oprnds.release ();
permuted vector variable. */
static tree
-permute_vec_elements (tree x, tree y, tree mask_vec, stmt_vec_info stmt_info,
+permute_vec_elements (vec_info *vinfo,
+ tree x, tree y, tree mask_vec, stmt_vec_info stmt_info,
gimple_stmt_iterator *gsi)
{
tree vectype = TREE_TYPE (x);
/* Generate the permute statement. */
perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
- vect_finish_stmt_generation (stmt_info, perm_stmt, gsi);
+ vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
return data_ref;
}
Return true if STMT_INFO is vectorizable in this way. */
static bool
-vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
- stmt_vec_info *vec_stmt, slp_tree slp_node,
- slp_instance slp_node_instance,
+vectorizable_load (vec_info *vinfo,
+ stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
+ gimple **vec_stmt, slp_tree slp_node,
stmt_vector_for_cost *cost_vec)
{
tree scalar_dest;
tree vec_dest = NULL;
tree data_ref = NULL;
- stmt_vec_info prev_stmt_info;
- loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
+ loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
class loop *loop = NULL;
class loop *containing_loop = gimple_bb (stmt_info->stmt)->loop_father;
bool nested_in_vect_loop = false;
int vec_num;
bool slp = (slp_node != NULL);
bool slp_perm = false;
- bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
+ bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
poly_uint64 vf;
tree aggr_type;
gather_scatter_info gs_info;
- vec_info *vinfo = stmt_info->vinfo;
tree ref_type;
enum vect_def_type mask_dt = vect_unknown_def_type;
&& ! vec_stmt)
return false;
+ if (!STMT_VINFO_DATA_REF (stmt_info))
+ return false;
+
+  /* ??? Alignment analysis for SLP looks at SLP_TREE_SCALAR_STMTS[0]
+     for unpermuted loads, but we get passed SLP_TREE_REPRESENTATIVE,
+     which can differ when reduction chains were re-ordered.  Now that
+     we know this is a data reference, reset stmt_info back to
+     SLP_TREE_SCALAR_STMTS[0].  Once everything is SLP-only, this should
+     be refactored so that the dr_vec_info pointer for the relevant
+     access is maintained explicitly. */
+ stmt_vec_info orig_stmt_info = stmt_info;
+ if (slp_node)
+ stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
+
tree mask = NULL_TREE, mask_vectype = NULL_TREE;
if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
{
if (mask_index >= 0)
{
mask = gimple_call_arg (call, mask_index);
- if (!vect_check_load_store_mask (stmt_info, mask, &mask_dt,
- &mask_vectype))
+ if (!vect_check_scalar_mask (vinfo, stmt_info, mask, &mask_dt,
+ &mask_vectype))
return false;
}
}
- if (!STMT_VINFO_DATA_REF (stmt_info))
- return false;
-
tree vectype = STMT_VINFO_VECTYPE (stmt_info);
poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
}
if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
- slp_perm = true;
+ {
+ slp_perm = true;
+
+ if (!loop_vinfo)
+ {
+	  /* In BB vectorization a loaded vector must not access elements
+	     beyond DR_GROUP_SIZE. */
+ stmt_vec_info group_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
+ group_info = DR_GROUP_FIRST_ELEMENT (group_info);
+ unsigned HOST_WIDE_INT nunits;
+ unsigned j, k, maxk = 0;
+ FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (slp_node), j, k)
+ if (k > maxk)
+ maxk = k;
+ tree vectype = STMT_VINFO_VECTYPE (group_info);
+ if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
+ || maxk >= (DR_GROUP_SIZE (group_info) & ~(nunits - 1)))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "BB vectorization with gaps at the end of "
+ "a load is not supported\n");
+ return false;
+ }
+ }
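+	  /* A worked example with made-up numbers: for DR_GROUP_SIZE == 6
+	     and nunits == 4, 6 & ~3 == 4, so any permutation index of 4
+	     or more would read from the trailing partially-used vector
+	     and the load is rejected.  */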
+
+ auto_vec<tree> tem;
+ unsigned n_perms;
+ if (!vect_transform_slp_perm_load (vinfo, slp_node, tem, NULL, vf,
+ true, &n_perms))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION,
+ vect_location,
+ "unsupported load permutation\n");
+ return false;
+ }
+ }
/* Invalidate assumptions made by dependence analysis when vectorization
on the unrolled body effectively re-orders stmts. */
group_size = 1;
vect_memory_access_type memory_access_type;
- if (!get_load_store_type (stmt_info, vectype, slp, mask, VLS_LOAD, ncopies,
- &memory_access_type, &gs_info))
+ if (!get_load_store_type (vinfo, stmt_info, vectype, slp, mask, VLS_LOAD,
+ ncopies, &memory_access_type, &gs_info))
return false;
if (mask)
STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
if (loop_vinfo
- && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
- check_load_store_masking (loop_vinfo, vectype, VLS_LOAD, group_size,
- memory_access_type, &gs_info, mask);
-
- STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
- vect_model_load_cost (stmt_info, ncopies, memory_access_type,
- slp_node_instance, slp_node, cost_vec);
+ && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
+ check_load_store_for_partial_vectors (loop_vinfo, vectype, VLS_LOAD,
+ group_size, memory_access_type,
+ &gs_info, mask);
+
+ STMT_VINFO_TYPE (orig_stmt_info) = load_vec_info_type;
+ vect_model_load_cost (vinfo, stmt_info, ncopies, vf, memory_access_type,
+ slp_node, cost_vec);
return true;
}
if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
{
- vect_build_gather_load_calls (stmt_info, gsi, vec_stmt, &gs_info, mask);
+ vect_build_gather_load_calls (vinfo,
+ stmt_info, gsi, vec_stmt, &gs_info, mask);
return true;
}
}
/* These copies are all equivalent, but currently the representation
requires a separate STMT_VINFO_VEC_STMT for each one. */
- prev_stmt_info = NULL;
gimple_stmt_iterator gsi2 = *gsi;
gsi_next (&gsi2);
for (j = 0; j < ncopies; j++)
{
- stmt_vec_info new_stmt_info;
if (hoist_p)
- {
- new_temp = vect_init_vector (stmt_info, scalar_dest,
- vectype, NULL);
- gimple *new_stmt = SSA_NAME_DEF_STMT (new_temp);
- new_stmt_info = vinfo->add_stmt (new_stmt);
- }
+ new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
+ vectype, NULL);
else
- {
- new_temp = vect_init_vector (stmt_info, scalar_dest,
- vectype, &gsi2);
- new_stmt_info = vinfo->lookup_def (new_temp);
- }
+ new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
+ vectype, &gsi2);
+ gimple *new_stmt = SSA_NAME_DEF_STMT (new_temp);
if (slp)
- SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
- else if (j == 0)
- STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
+ SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
else
- STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
- prev_stmt_info = new_stmt_info;
+ {
+ if (j == 0)
+ *vec_stmt = new_stmt;
+ STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
+ }
}
return true;
}
{
gimple_stmt_iterator incr_gsi;
bool insert_after;
- gimple *incr;
tree offvar;
tree ivstep;
tree running_off;
/* Checked by get_load_store_type. */
unsigned int const_nunits = nunits.to_constant ();
unsigned HOST_WIDE_INT cst_offset = 0;
+ tree dr_offset;
gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
gcc_assert (!nested_in_vect_loop);
ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
}
+ dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
stride_base
= fold_build_pointer_plus
(DR_BASE_ADDRESS (first_dr_info->dr),
size_binop (PLUS_EXPR,
- convert_to_ptrofftype (DR_OFFSET (first_dr_info->dr)),
+ convert_to_ptrofftype (dr_offset),
convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
create_iv (stride_base, ivstep, NULL,
loop, &incr_gsi, insert_after,
&offvar, NULL);
- incr = gsi_stmt (incr_gsi);
- loop_vinfo->add_stmt (incr);
stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
- prev_stmt_info = NULL;
running_off = offvar;
alias_off = build_int_cst (ref_type, 0);
int nloads = const_nunits;
{
if (group_size < const_nunits)
{
- /* First check if vec_init optab supports construction from
- vector elts directly. */
- scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
- machine_mode vmode;
- if (mode_for_vector (elmode, group_size).exists (&vmode)
- && VECTOR_MODE_P (vmode)
- && targetm.vector_mode_supported_p (vmode)
- && (convert_optab_handler (vec_init_optab,
- TYPE_MODE (vectype), vmode)
- != CODE_FOR_nothing))
+ /* First check if vec_init optab supports construction from vector
+ elts directly. Otherwise avoid emitting a constructor of
+ vector elements by performing the loads using an integer type
+ of the same size, constructing a vector of those and then
+ re-interpreting it as the original vector type. This avoids a
+ huge runtime penalty due to the general inability to perform
+ store forwarding from smaller stores to a larger load. */
+ tree ptype;
+ tree vtype
+ = vector_vector_composition_type (vectype,
+ const_nunits / group_size,
+ &ptype);
+ if (vtype != NULL_TREE)
{
nloads = const_nunits / group_size;
lnel = group_size;
- ltype = build_vector_type (TREE_TYPE (vectype), group_size);
- }
- else
- {
- /* Otherwise avoid emitting a constructor of vector elements
- by performing the loads using an integer type of the same
- size, constructing a vector of those and then
- re-interpreting it as the original vector type.
- This avoids a huge runtime penalty due to the general
- inability to perform store forwarding from smaller stores
- to a larger load. */
- unsigned lsize
- = group_size * TYPE_PRECISION (TREE_TYPE (vectype));
- unsigned int lnunits = const_nunits / group_size;
- /* If we can't construct such a vector fall back to
- element loads of the original vector type. */
- if (int_mode_for_size (lsize, 0).exists (&elmode)
- && mode_for_vector (elmode, lnunits).exists (&vmode)
- && VECTOR_MODE_P (vmode)
- && targetm.vector_mode_supported_p (vmode)
- && (convert_optab_handler (vec_init_optab, vmode, elmode)
- != CODE_FOR_nothing))
- {
- nloads = lnunits;
- lnel = group_size;
- ltype = build_nonstandard_integer_type (lsize, 1);
- lvectype = build_vector_type (ltype, nloads);
- }
+ lvectype = vtype;
+ ltype = ptype;
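+	      /* As an illustration (types hypothetical): for group_size
+		 == 2 and V8SI, the composition type can be V4DI with
+		 piece type DI, so we emit four DImode loads, build a V4DI
+		 CONSTRUCTOR from them and view-convert it to V8SI rather
+		 than emitting eight SImode loads.  */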
}
}
else
{
if (nloads > 1)
vec_alloc (v, nloads);
- stmt_vec_info new_stmt_info = NULL;
+ gimple *new_stmt = NULL;
for (i = 0; i < nloads; i++)
{
tree this_off = build_int_cst (TREE_TYPE (alias_off),
group_el * elsz + cst_offset);
tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
- gassign *new_stmt
- = gimple_build_assign (make_ssa_name (ltype), data_ref);
- new_stmt_info
- = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
+ new_stmt = gimple_build_assign (make_ssa_name (ltype), data_ref);
+ vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
if (nloads > 1)
CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
gimple_assign_lhs (new_stmt));
tree newoff = copy_ssa_name (running_off);
gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
running_off, stride_step);
- vect_finish_stmt_generation (stmt_info, incr, gsi);
+ vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
running_off = newoff;
group_el = 0;
if (nloads > 1)
{
tree vec_inv = build_constructor (lvectype, v);
- new_temp = vect_init_vector (stmt_info, vec_inv, lvectype, gsi);
- new_stmt_info = vinfo->lookup_def (new_temp);
+ new_temp = vect_init_vector (vinfo, stmt_info,
+ vec_inv, lvectype, gsi);
+ new_stmt = SSA_NAME_DEF_STMT (new_temp);
if (lvectype != vectype)
{
- gassign *new_stmt
- = gimple_build_assign (make_ssa_name (vectype),
- VIEW_CONVERT_EXPR,
- build1 (VIEW_CONVERT_EXPR,
- vectype, new_temp));
- new_stmt_info
- = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
+ new_stmt = gimple_build_assign (make_ssa_name (vectype),
+ VIEW_CONVERT_EXPR,
+ build1 (VIEW_CONVERT_EXPR,
+ vectype, new_temp));
+ vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
}
}
if (slp)
{
if (slp_perm)
- dr_chain.quick_push (gimple_assign_lhs (new_stmt_info->stmt));
+ dr_chain.quick_push (gimple_assign_lhs (new_stmt));
else
- SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
+ SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
}
else
{
if (j == 0)
- STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
- else
- STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
- prev_stmt_info = new_stmt_info;
+ *vec_stmt = new_stmt;
+ STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
}
}
if (slp_perm)
{
unsigned n_perms;
- vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
- slp_node_instance, false, &n_perms);
+ vect_transform_slp_perm_load (vinfo, slp_node, dr_chain, gsi, vf,
+ false, &n_perms);
}
return true;
}
first_stmt_info_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0];
/* Check if the chain of loads is already vectorized. */
- if (STMT_VINFO_VEC_STMT (first_stmt_info)
+ if (STMT_VINFO_VEC_STMTS (first_stmt_info).exists ()
/* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
??? But we can only do so if there is exactly one
as we have no way to get at the rest. Leave the CSE
is even wrong code. See PR56270. */
&& !slp)
{
- *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
+ *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
return true;
}
first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
unpermuted sequence. In other cases we need to load the
whole group, not only the number of vector stmts the
permutation result fits in. */
+ unsigned scalar_lanes = SLP_TREE_LANES (slp_node);
if (slp_perm
- && (group_size != SLP_INSTANCE_GROUP_SIZE (slp_node_instance)
+ && (group_size != scalar_lanes
|| !multiple_p (nunits, group_size)))
{
/* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
{
vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
group_gap_adj
- = group_size - SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
+ = group_size - scalar_lanes;
}
}
else
ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
}
- alignment_support_scheme
- = vect_supportable_dr_alignment (first_dr_info, false);
+  /* Gather-scatter accesses perform only component accesses, so alignment
+     is irrelevant for them. */
+ if (memory_access_type == VMAT_GATHER_SCATTER)
+ alignment_support_scheme = dr_unaligned_supported;
+ else
+ alignment_support_scheme
+ = vect_supportable_dr_alignment (vinfo, first_dr_info, false);
+
gcc_assert (alignment_support_scheme);
vec_loop_masks *loop_masks
= (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
VS1_3: vx3 = memref3 - -
S1: x = load - VS1_0
S2: z = x + 1 - -
-
- See in documentation in vect_get_vec_def_for_stmt_copy for how the
- information we recorded in RELATED_STMT field is used to vectorize
- stmt S2. */
+ */
/* In case of interleaving (non-unit grouped access):
compute_in_loop = true;
}
+ bool diff_first_stmt_info
+ = first_stmt_info_for_drptr && first_stmt_info != first_stmt_info_for_drptr;
+
if ((alignment_support_scheme == dr_explicit_realign_optimized
|| alignment_support_scheme == dr_explicit_realign)
&& !compute_in_loop)
{
- msq = vect_setup_realignment (first_stmt_info_for_drptr
- ? first_stmt_info_for_drptr
- : first_stmt_info, gsi, &realignment_token,
- alignment_support_scheme, NULL_TREE,
- &at_loop);
+      /* If first_stmt_info differs from first_stmt_info_for_drptr, we
+	 can't set up the realignment here, since we can't guarantee that
+	 first_stmt_info's DR has been initialized yet; instead create the
+	 pointer from first_stmt_info_for_drptr's DR and bump it by its
+	 distance from first_stmt_info's DR, as done below. */
+ if (!diff_first_stmt_info)
+ msq = vect_setup_realignment (vinfo,
+ first_stmt_info, gsi, &realignment_token,
+ alignment_support_scheme, NULL_TREE,
+ &at_loop);
if (alignment_support_scheme == dr_explicit_realign_optimized)
{
phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
size_one_node);
+ gcc_assert (!first_stmt_info_for_drptr);
}
}
else
aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
else
aggr_type = vectype;
- bump = vect_get_data_ptr_increment (dr_info, aggr_type,
+ bump = vect_get_data_ptr_increment (vinfo, dr_info, aggr_type,
memory_access_type);
}
+ vec<tree> vec_offsets = vNULL;
+ auto_vec<tree> vec_masks;
+ if (mask)
+ vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
+ mask, &vec_masks, mask_vectype, NULL_TREE);
tree vec_mask = NULL_TREE;
- prev_stmt_info = NULL;
poly_uint64 group_elt = 0;
for (j = 0; j < ncopies; j++)
{
- stmt_vec_info new_stmt_info = NULL;
/* 1. Create the vector or array pointer update chain. */
if (j == 0)
{
if (simd_lane_access_p
&& TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
&& VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
- && integer_zerop (DR_OFFSET (first_dr_info->dr))
+ && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
&& integer_zerop (DR_INIT (first_dr_info->dr))
&& alias_sets_conflict_p (get_alias_set (aggr_type),
get_alias_set (TREE_TYPE (ref_type)))
dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
dataref_offset = build_int_cst (ref_type, 0);
}
- else if (first_stmt_info_for_drptr
- && first_stmt_info != first_stmt_info_for_drptr)
+ else if (diff_first_stmt_info)
{
dataref_ptr
- = vect_create_data_ref_ptr (first_stmt_info_for_drptr,
+ = vect_create_data_ref_ptr (vinfo, first_stmt_info_for_drptr,
aggr_type, at_loop, offset, &dummy,
gsi, &ptr_incr, simd_lane_access_p,
byte_offset, bump);
size_binop (MINUS_EXPR,
DR_INIT (first_dr_info->dr),
DR_INIT (ptrdr)));
- dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
+ dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
stmt_info, diff);
+ if (alignment_support_scheme == dr_explicit_realign)
+ {
+ msq = vect_setup_realignment (vinfo,
+ first_stmt_info_for_drptr, gsi,
+ &realignment_token,
+ alignment_support_scheme,
+ dataref_ptr, &at_loop);
+ gcc_assert (!compute_in_loop);
+ }
}
else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
- vect_get_gather_scatter_ops (loop, stmt_info, &gs_info,
- &dataref_ptr, &vec_offset);
+ {
+ vect_get_gather_scatter_ops (vinfo, loop, stmt_info, &gs_info,
+ &dataref_ptr, &vec_offsets, ncopies);
+ vec_offset = vec_offsets[0];
+ }
else
dataref_ptr
- = vect_create_data_ref_ptr (first_stmt_info, aggr_type, at_loop,
+ = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
+ at_loop,
offset, &dummy, gsi, &ptr_incr,
simd_lane_access_p,
byte_offset, bump);
if (mask)
- {
- if (slp_node)
- {
- auto_vec<vec<tree> > vec_defs (1);
- vect_get_slp_defs (slp_node, &vec_defs);
- vec_mask = vec_defs[0][0];
- }
- else
- vec_mask = vect_get_vec_def_for_operand (mask, stmt_info,
- mask_vectype);
- }
+ vec_mask = vec_masks[0];
}
else
{
dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
bump);
else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
- vec_offset = vect_get_vec_def_for_stmt_copy (vinfo, vec_offset);
+ vec_offset = vec_offsets[j];
else
- dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
+ dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
stmt_info, bump);
if (mask)
- vec_mask = vect_get_vec_def_for_stmt_copy (vinfo, vec_mask);
+ vec_mask = vec_masks[j];
}
if (grouped_load || slp_perm)
dr_chain.create (vec_num);
+ gimple *new_stmt = NULL;
if (memory_access_type == VMAT_LOAD_STORE_LANES)
{
tree vec_array;
}
gimple_call_set_lhs (call, vec_array);
gimple_call_set_nothrow (call, true);
- new_stmt_info = vect_finish_stmt_generation (stmt_info, call, gsi);
+ vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
+ new_stmt = call;
/* Extract each vector into an SSA_NAME. */
for (i = 0; i < vec_num; i++)
{
- new_temp = read_vector_array (stmt_info, gsi, scalar_dest,
+ new_temp = read_vector_array (vinfo, stmt_info, gsi, scalar_dest,
vec_array, i);
dr_chain.quick_push (new_temp);
}
/* Record the mapping between SSA_NAMEs and statements. */
- vect_record_grouped_load_vectors (stmt_info, dr_chain);
+ vect_record_grouped_load_vectors (vinfo, stmt_info, dr_chain);
/* Record that VEC_ARRAY is now dead. */
- vect_clobber_variable (stmt_info, gsi, vec_array);
+ vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
}
else
{
vec_mask, gsi);
if (i > 0)
- dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
- stmt_info, bump);
+ dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
+ gsi, stmt_info, bump);
/* 2. Create the vector-load in the loop. */
- gimple *new_stmt = NULL;
switch (alignment_support_scheme)
{
case dr_aligned:
else if (DR_MISALIGNMENT (first_dr_info) == -1)
{
align = dr_alignment
- (vect_dr_behavior (first_dr_info));
+ (vect_dr_behavior (vinfo, first_dr_info));
misalign = 0;
}
else
else
{
tree ltype = vectype;
+ tree new_vtype = NULL_TREE;
+ unsigned HOST_WIDE_INT gap
+ = DR_GROUP_GAP (first_stmt_info);
+ unsigned int vect_align
+ = vect_known_alignment_in_bytes (first_dr_info);
+ unsigned int scalar_dr_size
+ = vect_get_scalar_dr_size (first_dr_info);
/* If there's no peeling for gaps but we have a gap
with slp loads then load the lower half of the
vector only. See get_group_load_store_type for
case descriptions. */
if (slp
&& loop_vinfo
&& !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
- && DR_GROUP_GAP (first_stmt_info) != 0
- && known_eq (nunits,
- (group_size
- - DR_GROUP_GAP (first_stmt_info)) * 2)
- && known_eq (nunits, group_size))
- ltype = build_vector_type (TREE_TYPE (vectype),
- (group_size
- - DR_GROUP_GAP
- (first_stmt_info)));
+ && gap != 0
+ && known_eq (nunits, (group_size - gap) * 2)
+ && known_eq (nunits, group_size)
+ && gap >= (vect_align / scalar_dr_size))
+ {
+ tree half_vtype;
+ new_vtype
+ = vector_vector_composition_type (vectype, 2,
+ &half_vtype);
+ if (new_vtype != NULL_TREE)
+ ltype = half_vtype;
+ }
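+ /* For example, a group of size 4 with a gap of 2 and a 4-element
+ vector would load only a 2-element lower half here (a sketch of
+ the case the condition above selects). */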
+ tree offset
+ = (dataref_offset ? dataref_offset
+ : build_int_cst (ref_type, 0));
+ if (ltype != vectype
+ && memory_access_type == VMAT_CONTIGUOUS_REVERSE)
+ {
+ unsigned HOST_WIDE_INT gap_offset
+ = gap * tree_to_uhwi (TYPE_SIZE_UNIT (elem_type));
+ tree gapcst = build_int_cst (ref_type, gap_offset);
+ offset = size_binop (PLUS_EXPR, offset, gapcst);
+ }
data_ref
- = fold_build2 (MEM_REF, ltype, dataref_ptr,
- dataref_offset
- ? dataref_offset
- : build_int_cst (ref_type, 0));
+ = fold_build2 (MEM_REF, ltype, dataref_ptr, offset);
if (alignment_support_scheme == dr_aligned)
;
else if (DR_MISALIGNMENT (first_dr_info) == -1)
TYPE_ALIGN (elem_type));
if (ltype != vectype)
{
- vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
+ vect_copy_ref_info (data_ref,
+ DR_REF (first_dr_info->dr));
tree tem = make_ssa_name (ltype);
new_stmt = gimple_build_assign (tem, data_ref);
- vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
+ vect_finish_stmt_generation (vinfo, stmt_info,
+ new_stmt, gsi);
data_ref = NULL;
vec<constructor_elt, va_gc> *v;
vec_alloc (v, 2);
- CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
- CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
- build_zero_cst (ltype));
- new_stmt
- = gimple_build_assign (vec_dest,
- build_constructor
- (vectype, v));
+ if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
+ {
+ CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
+ build_zero_cst (ltype));
+ CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
+ }
+ else
+ {
+ CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
+ CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
+ build_zero_cst (ltype));
+ }
+ gcc_assert (new_vtype != NULL_TREE);
+ if (new_vtype == vectype)
+ new_stmt = gimple_build_assign (
+ vec_dest, build_constructor (vectype, v));
+ else
+ {
+ tree new_vname = make_ssa_name (new_vtype);
+ new_stmt = gimple_build_assign (
+ new_vname, build_constructor (new_vtype, v));
+ vect_finish_stmt_generation (vinfo, stmt_info,
+ new_stmt, gsi);
+ new_stmt = gimple_build_assign (
+ vec_dest, build1 (VIEW_CONVERT_EXPR, vectype,
+ new_vname));
+ }
}
}
break;
tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
if (compute_in_loop)
- msq = vect_setup_realignment (first_stmt_info, gsi,
+ msq = vect_setup_realignment (vinfo, first_stmt_info, gsi,
&realignment_token,
dr_explicit_realign,
dataref_ptr, NULL);
build_int_cst
(TREE_TYPE (dataref_ptr),
-(HOST_WIDE_INT) align));
- vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
+ vect_finish_stmt_generation (vinfo, stmt_info,
+ new_stmt, gsi);
data_ref
= build2 (MEM_REF, vectype, ptr,
build_int_cst (ref_type, 0));
new_temp = make_ssa_name (vec_dest, new_stmt);
gimple_assign_set_lhs (new_stmt, new_temp);
gimple_move_vops (new_stmt, stmt_info->stmt);
- vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
+ vect_finish_stmt_generation (vinfo, stmt_info,
+ new_stmt, gsi);
msq = new_temp;
bump = size_binop (MULT_EXPR, vs,
TYPE_SIZE_UNIT (elem_type));
bump = size_binop (MINUS_EXPR, bump, size_one_node);
- ptr = bump_vector_ptr (dataref_ptr, NULL, gsi,
+ ptr = bump_vector_ptr (vinfo, dataref_ptr, NULL, gsi,
stmt_info, bump);
new_stmt = gimple_build_assign
(NULL_TREE, BIT_AND_EXPR, ptr,
(TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
ptr = copy_ssa_name (ptr, new_stmt);
gimple_assign_set_lhs (new_stmt, ptr);
- vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
+ vect_finish_stmt_generation (vinfo, stmt_info,
+ new_stmt, gsi);
data_ref
= build2 (MEM_REF, vectype, ptr,
build_int_cst (ref_type, 0));
(new_temp, BIT_AND_EXPR, dataref_ptr,
build_int_cst (TREE_TYPE (dataref_ptr),
-(HOST_WIDE_INT) align));
- vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
+ vect_finish_stmt_generation (vinfo, stmt_info,
+ new_stmt, gsi);
data_ref
= build2 (MEM_REF, vectype, new_temp,
build_int_cst (ref_type, 0));
}
new_temp = make_ssa_name (vec_dest, new_stmt);
gimple_set_lhs (new_stmt, new_temp);
- new_stmt_info
- = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
+ vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
/* 3. Handle explicit realignment if necessary/supported.
Create in loop:
msq, lsq, realignment_token);
new_temp = make_ssa_name (vec_dest, new_stmt);
gimple_assign_set_lhs (new_stmt, new_temp);
- new_stmt_info
- = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
+ vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
if (alignment_support_scheme == dr_explicit_realign_optimized)
{
if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
{
tree perm_mask = perm_mask_for_reverse (vectype);
- new_temp = permute_vec_elements (new_temp, new_temp,
+ new_temp = permute_vec_elements (vinfo, new_temp, new_temp,
perm_mask, stmt_info, gsi);
- new_stmt_info = vinfo->lookup_def (new_temp);
+ new_stmt = SSA_NAME_DEF_STMT (new_temp);
}
/* Collect vector loads and later create their permutation in
/* Store vector loads in the corresponding SLP_NODE. */
if (slp && !slp_perm)
- SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
+ SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
/* With SLP permutation we load the gaps as well, without
we need to skip the gaps after we manage to fully load
= (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
* group_gap_adj);
tree bump = wide_int_to_tree (sizetype, bump_val);
- dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
- stmt_info, bump);
+ dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
+ gsi, stmt_info, bump);
group_elt = 0;
}
}
= (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
* group_gap_adj);
tree bump = wide_int_to_tree (sizetype, bump_val);
- dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
+ dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
stmt_info, bump);
}
}
if (slp_perm)
{
unsigned n_perms;
- if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
- slp_node_instance, false,
- &n_perms))
- {
- dr_chain.release ();
- return false;
- }
+ bool ok = vect_transform_slp_perm_load (vinfo, slp_node, dr_chain,
+ gsi, vf, false, &n_perms);
+ gcc_assert (ok);
}
else
{
if (grouped_load)
{
if (memory_access_type != VMAT_LOAD_STORE_LANES)
- vect_transform_grouped_load (stmt_info, dr_chain,
+ vect_transform_grouped_load (vinfo, stmt_info, dr_chain,
group_size, gsi);
- *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
+ *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
}
else
{
- if (j == 0)
- STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
- else
- STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
- prev_stmt_info = new_stmt_info;
+ STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
}
}
dr_chain.release ();
}
+ if (!slp)
+ *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
return true;
}
condition operands are supportable using vect_is_simple_use. */
static bool
-vect_is_simple_cond (tree cond, vec_info *vinfo,
- tree *comp_vectype, enum vect_def_type *dts,
- tree vectype)
+vect_is_simple_cond (tree cond, vec_info *vinfo, stmt_vec_info stmt_info,
+ slp_tree slp_node, tree *comp_vectype,
+ enum vect_def_type *dts, tree vectype)
{
tree lhs, rhs;
tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
+ slp_tree slp_op;
/* Mask case. */
if (TREE_CODE (cond) == SSA_NAME
&& VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
{
- if (!vect_is_simple_use (cond, vinfo, &dts[0], comp_vectype)
+ if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &cond,
+ &slp_op, &dts[0], comp_vectype)
|| !*comp_vectype
|| !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
return false;
if (TREE_CODE (lhs) == SSA_NAME)
{
- if (!vect_is_simple_use (lhs, vinfo, &dts[0], &vectype1))
+ if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0,
+ &lhs, &slp_op, &dts[0], &vectype1))
return false;
}
else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
if (TREE_CODE (rhs) == SSA_NAME)
{
- if (!vect_is_simple_use (rhs, vinfo, &dts[1], &vectype2))
+ if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
+ &rhs, &slp_op, &dts[1], &vectype2))
return false;
}
else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
if (! *comp_vectype)
{
tree scalar_type = TREE_TYPE (lhs);
- /* If we can widen the comparison to match vectype do so. */
- if (INTEGRAL_TYPE_P (scalar_type)
- && vectype
- && tree_int_cst_lt (TYPE_SIZE (scalar_type),
- TYPE_SIZE (TREE_TYPE (vectype))))
- scalar_type = build_nonstandard_integer_type
- (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype))),
- TYPE_UNSIGNED (scalar_type));
- *comp_vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
+ if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
+ *comp_vectype = truth_type_for (vectype);
+ else
+ {
+ /* If we can widen the comparison to match vectype do so. */
+ if (INTEGRAL_TYPE_P (scalar_type)
+ && !slp_node
+ && tree_int_cst_lt (TYPE_SIZE (scalar_type),
+ TYPE_SIZE (TREE_TYPE (vectype))))
+ scalar_type = build_nonstandard_integer_type
+ (vector_element_bits (vectype), TYPE_UNSIGNED (scalar_type));
+ *comp_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
+ slp_node);
+ }
}
return true;
Return true if STMT_INFO is vectorizable in this way. */
static bool
-vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
- stmt_vec_info *vec_stmt,
+vectorizable_condition (vec_info *vinfo,
+ stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
+ gimple **vec_stmt,
slp_tree slp_node, stmt_vector_for_cost *cost_vec)
{
- vec_info *vinfo = stmt_info->vinfo;
tree scalar_dest = NULL_TREE;
tree vec_dest = NULL_TREE;
tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
tree vec_compare;
tree new_temp;
- loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
+ loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
enum vect_def_type dts[4]
= {vect_unknown_def_type, vect_unknown_def_type,
vect_unknown_def_type, vect_unknown_def_type};
int ndts = 4;
int ncopies;
+ int vec_num;
enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
- stmt_vec_info prev_stmt_info = NULL;
- int i, j;
- bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
+ int i;
+ bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
vec<tree> vec_oprnds0 = vNULL;
vec<tree> vec_oprnds1 = vNULL;
vec<tree> vec_oprnds2 = vNULL;
{
if (STMT_SLP_TYPE (stmt_info))
return false;
- reduc_info = info_for_reduction (stmt_info);
+ reduc_info = info_for_reduction (vinfo, stmt_info);
reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info);
reduc_index = STMT_VINFO_REDUC_IDX (stmt_info);
gcc_assert (reduction_type != EXTRACT_LAST_REDUCTION
tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
if (slp_node)
- ncopies = 1;
+ {
+ ncopies = 1;
+ vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
+ }
else
- ncopies = vect_get_num_copies (loop_vinfo, vectype);
+ {
+ ncopies = vect_get_num_copies (loop_vinfo, vectype);
+ vec_num = 1;
+ }
gcc_assert (ncopies >= 1);
if (for_reduction && ncopies > 1)
return false; /* FORNOW */
cond_expr = gimple_assign_rhs1 (stmt);
- then_clause = gimple_assign_rhs2 (stmt);
- else_clause = gimple_assign_rhs3 (stmt);
- if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo,
- &comp_vectype, &dts[0], slp_node ? NULL : vectype)
+ if (!vect_is_simple_cond (cond_expr, vinfo, stmt_info, slp_node,
+ &comp_vectype, &dts[0], vectype)
|| !comp_vectype)
return false;
- if (!vect_is_simple_use (then_clause, stmt_info->vinfo, &dts[2], &vectype1))
+ unsigned op_adjust = COMPARISON_CLASS_P (cond_expr) ? 1 : 0;
+ slp_tree then_slp_node, else_slp_node;
+ if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1 + op_adjust,
+ &then_clause, &then_slp_node, &dts[2], &vectype1))
return false;
- if (!vect_is_simple_use (else_clause, stmt_info->vinfo, &dts[3], &vectype2))
+ if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 2 + op_adjust,
+ &else_clause, &else_slp_node, &dts[3], &vectype2))
return false;
if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
return false;
masked = !COMPARISON_CLASS_P (cond_expr);
- vec_cmp_type = build_same_sized_truth_vector_type (comp_vectype);
+ vec_cmp_type = truth_type_for (comp_vectype);
if (vec_cmp_type == NULL_TREE)
return false;
if (new_code == ERROR_MARK)
must_invert_cmp_result = true;
else
- cond_code = new_code;
+ {
+ cond_code = new_code;
+ /* Make sure we don't accidentally use the old condition. */
+ cond_expr = NULL_TREE;
+ }
}
- /* Make sure we don't accidentally use the old condition. */
- cond_expr = NULL_TREE;
std::swap (then_clause, else_clause);
}
cond_code = SSA_NAME;
}
+ if (TREE_CODE_CLASS (cond_code) == tcc_comparison
+ && reduction_type == EXTRACT_LAST_REDUCTION
+ && !expand_vec_cmp_expr_p (comp_vectype, vec_cmp_type, cond_code))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "reduction comparison operation not supported.\n");
+ return false;
+ }
+
if (!vec_stmt)
{
if (bitop1 != NOP_EXPR)
return false;
}
}
- if (expand_vec_cond_expr_p (vectype, comp_vectype,
- cond_code))
+
+ vect_cost_for_stmt kind = vector_stmt;
+ if (reduction_type == EXTRACT_LAST_REDUCTION)
+ /* Count one reduction-like operation per vector. */
+ kind = vec_to_scalar;
+ else if (!expand_vec_cond_expr_p (vectype, comp_vectype, cond_code))
+ return false;
+
+ if (slp_node
+ && (!vect_maybe_update_slp_op_vectype
+ (SLP_TREE_CHILDREN (slp_node)[0], comp_vectype)
+ || (op_adjust == 1
+ && !vect_maybe_update_slp_op_vectype
+ (SLP_TREE_CHILDREN (slp_node)[1], comp_vectype))
+ || !vect_maybe_update_slp_op_vectype (then_slp_node, vectype)
+ || !vect_maybe_update_slp_op_vectype (else_slp_node, vectype)))
{
- STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
- vect_model_simple_cost (stmt_info, ncopies, dts, ndts, slp_node,
- cost_vec);
- return true;
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "incompatible vector types for invariants\n");
+ return false;
}
- return false;
+
+ if (loop_vinfo
+ && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
+ && reduction_type == EXTRACT_LAST_REDUCTION)
+ vect_record_loop_mask (loop_vinfo, &LOOP_VINFO_MASKS (loop_vinfo),
+ ncopies * vec_num, vectype, NULL);
+
+ STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
+ vect_model_simple_cost (vinfo, stmt_info, ncopies, dts, ndts, slp_node,
+ cost_vec, kind);
+ return true;
}
/* Transform. */
if (reduction_type != EXTRACT_LAST_REDUCTION)
vec_dest = vect_create_destination_var (scalar_dest, vectype);
- /* Handle cond expr. */
- for (j = 0; j < ncopies; j++)
- {
- bool swap_cond_operands = false;
+ bool swap_cond_operands = false;
- /* See whether another part of the vectorized code applies a loop
- mask to the condition, or to its inverse. */
+ /* See whether another part of the vectorized code applies a loop
+ mask to the condition, or to its inverse. */
- vec_loop_masks *masks = NULL;
- if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
+ vec_loop_masks *masks = NULL;
+ if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
+ {
+ if (reduction_type == EXTRACT_LAST_REDUCTION)
+ masks = &LOOP_VINFO_MASKS (loop_vinfo);
+ else
{
- if (reduction_type == EXTRACT_LAST_REDUCTION)
+ scalar_cond_masked_key cond (cond_expr, ncopies);
+ if (loop_vinfo->scalar_cond_masked_set.contains (cond))
masks = &LOOP_VINFO_MASKS (loop_vinfo);
else
{
- scalar_cond_masked_key cond (cond_expr, ncopies);
+ bool honor_nans = HONOR_NANS (TREE_TYPE (cond.op0));
+ cond.code = invert_tree_comparison (cond.code, honor_nans);
if (loop_vinfo->scalar_cond_masked_set.contains (cond))
- masks = &LOOP_VINFO_MASKS (loop_vinfo);
- else
{
- bool honor_nans = HONOR_NANS (TREE_TYPE (cond.op0));
- cond.code = invert_tree_comparison (cond.code, honor_nans);
- if (loop_vinfo->scalar_cond_masked_set.contains (cond))
- {
- masks = &LOOP_VINFO_MASKS (loop_vinfo);
- cond_code = cond.code;
- swap_cond_operands = true;
- }
+ masks = &LOOP_VINFO_MASKS (loop_vinfo);
+ cond_code = cond.code;
+ swap_cond_operands = true;
}
}
}
+ }
- stmt_vec_info new_stmt_info = NULL;
- if (j == 0)
- {
- if (slp_node)
- {
- auto_vec<vec<tree>, 4> vec_defs;
- vect_get_slp_defs (slp_node, &vec_defs);
- vec_oprnds3 = vec_defs.pop ();
- vec_oprnds2 = vec_defs.pop ();
- if (!masked)
- vec_oprnds1 = vec_defs.pop ();
- vec_oprnds0 = vec_defs.pop ();
- }
- else
- {
- if (masked)
- {
- vec_cond_lhs
- = vect_get_vec_def_for_operand (cond_expr, stmt_info,
- comp_vectype);
- }
- else
- {
- vec_cond_lhs
- = vect_get_vec_def_for_operand (cond_expr0,
- stmt_info, comp_vectype);
- vec_cond_rhs
- = vect_get_vec_def_for_operand (cond_expr1,
- stmt_info, comp_vectype);
- }
- vec_then_clause = vect_get_vec_def_for_operand (then_clause,
- stmt_info);
- if (reduction_type != EXTRACT_LAST_REDUCTION)
- vec_else_clause = vect_get_vec_def_for_operand (else_clause,
- stmt_info);
- }
- }
- else
- {
- vec_cond_lhs
- = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnds0.pop ());
- if (!masked)
- vec_cond_rhs
- = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnds1.pop ());
-
- vec_then_clause = vect_get_vec_def_for_stmt_copy (vinfo,
- vec_oprnds2.pop ());
- vec_else_clause = vect_get_vec_def_for_stmt_copy (vinfo,
- vec_oprnds3.pop ());
- }
-
- if (!slp_node)
- {
- vec_oprnds0.quick_push (vec_cond_lhs);
- if (!masked)
- vec_oprnds1.quick_push (vec_cond_rhs);
- vec_oprnds2.quick_push (vec_then_clause);
- vec_oprnds3.quick_push (vec_else_clause);
- }
+ /* Handle cond expr. */
+ if (masked)
+ vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
+ cond_expr, &vec_oprnds0, comp_vectype,
+ then_clause, &vec_oprnds2, vectype,
+ reduction_type != EXTRACT_LAST_REDUCTION
+ ? else_clause : NULL, &vec_oprnds3, vectype);
+ else
+ vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
+ cond_expr0, &vec_oprnds0, comp_vectype,
+ cond_expr1, &vec_oprnds1, comp_vectype,
+ then_clause, &vec_oprnds2, vectype,
+ reduction_type != EXTRACT_LAST_REDUCTION
+ ? else_clause : NULL, &vec_oprnds3, vectype);
- /* Arguments are ready. Create the new vector stmt. */
- FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
- {
- vec_then_clause = vec_oprnds2[i];
- vec_else_clause = vec_oprnds3[i];
+ /* Arguments are ready. Create the new vector stmt. */
+ FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
+ {
+ vec_then_clause = vec_oprnds2[i];
+ if (reduction_type != EXTRACT_LAST_REDUCTION)
+ vec_else_clause = vec_oprnds3[i];
- if (swap_cond_operands)
- std::swap (vec_then_clause, vec_else_clause);
+ if (swap_cond_operands)
+ std::swap (vec_then_clause, vec_else_clause);
- if (masked)
- vec_compare = vec_cond_lhs;
+ if (masked)
+ vec_compare = vec_cond_lhs;
+ else
+ {
+ vec_cond_rhs = vec_oprnds1[i];
+ if (bitop1 == NOP_EXPR)
+ vec_compare = build2 (cond_code, vec_cmp_type,
+ vec_cond_lhs, vec_cond_rhs);
else
{
- vec_cond_rhs = vec_oprnds1[i];
- if (bitop1 == NOP_EXPR)
- vec_compare = build2 (cond_code, vec_cmp_type,
- vec_cond_lhs, vec_cond_rhs);
+ new_temp = make_ssa_name (vec_cmp_type);
+ gassign *new_stmt;
+ if (bitop1 == BIT_NOT_EXPR)
+ new_stmt = gimple_build_assign (new_temp, bitop1,
+ vec_cond_rhs);
else
+ new_stmt
+ = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
+ vec_cond_rhs);
+ vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+ if (bitop2 == NOP_EXPR)
+ vec_compare = new_temp;
+ else if (bitop2 == BIT_NOT_EXPR)
{
- new_temp = make_ssa_name (vec_cmp_type);
- gassign *new_stmt;
- if (bitop1 == BIT_NOT_EXPR)
- new_stmt = gimple_build_assign (new_temp, bitop1,
- vec_cond_rhs);
- else
- new_stmt
- = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
- vec_cond_rhs);
- vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
- if (bitop2 == NOP_EXPR)
- vec_compare = new_temp;
- else if (bitop2 == BIT_NOT_EXPR)
- {
- /* Instead of doing ~x ? y : z do x ? z : y. */
- vec_compare = new_temp;
- std::swap (vec_then_clause, vec_else_clause);
- }
- else
- {
- vec_compare = make_ssa_name (vec_cmp_type);
- new_stmt
- = gimple_build_assign (vec_compare, bitop2,
- vec_cond_lhs, new_temp);
- vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
- }
+ /* Instead of doing ~x ? y : z do x ? z : y. */
+ vec_compare = new_temp;
+ std::swap (vec_then_clause, vec_else_clause);
+ }
+ else
+ {
+ vec_compare = make_ssa_name (vec_cmp_type);
+ new_stmt
+ = gimple_build_assign (vec_compare, bitop2,
+ vec_cond_lhs, new_temp);
+ vect_finish_stmt_generation (vinfo, stmt_info,
+ new_stmt, gsi);
}
}
+ }
- /* If we decided to apply a loop mask to the result of the vector
- comparison, AND the comparison with the mask now. Later passes
- should then be able to reuse the AND results between mulitple
- vector statements.
+ /* If we decided to apply a loop mask to the result of the vector
+ comparison, AND the comparison with the mask now. Later passes
+ should then be able to reuse the AND results between multiple
+ vector statements.
- For example:
- for (int i = 0; i < 100; ++i)
- x[i] = y[i] ? z[i] : 10;
+ For example:
+ for (int i = 0; i < 100; ++i)
+ x[i] = y[i] ? z[i] : 10;
- results in following optimized GIMPLE:
+ results in the following optimized GIMPLE:
- mask__35.8_43 = vect__4.7_41 != { 0, ... };
- vec_mask_and_46 = loop_mask_40 & mask__35.8_43;
- _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B];
- vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46);
- vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46,
- vect_iftmp.11_47, { 10, ... }>;
+ mask__35.8_43 = vect__4.7_41 != { 0, ... };
+ vec_mask_and_46 = loop_mask_40 & mask__35.8_43;
+ _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B];
+ vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46);
+ vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46,
+ vect_iftmp.11_47, { 10, ... }>;
- instead of using a masked and unmasked forms of
- vec != { 0, ... } (masked in the MASK_LOAD,
- unmasked in the VEC_COND_EXPR). */
+ instead of using masked and unmasked forms of
+ vec != { 0, ... } (masked in the MASK_LOAD,
+ unmasked in the VEC_COND_EXPR). */
- /* Force vec_compare to be an SSA_NAME rather than a comparison,
- in cases where that's necessary. */
+ /* Force vec_compare to be an SSA_NAME rather than a comparison,
+ in cases where that's necessary. */
- if (masks || reduction_type == EXTRACT_LAST_REDUCTION)
+ if (masks || reduction_type == EXTRACT_LAST_REDUCTION)
+ {
+ if (!is_gimple_val (vec_compare))
{
- if (!is_gimple_val (vec_compare))
- {
- tree vec_compare_name = make_ssa_name (vec_cmp_type);
- gassign *new_stmt = gimple_build_assign (vec_compare_name,
- vec_compare);
- vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
- vec_compare = vec_compare_name;
- }
-
- if (must_invert_cmp_result)
- {
- tree vec_compare_name = make_ssa_name (vec_cmp_type);
- gassign *new_stmt = gimple_build_assign (vec_compare_name,
- BIT_NOT_EXPR,
- vec_compare);
- vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
- vec_compare = vec_compare_name;
- }
+ tree vec_compare_name = make_ssa_name (vec_cmp_type);
+ gassign *new_stmt = gimple_build_assign (vec_compare_name,
+ vec_compare);
+ vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+ vec_compare = vec_compare_name;
+ }
- if (masks)
- {
- unsigned vec_num = vec_oprnds0.length ();
- tree loop_mask
- = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
- vectype, vec_num * j + i);
- tree tmp2 = make_ssa_name (vec_cmp_type);
- gassign *g
- = gimple_build_assign (tmp2, BIT_AND_EXPR, vec_compare,
- loop_mask);
- vect_finish_stmt_generation (stmt_info, g, gsi);
- vec_compare = tmp2;
- }
+ if (must_invert_cmp_result)
+ {
+ tree vec_compare_name = make_ssa_name (vec_cmp_type);
+ gassign *new_stmt = gimple_build_assign (vec_compare_name,
+ BIT_NOT_EXPR,
+ vec_compare);
+ vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+ vec_compare = vec_compare_name;
}
- if (reduction_type == EXTRACT_LAST_REDUCTION)
+ if (masks)
{
- gcall *new_stmt = gimple_build_call_internal
- (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
- vec_then_clause);
- gimple_call_set_lhs (new_stmt, scalar_dest);
- SSA_NAME_DEF_STMT (scalar_dest) = new_stmt;
- if (stmt_info->stmt == gsi_stmt (*gsi))
- new_stmt_info = vect_finish_replace_stmt (stmt_info, new_stmt);
- else
- {
- /* In this case we're moving the definition to later in the
- block. That doesn't matter because the only uses of the
- lhs are in phi statements. */
- gimple_stmt_iterator old_gsi
- = gsi_for_stmt (stmt_info->stmt);
- gsi_remove (&old_gsi, true);
- new_stmt_info
- = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
- }
+ unsigned vec_num = vec_oprnds0.length ();
+ tree loop_mask
+ = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
+ vectype, i);
+ tree tmp2 = make_ssa_name (vec_cmp_type);
+ gassign *g
+ = gimple_build_assign (tmp2, BIT_AND_EXPR, vec_compare,
+ loop_mask);
+ vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
+ vec_compare = tmp2;
}
+ }
+
+ gimple *new_stmt;
+ if (reduction_type == EXTRACT_LAST_REDUCTION)
+ {
+ gimple *old_stmt = vect_orig_stmt (stmt_info)->stmt;
+ tree lhs = gimple_get_lhs (old_stmt);
+ new_stmt = gimple_build_call_internal
+ (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
+ vec_then_clause);
+ gimple_call_set_lhs (new_stmt, lhs);
+ SSA_NAME_DEF_STMT (lhs) = new_stmt;
+ if (old_stmt == gsi_stmt (*gsi))
+ vect_finish_replace_stmt (vinfo, stmt_info, new_stmt);
else
{
- new_temp = make_ssa_name (vec_dest);
- gassign *new_stmt
- = gimple_build_assign (new_temp, VEC_COND_EXPR, vec_compare,
- vec_then_clause, vec_else_clause);
- new_stmt_info
- = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
+ /* In this case we're moving the definition to later in the
+ block. That doesn't matter because the only uses of the
+ lhs are in phi statements. */
+ gimple_stmt_iterator old_gsi = gsi_for_stmt (old_stmt);
+ gsi_remove (&old_gsi, true);
+ vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
}
- if (slp_node)
- SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
- }
-
- if (slp_node)
- continue;
-
- if (j == 0)
- STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
- else
- STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
-
- prev_stmt_info = new_stmt_info;
+ }
+ else
+ {
+ new_temp = make_ssa_name (vec_dest);
+ new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR, vec_compare,
+ vec_then_clause, vec_else_clause);
+ vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+ }
+ if (slp_node)
+ SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
+ else
+ STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
}
+ if (!slp_node)
+ *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
+
vec_oprnds0.release ();
vec_oprnds1.release ();
vec_oprnds2.release ();
Return true if STMT_INFO is vectorizable in this way. */
static bool
-vectorizable_comparison (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
- stmt_vec_info *vec_stmt,
+vectorizable_comparison (vec_info *vinfo,
+ stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
+ gimple **vec_stmt,
slp_tree slp_node, stmt_vector_for_cost *cost_vec)
{
- vec_info *vinfo = stmt_info->vinfo;
tree lhs, rhs1, rhs2;
tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
tree vectype = STMT_VINFO_VECTYPE (stmt_info);
tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
tree new_temp;
- loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
+ loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
int ndts = 2;
poly_uint64 nunits;
int ncopies;
enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
- stmt_vec_info prev_stmt_info = NULL;
- int i, j;
- bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
+ int i;
+ bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
vec<tree> vec_oprnds0 = vNULL;
vec<tree> vec_oprnds1 = vNULL;
tree mask_type;
if (TREE_CODE_CLASS (code) != tcc_comparison)
return false;
- rhs1 = gimple_assign_rhs1 (stmt);
- rhs2 = gimple_assign_rhs2 (stmt);
-
- if (!vect_is_simple_use (rhs1, stmt_info->vinfo, &dts[0], &vectype1))
+ slp_tree slp_rhs1, slp_rhs2;
+ if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
+ 0, &rhs1, &slp_rhs1, &dts[0], &vectype1))
return false;
- if (!vect_is_simple_use (rhs2, stmt_info->vinfo, &dts[1], &vectype2))
+ if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
+ 1, &rhs2, &slp_rhs2, &dts[1], &vectype2))
return false;
if (vectype1 && vectype2
/* Invariant comparison. */
if (!vectype)
{
- vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1));
- if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
+ if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
+ vectype = mask_type;
+ else
+ vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1),
+ slp_node);
+ if (!vectype || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
return false;
}
else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
}
}
+ /* Put types on constant and invariant SLP children. */
+ if (slp_node
+ && (!vect_maybe_update_slp_op_vectype (slp_rhs1, vectype)
+ || !vect_maybe_update_slp_op_vectype (slp_rhs2, vectype)))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "incompatible vector types for invariants\n");
+ return false;
+ }
+
STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
- vect_model_simple_cost (stmt_info, ncopies * (1 + (bitop2 != NOP_EXPR)),
+ vect_model_simple_cost (vinfo, stmt_info,
+ ncopies * (1 + (bitop2 != NOP_EXPR)),
dts, ndts, slp_node, cost_vec);
return true;
}
lhs = gimple_assign_lhs (stmt);
mask = vect_create_destination_var (lhs, mask_type);
- /* Handle cmp expr. */
- for (j = 0; j < ncopies; j++)
+ vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
+ rhs1, &vec_oprnds0, vectype,
+ rhs2, &vec_oprnds1, vectype);
+ if (swap_p)
+ std::swap (vec_oprnds0, vec_oprnds1);
+
+ /* Arguments are ready. Create the new vector stmt. */
+ FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
{
- stmt_vec_info new_stmt_info = NULL;
- if (j == 0)
- {
- if (slp_node)
- {
- auto_vec<vec<tree>, 2> vec_defs;
- vect_get_slp_defs (slp_node, &vec_defs);
- vec_oprnds1 = vec_defs.pop ();
- vec_oprnds0 = vec_defs.pop ();
- if (swap_p)
- std::swap (vec_oprnds0, vec_oprnds1);
- }
- else
- {
- vec_rhs1 = vect_get_vec_def_for_operand (rhs1, stmt_info,
- vectype);
- vec_rhs2 = vect_get_vec_def_for_operand (rhs2, stmt_info,
- vectype);
- }
- }
- else
- {
- vec_rhs1 = vect_get_vec_def_for_stmt_copy (vinfo,
- vec_oprnds0.pop ());
- vec_rhs2 = vect_get_vec_def_for_stmt_copy (vinfo,
- vec_oprnds1.pop ());
- }
+ gimple *new_stmt;
+ vec_rhs2 = vec_oprnds1[i];
- if (!slp_node)
+ new_temp = make_ssa_name (mask);
+ if (bitop1 == NOP_EXPR)
{
- if (swap_p && j == 0)
- std::swap (vec_rhs1, vec_rhs2);
- vec_oprnds0.quick_push (vec_rhs1);
- vec_oprnds1.quick_push (vec_rhs2);
+ new_stmt = gimple_build_assign (new_temp, code,
+ vec_rhs1, vec_rhs2);
+ vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
}
-
- /* Arguments are ready. Create the new vector stmt. */
- FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
+ else
{
- vec_rhs2 = vec_oprnds1[i];
-
- new_temp = make_ssa_name (mask);
- if (bitop1 == NOP_EXPR)
- {
- gassign *new_stmt = gimple_build_assign (new_temp, code,
- vec_rhs1, vec_rhs2);
- new_stmt_info
- = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
- }
+ if (bitop1 == BIT_NOT_EXPR)
+ new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
else
+ new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
+ vec_rhs2);
+ vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+ if (bitop2 != NOP_EXPR)
{
- gassign *new_stmt;
- if (bitop1 == BIT_NOT_EXPR)
- new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
+ tree res = make_ssa_name (mask);
+ if (bitop2 == BIT_NOT_EXPR)
+ new_stmt = gimple_build_assign (res, bitop2, new_temp);
else
- new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
- vec_rhs2);
- new_stmt_info
- = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
- if (bitop2 != NOP_EXPR)
- {
- tree res = make_ssa_name (mask);
- if (bitop2 == BIT_NOT_EXPR)
- new_stmt = gimple_build_assign (res, bitop2, new_temp);
- else
- new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
- new_temp);
- new_stmt_info
- = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
- }
+ new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
+ new_temp);
+ vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
}
- if (slp_node)
- SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
}
-
if (slp_node)
- continue;
-
- if (j == 0)
- STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
+ SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
else
- STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
-
- prev_stmt_info = new_stmt_info;
+ STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
}
+ if (!slp_node)
+ *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
+
vec_oprnds0.release ();
vec_oprnds1.release ();
GSI and VEC_STMT_P are as for vectorizable_live_operation. */
static bool
-can_vectorize_live_stmts (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
+can_vectorize_live_stmts (loop_vec_info loop_vinfo,
+ stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
slp_tree slp_node, slp_instance slp_node_instance,
bool vec_stmt_p,
stmt_vector_for_cost *cost_vec)
FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt_info)
{
if (STMT_VINFO_LIVE_P (slp_stmt_info)
- && !vectorizable_live_operation (slp_stmt_info, gsi, slp_node,
+ && !vectorizable_live_operation (loop_vinfo,
+ slp_stmt_info, gsi, slp_node,
slp_node_instance, i,
vec_stmt_p, cost_vec))
return false;
}
}
else if (STMT_VINFO_LIVE_P (stmt_info)
- && !vectorizable_live_operation (stmt_info, gsi, slp_node,
- slp_node_instance, -1,
+ && !vectorizable_live_operation (loop_vinfo, stmt_info, gsi,
+ slp_node, slp_node_instance, -1,
vec_stmt_p, cost_vec))
return false;
/* Make sure the statement is vectorizable. */
opt_result
-vect_analyze_stmt (stmt_vec_info stmt_info, bool *need_to_vectorize,
+vect_analyze_stmt (vec_info *vinfo,
+ stmt_vec_info stmt_info, bool *need_to_vectorize,
slp_tree node, slp_instance node_instance,
stmt_vector_for_cost *cost_vec)
{
- vec_info *vinfo = stmt_info->vinfo;
- bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
+ bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
bool ok;
gimple_seq pattern_def_seq;
pattern_def_stmt_info->stmt);
opt_result res
- = vect_analyze_stmt (pattern_def_stmt_info,
+ = vect_analyze_stmt (vinfo, pattern_def_stmt_info,
need_to_vectorize, node, node_instance,
cost_vec);
if (!res)
pattern_stmt_info->stmt);
opt_result res
- = vect_analyze_stmt (pattern_stmt_info, need_to_vectorize, node,
+ = vect_analyze_stmt (vinfo, pattern_stmt_info, need_to_vectorize, node,
node_instance, cost_vec);
if (!res)
return res;
/* Prefer vectorizable_call over vectorizable_simd_clone_call so
-mveclibabi= takes precedence over library functions with
the simd attribute. */
- ok = (vectorizable_call (stmt_info, NULL, NULL, node, cost_vec)
- || vectorizable_simd_clone_call (stmt_info, NULL, NULL, node,
+ ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
+ || vectorizable_simd_clone_call (vinfo, stmt_info, NULL, NULL, node,
cost_vec)
- || vectorizable_conversion (stmt_info, NULL, NULL, node, cost_vec)
- || vectorizable_operation (stmt_info, NULL, NULL, node, cost_vec)
- || vectorizable_assignment (stmt_info, NULL, NULL, node, cost_vec)
- || vectorizable_load (stmt_info, NULL, NULL, node, node_instance,
- cost_vec)
- || vectorizable_store (stmt_info, NULL, NULL, node, cost_vec)
- || vectorizable_reduction (stmt_info, node, node_instance, cost_vec)
- || vectorizable_induction (stmt_info, NULL, NULL, node, cost_vec)
- || vectorizable_shift (stmt_info, NULL, NULL, node, cost_vec)
- || vectorizable_condition (stmt_info, NULL, NULL, node, cost_vec)
- || vectorizable_comparison (stmt_info, NULL, NULL, node,
+ || vectorizable_conversion (vinfo, stmt_info,
+ NULL, NULL, node, cost_vec)
+ || vectorizable_operation (vinfo, stmt_info,
+ NULL, NULL, node, cost_vec)
+ || vectorizable_assignment (vinfo, stmt_info,
+ NULL, NULL, node, cost_vec)
+ || vectorizable_load (vinfo, stmt_info, NULL, NULL, node, cost_vec)
+ || vectorizable_store (vinfo, stmt_info, NULL, NULL, node, cost_vec)
+ || vectorizable_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
+ node, node_instance, cost_vec)
+ || vectorizable_induction (as_a <loop_vec_info> (vinfo), stmt_info,
+ NULL, NULL, node, cost_vec)
+ || vectorizable_shift (vinfo, stmt_info, NULL, NULL, node, cost_vec)
+ || vectorizable_condition (vinfo, stmt_info,
+ NULL, NULL, node, cost_vec)
+ || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
cost_vec)
- || vectorizable_lc_phi (stmt_info, NULL, node));
+ || vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
+ stmt_info, NULL, node));
else
{
if (bb_vinfo)
- ok = (vectorizable_call (stmt_info, NULL, NULL, node, cost_vec)
- || vectorizable_simd_clone_call (stmt_info, NULL, NULL, node,
- cost_vec)
- || vectorizable_conversion (stmt_info, NULL, NULL, node,
+ ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
+ || vectorizable_simd_clone_call (vinfo, stmt_info,
+ NULL, NULL, node, cost_vec)
+ || vectorizable_conversion (vinfo, stmt_info, NULL, NULL, node,
cost_vec)
- || vectorizable_shift (stmt_info, NULL, NULL, node, cost_vec)
- || vectorizable_operation (stmt_info, NULL, NULL, node, cost_vec)
- || vectorizable_assignment (stmt_info, NULL, NULL, node,
+ || vectorizable_shift (vinfo, stmt_info,
+ NULL, NULL, node, cost_vec)
+ || vectorizable_operation (vinfo, stmt_info,
+ NULL, NULL, node, cost_vec)
+ || vectorizable_assignment (vinfo, stmt_info, NULL, NULL, node,
cost_vec)
- || vectorizable_load (stmt_info, NULL, NULL, node, node_instance,
- cost_vec)
- || vectorizable_store (stmt_info, NULL, NULL, node, cost_vec)
- || vectorizable_condition (stmt_info, NULL, NULL, node, cost_vec)
- || vectorizable_comparison (stmt_info, NULL, NULL, node,
+ || vectorizable_load (vinfo, stmt_info,
+ NULL, NULL, node, cost_vec)
+ || vectorizable_store (vinfo, stmt_info,
+ NULL, NULL, node, cost_vec)
+ || vectorizable_condition (vinfo, stmt_info,
+ NULL, NULL, node, cost_vec)
+ || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
cost_vec));
}
if (!bb_vinfo
&& STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
&& STMT_VINFO_TYPE (stmt_info) != lc_phi_info_type
- && !can_vectorize_live_stmts (stmt_info, NULL, node, node_instance,
+ && !can_vectorize_live_stmts (as_a <loop_vec_info> (vinfo),
+ stmt_info, NULL, node, node_instance,
false, cost_vec))
return opt_result::failure_at (stmt_info->stmt,
"not vectorized:"
Create a vectorized stmt to replace STMT_INFO, and insert it at GSI. */
bool
-vect_transform_stmt (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
+vect_transform_stmt (vec_info *vinfo,
+ stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
slp_tree slp_node, slp_instance slp_node_instance)
{
- vec_info *vinfo = stmt_info->vinfo;
bool is_store = false;
- stmt_vec_info vec_stmt = NULL;
+ gimple *vec_stmt = NULL;
bool done;
gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
- stmt_vec_info old_vec_stmt_info = STMT_VINFO_VEC_STMT (stmt_info);
-
- bool nested_p = (STMT_VINFO_LOOP_VINFO (stmt_info)
- && nested_in_vect_loop_p
- (LOOP_VINFO_LOOP (STMT_VINFO_LOOP_VINFO (stmt_info)),
- stmt_info));
- gimple *stmt = stmt_info->stmt;
switch (STMT_VINFO_TYPE (stmt_info))
{
case type_demotion_vec_info_type:
case type_promotion_vec_info_type:
case type_conversion_vec_info_type:
- done = vectorizable_conversion (stmt_info, gsi, &vec_stmt, slp_node,
- NULL);
+ done = vectorizable_conversion (vinfo, stmt_info,
+ gsi, &vec_stmt, slp_node, NULL);
gcc_assert (done);
break;
case induc_vec_info_type:
- done = vectorizable_induction (stmt_info, gsi, &vec_stmt, slp_node,
+ done = vectorizable_induction (as_a <loop_vec_info> (vinfo),
+ stmt_info, gsi, &vec_stmt, slp_node,
NULL);
gcc_assert (done);
break;
case shift_vec_info_type:
- done = vectorizable_shift (stmt_info, gsi, &vec_stmt, slp_node, NULL);
+ done = vectorizable_shift (vinfo, stmt_info,
+ gsi, &vec_stmt, slp_node, NULL);
gcc_assert (done);
break;
case op_vec_info_type:
- done = vectorizable_operation (stmt_info, gsi, &vec_stmt, slp_node,
+ done = vectorizable_operation (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
NULL);
gcc_assert (done);
break;
case assignment_vec_info_type:
- done = vectorizable_assignment (stmt_info, gsi, &vec_stmt, slp_node,
- NULL);
+ done = vectorizable_assignment (vinfo, stmt_info,
+ gsi, &vec_stmt, slp_node, NULL);
gcc_assert (done);
break;
case load_vec_info_type:
- done = vectorizable_load (stmt_info, gsi, &vec_stmt, slp_node,
- slp_node_instance, NULL);
+ done = vectorizable_load (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
+ NULL);
gcc_assert (done);
break;
case store_vec_info_type:
- done = vectorizable_store (stmt_info, gsi, &vec_stmt, slp_node, NULL);
+ done = vectorizable_store (vinfo, stmt_info,
+ gsi, &vec_stmt, slp_node, NULL);
gcc_assert (done);
if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
{
break;
case condition_vec_info_type:
- done = vectorizable_condition (stmt_info, gsi, &vec_stmt, slp_node, NULL);
+ done = vectorizable_condition (vinfo, stmt_info,
+ gsi, &vec_stmt, slp_node, NULL);
gcc_assert (done);
break;
case comparison_vec_info_type:
- done = vectorizable_comparison (stmt_info, gsi, &vec_stmt,
+ done = vectorizable_comparison (vinfo, stmt_info, gsi, &vec_stmt,
slp_node, NULL);
gcc_assert (done);
break;
case call_vec_info_type:
- done = vectorizable_call (stmt_info, gsi, &vec_stmt, slp_node, NULL);
- stmt = gsi_stmt (*gsi);
+ done = vectorizable_call (vinfo, stmt_info,
+ gsi, &vec_stmt, slp_node, NULL);
break;
case call_simd_clone_vec_info_type:
- done = vectorizable_simd_clone_call (stmt_info, gsi, &vec_stmt,
+ done = vectorizable_simd_clone_call (vinfo, stmt_info, gsi, &vec_stmt,
slp_node, NULL);
- stmt = gsi_stmt (*gsi);
break;
case reduc_vec_info_type:
- done = vect_transform_reduction (stmt_info, gsi, &vec_stmt, slp_node);
+ done = vect_transform_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
+ gsi, &vec_stmt, slp_node);
gcc_assert (done);
break;
case cycle_phi_info_type:
- done = vect_transform_cycle_phi (stmt_info, &vec_stmt, slp_node,
- slp_node_instance);
+ done = vect_transform_cycle_phi (as_a <loop_vec_info> (vinfo), stmt_info,
+ &vec_stmt, slp_node, slp_node_instance);
gcc_assert (done);
break;
case lc_phi_info_type:
- done = vectorizable_lc_phi (stmt_info, &vec_stmt, slp_node);
+ done = vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
+ stmt_info, &vec_stmt, slp_node);
gcc_assert (done);
break;
"stmt not supported.\n");
gcc_unreachable ();
}
+ done = true;
}
- /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
- This would break hybrid SLP vectorization. */
- if (slp_node)
- gcc_assert (!vec_stmt
- && STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt_info);
-
- /* Handle inner-loop stmts whose DEF is used in the loop-nest that
- is being vectorized, but outside the immediately enclosing loop. */
- if (vec_stmt
- && nested_p
- && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
- && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
- || STMT_VINFO_RELEVANT (stmt_info) ==
- vect_used_in_outer_by_reduction))
- {
- class loop *innerloop = LOOP_VINFO_LOOP (
- STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
- imm_use_iterator imm_iter;
- use_operand_p use_p;
- tree scalar_dest;
-
- if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location,
- "Record the vdef for outer-loop vectorization.\n");
-
- /* Find the relevant loop-exit phi-node, and reord the vec_stmt there
- (to be used when vectorizing outer-loop stmts that use the DEF of
- STMT). */
- if (gimple_code (stmt) == GIMPLE_PHI)
- scalar_dest = PHI_RESULT (stmt);
- else
- scalar_dest = gimple_get_lhs (stmt);
-
- FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
- if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
- {
- stmt_vec_info exit_phi_info
- = vinfo->lookup_stmt (USE_STMT (use_p));
- STMT_VINFO_VEC_STMT (exit_phi_info) = vec_stmt;
- }
- }
-
- if (vec_stmt)
- STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
+ if (!slp_node && vec_stmt)
+ gcc_assert (STMT_VINFO_VEC_STMTS (stmt_info).exists ());
if (STMT_VINFO_TYPE (stmt_info) == store_vec_info_type)
return is_store;
stmt_vec_info reduc_info;
if (STMT_VINFO_REDUC_DEF (orig_stmt_info)
&& vect_stmt_to_vectorize (orig_stmt_info) == stmt_info
- && (reduc_info = info_for_reduction (orig_stmt_info))
+ && (reduc_info = info_for_reduction (vinfo, orig_stmt_info))
&& STMT_VINFO_REDUC_TYPE (reduc_info) != FOLD_LEFT_REDUCTION
&& STMT_VINFO_REDUC_TYPE (reduc_info) != EXTRACT_LAST_REDUCTION)
{
&& (PHI_ARG_DEF_FROM_EDGE (phi, e)
== gimple_get_lhs (orig_stmt_info->stmt)))
{
- stmt_vec_info phi_info
- = STMT_VINFO_VEC_STMT (STMT_VINFO_REDUC_DEF (orig_stmt_info));
- stmt_vec_info vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
- do
- {
- add_phi_arg (as_a <gphi *> (phi_info->stmt),
- gimple_get_lhs (vec_stmt->stmt), e,
- gimple_phi_arg_location (phi, e->dest_idx));
- phi_info = STMT_VINFO_RELATED_STMT (phi_info);
- vec_stmt = STMT_VINFO_RELATED_STMT (vec_stmt);
- }
- while (phi_info);
- gcc_assert (!vec_stmt);
+ vec<gimple *> &phi_info
+ = STMT_VINFO_VEC_STMTS (STMT_VINFO_REDUC_DEF (orig_stmt_info));
+ vec<gimple *> &vec_stmt
+ = STMT_VINFO_VEC_STMTS (stmt_info);
+ gcc_assert (phi_info.length () == vec_stmt.length ());
+ for (unsigned i = 0; i < phi_info.length (); ++i)
+ add_phi_arg (as_a <gphi *> (phi_info[i]),
+ gimple_get_lhs (vec_stmt[i]), e,
+ gimple_phi_arg_location (phi, e->dest_idx));
}
else if (slp_node
&& slp_node != slp_node_instance->reduc_phis)
gcc_assert (SLP_TREE_VEC_STMTS (phi_node).length ()
== SLP_TREE_VEC_STMTS (slp_node).length ());
for (unsigned i = 0; i < SLP_TREE_VEC_STMTS (phi_node).length (); ++i)
- add_phi_arg (as_a <gphi *> (SLP_TREE_VEC_STMTS (phi_node)[i]->stmt),
- gimple_get_lhs (SLP_TREE_VEC_STMTS (slp_node)[i]->stmt),
+ add_phi_arg (as_a <gphi *> (SLP_TREE_VEC_STMTS (phi_node)[i]),
+ vect_get_slp_vect_def (slp_node, i),
e, gimple_phi_arg_location (phi, e->dest_idx));
}
}
/* Handle stmts whose DEF is used outside the loop-nest that is
being vectorized. */
- done = can_vectorize_live_stmts (stmt_info, gsi, slp_node,
- slp_node_instance, true, NULL);
+ if (is_a <loop_vec_info> (vinfo))
+ done = can_vectorize_live_stmts (as_a <loop_vec_info> (vinfo),
+ stmt_info, gsi, slp_node,
+ slp_node_instance, true, NULL);
gcc_assert (done);
return false;
stmt_vec_info. */
void
-vect_remove_stores (stmt_vec_info first_stmt_info)
+vect_remove_stores (vec_info *vinfo, stmt_vec_info first_stmt_info)
{
- vec_info *vinfo = first_stmt_info->vinfo;
stmt_vec_info next_stmt_info = first_stmt_info;
while (next_stmt_info)
}
}
-/* Function get_vectype_for_scalar_type_and_size.
+/* If NUNITS is nonzero, return a vector type that contains NUNITS
+ elements of type SCALAR_TYPE, or null if the target doesn't support
+ such a type.
+
+ If NUNITS is zero, return a vector type that contains elements of
+ type SCALAR_TYPE, choosing whichever vector size the target prefers.
- Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
- by the target. */
+ If PREVAILING_MODE is VOIDmode, we have not yet chosen a vector mode
+ for this vectorization region and want to "autodetect" the best choice.
+ Otherwise, PREVAILING_MODE is a previously-chosen vector TYPE_MODE
+ and we want the new type to be interoperable with it. PREVAILING_MODE
+ in this case can be a scalar integer mode or a vector mode; when it
+ is a vector mode, the function acts like a tree-level version of
+ related_vector_mode. */
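+
+/* For example (a sketch, assuming SSE2 on x86_64): with PREVAILING_MODE
+ V4SImode and SCALAR_TYPE "short int", related_vector_mode yields
+ V8HImode, so the result is an 8-element vector of shorts. */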
tree
-get_vectype_for_scalar_type_and_size (tree scalar_type, poly_uint64 size)
+get_related_vectype_for_scalar_type (machine_mode prevailing_mode,
+ tree scalar_type, poly_uint64 nunits)
{
tree orig_scalar_type = scalar_type;
scalar_mode inner_mode;
machine_mode simd_mode;
- poly_uint64 nunits;
tree vectype;
if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
if (scalar_type == NULL_TREE)
return NULL_TREE;
- /* If no size was supplied use the mode the target prefers. Otherwise
- lookup a vector mode of the specified size. */
- if (known_eq (size, 0U))
- simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
- else if (!multiple_p (size, nbytes, &nunits)
- || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
- return NULL_TREE;
- /* NOTE: nunits == 1 is allowed to support single element vector types. */
- if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits))
- return NULL_TREE;
+ /* If no prevailing mode was supplied, use the mode the target prefers.
+ Otherwise lookup a vector mode based on the prevailing mode. */
+ if (prevailing_mode == VOIDmode)
+ {
+ gcc_assert (known_eq (nunits, 0U));
+ simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
+ if (SCALAR_INT_MODE_P (simd_mode))
+ {
+ /* Traditional behavior is not to take the integer mode
+ literally, but simply to use it as a way of determining
+ the vector size. It is up to mode_for_vector to decide
+ what the TYPE_MODE should be.
+
+ Note that nunits == 1 is allowed in order to support single
+ element vector types. */
- vectype = build_vector_type (scalar_type, nunits);
+ if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits)
+ || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
+ return NULL_TREE;
+ }
+ }
+ else if (SCALAR_INT_MODE_P (prevailing_mode)
+ || !related_vector_mode (prevailing_mode,
+ inner_mode, nunits).exists (&simd_mode))
+ {
+ /* Fall back to using mode_for_vector, mostly in the hope of being
+ able to use an integer mode. */
+ if (known_eq (nunits, 0U)
+ && !multiple_p (GET_MODE_SIZE (prevailing_mode), nbytes, &nunits))
+ return NULL_TREE;
+
+ if (!mode_for_vector (inner_mode, nunits).exists (&simd_mode))
+ return NULL_TREE;
+ }
+ vectype = build_vector_type_for_mode (scalar_type, simd_mode);
+
+ /* In cases where the mode was chosen by mode_for_vector, check that
+ the target actually supports the chosen mode, or that it at least
+ allows the vector mode to be replaced by a like-sized integer. */
if (!VECTOR_MODE_P (TYPE_MODE (vectype))
&& !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
return NULL_TREE;
/* Function get_vectype_for_scalar_type.
Returns the vector type corresponding to SCALAR_TYPE as supported
- by the target. */
+ by the target. If GROUP_SIZE is nonzero and we're performing BB
+ vectorization, make sure that the number of elements in the vector
+ is no bigger than GROUP_SIZE. */
tree
-get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type)
+get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type,
+ unsigned int group_size)
{
- tree vectype;
- vectype = get_vectype_for_scalar_type_and_size (scalar_type,
- vinfo->vector_size);
+ /* For BB vectorization, we should always have a group size once we've
+ constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
+ are tentative requests during things like early data reference
+ analysis and pattern recognition. */
+ if (is_a <bb_vec_info> (vinfo))
+ gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
+ else
+ group_size = 0;
+
+ tree vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
+ scalar_type);
+ if (vectype && vinfo->vector_mode == VOIDmode)
+ vinfo->vector_mode = TYPE_MODE (vectype);
+
+ /* Register the natural choice of vector type, before the group size
+ has been applied. */
+ if (vectype)
+ vinfo->used_vector_modes.add (TYPE_MODE (vectype));
+
+ /* If the natural choice of vector type doesn't satisfy GROUP_SIZE,
+ try again with an explicit number of elements. */
if (vectype
- && known_eq (vinfo->vector_size, 0U))
- vinfo->vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
+ && group_size
+ && maybe_ge (TYPE_VECTOR_SUBPARTS (vectype), group_size))
+ {
+ /* Start with the biggest number of units that fits within
+ GROUP_SIZE and halve it until we find a valid vector type.
+ Usually either the first attempt will succeed or all will
+ fail (in the latter case because GROUP_SIZE is too small
+ for the target), but it's possible that a target could have
+ a hole between supported vector types.
+
+ If GROUP_SIZE is not a power of 2, this has the effect of
+ trying the largest power of 2 that fits within the group,
+ even though the group is not a multiple of that vector size.
+ The BB vectorizer will then try to carve up the group into
+ smaller pieces. */
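+ /* E.g. GROUP_SIZE == 6 tries a 4-element vector type first
+ (1 << floor_log2 (6)) and then a 2-element one (a sketch). */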
+ unsigned int nunits = 1 << floor_log2 (group_size);
+ do
+ {
+ vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
+ scalar_type, nunits);
+ nunits /= 2;
+ }
+ while (nunits > 1 && !vectype);
+ }
+
return vectype;
}
+/* Return the vector type corresponding to SCALAR_TYPE as supported
+ by the target. NODE, if nonnull, is the SLP tree node that will
+ use the returned vector type. */
+
+tree
+get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type, slp_tree node)
+{
+ unsigned int group_size = 0;
+ if (node)
+ group_size = SLP_TREE_LANES (node);
+ return get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
+}
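+
+/* Usage sketch: an SLP node with four lanes of "short" data asks for a
+ vector type of at most four elements here, falling back from the
+ target's natural, wider choice when the group is smaller. */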
+
/* Function get_mask_type_for_scalar_type.
Returns the mask type corresponding to a result of comparison
- of vectors of specified SCALAR_TYPE as supported by target. */
+ of vectors of specified SCALAR_TYPE as supported by target.
+ If GROUP_SIZE is nonzero and we're performing BB vectorization,
+ make sure that the number of elements in the vector is no bigger
+ than GROUP_SIZE. */
tree
-get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type)
+get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type,
+ unsigned int group_size)
{
- tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
+ tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
if (!vectype)
return NULL;
- return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype),
- vinfo->vector_size);
+ return truth_type_for (vectype);
}
/* Function get_same_sized_vectype
get_same_sized_vectype (tree scalar_type, tree vector_type)
{
if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
- return build_same_sized_truth_vector_type (vector_type);
+ return truth_type_for (vector_type);
+
+ poly_uint64 nunits;
+ if (!multiple_p (GET_MODE_SIZE (TYPE_MODE (vector_type)),
+ GET_MODE_SIZE (TYPE_MODE (scalar_type)), &nunits))
+ return NULL_TREE;
- return get_vectype_for_scalar_type_and_size
- (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
+ return get_related_vectype_for_scalar_type (TYPE_MODE (vector_type),
+ scalar_type, nunits);
+}
+
+/* Return true if replacing LOOP_VINFO->vector_mode with VECTOR_MODE
+ would not change the chosen vector modes. */
+
+bool
+vect_chooses_same_modes_p (vec_info *vinfo, machine_mode vector_mode)
+{
+ for (vec_info::mode_set::iterator i = vinfo->used_vector_modes.begin ();
+ i != vinfo->used_vector_modes.end (); ++i)
+ if (!VECTOR_MODE_P (*i)
+ || related_vector_mode (vector_mode, GET_MODE_INNER (*i), 0) != *i)
+ return false;
+ return true;
+}
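+
+/* E.g. (a sketch, assuming x86 with AVX2): if V16QImode is among the
+ used modes, replacing V4SImode with V8SImode changes the choice,
+ since related_vector_mode (V8SImode, QImode, 0) is V32QImode, and
+ the function returns false. */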
/* Function vect_is_simple_use.
return true;
}
+/* Function vect_is_simple_use.
+
+ Same as vect_is_simple_use but determines the operand by operand
+ position OPERAND from either STMT or SLP_NODE, filling in *OP
+ and *SLP_DEF (when SLP_NODE is not NULL). */
+
+bool
+vect_is_simple_use (vec_info *vinfo, stmt_vec_info stmt, slp_tree slp_node,
+ unsigned operand, tree *op, slp_tree *slp_def,
+ enum vect_def_type *dt,
+ tree *vectype, stmt_vec_info *def_stmt_info_out)
+{
+ if (slp_node)
+ {
+ slp_tree child = SLP_TREE_CHILDREN (slp_node)[operand];
+ *slp_def = child;
+ if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
+ *op = gimple_get_lhs (SLP_TREE_REPRESENTATIVE (child)->stmt);
+ else
+ {
+ if (def_stmt_info_out)
+ *def_stmt_info_out = NULL;
+ *op = SLP_TREE_SCALAR_OPS (child)[0];
+ *dt = SLP_TREE_DEF_TYPE (child);
+ *vectype = SLP_TREE_VECTYPE (child);
+ return true;
+ }
+ }
+ else
+ {
+ if (gassign *ass = dyn_cast <gassign *> (stmt->stmt))
+ {
+ if (gimple_assign_rhs_code (ass) == COND_EXPR
+ && COMPARISON_CLASS_P (gimple_assign_rhs1 (ass)))
+ {
+ if (operand < 2)
+ *op = TREE_OPERAND (gimple_assign_rhs1 (ass), operand);
+ else
+ *op = gimple_op (ass, operand);
+ }
+ else if (gimple_assign_rhs_code (ass) == VIEW_CONVERT_EXPR)
+ *op = TREE_OPERAND (gimple_assign_rhs1 (ass), 0);
+ else
+ *op = gimple_op (ass, operand + 1);
+ }
+ else if (gcall *call = dyn_cast <gcall *> (stmt->stmt))
+ {
+ if (gimple_call_internal_p (call)
+ && internal_store_fn_p (gimple_call_internal_fn (call)))
+ operand = internal_fn_stored_value_index (gimple_call_internal_fn
+ (call));
+ *op = gimple_call_arg (call, operand);
+ }
+ else
+ gcc_unreachable ();
+ }
+
+ /* ??? We might want to update *vectype from *slp_def here, though
+ when nodes are shared this would prevent unsharing in the caller. */
+ return vect_is_simple_use (*op, vinfo, dt, vectype, def_stmt_info_out);
+}
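/* A worked example of the operand mapping above for the COND_EXPR
   case, as an illustrative model rather than GCC API: for
     x = (a < b) ? c : d
   rhs1 is the embedded comparison (a < b), so logical operands 0 and 1
   come from TREE_OPERAND (rhs1, n), while operands 2 and 3 map to
   gimple_op (ass, n), i.e. the two value arms.  cond_assign and
   get_operand are stand-ins.  */

#include <cassert>
#include <string>

struct cond_assign
{
  std::string cmp0, cmp1;  /* Operands of the embedded comparison.  */
  std::string rhs2, rhs3;  /* The two value arms.  */
};

static std::string
get_operand (const cond_assign &s, unsigned operand)
{
  if (operand < 2)
    return operand == 0 ? s.cmp0 : s.cmp1;
  return operand == 2 ? s.rhs2 : s.rhs3;
}

int
main ()
{
  cond_assign s = { "a_1", "b_2", "c_3", "d_4" };
  assert (get_operand (s, 1) == "b_2");
  assert (get_operand (s, 2) == "c_3");
}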
+
+/* If OP is not NULL and is external or constant, update its vector
+ type with VECTYPE. Return true on success and false on failure,
+ for example when a conflicting vector type is already present. */
+
+bool
+vect_maybe_update_slp_op_vectype (slp_tree op, tree vectype)
+{
+ if (!op || SLP_TREE_DEF_TYPE (op) == vect_internal_def)
+ return true;
+ if (SLP_TREE_VECTYPE (op))
+ return types_compatible_p (SLP_TREE_VECTYPE (op), vectype);
+ SLP_TREE_VECTYPE (op) = vectype;
+ return true;
+}
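/* A standalone model of the set-once semantics above: external and
   constant SLP operands take the first vector type assigned to them,
   and a later, incompatible assignment is reported as a conflict.
   slp_op and maybe_update_vectype are stand-ins, with types modelled
   as strings.  */

#include <cassert>
#include <string>

struct slp_op
{
  bool internal_def;     /* Internal defs carry their own vectype.  */
  std::string vectype;   /* Empty means not yet set.  */
};

static bool
maybe_update_vectype (slp_op *op, const std::string &vectype)
{
  if (!op || op->internal_def)
    return true;
  if (!op->vectype.empty ())
    return op->vectype == vectype;
  op->vectype = vectype;
  return true;
}

int
main ()
{
  slp_op ext = { false, "" };
  assert (maybe_update_vectype (&ext, "V4SI"));   /* First setter wins.  */
  assert (maybe_update_vectype (&ext, "V4SI"));   /* Compatible: OK.  */
  assert (!maybe_update_vectype (&ext, "V8HI"));  /* Conflict detected.  */
}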
/* Function supportable_widening_operation
widening operation (short in the above example). */
bool
-supportable_widening_operation (enum tree_code code, stmt_vec_info stmt_info,
+supportable_widening_operation (vec_info *vinfo,
+ enum tree_code code, stmt_vec_info stmt_info,
tree vectype_out, tree vectype_in,
enum tree_code *code1, enum tree_code *code2,
int *multi_step_cvt,
vec<tree> *interm_types)
{
- vec_info *vinfo = stmt_info->vinfo;
- loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
+ loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
class loop *vect_loop = NULL;
machine_mode vec_mode;
enum insn_code icode1, icode2;
if (vect_loop
&& STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
&& !nested_in_vect_loop_p (vect_loop, stmt_info)
- && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
+ && supportable_widening_operation (vinfo, VEC_WIDEN_MULT_EVEN_EXPR,
stmt_info, vectype_out,
vectype_in, code1, code2,
multi_step_cvt, interm_types))
{
intermediate_mode = insn_data[icode1].operand[0].mode;
if (VECTOR_BOOLEAN_TYPE_P (prev_type))
- {
- intermediate_type = vect_halve_mask_nunits (vinfo, prev_type);
- if (intermediate_mode != TYPE_MODE (intermediate_type))
- return false;
- }
+ intermediate_type
+ = vect_halve_mask_nunits (prev_type, intermediate_mode);
else
intermediate_type
= lang_hooks.types.type_for_mode (intermediate_mode,
narrowing operation (short in the above example). */
bool
-supportable_narrowing_operation (vec_info *vinfo, enum tree_code code,
+supportable_narrowing_operation (enum tree_code code,
tree vectype_out, tree vectype_in,
enum tree_code *code1, int *multi_step_cvt,
vec<tree> *interm_types)
{
intermediate_mode = insn_data[icode1].operand[0].mode;
if (VECTOR_BOOLEAN_TYPE_P (prev_type))
- {
- intermediate_type = vect_double_mask_nunits (vinfo, prev_type);
- if (intermediate_mode != TYPE_MODE (intermediate_type))
- return false;
- }
+ intermediate_type
+ = vect_double_mask_nunits (prev_type, intermediate_mode);
else
intermediate_type
= lang_hooks.types.type_for_mode (intermediate_mode, uns);
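/* A lane-count sketch of the multi-step mask conversions above: each
   widening step halves the mask's lane count (vect_halve_mask_nunits)
   and each narrowing step doubles it (vect_double_mask_nunits), so a
   two-step widening takes a 16-lane mask through 16 -> 8 -> 4.  Lane
   counts only; no GCC types involved.  */

#include <cassert>

int
main ()
{
  unsigned lanes = 16;
  for (int step = 0; step < 2; ++step)
    lanes /= 2;  /* One intermediate mask type per step.  */
  assert (lanes == 4);
}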
/* Try to compute the vector types required to vectorize STMT_INFO,
returning true on success and false if vectorization isn't possible.
+ If GROUP_SIZE is nonzero and we're performing BB vectorization,
+ make sure that the number of elements in the vectors is no bigger
+ than GROUP_SIZE.
On success:
- Set *STMT_VECTYPE_OUT to:
- NULL_TREE if the statement doesn't need to be vectorized;
- - boolean_type_node if the statement is a boolean operation whose
- vector type can only be determined once all the other vector types
- are known; and
- the equivalent of STMT_VINFO_VECTYPE otherwise.
- Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
statement does not help to determine the overall number of units. */
opt_result
-vect_get_vector_types_for_stmt (stmt_vec_info stmt_info,
+vect_get_vector_types_for_stmt (vec_info *vinfo, stmt_vec_info stmt_info,
tree *stmt_vectype_out,
- tree *nunits_vectype_out)
+ tree *nunits_vectype_out,
+ unsigned int group_size)
{
- vec_info *vinfo = stmt_info->vinfo;
gimple *stmt = stmt_info->stmt;
+ /* For BB vectorization, we should always have a group size once we've
+ constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
+ are tentative requests during things like early data reference
+ analysis and pattern recognition. */
+ if (is_a <bb_vec_info> (vinfo))
+ gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
+ else
+ group_size = 0;
+
*stmt_vectype_out = NULL_TREE;
*nunits_vectype_out = NULL_TREE;
tree vectype;
tree scalar_type = NULL_TREE;
- if (STMT_VINFO_VECTYPE (stmt_info))
- *stmt_vectype_out = vectype = STMT_VINFO_VECTYPE (stmt_info);
+ if (group_size == 0 && STMT_VINFO_VECTYPE (stmt_info))
+ {
+ vectype = STMT_VINFO_VECTYPE (stmt_info);
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "precomputed vectype: %T\n", vectype);
+ }
+ else if (vect_use_mask_type_p (stmt_info))
+ {
+ unsigned int precision = stmt_info->mask_precision;
+ scalar_type = build_nonstandard_integer_type (precision, 1);
+ vectype = get_mask_type_for_scalar_type (vinfo, scalar_type, group_size);
+ if (!vectype)
+ return opt_result::failure_at (stmt, "not vectorized: unsupported"
+ " data-type %T\n", scalar_type);
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
+ }
else
{
- gcc_assert (!STMT_VINFO_DATA_REF (stmt_info));
- if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
+ if (data_reference *dr = STMT_VINFO_DATA_REF (stmt_info))
+ scalar_type = TREE_TYPE (DR_REF (dr));
+ else if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
else
scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
- /* Pure bool ops don't participate in number-of-units computation.
- For comparisons use the types being compared. */
- if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type)
- && is_gimple_assign (stmt)
- && gimple_assign_rhs_code (stmt) != COND_EXPR)
+ if (dump_enabled_p ())
{
- *stmt_vectype_out = boolean_type_node;
-
- tree rhs1 = gimple_assign_rhs1 (stmt);
- if (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison
- && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
- scalar_type = TREE_TYPE (rhs1);
+ if (group_size)
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "get vectype for scalar type (group size %d):"
+ " %T\n", group_size, scalar_type);
else
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location,
- "pure bool operation.\n");
- return opt_result::success ();
- }
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "get vectype for scalar type: %T\n", scalar_type);
}
-
- if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location,
- "get vectype for scalar type: %T\n", scalar_type);
- vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
+ vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
if (!vectype)
return opt_result::failure_at (stmt,
"not vectorized:"
" unsupported data-type %T\n",
scalar_type);
- if (!*stmt_vectype_out)
- *stmt_vectype_out = vectype;
-
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
}
+ *stmt_vectype_out = vectype;
/* Don't try to compute scalar types if the stmt produces a boolean
vector; use the existing vector type instead. */
- tree nunits_vectype;
- if (VECTOR_BOOLEAN_TYPE_P (vectype))
- nunits_vectype = vectype;
- else
+ tree nunits_vectype = vectype;
+ if (!VECTOR_BOOLEAN_TYPE_P (vectype))
{
/* The number of units is set according to the smallest scalar
type (or the largest vector size, but we only support one
vector size per vectorization). */
- if (*stmt_vectype_out != boolean_type_node)
+ HOST_WIDE_INT dummy;
+ scalar_type = vect_get_smallest_scalar_type (stmt_info, &dummy, &dummy);
+ if (scalar_type != TREE_TYPE (vectype))
{
- HOST_WIDE_INT dummy;
- scalar_type = vect_get_smallest_scalar_type (stmt_info,
- &dummy, &dummy);
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "get vectype for smallest scalar type: %T\n",
+ scalar_type);
+ nunits_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
+ group_size);
+ if (!nunits_vectype)
+ return opt_result::failure_at
+ (stmt, "not vectorized: unsupported data-type %T\n",
+ scalar_type);
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location, "nunits vectype: %T\n",
+ nunits_vectype);
}
- if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location,
- "get vectype for scalar type: %T\n", scalar_type);
- nunits_vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
}
- if (!nunits_vectype)
- return opt_result::failure_at (stmt,
- "not vectorized: unsupported data-type %T\n",
- scalar_type);
- if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
- GET_MODE_SIZE (TYPE_MODE (nunits_vectype))))
- return opt_result::failure_at (stmt,
- "not vectorized: different sized vector "
- "types in statement, %T and %T\n",
- vectype, nunits_vectype);
+ gcc_assert (multiple_p (TYPE_VECTOR_SUBPARTS (nunits_vectype),
+ TYPE_VECTOR_SUBPARTS (*stmt_vectype_out)));
if (dump_enabled_p ())
{
- dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n",
- nunits_vectype);
-
dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
dump_printf (MSG_NOTE, "\n");
*nunits_vectype_out = nunits_vectype;
return opt_result::success ();
}
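/* An arithmetic sketch of the invariant asserted above: with a single
   vector size shared by the whole statement, the vector type based on
   the smallest scalar type has the most lanes, and that lane count is
   a whole multiple of the statement vectype's lanes.  128-bit vectors
   assumed purely for illustration.  */

#include <cassert>

int
main ()
{
  const unsigned vector_bits = 128;
  unsigned stmt_lanes = vector_bits / 32;    /* e.g. a V4SI result.  */
  unsigned nunits_lanes = vector_bits / 8;   /* e.g. V16QI inputs.  */
  assert (nunits_lanes % stmt_lanes == 0);   /* What multiple_p checks.  */
}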
-
-/* Try to determine the correct vector type for STMT_INFO, which is a
- statement that produces a scalar boolean result. Return the vector
- type on success, otherwise return NULL_TREE. */
-
-opt_tree
-vect_get_mask_type_for_stmt (stmt_vec_info stmt_info)
-{
- vec_info *vinfo = stmt_info->vinfo;
- gimple *stmt = stmt_info->stmt;
- tree mask_type = NULL;
- tree vectype, scalar_type;
-
- if (is_gimple_assign (stmt)
- && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison
- && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (stmt))))
- {
- scalar_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
- mask_type = get_mask_type_for_scalar_type (vinfo, scalar_type);
-
- if (!mask_type)
- return opt_tree::failure_at (stmt,
- "not vectorized: unsupported mask\n");
- }
- else
- {
- tree rhs;
- ssa_op_iter iter;
- enum vect_def_type dt;
-
- FOR_EACH_SSA_TREE_OPERAND (rhs, stmt, iter, SSA_OP_USE)
- {
- if (!vect_is_simple_use (rhs, stmt_info->vinfo, &dt, &vectype))
- return opt_tree::failure_at (stmt,
- "not vectorized:can't compute mask"
- " type for statement, %G", stmt);
-
- /* No vectype probably means external definition.
- Allow it in case there is another operand which
- allows to determine mask type. */
- if (!vectype)
- continue;
-
- if (!mask_type)
- mask_type = vectype;
- else if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_type),
- TYPE_VECTOR_SUBPARTS (vectype)))
- return opt_tree::failure_at (stmt,
- "not vectorized: different sized mask"
- " types in statement, %T and %T\n",
- mask_type, vectype);
- else if (VECTOR_BOOLEAN_TYPE_P (mask_type)
- != VECTOR_BOOLEAN_TYPE_P (vectype))
- return opt_tree::failure_at (stmt,
- "not vectorized: mixed mask and "
- "nonmask vector types in statement, "
- "%T and %T\n",
- mask_type, vectype);
- }
-
- /* We may compare boolean value loaded as vector of integers.
- Fix mask_type in such case. */
- if (mask_type
- && !VECTOR_BOOLEAN_TYPE_P (mask_type)
- && gimple_code (stmt) == GIMPLE_ASSIGN
- && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison)
- mask_type = build_same_sized_truth_vector_type (mask_type);
- }
-
- /* No mask_type should mean loop invariant predicate.
- This is probably a subject for optimization in if-conversion. */
- if (!mask_type)
- return opt_tree::failure_at (stmt,
- "not vectorized: can't compute mask type "
- "for statement: %G", stmt);
-
- return opt_tree::success (mask_type);
-}