unsigned nstores = nunits;
unsigned lnel = 1;
tree ltype = elem_type;
+ tree lvectype = vectype;
if (slp)
{
if (group_size < nunits
nstores = nunits / group_size;
lnel = group_size;
ltype = build_vector_type (elem_type, group_size);
+ lvectype = vectype;
+
+ /* First check if vec_extract optab doesn't support extraction
+ of vector elts directly. */
+ machine_mode elmode = TYPE_MODE (elem_type);
+ machine_mode vmode = mode_for_vector (elmode, group_size);
+ if (! VECTOR_MODE_P (vmode)
+ || (convert_optab_handler (vec_extract_optab,
+ TYPE_MODE (vectype), vmode)
+ == CODE_FOR_nothing))
+ {
+ /* Try to avoid emitting an extract of vector elements
+ by performing the extracts using an integer type of the
+ same size, extracting from a vector of those and then
+ re-interpreting it as the original vector type if
+ supported. */
+ unsigned lsize
+ = group_size * GET_MODE_BITSIZE (elmode);
+ elmode = mode_for_size (lsize, MODE_INT, 0);
+ vmode = mode_for_vector (elmode, nunits / group_size);
+ /* If we can't construct such a vector fall back to
+ element extracts from the original vector type and
+ element size stores. */
+ if (VECTOR_MODE_P (vmode)
+ && (convert_optab_handler (vec_extract_optab,
+ vmode, elmode)
+ != CODE_FOR_nothing))
+ {
+ nstores = nunits / group_size;
+ lnel = group_size;
+ ltype = build_nonstandard_integer_type (lsize, 1);
+ lvectype = build_vector_type (ltype, nstores);
+ }
+ /* Else fall back to vector extraction anyway.
+ Fewer stores are more important than avoiding spilling
+ of the vector we extract from. Compared to the
+ construction case in vectorizable_load no store-forwarding
+ issue exists here for reasonable archs. */
+ }
}
else if (group_size >= nunits
&& group_size % nunits == 0)
nstores = 1;
lnel = nunits;
ltype = vectype;
+ lvectype = vectype;
}
ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
}
}
-
+ /* Pun the vector to extract from if necessary. */
+ if (lvectype != vectype)
+ {
+ tree tem = make_ssa_name (lvectype);
+ gimple *pun
+ = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
+ lvectype, vec_oprnd));
+ vect_finish_stmt_generation (stmt, pun, gsi);
+ vec_oprnd = tem;
+ }
for (i = 0; i < nstores; i++)
{
tree newref, newoff;