+2018-01-03 Richard Sandiford <richard.sandiford@linaro.org>
+ Alan Hayward <alan.hayward@arm.com>
+ David Sherwood <david.sherwood@arm.com>
+
+ * tree.h (build_index_vector): Declare.
+ * tree.c (build_index_vector): New function.
+ * tree-vect-loop.c (get_initial_defs_for_reduction): Treat the number
+ of units as polynomial, forcibly converting it to a constant if
+ vectorizable_reduction has already enforced the condition.
+ (vect_create_epilog_for_reduction): Likewise. Use build_index_vector
+ to create a {1,2,3,...} vector.
+ (vectorizable_reduction): Treat the number of units as polynomial.
+ Choose vectype_in based on the largest scalar element size rather
+ than the smallest number of units. Enforce the restrictions
+ relied on above.
+
2018-01-03 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
vector_type = STMT_VINFO_VECTYPE (stmt_vinfo);
scalar_type = TREE_TYPE (vector_type);
+ /* vectorizable_reduction has already rejected SLP reductions on
+ variable-length vectors. */
nunits = TYPE_VECTOR_SUBPARTS (vector_type);
gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def);
if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION)
{
tree indx_before_incr, indx_after_incr;
- int nunits_out = TYPE_VECTOR_SUBPARTS (vectype);
- int k;
+ poly_uint64 nunits_out = TYPE_VECTOR_SUBPARTS (vectype);
gimple *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
gcc_assert (gimple_assign_rhs_code (vec_stmt) == VEC_COND_EXPR);
vector size (STEP). */
/* Create a {1,2,3,...} vector. */
- tree_vector_builder vtemp (cr_index_vector_type, 1, 3);
- for (k = 0; k < 3; ++k)
- vtemp.quick_push (build_int_cst (cr_index_scalar_type, k + 1));
- tree series_vect = vtemp.build ();
+ tree series_vect = build_index_vector (cr_index_vector_type, 1, 1);
/* Create a vector of the step value. */
tree step = build_int_cst (cr_index_scalar_type, nunits_out);
tree data_eltype = TREE_TYPE (TREE_TYPE (new_phi_result));
tree idx_eltype = TREE_TYPE (TREE_TYPE (induction_index));
unsigned HOST_WIDE_INT el_size = tree_to_uhwi (TYPE_SIZE (idx_eltype));
- unsigned HOST_WIDE_INT v_size
- = el_size * TYPE_VECTOR_SUBPARTS (TREE_TYPE (induction_index));
+ poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (TREE_TYPE (induction_index));
+ /* Enforced by vectorizable_reduction, which ensures we have target
+ support before allowing a conditional reduction on variable-length
+ vectors. */
+ unsigned HOST_WIDE_INT v_size = el_size * nunits.to_constant ();
tree idx_val = NULL_TREE, val = NULL_TREE;
for (unsigned HOST_WIDE_INT off = 0; off < v_size; off += el_size)
{
{
bool reduce_with_shift = have_whole_vector_shift (mode);
int element_bitsize = tree_to_uhwi (bitsize);
+ /* Enforced by vectorizable_reduction, which disallows SLP reductions
+ for variable-length vectors and also requires direct target support
+ for loop reductions. */
int vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype));
tree vec_temp;
if (k == 1
&& gimple_assign_rhs_code (reduc_stmt) == COND_EXPR)
continue;
- tem = get_vectype_for_scalar_type (TREE_TYPE (op));
- if (! vectype_in
- || TYPE_VECTOR_SUBPARTS (tem) < TYPE_VECTOR_SUBPARTS (vectype_in))
- vectype_in = tem;
+ if (!vectype_in
+ || (GET_MODE_SIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype_in)))
+ < GET_MODE_SIZE (SCALAR_TYPE_MODE (TREE_TYPE (op)))))
+ vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op));
break;
}
gcc_assert (vectype_in);
/* To properly compute ncopies we are interested in the widest
input type in case we're looking at a widening accumulation. */
if (!vectype_in
- || TYPE_VECTOR_SUBPARTS (vectype_in) > TYPE_VECTOR_SUBPARTS (tem))
+ || (GET_MODE_SIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype_in)))
+ < GET_MODE_SIZE (SCALAR_TYPE_MODE (TREE_TYPE (tem)))))
vectype_in = tem;
}
gcc_assert (ncopies >= 1);
vec_mode = TYPE_MODE (vectype_in);
+ poly_uint64 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
if (code == COND_EXPR)
{
int scalar_precision
= GET_MODE_PRECISION (SCALAR_TYPE_MODE (scalar_type));
cr_index_scalar_type = make_unsigned_type (scalar_precision);
- cr_index_vector_type = build_vector_type
- (cr_index_scalar_type, TYPE_VECTOR_SUBPARTS (vectype_out));
+ cr_index_vector_type = build_vector_type (cr_index_scalar_type,
+ nunits_out);
if (direct_internal_fn_supported_p (IFN_REDUC_MAX, cr_index_vector_type,
OPTIMIZE_FOR_SPEED))
reduc_fn = IFN_REDUC_MAX;
}
+ if (reduc_fn == IFN_LAST && !nunits_out.is_constant ())
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "missing target support for reduction on"
+ " variable-length vectors.\n");
+ return false;
+ }
+
if ((double_reduc
|| STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) != TREE_CODE_REDUCTION)
&& ncopies > 1)
return false;
}
+ if (double_reduc && !nunits_out.is_constant ())
+ {
+ /* The current double-reduction code creates the initial value
+ element-by-element. */
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "double reduction not supported for variable-length"
+ " vectors.\n");
+ return false;
+ }
+
+ if (slp_node && !nunits_out.is_constant ())
+ {
+ /* The current SLP code creates the initial value element-by-element. */
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "SLP reduction not supported for variable-length"
+ " vectors.\n");
+ return false;
+ }
+
/* In case of widening multiplication by a constant, we update the type
of the constant to be the type of the other operand. We check that the
constant fits the type in the pattern recognition pass. */
return build2 (VEC_SERIES_EXPR, type, base, step);
}
+/* Build the linear series { BASE, BASE + STEP, BASE + STEP * 2, ... }
+   as a vector of unsigned integers.  The result has the same number of
+   units and the same number of bits as VEC_TYPE.  If the element type of
+   VEC_TYPE is not already an unsigned integer type, the result instead
+   uses an unsigned integer element type with the bit size of the original
+   element's mode.  */
+
+tree
+build_index_vector (tree vec_type, poly_uint64 base, poly_uint64 step)
+{
+  tree elt_type = TREE_TYPE (vec_type);
+  tree series_type = vec_type;
+  poly_uint64 unit_count = TYPE_VECTOR_SUBPARTS (vec_type);
+
+  bool is_unsigned_int
+    = INTEGRAL_TYPE_P (elt_type) && TYPE_UNSIGNED (elt_type);
+  if (!is_unsigned_int)
+    {
+      /* Swap in an unsigned integer element as wide as the original
+	 element's mode, keeping the number of units unchanged.  */
+      elt_type = build_nonstandard_integer_type
+	(GET_MODE_BITSIZE (SCALAR_TYPE_MODE (elt_type)), true);
+      series_type = build_vector_type (elt_type, unit_count);
+    }
+
+  /* Push the first three elements of the series:
+     BASE, BASE + STEP and BASE + STEP * 2.  */
+  tree_vector_builder series (series_type, 1, 3);
+  poly_uint64 value = base;
+  for (unsigned int pushed = 0; pushed < 3; ++pushed)
+    {
+      series.quick_push (build_int_cstu (elt_type, value));
+      value += step;
+    }
+  return series.build ();
+}
+
/* Something has messed with the elements of CONSTRUCTOR C after it was built;
calculate TREE_CONSTANT and TREE_SIDE_EFFECTS. */