+2018-01-02 Richard Sandiford <richard.sandiford@linaro.org>
+
+ * optabs-query.h (can_vec_perm_p): Delete.
+ (can_vec_perm_var_p, can_vec_perm_const_p): Declare.
+ * optabs-query.c (can_vec_perm_p): Split into...
+ (can_vec_perm_var_p, can_vec_perm_const_p): ...these two functions.
+ (can_mult_highpart_p): Use can_vec_perm_const_p to test whether a
+ particular selector is valid.
+ * tree-ssa-forwprop.c (simplify_vector_constructor): Likewise.
+ * tree-vect-data-refs.c (vect_grouped_store_supported): Likewise.
+ (vect_grouped_load_supported): Likewise.
+ (vect_shift_permute_load_chain): Likewise.
+ * tree-vect-slp.c (vect_build_slp_tree_1): Likewise.
+ (vect_transform_slp_perm_load): Likewise.
+ * tree-vect-stmts.c (perm_mask_for_reverse): Likewise.
+ (vectorizable_bswap): Likewise.
+ (vect_gen_perm_mask_checked): Likewise.
+ * fold-const.c (fold_ternary_loc): Likewise. Don't take
+ implementations of variable permutation vectors into account
+ when deciding which selector to use.
+ * tree-vect-loop.c (have_whole_vector_shift): Don't check whether
+ vec_perm_const_optab is supported; instead use can_vec_perm_const_p
+ with a false third argument.
+ * tree-vect-generic.c (lower_vec_perm): Use can_vec_perm_const_p
+ to test whether the constant selector is valid and can_vec_perm_var_p
+ to test whether a variable selector is valid.
+
2018-01-02 Richard Sandiford <richard.sandiford@linaro.org>
* optabs-query.h (can_vec_perm_p): Take a const vec_perm_indices *.
argument permutation while still allowing an equivalent
2-argument version. */
if (need_mask_canon && arg2 == op2
- && !can_vec_perm_p (TYPE_MODE (type), false, &sel)
- && can_vec_perm_p (TYPE_MODE (type), false, &sel2))
+ && !can_vec_perm_const_p (TYPE_MODE (type), sel, false)
+ && can_vec_perm_const_p (TYPE_MODE (type), sel2, false))
{
need_mask_canon = need_mask_canon2;
sel = sel2;
return opt_machine_mode ();
}
-/* Return true if VEC_PERM_EXPR of arbitrary input vectors can be
- expanded using SIMD extensions of the CPU. SEL may be NULL, which
- stands for an unknown constant. Note that additional permutations
- representing whole-vector shifts may also be handled via the vec_shr
- optab, but only where the second input vector is entirely constant
- zeroes; this case is not dealt with here. */
+/* Return true if VEC_PERM_EXPRs with variable selector operands can be
+ expanded using SIMD extensions of the CPU. MODE is the mode of the
+ vectors being permuted. */
bool
-can_vec_perm_p (machine_mode mode, bool variable, const vec_perm_indices *sel)
+can_vec_perm_var_p (machine_mode mode)
{
- machine_mode qimode;
-
/* If the target doesn't implement a vector mode for the vector type,
then no operations are supported. */
if (!VECTOR_MODE_P (mode))
return false;
- if (!variable)
- {
- if (direct_optab_handler (vec_perm_const_optab, mode) != CODE_FOR_nothing
- && (sel == NULL
- || targetm.vectorize.vec_perm_const_ok == NULL
- || targetm.vectorize.vec_perm_const_ok (mode, *sel)))
- return true;
- }
-
if (direct_optab_handler (vec_perm_optab, mode) != CODE_FOR_nothing)
return true;
/* We allow fallback to a QI vector mode, and adjust the mask. */
+ machine_mode qimode;
if (!qimode_for_vec_perm (mode).exists (&qimode))
return false;
- /* ??? For completeness, we ought to check the QImode version of
- vec_perm_const_optab. But all users of this implicit lowering
- feature implement the variable vec_perm_optab. */
if (direct_optab_handler (vec_perm_optab, qimode) == CODE_FOR_nothing)
return false;
/* In order to support the lowering of variable permutations,
we need to support shifts and adds. */
- if (variable)
+ if (GET_MODE_UNIT_SIZE (mode) > 2
+ && optab_handler (ashl_optab, mode) == CODE_FOR_nothing
+ && optab_handler (vashl_optab, mode) == CODE_FOR_nothing)
+ return false;
+ if (optab_handler (add_optab, qimode) == CODE_FOR_nothing)
+ return false;
+
+ return true;
+}
+
+/* Return true if the target directly supports VEC_PERM_EXPRs on vectors
+ of mode MODE using the selector SEL. ALLOW_VARIABLE_P is true if it
+ is acceptable to force the selector into a register and use a variable
+ permute (if the target supports that).
+
+ Note that additional permutations representing whole-vector shifts may
+ also be handled via the vec_shr optab, but only where the second input
+ vector is entirely constant zeroes; this case is not dealt with here. */
+
+bool
+can_vec_perm_const_p (machine_mode mode, const vec_perm_indices &sel,
+ bool allow_variable_p)
+{
+ /* If the target doesn't implement a vector mode for the vector type,
+ then no operations are supported. */
+ if (!VECTOR_MODE_P (mode))
+ return false;
+
+ /* It's probably cheaper to test for the variable case first. */
+ if (allow_variable_p)
+ {
+ if (direct_optab_handler (vec_perm_optab, mode) != CODE_FOR_nothing)
+ return true;
+
+ /* Unlike can_vec_perm_var_p, we don't need to test for optabs
+ related to computing the QImode selector, since that happens at
+ compile time. */
+ machine_mode qimode;
+ if (qimode_for_vec_perm (mode).exists (&qimode)
+ && direct_optab_handler (vec_perm_optab, qimode) != CODE_FOR_nothing)
+ return true;
+ }
+
+ if (direct_optab_handler (vec_perm_const_optab, mode) != CODE_FOR_nothing)
{
- if (GET_MODE_UNIT_SIZE (mode) > 2
- && optab_handler (ashl_optab, mode) == CODE_FOR_nothing
- && optab_handler (vashl_optab, mode) == CODE_FOR_nothing)
- return false;
- if (optab_handler (add_optab, qimode) == CODE_FOR_nothing)
- return false;
+ if (targetm.vectorize.vec_perm_const_ok == NULL
+ || targetm.vectorize.vec_perm_const_ok (mode, sel))
+ return true;
+
+ /* ??? For completeness, we ought to check the QImode version of
+ vec_perm_const_optab. But all users of this implicit lowering
+ feature implement the variable vec_perm_optab. */
}
- return true;
+ return false;
}
/* Find a widening optab even if it doesn't widen as much as we want.
sel.quick_push (!BYTES_BIG_ENDIAN
+ (i & ~1)
+ ((i & 1) ? nunits : 0));
- if (can_vec_perm_p (mode, false, &sel))
+ if (can_vec_perm_const_p (mode, sel))
return 2;
}
}
auto_vec_perm_indices sel (nunits);
for (i = 0; i < nunits; ++i)
sel.quick_push (2 * i + (BYTES_BIG_ENDIAN ? 0 : 1));
- if (can_vec_perm_p (mode, false, &sel))
+ if (can_vec_perm_const_p (mode, sel))
return 3;
}
}
enum insn_code can_fix_p (machine_mode, machine_mode, int, bool *);
bool can_conditionally_move_p (machine_mode mode);
opt_machine_mode qimode_for_vec_perm (machine_mode);
-bool can_vec_perm_p (machine_mode, bool, const vec_perm_indices *);
+bool can_vec_perm_var_p (machine_mode);
+bool can_vec_perm_const_p (machine_mode, const vec_perm_indices &,
+ bool = true);
/* Find a widening optab even if it doesn't widen as much as we want. */
#define find_widening_optab_handler(A, B, C) \
find_widening_optab_handler_and_mode (A, B, C, NULL)
{
tree mask_type;
- if (!can_vec_perm_p (TYPE_MODE (type), false, &sel))
+ if (!can_vec_perm_const_p (TYPE_MODE (type), sel))
return false;
mask_type
= build_vector_type (build_nonstandard_integer_type (elem_size, 1),
if (3 * i + nelt2 < nelt)
sel[3 * i + nelt2] = 0;
}
- if (!can_vec_perm_p (mode, false, &sel))
+ if (!can_vec_perm_const_p (mode, sel))
{
if (dump_enabled_p ())
dump_printf (MSG_MISSED_OPTIMIZATION,
- "permutaion op not supported by target.\n");
+ "permutation op not supported by target.\n");
return false;
}
if (3 * i + nelt2 < nelt)
sel[3 * i + nelt2] = nelt + j2++;
}
- if (!can_vec_perm_p (mode, false, &sel))
+ if (!can_vec_perm_const_p (mode, sel))
{
if (dump_enabled_p ())
dump_printf (MSG_MISSED_OPTIMIZATION,
- "permutaion op not supported by target.\n");
+ "permutation op not supported by target.\n");
return false;
}
}
sel[i * 2] = i;
sel[i * 2 + 1] = i + nelt;
}
- if (can_vec_perm_p (mode, false, &sel))
+ if (can_vec_perm_const_p (mode, sel))
{
for (i = 0; i < nelt; i++)
sel[i] += nelt / 2;
- if (can_vec_perm_p (mode, false, &sel))
+ if (can_vec_perm_const_p (mode, sel))
return true;
}
}
sel[i] = 3 * i + k;
else
sel[i] = 0;
- if (!can_vec_perm_p (mode, false, &sel))
+ if (!can_vec_perm_const_p (mode, sel))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
sel[i] = i;
else
sel[i] = nelt + ((nelt + k) % 3) + 3 * (j++);
- if (!can_vec_perm_p (mode, false, &sel))
+ if (!can_vec_perm_const_p (mode, sel))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
gcc_assert (pow2p_hwi (count));
for (i = 0; i < nelt; i++)
sel[i] = i * 2;
- if (can_vec_perm_p (mode, false, &sel))
+ if (can_vec_perm_const_p (mode, sel))
{
for (i = 0; i < nelt; i++)
sel[i] = i * 2 + 1;
- if (can_vec_perm_p (mode, false, &sel))
+ if (can_vec_perm_const_p (mode, sel))
return true;
}
}
sel[i] = i * 2;
for (i = 0; i < nelt / 2; ++i)
sel[nelt / 2 + i] = i * 2 + 1;
- if (!can_vec_perm_p (TYPE_MODE (vectype), false, &sel))
+ if (!can_vec_perm_const_p (TYPE_MODE (vectype), sel))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
sel[i] = i * 2 + 1;
for (i = 0; i < nelt / 2; ++i)
sel[nelt / 2 + i] = i * 2;
- if (!can_vec_perm_p (TYPE_MODE (vectype), false, &sel))
+ if (!can_vec_perm_const_p (TYPE_MODE (vectype), sel))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
For vector length 8 it is {4 5 6 7 8 9 10 11}. */
for (i = 0; i < nelt; i++)
sel[i] = nelt / 2 + i;
- if (!can_vec_perm_p (TYPE_MODE (vectype), false, &sel))
+ if (!can_vec_perm_const_p (TYPE_MODE (vectype), sel))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
sel[i] = i;
for (i = nelt / 2; i < nelt; i++)
sel[i] = nelt + i;
- if (!can_vec_perm_p (TYPE_MODE (vectype), false, &sel))
+ if (!can_vec_perm_const_p (TYPE_MODE (vectype), sel))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
sel[i] = 3 * k + (l % 3);
k++;
}
- if (!can_vec_perm_p (TYPE_MODE (vectype), false, &sel))
+ if (!can_vec_perm_const_p (TYPE_MODE (vectype), sel))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
For vector length 8 it is {6 7 8 9 10 11 12 13}. */
for (i = 0; i < nelt; i++)
sel[i] = 2 * (nelt / 3) + (nelt % 3) + i;
- if (!can_vec_perm_p (TYPE_MODE (vectype), false, &sel))
+ if (!can_vec_perm_const_p (TYPE_MODE (vectype), sel))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
For vector length 8 it is {5 6 7 8 9 10 11 12}. */
for (i = 0; i < nelt; i++)
sel[i] = 2 * (nelt / 3) + 1 + i;
- if (!can_vec_perm_p (TYPE_MODE (vectype), false, &sel))
+ if (!can_vec_perm_const_p (TYPE_MODE (vectype), sel))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
For vector length 8 it is {3 4 5 6 7 8 9 10}. */
for (i = 0; i < nelt; i++)
sel[i] = (nelt / 3) + (nelt % 3) / 2 + i;
- if (!can_vec_perm_p (TYPE_MODE (vectype), false, &sel))
+ if (!can_vec_perm_const_p (TYPE_MODE (vectype), sel))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
For vector length 8 it is {5 6 7 8 9 10 11 12}. */
for (i = 0; i < nelt; i++)
sel[i] = 2 * (nelt / 3) + (nelt % 3) / 2 + i;
- if (!can_vec_perm_p (TYPE_MODE (vectype), false, &sel))
+ if (!can_vec_perm_const_p (TYPE_MODE (vectype), sel))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
sel_int.quick_push (TREE_INT_CST_LOW (VECTOR_CST_ELT (mask, i))
& (2 * elements - 1));
- if (can_vec_perm_p (TYPE_MODE (vect_type), false, &sel_int))
+ if (can_vec_perm_const_p (TYPE_MODE (vect_type), sel_int))
{
gimple_assign_set_rhs3 (stmt, mask);
update_stmt (stmt);
}
}
}
- else if (can_vec_perm_p (TYPE_MODE (vect_type), true, NULL))
+ else if (can_vec_perm_var_p (TYPE_MODE (vect_type)))
return;
warning_at (loc, OPT_Wvector_operation_performance,
if (optab_handler (vec_shr_optab, mode) != CODE_FOR_nothing)
return true;
- if (direct_optab_handler (vec_perm_const_optab, mode) == CODE_FOR_nothing)
- return false;
-
unsigned int i, nelt = GET_MODE_NUNITS (mode);
auto_vec_perm_indices sel (nelt);
{
sel.truncate (0);
calc_vec_perm_mask_for_shift (i, nelt, &sel);
- if (!can_vec_perm_p (mode, false, &sel))
+ if (!can_vec_perm_const_p (mode, sel, false))
return false;
}
return true;
elt += count;
sel.quick_push (elt);
}
- if (!can_vec_perm_p (TYPE_MODE (vectype), false, &sel))
+ if (!can_vec_perm_const_p (TYPE_MODE (vectype), sel))
{
for (i = 0; i < group_size; ++i)
if (gimple_assign_rhs_code (stmts[i]) == alt_stmt_code)
if (index == nunits)
{
if (! noop_p
- && ! can_vec_perm_p (mode, false, &mask))
+ && ! can_vec_perm_const_p (mode, mask))
{
if (dump_enabled_p ())
{
for (i = 0; i < nunits; ++i)
sel.quick_push (nunits - 1 - i);
- if (!can_vec_perm_p (TYPE_MODE (vectype), false, &sel))
+ if (!can_vec_perm_const_p (TYPE_MODE (vectype), sel))
return NULL_TREE;
return vect_gen_perm_mask_checked (vectype, sel);
}
for (unsigned j = 0; j < word_bytes; ++j)
elts.quick_push ((i + 1) * word_bytes - j - 1);
- if (! can_vec_perm_p (TYPE_MODE (char_vectype), false, &elts))
+ if (!can_vec_perm_const_p (TYPE_MODE (char_vectype), elts))
return false;
if (! vec_stmt)
/* Given a vector type VECTYPE, turns permutation SEL into the equivalent
VECTOR_CST mask. No checks are made that the target platform supports the
- mask, so callers may wish to test can_vec_perm_p separately, or use
+ mask, so callers may wish to test can_vec_perm_const_p separately, or use
vect_gen_perm_mask_checked. */
tree
return mask_elts.build ();
}
-/* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_p,
+/* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
i.e. that the target supports the pattern _for arbitrary input vectors_. */
tree
vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
{
- gcc_assert (can_vec_perm_p (TYPE_MODE (vectype), false, &sel));
+ gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel));
return vect_gen_perm_mask_any (vectype, sel);
}