+2014-10-02 Jakub Jelinek <jakub@redhat.com>
+
+ PR target/62128
+ * config/i386/i386.c (expand_vec_perm_1): Try expand_vec_perm_palignr
+ if it expands to a single insn only.
+ (expand_vec_perm_palignr): Add SINGLE_INSN_ONLY_P argument. If true,
+ fail unless in_order is true. Add forward declaration.
+ (expand_vec_perm_vperm2f128): Fix up comment about which permutation
+ is useful for one_operand_p.
+ (ix86_expand_vec_perm_const_1): Adjust expand_vec_perm_palignr caller.
+
2014-10-01 Jan Hubicka <hubicka@ucw.cz>
* cgraphclones.c (build_function_type_skip_args): Do not make new
static bool canonicalize_perm (struct expand_vec_perm_d *d);
static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
+static bool expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool);
/* Get a vector mode of the same size as the original but with elements
twice as wide. This is only guaranteed to apply to integral vectors. */
if (expand_vec_perm_pshufb (d))
return true;
+ /* Try the AVX2 vpalignr instruction. */
+ if (expand_vec_perm_palignr (d, true))
+ return true;
+
/* Try the AVX512F vpermi2 instructions. */
rtx vec[64];
enum machine_mode mode = d->vmode;
the permutation using the SSSE3 palignr instruction. This succeeds
when all of the elements in PERM fit within one vector and we merely
need to shift them down so that a single vector permutation has a
- chance to succeed. */
+ chance to succeed. If SINGLE_INSN_ONLY_P, succeed only if
+ the vpalignr instruction itself can perform the requested permutation. */
static bool
-expand_vec_perm_palignr (struct expand_vec_perm_d *d)
+expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool single_insn_only_p)
{
unsigned i, nelt = d->nelt;
unsigned min, max;
/* Given that we have SSSE3, we know we'll be able to implement the
single operand permutation after the palignr with pshufb for
- 128-bit vectors. */
- if (d->testing_p && GET_MODE_SIZE (d->vmode) == 16)
+ 128-bit vectors. If SINGLE_INSN_ONLY_P, in_order has to be computed
+ first. */
+ if (d->testing_p && GET_MODE_SIZE (d->vmode) == 16 && !single_insn_only_p)
return true;
dcopy = *d;
}
dcopy.one_operand_p = true;
+ if (single_insn_only_p && !in_order)
+ return false;
+
/* For AVX2, test whether we can permute the result in one instruction. */
if (d->testing_p)
{
return true;
}
- /* For one operand, the only useful vperm2f128 permutation is 0x10. */
+ /* For one operand, the only useful vperm2f128 permutation is 0x01
+ aka lanes swap. */
if (d->one_operand_p)
return false;
}
if (expand_vec_perm_pshuflw_pshufhw (d))
return true;
- if (expand_vec_perm_palignr (d))
+ if (expand_vec_perm_palignr (d, false))
return true;
if (expand_vec_perm_interleave2 (d))