unsigned char perm[MAX_VECT_LEN];
enum machine_mode vmode;
unsigned char nelt;
+ bool one_operand_p;
bool testing_p;
};
dperm.vmode = mode;
dperm.nelt = GET_MODE_NUNITS (mode);
dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
+ dperm.one_operand_p = true;
/* Extend to SImode using a paradoxical SUBREG. */
tmp1 = gen_reg_rtx (SImode);
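/* (Illustrative note: a paradoxical SUBREG is one whose outer mode is
wider than the inner register's mode, e.g. (subreg:SI (reg:QI x) 0);
the bits beyond the inner mode are undefined.) */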
rtx target, op0, op1, x;
rtx rperm[32], vperm;
- if (d->op0 == d->op1)
+ if (d->one_operand_p)
return false;
if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
;
rtx rperm[8], vperm;
unsigned i;
- if (!TARGET_AVX || d->vmode != V8SFmode || d->op0 != d->op1)
+ if (!TARGET_AVX || d->vmode != V8SFmode || !d->one_operand_p)
return false;
/* We can only permute within the 128-bit lane. */
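/* (Illustrative example, selector values assumed: vpermilps chooses each
element of the result only from the four elements of the same 128-bit
lane of its source, so a V8SFmode selector whose element 0 asks for
source element 4 needs a cross-lane move and cannot be handled here.) */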
nelt = d->nelt;
- if (d->op0 != d->op1)
+ if (!d->one_operand_p)
{
if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
{
else
{
eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
- if (d->op0 != d->op1)
+ if (!d->one_operand_p)
mask = 2 * nelt - 1;
else if (vmode == V16QImode)
mask = nelt - 1;
target = gen_lowpart (vmode, d->target);
op0 = gen_lowpart (vmode, d->op0);
- if (d->op0 == d->op1)
+ if (d->one_operand_p)
{
if (vmode == V16QImode)
emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
/* Check plain VEC_SELECT first, because AVX has instructions that could
match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
input where SEL+CONCAT may not. */
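/* (Illustrative RTL shapes, modes and indices assumed for exposition:
the one-operand case can match a plain
  (vec_select:V4SI (reg:V4SI x)
                   (parallel [(const_int 0) (const_int 2)
                              (const_int 1) (const_int 3)]))
while the two-operand case needs
  (vec_select:V4SI (vec_concat:V8SI (reg:V4SI x) (reg:V4SI y))
                   (parallel ...))
which is the SEL+CONCAT form referred to above.) */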
- if (d->op0 == d->op1)
+ if (d->one_operand_p)
{
int mask = nelt - 1;
bool identity_perm = true;
return true;
/* Recognize interleave style patterns with reversed operands. */
- if (d->op0 != d->op1)
+ if (!d->one_operand_p)
{
for (i = 0; i < nelt; ++i)
{
unsigned i;
bool ok;
- if (d->vmode != V8HImode || d->op0 != d->op1)
+ if (d->vmode != V8HImode || !d->one_operand_p)
return false;
/* The two permutations only operate in 64-bit lanes. */
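/* (Illustrative: pshuflw reorders only the four low words and pshufhw
only the four high words of a V8HImode vector, so this path applies
only when no element of d->perm crosses the 64-bit halves.) */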
gen_lowpart (TImode, d->op0), shift));
d->op0 = d->op1 = d->target;
+ d->one_operand_p = true;
in_order = true;
for (i = 0; i < nelt; ++i)
if (GET_MODE_SIZE (d->vmode) == 16)
{
- if (d->op0 == d->op1)
+ if (d->one_operand_p)
return false;
}
else if (GET_MODE_SIZE (d->vmode) == 32)
{
if (!TARGET_AVX)
return false;
- /* For 32-byte modes allow even d->op0 == d->op1.
+ /* For 32-byte modes allow this even if d->one_operand_p.
The lack of cross-lane shuffling in some instructions
might prevent a single insn shuffle. */
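/* (Example of the limitation, assumed for illustration: the 256-bit
vpshufb shuffles bytes only within each 128-bit lane, so even a
one-operand V32QImode permutation that moves bytes across the lane
boundary cannot be a single insn and may need the two-insn
decomposition attempted here.) */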
dfinal = *d;
if (nzcnt == 1)
{
- gcc_assert (d->op0 == d->op1);
+ gcc_assert (d->one_operand_p);
nonzero_halves[1] = nonzero_halves[0];
same_halves = true;
}
- else if (d->op0 == d->op1)
+ else if (d->one_operand_p)
{
gcc_assert (nonzero_halves[0] == 0);
gcc_assert (nonzero_halves[1] == 1);
}
}
}
- else if (d->op0 == d->op1)
+ else if (d->one_operand_p)
return false;
else if (TARGET_AVX2
&& (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
}
dfinal.op0 = gen_reg_rtx (dfinal.vmode);
dfinal.op1 = dfinal.op0;
+ dfinal.one_operand_p = true;
dremap.target = dfinal.op0;
/* Test if the final remap can be done with a single insn. For V4SFmode or
if (!(TARGET_AVX2
&& (d->vmode == V32QImode || d->vmode == V16HImode)
- && d->op0 == d->op1))
+ && d->one_operand_p))
return false;
contents[0] = 0;
dremap.target = gen_reg_rtx (V4DImode);
dremap.op0 = gen_lowpart (V4DImode, d->op0);
dremap.op1 = dremap.op0;
+ dremap.one_operand_p = true;
for (i = 0; i < 2; ++i)
{
unsigned int cnt = 0;
dfinal = *d;
dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
dfinal.op1 = dfinal.op0;
+ dfinal.one_operand_p = true;
for (i = 0, j = 0; i < nelt; ++i)
{
if (i == nelt2)
return false;
dsecond = *d;
- if (d->op0 == d->op1)
- dsecond.op1 = gen_reg_rtx (d->vmode);
+ dsecond.one_operand_p = false;
dsecond.testing_p = true;
/* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128 immediate
used for vperm2f128 on d->op0 and d->op1. */
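/* (Worked example with an assumed loop value: for perm == 6,
((6 << 2) | 6) & 0x33 == 0x12, i.e. imm8 bits 1:0 == 2, destination
low lane taken from the second source's low lane, and bits 5:4 == 1,
destination high lane taken from the first source's high lane.) */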
dsecond.testing_p = false;
dfirst = *d;
- if (d->op0 == d->op1)
- dfirst.target = dsecond.op1;
- else
- dfirst.target = gen_reg_rtx (d->vmode);
+ dfirst.target = gen_reg_rtx (d->vmode);
for (i = 0; i < nelt; i++)
dfirst.perm[i] = (i & (nelt2 - 1))
+ ((perm >> (2 * (i >= nelt2))) & 3) * nelt2;
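/* (Continuing the assumed perm == 6 example with nelt == 8, nelt2 == 4:
dfirst.perm becomes { 8, 9, 10, 11, 4, 5, 6, 7 }, i.e. the low lane of
d->op1 followed by the high lane of d->op0, which is what vperm2f128
with immediate 0x12 produces, assuming d->op0 is its first and d->op1
its second operand.) */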
return true;
}
- /* For d->op0 == d->op1 the only useful vperm2f128 permutation
- is 0x10. */
- if (d->op0 == d->op1)
+ /* For one operand, the only useful vperm2f128 permutation is 0x10. */
+ if (d->one_operand_p)
return false;
}
unsigned i, nelt;
rtx (*gen) (rtx, rtx, rtx);
- if (d->op0 == d->op1)
+ if (d->one_operand_p)
return false;
if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
;
if (!TARGET_AVX
|| TARGET_AVX2
|| (d->vmode != V8SFmode && d->vmode != V4DFmode)
- || d->op0 != d->op1)
+ || !d->one_operand_p)
return false;
dfirst = *d;
dsecond = *d;
dsecond.op0 = dfirst.target;
dsecond.op1 = dfirst.target;
+ dsecond.one_operand_p = true;
dsecond.target = gen_reg_rtx (dsecond.vmode);
for (i = 0; i < nelt; i++)
dsecond.perm[i] = i ^ nelt2;
if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
return false;
- gcc_assert (d->op0 != d->op1);
+ gcc_assert (!d->one_operand_p);
nelt = d->nelt;
eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
unsigned int i, nelt, eltsz;
if (!TARGET_AVX2
- || d->op0 != d->op1
+ || !d->one_operand_p
|| (d->vmode != V32QImode && d->vmode != V16HImode))
return false;
unsigned int i, nelt, eltsz;
if (!TARGET_AVX2
- || d->op0 == d->op1
+ || d->one_operand_p
|| (d->vmode != V32QImode && d->vmode != V16HImode))
return false;
{
unsigned i, elt, nelt = d->nelt;
- if (d->op0 != d->op1)
+ if (!d->one_operand_p)
return false;
elt = d->perm[0];
bool used[4];
if (!TARGET_AVX2
- || d->op0 == d->op1
+ || d->one_operand_p
|| (d->vmode != V32QImode && d->vmode != V16HImode))
return false;
perm[i] = ei;
}
+ d.one_operand_p = true;
switch (which)
{
default:
case 3:
if (!rtx_equal_p (d.op0, d.op1))
- break;
-
+ {
+ d.one_operand_p = false;
+ break;
+ }
/* The elements of PERM do not suggest that only the first operand
is used, but both operands are identical. Allow easier matching
of the permutation by folding the permutation into the single
input vector. */
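/* (For example, with nelt == 4 and identical operands, a selector of
{ 2, 7, 0, 5 } is equivalent to { 2, 3, 0, 1 } on the single input;
the masking with nelt - 1 in the shared case 2 code below performs
exactly that folding.) */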
- for (i = 0; i < nelt; ++i)
- if (d.perm[i] >= nelt)
- d.perm[i] -= nelt;
/* FALLTHRU */
- case 1:
- d.op1 = d.op0;
- break;
-
case 2:
for (i = 0; i < nelt; ++i)
- d.perm[i] -= nelt;
+ d.perm[i] &= nelt - 1;
d.op0 = d.op1;
break;
+
+ case 1:
+ d.op1 = d.op0;
+ break;
}
if (ix86_expand_vec_perm_const_1 (&d))
return true;
- /* If the mask says both arguments are needed, but they are the same,
- the above tried to expand with d.op0 == d.op1. If that didn't work,
- retry with d.op0 != d.op1 as that is what testing has been done with. */
- if (which == 3 && d.op0 == d.op1)
+ /* If the selector says both arguments are needed, but the operands are the
+ same, the above tried to expand with one_operand_p and flattened selector.
+ If that didn't work, retry without one_operand_p; we succeeded with that
+ during testing. */
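/* (E.g., with assumed values nelt == 4, a selector { 0, 6, 1, 7 } on
rtx_equal_p operands is first tried as { 0, 2, 1, 3 } on one input;
only if that fails is the original two-input form attempted here.) */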
+ if (which == 3 && d.one_operand_p)
{
- rtx seq;
- bool ok;
-
+ d.one_operand_p = false;
memcpy (d.perm, perm, sizeof (perm));
- d.op1 = gen_reg_rtx (d.vmode);
- start_sequence ();
- ok = ix86_expand_vec_perm_const_1 (&d);
- seq = get_insns ();
- end_sequence ();
- if (ok)
- {
- emit_move_insn (d.op1, d.op0);
- emit_insn (seq);
- return true;
- }
+ return ix86_expand_vec_perm_const_1 (&d);
}
return false;
{
struct expand_vec_perm_d d;
unsigned int i, nelt, which;
- bool ret, one_vec;
+ bool ret;
d.vmode = vmode;
d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
d.perm[i] -= nelt;
/* Check whether the mask can be applied to the vector type. */
- one_vec = (which != 3);
+ d.one_operand_p = (which != 3);
/* Implementable with shufps or pshufd. */
- if (one_vec && (d.vmode == V4SFmode || d.vmode == V4SImode))
+ if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode))
return true;
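/* (Reasoning, for illustration: the shufps/pshufd immediate holds four
2-bit source indices, and with a single input every ordering of four
elements is encodable, so no further checking is needed for these
modes.) */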
/* Otherwise we have to go through the motions and see if we can
figure out how to generate the requested permutation. */
d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
- if (!one_vec)
+ if (!d.one_operand_p)
d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
start_sequence ();
d.op1 = op1;
d.vmode = GET_MODE (targ);
d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
+ d.one_operand_p = false;
d.testing_p = false;
for (i = 0; i < nelt; ++i)