/* Operand has no constraints, anything is OK. */
win = !n_alternatives;
- alternative_mask enabled = recog_data.enabled_alternatives;
+ alternative_mask preferred = get_preferred_alternatives (insn);
for (j = 0; j < n_alternatives; j++, op_alt += n_operands)
{
- if (!TEST_BIT (enabled, j))
+ if (!TEST_BIT (preferred, j))
continue;
if (op_alt[i].anything_ok
|| (op_alt[i].matches != -1
if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
return (pic_offset_table_rtx
&& rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
+ else if (!REG_P (x))
+ return false;
else if (pic_offset_table_rtx)
- return REG_P (x) && REGNO (x) == REGNO (pic_offset_table_rtx);
+ {
+ if (REGNO (x) == REGNO (pic_offset_table_rtx))
+ return true;
+ if (HARD_REGISTER_P (x)
+ && !HARD_REGISTER_P (pic_offset_table_rtx)
+ && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx))
+ return true;
+ return false;
+ }
else
- return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
+ return REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
}
/* Helper function for ix86_delegitimize_address.
leal (%ebx, %ecx, 4), %ecx
...
movl foo@GOTOFF(%ecx), %edx
- in which case we return (%ecx - %ebx) + foo.
-
- Note that when pseudo_pic_reg is used we can generate it only
- before reload_completed. */
+ in which case we return (%ecx - %ebx) + foo
+ or (%ecx - _GLOBAL_OFFSET_TABLE_) + foo if pseudo_pic_reg
+ and reload has completed. */
if (pic_offset_table_rtx
&& (!reload_completed || !ix86_use_pseudo_pic_reg ()))
result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
pic_offset_table_rtx),
result);
+ else if (pic_offset_table_rtx && !TARGET_MACHO && !TARGET_VXWORKS_RTP)
+ {
+ rtx tmp = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
+ tmp = gen_rtx_MINUS (Pmode, copy_rtx (addend), tmp);
+ result = gen_rtx_PLUS (Pmode, tmp, result);
+ }
else
return orig_x;
}
return true;
}
+/* AVX512F does support 64-byte integer vector operations,
+ thus the longest vector we are faced with is V64QImode. */
+#define MAX_VECT_LEN 64
+
+struct expand_vec_perm_d
+{
+ rtx target, op0, op1;
+ unsigned char perm[MAX_VECT_LEN];
+ enum machine_mode vmode;
+ unsigned char nelt;
+ bool one_operand_p;
+ bool testing_p;
+};
+
static bool
-ix86_expand_vec_perm_vpermi2 (rtx target, rtx op0, rtx mask, rtx op1)
+ix86_expand_vec_perm_vpermi2 (rtx target, rtx op0, rtx mask, rtx op1,
+ struct expand_vec_perm_d *d)
{
- enum machine_mode mode = GET_MODE (op0);
+ /* ix86_expand_vec_perm_vpermi2 is called from both const and non-const
+ expander, so args are either in d, or in op0, op1 etc. */
+ enum machine_mode mode = GET_MODE (d ? d->op0 : op0);
+ enum machine_mode maskmode = mode;
+ rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
+
switch (mode)
{
+ case V8HImode:
+ if (TARGET_AVX512VL && TARGET_AVX512BW)
+ gen = gen_avx512vl_vpermi2varv8hi3;
+ break;
+ case V16HImode:
+ if (TARGET_AVX512VL && TARGET_AVX512BW)
+ gen = gen_avx512vl_vpermi2varv16hi3;
+ break;
+ case V32HImode:
+ if (TARGET_AVX512BW)
+ gen = gen_avx512bw_vpermi2varv32hi3;
+ break;
+ case V4SImode:
+ if (TARGET_AVX512VL)
+ gen = gen_avx512vl_vpermi2varv4si3;
+ break;
+ case V8SImode:
+ if (TARGET_AVX512VL)
+ gen = gen_avx512vl_vpermi2varv8si3;
+ break;
case V16SImode:
- emit_insn (gen_avx512f_vpermi2varv16si3 (target, op0,
- force_reg (V16SImode, mask),
- op1));
- return true;
+ if (TARGET_AVX512F)
+ gen = gen_avx512f_vpermi2varv16si3;
+ break;
+ case V4SFmode:
+ if (TARGET_AVX512VL)
+ {
+ gen = gen_avx512vl_vpermi2varv4sf3;
+ maskmode = V4SImode;
+ }
+ break;
+ case V8SFmode:
+ if (TARGET_AVX512VL)
+ {
+ gen = gen_avx512vl_vpermi2varv8sf3;
+ maskmode = V8SImode;
+ }
+ break;
case V16SFmode:
- emit_insn (gen_avx512f_vpermi2varv16sf3 (target, op0,
- force_reg (V16SImode, mask),
- op1));
- return true;
+ if (TARGET_AVX512F)
+ {
+ gen = gen_avx512f_vpermi2varv16sf3;
+ maskmode = V16SImode;
+ }
+ break;
+ case V2DImode:
+ if (TARGET_AVX512VL)
+ gen = gen_avx512vl_vpermi2varv2di3;
+ break;
+ case V4DImode:
+ if (TARGET_AVX512VL)
+ gen = gen_avx512vl_vpermi2varv4di3;
+ break;
case V8DImode:
- emit_insn (gen_avx512f_vpermi2varv8di3 (target, op0,
- force_reg (V8DImode, mask),
- op1));
- return true;
+ if (TARGET_AVX512F)
+ gen = gen_avx512f_vpermi2varv8di3;
+ break;
+ case V2DFmode:
+ if (TARGET_AVX512VL)
+ {
+ gen = gen_avx512vl_vpermi2varv2df3;
+ maskmode = V2DImode;
+ }
+ break;
+ case V4DFmode:
+ if (TARGET_AVX512VL)
+ {
+ gen = gen_avx512vl_vpermi2varv4df3;
+ maskmode = V4DImode;
+ }
+ break;
case V8DFmode:
- emit_insn (gen_avx512f_vpermi2varv8df3 (target, op0,
- force_reg (V8DImode, mask),
- op1));
- return true;
+ if (TARGET_AVX512F)
+ {
+ gen = gen_avx512f_vpermi2varv8df3;
+ maskmode = V8DImode;
+ }
+ break;
default:
- return false;
+ break;
}
+
+ if (gen == NULL)
+ return false;
+
+ /* ix86_expand_vec_perm_vpermi2 is called from both const and non-const
+ expander, so args are either in d, or in op0, op1 etc. */
+ if (d)
+ {
+ rtx vec[64];
+ target = d->target;
+ op0 = d->op0;
+ op1 = d->op1;
+ for (int i = 0; i < d->nelt; ++i)
+ vec[i] = GEN_INT (d->perm[i]);
+ mask = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (d->nelt, vec));
+ }
+
+ emit_insn (gen (target, op0, force_reg (maskmode, mask), op1));
+ return true;
}
/* Expand a variable vector permutation. */
e = GET_MODE_UNIT_SIZE (mode);
gcc_assert (w <= 64);
- if (TARGET_AVX512F
- && ix86_expand_vec_perm_vpermi2 (target, op0, mask, op1))
+ if (ix86_expand_vec_perm_vpermi2 (target, op0, mask, op1, NULL))
return;
if (TARGET_AVX2)
switch (imode)
{
+ case V64QImode:
+ if (unsigned_p)
+ unpack = gen_avx512bw_zero_extendv32qiv32hi2;
+ else
+ unpack = gen_avx512bw_sign_extendv32qiv32hi2;
+ halfmode = V32QImode;
+ extract
+ = high_p ? gen_vec_extract_hi_v64qi : gen_vec_extract_lo_v64qi;
+ break;
case V32QImode:
if (unsigned_p)
unpack = gen_avx2_zero_extendv16qiv16hi2;
for (i = recog_data.n_operands - 1; i >= 0; --i)
if (MEM_P (recog_data.operand[i]))
{
- constrain_operands_cached (reload_completed);
+ constrain_operands_cached (insn, reload_completed);
if (which_alternative != -1)
{
const char *constraints = recog_data.constraints[i];
emit_label (donelab);
}
\f
-/* AVX512F does support 64-byte integer vector operations,
- thus the longest vector we are faced with is V64QImode. */
-#define MAX_VECT_LEN 64
-
-struct expand_vec_perm_d
-{
- rtx target, op0, op1;
- unsigned char perm[MAX_VECT_LEN];
- enum machine_mode vmode;
- unsigned char nelt;
- bool one_operand_p;
- bool testing_p;
-};
-
static bool canonicalize_perm (struct expand_vec_perm_d *d);
static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
goto widen;
case V8HImode:
+ if (TARGET_AVX2)
+ return ix86_vector_duplicate_value (mode, target, val);
+
if (TARGET_SSE2)
{
struct expand_vec_perm_d dperm;
goto widen;
case V16QImode:
+ if (TARGET_AVX2)
+ return ix86_vector_duplicate_value (mode, target, val);
+
if (TARGET_SSE2)
goto permute;
goto widen;
case V16HImode:
case V32QImode:
- {
- enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
- rtx x = gen_reg_rtx (hvmode);
+ if (TARGET_AVX2)
+ return ix86_vector_duplicate_value (mode, target, val);
+ else
+ {
+ enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
+ rtx x = gen_reg_rtx (hvmode);
- ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
- gcc_assert (ok);
+ ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
+ gcc_assert (ok);
- x = gen_rtx_VEC_CONCAT (mode, x, x);
- emit_insn (gen_rtx_SET (VOIDmode, target, x));
- }
+ x = gen_rtx_VEC_CONCAT (mode, x, x);
+ emit_insn (gen_rtx_SET (VOIDmode, target, x));
+ }
+ return true;
+
+ case V64QImode:
+ case V32HImode:
+ if (TARGET_AVX512BW)
+ return ix86_vector_duplicate_value (mode, target, val);
+ else
+ {
+ enum machine_mode hvmode = (mode == V32HImode ? V16HImode : V32QImode);
+ rtx x = gen_reg_rtx (hvmode);
+
+ ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
+ gcc_assert (ok);
+
+ x = gen_rtx_VEC_CONCAT (mode, x, x);
+ emit_insn (gen_rtx_SET (VOIDmode, target, x));
+ }
return true;
default:
ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
rtx target, rtx vals)
{
- rtx ops[64], op0, op1;
+ rtx ops[64], op0, op1, op2, op3, op4, op5;
enum machine_mode half_mode = VOIDmode;
+ enum machine_mode quarter_mode = VOIDmode;
int n, i;
switch (mode)
gen_rtx_VEC_CONCAT (mode, op0, op1)));
return;
+ case V64QImode:
+ quarter_mode = V16QImode;
+ half_mode = V32QImode;
+ goto quarter;
+
+ case V32HImode:
+ quarter_mode = V8HImode;
+ half_mode = V16HImode;
+ goto quarter;
+
+quarter:
+ n = GET_MODE_NUNITS (mode);
+ for (i = 0; i < n; i++)
+ ops[i] = XVECEXP (vals, 0, i);
+ op0 = gen_reg_rtx (quarter_mode);
+ op1 = gen_reg_rtx (quarter_mode);
+ op2 = gen_reg_rtx (quarter_mode);
+ op3 = gen_reg_rtx (quarter_mode);
+ op4 = gen_reg_rtx (half_mode);
+ op5 = gen_reg_rtx (half_mode);
+ ix86_expand_vector_init_interleave (quarter_mode, op0, ops,
+ n >> 3);
+ ix86_expand_vector_init_interleave (quarter_mode, op1,
+ &ops [n >> 2], n >> 3);
+ ix86_expand_vector_init_interleave (quarter_mode, op2,
+ &ops [n >> 1], n >> 3);
+ ix86_expand_vector_init_interleave (quarter_mode, op3,
+ &ops [(n >> 1) | (n >> 2)], n >> 3);
+ emit_insn (gen_rtx_SET (VOIDmode, op4,
+ gen_rtx_VEC_CONCAT (half_mode, op0, op1)));
+ emit_insn (gen_rtx_SET (VOIDmode, op5,
+ gen_rtx_VEC_CONCAT (half_mode, op2, op3)));
+ emit_insn (gen_rtx_SET (VOIDmode, target,
+ gen_rtx_VEC_CONCAT (mode, op4, op5)));
+ return;
+
case V16QImode:
if (!TARGET_SSE4_1)
break;
if (d->one_operand_p)
return false;
- if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
+ if (TARGET_AVX512F && GET_MODE_SIZE (vmode) == 64
+ && GET_MODE_SIZE (GET_MODE_INNER (vmode)) >= 4)
+ ;
+ else if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
;
else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
;
switch (vmode)
{
+ case V8DFmode:
+ case V16SFmode:
case V4DFmode:
case V8SFmode:
case V2DFmode:
case V4SFmode:
case V8HImode:
case V8SImode:
+ case V32HImode:
+ case V64QImode:
+ case V16SImode:
+ case V8DImode:
for (i = 0; i < nelt; ++i)
mask |= (d->perm[i] >= nelt) << i;
break;
expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
{
unsigned i, nelt, eltsz, mask;
- unsigned char perm[32];
+ unsigned char perm[64];
enum machine_mode vmode = V16QImode;
- rtx rperm[32], vperm, target, op0, op1;
+ rtx rperm[64], vperm, target, op0, op1;
nelt = d->nelt;
return false;
}
}
+ else if (GET_MODE_SIZE (d->vmode) == 64)
+ {
+ if (!TARGET_AVX512BW)
+ return false;
+ if (vmode == V64QImode)
+ {
+ /* vpshufb only works intra lanes, it is not
+ possible to shuffle bytes in between the lanes. */
+ for (i = 0; i < nelt; ++i)
+ if ((d->perm[i] ^ i) & (nelt / 4))
+ return false;
+ }
+ }
else
return false;
}
mask = 2 * nelt - 1;
else if (vmode == V16QImode)
mask = nelt - 1;
+ else if (vmode == V64QImode)
+ mask = nelt / 4 - 1;
else
mask = nelt / 2 - 1;
emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
else if (vmode == V32QImode)
emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
+ else if (vmode == V64QImode)
+ emit_insn (gen_avx512bw_pshufbv64qi3 (target, op0, vperm));
else if (vmode == V8SFmode)
emit_insn (gen_avx2_permvarv8sf (target, op0, vperm));
else
rtx (*gen) (rtx, rtx) = NULL;
switch (d->vmode)
{
+ case V64QImode:
+ if (TARGET_AVX512BW)
+ gen = gen_avx512bw_vec_dupv64qi;
+ break;
case V32QImode:
gen = gen_avx2_pbroadcastv32qi_1;
break;
+ case V32HImode:
+ if (TARGET_AVX512BW)
+ gen = gen_avx512bw_vec_dupv32hi;
+ break;
case V16HImode:
gen = gen_avx2_pbroadcastv16hi_1;
break;
+ case V16SImode:
+ if (TARGET_AVX512F)
+ gen = gen_avx512f_vec_dupv16si;
+ break;
case V8SImode:
gen = gen_avx2_pbroadcastv8si_1;
break;
case V8HImode:
gen = gen_avx2_pbroadcastv8hi;
break;
+ case V16SFmode:
+ if (TARGET_AVX512F)
+ gen = gen_avx512f_vec_dupv16sf;
+ break;
case V8SFmode:
gen = gen_avx2_vec_dupv8sf_1;
break;
+ case V8DFmode:
+ if (TARGET_AVX512F)
+ gen = gen_avx512f_vec_dupv8df;
+ break;
+ case V8DImode:
+ if (TARGET_AVX512F)
+ gen = gen_avx512f_vec_dupv8di;
+ break;
/* For other modes prefer other shuffles this function creates. */
default: break;
}
return true;
/* Try the AVX512F vpermi2 instructions. */
- if (TARGET_AVX512F)
- {
- rtx vec[64];
- enum machine_mode mode = d->vmode;
- if (mode == V8DFmode)
- mode = V8DImode;
- else if (mode == V16SFmode)
- mode = V16SImode;
- for (i = 0; i < nelt; ++i)
- vec[i] = GEN_INT (d->perm[i]);
- rtx mask = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt, vec));
- if (ix86_expand_vec_perm_vpermi2 (d->target, d->op0, mask, d->op1))
- return true;
- }
+ if (ix86_expand_vec_perm_vpermi2 (NULL_RTX, NULL_RTX, NULL_RTX, NULL_RTX, d))
+ return true;
return false;
}
/* Given sufficient ISA support we can just return true here
for selected vector modes. */
- if (d.vmode == V16SImode || d.vmode == V16SFmode
- || d.vmode == V8DFmode || d.vmode == V8DImode)
- /* All implementable with a single vpermi2 insn. */
- return true;
- if (GET_MODE_SIZE (d.vmode) == 16)
+ switch (d.vmode)
{
+ case V16SFmode:
+ case V16SImode:
+ case V8DImode:
+ case V8DFmode:
+ if (TARGET_AVX512F)
+ /* All implementable with a single vpermi2 insn. */
+ return true;
+ break;
+ case V32HImode:
+ if (TARGET_AVX512BW)
+ /* All implementable with a single vpermi2 insn. */
+ return true;
+ break;
+ case V8SImode:
+ case V8SFmode:
+ case V4DFmode:
+ case V4DImode:
+ if (TARGET_AVX512VL)
+ /* All implementable with a single vpermi2 insn. */
+ return true;
+ break;
+ case V16HImode:
+ if (TARGET_AVX2)
+ /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */
+ return true;
+ break;
+ case V32QImode:
+ if (TARGET_AVX2)
+ /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */
+ return true;
+ break;
+ case V4SImode:
+ case V4SFmode:
+ case V8HImode:
+ case V16QImode:
/* All implementable with a single vpperm insn. */
if (TARGET_XOP)
return true;
/* All implementable with 2 pshufb + 1 ior. */
if (TARGET_SSSE3)
return true;
+ break;
+ case V2DImode:
+ case V2DFmode:
/* All implementable with shufpd or unpck[lh]pd. */
- if (d.nelt == 2)
- return true;
+ return true;
+ default:
+ return false;
}
/* Extract the values from the vector CST into the permutation
gen_il = gen_avx2_interleave_lowv32qi;
gen_ih = gen_avx2_interleave_highv32qi;
break;
+ case V64QImode:
+ himode = V32HImode;
+ gen_il = gen_avx512bw_interleave_lowv64qi;
+ gen_ih = gen_avx512bw_interleave_highv64qi;
+ break;
default:
gcc_unreachable ();
}
{
/* For SSE2, we used an full interleave, so the desired
results are in the even elements. */
- for (i = 0; i < 32; ++i)
+ for (i = 0; i < 64; ++i)
d.perm[i] = i * 2;
}
else
/* For AVX, the interleave used above was not cross-lane. So the
extraction is evens but with the second and third quarter swapped.
Happily, that is even one insn shorter than even extraction. */
- for (i = 0; i < 32; ++i)
+ for (i = 0; i < 64; ++i)
d.perm[i] = i * 2 + ((i & 24) == 8 ? 16 : (i & 24) == 16 ? -16 : 0);
}
enum machine_mode mode = GET_MODE (op0);
rtx t1, t2, t3, t4, t5, t6;
- if (TARGET_XOP && mode == V2DImode)
+ if (TARGET_AVX512DQ && mode == V8DImode)
+ emit_insn (gen_avx512dq_mulv8di3 (op0, op1, op2));
+ else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V4DImode)
+ emit_insn (gen_avx512dq_mulv4di3 (op0, op1, op2));
+ else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V2DImode)
+ emit_insn (gen_avx512dq_mulv2di3 (op0, op1, op2));
+ else if (TARGET_XOP && mode == V2DImode)
{
/* op1: A,B,C,D, op2: E,F,G,H */
op1 = gen_lowpart (V4SImode, op1);