DEF_VECTOR_TYPE (V8SI, SI)
DEF_VECTOR_TYPE (V16HI, HI)
DEF_VECTOR_TYPE (V32QI, QI)
-
+DEF_VECTOR_TYPE (V2UDI, UDI, V2DI)
+DEF_VECTOR_TYPE (V4UDI, UDI, V4DI)
+DEF_VECTOR_TYPE (V4USI, USI, V4SI)
+DEF_VECTOR_TYPE (V8USI, USI, V8SI)
DEF_POINTER_TYPE (PCCHAR, CHAR, CONST)
DEF_POINTER_TYPE (PCDOUBLE, DOUBLE, CONST)
DEF_FUNCTION_TYPE (V2DI, V2DI, V16QI)
DEF_FUNCTION_TYPE (V2DI, V2DI, V2DI)
DEF_FUNCTION_TYPE (V2DI, V4SI, V4SI)
+DEF_FUNCTION_TYPE (V2UDI, V4USI, V4USI)
DEF_FUNCTION_TYPE (V2DI, PCV2DI, V2DI)
DEF_FUNCTION_TYPE (V2SF, V2SF, V2SF)
DEF_FUNCTION_TYPE (V2SI, INT, INT)
DEF_FUNCTION_TYPE (V8SI, PCV8SI, V8SI)
DEF_FUNCTION_TYPE (V4DI, V4DI, V4DI)
DEF_FUNCTION_TYPE (V4DI, V8SI, V8SI)
+DEF_FUNCTION_TYPE (V4UDI, V8USI, V8USI)
DEF_FUNCTION_TYPE (V4DI, V4DI, V2DI)
DEF_FUNCTION_TYPE (V4DI, PCV4DI, V4DI)
DEF_FUNCTION_TYPE (V4DI, V4DI, INT)
IX86_BUILTIN_CPYSGNPS256,
IX86_BUILTIN_CPYSGNPD256,
+ IX86_BUILTIN_VEC_WIDEN_SMUL_ODD_V4SI,
+ IX86_BUILTIN_VEC_WIDEN_SMUL_ODD_V8SI,
+ IX86_BUILTIN_VEC_WIDEN_UMUL_ODD_V4SI,
+ IX86_BUILTIN_VEC_WIDEN_UMUL_ODD_V8SI,
+ IX86_BUILTIN_VEC_WIDEN_UMUL_EVEN_V4SI,
+ IX86_BUILTIN_VEC_WIDEN_UMUL_EVEN_V8SI,
+
/* FMA4 instructions. */
IX86_BUILTIN_VFMADDSS,
IX86_BUILTIN_VFMADDSD,
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_vw_umul_even_v4si", IX86_BUILTIN_VEC_WIDEN_UMUL_EVEN_V4SI, UNKNOWN, (int) V2UDI_FTYPE_V4USI_V4USI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_widen_umult_odd_v4si, "__builtin_ia32_vw_umul_odd_v4si", IX86_BUILTIN_VEC_WIDEN_UMUL_ODD_V4SI, UNKNOWN, (int) V2UDI_FTYPE_V4USI_V4USI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
+ { OPTION_MASK_ISA_SSE4_1, CODE_FOR_vec_widen_smult_odd_v4si, "__builtin_ia32_vw_smul_odd_v4si", IX86_BUILTIN_VEC_WIDEN_SMUL_ODD_V4SI, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
/* SSE4.1 */
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4hiv4di2 , "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2 , "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mulv4siv4di3 , "__builtin_ia32_pmuldq256" , IX86_BUILTIN_PMULDQ256 , UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_smult_odd_v8si, "__builtin_ia32_vw_smul_odd_v8si", IX86_BUILTIN_VEC_WIDEN_SMUL_ODD_V8SI, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_umulhrswv16hi3 , "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256" , IX86_BUILTIN_PMULHUW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256" , IX86_BUILTIN_PMULHW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256" , IX86_BUILTIN_PMULLW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256" , IX86_BUILTIN_PMULLD256 , UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_umulv4siv4di3 , "__builtin_ia32_pmuludq256" , IX86_BUILTIN_PMULUDQ256 , UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_umulv4siv4di3 , "__builtin_ia32_vw_umul_even_v8si" , IX86_BUILTIN_VEC_WIDEN_UMUL_EVEN_V8SI, UNKNOWN, (int) V4UDI_FTYPE_V8USI_V8USI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_umult_odd_v8si, "__builtin_ia32_vw_umul_odd_v8si", IX86_BUILTIN_VEC_WIDEN_UMUL_ODD_V8SI, UNKNOWN, (int) V4UDI_FTYPE_V8USI_V8USI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufbv32qi3, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
case V2DI_FTYPE_V2DI_V2DI:
case V2DI_FTYPE_V16QI_V16QI:
case V2DI_FTYPE_V4SI_V4SI:
+ case V2UDI_FTYPE_V4USI_V4USI:
case V2DI_FTYPE_V2DI_V16QI:
case V2DI_FTYPE_V2DF_V2DF:
case V2SI_FTYPE_V2SI_V2SI:
case V8SI_FTYPE_V16HI_V16HI:
case V4DI_FTYPE_V4DI_V4DI:
case V4DI_FTYPE_V8SI_V8SI:
+ case V4UDI_FTYPE_V8USI_V8USI:
if (comparison == UNKNOWN)
return ix86_expand_binop_builtin (icode, exp, target);
nargs = 2;
return NULL_TREE;
}
}
+
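+/* Return the decl of a target builtin that implements the widening
+   multiplication of the even elements of two input vectors of type
+   TYPE, or NULL if the required ISA is not enabled.  */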
+static tree
+ix86_builtin_mul_widen_even (tree type)
+{
+ bool uns_p = TYPE_UNSIGNED (type);
+ enum ix86_builtins code;
+
+ switch (TYPE_MODE (type))
+ {
+ case V4SImode:
+ if (uns_p)
+ {
+ if (!TARGET_SSE2)
+ return NULL;
+ code = IX86_BUILTIN_VEC_WIDEN_UMUL_EVEN_V4SI;
+ }
+ else
+ {
+ if (!TARGET_SSE4_1)
+ return NULL;
+ code = IX86_BUILTIN_PMULDQ128;
+ }
+ break;
+
+ case V8SImode:
+ if (!TARGET_AVX2)
+ return NULL;
+ code = (uns_p ? IX86_BUILTIN_VEC_WIDEN_UMUL_EVEN_V8SI
+ : IX86_BUILTIN_PMULDQ256);
+ break;
+
+ default:
+ return NULL;
+ }
+ return ix86_builtins[code];
+}
+
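+/* Return the decl of a target builtin that implements the widening
+   multiplication of the odd elements of two input vectors of type
+   TYPE, or NULL if the required ISA is not enabled.  */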
+static tree
+ix86_builtin_mul_widen_odd (tree type)
+{
+ bool uns_p = TYPE_UNSIGNED (type);
+ enum ix86_builtins code;
+
+ switch (TYPE_MODE (type))
+ {
+ case V4SImode:
+ if (uns_p)
+ {
+ if (!TARGET_SSE2)
+ return NULL;
+ code = IX86_BUILTIN_VEC_WIDEN_UMUL_ODD_V4SI;
+ }
+ else
+ {
+ if (!TARGET_SSE4_1)
+ return NULL;
+ code = IX86_BUILTIN_VEC_WIDEN_SMUL_ODD_V4SI;
+ }
+ break;
+
+ case V8SImode:
+ if (!TARGET_AVX2)
+ return NULL;
+ code = (uns_p ? IX86_BUILTIN_VEC_WIDEN_UMUL_ODD_V8SI
+ : IX86_BUILTIN_VEC_WIDEN_SMUL_ODD_V8SI);
+ break;
+
+ default:
+ return NULL;
+ }
+ return ix86_builtins[code];
+}
\f
/* Helper for avx_vpermilps256_operand et al. This is also used by
the expansion functions to turn the parallel back into a mask.
bool uns_p, bool odd_p)
{
enum machine_mode mode = GET_MODE (op1);
+ enum machine_mode wmode = GET_MODE (dest);
rtx x;
/* We only play even/odd games with vectors of SImode. */
the even slots. For some cpus this is faster than a PSHUFD. */
if (odd_p)
{
- enum machine_mode wmode = GET_MODE (dest);
-
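+      /* XOP's vpmacsdqh multiplies the odd elements directly, but it
+	 is a signed multiply, so it cannot be used for the unsigned
+	 case.  */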
+      if (TARGET_XOP && mode == V4SImode && !uns_p)
+ {
+ x = force_reg (wmode, CONST0_RTX (wmode));
+ emit_insn (gen_xop_pmacsdqh (dest, op1, op2, x));
+ return;
+ }
op1 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op1),
GEN_INT (GET_MODE_UNIT_BITSIZE (mode)), NULL,
1, OPTAB_DIRECT);
x = gen_sse4_1_mulv2siv2di3 (dest, op1, op2);
else if (TARGET_XOP)
{
- x = force_reg (V2DImode, CONST0_RTX (V2DImode));
+ x = force_reg (wmode, CONST0_RTX (wmode));
x = gen_xop_pmacsdql (dest, op1, op2, x);
}
else
#undef TARGET_VECTORIZE_BUILTIN_GATHER
#define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
+#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN
+#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN ix86_builtin_mul_widen_even
+#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD
+#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD ix86_builtin_mul_widen_odd
+
#undef TARGET_BUILTIN_RECIPROCAL
#define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
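
For reference, an illustration that is not part of the patch (the
function and variable names are ours): with both hooks defined, the
vectorizer can use the even/odd builtin pair to implement a
widening-multiply loop such as

  void
  mul_widen (unsigned long long *r, const unsigned int *a,
	     const unsigned int *b, int n)
  {
    int i;
    for (i = 0; i < n; i++)
      r[i] = (unsigned long long) a[i] * b[i];
  }

On SSE2 this can be done with pmuludq on the even SImode elements, a
32-bit logical right shift of each DImode lane followed by pmuludq for
the odd elements, and a permute of the two V2DI results back into the
original element order.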