From: Jakub Jelinek Date: Tue, 18 Dec 2018 18:41:26 +0000 (+0100) Subject: re PR tree-optimization/88464 (AVX-512 vectorization of masked scatter failing with... X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=dc5b05a07544bbab9e309a45ebcb350a37b69103;p=gcc.git re PR tree-optimization/88464 (AVX-512 vectorization of masked scatter failing with "not suitable for scatter store") PR target/88464 * config/i386/i386-builtin-types.def (VOID_FTYPE_PDOUBLE_QI_V8SI_V4DF_INT, VOID_FTYPE_PFLOAT_QI_V4DI_V8SF_INT, VOID_FTYPE_PLONGLONG_QI_V8SI_V4DI_INT, VOID_FTYPE_PINT_QI_V4DI_V8SI_INT, VOID_FTYPE_PDOUBLE_QI_V4SI_V2DF_INT, VOID_FTYPE_PFLOAT_QI_V2DI_V4SF_INT, VOID_FTYPE_PLONGLONG_QI_V4SI_V2DI_INT, VOID_FTYPE_PINT_QI_V2DI_V4SI_INT): New builtin types. * config/i386/i386.c (enum ix86_builtins): Add IX86_BUILTIN_SCATTERALTSIV4DF, IX86_BUILTIN_SCATTERALTDIV8SF, IX86_BUILTIN_SCATTERALTSIV4DI, IX86_BUILTIN_SCATTERALTDIV8SI, IX86_BUILTIN_SCATTERALTSIV2DF, IX86_BUILTIN_SCATTERALTDIV4SF, IX86_BUILTIN_SCATTERALTSIV2DI and IX86_BUILTIN_SCATTERALTDIV4SI. (ix86_init_mmx_sse_builtins): Fix up names of IX86_BUILTIN_GATHERALT*, IX86_BUILTIN_GATHER3ALT* and IX86_BUILTIN_SCATTERALT* builtins to match the IX86_BUILTIN codes. Build IX86_BUILTIN_SCATTERALTSIV4DF, IX86_BUILTIN_SCATTERALTDIV8SF, IX86_BUILTIN_SCATTERALTSIV4DI, IX86_BUILTIN_SCATTERALTDIV8SI, IX86_BUILTIN_SCATTERALTSIV2DF, IX86_BUILTIN_SCATTERALTDIV4SF, IX86_BUILTIN_SCATTERALTSIV2DI and IX86_BUILTIN_SCATTERALTDIV4SI decls. (ix86_vectorize_builtin_scatter): Expand those new builtins. * gcc.target/i386/avx512f-pr88464-5.c: New test. * gcc.target/i386/avx512f-pr88464-6.c: New test. * gcc.target/i386/avx512f-pr88464-7.c: New test. * gcc.target/i386/avx512f-pr88464-8.c: New test. * gcc.target/i386/avx512vl-pr88464-5.c: New test. * gcc.target/i386/avx512vl-pr88464-6.c: New test. * gcc.target/i386/avx512vl-pr88464-7.c: New test. * gcc.target/i386/avx512vl-pr88464-8.c: New test. * gcc.target/i386/avx512vl-pr88464-9.c: New test. * gcc.target/i386/avx512vl-pr88464-10.c: New test. * gcc.target/i386/avx512vl-pr88464-11.c: New test. * gcc.target/i386/avx512vl-pr88464-12.c: New test. * gcc.target/i386/avx512vl-pr88464-13.c: New test. * gcc.target/i386/avx512vl-pr88464-14.c: New test. * gcc.target/i386/avx512vl-pr88464-15.c: New test. * gcc.target/i386/avx512vl-pr88464-16.c: New test. From-SVN: r267239 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 7d97a05e6cd..a4051f7acd2 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,29 @@ +2018-12-18 Jakub Jelinek + + PR target/88464 + * config/i386/i386-builtin-types.def + (VOID_FTYPE_PDOUBLE_QI_V8SI_V4DF_INT, + VOID_FTYPE_PFLOAT_QI_V4DI_V8SF_INT, + VOID_FTYPE_PLONGLONG_QI_V8SI_V4DI_INT, + VOID_FTYPE_PINT_QI_V4DI_V8SI_INT, + VOID_FTYPE_PDOUBLE_QI_V4SI_V2DF_INT, + VOID_FTYPE_PFLOAT_QI_V2DI_V4SF_INT, + VOID_FTYPE_PLONGLONG_QI_V4SI_V2DI_INT, + VOID_FTYPE_PINT_QI_V2DI_V4SI_INT): New builtin types. + * config/i386/i386.c (enum ix86_builtins): Add + IX86_BUILTIN_SCATTERALTSIV4DF, IX86_BUILTIN_SCATTERALTDIV8SF, + IX86_BUILTIN_SCATTERALTSIV4DI, IX86_BUILTIN_SCATTERALTDIV8SI, + IX86_BUILTIN_SCATTERALTSIV2DF, IX86_BUILTIN_SCATTERALTDIV4SF, + IX86_BUILTIN_SCATTERALTSIV2DI and IX86_BUILTIN_SCATTERALTDIV4SI. + (ix86_init_mmx_sse_builtins): Fix up names of IX86_BUILTIN_GATHERALT*, + IX86_BUILTIN_GATHER3ALT* and IX86_BUILTIN_SCATTERALT* builtins to + match the IX86_BUILTIN codes. Build IX86_BUILTIN_SCATTERALTSIV4DF, + IX86_BUILTIN_SCATTERALTDIV8SF, IX86_BUILTIN_SCATTERALTSIV4DI, + IX86_BUILTIN_SCATTERALTDIV8SI, IX86_BUILTIN_SCATTERALTSIV2DF, + IX86_BUILTIN_SCATTERALTDIV4SF, IX86_BUILTIN_SCATTERALTSIV2DI and + IX86_BUILTIN_SCATTERALTDIV4SI decls. + (ix86_vectorize_builtin_scatter): Expand those new builtins. + 2018-12-18 Bill Schmidt * doc/extend.texi (PowerPC Altivec/VSX Built-in Functions): diff --git a/gcc/config/i386/i386-builtin-types.def b/gcc/config/i386/i386-builtin-types.def index 3bead21730c..61c9e6e11f0 100644 --- a/gcc/config/i386/i386-builtin-types.def +++ b/gcc/config/i386/i386-builtin-types.def @@ -1068,7 +1068,14 @@ DEF_FUNCTION_TYPE (VOID, PFLOAT, HI, V8DI, V16SF, INT) DEF_FUNCTION_TYPE (VOID, PDOUBLE, QI, V16SI, V8DF, INT) DEF_FUNCTION_TYPE (VOID, PINT, HI, V8DI, V16SI, INT) DEF_FUNCTION_TYPE (VOID, PLONGLONG, QI, V16SI, V8DI, INT) - +DEF_FUNCTION_TYPE (VOID, PFLOAT, QI, V4DI, V8SF, INT) +DEF_FUNCTION_TYPE (VOID, PDOUBLE, QI, V8SI, V4DF, INT) +DEF_FUNCTION_TYPE (VOID, PINT, QI, V4DI, V8SI, INT) +DEF_FUNCTION_TYPE (VOID, PLONGLONG, QI, V8SI, V4DI, INT) +DEF_FUNCTION_TYPE (VOID, PFLOAT, QI, V2DI, V4SF, INT) +DEF_FUNCTION_TYPE (VOID, PDOUBLE, QI, V4SI, V2DF, INT) +DEF_FUNCTION_TYPE (VOID, PINT, QI, V2DI, V4SI, INT) +DEF_FUNCTION_TYPE (VOID, PLONGLONG, QI, V4SI, V2DI, INT) DEF_FUNCTION_TYPE (V16SF, V16SF, PCVOID, V16SI, HI, INT) DEF_FUNCTION_TYPE (V8DF, V8DF, PCVOID, V8SI, QI, INT) diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 1c36e12c79c..b3c86761e25 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -30072,6 +30072,14 @@ enum ix86_builtins IX86_BUILTIN_SCATTERALTDIV16SF, IX86_BUILTIN_SCATTERALTSIV8DI, IX86_BUILTIN_SCATTERALTDIV16SI, + IX86_BUILTIN_SCATTERALTSIV4DF, + IX86_BUILTIN_SCATTERALTDIV8SF, + IX86_BUILTIN_SCATTERALTSIV4DI, + IX86_BUILTIN_SCATTERALTDIV8SI, + IX86_BUILTIN_SCATTERALTSIV2DF, + IX86_BUILTIN_SCATTERALTDIV4SF, + IX86_BUILTIN_SCATTERALTSIV2DI, + IX86_BUILTIN_SCATTERALTDIV4SI, IX86_BUILTIN_SCATTERDIV16SF, IX86_BUILTIN_SCATTERDIV16SI, IX86_BUILTIN_SCATTERDIV8DF, @@ -30879,7 +30887,7 @@ ix86_init_mmx_sse_builtins (void) V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT, IX86_BUILTIN_GATHERALTSIV4DF); - def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256 ", + def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv8sf ", V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT, IX86_BUILTIN_GATHERALTDIV8SF); @@ -30887,7 +30895,7 @@ ix86_init_mmx_sse_builtins (void) V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT, IX86_BUILTIN_GATHERALTSIV4DI); - def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256 ", + def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv8si ", V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT, IX86_BUILTIN_GATHERALTDIV8SI); @@ -30924,19 +30932,19 @@ ix86_init_mmx_sse_builtins (void) V8DI_FTYPE_V8DI_PCVOID_V8DI_QI_INT, IX86_BUILTIN_GATHER3DIV8DI); - def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8df ", + def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gather3altsiv8df ", V8DF_FTYPE_V8DF_PCDOUBLE_V16SI_QI_INT, IX86_BUILTIN_GATHER3ALTSIV8DF); - def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8sf ", + def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gather3altdiv16sf ", V16SF_FTYPE_V16SF_PCFLOAT_V8DI_HI_INT, IX86_BUILTIN_GATHER3ALTDIV16SF); - def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8di ", + def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gather3altsiv8di ", V8DI_FTYPE_V8DI_PCINT64_V16SI_QI_INT, IX86_BUILTIN_GATHER3ALTSIV8DI); - def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8si ", + def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gather3altdiv16si ", V16SI_FTYPE_V16SI_PCINT_V8DI_HI_INT, IX86_BUILTIN_GATHER3ALTDIV16SI); @@ -31116,11 +31124,12 @@ ix86_init_mmx_sse_builtins (void) def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2di", VOID_FTYPE_PVOID_QI_V2DI_V2DI_INT, IX86_BUILTIN_SCATTERDIV2DI); + def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatteraltsiv8df ", VOID_FTYPE_PDOUBLE_QI_V16SI_V8DF_INT, IX86_BUILTIN_SCATTERALTSIV8DF); - def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatteraltdiv8sf ", + def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatteraltdiv16sf ", VOID_FTYPE_PFLOAT_HI_V8DI_V16SF_INT, IX86_BUILTIN_SCATTERALTDIV16SF); @@ -31128,10 +31137,42 @@ ix86_init_mmx_sse_builtins (void) VOID_FTYPE_PLONGLONG_QI_V16SI_V8DI_INT, IX86_BUILTIN_SCATTERALTSIV8DI); - def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatteraltdiv8si ", + def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatteraltdiv16si ", VOID_FTYPE_PINT_HI_V8DI_V16SI_INT, IX86_BUILTIN_SCATTERALTDIV16SI); + def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatteraltsiv4df ", + VOID_FTYPE_PDOUBLE_QI_V8SI_V4DF_INT, + IX86_BUILTIN_SCATTERALTSIV4DF); + + def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatteraltdiv8sf ", + VOID_FTYPE_PFLOAT_QI_V4DI_V8SF_INT, + IX86_BUILTIN_SCATTERALTDIV8SF); + + def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatteraltsiv4di ", + VOID_FTYPE_PLONGLONG_QI_V8SI_V4DI_INT, + IX86_BUILTIN_SCATTERALTSIV4DI); + + def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatteraltdiv8si ", + VOID_FTYPE_PINT_QI_V4DI_V8SI_INT, + IX86_BUILTIN_SCATTERALTDIV8SI); + + def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatteraltsiv2df ", + VOID_FTYPE_PDOUBLE_QI_V4SI_V2DF_INT, + IX86_BUILTIN_SCATTERALTSIV2DF); + + def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatteraltdiv4sf ", + VOID_FTYPE_PFLOAT_QI_V2DI_V4SF_INT, + IX86_BUILTIN_SCATTERALTDIV4SF); + + def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatteraltsiv2di ", + VOID_FTYPE_PLONGLONG_QI_V4SI_V2DI_INT, + IX86_BUILTIN_SCATTERALTSIV2DI); + + def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatteraltdiv4si ", + VOID_FTYPE_PINT_QI_V2DI_V4SI_INT, + IX86_BUILTIN_SCATTERALTDIV4SI); + /* AVX512PF */ def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdpd", VOID_FTYPE_QI_V8SI_PCVOID_INT_INT, @@ -37529,6 +37570,30 @@ rdseed_step: case IX86_BUILTIN_SCATTERALTDIV16SI: icode = CODE_FOR_avx512f_scatterdiv16si; goto scatter_gen; + case IX86_BUILTIN_SCATTERALTSIV4DF: + icode = CODE_FOR_avx512vl_scattersiv4df; + goto scatter_gen; + case IX86_BUILTIN_SCATTERALTDIV8SF: + icode = CODE_FOR_avx512vl_scatterdiv8sf; + goto scatter_gen; + case IX86_BUILTIN_SCATTERALTSIV4DI: + icode = CODE_FOR_avx512vl_scattersiv4di; + goto scatter_gen; + case IX86_BUILTIN_SCATTERALTDIV8SI: + icode = CODE_FOR_avx512vl_scatterdiv8si; + goto scatter_gen; + case IX86_BUILTIN_SCATTERALTSIV2DF: + icode = CODE_FOR_avx512vl_scattersiv2df; + goto scatter_gen; + case IX86_BUILTIN_SCATTERALTDIV4SF: + icode = CODE_FOR_avx512vl_scatterdiv4sf; + goto scatter_gen; + case IX86_BUILTIN_SCATTERALTSIV2DI: + icode = CODE_FOR_avx512vl_scattersiv2di; + goto scatter_gen; + case IX86_BUILTIN_SCATTERALTDIV4SI: + icode = CODE_FOR_avx512vl_scatterdiv4si; + goto scatter_gen; case IX86_BUILTIN_GATHERPFDPS: icode = CODE_FOR_avx512pf_gatherpfv16sisf; goto vec_prefetch_gen; @@ -37813,6 +37878,36 @@ rdseed_step: emit_insn (gen (half, op3)); op3 = half; break; + case IX86_BUILTIN_SCATTERALTSIV4DF: + case IX86_BUILTIN_SCATTERALTSIV4DI: + half = gen_reg_rtx (V4SImode); + if (!nonimmediate_operand (op2, V8SImode)) + op2 = copy_to_mode_reg (V8SImode, op2); + emit_insn (gen_vec_extract_lo_v8si (half, op2)); + op2 = half; + break; + case IX86_BUILTIN_SCATTERALTDIV8SF: + case IX86_BUILTIN_SCATTERALTDIV8SI: + half = gen_reg_rtx (mode3); + if (mode3 == V4SFmode) + gen = gen_vec_extract_lo_v8sf; + else + gen = gen_vec_extract_lo_v8si; + if (!nonimmediate_operand (op3, GET_MODE (op3))) + op3 = copy_to_mode_reg (GET_MODE (op3), op3); + emit_insn (gen (half, op3)); + op3 = half; + break; + case IX86_BUILTIN_SCATTERALTSIV2DF: + case IX86_BUILTIN_SCATTERALTSIV2DI: + if (!nonimmediate_operand (op2, V4SImode)) + op2 = copy_to_mode_reg (V4SImode, op2); + break; + case IX86_BUILTIN_SCATTERALTDIV4SF: + case IX86_BUILTIN_SCATTERALTDIV4SI: + if (!nonimmediate_operand (op3, GET_MODE (op3))) + op3 = copy_to_mode_reg (GET_MODE (op3), op3); + break; default: break; } @@ -38928,6 +39023,54 @@ ix86_vectorize_builtin_scatter (const_tree vectype, case E_V16SImode: code = si ? IX86_BUILTIN_SCATTERSIV16SI : IX86_BUILTIN_SCATTERALTDIV16SI; break; + case E_V4DFmode: + if (TARGET_AVX512VL) + code = si ? IX86_BUILTIN_SCATTERALTSIV4DF : IX86_BUILTIN_SCATTERDIV4DF; + else + return NULL_TREE; + break; + case E_V4DImode: + if (TARGET_AVX512VL) + code = si ? IX86_BUILTIN_SCATTERALTSIV4DI : IX86_BUILTIN_SCATTERDIV4DI; + else + return NULL_TREE; + break; + case E_V8SFmode: + if (TARGET_AVX512VL) + code = si ? IX86_BUILTIN_SCATTERSIV8SF : IX86_BUILTIN_SCATTERALTDIV8SF; + else + return NULL_TREE; + break; + case E_V8SImode: + if (TARGET_AVX512VL) + code = si ? IX86_BUILTIN_SCATTERSIV8SI : IX86_BUILTIN_SCATTERALTDIV8SI; + else + return NULL_TREE; + break; + case E_V2DFmode: + if (TARGET_AVX512VL) + code = si ? IX86_BUILTIN_SCATTERALTSIV2DF : IX86_BUILTIN_SCATTERDIV2DF; + else + return NULL_TREE; + break; + case E_V2DImode: + if (TARGET_AVX512VL) + code = si ? IX86_BUILTIN_SCATTERALTSIV2DI : IX86_BUILTIN_SCATTERDIV2DI; + else + return NULL_TREE; + break; + case E_V4SFmode: + if (TARGET_AVX512VL) + code = si ? IX86_BUILTIN_SCATTERSIV4SF : IX86_BUILTIN_SCATTERALTDIV4SF; + else + return NULL_TREE; + break; + case E_V4SImode: + if (TARGET_AVX512VL) + code = si ? IX86_BUILTIN_SCATTERSIV4SI : IX86_BUILTIN_SCATTERALTDIV4SI; + else + return NULL_TREE; + break; default: return NULL_TREE; } diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 256d9d23e40..e8a58688927 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,23 @@ +2018-12-18 Jakub Jelinek + + PR target/88464 + * gcc.target/i386/avx512f-pr88464-5.c: New test. + * gcc.target/i386/avx512f-pr88464-6.c: New test. + * gcc.target/i386/avx512f-pr88464-7.c: New test. + * gcc.target/i386/avx512f-pr88464-8.c: New test. + * gcc.target/i386/avx512vl-pr88464-5.c: New test. + * gcc.target/i386/avx512vl-pr88464-6.c: New test. + * gcc.target/i386/avx512vl-pr88464-7.c: New test. + * gcc.target/i386/avx512vl-pr88464-8.c: New test. + * gcc.target/i386/avx512vl-pr88464-9.c: New test. + * gcc.target/i386/avx512vl-pr88464-10.c: New test. + * gcc.target/i386/avx512vl-pr88464-11.c: New test. + * gcc.target/i386/avx512vl-pr88464-12.c: New test. + * gcc.target/i386/avx512vl-pr88464-13.c: New test. + * gcc.target/i386/avx512vl-pr88464-14.c: New test. + * gcc.target/i386/avx512vl-pr88464-15.c: New test. + * gcc.target/i386/avx512vl-pr88464-16.c: New test. + 2018-12-18 Kyrylo Tkachov * lib/target-supports.exp (check_effective_target_vect_usad_char): diff --git a/gcc/testsuite/gcc.target/i386/avx512f-pr88464-5.c b/gcc/testsuite/gcc.target/i386/avx512f-pr88464-5.c new file mode 100644 index 00000000000..462e951fdc1 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512f-pr88464-5.c @@ -0,0 +1,45 @@ +/* PR tree-optimization/88464 */ +/* { dg-do compile } */ +/* { dg-options "-O3 -mavx512f -mprefer-vector-width=512 -mtune=skylake-avx512 -fdump-tree-vect-details" } */ +/* { dg-final { scan-tree-dump-times "loop vectorized using 64 byte vectors" 4 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */ + +__attribute__((noipa)) void +f1 (long long * __restrict__ a, const long long * __restrict__ b, const int * __restrict__ c, int n) +{ + int i; +#pragma GCC ivdep + for (i = 0; i < n; ++i) + if (a[i] > 10) + a[i] = b[c[i]]; +} + +__attribute__((noipa)) void +f2 (long long * __restrict__ a, const long long * __restrict__ b, const long * __restrict__ c, int n) +{ + int i; +#pragma GCC ivdep + for (i = 0; i < n; ++i) + if (a[i] > 10) + a[i] = b[c[i]]; +} + +__attribute__((noipa)) void +f3 (int * __restrict__ a, const int * __restrict__ b, const int * __restrict__ c, int n) +{ + int i; +#pragma GCC ivdep + for (i = 0; i < n; ++i) + if (a[i] > 10) + a[i] = b[c[i]]; +} + +__attribute__((noipa)) void +f4 (int * __restrict__ a, const int * __restrict__ b, const long * __restrict__ c, int n) +{ + int i; +#pragma GCC ivdep + for (i = 0; i < n; ++i) + if (a[i] > 10) + a[i] = b[c[i]]; +} diff --git a/gcc/testsuite/gcc.target/i386/avx512f-pr88464-6.c b/gcc/testsuite/gcc.target/i386/avx512f-pr88464-6.c new file mode 100644 index 00000000000..9ebb72a5bae --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512f-pr88464-6.c @@ -0,0 +1,61 @@ +/* PR tree-optimization/88464 */ +/* { dg-do run { target { avx512f } } } */ +/* { dg-options "-O3 -mavx512f -mprefer-vector-width=512 -mtune=skylake-avx512" } */ + +#include "avx512f-check.h" + +#include "avx512f-pr88464-5.c" + +static void +avx512f_test (void) +{ + long long a[1024], b[1024]; + int c[1024], f[1024]; + int d[1024]; + long e[1024]; + int i; + for (i = 0; i < 1024; i++) + { + asm volatile ("" : "+g" (i)); + a[i] = (i % 3) != 0 ? 15 : -5; + b[i] = 2 * i; + d[i] = (i % 3) ? 1023 - i : __INT_MAX__; + } + f1 (a, b, d, 1024); + for (i = 0; i < 1024; i++) + { + asm volatile ("" : "+g" (i)); + if (a[i] != ((i % 3) != 0 ? (1023 - i) * 2 : -5)) + abort (); + a[i] = (i % 3) != 1 ? 15 : -5; + b[i] = 3 * i; + e[i] = (i % 3) != 1 ? 1023 - i : __LONG_MAX__; + } + f2 (a, b, e, 1024); + for (i = 0; i < 1024; i++) + { + asm volatile ("" : "+g" (i)); + if (a[i] != ((i % 3) != 1 ? (1023 - i) * 3 : -5)) + abort (); + c[i] = (i % 3) != 2 ? 15 : -5; + d[i] = (i % 3) != 2 ? 1023 - i : __INT_MAX__; + f[i] = 4 * i; + } + f3 (c, f, d, 1024); + for (i = 0; i < 1024; i++) + { + asm volatile ("" : "+g" (i)); + if (c[i] != ((i % 3) != 2 ? (1023 - i) * 4: -5)) + abort (); + c[i] = (i % 3) != 0 ? 15 : -5; + e[i] = (i % 3) != 0 ? 1023 - i : __INT_MAX__; + f[i] = 5 * i; + } + f4 (c, f, e, 1024); + for (i = 0; i < 1024; i++) + { + asm volatile ("" : "+g" (i)); + if (c[i] != ((i % 3) != 0 ? (1023 - i) * 5 : -5)) + abort (); + } +} diff --git a/gcc/testsuite/gcc.target/i386/avx512f-pr88464-7.c b/gcc/testsuite/gcc.target/i386/avx512f-pr88464-7.c new file mode 100644 index 00000000000..738640c2bf5 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512f-pr88464-7.c @@ -0,0 +1,45 @@ +/* PR tree-optimization/88464 */ +/* { dg-do compile } */ +/* { dg-options "-O3 -mavx512f -mprefer-vector-width=512 -mtune=skylake-avx512 -fdump-tree-vect-details" } */ +/* { dg-final { scan-tree-dump-times "loop vectorized using 64 byte vectors" 4 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */ + +__attribute__((noipa)) void +f1 (long long * __restrict__ a, const long long * __restrict__ b, const int * __restrict__ c, int n) +{ + int i; +#pragma GCC ivdep + for (i = 0; i < n; ++i) + if (b[i] > -2) + a[c[i]] = b[i]; +} + +__attribute__((noipa)) void +f2 (long long * __restrict__ a, const long long * __restrict__ b, const long * __restrict__ c, int n) +{ + int i; +#pragma GCC ivdep + for (i = 0; i < n; ++i) + if (b[i] > -2) + a[c[i]] = b[i]; +} + +__attribute__((noipa)) void +f3 (int * __restrict__ a, const int * __restrict__ b, const int * __restrict__ c, int n) +{ + int i; +#pragma GCC ivdep + for (i = 0; i < n; ++i) + if (b[i] > -2) + a[c[i]] = b[i]; +} + +__attribute__((noipa)) void +f4 (int * __restrict__ a, const int * __restrict__ b, const long * __restrict__ c, int n) +{ + int i; +#pragma GCC ivdep + for (i = 0; i < n; ++i) + if (b[i] > -2) + a[c[i]] = b[i]; +} diff --git a/gcc/testsuite/gcc.target/i386/avx512f-pr88464-8.c b/gcc/testsuite/gcc.target/i386/avx512f-pr88464-8.c new file mode 100644 index 00000000000..0e28baf03be --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512f-pr88464-8.c @@ -0,0 +1,61 @@ +/* PR tree-optimization/88464 */ +/* { dg-do run { target { avx512f } } } */ +/* { dg-options "-O3 -mavx512f -mprefer-vector-width=512 -mtune=skylake-avx512" } */ + +#include "avx512f-check.h" + +#include "avx512f-pr88464-7.c" + +static void +avx512f_test (void) +{ + long long a[1024], b[1024]; + int c[1024], f[1024]; + int d[1024]; + long e[1024]; + int i; + for (i = 0; i < 1024; i++) + { + asm volatile ("" : "+g" (i)); + a[i] = -5; + b[i] = (i % 3) != 0 ? 2 * i : -5; + d[i] = (i % 3) != 0 ? 1023 - i : __INT_MAX__; + } + f1 (a, b, d, 1024); + for (i = 0; i < 1024; i++) + { + asm volatile ("" : "+g" (i)); + if (a[i] != ((i % 3) != 0 ? (1023 - i) * 2 : -5)) + abort (); + a[i] = -5; + b[i] = (i % 3) != 1 ? 3 * i : -5; + e[i] = (i % 3) != 1 ? 1023 - i : __LONG_MAX__; + } + f2 (a, b, e, 1024); + for (i = 0; i < 1024; i++) + { + asm volatile ("" : "+g" (i)); + if (a[i] != ((i % 3) != 2 ? (1023 - i) * 3 : -5)) + abort (); + c[i] = -5; + d[i] = (i % 3) != 2 ? 1023 - i : __INT_MAX__; + f[i] = (i % 3) != 2 ? 4 * i : -5; + } + f3 (c, f, d, 1024); + for (i = 0; i < 1024; i++) + { + asm volatile ("" : "+g" (i)); + if (c[i] != ((i % 3) != 1 ? (1023 - i) * 4 : -5)) + abort (); + c[i] = -5; + e[i] = (i % 3) != 0 ? 1023 - i : __INT_MAX__; + f[i] = (i % 3) != 0 ? 5 * i : -5; + } + f4 (c, f, e, 1024); + for (i = 0; i < 1024; i++) + { + asm volatile ("" : "+g" (i)); + if (c[i] != ((i % 3) != 0 ? (1023 - i) * 5 : -5)) + abort (); + } +} diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-10.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-10.c new file mode 100644 index 00000000000..c5d4e5be123 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-10.c @@ -0,0 +1,20 @@ +/* PR tree-optimization/88464 */ +/* { dg-do run { target { avx512vl } } } */ +/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=256 -mtune=skylake-avx512" } */ + +#define AVX512VL +#define AVX512F_LEN 512 +#define AVX512F_LEN_HALF 256 + +#include "avx512f-pr88464-6.c" + +static void +test_256 (void) +{ + avx512f_test (); +} + +static void +test_128 (void) +{ +} diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-11.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-11.c new file mode 100644 index 00000000000..9696008855d --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-11.c @@ -0,0 +1,7 @@ +/* PR tree-optimization/88464 */ +/* { dg-do compile } */ +/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=128 -mtune=skylake-avx512 -fdump-tree-vect-details" } */ +/* { dg-final { scan-tree-dump-times "loop vectorized using 16 byte vectors" 4 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */ + +#include "avx512f-pr88464-5.c" diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-12.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-12.c new file mode 100644 index 00000000000..130eddd8690 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-12.c @@ -0,0 +1,20 @@ +/* PR tree-optimization/88464 */ +/* { dg-do run { target { avx512vl } } } */ +/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=128 -mtune=skylake-avx512" } */ + +#define AVX512VL +#define AVX512F_LEN 512 +#define AVX512F_LEN_HALF 256 + +#include "avx512f-pr88464-6.c" + +static void +test_256 (void) +{ + avx512f_test (); +} + +static void +test_128 (void) +{ +} diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-13.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-13.c new file mode 100644 index 00000000000..50515c4c783 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-13.c @@ -0,0 +1,7 @@ +/* PR tree-optimization/88464 */ +/* { dg-do compile } */ +/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=256 -mtune=skylake-avx512 -fdump-tree-vect-details" } */ +/* { dg-final { scan-tree-dump-times "loop vectorized using 32 byte vectors" 4 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */ + +#include "avx512f-pr88464-7.c" diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-14.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-14.c new file mode 100644 index 00000000000..1a700247f9b --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-14.c @@ -0,0 +1,20 @@ +/* PR tree-optimization/88464 */ +/* { dg-do run { target { avx512vl } } } */ +/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=256 -mtune=skylake-avx512" } */ + +#define AVX512VL +#define AVX512F_LEN 512 +#define AVX512F_LEN_HALF 256 + +#include "avx512f-pr88464-8.c" + +static void +test_256 (void) +{ + avx512f_test (); +} + +static void +test_128 (void) +{ +} diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-15.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-15.c new file mode 100644 index 00000000000..914a8c30dc3 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-15.c @@ -0,0 +1,7 @@ +/* PR tree-optimization/88464 */ +/* { dg-do compile } */ +/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=128 -fno-vect-cost-model -mtune=skylake-avx512 -fdump-tree-vect-details" } */ +/* { dg-final { scan-tree-dump-times "loop vectorized using 16 byte vectors" 4 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */ + +#include "avx512f-pr88464-7.c" diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-16.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-16.c new file mode 100644 index 00000000000..6136fc5b858 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-16.c @@ -0,0 +1,20 @@ +/* PR tree-optimization/88464 */ +/* { dg-do run { target { avx512vl } } } */ +/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=128 -fno-vect-cost-model -mtune=skylake-avx512" } */ + +#define AVX512VL +#define AVX512F_LEN 512 +#define AVX512F_LEN_HALF 256 + +#include "avx512f-pr88464-8.c" + +static void +test_256 (void) +{ + avx512f_test (); +} + +static void +test_128 (void) +{ +} diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-5.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-5.c new file mode 100644 index 00000000000..c7556fee5ae --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-5.c @@ -0,0 +1,7 @@ +/* PR tree-optimization/88464 */ +/* { dg-do compile } */ +/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=256 -mtune=skylake-avx512 -fdump-tree-vect-details" } */ +/* { dg-final { scan-tree-dump-times "loop vectorized using 32 byte vectors" 4 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */ + +#include "avx512f-pr88464-3.c" diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-6.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-6.c new file mode 100644 index 00000000000..79f9bfe83af --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-6.c @@ -0,0 +1,20 @@ +/* PR tree-optimization/88464 */ +/* { dg-do run { target { avx512vl } } } */ +/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=256 -mtune=skylake-avx512" } */ + +#define AVX512VL +#define AVX512F_LEN 512 +#define AVX512F_LEN_HALF 256 + +#include "avx512f-pr88464-4.c" + +static void +test_256 (void) +{ + avx512f_test (); +} + +static void +test_128 (void) +{ +} diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-7.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-7.c new file mode 100644 index 00000000000..1fb423eb0f3 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-7.c @@ -0,0 +1,7 @@ +/* PR tree-optimization/88464 */ +/* { dg-do compile } */ +/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=128 -fno-vect-cost-model -mtune=skylake-avx512 -fdump-tree-vect-details" } */ +/* { dg-final { scan-tree-dump-times "loop vectorized using 16 byte vectors" 4 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */ + +#include "avx512f-pr88464-3.c" diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-8.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-8.c new file mode 100644 index 00000000000..8c670b79b30 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-8.c @@ -0,0 +1,20 @@ +/* PR tree-optimization/88464 */ +/* { dg-do run { target { avx512vl } } } */ +/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=128 -fno-vect-cost-model -mtune=skylake-avx512" } */ + +#define AVX512VL +#define AVX512F_LEN 512 +#define AVX512F_LEN_HALF 256 + +#include "avx512f-pr88464-4.c" + +static void +test_256 (void) +{ + avx512f_test (); +} + +static void +test_128 (void) +{ +} diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-9.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-9.c new file mode 100644 index 00000000000..3af568ab323 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-9.c @@ -0,0 +1,7 @@ +/* PR tree-optimization/88464 */ +/* { dg-do compile } */ +/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=256 -mtune=skylake-avx512 -fdump-tree-vect-details" } */ +/* { dg-final { scan-tree-dump-times "loop vectorized using 32 byte vectors" 4 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */ + +#include "avx512f-pr88464-5.c"