re PR tree-optimization/88464 (AVX-512 vectorization of masked scatter failing with...
author Jakub Jelinek <jakub@redhat.com>
Tue, 18 Dec 2018 18:41:26 +0000 (19:41 +0100)
committer Jakub Jelinek <jakub@gcc.gnu.org>
Tue, 18 Dec 2018 18:41:26 +0000 (19:41 +0100)
PR target/88464
* config/i386/i386-builtin-types.def
(VOID_FTYPE_PDOUBLE_QI_V8SI_V4DF_INT,
VOID_FTYPE_PFLOAT_QI_V4DI_V8SF_INT,
VOID_FTYPE_PLONGLONG_QI_V8SI_V4DI_INT,
VOID_FTYPE_PINT_QI_V4DI_V8SI_INT,
VOID_FTYPE_PDOUBLE_QI_V4SI_V2DF_INT,
VOID_FTYPE_PFLOAT_QI_V2DI_V4SF_INT,
VOID_FTYPE_PLONGLONG_QI_V4SI_V2DI_INT,
VOID_FTYPE_PINT_QI_V2DI_V4SI_INT): New builtin types.
* config/i386/i386.c (enum ix86_builtins): Add
IX86_BUILTIN_SCATTERALTSIV4DF, IX86_BUILTIN_SCATTERALTDIV8SF,
IX86_BUILTIN_SCATTERALTSIV4DI, IX86_BUILTIN_SCATTERALTDIV8SI,
IX86_BUILTIN_SCATTERALTSIV2DF, IX86_BUILTIN_SCATTERALTDIV4SF,
IX86_BUILTIN_SCATTERALTSIV2DI and IX86_BUILTIN_SCATTERALTDIV4SI.
(ix86_init_mmx_sse_builtins): Fix up names of IX86_BUILTIN_GATHERALT*,
IX86_BUILTIN_GATHER3ALT* and IX86_BUILTIN_SCATTERALT* builtins to
match the IX86_BUILTIN codes.  Build IX86_BUILTIN_SCATTERALTSIV4DF,
IX86_BUILTIN_SCATTERALTDIV8SF, IX86_BUILTIN_SCATTERALTSIV4DI,
IX86_BUILTIN_SCATTERALTDIV8SI, IX86_BUILTIN_SCATTERALTSIV2DF,
IX86_BUILTIN_SCATTERALTDIV4SF, IX86_BUILTIN_SCATTERALTSIV2DI and
IX86_BUILTIN_SCATTERALTDIV4SI decls.
(ix86_vectorize_builtin_scatter): Expand those new builtins.

* gcc.target/i386/avx512f-pr88464-5.c: New test.
* gcc.target/i386/avx512f-pr88464-6.c: New test.
* gcc.target/i386/avx512f-pr88464-7.c: New test.
* gcc.target/i386/avx512f-pr88464-8.c: New test.
* gcc.target/i386/avx512vl-pr88464-5.c: New test.
* gcc.target/i386/avx512vl-pr88464-6.c: New test.
* gcc.target/i386/avx512vl-pr88464-7.c: New test.
* gcc.target/i386/avx512vl-pr88464-8.c: New test.
* gcc.target/i386/avx512vl-pr88464-9.c: New test.
* gcc.target/i386/avx512vl-pr88464-10.c: New test.
* gcc.target/i386/avx512vl-pr88464-11.c: New test.
* gcc.target/i386/avx512vl-pr88464-12.c: New test.
* gcc.target/i386/avx512vl-pr88464-13.c: New test.
* gcc.target/i386/avx512vl-pr88464-14.c: New test.
* gcc.target/i386/avx512vl-pr88464-15.c: New test.
* gcc.target/i386/avx512vl-pr88464-16.c: New test.

From-SVN: r267239

20 files changed:
gcc/ChangeLog
gcc/config/i386/i386-builtin-types.def
gcc/config/i386/i386.c
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/i386/avx512f-pr88464-5.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512f-pr88464-6.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512f-pr88464-7.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512f-pr88464-8.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512vl-pr88464-10.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512vl-pr88464-11.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512vl-pr88464-12.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512vl-pr88464-13.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512vl-pr88464-14.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512vl-pr88464-15.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512vl-pr88464-16.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512vl-pr88464-5.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512vl-pr88464-6.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512vl-pr88464-7.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512vl-pr88464-8.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512vl-pr88464-9.c [new file with mode: 0644]

index 7d97a05e6cd465408a48600508e852c04783ea69..a4051f7acd207f94292761af85453f61fd4bac3d 100644 (file)
@@ -1,3 +1,29 @@
+2018-12-18  Jakub Jelinek  <jakub@redhat.com>
+
+       PR target/88464
+       * config/i386/i386-builtin-types.def
+       (VOID_FTYPE_PDOUBLE_QI_V8SI_V4DF_INT,
+       VOID_FTYPE_PFLOAT_QI_V4DI_V8SF_INT,
+       VOID_FTYPE_PLONGLONG_QI_V8SI_V4DI_INT,
+       VOID_FTYPE_PINT_QI_V4DI_V8SI_INT,
+       VOID_FTYPE_PDOUBLE_QI_V4SI_V2DF_INT,
+       VOID_FTYPE_PFLOAT_QI_V2DI_V4SF_INT,
+       VOID_FTYPE_PLONGLONG_QI_V4SI_V2DI_INT,
+       VOID_FTYPE_PINT_QI_V2DI_V4SI_INT): New builtin types.
+       * config/i386/i386.c (enum ix86_builtins): Add
+       IX86_BUILTIN_SCATTERALTSIV4DF, IX86_BUILTIN_SCATTERALTDIV8SF,
+       IX86_BUILTIN_SCATTERALTSIV4DI, IX86_BUILTIN_SCATTERALTDIV8SI,
+       IX86_BUILTIN_SCATTERALTSIV2DF, IX86_BUILTIN_SCATTERALTDIV4SF,
+       IX86_BUILTIN_SCATTERALTSIV2DI and IX86_BUILTIN_SCATTERALTDIV4SI.
+       (ix86_init_mmx_sse_builtins): Fix up names of IX86_BUILTIN_GATHERALT*,
+       IX86_BUILTIN_GATHER3ALT* and IX86_BUILTIN_SCATTERALT* builtins to
+       match the IX86_BUILTIN codes.  Build    IX86_BUILTIN_SCATTERALTSIV4DF,
+       IX86_BUILTIN_SCATTERALTDIV8SF, IX86_BUILTIN_SCATTERALTSIV4DI,
+       IX86_BUILTIN_SCATTERALTDIV8SI, IX86_BUILTIN_SCATTERALTSIV2DF,
+       IX86_BUILTIN_SCATTERALTDIV4SF, IX86_BUILTIN_SCATTERALTSIV2DI and
+       IX86_BUILTIN_SCATTERALTDIV4SI decls.
+       (ix86_vectorize_builtin_scatter): Expand those new builtins.
+
 2018-12-18  Bill Schmidt  <wschmidt@linux.ibm.com>
 
        * doc/extend.texi (PowerPC Altivec/VSX Built-in Functions):
index 3bead21730c66cc371f45d00ee58ff6a6251d721..61c9e6e11f05ce4e14f432b17ac2b35883485288 100644 (file)
@@ -1068,7 +1068,14 @@ DEF_FUNCTION_TYPE (VOID, PFLOAT, HI, V8DI, V16SF, INT)
 DEF_FUNCTION_TYPE (VOID, PDOUBLE, QI, V16SI, V8DF, INT)
 DEF_FUNCTION_TYPE (VOID, PINT, HI, V8DI, V16SI, INT)
 DEF_FUNCTION_TYPE (VOID, PLONGLONG, QI, V16SI, V8DI, INT)
-
+DEF_FUNCTION_TYPE (VOID, PFLOAT, QI, V4DI, V8SF, INT)
+DEF_FUNCTION_TYPE (VOID, PDOUBLE, QI, V8SI, V4DF, INT)
+DEF_FUNCTION_TYPE (VOID, PINT, QI, V4DI, V8SI, INT)
+DEF_FUNCTION_TYPE (VOID, PLONGLONG, QI, V8SI, V4DI, INT)
+DEF_FUNCTION_TYPE (VOID, PFLOAT, QI, V2DI, V4SF, INT)
+DEF_FUNCTION_TYPE (VOID, PDOUBLE, QI, V4SI, V2DF, INT)
+DEF_FUNCTION_TYPE (VOID, PINT, QI, V2DI, V4SI, INT)
+DEF_FUNCTION_TYPE (VOID, PLONGLONG, QI, V4SI, V2DI, INT)
 
 DEF_FUNCTION_TYPE (V16SF, V16SF, PCVOID, V16SI, HI, INT)
 DEF_FUNCTION_TYPE (V8DF, V8DF, PCVOID, V8SI, QI, INT)
index 1c36e12c79c801ad8d64bc17ab5e17865ea89a53..b3c86761e25c20a4713f9e6f1f812685ce4a2f03 100644 (file)
@@ -30072,6 +30072,14 @@ enum ix86_builtins
   IX86_BUILTIN_SCATTERALTDIV16SF,
   IX86_BUILTIN_SCATTERALTSIV8DI,
   IX86_BUILTIN_SCATTERALTDIV16SI,
+  IX86_BUILTIN_SCATTERALTSIV4DF,
+  IX86_BUILTIN_SCATTERALTDIV8SF,
+  IX86_BUILTIN_SCATTERALTSIV4DI,
+  IX86_BUILTIN_SCATTERALTDIV8SI,
+  IX86_BUILTIN_SCATTERALTSIV2DF,
+  IX86_BUILTIN_SCATTERALTDIV4SF,
+  IX86_BUILTIN_SCATTERALTSIV2DI,
+  IX86_BUILTIN_SCATTERALTDIV4SI,
   IX86_BUILTIN_SCATTERDIV16SF,
   IX86_BUILTIN_SCATTERDIV16SI,
   IX86_BUILTIN_SCATTERDIV8DF,
@@ -30879,7 +30887,7 @@ ix86_init_mmx_sse_builtins (void)
                    V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
                    IX86_BUILTIN_GATHERALTSIV4DF);
 
-  def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256 ",
+  def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv8sf ",
                    V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
                    IX86_BUILTIN_GATHERALTDIV8SF);
 
@@ -30887,7 +30895,7 @@ ix86_init_mmx_sse_builtins (void)
                    V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
                    IX86_BUILTIN_GATHERALTSIV4DI);
 
-  def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256 ",
+  def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv8si ",
                    V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
                    IX86_BUILTIN_GATHERALTDIV8SI);
 
@@ -30924,19 +30932,19 @@ ix86_init_mmx_sse_builtins (void)
                    V8DI_FTYPE_V8DI_PCVOID_V8DI_QI_INT,
                    IX86_BUILTIN_GATHER3DIV8DI);
 
-  def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8df ",
+  def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gather3altsiv8df ",
                    V8DF_FTYPE_V8DF_PCDOUBLE_V16SI_QI_INT,
                    IX86_BUILTIN_GATHER3ALTSIV8DF);
 
-  def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8sf ",
+  def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gather3altdiv16sf ",
                    V16SF_FTYPE_V16SF_PCFLOAT_V8DI_HI_INT,
                    IX86_BUILTIN_GATHER3ALTDIV16SF);
 
-  def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8di ",
+  def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gather3altsiv8di ",
                    V8DI_FTYPE_V8DI_PCINT64_V16SI_QI_INT,
                    IX86_BUILTIN_GATHER3ALTSIV8DI);
 
-  def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8si ",
+  def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gather3altdiv16si ",
                    V16SI_FTYPE_V16SI_PCINT_V8DI_HI_INT,
                    IX86_BUILTIN_GATHER3ALTDIV16SI);
 
@@ -31116,11 +31124,12 @@ ix86_init_mmx_sse_builtins (void)
   def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2di",
               VOID_FTYPE_PVOID_QI_V2DI_V2DI_INT,
               IX86_BUILTIN_SCATTERDIV2DI);
+
   def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatteraltsiv8df ",
               VOID_FTYPE_PDOUBLE_QI_V16SI_V8DF_INT,
               IX86_BUILTIN_SCATTERALTSIV8DF);
 
-  def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatteraltdiv8sf ",
+  def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatteraltdiv16sf ",
               VOID_FTYPE_PFLOAT_HI_V8DI_V16SF_INT,
               IX86_BUILTIN_SCATTERALTDIV16SF);
 
@@ -31128,10 +31137,42 @@ ix86_init_mmx_sse_builtins (void)
               VOID_FTYPE_PLONGLONG_QI_V16SI_V8DI_INT,
               IX86_BUILTIN_SCATTERALTSIV8DI);
 
-  def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatteraltdiv8si ",
+  def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatteraltdiv16si ",
               VOID_FTYPE_PINT_HI_V8DI_V16SI_INT,
               IX86_BUILTIN_SCATTERALTDIV16SI);
 
+  def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatteraltsiv4df ",
+              VOID_FTYPE_PDOUBLE_QI_V8SI_V4DF_INT,
+              IX86_BUILTIN_SCATTERALTSIV4DF);
+
+  def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatteraltdiv8sf ",
+              VOID_FTYPE_PFLOAT_QI_V4DI_V8SF_INT,
+              IX86_BUILTIN_SCATTERALTDIV8SF);
+
+  def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatteraltsiv4di ",
+              VOID_FTYPE_PLONGLONG_QI_V8SI_V4DI_INT,
+              IX86_BUILTIN_SCATTERALTSIV4DI);
+
+  def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatteraltdiv8si ",
+              VOID_FTYPE_PINT_QI_V4DI_V8SI_INT,
+              IX86_BUILTIN_SCATTERALTDIV8SI);
+
+  def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatteraltsiv2df ",
+              VOID_FTYPE_PDOUBLE_QI_V4SI_V2DF_INT,
+              IX86_BUILTIN_SCATTERALTSIV2DF);
+
+  def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatteraltdiv4sf ",
+              VOID_FTYPE_PFLOAT_QI_V2DI_V4SF_INT,
+              IX86_BUILTIN_SCATTERALTDIV4SF);
+
+  def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatteraltsiv2di ",
+              VOID_FTYPE_PLONGLONG_QI_V4SI_V2DI_INT,
+              IX86_BUILTIN_SCATTERALTSIV2DI);
+
+  def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatteraltdiv4si ",
+              VOID_FTYPE_PINT_QI_V2DI_V4SI_INT,
+              IX86_BUILTIN_SCATTERALTDIV4SI);
+
   /* AVX512PF */
   def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdpd",
               VOID_FTYPE_QI_V8SI_PCVOID_INT_INT,
@@ -37529,6 +37570,30 @@ rdseed_step:
     case IX86_BUILTIN_SCATTERALTDIV16SI:
       icode = CODE_FOR_avx512f_scatterdiv16si;
       goto scatter_gen;
+    case IX86_BUILTIN_SCATTERALTSIV4DF:
+      icode = CODE_FOR_avx512vl_scattersiv4df;
+      goto scatter_gen;
+    case IX86_BUILTIN_SCATTERALTDIV8SF:
+      icode = CODE_FOR_avx512vl_scatterdiv8sf;
+      goto scatter_gen;
+    case IX86_BUILTIN_SCATTERALTSIV4DI:
+      icode = CODE_FOR_avx512vl_scattersiv4di;
+      goto scatter_gen;
+    case IX86_BUILTIN_SCATTERALTDIV8SI:
+      icode = CODE_FOR_avx512vl_scatterdiv8si;
+      goto scatter_gen;
+    case IX86_BUILTIN_SCATTERALTSIV2DF:
+      icode = CODE_FOR_avx512vl_scattersiv2df;
+      goto scatter_gen;
+    case IX86_BUILTIN_SCATTERALTDIV4SF:
+      icode = CODE_FOR_avx512vl_scatterdiv4sf;
+      goto scatter_gen;
+    case IX86_BUILTIN_SCATTERALTSIV2DI:
+      icode = CODE_FOR_avx512vl_scattersiv2di;
+      goto scatter_gen;
+    case IX86_BUILTIN_SCATTERALTDIV4SI:
+      icode = CODE_FOR_avx512vl_scatterdiv4si;
+      goto scatter_gen;
     case IX86_BUILTIN_GATHERPFDPS:
       icode = CODE_FOR_avx512pf_gatherpfv16sisf;
       goto vec_prefetch_gen;
@@ -37813,6 +37878,36 @@ rdseed_step:
          emit_insn (gen (half, op3));
          op3 = half;
          break;
+       case IX86_BUILTIN_SCATTERALTSIV4DF:
+       case IX86_BUILTIN_SCATTERALTSIV4DI:
+         half = gen_reg_rtx (V4SImode);
+         if (!nonimmediate_operand (op2, V8SImode))
+           op2 = copy_to_mode_reg (V8SImode, op2);
+         emit_insn (gen_vec_extract_lo_v8si (half, op2));
+         op2 = half;
+         break;
+       case IX86_BUILTIN_SCATTERALTDIV8SF:
+       case IX86_BUILTIN_SCATTERALTDIV8SI:
+         half = gen_reg_rtx (mode3);
+         if (mode3 == V4SFmode)
+           gen = gen_vec_extract_lo_v8sf;
+         else
+           gen = gen_vec_extract_lo_v8si;
+         if (!nonimmediate_operand (op3, GET_MODE (op3)))
+           op3 = copy_to_mode_reg (GET_MODE (op3), op3);
+         emit_insn (gen (half, op3));
+         op3 = half;
+         break;
+       case IX86_BUILTIN_SCATTERALTSIV2DF:
+       case IX86_BUILTIN_SCATTERALTSIV2DI:
+         if (!nonimmediate_operand (op2, V4SImode))
+           op2 = copy_to_mode_reg (V4SImode, op2);
+         break;
+       case IX86_BUILTIN_SCATTERALTDIV4SF:
+       case IX86_BUILTIN_SCATTERALTDIV4SI:
+         if (!nonimmediate_operand (op3, GET_MODE (op3)))
+           op3 = copy_to_mode_reg (GET_MODE (op3), op3);
+         break;
        default:
          break;
        }
@@ -38928,6 +39023,54 @@ ix86_vectorize_builtin_scatter (const_tree vectype,
     case E_V16SImode:
       code = si ? IX86_BUILTIN_SCATTERSIV16SI : IX86_BUILTIN_SCATTERALTDIV16SI;
       break;
+    case E_V4DFmode:
+      if (TARGET_AVX512VL)
+       code = si ? IX86_BUILTIN_SCATTERALTSIV4DF : IX86_BUILTIN_SCATTERDIV4DF;
+      else
+       return NULL_TREE;
+      break;
+    case E_V4DImode:
+      if (TARGET_AVX512VL)
+       code = si ? IX86_BUILTIN_SCATTERALTSIV4DI : IX86_BUILTIN_SCATTERDIV4DI;
+      else
+       return NULL_TREE;
+      break;
+    case E_V8SFmode:
+      if (TARGET_AVX512VL)
+       code = si ? IX86_BUILTIN_SCATTERSIV8SF : IX86_BUILTIN_SCATTERALTDIV8SF;
+      else
+       return NULL_TREE;
+      break;
+    case E_V8SImode:
+      if (TARGET_AVX512VL)
+       code = si ? IX86_BUILTIN_SCATTERSIV8SI : IX86_BUILTIN_SCATTERALTDIV8SI;
+      else
+       return NULL_TREE;
+      break;
+    case E_V2DFmode:
+      if (TARGET_AVX512VL)
+       code = si ? IX86_BUILTIN_SCATTERALTSIV2DF : IX86_BUILTIN_SCATTERDIV2DF;
+      else
+       return NULL_TREE;
+      break;
+    case E_V2DImode:
+      if (TARGET_AVX512VL)
+       code = si ? IX86_BUILTIN_SCATTERALTSIV2DI : IX86_BUILTIN_SCATTERDIV2DI;
+      else
+       return NULL_TREE;
+      break;
+    case E_V4SFmode:
+      if (TARGET_AVX512VL)
+       code = si ? IX86_BUILTIN_SCATTERSIV4SF : IX86_BUILTIN_SCATTERALTDIV4SF;
+      else
+       return NULL_TREE;
+      break;
+    case E_V4SImode:
+      if (TARGET_AVX512VL)
+       code = si ? IX86_BUILTIN_SCATTERSIV4SI : IX86_BUILTIN_SCATTERALTDIV4SI;
+      else
+       return NULL_TREE;
+      break;
     default:
       return NULL_TREE;
     }
index 256d9d23e401cdca9b52c5fc1dfcd28bc91e0370..e8a5868892778b5a47ab9fb1312582b7c0d2c791 100644 (file)
@@ -1,3 +1,23 @@
+2018-12-18  Jakub Jelinek  <jakub@redhat.com>
+
+       PR target/88464
+       * gcc.target/i386/avx512f-pr88464-5.c: New test.
+       * gcc.target/i386/avx512f-pr88464-6.c: New test.
+       * gcc.target/i386/avx512f-pr88464-7.c: New test.
+       * gcc.target/i386/avx512f-pr88464-8.c: New test.
+       * gcc.target/i386/avx512vl-pr88464-5.c: New test.
+       * gcc.target/i386/avx512vl-pr88464-6.c: New test.
+       * gcc.target/i386/avx512vl-pr88464-7.c: New test.
+       * gcc.target/i386/avx512vl-pr88464-8.c: New test.
+       * gcc.target/i386/avx512vl-pr88464-9.c: New test.
+       * gcc.target/i386/avx512vl-pr88464-10.c: New test.
+       * gcc.target/i386/avx512vl-pr88464-11.c: New test.
+       * gcc.target/i386/avx512vl-pr88464-12.c: New test.
+       * gcc.target/i386/avx512vl-pr88464-13.c: New test.
+       * gcc.target/i386/avx512vl-pr88464-14.c: New test.
+       * gcc.target/i386/avx512vl-pr88464-15.c: New test.
+       * gcc.target/i386/avx512vl-pr88464-16.c: New test.
+
 2018-12-18  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
 
        * lib/target-supports.exp (check_effective_target_vect_usad_char):
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-pr88464-5.c b/gcc/testsuite/gcc.target/i386/avx512f-pr88464-5.c
new file mode 100644 (file)
index 0000000..462e951
--- /dev/null
@@ -0,0 +1,45 @@
+/* PR tree-optimization/88464 */
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx512f -mprefer-vector-width=512 -mtune=skylake-avx512 -fdump-tree-vect-details" } */
+/* { dg-final { scan-tree-dump-times "loop vectorized using 64 byte vectors" 4 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */
+
+__attribute__((noipa)) void
+f1 (long long * __restrict__ a, const long long * __restrict__ b, const int * __restrict__ c, int n)
+{
+  int i;
+#pragma GCC ivdep
+  for (i = 0; i < n; ++i)
+    if (a[i] > 10)
+      a[i] = b[c[i]];
+}
+
+__attribute__((noipa)) void
+f2 (long long * __restrict__ a, const long long * __restrict__ b, const long * __restrict__ c, int n)
+{
+  int i;
+#pragma GCC ivdep
+  for (i = 0; i < n; ++i)
+    if (a[i] > 10)
+      a[i] = b[c[i]];
+}
+
+__attribute__((noipa)) void
+f3 (int * __restrict__ a, const int * __restrict__ b, const int * __restrict__ c, int n)
+{
+  int i;
+#pragma GCC ivdep
+  for (i = 0; i < n; ++i)
+    if (a[i] > 10)
+      a[i] = b[c[i]];
+}
+
+__attribute__((noipa)) void
+f4 (int * __restrict__ a, const int * __restrict__ b, const long * __restrict__ c, int n)
+{
+  int i;
+#pragma GCC ivdep
+  for (i = 0; i < n; ++i)
+    if (a[i] > 10)
+      a[i] = b[c[i]];
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-pr88464-6.c b/gcc/testsuite/gcc.target/i386/avx512f-pr88464-6.c
new file mode 100644 (file)
index 0000000..9ebb72a
--- /dev/null
@@ -0,0 +1,61 @@
+/* PR tree-optimization/88464 */
+/* { dg-do run { target { avx512f } } } */
+/* { dg-options "-O3 -mavx512f -mprefer-vector-width=512 -mtune=skylake-avx512" } */
+
+#include "avx512f-check.h"
+
+#include "avx512f-pr88464-5.c"
+
+static void
+avx512f_test (void)
+{
+  long long a[1024], b[1024];
+  int c[1024], f[1024];
+  int d[1024];
+  long e[1024];
+  int i;
+  for (i = 0; i < 1024; i++)
+    {
+      asm volatile ("" : "+g" (i));
+      a[i] = (i % 3) != 0 ? 15 : -5;
+      b[i] = 2 * i;
+      d[i] = (i % 3) ? 1023 - i : __INT_MAX__;
+    }
+  f1 (a, b, d, 1024);
+  for (i = 0; i < 1024; i++)
+    {
+      asm volatile ("" : "+g" (i));
+      if (a[i] != ((i % 3) != 0 ? (1023 - i) * 2 : -5))
+       abort ();
+      a[i] = (i % 3) != 1 ? 15 : -5;
+      b[i] = 3 * i;
+      e[i] = (i % 3) != 1 ? 1023 - i : __LONG_MAX__;
+    }
+  f2 (a, b, e, 1024);
+  for (i = 0; i < 1024; i++)
+    {
+      asm volatile ("" : "+g" (i));
+      if (a[i] != ((i % 3) != 1 ? (1023 - i) * 3 : -5))
+       abort ();
+      c[i] = (i % 3) != 2 ? 15 : -5;
+      d[i] = (i % 3) != 2 ? 1023 - i : __INT_MAX__;
+      f[i] = 4 * i;
+    }
+  f3 (c, f, d, 1024);
+  for (i = 0; i < 1024; i++)
+    {
+      asm volatile ("" : "+g" (i));
+      if (c[i] != ((i % 3) != 2 ? (1023 - i) * 4: -5))
+       abort ();
+      c[i] = (i % 3) != 0 ? 15 : -5;
+      e[i] = (i % 3) != 0 ? 1023 - i : __INT_MAX__;
+      f[i] = 5 * i;
+    }
+  f4 (c, f, e, 1024);
+  for (i = 0; i < 1024; i++)
+    {
+      asm volatile ("" : "+g" (i));
+      if (c[i] != ((i % 3) != 0 ? (1023 - i) * 5 : -5))
+       abort ();
+    }
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-pr88464-7.c b/gcc/testsuite/gcc.target/i386/avx512f-pr88464-7.c
new file mode 100644 (file)
index 0000000..738640c
--- /dev/null
@@ -0,0 +1,45 @@
+/* PR tree-optimization/88464 */
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx512f -mprefer-vector-width=512 -mtune=skylake-avx512 -fdump-tree-vect-details" } */
+/* { dg-final { scan-tree-dump-times "loop vectorized using 64 byte vectors" 4 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */
+
+__attribute__((noipa)) void
+f1 (long long * __restrict__ a, const long long * __restrict__ b, const int * __restrict__ c, int n)
+{
+  int i;
+#pragma GCC ivdep
+  for (i = 0; i < n; ++i)
+    if (b[i] > -2)
+      a[c[i]] = b[i];
+}
+
+__attribute__((noipa)) void
+f2 (long long * __restrict__ a, const long long * __restrict__ b, const long * __restrict__ c, int n)
+{
+  int i;
+#pragma GCC ivdep
+  for (i = 0; i < n; ++i)
+    if (b[i] > -2)
+      a[c[i]] = b[i];
+}
+
+__attribute__((noipa)) void
+f3 (int * __restrict__ a, const int * __restrict__ b, const int * __restrict__ c, int n)
+{
+  int i;
+#pragma GCC ivdep
+  for (i = 0; i < n; ++i)
+    if (b[i] > -2)
+      a[c[i]] = b[i];
+}
+
+__attribute__((noipa)) void
+f4 (int * __restrict__ a, const int * __restrict__ b, const long * __restrict__ c, int n)
+{
+  int i;
+#pragma GCC ivdep
+  for (i = 0; i < n; ++i)
+    if (b[i] > -2)
+      a[c[i]] = b[i];
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-pr88464-8.c b/gcc/testsuite/gcc.target/i386/avx512f-pr88464-8.c
new file mode 100644 (file)
index 0000000..0e28baf
--- /dev/null
@@ -0,0 +1,61 @@
+/* PR tree-optimization/88464 */
+/* { dg-do run { target { avx512f } } } */
+/* { dg-options "-O3 -mavx512f -mprefer-vector-width=512 -mtune=skylake-avx512" } */
+
+#include "avx512f-check.h"
+
+#include "avx512f-pr88464-7.c"
+
+static void
+avx512f_test (void)
+{
+  long long a[1024], b[1024];
+  int c[1024], f[1024];
+  int d[1024];
+  long e[1024];
+  int i;
+  for (i = 0; i < 1024; i++)
+    {
+      asm volatile ("" : "+g" (i));
+      a[i] = -5;
+      b[i] = (i % 3) != 0 ? 2 * i : -5;
+      d[i] = (i % 3) != 0 ? 1023 - i : __INT_MAX__;
+    }
+  f1 (a, b, d, 1024);
+  for (i = 0; i < 1024; i++)
+    {
+      asm volatile ("" : "+g" (i));
+      if (a[i] != ((i % 3) != 0 ? (1023 - i) * 2 : -5))
+       abort ();
+      a[i] = -5;
+      b[i] = (i % 3) != 1 ? 3 * i : -5;
+      e[i] = (i % 3) != 1 ? 1023 - i : __LONG_MAX__;
+    }
+  f2 (a, b, e, 1024);
+  for (i = 0; i < 1024; i++)
+    {
+      asm volatile ("" : "+g" (i));
+      if (a[i] != ((i % 3) != 2 ? (1023 - i) * 3 : -5))
+       abort ();
+      c[i] = -5;
+      d[i] = (i % 3) != 2 ? 1023 - i : __INT_MAX__;
+      f[i] = (i % 3) != 2 ? 4 * i : -5;
+    }
+  f3 (c, f, d, 1024);
+  for (i = 0; i < 1024; i++)
+    {
+      asm volatile ("" : "+g" (i));
+      if (c[i] != ((i % 3) != 1 ? (1023 - i) * 4 : -5))
+       abort ();
+      c[i] = -5;
+      e[i] = (i % 3) != 0 ? 1023 - i : __INT_MAX__;
+      f[i] = (i % 3) != 0 ? 5 * i : -5;
+    }
+  f4 (c, f, e, 1024);
+  for (i = 0; i < 1024; i++)
+    {
+      asm volatile ("" : "+g" (i));
+      if (c[i] != ((i % 3) != 0 ? (1023 - i) * 5 : -5))
+       abort ();
+    }
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-10.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-10.c
new file mode 100644 (file)
index 0000000..c5d4e5b
--- /dev/null
@@ -0,0 +1,20 @@
+/* PR tree-optimization/88464 */
+/* { dg-do run { target { avx512vl } } } */
+/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=256 -mtune=skylake-avx512" } */
+
+#define AVX512VL
+#define AVX512F_LEN 512
+#define AVX512F_LEN_HALF 256
+
+#include "avx512f-pr88464-6.c"
+
+static void
+test_256 (void)
+{
+  avx512f_test ();
+}
+
+static void
+test_128 (void)
+{
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-11.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-11.c
new file mode 100644 (file)
index 0000000..9696008
--- /dev/null
@@ -0,0 +1,7 @@
+/* PR tree-optimization/88464 */
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=128 -mtune=skylake-avx512 -fdump-tree-vect-details" } */
+/* { dg-final { scan-tree-dump-times "loop vectorized using 16 byte vectors" 4 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */
+
+#include "avx512f-pr88464-5.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-12.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-12.c
new file mode 100644 (file)
index 0000000..130eddd
--- /dev/null
@@ -0,0 +1,20 @@
+/* PR tree-optimization/88464 */
+/* { dg-do run { target { avx512vl } } } */
+/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=128 -mtune=skylake-avx512" } */
+
+#define AVX512VL
+#define AVX512F_LEN 512
+#define AVX512F_LEN_HALF 256
+
+#include "avx512f-pr88464-6.c"
+
+static void
+test_256 (void)
+{
+  avx512f_test ();
+}
+
+static void
+test_128 (void)
+{
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-13.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-13.c
new file mode 100644 (file)
index 0000000..50515c4
--- /dev/null
@@ -0,0 +1,7 @@
+/* PR tree-optimization/88464 */
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=256 -mtune=skylake-avx512 -fdump-tree-vect-details" } */
+/* { dg-final { scan-tree-dump-times "loop vectorized using 32 byte vectors" 4 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */
+
+#include "avx512f-pr88464-7.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-14.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-14.c
new file mode 100644 (file)
index 0000000..1a70024
--- /dev/null
@@ -0,0 +1,20 @@
+/* PR tree-optimization/88464 */
+/* { dg-do run { target { avx512vl } } } */
+/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=256 -mtune=skylake-avx512" } */
+
+#define AVX512VL
+#define AVX512F_LEN 512
+#define AVX512F_LEN_HALF 256
+
+#include "avx512f-pr88464-8.c"
+
+static void
+test_256 (void)
+{
+  avx512f_test ();
+}
+
+static void
+test_128 (void)
+{
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-15.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-15.c
new file mode 100644 (file)
index 0000000..914a8c3
--- /dev/null
@@ -0,0 +1,7 @@
+/* PR tree-optimization/88464 */
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=128 -fno-vect-cost-model -mtune=skylake-avx512 -fdump-tree-vect-details" } */
+/* { dg-final { scan-tree-dump-times "loop vectorized using 16 byte vectors" 4 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */
+
+#include "avx512f-pr88464-7.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-16.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-16.c
new file mode 100644 (file)
index 0000000..6136fc5
--- /dev/null
@@ -0,0 +1,20 @@
+/* PR tree-optimization/88464 */
+/* { dg-do run { target { avx512vl } } } */
+/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=128 -fno-vect-cost-model -mtune=skylake-avx512" } */
+
+#define AVX512VL
+#define AVX512F_LEN 512
+#define AVX512F_LEN_HALF 256
+
+#include "avx512f-pr88464-8.c"
+
+static void
+test_256 (void)
+{
+  avx512f_test ();
+}
+
+static void
+test_128 (void)
+{
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-5.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-5.c
new file mode 100644 (file)
index 0000000..c7556fe
--- /dev/null
@@ -0,0 +1,7 @@
+/* PR tree-optimization/88464 */
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=256 -mtune=skylake-avx512 -fdump-tree-vect-details" } */
+/* { dg-final { scan-tree-dump-times "loop vectorized using 32 byte vectors" 4 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */
+
+#include "avx512f-pr88464-3.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-6.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-6.c
new file mode 100644 (file)
index 0000000..79f9bfe
--- /dev/null
@@ -0,0 +1,20 @@
+/* PR tree-optimization/88464 */
+/* { dg-do run { target { avx512vl } } } */
+/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=256 -mtune=skylake-avx512" } */
+
+#define AVX512VL
+#define AVX512F_LEN 512
+#define AVX512F_LEN_HALF 256
+
+#include "avx512f-pr88464-4.c"
+
+static void
+test_256 (void)
+{
+  avx512f_test ();
+}
+
+static void
+test_128 (void)
+{
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-7.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-7.c
new file mode 100644 (file)
index 0000000..1fb423e
--- /dev/null
@@ -0,0 +1,7 @@
+/* PR tree-optimization/88464 */
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=128 -fno-vect-cost-model -mtune=skylake-avx512 -fdump-tree-vect-details" } */
+/* { dg-final { scan-tree-dump-times "loop vectorized using 16 byte vectors" 4 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */
+
+#include "avx512f-pr88464-3.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-8.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-8.c
new file mode 100644 (file)
index 0000000..8c670b7
--- /dev/null
@@ -0,0 +1,20 @@
+/* PR tree-optimization/88464 */
+/* { dg-do run { target { avx512vl } } } */
+/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=128 -fno-vect-cost-model -mtune=skylake-avx512" } */
+
+#define AVX512VL
+#define AVX512F_LEN 512
+#define AVX512F_LEN_HALF 256
+
+#include "avx512f-pr88464-4.c"
+
+static void
+test_256 (void)
+{
+  avx512f_test ();
+}
+
+static void
+test_128 (void)
+{
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-9.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-9.c
new file mode 100644 (file)
index 0000000..3af568a
--- /dev/null
@@ -0,0 +1,7 @@
+/* PR tree-optimization/88464 */
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=256 -mtune=skylake-avx512 -fdump-tree-vect-details" } */
+/* { dg-final { scan-tree-dump-times "loop vectorized using 32 byte vectors" 4 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */
+
+#include "avx512f-pr88464-5.c"