re PR target/82460 (AVX512: choose between vpermi2d and vpermt2d to save mov instruct...
author Jakub Jelinek <jakub@redhat.com>
Tue, 24 Oct 2017 19:35:37 +0000 (21:35 +0200)
committer Jakub Jelinek <jakub@gcc.gnu.org>
Tue, 24 Oct 2017 19:35:37 +0000 (21:35 +0200)
PR target/82460
* config/i386/sse.md (UNSPEC_VPERMI2, UNSPEC_VPERMI2_MASK): Remove.
(VPERMI2, VPERMI2I): New mode iterators.
(<avx512>_vpermi2var<mode>3_maskz): Remove 3 define_expand patterns.
(<avx512>_vpermi2var<mode>3<sd_maskz_name>): Remove 3 define_insn
patterns.
(<avx512>_vpermi2var<mode>3_mask): New define_expand using VPERMI2
mode iterator.  Remove 3 old define_insn patterns.
(*<avx512>_vpermi2var<mode>3_mask): 2 new define_insn patterns.
(<avx512>_vpermt2var<mode>3_maskz): Adjust 1 define_expand to use
VPERMI2 mode iterator, remove the other two expanders.
(<avx512>_vpermt2var<mode>3<sd_maskz_name>): Adjust 1 define_insn
to use VPERMI2 mode iterator, add another alternative for vpermi2*
instructions, remove the other two patterns.
(<avx512>_vpermt2var<mode>3_mask): Adjust 1 define_insn to use VPERMI2
mode iterator, remove the other two patterns.
* config/i386/i386.c (ix86_expand_vec_perm_vpermi2): Renamed to ...
(ix86_expand_vec_perm_vpermt2): ... this.  Swap mask and op0
arguments, use gen_*vpermt2* expanders instead of gen_*vpermi2*
and adjust argument order accordingly.
(ix86_expand_vec_perm): Adjust caller.
(expand_vec_perm_1): Likewise.
(expand_vec_perm_vpermi2_vpshub2): Rename to ...
(expand_vec_perm_vpermt2_vpshub2): ... this.
(ix86_expand_vec_perm_const_1): Adjust caller.
(ix86_vectorize_vec_perm_const_ok): Adjust comments.

* gcc.target/i386/pr82460-1.c: New test.
* gcc.target/i386/pr82460-2.c: New test.
* gcc.target/i386/avx512f-vpermt2pd-1.c: Adjust scan-assembler*
regexps to allow vpermt2* to vpermi2* replacement or vice versa
where possible.
* gcc.target/i386/avx512vl-vpermt2pd-1.c: Likewise.
* gcc.target/i386/avx512f-vpermt2d-1.c: Likewise.
* gcc.target/i386/vect-pack-trunc-2.c: Likewise.
* gcc.target/i386/avx512vl-vpermt2ps-1.c: Likewise.
* gcc.target/i386/avx512vl-vpermt2q-1.c: Likewise.
* gcc.target/i386/avx512f-vpermt2ps-1.c: Likewise.
* gcc.target/i386/avx512vl-vpermt2d-1.c: Likewise.
* gcc.target/i386/avx512bw-vpermt2w-1.c: Likewise.
* gcc.target/i386/avx512vbmi-vpermt2b-1.c: Likewise.
* gcc.target/i386/avx512f-vpermt2q-1.c: Likewise.

From-SVN: r254059

17 files changed:
gcc/ChangeLog
gcc/config/i386/i386.c
gcc/config/i386/sse.md
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/i386/avx512bw-vpermt2w-1.c
gcc/testsuite/gcc.target/i386/avx512f-vpermt2d-1.c
gcc/testsuite/gcc.target/i386/avx512f-vpermt2pd-1.c
gcc/testsuite/gcc.target/i386/avx512f-vpermt2ps-1.c
gcc/testsuite/gcc.target/i386/avx512f-vpermt2q-1.c
gcc/testsuite/gcc.target/i386/avx512vbmi-vpermt2b-1.c
gcc/testsuite/gcc.target/i386/avx512vl-vpermt2d-1.c
gcc/testsuite/gcc.target/i386/avx512vl-vpermt2pd-1.c
gcc/testsuite/gcc.target/i386/avx512vl-vpermt2ps-1.c
gcc/testsuite/gcc.target/i386/avx512vl-vpermt2q-1.c
gcc/testsuite/gcc.target/i386/pr82460-1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/pr82460-2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/vect-pack-trunc-2.c

index b2864cf177bbc3f6bc83764c5d6f820ce4a0e490..2f750ac6da33982207f23dd03ff90348692d5c50 100644 (file)
@@ -1,5 +1,32 @@
 2017-10-24  Jakub Jelinek  <jakub@redhat.com>
 
+       PR target/82460
+       * config/i386/sse.md (UNSPEC_VPERMI2, UNSPEC_VPERMI2_MASK): Remove.
+       (VPERMI2, VPERMI2I): New mode iterators.
+       (<avx512>_vpermi2var<mode>3_maskz): Remove 3 define_expand patterns.
+       (<avx512>_vpermi2var<mode>3<sd_maskz_name>): Remove 3 define_insn
+       patterns.
+       (<avx512>_vpermi2var<mode>3_mask): New define_expand using VPERMI2
+       mode iterator.  Remove 3 old define_insn patterns.
+       (*<avx512>_vpermi2var<mode>3_mask): 2 new define_insn patterns.
+       (<avx512>_vpermt2var<mode>3_maskz): Adjust 1 define_expand to use
+       VPERMI2 mode iterator, remove the other two expanders.
+       (<avx512>_vpermt2var<mode>3<sd_maskz_name>): Adjust 1 define_insn
+       to use VPERMI2 mode iterator, add another alternative for vpermi2*
+       instructions, remove the other two patterns.
+       (<avx512>_vpermt2var<mode>3_mask): Adjust 1 define_insn to use VPERMI2
+       mode iterator, remove the other two patterns.
+       * config/i386/i386.c (ix86_expand_vec_perm_vpermi2): Renamed to ...
+       (ix86_expand_vec_perm_vpermt2): ... this.  Swap mask and op0
+       arguments, use gen_*vpermt2* expanders instead of gen_*vpermi2*
+       and adjust argument order accordingly.
+       (ix86_expand_vec_perm): Adjust caller.
+       (expand_vec_perm_1): Likewise.
+       (expand_vec_perm_vpermi2_vpshub2): Rename to ...
+       (expand_vec_perm_vpermt2_vpshub2): ... this.
+       (ix86_expand_vec_perm_const_1): Adjust caller.
+       (ix86_vectorize_vec_perm_const_ok): Adjust comments.
+
        PR target/82370
        * config/i386/sse.md (VIMAX_AVX2): Remove V4TImode.
        (VIMAX_AVX2_AVX512BW, VIMAX_AVX512VL): New mode iterators.
index d2188ad6086a0595025b42ebe524a3d314b8379e..367cadea3c1303409a46cea1af08aa7b11f2f9ef 100644 (file)
@@ -24049,10 +24049,10 @@ struct expand_vec_perm_d
 };
 
 static bool
-ix86_expand_vec_perm_vpermi2 (rtx target, rtx op0, rtx mask, rtx op1,
+ix86_expand_vec_perm_vpermt2 (rtx target, rtx mask, rtx op0, rtx op1,
                              struct expand_vec_perm_d *d)
 {
-  /* ix86_expand_vec_perm_vpermi2 is called from both const and non-const
+  /* ix86_expand_vec_perm_vpermt2 is called from both const and non-const
      expander, so args are either in d, or in op0, op1 etc.  */
   machine_mode mode = GET_MODE (d ? d->op0 : op0);
   machine_mode maskmode = mode;
@@ -24062,83 +24062,83 @@ ix86_expand_vec_perm_vpermi2 (rtx target, rtx op0, rtx mask, rtx op1,
     {
     case E_V8HImode:
       if (TARGET_AVX512VL && TARGET_AVX512BW)
-       gen = gen_avx512vl_vpermi2varv8hi3;
+       gen = gen_avx512vl_vpermt2varv8hi3;
       break;
     case E_V16HImode:
       if (TARGET_AVX512VL && TARGET_AVX512BW)
-       gen = gen_avx512vl_vpermi2varv16hi3;
+       gen = gen_avx512vl_vpermt2varv16hi3;
       break;
     case E_V64QImode:
       if (TARGET_AVX512VBMI)
-       gen = gen_avx512bw_vpermi2varv64qi3;
+       gen = gen_avx512bw_vpermt2varv64qi3;
       break;
     case E_V32HImode:
       if (TARGET_AVX512BW)
-       gen = gen_avx512bw_vpermi2varv32hi3;
+       gen = gen_avx512bw_vpermt2varv32hi3;
       break;
     case E_V4SImode:
       if (TARGET_AVX512VL)
-       gen = gen_avx512vl_vpermi2varv4si3;
+       gen = gen_avx512vl_vpermt2varv4si3;
       break;
     case E_V8SImode:
       if (TARGET_AVX512VL)
-       gen = gen_avx512vl_vpermi2varv8si3;
+       gen = gen_avx512vl_vpermt2varv8si3;
       break;
     case E_V16SImode:
       if (TARGET_AVX512F)
-       gen = gen_avx512f_vpermi2varv16si3;
+       gen = gen_avx512f_vpermt2varv16si3;
       break;
     case E_V4SFmode:
       if (TARGET_AVX512VL)
        {
-         gen = gen_avx512vl_vpermi2varv4sf3;
+         gen = gen_avx512vl_vpermt2varv4sf3;
          maskmode = V4SImode;
        }
       break;
     case E_V8SFmode:
       if (TARGET_AVX512VL)
        {
-         gen = gen_avx512vl_vpermi2varv8sf3;
+         gen = gen_avx512vl_vpermt2varv8sf3;
          maskmode = V8SImode;
        }
       break;
     case E_V16SFmode:
       if (TARGET_AVX512F)
        {
-         gen = gen_avx512f_vpermi2varv16sf3;
+         gen = gen_avx512f_vpermt2varv16sf3;
          maskmode = V16SImode;
        }
       break;
     case E_V2DImode:
       if (TARGET_AVX512VL)
-       gen = gen_avx512vl_vpermi2varv2di3;
+       gen = gen_avx512vl_vpermt2varv2di3;
       break;
     case E_V4DImode:
       if (TARGET_AVX512VL)
-       gen = gen_avx512vl_vpermi2varv4di3;
+       gen = gen_avx512vl_vpermt2varv4di3;
       break;
     case E_V8DImode:
       if (TARGET_AVX512F)
-       gen = gen_avx512f_vpermi2varv8di3;
+       gen = gen_avx512f_vpermt2varv8di3;
       break;
     case E_V2DFmode:
       if (TARGET_AVX512VL)
        {
-         gen = gen_avx512vl_vpermi2varv2df3;
+         gen = gen_avx512vl_vpermt2varv2df3;
          maskmode = V2DImode;
        }
       break;
     case E_V4DFmode:
       if (TARGET_AVX512VL)
        {
-         gen = gen_avx512vl_vpermi2varv4df3;
+         gen = gen_avx512vl_vpermt2varv4df3;
          maskmode = V4DImode;
        }
       break;
     case E_V8DFmode:
       if (TARGET_AVX512F)
        {
-         gen = gen_avx512f_vpermi2varv8df3;
+         gen = gen_avx512f_vpermt2varv8df3;
          maskmode = V8DImode;
        }
       break;
@@ -24149,7 +24149,7 @@ ix86_expand_vec_perm_vpermi2 (rtx target, rtx op0, rtx mask, rtx op1,
   if (gen == NULL)
     return false;
 
-  /* ix86_expand_vec_perm_vpermi2 is called from both const and non-const
+  /* ix86_expand_vec_perm_vpermt2 is called from both const and non-const
      expander, so args are either in d, or in op0, op1 etc.  */
   if (d)
     {
@@ -24162,7 +24162,7 @@ ix86_expand_vec_perm_vpermi2 (rtx target, rtx op0, rtx mask, rtx op1,
       mask = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (d->nelt, vec));
     }
 
-  emit_insn (gen (target, op0, force_reg (maskmode, mask), op1));
+  emit_insn (gen (target, force_reg (maskmode, mask), op0, op1));
   return true;
 }
 
@@ -24213,7 +24213,7 @@ ix86_expand_vec_perm (rtx operands[])
        }
     }
 
-  if (ix86_expand_vec_perm_vpermi2 (target, op0, mask, op1, NULL))
+  if (ix86_expand_vec_perm_vpermt2 (target, mask, op0, op1, NULL))
     return;
 
   if (TARGET_AVX2)
@@ -45395,8 +45395,8 @@ expand_vec_perm_1 (struct expand_vec_perm_d *d)
   if (ix86_expand_vec_one_operand_perm_avx512 (d))
     return true;
 
-  /* Try the AVX512F vpermi2 instructions.  */
-  if (ix86_expand_vec_perm_vpermi2 (NULL_RTX, NULL_RTX, NULL_RTX, NULL_RTX, d))
+  /* Try the AVX512F vpermt2/vpermi2 instructions.  */
+  if (ix86_expand_vec_perm_vpermt2 (NULL_RTX, NULL_RTX, NULL_RTX, NULL_RTX, d))
     return true;
 
   /* See if we can get the same permutation in different vector integer
@@ -47055,9 +47055,9 @@ expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
 }
 
 /* Implement arbitrary permutations of two V64QImode operands
-   will 2 vpermi2w, 2 vpshufb and one vpor instruction.  */
+   with 2 vperm[it]2w, 2 vpshufb and one vpor instruction.  */
 static bool
-expand_vec_perm_vpermi2_vpshub2 (struct expand_vec_perm_d *d)
+expand_vec_perm_vpermt2_vpshub2 (struct expand_vec_perm_d *d)
 {
   if (!TARGET_AVX512BW || !(d->vmode == V64QImode))
     return false;
@@ -47302,7 +47302,7 @@ ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
   if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
     return true;
 
-  if (expand_vec_perm_vpermi2_vpshub2 (d))
+  if (expand_vec_perm_vpermt2_vpshub2 (d))
     return true;
 
   /* ??? Look for narrow permutations whose element orderings would
@@ -47450,17 +47450,17 @@ ix86_vectorize_vec_perm_const_ok (machine_mode vmode, vec_perm_indices sel)
     case E_V8DImode:
     case E_V8DFmode:
       if (TARGET_AVX512F)
-       /* All implementable with a single vpermi2 insn.  */
+       /* All implementable with a single vperm[it]2 insn.  */
        return true;
       break;
     case E_V32HImode:
       if (TARGET_AVX512BW)
-       /* All implementable with a single vpermi2 insn.  */
+       /* All implementable with a single vperm[it]2 insn.  */
        return true;
       break;
     case E_V64QImode:
       if (TARGET_AVX512BW)
-       /* Implementable with 2 vpermi2, 2 vpshufb and 1 or insn.  */
+       /* Implementable with 2 vperm[it]2, 2 vpshufb and 1 or insn.  */
        return true;
       break;
     case E_V8SImode:
@@ -47468,7 +47468,7 @@ ix86_vectorize_vec_perm_const_ok (machine_mode vmode, vec_perm_indices sel)
     case E_V4DFmode:
     case E_V4DImode:
       if (TARGET_AVX512VL)
-       /* All implementable with a single vpermi2 insn.  */
+       /* All implementable with a single vperm[it]2 insn.  */
        return true;
       break;
     case E_V16HImode:
index 4f9f2bd0a1cb1a8ec1343b398b30cc7d55805d16..fe3cb1791ad0c28711d94bf1a84b9c86b0d4d365 100644 (file)
@@ -83,9 +83,7 @@
   UNSPEC_VSIBADDR
 
   ;; For AVX512F support
-  UNSPEC_VPERMI2
   UNSPEC_VPERMT2
-  UNSPEC_VPERMI2_MASK
   UNSPEC_UNSIGNED_FIX_NOTRUNC
   UNSPEC_UNSIGNED_PCMP
   UNSPEC_TESTM
    (set_attr "prefix" "<mask_prefix>")
    (set_attr "mode" "<sseinsnmode>")])
 
-(define_expand "<avx512>_vpermi2var<mode>3_maskz"
-  [(match_operand:VI48F 0 "register_operand")
-   (match_operand:VI48F 1 "register_operand")
-   (match_operand:<sseintvecmode> 2 "register_operand")
-   (match_operand:VI48F 3 "nonimmediate_operand")
-   (match_operand:<avx512fmaskmode> 4 "register_operand")]
-  "TARGET_AVX512F"
-{
-  emit_insn (gen_<avx512>_vpermi2var<mode>3_maskz_1 (
-       operands[0], operands[1], operands[2], operands[3],
-       CONST0_RTX (<MODE>mode), operands[4]));
-  DONE;
-})
-
-(define_expand "<avx512>_vpermi2var<mode>3_maskz"
-  [(match_operand:VI1_AVX512VL 0 "register_operand")
-   (match_operand:VI1_AVX512VL 1 "register_operand")
-   (match_operand:<sseintvecmode> 2 "register_operand")
-   (match_operand:VI1_AVX512VL 3 "nonimmediate_operand")
-   (match_operand:<avx512fmaskmode> 4 "register_operand")]
-  "TARGET_AVX512VBMI"
-{
-  emit_insn (gen_<avx512>_vpermi2var<mode>3_maskz_1 (
-       operands[0], operands[1], operands[2], operands[3],
-       CONST0_RTX (<MODE>mode), operands[4]));
-  DONE;
-})
-
-(define_expand "<avx512>_vpermi2var<mode>3_maskz"
-  [(match_operand:VI2_AVX512VL 0 "register_operand")
-   (match_operand:VI2_AVX512VL 1 "register_operand")
-   (match_operand:<sseintvecmode> 2 "register_operand")
-   (match_operand:VI2_AVX512VL 3 "nonimmediate_operand")
-   (match_operand:<avx512fmaskmode> 4 "register_operand")]
-  "TARGET_AVX512BW"
-{
-  emit_insn (gen_<avx512>_vpermi2var<mode>3_maskz_1 (
-       operands[0], operands[1], operands[2], operands[3],
-       CONST0_RTX (<MODE>mode), operands[4]));
-  DONE;
-})
-
-(define_insn "<avx512>_vpermi2var<mode>3<sd_maskz_name>"
-  [(set (match_operand:VI48F 0 "register_operand" "=v")
-       (unspec:VI48F
-         [(match_operand:VI48F 1 "register_operand" "v")
-          (match_operand:<sseintvecmode> 2 "register_operand" "0")
-          (match_operand:VI48F 3 "nonimmediate_operand" "vm")]
-         UNSPEC_VPERMI2))]
+(define_mode_iterator VPERMI2
+  [V16SI V16SF V8DI V8DF
+   (V8SI "TARGET_AVX512VL") (V8SF "TARGET_AVX512VL")
+   (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")
+   (V4SI "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
+   (V2DI "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")
+   (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX512BW && TARGET_AVX512VL")
+   (V8HI "TARGET_AVX512BW && TARGET_AVX512VL")
+   (V64QI "TARGET_AVX512VBMI") (V32QI "TARGET_AVX512VBMI && TARGET_AVX512VL")
+   (V16QI "TARGET_AVX512VBMI && TARGET_AVX512VL")])
+
+(define_mode_iterator VPERMI2I
+  [V16SI V8DI
+   (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
+   (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")
+   (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX512BW && TARGET_AVX512VL")
+   (V8HI "TARGET_AVX512BW && TARGET_AVX512VL")
+   (V64QI "TARGET_AVX512VBMI") (V32QI "TARGET_AVX512VBMI && TARGET_AVX512VL")
+   (V16QI "TARGET_AVX512VBMI && TARGET_AVX512VL")])
+
+(define_expand "<avx512>_vpermi2var<mode>3_mask"
+  [(set (match_operand:VPERMI2 0 "register_operand")
+       (vec_merge:VPERMI2
+         (unspec:VPERMI2
+           [(match_operand:<sseintvecmode> 2 "register_operand")
+            (match_operand:VPERMI2 1 "register_operand")
+            (match_operand:VPERMI2 3 "nonimmediate_operand")]
+           UNSPEC_VPERMT2)
+         (match_dup 5)
+         (match_operand:<avx512fmaskmode> 4 "register_operand")))]
   "TARGET_AVX512F"
-  "vpermi2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
-  [(set_attr "type" "sselog")
-   (set_attr "prefix" "evex")
-   (set_attr "mode" "<sseinsnmode>")])
-
-(define_insn "<avx512>_vpermi2var<mode>3<sd_maskz_name>"
-  [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
-       (unspec:VI1_AVX512VL
-         [(match_operand:VI1_AVX512VL 1 "register_operand" "v")
-          (match_operand:<sseintvecmode> 2 "register_operand" "0")
-          (match_operand:VI1_AVX512VL 3 "nonimmediate_operand" "vm")]
-         UNSPEC_VPERMI2))]
-  "TARGET_AVX512VBMI"
-  "vpermi2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
-  [(set_attr "type" "sselog")
-   (set_attr "prefix" "evex")
-   (set_attr "mode" "<sseinsnmode>")])
-
-(define_insn "<avx512>_vpermi2var<mode>3<sd_maskz_name>"
-  [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
-       (unspec:VI2_AVX512VL
-         [(match_operand:VI2_AVX512VL 1 "register_operand" "v")
-          (match_operand:<sseintvecmode> 2 "register_operand" "0")
-          (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")]
-         UNSPEC_VPERMI2))]
-  "TARGET_AVX512BW"
-  "vpermi2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
-  [(set_attr "type" "sselog")
-   (set_attr "prefix" "evex")
-   (set_attr "mode" "<sseinsnmode>")])
-
-(define_insn "<avx512>_vpermi2var<mode>3_mask"
-  [(set (match_operand:VI48F 0 "register_operand" "=v")
-       (vec_merge:VI48F
-         (unspec:VI48F
-           [(match_operand:VI48F 1 "register_operand" "v")
-           (match_operand:<sseintvecmode> 2 "register_operand" "0")
-           (match_operand:VI48F 3 "nonimmediate_operand" "vm")]
-           UNSPEC_VPERMI2_MASK)
-         (match_dup 0)
+  "operands[5] = gen_lowpart (<MODE>mode, operands[2]);")
+
+(define_insn "*<avx512>_vpermi2var<mode>3_mask"
+  [(set (match_operand:VPERMI2I 0 "register_operand" "=v")
+       (vec_merge:VPERMI2I
+         (unspec:VPERMI2I
+           [(match_operand:<sseintvecmode> 2 "register_operand" "0")
+            (match_operand:VPERMI2I 1 "register_operand" "v")
+            (match_operand:VPERMI2I 3 "nonimmediate_operand" "vm")]
+           UNSPEC_VPERMT2)
+         (match_dup 2)
          (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
   "TARGET_AVX512F"
   "vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
    (set_attr "prefix" "evex")
    (set_attr "mode" "<sseinsnmode>")])
 
-(define_insn "<avx512>_vpermi2var<mode>3_mask"
-  [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
-       (vec_merge:VI1_AVX512VL
-         (unspec:VI1_AVX512VL
-           [(match_operand:VI1_AVX512VL 1 "register_operand" "v")
-           (match_operand:<sseintvecmode> 2 "register_operand" "0")
-           (match_operand:VI1_AVX512VL 3 "nonimmediate_operand" "vm")]
-           UNSPEC_VPERMI2_MASK)
-         (match_dup 0)
-         (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
-  "TARGET_AVX512VBMI"
-  "vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
-  [(set_attr "type" "sselog")
-   (set_attr "prefix" "evex")
-   (set_attr "mode" "<sseinsnmode>")])
-
-(define_insn "<avx512>_vpermi2var<mode>3_mask"
-  [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
-       (vec_merge:VI2_AVX512VL
-         (unspec:VI2_AVX512VL
-           [(match_operand:VI2_AVX512VL 1 "register_operand" "v")
-           (match_operand:<sseintvecmode> 2 "register_operand" "0")
-           (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")]
-           UNSPEC_VPERMI2_MASK)
-         (match_dup 0)
+(define_insn "*<avx512>_vpermi2var<mode>3_mask"
+  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
+       (vec_merge:VF_AVX512VL
+         (unspec:VF_AVX512VL
+           [(match_operand:<sseintvecmode> 2 "register_operand" "0")
+            (match_operand:VF_AVX512VL 1 "register_operand" "v")
+            (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "vm")]
+           UNSPEC_VPERMT2)
+         (subreg:VF_AVX512VL (match_dup 2) 0)
          (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
-  "TARGET_AVX512BW"
+  "TARGET_AVX512F"
   "vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
   [(set_attr "type" "sselog")
    (set_attr "prefix" "evex")
    (set_attr "mode" "<sseinsnmode>")])
 
 (define_expand "<avx512>_vpermt2var<mode>3_maskz"
-  [(match_operand:VI48F 0 "register_operand")
+  [(match_operand:VPERMI2 0 "register_operand")
    (match_operand:<sseintvecmode> 1 "register_operand")
-   (match_operand:VI48F 2 "register_operand")
-   (match_operand:VI48F 3 "nonimmediate_operand")
+   (match_operand:VPERMI2 2 "register_operand")
+   (match_operand:VPERMI2 3 "nonimmediate_operand")
    (match_operand:<avx512fmaskmode> 4 "register_operand")]
   "TARGET_AVX512F"
 {
   DONE;
 })
 
-(define_expand "<avx512>_vpermt2var<mode>3_maskz"
-  [(match_operand:VI1_AVX512VL 0 "register_operand")
-   (match_operand:<sseintvecmode> 1 "register_operand")
-   (match_operand:VI1_AVX512VL 2 "register_operand")
-   (match_operand:VI1_AVX512VL 3 "nonimmediate_operand")
-   (match_operand:<avx512fmaskmode> 4 "register_operand")]
-  "TARGET_AVX512VBMI"
-{
-  emit_insn (gen_<avx512>_vpermt2var<mode>3_maskz_1 (
-       operands[0], operands[1], operands[2], operands[3],
-       CONST0_RTX (<MODE>mode), operands[4]));
-  DONE;
-})
-
-(define_expand "<avx512>_vpermt2var<mode>3_maskz"
-  [(match_operand:VI2_AVX512VL 0 "register_operand")
-   (match_operand:<sseintvecmode> 1 "register_operand")
-   (match_operand:VI2_AVX512VL 2 "register_operand")
-   (match_operand:VI2_AVX512VL 3 "nonimmediate_operand")
-   (match_operand:<avx512fmaskmode> 4 "register_operand")]
-  "TARGET_AVX512BW"
-{
-  emit_insn (gen_<avx512>_vpermt2var<mode>3_maskz_1 (
-       operands[0], operands[1], operands[2], operands[3],
-       CONST0_RTX (<MODE>mode), operands[4]));
-  DONE;
-})
-
 (define_insn "<avx512>_vpermt2var<mode>3<sd_maskz_name>"
-  [(set (match_operand:VI48F 0 "register_operand" "=v")
-       (unspec:VI48F
-         [(match_operand:<sseintvecmode> 1 "register_operand" "v")
-          (match_operand:VI48F 2 "register_operand" "0")
-          (match_operand:VI48F 3 "nonimmediate_operand" "vm")]
+  [(set (match_operand:VPERMI2 0 "register_operand" "=v,v")
+       (unspec:VPERMI2
+         [(match_operand:<sseintvecmode> 1 "register_operand" "v,0")
+          (match_operand:VPERMI2 2 "register_operand" "0,v")
+          (match_operand:VPERMI2 3 "nonimmediate_operand" "vm,vm")]
          UNSPEC_VPERMT2))]
   "TARGET_AVX512F"
-  "vpermt2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
-  [(set_attr "type" "sselog")
-   (set_attr "prefix" "evex")
-   (set_attr "mode" "<sseinsnmode>")])
-
-(define_insn "<avx512>_vpermt2var<mode>3<sd_maskz_name>"
-  [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
-       (unspec:VI1_AVX512VL
-         [(match_operand:<sseintvecmode> 1 "register_operand" "v")
-          (match_operand:VI1_AVX512VL 2 "register_operand" "0")
-          (match_operand:VI1_AVX512VL 3 "nonimmediate_operand" "vm")]
-         UNSPEC_VPERMT2))]
-  "TARGET_AVX512VBMI"
-  "vpermt2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
-  [(set_attr "type" "sselog")
-   (set_attr "prefix" "evex")
-   (set_attr "mode" "<sseinsnmode>")])
-
-(define_insn "<avx512>_vpermt2var<mode>3<sd_maskz_name>"
-  [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
-       (unspec:VI2_AVX512VL
-         [(match_operand:<sseintvecmode> 1 "register_operand" "v")
-          (match_operand:VI2_AVX512VL 2 "register_operand" "0")
-          (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")]
-         UNSPEC_VPERMT2))]
-  "TARGET_AVX512BW"
-  "vpermt2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
+  "@
+   vpermt2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}
+   vpermi2<ssemodesuffix>\t{%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3}"
   [(set_attr "type" "sselog")
    (set_attr "prefix" "evex")
    (set_attr "mode" "<sseinsnmode>")])
 
 (define_insn "<avx512>_vpermt2var<mode>3_mask"
-  [(set (match_operand:VI48F 0 "register_operand" "=v")
-       (vec_merge:VI48F
-         (unspec:VI48F
+  [(set (match_operand:VPERMI2 0 "register_operand" "=v")
+       (vec_merge:VPERMI2
+         (unspec:VPERMI2
            [(match_operand:<sseintvecmode> 1 "register_operand" "v")
-           (match_operand:VI48F 2 "register_operand" "0")
-           (match_operand:VI48F 3 "nonimmediate_operand" "vm")]
+           (match_operand:VPERMI2 2 "register_operand" "0")
+           (match_operand:VPERMI2 3 "nonimmediate_operand" "vm")]
            UNSPEC_VPERMT2)
          (match_dup 2)
          (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
    (set_attr "prefix" "evex")
    (set_attr "mode" "<sseinsnmode>")])
 
-(define_insn "<avx512>_vpermt2var<mode>3_mask"
-  [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
-       (vec_merge:VI1_AVX512VL
-         (unspec:VI1_AVX512VL
-           [(match_operand:<sseintvecmode> 1 "register_operand" "v")
-           (match_operand:VI1_AVX512VL 2 "register_operand" "0")
-           (match_operand:VI1_AVX512VL 3 "nonimmediate_operand" "vm")]
-           UNSPEC_VPERMT2)
-         (match_dup 2)
-         (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
-  "TARGET_AVX512VBMI"
-  "vpermt2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
-  [(set_attr "type" "sselog")
-   (set_attr "prefix" "evex")
-   (set_attr "mode" "<sseinsnmode>")])
-
-(define_insn "<avx512>_vpermt2var<mode>3_mask"
-  [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
-       (vec_merge:VI2_AVX512VL
-         (unspec:VI2_AVX512VL
-           [(match_operand:<sseintvecmode> 1 "register_operand" "v")
-           (match_operand:VI2_AVX512VL 2 "register_operand" "0")
-           (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")]
-           UNSPEC_VPERMT2)
-         (match_dup 2)
-         (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
-  "TARGET_AVX512BW"
-  "vpermt2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
-  [(set_attr "type" "sselog")
-   (set_attr "prefix" "evex")
-   (set_attr "mode" "<sseinsnmode>")])
-
 (define_expand "avx_vperm2f128<mode>3"
   [(set (match_operand:AVX256MODE2P 0 "register_operand")
        (unspec:AVX256MODE2P
index a33711847637ca7cf8fe9df8ac421dc204ef2fdb..8dbf3b5604dcbaac7d6c2e1730f55046bdf71b9a 100644 (file)
@@ -1,5 +1,22 @@
 2017-10-24  Jakub Jelinek  <jakub@redhat.com>
 
+       PR target/82460
+       * gcc.target/i386/pr82460-1.c: New test.
+       * gcc.target/i386/pr82460-2.c: New test.
+       * gcc.target/i386/avx512f-vpermt2pd-1.c: Adjust scan-assembler*
+       regexps to allow vpermt2* to vpermi2* replacement or vice versa
+       where possible.
+       * gcc.target/i386/avx512vl-vpermt2pd-1.c: Likewise.
+       * gcc.target/i386/avx512f-vpermt2d-1.c: Likewise.
+       * gcc.target/i386/vect-pack-trunc-2.c: Likewise.
+       * gcc.target/i386/avx512vl-vpermt2ps-1.c: Likewise.
+       * gcc.target/i386/avx512vl-vpermt2q-1.c: Likewise.
+       * gcc.target/i386/avx512f-vpermt2ps-1.c: Likewise.
+       * gcc.target/i386/avx512vl-vpermt2d-1.c: Likewise.
+       * gcc.target/i386/avx512bw-vpermt2w-1.c: Likewise.
+       * gcc.target/i386/avx512vbmi-vpermt2b-1.c: Likewise.
+       * gcc.target/i386/avx512f-vpermt2q-1.c: Likewise.
+
        PR target/82370
        * gcc.target/i386/pr82370.c: New test.
 
index be8737ec785e426e2bd16ae6ffbcd301131bcce3..a734cb600ce676d5306aa07e383f4e3a31e629c7 100644 (file)
@@ -1,14 +1,14 @@
 /* { dg-do compile } */
 /* { dg-options "-mavx512bw -mavx512vl -O2" } */
-/* { dg-final { scan-assembler-times "vpermt2w\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)"  1 } } */
-/* { dg-final { scan-assembler-times "vpermt2w\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)"  1 } } *
-/* { dg-final { scan-assembler-times "vpermt2w\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)"  1 } } */
-/* { dg-final { scan-assembler-times "vpermt2w\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vpermt2w\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vpermt2w\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vpermt2w\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vpermt2w\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vpermt2w\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vperm\[ti]2w\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)"  1 } } */
+/* { dg-final { scan-assembler-times "vperm\[ti]2w\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)"  1 } } *
+/* { dg-final { scan-assembler-times "vperm\[ti]2w\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)"  1 } } */
+/* { dg-final { scan-assembler-times "vperm\[ti]2w\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vperm\[ti]2w\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vperm\[ti]2w\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vperm\[ti]2w\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vperm\[ti]2w\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vperm\[ti]2w\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
 
 #include <immintrin.h>
 
index ceb1bd3bf0c10f7002a1973cd40b1b91fa4bb082..919cd217c98cb18d3b8e347c2e5004bb43a9690b 100644 (file)
@@ -1,8 +1,8 @@
 /* { dg-do compile } */
 /* { dg-options "-mavx512f -O2" } */
-/* { dg-final { scan-assembler-times "vpermt2d\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)"  1 } } */
-/* { dg-final { scan-assembler-times "vpermt2d\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vpermt2d\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vperm\[ti]2d\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)"  1 } } */
+/* { dg-final { scan-assembler-times "vperm\[ti]2d\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vperm\[ti]2d\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
 
 #include <immintrin.h>
 
index 2a4955b0f34a6573156a81ad4c8be78eb9c873ed..c021efb3192acdc29d08be18b6d29a94a05a2660 100644 (file)
@@ -1,8 +1,8 @@
 /* { dg-do compile } */
 /* { dg-options "-mavx512f -O2" } */
-/* { dg-final { scan-assembler-times "vpermt2pd\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)"  1 } } */
+/* { dg-final { scan-assembler-times "vperm\[ti]2pd\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)"  1 } } */
 /* { dg-final { scan-assembler-times "vpermt2pd\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vpermt2pd\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vperm\[ti]2pd\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
 
 #include <immintrin.h>
 
index dadc6d70530e5c5030f6cd28d3fa2fd6a8e63939..ffe177bf32099f2a9321540d02767a2beefaabea 100644 (file)
@@ -1,8 +1,8 @@
 /* { dg-do compile } */
 /* { dg-options "-mavx512f -O2" } */
-/* { dg-final { scan-assembler-times "vpermt2ps\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)"  1 } } */
+/* { dg-final { scan-assembler-times "vperm\[ti]2ps\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)"  1 } } */
 /* { dg-final { scan-assembler-times "vpermt2ps\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vpermt2ps\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vperm\[ti]2ps\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
 
 #include <immintrin.h>
 
index 9c6e989b9ddf3c2641c78ef6068a512e598852ac..74bb4ed037cf00ea518239d248a13572a372052b 100644 (file)
@@ -1,8 +1,8 @@
 /* { dg-do compile } */
 /* { dg-options "-mavx512f -O2" } */
-/* { dg-final { scan-assembler-times "vpermt2q\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)"  1 } } */
-/* { dg-final { scan-assembler-times "vpermt2q\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vpermt2q\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vperm\[ti]2q\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)"  1 } } */
+/* { dg-final { scan-assembler-times "vperm\[ti]2q\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vperm\[ti]2q\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
 
 #include <immintrin.h>
 
index f1c31cc56b015cfd68e33fb0ef488cb98d1ac750..24a0b9e3fce37c80ec43db0727ec91944098e6b0 100644 (file)
@@ -1,14 +1,14 @@
 /* { dg-do compile } */
 /* { dg-options "-mavx512vbmi -mavx512vl -O2" } */
-/* { dg-final { scan-assembler-times "vpermt2b\[ \\t\]+\[^\n\]*%zmm\[0-9\]+" 3 } } */
-/* { dg-final { scan-assembler-times "vpermt2b\[ \\t\]+\[^\n\]*%ymm\[0-9\]+" 3 } } *
-/* { dg-final { scan-assembler-times "vpermt2b\[ \\t\]+\[^\n\]*%xmm\[0-9\]+" 3 } } */
-/* { dg-final { scan-assembler-times "vpermt2b\[ \\t\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\[^\{\]" 1 } } */
-/* { dg-final { scan-assembler-times "vpermt2b\[ \\t\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\[^\{\]" 1 } } */
-/* { dg-final { scan-assembler-times "vpermt2b\[ \\t\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\[^\{\]" 1 } } */
-/* { dg-final { scan-assembler-times "vpermt2b\[ \\t\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}" 1 } } */
-/* { dg-final { scan-assembler-times "vpermt2b\[ \\t\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}" 1 } } */
-/* { dg-final { scan-assembler-times "vpermt2b\[ \\t\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vperm\[ti]2b\[ \\t\]+\[^\n\]*%zmm\[0-9\]+" 3 } } */
+/* { dg-final { scan-assembler-times "vperm\[ti]2b\[ \\t\]+\[^\n\]*%ymm\[0-9\]+" 3 } } */
+/* { dg-final { scan-assembler-times "vperm\[ti]2b\[ \\t\]+\[^\n\]*%xmm\[0-9\]+" 3 } } */
+/* { dg-final { scan-assembler-times "vperm\[ti]2b\[ \\t\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vperm\[ti]2b\[ \\t\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vperm\[ti]2b\[ \\t\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vperm\[ti]2b\[ \\t\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vperm\[ti]2b\[ \\t\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vperm\[ti]2b\[ \\t\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}" 1 } } */
 
 #include <immintrin.h>
 
index 3a6de905cb6fd910f9b3fcbf654021d85d4040ea..218650c6cc486599e8cc771198b33f2787996be1 100644 (file)
@@ -1,11 +1,11 @@
 /* { dg-do compile } */
 /* { dg-options "-mavx512vl -O2" } */
-/* { dg-final { scan-assembler-times "vpermt2d\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)"  1 } } */
-/* { dg-final { scan-assembler-times "vpermt2d\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)"  1 } } */
-/* { dg-final { scan-assembler-times "vpermt2d\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vpermt2d\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vpermt2d\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vpermt2d\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vperm\[ti]2d\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)"  1 } } */
+/* { dg-final { scan-assembler-times "vperm\[ti]2d\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)"  1 } } */
+/* { dg-final { scan-assembler-times "vperm\[ti]2d\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vperm\[ti]2d\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vperm\[ti]2d\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vperm\[ti]2d\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
 
 #include <immintrin.h>
 
index 5dd0734bca4e70b36c4c0d464015b23aceabb5a9..64bd30e40c334c5e9401829121e6b80a0d6fb881 100644 (file)
@@ -1,11 +1,11 @@
 /* { dg-do compile } */
 /* { dg-options "-mavx512vl -O2" } */
-/* { dg-final { scan-assembler-times "vpermt2pd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)"  1 } } */
-/* { dg-final { scan-assembler-times "vpermt2pd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)"  1 } } */
+/* { dg-final { scan-assembler-times "vperm\[ti]2pd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)"  1 } } */
+/* { dg-final { scan-assembler-times "vperm\[ti]2pd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)"  1 } } */
 /* { dg-final { scan-assembler-times "vpermt2pd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vpermt2pd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vpermt2pd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vpermt2pd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vperm\[ti]2pd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vperm\[ti]2pd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
 
 #include <immintrin.h>
 
index 0d7e37bb548e022094347f973a2f3387980adffa..7af2dea6f9a44a52f5699ff87e630ce9763caf7f 100644 (file)
@@ -1,11 +1,11 @@
 /* { dg-do compile } */
 /* { dg-options "-mavx512vl -O2" } */
-/* { dg-final { scan-assembler-times "vpermt2ps\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)"  1 } } */
-/* { dg-final { scan-assembler-times "vpermt2ps\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)"  1 } } */
+/* { dg-final { scan-assembler-times "vperm\[ti]2ps\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)"  1 } } */
+/* { dg-final { scan-assembler-times "vperm\[ti]2ps\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)"  1 } } */
 /* { dg-final { scan-assembler-times "vpermt2ps\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vpermt2ps\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vpermt2ps\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vpermt2ps\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vperm\[ti]2ps\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vperm\[ti]2ps\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
 
 #include <immintrin.h>
 
index 475aa6dbd0497a6b4a0d3c36981a847a7def995d..0cbd8b5b2a3b3877595d4a0a799c0e71a6e33269 100644 (file)
@@ -1,11 +1,11 @@
 /* { dg-do compile } */
 /* { dg-options "-mavx512vl -O2" } */
-/* { dg-final { scan-assembler-times "vpermt2q\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)"  1 } } */
-/* { dg-final { scan-assembler-times "vpermt2q\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)"  1 } } */
-/* { dg-final { scan-assembler-times "vpermt2q\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vpermt2q\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vpermt2q\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vpermt2q\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vperm\[ti]2q\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)"  1 } } */
+/* { dg-final { scan-assembler-times "vperm\[ti]2q\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)"  1 } } */
+/* { dg-final { scan-assembler-times "vperm\[ti]2q\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vperm\[ti]2q\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vperm\[ti]2q\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vperm\[ti]2q\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
 
 #include <immintrin.h>
 
diff --git a/gcc/testsuite/gcc.target/i386/pr82460-1.c b/gcc/testsuite/gcc.target/i386/pr82460-1.c
new file mode 100644 (file)
index 0000000..6529c4a
--- /dev/null
@@ -0,0 +1,30 @@
+/* PR target/82460 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512vbmi" } */
+/* { dg-final { scan-assembler-not {\mvmovd} } } */
+
+#include <x86intrin.h>
+
+__m512i
+f1 (__m512i x, __m512i y, char *z)
+{
+  return _mm512_permutex2var_epi32 (y, x, _mm512_loadu_si512 (z));
+}
+
+__m512i
+f2 (__m512i x, __m512i y, char *z)
+{
+  return _mm512_permutex2var_epi32 (x, y, _mm512_loadu_si512 (z));
+}
+
+__m512i
+f3 (__m512i x, __m512i y, __m512i z)
+{
+  return _mm512_permutex2var_epi8 (y, x, z);
+}
+
+__m512i
+f4 (__m512i x, __m512i y, __m512i z)
+{
+  return _mm512_permutex2var_epi8 (x, y, z);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr82460-2.c b/gcc/testsuite/gcc.target/i386/pr82460-2.c
new file mode 100644 (file)
index 0000000..4d96521
--- /dev/null
@@ -0,0 +1,17 @@
+/* PR target/82460 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -mavx512vbmi -mno-prefer-avx256" } */
+/* We want to reuse the permutation mask in the loop, so use vpermt2b rather
+   than vpermi2b.  */
+/* { dg-final { scan-assembler-not {\mvpermi2b\M} } } */
+/* { dg-final { scan-assembler {\mvpermt2b\M} } } */
+
+void
+foo (unsigned char *__restrict__ x, const unsigned short *__restrict__ y,
+     unsigned long z)
+{
+  unsigned char *w = x + z;
+  do
+    *x++ = *y++ >> 8;
+  while (x < w);
+}
index f3d899c1134477908c0573dbf746378673b6214b..3503deaa9d9b4611609cbafe54a42381b06eb6ec 100644 (file)
@@ -25,4 +25,4 @@ avx512bw_test ()
       abort ();
 }
 
-/* { dg-final { scan-assembler-times "vpermi2w\[ \\t\]+\[^\n\]*%zmm" 1 } } */
+/* { dg-final { scan-assembler-times "vperm\[it]2w\[ \\t\]+\[^\n\]*%zmm" 1 } } */