From: Jakub Jelinek Date: Tue, 24 Oct 2017 19:35:37 +0000 (+0200) Subject: re PR target/82460 (AVX512: choose between vpermi2d and vpermt2d to save mov instruct... X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=6ab5a6f30f2a3a9db37604195ff4b802779f83bc;p=gcc.git re PR target/82460 (AVX512: choose between vpermi2d and vpermt2d to save mov instructions. Also, fails to optimize away shifts before shuffle) PR target/82460 * config/i386/sse.md (UNSPEC_VPERMI2, UNSPEC_VPERMI2_MASK): Remove. (VPERMI2, VPERMI2I): New mode iterators. (<avx512>_vpermi2var<mode>3_maskz): Remove 3 define_expand patterns. (<avx512>_vpermi2var<mode>3<sd_maskz_name>): Remove 3 define_insn patterns. (<avx512>_vpermi2var<mode>3_mask): New define_expand using VPERMI2 mode iterator. Remove 3 old define_insn patterns. (*<avx512>_vpermi2var<mode>3_mask): 2 new define_insn patterns. (<avx512>_vpermt2var<mode>3_maskz): Adjust 1 define_expand to use VPERMI2 mode iterator, remove the other two expanders. (<avx512>_vpermt2var<mode>3<sd_maskz_name>): Adjust 1 define_insn to use VPERMI2 mode iterator, add another alternative for vpermi2* instructions, remove the other two patterns. (<avx512>_vpermt2var<mode>3_mask): Adjust 1 define_insn to use VPERMI2 mode iterator, remove the other two patterns. * config/i386/i386.c (ix86_expand_vec_perm_vpermi2): Renamed to ... (ix86_expand_vec_perm_vpermt2): ... this. Swap mask and op0 arguments, use gen_*vpermt2* expanders instead of gen_*vpermi2* and adjust argument order accordingly. (ix86_expand_vec_perm): Adjust caller. (expand_vec_perm_1): Likewise. (expand_vec_perm_vpermi2_vpshub2): Rename to ... (expand_vec_perm_vpermt2_vpshub2): ... this. (ix86_expand_vec_perm_const_1): Adjust caller. (ix86_vectorize_vec_perm_const_ok): Adjust comments. * gcc.target/i386/pr82460-1.c: New test. * gcc.target/i386/pr82460-2.c: New test. * gcc.target/i386/avx512f-vpermt2pd-1.c: Adjust scan-assembler* regexps to allow vpermt2* to vpermi2* replacement or vice versa where possible. * gcc.target/i386/avx512vl-vpermt2pd-1.c: Likewise. * gcc.target/i386/avx512f-vpermt2d-1.c: Likewise. 
* gcc.target/i386/vect-pack-trunc-2.c: Likewise. * gcc.target/i386/avx512vl-vpermt2ps-1.c: Likewise. * gcc.target/i386/avx512vl-vpermt2q-1.c: Likewise. * gcc.target/i386/avx512f-vpermt2ps-1.c: Likewise. * gcc.target/i386/avx512vl-vpermt2d-1.c: Likewise. * gcc.target/i386/avx512bw-vpermt2w-1.c: Likewise. * gcc.target/i386/avx512vbmi-vpermt2b-1.c: Likewise. * gcc.target/i386/avx512f-vpermt2q-1.c: Likewise. From-SVN: r254059 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index b2864cf177b..2f750ac6da3 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,5 +1,32 @@ 2017-10-24 Jakub Jelinek + PR target/82460 + * config/i386/sse.md (UNSPEC_VPERMI2, UNSPEC_VPERMI2_MASK): Remove. + (VPERMI2, VPERMI2I): New mode iterators. + (_vpermi2var3_maskz): Remove 3 define_expand patterns. + (_vpermi2var3): Remove 3 define_insn + patterns. + (_vpermi2var3_mask): New define_expand using VPERMI2 + mode iterator. Remove 3 old define_insn patterns. + (*_vpermi2var3_mask): 2 new define_insn patterns. + (_vpermt2var3_maskz): Adjust 1 define_expand to use + VPERMI2 mode iterator, remove the other two expanders. + (_vpermt2var3): Adjust 1 define_insn + to use VPERMI2 mode iterator, add another alternative for vpermi2* + instructions, remove the other two patterns. + (_vpermt2var3_mask): Adjust 1 define_insn to use VPERMI2 + mode iterator, remove the other two patterns. + * config/i386/i386.c (ix86_expand_vec_perm_vpermi2): Renamed to ... + (ix86_expand_vec_perm_vpermt2): ... this. Swap mask and op0 + arguments, use gen_*vpermt2* expanders instead of gen_*vpermi2* + and adjust argument order accordingly. + (ix86_expand_vec_perm): Adjust caller. + (expand_vec_perm_1): Likewise. + (expand_vec_perm_vpermi2_vpshub2): Rename to ... + (expand_vec_perm_vpermt2_vpshub2): ... this. + (ix86_expand_vec_perm_const_1): Adjust caller. + (ix86_vectorize_vec_perm_const_ok): Adjust comments. + PR target/82370 * config/i386/sse.md (VIMAX_AVX2): Remove V4TImode. 
(VIMAX_AVX2_AVX512BW, VIMAX_AVX512VL): New mode iterators. diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index d2188ad6086..367cadea3c1 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -24049,10 +24049,10 @@ struct expand_vec_perm_d }; static bool -ix86_expand_vec_perm_vpermi2 (rtx target, rtx op0, rtx mask, rtx op1, +ix86_expand_vec_perm_vpermt2 (rtx target, rtx mask, rtx op0, rtx op1, struct expand_vec_perm_d *d) { - /* ix86_expand_vec_perm_vpermi2 is called from both const and non-const + /* ix86_expand_vec_perm_vpermt2 is called from both const and non-const expander, so args are either in d, or in op0, op1 etc. */ machine_mode mode = GET_MODE (d ? d->op0 : op0); machine_mode maskmode = mode; @@ -24062,83 +24062,83 @@ ix86_expand_vec_perm_vpermi2 (rtx target, rtx op0, rtx mask, rtx op1, { case E_V8HImode: if (TARGET_AVX512VL && TARGET_AVX512BW) - gen = gen_avx512vl_vpermi2varv8hi3; + gen = gen_avx512vl_vpermt2varv8hi3; break; case E_V16HImode: if (TARGET_AVX512VL && TARGET_AVX512BW) - gen = gen_avx512vl_vpermi2varv16hi3; + gen = gen_avx512vl_vpermt2varv16hi3; break; case E_V64QImode: if (TARGET_AVX512VBMI) - gen = gen_avx512bw_vpermi2varv64qi3; + gen = gen_avx512bw_vpermt2varv64qi3; break; case E_V32HImode: if (TARGET_AVX512BW) - gen = gen_avx512bw_vpermi2varv32hi3; + gen = gen_avx512bw_vpermt2varv32hi3; break; case E_V4SImode: if (TARGET_AVX512VL) - gen = gen_avx512vl_vpermi2varv4si3; + gen = gen_avx512vl_vpermt2varv4si3; break; case E_V8SImode: if (TARGET_AVX512VL) - gen = gen_avx512vl_vpermi2varv8si3; + gen = gen_avx512vl_vpermt2varv8si3; break; case E_V16SImode: if (TARGET_AVX512F) - gen = gen_avx512f_vpermi2varv16si3; + gen = gen_avx512f_vpermt2varv16si3; break; case E_V4SFmode: if (TARGET_AVX512VL) { - gen = gen_avx512vl_vpermi2varv4sf3; + gen = gen_avx512vl_vpermt2varv4sf3; maskmode = V4SImode; } break; case E_V8SFmode: if (TARGET_AVX512VL) { - gen = gen_avx512vl_vpermi2varv8sf3; + gen = gen_avx512vl_vpermt2varv8sf3; 
maskmode = V8SImode; } break; case E_V16SFmode: if (TARGET_AVX512F) { - gen = gen_avx512f_vpermi2varv16sf3; + gen = gen_avx512f_vpermt2varv16sf3; maskmode = V16SImode; } break; case E_V2DImode: if (TARGET_AVX512VL) - gen = gen_avx512vl_vpermi2varv2di3; + gen = gen_avx512vl_vpermt2varv2di3; break; case E_V4DImode: if (TARGET_AVX512VL) - gen = gen_avx512vl_vpermi2varv4di3; + gen = gen_avx512vl_vpermt2varv4di3; break; case E_V8DImode: if (TARGET_AVX512F) - gen = gen_avx512f_vpermi2varv8di3; + gen = gen_avx512f_vpermt2varv8di3; break; case E_V2DFmode: if (TARGET_AVX512VL) { - gen = gen_avx512vl_vpermi2varv2df3; + gen = gen_avx512vl_vpermt2varv2df3; maskmode = V2DImode; } break; case E_V4DFmode: if (TARGET_AVX512VL) { - gen = gen_avx512vl_vpermi2varv4df3; + gen = gen_avx512vl_vpermt2varv4df3; maskmode = V4DImode; } break; case E_V8DFmode: if (TARGET_AVX512F) { - gen = gen_avx512f_vpermi2varv8df3; + gen = gen_avx512f_vpermt2varv8df3; maskmode = V8DImode; } break; @@ -24149,7 +24149,7 @@ ix86_expand_vec_perm_vpermi2 (rtx target, rtx op0, rtx mask, rtx op1, if (gen == NULL) return false; - /* ix86_expand_vec_perm_vpermi2 is called from both const and non-const + /* ix86_expand_vec_perm_vpermt2 is called from both const and non-const expander, so args are either in d, or in op0, op1 etc. 
*/ if (d) { @@ -24162,7 +24162,7 @@ ix86_expand_vec_perm_vpermi2 (rtx target, rtx op0, rtx mask, rtx op1, mask = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (d->nelt, vec)); } - emit_insn (gen (target, op0, force_reg (maskmode, mask), op1)); + emit_insn (gen (target, force_reg (maskmode, mask), op0, op1)); return true; } @@ -24213,7 +24213,7 @@ ix86_expand_vec_perm (rtx operands[]) } } - if (ix86_expand_vec_perm_vpermi2 (target, op0, mask, op1, NULL)) + if (ix86_expand_vec_perm_vpermt2 (target, mask, op0, op1, NULL)) return; if (TARGET_AVX2) @@ -45395,8 +45395,8 @@ expand_vec_perm_1 (struct expand_vec_perm_d *d) if (ix86_expand_vec_one_operand_perm_avx512 (d)) return true; - /* Try the AVX512F vpermi2 instructions. */ - if (ix86_expand_vec_perm_vpermi2 (NULL_RTX, NULL_RTX, NULL_RTX, NULL_RTX, d)) + /* Try the AVX512F vpermt2/vpermi2 instructions. */ + if (ix86_expand_vec_perm_vpermt2 (NULL_RTX, NULL_RTX, NULL_RTX, NULL_RTX, d)) return true; /* See if we can get the same permutation in different vector integer @@ -47055,9 +47055,9 @@ expand_vec_perm_broadcast (struct expand_vec_perm_d *d) } /* Implement arbitrary permutations of two V64QImode operands - will 2 vpermi2w, 2 vpshufb and one vpor instruction. */ + with 2 vperm[it]2w, 2 vpshufb and one vpor instruction. */ static bool -expand_vec_perm_vpermi2_vpshub2 (struct expand_vec_perm_d *d) +expand_vec_perm_vpermt2_vpshub2 (struct expand_vec_perm_d *d) { if (!TARGET_AVX512BW || !(d->vmode == V64QImode)) return false; @@ -47302,7 +47302,7 @@ ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d) if (expand_vec_perm_vpshufb2_vpermq_even_odd (d)) return true; - if (expand_vec_perm_vpermi2_vpshub2 (d)) + if (expand_vec_perm_vpermt2_vpshub2 (d)) return true; /* ??? 
Look for narrow permutations whose element orderings would @@ -47450,17 +47450,17 @@ ix86_vectorize_vec_perm_const_ok (machine_mode vmode, vec_perm_indices sel) case E_V8DImode: case E_V8DFmode: if (TARGET_AVX512F) - /* All implementable with a single vpermi2 insn. */ + /* All implementable with a single vperm[it]2 insn. */ return true; break; case E_V32HImode: if (TARGET_AVX512BW) - /* All implementable with a single vpermi2 insn. */ + /* All implementable with a single vperm[it]2 insn. */ return true; break; case E_V64QImode: if (TARGET_AVX512BW) - /* Implementable with 2 vpermi2, 2 vpshufb and 1 or insn. */ + /* Implementable with 2 vperm[it]2, 2 vpshufb and 1 or insn. */ return true; break; case E_V8SImode: @@ -47468,7 +47468,7 @@ ix86_vectorize_vec_perm_const_ok (machine_mode vmode, vec_perm_indices sel) case E_V4DFmode: case E_V4DImode: if (TARGET_AVX512VL) - /* All implementable with a single vpermi2 insn. */ + /* All implementable with a single vperm[it]2 insn. */ return true; break; case E_V16HImode: diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 4f9f2bd0a1c..fe3cb1791ad 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -83,9 +83,7 @@ UNSPEC_VSIBADDR ;; For AVX512F support - UNSPEC_VPERMI2 UNSPEC_VPERMT2 - UNSPEC_VPERMI2_MASK UNSPEC_UNSIGNED_FIX_NOTRUNC UNSPEC_UNSIGNED_PCMP UNSPEC_TESTM @@ -18085,96 +18083,48 @@ (set_attr "prefix" "") (set_attr "mode" "")]) -(define_expand "_vpermi2var3_maskz" - [(match_operand:VI48F 0 "register_operand") - (match_operand:VI48F 1 "register_operand") - (match_operand: 2 "register_operand") - (match_operand:VI48F 3 "nonimmediate_operand") - (match_operand: 4 "register_operand")] - "TARGET_AVX512F" -{ - emit_insn (gen__vpermi2var3_maskz_1 ( - operands[0], operands[1], operands[2], operands[3], - CONST0_RTX (mode), operands[4])); - DONE; -}) - -(define_expand "_vpermi2var3_maskz" - [(match_operand:VI1_AVX512VL 0 "register_operand") - (match_operand:VI1_AVX512VL 1 "register_operand") - 
(match_operand: 2 "register_operand") - (match_operand:VI1_AVX512VL 3 "nonimmediate_operand") - (match_operand: 4 "register_operand")] - "TARGET_AVX512VBMI" -{ - emit_insn (gen__vpermi2var3_maskz_1 ( - operands[0], operands[1], operands[2], operands[3], - CONST0_RTX (mode), operands[4])); - DONE; -}) - -(define_expand "_vpermi2var3_maskz" - [(match_operand:VI2_AVX512VL 0 "register_operand") - (match_operand:VI2_AVX512VL 1 "register_operand") - (match_operand: 2 "register_operand") - (match_operand:VI2_AVX512VL 3 "nonimmediate_operand") - (match_operand: 4 "register_operand")] - "TARGET_AVX512BW" -{ - emit_insn (gen__vpermi2var3_maskz_1 ( - operands[0], operands[1], operands[2], operands[3], - CONST0_RTX (mode), operands[4])); - DONE; -}) - -(define_insn "_vpermi2var3" - [(set (match_operand:VI48F 0 "register_operand" "=v") - (unspec:VI48F - [(match_operand:VI48F 1 "register_operand" "v") - (match_operand: 2 "register_operand" "0") - (match_operand:VI48F 3 "nonimmediate_operand" "vm")] - UNSPEC_VPERMI2))] +(define_mode_iterator VPERMI2 + [V16SI V16SF V8DI V8DF + (V8SI "TARGET_AVX512VL") (V8SF "TARGET_AVX512VL") + (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL") + (V4SI "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL") + (V2DI "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL") + (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX512BW && TARGET_AVX512VL") + (V8HI "TARGET_AVX512BW && TARGET_AVX512VL") + (V64QI "TARGET_AVX512VBMI") (V32QI "TARGET_AVX512VBMI && TARGET_AVX512VL") + (V16QI "TARGET_AVX512VBMI && TARGET_AVX512VL")]) + +(define_mode_iterator VPERMI2I + [V16SI V8DI + (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL") + (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL") + (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX512BW && TARGET_AVX512VL") + (V8HI "TARGET_AVX512BW && TARGET_AVX512VL") + (V64QI "TARGET_AVX512VBMI") (V32QI "TARGET_AVX512VBMI && TARGET_AVX512VL") + (V16QI "TARGET_AVX512VBMI && TARGET_AVX512VL")]) + +(define_expand "_vpermi2var3_mask" + [(set (match_operand:VPERMI2 
0 "register_operand") + (vec_merge:VPERMI2 + (unspec:VPERMI2 + [(match_operand: 2 "register_operand") + (match_operand:VPERMI2 1 "register_operand") + (match_operand:VPERMI2 3 "nonimmediate_operand")] + UNSPEC_VPERMT2) + (match_dup 5) + (match_operand: 4 "register_operand")))] "TARGET_AVX512F" - "vpermi2\t{%3, %1, %0|%0, %1, %3}" - [(set_attr "type" "sselog") - (set_attr "prefix" "evex") - (set_attr "mode" "")]) - -(define_insn "_vpermi2var3" - [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v") - (unspec:VI1_AVX512VL - [(match_operand:VI1_AVX512VL 1 "register_operand" "v") - (match_operand: 2 "register_operand" "0") - (match_operand:VI1_AVX512VL 3 "nonimmediate_operand" "vm")] - UNSPEC_VPERMI2))] - "TARGET_AVX512VBMI" - "vpermi2\t{%3, %1, %0|%0, %1, %3}" - [(set_attr "type" "sselog") - (set_attr "prefix" "evex") - (set_attr "mode" "")]) - -(define_insn "_vpermi2var3" - [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v") - (unspec:VI2_AVX512VL - [(match_operand:VI2_AVX512VL 1 "register_operand" "v") - (match_operand: 2 "register_operand" "0") - (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")] - UNSPEC_VPERMI2))] - "TARGET_AVX512BW" - "vpermi2\t{%3, %1, %0|%0, %1, %3}" - [(set_attr "type" "sselog") - (set_attr "prefix" "evex") - (set_attr "mode" "")]) - -(define_insn "_vpermi2var3_mask" - [(set (match_operand:VI48F 0 "register_operand" "=v") - (vec_merge:VI48F - (unspec:VI48F - [(match_operand:VI48F 1 "register_operand" "v") - (match_operand: 2 "register_operand" "0") - (match_operand:VI48F 3 "nonimmediate_operand" "vm")] - UNSPEC_VPERMI2_MASK) - (match_dup 0) + "operands[5] = gen_lowpart (mode, operands[2]);") + +(define_insn "*_vpermi2var3_mask" + [(set (match_operand:VPERMI2I 0 "register_operand" "=v") + (vec_merge:VPERMI2I + (unspec:VPERMI2I + [(match_operand: 2 "register_operand" "0") + (match_operand:VPERMI2I 1 "register_operand" "v") + (match_operand:VPERMI2I 3 "nonimmediate_operand" "vm")] + UNSPEC_VPERMT2) + (match_dup 2) 
(match_operand: 4 "register_operand" "Yk")))] "TARGET_AVX512F" "vpermi2\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}" @@ -18182,43 +18132,27 @@ (set_attr "prefix" "evex") (set_attr "mode" "")]) -(define_insn "_vpermi2var3_mask" - [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v") - (vec_merge:VI1_AVX512VL - (unspec:VI1_AVX512VL - [(match_operand:VI1_AVX512VL 1 "register_operand" "v") - (match_operand: 2 "register_operand" "0") - (match_operand:VI1_AVX512VL 3 "nonimmediate_operand" "vm")] - UNSPEC_VPERMI2_MASK) - (match_dup 0) - (match_operand: 4 "register_operand" "Yk")))] - "TARGET_AVX512VBMI" - "vpermi2\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}" - [(set_attr "type" "sselog") - (set_attr "prefix" "evex") - (set_attr "mode" "")]) - -(define_insn "_vpermi2var3_mask" - [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v") - (vec_merge:VI2_AVX512VL - (unspec:VI2_AVX512VL - [(match_operand:VI2_AVX512VL 1 "register_operand" "v") - (match_operand: 2 "register_operand" "0") - (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")] - UNSPEC_VPERMI2_MASK) - (match_dup 0) +(define_insn "*_vpermi2var3_mask" + [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v") + (vec_merge:VF_AVX512VL + (unspec:VF_AVX512VL + [(match_operand: 2 "register_operand" "0") + (match_operand:VF_AVX512VL 1 "register_operand" "v") + (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "vm")] + UNSPEC_VPERMT2) + (subreg:VF_AVX512VL (match_dup 2) 0) (match_operand: 4 "register_operand" "Yk")))] - "TARGET_AVX512BW" + "TARGET_AVX512F" "vpermi2\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}" [(set_attr "type" "sselog") (set_attr "prefix" "evex") (set_attr "mode" "")]) (define_expand "_vpermt2var3_maskz" - [(match_operand:VI48F 0 "register_operand") + [(match_operand:VPERMI2 0 "register_operand") (match_operand: 1 "register_operand") - (match_operand:VI48F 2 "register_operand") - (match_operand:VI48F 3 "nonimmediate_operand") + (match_operand:VPERMI2 2 "register_operand") + (match_operand:VPERMI2 3 
"nonimmediate_operand") (match_operand: 4 "register_operand")] "TARGET_AVX512F" { @@ -18228,80 +18162,28 @@ DONE; }) -(define_expand "_vpermt2var3_maskz" - [(match_operand:VI1_AVX512VL 0 "register_operand") - (match_operand: 1 "register_operand") - (match_operand:VI1_AVX512VL 2 "register_operand") - (match_operand:VI1_AVX512VL 3 "nonimmediate_operand") - (match_operand: 4 "register_operand")] - "TARGET_AVX512VBMI" -{ - emit_insn (gen__vpermt2var3_maskz_1 ( - operands[0], operands[1], operands[2], operands[3], - CONST0_RTX (mode), operands[4])); - DONE; -}) - -(define_expand "_vpermt2var3_maskz" - [(match_operand:VI2_AVX512VL 0 "register_operand") - (match_operand: 1 "register_operand") - (match_operand:VI2_AVX512VL 2 "register_operand") - (match_operand:VI2_AVX512VL 3 "nonimmediate_operand") - (match_operand: 4 "register_operand")] - "TARGET_AVX512BW" -{ - emit_insn (gen__vpermt2var3_maskz_1 ( - operands[0], operands[1], operands[2], operands[3], - CONST0_RTX (mode), operands[4])); - DONE; -}) - (define_insn "_vpermt2var3" - [(set (match_operand:VI48F 0 "register_operand" "=v") - (unspec:VI48F - [(match_operand: 1 "register_operand" "v") - (match_operand:VI48F 2 "register_operand" "0") - (match_operand:VI48F 3 "nonimmediate_operand" "vm")] + [(set (match_operand:VPERMI2 0 "register_operand" "=v,v") + (unspec:VPERMI2 + [(match_operand: 1 "register_operand" "v,0") + (match_operand:VPERMI2 2 "register_operand" "0,v") + (match_operand:VPERMI2 3 "nonimmediate_operand" "vm,vm")] UNSPEC_VPERMT2))] "TARGET_AVX512F" - "vpermt2\t{%3, %1, %0|%0, %1, %3}" - [(set_attr "type" "sselog") - (set_attr "prefix" "evex") - (set_attr "mode" "")]) - -(define_insn "_vpermt2var3" - [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v") - (unspec:VI1_AVX512VL - [(match_operand: 1 "register_operand" "v") - (match_operand:VI1_AVX512VL 2 "register_operand" "0") - (match_operand:VI1_AVX512VL 3 "nonimmediate_operand" "vm")] - UNSPEC_VPERMT2))] - "TARGET_AVX512VBMI" - "vpermt2\t{%3, %1, 
%0|%0, %1, %3}" - [(set_attr "type" "sselog") - (set_attr "prefix" "evex") - (set_attr "mode" "")]) - -(define_insn "_vpermt2var3" - [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v") - (unspec:VI2_AVX512VL - [(match_operand: 1 "register_operand" "v") - (match_operand:VI2_AVX512VL 2 "register_operand" "0") - (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")] - UNSPEC_VPERMT2))] - "TARGET_AVX512BW" - "vpermt2\t{%3, %1, %0|%0, %1, %3}" + "@ + vpermt2\t{%3, %1, %0|%0, %1, %3} + vpermi2\t{%3, %2, %0|%0, %2, %3}" [(set_attr "type" "sselog") (set_attr "prefix" "evex") (set_attr "mode" "")]) (define_insn "_vpermt2var3_mask" - [(set (match_operand:VI48F 0 "register_operand" "=v") - (vec_merge:VI48F - (unspec:VI48F + [(set (match_operand:VPERMI2 0 "register_operand" "=v") + (vec_merge:VPERMI2 + (unspec:VPERMI2 [(match_operand: 1 "register_operand" "v") - (match_operand:VI48F 2 "register_operand" "0") - (match_operand:VI48F 3 "nonimmediate_operand" "vm")] + (match_operand:VPERMI2 2 "register_operand" "0") + (match_operand:VPERMI2 3 "nonimmediate_operand" "vm")] UNSPEC_VPERMT2) (match_dup 2) (match_operand: 4 "register_operand" "Yk")))] @@ -18311,38 +18193,6 @@ (set_attr "prefix" "evex") (set_attr "mode" "")]) -(define_insn "_vpermt2var3_mask" - [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v") - (vec_merge:VI1_AVX512VL - (unspec:VI1_AVX512VL - [(match_operand: 1 "register_operand" "v") - (match_operand:VI1_AVX512VL 2 "register_operand" "0") - (match_operand:VI1_AVX512VL 3 "nonimmediate_operand" "vm")] - UNSPEC_VPERMT2) - (match_dup 2) - (match_operand: 4 "register_operand" "Yk")))] - "TARGET_AVX512VBMI" - "vpermt2\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}" - [(set_attr "type" "sselog") - (set_attr "prefix" "evex") - (set_attr "mode" "")]) - -(define_insn "_vpermt2var3_mask" - [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v") - (vec_merge:VI2_AVX512VL - (unspec:VI2_AVX512VL - [(match_operand: 1 "register_operand" "v") - 
(match_operand:VI2_AVX512VL 2 "register_operand" "0") - (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")] - UNSPEC_VPERMT2) - (match_dup 2) - (match_operand: 4 "register_operand" "Yk")))] - "TARGET_AVX512BW" - "vpermt2\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}" - [(set_attr "type" "sselog") - (set_attr "prefix" "evex") - (set_attr "mode" "")]) - (define_expand "avx_vperm2f1283" [(set (match_operand:AVX256MODE2P 0 "register_operand") (unspec:AVX256MODE2P diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index a3371184763..8dbf3b5604d 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,5 +1,22 @@ 2017-10-24 Jakub Jelinek + PR target/82460 + * gcc.target/i386/pr82460-1.c: New test. + * gcc.target/i386/pr82460-2.c: New test. + * gcc.target/i386/avx512f-vpermt2pd-1.c: Adjust scan-assembler* + regexps to allow vpermt2* to vpermi2* replacement or vice versa + where possible. + * gcc.target/i386/avx512vl-vpermt2pd-1.c: Likewise. + * gcc.target/i386/avx512f-vpermt2d-1.c: Likewise. + * gcc.target/i386/vect-pack-trunc-2.c: Likewise. + * gcc.target/i386/avx512vl-vpermt2ps-1.c: Likewise. + * gcc.target/i386/avx512vl-vpermt2q-1.c: Likewise. + * gcc.target/i386/avx512f-vpermt2ps-1.c: Likewise. + * gcc.target/i386/avx512vl-vpermt2d-1.c: Likewise. + * gcc.target/i386/avx512bw-vpermt2w-1.c: Likewise. + * gcc.target/i386/avx512vbmi-vpermt2b-1.c: Likewise. + * gcc.target/i386/avx512f-vpermt2q-1.c: Likewise. + PR target/82370 * gcc.target/i386/pr82370.c: New test. 
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpermt2w-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpermt2w-1.c index be8737ec785..a734cb600ce 100644 --- a/gcc/testsuite/gcc.target/i386/avx512bw-vpermt2w-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpermt2w-1.c @@ -1,14 +1,14 @@ /* { dg-do compile } */ /* { dg-options "-mavx512bw -mavx512vl -O2" } */ -/* { dg-final { scan-assembler-times "vpermt2w\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vpermt2w\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } * -/* { dg-final { scan-assembler-times "vpermt2w\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vpermt2w\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vpermt2w\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vpermt2w\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vpermt2w\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vpermt2w\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vpermt2w\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vperm\[ti]2w\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vperm\[ti]2w\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } * +/* { dg-final { scan-assembler-times "vperm\[ti]2w\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vperm\[ti]2w\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vperm\[ti]2w\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final 
{ scan-assembler-times "vperm\[ti]2w\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vperm\[ti]2w\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vperm\[ti]2w\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vperm\[ti]2w\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ #include diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpermt2d-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpermt2d-1.c index ceb1bd3bf0c..919cd217c98 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-vpermt2d-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-vpermt2d-1.c @@ -1,8 +1,8 @@ /* { dg-do compile } */ /* { dg-options "-mavx512f -O2" } */ -/* { dg-final { scan-assembler-times "vpermt2d\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vpermt2d\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vpermt2d\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vperm\[ti]2d\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vperm\[ti]2d\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vperm\[ti]2d\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ #include diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpermt2pd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpermt2pd-1.c index 2a4955b0f34..c021efb3192 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-vpermt2pd-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-vpermt2pd-1.c @@ -1,8 +1,8 @@ /* { dg-do compile } */ /* { dg-options "-mavx512f -O2" } */ -/* { dg-final { scan-assembler-times "vpermt2pd\[ 
\\t\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vperm\[ti]2pd\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vpermt2pd\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vpermt2pd\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vperm\[ti]2pd\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ #include diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpermt2ps-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpermt2ps-1.c index dadc6d70530..ffe177bf320 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-vpermt2ps-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-vpermt2ps-1.c @@ -1,8 +1,8 @@ /* { dg-do compile } */ /* { dg-options "-mavx512f -O2" } */ -/* { dg-final { scan-assembler-times "vpermt2ps\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vperm\[ti]2ps\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vpermt2ps\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vpermt2ps\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vperm\[ti]2ps\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ #include diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpermt2q-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpermt2q-1.c index 9c6e989b9dd..74bb4ed037c 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-vpermt2q-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-vpermt2q-1.c @@ -1,8 +1,8 @@ /* { dg-do compile } */ /* { dg-options "-mavx512f -O2" } */ -/* { dg-final { scan-assembler-times "vpermt2q\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times 
"vpermt2q\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vpermt2q\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vperm\[ti]2q\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vperm\[ti]2q\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vperm\[ti]2q\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ #include diff --git a/gcc/testsuite/gcc.target/i386/avx512vbmi-vpermt2b-1.c b/gcc/testsuite/gcc.target/i386/avx512vbmi-vpermt2b-1.c index f1c31cc56b0..24a0b9e3fce 100644 --- a/gcc/testsuite/gcc.target/i386/avx512vbmi-vpermt2b-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512vbmi-vpermt2b-1.c @@ -1,14 +1,14 @@ /* { dg-do compile } */ /* { dg-options "-mavx512vbmi -mavx512vl -O2" } */ -/* { dg-final { scan-assembler-times "vpermt2b\[ \\t\]+\[^\n\]*%zmm\[0-9\]+" 3 } } */ -/* { dg-final { scan-assembler-times "vpermt2b\[ \\t\]+\[^\n\]*%ymm\[0-9\]+" 3 } } * -/* { dg-final { scan-assembler-times "vpermt2b\[ \\t\]+\[^\n\]*%xmm\[0-9\]+" 3 } } */ -/* { dg-final { scan-assembler-times "vpermt2b\[ \\t\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\[^\{\]" 1 } } */ -/* { dg-final { scan-assembler-times "vpermt2b\[ \\t\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\[^\{\]" 1 } } */ -/* { dg-final { scan-assembler-times "vpermt2b\[ \\t\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\[^\{\]" 1 } } */ -/* { dg-final { scan-assembler-times "vpermt2b\[ \\t\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}" 1 } } */ -/* { dg-final { scan-assembler-times "vpermt2b\[ \\t\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}" 1 } } */ -/* { dg-final { scan-assembler-times "vpermt2b\[ \\t\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}" 1 } } */ +/* { dg-final { scan-assembler-times "vperm\[ti]2b\[ \\t\]+\[^\n\]*%zmm\[0-9\]+" 3 } } */ +/* { dg-final { scan-assembler-times "vperm\[ti]2b\[ 
\\t\]+\[^\n\]*%ymm\[0-9\]+" 3 } } * +/* { dg-final { scan-assembler-times "vperm\[ti]2b\[ \\t\]+\[^\n\]*%xmm\[0-9\]+" 3 } } */ +/* { dg-final { scan-assembler-times "vperm\[ti]2b\[ \\t\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\[^\{\]" 1 } } */ +/* { dg-final { scan-assembler-times "vperm\[ti]2b\[ \\t\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\[^\{\]" 1 } } */ +/* { dg-final { scan-assembler-times "vperm\[ti]2b\[ \\t\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\[^\{\]" 1 } } */ +/* { dg-final { scan-assembler-times "vperm\[ti]2b\[ \\t\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}" 1 } } */ +/* { dg-final { scan-assembler-times "vperm\[ti]2b\[ \\t\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}" 1 } } */ +/* { dg-final { scan-assembler-times "vperm\[ti]2b\[ \\t\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}" 1 } } */ #include diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpermt2d-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpermt2d-1.c index 3a6de905cb6..218650c6cc4 100644 --- a/gcc/testsuite/gcc.target/i386/avx512vl-vpermt2d-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpermt2d-1.c @@ -1,11 +1,11 @@ /* { dg-do compile } */ /* { dg-options "-mavx512vl -O2" } */ -/* { dg-final { scan-assembler-times "vpermt2d\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vpermt2d\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vpermt2d\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vpermt2d\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vpermt2d\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vpermt2d\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vperm\[ti]2d\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times 
"vperm\[ti]2d\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vperm\[ti]2d\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vperm\[ti]2d\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vperm\[ti]2d\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vperm\[ti]2d\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ #include diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpermt2pd-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpermt2pd-1.c index 5dd0734bca4..64bd30e40c3 100644 --- a/gcc/testsuite/gcc.target/i386/avx512vl-vpermt2pd-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpermt2pd-1.c @@ -1,11 +1,11 @@ /* { dg-do compile } */ /* { dg-options "-mavx512vl -O2" } */ -/* { dg-final { scan-assembler-times "vpermt2pd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vpermt2pd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vperm\[ti]2pd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vperm\[ti]2pd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vpermt2pd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vpermt2pd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vpermt2pd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vpermt2pd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vperm\[ti]2pd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { 
dg-final { scan-assembler-times "vperm\[ti]2pd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ #include diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpermt2ps-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpermt2ps-1.c index 0d7e37bb548..7af2dea6f9a 100644 --- a/gcc/testsuite/gcc.target/i386/avx512vl-vpermt2ps-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpermt2ps-1.c @@ -1,11 +1,11 @@ /* { dg-do compile } */ /* { dg-options "-mavx512vl -O2" } */ -/* { dg-final { scan-assembler-times "vpermt2ps\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vpermt2ps\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vperm\[ti]2ps\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vperm\[ti]2ps\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vpermt2ps\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vpermt2ps\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vpermt2ps\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vpermt2ps\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vperm\[ti]2ps\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vperm\[ti]2ps\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ #include diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpermt2q-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpermt2q-1.c index 475aa6dbd04..0cbd8b5b2a3 100644 --- a/gcc/testsuite/gcc.target/i386/avx512vl-vpermt2q-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpermt2q-1.c @@ -1,11 +1,11 @@ /* { dg-do compile } */ /* { 
dg-options "-mavx512vl -O2" } */ -/* { dg-final { scan-assembler-times "vpermt2q\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vpermt2q\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vpermt2q\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vpermt2q\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vpermt2q\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vpermt2q\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vperm\[ti]2q\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vperm\[ti]2q\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vperm\[ti]2q\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vperm\[ti]2q\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vperm\[ti]2q\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vperm\[ti]2q\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ #include diff --git a/gcc/testsuite/gcc.target/i386/pr82460-1.c b/gcc/testsuite/gcc.target/i386/pr82460-1.c new file mode 100644 index 00000000000..6529c4a9b9e --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr82460-1.c @@ -0,0 +1,30 @@ +/* PR target/82460 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx512vbmi" } */ +/* { dg-final { scan-assembler-not {\mvmovd} } } */ + +#include <x86intrin.h> + +__m512i +f1 (__m512i x, __m512i y, char *z) +{ + return _mm512_permutex2var_epi32 (y, x, _mm512_loadu_si512 (z)); +} + +__m512i +f2 (__m512i x, 
__m512i y, char *z) +{ + return _mm512_permutex2var_epi32 (x, y, _mm512_loadu_si512 (z)); +} + +__m512i +f3 (__m512i x, __m512i y, __m512i z) +{ + return _mm512_permutex2var_epi8 (y, x, z); +} + +__m512i +f4 (__m512i x, __m512i y, __m512i z) +{ + return _mm512_permutex2var_epi8 (x, y, z); +} diff --git a/gcc/testsuite/gcc.target/i386/pr82460-2.c b/gcc/testsuite/gcc.target/i386/pr82460-2.c new file mode 100644 index 00000000000..4d965216b59 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr82460-2.c @@ -0,0 +1,17 @@ +/* PR target/82460 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -mavx512vbmi -mno-prefer-avx256" } */ +/* We want to reuse the permutation mask in the loop, so use vpermt2b rather + than vpermi2b. */ +/* { dg-final { scan-assembler-not {\mvpermi2b\M} } } */ +/* { dg-final { scan-assembler {\mvpermt2b\M} } } */ + +void +foo (unsigned char *__restrict__ x, const unsigned short *__restrict__ y, + unsigned long z) +{ + unsigned char *w = x + z; + do + *x++ = *y++ >> 8; + while (x < w); +} diff --git a/gcc/testsuite/gcc.target/i386/vect-pack-trunc-2.c b/gcc/testsuite/gcc.target/i386/vect-pack-trunc-2.c index f3d899c1134..3503deaa9d9 100644 --- a/gcc/testsuite/gcc.target/i386/vect-pack-trunc-2.c +++ b/gcc/testsuite/gcc.target/i386/vect-pack-trunc-2.c @@ -25,4 +25,4 @@ avx512bw_test () abort (); } -/* { dg-final { scan-assembler-times "vpermi2w\[ \\t\]+\[^\n\]*%zmm" 1 } } */ +/* { dg-final { scan-assembler-times "vperm\[it]2w\[ \\t\]+\[^\n\]*%zmm" 1 } } */