From bc27ffae1b62831da78acb0362ed0dcabb1f3f1c Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Thu, 12 May 2016 10:33:14 +0200 Subject: [PATCH] re PR target/71019 (AVX512BW instructions emitted even without AVX512BW) PR target/71019 * config/i386/sse.md (_packssdw, _packusdw): Make sure EVEX encoded insn is not emitted unless TARGET_AVX512BW. (_packuswb, _packsswb): Likewise. For TARGET_AVX512BW, use "=v" constraint instead of "=x" for the result operand. * gcc.target/i386/avx512vl-pack-1.c: New test. * gcc.target/i386/avx512vl-pack-2.c: New test. * gcc.target/i386/avx512bw-pack-2.c: New test. From-SVN: r236163 --- gcc/ChangeLog | 8 ++ gcc/config/i386/sse.md | 50 ++++---- gcc/testsuite/ChangeLog | 5 + .../gcc.target/i386/avx512bw-pack-2.c | 100 ++++++++++++++++ .../gcc.target/i386/avx512vl-pack-1.c | 68 +++++++++++ .../gcc.target/i386/avx512vl-pack-2.c | 108 ++++++++++++++++++ 6 files changed, 316 insertions(+), 23 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/avx512bw-pack-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-pack-1.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-pack-2.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index caae23a5ab5..153c0a7d085 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,5 +1,13 @@ 2016-05-12 Jakub Jelinek + PR target/71019 + * config/i386/sse.md (_packssdw, + _packusdw): Make sure EVEX encoded insn + is not emitted unless TARGET_AVX512BW. + (_packuswb, _packsswb): + Likewise. For TARGET_AVX512BW, use "=v" constraint instead of "=x" + for the result operand. + * config/i386/sse.md (*vec_setv4sf_sse4_1, sse4_1_insertps): Use v constraint instead of x in avx alternatives. Use maybe_evex instead of vex prefix. diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index c2c7c8da4e8..6c5dd62e7b4 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -11500,54 +11500,57 @@ }) (define_insn "_packsswb" - [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x") + [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x,v") (vec_concat:VI1_AVX512 (ss_truncate: - (match_operand: 1 "register_operand" "0,v")) + (match_operand: 1 "register_operand" "0,x,v")) (ss_truncate: - (match_operand: 2 "vector_operand" "xBm,vm"))))] + (match_operand: 2 "vector_operand" "xBm,xm,vm"))))] "TARGET_SSE2 && && " "@ packsswb\t{%2, %0|%0, %2} + vpacksswb\t{%2, %1, %0|%0, %1, %2} vpacksswb\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "isa" "noavx,avx") + [(set_attr "isa" "noavx,avx,avx512bw") (set_attr "type" "sselog") - (set_attr "prefix_data16" "1,*") - (set_attr "prefix" "orig,maybe_evex") + (set_attr "prefix_data16" "1,*,*") + (set_attr "prefix" "orig,,evex") (set_attr "mode" "")]) (define_insn "_packssdw" - [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v") + [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x,v") (vec_concat:VI2_AVX2 (ss_truncate: - (match_operand: 1 "register_operand" "0,v")) + (match_operand: 1 "register_operand" "0,x,v")) (ss_truncate: - (match_operand: 2 "vector_operand" "xBm,vm"))))] + (match_operand: 2 "vector_operand" "xBm,xm,vm"))))] "TARGET_SSE2 && && " "@ packssdw\t{%2, %0|%0, %2} + vpackssdw\t{%2, %1, %0|%0, %1, %2} vpackssdw\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "isa" "noavx,avx") + [(set_attr "isa" "noavx,avx,avx512bw") (set_attr "type" "sselog") - (set_attr "prefix_data16" "1,*") - (set_attr "prefix" "orig,vex") + (set_attr "prefix_data16" "1,*,*") + (set_attr "prefix" "orig,,evex") (set_attr "mode" "")]) (define_insn "_packuswb" - [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x") + [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x,v") (vec_concat:VI1_AVX512 (us_truncate: - (match_operand: 1 "register_operand" "0,v")) + (match_operand: 1 "register_operand" "0,x,v")) (us_truncate: - (match_operand: 2 "vector_operand" "xBm,vm"))))] + (match_operand: 2 "vector_operand" "xBm,xm,vm"))))] "TARGET_SSE2 && && " "@ packuswb\t{%2, %0|%0, %2} + vpackuswb\t{%2, %1, %0|%0, %1, %2} vpackuswb\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "isa" "noavx,avx") + [(set_attr "isa" "noavx,avx,avx512bw") (set_attr "type" "sselog") - (set_attr "prefix_data16" "1,*") - (set_attr "prefix" "orig,vex") + (set_attr "prefix_data16" "1,*,*") + (set_attr "prefix" "orig,,evex") (set_attr "mode" "")]) (define_insn "avx512bw_interleave_highv64qi" @@ -14572,21 +14575,22 @@ (set_attr "mode" "")]) (define_insn "_packusdw" - [(set (match_operand:VI2_AVX2 0 "register_operand" "=Yr,*x,v") + [(set (match_operand:VI2_AVX2 0 "register_operand" "=Yr,*x,x,v") (vec_concat:VI2_AVX2 (us_truncate: - (match_operand: 1 "register_operand" "0,0,v")) + (match_operand: 1 "register_operand" "0,0,x,v")) (us_truncate: - (match_operand: 2 "vector_operand" "YrBm,*xBm,vm"))))] + (match_operand: 2 "vector_operand" "YrBm,*xBm,xm,vm"))))] "TARGET_SSE4_1 && && " "@ packusdw\t{%2, %0|%0, %2} packusdw\t{%2, %0|%0, %2} + vpackusdw\t{%2, %1, %0|%0, %1, %2} vpackusdw\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "isa" "noavx,noavx,avx") + [(set_attr "isa" "noavx,noavx,avx,avx512bw") (set_attr "type" "sselog") (set_attr "prefix_extra" "1") - (set_attr "prefix" "orig,orig,maybe_evex") + (set_attr "prefix" "orig,orig,,evex") (set_attr "mode" "")]) (define_insn "_pblendvb" diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 31f7bc20082..ec16edd781d 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,5 +1,10 @@ 2016-05-12 Jakub Jelinek + PR target/71019 + * gcc.target/i386/avx512vl-pack-1.c: New test. + * gcc.target/i386/avx512vl-pack-2.c: New test. + * gcc.target/i386/avx512bw-pack-2.c: New test. + * gcc.target/i386/avx512vl-vinsertps-1.c: New test. * gcc.target/i386/avx512dq-abs-copysign-1.c: New test. diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-pack-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-pack-2.c new file mode 100644 index 00000000000..3b9c201b042 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512bw-pack-2.c @@ -0,0 +1,100 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-O2 -mavx512vl -mavx512bw" } */ + +#include + +__m128i +f1 (__m128i a, __m128i b) +{ + register __m128i c __asm ("xmm16") = a; + asm volatile ("" : "+v" (c)); + c = _mm_packs_epi16 (c, b); + asm volatile ("" : "+v" (c)); + return c; +} + +/* { dg-final { scan-assembler-times "vpacksswb\[^\n\r\]*xmm16" 1 } } */ + +__m128i +f2 (__m128i a, __m128i b) +{ + register __m128i c __asm ("xmm16") = a; + asm volatile ("" : "+v" (c)); + c = _mm_packs_epi32 (c, b); + asm volatile ("" : "+v" (c)); + return c; +} + +/* { dg-final { scan-assembler-times "vpackssdw\[^\n\r\]*xmm16" 1 } } */ + +__m128i +f3 (__m128i a, __m128i b) +{ + register __m128i c __asm ("xmm16") = a; + asm volatile ("" : "+v" (c)); + c = _mm_packus_epi16 (c, b); + asm volatile ("" : "+v" (c)); + return c; +} + +/* { dg-final { scan-assembler-times "vpackuswb\[^\n\r\]*xmm16" 1 } } */ + +__m128i +f4 (__m128i a, __m128i b) +{ + register __m128i c __asm ("xmm16") = a; + asm volatile ("" : "+v" (c)); + c = _mm_packus_epi32 (c, b); + asm volatile ("" : "+v" (c)); + return c; +} + +/* { dg-final { scan-assembler-times "vpackusdw\[^\n\r\]*xmm16" 1 } } */ + +__m256i +f5 (__m256i a, __m256i b) +{ + register __m256i c __asm ("xmm16") = a; + asm volatile ("" : "+v" (c)); + c = _mm256_packs_epi16 (c, b); + asm volatile ("" : "+v" (c)); + return c; +} + +/* { dg-final { scan-assembler-times "vpacksswb\[^\n\r\]*ymm16" 1 } } */ + +__m256i +f6 (__m256i a, __m256i b) +{ + register __m256i c __asm ("xmm16") = a; + asm volatile ("" : "+v" (c)); + c = _mm256_packs_epi32 (c, b); + asm volatile ("" : "+v" (c)); + return c; +} + +/* { dg-final { scan-assembler-times "vpackssdw\[^\n\r\]*ymm16" 1 } } */ + +__m256i +f7 (__m256i a, __m256i b) +{ + register __m256i c __asm ("xmm16") = a; + asm volatile ("" : "+v" (c)); + c = _mm256_packus_epi16 (c, b); + asm volatile ("" : "+v" (c)); + return c; +} + +/* { dg-final { scan-assembler-times "vpackuswb\[^\n\r\]*ymm16" 1 } } */ + +__m256i +f8 (__m256i a, __m256i b) +{ + register __m256i c __asm ("xmm16") = a; + asm volatile ("" : "+v" (c)); + c = _mm256_packus_epi32 (c, b); + asm volatile ("" : "+v" (c)); + return c; +} + +/* { dg-final { scan-assembler-times "vpackusdw\[^\n\r\]*ymm16" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pack-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-pack-1.c new file mode 100644 index 00000000000..a589d63ed3b --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512vl-pack-1.c @@ -0,0 +1,68 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx512vl -mno-avx512bw" } */ + +#include + +__m128i +f1 (__m128i a, __m128i b) +{ + return _mm_packs_epi16 (a, b); +} + +/* { dg-final { scan-assembler-times "vpacksswb\[^\n\r\]*xmm\[0-9\]" 1 } } */ + +__m128i +f2 (__m128i a, __m128i b) +{ + return _mm_packs_epi32 (a, b); +} + +/* { dg-final { scan-assembler-times "vpackssdw\[^\n\r\]*xmm\[0-9\]" 1 } } */ + +__m128i +f3 (__m128i a, __m128i b) +{ + return _mm_packus_epi16 (a, b); +} + +/* { dg-final { scan-assembler-times "vpackuswb\[^\n\r\]*xmm\[0-9\]" 1 } } */ + +__m128i +f4 (__m128i a, __m128i b) +{ + return _mm_packus_epi32 (a, b); +} + +/* { dg-final { scan-assembler-times "vpackusdw\[^\n\r\]*xmm\[0-9\]" 1 } } */ + +__m256i +f5 (__m256i a, __m256i b) +{ + return _mm256_packs_epi16 (a, b); +} + +/* { dg-final { scan-assembler-times "vpacksswb\[^\n\r\]*ymm\[0-9\]" 1 } } */ + +__m256i +f6 (__m256i a, __m256i b) +{ + return _mm256_packs_epi32 (a, b); +} + +/* { dg-final { scan-assembler-times "vpackssdw\[^\n\r\]*ymm\[0-9\]" 1 } } */ + +__m256i +f7 (__m256i a, __m256i b) +{ + return _mm256_packus_epi16 (a, b); +} + +/* { dg-final { scan-assembler-times "vpackuswb\[^\n\r\]*ymm\[0-9\]" 1 } } */ + +__m256i +f8 (__m256i a, __m256i b) +{ + return _mm256_packus_epi32 (a, b); +} + +/* { dg-final { scan-assembler-times "vpackusdw\[^\n\r\]*ymm\[0-9\]" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pack-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-pack-2.c new file mode 100644 index 00000000000..05820065e9f --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512vl-pack-2.c @@ -0,0 +1,108 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-O2 -mavx512vl -mno-avx512bw" } */ + +#include + +__m128i +f1 (__m128i a, __m128i b) +{ + register __m128i c __asm ("xmm16") = a; + asm volatile ("" : "+v" (c)); + c = _mm_packs_epi16 (c, b); + asm volatile ("" : "+v" (c)); + return c; +} + +/* { dg-final { scan-assembler-times "vpacksswb\[^\n\r\]*xmm\[0-9\]" 1 } } */ +/* { dg-final { scan-assembler-not "vpacksswb\[^\n\r\]*xmm16" } } */ + +__m128i +f2 (__m128i a, __m128i b) +{ + register __m128i c __asm ("xmm16") = a; + asm volatile ("" : "+v" (c)); + c = _mm_packs_epi32 (c, b); + asm volatile ("" : "+v" (c)); + return c; +} + +/* { dg-final { scan-assembler-times "vpackssdw\[^\n\r\]*xmm\[0-9\]" 1 } } */ +/* { dg-final { scan-assembler-not "vpackssdw\[^\n\r\]*xmm16" } } */ + +__m128i +f3 (__m128i a, __m128i b) +{ + register __m128i c __asm ("xmm16") = a; + asm volatile ("" : "+v" (c)); + c = _mm_packus_epi16 (c, b); + asm volatile ("" : "+v" (c)); + return c; +} + +/* { dg-final { scan-assembler-times "vpackuswb\[^\n\r\]*xmm\[0-9\]" 1 } } */ +/* { dg-final { scan-assembler-not "vpackuswb\[^\n\r\]*xmm16" } } */ + +__m128i +f4 (__m128i a, __m128i b) +{ + register __m128i c __asm ("xmm16") = a; + asm volatile ("" : "+v" (c)); + c = _mm_packus_epi32 (c, b); + asm volatile ("" : "+v" (c)); + return c; +} + +/* { dg-final { scan-assembler-times "vpackusdw\[^\n\r\]*xmm\[0-9\]" 1 } } */ +/* { dg-final { scan-assembler-not "vpackusdw\[^\n\r\]*xmm16" } } */ + +__m256i +f5 (__m256i a, __m256i b) +{ + register __m256i c __asm ("xmm16") = a; + asm volatile ("" : "+v" (c)); + c = _mm256_packs_epi16 (c, b); + asm volatile ("" : "+v" (c)); + return c; +} + +/* { dg-final { scan-assembler-times "vpacksswb\[^\n\r\]*ymm\[0-9\]" 1 } } */ +/* { dg-final { scan-assembler-not "vpacksswb\[^\n\r\]*ymm16" } } */ + +__m256i +f6 (__m256i a, __m256i b) +{ + register __m256i c __asm ("xmm16") = a; + asm volatile ("" : "+v" (c)); + c = _mm256_packs_epi32 (c, b); + asm volatile ("" : "+v" (c)); + return c; +} + +/* { dg-final { scan-assembler-times "vpackssdw\[^\n\r\]*ymm\[0-9\]" 1 } } */ +/* { dg-final { scan-assembler-not "vpackssdw\[^\n\r\]*ymm16" } } */ + +__m256i +f7 (__m256i a, __m256i b) +{ + register __m256i c __asm ("xmm16") = a; + asm volatile ("" : "+v" (c)); + c = _mm256_packus_epi16 (c, b); + asm volatile ("" : "+v" (c)); + return c; +} + +/* { dg-final { scan-assembler-times "vpackuswb\[^\n\r\]*ymm\[0-9\]" 1 } } */ +/* { dg-final { scan-assembler-not "vpackuswb\[^\n\r\]*ymm16" } } */ + +__m256i +f8 (__m256i a, __m256i b) +{ + register __m256i c __asm ("xmm16") = a; + asm volatile ("" : "+v" (c)); + c = _mm256_packus_epi32 (c, b); + asm volatile ("" : "+v" (c)); + return c; +} + +/* { dg-final { scan-assembler-times "vpackusdw\[^\n\r\]*ymm\[0-9\]" 1 } } */ +/* { dg-final { scan-assembler-not "vpackusdw\[^\n\r\]*ymm16" } } */ -- 2.30.2