From dd9b529f08c3c6064c37234922d298336d78caf7 Mon Sep 17 00:00:00 2001 From: "H.J. Lu" Date: Tue, 28 Jan 2020 11:32:56 -0800 Subject: [PATCH] i386: Prefer TARGET_AVX over TARGET_SSE_TYPELESS_STORES movaps/movups are one byte shorter than movdqa/movdqu, but this isn't the case for AVX or AVX512. This patch prefers TARGET_AVX over TARGET_SSE_TYPELESS_STORES and adjusts vmovups checks in assembly outputs. gcc/ PR target/91461 * config/i386/i386.md (*movoi_internal_avx): Remove TARGET_SSE_TYPELESS_STORES check. (*movti_internal): Prefer TARGET_AVX over TARGET_SSE_TYPELESS_STORES. (*movtf_internal): Likewise. * config/i386/sse.md (mov<mode>_internal): Prefer TARGET_AVX over TARGET_SSE_TYPELESS_STORES. Remove "<MODE_SIZE> == 16" check from TARGET_SSE_TYPELESS_STORES. gcc/testsuite/ PR target/91461 * gcc.target/i386/avx256-unaligned-store-2.c: Don't check vmovups. * gcc.target/i386/avx256-unaligned-store-3.c: Likewise. * gcc.target/i386/pieces-memcpy-4.c: Likewise. * gcc.target/i386/pieces-memcpy-5.c: Likewise. * gcc.target/i386/pieces-memcpy-6.c: Likewise. * gcc.target/i386/pieces-strcpy-2.c: Likewise. * gcc.target/i386/pr90980-1.c: Likewise. * gcc.target/i386/pr87317-4.c: Check "\tvmovd\t" instead of "vmovd" to avoid matching "vmovdqu". * gcc.target/i386/pr87317-5.c: Likewise. * gcc.target/i386/pr87317-7.c: Likewise. * gcc.target/i386/pr91461-1.c: New test. * gcc.target/i386/pr91461-2.c: Likewise. * gcc.target/i386/pr91461-3.c: Likewise. * gcc.target/i386/pr91461-4.c: Likewise. * gcc.target/i386/pr91461-5.c: Likewise. 
--- gcc/ChangeLog | 12 +++ gcc/config/i386/i386.md | 12 ++- gcc/config/i386/sse.md | 9 +-- gcc/testsuite/ChangeLog | 21 +++++ .../i386/avx256-unaligned-store-2.c | 4 +- .../i386/avx256-unaligned-store-3.c | 4 +- .../gcc.target/i386/pieces-memcpy-4.c | 3 +- .../gcc.target/i386/pieces-memcpy-5.c | 3 +- .../gcc.target/i386/pieces-memcpy-6.c | 3 +- .../gcc.target/i386/pieces-strcpy-2.c | 2 +- gcc/testsuite/gcc.target/i386/pr87317-4.c | 2 +- gcc/testsuite/gcc.target/i386/pr87317-5.c | 2 +- gcc/testsuite/gcc.target/i386/pr87317-7.c | 2 +- gcc/testsuite/gcc.target/i386/pr90980-1.c | 2 +- gcc/testsuite/gcc.target/i386/pr91461-1.c | 66 ++++++++++++++++ gcc/testsuite/gcc.target/i386/pr91461-2.c | 19 +++++ gcc/testsuite/gcc.target/i386/pr91461-3.c | 76 +++++++++++++++++++ gcc/testsuite/gcc.target/i386/pr91461-4.c | 21 +++++ gcc/testsuite/gcc.target/i386/pr91461-5.c | 17 +++++ 19 files changed, 253 insertions(+), 27 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr91461-1.c create mode 100644 gcc/testsuite/gcc.target/i386/pr91461-2.c create mode 100644 gcc/testsuite/gcc.target/i386/pr91461-3.c create mode 100644 gcc/testsuite/gcc.target/i386/pr91461-4.c create mode 100644 gcc/testsuite/gcc.target/i386/pr91461-5.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 51347484251..05f3b724660 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,15 @@ +2020-01-28 H.J. Lu + + PR target/91461 + * config/i386/i386.md (*movoi_internal_avx): Remove + TARGET_SSE_TYPELESS_STORES check. + (*movti_internal): Prefer TARGET_AVX over + TARGET_SSE_TYPELESS_STORES. + (*movtf_internal): Likewise. + * config/i386/sse.md (mov<mode>_internal): Prefer TARGET_AVX over + TARGET_SSE_TYPELESS_STORES. Remove "<MODE_SIZE> == 16" check + from TARGET_SSE_TYPELESS_STORES. + 2020-01-28 David Malcolm * diagnostic-core.h (warning_at): Rename overload to... 
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index a125ab350bb..9f0077d59a9 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -1980,9 +1980,7 @@ (and (eq_attr "alternative" "1") (match_test "TARGET_AVX512VL")) (const_string "XI") - (ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL") - (and (eq_attr "alternative" "3") - (match_test "TARGET_SSE_TYPELESS_STORES"))) + (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL") (const_string "V8SF") ] (const_string "OI")))]) @@ -2059,13 +2057,13 @@ (and (eq_attr "alternative" "3") (match_test "TARGET_AVX512VL")) (const_string "XI") + (match_test "TARGET_AVX") + (const_string "TI") (ior (not (match_test "TARGET_SSE2")) (ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL") (and (eq_attr "alternative" "5") (match_test "TARGET_SSE_TYPELESS_STORES")))) (const_string "V4SF") - (match_test "TARGET_AVX") - (const_string "TI") (match_test "optimize_function_for_size_p (cfun)") (const_string "V4SF") ] @@ -3324,13 +3322,13 @@ (set (attr "mode") (cond [(eq_attr "alternative" "3,4") (const_string "DI") + (match_test "TARGET_AVX") + (const_string "TI") (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL") (const_string "V4SF") (and (eq_attr "alternative" "2") (match_test "TARGET_SSE_TYPELESS_STORES")) (const_string "V4SF") - (match_test "TARGET_AVX") - (const_string "TI") (ior (not (match_test "TARGET_SSE2")) (match_test "optimize_function_for_size_p (cfun)")) (const_string "V4SF") diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 04a8c5e56b9..abbd879aab3 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -1116,13 +1116,12 @@ (cond [(and (eq_attr "alternative" "1") (match_test "TARGET_AVX512VL")) (const_string "") - (and (match_test " == 16") - (ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL") - (and (eq_attr "alternative" "3") - (match_test "TARGET_SSE_TYPELESS_STORES")))) - (const_string "") (match_test "TARGET_AVX") (const_string "") + (ior 
(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL") + (and (eq_attr "alternative" "3") + (match_test "TARGET_SSE_TYPELESS_STORES"))) + (const_string "") (ior (not (match_test "TARGET_SSE2")) (match_test "optimize_function_for_size_p (cfun)")) (const_string "V4SF") diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 693650d2486..37ab4b9ea62 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,24 @@ +2020-01-28 H.J. Lu + + PR target/91461 + * gcc.target/i386/avx256-unaligned-store-2.c: Don't check + vmovups. + * gcc.target/i386/avx256-unaligned-store-3.c: Likewise. + * gcc.target/i386/pieces-memcpy-4.c: Likewise. + * gcc.target/i386/pieces-memcpy-5.c: Likewise. + * gcc.target/i386/pieces-memcpy-6.c: Likewise. + * gcc.target/i386/pieces-strcpy-2.c: Likewise. + * gcc.target/i386/pr90980-1.c: Likewise. + * gcc.target/i386/pr87317-4.c: Check "\tvmovd\t" instead of + "vmovd" to avoid matching "vmovdqu". + * gcc.target/i386/pr87317-5.c: Likewise. + * gcc.target/i386/pr87317-7.c: Likewise. + * gcc.target/i386/pr91461-1.c: New test. + * gcc.target/i386/pr91461-2.c: Likewise. + * gcc.target/i386/pr91461-3.c: Likewise. + * gcc.target/i386/pr91461-4.c: Likewise. + * gcc.target/i386/pr91461-5.c: Likewise. 
+ 2020-01-28 David Malcolm * gcc.dg/plugin/diagnostic_plugin_test_metadata.c: Update for diff --git a/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-2.c b/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-2.c index 1e7969bb47d..be12529e8d5 100644 --- a/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-2.c +++ b/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-2.c @@ -23,6 +23,6 @@ avx_test (void) } } -/* { dg-final { scan-assembler-not "vmovups.*movv32qi_internal/3" } } */ -/* { dg-final { scan-assembler "vmovups.*movv16qi_internal/3" } } */ +/* { dg-final { scan-assembler-not "vmovdqu.*movv32qi_internal/3" } } */ +/* { dg-final { scan-assembler "vmovdqu.*movv16qi_internal/3" } } */ /* { dg-final { scan-assembler "vextract.128" } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-3.c b/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-3.c index a439a66ff34..918028df9ed 100644 --- a/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-3.c +++ b/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-3.c @@ -17,6 +17,6 @@ avx_test (void) d[i] = c[i] * 20.0; } -/* { dg-final { scan-assembler-not "vmovups.*movv4df_internal/3" } } */ -/* { dg-final { scan-assembler "vmovups.*movv2df_internal/3" } } */ +/* { dg-final { scan-assembler-not "vmovupd.*movv4df_internal/3" } } */ +/* { dg-final { scan-assembler "vmovupd.*movv2df_internal/3" } } */ /* { dg-final { scan-assembler "vextractf128" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pieces-memcpy-4.c b/gcc/testsuite/gcc.target/i386/pieces-memcpy-4.c index 64e8921abe2..6f20203a146 100644 --- a/gcc/testsuite/gcc.target/i386/pieces-memcpy-4.c +++ b/gcc/testsuite/gcc.target/i386/pieces-memcpy-4.c @@ -9,5 +9,4 @@ foo (void) __builtin_memcpy (dst, src, 18); } -/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%xmm" 1 } } */ -/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 1 } } */ +/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%xmm" 2 } } */ 
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memcpy-5.c b/gcc/testsuite/gcc.target/i386/pieces-memcpy-5.c index 3c464c32f8e..5a1c7b3d512 100644 --- a/gcc/testsuite/gcc.target/i386/pieces-memcpy-5.c +++ b/gcc/testsuite/gcc.target/i386/pieces-memcpy-5.c @@ -9,5 +9,4 @@ foo (void) __builtin_memcpy (dst, src, 19); } -/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%xmm" 1 } } */ -/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 1 } } */ +/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%xmm" 2 } } */ diff --git a/gcc/testsuite/gcc.target/i386/pieces-memcpy-6.c b/gcc/testsuite/gcc.target/i386/pieces-memcpy-6.c index cdb00e05bc1..5f99cc98c47 100644 --- a/gcc/testsuite/gcc.target/i386/pieces-memcpy-6.c +++ b/gcc/testsuite/gcc.target/i386/pieces-memcpy-6.c @@ -9,5 +9,4 @@ foo (void) __builtin_memcpy (dst, src, 33); } -/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%xmm" 2 } } */ -/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 2 } } */ +/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%xmm" 4 } } */ diff --git a/gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c b/gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c index 74212559508..90446edb4f3 100644 --- a/gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c +++ b/gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c @@ -12,4 +12,4 @@ foo (char *s) } /* { dg-final { scan-assembler-times "vmovdqa\[ \\t\]+\[^\n\]*%xmm" 4 } } */ -/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 4 } } */ +/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%xmm" 4 } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr87317-4.c b/gcc/testsuite/gcc.target/i386/pr87317-4.c index 2d4f24a89e9..d802575f4c5 100644 --- a/gcc/testsuite/gcc.target/i386/pr87317-4.c +++ b/gcc/testsuite/gcc.target/i386/pr87317-4.c @@ -1,7 +1,7 @@ /* { dg-do compile } */ /* { dg-options "-O2 -march=haswell" } */ /* { dg-final { scan-assembler-times "vpmovzxbd" 1 } } 
*/ -/* { dg-final { scan-assembler-not "vmovd" } } */ +/* { dg-final { scan-assembler-not "\tvmovd\t" } } */ #include diff --git a/gcc/testsuite/gcc.target/i386/pr87317-5.c b/gcc/testsuite/gcc.target/i386/pr87317-5.c index 96f82847e5d..42cf7dc0ffe 100644 --- a/gcc/testsuite/gcc.target/i386/pr87317-5.c +++ b/gcc/testsuite/gcc.target/i386/pr87317-5.c @@ -1,7 +1,7 @@ /* { dg-do compile } */ /* { dg-options "-O2 -march=haswell" } */ /* { dg-final { scan-assembler-times "vpmovzxwq" 1 } } */ -/* { dg-final { scan-assembler-not "vmovd" } } */ +/* { dg-final { scan-assembler-not "\tvmovd\t" } } */ #include diff --git a/gcc/testsuite/gcc.target/i386/pr87317-7.c b/gcc/testsuite/gcc.target/i386/pr87317-7.c index 2c043d9eb26..c76af7efd5f 100644 --- a/gcc/testsuite/gcc.target/i386/pr87317-7.c +++ b/gcc/testsuite/gcc.target/i386/pr87317-7.c @@ -1,7 +1,7 @@ /* { dg-do compile { target { ! ia32 } } } */ /* { dg-options "-O2 -march=haswell" } */ /* { dg-final { scan-assembler-times "vpmovzxbd" 1 } } */ -/* { dg-final { scan-assembler-not "vmovd" } } */ +/* { dg-final { scan-assembler-not "\tvmovd\t" } } */ #include diff --git a/gcc/testsuite/gcc.target/i386/pr90980-1.c b/gcc/testsuite/gcc.target/i386/pr90980-1.c index 72a30dc8da2..885518984c5 100644 --- a/gcc/testsuite/gcc.target/i386/pr90980-1.c +++ b/gcc/testsuite/gcc.target/i386/pr90980-1.c @@ -1,6 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-march=skylake-avx512 -O2" } */ -/* { dg-final { scan-assembler-times "(?:vmovups|vmovdqu)\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*\\)(?:\n|\[ \\t\]+#)" 2 } } */ +/* { dg-final { scan-assembler-times "vmovdqu\[2346\]*\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*\\)(?:\n|\[ \\t\]+#)" 2 } } */ #include diff --git a/gcc/testsuite/gcc.target/i386/pr91461-1.c b/gcc/testsuite/gcc.target/i386/pr91461-1.c new file mode 100644 index 00000000000..0c94b8e2b76 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr91461-1.c @@ -0,0 +1,66 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx" } */ +/* { 
dg-final { scan-assembler "\tvmovdqa\t" } } */ +/* { dg-final { scan-assembler "\tvmovdqu\t" } } */ +/* { dg-final { scan-assembler "\tvmovapd\t" } } */ +/* { dg-final { scan-assembler "\tvmovupd\t" } } */ +/* { dg-final { scan-assembler-not "\tvmovaps\t" } } */ +/* { dg-final { scan-assembler-not "\tvmovups\t" } } */ + +#include + +void +foo1 (__m128i *p, __m128i x) +{ + *p = x; +} + +void +foo2 (__m128d *p, __m128d x) +{ + *p = x; +} + +void +foo3 (__float128 *p, __float128 x) +{ + *p = x; +} + +void +foo4 (__m128i_u *p, __m128i x) +{ + *p = x; +} + +void +foo5 (__m128d_u *p, __m128d x) +{ + *p = x; +} + +typedef __float128 __float128_u __attribute__ ((__aligned__ (1))); + +void +foo6 (__float128_u *p, __float128 x) +{ + *p = x; +} + +#ifdef __x86_64__ +typedef __int128 __int128_u __attribute__ ((__aligned__ (1))); + +extern __int128 int128; + +void +foo7 (__int128 *p) +{ + *p = int128; +} + +void +foo8 (__int128_u *p) +{ + *p = int128; +} +#endif diff --git a/gcc/testsuite/gcc.target/i386/pr91461-2.c b/gcc/testsuite/gcc.target/i386/pr91461-2.c new file mode 100644 index 00000000000..921cfaf9780 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr91461-2.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx" } */ +/* { dg-final { scan-assembler "\tvmovdqa\t" } } */ +/* { dg-final { scan-assembler "\tvmovapd\t" } } */ +/* { dg-final { scan-assembler-not "\tvmovaps\t" } } */ + +#include + +void +foo1 (__m256i *p, __m256i x) +{ + *p = x; +} + +void +foo2 (__m256d *p, __m256d x) +{ + *p = x; +} diff --git a/gcc/testsuite/gcc.target/i386/pr91461-3.c b/gcc/testsuite/gcc.target/i386/pr91461-3.c new file mode 100644 index 00000000000..c67a48063bf --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr91461-3.c @@ -0,0 +1,76 @@ +/* { dg-do compile { target { ! 
ia32 } } } */ +/* { dg-options "-O2 -mavx512f -mavx512vl" } */ +/* { dg-final { scan-assembler-not "\tvmovaps\t" } } */ +/* { dg-final { scan-assembler-not "\tvmovups\t" } } */ + +#include + +void +foo1 (__m128i *p, __m128i a) +{ + register __m128i x __asm ("xmm16") = a; + asm volatile ("" : "+v" (x)); + *p = x; +} + +void +foo2 (__m128d *p, __m128d a) +{ + register __m128d x __asm ("xmm16") = a; + asm volatile ("" : "+v" (x)); + *p = x; +} + +void +foo3 (__float128 *p, __float128 a) +{ + register __float128 x __asm ("xmm16") = a; + asm volatile ("" : "+v" (x)); + *p = x; +} + +void +foo4 (__m128i_u *p, __m128i a) +{ + register __m128i x __asm ("xmm16") = a; + asm volatile ("" : "+v" (x)); + *p = x; +} + +void +foo5 (__m128d_u *p, __m128d a) +{ + register __m128d x __asm ("xmm16") = a; + asm volatile ("" : "+v" (x)); + *p = x; +} + +typedef __float128 __float128_u __attribute__ ((__aligned__ (1))); + +void +foo6 (__float128_u *p, __float128 a) +{ + register __float128 x __asm ("xmm16") = a; + asm volatile ("" : "+v" (x)); + *p = x; +} + +typedef __int128 __int128_u __attribute__ ((__aligned__ (1))); + +extern __int128 int128; + +void +foo7 (__int128 *p) +{ + register __int128 x __asm ("xmm16") = int128; + asm volatile ("" : "+v" (x)); + *p = x; +} + +void +foo8 (__int128_u *p) +{ + register __int128 x __asm ("xmm16") = int128; + asm volatile ("" : "+v" (x)); + *p = x; +} diff --git a/gcc/testsuite/gcc.target/i386/pr91461-4.c b/gcc/testsuite/gcc.target/i386/pr91461-4.c new file mode 100644 index 00000000000..69df590de3a --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr91461-4.c @@ -0,0 +1,21 @@ +/* { dg-do compile { target { ! 
ia32 } } } */ +/* { dg-options "-O2 -mavx512f -mavx512vl" } */ +/* { dg-final { scan-assembler-not "\tvmovaps\t" } } */ + +#include + +void +foo1 (__m256i *p, __m256i a) +{ + register __m256i x __asm ("xmm16") = a; + asm volatile ("" : "+v" (x)); + *p = x; +} + +void +foo2 (__m256d *p, __m256d a) +{ + register __m256d x __asm ("xmm16") = a; + asm volatile ("" : "+v" (x)); + *p = x; +} diff --git a/gcc/testsuite/gcc.target/i386/pr91461-5.c b/gcc/testsuite/gcc.target/i386/pr91461-5.c new file mode 100644 index 00000000000..974263042f3 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr91461-5.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx512f" } */ +/* { dg-final { scan-assembler-not "\tvmovaps\t" } } */ + +#include + +void +foo1 (__m512i *p, __m512i x) +{ + *p = x; +} + +void +foo2 (__m512d *p, __m512d x) +{ + *p = x; +} -- 2.30.2