i386: Prefer TARGET_AVX over TARGET_SSE_TYPELESS_STORES
author H.J. Lu <hjl.tools@gmail.com>
Tue, 28 Jan 2020 19:32:56 +0000 (11:32 -0800)
committer H.J. Lu <hjl.tools@gmail.com>
Tue, 28 Jan 2020 19:33:12 +0000 (11:33 -0800)
movaps/movups are one byte shorter than movdqa/movdqu, but that isn't the
case for AVX or AVX512.  This patch prefers TARGET_AVX over
TARGET_SSE_TYPELESS_STORES and adjusts the vmovups checks in assembly outputs.

gcc/

PR target/91461
* config/i386/i386.md (*movoi_internal_avx): Remove
TARGET_SSE_TYPELESS_STORES check.
(*movti_internal): Prefer TARGET_AVX over
TARGET_SSE_TYPELESS_STORES.
(*movtf_internal): Likewise.
* config/i386/sse.md (mov<mode>_internal): Prefer TARGET_AVX over
TARGET_SSE_TYPELESS_STORES.  Remove "<MODE_SIZE> == 16" check
from TARGET_SSE_TYPELESS_STORES.

gcc/testsuite/

PR target/91461
* gcc.target/i386/avx256-unaligned-store-2.c: Don't check
vmovups.
* gcc.target/i386/avx256-unaligned-store-3.c: Likewise.
* gcc.target/i386/pieces-memcpy-4.c: Likewise.
* gcc.target/i386/pieces-memcpy-5.c: Likewise.
* gcc.target/i386/pieces-memcpy-6.c: Likewise.
* gcc.target/i386/pieces-strcpy-2.c: Likewise.
* gcc.target/i386/pr90980-1.c: Likewise.
* gcc.target/i386/pr87317-4.c: Check "\tvmovd\t" instead of
"vmovd" to avoid matching "vmovdqu".
* gcc.target/i386/pr87317-5.c: Likewise.
* gcc.target/i386/pr87317-7.c: Likewise.
* gcc.target/i386/pr91461-1.c: New test.
* gcc.target/i386/pr91461-2.c: Likewise.
* gcc.target/i386/pr91461-3.c: Likewise.
* gcc.target/i386/pr91461-4.c: Likewise.
* gcc.target/i386/pr91461-5.c: Likewise.

19 files changed:
gcc/ChangeLog
gcc/config/i386/i386.md
gcc/config/i386/sse.md
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/i386/avx256-unaligned-store-2.c
gcc/testsuite/gcc.target/i386/avx256-unaligned-store-3.c
gcc/testsuite/gcc.target/i386/pieces-memcpy-4.c
gcc/testsuite/gcc.target/i386/pieces-memcpy-5.c
gcc/testsuite/gcc.target/i386/pieces-memcpy-6.c
gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c
gcc/testsuite/gcc.target/i386/pr87317-4.c
gcc/testsuite/gcc.target/i386/pr87317-5.c
gcc/testsuite/gcc.target/i386/pr87317-7.c
gcc/testsuite/gcc.target/i386/pr90980-1.c
gcc/testsuite/gcc.target/i386/pr91461-1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/pr91461-2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/pr91461-3.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/pr91461-4.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/pr91461-5.c [new file with mode: 0644]

index 513474842510e3baeb9cd27620a494b65fbe914e..05f3b724660bd9f880fa8223e501d9bf64319f6d 100644 (file)
@@ -1,3 +1,15 @@
+2020-01-28  H.J. Lu  <hongjiu.lu@intel.com>
+
+       PR target/91461
+       * config/i386/i386.md (*movoi_internal_avx): Remove
+       TARGET_SSE_TYPELESS_STORES check.
+       (*movti_internal): Prefer TARGET_AVX over
+       TARGET_SSE_TYPELESS_STORES.
+       (*movtf_internal): Likewise.
+       * config/i386/sse.md (mov<mode>_internal): Prefer TARGET_AVX over
+       TARGET_SSE_TYPELESS_STORES.  Remove "<MODE_SIZE> == 16" check
+       from TARGET_SSE_TYPELESS_STORES.
+
 2020-01-28  David Malcolm  <dmalcolm@redhat.com>
 
        * diagnostic-core.h (warning_at): Rename overload to...
index a125ab350bb2698517cfe9744a13bc75c3470e98..9f0077d59a973e8561c1f3cf5ef6255f7fb3ef37 100644 (file)
               (and (eq_attr "alternative" "1")
                    (match_test "TARGET_AVX512VL"))
                 (const_string "XI")
-              (ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
-                   (and (eq_attr "alternative" "3")
-                        (match_test "TARGET_SSE_TYPELESS_STORES")))
+              (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
                 (const_string "V8SF")
              ]
              (const_string "OI")))])
               (and (eq_attr "alternative" "3")
                    (match_test "TARGET_AVX512VL"))
                 (const_string "XI")
+              (match_test "TARGET_AVX")
+                (const_string "TI")
               (ior (not (match_test "TARGET_SSE2"))
                    (ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
                         (and (eq_attr "alternative" "5")
                              (match_test "TARGET_SSE_TYPELESS_STORES"))))
                 (const_string "V4SF")
-              (match_test "TARGET_AVX")
-                (const_string "TI")
               (match_test "optimize_function_for_size_p (cfun)")
                 (const_string "V4SF")
               ]
    (set (attr "mode")
         (cond [(eq_attr "alternative" "3,4")
                 (const_string "DI")
+              (match_test "TARGET_AVX")
+                (const_string "TI")
               (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
                 (const_string "V4SF")
               (and (eq_attr "alternative" "2")
                    (match_test "TARGET_SSE_TYPELESS_STORES"))
                 (const_string "V4SF")
-              (match_test "TARGET_AVX")
-                (const_string "TI")
               (ior (not (match_test "TARGET_SSE2"))
                    (match_test "optimize_function_for_size_p (cfun)"))
                 (const_string "V4SF")
index 04a8c5e56b94fc05bdad3324efe93f9580ee6ee3..abbd879aab35c3512b25e56087cbb5b87a50b004 100644 (file)
        (cond [(and (eq_attr "alternative" "1")
                    (match_test "TARGET_AVX512VL"))
                 (const_string "<sseinsnmode>")
-              (and (match_test "<MODE_SIZE> == 16")
-                   (ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
-                        (and (eq_attr "alternative" "3")
-                             (match_test "TARGET_SSE_TYPELESS_STORES"))))
-                (const_string "<ssePSmode>")
               (match_test "TARGET_AVX")
                 (const_string "<sseinsnmode>")
+              (ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
+                   (and (eq_attr "alternative" "3")
+                        (match_test "TARGET_SSE_TYPELESS_STORES")))
+                (const_string "<ssePSmode>")
               (ior (not (match_test "TARGET_SSE2"))
                    (match_test "optimize_function_for_size_p (cfun)"))
                 (const_string "V4SF")
index 693650d24863d1985cf0b93251bf720356bbdb85..37ab4b9ea62e8018ef089ba8ae3bd7ac040f68de 100644 (file)
@@ -1,3 +1,24 @@
+2020-01-28  H.J. Lu  <hongjiu.lu@intel.com>
+
+       PR target/91461
+       * gcc.target/i386/avx256-unaligned-store-2.c: Don't check
+       vmovups.
+       * gcc.target/i386/avx256-unaligned-store-3.c: Likewise.
+       * gcc.target/i386/pieces-memcpy-4.c: Likewise.
+       * gcc.target/i386/pieces-memcpy-5.c: Likewise.
+       * gcc.target/i386/pieces-memcpy-6.c: Likewise.
+       * gcc.target/i386/pieces-strcpy-2.c: Likewise.
+       * gcc.target/i386/pr90980-1.c: Likewise.
+       * gcc.target/i386/pr87317-4.c: Check "\tvmovd\t" instead of
+       "vmovd" to avoid matching "vmovdqu".
+       * gcc.target/i386/pr87317-5.c: Likewise.
+       * gcc.target/i386/pr87317-7.c: Likewise.
+       * gcc.target/i386/pr91461-1.c: New test.
+       * gcc.target/i386/pr91461-2.c: Likewise.
+       * gcc.target/i386/pr91461-3.c: Likewise.
+       * gcc.target/i386/pr91461-4.c: Likewise.
+       * gcc.target/i386/pr91461-5.c: Likewise.
+
 2020-01-28  David Malcolm  <dmalcolm@redhat.com>
 
        * gcc.dg/plugin/diagnostic_plugin_test_metadata.c: Update for
index 1e7969bb47db98a7304010821836063d0ad93fab..be12529e8d5c5a0778113ba44a2779ceb5011385 100644 (file)
@@ -23,6 +23,6 @@ avx_test (void)
     }
 }
 
-/* { dg-final { scan-assembler-not "vmovups.*movv32qi_internal/3" } } */
-/* { dg-final { scan-assembler "vmovups.*movv16qi_internal/3" } } */
+/* { dg-final { scan-assembler-not "vmovdqu.*movv32qi_internal/3" } } */
+/* { dg-final { scan-assembler "vmovdqu.*movv16qi_internal/3" } } */
 /* { dg-final { scan-assembler "vextract.128" } } */
index a439a66ff3482a6b62b257389dc341e6df12917e..918028df9ed8a6cc10f7eaa9c8bb64013f549695 100644 (file)
@@ -17,6 +17,6 @@ avx_test (void)
     d[i] = c[i] * 20.0;
 }
 
-/* { dg-final { scan-assembler-not "vmovups.*movv4df_internal/3" } } */
-/* { dg-final { scan-assembler "vmovups.*movv2df_internal/3" } } */
+/* { dg-final { scan-assembler-not "vmovupd.*movv4df_internal/3" } } */
+/* { dg-final { scan-assembler "vmovupd.*movv2df_internal/3" } } */
 /* { dg-final { scan-assembler "vextractf128" } } */
index 64e8921abe26bc6faef60f8f580dc13fa9d8e0b0..6f20203a1466a15e014d0d38c3dd1b0900d0a074 100644 (file)
@@ -9,5 +9,4 @@ foo (void)
   __builtin_memcpy (dst, src, 18);
 }
 
-/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%xmm" 1 } } */
-/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%xmm" 2 } } */
index 3c464c32f8eed6de0ab15f2fae0f6f5bb87dcf75..5a1c7b3d512ddf3d607d9ccca123aad0c7f7b023 100644 (file)
@@ -9,5 +9,4 @@ foo (void)
   __builtin_memcpy (dst, src, 19);
 }
 
-/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%xmm" 1 } } */
-/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%xmm" 2 } } */
index cdb00e05bc15d6fc3c9f06318a61f5821336d534..5f99cc98c4720c902ad4ce93bca5372bb98438b4 100644 (file)
@@ -9,5 +9,4 @@ foo (void)
   __builtin_memcpy (dst, src, 33);
 }
 
-/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%xmm" 2 } } */
-/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 2 } } */
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%xmm" 4 } } */
index 742125595080bf8961ab6f02fb063634d95c2b69..90446edb4f35086353359af1801fafe73a0ab74d 100644 (file)
@@ -12,4 +12,4 @@ foo (char *s)
 }
 
 /* { dg-final { scan-assembler-times "vmovdqa\[ \\t\]+\[^\n\]*%xmm" 4 } } */
-/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 4 } } */
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%xmm" 4 } } */
index 2d4f24a89e9bc6fc7f968ca050ed398fc796c2b1..d802575f4c511944f980eae6ad8d3728d0a7b594 100644 (file)
@@ -1,7 +1,7 @@
 /* { dg-do compile } */
 /* { dg-options "-O2 -march=haswell" } */
 /* { dg-final { scan-assembler-times "vpmovzxbd" 1 } } */
-/* { dg-final { scan-assembler-not "vmovd" } } */
+/* { dg-final { scan-assembler-not "\tvmovd\t" } } */
 
 #include <immintrin.h>
 
index 96f82847e5d5392fbfa8e8564771774ecfdbfbfc..42cf7dc0ffedc9c3516ae5f85b97b2932bc76167 100644 (file)
@@ -1,7 +1,7 @@
 /* { dg-do compile } */
 /* { dg-options "-O2 -march=haswell" } */
 /* { dg-final { scan-assembler-times "vpmovzxwq" 1 } } */
-/* { dg-final { scan-assembler-not "vmovd" } } */
+/* { dg-final { scan-assembler-not "\tvmovd\t" } } */
 
 #include <immintrin.h>
 
index 2c043d9eb26a025bf06a2c52be3b6dc7ffbdea87..c76af7efd5fbae565197f044e1ed16caeff50e26 100644 (file)
@@ -1,7 +1,7 @@
 /* { dg-do compile { target { ! ia32 } } } */
 /* { dg-options "-O2 -march=haswell" } */
 /* { dg-final { scan-assembler-times "vpmovzxbd" 1 } } */
-/* { dg-final { scan-assembler-not "vmovd" } } */
+/* { dg-final { scan-assembler-not "\tvmovd\t" } } */
 
 #include <immintrin.h>
 
index 72a30dc8da2485076f77c2c6979639e6fa7eaa57..885518984c53283eda6410e4d4e1891ba6cd753d 100644 (file)
@@ -1,6 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-march=skylake-avx512 -O2" } */
-/* { dg-final { scan-assembler-times "(?:vmovups|vmovdqu)\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*\\)(?:\n|\[ \\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vmovdqu\[2346\]*\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*\\)(?:\n|\[ \\t\]+#)" 2 } } */
 
 #include <immintrin.h>
 
diff --git a/gcc/testsuite/gcc.target/i386/pr91461-1.c b/gcc/testsuite/gcc.target/i386/pr91461-1.c
new file mode 100644 (file)
index 0000000..0c94b8e
--- /dev/null
@@ -0,0 +1,66 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx" } */
+/* { dg-final { scan-assembler "\tvmovdqa\t" } } */
+/* { dg-final { scan-assembler "\tvmovdqu\t" } } */
+/* { dg-final { scan-assembler "\tvmovapd\t" } } */
+/* { dg-final { scan-assembler "\tvmovupd\t" } } */
+/* { dg-final { scan-assembler-not "\tvmovaps\t" } } */
+/* { dg-final { scan-assembler-not "\tvmovups\t" } } */
+
+#include <immintrin.h>
+
+void
+foo1 (__m128i *p, __m128i x)
+{
+  *p = x;
+}
+
+void
+foo2 (__m128d *p, __m128d x)
+{
+  *p = x;
+}
+
+void
+foo3 (__float128 *p, __float128 x)
+{
+  *p = x;
+}
+
+void
+foo4 (__m128i_u *p, __m128i x)
+{
+  *p = x;
+}
+
+void
+foo5 (__m128d_u *p, __m128d x)
+{
+  *p = x;
+}
+
+typedef __float128 __float128_u __attribute__ ((__aligned__ (1)));
+
+void
+foo6 (__float128_u *p, __float128 x)
+{
+  *p = x;
+}
+
+#ifdef __x86_64__
+typedef __int128 __int128_u __attribute__ ((__aligned__ (1)));
+
+extern __int128 int128;
+
+void
+foo7 (__int128 *p)
+{
+  *p = int128;
+}
+
+void
+foo8 (__int128_u *p)
+{
+  *p = int128;
+}
+#endif
diff --git a/gcc/testsuite/gcc.target/i386/pr91461-2.c b/gcc/testsuite/gcc.target/i386/pr91461-2.c
new file mode 100644 (file)
index 0000000..921cfaf
--- /dev/null
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx" } */
+/* { dg-final { scan-assembler "\tvmovdqa\t" } } */
+/* { dg-final { scan-assembler "\tvmovapd\t" } } */
+/* { dg-final { scan-assembler-not "\tvmovaps\t" } } */
+
+#include <immintrin.h>
+
+void
+foo1 (__m256i *p, __m256i x)
+{
+  *p = x;
+}
+
+void
+foo2 (__m256d *p, __m256d x)
+{
+  *p = x;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr91461-3.c b/gcc/testsuite/gcc.target/i386/pr91461-3.c
new file mode 100644 (file)
index 0000000..c67a480
--- /dev/null
@@ -0,0 +1,76 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mavx512f -mavx512vl" } */
+/* { dg-final { scan-assembler-not "\tvmovaps\t" } } */
+/* { dg-final { scan-assembler-not "\tvmovups\t" } } */
+
+#include <immintrin.h>
+
+void
+foo1 (__m128i *p, __m128i a)
+{
+  register __m128i x __asm ("xmm16") = a;
+  asm volatile ("" : "+v" (x));
+  *p = x;
+}
+
+void
+foo2 (__m128d *p, __m128d a)
+{
+  register __m128d x __asm ("xmm16") = a;
+  asm volatile ("" : "+v" (x));
+  *p = x;
+}
+
+void
+foo3 (__float128 *p, __float128 a)
+{
+  register __float128 x __asm ("xmm16") = a;
+  asm volatile ("" : "+v" (x));
+  *p = x;
+}
+
+void
+foo4 (__m128i_u *p, __m128i a)
+{
+  register __m128i x __asm ("xmm16") = a;
+  asm volatile ("" : "+v" (x));
+  *p = x;
+}
+
+void
+foo5 (__m128d_u *p, __m128d a)
+{
+  register __m128d x __asm ("xmm16") = a;
+  asm volatile ("" : "+v" (x));
+  *p = x;
+}
+
+typedef __float128 __float128_u __attribute__ ((__aligned__ (1)));
+
+void
+foo6 (__float128_u *p, __float128 a)
+{
+  register __float128 x __asm ("xmm16") = a;
+  asm volatile ("" : "+v" (x));
+  *p = x;
+}
+
+typedef __int128 __int128_u __attribute__ ((__aligned__ (1)));
+
+extern __int128 int128;
+
+void
+foo7 (__int128 *p)
+{
+  register __int128 x __asm ("xmm16") = int128;
+  asm volatile ("" : "+v" (x));
+  *p = x;
+}
+
+void
+foo8 (__int128_u *p)
+{
+  register __int128 x __asm ("xmm16") = int128;
+  asm volatile ("" : "+v" (x));
+  *p = x;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr91461-4.c b/gcc/testsuite/gcc.target/i386/pr91461-4.c
new file mode 100644 (file)
index 0000000..69df590
--- /dev/null
@@ -0,0 +1,21 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mavx512f -mavx512vl" } */
+/* { dg-final { scan-assembler-not "\tvmovaps\t" } } */
+
+#include <immintrin.h>
+
+void
+foo1 (__m256i *p, __m256i a)
+{
+  register __m256i x __asm ("xmm16") = a;
+  asm volatile ("" : "+v" (x));
+  *p = x;
+}
+
+void
+foo2 (__m256d *p, __m256d a)
+{
+  register __m256d x __asm ("xmm16") = a;
+  asm volatile ("" : "+v" (x));
+  *p = x;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr91461-5.c b/gcc/testsuite/gcc.target/i386/pr91461-5.c
new file mode 100644 (file)
index 0000000..9742630
--- /dev/null
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-final { scan-assembler-not "\tvmovaps\t" } } */
+
+#include <immintrin.h>
+
+void
+foo1 (__m512i *p, __m512i x)
+{
+  *p = x;
+}
+
+void
+foo2 (__m512d *p, __m512d x)
+{
+  *p = x;
+}