+2015-08-11 H.J. Lu <hongjiu.lu@intel.com>
+
+ * config/i386/i386.h (MOVE_MAX_PIECES): Use TImode in 64-bit
+ mode if unaligned SSE load and store are optimal.
+
2016-08-11 Bernd Edlinger <bernd.edlinger@hotmail.de>
PR tree-optimization/71083
/* MOVE_MAX_PIECES is the number of bytes at a time which we can
move efficiently, as opposed to MOVE_MAX which is the maximum
- number of bytes we can move with a single instruction. */
-#define MOVE_MAX_PIECES UNITS_PER_WORD
+ number of bytes we can move with a single instruction.
+
+ ??? We should use TImode in 32-bit mode and use OImode or XImode
+ if they are available. But since by_pieces_ninsns determines the
+ widest mode with MAX_FIXED_MODE_SIZE, we can only use TImode in
+ 64-bit mode. */
+#define MOVE_MAX_PIECES \
+ ((TARGET_64BIT \
+ && TARGET_SSE2 \
+ && TARGET_SSE_UNALIGNED_LOAD_OPTIMAL \
+ && TARGET_SSE_UNALIGNED_STORE_OPTIMAL) \
+ ? GET_MODE_SIZE (TImode) : UNITS_PER_WORD)
/* If a memory-to-memory move would take MOVE_RATIO or more simple
move-instruction pairs, we will do a movmem or libcall instead.
+2015-08-11 H.J. Lu <hongjiu.lu@intel.com>
+
+ * gcc.target/i386/pieces-memcpy-1.c: New test.
+ * gcc.target/i386/pieces-memcpy-2.c: Likewise.
+ * gcc.target/i386/pieces-memcpy-3.c: Likewise.
+ * gcc.target/i386/pieces-memcpy-4.c: Likewise.
+ * gcc.target/i386/pieces-memcpy-5.c: Likewise.
+ * gcc.target/i386/pieces-memcpy-6.c: Likewise.
+
2016-08-11 Jakub Jelinek <jakub@redhat.com>
PR c/72816
--- /dev/null
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic" } */
+
+extern char *dst, *src;
+
+void
+foo (void)
+{
+ __builtin_memcpy (dst, src, 64);
+}
+
+/* { dg-final { scan-assembler-times "movdqu\[ \\t\]+\[^\n\]*%xmm" 4 } } */
+/* { dg-final { scan-assembler-times "movups\[ \\t\]+\[^\n\]*%xmm" 4 } } */
--- /dev/null
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic" } */
+
+extern char *dst, *src;
+
+void
+foo (void)
+{
+ __builtin_memcpy (dst, src, 33);
+}
+
+/* { dg-final { scan-assembler-times "movdqu\[ \\t\]+\[^\n\]*%xmm" 2 } } */
+/* { dg-final { scan-assembler-times "movups\[ \\t\]+\[^\n\]*%xmm" 2 } } */
--- /dev/null
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic" } */
+
+extern char *dst, *src;
+
+void
+foo (void)
+{
+ __builtin_memcpy (dst, src, 17);
+}
+
+/* { dg-final { scan-assembler-times "movdqu\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* { dg-final { scan-assembler-times "movups\[ \\t\]+\[^\n\]*%xmm" 1 } } */
--- /dev/null
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune=generic" } */
+
+extern char *dst, *src;
+
+void
+foo (void)
+{
+ __builtin_memcpy (dst, src, 18);
+}
+
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 1 } } */
--- /dev/null
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mavx512f -mtune=generic" } */
+
+extern char *dst, *src;
+
+void
+foo (void)
+{
+ __builtin_memcpy (dst, src, 19);
+}
+
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 1 } } */
--- /dev/null
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge" } */
+
+extern char *dst, *src;
+
+void
+foo (void)
+{
+ __builtin_memcpy (dst, src, 33);
+}
+
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%xmm" 2 } } */
+/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 2 } } */