From df7ec09f1209a33b35af3b798e6ff6ead5de8570 Mon Sep 17 00:00:00 2001 From: "H.J. Lu" Date: Thu, 11 Aug 2016 15:51:01 +0000 Subject: [PATCH] Use TImode for piecewise move in 64-bit mode Use TImode for piecewise move in 64-bit mode. We should use TImode in 32-bit mode and use OImode or XImode if they are available. But since by_pieces_ninsns determines the widest mode with MAX_FIXED_MODE_SIZE, we can only use TImode in 64-bit mode. gcc/ * config/i386/i386.h (MOVE_MAX_PIECES): Use TImode in 64-bit mode if unaligned SSE load and store are optimal. gcc/testsuite/ * gcc.target/i386/pieces-memcpy-1.c: New test. * gcc.target/i386/pieces-memcpy-2.c: Likewise. * gcc.target/i386/pieces-memcpy-3.c: Likewise. * gcc.target/i386/pieces-memcpy-4.c: Likewise. * gcc.target/i386/pieces-memcpy-5.c: Likewise. * gcc.target/i386/pieces-memcpy-6.c: Likewise. From-SVN: r239378 --- gcc/ChangeLog | 5 +++++ gcc/config/i386/i386.h | 14 ++++++++++++-- gcc/testsuite/ChangeLog | 9 +++++++++ gcc/testsuite/gcc.target/i386/pieces-memcpy-1.c | 13 +++++++++++++ gcc/testsuite/gcc.target/i386/pieces-memcpy-2.c | 13 +++++++++++++ gcc/testsuite/gcc.target/i386/pieces-memcpy-3.c | 13 +++++++++++++ gcc/testsuite/gcc.target/i386/pieces-memcpy-4.c | 13 +++++++++++++ gcc/testsuite/gcc.target/i386/pieces-memcpy-5.c | 13 +++++++++++++ gcc/testsuite/gcc.target/i386/pieces-memcpy-6.c | 13 +++++++++++++ 9 files changed, 104 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memcpy-1.c create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memcpy-2.c create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memcpy-3.c create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memcpy-4.c create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memcpy-5.c create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memcpy-6.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index c41c1252374..9ed3a74ef81 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,8 @@ +2016-08-11 H.J. 
Lu + + * config/i386/i386.h (MOVE_MAX_PIECES): Use TImode in 64-bit + mode if unaligned SSE load and store are optimal. + 2016-08-11 Bernd Edlinger PR tree-optimization/71083 diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 9b662648f7f..8751143a2a6 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -1950,8 +1950,18 @@ typedef struct ix86_args { /* MOVE_MAX_PIECES is the number of bytes at a time which we can move efficiently, as opposed to MOVE_MAX which is the maximum - number of bytes we can move with a single instruction. */ -#define MOVE_MAX_PIECES UNITS_PER_WORD + number of bytes we can move with a single instruction. + + ??? We should use TImode in 32-bit mode and use OImode or XImode + if they are available. But since by_pieces_ninsns determines the + widest mode with MAX_FIXED_MODE_SIZE, we can only use TImode in + 64-bit mode. */ +#define MOVE_MAX_PIECES \ + ((TARGET_64BIT \ + && TARGET_SSE2 \ + && TARGET_SSE_UNALIGNED_LOAD_OPTIMAL \ + && TARGET_SSE_UNALIGNED_STORE_OPTIMAL) \ + ? GET_MODE_SIZE (TImode) : UNITS_PER_WORD) /* If a memory-to-memory move would take MOVE_RATIO or more simple move-instruction pairs, we will do a movmem or libcall instead. diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index db8a62e3c69..c2482f5dce2 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,12 @@ +2016-08-11 H.J. Lu + + * gcc.target/i386/pieces-memcpy-1.c: New test. + * gcc.target/i386/pieces-memcpy-2.c: Likewise. + * gcc.target/i386/pieces-memcpy-3.c: Likewise. + * gcc.target/i386/pieces-memcpy-4.c: Likewise. + * gcc.target/i386/pieces-memcpy-5.c: Likewise. + * gcc.target/i386/pieces-memcpy-6.c: Likewise. 
+ 2016-08-11 Jakub Jelinek PR c/72816 diff --git a/gcc/testsuite/gcc.target/i386/pieces-memcpy-1.c b/gcc/testsuite/gcc.target/i386/pieces-memcpy-1.c new file mode 100644 index 00000000000..22202c26f8c --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pieces-memcpy-1.c @@ -0,0 +1,13 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic" } */ + +extern char *dst, *src; + +void +foo (void) +{ + __builtin_memcpy (dst, src, 64); +} + +/* { dg-final { scan-assembler-times "movdqu\[ \\t\]+\[^\n\]*%xmm" 4 } } */ +/* { dg-final { scan-assembler-times "movups\[ \\t\]+\[^\n\]*%xmm" 4 } } */ diff --git a/gcc/testsuite/gcc.target/i386/pieces-memcpy-2.c b/gcc/testsuite/gcc.target/i386/pieces-memcpy-2.c new file mode 100644 index 00000000000..bc4f05b86c5 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pieces-memcpy-2.c @@ -0,0 +1,13 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic" } */ + +extern char *dst, *src; + +void +foo (void) +{ + __builtin_memcpy (dst, src, 33); +} + +/* { dg-final { scan-assembler-times "movdqu\[ \\t\]+\[^\n\]*%xmm" 2 } } */ +/* { dg-final { scan-assembler-times "movups\[ \\t\]+\[^\n\]*%xmm" 2 } } */ diff --git a/gcc/testsuite/gcc.target/i386/pieces-memcpy-3.c b/gcc/testsuite/gcc.target/i386/pieces-memcpy-3.c new file mode 100644 index 00000000000..84d6676fca0 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pieces-memcpy-3.c @@ -0,0 +1,13 @@ +/* { dg-do compile { target { ! 
ia32 } } } */ +/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic" } */ + +extern char *dst, *src; + +void +foo (void) +{ + __builtin_memcpy (dst, src, 17); +} + +/* { dg-final { scan-assembler-times "movdqu\[ \\t\]+\[^\n\]*%xmm" 1 } } */ +/* { dg-final { scan-assembler-times "movups\[ \\t\]+\[^\n\]*%xmm" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/pieces-memcpy-4.c b/gcc/testsuite/gcc.target/i386/pieces-memcpy-4.c new file mode 100644 index 00000000000..64e8921abe2 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pieces-memcpy-4.c @@ -0,0 +1,13 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-O2 -mno-avx2 -mavx -mtune=generic" } */ + +extern char *dst, *src; + +void +foo (void) +{ + __builtin_memcpy (dst, src, 18); +} + +/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%xmm" 1 } } */ +/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/pieces-memcpy-5.c b/gcc/testsuite/gcc.target/i386/pieces-memcpy-5.c new file mode 100644 index 00000000000..3c464c32f8e --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pieces-memcpy-5.c @@ -0,0 +1,13 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-O2 -mavx512f -mtune=generic" } */ + +extern char *dst, *src; + +void +foo (void) +{ + __builtin_memcpy (dst, src, 19); +} + +/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%xmm" 1 } } */ +/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/pieces-memcpy-6.c b/gcc/testsuite/gcc.target/i386/pieces-memcpy-6.c new file mode 100644 index 00000000000..cdb00e05bc1 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pieces-memcpy-6.c @@ -0,0 +1,13 @@ +/* { dg-do compile { target { ! 
ia32 } } } */ +/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge" } */ + +extern char *dst, *src; + +void +foo (void) +{ + __builtin_memcpy (dst, src, 33); +} + +/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%xmm" 2 } } */ +/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 2 } } */ -- 2.30.2