From 107192f73ec949170ac5c65e52cf85c93be2f07a Mon Sep 17 00:00:00 2001 From: Allan Sandfeld Jensen Date: Wed, 15 Aug 2018 10:33:12 +0600 Subject: [PATCH] i386.c (expand_vec_perm_movs): New method matching movs patterns. * config/i386/i386.c (expand_vec_perm_movs): New method matching movs patterns. (expand_vec_perm_1): Try the new method. * gcc.target/i386/sse2-movs.c: New test. From-SVN: r263549 --- gcc/ChangeLog | 6 ++++ gcc/config/i386/emmintrin.h | 2 +- gcc/config/i386/i386.c | 41 +++++++++++++++++++++++ gcc/config/i386/xmmintrin.h | 5 ++- gcc/testsuite/ChangeLog | 4 +++ gcc/testsuite/gcc.target/i386/sse2-movs.c | 21 ++++++++++++ 6 files changed, 77 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/sse2-movs.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 84f81f6c898..9dc9acd84c0 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,9 @@ +2018-08-14 Allan Sandfeld Jensen + + * config/i386/i386.c (expand_vec_perm_movs): New method matching movs + patterns. + (expand_vec_perm_1): Try the new method. + 2018-08-14 Ilya Leoshkevich PR target/86547 diff --git a/gcc/config/i386/emmintrin.h b/gcc/config/i386/emmintrin.h index b940a39d27b..6501638f619 100644 --- a/gcc/config/i386/emmintrin.h +++ b/gcc/config/i386/emmintrin.h @@ -113,7 +113,7 @@ _mm_setzero_pd (void) extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_move_sd (__m128d __A, __m128d __B) { - return (__m128d) __builtin_ia32_movsd ((__v2df)__A, (__v2df)__B); + return __extension__ (__m128d) __builtin_shuffle((__v2df)__A, (__v2df)__B, (__v2di){2, 1}); } /* Load two DPFP values from P. The address must be 16-byte aligned. */ diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 7554fd1f659..15a3caa94c3 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -46145,6 +46145,43 @@ expand_vselect_vconcat (rtx target, rtx op0, rtx op1, return ok; } +/* A subroutine of ix86_expand_vec_perm_builtin_1. 
Try to implement D
+   using movss or movsd.  Return true on a match (no RTL if D->testing_p).  */
+static bool
+expand_vec_perm_movs (struct expand_vec_perm_d *d)
+{
+  machine_mode vmode = d->vmode;
+  unsigned i, nelt = d->nelt;
+  rtx x;
+
+  if (d->one_operand_p)
+    return false;
+
+  /* movss needs only SSE (V4SF); movsd needs SSE2 (V2DF).  */
+  if (!(TARGET_SSE && vmode == V4SFmode)
+      && !(TARGET_SSE2 && vmode == V2DFmode))
+    return false;
+
+  /* Only the first element is changed.  */
+  if (d->perm[0] != nelt && d->perm[0] != 0)
+    return false;
+  for (i = 1; i < nelt; ++i)
+    if (d->perm[i] != i + nelt - d->perm[0])
+      return false;
+
+  if (d->testing_p)
+    return true;
+
+  /* Mask 1 takes element 0 from the first VEC_MERGE operand.  */
+  if (d->perm[0] == nelt)
+    x = gen_rtx_VEC_MERGE (vmode, d->op1, d->op0, GEN_INT (1));
+  else
+    x = gen_rtx_VEC_MERGE (vmode, d->op0, d->op1, GEN_INT (1));
+
+  emit_insn (gen_rtx_SET (d->target, x));
+  return true;
+}
+
 /* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
    in terms of blendp[sd] / pblendw / pblendvb / vpblendd.  */
 
@@ -46887,6 +46924,10 @@ expand_vec_perm_1 (struct expand_vec_perm_d *d)
         }
     }
 
+  /* Try movss/movsd instructions.  */
+  if (expand_vec_perm_movs (d))
+    return true;
+
   /* Finally, try the fully general two operand permute.  */
   if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt,
                               d->testing_p))
diff --git a/gcc/config/i386/xmmintrin.h b/gcc/config/i386/xmmintrin.h
index f64f3f74a0b..f770570295c 100644
--- a/gcc/config/i386/xmmintrin.h
+++ b/gcc/config/i386/xmmintrin.h
@@ -1011,7 +1011,10 @@ _mm_storer_ps (float *__P, __m128 __A)
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_move_ss (__m128 __A, __m128 __B)
 {
-  return (__m128) __builtin_ia32_movss ((__v4sf)__A, (__v4sf)__B);
+  return (__m128) __builtin_shuffle ((__v4sf)__A, (__v4sf)__B,
+                                     __extension__
+                                     (__attribute__((__vector_size__ (16))) int)
+                                     {4,1,2,3});
 }
 
 /* Extracts one of the four words of A.  The selector N must be immediate.
*/ diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index b456781c6f9..b8d76d0dde6 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,7 @@ +2018-08-14 Allan Sandfeld Jensen + + * gcc.target/i386/sse2-movs.c: New test. + 2018-08-14 Martin Sebor PR tree-optimization/86650 diff --git a/gcc/testsuite/gcc.target/i386/sse2-movs.c b/gcc/testsuite/gcc.target/i386/sse2-movs.c new file mode 100644 index 00000000000..79f486cfa82 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse2-movs.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msse2" } */ +/* { dg-require-effective-target sse2 } */ +/* { dg-final { scan-assembler "movss" } } */ +/* { dg-final { scan-assembler "movsd" } } */ +/* { dg-final { scan-assembler-not "unpcklps" } } */ +/* { dg-final { scan-assembler-not "shufps" } } */ +/* { dg-final { scan-assembler-not "shufpd" } } */ + +typedef float v4sf __attribute__ ((vector_size (16))); +typedef double v2df __attribute__ ((vector_size (16))); + +v4sf movss(v4sf a, v4sf b) +{ + return (v4sf){b[0],a[1],a[2],a[3]}; +} + +v2df movsd(v2df a, v2df b) +{ + return (v2df){b[0],a[1]}; +} -- 2.30.2