From 5fd816e6509bdd5fcf9f347a8e2e9830c4e3defe Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Tue, 16 Jun 2015 19:14:00 +0200 Subject: [PATCH] re PR target/56766 (Fails to combine (vec_select (vec_concat ...)) to (vec_merge ...)) PR target/56766 * config/i386/sse.md (*avx_addsubv4df3_1): New insn pattern. (*avx_addsubv4df3_1s): Ditto. (*sse3_addsubv2df3_1): Ditto. (*sse3_addsubv2df3_1s): Ditto. (*avx_addsubv8sf3_1): Ditto. (*avx_addsubv8sf3_1s): Ditto. (*sse3_addsubv4sf3_1): Ditto. (*sse3_addsubv4sf3_1s): Ditto. testsuite/ChangeLog: PR target/56766 * gcc.target/i386/pr56766-1.c: New test. * gcc.target/i386/pr56766-2.c: Ditto. From-SVN: r224527 --- gcc/config/i386/sse.md | 146 ++++++++++++++++++++++ gcc/testsuite/gcc.target/i386/pr56766-1.c | 42 +++++++ gcc/testsuite/gcc.target/i386/pr56766-2.c | 40 ++++++ 3 files changed, 228 insertions(+) create mode 100644 gcc/testsuite/gcc.target/i386/pr56766-1.c create mode 100644 gcc/testsuite/gcc.target/i386/pr56766-2.c diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 4ef51d66803..d1277ca8a9d 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -2032,6 +2032,38 @@ (set_attr "prefix" "vex") (set_attr "mode" "V4DF")]) +(define_insn "*avx_addsubv4df3_1" + [(set (match_operand:V4DF 0 "register_operand" "=x") + (vec_select:V4DF + (vec_concat:V8DF + (minus:V4DF + (match_operand:V4DF 1 "register_operand" "x") + (match_operand:V4DF 2 "nonimmediate_operand" "xm")) + (plus:V4DF (match_dup 1) (match_dup 2))) + (parallel [(const_int 0) (const_int 5) + (const_int 2) (const_int 7)])))] + "TARGET_AVX" + "vaddsubpd\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "sseadd") + (set_attr "prefix" "vex") + (set_attr "mode" "V4DF")]) + +(define_insn "*avx_addsubv4df3_1s" + [(set (match_operand:V4DF 0 "register_operand" "=x") + (vec_select:V4DF + (vec_concat:V8DF + (minus:V4DF + (match_operand:V4DF 1 "register_operand" "x") + (match_operand:V4DF 2 "nonimmediate_operand" "xm")) + (plus:V4DF (match_dup 2) (match_dup 
1))) + (parallel [(const_int 0) (const_int 5) + (const_int 2) (const_int 7)])))] + "TARGET_AVX" + "vaddsubpd\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "sseadd") + (set_attr "prefix" "vex") + (set_attr "mode" "V4DF")]) + (define_insn "sse3_addsubv2df3" [(set (match_operand:V2DF 0 "register_operand" "=x,x") (vec_merge:V2DF @@ -2050,6 +2082,44 @@ (set_attr "prefix" "orig,vex") (set_attr "mode" "V2DF")]) +(define_insn "*sse3_addsubv2df3_1" + [(set (match_operand:V2DF 0 "register_operand" "=x,x") + (vec_select:V2DF + (vec_concat:V4DF + (minus:V2DF + (match_operand:V2DF 1 "register_operand" "0,x") + (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm")) + (plus:V2DF (match_dup 1) (match_dup 2))) + (parallel [(const_int 0) (const_int 3)])))] + "TARGET_SSE3" + "@ + addsubpd\t{%2, %0|%0, %2} + vaddsubpd\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "sseadd") + (set_attr "atom_unit" "complex") + (set_attr "prefix" "orig,vex") + (set_attr "mode" "V2DF")]) + +(define_insn "*sse3_addsubv2df3_1s" + [(set (match_operand:V2DF 0 "register_operand" "=x,x") + (vec_select:V2DF + (vec_concat:V4DF + (minus:V2DF + (match_operand:V2DF 1 "register_operand" "0,x") + (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm")) + (plus:V2DF (match_dup 2) (match_dup 1))) + (parallel [(const_int 0) (const_int 3)])))] + "TARGET_SSE3" + "@ + addsubpd\t{%2, %0|%0, %2} + vaddsubpd\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "sseadd") + (set_attr "atom_unit" "complex") + (set_attr "prefix" "orig,vex") + (set_attr "mode" "V2DF")]) + (define_insn "avx_addsubv8sf3" [(set (match_operand:V8SF 0 "register_operand" "=x") (vec_merge:V8SF @@ -2064,6 +2134,42 @@ (set_attr "prefix" "vex") (set_attr "mode" "V8SF")]) +(define_insn "*avx_addsubv8sf3_1" + [(set (match_operand:V8SF 0 "register_operand" "=x") + (vec_select:V8SF + (vec_concat:V16SF + (minus:V8SF + (match_operand:V8SF 1 "register_operand" "x") + (match_operand:V8SF 2 "nonimmediate_operand" 
"xm")) + (plus:V8SF (match_dup 1) (match_dup 2))) + (parallel [(const_int 0) (const_int 9) + (const_int 2) (const_int 11) + (const_int 4) (const_int 13) + (const_int 6) (const_int 15)])))] + "TARGET_AVX" + "vaddsubps\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "sseadd") + (set_attr "prefix" "vex") + (set_attr "mode" "V8SF")]) + +(define_insn "*avx_addsubv8sf3_1s" + [(set (match_operand:V8SF 0 "register_operand" "=x") + (vec_select:V8SF + (vec_concat:V16SF + (minus:V8SF + (match_operand:V8SF 1 "register_operand" "x") + (match_operand:V8SF 2 "nonimmediate_operand" "xm")) + (plus:V8SF (match_dup 2) (match_dup 1))) + (parallel [(const_int 0) (const_int 9) + (const_int 2) (const_int 11) + (const_int 4) (const_int 13) + (const_int 6) (const_int 15)])))] + "TARGET_AVX" + "vaddsubps\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "sseadd") + (set_attr "prefix" "vex") + (set_attr "mode" "V8SF")]) + (define_insn "sse3_addsubv4sf3" [(set (match_operand:V4SF 0 "register_operand" "=x,x") (vec_merge:V4SF @@ -2082,6 +2188,46 @@ (set_attr "prefix_rep" "1,*") (set_attr "mode" "V4SF")]) +(define_insn "*sse3_addsubv4sf3_1" + [(set (match_operand:V4SF 0 "register_operand" "=x,x") + (vec_select:V4SF + (vec_concat:V8SF + (minus:V4SF + (match_operand:V4SF 1 "register_operand" "0,x") + (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")) + (plus:V4SF (match_dup 1) (match_dup 2))) + (parallel [(const_int 0) (const_int 5) + (const_int 2) (const_int 7)])))] + "TARGET_SSE3" + "@ + addsubps\t{%2, %0|%0, %2} + vaddsubps\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "sseadd") + (set_attr "prefix" "orig,vex") + (set_attr "prefix_rep" "1,*") + (set_attr "mode" "V4SF")]) + +(define_insn "*sse3_addsubv4sf3_1s" + [(set (match_operand:V4SF 0 "register_operand" "=x,x") + (vec_select:V4SF + (vec_concat:V8SF + (minus:V4SF + (match_operand:V4SF 1 "register_operand" "0,x") + (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")) + (plus:V4SF (match_dup 2) (match_dup 1))) + 
(parallel [(const_int 0) (const_int 5) + (const_int 2) (const_int 7)])))] + "TARGET_SSE3" + "@ + addsubps\t{%2, %0|%0, %2} + vaddsubps\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "sseadd") + (set_attr "prefix" "orig,vex") + (set_attr "prefix_rep" "1,*") + (set_attr "mode" "V4SF")]) + (define_insn "avx_hv4df3" [(set (match_operand:V4DF 0 "register_operand" "=x") (vec_concat:V4DF diff --git a/gcc/testsuite/gcc.target/i386/pr56766-1.c b/gcc/testsuite/gcc.target/i386/pr56766-1.c new file mode 100644 index 00000000000..dff489f4f4d --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr56766-1.c @@ -0,0 +1,42 @@ +/* PR target/56766 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx" } */ + +typedef float v4sf __attribute__((vector_size(16))); +typedef int v4si __attribute__((vector_size(16))); +v4sf foo_v4sf (v4sf x, v4sf y) +{ + v4sf tem0 = x - y; + v4sf tem1 = x + y; + return __builtin_shuffle (tem0, tem1, (v4si) { 0, 5, 2, 7 }); +} + +typedef float v8sf __attribute__((vector_size(32))); +typedef int v8si __attribute__((vector_size(32))); +v8sf foo_v8sf (v8sf x, v8sf y) +{ + v8sf tem0 = x - y; + v8sf tem1 = x + y; + return __builtin_shuffle (tem0, tem1, (v8si) { 0, 9, 2, 11, 4, 13, 6, 15 }); +} + +typedef double v2df __attribute__((vector_size(16))); +typedef long long v2di __attribute__((vector_size(16))); +v2df foo_v2df (v2df x, v2df y) +{ + v2df tem0 = x - y; + v2df tem1 = x + y; + return __builtin_shuffle (tem0, tem1, (v2di) { 0, 3 }); +} + +typedef double v4df __attribute__((vector_size(32))); +typedef long long v4di __attribute__((vector_size(32))); +v4df foo_v4df (v4df x, v4df y) +{ + v4df tem0 = x - y; + v4df tem1 = x + y; + return __builtin_shuffle (tem0, tem1, (v4di) { 0, 5, 2, 7 }); +} + +/* { dg-final { scan-assembler-times "vaddsubps" 2 } } */ +/* { dg-final { scan-assembler-times "vaddsubpd" 2 } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr56766-2.c b/gcc/testsuite/gcc.target/i386/pr56766-2.c new file mode 100644 
index 00000000000..fa3a706caeb --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr56766-2.c @@ -0,0 +1,40 @@ +/* PR target/56766 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -fno-vect-cost-model -mavx" } */ + +void test_v4sf (float * __restrict__ p, float * __restrict q) +{ + p[0] = p[0] - q[0]; + p[1] = p[1] + q[1]; + p[2] = p[2] - q[2]; + p[3] = p[3] + q[3]; +} + +void test_v8sf (float * __restrict__ p, float * __restrict q) +{ + p[0] = p[0] - q[0]; + p[1] = p[1] + q[1]; + p[2] = p[2] - q[2]; + p[3] = p[3] + q[3]; + p[4] = p[4] - q[4]; + p[5] = p[5] + q[5]; + p[6] = p[6] - q[6]; + p[7] = p[7] + q[7]; +} + +void test_v2df (double * __restrict__ p, double * __restrict q) +{ + p[0] = p[0] - q[0]; + p[1] = p[1] + q[1]; +} + +void test_v4df (double * __restrict__ p, double * __restrict q) +{ + p[0] = p[0] - q[0]; + p[1] = p[1] + q[1]; + p[2] = p[2] - q[2]; + p[3] = p[3] + q[3]; +} + +/* { dg-final { scan-assembler-times "vaddsubps" 2 } } */ +/* { dg-final { scan-assembler-times "vaddsubpd" 2 } } */ -- 2.30.2