From 7121e32beac871f5fcd1a141c23844a41303efc5 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Tue, 23 Jun 2015 10:50:12 +0200 Subject: [PATCH] re PR target/66560 (Fails to generate ADDSUBPS) PR target/66560 * config/i386/predicates.md (addsub_vm_operator): New predicate. (addsub_vs_operator): Ditto. (addsub_vs_parallel): Ditto. * config/i386/sse.md (ssedoublemode): Add V4SF and V2DF modes. (avx_addsubv4df3, avx_addsubv8sf3, sse3_addsubv2df3, sse3_addsubv4sf3): Put minus RTX before plus and adjust vec_merge selector. (*avx_addsubv4df3_1, *avx_addsubv4df3_1s, *sse3_addsubv2df3_1) (*sse3_addsubv2df3_1s, *avx_addsubv8sf3_1, *avx_addsubv8sf3_1s) (*sse3_addsubv4sf3_1, *sse3_addsubv4sf3_1s): Remove insn patterns. (addsub vec_merge splitters): New combiner splitters. (addsub vec_select/vec_concat splitters): Ditto. testsuite/ChangeLog: PR target/66560 * gcc.target/i386/pr66560-1.c: New test. * gcc.target/i386/pr66560-2.c: Ditto. * gcc.target/i386/pr66560-3.c: Ditto. * gcc.target/i386/pr66560-4.c: Ditto. From-SVN: r224824 --- gcc/ChangeLog | 15 ++ gcc/config/i386/predicates.md | 99 +++++++- gcc/config/i386/sse.md | 294 ++++++++++------------ gcc/testsuite/ChangeLog | 8 + gcc/testsuite/gcc.target/i386/pr66560-1.c | 35 +++ gcc/testsuite/gcc.target/i386/pr66560-2.c | 35 +++ gcc/testsuite/gcc.target/i386/pr66560-3.c | 35 +++ gcc/testsuite/gcc.target/i386/pr66560-4.c | 35 +++ 8 files changed, 395 insertions(+), 161 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr66560-1.c create mode 100644 gcc/testsuite/gcc.target/i386/pr66560-2.c create mode 100644 gcc/testsuite/gcc.target/i386/pr66560-3.c create mode 100644 gcc/testsuite/gcc.target/i386/pr66560-4.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index cd7074e5f15..b514755f19b 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,18 @@ +2015-06-23 Uros Bizjak + + PR target/66560 + * config/i386/predicates.md (addsub_vm_operator): New predicate. + (addsub_vs_operator): Ditto. + (addsub_vs_parallel): Ditto. 
+ * config/i386/sse.md (ssedoublemode): Add V4SF and V2DF modes. + (avx_addsubv4df3, avx_addsubv8sf3, sse3_addsubv2df3, sse3_addsubv4sf3): + Put minus RTX before plus and adjust vec_merge selector. + (*avx_addsubv4df3_1, *avx_addsubv4df3_1s, *sse3_addsubv2df3_1) + (*sse3_addsubv2df3_1s, *avx_addsubv8sf3_1, *avx_addsubv8sf3_1s) + (*sse3_addsubv4sf3_1, *sse3_addsubv4sf3_1s): Remove insn patterns. + (addsub vec_merge splitters): New combiner splitters. + (addsub vec_select/vec_concat splitters): Ditto. + 2015-06-23 Bin Cheng PR tree-optimization/66449 diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md index b7bb84fe0ee..4e45246e069 100644 --- a/gcc/config/i386/predicates.md +++ b/gcc/config/i386/predicates.md @@ -1426,8 +1426,105 @@ (and (match_code "unspec_volatile") (match_test "XINT (op, 1) == UNSPECV_VZEROUPPER"))) -;; Return true if OP is a parallel for a vbroadcast permute. +;; Return true if OP is an addsub vec_merge operation +(define_predicate "addsub_vm_operator" + (match_code "vec_merge") +{ + rtx op0, op1; + int swapped; + HOST_WIDE_INT mask; + int nunits, elt; + + op0 = XEXP (op, 0); + op1 = XEXP (op, 1); + + /* Sanity check. */ + if (GET_CODE (op0) == MINUS && GET_CODE (op1) == PLUS) + swapped = 0; + else if (GET_CODE (op0) == PLUS && GET_CODE (op1) == MINUS) + swapped = 1; + else + gcc_unreachable (); + + mask = INTVAL (XEXP (op, 2)); + nunits = GET_MODE_NUNITS (mode); + + for (elt = 0; elt < nunits; elt++) + { + /* bit clear: take from op0, set: take from op1 */ + int bit = !(mask & (HOST_WIDE_INT_1U << elt)); + + if (bit != ((elt & 1) ^ swapped)) + return false; + } + + return true; +}) + +;; Return true if OP is an addsub vec_select/vec_concat operation +(define_predicate "addsub_vs_operator" + (and (match_code "vec_select") + (match_code "vec_concat" "0")) +{ + rtx op0, op1; + bool swapped; + int nunits, elt; + + op0 = XEXP (XEXP (op, 0), 0); + op1 = XEXP (XEXP (op, 0), 1); + + /* Sanity check. 
*/ + if (GET_CODE (op0) == MINUS && GET_CODE (op1) == PLUS) + swapped = false; + else if (GET_CODE (op0) == PLUS && GET_CODE (op1) == MINUS) + swapped = true; + else + gcc_unreachable (); + + nunits = GET_MODE_NUNITS (mode); + if (XVECLEN (XEXP (op, 1), 0) != nunits) + return false; + + /* We already checked that permutation is suitable for addsub, + so only look at the first element of the parallel. */ + elt = INTVAL (XVECEXP (XEXP (op, 1), 0, 0)); + return elt == (swapped ? nunits : 0); +}) + +;; Return true if OP is a parallel for an addsub vec_select. +(define_predicate "addsub_vs_parallel" + (and (match_code "parallel") + (match_code "const_int" "a")) +{ + int nelt = XVECLEN (op, 0); + int elt, i; + + if (nelt < 2) + return false; + + /* Check that the permutation is suitable for addsub. + For example, { 0 9 2 11 4 13 6 15 } or { 8 1 10 3 12 5 14 7 }. */ + elt = INTVAL (XVECEXP (op, 0, 0)); + if (elt == 0) + { + for (i = 1; i < nelt; ++i) + if (INTVAL (XVECEXP (op, 0, i)) != (i + (i & 1) * nelt)) + return false; + } + else if (elt == nelt) + { + for (i = 1; i < nelt; ++i) + if (INTVAL (XVECEXP (op, 0, i)) != (elt + i - (i & 1) * nelt)) + return false; + } + else + return false; + + return true; +}) + +;; Return true if OP is a parallel for a vbroadcast permute. 
(define_predicate "avx_vbroadcast_operand" (and (match_code "parallel") (match_code "const_int" "a")) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index d1277ca8a9d..9c958167451 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -487,10 +487,12 @@ (V4SI "v4di") (V8SI "v8di") (V16SI "v16di")]) (define_mode_attr ssedoublemode - [(V16SF "V32SF") (V16SI "V32SI") (V8DI "V16DI") (V8DF "V16DF") - (V8SF "V16SF") (V8SI "V16SI") (V4DI "V8DI") (V4DF "V8DF") - (V16HI "V16SI") (V8HI "V8SI") (V4HI "V4SI") (V4SI "V4DI") - (V32HI "V32SI") (V32QI "V32HI") (V16QI "V16HI") (V64QI "V64HI")]) + [(V4SF "V8SF") (V8SF "V16SF") (V16SF "V32SF") + (V2DF "V4DF") (V4DF "V8DF") (V8DF "V16DF") + (V16QI "V16HI") (V32QI "V32HI") (V64QI "V64HI") + (V4HI "V4SI") (V8HI "V8SI") (V16HI "V16SI") (V32HI "V32SI") + (V4SI "V4DI") (V8SI "V16SI") (V16SI "V32SI") + (V4DI "V8DI") (V8DI "V16DI")]) (define_mode_attr ssebytemode [(V8DI "V64QI") (V4DI "V32QI") (V2DI "V16QI")]) @@ -2021,43 +2023,11 @@ (define_insn "avx_addsubv4df3" [(set (match_operand:V4DF 0 "register_operand" "=x") (vec_merge:V4DF - (plus:V4DF + (minus:V4DF (match_operand:V4DF 1 "register_operand" "x") (match_operand:V4DF 2 "nonimmediate_operand" "xm")) - (minus:V4DF (match_dup 1) (match_dup 2)) - (const_int 10)))] - "TARGET_AVX" - "vaddsubpd\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "sseadd") - (set_attr "prefix" "vex") - (set_attr "mode" "V4DF")]) - -(define_insn "*avx_addsubv4df3_1" - [(set (match_operand:V4DF 0 "register_operand" "=x") - (vec_select:V4DF - (vec_concat:V8DF - (minus:V4DF - (match_operand:V4DF 1 "register_operand" "x") - (match_operand:V4DF 2 "nonimmediate_operand" "xm")) - (plus:V4DF (match_dup 1) (match_dup 2))) - (parallel [(const_int 0) (const_int 5) - (const_int 2) (const_int 7)])))] - "TARGET_AVX" - "vaddsubpd\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "sseadd") - (set_attr "prefix" "vex") - (set_attr "mode" "V4DF")]) - -(define_insn "*avx_addsubv4df3_1s" - [(set (match_operand:V4DF 
0 "register_operand" "=x") - (vec_select:V4DF - (vec_concat:V8DF - (minus:V4DF - (match_operand:V4DF 1 "register_operand" "x") - (match_operand:V4DF 2 "nonimmediate_operand" "xm")) - (plus:V4DF (match_dup 2) (match_dup 1))) - (parallel [(const_int 0) (const_int 5) - (const_int 2) (const_int 7)])))] + (plus:V4DF (match_dup 1) (match_dup 2)) + (const_int 5)))] "TARGET_AVX" "vaddsubpd\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "sseadd") @@ -2067,49 +2037,11 @@ (define_insn "sse3_addsubv2df3" [(set (match_operand:V2DF 0 "register_operand" "=x,x") (vec_merge:V2DF - (plus:V2DF + (minus:V2DF (match_operand:V2DF 1 "register_operand" "0,x") (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm")) - (minus:V2DF (match_dup 1) (match_dup 2)) - (const_int 2)))] - "TARGET_SSE3" - "@ - addsubpd\t{%2, %0|%0, %2} - vaddsubpd\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "isa" "noavx,avx") - (set_attr "type" "sseadd") - (set_attr "atom_unit" "complex") - (set_attr "prefix" "orig,vex") - (set_attr "mode" "V2DF")]) - -(define_insn "*sse3_addsubv2df3_1" - [(set (match_operand:V2DF 0 "register_operand" "=x,x") - (vec_select:V2DF - (vec_concat:V4DF - (minus:V2DF - (match_operand:V2DF 1 "register_operand" "0,x") - (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm")) - (plus:V2DF (match_dup 1) (match_dup 2))) - (parallel [(const_int 0) (const_int 3)])))] - "TARGET_SSE3" - "@ - addsubpd\t{%2, %0|%0, %2} - vaddsubpd\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "isa" "noavx,avx") - (set_attr "type" "sseadd") - (set_attr "atom_unit" "complex") - (set_attr "prefix" "orig,vex") - (set_attr "mode" "V2DF")]) - -(define_insn "*sse3_addsubv2df3_1s" - [(set (match_operand:V2DF 0 "register_operand" "=x,x") - (vec_select:V2DF - (vec_concat:V4DF - (minus:V2DF - (match_operand:V2DF 1 "register_operand" "0,x") - (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm")) - (plus:V2DF (match_dup 2) (match_dup 1))) - (parallel [(const_int 0) (const_int 3)])))] + (plus:V2DF (match_dup 1) (match_dup 2)) + (const_int 1)))] 
"TARGET_SSE3" "@ addsubpd\t{%2, %0|%0, %2} @@ -2123,47 +2055,11 @@ (define_insn "avx_addsubv8sf3" [(set (match_operand:V8SF 0 "register_operand" "=x") (vec_merge:V8SF - (plus:V8SF + (minus:V8SF (match_operand:V8SF 1 "register_operand" "x") (match_operand:V8SF 2 "nonimmediate_operand" "xm")) - (minus:V8SF (match_dup 1) (match_dup 2)) - (const_int 170)))] - "TARGET_AVX" - "vaddsubps\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "sseadd") - (set_attr "prefix" "vex") - (set_attr "mode" "V8SF")]) - -(define_insn "*avx_addsubv8sf3_1" - [(set (match_operand:V8SF 0 "register_operand" "=x") - (vec_select:V8SF - (vec_concat:V16SF - (minus:V8SF - (match_operand:V8SF 1 "register_operand" "x") - (match_operand:V8SF 2 "nonimmediate_operand" "xm")) - (plus:V8SF (match_dup 1) (match_dup 2))) - (parallel [(const_int 0) (const_int 9) - (const_int 2) (const_int 11) - (const_int 4) (const_int 13) - (const_int 6) (const_int 15)])))] - "TARGET_AVX" - "vaddsubps\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "sseadd") - (set_attr "prefix" "vex") - (set_attr "mode" "V8SF")]) - -(define_insn "*avx_addsubv8sf3_1s" - [(set (match_operand:V8SF 0 "register_operand" "=x") - (vec_select:V8SF - (vec_concat:V16SF - (minus:V8SF - (match_operand:V8SF 1 "register_operand" "x") - (match_operand:V8SF 2 "nonimmediate_operand" "xm")) - (plus:V8SF (match_dup 2) (match_dup 1))) - (parallel [(const_int 0) (const_int 9) - (const_int 2) (const_int 11) - (const_int 4) (const_int 13) - (const_int 6) (const_int 15)])))] + (plus:V8SF (match_dup 1) (match_dup 2)) + (const_int 85)))] "TARGET_AVX" "vaddsubps\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "sseadd") @@ -2173,11 +2069,11 @@ (define_insn "sse3_addsubv4sf3" [(set (match_operand:V4SF 0 "register_operand" "=x,x") (vec_merge:V4SF - (plus:V4SF + (minus:V4SF (match_operand:V4SF 1 "register_operand" "0,x") (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")) - (minus:V4SF (match_dup 1) (match_dup 2)) - (const_int 10)))] + (plus:V4SF (match_dup 1) (match_dup 2)) 
+ (const_int 5)))] "TARGET_SSE3" "@ addsubps\t{%2, %0|%0, %2} @@ -2188,45 +2084,123 @@ (set_attr "prefix_rep" "1,*") (set_attr "mode" "V4SF")]) -(define_insn "*sse3_addsubv4sf3_1" - [(set (match_operand:V4SF 0 "register_operand" "=x,x") - (vec_select:V4SF - (vec_concat:V8SF - (minus:V4SF - (match_operand:V4SF 1 "register_operand" "0,x") - (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")) - (plus:V4SF (match_dup 1) (match_dup 2))) - (parallel [(const_int 0) (const_int 5) - (const_int 2) (const_int 7)])))] - "TARGET_SSE3" - "@ - addsubps\t{%2, %0|%0, %2} - vaddsubps\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "isa" "noavx,avx") - (set_attr "type" "sseadd") - (set_attr "prefix" "orig,vex") - (set_attr "prefix_rep" "1,*") - (set_attr "mode" "V4SF")]) +(define_split + [(set (match_operand:VF_128_256 0 "register_operand") + (match_operator:VF_128_256 6 "addsub_vm_operator" + [(minus:VF_128_256 + (match_operand:VF_128_256 1 "register_operand") + (match_operand:VF_128_256 2 "nonimmediate_operand")) + (plus:VF_128_256 + (match_operand:VF_128_256 3 "nonimmediate_operand") + (match_operand:VF_128_256 4 "nonimmediate_operand")) + (match_operand 5 "const_int_operand")]))] + "TARGET_SSE3 + && can_create_pseudo_p () + && ((rtx_equal_p (operands[1], operands[3]) + && rtx_equal_p (operands[2], operands[4])) + || (rtx_equal_p (operands[1], operands[4]) + && rtx_equal_p (operands[2], operands[3])))" + [(set (match_dup 0) + (vec_merge:VF_128_256 + (minus:VF_128_256 (match_dup 1) (match_dup 2)) + (plus:VF_128_256 (match_dup 1) (match_dup 2)) + (match_dup 5)))]) -(define_insn "*sse3_addsubv4sf3_1s" - [(set (match_operand:V4SF 0 "register_operand" "=x,x") - (vec_select:V4SF - (vec_concat:V8SF - (minus:V4SF - (match_operand:V4SF 1 "register_operand" "0,x") - (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")) - (plus:V4SF (match_dup 2) (match_dup 1))) - (parallel [(const_int 0) (const_int 5) - (const_int 2) (const_int 7)])))] - "TARGET_SSE3" - "@ - addsubps\t{%2, %0|%0, %2} - 
vaddsubps\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "isa" "noavx,avx") - (set_attr "type" "sseadd") - (set_attr "prefix" "orig,vex") - (set_attr "prefix_rep" "1,*") - (set_attr "mode" "V4SF")]) +(define_split + [(set (match_operand:VF_128_256 0 "register_operand") + (match_operator:VF_128_256 6 "addsub_vm_operator" + [(plus:VF_128_256 + (match_operand:VF_128_256 1 "nonimmediate_operand") + (match_operand:VF_128_256 2 "nonimmediate_operand")) + (minus:VF_128_256 + (match_operand:VF_128_256 3 "register_operand") + (match_operand:VF_128_256 4 "nonimmediate_operand")) + (match_operand 5 "const_int_operand")]))] + "TARGET_SSE3 + && can_create_pseudo_p () + && ((rtx_equal_p (operands[1], operands[3]) + && rtx_equal_p (operands[2], operands[4])) + || (rtx_equal_p (operands[1], operands[4]) + && rtx_equal_p (operands[2], operands[3])))" + [(set (match_dup 0) + (vec_merge:VF_128_256 + (minus:VF_128_256 (match_dup 3) (match_dup 4)) + (plus:VF_128_256 (match_dup 3) (match_dup 4)) + (match_dup 5)))] +{ + /* Negate mask bits to compensate for swapped PLUS and MINUS RTXes. 
*/ + operands[5] + = GEN_INT (~INTVAL (operands[5]) + & ((HOST_WIDE_INT_1U << GET_MODE_NUNITS (<MODE>mode)) - 1)); +}) + +(define_split + [(set (match_operand:VF_128_256 0 "register_operand") + (match_operator:VF_128_256 7 "addsub_vs_operator" + [(vec_concat:<ssedoublemode> + (minus:VF_128_256 + (match_operand:VF_128_256 1 "register_operand") + (match_operand:VF_128_256 2 "nonimmediate_operand")) + (plus:VF_128_256 + (match_operand:VF_128_256 3 "nonimmediate_operand") + (match_operand:VF_128_256 4 "nonimmediate_operand"))) + (match_parallel 5 "addsub_vs_parallel" + [(match_operand 6 "const_int_operand")])]))] + "TARGET_SSE3 + && can_create_pseudo_p () + && ((rtx_equal_p (operands[1], operands[3]) + && rtx_equal_p (operands[2], operands[4])) + || (rtx_equal_p (operands[1], operands[4]) + && rtx_equal_p (operands[2], operands[3])))" + [(set (match_dup 0) + (vec_merge:VF_128_256 + (minus:VF_128_256 (match_dup 1) (match_dup 2)) + (plus:VF_128_256 (match_dup 1) (match_dup 2)) + (match_dup 5)))] +{ + int i, nelt = XVECLEN (operands[5], 0); + HOST_WIDE_INT ival = 0; + + for (i = 0; i < nelt; i++) + if (INTVAL (XVECEXP (operands[5], 0, i)) < GET_MODE_NUNITS (<MODE>mode)) + ival |= HOST_WIDE_INT_1 << i; + + operands[5] = GEN_INT (ival); +}) + +(define_split + [(set (match_operand:VF_128_256 0 "register_operand") + (match_operator:VF_128_256 7 "addsub_vs_operator" + [(vec_concat:<ssedoublemode> + (plus:VF_128_256 + (match_operand:VF_128_256 1 "nonimmediate_operand") + (match_operand:VF_128_256 2 "nonimmediate_operand")) + (minus:VF_128_256 + (match_operand:VF_128_256 3 "register_operand") + (match_operand:VF_128_256 4 "nonimmediate_operand"))) + (match_parallel 5 "addsub_vs_parallel" + [(match_operand 6 "const_int_operand")])]))] + "TARGET_SSE3 + && can_create_pseudo_p () + && ((rtx_equal_p (operands[1], operands[3]) + && rtx_equal_p (operands[2], operands[4])) + || (rtx_equal_p (operands[1], operands[4]) + && rtx_equal_p (operands[2], operands[3])))" + [(set (match_dup 0) + (vec_merge:VF_128_256 + (minus:VF_128_256 
(match_dup 3) (match_dup 4)) + (plus:VF_128_256 (match_dup 3) (match_dup 4)) + (match_dup 5)))] +{ + int i, nelt = XVECLEN (operands[5], 0); + HOST_WIDE_INT ival = 0; + + for (i = 0; i < nelt; i++) + if (INTVAL (XVECEXP (operands[5], 0, i)) >= GET_MODE_NUNITS (<MODE>mode)) + ival |= HOST_WIDE_INT_1 << i; + + operands[5] = GEN_INT (ival); +}) (define_insn "avx_h<plusminus_insn>v4df3" [(set (match_operand:V4DF 0 "register_operand" "=x") diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index f17ae0db623..8e2ab43089e 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,11 @@ +2015-06-23 Uros Bizjak + + PR target/66560 + * gcc.target/i386/pr66560-1.c: New test. + * gcc.target/i386/pr66560-2.c: Ditto. + * gcc.target/i386/pr66560-3.c: Ditto. + * gcc.target/i386/pr66560-4.c: Ditto. + 2015-06-23 Thomas Schwinge * gcc.target/nvptx/nvptx.exp: New file. diff --git a/gcc/testsuite/gcc.target/i386/pr66560-1.c b/gcc/testsuite/gcc.target/i386/pr66560-1.c new file mode 100644 index 00000000000..b535ca7d7b3 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr66560-1.c @@ -0,0 +1,35 @@ +/* PR target/66560 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -msse4" } */ + +typedef float v4sf __attribute__((vector_size(16))); +typedef int v4si __attribute__((vector_size(16))); +v4sf foo1 (v4sf x, v4sf y) +{ + v4sf tem0 = x - y; + v4sf tem1 = x + y; + return __builtin_shuffle (tem0, tem1, (v4si) { 0, 5, 2, 7 }); +} + +v4sf foo2 (v4sf x, v4sf y) +{ + v4sf tem0 = x - y; + v4sf tem1 = y + x; + return __builtin_shuffle (tem0, tem1, (v4si) { 0, 5, 2, 7 }); +} + +v4sf foo3 (v4sf x, v4sf y) +{ + v4sf tem0 = x + y; + v4sf tem1 = x - y; + return __builtin_shuffle (tem0, tem1, (v4si) { 4, 1, 6, 3 }); +} + +v4sf foo4 (v4sf x, v4sf y) +{ + v4sf tem0 = y + x; + v4sf tem1 = x - y; + return __builtin_shuffle (tem0, tem1, (v4si) { 4, 1, 6, 3 }); +} + +/* { dg-final { scan-assembler-times "addsubps" 4 } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr66560-2.c 
b/gcc/testsuite/gcc.target/i386/pr66560-2.c new file mode 100644 index 00000000000..c308f3d19fb --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr66560-2.c @@ -0,0 +1,35 @@ +/* PR target/66560 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -msse4" } */ + +typedef double v2df __attribute__((vector_size(16))); +typedef long long v2di __attribute__((vector_size(16))); +v2df foo1 (v2df x, v2df y) +{ + v2df tem0 = x - y; + v2df tem1 = x + y; + return __builtin_shuffle (tem0, tem1, (v2di) { 0, 3 }); +} + +v2df foo2 (v2df x, v2df y) +{ + v2df tem0 = x - y; + v2df tem1 = y + x; + return __builtin_shuffle (tem0, tem1, (v2di) { 0, 3 }); +} + +v2df foo3 (v2df x, v2df y) +{ + v2df tem0 = x + y; + v2df tem1 = x - y; + return __builtin_shuffle (tem0, tem1, (v2di) { 2, 1 }); +} + +v2df foo4 (v2df x, v2df y) +{ + v2df tem0 = y + x; + v2df tem1 = x - y; + return __builtin_shuffle (tem0, tem1, (v2di) { 2, 1 }); +} + +/* { dg-final { scan-assembler-times "addsubpd" 4 } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr66560-3.c b/gcc/testsuite/gcc.target/i386/pr66560-3.c new file mode 100644 index 00000000000..22f19d5ff13 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr66560-3.c @@ -0,0 +1,35 @@ +/* PR target/66560 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx" } */ + +typedef float v8sf __attribute__((vector_size(32))); +typedef int v8si __attribute__((vector_size(32))); +v8sf foo1 (v8sf x, v8sf y) +{ + v8sf tem0 = x - y; + v8sf tem1 = x + y; + return __builtin_shuffle (tem0, tem1, (v8si) { 0, 9, 2, 11, 4, 13, 6, 15 }); +} + +v8sf foo2 (v8sf x, v8sf y) +{ + v8sf tem0 = x - y; + v8sf tem1 = y + x; + return __builtin_shuffle (tem0, tem1, (v8si) { 0, 9, 2, 11, 4, 13, 6, 15 }); +} + +v8sf foo3 (v8sf x, v8sf y) +{ + v8sf tem0 = x + y; + v8sf tem1 = x - y; + return __builtin_shuffle (tem0, tem1, (v8si) { 8, 1, 10, 3, 12, 5, 14, 7 }); +} + +v8sf foo4 (v8sf x, v8sf y) +{ + v8sf tem0 = y + x; + v8sf tem1 = x - y; + return __builtin_shuffle (tem0, tem1, (v8si) { 8, 1, 10, 
3, 12, 5, 14, 7 }); +} + +/* { dg-final { scan-assembler-times "vaddsubps" 4 } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr66560-4.c b/gcc/testsuite/gcc.target/i386/pr66560-4.c new file mode 100644 index 00000000000..a8a6e907f95 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr66560-4.c @@ -0,0 +1,35 @@ +/* PR target/66560 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx" } */ + +typedef double v4df __attribute__((vector_size(32))); +typedef long long v4di __attribute__((vector_size(32))); +v4df foo1 (v4df x, v4df y) +{ + v4df tem0 = x - y; + v4df tem1 = x + y; + return __builtin_shuffle (tem0, tem1, (v4di) { 0, 5, 2, 7 }); +} + +v4df foo2 (v4df x, v4df y) +{ + v4df tem0 = x - y; + v4df tem1 = y + x; + return __builtin_shuffle (tem0, tem1, (v4di) { 0, 5, 2, 7 }); +} + +v4df foo3 (v4df x, v4df y) +{ + v4df tem0 = x + y; + v4df tem1 = x - y; + return __builtin_shuffle (tem0, tem1, (v4di) { 4, 1, 6, 3 }); +} + +v4df foo4 (v4df x, v4df y) +{ + v4df tem0 = y + x; + v4df tem1 = x - y; + return __builtin_shuffle (tem0, tem1, (v4di) { 4, 1, 6, 3 }); +} + +/* { dg-final { scan-assembler-times "vaddsubpd" 4 } } */ -- 2.30.2