(and (match_code "unspec_volatile")
(match_test "XINT (op, 1) == UNSPECV_VZEROUPPER")))
-;; Return true if OP is a parallel for a vbroadcast permute.
+;; Return true if OP is an addsub vec_merge operation.
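+;; For example, a V4SF addsub is represented below as
+;; (vec_merge (minus a b) (plus a b) (const_int 5)): the set mask
+;; bits (the even elements) select the MINUS operand, the clear bits
+;; the PLUS operand.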
+(define_predicate "addsub_vm_operator"
+ (match_code "vec_merge")
+{
+ rtx op0, op1;
+ int swapped;
+ HOST_WIDE_INT mask;
+ int nunits, elt;
+
+ op0 = XEXP (op, 0);
+ op1 = XEXP (op, 1);
+
+ /* Sanity check.  The patterns that use this predicate match the
+ PLUS and MINUS RTXes explicitly, so any other combination is
+ unreachable. */
+ if (GET_CODE (op0) == MINUS && GET_CODE (op1) == PLUS)
+ swapped = 0;
+ else if (GET_CODE (op0) == PLUS && GET_CODE (op1) == MINUS)
+ swapped = 1;
+ else
+ gcc_unreachable ();
+
+ mask = INTVAL (XEXP (op, 2));
+ nunits = GET_MODE_NUNITS (mode);
+
+ for (elt = 0; elt < nunits; elt++)
+ {
+ /* bit is the negated mask bit: 0 (mask bit set) selects the element
+ from op0, 1 (mask bit clear) selects it from op1. */
+ int bit = !(mask & (HOST_WIDE_INT_1U << elt));
+
+ if (bit != ((elt & 1) ^ swapped))
+ return false;
+ }
+
+ return true;
+})
+
+;; Return true if OP is an addsub vec_select/vec_concat operation.
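+;; For example, a V4SF addsub in this form is
+;; (vec_select (vec_concat (minus a b) (plus a b))
+;;             (parallel [0 5 2 7])),
+;; i.e. the even result elements come from the MINUS half of the
+;; concatenation and the odd ones from the PLUS half.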
+(define_predicate "addsub_vs_operator"
+ (and (match_code "vec_select")
+ (match_code "vec_concat" "0"))
+{
+ rtx op0, op1;
+ bool swapped;
+ int nunits, elt;
+
+ op0 = XEXP (XEXP (op, 0), 0);
+ op1 = XEXP (XEXP (op, 0), 1);
+
+ /* Sanity check. */
+ if (GET_CODE (op0) == MINUS && GET_CODE (op1) == PLUS)
+ swapped = false;
+ else if (GET_CODE (op0) == PLUS && GET_CODE (op1) == MINUS)
+ swapped = true;
+ else
+ gcc_unreachable ();
+
+ nunits = GET_MODE_NUNITS (mode);
+ if (XVECLEN (XEXP (op, 1), 0) != nunits)
+ return false;
+
+ /* addsub_vs_parallel already checked that the permutation is
+ suitable for addsub, so only look at the first element of the
+ parallel. */
+ elt = INTVAL (XVECEXP (XEXP (op, 1), 0, 0));
+ return elt == (swapped ? nunits : 0);
+})
+
+;; Return true if OP is a parallel for an addsub vec_select.
+(define_predicate "addsub_vs_parallel"
+ (and (match_code "parallel")
+ (match_code "const_int" "a"))
+{
+ int nelt = XVECLEN (op, 0);
+ int elt, i;
+
+ if (nelt < 2)
+ return false;
+
+ /* Check that the permutation is suitable for addsub.
+ For example, { 0 9 2 11 4 13 6 15 } or { 8 1 10 3 12 5 14 7 }. */
+ elt = INTVAL (XVECEXP (op, 0, 0));
+ if (elt == 0)
+ {
+ for (i = 1; i < nelt; ++i)
+ if (INTVAL (XVECEXP (op, 0, i)) != (i + (i & 1) * nelt))
+ return false;
+ }
+ else if (elt == nelt)
+ {
+ for (i = 1; i < nelt; ++i)
+ if (INTVAL (XVECEXP (op, 0, i)) != (elt + i - (i & 1) * nelt))
+ return false;
+ }
+ else
+ return false;
+
+ return true;
+})
+
+;; Return true if OP is a parallel for a vbroadcast permute.
(define_predicate "avx_vbroadcast_operand"
(and (match_code "parallel")
(match_code "const_int" "a"))
(V4SI "v4di") (V8SI "v8di") (V16SI "v16di")])
(define_mode_attr ssedoublemode
- [(V16SF "V32SF") (V16SI "V32SI") (V8DI "V16DI") (V8DF "V16DF")
- (V8SF "V16SF") (V8SI "V16SI") (V4DI "V8DI") (V4DF "V8DF")
- (V16HI "V16SI") (V8HI "V8SI") (V4HI "V4SI") (V4SI "V4DI")
- (V32HI "V32SI") (V32QI "V32HI") (V16QI "V16HI") (V64QI "V64HI")])
+ [(V4SF "V8SF") (V8SF "V16SF") (V16SF "V32SF")
+ (V2DF "V4DF") (V4DF "V8DF") (V8DF "V16DF")
+ (V16QI "V16HI") (V32QI "V32HI") (V64QI "V64HI")
+ (V4HI "V4SI") (V8HI "V8SI") (V16HI "V16SI") (V32HI "V32SI")
+ (V4SI "V4DI") (V8SI "V16SI") (V16SI "V32SI")
+ (V4DI "V8DI") (V8DI "V16DI")])
(define_mode_attr ssebytemode
[(V8DI "V64QI") (V4DI "V32QI") (V2DI "V16QI")])
(define_insn "avx_addsubv4df3"
[(set (match_operand:V4DF 0 "register_operand" "=x")
(vec_merge:V4DF
- (plus:V4DF
+ (minus:V4DF
(match_operand:V4DF 1 "register_operand" "x")
(match_operand:V4DF 2 "nonimmediate_operand" "xm"))
- (minus:V4DF (match_dup 1) (match_dup 2))
- (const_int 10)))]
- "TARGET_AVX"
- "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "type" "sseadd")
- (set_attr "prefix" "vex")
- (set_attr "mode" "V4DF")])
-
-(define_insn "*avx_addsubv4df3_1"
- [(set (match_operand:V4DF 0 "register_operand" "=x")
- (vec_select:V4DF
- (vec_concat:V8DF
- (minus:V4DF
- (match_operand:V4DF 1 "register_operand" "x")
- (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
- (plus:V4DF (match_dup 1) (match_dup 2)))
- (parallel [(const_int 0) (const_int 5)
- (const_int 2) (const_int 7)])))]
- "TARGET_AVX"
- "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "type" "sseadd")
- (set_attr "prefix" "vex")
- (set_attr "mode" "V4DF")])
-
-(define_insn "*avx_addsubv4df3_1s"
- [(set (match_operand:V4DF 0 "register_operand" "=x")
- (vec_select:V4DF
- (vec_concat:V8DF
- (minus:V4DF
- (match_operand:V4DF 1 "register_operand" "x")
- (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
- (plus:V4DF (match_dup 2) (match_dup 1)))
- (parallel [(const_int 0) (const_int 5)
- (const_int 2) (const_int 7)])))]
+ (plus:V4DF (match_dup 1) (match_dup 2))
+ (const_int 5)))]
"TARGET_AVX"
"vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "sseadd")
(define_insn "sse3_addsubv2df3"
[(set (match_operand:V2DF 0 "register_operand" "=x,x")
(vec_merge:V2DF
- (plus:V2DF
+ (minus:V2DF
(match_operand:V2DF 1 "register_operand" "0,x")
(match_operand:V2DF 2 "nonimmediate_operand" "xm,xm"))
- (minus:V2DF (match_dup 1) (match_dup 2))
- (const_int 2)))]
- "TARGET_SSE3"
- "@
- addsubpd\t{%2, %0|%0, %2}
- vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "isa" "noavx,avx")
- (set_attr "type" "sseadd")
- (set_attr "atom_unit" "complex")
- (set_attr "prefix" "orig,vex")
- (set_attr "mode" "V2DF")])
-
-(define_insn "*sse3_addsubv2df3_1"
- [(set (match_operand:V2DF 0 "register_operand" "=x,x")
- (vec_select:V2DF
- (vec_concat:V4DF
- (minus:V2DF
- (match_operand:V2DF 1 "register_operand" "0,x")
- (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm"))
- (plus:V2DF (match_dup 1) (match_dup 2)))
- (parallel [(const_int 0) (const_int 3)])))]
- "TARGET_SSE3"
- "@
- addsubpd\t{%2, %0|%0, %2}
- vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "isa" "noavx,avx")
- (set_attr "type" "sseadd")
- (set_attr "atom_unit" "complex")
- (set_attr "prefix" "orig,vex")
- (set_attr "mode" "V2DF")])
-
-(define_insn "*sse3_addsubv2df3_1s"
- [(set (match_operand:V2DF 0 "register_operand" "=x,x")
- (vec_select:V2DF
- (vec_concat:V4DF
- (minus:V2DF
- (match_operand:V2DF 1 "register_operand" "0,x")
- (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm"))
- (plus:V2DF (match_dup 2) (match_dup 1)))
- (parallel [(const_int 0) (const_int 3)])))]
+ (plus:V2DF (match_dup 1) (match_dup 2))
+ (const_int 1)))]
"TARGET_SSE3"
"@
addsubpd\t{%2, %0|%0, %2}
(define_insn "avx_addsubv8sf3"
[(set (match_operand:V8SF 0 "register_operand" "=x")
(vec_merge:V8SF
- (plus:V8SF
+ (minus:V8SF
(match_operand:V8SF 1 "register_operand" "x")
(match_operand:V8SF 2 "nonimmediate_operand" "xm"))
- (minus:V8SF (match_dup 1) (match_dup 2))
- (const_int 170)))]
- "TARGET_AVX"
- "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "type" "sseadd")
- (set_attr "prefix" "vex")
- (set_attr "mode" "V8SF")])
-
-(define_insn "*avx_addsubv8sf3_1"
- [(set (match_operand:V8SF 0 "register_operand" "=x")
- (vec_select:V8SF
- (vec_concat:V16SF
- (minus:V8SF
- (match_operand:V8SF 1 "register_operand" "x")
- (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
- (plus:V8SF (match_dup 1) (match_dup 2)))
- (parallel [(const_int 0) (const_int 9)
- (const_int 2) (const_int 11)
- (const_int 4) (const_int 13)
- (const_int 6) (const_int 15)])))]
- "TARGET_AVX"
- "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "type" "sseadd")
- (set_attr "prefix" "vex")
- (set_attr "mode" "V8SF")])
-
-(define_insn "*avx_addsubv8sf3_1s"
- [(set (match_operand:V8SF 0 "register_operand" "=x")
- (vec_select:V8SF
- (vec_concat:V16SF
- (minus:V8SF
- (match_operand:V8SF 1 "register_operand" "x")
- (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
- (plus:V8SF (match_dup 2) (match_dup 1)))
- (parallel [(const_int 0) (const_int 9)
- (const_int 2) (const_int 11)
- (const_int 4) (const_int 13)
- (const_int 6) (const_int 15)])))]
+ (plus:V8SF (match_dup 1) (match_dup 2))
+ (const_int 85)))]
"TARGET_AVX"
"vaddsubps\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "sseadd")
(define_insn "sse3_addsubv4sf3"
[(set (match_operand:V4SF 0 "register_operand" "=x,x")
(vec_merge:V4SF
- (plus:V4SF
+ (minus:V4SF
(match_operand:V4SF 1 "register_operand" "0,x")
(match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
- (minus:V4SF (match_dup 1) (match_dup 2))
- (const_int 10)))]
+ (plus:V4SF (match_dup 1) (match_dup 2))
+ (const_int 5)))]
"TARGET_SSE3"
"@
addsubps\t{%2, %0|%0, %2}
(set_attr "prefix_rep" "1,*")
(set_attr "mode" "V4SF")])
-(define_insn "*sse3_addsubv4sf3_1"
- [(set (match_operand:V4SF 0 "register_operand" "=x,x")
- (vec_select:V4SF
- (vec_concat:V8SF
- (minus:V4SF
- (match_operand:V4SF 1 "register_operand" "0,x")
- (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
- (plus:V4SF (match_dup 1) (match_dup 2)))
- (parallel [(const_int 0) (const_int 5)
- (const_int 2) (const_int 7)])))]
- "TARGET_SSE3"
- "@
- addsubps\t{%2, %0|%0, %2}
- vaddsubps\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "isa" "noavx,avx")
- (set_attr "type" "sseadd")
- (set_attr "prefix" "orig,vex")
- (set_attr "prefix_rep" "1,*")
- (set_attr "mode" "V4SF")])
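+;; Canonicalize a vec_merge addsub whose PLUS and MINUS halves were
+;; matched with independent operands back to the single-operand-pair
+;; form used by the insn patterns above.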
+(define_split
+ [(set (match_operand:VF_128_256 0 "register_operand")
+ (match_operator:VF_128_256 6 "addsub_vm_operator"
+ [(minus:VF_128_256
+ (match_operand:VF_128_256 1 "register_operand")
+ (match_operand:VF_128_256 2 "nonimmediate_operand"))
+ (plus:VF_128_256
+ (match_operand:VF_128_256 3 "nonimmediate_operand")
+ (match_operand:VF_128_256 4 "nonimmediate_operand"))
+ (match_operand 5 "const_int_operand")]))]
+ "TARGET_SSE3
+ && can_create_pseudo_p ()
+ && ((rtx_equal_p (operands[1], operands[3])
+ && rtx_equal_p (operands[2], operands[4]))
+ || (rtx_equal_p (operands[1], operands[4])
+ && rtx_equal_p (operands[2], operands[3])))"
+ [(set (match_dup 0)
+ (vec_merge:VF_128_256
+ (minus:VF_128_256 (match_dup 1) (match_dup 2))
+ (plus:VF_128_256 (match_dup 1) (match_dup 2))
+ (match_dup 5)))])
+
-(define_insn "*sse3_addsubv4sf3_1s"
- [(set (match_operand:V4SF 0 "register_operand" "=x,x")
- (vec_select:V4SF
- (vec_concat:V8SF
- (minus:V4SF
- (match_operand:V4SF 1 "register_operand" "0,x")
- (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
- (plus:V4SF (match_dup 2) (match_dup 1)))
- (parallel [(const_int 0) (const_int 5)
- (const_int 2) (const_int 7)])))]
- "TARGET_SSE3"
- "@
- addsubps\t{%2, %0|%0, %2}
- vaddsubps\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "isa" "noavx,avx")
- (set_attr "type" "sseadd")
- (set_attr "prefix" "orig,vex")
- (set_attr "prefix_rep" "1,*")
- (set_attr "mode" "V4SF")])
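+;; As above, but with the PLUS and MINUS halves swapped; the merge
+;; mask is negated below to compensate.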
+(define_split
+ [(set (match_operand:VF_128_256 0 "register_operand")
+ (match_operator:VF_128_256 6 "addsub_vm_operator"
+ [(plus:VF_128_256
+ (match_operand:VF_128_256 1 "nonimmediate_operand")
+ (match_operand:VF_128_256 2 "nonimmediate_operand"))
+ (minus:VF_128_256
+ (match_operand:VF_128_256 3 "register_operand")
+ (match_operand:VF_128_256 4 "nonimmediate_operand"))
+ (match_operand 5 "const_int_operand")]))]
+ "TARGET_SSE3
+ && can_create_pseudo_p ()
+ && ((rtx_equal_p (operands[1], operands[3])
+ && rtx_equal_p (operands[2], operands[4]))
+ || (rtx_equal_p (operands[1], operands[4])
+ && rtx_equal_p (operands[2], operands[3])))"
+ [(set (match_dup 0)
+ (vec_merge:VF_128_256
+ (minus:VF_128_256 (match_dup 3) (match_dup 4))
+ (plus:VF_128_256 (match_dup 3) (match_dup 4))
+ (match_dup 5)))]
+{
+ /* Negate mask bits to compensate for swapped PLUS and MINUS RTXes. */
+ operands[5]
+ = GEN_INT (~INTVAL (operands[5])
+ & ((HOST_WIDE_INT_1U << GET_MODE_NUNITS (<MODE>mode)) - 1));
+})
+
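+;; Match the vec_select/vec_concat form of addsub and rewrite it as
+;; the canonical vec_merge form, computing the merge mask from the
+;; selection parallel.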
+(define_split
+ [(set (match_operand:VF_128_256 0 "register_operand")
+ (match_operator:VF_128_256 7 "addsub_vs_operator"
+ [(vec_concat:<ssedoublemode>
+ (minus:VF_128_256
+ (match_operand:VF_128_256 1 "register_operand")
+ (match_operand:VF_128_256 2 "nonimmediate_operand"))
+ (plus:VF_128_256
+ (match_operand:VF_128_256 3 "nonimmediate_operand")
+ (match_operand:VF_128_256 4 "nonimmediate_operand")))
+ (match_parallel 5 "addsub_vs_parallel"
+ [(match_operand 6 "const_int_operand")])]))]
+ "TARGET_SSE3
+ && can_create_pseudo_p ()
+ && ((rtx_equal_p (operands[1], operands[3])
+ && rtx_equal_p (operands[2], operands[4]))
+ || (rtx_equal_p (operands[1], operands[4])
+ && rtx_equal_p (operands[2], operands[3])))"
+ [(set (match_dup 0)
+ (vec_merge:VF_128_256
+ (minus:VF_128_256 (match_dup 1) (match_dup 2))
+ (plus:VF_128_256 (match_dup 1) (match_dup 2))
+ (match_dup 5)))]
+{
+ int i, nelt = XVECLEN (operands[5], 0);
+ HOST_WIDE_INT ival = 0;
+
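+ /* Set a mask bit for every element taken from the MINUS half of
+ the concatenation (selection index < nunits); vec_merge takes the
+ element from its first operand where the mask bit is set.
+ E.g. (parallel [0 5 2 7]) for V4SF gives a mask of 5. */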
+ for (i = 0; i < nelt; i++)
+ if (INTVAL (XVECEXP (operands[5], 0, i)) < GET_MODE_NUNITS (<MODE>mode))
+ ival |= HOST_WIDE_INT_1 << i;
+
+ operands[5] = GEN_INT (ival);
+})
+
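+;; As above, but with the PLUS and MINUS halves of the concatenation
+;; swapped.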
+(define_split
+ [(set (match_operand:VF_128_256 0 "register_operand")
+ (match_operator:VF_128_256 7 "addsub_vs_operator"
+ [(vec_concat:<ssedoublemode>
+ (plus:VF_128_256
+ (match_operand:VF_128_256 1 "nonimmediate_operand")
+ (match_operand:VF_128_256 2 "nonimmediate_operand"))
+ (minus:VF_128_256
+ (match_operand:VF_128_256 3 "register_operand")
+ (match_operand:VF_128_256 4 "nonimmediate_operand")))
+ (match_parallel 5 "addsub_vs_parallel"
+ [(match_operand 6 "const_int_operand")])]))]
+ "TARGET_SSE3
+ && can_create_pseudo_p ()
+ && ((rtx_equal_p (operands[1], operands[3])
+ && rtx_equal_p (operands[2], operands[4]))
+ || (rtx_equal_p (operands[1], operands[4])
+ && rtx_equal_p (operands[2], operands[3])))"
+ [(set (match_dup 0)
+ (vec_merge:VF_128_256
+ (minus:VF_128_256 (match_dup 3) (match_dup 4))
+ (plus:VF_128_256 (match_dup 3) (match_dup 4))
+ (match_dup 5)))]
+{
+ int i, nelt = XVECLEN (operands[5], 0);
+ HOST_WIDE_INT ival = 0;
+
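+ /* The MINUS half is now the second half of the concatenation, so
+ set a mask bit for every selection index >= nunits.
+ E.g. (parallel [4 1 6 3]) for V4SF gives a mask of 5. */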
+ for (i = 0; i < nelt; i++)
+ if (INTVAL (XVECEXP (operands[5], 0, i)) >= GET_MODE_NUNITS (<MODE>mode))
+ ival |= HOST_WIDE_INT_1 << i;
+
+ operands[5] = GEN_INT (ival);
+})

(define_insn "avx_h<plusminus_insn>v4df3"
[(set (match_operand:V4DF 0 "register_operand" "=x")