+2002-12-04 Geoffrey Keating <geoffk@apple.com>
+
+ * combine.c (combine_simplify_rtx): Add new canonicalizations.
+ * doc/md.texi (Insn Canonicalizations): Document new
+ canonicalizations for multiply/add combinations.
+ * config/rs6000/rs6000.md: Add and modify floating add/multiply
+ patterns to ensure they're used whenever they can be.
+
2002-12-04 Kazu Hirata <kazu@cs.umass.edu>
* config/h8300/h8300.c: Update the comments related to shifts.
return gen_binary (MINUS, mode, XEXP (XEXP (x, 0), 1),
XEXP (XEXP (x, 0), 0));
+ /* (neg (plus A B)) is canonicalized to (minus (neg A) B). */
+ if (GET_CODE (XEXP (x, 0)) == PLUS
+ && !HONOR_SIGNED_ZEROS (mode)
+ && !HONOR_SIGN_DEPENDENT_ROUNDING (mode))
+ {
+ temp = simplify_gen_unary (NEG, mode, XEXP (XEXP (x, 0), 0), mode);
+ temp = combine_simplify_rtx (temp, mode, last, in_dest);
+ return gen_binary (MINUS, mode, temp, XEXP (XEXP (x, 0), 1));
+ }
+
+ /* (neg (mult A B)) becomes (mult (neg A) B).
+ This works even for floating-point values. */
+ if (GET_CODE (XEXP (x, 0)) == MULT)
+ {
+ temp = simplify_gen_unary (NEG, mode, XEXP (XEXP (x, 0), 0), mode);
+ return gen_binary (MULT, mode, temp, XEXP (XEXP (x, 0), 1));
+ }
+
/* (neg (xor A 1)) is (plus A -1) if A is known to be either 0 or 1. */
if (GET_CODE (XEXP (x, 0)) == XOR && XEXP (XEXP (x, 0), 1) == const1_rtx
&& nonzero_bits (XEXP (XEXP (x, 0), 0), mode) == 1)
#endif
case PLUS:
+      /* Canonicalize (plus (mult (neg B) C) A) to (minus A (mult B C)).  */
+ if (GET_CODE (XEXP (x, 0)) == MULT
+ && GET_CODE (XEXP (XEXP (x, 0), 0)) == NEG)
+ {
+ rtx in1, in2;
+
+ in1 = XEXP (XEXP (XEXP (x, 0), 0), 0);
+ in2 = XEXP (XEXP (x, 0), 1);
+ return gen_binary (MINUS, mode, XEXP (x, 1),
+ gen_binary (MULT, mode, in1, in2));
+ }
+
/* If we have (plus (plus (A const) B)), associate it so that CONST is
outermost. That's because that's the way indexed addresses are
supposed to appear. This code used to check many more cases, but
return simplify_and_const_int (NULL_RTX, mode, XEXP (x, 0),
-INTVAL (XEXP (XEXP (x, 1), 1)) - 1);
+      /* Canonicalize (minus A (mult (neg B) C)) to (plus (mult B C) A).  */
+ if (GET_CODE (XEXP (x, 1)) == MULT
+ && GET_CODE (XEXP (XEXP (x, 1), 0)) == NEG)
+ {
+ rtx in1, in2;
+
+ in1 = XEXP (XEXP (XEXP (x, 1), 0), 0);
+ in2 = XEXP (XEXP (x, 1), 1);
+ return gen_binary (PLUS, mode, gen_binary (MULT, mode, in1, in2),
+ XEXP (x, 0));
+ }
+
+ /* Canonicalize (minus (neg A) (mult B C)) to
+ (minus (mult (neg B) C) A). */
+ if (GET_CODE (XEXP (x, 1)) == MULT
+ && GET_CODE (XEXP (x, 0)) == NEG)
+ {
+ rtx in1, in2;
+
+ in1 = simplify_gen_unary (NEG, mode, XEXP (XEXP (x, 1), 0), mode);
+ in2 = XEXP (XEXP (x, 1), 1);
+ return gen_binary (MINUS, mode, gen_binary (MULT, mode, in1, in2),
+ XEXP (XEXP (x, 0), 0));
+ }
+
/* Canonicalize (minus A (plus B C)) to (minus (minus A B) C) for
integers. */
if (GET_CODE (XEXP (x, 1)) == PLUS && INTEGRAL_MODE_P (mode))
(neg:SF (plus:SF (mult:SF (match_operand:SF 1 "gpc_reg_operand" "%f")
(match_operand:SF 2 "gpc_reg_operand" "f"))
(match_operand:SF 3 "gpc_reg_operand" "f"))))]
- "TARGET_POWERPC && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_FUSED_MADD"
+ "TARGET_POWERPC && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_FUSED_MADD
+ && HONOR_SIGNED_ZEROS (SFmode)"
+ "fnmadds %0,%1,%2,%3"
+ [(set_attr "type" "fp")])
+
+(define_insn ""
+ [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
+ (minus:SF (mult:SF (neg:SF (match_operand:SF 1 "gpc_reg_operand" "f"))
+ (match_operand:SF 2 "gpc_reg_operand" "f"))
+ (match_operand:SF 3 "gpc_reg_operand" "f")))]
+ "TARGET_POWERPC && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_FUSED_MADD
+ && ! HONOR_SIGNED_ZEROS (SFmode)"
"fnmadds %0,%1,%2,%3"
[(set_attr "type" "fp")])
"{fnma|fnmadd} %0,%1,%2,%3"
[(set_attr "type" "dmul")])
+(define_insn ""
+ [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
+ (minus:SF (mult:SF (neg:SF (match_operand:SF 1 "gpc_reg_operand" "f"))
+ (match_operand:SF 2 "gpc_reg_operand" "f"))
+ (match_operand:SF 3 "gpc_reg_operand" "f")))]
+ "! TARGET_POWERPC && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_FUSED_MADD
+ && ! HONOR_SIGNED_ZEROS (SFmode)"
+ "{fnma|fnmadd} %0,%1,%2,%3"
+ [(set_attr "type" "dmul")])
+
(define_insn ""
[(set (match_operand:SF 0 "gpc_reg_operand" "=f")
(neg:SF (minus:SF (mult:SF (match_operand:SF 1 "gpc_reg_operand" "%f")
(match_operand:SF 2 "gpc_reg_operand" "f"))
(match_operand:SF 3 "gpc_reg_operand" "f"))))]
- "TARGET_POWERPC && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_FUSED_MADD"
+ "TARGET_POWERPC && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_FUSED_MADD
+ && HONOR_SIGNED_ZEROS (SFmode)"
+ "fnmsubs %0,%1,%2,%3"
+ [(set_attr "type" "fp")])
+
+(define_insn ""
+ [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
+ (minus:SF (match_operand:SF 3 "gpc_reg_operand" "f")
+ (mult:SF (match_operand:SF 1 "gpc_reg_operand" "%f")
+ (match_operand:SF 2 "gpc_reg_operand" "f"))))]
+ "TARGET_POWERPC && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_FUSED_MADD
+ && ! HONOR_SIGNED_ZEROS (SFmode)"
"fnmsubs %0,%1,%2,%3"
[(set_attr "type" "fp")])
"{fnms|fnmsub} %0,%1,%2,%3"
[(set_attr "type" "dmul")])
+(define_insn ""
+ [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
+ (minus:SF (match_operand:SF 3 "gpc_reg_operand" "f")
+ (mult:SF (match_operand:SF 1 "gpc_reg_operand" "%f")
+ (match_operand:SF 2 "gpc_reg_operand" "f"))))]
+ "! TARGET_POWERPC && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_FUSED_MADD
+ && ! HONOR_SIGNED_ZEROS (SFmode)"
+ "{fnms|fnmsub} %0,%1,%2,%3"
+  [(set_attr "type" "dmul")])
+
(define_expand "sqrtsf2"
[(set (match_operand:SF 0 "gpc_reg_operand" "")
(sqrt:SF (match_operand:SF 1 "gpc_reg_operand" "")))]
(neg:DF (plus:DF (mult:DF (match_operand:DF 1 "gpc_reg_operand" "%f")
(match_operand:DF 2 "gpc_reg_operand" "f"))
(match_operand:DF 3 "gpc_reg_operand" "f"))))]
- "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_FUSED_MADD"
+ "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_FUSED_MADD
+ && HONOR_SIGNED_ZEROS (DFmode)"
+ "{fnma|fnmadd} %0,%1,%2,%3"
+ [(set_attr "type" "dmul")])
+
+(define_insn ""
+ [(set (match_operand:DF 0 "gpc_reg_operand" "=f")
+ (minus:DF (mult:DF (neg:DF (match_operand:DF 1 "gpc_reg_operand" "f"))
+ (match_operand:DF 2 "gpc_reg_operand" "f"))
+ (match_operand:DF 3 "gpc_reg_operand" "f")))]
+ "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_FUSED_MADD
+ && ! HONOR_SIGNED_ZEROS (DFmode)"
"{fnma|fnmadd} %0,%1,%2,%3"
[(set_attr "type" "dmul")])
(neg:DF (minus:DF (mult:DF (match_operand:DF 1 "gpc_reg_operand" "%f")
(match_operand:DF 2 "gpc_reg_operand" "f"))
(match_operand:DF 3 "gpc_reg_operand" "f"))))]
- "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_FUSED_MADD"
+ "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_FUSED_MADD
+ && HONOR_SIGNED_ZEROS (DFmode)"
+ "{fnms|fnmsub} %0,%1,%2,%3"
+ [(set_attr "type" "dmul")])
+
+(define_insn ""
+ [(set (match_operand:DF 0 "gpc_reg_operand" "=f")
+ (minus:DF (match_operand:DF 3 "gpc_reg_operand" "f")
+ (mult:DF (match_operand:DF 1 "gpc_reg_operand" "%f")
+ (match_operand:DF 2 "gpc_reg_operand" "f"))))]
+ "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_FUSED_MADD
+ && ! HONOR_SIGNED_ZEROS (DFmode)"
"{fnms|fnmsub} %0,%1,%2,%3"
[(set_attr "type" "dmul")])
@code{mult}, @code{plus}, or @code{minus} expression, it will be the
first operand.
+@item
+In combinations of @code{neg}, @code{mult}, @code{plus}, and
+@code{minus}, the @code{neg} operations (if any) will be moved inside
+the operations as far as possible. For instance,
+@code{(neg (mult A B))} is canonicalized as @code{(mult (neg A) B)}, but
+@code{(plus (mult (neg A) B) C)} is canonicalized as
+@code{(minus C (mult A B))}.
+
@cindex @code{compare}, canonicalization of
@item
For the @code{compare} operator, a constant is always the second operand
+2002-12-04 Geoffrey Keating <geoffk@apple.com>
+
+ * gcc.dg/ppc-fmadd-1.c: New file.
+ * gcc.dg/ppc-fmadd-2.c: New file.
+ * gcc.dg/ppc-fmadd-3.c: New file.
+
2002-12-04 Eric Botcazou <ebotcazou@libertysurf.fr>
* gcc.c-torture/compile/20021204-1.c: New test.
--- /dev/null
+/* { dg-do compile { target powerpc*-*-* } } */
+/* { dg-options "-ffast-math -O2" } */
+/* { dg-final { scan-assembler-not "f(add|sub|mul|neg)" } } */
+
+void foo(double *a, double *b, double *c, double *d)
+{
+ a[0] = b[0] + c[0] * d[0]; // fmadd
+ a[1] = b[1] - c[1] * d[1]; // fnmsub with fast-math
+ a[2] = -b[2] + c[2] * d[2]; // fmsub
+ a[3] = -b[3] - c[3] * d[3]; // fnmadd with fast-math
+ a[4] = -( b[4] + c[4] * d[4]); // fnmadd
+ a[5] = -( b[5] - c[5] * d[5]); // fmsub with fast-math
+ a[6] = -(-b[6] + c[6] * d[6]); // fnmsub
+ a[7] = -(-b[7] - c[7] * d[7]); // fmadd with fast-math
+ a[10] = b[10] - c[10] * -d[10]; // fmadd
+ a[11] = b[11] + c[11] * -d[11]; // fnmsub with fast-math
+ a[12] = -b[12] - c[12] * -d[12]; // fmsub
+ a[13] = -b[13] + c[13] * -d[13]; // fnmadd with fast-math
+ a[14] = -( b[14] - c[14] * -d[14]); // fnmadd
+ a[15] = -( b[15] + c[15] * -d[15]); // fmsub with fast-math
+ a[16] = -(-b[16] - c[16] * -d[16]); // fnmsub
+ a[17] = -(-b[17] + c[17] * -d[17]); // fmadd with fast-math
+}
+
+void foos(float *a, float *b, float *c, float *d)
+{
+ a[0] = b[0] + c[0] * d[0]; // fmadd
+ a[1] = b[1] - c[1] * d[1]; // fnmsub with fast-math
+ a[2] = -b[2] + c[2] * d[2]; // fmsub
+ a[3] = -b[3] - c[3] * d[3]; // fnmadd with fast-math
+ a[4] = -( b[4] + c[4] * d[4]); // fnmadd
+ a[5] = -( b[5] - c[5] * d[5]); // fmsub with fast-math
+ a[6] = -(-b[6] + c[6] * d[6]); // fnmsub
+ a[7] = -(-b[7] - c[7] * d[7]); // fmadd with fast-math
+ a[10] = b[10] - c[10] * -d[10]; // fmadd
+ a[11] = b[11] + c[11] * -d[11]; // fnmsub with fast-math
+ a[12] = -b[12] - c[12] * -d[12]; // fmsub
+ a[13] = -b[13] + c[13] * -d[13]; // fnmadd with fast-math
+ a[14] = -( b[14] - c[14] * -d[14]); // fnmadd
+ a[15] = -( b[15] + c[15] * -d[15]); // fmsub with fast-math
+ a[16] = -(-b[16] - c[16] * -d[16]); // fnmsub
+ a[17] = -(-b[17] + c[17] * -d[17]); // fmadd with fast-math
+}
--- /dev/null
+/* { dg-do compile { target powerpc*-*-* } } */
+/* { dg-options "-O2" } */
+/* { dg-final { scan-assembler-not "f(add|sub|mul|neg)" } } */
+
+void foo(double *a, double *b, double *c, double *d)
+{
+ a[0] = b[0] + c[0] * d[0]; // fmadd
+ a[2] = -b[2] + c[2] * d[2]; // fmsub
+ a[4] = -( b[4] + c[4] * d[4]); // fnmadd
+ a[6] = -(-b[6] + c[6] * d[6]); // fnmsub
+ a[10] = b[10] - c[10] * -d[10]; // fmadd
+ a[12] = -b[12] - c[12] * -d[12]; // fmsub
+ a[14] = -( b[14] - c[14] * -d[14]); // fnmadd
+ a[16] = -(-b[16] - c[16] * -d[16]); // fnmsub
+}
+
+void foos(float *a, float *b, float *c, float *d)
+{
+ a[0] = b[0] + c[0] * d[0]; // fmadd
+ a[2] = -b[2] + c[2] * d[2]; // fmsub
+ a[4] = -( b[4] + c[4] * d[4]); // fnmadd
+ a[6] = -(-b[6] + c[6] * d[6]); // fnmsub
+ a[10] = b[10] - c[10] * -d[10]; // fmadd
+ a[12] = -b[12] - c[12] * -d[12]; // fmsub
+ a[14] = -( b[14] - c[14] * -d[14]); // fnmadd
+ a[16] = -(-b[16] - c[16] * -d[16]); // fnmsub
+}
--- /dev/null
+/* { dg-do compile { target powerpc*-*-* } } */
+/* { dg-options "-O2" } */
+/* { dg-final { scan-assembler-not "f(add|sub|mul)" } } */
+
+void foo(double *a, double *b, double *c, double *d)
+{
+#if 0
+ a[1] = b[1] - c[1] * d[1]; // fneg, fmadd without fast-math
+#endif
+ a[3] = -b[3] - c[3] * d[3]; // fneg, fmsub without fast-math
+#if 0
+ a[5] = -( b[5] - c[5] * d[5]); // fneg, fnmadd without fast-math
+#endif
+ a[7] = -(-b[7] - c[7] * d[7]); // fneg, fnmsub without fast-math
+ a[11] = b[11] + c[11] * -d[11]; // fneg, fmadd without fast-math
+ a[13] = -b[13] + c[13] * -d[13]; // fneg, fmsub without fast-math
+ a[15] = -( b[15] + c[15] * -d[15]); // fneg, fnmadd without fast-math
+ a[17] = -(-b[17] + c[17] * -d[17]); // fneg, fnmsub without fast-math
+}
+
+void foos(float *a, float *b, float *c, float *d)
+{
+#if 0
+ a[1] = b[1] - c[1] * d[1]; // fneg, fmadd without fast-math
+#endif
+ a[3] = -b[3] - c[3] * d[3]; // fneg, fmsub without fast-math
+#if 0
+ a[5] = -( b[5] - c[5] * d[5]); // fneg, fnmadd without fast-math
+#endif
+ a[7] = -(-b[7] - c[7] * d[7]); // fneg, fnmsub without fast-math
+ a[11] = b[11] + c[11] * -d[11]; // fneg, fmadd without fast-math
+ a[13] = -b[13] + c[13] * -d[13]; // fneg, fmsub without fast-math
+ a[15] = -( b[15] + c[15] * -d[15]); // fneg, fnmadd without fast-math
+ a[17] = -(-b[17] + c[17] * -d[17]); // fneg, fnmsub without fast-math
+}
+