From c7d32ff6193ad93643fda556bc0db619771241f1 Mon Sep 17 00:00:00 2001
From: Richard Guenther <rguenther@suse.de>
Date: Sun, 29 Oct 2006 15:25:52 +0000
Subject: [PATCH 1/1] builtins.c (expand_builtin_mathfn): Expand nearbyint as
 rint in case -fno-trapping-math is enabled.

2006-10-29  Richard Guenther  <rguenther@suse.de>

	* builtins.c (expand_builtin_mathfn): Expand nearbyint as
	rint in case -fno-trapping-math is enabled.
	* config/i386/i386-protos.h (ix86_expand_rint): Declare.
	* config/i386/i386.c (ix86_gen_TWO52): New static helper function.
	(ix86_expand_sse_fabs): Likewise.
	(ix86_expand_rint): New function expanding rint to x87 or SSE math.
	* config/i386/i386.md (rintdf2): Enable for SSE math if
	-fno-trapping-math is enabled, use ix86_expand_rint for expansion.
	(rintsf2): Likewise.

	* gcc.target/i386/math-torture/rint.c: New testcase.
	* gcc.target/i386/math-torture/nearbyint.c: Likewise.

From-SVN: r118144
---
 gcc/ChangeLog                                 |  12 +++
 gcc/builtins.c                                |   5 +-
 gcc/config/i386/i386-protos.h                 |   1 +
 gcc/config/i386/i386.c                        | 100 ++++++++++++++++--
 gcc/config/i386/i386.md                       |  32 ++++--
 gcc/testsuite/ChangeLog                       |   5 +
 .../gcc.target/i386/math-torture/nearbyint.c  |  15 +++
 .../gcc.target/i386/math-torture/rint.c       |  15 +++
 8 files changed, 168 insertions(+), 17 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/math-torture/nearbyint.c
 create mode 100644 gcc/testsuite/gcc.target/i386/math-torture/rint.c

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 9289589ae7c..64d7f6e1b5d 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,15 @@
+2006-10-29  Richard Guenther  <rguenther@suse.de>
+
+	* builtins.c (expand_builtin_mathfn): Expand nearbyint as
+	rint in case -fno-trapping-math is enabled.
+	* config/i386/i386-protos.h (ix86_expand_rint): Declare.
+	* config/i386/i386.c (ix86_gen_TWO52): New static helper function.
+	(ix86_expand_sse_fabs): Likewise.
+	(ix86_expand_rint): New function expanding rint to x87 or SSE math.
+	* config/i386/i386.md (rintdf2): Enable for SSE math if
+	-fno-trapping-math is enabled, use ix86_expand_rint for expansion.
+	(rintsf2): Likewise.
+
 2006-10-29  Richard Guenther  <rguenther@suse.de>
 
 	* genopinit.c (optabs): Change lfloor_optab and lceil_optab
diff --git a/gcc/builtins.c b/gcc/builtins.c
index b7d23c379bd..5375c5c77ab 100644
--- a/gcc/builtins.c
+++ b/gcc/builtins.c
@@ -1835,7 +1835,10 @@ expand_builtin_mathfn (tree exp, rtx target, rtx subtarget)
     CASE_FLT_FN (BUILT_IN_ROUND):
       builtin_optab = round_optab; break;
     CASE_FLT_FN (BUILT_IN_NEARBYINT):
-      builtin_optab = nearbyint_optab; break;
+      builtin_optab = nearbyint_optab;
+      if (flag_trapping_math)
+	break;
+      /* Else fallthrough and expand as rint.  */
     CASE_FLT_FN (BUILT_IN_RINT):
       builtin_optab = rint_optab; break;
     default:
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index 996dcdbd097..4cb110c9833 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -159,6 +159,7 @@ extern enum rtx_code ix86_reverse_condition (enum rtx_code, enum machine_mode);
 
 extern void ix86_expand_lround (rtx, rtx);
 extern void ix86_expand_lfloorceil (rtx, rtx, bool);
+extern void ix86_expand_rint (rtx, rtx);
 
 #ifdef TREE_CODE
 extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree);
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 0291a97a8ae..a12453604fe 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -19225,13 +19225,44 @@ asm_preferred_eh_data_format (int code, int global)
 }
 
 /* Expand copysign from SIGN to the positive value ABS_VALUE
-   storing in RESULT.  */
+   storing in RESULT.  If MASK is non-null, it shall be a mask that clears
+   the sign-bit when ANDed in; it is complemented here to select the sign.  */
 static void
-ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign)
+ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
 {
   enum machine_mode mode = GET_MODE (sign);
   rtx sgn = gen_reg_rtx (mode);
-  rtx mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
+  if (mask == NULL_RTX)
+    {
+      mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
+      if (!VECTOR_MODE_P (mode))
+	{
+	  /* We need to generate a scalar mode mask in this case.  */
+	  rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
+	  tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
+	  mask = gen_reg_rtx (mode);
+	  emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
+	}
+    }
+  else
+    mask = gen_rtx_NOT (mode, mask);
+  emit_insn (gen_rtx_SET (VOIDmode, sgn,
+			  gen_rtx_AND (mode, mask, sign)));
+  emit_insn (gen_rtx_SET (VOIDmode, result,
+			  gen_rtx_IOR (mode, abs_value, sgn)));
+}
+
+/* Expand fabs (OP0) and return a new rtx that holds the result.  The
+   mask for masking out the sign-bit is stored in *SMASK, if that is
+   non-null.  */
+static rtx
+ix86_expand_sse_fabs (rtx op0, rtx *smask)
+{
+  enum machine_mode mode = GET_MODE (op0);
+  rtx xa, mask;
+
+  xa = gen_reg_rtx (mode);
+  mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
   if (!VECTOR_MODE_P (mode))
     {
       /* We need to generate a scalar mode mask in this case.  */
@@ -19240,10 +19271,13 @@ ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign)
       mask = gen_reg_rtx (mode);
       emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
     }
-  emit_insn (gen_rtx_SET (VOIDmode, sgn,
-			  gen_rtx_AND (mode, mask, sign)));
-  emit_insn (gen_rtx_SET (VOIDmode, result,
-			  gen_rtx_IOR (mode, abs_value, sgn)));
+  emit_insn (gen_rtx_SET (VOIDmode, xa,
+			  gen_rtx_AND (mode, op0, mask)));
+
+  if (smask)
+    *smask = mask;
+
+  return xa;
 }
 
 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
@@ -19276,6 +19310,21 @@ ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
   return label;
 }
 
+/* Return an rtx of mode MODE, forced into a register, holding 2**n where
+   n is the number of mantissa bits of MODE: 52 for DFmode, 23 otherwise.
+   MODE must be one of DFmode or SFmode.  */
+static rtx
+ix86_gen_TWO52 (enum machine_mode mode)
+{
+  const int exponent = (mode == DFmode) ? 52 : 23;
+  REAL_VALUE_TYPE value;
+
+  /* value = 1.0 * 2**exponent  */
+  real_ldexp (&value, &dconst1, exponent);
+
+  return force_reg (mode, const_double_from_real_value (value, mode));
+}
+
 /* Expand SSE sequence for computing lround from OP1 storing
    into OP0.  */
 void
@@ -19297,7 +19346,7 @@ ix86_expand_lround (rtx op0, rtx op1)
 
   /* adj = copysign (0.5, op1) */
   adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
-  ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1));
+  ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
 
   /* adj = op1 + adj */
   expand_simple_binop (mode, PLUS, adj, op1, adj, 0, OPTAB_DIRECT);
@@ -19339,4 +19388,39 @@ ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
   emit_move_insn (op0, ireg);
 }
 
+/* Expand rint, rounding OPERAND1 to an integral value in its own
+   floating-point mode and storing the result in OPERAND0.  */
+void
+ix86_expand_rint (rtx operand0, rtx operand1)
+{
+  /* C code for the stuff we're doing below:
+        xa = fabs (operand1);
+        if (!isless (xa, 2**52))
+          return operand1;
+        xa = xa + 2**52 - 2**52;
+        return copysign (xa, operand1);
+     Rounding |x| and re-applying the sign keeps rint (-0.0) == -0.0.  */
+  enum machine_mode mode = GET_MODE (operand0);
+  rtx res, xa, label, TWO52, mask;
+
+  res = gen_reg_rtx (mode);
+  emit_move_insn (res, operand1);
+
+  /* xa = abs (operand1) */
+  xa = ix86_expand_sse_fabs (res, &mask);
+
+  /* if (!isless (xa, TWO52)) goto label; */
+  TWO52 = ix86_gen_TWO52 (mode);
+  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
+
+  xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
+  xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
+  ix86_sse_copysign_to_positive (res, xa, res, mask);
+
+  emit_label (label);
+  LABEL_NUSES (label) = 1;
+
+  emit_move_insn (operand0, res);
+}
+
 #include "gt-i386.h"
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index abceb151c67..5aaf43ba322 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -17160,10 +17160,17 @@
 (define_expand "rintdf2"
   [(use (match_operand:DF 0 "register_operand" ""))
    (use (match_operand:DF 1 "register_operand" ""))]
-  "TARGET_USE_FANCY_MATH_387
-   && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)
-   && flag_unsafe_math_optimizations"
-{
+  "(TARGET_USE_FANCY_MATH_387
+    && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)
+    && flag_unsafe_math_optimizations)
+   || (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH
+       && !flag_trapping_math)"
+{
+  if (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH
+      && !flag_trapping_math)
+    ix86_expand_rint (operand0, operand1);
+  else
+    {
   rtx op0 = gen_reg_rtx (XFmode);
   rtx op1 = gen_reg_rtx (XFmode);
 
@@ -17171,16 +17178,24 @@
   emit_insn (gen_frndintxf2 (op0, op1));
 
   emit_insn (gen_truncxfdf2_i387_noop (operands[0], op0));
+    }
   DONE;
 })
 
 (define_expand "rintsf2"
   [(use (match_operand:SF 0 "register_operand" ""))
    (use (match_operand:SF 1 "register_operand" ""))]
-  "TARGET_USE_FANCY_MATH_387
-   && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
-   && flag_unsafe_math_optimizations"
-{
+  "(TARGET_USE_FANCY_MATH_387
+    && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
+    && flag_unsafe_math_optimizations)
+   || (SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH
+       && !flag_trapping_math)"
+{
+  if (SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH
+      && !flag_trapping_math)
+    ix86_expand_rint (operand0, operand1);
+  else
+    {
   rtx op0 = gen_reg_rtx (XFmode);
   rtx op1 = gen_reg_rtx (XFmode);
 
@@ -17188,6 +17203,7 @@
   emit_insn (gen_frndintxf2 (op0, op1));
 
   emit_insn (gen_truncxfsf2_i387_noop (operands[0], op0));
+    }
   DONE;
 })
 
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index caad33e4252..2fad67cd7da 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2006-10-29  Richard Guenther  <rguenther@suse.de>
+
+	* gcc.target/i386/math-torture/rint.c: New testcase.
+	* gcc.target/i386/math-torture/nearbyint.c: Likewise.
+
 2006-10-29  Richard Guenther  <rguenther@suse.de>
 
 	* gcc.target/i386/math-torture/lfloor.c: New testcase.
diff --git a/gcc/testsuite/gcc.target/i386/math-torture/nearbyint.c b/gcc/testsuite/gcc.target/i386/math-torture/nearbyint.c
new file mode 100644
index 00000000000..dd646f01235
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/math-torture/nearbyint.c
@@ -0,0 +1,15 @@
+/* { dg-do assemble } */
+/* Exercise the nearbyint builtins for float, double and long double.  */
+float testlf (float x)
+{
+  return __builtin_nearbyintf (x);
+}
+double testl (double x)
+{
+  return __builtin_nearbyint (x);
+}
+long double testll (long double x)
+{
+  return __builtin_nearbyintl (x);
+}
+
diff --git a/gcc/testsuite/gcc.target/i386/math-torture/rint.c b/gcc/testsuite/gcc.target/i386/math-torture/rint.c
new file mode 100644
index 00000000000..f9dfff7caa0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/math-torture/rint.c
@@ -0,0 +1,15 @@
+/* { dg-do assemble } */
+/* Exercise the rint builtins for float, double and long double.  */
+float testlf (float x)
+{
+  return __builtin_rintf (x);
+}
+double testl (double x)
+{
+  return __builtin_rint (x);
+}
+long double testll (long double x)
+{
+  return __builtin_rintl (x);
+}
+
-- 
2.30.2