builtins.c (expand_builtin_mathfn): Expand nearbyint as rint in case -fno-trapping-math is enabled.

author Richard Guenther <rguenther@suse.de>

Sun, 29 Oct 2006 15:25:52 +0000 (15:25 +0000)

committer Richard Biener <rguenth@gcc.gnu.org>

Sun, 29 Oct 2006 15:25:52 +0000 (15:25 +0000)
author Richard Guenther <rguenther@suse.de>
Sun, 29 Oct 2006 15:25:52 +0000 (15:25 +0000)
committer Richard Biener <rguenth@gcc.gnu.org>
Sun, 29 Oct 2006 15:25:52 +0000 (15:25 +0000)
diff --git a/gcc/ChangeLog b/gcc/ChangeLog

index 9289589ae7c4f7a7eb07d50b03fceb3356d7db7a..64d7f6e1b5d7bacc28c5255e983b7c0fc7dd8196 100644 (file)
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,15 @@
+2006-10-29  Richard Guenther  <rguenther@suse.de>
+
+       * builtins.c (expand_builtin_mathfn): Expand nearbyint as
+       rint in case -fno-trapping-math is enabled.
+       * config/i386/i386-protos.h (ix86_expand_rint): Declare.
+       * config/i386/i386.c (ix86_gen_TWO52): New static helper function.
+       (ix86_expand_sse_fabs): Likewise.
+       (ix86_expand_rint): New function expanding rint to x87 or SSE math.
+       * config/i386/i386.md (rintdf2): Enable for SSE math if
+       -fno-trapping-math is enabled, use ix86_expand_rint for expansion.
+       (rintsf2): Likewise.
+
  2006-10-29  Richard Guenther  <rguenther@suse.de>
  
         * genopinit.c (optabs): Change lfloor_optab and lceil_optab
diff --git a/gcc/builtins.c b/gcc/builtins.c

index b7d23c379bd97deab06c6bfaa4ced55726963b34..5375c5c77aba2205f5de02247eee4e408945d0cc 100644 (file)
--- a/gcc/builtins.c
+++ b/gcc/builtins.c
@@ -1835,7 +1835,10 @@ expand_builtin_mathfn (tree exp, rtx target, rtx subtarget)
      CASE_FLT_FN (BUILT_IN_ROUND):
        builtin_optab = round_optab; break;
      CASE_FLT_FN (BUILT_IN_NEARBYINT):
-      builtin_optab = nearbyint_optab; break;
+      builtin_optab = nearbyint_optab;
+      if (flag_trapping_math)
+       break;
+      /* Else fallthrough and expand as rint.  */
      CASE_FLT_FN (BUILT_IN_RINT):
        builtin_optab = rint_optab; break;
      default:
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h

index 996dcdbd0979c6f61fd76afd72d8966bdaf1adc7..4cb110c98330d369ff655c793d8a643c252e19c0 100644 (file)
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -159,6 +159,7 @@ extern enum rtx_code ix86_reverse_condition (enum rtx_code, enum machine_mode);
  
  extern void ix86_expand_lround (rtx, rtx);
  extern void ix86_expand_lfloorceil (rtx, rtx, bool);
+extern void ix86_expand_rint (rtx, rtx);
  
  #ifdef TREE_CODE
  extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree);
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c

index 0291a97a8aee94663d7622b424885e498a4f9e25..a12453604fe6ec68ecb3dae51db81db862847e37 100644 (file)
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -19225,13 +19225,44 @@ asm_preferred_eh_data_format (int code, int global)
  }
  \f
  /* Expand copysign from SIGN to the positive value ABS_VALUE
-   storing in RESULT.  */
+   storing in RESULT.  If MASK is non-null, it shall be a mask to mask out
+   the sign-bit.  */
  static void
-ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign)
+ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
  {
    enum machine_mode mode = GET_MODE (sign);
    rtx sgn = gen_reg_rtx (mode);
-  rtx mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
+  if (mask == NULL_RTX)
+    {
+      mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
+      if (!VECTOR_MODE_P (mode))
+       {
+         /* We need to generate a scalar mode mask in this case.  */
+         rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
+         tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
+         mask = gen_reg_rtx (mode);
+         emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
+       }
+    }
+  else
+    mask = gen_rtx_NOT (mode, mask);
+  emit_insn (gen_rtx_SET (VOIDmode, sgn,
+                         gen_rtx_AND (mode, mask, sign)));
+  emit_insn (gen_rtx_SET (VOIDmode, result,
+                         gen_rtx_IOR (mode, abs_value, sgn)));
+}
+
+/* Expand fabs (OP0) and return a new rtx that holds the result.  The
+   mask for masking out the sign-bit is stored in *SMASK, if that is
+   non-null.  */
+static rtx
+ix86_expand_sse_fabs (rtx op0, rtx *smask)
+{
+  enum machine_mode mode = GET_MODE (op0);
+  rtx xa, mask;
+
+  xa = gen_reg_rtx (mode);
+  mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
    if (!VECTOR_MODE_P (mode))
      {
        /* We need to generate a scalar mode mask in this case.  */
@@ -19240,10 +19271,13 @@ ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign)
        mask = gen_reg_rtx (mode);
        emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
      }
-  emit_insn (gen_rtx_SET (VOIDmode, sgn,
-                         gen_rtx_AND (mode, mask, sign)));
-  emit_insn (gen_rtx_SET (VOIDmode, result,
-                         gen_rtx_IOR (mode, abs_value, sgn)));
+  emit_insn (gen_rtx_SET (VOIDmode, xa,
+                         gen_rtx_AND (mode, op0, mask)));
+
+  if (smask)
+    *smask = mask;
+
+  return xa;
  }
  
  /* Expands a comparison of OP0 with OP1 using comparison code CODE,
@@ -19276,6 +19310,21 @@ ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
    return label;
  }
  
+/* Generate and return a rtx of mode MODE for 2**n where n is the number
+   of bits of the mantissa of MODE, which must be one of DFmode or SFmode.  */
+static rtx
+ix86_gen_TWO52 (enum machine_mode mode)
+{
+  REAL_VALUE_TYPE TWO52r;
+  rtx TWO52;
+
+  real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
+  TWO52 = const_double_from_real_value (TWO52r, mode);
+  TWO52 = force_reg (mode, TWO52);
+
+  return TWO52;
+}
+
  /* Expand SSE sequence for computing lround from OP1 storing
     into OP0.  */
  void
@@ -19297,7 +19346,7 @@ ix86_expand_lround (rtx op0, rtx op1)
  
    /* adj = copysign (0.5, op1) */
    adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
-  ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1));
+  ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
  
    /* adj = op1 + adj */
    expand_simple_binop (mode, PLUS, adj, op1, adj, 0, OPTAB_DIRECT);
@@ -19339,4 +19388,39 @@ ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
    emit_move_insn (op0, ireg);
  }
  
+/* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
+   result in OPERAND0.  */
+void
+ix86_expand_rint (rtx operand0, rtx operand1)
+{
+  /* C code for the stuff we're doing below:
+        if (!isless (fabs (operand1), 2**52))
+         return operand1;
+        tmp = copysign (2**52, operand1);
+        return operand1 + tmp - tmp;
+   */
+  enum machine_mode mode = GET_MODE (operand0);
+  rtx res, xa, label, TWO52, mask;
+
+  res = gen_reg_rtx (mode);
+  emit_move_insn (res, operand1);
+
+  /* xa = abs (operand1) */
+  xa = ix86_expand_sse_fabs (res, &mask);
+
+  /* if (!isless (xa, TWO52)) goto label; */
+  TWO52 = ix86_gen_TWO52 (mode);
+  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
+
+  ix86_sse_copysign_to_positive (TWO52, TWO52, res, mask);
+
+  expand_simple_binop (mode, PLUS, res, TWO52, res, 0, OPTAB_DIRECT);
+  expand_simple_binop (mode, MINUS, res, TWO52, res, 0, OPTAB_DIRECT);
+
+  emit_label (label);
+  LABEL_NUSES (label) = 1;
+
+  emit_move_insn (operand0, res);
+}
+
  #include "gt-i386.h"
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md

index abceb151c670104a706277a257b01c5b1c127c88..5aaf43ba322a77b17e42fc5cec72287a2760d317 100644 (file)
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -17160,10 +17160,17 @@
  (define_expand "rintdf2"
    [(use (match_operand:DF 0 "register_operand" ""))
     (use (match_operand:DF 1 "register_operand" ""))]
-  "TARGET_USE_FANCY_MATH_387
-   && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)
-   && flag_unsafe_math_optimizations"
-{
+  "(TARGET_USE_FANCY_MATH_387
+    && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)
+    && flag_unsafe_math_optimizations)
+   || (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH
+       && !flag_trapping_math)"
+{
+  if (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH
+      && !flag_trapping_math)
+    ix86_expand_rint (operand0, operand1);
+  else
+    {
    rtx op0 = gen_reg_rtx (XFmode);
    rtx op1 = gen_reg_rtx (XFmode);
  
@@ -17171,16 +17178,24 @@
    emit_insn (gen_frndintxf2 (op0, op1));
  
    emit_insn (gen_truncxfdf2_i387_noop (operands[0], op0));
+    }
    DONE;
  })
  
  (define_expand "rintsf2"
    [(use (match_operand:SF 0 "register_operand" ""))
     (use (match_operand:SF 1 "register_operand" ""))]
-  "TARGET_USE_FANCY_MATH_387
-   && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
-   && flag_unsafe_math_optimizations"
-{
+  "(TARGET_USE_FANCY_MATH_387
+    && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
+    && flag_unsafe_math_optimizations)
+   || (SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH
+       && !flag_trapping_math)"
+{
+  if (SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH
+      && !flag_trapping_math)
+    ix86_expand_rint (operand0, operand1);
+  else
+    {
    rtx op0 = gen_reg_rtx (XFmode);
    rtx op1 = gen_reg_rtx (XFmode);
  
@@ -17188,6 +17203,7 @@
    emit_insn (gen_frndintxf2 (op0, op1));
  
    emit_insn (gen_truncxfsf2_i387_noop (operands[0], op0));
+    }
    DONE;
  })
  
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog

index caad33e4252fc88a15a3aa1467946ee4624c9fe6..2fad67cd7da8ecc060db5f9d0392f6e33384defb 100644 (file)
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2006-10-29  Richard Guenther  <rguenther@suse.de>
+
+       * gcc.target/i386/math-torture/rint.c: New testcase.
+       * gcc.target/i386/math-torture/nearbyint.c: Likewise.
+
  2006-10-29  Richard Guenther  <rguenther@suse.de>
  
         * gcc.target/i386/math-torture/lfloor.c: New testcase.
diff --git a/gcc/testsuite/gcc.target/i386/math-torture/nearbyint.c b/gcc/testsuite/gcc.target/i386/math-torture/nearbyint.c

new file mode 100644 (file)

index 0000000..dd646f0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/math-torture/nearbyint.c
@@ -0,0 +1,15 @@
+/* { dg-do assemble } */
+
+float testlf (float x)
+{
+  return __builtin_nearbyintf (x);
+}
+double testl (double x)
+{
+  return __builtin_nearbyint (x);
+}
+long double testll (long double x)
+{
+  return __builtin_nearbyintl (x);
+}
+
diff --git a/gcc/testsuite/gcc.target/i386/math-torture/rint.c b/gcc/testsuite/gcc.target/i386/math-torture/rint.c

new file mode 100644 (file)

index 0000000..f9dfff7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/math-torture/rint.c
@@ -0,0 +1,15 @@
+/* { dg-do assemble } */
+
+float testlf (float x)
+{
+  return __builtin_rintf (x);
+}
+double testl (double x)
+{
+  return __builtin_rint (x);
+}
+long double testll (long double x)
+{
+  return __builtin_rintl (x);
+}
+
author	Richard Guenther <rguenther@suse.de>
	Sun, 29 Oct 2006 15:25:52 +0000 (15:25 +0000)
committer	Richard Biener <rguenth@gcc.gnu.org>
	Sun, 29 Oct 2006 15:25:52 +0000 (15:25 +0000)
gcc/ChangeLog		patch \| blob \| history
gcc/builtins.c		patch \| blob \| history
gcc/config/i386/i386-protos.h		patch \| blob \| history
gcc/config/i386/i386.c		patch \| blob \| history
gcc/config/i386/i386.md		patch \| blob \| history
gcc/testsuite/ChangeLog		patch \| blob \| history
gcc/testsuite/gcc.target/i386/math-torture/nearbyint.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/i386/math-torture/rint.c	[new file with mode: 0644]	patch \| blob