re PR libstdc++/85466 (Performance is slow when doing 'branchless' conditiona...
authorJakub Jelinek <jakub@redhat.com>
Fri, 4 May 2018 07:19:45 +0000 (09:19 +0200)
committerJakub Jelinek <jakub@gcc.gnu.org>
Fri, 4 May 2018 07:19:45 +0000 (09:19 +0200)
PR libstdc++/85466
* real.h (real_nextafter): Declare.
* real.c (real_nextafter): New function.
* fold-const-call.c (fold_const_nextafter): New function.
(fold_const_call_sss): Call it for CASE_CFN_NEXTAFTER and
CASE_CFN_NEXTTOWARD.
(fold_const_call_1): For CASE_CFN_NEXTTOWARD call fold_const_call_sss
even when arg1_mode is different from arg0_mode.

* gcc.dg/nextafter-1.c: New test.
* gcc.dg/nextafter-2.c: New test.
* gcc.dg/nextafter-3.c: New test.
* gcc.dg/nextafter-4.c: New test.

From-SVN: r259921

gcc/ChangeLog
gcc/fold-const-call.c
gcc/real.c
gcc/real.h
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.dg/nextafter-1.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/nextafter-2.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/nextafter-3.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/nextafter-4.c [new file with mode: 0644]

index df60e1faeae93d54461693081364bb79b3dd23ce..229e095137118474603cfc7efe14a3b9e7442151 100644 (file)
@@ -1,3 +1,14 @@
+2018-05-04  Jakub Jelinek  <jakub@redhat.com>
+
+       PR libstdc++/85466
+       * real.h (real_nextafter): Declare.
+       * real.c (real_nextafter): New function.
+       * fold-const-call.c (fold_const_nextafter): New function.
+       (fold_const_call_sss): Call it for CASE_CFN_NEXTAFTER and
+       CASE_CFN_NEXTTOWARD.
+       (fold_const_call_1): For CASE_CFN_NEXTTOWARD call fold_const_call_sss
+       even when arg1_mode is different from arg0_mode.
+
 2018-05-03  Nathan Sidwell  <nathan@acm.org>
 
        * doc/extend.texi (Deprecated Features): Remove
index fcf4a14ebaa75ca3a8fb4e4df83a724a79769140..49694922b5c4cf75cdf9d528193fc43b8624e05b 100644 (file)
@@ -527,6 +527,48 @@ fold_const_pow (real_value *result, const real_value *arg0,
   return false;
 }
 
+/* Try to evaluate:
+
+      *RESULT = nextafter (*ARG0, *ARG1)
+
+   or
+
+      *RESULT = nexttoward (*ARG0, *ARG1)
+
+   in format FORMAT.  Return true on success.
+
+   Folding is refused (false is returned) in these cases:
+   - either argument is a signaling NaN;
+   - FORMAT is composite (pnan < p), decimal (b == 10), or lacks
+     infinities or denormals;
+   - real_nextafter reports that underflow or overflow needs to be
+     raised and flag_trapping_math or flag_errno_math is set;
+   - *ARG0 is zero, the computed result is nonzero and
+     flag_trapping_math is set.
+
+   *RESULT may have been written even when false is returned.  */
+
+static bool
+fold_const_nextafter (real_value *result, const real_value *arg0,
+                     const real_value *arg1, const real_format *format)
+{
+  if (REAL_VALUE_ISSIGNALING_NAN (*arg0)
+      || REAL_VALUE_ISSIGNALING_NAN (*arg1))
+    return false;
+
+  /* Don't handle composite modes, nor decimal, nor modes without
+     inf or denorm at least for now.  */
+  if (format->pnan < format->p
+      || format->b == 10
+      || !format->has_inf
+      || !format->has_denorm)
+    return false;
+
+  if (real_nextafter (result, format, arg0, arg1)
+      /* If raising underflow or overflow and setting errno to ERANGE,
+        fail if we care about those side-effects.  */
+      && (flag_trapping_math || flag_errno_math))
+    return false;
+  /* Similarly for nextafter (0, 1) raising underflow.  */
+  else if (flag_trapping_math
+          && arg0->cl == rvc_zero
+          && result->cl != rvc_zero)
+    return false;
+
+  real_convert (result, format, result);
+
+  return true;
+}
+
 /* Try to evaluate:
 
       *RESULT = ldexp (*ARG0, ARG1)
@@ -1260,6 +1302,10 @@ fold_const_call_sss (real_value *result, combined_fn fn,
     CASE_CFN_POW:
       return fold_const_pow (result, arg0, arg1, format);
 
+    CASE_CFN_NEXTAFTER:
+    CASE_CFN_NEXTTOWARD:
+      return fold_const_nextafter (result, arg0, arg1, format);
+
     default:
       return false;
     }
@@ -1365,20 +1411,33 @@ fold_const_call_1 (combined_fn fn, tree type, tree arg0, tree arg1)
   machine_mode arg0_mode = TYPE_MODE (TREE_TYPE (arg0));
   machine_mode arg1_mode = TYPE_MODE (TREE_TYPE (arg1));
 
-  if (arg0_mode == arg1_mode
+  if (mode == arg0_mode
       && real_cst_p (arg0)
       && real_cst_p (arg1))
     {
       gcc_checking_assert (SCALAR_FLOAT_MODE_P (arg0_mode));
-      if (mode == arg0_mode)
+      REAL_VALUE_TYPE result;
+      if (arg0_mode == arg1_mode)
        {
          /* real, real -> real.  */
-         REAL_VALUE_TYPE result;
          if (fold_const_call_sss (&result, fn, TREE_REAL_CST_PTR (arg0),
                                   TREE_REAL_CST_PTR (arg1),
                                   REAL_MODE_FORMAT (mode)))
            return build_real (type, result);
        }
+      else if (arg1_mode == TYPE_MODE (long_double_type_node))
+       switch (fn)
+         {
+         CASE_CFN_NEXTTOWARD:
+           /* real, long double -> real.  */
+           if (fold_const_call_sss (&result, fn, TREE_REAL_CST_PTR (arg0),
+                                    TREE_REAL_CST_PTR (arg1),
+                                    REAL_MODE_FORMAT (mode)))
+             return build_real (type, result);
+           break;
+         default:
+           break;
+         }
       return NULL_TREE;
     }
 
index 2a467376f66c2c13f591a7f7599194e38fdcfbee..eefa69e853573ab34e315172c38a4a561c7a532e 100644 (file)
@@ -5048,6 +5048,102 @@ real_isinteger (const REAL_VALUE_TYPE *c, HOST_WIDE_INT *int_out)
   return false;
 }
 
+/* Calculate nextafter (X, Y) or nexttoward (X, Y) in format FMT and
+   store the result in *R.  Return true if underflow or overflow needs
+   to be raised.
+
+   Special cases, all of which return false:
+   - if either operand is a NaN, *R is the canonical quiet NaN;
+   - if X compares equal to Y, *R is Y converted to FMT;
+   - if X is zero, *R gets the sign of Y, exponent
+     FMT->emin - FMT->p + 1 and only the top significand bit set.
+     No underflow is reported for this case, so a caller that cares
+     about it has to test for a zero X itself.
+
+   Otherwise *R is X moved by one unit in the last place of FMT
+   towards Y.  True is returned when stepping up overflows to
+   infinity, or when the result becomes zero.  */
+
+bool
+real_nextafter (REAL_VALUE_TYPE *r, format_helper fmt,
+               const REAL_VALUE_TYPE *x, const REAL_VALUE_TYPE *y)
+{
+  int cmp = do_compare (x, y, 2);
+  /* If either operand is NaN, return qNaN.  */
+  if (cmp == 2)
+    {
+      get_canonical_qnan (r, 0);
+      return false;
+    }
+  /* If x == y, return y cast to target type.  */
+  if (cmp == 0)
+    {
+      real_convert (r, fmt, y);
+      return false;
+    }
+
+  if (x->cl == rvc_zero)
+    {
+      get_zero (r, y->sign);
+      r->cl = rvc_normal;
+      SET_REAL_EXP (r, fmt->emin - fmt->p + 1);
+      r->sig[SIGSZ - 1] = SIG_MSB;
+      return false;
+    }
+
+  int np2 = SIGNIFICAND_BITS - fmt->p;
+  /* For denormals adjust np2 correspondingly.  */
+  if (x->cl == rvc_normal && REAL_EXP (x) < fmt->emin)
+    np2 += fmt->emin - REAL_EXP (x);
+
+  REAL_VALUE_TYPE u;
+  get_zero (r, x->sign);
+  get_zero (&u, 0);
+  set_significand_bit (&u, np2);
+  r->cl = rvc_normal;
+  SET_REAL_EXP (r, REAL_EXP (x));
+
+  if (x->cl == rvc_inf)
+    {
+      bool borrow = sub_significands (r, r, &u, 0);
+      gcc_assert (borrow);
+      SET_REAL_EXP (r, fmt->emax);
+    }
+  else if (cmp == (x->sign ? 1 : -1))
+    {
+      if (add_significands (r, x, &u))
+       {
+         /* Overflow.  Means the significand had been all ones, and
+            is now all zeros.  Need to increase the exponent, and
+            possibly re-normalize it.  */
+         SET_REAL_EXP (r, REAL_EXP (r) + 1);
+         if (REAL_EXP (r) > fmt->emax)
+           {
+             get_inf (r, x->sign);
+             return true;
+           }
+         r->sig[SIGSZ - 1] = SIG_MSB;
+       }
+    }
+  else
+    {
+      if (REAL_EXP (x) > fmt->emin && x->sig[SIGSZ - 1] == SIG_MSB)
+       {
+         int i;
+         for (i = SIGSZ - 2; i >= 0; i--)
+           if (x->sig[i])
+             break;
+         if (i < 0)
+           {
+             /* When mantissa is 1.0, we need to subtract only
+                half of u: nextafter (1.0, 0.0) is 1.0 - __DBL_EPSILON__ / 2
+                rather than 1.0 - __DBL_EPSILON__.  */
+             clear_significand_bit (&u, np2);
+             np2--;
+             set_significand_bit (&u, np2);
+           }
+       }
+      sub_significands (r, x, &u, 0);
+    }
+
+  /* Clear out trailing garbage.  */
+  clear_significand_below (r, np2);
+  normalize (r);
+  if (REAL_EXP (r) <= fmt->emin - fmt->p)
+    {
+      get_zero (r, x->sign);
+      return true;
+    }
+  return r->cl == rvc_zero;
+}
+
 /* Write into BUF the maximum representable finite floating-point
    number, (1 - b**-p) * b**emax for a given FP format FMT as a hex
    float string.  LEN is the size of BUF, and the buffer must be large
index e51073b19b9f1a9d532f39387856d1c332e023d9..0ce4256570819c3f18054373c4c1615cb645b505 100644 (file)
@@ -507,6 +507,10 @@ extern void real_copysign (REAL_VALUE_TYPE *, const REAL_VALUE_TYPE *);
 extern bool real_isinteger (const REAL_VALUE_TYPE *, format_helper);
 extern bool real_isinteger (const REAL_VALUE_TYPE *, HOST_WIDE_INT *);
 
+/* Calculate nextafter (X, Y) or nexttoward (X, Y) in format FMT,
+   storing the result through the first argument.  Return true if
+   underflow or overflow needs to be raised.  */
+extern bool real_nextafter (REAL_VALUE_TYPE *, format_helper,
+                           const REAL_VALUE_TYPE *, const REAL_VALUE_TYPE *);
+
 /* Write into BUF the maximum representable finite floating-point
    number, (1 - b**-p) * b**emax for a given FP format FMT as a hex
    float string.  BUF must be large enough to contain the result.  */
index 3ea876067e955c386282e107012c87c9a11e8c79..6b5077535db654b8cddee1b770f60f1ce7f0bc40 100644 (file)
@@ -1,3 +1,11 @@
+2018-05-04  Jakub Jelinek  <jakub@redhat.com>
+
+       PR libstdc++/85466
+       * gcc.dg/nextafter-1.c: New test.
+       * gcc.dg/nextafter-2.c: New test.
+       * gcc.dg/nextafter-3.c: New test.
+       * gcc.dg/nextafter-4.c: New test.
+
 2018-05-03  Nathan Sidwell  <nathan@acm.org>
 
        Remove -ffriend-injection.
diff --git a/gcc/testsuite/gcc.dg/nextafter-1.c b/gcc/testsuite/gcc.dg/nextafter-1.c
new file mode 100644 (file)
index 0000000..c8647ed
--- /dev/null
@@ -0,0 +1,159 @@
+/* PR libstdc++/85466 */
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-math-errno -fno-trapping-math -fdump-tree-optimized" } */
+/* { dg-add-options ieee } */
+/* { dg-final { scan-tree-dump-not "nextafter" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "nexttoward" "optimized" } } */
+
+float nextafterf (float, float);
+double nextafter (double, double);
+long double nextafterl (long double, long double);
+float nexttowardf (float, long double);
+double nexttoward (double, long double);
+long double nexttowardl (long double, long double);
+
+#define CHECK(x) if (!(x)) __builtin_abort ()
+
+#ifndef NEED_ERRNO
+#define NEED_ERRNO 0
+#endif
+#ifndef NEED_EXC
+#define NEED_EXC 0
+#endif
+
+#define TEST(name, fn, type, L1, L2, l1, l2, MIN1,                          \
+            MAX1, DENORM_MIN1, EPSILON1, MIN2, MAX2, DENORM_MIN2)           \
+void                                                                        \
+name (void)                                                                 \
+{                                                                           \
+  const type a = fn (0.0##L1, 0.0##L2);                                             \
+  CHECK (a == 0.0##L1 && !__builtin_signbit (a));                           \
+  const type b = fn (0.0##L1, -0.0##L2);                                    \
+  CHECK (b == 0.0##L1 && __builtin_signbit (b));                            \
+  const type c = fn (__builtin_nan##l1 (""), 0.0##L2);                      \
+  CHECK (__builtin_isnan##l1 (c));                                          \
+  const type d = fn (2.0##L1, __builtin_nan##l2 (""));                      \
+  CHECK (__builtin_isnan##l1 (d));                                          \
+  const type e = NEED_EXC ? DENORM_MIN1 : fn (0.0##L1, 8.0##L2);            \
+  CHECK (e == DENORM_MIN1);                                                 \
+  const type f = fn (1.0##L1, 8.0##L2);                                             \
+  CHECK (f == 1.0##L1 + EPSILON1);                                          \
+  const type g = fn (1.0##L1, -8.0##L2);                                    \
+  CHECK (g == 1.0##L1 - EPSILON1 / 2.0##L1);                                \
+  const type h = fn (__builtin_inf (), 0.0##L2);                            \
+  CHECK (h == MAX1);                                                        \
+  const type i = fn (-1.0##L1, -__builtin_inf ());                          \
+  CHECK (i == -1.0##L1 - EPSILON1);                                         \
+  const type j = fn (1.5##L1, __builtin_inf ());                            \
+  CHECK (j == 1.5##L1 + EPSILON1);                                          \
+  const type k = fn (1.5##L1 - EPSILON1, 100.0##L2);                        \
+  CHECK (k == 1.5##L1);                                                             \
+  const type l                                                              \
+    = (NEED_EXC || NEED_ERRNO) ? 0.0##L1 : fn (DENORM_MIN1, 0.0##L2);       \
+  CHECK (l == 0.0##L1 && !__builtin_signbit (l));                           \
+  const type m                                                              \
+    = (NEED_EXC || NEED_ERRNO) ? __builtin_inf##l1 ()                       \
+      : fn (MAX1, __builtin_inf ());                                        \
+  CHECK (__builtin_isinf##l1 (m) && !__builtin_signbit (m));                \
+  const type n = fn (DENORM_MIN1, 12.0##L2);                                \
+  CHECK (n == 2.0##L1 * DENORM_MIN1);                                       \
+  const type o = fn (n, 24.0##L2);                                          \
+  CHECK (o == 3.0##L1 * DENORM_MIN1);                                       \
+  const type p = fn (o, 132.0##L2);                                         \
+  CHECK (p == 4.0##L1 * DENORM_MIN1);                                       \
+  const type q = fn (2.0##L1 * DENORM_MIN1, -__builtin_inf ());                     \
+  CHECK (q == DENORM_MIN1);                                                 \
+  const type r = fn (3.0##L1 * DENORM_MIN1, DENORM_MIN2);                   \
+  CHECK (r == 2.0##L1 * DENORM_MIN1);                                       \
+  const type s = fn (4.0##L1 * DENORM_MIN1, 2.0##L2 * DENORM_MIN2);         \
+  CHECK (s == 3.0##L1 * DENORM_MIN1);                                       \
+  const type t = fn (MIN1, 0.0##L2);                                        \
+  CHECK (t == MIN1 - DENORM_MIN1);                                          \
+  const type u = fn (MIN1 - DENORM_MIN1, -MIN2);                            \
+  CHECK (u == MIN1 - 2.0##L1 * DENORM_MIN1);                                \
+  const type v = fn (MIN1 - 2.0##L1 * DENORM_MIN1, 100.0##L2);              \
+  CHECK (v == MIN1 - DENORM_MIN1);                                          \
+  const type w = fn (MIN1 - DENORM_MIN1, MAX2);                                     \
+  CHECK (w == MIN1);                                                        \
+  const type x = fn (MIN1, 17.0##L2);                                       \
+  CHECK (x == MIN1 + DENORM_MIN1);                                          \
+  const type y = fn (MIN1 + DENORM_MIN1, __builtin_inf##l2 ());                     \
+  CHECK (y == MIN1 + 2.0##L1 * DENORM_MIN1);                                \
+  const type z = fn (MIN1 / 2.0##L1, -MIN2);                                \
+  CHECK (z == MIN1 / 2.0##L1 - DENORM_MIN1);                                \
+  const type aa = fn (-MIN1 / 4.0##L1, MIN2);                               \
+  CHECK (aa == -MIN1 / 4.0##L1 + DENORM_MIN1);                              \
+  const type ab = fn (MIN1 * 2.0##L1, -MIN2);                               \
+  CHECK (ab == MIN1 * 2.0##L1 - DENORM_MIN1);                               \
+  const type ac = fn (MIN1 * 4.0##L1, MIN2);                                \
+  CHECK (ac == MIN1 * 4.0##L1 - DENORM_MIN1 * 2.0##L1);                             \
+  const type ad = fn (MIN1 * 64.0##L1, MIN2);                               \
+  CHECK (ad == MIN1 * 64.0##L1 - DENORM_MIN1 * 32.0##L1);                   \
+  const type ae = fn (MIN1 / 2.0##L1 - DENORM_MIN1, 100.0##L2);                     \
+  CHECK (ae == MIN1 / 2.0##L1);                                                     \
+  const type af = fn (-MIN1 / 4 + DENORM_MIN1, -100.0##L2);                 \
+  CHECK (af == -MIN1 / 4.0##L1);                                            \
+  const type ag = fn (MIN1 * 2.0##L1 - DENORM_MIN1, 100.0##L2);                     \
+  CHECK (ag == MIN1 * 2.0##L1);                                                     \
+  const type ah = fn (MIN1 * 4.0##L1 - 2.0##L1 * DENORM_MIN1, 100.0##L2);    \
+  CHECK (ah == MIN1 * 4.0##L1);                                                     \
+  const type ai = fn (MIN1 * 64.0##L1 - 32.0##L1 * DENORM_MIN1, 100.0##L2);  \
+  CHECK (ai == MIN1 * 64.0##L1);                                            \
+  const type aj = fn (MIN1 * 64.0##L1, 100.0##L2);                          \
+  CHECK (aj == MIN1 * 64.0##L1 + 64.0##L1 * DENORM_MIN1);                   \
+  const type ak = fn (MIN1 * 64.0##L1 + DENORM_MIN1 * 64.0##L1, 1024.0##L2); \
+  CHECK (ak == MIN1 * 64.0##L1 + 128.0##L1 * DENORM_MIN1);                  \
+  const type al = fn (128.0##L1, 128.0##L2);                                \
+  CHECK (al == 128.0##L1);                                                  \
+  const type am = fn (128.0##L1, 129.0##L2);                                \
+  CHECK (am == 128.0##L1 + 128.0##L1 * EPSILON1);                           \
+  const type an = fn (-128.0##L1 + -128.0##L1 * EPSILON1, -130.0##L2);      \
+  CHECK (an == -128.0##L1 - 256.0##L1 * EPSILON1);                          \
+  const type ao = fn (128.0##L1 + 256.0##L1 * EPSILON1, 256.0##L2);         \
+  CHECK (ao == 128.0##L1 + 384.0##L1 * EPSILON1);                           \
+  const type ap = fn (128.0##L1 + 384.0##L1 * EPSILON1, -0.0##L2);          \
+  CHECK (ap == 128.0##L1 + 256.0##L1 * EPSILON1);                           \
+  const type aq = fn (128.0##L1 + 256.0##L1 * EPSILON1, 1.0##L2);           \
+  CHECK (aq == 128.0##L1 + 128.0##L1 * EPSILON1);                           \
+  const type ar = fn (128.0##L1 + 128.0##L1 * EPSILON1, 0.0##L2);           \
+  CHECK (ar == 128.0##L1);                                                  \
+  const type as = fn (128.0##L1, 0.0##L2);                                  \
+  CHECK (as == 128.0##L1 - 64.0##L1 * EPSILON1);                            \
+  const type at = fn (128.0##L1 - 64.0##L1 * EPSILON1, 5.0##L2);            \
+  CHECK (at == 128.0##L1 - 128.0##L1 * EPSILON1);                           \
+}
+
+TEST (test1, nextafterf, float, F, F, f, f, __FLT_MIN__, __FLT_MAX__,
+      __FLT_DENORM_MIN__, __FLT_EPSILON__, __FLT_MIN__, __FLT_MAX__,
+      __FLT_DENORM_MIN__)
+TEST (test2, nextafter, double, , , , , __DBL_MIN__, __DBL_MAX__,
+      __DBL_DENORM_MIN__, __DBL_EPSILON__, __DBL_MIN__, __DBL_MAX__,
+      __DBL_DENORM_MIN__)
+#if __LDBL_MANT_DIG__ != 106
+TEST (test3, nextafterl, long double, L, L, l, l, __LDBL_MIN__, __LDBL_MAX__,
+      __LDBL_DENORM_MIN__, __LDBL_EPSILON__, __LDBL_MIN__, __LDBL_MAX__,
+      __LDBL_DENORM_MIN__)
+TEST (test4, nexttowardf, float, F, L, f, l, __FLT_MIN__, __FLT_MAX__,
+      __FLT_DENORM_MIN__, __FLT_EPSILON__, __LDBL_MIN__, __LDBL_MAX__,
+      __LDBL_DENORM_MIN__)
+TEST (test5, nexttoward, double, , L, , l, __DBL_MIN__, __DBL_MAX__,
+      __DBL_DENORM_MIN__, __DBL_EPSILON__, __LDBL_MIN__, __LDBL_MAX__,
+      __LDBL_DENORM_MIN__)
+TEST (test6, nexttowardl, long double, L, L, l, l, __LDBL_MIN__, __LDBL_MAX__,
+      __LDBL_DENORM_MIN__, __LDBL_EPSILON__, __LDBL_MIN__, __LDBL_MAX__,
+      __LDBL_DENORM_MIN__)
+#endif
+
+int
+main ()
+{
+  test1 ();
+  test2 ();
+#if __LDBL_MANT_DIG__ != 106
+  test3 ();
+  test4 ();
+  test5 ();
+  test6 ();
+#endif
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/nextafter-2.c b/gcc/testsuite/gcc.dg/nextafter-2.c
new file mode 100644 (file)
index 0000000..36b7e3c
--- /dev/null
@@ -0,0 +1,6 @@
+/* PR libstdc++/85466 */
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-builtin" } */
+/* { dg-add-options ieee } */
+
+#include "nextafter-1.c"
diff --git a/gcc/testsuite/gcc.dg/nextafter-3.c b/gcc/testsuite/gcc.dg/nextafter-3.c
new file mode 100644 (file)
index 0000000..25a520b
--- /dev/null
@@ -0,0 +1,9 @@
+/* PR libstdc++/85466 */
+/* { dg-do run } */
+/* { dg-options "-O2 -fmath-errno -fno-trapping-math -fdump-tree-optimized" } */
+/* { dg-add-options ieee } */
+/* { dg-final { scan-tree-dump-not "nextafter" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "nexttoward" "optimized" } } */
+
+#define NEED_ERRNO 1
+#include "nextafter-1.c"
diff --git a/gcc/testsuite/gcc.dg/nextafter-4.c b/gcc/testsuite/gcc.dg/nextafter-4.c
new file mode 100644 (file)
index 0000000..7e3639b
--- /dev/null
@@ -0,0 +1,10 @@
+/* PR libstdc++/85466 */
+/* { dg-do run } */
+/* { dg-options "-O2 -fmath-errno -ftrapping-math -fdump-tree-optimized" } */
+/* { dg-add-options ieee } */
+/* { dg-final { scan-tree-dump-not "nextafter" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "nexttoward" "optimized" } } */
+
+#define NEED_ERRNO 1
+#define NEED_EXC 1
+#include "nextafter-1.c"