From 047823853d8324eab7d6ad8f266ee5395c4a76ff Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Fri, 4 May 2018 09:19:45 +0200 Subject: [PATCH] re PR tree-optimization/85466 (Performance is slow when doing 'branchless' conditional style math operations) PR libstdc++/85466 * real.h (real_nextafter): Declare. * real.c (real_nextafter): New function. * fold-const-call.c (fold_const_nextafter): New function. (fold_const_call_sss): Call it for CASE_CFN_NEXTAFTER and CASE_CFN_NEXTTOWARD. (fold_const_call_1): For CASE_CFN_NEXTTOWARD call fold_const_call_sss even when arg1_mode is different from arg0_mode. * gcc.dg/nextafter-1.c: New test. * gcc.dg/nextafter-2.c: New test. * gcc.dg/nextafter-3.c: New test. * gcc.dg/nextafter-4.c: New test. From-SVN: r259921 --- gcc/ChangeLog | 11 ++ gcc/fold-const-call.c | 65 +++++++++++- gcc/real.c | 96 +++++++++++++++++ gcc/real.h | 4 + gcc/testsuite/ChangeLog | 8 ++ gcc/testsuite/gcc.dg/nextafter-1.c | 159 +++++++++++++++++++++++++++++ gcc/testsuite/gcc.dg/nextafter-2.c | 6 ++ gcc/testsuite/gcc.dg/nextafter-3.c | 9 ++ gcc/testsuite/gcc.dg/nextafter-4.c | 10 ++ 9 files changed, 365 insertions(+), 3 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/nextafter-1.c create mode 100644 gcc/testsuite/gcc.dg/nextafter-2.c create mode 100644 gcc/testsuite/gcc.dg/nextafter-3.c create mode 100644 gcc/testsuite/gcc.dg/nextafter-4.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index df60e1faeae..229e0951371 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,14 @@ +2018-05-04 Jakub Jelinek + + PR libstdc++/85466 + * real.h (real_nextafter): Declare. + * real.c (real_nextafter): New function. + * fold-const-call.c (fold_const_nextafter): New function. + (fold_const_call_sss): Call it for CASE_CFN_NEXTAFTER and + CASE_CFN_NEXTTOWARD. + (fold_const_call_1): For CASE_CFN_NEXTTOWARD call fold_const_call_sss + even when arg1_mode is different from arg0_mode. 
+ 2018-05-03 Nathan Sidwell * doc/extend.texi (Deprecated Features): Remove diff --git a/gcc/fold-const-call.c b/gcc/fold-const-call.c index fcf4a14ebaa..49694922b5c 100644 --- a/gcc/fold-const-call.c +++ b/gcc/fold-const-call.c @@ -527,6 +527,48 @@ fold_const_pow (real_value *result, const real_value *arg0, return false; } +/* Try to evaluate: + + *RESULT = nextafter (*ARG0, *ARG1) + + or + + *RESULT = nexttoward (*ARG0, *ARG1) + + in format FORMAT. Return true on success. */ + +static bool +fold_const_nextafter (real_value *result, const real_value *arg0, + const real_value *arg1, const real_format *format) +{ + if (REAL_VALUE_ISSIGNALING_NAN (*arg0) + || REAL_VALUE_ISSIGNALING_NAN (*arg1)) + return false; + + /* Don't handle composite modes, nor decimal, nor modes without + inf or denorm at least for now. */ + if (format->pnan < format->p + || format->b == 10 + || !format->has_inf + || !format->has_denorm) + return false; + + if (real_nextafter (result, format, arg0, arg1) + /* If raising underflow or overflow and setting errno to ERANGE, + fail if we care about those side-effects. */ + && (flag_trapping_math || flag_errno_math)) + return false; + /* Similarly for nextafter (0, 1) raising underflow. 
*/ + else if (flag_trapping_math + && arg0->cl == rvc_zero + && result->cl != rvc_zero) + return false; + + real_convert (result, format, result); + + return true; +} + /* Try to evaluate: *RESULT = ldexp (*ARG0, ARG1) @@ -1260,6 +1302,10 @@ fold_const_call_sss (real_value *result, combined_fn fn, CASE_CFN_POW: return fold_const_pow (result, arg0, arg1, format); + CASE_CFN_NEXTAFTER: + CASE_CFN_NEXTTOWARD: + return fold_const_nextafter (result, arg0, arg1, format); + default: return false; } @@ -1365,20 +1411,33 @@ fold_const_call_1 (combined_fn fn, tree type, tree arg0, tree arg1) machine_mode arg0_mode = TYPE_MODE (TREE_TYPE (arg0)); machine_mode arg1_mode = TYPE_MODE (TREE_TYPE (arg1)); - if (arg0_mode == arg1_mode + if (mode == arg0_mode && real_cst_p (arg0) && real_cst_p (arg1)) { gcc_checking_assert (SCALAR_FLOAT_MODE_P (arg0_mode)); - if (mode == arg0_mode) + REAL_VALUE_TYPE result; + if (arg0_mode == arg1_mode) { /* real, real -> real. */ - REAL_VALUE_TYPE result; if (fold_const_call_sss (&result, fn, TREE_REAL_CST_PTR (arg0), TREE_REAL_CST_PTR (arg1), REAL_MODE_FORMAT (mode))) return build_real (type, result); } + else if (arg1_mode == TYPE_MODE (long_double_type_node)) + switch (fn) + { + CASE_CFN_NEXTTOWARD: + /* real, long double -> real. */ + if (fold_const_call_sss (&result, fn, TREE_REAL_CST_PTR (arg0), + TREE_REAL_CST_PTR (arg1), + REAL_MODE_FORMAT (mode))) + return build_real (type, result); + break; + default: + break; + } return NULL_TREE; } diff --git a/gcc/real.c b/gcc/real.c index 2a467376f66..eefa69e8535 100644 --- a/gcc/real.c +++ b/gcc/real.c @@ -5048,6 +5048,102 @@ real_isinteger (const REAL_VALUE_TYPE *c, HOST_WIDE_INT *int_out) return false; } +/* Calculate nextafter (X, Y) or nexttoward (X, Y). Return true if + underflow or overflow needs to be raised. 
*/ + +bool +real_nextafter (REAL_VALUE_TYPE *r, format_helper fmt, + const REAL_VALUE_TYPE *x, const REAL_VALUE_TYPE *y) +{ + int cmp = do_compare (x, y, 2); + /* If either operand is NaN, return qNaN. */ + if (cmp == 2) + { + get_canonical_qnan (r, 0); + return false; + } + /* If x == y, return y cast to target type. */ + if (cmp == 0) + { + real_convert (r, fmt, y); + return false; + } + + if (x->cl == rvc_zero) /* From +-0 step to the smallest subnormal of FMT, taking Y's sign. */ + { + get_zero (r, y->sign); + r->cl = rvc_normal; + SET_REAL_EXP (r, fmt->emin - fmt->p + 1); + r->sig[SIGSZ - 1] = SIG_MSB; + return false; + } + + int np2 = SIGNIFICAND_BITS - fmt->p; + /* NP2 is the index of the single significand bit set in U, the step that is added to or subtracted from X's significand below. For denormals adjust np2 correspondingly. */ + if (x->cl == rvc_normal && REAL_EXP (x) < fmt->emin) + np2 += fmt->emin - REAL_EXP (x); + + REAL_VALUE_TYPE u; + get_zero (r, x->sign); + get_zero (&u, 0); + set_significand_bit (&u, np2); + r->cl = rvc_normal; + SET_REAL_EXP (r, REAL_EXP (x)); + + if (x->cl == rvc_inf) /* From +-Inf step to the largest finite value: zero minus U must borrow, leaving an all-ones significand, at exponent emax. */ + { + bool borrow = sub_significands (r, r, &u, 0); + gcc_assert (borrow); + SET_REAL_EXP (r, fmt->emax); + } + else if (cmp == (x->sign ? 1 : -1)) /* Stepping away from zero: the magnitude grows by U. */ + { + if (add_significands (r, x, &u)) + { + /* Overflow. Means the significand had been all ones, and + is now all zeros. Need to increase the exponent, and + possibly re-normalize it. */ + SET_REAL_EXP (r, REAL_EXP (r) + 1); + if (REAL_EXP (r) > fmt->emax) + { + get_inf (r, x->sign); + return true; + } + r->sig[SIGSZ - 1] = SIG_MSB; + } + } + else /* Stepping toward zero: the magnitude shrinks by U. */ + { + if (REAL_EXP (x) > fmt->emin && x->sig[SIGSZ - 1] == SIG_MSB) + { + int i; + for (i = SIGSZ - 2; i >= 0; i--) + if (x->sig[i]) + break; + if (i < 0) + { + /* When mantissa is 1.0, we need to subtract only + half of u: nextafter (1.0, 0.0) is 1.0 - __DBL_EPSILON__ / 2 + rather than 1.0 - __DBL_EPSILON__. */ + clear_significand_bit (&u, np2); + np2--; + set_significand_bit (&u, np2); + } + } + sub_significands (r, x, &u, 0); + } + + /* Clear out trailing garbage below bit NP2 and renormalize. NOTE(review): the final return below reports underflow only when the result is zero; a nonzero subnormal result (REAL_EXP (r) < fmt->emin) returns false. nextafter(3) documents a range error for subnormal results as well -- confirm whether that case should also return true (the nextafter-3.c/-4.c expectations would need adjusting with it). 
*/ + clear_significand_below (r, np2); + normalize (r); + if (REAL_EXP (r) <= fmt->emin - fmt->p) /* Below the smallest subnormal: return zero with X's sign and report underflow. */ + { + get_zero (r, x->sign); + return true; + } + return r->cl == rvc_zero; +} + /* Write into BUF the maximum representable finite floating-point number, (1 - b**-p) * b**emax for a given FP format FMT as a hex float string. LEN is the size of BUF, and the buffer must be large diff --git a/gcc/real.h b/gcc/real.h index e51073b19b9..0ce42565708 100644 --- a/gcc/real.h +++ b/gcc/real.h @@ -507,6 +507,10 @@ extern void real_copysign (REAL_VALUE_TYPE *, const REAL_VALUE_TYPE *); extern bool real_isinteger (const REAL_VALUE_TYPE *, format_helper); extern bool real_isinteger (const REAL_VALUE_TYPE *, HOST_WIDE_INT *); +/* Calculate nextafter (X, Y) or nexttoward (X, Y) in format FMT. Return true if underflow or overflow needs to be raised. */ +extern bool real_nextafter (REAL_VALUE_TYPE *, format_helper, + const REAL_VALUE_TYPE *, const REAL_VALUE_TYPE *); + /* Write into BUF the maximum representable finite floating-point number, (1 - b**-p) * b**emax for a given FP format FMT as a hex float string. BUF must be large enough to contain the result. */ diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 3ea876067e9..6b5077535db 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,11 @@ +2018-05-04 Jakub Jelinek + + PR libstdc++/85466 + * gcc.dg/nextafter-1.c: New test. + * gcc.dg/nextafter-2.c: New test. + * gcc.dg/nextafter-3.c: New test. + * gcc.dg/nextafter-4.c: New test. + 2018-05-03 Nathan Sidwell Remove -ffriend-injection. 
diff --git a/gcc/testsuite/gcc.dg/nextafter-1.c b/gcc/testsuite/gcc.dg/nextafter-1.c new file mode 100644 index 00000000000..c8647ede00b --- /dev/null +++ b/gcc/testsuite/gcc.dg/nextafter-1.c @@ -0,0 +1,159 @@ +/* PR libstdc++/85466 */ +/* { dg-do run } */ +/* { dg-options "-O2 -fno-math-errno -fno-trapping-math -fdump-tree-optimized" } */ +/* { dg-add-options ieee } */ +/* { dg-final { scan-tree-dump-not "nextafter" "optimized" } } */ +/* { dg-final { scan-tree-dump-not "nexttoward" "optimized" } } */ + +float nextafterf (float, float); +double nextafter (double, double); +long double nextafterl (long double, long double); +float nexttowardf (float, long double); +double nexttoward (double, long double); +long double nexttowardl (long double, long double); + +#define CHECK(x) if (!(x)) __builtin_abort () + +#ifndef NEED_ERRNO +#define NEED_ERRNO 0 +#endif +#ifndef NEED_EXC +#define NEED_EXC 0 +#endif + +#define TEST(name, fn, type, L1, L2, l1, l2, MIN1, \ + MAX1, DENORM_MIN1, EPSILON1, MIN2, MAX2, DENORM_MIN2) \ +void \ +name (void) \ +{ \ + const type a = fn (0.0##L1, 0.0##L2); \ + CHECK (a == 0.0##L1 && !__builtin_signbit (a)); \ + const type b = fn (0.0##L1, -0.0##L2); \ + CHECK (b == 0.0##L1 && __builtin_signbit (b)); \ + const type c = fn (__builtin_nan##l1 (""), 0.0##L2); \ + CHECK (__builtin_isnan##l1 (c)); \ + const type d = fn (2.0##L1, __builtin_nan##l2 ("")); \ + CHECK (__builtin_isnan##l1 (d)); \ + const type e = NEED_EXC ? 
DENORM_MIN1 : fn (0.0##L1, 8.0##L2); \ + CHECK (e == DENORM_MIN1); \ + const type f = fn (1.0##L1, 8.0##L2); \ + CHECK (f == 1.0##L1 + EPSILON1); \ + const type g = fn (1.0##L1, -8.0##L2); \ + CHECK (g == 1.0##L1 - EPSILON1 / 2.0##L1); \ + const type h = fn (__builtin_inf (), 0.0##L2); \ + CHECK (h == MAX1); \ + const type i = fn (-1.0##L1, -__builtin_inf ()); \ + CHECK (i == -1.0##L1 - EPSILON1); \ + const type j = fn (1.5##L1, __builtin_inf ()); \ + CHECK (j == 1.5##L1 + EPSILON1); \ + const type k = fn (1.5##L1 - EPSILON1, 100.0##L2); \ + CHECK (k == 1.5##L1); \ + const type l \ + = (NEED_EXC || NEED_ERRNO) ? 0.0##L1 : fn (DENORM_MIN1, 0.0##L2); \ + CHECK (l == 0.0##L1 && !__builtin_signbit (l)); \ + const type m \ + = (NEED_EXC || NEED_ERRNO) ? __builtin_inf##l1 () \ + : fn (MAX1, __builtin_inf ()); \ + CHECK (__builtin_isinf##l1 (m) && !__builtin_signbit (m)); \ + const type n = fn (DENORM_MIN1, 12.0##L2); \ + CHECK (n == 2.0##L1 * DENORM_MIN1); \ + const type o = fn (n, 24.0##L2); \ + CHECK (o == 3.0##L1 * DENORM_MIN1); \ + const type p = fn (o, 132.0##L2); \ + CHECK (p == 4.0##L1 * DENORM_MIN1); \ + const type q = fn (2.0##L1 * DENORM_MIN1, -__builtin_inf ()); \ + CHECK (q == DENORM_MIN1); \ + const type r = fn (3.0##L1 * DENORM_MIN1, DENORM_MIN2); \ + CHECK (r == 2.0##L1 * DENORM_MIN1); \ + const type s = fn (4.0##L1 * DENORM_MIN1, 2.0##L2 * DENORM_MIN2); \ + CHECK (s == 3.0##L1 * DENORM_MIN1); \ + const type t = fn (MIN1, 0.0##L2); \ + CHECK (t == MIN1 - DENORM_MIN1); \ + const type u = fn (MIN1 - DENORM_MIN1, -MIN2); \ + CHECK (u == MIN1 - 2.0##L1 * DENORM_MIN1); \ + const type v = fn (MIN1 - 2.0##L1 * DENORM_MIN1, 100.0##L2); \ + CHECK (v == MIN1 - DENORM_MIN1); \ + const type w = fn (MIN1 - DENORM_MIN1, MAX2); \ + CHECK (w == MIN1); \ + const type x = fn (MIN1, 17.0##L2); \ + CHECK (x == MIN1 + DENORM_MIN1); \ + const type y = fn (MIN1 + DENORM_MIN1, __builtin_inf##l2 ()); \ + CHECK (y == MIN1 + 2.0##L1 * DENORM_MIN1); \ + const type z = fn (MIN1 / 
2.0##L1, -MIN2); \ + CHECK (z == MIN1 / 2.0##L1 - DENORM_MIN1); \ + const type aa = fn (-MIN1 / 4.0##L1, MIN2); \ + CHECK (aa == -MIN1 / 4.0##L1 + DENORM_MIN1); \ + const type ab = fn (MIN1 * 2.0##L1, -MIN2); \ + CHECK (ab == MIN1 * 2.0##L1 - DENORM_MIN1); \ + const type ac = fn (MIN1 * 4.0##L1, MIN2); \ + CHECK (ac == MIN1 * 4.0##L1 - DENORM_MIN1 * 2.0##L1); \ + const type ad = fn (MIN1 * 64.0##L1, MIN2); \ + CHECK (ad == MIN1 * 64.0##L1 - DENORM_MIN1 * 32.0##L1); \ + const type ae = fn (MIN1 / 2.0##L1 - DENORM_MIN1, 100.0##L2); \ + CHECK (ae == MIN1 / 2.0##L1); \ + const type af = fn (-MIN1 / 4 + DENORM_MIN1, -100.0##L2); \ + CHECK (af == -MIN1 / 4.0##L1); \ + const type ag = fn (MIN1 * 2.0##L1 - DENORM_MIN1, 100.0##L2); \ + CHECK (ag == MIN1 * 2.0##L1); \ + const type ah = fn (MIN1 * 4.0##L1 - 2.0##L1 * DENORM_MIN1, 100.0##L2); \ + CHECK (ah == MIN1 * 4.0##L1); \ + const type ai = fn (MIN1 * 64.0##L1 - 32.0##L1 * DENORM_MIN1, 100.0##L2); \ + CHECK (ai == MIN1 * 64.0##L1); \ + const type aj = fn (MIN1 * 64.0##L1, 100.0##L2); \ + CHECK (aj == MIN1 * 64.0##L1 + 64.0##L1 * DENORM_MIN1); \ + const type ak = fn (MIN1 * 64.0##L1 + DENORM_MIN1 * 64.0##L1, 1024.0##L2); \ + CHECK (ak == MIN1 * 64.0##L1 + 128.0##L1 * DENORM_MIN1); \ + const type al = fn (128.0##L1, 128.0##L2); \ + CHECK (al == 128.0##L1); \ + const type am = fn (128.0##L1, 129.0##L2); \ + CHECK (am == 128.0##L1 + 128.0##L1 * EPSILON1); \ + const type an = fn (-128.0##L1 + -128.0##L1 * EPSILON1, -130.0##L2); \ + CHECK (an == -128.0##L1 - 256.0##L1 * EPSILON1); \ + const type ao = fn (128.0##L1 + 256.0##L1 * EPSILON1, 256.0##L2); \ + CHECK (ao == 128.0##L1 + 384.0##L1 * EPSILON1); \ + const type ap = fn (128.0##L1 + 384.0##L1 * EPSILON1, -0.0##L2); \ + CHECK (ap == 128.0##L1 + 256.0##L1 * EPSILON1); \ + const type aq = fn (128.0##L1 + 256.0##L1 * EPSILON1, 1.0##L2); \ + CHECK (aq == 128.0##L1 + 128.0##L1 * EPSILON1); \ + const type ar = fn (128.0##L1 + 128.0##L1 * EPSILON1, 0.0##L2); \ + CHECK (ar == 
128.0##L1); \ + const type as = fn (128.0##L1, 0.0##L2); \ + CHECK (as == 128.0##L1 - 64.0##L1 * EPSILON1); \ + const type at = fn (128.0##L1 - 64.0##L1 * EPSILON1, 5.0##L2); \ + CHECK (at == 128.0##L1 - 128.0##L1 * EPSILON1); \ +} + +TEST (test1, nextafterf, float, F, F, f, f, __FLT_MIN__, __FLT_MAX__, + __FLT_DENORM_MIN__, __FLT_EPSILON__, __FLT_MIN__, __FLT_MAX__, + __FLT_DENORM_MIN__) +TEST (test2, nextafter, double, , , , , __DBL_MIN__, __DBL_MAX__, + __DBL_DENORM_MIN__, __DBL_EPSILON__, __DBL_MIN__, __DBL_MAX__, + __DBL_DENORM_MIN__) +#if __LDBL_MANT_DIG__ != 106 +TEST (test3, nextafterl, long double, L, L, l, l, __LDBL_MIN__, __LDBL_MAX__, + __LDBL_DENORM_MIN__, __LDBL_EPSILON__, __LDBL_MIN__, __LDBL_MAX__, + __LDBL_DENORM_MIN__) +TEST (test4, nexttowardf, float, F, L, f, l, __FLT_MIN__, __FLT_MAX__, + __FLT_DENORM_MIN__, __FLT_EPSILON__, __LDBL_MIN__, __LDBL_MAX__, + __LDBL_DENORM_MIN__) +TEST (test5, nexttoward, double, , L, , l, __DBL_MIN__, __DBL_MAX__, + __DBL_DENORM_MIN__, __DBL_EPSILON__, __LDBL_MIN__, __LDBL_MAX__, + __LDBL_DENORM_MIN__) +TEST (test6, nexttowardl, long double, L, L, l, l, __LDBL_MIN__, __LDBL_MAX__, + __LDBL_DENORM_MIN__, __LDBL_EPSILON__, __LDBL_MIN__, __LDBL_MAX__, + __LDBL_DENORM_MIN__) +#endif + +int +main () +{ + test1 (); + test2 (); +#if __LDBL_MANT_DIG__ != 106 + test3 (); + test4 (); + test5 (); + test6 (); +#endif + return 0; +} diff --git a/gcc/testsuite/gcc.dg/nextafter-2.c b/gcc/testsuite/gcc.dg/nextafter-2.c new file mode 100644 index 00000000000..36b7e3cfa1a --- /dev/null +++ b/gcc/testsuite/gcc.dg/nextafter-2.c @@ -0,0 +1,6 @@ +/* PR libstdc++/85466 */ +/* { dg-do run } */ +/* { dg-options "-O2 -fno-builtin" } */ +/* { dg-add-options ieee } */ + +#include "nextafter-1.c" diff --git a/gcc/testsuite/gcc.dg/nextafter-3.c b/gcc/testsuite/gcc.dg/nextafter-3.c new file mode 100644 index 00000000000..25a520bb15e --- /dev/null +++ b/gcc/testsuite/gcc.dg/nextafter-3.c @@ -0,0 +1,9 @@ +/* PR libstdc++/85466 */ +/* { dg-do run } 
*/ +/* { dg-options "-O2 -fmath-errno -fno-trapping-math -fdump-tree-optimized" } */ +/* { dg-add-options ieee } */ +/* { dg-final { scan-tree-dump-not "nextafter" "optimized" } } */ +/* { dg-final { scan-tree-dump-not "nexttoward" "optimized" } } */ + +#define NEED_ERRNO 1 +#include "nextafter-1.c" diff --git a/gcc/testsuite/gcc.dg/nextafter-4.c b/gcc/testsuite/gcc.dg/nextafter-4.c new file mode 100644 index 00000000000..7e3639b675c --- /dev/null +++ b/gcc/testsuite/gcc.dg/nextafter-4.c @@ -0,0 +1,10 @@ +/* PR libstdc++/85466 */ +/* { dg-do run } */ +/* { dg-options "-O2 -fmath-errno -ftrapping-math -fdump-tree-optimized" } */ +/* { dg-add-options ieee } */ +/* { dg-final { scan-tree-dump-not "nextafter" "optimized" } } */ +/* { dg-final { scan-tree-dump-not "nexttoward" "optimized" } } */ + +#define NEED_ERRNO 1 +#define NEED_EXC 1 +#include "nextafter-1.c" -- 2.30.2