libquadmath/math/fmaq.c

   1 /* Compute x * y + z as ternary operation.
   2    Copyright (C) 2010-2012 Free Software Foundation, Inc.
   3    This file is part of the GNU C Library.
   4    Contributed by Jakub Jelinek <jakub@redhat.com>, 2010.
   5
   6    The GNU C Library is free software; you can redistribute it and/or
   7    modify it under the terms of the GNU Lesser General Public
   8    License as published by the Free Software Foundation; either
   9    version 2.1 of the License, or (at your option) any later version.
  10
  11    The GNU C Library is distributed in the hope that it will be useful,
  12    but WITHOUT ANY WARRANTY; without even the implied warranty of
  13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14    Lesser General Public License for more details.
  15
  16    You should have received a copy of the GNU Lesser General Public
  17    License along with the GNU C Library; if not, write to the Free
  18    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
  19    02111-1307 USA.  */
  20
  21 #include "quadmath-imp.h"
  22 #include <math.h>
  23 #include <float.h>
  24 #ifdef HAVE_FENV_H
  25 # include <fenv.h>
  26 # if defined HAVE_FEHOLDEXCEPT && defined HAVE_FESETROUND \
  27      && defined HAVE_FEUPDATEENV && defined HAVE_FETESTEXCEPT \
  28      && defined FE_TOWARDZERO && defined FE_INEXACT
  29 #  define USE_FENV_H
  30 # endif
  31 #endif
  32
  33 /* This implementation uses rounding to odd to avoid problems with
  34    double rounding.  See a paper by Boldo and Melquiond:
  35    http://www.lri.fr/~melquion/doc/08-tc.pdf  */
  36
  37 __float128
  38 fmaq (__float128 x, __float128 y, __float128 z)
  39 {
  40   ieee854_float128 u, v, w;
  41   int adjust = 0;
  42   u.value = x;
  43   v.value = y;
  44   w.value = z;
  45   if (__builtin_expect (u.ieee.exponent + v.ieee.exponent
  46                         >= 0x7fff + IEEE854_FLOAT128_BIAS
  47                            - FLT128_MANT_DIG, 0)
  48       || __builtin_expect (u.ieee.exponent >= 0x7fff - FLT128_MANT_DIG, 0)
  49       || __builtin_expect (v.ieee.exponent >= 0x7fff - FLT128_MANT_DIG, 0)
  50       || __builtin_expect (w.ieee.exponent >= 0x7fff - FLT128_MANT_DIG, 0)
  51       || __builtin_expect (u.ieee.exponent + v.ieee.exponent
  52                            <= IEEE854_FLOAT128_BIAS + FLT128_MANT_DIG, 0))
  53     {
  54       /* If z is Inf, but x and y are finite, the result should be
  55          z rather than NaN.  */
  56       if (w.ieee.exponent == 0x7fff
  57           && u.ieee.exponent != 0x7fff
  58           && v.ieee.exponent != 0x7fff)
  59         return (z + x) + y;
  60       /* If z is zero and x are y are nonzero, compute the result
  61          as x * y to avoid the wrong sign of a zero result if x * y
  62          underflows to 0.  */
  63       if (z == 0 && x != 0 && y != 0)
  64         return x * y;
  65       /* If x or y or z is Inf/NaN, or if fma will certainly overflow,
  66          or if x * y is less than half of FLT128_DENORM_MIN,
  67          compute as x * y + z.  */
  68       if (u.ieee.exponent == 0x7fff
  69           || v.ieee.exponent == 0x7fff
  70           || w.ieee.exponent == 0x7fff
  71           || u.ieee.exponent + v.ieee.exponent
  72              > 0x7fff + IEEE854_FLOAT128_BIAS
  73           || u.ieee.exponent + v.ieee.exponent
  74              < IEEE854_FLOAT128_BIAS - FLT128_MANT_DIG - 2)
  75         return x * y + z;
  76       if (u.ieee.exponent + v.ieee.exponent
  77           >= 0x7fff + IEEE854_FLOAT128_BIAS - FLT128_MANT_DIG)
  78         {
  79           /* Compute 1p-113 times smaller result and multiply
  80              at the end.  */
  81           if (u.ieee.exponent > v.ieee.exponent)
  82             u.ieee.exponent -= FLT128_MANT_DIG;
  83           else
  84             v.ieee.exponent -= FLT128_MANT_DIG;
  85           /* If x + y exponent is very large and z exponent is very small,
  86              it doesn't matter if we don't adjust it.  */
  87           if (w.ieee.exponent > FLT128_MANT_DIG)
  88             w.ieee.exponent -= FLT128_MANT_DIG;
  89           adjust = 1;
  90         }
  91       else if (w.ieee.exponent >= 0x7fff - FLT128_MANT_DIG)
  92         {
  93           /* Similarly.
  94              If z exponent is very large and x and y exponents are
  95              very small, it doesn't matter if we don't adjust it.  */
  96           if (u.ieee.exponent > v.ieee.exponent)
  97             {
  98               if (u.ieee.exponent > FLT128_MANT_DIG)
  99                 u.ieee.exponent -= FLT128_MANT_DIG;
 100             }
 101           else if (v.ieee.exponent > FLT128_MANT_DIG)
 102             v.ieee.exponent -= FLT128_MANT_DIG;
 103           w.ieee.exponent -= FLT128_MANT_DIG;
 104           adjust = 1;
 105         }
 106       else if (u.ieee.exponent >= 0x7fff - FLT128_MANT_DIG)
 107         {
 108           u.ieee.exponent -= FLT128_MANT_DIG;
 109           if (v.ieee.exponent)
 110             v.ieee.exponent += FLT128_MANT_DIG;
 111           else
 112             v.value *= 0x1p113Q;
 113         }
 114       else if (v.ieee.exponent >= 0x7fff - FLT128_MANT_DIG)
 115         {
 116           v.ieee.exponent -= FLT128_MANT_DIG;
 117           if (u.ieee.exponent)
 118             u.ieee.exponent += FLT128_MANT_DIG;
 119           else
 120             u.value *= 0x1p113Q;
 121         }
 122       else /* if (u.ieee.exponent + v.ieee.exponent
 123                   <= IEEE854_FLOAT128_BIAS + FLT128_MANT_DIG) */
 124         {
 125           if (u.ieee.exponent > v.ieee.exponent)
 126             u.ieee.exponent += 2 * FLT128_MANT_DIG;
 127           else
 128             v.ieee.exponent += 2 * FLT128_MANT_DIG;
 129           if (w.ieee.exponent <= 4 * FLT128_MANT_DIG + 4)
 130             {
 131               if (w.ieee.exponent)
 132                 w.ieee.exponent += 2 * FLT128_MANT_DIG;
 133               else
 134                 w.value *= 0x1p226Q;
 135               adjust = -1;
 136             }
 137           /* Otherwise x * y should just affect inexact
 138              and nothing else.  */
 139         }
 140       x = u.value;
 141       y = v.value;
 142       z = w.value;
 143     }
 144
 145   /* Ensure correct sign of exact 0 + 0.  */
 146   if (__builtin_expect ((x == 0 || y == 0) && z == 0, 0))
 147     return x * y + z;
 148
 149   /* Multiplication m1 + m2 = x * y using Dekker's algorithm.  */
 150 #define C ((1LL << (FLT128_MANT_DIG + 1) / 2) + 1)
 151   __float128 x1 = x * C;
 152   __float128 y1 = y * C;
 153   __float128 m1 = x * y;
 154   x1 = (x - x1) + x1;
 155   y1 = (y - y1) + y1;
 156   __float128 x2 = x - x1;
 157   __float128 y2 = y - y1;
 158   __float128 m2 = (((x1 * y1 - m1) + x1 * y2) + x2 * y1) + x2 * y2;
 159
 160   /* Addition a1 + a2 = z + m1 using Knuth's algorithm.  */
 161   __float128 a1 = z + m1;
 162   __float128 t1 = a1 - z;
 163   __float128 t2 = a1 - t1;
 164   t1 = m1 - t1;
 165   t2 = z - t2;
 166   __float128 a2 = t1 + t2;
 167
 168 #ifdef USE_FENV_H
 169   fenv_t env;
 170   feholdexcept (&env);
 171   fesetround (FE_TOWARDZERO);
 172 #endif
 173   /* Perform m2 + a2 addition with round to odd.  */
 174   u.value = a2 + m2;
 175
 176   if (__builtin_expect (adjust == 0, 1))
 177     {
 178 #ifdef USE_FENV_H
 179       if ((u.ieee.mant_low & 1) == 0 && u.ieee.exponent != 0x7fff)
 180         u.ieee.mant_low |= fetestexcept (FE_INEXACT) != 0;
 181       feupdateenv (&env);
 182 #endif
 183       /* Result is a1 + u.value.  */
 184       return a1 + u.value;
 185     }
 186   else if (__builtin_expect (adjust > 0, 1))
 187     {
 188 #ifdef USE_FENV_H
 189       if ((u.ieee.mant_low & 1) == 0 && u.ieee.exponent != 0x7fff)
 190         u.ieee.mant_low |= fetestexcept (FE_INEXACT) != 0;
 191       feupdateenv (&env);
 192 #endif
 193       /* Result is a1 + u.value, scaled up.  */
 194       return (a1 + u.value) * 0x1p113Q;
 195     }
 196   else
 197     {
 198 #ifdef USE_FENV_H
 199       if ((u.ieee.mant_low & 1) == 0)
 200         u.ieee.mant_low |= fetestexcept (FE_INEXACT) != 0;
 201 #endif
 202       v.value = a1 + u.value;
 203       /* Ensure the addition is not scheduled after fetestexcept call.  */
 204       asm volatile ("" : : "m" (v.value));
 205 #ifdef USE_FENV_H
 206       int j = fetestexcept (FE_INEXACT) != 0;
 207       feupdateenv (&env);
 208 #else
 209       int j = 0;
 210 #endif
 211       /* Ensure the following computations are performed in default rounding
 212          mode instead of just reusing the round to zero computation.  */
 213       asm volatile ("" : "=m" (u) : "m" (u));
 214       /* If a1 + u.value is exact, the only rounding happens during
 215          scaling down.  */
 216       if (j == 0)
 217         return v.value * 0x1p-226Q;
 218       /* If result rounded to zero is not subnormal, no double
 219          rounding will occur.  */
 220       if (v.ieee.exponent > 226)
 221         return (a1 + u.value) * 0x1p-226Q;
 222       /* If v.value * 0x1p-226Q with round to zero is a subnormal above
 223          or equal to FLT128_MIN / 2, then v.value * 0x1p-226Q shifts mantissa
 224          down just by 1 bit, which means v.ieee.mant_low |= j would
 225          change the round bit, not sticky or guard bit.
 226          v.value * 0x1p-226Q never normalizes by shifting up,
 227          so round bit plus sticky bit should be already enough
 228          for proper rounding.  */
 229       if (v.ieee.exponent == 226)
 230         {
 231           /* v.ieee.mant_low & 2 is LSB bit of the result before rounding,
 232              v.ieee.mant_low & 1 is the round bit and j is our sticky
 233              bit. */
 234           w.value = 0.0Q;
 235           w.ieee.mant_low = ((v.ieee.mant_low & 3) << 1) | j;
 236           w.ieee.negative = v.ieee.negative;
 237           v.ieee.mant_low &= ~3U;
 238           v.value *= 0x1p-226L;
 239           w.value *= 0x1p-2L;
 240           return v.value + w.value;
 241         }
 242       v.ieee.mant_low |= j;
 243       return v.value * 0x1p-226Q;
 244     }
 245 }