complex.c (csqrtq): NaN and INF fixes.
[gcc.git] / libquadmath / math / fmaq.c
1 /* Compute x * y + z as ternary operation.
2 Copyright (C) 2010-2012 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Jakub Jelinek <jakub@redhat.com>, 2010.
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, write to the Free
18 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19 02111-1307 USA. */
20
21 #include "quadmath-imp.h"
22 #include <math.h>
23 #include <float.h>
24 #ifdef HAVE_FENV_H
25 # include <fenv.h>
26 # if defined HAVE_FEHOLDEXCEPT && defined HAVE_FESETROUND \
27 && defined HAVE_FEUPDATEENV && defined HAVE_FETESTEXCEPT \
28 && defined FE_TOWARDZERO && defined FE_INEXACT
29 # define USE_FENV_H
30 # endif
31 #endif
32
33 /* This implementation uses rounding to odd to avoid problems with
34 double rounding. See a paper by Boldo and Melquiond:
35 http://www.lri.fr/~melquion/doc/08-tc.pdf */
36
37 __float128
38 fmaq (__float128 x, __float128 y, __float128 z)
39 {
40 ieee854_float128 u, v, w;
41 int adjust = 0;
42 u.value = x;
43 v.value = y;
44 w.value = z;
45 if (__builtin_expect (u.ieee.exponent + v.ieee.exponent
46 >= 0x7fff + IEEE854_FLOAT128_BIAS
47 - FLT128_MANT_DIG, 0)
48 || __builtin_expect (u.ieee.exponent >= 0x7fff - FLT128_MANT_DIG, 0)
49 || __builtin_expect (v.ieee.exponent >= 0x7fff - FLT128_MANT_DIG, 0)
50 || __builtin_expect (w.ieee.exponent >= 0x7fff - FLT128_MANT_DIG, 0)
51 || __builtin_expect (u.ieee.exponent + v.ieee.exponent
52 <= IEEE854_FLOAT128_BIAS + FLT128_MANT_DIG, 0))
53 {
54 /* If z is Inf, but x and y are finite, the result should be
55 z rather than NaN. */
56 if (w.ieee.exponent == 0x7fff
57 && u.ieee.exponent != 0x7fff
58 && v.ieee.exponent != 0x7fff)
59 return (z + x) + y;
60 /* If z is zero and x are y are nonzero, compute the result
61 as x * y to avoid the wrong sign of a zero result if x * y
62 underflows to 0. */
63 if (z == 0 && x != 0 && y != 0)
64 return x * y;
65 /* If x or y or z is Inf/NaN, or if fma will certainly overflow,
66 or if x * y is less than half of FLT128_DENORM_MIN,
67 compute as x * y + z. */
68 if (u.ieee.exponent == 0x7fff
69 || v.ieee.exponent == 0x7fff
70 || w.ieee.exponent == 0x7fff
71 || u.ieee.exponent + v.ieee.exponent
72 > 0x7fff + IEEE854_FLOAT128_BIAS
73 || u.ieee.exponent + v.ieee.exponent
74 < IEEE854_FLOAT128_BIAS - FLT128_MANT_DIG - 2)
75 return x * y + z;
76 if (u.ieee.exponent + v.ieee.exponent
77 >= 0x7fff + IEEE854_FLOAT128_BIAS - FLT128_MANT_DIG)
78 {
79 /* Compute 1p-113 times smaller result and multiply
80 at the end. */
81 if (u.ieee.exponent > v.ieee.exponent)
82 u.ieee.exponent -= FLT128_MANT_DIG;
83 else
84 v.ieee.exponent -= FLT128_MANT_DIG;
85 /* If x + y exponent is very large and z exponent is very small,
86 it doesn't matter if we don't adjust it. */
87 if (w.ieee.exponent > FLT128_MANT_DIG)
88 w.ieee.exponent -= FLT128_MANT_DIG;
89 adjust = 1;
90 }
91 else if (w.ieee.exponent >= 0x7fff - FLT128_MANT_DIG)
92 {
93 /* Similarly.
94 If z exponent is very large and x and y exponents are
95 very small, it doesn't matter if we don't adjust it. */
96 if (u.ieee.exponent > v.ieee.exponent)
97 {
98 if (u.ieee.exponent > FLT128_MANT_DIG)
99 u.ieee.exponent -= FLT128_MANT_DIG;
100 }
101 else if (v.ieee.exponent > FLT128_MANT_DIG)
102 v.ieee.exponent -= FLT128_MANT_DIG;
103 w.ieee.exponent -= FLT128_MANT_DIG;
104 adjust = 1;
105 }
106 else if (u.ieee.exponent >= 0x7fff - FLT128_MANT_DIG)
107 {
108 u.ieee.exponent -= FLT128_MANT_DIG;
109 if (v.ieee.exponent)
110 v.ieee.exponent += FLT128_MANT_DIG;
111 else
112 v.value *= 0x1p113Q;
113 }
114 else if (v.ieee.exponent >= 0x7fff - FLT128_MANT_DIG)
115 {
116 v.ieee.exponent -= FLT128_MANT_DIG;
117 if (u.ieee.exponent)
118 u.ieee.exponent += FLT128_MANT_DIG;
119 else
120 u.value *= 0x1p113Q;
121 }
122 else /* if (u.ieee.exponent + v.ieee.exponent
123 <= IEEE854_FLOAT128_BIAS + FLT128_MANT_DIG) */
124 {
125 if (u.ieee.exponent > v.ieee.exponent)
126 u.ieee.exponent += 2 * FLT128_MANT_DIG;
127 else
128 v.ieee.exponent += 2 * FLT128_MANT_DIG;
129 if (w.ieee.exponent <= 4 * FLT128_MANT_DIG + 4)
130 {
131 if (w.ieee.exponent)
132 w.ieee.exponent += 2 * FLT128_MANT_DIG;
133 else
134 w.value *= 0x1p226Q;
135 adjust = -1;
136 }
137 /* Otherwise x * y should just affect inexact
138 and nothing else. */
139 }
140 x = u.value;
141 y = v.value;
142 z = w.value;
143 }
144
145 /* Ensure correct sign of exact 0 + 0. */
146 if (__builtin_expect ((x == 0 || y == 0) && z == 0, 0))
147 return x * y + z;
148
149 /* Multiplication m1 + m2 = x * y using Dekker's algorithm. */
150 #define C ((1LL << (FLT128_MANT_DIG + 1) / 2) + 1)
151 __float128 x1 = x * C;
152 __float128 y1 = y * C;
153 __float128 m1 = x * y;
154 x1 = (x - x1) + x1;
155 y1 = (y - y1) + y1;
156 __float128 x2 = x - x1;
157 __float128 y2 = y - y1;
158 __float128 m2 = (((x1 * y1 - m1) + x1 * y2) + x2 * y1) + x2 * y2;
159
160 /* Addition a1 + a2 = z + m1 using Knuth's algorithm. */
161 __float128 a1 = z + m1;
162 __float128 t1 = a1 - z;
163 __float128 t2 = a1 - t1;
164 t1 = m1 - t1;
165 t2 = z - t2;
166 __float128 a2 = t1 + t2;
167
168 #ifdef USE_FENV_H
169 fenv_t env;
170 feholdexcept (&env);
171 fesetround (FE_TOWARDZERO);
172 #endif
173 /* Perform m2 + a2 addition with round to odd. */
174 u.value = a2 + m2;
175
176 if (__builtin_expect (adjust == 0, 1))
177 {
178 #ifdef USE_FENV_H
179 if ((u.ieee.mant_low & 1) == 0 && u.ieee.exponent != 0x7fff)
180 u.ieee.mant_low |= fetestexcept (FE_INEXACT) != 0;
181 feupdateenv (&env);
182 #endif
183 /* Result is a1 + u.value. */
184 return a1 + u.value;
185 }
186 else if (__builtin_expect (adjust > 0, 1))
187 {
188 #ifdef USE_FENV_H
189 if ((u.ieee.mant_low & 1) == 0 && u.ieee.exponent != 0x7fff)
190 u.ieee.mant_low |= fetestexcept (FE_INEXACT) != 0;
191 feupdateenv (&env);
192 #endif
193 /* Result is a1 + u.value, scaled up. */
194 return (a1 + u.value) * 0x1p113Q;
195 }
196 else
197 {
198 #ifdef USE_FENV_H
199 if ((u.ieee.mant_low & 1) == 0)
200 u.ieee.mant_low |= fetestexcept (FE_INEXACT) != 0;
201 #endif
202 v.value = a1 + u.value;
203 /* Ensure the addition is not scheduled after fetestexcept call. */
204 asm volatile ("" : : "m" (v.value));
205 #ifdef USE_FENV_H
206 int j = fetestexcept (FE_INEXACT) != 0;
207 feupdateenv (&env);
208 #else
209 int j = 0;
210 #endif
211 /* Ensure the following computations are performed in default rounding
212 mode instead of just reusing the round to zero computation. */
213 asm volatile ("" : "=m" (u) : "m" (u));
214 /* If a1 + u.value is exact, the only rounding happens during
215 scaling down. */
216 if (j == 0)
217 return v.value * 0x1p-226Q;
218 /* If result rounded to zero is not subnormal, no double
219 rounding will occur. */
220 if (v.ieee.exponent > 226)
221 return (a1 + u.value) * 0x1p-226Q;
222 /* If v.value * 0x1p-226Q with round to zero is a subnormal above
223 or equal to FLT128_MIN / 2, then v.value * 0x1p-226Q shifts mantissa
224 down just by 1 bit, which means v.ieee.mant_low |= j would
225 change the round bit, not sticky or guard bit.
226 v.value * 0x1p-226Q never normalizes by shifting up,
227 so round bit plus sticky bit should be already enough
228 for proper rounding. */
229 if (v.ieee.exponent == 226)
230 {
231 /* v.ieee.mant_low & 2 is LSB bit of the result before rounding,
232 v.ieee.mant_low & 1 is the round bit and j is our sticky
233 bit. */
234 w.value = 0.0Q;
235 w.ieee.mant_low = ((v.ieee.mant_low & 3) << 1) | j;
236 w.ieee.negative = v.ieee.negative;
237 v.ieee.mant_low &= ~3U;
238 v.value *= 0x1p-226L;
239 w.value *= 0x1p-2L;
240 return v.value + w.value;
241 }
242 v.ieee.mant_low |= j;
243 return v.value * 0x1p-226Q;
244 }
245 }