re PR fortran/29383 (Fortran 2003/F95[TR15580:1999]: Floating point exception (IEEE...
[gcc.git] / libgfortran / config / fpu-387.h
1 /* FPU-related code for x86 and x86_64 processors.
2 Copyright (C) 2005-2014 Free Software Foundation, Inc.
3 Contributed by Francois-Xavier Coudert <coudert@clipper.ens.fr>
4
5 This file is part of the GNU Fortran 95 runtime library (libgfortran).
6
7 Libgfortran is free software; you can redistribute it and/or
8 modify it under the terms of the GNU General Public
9 License as published by the Free Software Foundation; either
10 version 3 of the License, or (at your option) any later version.
11
12 Libgfortran is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
20
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
25
26 #include <assert.h>
27
28 #ifndef __SSE_MATH__
29 #include "cpuid.h"
30 #endif
31
/* Tell whether the SSE control/status register can be used.  Returns
   nonzero when SSE is available and zero otherwise.  When the compiler
   already emits SSE math no run-time probe is made.  */
static int
has_sse (void)
{
#ifdef __SSE_MATH__
  return 1;
#else
  unsigned int eax, ebx, ecx, edx;

  /* The SSE feature bit is read from EDX of CPUID leaf 1; a failed query
     is treated as "no SSE".  */
  if (__get_cpuid (1, &eax, &ebx, &ecx, &edx))
    return edx & bit_SSE;

  return 0;
#endif
}
46
/* i387 exception bits -- see the Linux <fpu_control.h> header file for
   details.  This file uses the same six bits both as mask bits of the
   control word and as flag bits of the status word.  */
#define _FPU_MASK_IM 0x01  /* Invalid operation.  */
#define _FPU_MASK_DM 0x02  /* Denormal operand.  */
#define _FPU_MASK_ZM 0x04  /* Division by zero.  */
#define _FPU_MASK_OM 0x08  /* Overflow.  */
#define _FPU_MASK_UM 0x10  /* Underflow.  */
#define _FPU_MASK_PM 0x20  /* Precision (inexact result).  */

/* All six exception mask bits together (0x3f).  */
#define _FPU_MASK_ALL \
  (_FPU_MASK_IM | _FPU_MASK_DM | _FPU_MASK_ZM \
   | _FPU_MASK_OM | _FPU_MASK_UM | _FPU_MASK_PM)

/* All six exception flag bits together (0x3f).  */
#define _FPU_EX_ALL \
  (_FPU_MASK_IM | _FPU_MASK_DM | _FPU_MASK_ZM \
   | _FPU_MASK_OM | _FPU_MASK_UM | _FPU_MASK_PM)

/* i387 rounding modes, as unshifted values of the two-bit rounding
   control field.  */
#define _FPU_RC_NEAREST 0x0
#define _FPU_RC_DOWN 0x1
#define _FPU_RC_UP 0x2
#define _FPU_RC_ZERO 0x3

/* Mask covering the (unshifted) rounding control field.  */
#define _FPU_RC_MASK 0x3
66
/* In-memory image of the x87 environment block as written by FSTENV,
   followed by one extra word in which this file keeps the SSE MXCSR
   register.  The member order and widths must not be changed.  */
typedef struct
{
  unsigned short __control_word;   /* Exception masks, rounding control.  */
  unsigned short __unused1;
  unsigned short __status_word;    /* Exception flags.  */
  unsigned short __unused2;
  unsigned short __tags;
  unsigned short __unused3;
  unsigned __eip;
  unsigned short __cs_selector;
  unsigned __opcode : 11;
  unsigned __unused4 : 5;
  unsigned __data_offset;
  unsigned short __data_selector;
  unsigned short __unused5;
  unsigned __mxcsr;                /* Not part of the FSTENV block.  */
}
my_fenv_t;
87
88
89 /* Raise the supported floating-point exceptions from EXCEPTS. Other
90 bits in EXCEPTS are ignored. Code originally borrowed from
91 libatomic/config/x86/fenv.c. */
92
93 static void
94 local_feraiseexcept (int excepts)
95 {
96 if (excepts & _FPU_MASK_IM)
97 {
98 float f = 0.0f;
99 #ifdef __SSE_MATH__
100 volatile float r __attribute__ ((unused));
101 __asm__ __volatile__ ("%vdivss\t{%0, %d0|%d0, %0}" : "+x" (f));
102 r = f; /* Needed to trigger exception. */
103 #else
104 __asm__ __volatile__ ("fdiv\t{%y0, %0|%0, %y0}" : "+t" (f));
105 /* No need for fwait, exception is triggered by emitted fstp. */
106 #endif
107 }
108 if (excepts & _FPU_MASK_DM)
109 {
110 my_fenv_t temp;
111 __asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp));
112 temp.__status_word |= _FPU_MASK_DM;
113 __asm__ __volatile__ ("fldenv\t%0" : : "m" (temp));
114 __asm__ __volatile__ ("fwait");
115 }
116 if (excepts & _FPU_MASK_ZM)
117 {
118 float f = 1.0f, g = 0.0f;
119 #ifdef __SSE_MATH__
120 volatile float r __attribute__ ((unused));
121 __asm__ __volatile__ ("%vdivss\t{%1, %d0|%d0, %1}" : "+x" (f) : "xm" (g));
122 r = f; /* Needed to trigger exception. */
123 #else
124 __asm__ __volatile__ ("fdivs\t%1" : "+t" (f) : "m" (g));
125 /* No need for fwait, exception is triggered by emitted fstp. */
126 #endif
127 }
128 if (excepts & _FPU_MASK_OM)
129 {
130 my_fenv_t temp;
131 __asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp));
132 temp.__status_word |= _FPU_MASK_OM;
133 __asm__ __volatile__ ("fldenv\t%0" : : "m" (temp));
134 __asm__ __volatile__ ("fwait");
135 }
136 if (excepts & _FPU_MASK_UM)
137 {
138 my_fenv_t temp;
139 __asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp));
140 temp.__status_word |= _FPU_MASK_UM;
141 __asm__ __volatile__ ("fldenv\t%0" : : "m" (temp));
142 __asm__ __volatile__ ("fwait");
143 }
144 if (excepts & _FPU_MASK_PM)
145 {
146 float f = 1.0f, g = 3.0f;
147 #ifdef __SSE_MATH__
148 volatile float r __attribute__ ((unused));
149 __asm__ __volatile__ ("%vdivss\t{%1, %d0|%d0, %1}" : "+x" (f) : "xm" (g));
150 r = f; /* Needed to trigger exception. */
151 #else
152 __asm__ __volatile__ ("fdivs\t%1" : "+t" (f) : "m" (g));
153 /* No need for fwait, exception is triggered by emitted fstp. */
154 #endif
155 }
156 }
157
158
159 void
160 set_fpu_trap_exceptions (int trap, int notrap)
161 {
162 int exc_set = 0, exc_clr = 0;
163 unsigned short cw;
164
165 if (trap & GFC_FPE_INVALID) exc_set |= _FPU_MASK_IM;
166 if (trap & GFC_FPE_DENORMAL) exc_set |= _FPU_MASK_DM;
167 if (trap & GFC_FPE_ZERO) exc_set |= _FPU_MASK_ZM;
168 if (trap & GFC_FPE_OVERFLOW) exc_set |= _FPU_MASK_OM;
169 if (trap & GFC_FPE_UNDERFLOW) exc_set |= _FPU_MASK_UM;
170 if (trap & GFC_FPE_INEXACT) exc_set |= _FPU_MASK_PM;
171
172 if (notrap & GFC_FPE_INVALID) exc_clr |= _FPU_MASK_IM;
173 if (notrap & GFC_FPE_DENORMAL) exc_clr |= _FPU_MASK_DM;
174 if (notrap & GFC_FPE_ZERO) exc_clr |= _FPU_MASK_ZM;
175 if (notrap & GFC_FPE_OVERFLOW) exc_clr |= _FPU_MASK_OM;
176 if (notrap & GFC_FPE_UNDERFLOW) exc_clr |= _FPU_MASK_UM;
177 if (notrap & GFC_FPE_INEXACT) exc_clr |= _FPU_MASK_PM;
178
179 __asm__ __volatile__ ("fstcw\t%0" : "=m" (cw));
180
181 cw |= exc_clr;
182 cw &= ~exc_set;
183
184 __asm__ __volatile__ ("fnclex\n\tfldcw\t%0" : : "m" (cw));
185
186 if (has_sse())
187 {
188 unsigned int cw_sse;
189
190 __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
191
192 /* The SSE exception masks are shifted by 7 bits. */
193 cw_sse |= (exc_clr << 7);
194 cw_sse &= ~(exc_set << 7);
195
196 /* Clear stalled exception flags. */
197 cw_sse &= ~_FPU_EX_ALL;
198
199 __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse));
200 }
201 }
202
203 void
204 set_fpu (void)
205 {
206 set_fpu_trap_exceptions (options.fpe, 0);
207 }
208
209 int
210 get_fpu_trap_exceptions (void)
211 {
212 int res = 0;
213 unsigned short cw;
214
215 __asm__ __volatile__ ("fstcw\t%0" : "=m" (cw));
216 cw &= _FPU_MASK_ALL;
217
218 if (has_sse())
219 {
220 unsigned int cw_sse;
221
222 __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
223
224 /* The SSE exception masks are shifted by 7 bits. */
225 cw = cw | ((cw_sse >> 7) & _FPU_MASK_ALL);
226 }
227
228 if (~cw & _FPU_MASK_IM) res |= GFC_FPE_INVALID;
229 if (~cw & _FPU_MASK_DM) res |= GFC_FPE_DENORMAL;
230 if (~cw & _FPU_MASK_ZM) res |= GFC_FPE_ZERO;
231 if (~cw & _FPU_MASK_OM) res |= GFC_FPE_OVERFLOW;
232 if (~cw & _FPU_MASK_UM) res |= GFC_FPE_UNDERFLOW;
233 if (~cw & _FPU_MASK_PM) res |= GFC_FPE_INEXACT;
234
235 return res;
236 }
237
/* Report whether trapping can be controlled for FLAG.  The answer is
   always 1; FLAG is not examined.  */
int
support_fpu_trap (int flag)
{
  (void) flag;
  return 1;
}
243
244 int
245 get_fpu_except_flags (void)
246 {
247 unsigned short cw;
248 int excepts;
249 int result = 0;
250
251 __asm__ __volatile__ ("fnstsw\t%0" : "=am" (cw));
252 excepts = cw;
253
254 if (has_sse())
255 {
256 unsigned int cw_sse;
257
258 __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
259 excepts |= cw_sse;
260 }
261
262 excepts &= _FPU_EX_ALL;
263
264 if (excepts & _FPU_MASK_IM) result |= GFC_FPE_INVALID;
265 if (excepts & _FPU_MASK_DM) result |= GFC_FPE_DENORMAL;
266 if (excepts & _FPU_MASK_ZM) result |= GFC_FPE_ZERO;
267 if (excepts & _FPU_MASK_OM) result |= GFC_FPE_OVERFLOW;
268 if (excepts & _FPU_MASK_UM) result |= GFC_FPE_UNDERFLOW;
269 if (excepts & _FPU_MASK_PM) result |= GFC_FPE_INEXACT;
270
271 return result;
272 }
273
274 void
275 set_fpu_except_flags (int set, int clear)
276 {
277 my_fenv_t temp;
278 int exc_set = 0, exc_clr = 0;
279
280 /* Translate from GFC_PE_* values to _FPU_MASK_* values. */
281 if (set & GFC_FPE_INVALID)
282 exc_set |= _FPU_MASK_IM;
283 if (clear & GFC_FPE_INVALID)
284 exc_clr |= _FPU_MASK_IM;
285
286 if (set & GFC_FPE_DENORMAL)
287 exc_set |= _FPU_MASK_DM;
288 if (clear & GFC_FPE_DENORMAL)
289 exc_clr |= _FPU_MASK_DM;
290
291 if (set & GFC_FPE_ZERO)
292 exc_set |= _FPU_MASK_ZM;
293 if (clear & GFC_FPE_ZERO)
294 exc_clr |= _FPU_MASK_ZM;
295
296 if (set & GFC_FPE_OVERFLOW)
297 exc_set |= _FPU_MASK_OM;
298 if (clear & GFC_FPE_OVERFLOW)
299 exc_clr |= _FPU_MASK_OM;
300
301 if (set & GFC_FPE_UNDERFLOW)
302 exc_set |= _FPU_MASK_UM;
303 if (clear & GFC_FPE_UNDERFLOW)
304 exc_clr |= _FPU_MASK_UM;
305
306 if (set & GFC_FPE_INEXACT)
307 exc_set |= _FPU_MASK_PM;
308 if (clear & GFC_FPE_INEXACT)
309 exc_clr |= _FPU_MASK_PM;
310
311
312 /* Change the flags. This is tricky on 387 (unlike SSE), because we have
313 FNSTSW but no FLDSW instruction. */
314 __asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp));
315 temp.__status_word &= ~exc_clr;
316 __asm__ __volatile__ ("fldenv\t%0" : : "m" (temp));
317
318 /* Change the flags on SSE. */
319
320 if (has_sse())
321 {
322 unsigned int cw_sse;
323
324 __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
325 cw_sse &= ~exc_clr;
326 __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse));
327 }
328
329 local_feraiseexcept (exc_set);
330 }
331
/* Report whether the exception flag FLAG is supported.  The answer is
   always 1; FLAG is not examined.  */
int
support_fpu_flag (int flag)
{
  (void) flag;
  return 1;
}
337
338 void
339 set_fpu_rounding_mode (int round)
340 {
341 int round_mode;
342 unsigned short cw;
343
344 switch (round)
345 {
346 case GFC_FPE_TONEAREST:
347 round_mode = _FPU_RC_NEAREST;
348 break;
349 case GFC_FPE_UPWARD:
350 round_mode = _FPU_RC_UP;
351 break;
352 case GFC_FPE_DOWNWARD:
353 round_mode = _FPU_RC_DOWN;
354 break;
355 case GFC_FPE_TOWARDZERO:
356 round_mode = _FPU_RC_ZERO;
357 break;
358 default:
359 return; /* Should be unreachable. */
360 }
361
362 __asm__ __volatile__ ("fnstcw\t%0" : "=m" (cw));
363
364 /* The x87 round control bits are shifted by 10 bits. */
365 cw &= ~(_FPU_RC_MASK << 10);
366 cw |= round_mode << 10;
367
368 __asm__ __volatile__ ("fldcw\t%0" : : "m" (cw));
369
370 if (has_sse())
371 {
372 unsigned int cw_sse;
373
374 __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
375
376 /* The SSE round control bits are shifted by 13 bits. */
377 cw_sse &= ~(_FPU_RC_MASK << 13);
378 cw_sse |= round_mode << 13;
379
380 __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse));
381 }
382 }
383
384 int
385 get_fpu_rounding_mode (void)
386 {
387 int round_mode;
388
389 #ifdef __SSE_MATH__
390 unsigned int cw;
391
392 __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw));
393
394 /* The SSE round control bits are shifted by 13 bits. */
395 round_mode = cw >> 13;
396 #else
397 unsigned short cw;
398
399 __asm__ __volatile__ ("fnstcw\t%0" : "=m" (cw));
400
401 /* The x87 round control bits are shifted by 10 bits. */
402 round_mode = cw >> 10;
403 #endif
404
405 round_mode &= _FPU_RC_MASK;
406
407 switch (round_mode)
408 {
409 case _FPU_RC_NEAREST:
410 return GFC_FPE_TONEAREST;
411 case _FPU_RC_UP:
412 return GFC_FPE_UPWARD;
413 case _FPU_RC_DOWN:
414 return GFC_FPE_DOWNWARD;
415 case _FPU_RC_ZERO:
416 return GFC_FPE_TOWARDZERO;
417 default:
418 return GFC_FPE_INVALID; /* Should be unreachable. */
419 }
420 }
421
/* Report whether the rounding mode MODE is supported.  The answer is
   always 1; MODE is not examined.  */
int
support_fpu_rounding_mode (int mode)
{
  (void) mode;
  return 1;
}
427
428 void
429 get_fpu_state (void *state)
430 {
431 my_fenv_t *envp = state;
432
433 /* Check we can actually store the FPU state in the allocated size. */
434 assert (sizeof(my_fenv_t) <= (size_t) GFC_FPE_STATE_BUFFER_SIZE);
435
436 __asm__ __volatile__ ("fnstenv\t%0" : "=m" (*envp));
437
438 /* fnstenv has the side effect of masking all exceptions, so we need
439 to restore the control word after that. */
440 __asm__ __volatile__ ("fldcw\t%0" : : "m" (envp->__control_word));
441
442 if (has_sse())
443 __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (envp->__mxcsr));
444 }
445
446 void
447 set_fpu_state (void *state)
448 {
449 my_fenv_t *envp = state;
450
451 /* Check we can actually store the FPU state in the allocated size. */
452 assert (sizeof(my_fenv_t) <= (size_t) GFC_FPE_STATE_BUFFER_SIZE);
453
454 /* glibc sources (sysdeps/x86_64/fpu/fesetenv.c) do something more
455 complex than this, but I think it suffices in our case. */
456 __asm__ __volatile__ ("fldenv\t%0" : : "m" (*envp));
457
458 if (has_sse())
459 __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (envp->__mxcsr));
460 }
461