Update copyright years.
[gcc.git] / libgfortran / config / fpu-387.h
1 /* FPU-related code for x86 and x86_64 processors.
2 Copyright (C) 2005-2015 Free Software Foundation, Inc.
3 Contributed by Francois-Xavier Coudert <coudert@clipper.ens.fr>
4
5 This file is part of the GNU Fortran 95 runtime library (libgfortran).
6
7 Libgfortran is free software; you can redistribute it and/or
8 modify it under the terms of the GNU General Public
9 License as published by the Free Software Foundation; either
10 version 3 of the License, or (at your option) any later version.
11
12 Libgfortran is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
20
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
25
26 #ifndef __SSE_MATH__
27 #include "cpuid.h"
28 #endif
29
/* Tell whether SSE can be used.  When the compiler already generates
   SSE math the answer is fixed at 1.  Otherwise CPUID leaf 1 is queried
   and the SSE feature bit of EDX is returned as-is (nonzero when set);
   0 is returned when __get_cpuid reports failure.  */
static int
has_sse (void)
{
#ifdef __SSE_MATH__
  return 1;
#else
  unsigned int a, b, c, d;

  if (__get_cpuid (1, &a, &b, &c, &d))
    return d & bit_SSE;

  return 0;
#endif
}
44
/* i387 exceptions -- see linux <fpu_control.h> header file for details.
   The code in this file uses these six bits both as mask bits in the
   x87 control word and as flag bits in the x87 status word; for MXCSR
   the flags sit in the same low bits and the masks are shifted left
   by 7 bits.  */
#define _FPU_MASK_IM (1 << 0)   /* Paired with GFC_FPE_INVALID.  */
#define _FPU_MASK_DM (1 << 1)   /* Paired with GFC_FPE_DENORMAL.  */
#define _FPU_MASK_ZM (1 << 2)   /* Paired with GFC_FPE_ZERO.  */
#define _FPU_MASK_OM (1 << 3)   /* Paired with GFC_FPE_OVERFLOW.  */
#define _FPU_MASK_UM (1 << 4)   /* Paired with GFC_FPE_UNDERFLOW.  */
#define _FPU_MASK_PM (1 << 5)   /* Paired with GFC_FPE_INEXACT.  */
#define _FPU_MASK_ALL \
  (_FPU_MASK_IM | _FPU_MASK_DM | _FPU_MASK_ZM \
   | _FPU_MASK_OM | _FPU_MASK_UM | _FPU_MASK_PM)

/* All exception flag bits at once (same bit positions as the masks).  */
#define _FPU_EX_ALL _FPU_MASK_ALL

/* i387 rounding modes, as unshifted two-bit field values.  The code in
   this file shifts them by 10 bits for the x87 control word and by
   13 bits for MXCSR.  */

#define _FPU_RC_NEAREST 0
#define _FPU_RC_DOWN 1
#define _FPU_RC_UP 2
#define _FPU_RC_ZERO 3

/* Width mask of the round-control field, also unshifted.  */
#define _FPU_RC_MASK 3

/* MXCSR bit that enables flush to zero mode.  */

#define MXCSR_FTZ 0x8000

68
69
/* Image of the environment block written by FSTENV, followed by one
   extra word in which this file keeps the MXCSR value.  */
typedef struct
{
  unsigned short __control_word;   /* x87 control word.  */
  unsigned short __unused1;
  unsigned short __status_word;    /* x87 status word.  */
  unsigned short __unused2;
  unsigned short __tags;
  unsigned short __unused3;
  unsigned int __eip;
  unsigned short __cs_selector;
  unsigned short __opcode;
  unsigned int __data_offset;
  unsigned short __data_selector;
  unsigned short __unused5;
  unsigned int __mxcsr;            /* Filled in separately via STMXCSR.  */
}
my_fenv_t;
89
90 /* Check we can actually store the FPU state in the allocated size. */
91 _Static_assert (sizeof(my_fenv_t) <= (size_t) GFC_FPE_STATE_BUFFER_SIZE,
92 "GFC_FPE_STATE_BUFFER_SIZE is too small");
93
94
95 /* Raise the supported floating-point exceptions from EXCEPTS. Other
96 bits in EXCEPTS are ignored. Code originally borrowed from
97 libatomic/config/x86/fenv.c. */
98
99 static void
100 local_feraiseexcept (int excepts)
101 {
102 if (excepts & _FPU_MASK_IM)
103 {
104 float f = 0.0f;
105 #ifdef __SSE_MATH__
106 volatile float r __attribute__ ((unused));
107 __asm__ __volatile__ ("%vdivss\t{%0, %d0|%d0, %0}" : "+x" (f));
108 r = f; /* Needed to trigger exception. */
109 #else
110 __asm__ __volatile__ ("fdiv\t{%y0, %0|%0, %y0}" : "+t" (f));
111 /* No need for fwait, exception is triggered by emitted fstp. */
112 #endif
113 }
114 if (excepts & _FPU_MASK_DM)
115 {
116 my_fenv_t temp;
117 __asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp));
118 temp.__status_word |= _FPU_MASK_DM;
119 __asm__ __volatile__ ("fldenv\t%0" : : "m" (temp));
120 __asm__ __volatile__ ("fwait");
121 }
122 if (excepts & _FPU_MASK_ZM)
123 {
124 float f = 1.0f, g = 0.0f;
125 #ifdef __SSE_MATH__
126 volatile float r __attribute__ ((unused));
127 __asm__ __volatile__ ("%vdivss\t{%1, %d0|%d0, %1}" : "+x" (f) : "xm" (g));
128 r = f; /* Needed to trigger exception. */
129 #else
130 __asm__ __volatile__ ("fdivs\t%1" : "+t" (f) : "m" (g));
131 /* No need for fwait, exception is triggered by emitted fstp. */
132 #endif
133 }
134 if (excepts & _FPU_MASK_OM)
135 {
136 my_fenv_t temp;
137 __asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp));
138 temp.__status_word |= _FPU_MASK_OM;
139 __asm__ __volatile__ ("fldenv\t%0" : : "m" (temp));
140 __asm__ __volatile__ ("fwait");
141 }
142 if (excepts & _FPU_MASK_UM)
143 {
144 my_fenv_t temp;
145 __asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp));
146 temp.__status_word |= _FPU_MASK_UM;
147 __asm__ __volatile__ ("fldenv\t%0" : : "m" (temp));
148 __asm__ __volatile__ ("fwait");
149 }
150 if (excepts & _FPU_MASK_PM)
151 {
152 float f = 1.0f, g = 3.0f;
153 #ifdef __SSE_MATH__
154 volatile float r __attribute__ ((unused));
155 __asm__ __volatile__ ("%vdivss\t{%1, %d0|%d0, %1}" : "+x" (f) : "xm" (g));
156 r = f; /* Needed to trigger exception. */
157 #else
158 __asm__ __volatile__ ("fdivs\t%1" : "+t" (f) : "m" (g));
159 /* No need for fwait, exception is triggered by emitted fstp. */
160 #endif
161 }
162 }
163
164
165 void
166 set_fpu_trap_exceptions (int trap, int notrap)
167 {
168 int exc_set = 0, exc_clr = 0;
169 unsigned short cw;
170
171 if (trap & GFC_FPE_INVALID) exc_set |= _FPU_MASK_IM;
172 if (trap & GFC_FPE_DENORMAL) exc_set |= _FPU_MASK_DM;
173 if (trap & GFC_FPE_ZERO) exc_set |= _FPU_MASK_ZM;
174 if (trap & GFC_FPE_OVERFLOW) exc_set |= _FPU_MASK_OM;
175 if (trap & GFC_FPE_UNDERFLOW) exc_set |= _FPU_MASK_UM;
176 if (trap & GFC_FPE_INEXACT) exc_set |= _FPU_MASK_PM;
177
178 if (notrap & GFC_FPE_INVALID) exc_clr |= _FPU_MASK_IM;
179 if (notrap & GFC_FPE_DENORMAL) exc_clr |= _FPU_MASK_DM;
180 if (notrap & GFC_FPE_ZERO) exc_clr |= _FPU_MASK_ZM;
181 if (notrap & GFC_FPE_OVERFLOW) exc_clr |= _FPU_MASK_OM;
182 if (notrap & GFC_FPE_UNDERFLOW) exc_clr |= _FPU_MASK_UM;
183 if (notrap & GFC_FPE_INEXACT) exc_clr |= _FPU_MASK_PM;
184
185 __asm__ __volatile__ ("fstcw\t%0" : "=m" (cw));
186
187 cw |= exc_clr;
188 cw &= ~exc_set;
189
190 __asm__ __volatile__ ("fnclex\n\tfldcw\t%0" : : "m" (cw));
191
192 if (has_sse())
193 {
194 unsigned int cw_sse;
195
196 __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
197
198 /* The SSE exception masks are shifted by 7 bits. */
199 cw_sse |= (exc_clr << 7);
200 cw_sse &= ~(exc_set << 7);
201
202 /* Clear stalled exception flags. */
203 cw_sse &= ~_FPU_EX_ALL;
204
205 __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse));
206 }
207 }
208
209 void
210 set_fpu (void)
211 {
212 set_fpu_trap_exceptions (options.fpe, 0);
213 }
214
215 int
216 get_fpu_trap_exceptions (void)
217 {
218 int res = 0;
219 unsigned short cw;
220
221 __asm__ __volatile__ ("fstcw\t%0" : "=m" (cw));
222 cw &= _FPU_MASK_ALL;
223
224 if (has_sse())
225 {
226 unsigned int cw_sse;
227
228 __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
229
230 /* The SSE exception masks are shifted by 7 bits. */
231 cw = cw | ((cw_sse >> 7) & _FPU_MASK_ALL);
232 }
233
234 if (~cw & _FPU_MASK_IM) res |= GFC_FPE_INVALID;
235 if (~cw & _FPU_MASK_DM) res |= GFC_FPE_DENORMAL;
236 if (~cw & _FPU_MASK_ZM) res |= GFC_FPE_ZERO;
237 if (~cw & _FPU_MASK_OM) res |= GFC_FPE_OVERFLOW;
238 if (~cw & _FPU_MASK_UM) res |= GFC_FPE_UNDERFLOW;
239 if (~cw & _FPU_MASK_PM) res |= GFC_FPE_INEXACT;
240
241 return res;
242 }
243
/* Trapping is reported as supported for every exception: FLAG is
   ignored and the result is always 1.  */
int
support_fpu_trap (int flag)
{
  (void) flag;
  return 1;
}
249
250 int
251 get_fpu_except_flags (void)
252 {
253 unsigned short cw;
254 int excepts;
255 int result = 0;
256
257 __asm__ __volatile__ ("fnstsw\t%0" : "=am" (cw));
258 excepts = cw;
259
260 if (has_sse())
261 {
262 unsigned int cw_sse;
263
264 __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
265 excepts |= cw_sse;
266 }
267
268 excepts &= _FPU_EX_ALL;
269
270 if (excepts & _FPU_MASK_IM) result |= GFC_FPE_INVALID;
271 if (excepts & _FPU_MASK_DM) result |= GFC_FPE_DENORMAL;
272 if (excepts & _FPU_MASK_ZM) result |= GFC_FPE_ZERO;
273 if (excepts & _FPU_MASK_OM) result |= GFC_FPE_OVERFLOW;
274 if (excepts & _FPU_MASK_UM) result |= GFC_FPE_UNDERFLOW;
275 if (excepts & _FPU_MASK_PM) result |= GFC_FPE_INEXACT;
276
277 return result;
278 }
279
280 void
281 set_fpu_except_flags (int set, int clear)
282 {
283 my_fenv_t temp;
284 int exc_set = 0, exc_clr = 0;
285
286 /* Translate from GFC_PE_* values to _FPU_MASK_* values. */
287 if (set & GFC_FPE_INVALID)
288 exc_set |= _FPU_MASK_IM;
289 if (clear & GFC_FPE_INVALID)
290 exc_clr |= _FPU_MASK_IM;
291
292 if (set & GFC_FPE_DENORMAL)
293 exc_set |= _FPU_MASK_DM;
294 if (clear & GFC_FPE_DENORMAL)
295 exc_clr |= _FPU_MASK_DM;
296
297 if (set & GFC_FPE_ZERO)
298 exc_set |= _FPU_MASK_ZM;
299 if (clear & GFC_FPE_ZERO)
300 exc_clr |= _FPU_MASK_ZM;
301
302 if (set & GFC_FPE_OVERFLOW)
303 exc_set |= _FPU_MASK_OM;
304 if (clear & GFC_FPE_OVERFLOW)
305 exc_clr |= _FPU_MASK_OM;
306
307 if (set & GFC_FPE_UNDERFLOW)
308 exc_set |= _FPU_MASK_UM;
309 if (clear & GFC_FPE_UNDERFLOW)
310 exc_clr |= _FPU_MASK_UM;
311
312 if (set & GFC_FPE_INEXACT)
313 exc_set |= _FPU_MASK_PM;
314 if (clear & GFC_FPE_INEXACT)
315 exc_clr |= _FPU_MASK_PM;
316
317
318 /* Change the flags. This is tricky on 387 (unlike SSE), because we have
319 FNSTSW but no FLDSW instruction. */
320 __asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp));
321 temp.__status_word &= ~exc_clr;
322 __asm__ __volatile__ ("fldenv\t%0" : : "m" (temp));
323
324 /* Change the flags on SSE. */
325
326 if (has_sse())
327 {
328 unsigned int cw_sse;
329
330 __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
331 cw_sse &= ~exc_clr;
332 __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse));
333 }
334
335 local_feraiseexcept (exc_set);
336 }
337
/* Every exception flag is reported as supported: FLAG is ignored and
   the result is always 1.  */
int
support_fpu_flag (int flag)
{
  (void) flag;
  return 1;
}
343
344 void
345 set_fpu_rounding_mode (int round)
346 {
347 int round_mode;
348 unsigned short cw;
349
350 switch (round)
351 {
352 case GFC_FPE_TONEAREST:
353 round_mode = _FPU_RC_NEAREST;
354 break;
355 case GFC_FPE_UPWARD:
356 round_mode = _FPU_RC_UP;
357 break;
358 case GFC_FPE_DOWNWARD:
359 round_mode = _FPU_RC_DOWN;
360 break;
361 case GFC_FPE_TOWARDZERO:
362 round_mode = _FPU_RC_ZERO;
363 break;
364 default:
365 return; /* Should be unreachable. */
366 }
367
368 __asm__ __volatile__ ("fnstcw\t%0" : "=m" (cw));
369
370 /* The x87 round control bits are shifted by 10 bits. */
371 cw &= ~(_FPU_RC_MASK << 10);
372 cw |= round_mode << 10;
373
374 __asm__ __volatile__ ("fldcw\t%0" : : "m" (cw));
375
376 if (has_sse())
377 {
378 unsigned int cw_sse;
379
380 __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
381
382 /* The SSE round control bits are shifted by 13 bits. */
383 cw_sse &= ~(_FPU_RC_MASK << 13);
384 cw_sse |= round_mode << 13;
385
386 __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse));
387 }
388 }
389
390 int
391 get_fpu_rounding_mode (void)
392 {
393 int round_mode;
394
395 #ifdef __SSE_MATH__
396 unsigned int cw;
397
398 __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw));
399
400 /* The SSE round control bits are shifted by 13 bits. */
401 round_mode = cw >> 13;
402 #else
403 unsigned short cw;
404
405 __asm__ __volatile__ ("fnstcw\t%0" : "=m" (cw));
406
407 /* The x87 round control bits are shifted by 10 bits. */
408 round_mode = cw >> 10;
409 #endif
410
411 round_mode &= _FPU_RC_MASK;
412
413 switch (round_mode)
414 {
415 case _FPU_RC_NEAREST:
416 return GFC_FPE_TONEAREST;
417 case _FPU_RC_UP:
418 return GFC_FPE_UPWARD;
419 case _FPU_RC_DOWN:
420 return GFC_FPE_DOWNWARD;
421 case _FPU_RC_ZERO:
422 return GFC_FPE_TOWARDZERO;
423 default:
424 return 0; /* Should be unreachable. */
425 }
426 }
427
/* Every rounding mode is reported as supported: MODE is ignored and
   the result is always 1.  */
int
support_fpu_rounding_mode (int mode)
{
  (void) mode;
  return 1;
}
433
434 void
435 get_fpu_state (void *state)
436 {
437 my_fenv_t *envp = state;
438
439 __asm__ __volatile__ ("fnstenv\t%0" : "=m" (*envp));
440
441 /* fnstenv has the side effect of masking all exceptions, so we need
442 to restore the control word after that. */
443 __asm__ __volatile__ ("fldcw\t%0" : : "m" (envp->__control_word));
444
445 if (has_sse())
446 __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (envp->__mxcsr));
447 }
448
449 void
450 set_fpu_state (void *state)
451 {
452 my_fenv_t *envp = state;
453
454 /* glibc sources (sysdeps/x86_64/fpu/fesetenv.c) do something more
455 complex than this, but I think it suffices in our case. */
456 __asm__ __volatile__ ("fldenv\t%0" : : "m" (*envp));
457
458 if (has_sse())
459 __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (envp->__mxcsr));
460 }
461
462
/* Return 1 if the underflow mode can be controlled for reals of the
   given KIND, 0 otherwise.  This requires SSE and a KIND of 4 or 8.  */
int
support_fpu_underflow_control (int kind)
{
  return has_sse() && (kind == 4 || kind == 8);
}
471
472
473 int
474 get_fpu_underflow_mode (void)
475 {
476 unsigned int cw_sse;
477
478 if (!has_sse())
479 return 1;
480
481 __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
482
483 /* Return 0 for abrupt underflow (flush to zero), 1 for gradual underflow. */
484 return (cw_sse & MXCSR_FTZ) ? 0 : 1;
485 }
486
487
488 void
489 set_fpu_underflow_mode (int gradual)
490 {
491 unsigned int cw_sse;
492
493 if (!has_sse())
494 return;
495
496 __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
497
498 if (gradual)
499 cw_sse &= ~MXCSR_FTZ;
500 else
501 cw_sse |= MXCSR_FTZ;
502
503 __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse));
504 }
505