1 /* FPU-related code for x86 and x86_64 processors.
2 Copyright (C) 2005-2014 Free Software Foundation, Inc.
3 Contributed by Francois-Xavier Coudert <coudert@clipper.ens.fr>
5 This file is part of the GNU Fortran 95 runtime library (libgfortran).
7 Libgfortran is free software; you can redistribute it and/or
8 modify it under the terms of the GNU General Public
9 License as published by the Free Software Foundation; either
10 version 3 of the License, or (at your option) any later version.
12 Libgfortran is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
36 unsigned int eax
, ebx
, ecx
, edx
;
38 if (!__get_cpuid (1, &eax
, &ebx
, &ecx
, &edx
))
47 /* i387 exceptions -- see linux <fpu_control.h> header file for details. */
/* Each _FPU_MASK_xM value is a single bit.  The code in this file uses these
   bits for the exception masks of the x87 control word and for the exception
   flags of the x87 status word and of MXCSR; the MXCSR mask bits are reached
   by shifting the same values left by 7.  */
48 #define _FPU_MASK_IM 0x01 /* invalid operation (GFC_FPE_INVALID) */
49 #define _FPU_MASK_DM 0x02 /* denormal operand (GFC_FPE_DENORMAL) */
50 #define _FPU_MASK_ZM 0x04 /* division by zero (GFC_FPE_ZERO) */
51 #define _FPU_MASK_OM 0x08 /* overflow (GFC_FPE_OVERFLOW) */
52 #define _FPU_MASK_UM 0x10 /* underflow (GFC_FPE_UNDERFLOW) */
53 #define _FPU_MASK_PM 0x20 /* inexact result (GFC_FPE_INEXACT) */
54 #define _FPU_MASK_ALL 0x3f /* OR of the six mask bits above */
56 #define _FPU_EX_ALL 0x3f /* the six exception flag bits */
58 /* i387 rounding modes. */
/* Values of the two-bit rounding-control field.  The code in this file
   places the field at bit 10 of the x87 control word and at bit 13 of
   MXCSR.  */
60 #define _FPU_RC_NEAREST 0x0
61 #define _FPU_RC_DOWN 0x1
62 #define _FPU_RC_UP 0x2
63 #define _FPU_RC_ZERO 0x3
65 #define _FPU_RC_MASK 0x3 /* both bits of the rounding-control field */
67 /* This structure corresponds to the layout of the block written by FNSTENV. */
71 unsigned short int __control_word
;
72 unsigned short int __unused1
;
73 unsigned short int __status_word
;
74 unsigned short int __unused2
;
75 unsigned short int __tags
;
76 unsigned short int __unused3
;
78 unsigned short int __cs_selector
;
79 unsigned int __opcode
:11;
80 unsigned int __unused4
:5;
81 unsigned int __data_offset
;
82 unsigned short int __data_selector
;
83 unsigned short int __unused5
;
89 /* Raise the supported floating-point exceptions from EXCEPTS. Other
90 bits in EXCEPTS are ignored. Code originally borrowed from
91 libatomic/config/x86/fenv.c. */
/* EXCEPTS is tested against the _FPU_MASK_* bits one at a time.  Invalid,
   division-by-zero and inexact are raised by executing a real division;
   denormal, overflow and underflow are raised by setting the flag bit in a
   saved x87 environment and reloading it.
   NOTE(review): every division appears in an SSE form (divss) and an x87
   form (fdiv/fdivs); the condition selecting between the two forms is not
   shown here.  */
94 local_feraiseexcept (int excepts
)
/* Invalid operation: F is divided by itself.
   NOTE(review): the declaration and initial value of F for this branch are
   not shown here -- confirm.  */
96 if (excepts
& _FPU_MASK_IM
)
/* SSE form.  The result is copied into a volatile so the division is
   not discarded.  */
100 volatile float r
__attribute__ ((unused
));
101 __asm__
__volatile__ ("%vdivss\t{%0, %d0|%d0, %0}" : "+x" (f
));
102 r
= f
; /* Needed to trigger exception. */
/* x87 form.  */
104 __asm__
__volatile__ ("fdiv\t{%y0, %0|%0, %y0}" : "+t" (f
));
105 /* No need for fwait, exception is triggered by emitted fstp. */
/* Denormal operand: save the x87 environment, set the flag bit in the saved
   status word, reload the environment, then execute fwait.  */
108 if (excepts
& _FPU_MASK_DM
)
111 __asm__
__volatile__ ("fnstenv\t%0" : "=m" (temp
));
112 temp
.__status_word
|= _FPU_MASK_DM
;
113 __asm__
__volatile__ ("fldenv\t%0" : : "m" (temp
));
114 __asm__
__volatile__ ("fwait");
/* Division by zero: compute 1.0f / 0.0f.  */
116 if (excepts
& _FPU_MASK_ZM
)
118 float f
= 1.0f
, g
= 0.0f
;
/* SSE form.  */
120 volatile float r
__attribute__ ((unused
));
121 __asm__
__volatile__ ("%vdivss\t{%1, %d0|%d0, %1}" : "+x" (f
) : "xm" (g
));
122 r
= f
; /* Needed to trigger exception. */
/* x87 form.  */
124 __asm__
__volatile__ ("fdivs\t%1" : "+t" (f
) : "m" (g
));
125 /* No need for fwait, exception is triggered by emitted fstp. */
/* Overflow: same save / set flag / reload / fwait sequence as for the
   denormal case.  */
128 if (excepts
& _FPU_MASK_OM
)
131 __asm__
__volatile__ ("fnstenv\t%0" : "=m" (temp
));
132 temp
.__status_word
|= _FPU_MASK_OM
;
133 __asm__
__volatile__ ("fldenv\t%0" : : "m" (temp
));
134 __asm__
__volatile__ ("fwait");
/* Underflow: same save / set flag / reload / fwait sequence.  */
136 if (excepts
& _FPU_MASK_UM
)
139 __asm__
__volatile__ ("fnstenv\t%0" : "=m" (temp
));
140 temp
.__status_word
|= _FPU_MASK_UM
;
141 __asm__
__volatile__ ("fldenv\t%0" : : "m" (temp
));
142 __asm__
__volatile__ ("fwait");
/* Inexact: compute 1.0f / 3.0f, which has no exact float result.  */
144 if (excepts
& _FPU_MASK_PM
)
146 float f
= 1.0f
, g
= 3.0f
;
/* SSE form.  */
148 volatile float r
__attribute__ ((unused
));
149 __asm__
__volatile__ ("%vdivss\t{%1, %d0|%d0, %1}" : "+x" (f
) : "xm" (g
));
150 r
= f
; /* Needed to trigger exception. */
/* x87 form.  */
152 __asm__
__volatile__ ("fdivs\t%1" : "+t" (f
) : "m" (g
));
153 /* No need for fwait, exception is triggered by emitted fstp. */
/* Select which floating-point exceptions trap.  TRAP and NOTRAP are sets of
   GFC_FPE_* bits; TRAP is translated into EXC_SET and NOTRAP into EXC_CLR,
   both expressed as _FPU_MASK_* bits.  In MXCSR the mask bits for EXC_CLR
   are set and those for EXC_SET are cleared, and the SSE exception flags
   are cleared before the register is written back.  The x87 control word
   is read with fstcw and written back with fnclex followed by fldcw.
   NOTE(review): the statements that update CW between the fstcw and the
   fldcw, the local declarations, and any guard around the SSE part are not
   shown here.  */
160 set_fpu_trap_exceptions (int trap
, int notrap
)
162 int exc_set
= 0, exc_clr
= 0;
/* Translate the GFC_FPE_* bits of TRAP into _FPU_MASK_* bits.  */
165 if (trap
& GFC_FPE_INVALID
) exc_set
|= _FPU_MASK_IM
;
166 if (trap
& GFC_FPE_DENORMAL
) exc_set
|= _FPU_MASK_DM
;
167 if (trap
& GFC_FPE_ZERO
) exc_set
|= _FPU_MASK_ZM
;
168 if (trap
& GFC_FPE_OVERFLOW
) exc_set
|= _FPU_MASK_OM
;
169 if (trap
& GFC_FPE_UNDERFLOW
) exc_set
|= _FPU_MASK_UM
;
170 if (trap
& GFC_FPE_INEXACT
) exc_set
|= _FPU_MASK_PM
;
/* Same translation for NOTRAP.  */
172 if (notrap
& GFC_FPE_INVALID
) exc_clr
|= _FPU_MASK_IM
;
173 if (notrap
& GFC_FPE_DENORMAL
) exc_clr
|= _FPU_MASK_DM
;
174 if (notrap
& GFC_FPE_ZERO
) exc_clr
|= _FPU_MASK_ZM
;
175 if (notrap
& GFC_FPE_OVERFLOW
) exc_clr
|= _FPU_MASK_OM
;
176 if (notrap
& GFC_FPE_UNDERFLOW
) exc_clr
|= _FPU_MASK_UM
;
177 if (notrap
& GFC_FPE_INEXACT
) exc_clr
|= _FPU_MASK_PM
;
/* Read the current x87 control word.  */
179 __asm__
__volatile__ ("fstcw\t%0" : "=m" (cw
));
/* fnclex is issued in the same asm statement, ahead of loading the new
   control word.  */
184 __asm__
__volatile__ ("fnclex\n\tfldcw\t%0" : : "m" (cw
));
/* Read MXCSR.  */
190 __asm__
__volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse
));
192 /* The SSE exception masks are shifted by 7 bits. */
193 cw_sse
|= (exc_clr
<< 7);
194 cw_sse
&= ~(exc_set
<< 7);
196 /* Clear stale exception flags. */
197 cw_sse
&= ~_FPU_EX_ALL
;
/* Write the updated value back to MXCSR.  */
199 __asm__
__volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse
));
206 set_fpu_trap_exceptions (options
.fpe
, 0);
/* Report which exceptions are set to trap, as GFC_FPE_* bits accumulated in
   RES.  The x87 control word is read into CW and MXCSR into CW_SSE; the SSE
   mask bits are ORed into CW, and an exception is reported when its mask
   bit in the combined word is clear.
   NOTE(review): the local declarations, the initial value of RES, any guard
   around the SSE read and the return statement are not shown here.  */
210 get_fpu_trap_exceptions (void)
/* Read the x87 control word.  */
215 __asm__
__volatile__ ("fstcw\t%0" : "=m" (cw
));
/* Read MXCSR.  */
222 __asm__
__volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse
));
224 /* The SSE exception masks are shifted by 7 bits. */
225 cw
= cw
| ((cw_sse
>> 7) & _FPU_MASK_ALL
);
/* A clear mask bit is reported as a trapping exception.  */
228 if (~cw
& _FPU_MASK_IM
) res
|= GFC_FPE_INVALID
;
229 if (~cw
& _FPU_MASK_DM
) res
|= GFC_FPE_DENORMAL
;
230 if (~cw
& _FPU_MASK_ZM
) res
|= GFC_FPE_ZERO
;
231 if (~cw
& _FPU_MASK_OM
) res
|= GFC_FPE_OVERFLOW
;
232 if (~cw
& _FPU_MASK_UM
) res
|= GFC_FPE_UNDERFLOW
;
233 if (~cw
& _FPU_MASK_PM
) res
|= GFC_FPE_INEXACT
;
239 support_fpu_trap (int flag
__attribute__((unused
)))
/* Report the exception flags as GFC_FPE_* bits accumulated in RESULT.  The
   x87 status word is read with fnstsw and MXCSR with stmxcsr; EXCEPTS is
   reduced to the six exception flag bits and then translated.
   NOTE(review): the local declarations, the statements that combine CW and
   CW_SSE into EXCEPTS, any guard around the SSE read and the return
   statement are not shown here.  */
245 get_fpu_except_flags (void)
/* Read the x87 status word; the "=am" constraint accepts either the A
   register or a memory operand.  */
251 __asm__
__volatile__ ("fnstsw\t%0" : "=am" (cw
));
/* Read MXCSR.  */
258 __asm__
__volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse
));
/* Keep only the six exception flag bits.  */
262 excepts
&= _FPU_EX_ALL
;
/* Translate from _FPU_MASK_* bits to GFC_FPE_* bits.  */
264 if (excepts
& _FPU_MASK_IM
) result
|= GFC_FPE_INVALID
;
265 if (excepts
& _FPU_MASK_DM
) result
|= GFC_FPE_DENORMAL
;
266 if (excepts
& _FPU_MASK_ZM
) result
|= GFC_FPE_ZERO
;
267 if (excepts
& _FPU_MASK_OM
) result
|= GFC_FPE_OVERFLOW
;
268 if (excepts
& _FPU_MASK_UM
) result
|= GFC_FPE_UNDERFLOW
;
269 if (excepts
& _FPU_MASK_PM
) result
|= GFC_FPE_INEXACT
;
/* Raise the exception flags named in SET and clear those named in CLEAR;
   both are sets of GFC_FPE_* bits.  The flags in CLEAR are removed from the
   x87 status word through a saved environment, MXCSR is read and written
   back, and finally the flags in SET are raised by calling
   local_feraiseexcept.
   NOTE(review): the declaration of TEMP and CW_SSE, the statement that
   modifies CW_SSE between the stmxcsr and the ldmxcsr, and any guard around
   the SSE part are not shown here.  */
275 set_fpu_except_flags (int set
, int clear
)
278 int exc_set
= 0, exc_clr
= 0;
280 /* Translate from GFC_FPE_* values to _FPU_MASK_* values. */
281 if (set
& GFC_FPE_INVALID
)
282 exc_set
|= _FPU_MASK_IM
;
283 if (clear
& GFC_FPE_INVALID
)
284 exc_clr
|= _FPU_MASK_IM
;
286 if (set
& GFC_FPE_DENORMAL
)
287 exc_set
|= _FPU_MASK_DM
;
288 if (clear
& GFC_FPE_DENORMAL
)
289 exc_clr
|= _FPU_MASK_DM
;
291 if (set
& GFC_FPE_ZERO
)
292 exc_set
|= _FPU_MASK_ZM
;
293 if (clear
& GFC_FPE_ZERO
)
294 exc_clr
|= _FPU_MASK_ZM
;
296 if (set
& GFC_FPE_OVERFLOW
)
297 exc_set
|= _FPU_MASK_OM
;
298 if (clear
& GFC_FPE_OVERFLOW
)
299 exc_clr
|= _FPU_MASK_OM
;
301 if (set
& GFC_FPE_UNDERFLOW
)
302 exc_set
|= _FPU_MASK_UM
;
303 if (clear
& GFC_FPE_UNDERFLOW
)
304 exc_clr
|= _FPU_MASK_UM
;
306 if (set
& GFC_FPE_INEXACT
)
307 exc_set
|= _FPU_MASK_PM
;
308 if (clear
& GFC_FPE_INEXACT
)
309 exc_clr
|= _FPU_MASK_PM
;
312 /* Change the flags. This is tricky on 387 (unlike SSE), because we have
313 FNSTSW but no FLDSW instruction. */
/* So the whole environment is stored, the status word edited in memory,
   and the environment loaded back.  */
314 __asm__
__volatile__ ("fnstenv\t%0" : "=m" (temp
));
315 temp
.__status_word
&= ~exc_clr
;
316 __asm__
__volatile__ ("fldenv\t%0" : : "m" (temp
));
318 /* Change the flags on SSE. */
324 __asm__
__volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse
));
326 __asm__
__volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse
));
/* Raise the requested flags last, after the clearing above.  */
329 local_feraiseexcept (exc_set
);
333 support_fpu_flag (int flag
__attribute__((unused
)))
/* Set the rounding mode.  ROUND is a GFC_FPE_* rounding constant; it is
   translated to an _FPU_RC_* value in ROUND_MODE, which is then written
   into the rounding-control field of the x87 control word (bits 10-11) and
   of MXCSR (bits 13-14).
   NOTE(review): the switch header, the case label that selects _FPU_RC_UP,
   the label guarding the bare return, the break statements, the local
   declarations and any guard around the SSE part are not shown here.  */
339 set_fpu_rounding_mode (int round
)
346 case GFC_FPE_TONEAREST
:
347 round_mode
= _FPU_RC_NEAREST
;
350 round_mode
= _FPU_RC_UP
;
352 case GFC_FPE_DOWNWARD
:
353 round_mode
= _FPU_RC_DOWN
;
355 case GFC_FPE_TOWARDZERO
:
356 round_mode
= _FPU_RC_ZERO
;
359 return; /* Should be unreachable. */
/* Read the x87 control word.  */
362 __asm__
__volatile__ ("fnstcw\t%0" : "=m" (cw
));
364 /* The x87 round control bits are shifted by 10 bits. */
/* Clear the old field, then insert the new value.  */
365 cw
&= ~(_FPU_RC_MASK
<< 10);
366 cw
|= round_mode
<< 10;
/* Load the updated x87 control word.  */
368 __asm__
__volatile__ ("fldcw\t%0" : : "m" (cw
));
/* Read MXCSR.  */
374 __asm__
__volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse
));
376 /* The SSE round control bits are shifted by 13 bits. */
/* Clear the old field, then insert the new value.  */
377 cw_sse
&= ~(_FPU_RC_MASK
<< 13);
378 cw_sse
|= round_mode
<< 13;
/* Write the updated value back to MXCSR.  */
380 __asm__
__volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse
));
/* Return the current rounding mode as a GFC_FPE_* rounding constant.  The
   rounding-control field is extracted into ROUND_MODE and translated from
   _FPU_RC_* to GFC_FPE_*.
   NOTE(review): two reads are visible, one from MXCSR and one from the x87
   control word; the condition selecting between them, the switch header,
   all case labels except _FPU_RC_NEAREST and the local declarations are not
   shown here.  */
385 get_fpu_rounding_mode (void)
/* Variant reading MXCSR.  */
392 __asm__
__volatile__ ("%vstmxcsr\t%0" : "=m" (cw
));
394 /* The SSE round control bits are shifted by 13 bits. */
395 round_mode
= cw
>> 13;
/* Variant reading the x87 control word.  */
399 __asm__
__volatile__ ("fnstcw\t%0" : "=m" (cw
));
401 /* The x87 round control bits are shifted by 10 bits. */
402 round_mode
= cw
>> 10;
/* Keep only the two rounding-control bits.  */
405 round_mode
&= _FPU_RC_MASK
;
409 case _FPU_RC_NEAREST
:
410 return GFC_FPE_TONEAREST
;
412 return GFC_FPE_UPWARD
;
414 return GFC_FPE_DOWNWARD
;
416 return GFC_FPE_TOWARDZERO
;
418 return GFC_FPE_INVALID
; /* Should be unreachable. */
423 support_fpu_rounding_mode (int mode
__attribute__((unused
)))
/* Save the floating-point state into the buffer STATE, which is accessed as
   a my_fenv_t.  The x87 environment is stored with fnstenv, the control
   word is then reloaded from the stored copy, and MXCSR is stored into the
   __mxcsr member.
   NOTE(review): any guard around the MXCSR store is not shown here.  */
429 get_fpu_state (void *state
)
431 my_fenv_t
*envp
= state
;
433 /* Check we can actually store the FPU state in the allocated size. */
434 assert (sizeof(my_fenv_t
) <= (size_t) GFC_FPE_STATE_BUFFER_SIZE
);
/* Store the x87 environment into *ENVP.  */
436 __asm__
__volatile__ ("fnstenv\t%0" : "=m" (*envp
));
438 /* fnstenv has the side effect of masking all exceptions, so we need
439 to restore the control word after that. */
440 __asm__
__volatile__ ("fldcw\t%0" : : "m" (envp
->__control_word
));
/* Store MXCSR alongside the x87 environment.  */
443 __asm__
__volatile__ ("%vstmxcsr\t%0" : "=m" (envp
->__mxcsr
));
/* Restore the floating-point state from the buffer STATE, which is accessed
   as a my_fenv_t.  The x87 environment is loaded with fldenv and MXCSR is
   loaded from the __mxcsr member.
   NOTE(review): any guard around the MXCSR load is not shown here.  */
447 set_fpu_state (void *state
)
449 my_fenv_t
*envp
= state
;
451 /* Check we can actually store the FPU state in the allocated size. */
452 assert (sizeof(my_fenv_t
) <= (size_t) GFC_FPE_STATE_BUFFER_SIZE
);
454 /* glibc sources (sysdeps/x86_64/fpu/fesetenv.c) do something more
455 complex than this, but I think it suffices in our case. */
456 __asm__
__volatile__ ("fldenv\t%0" : : "m" (*envp
));
/* Load MXCSR from the saved state.  */
459 __asm__
__volatile__ ("%vldmxcsr\t%0" : : "m" (envp
->__mxcsr
));