1 /* FPU-related code for x86 and x86_64 processors.
2 Copyright (C) 2005-2020 Free Software Foundation, Inc.
3 Contributed by Francois-Xavier Coudert <coudert@clipper.ens.fr>
5 This file is part of the GNU Fortran 95 runtime library (libgfortran).
7 Libgfortran is free software; you can redistribute it and/or
8 modify it under the terms of the GNU General Public
9 License as published by the Free Software Foundation; either
10 version 3 of the License, or (at your option) any later version.
12 Libgfortran is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
/* NOTE(review): extraction fragment -- the enclosing function's header is
   not visible here.  Queries CPUID leaf 1 via __get_cpuid; presumably the
   (missing) code after this tests a feature bit (e.g. SSE) in edx/ecx --
   TODO confirm against the full source.  */
34 unsigned int eax
, ebx
, ecx
, edx
;
/* __get_cpuid returns 0 when the requested leaf is unsupported, so the
   branch below is the "no CPUID info available" path.  */
36 if (!__get_cpuid (1, &eax
, &ebx
, &ecx
, &edx
))
45 /* i387 exceptions -- see linux <fpu_control.h> header file for details. */
/* Exception mask bits of the x87 control word (and, shifted left by 7,
   of the SSE MXCSR register).  The GFC_FPE_* <-> _FPU_MASK_* mapping used
   throughout this file is:
     IM = invalid operation, DM = denormal operand, ZM = zero divide,
     OM = overflow, UM = underflow, PM = precision (inexact).  */
46 #define _FPU_MASK_IM 0x01
47 #define _FPU_MASK_DM 0x02
48 #define _FPU_MASK_ZM 0x04
49 #define _FPU_MASK_OM 0x08
50 #define _FPU_MASK_UM 0x10
51 #define _FPU_MASK_PM 0x20
52 #define _FPU_MASK_ALL 0x3f
/* The exception *flag* bits occupy the same low six positions in the
   x87 status word and in MXCSR.  */
54 #define _FPU_EX_ALL 0x3f
56 /* i387 rounding modes. */
/* Two-bit round-control field values; shifted by 10 bits in the x87
   control word and by 13 bits in MXCSR (see set_fpu_rounding_mode).  */
58 #define _FPU_RC_NEAREST 0x0
59 #define _FPU_RC_DOWN 0x1
60 #define _FPU_RC_UP 0x2
61 #define _FPU_RC_ZERO 0x3
63 #define _FPU_RC_MASK 0x3
65 /* Enable flush to zero mode. */
/* MXCSR bit 15: flush-to-zero (abrupt underflow) on SSE.  */
67 #define MXCSR_FTZ (1 << 15)
70 /* This structure corresponds to the layout of the block
/* NOTE(review): the typedef/struct header lines and the trailing __mxcsr
   member are not visible in this fragment (set_fpu_state/get_fpu_state
   below access an envp->__mxcsr field).  The fields visible here match
   the 28-byte memory image written by the x87 fnstenv instruction, which
   is how the struct is used throughout this file.  */
74 unsigned short int __control_word
;
75 unsigned short int __unused1
;
76 unsigned short int __status_word
;
77 unsigned short int __unused2
;
78 unsigned short int __tags
;
79 unsigned short int __unused3
;
81 unsigned short int __cs_selector
;
82 unsigned short int __opcode
;
83 unsigned int __data_offset
;
84 unsigned short int __data_selector
;
85 unsigned short int __unused5
;
90 /* Check we can actually store the FPU state in the allocated size. */
/* Compile-time guarantee that get_fpu_state/set_fpu_state can use the
   caller-provided GFC_FPE_STATE_BUFFER_SIZE-byte buffer as a my_fenv_t.  */
91 _Static_assert (sizeof(my_fenv_t
) <= (size_t) GFC_FPE_STATE_BUFFER_SIZE
,
92 "GFC_FPE_STATE_BUFFER_SIZE is too small");
/* Force evaluation of X at runtime by making it an input to an empty asm,
   so that the FP operation (and any exception it raises) is not optimized
   away.  Two variants: constraint "x" places X in an SSE register,
   constraint "f" on the x87 stack.  NOTE(review): the #if/#else guard
   selecting between them (presumably __SSE_MATH__) is not visible in this
   fragment -- TODO confirm.  */
95 # define __math_force_eval(x) __asm__ __volatile__ ("" : : "x" (x));
97 # define __math_force_eval(x) __asm__ __volatile__ ("" : : "f" (x));
100 /* Raise the supported floating-point exceptions from EXCEPTS. Other
101 bits in EXCEPTS are ignored. Code originally borrowed from
102 libatomic/config/x86/fenv.c. */
/* NOTE(review): extraction fragment -- the return type, braces, the local
   declarations of 'f' (invalid case) and 'temp' (my_fenv_t), and the
   #ifdef guards around the SSE/x87 variants are not visible here.  */
105 local_feraiseexcept (int excepts
)
/* Invalid operation: evaluate f / f (presumably with f == 0.0f, declared
   in a missing line -- TODO confirm) to raise #I.  */
107 if (excepts
& _FPU_MASK_IM
)
110 __math_force_eval (f
/ f
);
/* Denormal: no arithmetic portably raises it, so set the DM bit directly
   in the saved x87 status word, reload the environment, and fwait to
   deliver the (unmasked) exception.  */
112 if (excepts
& _FPU_MASK_DM
)
115 __asm__
__volatile__ ("fnstenv\t%0" : "=m" (temp
));
116 temp
.__status_word
|= _FPU_MASK_DM
;
117 __asm__
__volatile__ ("fldenv\t%0" : : "m" (temp
));
118 __asm__
__volatile__ ("fwait");
/* Divide-by-zero: evaluate 1.0f / 0.0f.  */
120 if (excepts
& _FPU_MASK_ZM
)
122 float f
= 1.0f
, g
= 0.0f
;
123 __math_force_eval (f
/ g
);
/* Overflow: same status-word technique as the denormal case.  */
125 if (excepts
& _FPU_MASK_OM
)
128 __asm__
__volatile__ ("fnstenv\t%0" : "=m" (temp
));
129 temp
.__status_word
|= _FPU_MASK_OM
;
130 __asm__
__volatile__ ("fldenv\t%0" : : "m" (temp
));
131 __asm__
__volatile__ ("fwait");
/* Underflow: same status-word technique.  */
133 if (excepts
& _FPU_MASK_UM
)
136 __asm__
__volatile__ ("fnstenv\t%0" : "=m" (temp
));
137 temp
.__status_word
|= _FPU_MASK_UM
;
138 __asm__
__volatile__ ("fldenv\t%0" : : "m" (temp
));
139 __asm__
__volatile__ ("fwait");
/* Inexact: 1.0f / 3.0f is not exactly representable.  Two variants
   follow (SSE divss vs x87 fdivs); presumably selected by a missing
   #ifdef -- TODO confirm.  */
141 if (excepts
& _FPU_MASK_PM
)
143 float f
= 1.0f
, g
= 3.0f
;
145 __asm__
__volatile__ ("%vdivss\t{%1, %d0|%d0, %1}" : "+x" (f
) : "xm" (g
));
147 __asm__
__volatile__ ("fdivs\t%1" : "+t" (f
) : "m" (g
));
148 /* No need for fwait, exception is triggered by emitted fstp. */
/* Enable trapping for the GFC_FPE_* exceptions in TRAP and disable
   trapping for those in NOTRAP, on both the x87 and SSE units.
   NOTE(review): extraction fragment -- the return type, braces, the
   declarations of 'cw' / 'cw_sse', and the x87 control-word bit updates
   between the fstcw and fldcw below are not visible here.  */
155 set_fpu_trap_exceptions (int trap
, int notrap
)
157 int exc_set
= 0, exc_clr
= 0;
/* Translate GFC_FPE_* bits into _FPU_MASK_* bits to enable (exc_set)...  */
160 if (trap
& GFC_FPE_INVALID
) exc_set
|= _FPU_MASK_IM
;
161 if (trap
& GFC_FPE_DENORMAL
) exc_set
|= _FPU_MASK_DM
;
162 if (trap
& GFC_FPE_ZERO
) exc_set
|= _FPU_MASK_ZM
;
163 if (trap
& GFC_FPE_OVERFLOW
) exc_set
|= _FPU_MASK_OM
;
164 if (trap
& GFC_FPE_UNDERFLOW
) exc_set
|= _FPU_MASK_UM
;
165 if (trap
& GFC_FPE_INEXACT
) exc_set
|= _FPU_MASK_PM
;
/* ... and to disable (exc_clr).  */
167 if (notrap
& GFC_FPE_INVALID
) exc_clr
|= _FPU_MASK_IM
;
168 if (notrap
& GFC_FPE_DENORMAL
) exc_clr
|= _FPU_MASK_DM
;
169 if (notrap
& GFC_FPE_ZERO
) exc_clr
|= _FPU_MASK_ZM
;
170 if (notrap
& GFC_FPE_OVERFLOW
) exc_clr
|= _FPU_MASK_OM
;
171 if (notrap
& GFC_FPE_UNDERFLOW
) exc_clr
|= _FPU_MASK_UM
;
172 if (notrap
& GFC_FPE_INEXACT
) exc_clr
|= _FPU_MASK_PM
;
/* Read the x87 control word; the lines that apply exc_set/exc_clr to it
   are missing from this fragment.  */
174 __asm__
__volatile__ ("fstcw\t%0" : "=m" (cw
));
/* fnclex first clears pending x87 exceptions so reloading the control
   word cannot immediately trap on a stale flag.  */
179 __asm__
__volatile__ ("fnclex\n\tfldcw\t%0" : : "m" (cw
));
/* Same operation on the SSE MXCSR register.  */
185 __asm__
__volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse
));
187 /* The SSE exception masks are shifted by 7 bits. */
/* Setting a mask bit disables (masks) the trap; clearing it enables it.  */
188 cw_sse
|= (exc_clr
<< 7);
189 cw_sse
&= ~(exc_set
<< 7);
191 /* Clear stalled exception flags. */
192 cw_sse
&= ~_FPU_EX_ALL
;
194 __asm__
__volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse
));
/* NOTE(review): extraction fragment -- presumably the body of set_fpu(),
   whose header is not visible.  Enables trapping for the exceptions
   requested in the runtime options (options.fpe) and disables none.  */
201 set_fpu_trap_exceptions (options
.fpe
, 0);
/* Return the set of currently trapping exceptions as GFC_FPE_* bits.
   NOTE(review): extraction fragment -- return type, braces, and the
   declarations/initialization of 'cw', 'cw_sse', 'mask' and 'res' are
   not visible here; the combining of 'cw' into 'mask' is also missing.  */
205 get_fpu_trap_exceptions (void)
/* Read the x87 control word (exception mask bits in the low 6 bits).  */
211 __asm__
__volatile__ ("fstcw\t%0" : "=m" (cw
));
/* Read MXCSR and merge its mask bits into the same bit positions.  */
218 __asm__
__volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse
));
220 /* The SSE exception masks are shifted by 7 bits. */
221 mask
|= (cw_sse
>> 7);
/* A set mask bit means "masked" (not trapping), so invert to get the
   trapping set.  */
224 mask
= ~mask
& _FPU_MASK_ALL
;
/* Translate back to GFC_FPE_* bits.  */
226 if (mask
& _FPU_MASK_IM
) res
|= GFC_FPE_INVALID
;
227 if (mask
& _FPU_MASK_DM
) res
|= GFC_FPE_DENORMAL
;
228 if (mask
& _FPU_MASK_ZM
) res
|= GFC_FPE_ZERO
;
229 if (mask
& _FPU_MASK_OM
) res
|= GFC_FPE_OVERFLOW
;
230 if (mask
& _FPU_MASK_UM
) res
|= GFC_FPE_UNDERFLOW
;
231 if (mask
& _FPU_MASK_PM
) res
|= GFC_FPE_INEXACT
;
/* Query whether trapping is supported for FLAG.  NOTE(review): only the
   parameter list is visible in this fragment; the body (presumably a
   constant or a support_fpu_flag call -- TODO confirm) is missing.  */
237 support_fpu_trap (int flag
__attribute__((unused
)))
/* Return the currently raised exception flags as GFC_FPE_* bits.
   NOTE(review): extraction fragment -- return type, braces, the
   declarations of 'cw' / 'cw_sse' / 'excepts' / 'res', and the merging
   of cw/cw_sse into 'excepts' are not visible here.  */
243 get_fpu_except_flags (void)
/* Read the x87 status word ("=am": fnstsw can target AX or memory).  */
249 __asm__
__volatile__ ("fnstsw\t%0" : "=am" (cw
));
/* Read MXCSR; its flag bits share the low 6 positions.  */
256 __asm__
__volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse
));
/* Keep only the six exception-flag bits.  */
260 excepts
&= _FPU_EX_ALL
;
/* Translate to GFC_FPE_* bits.  */
262 if (excepts
& _FPU_MASK_IM
) res
|= GFC_FPE_INVALID
;
263 if (excepts
& _FPU_MASK_DM
) res
|= GFC_FPE_DENORMAL
;
264 if (excepts
& _FPU_MASK_ZM
) res
|= GFC_FPE_ZERO
;
265 if (excepts
& _FPU_MASK_OM
) res
|= GFC_FPE_OVERFLOW
;
266 if (excepts
& _FPU_MASK_UM
) res
|= GFC_FPE_UNDERFLOW
;
267 if (excepts
& _FPU_MASK_PM
) res
|= GFC_FPE_INEXACT
;
/* Raise the exception flags in SET and clear those in CLEAR (GFC_FPE_*
   bits).  NOTE(review): extraction fragment -- return type, braces, the
   declarations of 'temp' and 'cw_sse', and the MXCSR flag-clearing line
   between the stmxcsr/ldmxcsr pair are not visible here.  */
273 set_fpu_except_flags (int set
, int clear
)
276 int exc_set
= 0, exc_clr
= 0;
278 /* Translate from GFC_PE_* values to _FPU_MASK_* values. */
279 if (set
& GFC_FPE_INVALID
)
280 exc_set
|= _FPU_MASK_IM
;
281 if (clear
& GFC_FPE_INVALID
)
282 exc_clr
|= _FPU_MASK_IM
;
284 if (set
& GFC_FPE_DENORMAL
)
285 exc_set
|= _FPU_MASK_DM
;
286 if (clear
& GFC_FPE_DENORMAL
)
287 exc_clr
|= _FPU_MASK_DM
;
289 if (set
& GFC_FPE_ZERO
)
290 exc_set
|= _FPU_MASK_ZM
;
291 if (clear
& GFC_FPE_ZERO
)
292 exc_clr
|= _FPU_MASK_ZM
;
294 if (set
& GFC_FPE_OVERFLOW
)
295 exc_set
|= _FPU_MASK_OM
;
296 if (clear
& GFC_FPE_OVERFLOW
)
297 exc_clr
|= _FPU_MASK_OM
;
299 if (set
& GFC_FPE_UNDERFLOW
)
300 exc_set
|= _FPU_MASK_UM
;
301 if (clear
& GFC_FPE_UNDERFLOW
)
302 exc_clr
|= _FPU_MASK_UM
;
304 if (set
& GFC_FPE_INEXACT
)
305 exc_set
|= _FPU_MASK_PM
;
306 if (clear
& GFC_FPE_INEXACT
)
307 exc_clr
|= _FPU_MASK_PM
;
310 /* Change the flags. This is tricky on 387 (unlike SSE), because we have
311 FNSTSW but no FLDSW instruction. */
/* Clear x87 flags by rewriting the saved status word and reloading the
   whole environment.  */
312 __asm__
__volatile__ ("fnstenv\t%0" : "=m" (temp
));
313 temp
.__status_word
&= ~exc_clr
;
314 __asm__
__volatile__ ("fldenv\t%0" : : "m" (temp
));
316 /* Change the flags on SSE. */
/* NOTE(review): the statement applying exc_clr to cw_sse between these
   two asms is missing from this fragment.  */
322 __asm__
__volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse
));
324 __asm__
__volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse
));
/* Flags in SET are raised by actually triggering the exceptions, so that
   unmasked ones also trap.  */
327 local_feraiseexcept (exc_set
);
/* Query whether the exception flag FLAG is supported.  NOTE(review):
   only the parameter list is visible in this fragment; the body is
   missing (presumably returns a constant -- TODO confirm).  */
331 support_fpu_flag (int flag
__attribute__((unused
)))
/* Set the rounding mode (GFC_FPE_TONEAREST/UPWARD/DOWNWARD/TOWARDZERO)
   on both the x87 and SSE units.  NOTE(review): extraction fragment --
   return type, braces, the 'switch (round)' header, break statements,
   the GFC_FPE_UPWARD case label, the default label, and the 'cw'/'cw_sse'
   declarations are not visible here.  */
337 set_fpu_rounding_mode (int round
)
/* Map the GFC rounding mode to the 2-bit x87/SSE round-control value.  */
344 case GFC_FPE_TONEAREST
:
345 round_mode
= _FPU_RC_NEAREST
;
/* NOTE(review): this assignment presumably belongs to the (missing)
   GFC_FPE_UPWARD case label.  */
348 round_mode
= _FPU_RC_UP
;
350 case GFC_FPE_DOWNWARD
:
351 round_mode
= _FPU_RC_DOWN
;
353 case GFC_FPE_TOWARDZERO
:
354 round_mode
= _FPU_RC_ZERO
;
357 return; /* Should be unreachable. */
/* Update the x87 control word.  */
360 __asm__
__volatile__ ("fnstcw\t%0" : "=m" (cw
));
362 /* The x87 round control bits are shifted by 10 bits. */
363 cw
&= ~(_FPU_RC_MASK
<< 10);
364 cw
|= round_mode
<< 10;
366 __asm__
__volatile__ ("fldcw\t%0" : : "m" (cw
));
/* Update MXCSR likewise.  */
372 __asm__
__volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse
));
374 /* The SSE round control bits are shifted by 13 bits. */
375 cw_sse
&= ~(_FPU_RC_MASK
<< 13);
376 cw_sse
|= round_mode
<< 13;
378 __asm__
__volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse
));
/* Return the current rounding mode as a GFC_FPE_* value.  NOTE(review):
   extraction fragment -- return type, braces, the 'cw'/'round_mode'
   declarations, the #if selecting the SSE vs x87 path, the 'switch'
   header, and the _FPU_RC_UP/DOWN/ZERO case labels are not visible.  */
383 get_fpu_rounding_mode (void)
/* SSE path: round-control field of MXCSR.  */
390 __asm__
__volatile__ ("%vstmxcsr\t%0" : "=m" (cw
));
392 /* The SSE round control bits are shifted by 13 bits. */
393 round_mode
= cw
>> 13;
/* x87 path: round-control field of the control word.  */
397 __asm__
__volatile__ ("fnstcw\t%0" : "=m" (cw
));
399 /* The x87 round control bits are shifted by 10 bits. */
400 round_mode
= cw
>> 10;
/* Keep only the 2-bit round-control value.  */
403 round_mode
&= _FPU_RC_MASK
;
/* Map back to the GFC_FPE_* rounding constants.  */
407 case _FPU_RC_NEAREST
:
408 return GFC_FPE_TONEAREST
;
410 return GFC_FPE_UPWARD
;
412 return GFC_FPE_DOWNWARD
;
414 return GFC_FPE_TOWARDZERO
;
416 return 0; /* Should be unreachable. */
/* Query whether rounding mode MODE is supported.  NOTE(review): only the
   parameter list is visible in this fragment; the body is missing
   (presumably returns a constant -- TODO confirm).  */
421 support_fpu_rounding_mode (int mode
__attribute__((unused
)))
/* Save the current FPU state into the caller-provided STATE buffer
   (treated as a my_fenv_t; size checked by the _Static_assert above).
   NOTE(review): extraction fragment -- return type, braces, and any
   #ifdef around the SSE part are not visible here.  */
427 get_fpu_state (void *state
)
429 my_fenv_t
*envp
= state
;
431 __asm__
__volatile__ ("fnstenv\t%0" : "=m" (*envp
));
433 /* fnstenv has the side effect of masking all exceptions, so we need
434 to restore the control word after that. */
435 __asm__
__volatile__ ("fldcw\t%0" : : "m" (envp
->__control_word
));
/* Also capture the SSE control/status register.  */
438 __asm__
__volatile__ ("%vstmxcsr\t%0" : "=m" (envp
->__mxcsr
));
/* Restore the FPU state previously saved by get_fpu_state from STATE.
   NOTE(review): extraction fragment -- return type, braces, and any
   #ifdef around the SSE part are not visible here.  */
442 set_fpu_state (void *state
)
444 my_fenv_t
*envp
= state
;
446 /* glibc sources (sysdeps/x86_64/fpu/fesetenv.c) do something more
447 complex than this, but I think it suffices in our case. */
448 __asm__
__volatile__ ("fldenv\t%0" : : "m" (*envp
));
/* Restore the SSE control/status register as well.  */
451 __asm__
__volatile__ ("%vldmxcsr\t%0" : : "m" (envp
->__mxcsr
));
/* Report whether underflow-mode control is available for real KIND.
   Supported for kind 4 and 8 (single/double, computed in SSE registers);
   NOTE(review): the return type and any surrounding #ifdef (e.g. an SSE
   guard with a "return 0" fallback) are not visible in this fragment.  */
456 support_fpu_underflow_control (int kind
)
461 return (kind
== 4 || kind
== 8) ? 1 : 0;
/* Return the current underflow mode: 1 for gradual underflow, 0 for
   abrupt (flush-to-zero), read from the MXCSR FTZ bit.  NOTE(review):
   return type, braces, and the 'cw_sse' declaration are not visible in
   this fragment.  */
466 get_fpu_underflow_mode (void)
473 __asm__
__volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse
));
475 /* Return 0 for abrupt underflow (flush to zero), 1 for gradual underflow. */
476 return (cw_sse
& MXCSR_FTZ
) ? 0 : 1;
/* Select gradual (GRADUAL nonzero) or abrupt underflow by updating the
   MXCSR FTZ bit.  NOTE(review): extraction fragment -- the return type,
   braces, the 'cw_sse' declaration, and the branch that SETS MXCSR_FTZ
   when !gradual are not visible here; only the clearing path remains.  */
481 set_fpu_underflow_mode (int gradual
)
488 __asm__
__volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse
));
/* Gradual underflow: clear flush-to-zero.  */
491 cw_sse
&= ~MXCSR_FTZ
;
495 __asm__
__volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse
));