41b82bc809858ef170ca3760e3e3af4b2912cfc2
[gcc.git] / libgfortran / config / fpu-387.h
1 /* FPU-related code for x86 and x86_64 processors.
2 Copyright (C) 2005-2020 Free Software Foundation, Inc.
3 Contributed by Francois-Xavier Coudert <coudert@clipper.ens.fr>
4
5 This file is part of the GNU Fortran 95 runtime library (libgfortran).
6
7 Libgfortran is free software; you can redistribute it and/or
8 modify it under the terms of the GNU General Public
9 License as published by the Free Software Foundation; either
10 version 3 of the License, or (at your option) any later version.
11
12 Libgfortran is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
20
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
25
26 #ifndef __SSE_MATH__
27 #include "cpuid.h"
28 #endif
29
/* Return nonzero if the processor supports the SSE instruction set.
   When the file is compiled with SSE math enabled (__SSE_MATH__),
   the answer is known at compile time; otherwise CPUID leaf 1 is
   queried at run time and the SSE feature bit of EDX is tested.  */
static int
has_sse (void)
{
#ifdef __SSE_MATH__
  /* Compiling with -mfpmath=sse already presumes SSE hardware.  */
  return 1;
#else
  unsigned int cpuid_eax, cpuid_ebx, cpuid_ecx, cpuid_edx;

  /* CPUID leaf 1 unavailable: assume no SSE.  */
  if (__get_cpuid (1, &cpuid_eax, &cpuid_ebx, &cpuid_ecx, &cpuid_edx) == 0)
    return 0;

  return cpuid_edx & bit_SSE;
#endif
}
44
/* i387 exceptions -- see linux <fpu_control.h> header file for details.
   These are the exception mask bits of the x87 control word; the same
   bit positions serve as the exception flag bits in the x87 status
   word and the low bits of MXCSR.  */
#define _FPU_MASK_IM  0x01	/* Invalid operation.  */
#define _FPU_MASK_DM  0x02	/* Denormalized operand.  */
#define _FPU_MASK_ZM  0x04	/* Division by zero.  */
#define _FPU_MASK_OM  0x08	/* Overflow.  */
#define _FPU_MASK_UM  0x10	/* Underflow.  */
#define _FPU_MASK_PM  0x20	/* Precision (inexact result).  */
#define _FPU_MASK_ALL 0x3f	/* All six mask bits.  */

/* All six exception flag bits (x87 status word / MXCSR low bits).  */
#define _FPU_EX_ALL  0x3f

/* i387 rounding modes, as stored in bits 10-11 of the x87 control
   word (and bits 13-14 of MXCSR).  */

#define _FPU_RC_NEAREST 0x0
#define _FPU_RC_DOWN    0x1
#define _FPU_RC_UP      0x2
#define _FPU_RC_ZERO    0x3

#define _FPU_RC_MASK    0x3

/* Enable flush to zero mode (MXCSR bit 15; SSE only).  */

#define MXCSR_FTZ (1 << 15)
69
/* This structure corresponds to the layout of the block
   written by FSTENV (the 28-byte 32-bit protected-mode x87
   environment), followed by one extra field for the SSE
   control/status register, which FSTENV does not cover.  */
typedef struct
{
  unsigned short int __control_word;	/* x87 control word (FCW).  */
  unsigned short int __unused1;
  unsigned short int __status_word;	/* x87 status word (FSW).  */
  unsigned short int __unused2;
  unsigned short int __tags;		/* x87 tag word.  */
  unsigned short int __unused3;
  unsigned int __eip;			/* FPU instruction pointer.  */
  unsigned short int __cs_selector;
  unsigned short int __opcode;
  unsigned int __data_offset;		/* FPU operand pointer.  */
  unsigned short int __data_selector;
  unsigned short int __unused5;
  unsigned int __mxcsr;			/* SSE MXCSR; stored separately by
					   get_fpu_state, not by FSTENV.  */
}
my_fenv_t;

/* Check we can actually store the FPU state in the allocated size.  */
_Static_assert (sizeof(my_fenv_t) <= (size_t) GFC_FPE_STATE_BUFFER_SIZE,
		"GFC_FPE_STATE_BUFFER_SIZE is too small");
93
/* Force the evaluation of X by making it an input to an empty asm,
   so the compiler cannot optimize away an arithmetic operation whose
   only purpose is its floating-point side effect (raising an
   exception).  The "x" constraint places the value in an SSE
   register, "f" in an x87 register, matching where math is done.
   Note: no trailing semicolon inside the macro -- the call sites
   supply their own, so the expansion forms exactly one statement and
   remains safe in an unbraced if/else.  */
#ifdef __SSE_MATH__
# define __math_force_eval(x) __asm__ __volatile__ ("" : : "x" (x))
#else
# define __math_force_eval(x) __asm__ __volatile__ ("" : : "f" (x))
#endif
99
100 /* Raise the supported floating-point exceptions from EXCEPTS. Other
101 bits in EXCEPTS are ignored. Code originally borrowed from
102 libatomic/config/x86/fenv.c. */
103
/* Raise the supported floating-point exceptions from EXCEPTS (given
   as _FPU_MASK_* bits).  Other bits in EXCEPTS are ignored.  Code
   originally borrowed from libatomic/config/x86/fenv.c.  Where
   possible an exception is raised by actually performing an
   operation that produces it; for denormal, overflow and underflow
   the flag is instead set in a saved x87 environment, which is then
   reloaded, with fwait triggering the now-pending exception.  */

static void
local_feraiseexcept (int excepts)
{
  if (excepts & _FPU_MASK_IM)
    {
      /* 0.0f / 0.0f raises the invalid-operation exception.  */
      float f = 0.0f;
      __math_force_eval (f / f);
    }
  if (excepts & _FPU_MASK_DM)
    {
      /* No cheap arithmetic raises "denormal" reliably; set the flag
	 in the stored environment and reload it.  */
      my_fenv_t temp;
      __asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp));
      temp.__status_word |= _FPU_MASK_DM;
      __asm__ __volatile__ ("fldenv\t%0" : : "m" (temp));
      __asm__ __volatile__ ("fwait");
    }
  if (excepts & _FPU_MASK_ZM)
    {
      /* 1.0f / 0.0f raises the divide-by-zero exception.  */
      float f = 1.0f, g = 0.0f;
      __math_force_eval (f / g);
    }
  if (excepts & _FPU_MASK_OM)
    {
      /* Same fnstenv/fldenv/fwait trick as for denormal.  */
      my_fenv_t temp;
      __asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp));
      temp.__status_word |= _FPU_MASK_OM;
      __asm__ __volatile__ ("fldenv\t%0" : : "m" (temp));
      __asm__ __volatile__ ("fwait");
    }
  if (excepts & _FPU_MASK_UM)
    {
      /* Same fnstenv/fldenv/fwait trick as for denormal.  */
      my_fenv_t temp;
      __asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp));
      temp.__status_word |= _FPU_MASK_UM;
      __asm__ __volatile__ ("fldenv\t%0" : : "m" (temp));
      __asm__ __volatile__ ("fwait");
    }
  if (excepts & _FPU_MASK_PM)
    {
      /* 1.0f / 3.0f is inexact in binary floating point.  */
      float f = 1.0f, g = 3.0f;
#ifdef __SSE_MATH__
      __asm__ __volatile__ ("%vdivss\t{%1, %d0|%d0, %1}" : "+x" (f) : "xm" (g));
#else
      __asm__ __volatile__ ("fdivs\t%1" : "+t" (f) : "m" (g));
      /* No need for fwait, exception is triggered by emitted fstp.  */
#endif
    }
}
152
153
/* Enable trapping for the exceptions in TRAP and disable (mask) those
   in NOTRAP, both given as GFC_FPE_* bitmasks.  On x86 an exception
   traps when its mask bit is CLEAR, so enabling a trap clears the bit
   and disabling it sets the bit.  Both the x87 control word and, when
   SSE is available, the MXCSR register are updated.  */
void
set_fpu_trap_exceptions (int trap, int notrap)
{
  int exc_set = 0, exc_clr = 0;
  unsigned short cw;

  /* Translate from GFC_FPE_* values to _FPU_MASK_* bit positions.  */
  if (trap & GFC_FPE_INVALID) exc_set |= _FPU_MASK_IM;
  if (trap & GFC_FPE_DENORMAL) exc_set |= _FPU_MASK_DM;
  if (trap & GFC_FPE_ZERO) exc_set |= _FPU_MASK_ZM;
  if (trap & GFC_FPE_OVERFLOW) exc_set |= _FPU_MASK_OM;
  if (trap & GFC_FPE_UNDERFLOW) exc_set |= _FPU_MASK_UM;
  if (trap & GFC_FPE_INEXACT) exc_set |= _FPU_MASK_PM;

  if (notrap & GFC_FPE_INVALID) exc_clr |= _FPU_MASK_IM;
  if (notrap & GFC_FPE_DENORMAL) exc_clr |= _FPU_MASK_DM;
  if (notrap & GFC_FPE_ZERO) exc_clr |= _FPU_MASK_ZM;
  if (notrap & GFC_FPE_OVERFLOW) exc_clr |= _FPU_MASK_OM;
  if (notrap & GFC_FPE_UNDERFLOW) exc_clr |= _FPU_MASK_UM;
  if (notrap & GFC_FPE_INEXACT) exc_clr |= _FPU_MASK_PM;

  __asm__ __volatile__ ("fstcw\t%0" : "=m" (cw));

  /* Mask bit set = exception masked, so "clear trap" sets bits and
     "set trap" clears them.  */
  cw |= exc_clr;
  cw &= ~exc_set;

  /* fnclex first: clear any pending x87 exceptions so that unmasking
     does not immediately trap on a stale flag.  */
  __asm__ __volatile__ ("fnclex\n\tfldcw\t%0" : : "m" (cw));

  if (has_sse())
    {
      unsigned int cw_sse;

      __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));

      /* The SSE exception masks are shifted by 7 bits.  */
      cw_sse |= (exc_clr << 7);
      cw_sse &= ~(exc_set << 7);

      /* Clear stalled exception flags.  */
      cw_sse &= ~_FPU_EX_ALL;

      __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse));
    }
}
197
/* Apply the trap settings requested on the command line (-ffpe-trap),
   recorded in the global runtime options.  Called at startup.  */
void
set_fpu (void)
{
  set_fpu_trap_exceptions (options.fpe, 0);
}
203
/* Return the set of exceptions that currently trap, as a GFC_FPE_*
   bitmask.  An exception traps only when it is unmasked in BOTH the
   x87 control word and (when SSE is present) MXCSR, which is why the
   two mask sets are OR-ed before being inverted.  */
int
get_fpu_trap_exceptions (void)
{
  unsigned short cw;
  int mask;
  int res = 0;

  __asm__ __volatile__ ("fstcw\t%0" : "=m" (cw));
  mask = cw;

  if (has_sse())
    {
      unsigned int cw_sse;

      __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));

      /* The SSE exception masks are shifted by 7 bits.  */
      mask |= (cw_sse >> 7);
    }

  /* A set mask bit means "masked" (not trapping); invert to get the
     trapping set and keep only the six exception bits.  */
  mask = ~mask & _FPU_MASK_ALL;

  if (mask & _FPU_MASK_IM) res |= GFC_FPE_INVALID;
  if (mask & _FPU_MASK_DM) res |= GFC_FPE_DENORMAL;
  if (mask & _FPU_MASK_ZM) res |= GFC_FPE_ZERO;
  if (mask & _FPU_MASK_OM) res |= GFC_FPE_OVERFLOW;
  if (mask & _FPU_MASK_UM) res |= GFC_FPE_UNDERFLOW;
  if (mask & _FPU_MASK_PM) res |= GFC_FPE_INEXACT;

  return res;
}
235
/* Report whether trapping can be enabled for the given GFC_FPE_*
   exception.  The x86 FPU allows every exception to be unmasked, so
   the answer is unconditionally yes.  */
int
support_fpu_trap (int flag __attribute__((unused)))
{
  return 1;
}
241
/* Return the currently raised exception flags as a GFC_FPE_* bitmask,
   combining the x87 status word with (when SSE is present) the MXCSR
   flag bits, which occupy the same low six bit positions.  */
int
get_fpu_except_flags (void)
{
  unsigned short cw;
  int excepts;
  int res = 0;

  __asm__ __volatile__ ("fnstsw\t%0" : "=am" (cw));
  excepts = cw;

  if (has_sse())
    {
      unsigned int cw_sse;

      __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
      /* MXCSR exception flags live in the same bit positions as the
	 x87 status word flags, so a plain OR merges them.  */
      excepts |= cw_sse;
    }

  /* Discard everything but the six exception flag bits.  */
  excepts &= _FPU_EX_ALL;

  if (excepts & _FPU_MASK_IM) res |= GFC_FPE_INVALID;
  if (excepts & _FPU_MASK_DM) res |= GFC_FPE_DENORMAL;
  if (excepts & _FPU_MASK_ZM) res |= GFC_FPE_ZERO;
  if (excepts & _FPU_MASK_OM) res |= GFC_FPE_OVERFLOW;
  if (excepts & _FPU_MASK_UM) res |= GFC_FPE_UNDERFLOW;
  if (excepts & _FPU_MASK_PM) res |= GFC_FPE_INEXACT;

  return res;
}
271
/* Raise the exception flags in SET and clear those in CLEAR, both
   given as GFC_FPE_* bitmasks.  Clearing is done by editing the saved
   x87 environment (and MXCSR); raising is delegated to
   local_feraiseexcept so the flags become visible on both units.  */
void
set_fpu_except_flags (int set, int clear)
{
  my_fenv_t temp;
  int exc_set = 0, exc_clr = 0;

  /* Translate from GFC_FPE_* values to _FPU_MASK_* values.  */
  if (set & GFC_FPE_INVALID)
    exc_set |= _FPU_MASK_IM;
  if (clear & GFC_FPE_INVALID)
    exc_clr |= _FPU_MASK_IM;

  if (set & GFC_FPE_DENORMAL)
    exc_set |= _FPU_MASK_DM;
  if (clear & GFC_FPE_DENORMAL)
    exc_clr |= _FPU_MASK_DM;

  if (set & GFC_FPE_ZERO)
    exc_set |= _FPU_MASK_ZM;
  if (clear & GFC_FPE_ZERO)
    exc_clr |= _FPU_MASK_ZM;

  if (set & GFC_FPE_OVERFLOW)
    exc_set |= _FPU_MASK_OM;
  if (clear & GFC_FPE_OVERFLOW)
    exc_clr |= _FPU_MASK_OM;

  if (set & GFC_FPE_UNDERFLOW)
    exc_set |= _FPU_MASK_UM;
  if (clear & GFC_FPE_UNDERFLOW)
    exc_clr |= _FPU_MASK_UM;

  if (set & GFC_FPE_INEXACT)
    exc_set |= _FPU_MASK_PM;
  if (clear & GFC_FPE_INEXACT)
    exc_clr |= _FPU_MASK_PM;


  /* Change the flags. This is tricky on 387 (unlike SSE), because we have
     FNSTSW but no FLDSW instruction.  Instead, store the whole
     environment, patch the status word, and reload it.  */
  __asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp));
  temp.__status_word &= ~exc_clr;
  __asm__ __volatile__ ("fldenv\t%0" : : "m" (temp));

  /* Change the flags on SSE.  The MXCSR flag bits share the low six
     bit positions with the x87 status word, so exc_clr applies
     directly.  */

  if (has_sse())
    {
      unsigned int cw_sse;

      __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
      cw_sse &= ~exc_clr;
      __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse));
    }

  /* Raise the requested flags by producing the actual exceptions.  */
  local_feraiseexcept (exc_set);
}
329
/* Report whether the given GFC_FPE_* status flag can be queried and
   set on this target.  All six flags exist on x86, so always yes.  */
int
support_fpu_flag (int flag __attribute__((unused)))
{
  return 1;
}
335
/* Set the rounding mode, given as one of the GFC_FPE_TONEAREST /
   GFC_FPE_UPWARD / GFC_FPE_DOWNWARD / GFC_FPE_TOWARDZERO values.
   Updates both the x87 control word and, when SSE is available,
   MXCSR, so that x87 and SSE arithmetic round the same way.
   Unknown values are silently ignored.  */
void
set_fpu_rounding_mode (int round)
{
  int round_mode;
  unsigned short cw;

  /* Map the GFC_FPE_* rounding value onto the hardware encoding.  */
  switch (round)
    {
    case GFC_FPE_TONEAREST:
      round_mode = _FPU_RC_NEAREST;
      break;
    case GFC_FPE_UPWARD:
      round_mode = _FPU_RC_UP;
      break;
    case GFC_FPE_DOWNWARD:
      round_mode = _FPU_RC_DOWN;
      break;
    case GFC_FPE_TOWARDZERO:
      round_mode = _FPU_RC_ZERO;
      break;
    default:
      return; /* Should be unreachable.  */
    }

  __asm__ __volatile__ ("fnstcw\t%0" : "=m" (cw));

  /* The x87 round control bits are shifted by 10 bits.  */
  cw &= ~(_FPU_RC_MASK << 10);
  cw |= round_mode << 10;

  __asm__ __volatile__ ("fldcw\t%0" : : "m" (cw));

  if (has_sse())
    {
      unsigned int cw_sse;

      __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));

      /* The SSE round control bits are shifted by 13 bits.  */
      cw_sse &= ~(_FPU_RC_MASK << 13);
      cw_sse |= round_mode << 13;

      __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse));
    }
}
381
/* Return the current rounding mode as a GFC_FPE_* value.  Only one
   unit is queried: MXCSR when the file is built for SSE math, the
   x87 control word otherwise -- set_fpu_rounding_mode keeps the two
   in sync, so reading either is sufficient.  Returns 0 for an
   unrecognized encoding (cannot happen with a 2-bit field).  */
int
get_fpu_rounding_mode (void)
{
  int round_mode;

#ifdef __SSE_MATH__
  unsigned int cw;

  __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw));

  /* The SSE round control bits are shifted by 13 bits.  */
  round_mode = cw >> 13;
#else
  unsigned short cw;

  __asm__ __volatile__ ("fnstcw\t%0" : "=m" (cw));

  /* The x87 round control bits are shifted by 10 bits.  */
  round_mode = cw >> 10;
#endif

  /* Keep only the 2-bit round-control field.  */
  round_mode &= _FPU_RC_MASK;

  switch (round_mode)
    {
    case _FPU_RC_NEAREST:
      return GFC_FPE_TONEAREST;
    case _FPU_RC_UP:
      return GFC_FPE_UPWARD;
    case _FPU_RC_DOWN:
      return GFC_FPE_DOWNWARD;
    case _FPU_RC_ZERO:
      return GFC_FPE_TOWARDZERO;
    default:
      return 0; /* Should be unreachable.  */
    }
}
419
/* Report whether the given GFC_FPE_* rounding mode is available.
   x86 hardware implements all four IEEE rounding directions, so the
   answer is unconditionally yes.  */
int
support_fpu_rounding_mode (int mode __attribute__((unused)))
{
  return 1;
}
425
/* Save the complete FPU state into STATE, which must point to a
   buffer of at least GFC_FPE_STATE_BUFFER_SIZE bytes (checked by the
   _Static_assert above against sizeof (my_fenv_t)).  */
void
get_fpu_state (void *state)
{
  my_fenv_t *envp = state;

  __asm__ __volatile__ ("fnstenv\t%0" : "=m" (*envp));

  /* fnstenv has the side effect of masking all exceptions, so we need
     to restore the control word after that.  */
  __asm__ __volatile__ ("fldcw\t%0" : : "m" (envp->__control_word));

  /* MXCSR is not part of the FSTENV image; store it separately.  */
  if (has_sse())
    __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (envp->__mxcsr));
}
440
/* Restore the FPU state previously saved by get_fpu_state.  STATE
   must point to a buffer filled by that function.  */
void
set_fpu_state (void *state)
{
  my_fenv_t *envp = state;

  /* glibc sources (sysdeps/x86_64/fpu/fesetenv.c) do something more
     complex than this, but I think it suffices in our case.  */
  __asm__ __volatile__ ("fldenv\t%0" : : "m" (*envp));

  /* Reload MXCSR separately; fldenv only restores the x87 part.  */
  if (has_sse())
    __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (envp->__mxcsr));
}
453
454
/* Report whether abrupt/gradual underflow can be selected for real
   kind KIND.  Underflow control is only available through the MXCSR
   flush-to-zero bit, i.e. only with SSE and only for the kinds whose
   arithmetic runs on the SSE unit (real(4) and real(8)).  */
int
support_fpu_underflow_control (int kind)
{
  if (has_sse ())
    return (kind == 4 || kind == 8) ? 1 : 0;

  return 0;
}
463
464
/* Return the current underflow mode: 0 for abrupt underflow (flush
   to zero), 1 for gradual underflow.  Without SSE there is no FTZ
   control, and x87 underflow is always gradual, hence the early 1.  */
int
get_fpu_underflow_mode (void)
{
  unsigned int cw_sse;

  if (!has_sse())
    return 1;

  __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));

  /* Return 0 for abrupt underflow (flush to zero), 1 for gradual underflow.  */
  return (cw_sse & MXCSR_FTZ) ? 0 : 1;
}
478
479
/* Select gradual underflow (GRADUAL nonzero) or abrupt flush-to-zero
   underflow (GRADUAL zero) by toggling the MXCSR FTZ bit.  A no-op
   without SSE, where underflow is always gradual.  */
void
set_fpu_underflow_mode (int gradual)
{
  unsigned int cw_sse;

  if (!has_sse())
    return;

  __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));

  if (gradual)
    cw_sse &= ~MXCSR_FTZ;
  else
    cw_sse |= MXCSR_FTZ;

  __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse));
}
497