fpu-387.h, [...]: Use static assertions.
[gcc.git] / libgfortran / config / fpu-387.h
1 /* FPU-related code for x86 and x86_64 processors.
2 Copyright (C) 2005-2014 Free Software Foundation, Inc.
3 Contributed by Francois-Xavier Coudert <coudert@clipper.ens.fr>
4
5 This file is part of the GNU Fortran 95 runtime library (libgfortran).
6
7 Libgfortran is free software; you can redistribute it and/or
8 modify it under the terms of the GNU General Public
9 License as published by the Free Software Foundation; either
10 version 3 of the License, or (at your option) any later version.
11
12 Libgfortran is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
20
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
25
26 #ifndef __SSE_MATH__
27 #include "cpuid.h"
28 #endif
29
/* Report whether the SSE unit (and therefore the MXCSR register) can be
   used.  Returns nonzero if it can, zero otherwise.

   When the compiler already emits SSE math (__SSE_MATH__) the answer is
   known at compile time.  Otherwise CPUID leaf 1 is queried at run time
   and the SSE feature bit of EDX is returned as-is, so the nonzero value
   is not necessarily 1.  */
static int
has_sse (void)
{
#ifdef __SSE_MATH__
  return 1;
#else
  unsigned int eax, ebx, ecx, edx;

  /* A zero result from __get_cpuid is treated as "no SSE".  */
  if (__get_cpuid (1, &eax, &ebx, &ecx, &edx))
    return edx & bit_SSE;

  return 0;
#endif
}
44
/* i387 exceptions -- see linux <fpu_control.h> header file for details.
   In this file the same bit values are applied both to the x87 control
   word (as exception mask bits) and to the x87 status word (as exception
   flag bits).  */
#define _FPU_MASK_IM 0x01	/* Invalid operation.  */
#define _FPU_MASK_DM 0x02	/* Denormal operand.  */
#define _FPU_MASK_ZM 0x04	/* Division by zero.  */
#define _FPU_MASK_OM 0x08	/* Overflow.  */
#define _FPU_MASK_UM 0x10	/* Underflow.  */
#define _FPU_MASK_PM 0x20	/* Inexact result (precision).  */

/* All six exception bits together (0x3f).  */
#define _FPU_MASK_ALL \
  (_FPU_MASK_IM | _FPU_MASK_DM | _FPU_MASK_ZM \
   | _FPU_MASK_OM | _FPU_MASK_UM | _FPU_MASK_PM)

/* All six exception flag bits; same value as _FPU_MASK_ALL.  */
#define _FPU_EX_ALL _FPU_MASK_ALL

/* i387 rounding modes.  These are unshifted two-bit field values; the
   users of these macros shift them into place (by 10 bits for the x87
   control word, by 13 bits for MXCSR).  */

#define _FPU_RC_NEAREST 0x0
#define _FPU_RC_DOWN 0x1
#define _FPU_RC_UP 0x2
#define _FPU_RC_ZERO 0x3

/* Mask covering the unshifted two-bit rounding-control field.  */
#define _FPU_RC_MASK 0x3
64
/* In-memory image of the x87 environment block written by FSTENV
   (this file uses the FNSTENV form) and reloaded by FLDENV, followed by
   one extra word in which this file keeps the SSE MXCSR register.

   Field order and sizes must not be changed: the x87 instructions read
   and write the object directly through "m" asm operands.  */
typedef struct
{
  unsigned short __control_word;	/* x87 control word.  */
  unsigned short __unused1;
  unsigned short __status_word;		/* x87 status word.  */
  unsigned short __unused2;
  unsigned short __tags;
  unsigned short __unused3;
  unsigned int __eip;
  unsigned short __cs_selector;
  unsigned short __opcode;
  unsigned int __data_offset;
  unsigned short __data_selector;
  unsigned short __unused5;
  unsigned int __mxcsr;			/* Filled by STMXCSR, not by FSTENV.  */
}
my_fenv_t;
84
85
86 /* Check we can actually store the FPU state in the allocated size. */
87 _Static_assert (sizeof(my_fenv_t) <= (size_t) GFC_FPE_STATE_BUFFER_SIZE,
88 "GFC_FPE_STATE_BUFFER_SIZE is too small");
89
90
91 /* Raise the supported floating-point exceptions from EXCEPTS. Other
92 bits in EXCEPTS are ignored. Code originally borrowed from
93 libatomic/config/x86/fenv.c. */
94
95 static void
96 local_feraiseexcept (int excepts)
97 {
98 if (excepts & _FPU_MASK_IM)
99 {
100 float f = 0.0f;
101 #ifdef __SSE_MATH__
102 volatile float r __attribute__ ((unused));
103 __asm__ __volatile__ ("%vdivss\t{%0, %d0|%d0, %0}" : "+x" (f));
104 r = f; /* Needed to trigger exception. */
105 #else
106 __asm__ __volatile__ ("fdiv\t{%y0, %0|%0, %y0}" : "+t" (f));
107 /* No need for fwait, exception is triggered by emitted fstp. */
108 #endif
109 }
110 if (excepts & _FPU_MASK_DM)
111 {
112 my_fenv_t temp;
113 __asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp));
114 temp.__status_word |= _FPU_MASK_DM;
115 __asm__ __volatile__ ("fldenv\t%0" : : "m" (temp));
116 __asm__ __volatile__ ("fwait");
117 }
118 if (excepts & _FPU_MASK_ZM)
119 {
120 float f = 1.0f, g = 0.0f;
121 #ifdef __SSE_MATH__
122 volatile float r __attribute__ ((unused));
123 __asm__ __volatile__ ("%vdivss\t{%1, %d0|%d0, %1}" : "+x" (f) : "xm" (g));
124 r = f; /* Needed to trigger exception. */
125 #else
126 __asm__ __volatile__ ("fdivs\t%1" : "+t" (f) : "m" (g));
127 /* No need for fwait, exception is triggered by emitted fstp. */
128 #endif
129 }
130 if (excepts & _FPU_MASK_OM)
131 {
132 my_fenv_t temp;
133 __asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp));
134 temp.__status_word |= _FPU_MASK_OM;
135 __asm__ __volatile__ ("fldenv\t%0" : : "m" (temp));
136 __asm__ __volatile__ ("fwait");
137 }
138 if (excepts & _FPU_MASK_UM)
139 {
140 my_fenv_t temp;
141 __asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp));
142 temp.__status_word |= _FPU_MASK_UM;
143 __asm__ __volatile__ ("fldenv\t%0" : : "m" (temp));
144 __asm__ __volatile__ ("fwait");
145 }
146 if (excepts & _FPU_MASK_PM)
147 {
148 float f = 1.0f, g = 3.0f;
149 #ifdef __SSE_MATH__
150 volatile float r __attribute__ ((unused));
151 __asm__ __volatile__ ("%vdivss\t{%1, %d0|%d0, %1}" : "+x" (f) : "xm" (g));
152 r = f; /* Needed to trigger exception. */
153 #else
154 __asm__ __volatile__ ("fdivs\t%1" : "+t" (f) : "m" (g));
155 /* No need for fwait, exception is triggered by emitted fstp. */
156 #endif
157 }
158 }
159
160
161 void
162 set_fpu_trap_exceptions (int trap, int notrap)
163 {
164 int exc_set = 0, exc_clr = 0;
165 unsigned short cw;
166
167 if (trap & GFC_FPE_INVALID) exc_set |= _FPU_MASK_IM;
168 if (trap & GFC_FPE_DENORMAL) exc_set |= _FPU_MASK_DM;
169 if (trap & GFC_FPE_ZERO) exc_set |= _FPU_MASK_ZM;
170 if (trap & GFC_FPE_OVERFLOW) exc_set |= _FPU_MASK_OM;
171 if (trap & GFC_FPE_UNDERFLOW) exc_set |= _FPU_MASK_UM;
172 if (trap & GFC_FPE_INEXACT) exc_set |= _FPU_MASK_PM;
173
174 if (notrap & GFC_FPE_INVALID) exc_clr |= _FPU_MASK_IM;
175 if (notrap & GFC_FPE_DENORMAL) exc_clr |= _FPU_MASK_DM;
176 if (notrap & GFC_FPE_ZERO) exc_clr |= _FPU_MASK_ZM;
177 if (notrap & GFC_FPE_OVERFLOW) exc_clr |= _FPU_MASK_OM;
178 if (notrap & GFC_FPE_UNDERFLOW) exc_clr |= _FPU_MASK_UM;
179 if (notrap & GFC_FPE_INEXACT) exc_clr |= _FPU_MASK_PM;
180
181 __asm__ __volatile__ ("fstcw\t%0" : "=m" (cw));
182
183 cw |= exc_clr;
184 cw &= ~exc_set;
185
186 __asm__ __volatile__ ("fnclex\n\tfldcw\t%0" : : "m" (cw));
187
188 if (has_sse())
189 {
190 unsigned int cw_sse;
191
192 __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
193
194 /* The SSE exception masks are shifted by 7 bits. */
195 cw_sse |= (exc_clr << 7);
196 cw_sse &= ~(exc_set << 7);
197
198 /* Clear stalled exception flags. */
199 cw_sse &= ~_FPU_EX_ALL;
200
201 __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse));
202 }
203 }
204
205 void
206 set_fpu (void)
207 {
208 set_fpu_trap_exceptions (options.fpe, 0);
209 }
210
211 int
212 get_fpu_trap_exceptions (void)
213 {
214 int res = 0;
215 unsigned short cw;
216
217 __asm__ __volatile__ ("fstcw\t%0" : "=m" (cw));
218 cw &= _FPU_MASK_ALL;
219
220 if (has_sse())
221 {
222 unsigned int cw_sse;
223
224 __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
225
226 /* The SSE exception masks are shifted by 7 bits. */
227 cw = cw | ((cw_sse >> 7) & _FPU_MASK_ALL);
228 }
229
230 if (~cw & _FPU_MASK_IM) res |= GFC_FPE_INVALID;
231 if (~cw & _FPU_MASK_DM) res |= GFC_FPE_DENORMAL;
232 if (~cw & _FPU_MASK_ZM) res |= GFC_FPE_ZERO;
233 if (~cw & _FPU_MASK_OM) res |= GFC_FPE_OVERFLOW;
234 if (~cw & _FPU_MASK_UM) res |= GFC_FPE_UNDERFLOW;
235 if (~cw & _FPU_MASK_PM) res |= GFC_FPE_INEXACT;
236
237 return res;
238 }
239
/* Report whether trapping is supported for the exception(s) in FLAG.
   This implementation does not examine FLAG and always answers 1.  */
int
support_fpu_trap (int flag __attribute__((unused)))
{
  return 1;
}
245
246 int
247 get_fpu_except_flags (void)
248 {
249 unsigned short cw;
250 int excepts;
251 int result = 0;
252
253 __asm__ __volatile__ ("fnstsw\t%0" : "=am" (cw));
254 excepts = cw;
255
256 if (has_sse())
257 {
258 unsigned int cw_sse;
259
260 __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
261 excepts |= cw_sse;
262 }
263
264 excepts &= _FPU_EX_ALL;
265
266 if (excepts & _FPU_MASK_IM) result |= GFC_FPE_INVALID;
267 if (excepts & _FPU_MASK_DM) result |= GFC_FPE_DENORMAL;
268 if (excepts & _FPU_MASK_ZM) result |= GFC_FPE_ZERO;
269 if (excepts & _FPU_MASK_OM) result |= GFC_FPE_OVERFLOW;
270 if (excepts & _FPU_MASK_UM) result |= GFC_FPE_UNDERFLOW;
271 if (excepts & _FPU_MASK_PM) result |= GFC_FPE_INEXACT;
272
273 return result;
274 }
275
276 void
277 set_fpu_except_flags (int set, int clear)
278 {
279 my_fenv_t temp;
280 int exc_set = 0, exc_clr = 0;
281
282 /* Translate from GFC_PE_* values to _FPU_MASK_* values. */
283 if (set & GFC_FPE_INVALID)
284 exc_set |= _FPU_MASK_IM;
285 if (clear & GFC_FPE_INVALID)
286 exc_clr |= _FPU_MASK_IM;
287
288 if (set & GFC_FPE_DENORMAL)
289 exc_set |= _FPU_MASK_DM;
290 if (clear & GFC_FPE_DENORMAL)
291 exc_clr |= _FPU_MASK_DM;
292
293 if (set & GFC_FPE_ZERO)
294 exc_set |= _FPU_MASK_ZM;
295 if (clear & GFC_FPE_ZERO)
296 exc_clr |= _FPU_MASK_ZM;
297
298 if (set & GFC_FPE_OVERFLOW)
299 exc_set |= _FPU_MASK_OM;
300 if (clear & GFC_FPE_OVERFLOW)
301 exc_clr |= _FPU_MASK_OM;
302
303 if (set & GFC_FPE_UNDERFLOW)
304 exc_set |= _FPU_MASK_UM;
305 if (clear & GFC_FPE_UNDERFLOW)
306 exc_clr |= _FPU_MASK_UM;
307
308 if (set & GFC_FPE_INEXACT)
309 exc_set |= _FPU_MASK_PM;
310 if (clear & GFC_FPE_INEXACT)
311 exc_clr |= _FPU_MASK_PM;
312
313
314 /* Change the flags. This is tricky on 387 (unlike SSE), because we have
315 FNSTSW but no FLDSW instruction. */
316 __asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp));
317 temp.__status_word &= ~exc_clr;
318 __asm__ __volatile__ ("fldenv\t%0" : : "m" (temp));
319
320 /* Change the flags on SSE. */
321
322 if (has_sse())
323 {
324 unsigned int cw_sse;
325
326 __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
327 cw_sse &= ~exc_clr;
328 __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse));
329 }
330
331 local_feraiseexcept (exc_set);
332 }
333
/* Report whether the exception flag(s) in FLAG are supported.  This
   implementation does not examine FLAG and always answers 1.  */
int
support_fpu_flag (int flag __attribute__((unused)))
{
  return 1;
}
339
340 void
341 set_fpu_rounding_mode (int round)
342 {
343 int round_mode;
344 unsigned short cw;
345
346 switch (round)
347 {
348 case GFC_FPE_TONEAREST:
349 round_mode = _FPU_RC_NEAREST;
350 break;
351 case GFC_FPE_UPWARD:
352 round_mode = _FPU_RC_UP;
353 break;
354 case GFC_FPE_DOWNWARD:
355 round_mode = _FPU_RC_DOWN;
356 break;
357 case GFC_FPE_TOWARDZERO:
358 round_mode = _FPU_RC_ZERO;
359 break;
360 default:
361 return; /* Should be unreachable. */
362 }
363
364 __asm__ __volatile__ ("fnstcw\t%0" : "=m" (cw));
365
366 /* The x87 round control bits are shifted by 10 bits. */
367 cw &= ~(_FPU_RC_MASK << 10);
368 cw |= round_mode << 10;
369
370 __asm__ __volatile__ ("fldcw\t%0" : : "m" (cw));
371
372 if (has_sse())
373 {
374 unsigned int cw_sse;
375
376 __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
377
378 /* The SSE round control bits are shifted by 13 bits. */
379 cw_sse &= ~(_FPU_RC_MASK << 13);
380 cw_sse |= round_mode << 13;
381
382 __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse));
383 }
384 }
385
386 int
387 get_fpu_rounding_mode (void)
388 {
389 int round_mode;
390
391 #ifdef __SSE_MATH__
392 unsigned int cw;
393
394 __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw));
395
396 /* The SSE round control bits are shifted by 13 bits. */
397 round_mode = cw >> 13;
398 #else
399 unsigned short cw;
400
401 __asm__ __volatile__ ("fnstcw\t%0" : "=m" (cw));
402
403 /* The x87 round control bits are shifted by 10 bits. */
404 round_mode = cw >> 10;
405 #endif
406
407 round_mode &= _FPU_RC_MASK;
408
409 switch (round_mode)
410 {
411 case _FPU_RC_NEAREST:
412 return GFC_FPE_TONEAREST;
413 case _FPU_RC_UP:
414 return GFC_FPE_UPWARD;
415 case _FPU_RC_DOWN:
416 return GFC_FPE_DOWNWARD;
417 case _FPU_RC_ZERO:
418 return GFC_FPE_TOWARDZERO;
419 default:
420 return GFC_FPE_INVALID; /* Should be unreachable. */
421 }
422 }
423
/* Report whether the rounding mode MODE is supported.  This
   implementation does not examine MODE and always answers 1.  */
int
support_fpu_rounding_mode (int mode __attribute__((unused)))
{
  return 1;
}
429
430 void
431 get_fpu_state (void *state)
432 {
433 my_fenv_t *envp = state;
434
435 __asm__ __volatile__ ("fnstenv\t%0" : "=m" (*envp));
436
437 /* fnstenv has the side effect of masking all exceptions, so we need
438 to restore the control word after that. */
439 __asm__ __volatile__ ("fldcw\t%0" : : "m" (envp->__control_word));
440
441 if (has_sse())
442 __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (envp->__mxcsr));
443 }
444
445 void
446 set_fpu_state (void *state)
447 {
448 my_fenv_t *envp = state;
449
450 /* glibc sources (sysdeps/x86_64/fpu/fesetenv.c) do something more
451 complex than this, but I think it suffices in our case. */
452 __asm__ __volatile__ ("fldenv\t%0" : : "m" (*envp));
453
454 if (has_sse())
455 __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (envp->__mxcsr));
456 }
457