1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
44 #include "basic-block.h"
47 #include "target-def.h"
48 #include "langhooks.h"
50 #include "tree-gimple.h"
/* Fallback stack-probe limit: -1 disables the limit check.  Targets
   may pre-define CHECK_STACK_LIMIT; only define it if they did not.
   Fix: the #ifndef conditional was left unterminated — restore the
   matching #endif so the preprocessor conditional is balanced.  */
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.
   QImode -> 0, HImode -> 1, SImode -> 2, DImode -> 3, anything
   else (e.g. TImode) -> 4, matching the 5-element cost arrays in
   struct processor_costs.
   Fix: the conditional expression was truncated — restore the final
   ": 4" default arm and the closing parenthesis so the macro expands
   to a complete expression.  */
#define MODE_INDEX(mode) \
 ((mode) == QImode ? 0 \
 : (mode) == HImode ? 1 \
 : (mode) == SImode ? 2 \
 : (mode) == DImode ? 3 \
 : 4)
/* Per-CPU instruction cost tables used by the rtx-cost and scheduling
   hooks; each value is relative to the cost of an add (see the
   MODE_INDEX mapping for the 5-element mult/div arrays).
   NOTE(review): this region is a line-sampled extract — several
   initializer fields (e.g. "large insn", MOVE_RATIO, branch cost)
   and the closing "};" of each table are missing relative to
   struct processor_costs; verify against the complete i386.c.  */
64 /* Processor costs (relative to an add) */
66 struct processor_costs size_cost
= { /* costs for tuning for size */
67 2, /* cost of an add instruction */
68 3, /* cost of a lea instruction */
69 2, /* variable shift costs */
70 3, /* constant shift costs */
71 {3, 3, 3, 3, 5}, /* cost of starting a multiply */
72 0, /* cost of multiply per each bit set */
73 {3, 3, 3, 3, 5}, /* cost of a divide/mod */
74 3, /* cost of movsx */
75 3, /* cost of movzx */
78 2, /* cost for loading QImode using movzbl */
79 {2, 2, 2}, /* cost of loading integer registers
80 in QImode, HImode and SImode.
81 Relative to reg-reg move (2). */
82 {2, 2, 2}, /* cost of storing integer registers */
83 2, /* cost of reg,reg fld/fst */
84 {2, 2, 2}, /* cost of loading fp registers
85 in SFmode, DFmode and XFmode */
86 {2, 2, 2}, /* cost of loading integer registers */
87 3, /* cost of moving MMX register */
88 {3, 3}, /* cost of loading MMX registers
89 in SImode and DImode */
90 {3, 3}, /* cost of storing MMX registers
91 in SImode and DImode */
92 3, /* cost of moving SSE register */
93 {3, 3, 3}, /* cost of loading SSE registers
94 in SImode, DImode and TImode */
95 {3, 3, 3}, /* cost of storing SSE registers
96 in SImode, DImode and TImode */
97 3, /* MMX or SSE register to integer */
98 0, /* size of prefetch block */
99 0, /* number of parallel prefetches */
101 2, /* cost of FADD and FSUB insns. */
102 2, /* cost of FMUL instruction. */
103 2, /* cost of FDIV instruction. */
104 2, /* cost of FABS instruction. */
105 2, /* cost of FCHS instruction. */
106 2, /* cost of FSQRT instruction. */
109 /* Processor costs (relative to an add) */
111 struct processor_costs i386_cost
= { /* 386 specific costs */
112 1, /* cost of an add instruction */
113 1, /* cost of a lea instruction */
114 3, /* variable shift costs */
115 2, /* constant shift costs */
116 {6, 6, 6, 6, 6}, /* cost of starting a multiply */
117 1, /* cost of multiply per each bit set */
118 {23, 23, 23, 23, 23}, /* cost of a divide/mod */
119 3, /* cost of movsx */
120 2, /* cost of movzx */
121 15, /* "large" insn */
123 4, /* cost for loading QImode using movzbl */
124 {2, 4, 2}, /* cost of loading integer registers
125 in QImode, HImode and SImode.
126 Relative to reg-reg move (2). */
127 {2, 4, 2}, /* cost of storing integer registers */
128 2, /* cost of reg,reg fld/fst */
129 {8, 8, 8}, /* cost of loading fp registers
130 in SFmode, DFmode and XFmode */
131 {8, 8, 8}, /* cost of loading integer registers */
132 2, /* cost of moving MMX register */
133 {4, 8}, /* cost of loading MMX registers
134 in SImode and DImode */
135 {4, 8}, /* cost of storing MMX registers
136 in SImode and DImode */
137 2, /* cost of moving SSE register */
138 {4, 8, 16}, /* cost of loading SSE registers
139 in SImode, DImode and TImode */
140 {4, 8, 16}, /* cost of storing SSE registers
141 in SImode, DImode and TImode */
142 3, /* MMX or SSE register to integer */
143 0, /* size of prefetch block */
144 0, /* number of parallel prefetches */
146 23, /* cost of FADD and FSUB insns. */
147 27, /* cost of FMUL instruction. */
148 88, /* cost of FDIV instruction. */
149 22, /* cost of FABS instruction. */
150 24, /* cost of FCHS instruction. */
151 122, /* cost of FSQRT instruction. */
155 struct processor_costs i486_cost
= { /* 486 specific costs */
156 1, /* cost of an add instruction */
157 1, /* cost of a lea instruction */
158 3, /* variable shift costs */
159 2, /* constant shift costs */
160 {12, 12, 12, 12, 12}, /* cost of starting a multiply */
161 1, /* cost of multiply per each bit set */
162 {40, 40, 40, 40, 40}, /* cost of a divide/mod */
163 3, /* cost of movsx */
164 2, /* cost of movzx */
165 15, /* "large" insn */
167 4, /* cost for loading QImode using movzbl */
168 {2, 4, 2}, /* cost of loading integer registers
169 in QImode, HImode and SImode.
170 Relative to reg-reg move (2). */
171 {2, 4, 2}, /* cost of storing integer registers */
172 2, /* cost of reg,reg fld/fst */
173 {8, 8, 8}, /* cost of loading fp registers
174 in SFmode, DFmode and XFmode */
175 {8, 8, 8}, /* cost of loading integer registers */
176 2, /* cost of moving MMX register */
177 {4, 8}, /* cost of loading MMX registers
178 in SImode and DImode */
179 {4, 8}, /* cost of storing MMX registers
180 in SImode and DImode */
181 2, /* cost of moving SSE register */
182 {4, 8, 16}, /* cost of loading SSE registers
183 in SImode, DImode and TImode */
184 {4, 8, 16}, /* cost of storing SSE registers
185 in SImode, DImode and TImode */
186 3, /* MMX or SSE register to integer */
187 0, /* size of prefetch block */
188 0, /* number of parallel prefetches */
190 8, /* cost of FADD and FSUB insns. */
191 16, /* cost of FMUL instruction. */
192 73, /* cost of FDIV instruction. */
193 3, /* cost of FABS instruction. */
194 3, /* cost of FCHS instruction. */
195 83, /* cost of FSQRT instruction. */
199 struct processor_costs pentium_cost
= {
200 1, /* cost of an add instruction */
201 1, /* cost of a lea instruction */
202 4, /* variable shift costs */
203 1, /* constant shift costs */
204 {11, 11, 11, 11, 11}, /* cost of starting a multiply */
205 0, /* cost of multiply per each bit set */
206 {25, 25, 25, 25, 25}, /* cost of a divide/mod */
207 3, /* cost of movsx */
208 2, /* cost of movzx */
209 8, /* "large" insn */
211 6, /* cost for loading QImode using movzbl */
212 {2, 4, 2}, /* cost of loading integer registers
213 in QImode, HImode and SImode.
214 Relative to reg-reg move (2). */
215 {2, 4, 2}, /* cost of storing integer registers */
216 2, /* cost of reg,reg fld/fst */
217 {2, 2, 6}, /* cost of loading fp registers
218 in SFmode, DFmode and XFmode */
219 {4, 4, 6}, /* cost of loading integer registers */
220 8, /* cost of moving MMX register */
221 {8, 8}, /* cost of loading MMX registers
222 in SImode and DImode */
223 {8, 8}, /* cost of storing MMX registers
224 in SImode and DImode */
225 2, /* cost of moving SSE register */
226 {4, 8, 16}, /* cost of loading SSE registers
227 in SImode, DImode and TImode */
228 {4, 8, 16}, /* cost of storing SSE registers
229 in SImode, DImode and TImode */
230 3, /* MMX or SSE register to integer */
231 0, /* size of prefetch block */
232 0, /* number of parallel prefetches */
234 3, /* cost of FADD and FSUB insns. */
235 3, /* cost of FMUL instruction. */
236 39, /* cost of FDIV instruction. */
237 1, /* cost of FABS instruction. */
238 1, /* cost of FCHS instruction. */
239 70, /* cost of FSQRT instruction. */
243 struct processor_costs pentiumpro_cost
= {
244 1, /* cost of an add instruction */
245 1, /* cost of a lea instruction */
246 1, /* variable shift costs */
247 1, /* constant shift costs */
248 {4, 4, 4, 4, 4}, /* cost of starting a multiply */
249 0, /* cost of multiply per each bit set */
250 {17, 17, 17, 17, 17}, /* cost of a divide/mod */
251 1, /* cost of movsx */
252 1, /* cost of movzx */
253 8, /* "large" insn */
255 2, /* cost for loading QImode using movzbl */
256 {4, 4, 4}, /* cost of loading integer registers
257 in QImode, HImode and SImode.
258 Relative to reg-reg move (2). */
259 {2, 2, 2}, /* cost of storing integer registers */
260 2, /* cost of reg,reg fld/fst */
261 {2, 2, 6}, /* cost of loading fp registers
262 in SFmode, DFmode and XFmode */
263 {4, 4, 6}, /* cost of loading integer registers */
264 2, /* cost of moving MMX register */
265 {2, 2}, /* cost of loading MMX registers
266 in SImode and DImode */
267 {2, 2}, /* cost of storing MMX registers
268 in SImode and DImode */
269 2, /* cost of moving SSE register */
270 {2, 2, 8}, /* cost of loading SSE registers
271 in SImode, DImode and TImode */
272 {2, 2, 8}, /* cost of storing SSE registers
273 in SImode, DImode and TImode */
274 3, /* MMX or SSE register to integer */
275 32, /* size of prefetch block */
276 6, /* number of parallel prefetches */
278 3, /* cost of FADD and FSUB insns. */
279 5, /* cost of FMUL instruction. */
280 56, /* cost of FDIV instruction. */
281 2, /* cost of FABS instruction. */
282 2, /* cost of FCHS instruction. */
283 56, /* cost of FSQRT instruction. */
287 struct processor_costs k6_cost
= {
288 1, /* cost of an add instruction */
289 2, /* cost of a lea instruction */
290 1, /* variable shift costs */
291 1, /* constant shift costs */
292 {3, 3, 3, 3, 3}, /* cost of starting a multiply */
293 0, /* cost of multiply per each bit set */
294 {18, 18, 18, 18, 18}, /* cost of a divide/mod */
295 2, /* cost of movsx */
296 2, /* cost of movzx */
297 8, /* "large" insn */
299 3, /* cost for loading QImode using movzbl */
300 {4, 5, 4}, /* cost of loading integer registers
301 in QImode, HImode and SImode.
302 Relative to reg-reg move (2). */
303 {2, 3, 2}, /* cost of storing integer registers */
304 4, /* cost of reg,reg fld/fst */
305 {6, 6, 6}, /* cost of loading fp registers
306 in SFmode, DFmode and XFmode */
307 {4, 4, 4}, /* cost of loading integer registers */
308 2, /* cost of moving MMX register */
309 {2, 2}, /* cost of loading MMX registers
310 in SImode and DImode */
311 {2, 2}, /* cost of storing MMX registers
312 in SImode and DImode */
313 2, /* cost of moving SSE register */
314 {2, 2, 8}, /* cost of loading SSE registers
315 in SImode, DImode and TImode */
316 {2, 2, 8}, /* cost of storing SSE registers
317 in SImode, DImode and TImode */
318 6, /* MMX or SSE register to integer */
319 32, /* size of prefetch block */
320 1, /* number of parallel prefetches */
322 2, /* cost of FADD and FSUB insns. */
323 2, /* cost of FMUL instruction. */
324 56, /* cost of FDIV instruction. */
325 2, /* cost of FABS instruction. */
326 2, /* cost of FCHS instruction. */
327 56, /* cost of FSQRT instruction. */
331 struct processor_costs athlon_cost
= {
332 1, /* cost of an add instruction */
333 2, /* cost of a lea instruction */
334 1, /* variable shift costs */
335 1, /* constant shift costs */
336 {5, 5, 5, 5, 5}, /* cost of starting a multiply */
337 0, /* cost of multiply per each bit set */
338 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
339 1, /* cost of movsx */
340 1, /* cost of movzx */
341 8, /* "large" insn */
343 4, /* cost for loading QImode using movzbl */
344 {3, 4, 3}, /* cost of loading integer registers
345 in QImode, HImode and SImode.
346 Relative to reg-reg move (2). */
347 {3, 4, 3}, /* cost of storing integer registers */
348 4, /* cost of reg,reg fld/fst */
349 {4, 4, 12}, /* cost of loading fp registers
350 in SFmode, DFmode and XFmode */
351 {6, 6, 8}, /* cost of loading integer registers */
352 2, /* cost of moving MMX register */
353 {4, 4}, /* cost of loading MMX registers
354 in SImode and DImode */
355 {4, 4}, /* cost of storing MMX registers
356 in SImode and DImode */
357 2, /* cost of moving SSE register */
358 {4, 4, 6}, /* cost of loading SSE registers
359 in SImode, DImode and TImode */
360 {4, 4, 5}, /* cost of storing SSE registers
361 in SImode, DImode and TImode */
362 5, /* MMX or SSE register to integer */
363 64, /* size of prefetch block */
364 6, /* number of parallel prefetches */
366 4, /* cost of FADD and FSUB insns. */
367 4, /* cost of FMUL instruction. */
368 24, /* cost of FDIV instruction. */
369 2, /* cost of FABS instruction. */
370 2, /* cost of FCHS instruction. */
371 35, /* cost of FSQRT instruction. */
375 struct processor_costs k8_cost
= {
376 1, /* cost of an add instruction */
377 2, /* cost of a lea instruction */
378 1, /* variable shift costs */
379 1, /* constant shift costs */
380 {3, 4, 3, 4, 5}, /* cost of starting a multiply */
381 0, /* cost of multiply per each bit set */
382 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
383 1, /* cost of movsx */
384 1, /* cost of movzx */
385 8, /* "large" insn */
387 4, /* cost for loading QImode using movzbl */
388 {3, 4, 3}, /* cost of loading integer registers
389 in QImode, HImode and SImode.
390 Relative to reg-reg move (2). */
391 {3, 4, 3}, /* cost of storing integer registers */
392 4, /* cost of reg,reg fld/fst */
393 {4, 4, 12}, /* cost of loading fp registers
394 in SFmode, DFmode and XFmode */
395 {6, 6, 8}, /* cost of loading integer registers */
396 2, /* cost of moving MMX register */
397 {3, 3}, /* cost of loading MMX registers
398 in SImode and DImode */
399 {4, 4}, /* cost of storing MMX registers
400 in SImode and DImode */
401 2, /* cost of moving SSE register */
402 {4, 3, 6}, /* cost of loading SSE registers
403 in SImode, DImode and TImode */
404 {4, 4, 5}, /* cost of storing SSE registers
405 in SImode, DImode and TImode */
406 5, /* MMX or SSE register to integer */
407 64, /* size of prefetch block */
408 6, /* number of parallel prefetches */
410 4, /* cost of FADD and FSUB insns. */
411 4, /* cost of FMUL instruction. */
412 19, /* cost of FDIV instruction. */
413 2, /* cost of FABS instruction. */
414 2, /* cost of FCHS instruction. */
415 35, /* cost of FSQRT instruction. */
419 struct processor_costs pentium4_cost
= {
420 1, /* cost of an add instruction */
421 3, /* cost of a lea instruction */
422 4, /* variable shift costs */
423 4, /* constant shift costs */
424 {15, 15, 15, 15, 15}, /* cost of starting a multiply */
425 0, /* cost of multiply per each bit set */
426 {56, 56, 56, 56, 56}, /* cost of a divide/mod */
427 1, /* cost of movsx */
428 1, /* cost of movzx */
429 16, /* "large" insn */
431 2, /* cost for loading QImode using movzbl */
432 {4, 5, 4}, /* cost of loading integer registers
433 in QImode, HImode and SImode.
434 Relative to reg-reg move (2). */
435 {2, 3, 2}, /* cost of storing integer registers */
436 2, /* cost of reg,reg fld/fst */
437 {2, 2, 6}, /* cost of loading fp registers
438 in SFmode, DFmode and XFmode */
439 {4, 4, 6}, /* cost of loading integer registers */
440 2, /* cost of moving MMX register */
441 {2, 2}, /* cost of loading MMX registers
442 in SImode and DImode */
443 {2, 2}, /* cost of storing MMX registers
444 in SImode and DImode */
445 12, /* cost of moving SSE register */
446 {12, 12, 12}, /* cost of loading SSE registers
447 in SImode, DImode and TImode */
448 {2, 2, 8}, /* cost of storing SSE registers
449 in SImode, DImode and TImode */
450 10, /* MMX or SSE register to integer */
451 64, /* size of prefetch block */
452 6, /* number of parallel prefetches */
454 5, /* cost of FADD and FSUB insns. */
455 7, /* cost of FMUL instruction. */
456 43, /* cost of FDIV instruction. */
457 2, /* cost of FABS instruction. */
458 2, /* cost of FCHS instruction. */
459 43, /* cost of FSQRT instruction. */
463 struct processor_costs nocona_cost
= {
464 1, /* cost of an add instruction */
465 1, /* cost of a lea instruction */
466 1, /* variable shift costs */
467 1, /* constant shift costs */
468 {10, 10, 10, 10, 10}, /* cost of starting a multiply */
469 0, /* cost of multiply per each bit set */
470 {66, 66, 66, 66, 66}, /* cost of a divide/mod */
471 1, /* cost of movsx */
472 1, /* cost of movzx */
473 16, /* "large" insn */
475 4, /* cost for loading QImode using movzbl */
476 {4, 4, 4}, /* cost of loading integer registers
477 in QImode, HImode and SImode.
478 Relative to reg-reg move (2). */
479 {4, 4, 4}, /* cost of storing integer registers */
480 3, /* cost of reg,reg fld/fst */
481 {12, 12, 12}, /* cost of loading fp registers
482 in SFmode, DFmode and XFmode */
483 {4, 4, 4}, /* cost of loading integer registers */
484 6, /* cost of moving MMX register */
485 {12, 12}, /* cost of loading MMX registers
486 in SImode and DImode */
487 {12, 12}, /* cost of storing MMX registers
488 in SImode and DImode */
489 6, /* cost of moving SSE register */
490 {12, 12, 12}, /* cost of loading SSE registers
491 in SImode, DImode and TImode */
492 {12, 12, 12}, /* cost of storing SSE registers
493 in SImode, DImode and TImode */
494 8, /* MMX or SSE register to integer */
495 128, /* size of prefetch block */
496 8, /* number of parallel prefetches */
498 6, /* cost of FADD and FSUB insns. */
499 8, /* cost of FMUL instruction. */
500 40, /* cost of FDIV instruction. */
501 3, /* cost of FABS instruction. */
502 3, /* cost of FCHS instruction. */
503 44, /* cost of FSQRT instruction. */
/* Active cost table; points at pentium_cost by default and is
   re-pointed according to -mtune (selection code not in this view).  */
506 const struct processor_costs
*ix86_cost
= &pentium_cost
;
/* Tuning-heuristic bitmasks: each x86_* constant is a set of m_* CPU
   bits naming the processors for which the heuristic is enabled;
   ~X forms mean "every CPU except X".
   NOTE(review): this region is a line-sampled extract — verify the
   flag list against the complete i386.c before relying on it.  */
508 /* Processor feature/optimization bitmasks. */
509 #define m_386 (1<<PROCESSOR_I386)
510 #define m_486 (1<<PROCESSOR_I486)
511 #define m_PENT (1<<PROCESSOR_PENTIUM)
512 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
513 #define m_K6 (1<<PROCESSOR_K6)
514 #define m_ATHLON (1<<PROCESSOR_ATHLON)
515 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
516 #define m_K8 (1<<PROCESSOR_K8)
517 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
518 #define m_NOCONA (1<<PROCESSOR_NOCONA)
520 const int x86_use_leave
= m_386
| m_K6
| m_ATHLON_K8
;
521 const int x86_push_memory
= m_386
| m_K6
| m_ATHLON_K8
| m_PENT4
| m_NOCONA
;
522 const int x86_zero_extend_with_and
= m_486
| m_PENT
;
523 const int x86_movx
= m_ATHLON_K8
| m_PPRO
| m_PENT4
| m_NOCONA
/* m_386 | m_K6 */;
524 const int x86_double_with_add
= ~m_386
;
525 const int x86_use_bit_test
= m_386
;
526 const int x86_unroll_strlen
= m_486
| m_PENT
| m_PPRO
| m_ATHLON_K8
| m_K6
;
527 const int x86_cmove
= m_PPRO
| m_ATHLON_K8
| m_PENT4
| m_NOCONA
;
528 const int x86_3dnow_a
= m_ATHLON_K8
;
529 const int x86_deep_branch
= m_PPRO
| m_K6
| m_ATHLON_K8
| m_PENT4
| m_NOCONA
;
530 const int x86_branch_hints
= m_PENT4
| m_NOCONA
;
531 const int x86_use_sahf
= m_PPRO
| m_K6
| m_PENT4
| m_NOCONA
;
532 const int x86_partial_reg_stall
= m_PPRO
;
533 const int x86_use_loop
= m_K6
;
534 const int x86_use_fiop
= ~(m_PPRO
| m_ATHLON_K8
| m_PENT
);
535 const int x86_use_mov0
= m_K6
;
536 const int x86_use_cltd
= ~(m_PENT
| m_K6
);
537 const int x86_read_modify_write
= ~m_PENT
;
538 const int x86_read_modify
= ~(m_PENT
| m_PPRO
);
539 const int x86_split_long_moves
= m_PPRO
;
540 const int x86_promote_QImode
= m_K6
| m_PENT
| m_386
| m_486
| m_ATHLON_K8
;
541 const int x86_fast_prefix
= ~(m_PENT
| m_486
| m_386
);
542 const int x86_single_stringop
= m_386
| m_PENT4
| m_NOCONA
;
543 const int x86_qimode_math
= ~(0);
544 const int x86_promote_qi_regs
= 0;
545 const int x86_himode_math
= ~(m_PPRO
);
546 const int x86_promote_hi_regs
= m_PPRO
;
547 const int x86_sub_esp_4
= m_ATHLON_K8
| m_PPRO
| m_PENT4
| m_NOCONA
;
548 const int x86_sub_esp_8
= m_ATHLON_K8
| m_PPRO
| m_386
| m_486
| m_PENT4
| m_NOCONA
;
549 const int x86_add_esp_4
= m_ATHLON_K8
| m_K6
| m_PENT4
| m_NOCONA
;
550 const int x86_add_esp_8
= m_ATHLON_K8
| m_PPRO
| m_K6
| m_386
| m_486
| m_PENT4
| m_NOCONA
;
551 const int x86_integer_DFmode_moves
= ~(m_ATHLON_K8
| m_PENT4
| m_NOCONA
| m_PPRO
);
552 const int x86_partial_reg_dependency
= m_ATHLON_K8
| m_PENT4
| m_NOCONA
;
553 const int x86_memory_mismatch_stall
= m_ATHLON_K8
| m_PENT4
| m_NOCONA
;
554 const int x86_accumulate_outgoing_args
= m_ATHLON_K8
| m_PENT4
| m_NOCONA
| m_PPRO
;
555 const int x86_prologue_using_move
= m_ATHLON_K8
| m_PPRO
;
556 const int x86_epilogue_using_move
= m_ATHLON_K8
| m_PPRO
;
557 const int x86_decompose_lea
= m_PENT4
| m_NOCONA
;
558 const int x86_shift1
= ~m_486
;
559 const int x86_arch_always_fancy_math_387
= m_PENT
| m_PPRO
| m_ATHLON_K8
| m_PENT4
| m_NOCONA
;
560 const int x86_sse_partial_reg_dependency
= m_PENT4
| m_NOCONA
| m_PPRO
;
561 /* Set for machines where the type and dependencies are resolved on SSE register
562 parts instead of whole registers, so we may maintain just lower part of
563 scalar values in proper format leaving the upper part undefined. */
564 const int x86_sse_partial_regs
= m_ATHLON_K8
;
565 /* Athlon optimizes partial-register FPS special case, thus avoiding the
566 need for extra instructions beforehand */
567 const int x86_sse_partial_regs_for_cvtsd2ss
= 0;
568 const int x86_sse_typeless_stores
= m_ATHLON_K8
;
569 const int x86_sse_load0_by_pxor
= m_PPRO
| m_PENT4
| m_NOCONA
;
570 const int x86_use_ffreep
= m_ATHLON_K8
;
571 const int x86_rep_movl_optimal
= m_386
| m_PENT
| m_PPRO
| m_K6
;
572 const int x86_inter_unit_moves
= ~(m_ATHLON_K8
);
573 const int x86_ext_80387_constants
= m_K6
| m_ATHLON
| m_PENT4
| m_NOCONA
| m_PPRO
;
574 /* Some CPU cores are not able to predict more than 4 branch instructions in
575 the 16 byte window. */
576 const int x86_four_jump_limit
= m_PPRO
| m_ATHLON_K8
| m_PENT4
| m_NOCONA
;
578 /* In case the average insn count for single function invocation is
579 lower than this constant, emit fast (but longer) prologue and
   epilogue.  (NOTE(review): comment was truncated mid-sentence and
   left unterminated in this extract; terminator restored.) */
581 #define FAST_PROLOGUE_INSN_COUNT 20
583 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
584 static const char *const qi_reg_name
[] = QI_REGISTER_NAMES
;
585 static const char *const qi_high_reg_name
[] = QI_HIGH_REGISTER_NAMES
;
586 static const char *const hi_reg_name
[] = HI_REGISTER_NAMES
;
/* Register classification and debug-register-number tables.
   NOTE(review): the opening "{" and closing "};" of each aggregate
   initializer below were dropped by the line-sampled extraction, as
   were some interior rows — verify against the complete i386.c.  */
588 /* Array of the smallest class containing reg number REGNO, indexed by
589 REGNO. Used by REGNO_REG_CLASS in i386.h. */
591 enum reg_class
const regclass_map
[FIRST_PSEUDO_REGISTER
] =
594 AREG
, DREG
, CREG
, BREG
,
596 SIREG
, DIREG
, NON_Q_REGS
, NON_Q_REGS
,
598 FP_TOP_REG
, FP_SECOND_REG
, FLOAT_REGS
, FLOAT_REGS
,
599 FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
,
602 /* flags, fpsr, dirflag, frame */
603 NO_REGS
, NO_REGS
, NO_REGS
, NON_Q_REGS
,
604 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
606 MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
,
608 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
609 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
610 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
614 /* The "default" register map used in 32bit mode. */
616 int const dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
618 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
619 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
620 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
621 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
622 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
623 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
624 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
627 static int const x86_64_int_parameter_registers
[6] =
629 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
630 FIRST_REX_INT_REG
/*R8 */, FIRST_REX_INT_REG
+ 1 /*R9 */
633 static int const x86_64_int_return_registers
[4] =
635 0 /*RAX*/, 1 /*RDI*/, 5 /*RDI*/, 4 /*RSI*/
638 /* The "default" register map used in 64bit mode. */
639 int const dbx64_register_map
[FIRST_PSEUDO_REGISTER
] =
641 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
642 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
643 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
644 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
645 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
646 8,9,10,11,12,13,14,15, /* extended integer registers */
647 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
650 /* Define the register numbers to be used in Dwarf debugging information.
651 The SVR4 reference port C compiler uses the following register numbers
652 in its Dwarf output code:
653 0 for %eax (gcc regno = 0)
654 1 for %ecx (gcc regno = 2)
655 2 for %edx (gcc regno = 1)
656 3 for %ebx (gcc regno = 3)
657 4 for %esp (gcc regno = 7)
658 5 for %ebp (gcc regno = 6)
659 6 for %esi (gcc regno = 4)
660 7 for %edi (gcc regno = 5)
661 The following three DWARF register numbers are never generated by
662 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
663 believes these numbers have these meanings.
664 8 for %eip (no gcc equivalent)
665 9 for %eflags (gcc regno = 17)
666 10 for %trapno (no gcc equivalent)
667 It is not at all clear how we should number the FP stack registers
668 for the x86 architecture. If the version of SDB on x86/svr4 were
669 a bit less brain dead with respect to floating-point then we would
670 have a precedent to follow with respect to DWARF register numbers
671 for x86 FP registers, but the SDB on x86/svr4 is so completely
672 broken with respect to FP registers that it is hardly worth thinking
673 of it as something to strive for compatibility with.
674 The version of x86/svr4 SDB I have at the moment does (partially)
675 seem to believe that DWARF register number 11 is associated with
676 the x86 register %st(0), but that's about all. Higher DWARF
677 register numbers don't seem to be associated with anything in
678 particular, and even for DWARF regno 11, SDB only seems to under-
679 stand that it should say that a variable lives in %st(0) (when
680 asked via an `=' command) if we said it was in DWARF regno 11,
681 but SDB still prints garbage when asked for the value of the
682 variable in question (via a `/' command).
683 (Also note that the labels SDB prints for various FP stack regs
684 when doing an `x' command are all wrong.)
685 Note that these problems generally don't affect the native SVR4
686 C compiler because it doesn't allow the use of -O with -g and
687 because when it is *not* optimizing, it allocates a memory
688 location for each floating-point variable, and the memory
689 location is what gets described in the DWARF AT_location
690 attribute for the variable in question.
691 Regardless of the severe mental illness of the x86/svr4 SDB, we
692 do something sensible here and we use the following DWARF
693 register numbers. Note that these are all stack-top-relative
695 11 for %st(0) (gcc regno = 8)
696 12 for %st(1) (gcc regno = 9)
697 13 for %st(2) (gcc regno = 10)
698 14 for %st(3) (gcc regno = 11)
699 15 for %st(4) (gcc regno = 12)
700 16 for %st(5) (gcc regno = 13)
701 17 for %st(6) (gcc regno = 14)
702 18 for %st(7) (gcc regno = 15)
/* NOTE(review): the initializer's "{" and closing "};" were dropped
   by the extraction; verify against the complete i386.c.  */
704 int const svr4_dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
706 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
707 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
708 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
709 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
710 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
711 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
712 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
715 /* Test and compare insns in i386.md store the information needed to
716 generate branch and scc insns here. */
718 rtx ix86_compare_op0
= NULL_RTX
;
719 rtx ix86_compare_op1
= NULL_RTX
;
721 #define MAX_386_STACK_LOCALS 3
722 /* Size of the register save area. */
723 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
725 /* Define the structure for the machine field in struct function. */
/* NOTE(review): the bodies of struct stack_local_entry and of the
   stack-frame-layout struct below are truncated by the extraction
   (field lines and braces are missing); verify against the full
   file before editing.  */
727 struct stack_local_entry
GTY(())
732 struct stack_local_entry
*next
;
735 /* Structure describing stack frame layout.
736 Stack grows downward:
742 saved frame pointer if frame_pointer_needed
743 <- HARD_FRAME_POINTER
749 > to_allocate <- FRAME_POINTER
761 int outgoing_arguments_size
;
764 HOST_WIDE_INT to_allocate
;
765 /* The offsets relative to ARG_POINTER. */
766 HOST_WIDE_INT frame_pointer_offset
;
767 HOST_WIDE_INT hard_frame_pointer_offset
;
768 HOST_WIDE_INT stack_pointer_offset
;
770 /* When save_regs_using_mov is set, emit prologue using
771 move instead of push instructions. */
772 bool save_regs_using_mov
;
/* Command-line option state: raw option strings as passed by the user
   and their parsed counterparts, read by the option-handling code.  */
775 /* Used to enable/disable debugging features. */
776 const char *ix86_debug_arg_string
, *ix86_debug_addr_string
;
777 /* Code model option as passed by user. */
778 const char *ix86_cmodel_string
;
/* Parsed form of ix86_cmodel_string — presumably set during option
   processing (not visible in this extract); verify in the full file.  */
780 enum cmodel ix86_cmodel
;
782 const char *ix86_asm_string
;
783 enum asm_dialect ix86_asm_dialect
= ASM_ATT
;
785 const char *ix86_tls_dialect_string
;
786 enum tls_dialect ix86_tls_dialect
= TLS_DIALECT_GNU
;
788 /* Which unit we are generating floating point math for. */
789 enum fpmath_unit ix86_fpmath
;
791 /* Which cpu are we scheduling for. */
792 enum processor_type ix86_tune
;
793 /* Which instruction set architecture to use. */
794 enum processor_type ix86_arch
;
796 /* Strings to hold which cpu and instruction set architecture to use. */
797 const char *ix86_tune_string
; /* for -mtune=<xxx> */
798 const char *ix86_arch_string
; /* for -march=<xxx> */
799 const char *ix86_fpmath_string
; /* for -mfpmath=<xxx> */
801 /* # of registers to use to pass arguments. */
802 const char *ix86_regparm_string
;
804 /* true if sse prefetch instruction is not NOOP. */
805 int x86_prefetch_sse
;
807 /* ix86_regparm_string as a number */
810 /* Alignment to use for loops and jumps: */
812 /* Power of two alignment for loops. */
813 const char *ix86_align_loops_string
;
815 /* Power of two alignment for non-loop jumps. */
816 const char *ix86_align_jumps_string
;
818 /* Power of two alignment for stack boundary in bytes. */
819 const char *ix86_preferred_stack_boundary_string
;
821 /* Preferred alignment for stack boundary in bits. */
822 int ix86_preferred_stack_boundary
;
824 /* Values 1-5: see jump.c */
825 int ix86_branch_cost
;
826 const char *ix86_branch_cost_string
;
828 /* Power of two alignment for functions. */
829 const char *ix86_align_funcs_string
;
831 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
832 static char internal_label_prefix
[16];
833 static int internal_label_prefix_len
;
/* Forward declarations of file-local helpers.
   NOTE(review): several prototypes below are cut mid-parameter-list
   by the line-sampled extraction (continuation lines missing), and
   the struct ix86_address definition around "rtx base, index, disp"
   has lost its header/braces — verify against the complete file.  */
835 static int local_symbolic_operand (rtx
, enum machine_mode
);
836 static int tls_symbolic_operand_1 (rtx
, enum tls_model
);
837 static void output_pic_addr_const (FILE *, rtx
, int);
838 static void put_condition_code (enum rtx_code
, enum machine_mode
,
840 static const char *get_some_local_dynamic_name (void);
841 static int get_some_local_dynamic_name_1 (rtx
*, void *);
842 static rtx
maybe_get_pool_constant (rtx
);
843 static rtx
ix86_expand_int_compare (enum rtx_code
, rtx
, rtx
);
844 static enum rtx_code
ix86_prepare_fp_compare_args (enum rtx_code
, rtx
*,
846 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
847 static enum machine_mode
ix86_cc_modes_compatible (enum machine_mode
,
849 static rtx
get_thread_pointer (int);
850 static rtx
legitimize_tls_address (rtx
, enum tls_model
, int);
851 static void get_pc_thunk_name (char [32], unsigned int);
852 static rtx
gen_push (rtx
);
853 static int memory_address_length (rtx addr
);
854 static int ix86_flags_dependant (rtx
, rtx
, enum attr_type
);
855 static int ix86_agi_dependant (rtx
, rtx
, enum attr_type
);
856 static struct machine_function
* ix86_init_machine_status (void);
857 static int ix86_split_to_parts (rtx
, rtx
*, enum machine_mode
);
858 static int ix86_nsaved_regs (void);
859 static void ix86_emit_save_regs (void);
860 static void ix86_emit_save_regs_using_mov (rtx
, HOST_WIDE_INT
);
861 static void ix86_emit_restore_regs_using_mov (rtx
, HOST_WIDE_INT
, int);
862 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT
);
863 static HOST_WIDE_INT
ix86_GOT_alias_set (void);
864 static void ix86_adjust_counter (rtx
, HOST_WIDE_INT
);
865 static rtx
ix86_expand_aligntest (rtx
, int);
866 static void ix86_expand_strlensi_unroll_1 (rtx
, rtx
, rtx
);
867 static int ix86_issue_rate (void);
868 static int ix86_adjust_cost (rtx
, rtx
, rtx
, int);
869 static int ia32_multipass_dfa_lookahead (void);
870 static void ix86_init_mmx_sse_builtins (void);
871 static rtx
x86_this_parameter (tree
);
872 static void x86_output_mi_thunk (FILE *, tree
, HOST_WIDE_INT
,
873 HOST_WIDE_INT
, tree
);
874 static bool x86_can_output_mi_thunk (tree
, HOST_WIDE_INT
, HOST_WIDE_INT
, tree
);
875 static void x86_file_start (void);
876 static void ix86_reorg (void);
877 static bool ix86_expand_carry_flag_compare (enum rtx_code
, rtx
, rtx
, rtx
*);
878 static tree
ix86_build_builtin_va_list (void);
879 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS
*, enum machine_mode
,
881 static tree
ix86_gimplify_va_arg (tree
, tree
, tree
*, tree
*);
885 rtx base
, index
, disp
;
887 enum ix86_address_seg
{ SEG_DEFAULT
, SEG_FS
, SEG_GS
} seg
;
890 static int ix86_decompose_address (rtx
, struct ix86_address
*);
891 static int ix86_address_cost (rtx
);
892 static bool ix86_cannot_force_const_mem (rtx
);
893 static rtx
ix86_delegitimize_address (rtx
);
895 struct builtin_description
;
896 static rtx
ix86_expand_sse_comi (const struct builtin_description
*,
898 static rtx
ix86_expand_sse_compare (const struct builtin_description
*,
900 static rtx
ix86_expand_unop1_builtin (enum insn_code
, tree
, rtx
);
901 static rtx
ix86_expand_unop_builtin (enum insn_code
, tree
, rtx
, int);
902 static rtx
ix86_expand_binop_builtin (enum insn_code
, tree
, rtx
);
903 static rtx
ix86_expand_store_builtin (enum insn_code
, tree
);
904 static rtx
safe_vector_operand (rtx
, enum machine_mode
);
905 static enum rtx_code
ix86_fp_compare_code_to_integer (enum rtx_code
);
906 static void ix86_fp_comparison_codes (enum rtx_code code
, enum rtx_code
*,
907 enum rtx_code
*, enum rtx_code
*);
908 static rtx
ix86_expand_fp_compare (enum rtx_code
, rtx
, rtx
, rtx
, rtx
*, rtx
*);
909 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code
);
910 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code
);
911 static int ix86_fp_comparison_sahf_cost (enum rtx_code code
);
912 static int ix86_fp_comparison_cost (enum rtx_code code
);
913 static unsigned int ix86_select_alt_pic_regnum (void);
914 static int ix86_save_reg (unsigned int, int);
915 static void ix86_compute_frame_layout (struct ix86_frame
*);
916 static int ix86_comp_type_attributes (tree
, tree
);
917 static int ix86_function_regparm (tree
, tree
);
918 const struct attribute_spec ix86_attribute_table
[];
919 static bool ix86_function_ok_for_sibcall (tree
, tree
);
920 static tree
ix86_handle_cdecl_attribute (tree
*, tree
, tree
, int, bool *);
921 static tree
ix86_handle_regparm_attribute (tree
*, tree
, tree
, int, bool *);
922 static int ix86_value_regno (enum machine_mode
);
923 static bool contains_128bit_aligned_vector_p (tree
);
924 static rtx
ix86_struct_value_rtx (tree
, int);
925 static bool ix86_ms_bitfield_layout_p (tree
);
926 static tree
ix86_handle_struct_attribute (tree
*, tree
, tree
, int, bool *);
927 static int extended_reg_mentioned_1 (rtx
*, void *);
928 static bool ix86_rtx_costs (rtx
, int, int, int *);
929 static int min_insn_size (rtx
);
930 static tree
ix86_md_asm_clobbers (tree clobbers
);
932 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
933 static void ix86_svr3_asm_out_constructor (rtx
, int);
936 /* Register class used for passing given 64bit part of the argument.
937 These represent classes as documented by the PS ABI, with the exception
938 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
939 use SF or DFmode move instead of DImode to avoid reformatting penalties.
941 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
942 whenever possible (upper half does contain padding).
944 enum x86_64_reg_class
947 X86_64_INTEGER_CLASS
,
948 X86_64_INTEGERSI_CLASS
,
957 static const char * const x86_64_reg_class_name
[] =
958 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
960 #define MAX_CLASSES 4
961 static int classify_argument (enum machine_mode
, tree
,
962 enum x86_64_reg_class
[MAX_CLASSES
], int);
963 static int examine_argument (enum machine_mode
, tree
, int, int *, int *);
964 static rtx
construct_container (enum machine_mode
, tree
, int, int, int,
966 static enum x86_64_reg_class
merge_classes (enum x86_64_reg_class
,
967 enum x86_64_reg_class
);
969 /* Table of constants used by fldpi, fldln2, etc.... */
/* Holds extended-precision (80387) constants; presumably populated
   lazily by init_ext_80387_constants -- TODO confirm against that
   function's definition (not visible in this extract).  */
970 static REAL_VALUE_TYPE ext_80387_constants_table
[5];
/* Nonzero once ext_80387_constants_table has been initialized.  */
971 static bool ext_80387_constants_init
= 0;
/* One-time initializer for the table above (declared here only).  */
972 static void init_ext_80387_constants (void);
974 /* Initialize the GCC target structure. */
975 #undef TARGET_ATTRIBUTE_TABLE
976 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
977 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
978 # undef TARGET_MERGE_DECL_ATTRIBUTES
979 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
982 #undef TARGET_COMP_TYPE_ATTRIBUTES
983 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
985 #undef TARGET_INIT_BUILTINS
986 #define TARGET_INIT_BUILTINS ix86_init_builtins
988 #undef TARGET_EXPAND_BUILTIN
989 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
991 #undef TARGET_ASM_FUNCTION_EPILOGUE
992 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
994 #undef TARGET_ASM_OPEN_PAREN
995 #define TARGET_ASM_OPEN_PAREN ""
996 #undef TARGET_ASM_CLOSE_PAREN
997 #define TARGET_ASM_CLOSE_PAREN ""
999 #undef TARGET_ASM_ALIGNED_HI_OP
1000 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
1001 #undef TARGET_ASM_ALIGNED_SI_OP
1002 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
1004 #undef TARGET_ASM_ALIGNED_DI_OP
1005 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
1008 #undef TARGET_ASM_UNALIGNED_HI_OP
1009 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
1010 #undef TARGET_ASM_UNALIGNED_SI_OP
1011 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1012 #undef TARGET_ASM_UNALIGNED_DI_OP
1013 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1015 #undef TARGET_SCHED_ADJUST_COST
1016 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1017 #undef TARGET_SCHED_ISSUE_RATE
1018 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1019 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
1020 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE hook_int_void_1
1021 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1022 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1023 ia32_multipass_dfa_lookahead
1025 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1026 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1029 #undef TARGET_HAVE_TLS
1030 #define TARGET_HAVE_TLS true
1032 #undef TARGET_CANNOT_FORCE_CONST_MEM
1033 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1035 #undef TARGET_DELEGITIMIZE_ADDRESS
1036 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1038 #undef TARGET_MS_BITFIELD_LAYOUT_P
1039 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1041 #undef TARGET_ASM_OUTPUT_MI_THUNK
1042 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1043 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1044 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1046 #undef TARGET_ASM_FILE_START
1047 #define TARGET_ASM_FILE_START x86_file_start
1049 #undef TARGET_RTX_COSTS
1050 #define TARGET_RTX_COSTS ix86_rtx_costs
1051 #undef TARGET_ADDRESS_COST
1052 #define TARGET_ADDRESS_COST ix86_address_cost
1054 #undef TARGET_FIXED_CONDITION_CODE_REGS
1055 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1056 #undef TARGET_CC_MODES_COMPATIBLE
1057 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1059 #undef TARGET_MACHINE_DEPENDENT_REORG
1060 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1062 #undef TARGET_BUILD_BUILTIN_VA_LIST
1063 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1065 #undef TARGET_MD_ASM_CLOBBERS
1066 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1068 #undef TARGET_PROMOTE_PROTOTYPES
1069 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1070 #undef TARGET_STRUCT_VALUE_RTX
1071 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1072 #undef TARGET_SETUP_INCOMING_VARARGS
1073 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1075 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1076 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
/* The global target hook vector for the i386 backend.  TARGET_INITIALIZER
   expands using the TARGET_* macros #define'd earlier in this file.  */
1078 struct gcc_target targetm
= TARGET_INITIALIZER
;
1081 /* The svr4 ABI for the i386 says that records and unions are returned
1083 #ifndef DEFAULT_PCC_STRUCT_RETURN
1084 #define DEFAULT_PCC_STRUCT_RETURN 1
1087 /* Sometimes certain combinations of command options do not make
1088 sense on a particular target machine. You can define a macro
1089 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1090 defined, is executed once just after all the command options have
1093 Don't use this macro to turn on various extra optimizations for
1094 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1097 override_options (void)
1100 /* Comes from final.c -- no real reason to change it. */
1101 #define MAX_CODE_ALIGN 16
1105 const struct processor_costs
*cost
; /* Processor costs */
1106 const int target_enable
; /* Target flags to enable. */
1107 const int target_disable
; /* Target flags to disable. */
1108 const int align_loop
; /* Default alignments. */
1109 const int align_loop_max_skip
;
1110 const int align_jump
;
1111 const int align_jump_max_skip
;
1112 const int align_func
;
1114 const processor_target_table
[PROCESSOR_max
] =
1116 {&i386_cost
, 0, 0, 4, 3, 4, 3, 4},
1117 {&i486_cost
, 0, 0, 16, 15, 16, 15, 16},
1118 {&pentium_cost
, 0, 0, 16, 7, 16, 7, 16},
1119 {&pentiumpro_cost
, 0, 0, 16, 15, 16, 7, 16},
1120 {&k6_cost
, 0, 0, 32, 7, 32, 7, 32},
1121 {&athlon_cost
, 0, 0, 16, 7, 16, 7, 16},
1122 {&pentium4_cost
, 0, 0, 0, 0, 0, 0, 0},
1123 {&k8_cost
, 0, 0, 16, 7, 16, 7, 16},
1124 {&nocona_cost
, 0, 0, 0, 0, 0, 0, 0}
1127 static const char * const cpu_names
[] = TARGET_CPU_DEFAULT_NAMES
;
1130 const char *const name
; /* processor name or nickname. */
1131 const enum processor_type processor
;
1132 const enum pta_flags
1138 PTA_PREFETCH_SSE
= 16,
1144 const processor_alias_table
[] =
1146 {"i386", PROCESSOR_I386
, 0},
1147 {"i486", PROCESSOR_I486
, 0},
1148 {"i586", PROCESSOR_PENTIUM
, 0},
1149 {"pentium", PROCESSOR_PENTIUM
, 0},
1150 {"pentium-mmx", PROCESSOR_PENTIUM
, PTA_MMX
},
1151 {"winchip-c6", PROCESSOR_I486
, PTA_MMX
},
1152 {"winchip2", PROCESSOR_I486
, PTA_MMX
| PTA_3DNOW
},
1153 {"c3", PROCESSOR_I486
, PTA_MMX
| PTA_3DNOW
},
1154 {"c3-2", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_SSE
},
1155 {"i686", PROCESSOR_PENTIUMPRO
, 0},
1156 {"pentiumpro", PROCESSOR_PENTIUMPRO
, 0},
1157 {"pentium2", PROCESSOR_PENTIUMPRO
, PTA_MMX
},
1158 {"pentium3", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
| PTA_PREFETCH_SSE
},
1159 {"pentium3m", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
| PTA_PREFETCH_SSE
},
1160 {"pentium-m", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
| PTA_PREFETCH_SSE
| PTA_SSE2
},
1161 {"pentium4", PROCESSOR_PENTIUM4
, PTA_SSE
| PTA_SSE2
1162 | PTA_MMX
| PTA_PREFETCH_SSE
},
1163 {"pentium4m", PROCESSOR_PENTIUM4
, PTA_SSE
| PTA_SSE2
1164 | PTA_MMX
| PTA_PREFETCH_SSE
},
1165 {"prescott", PROCESSOR_NOCONA
, PTA_SSE
| PTA_SSE2
| PTA_SSE3
1166 | PTA_MMX
| PTA_PREFETCH_SSE
},
1167 {"nocona", PROCESSOR_NOCONA
, PTA_SSE
| PTA_SSE2
| PTA_SSE3
| PTA_64BIT
1168 | PTA_MMX
| PTA_PREFETCH_SSE
},
1169 {"k6", PROCESSOR_K6
, PTA_MMX
},
1170 {"k6-2", PROCESSOR_K6
, PTA_MMX
| PTA_3DNOW
},
1171 {"k6-3", PROCESSOR_K6
, PTA_MMX
| PTA_3DNOW
},
1172 {"athlon", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1174 {"athlon-tbird", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
1175 | PTA_3DNOW
| PTA_3DNOW_A
},
1176 {"athlon-4", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1177 | PTA_3DNOW_A
| PTA_SSE
},
1178 {"athlon-xp", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1179 | PTA_3DNOW_A
| PTA_SSE
},
1180 {"athlon-mp", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1181 | PTA_3DNOW_A
| PTA_SSE
},
1182 {"x86-64", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_64BIT
1183 | PTA_SSE
| PTA_SSE2
},
1184 {"k8", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
| PTA_64BIT
1185 | PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
},
1186 {"opteron", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
| PTA_64BIT
1187 | PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
},
1188 {"athlon64", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
| PTA_64BIT
1189 | PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
},
1190 {"athlon-fx", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
| PTA_64BIT
1191 | PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
},
1194 int const pta_size
= ARRAY_SIZE (processor_alias_table
);
1196 /* Set the default values for switches whose default depends on TARGET_64BIT
1197 in case they weren't overwritten by command line options. */
1200 if (flag_omit_frame_pointer
== 2)
1201 flag_omit_frame_pointer
= 1;
1202 if (flag_asynchronous_unwind_tables
== 2)
1203 flag_asynchronous_unwind_tables
= 1;
1204 if (flag_pcc_struct_return
== 2)
1205 flag_pcc_struct_return
= 0;
1209 if (flag_omit_frame_pointer
== 2)
1210 flag_omit_frame_pointer
= 0;
1211 if (flag_asynchronous_unwind_tables
== 2)
1212 flag_asynchronous_unwind_tables
= 0;
1213 if (flag_pcc_struct_return
== 2)
1214 flag_pcc_struct_return
= DEFAULT_PCC_STRUCT_RETURN
;
1217 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1218 SUBTARGET_OVERRIDE_OPTIONS
;
1221 if (!ix86_tune_string
&& ix86_arch_string
)
1222 ix86_tune_string
= ix86_arch_string
;
1223 if (!ix86_tune_string
)
1224 ix86_tune_string
= cpu_names
[TARGET_CPU_DEFAULT
];
1225 if (!ix86_arch_string
)
1226 ix86_arch_string
= TARGET_64BIT
? "x86-64" : "i386";
1228 if (ix86_cmodel_string
!= 0)
1230 if (!strcmp (ix86_cmodel_string
, "small"))
1231 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
1233 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string
);
1234 else if (!strcmp (ix86_cmodel_string
, "32"))
1235 ix86_cmodel
= CM_32
;
1236 else if (!strcmp (ix86_cmodel_string
, "kernel") && !flag_pic
)
1237 ix86_cmodel
= CM_KERNEL
;
1238 else if (!strcmp (ix86_cmodel_string
, "medium") && !flag_pic
)
1239 ix86_cmodel
= CM_MEDIUM
;
1240 else if (!strcmp (ix86_cmodel_string
, "large") && !flag_pic
)
1241 ix86_cmodel
= CM_LARGE
;
1243 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string
);
1247 ix86_cmodel
= CM_32
;
1249 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
1251 if (ix86_asm_string
!= 0)
1253 if (!strcmp (ix86_asm_string
, "intel"))
1254 ix86_asm_dialect
= ASM_INTEL
;
1255 else if (!strcmp (ix86_asm_string
, "att"))
1256 ix86_asm_dialect
= ASM_ATT
;
1258 error ("bad value (%s) for -masm= switch", ix86_asm_string
);
1260 if ((TARGET_64BIT
== 0) != (ix86_cmodel
== CM_32
))
1261 error ("code model `%s' not supported in the %s bit mode",
1262 ix86_cmodel_string
, TARGET_64BIT
? "64" : "32");
1263 if (ix86_cmodel
== CM_LARGE
)
1264 sorry ("code model `large' not supported yet");
1265 if ((TARGET_64BIT
!= 0) != ((target_flags
& MASK_64BIT
) != 0))
1266 sorry ("%i-bit mode not compiled in",
1267 (target_flags
& MASK_64BIT
) ? 64 : 32);
1269 for (i
= 0; i
< pta_size
; i
++)
1270 if (! strcmp (ix86_arch_string
, processor_alias_table
[i
].name
))
1272 ix86_arch
= processor_alias_table
[i
].processor
;
1273 /* Default cpu tuning to the architecture. */
1274 ix86_tune
= ix86_arch
;
1275 if (processor_alias_table
[i
].flags
& PTA_MMX
1276 && !(target_flags_explicit
& MASK_MMX
))
1277 target_flags
|= MASK_MMX
;
1278 if (processor_alias_table
[i
].flags
& PTA_3DNOW
1279 && !(target_flags_explicit
& MASK_3DNOW
))
1280 target_flags
|= MASK_3DNOW
;
1281 if (processor_alias_table
[i
].flags
& PTA_3DNOW_A
1282 && !(target_flags_explicit
& MASK_3DNOW_A
))
1283 target_flags
|= MASK_3DNOW_A
;
1284 if (processor_alias_table
[i
].flags
& PTA_SSE
1285 && !(target_flags_explicit
& MASK_SSE
))
1286 target_flags
|= MASK_SSE
;
1287 if (processor_alias_table
[i
].flags
& PTA_SSE2
1288 && !(target_flags_explicit
& MASK_SSE2
))
1289 target_flags
|= MASK_SSE2
;
1290 if (processor_alias_table
[i
].flags
& PTA_SSE3
1291 && !(target_flags_explicit
& MASK_SSE3
))
1292 target_flags
|= MASK_SSE3
;
1293 if (processor_alias_table
[i
].flags
& PTA_PREFETCH_SSE
)
1294 x86_prefetch_sse
= true;
1295 if (TARGET_64BIT
&& !(processor_alias_table
[i
].flags
& PTA_64BIT
))
1296 error ("CPU you selected does not support x86-64 instruction set");
1301 error ("bad value (%s) for -march= switch", ix86_arch_string
);
1303 for (i
= 0; i
< pta_size
; i
++)
1304 if (! strcmp (ix86_tune_string
, processor_alias_table
[i
].name
))
1306 ix86_tune
= processor_alias_table
[i
].processor
;
1307 if (TARGET_64BIT
&& !(processor_alias_table
[i
].flags
& PTA_64BIT
))
1308 error ("CPU you selected does not support x86-64 instruction set");
1312 error ("bad value (%s) for -mtune= switch", ix86_tune_string
);
1315 ix86_cost
= &size_cost
;
1317 ix86_cost
= processor_target_table
[ix86_tune
].cost
;
1318 target_flags
|= processor_target_table
[ix86_tune
].target_enable
;
1319 target_flags
&= ~processor_target_table
[ix86_tune
].target_disable
;
1321 /* Arrange to set up i386_stack_locals for all functions. */
1322 init_machine_status
= ix86_init_machine_status
;
1324 /* Validate -mregparm= value. */
1325 if (ix86_regparm_string
)
1327 i
= atoi (ix86_regparm_string
);
1328 if (i
< 0 || i
> REGPARM_MAX
)
1329 error ("-mregparm=%d is not between 0 and %d", i
, REGPARM_MAX
);
1335 ix86_regparm
= REGPARM_MAX
;
1337 /* If the user has provided any of the -malign-* options,
1338 warn and use that value only if -falign-* is not set.
1339 Remove this code in GCC 3.2 or later. */
1340 if (ix86_align_loops_string
)
1342 warning ("-malign-loops is obsolete, use -falign-loops");
1343 if (align_loops
== 0)
1345 i
= atoi (ix86_align_loops_string
);
1346 if (i
< 0 || i
> MAX_CODE_ALIGN
)
1347 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
1349 align_loops
= 1 << i
;
1353 if (ix86_align_jumps_string
)
1355 warning ("-malign-jumps is obsolete, use -falign-jumps");
1356 if (align_jumps
== 0)
1358 i
= atoi (ix86_align_jumps_string
);
1359 if (i
< 0 || i
> MAX_CODE_ALIGN
)
1360 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
1362 align_jumps
= 1 << i
;
1366 if (ix86_align_funcs_string
)
1368 warning ("-malign-functions is obsolete, use -falign-functions");
1369 if (align_functions
== 0)
1371 i
= atoi (ix86_align_funcs_string
);
1372 if (i
< 0 || i
> MAX_CODE_ALIGN
)
1373 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
1375 align_functions
= 1 << i
;
1379 /* Default align_* from the processor table. */
1380 if (align_loops
== 0)
1382 align_loops
= processor_target_table
[ix86_tune
].align_loop
;
1383 align_loops_max_skip
= processor_target_table
[ix86_tune
].align_loop_max_skip
;
1385 if (align_jumps
== 0)
1387 align_jumps
= processor_target_table
[ix86_tune
].align_jump
;
1388 align_jumps_max_skip
= processor_target_table
[ix86_tune
].align_jump_max_skip
;
1390 if (align_functions
== 0)
1392 align_functions
= processor_target_table
[ix86_tune
].align_func
;
1395 /* Validate -mpreferred-stack-boundary= value, or provide default.
1396 The default of 128 bits is for Pentium III's SSE __m128, but we
1397 don't want additional code to keep the stack aligned when
1398 optimizing for code size. */
1399 ix86_preferred_stack_boundary
= (optimize_size
1400 ? TARGET_64BIT
? 128 : 32
1402 if (ix86_preferred_stack_boundary_string
)
1404 i
= atoi (ix86_preferred_stack_boundary_string
);
1405 if (i
< (TARGET_64BIT
? 4 : 2) || i
> 12)
1406 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i
,
1407 TARGET_64BIT
? 4 : 2);
1409 ix86_preferred_stack_boundary
= (1 << i
) * BITS_PER_UNIT
;
1412 /* Validate -mbranch-cost= value, or provide default. */
1413 ix86_branch_cost
= processor_target_table
[ix86_tune
].cost
->branch_cost
;
1414 if (ix86_branch_cost_string
)
1416 i
= atoi (ix86_branch_cost_string
);
1418 error ("-mbranch-cost=%d is not between 0 and 5", i
);
1420 ix86_branch_cost
= i
;
1423 if (ix86_tls_dialect_string
)
1425 if (strcmp (ix86_tls_dialect_string
, "gnu") == 0)
1426 ix86_tls_dialect
= TLS_DIALECT_GNU
;
1427 else if (strcmp (ix86_tls_dialect_string
, "sun") == 0)
1428 ix86_tls_dialect
= TLS_DIALECT_SUN
;
1430 error ("bad value (%s) for -mtls-dialect= switch",
1431 ix86_tls_dialect_string
);
1434 /* Keep nonleaf frame pointers. */
1435 if (TARGET_OMIT_LEAF_FRAME_POINTER
)
1436 flag_omit_frame_pointer
= 1;
1438 /* If we're doing fast math, we don't care about comparison order
1439 wrt NaNs. This lets us use a shorter comparison sequence. */
1440 if (flag_unsafe_math_optimizations
)
1441 target_flags
&= ~MASK_IEEE_FP
;
1443 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1444 since the insns won't need emulation. */
1445 if (x86_arch_always_fancy_math_387
& (1 << ix86_arch
))
1446 target_flags
&= ~MASK_NO_FANCY_MATH_387
;
1448 /* Turn on SSE2 builtins for -msse3. */
1450 target_flags
|= MASK_SSE2
;
1452 /* Turn on SSE builtins for -msse2. */
1454 target_flags
|= MASK_SSE
;
1458 if (TARGET_ALIGN_DOUBLE
)
1459 error ("-malign-double makes no sense in the 64bit mode");
1461 error ("-mrtd calling convention not supported in the 64bit mode");
1462 /* Enable by default the SSE and MMX builtins. */
1463 target_flags
|= (MASK_SSE2
| MASK_SSE
| MASK_MMX
| MASK_128BIT_LONG_DOUBLE
);
1464 ix86_fpmath
= FPMATH_SSE
;
1468 ix86_fpmath
= FPMATH_387
;
1469 /* i386 ABI does not specify red zone. It still makes sense to use it
1470 when programmer takes care to stack from being destroyed. */
1471 if (!(target_flags_explicit
& MASK_NO_RED_ZONE
))
1472 target_flags
|= MASK_NO_RED_ZONE
;
1475 if (ix86_fpmath_string
!= 0)
1477 if (! strcmp (ix86_fpmath_string
, "387"))
1478 ix86_fpmath
= FPMATH_387
;
1479 else if (! strcmp (ix86_fpmath_string
, "sse"))
1483 warning ("SSE instruction set disabled, using 387 arithmetics");
1484 ix86_fpmath
= FPMATH_387
;
1487 ix86_fpmath
= FPMATH_SSE
;
1489 else if (! strcmp (ix86_fpmath_string
, "387,sse")
1490 || ! strcmp (ix86_fpmath_string
, "sse,387"))
1494 warning ("SSE instruction set disabled, using 387 arithmetics");
1495 ix86_fpmath
= FPMATH_387
;
1497 else if (!TARGET_80387
)
1499 warning ("387 instruction set disabled, using SSE arithmetics");
1500 ix86_fpmath
= FPMATH_SSE
;
1503 ix86_fpmath
= FPMATH_SSE
| FPMATH_387
;
1506 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string
);
1509 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1513 target_flags
|= MASK_MMX
;
1514 x86_prefetch_sse
= true;
1517 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1520 target_flags
|= MASK_MMX
;
1521 /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
1522 extensions it adds. */
1523 if (x86_3dnow_a
& (1 << ix86_arch
))
1524 target_flags
|= MASK_3DNOW_A
;
1526 if ((x86_accumulate_outgoing_args
& TUNEMASK
)
1527 && !(target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
1529 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
1531 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1534 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix
, "LX", 0);
1535 p
= strchr (internal_label_prefix
, 'X');
1536 internal_label_prefix_len
= p
- internal_label_prefix
;
/* Set optimization-level-dependent defaults for the x86 backend.
   LEVEL is the -O level; SIZE (unused here) is nonzero for -Os.
   NOTE(review): the return type line and braces are missing from this
   extract -- the surrounding lines appear truncated.  */
1542 optimization_options (int level
, int size ATTRIBUTE_UNUSED
)
1544 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1545 make the problem with not enough registers even worse. */
1546 #ifdef INSN_SCHEDULING
1548 flag_schedule_insns
= 0;
1551 /* The default values of these switches depend on the TARGET_64BIT
1552 that is not known at this moment. Mark these values with 2 and
1553 let the user override these. In case there is no command line option
1554 specifying them, we will set the defaults in override_options. */
/* 2 is a sentinel meaning "not set on the command line"; override_options
   replaces it with the real target-dependent default.  */
1556 flag_omit_frame_pointer
= 2;
1557 flag_pcc_struct_return
= 2;
1558 flag_asynchronous_unwind_tables
= 2;
1561 /* Table of valid machine attributes. */
/* Exposed through the TARGET_ATTRIBUTE_TABLE macro defined earlier in
   this file; the table is terminated by the all-NULL sentinel entry.
   NOTE(review): the array's opening/closing braces and the matching
   #endif appear to be missing from this extract.  */
1562 const struct attribute_spec ix86_attribute_table
[] =
1564 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1565 /* Stdcall attribute says callee is responsible for popping arguments
1566 if they are not variable. */
1567 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute
},
1568 /* Fastcall attribute says callee is responsible for popping arguments
1569 if they are not variable. */
1570 { "fastcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute
},
1571 /* Cdecl attribute says the callee is a normal C declaration */
1572 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute
},
1573 /* Regparm attribute specifies how many integer arguments are to be
1574 passed in registers. */
1575 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute
},
1576 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1577 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute
},
1578 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute
},
1579 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute
},
1581 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
},
1582 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
},
/* Sentinel marking the end of the table.  */
1583 { NULL
, 0, 0, false, false, false, NULL
}
1586 /* Decide whether we can make a sibling call to a function. DECL is the
1587 declaration of the function being targeted by the call and EXP is the
1588 CALL_EXPR representing the call. */
/* NOTE(review): this extract is missing interior lines (the return type,
   braces, and the actual return statements), so only the surviving
   condition lines are annotated below.  */
1591 ix86_function_ok_for_sibcall (tree decl
, tree exp
)
1593 /* If we are generating position-independent code, we cannot sibcall
1594 optimize any indirect call, or a direct call to a global function,
1595 as the PLT requires %ebx be live. */
1596 if (!TARGET_64BIT
&& flag_pic
&& (!decl
|| TREE_PUBLIC (decl
)))
1599 /* If we are returning floats on the 80387 register stack, we cannot
1600 make a sibcall from a function that doesn't return a float to a
1601 function that does or, conversely, from a function that does return
1602 a float to a function that doesn't; the necessary stack adjustment
1603 would not be executed. */
1604 if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp
)))
1605 != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun
->decl
)))))
1608 /* If this call is indirect, we'll need to be able to use a call-clobbered
1609 register for the address of the target function. Make sure that all
1610 such registers are not used for passing parameters. */
1611 if (!decl
&& !TARGET_64BIT
)
1615 /* We're looking at the CALL_EXPR, we need the type of the function. */
1616 type
= TREE_OPERAND (exp
, 0); /* pointer expression */
1617 type
= TREE_TYPE (type
); /* pointer type */
1618 type
= TREE_TYPE (type
); /* function type */
1620 if (ix86_function_regparm (type
, NULL
) >= 3)
1622 /* ??? Need to count the actual number of registers to be used,
1623 not the possible number of registers. Fix later. */
1628 /* Otherwise okay. That also includes certain types of indirect calls. */
1632 /* Handle a "cdecl", "stdcall", or "fastcall" attribute;
1633 arguments as in struct attribute_spec.handler. */
/* NODE is the type the attribute is attached to; NAME identifies which
   of the three attributes is being processed.  On a problem, *NO_ADD_ATTRS
   is set so the attribute is dropped.  NOTE(review): several interior
   lines (return type, braces, return statements) are missing from this
   extract.  */
1635 ix86_handle_cdecl_attribute (tree
*node
, tree name
,
1636 tree args ATTRIBUTE_UNUSED
,
1637 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
/* These attributes apply only to function-like nodes; otherwise warn
   and drop the attribute.  */
1639 if (TREE_CODE (*node
) != FUNCTION_TYPE
1640 && TREE_CODE (*node
) != METHOD_TYPE
1641 && TREE_CODE (*node
) != FIELD_DECL
1642 && TREE_CODE (*node
) != TYPE_DECL
)
1644 warning ("`%s' attribute only applies to functions",
1645 IDENTIFIER_POINTER (name
));
1646 *no_add_attrs
= true;
/* fastcall is mutually exclusive with both stdcall and regparm.  */
1650 if (is_attribute_p ("fastcall", name
))
1652 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
1654 error ("fastcall and stdcall attributes are not compatible");
1656 else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node
)))
1658 error ("fastcall and regparm attributes are not compatible");
/* ... and stdcall, symmetrically, conflicts with fastcall.  */
1661 else if (is_attribute_p ("stdcall", name
))
1663 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
1665 error ("fastcall and stdcall attributes are not compatible");
1672 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name
));
1673 *no_add_attrs
= true;
1679 /* Handle a "regparm" attribute;
1680 arguments as in struct attribute_spec.handler. */
/* Validates that the attribute sits on a function-like node and that
   its single argument is an integer constant not larger than
   REGPARM_MAX; otherwise warns and sets *NO_ADD_ATTRS.
   NOTE(review): interior lines (return type, braces, declarations and
   returns) are missing from this extract.  */
1682 ix86_handle_regparm_attribute (tree
*node
, tree name
, tree args
,
1683 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
1685 if (TREE_CODE (*node
) != FUNCTION_TYPE
1686 && TREE_CODE (*node
) != METHOD_TYPE
1687 && TREE_CODE (*node
) != FIELD_DECL
1688 && TREE_CODE (*node
) != TYPE_DECL
)
1690 warning ("`%s' attribute only applies to functions",
1691 IDENTIFIER_POINTER (name
));
1692 *no_add_attrs
= true;
/* The attribute's single argument is the requested register count.  */
1698 cst
= TREE_VALUE (args
);
1699 if (TREE_CODE (cst
) != INTEGER_CST
)
1701 warning ("`%s' attribute requires an integer constant argument",
1702 IDENTIFIER_POINTER (name
));
1703 *no_add_attrs
= true;
1705 else if (compare_tree_int (cst
, REGPARM_MAX
) > 0)
1707 warning ("argument to `%s' attribute larger than %d",
1708 IDENTIFIER_POINTER (name
), REGPARM_MAX
);
1709 *no_add_attrs
= true;
/* regparm cannot be combined with fastcall.  */
1712 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
1714 error ("fastcall and regparm attributes are not compatible");
1721 /* Return 0 if the attributes for two types are incompatible, 1 if they
1722 are compatible, and 2 if they are nearly compatible (which causes a
1723 warning to be generated). */
/* NOTE(review): the return statements and braces are missing from this
   extract; only the comparison conditions survive.  */
1726 ix86_comp_type_attributes (tree type1
, tree type2
)
1728 /* Check for mismatch of non-default calling convention. */
/* Under -mrtd the default becomes stdcall, so "cdecl" is the non-default
   marker; otherwise "stdcall" is.  */
1729 const char *const rtdstr
= TARGET_RTD
? "cdecl" : "stdcall";
1731 if (TREE_CODE (type1
) != FUNCTION_TYPE
)
1734 /* Check for mismatched fastcall types */
1735 if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1
))
1736 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2
)))
1739 /* Check for mismatched return types (cdecl vs stdcall). */
1740 if (!lookup_attribute (rtdstr
, TYPE_ATTRIBUTES (type1
))
1741 != !lookup_attribute (rtdstr
, TYPE_ATTRIBUTES (type2
)))
/* Differing regparm counts also make the types incompatible.  */
1743 if (ix86_function_regparm (type1
, NULL
)
1744 != ix86_function_regparm (type2
, NULL
))
1749 /* Return the regparm value for a function with the indicated TYPE and DECL.
1750 DECL may be NULL when calling function indirectly
1751 or considering a libcall. */
/* NOTE(review): interior lines (return type, braces, the fastcall branch
   body and the final return) are missing from this extract.  */
1754 ix86_function_regparm (tree type
, tree decl
)
/* Start from the -mregparm command-line default.  */
1757 int regparm
= ix86_regparm
;
/* Set when the user explicitly requested a convention via attributes.  */
1758 bool user_convention
= false;
/* An explicit regparm attribute overrides the default count.  */
1762 attr
= lookup_attribute ("regparm", TYPE_ATTRIBUTES (type
));
1765 regparm
= TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
)));
1766 user_convention
= true;
1769 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type
)))
1772 user_convention
= true;
1775 /* Use register calling convention for local functions when possible. */
1776 if (!TARGET_64BIT
&& !user_convention
&& decl
1777 && flag_unit_at_a_time
&& !profile_flag
)
1779 struct cgraph_local_info
*i
= cgraph_local_info (decl
);
1782 /* We can't use regparm(3) for nested functions as these use
1783 static chain pointer in third argument. */
1784 if (DECL_CONTEXT (decl
) && !DECL_NO_STATIC_CHAIN (decl
))
1794 /* Return true if EAX is live at the start of the function. Used by
1795 ix86_expand_prologue to determine if we need special help before
1796 calling allocate_stack_worker. */
1799 ix86_eax_live_at_start_p (void)
1801 /* Cheat. Don't bother working forward from ix86_function_regparm
1802 to the function type to whether an actual argument is located in
1803 eax. Instead just look at cfg info, which is still close enough
1804 to correct at this point. This gives false positives for broken
1805 functions that might use uninitialized data that happens to be
1806 allocated in eax, but who cares? */
/* Hard register 0 is EAX; query its liveness at the entry block edge.  */
1807 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR
->global_live_at_end
, 0);
1810 /* Value is the number of bytes of arguments automatically
1811 popped when returning from a subroutine call.
1812 FUNDECL is the declaration node of the function (as a tree),
1813 FUNTYPE is the data type of the function (as a tree),
1814 or for a library call it is an identifier node for the subroutine name.
1815 SIZE is the number of bytes of arguments passed on the stack.
1817 On the 80386, the RTD insn may be used to pop them if the number
1818 of args is fixed, but if the number is variable then the caller
1819 must pop them all. RTD can't be used for library calls now
1820 because the library is compiled with the Unix compiler.
1821 Use of RTD is a selectable option, since it is incompatible with
1822 standard Unix calling sequences. If the option is not selected,
1823 the caller must always pop the args.
1825 The attribute stdcall is equivalent to RTD on a per module basis. */
/* NOTE(review): several interior lines (braces, returns and part of the
   stdcall/fastcall comment and varargs test) are missing from this
   extract.  */
1828 ix86_return_pops_args (tree fundecl
, tree funtype
, int size
)
/* -mrtd applies to real function decls only, not to library-call
   identifier nodes.  */
1830 int rtd
= TARGET_RTD
&& (!fundecl
|| TREE_CODE (fundecl
) != IDENTIFIER_NODE
);
1832 /* Cdecl functions override -mrtd, and never pop the stack. */
1833 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype
))) {
1835 /* Stdcall and fastcall functions will pop the stack if not
1837 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype
))
1838 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype
)))
/* A fixed argument list is either empty or ends in void_type_node.  */
1842 && (TYPE_ARG_TYPES (funtype
) == NULL_TREE
1843 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype
)))
1844 == void_type_node
)))
1848 /* Lose any fake structure return argument if it is passed on the stack. */
1849 if (aggregate_value_p (TREE_TYPE (funtype
), fundecl
)
1852 int nregs
= ix86_function_regparm (funtype
, fundecl
);
/* The hidden struct-return pointer occupies one word on the stack.  */
1855 return GET_MODE_SIZE (Pmode
);
1861 /* Argument support functions. */
1863 /* Return true when register may be used to pass function parameters. */
1865 ix86_function_arg_regno_p (int regno
)
1869 return (regno
< REGPARM_MAX
1870 || (TARGET_SSE
&& SSE_REGNO_P (regno
) && !fixed_regs
[regno
]));
1871 if (SSE_REGNO_P (regno
) && TARGET_SSE
)
1873 /* RAX is used as hidden argument to va_arg functions. */
1876 for (i
= 0; i
< REGPARM_MAX
; i
++)
1877 if (regno
== x86_64_int_parameter_registers
[i
])
1882 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1883 for a call to a function whose data type is FNTYPE.
1884 For a library call, FNTYPE is 0. */
1887 init_cumulative_args (CUMULATIVE_ARGS
*cum
, /* Argument info to initialize */
1888 tree fntype
, /* tree ptr for function decl */
1889 rtx libname
, /* SYMBOL_REF of library name or 0 */
1892 static CUMULATIVE_ARGS zero_cum
;
1893 tree param
, next_param
;
1895 if (TARGET_DEBUG_ARG
)
1897 fprintf (stderr
, "\ninit_cumulative_args (");
1899 fprintf (stderr
, "fntype code = %s, ret code = %s",
1900 tree_code_name
[(int) TREE_CODE (fntype
)],
1901 tree_code_name
[(int) TREE_CODE (TREE_TYPE (fntype
))]);
1903 fprintf (stderr
, "no fntype");
1906 fprintf (stderr
, ", libname = %s", XSTR (libname
, 0));
1911 /* Set up the number of registers to use for passing arguments. */
1913 cum
->nregs
= ix86_function_regparm (fntype
, fndecl
);
1915 cum
->nregs
= ix86_regparm
;
1916 cum
->sse_nregs
= SSE_REGPARM_MAX
;
1917 cum
->mmx_nregs
= MMX_REGPARM_MAX
;
1918 cum
->warn_sse
= true;
1919 cum
->warn_mmx
= true;
1920 cum
->maybe_vaarg
= false;
1922 /* Use ecx and edx registers if function has fastcall attribute */
1923 if (fntype
&& !TARGET_64BIT
)
1925 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype
)))
1933 /* Determine if this function has variable arguments. This is
1934 indicated by the last argument being 'void_type_mode' if there
1935 are no variable arguments. If there are variable arguments, then
1936 we won't pass anything in registers */
1938 if (cum
->nregs
|| !TARGET_MMX
|| !TARGET_SSE
)
1940 for (param
= (fntype
) ? TYPE_ARG_TYPES (fntype
) : 0;
1941 param
!= 0; param
= next_param
)
1943 next_param
= TREE_CHAIN (param
);
1944 if (next_param
== 0 && TREE_VALUE (param
) != void_type_node
)
1955 cum
->maybe_vaarg
= true;
1959 if ((!fntype
&& !libname
)
1960 || (fntype
&& !TYPE_ARG_TYPES (fntype
)))
1961 cum
->maybe_vaarg
= 1;
1963 if (TARGET_DEBUG_ARG
)
1964 fprintf (stderr
, ", nregs=%d )\n", cum
->nregs
);
1969 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
1970 of this code is to classify each 8bytes of incoming argument by the register
1971 class and assign registers accordingly. */
1973 /* Return the union class of CLASS1 and CLASS2.
1974 See the x86-64 PS ABI for details. */
1976 static enum x86_64_reg_class
1977 merge_classes (enum x86_64_reg_class class1
, enum x86_64_reg_class class2
)
1979 /* Rule #1: If both classes are equal, this is the resulting class. */
1980 if (class1
== class2
)
1983 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1985 if (class1
== X86_64_NO_CLASS
)
1987 if (class2
== X86_64_NO_CLASS
)
1990 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1991 if (class1
== X86_64_MEMORY_CLASS
|| class2
== X86_64_MEMORY_CLASS
)
1992 return X86_64_MEMORY_CLASS
;
1994 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1995 if ((class1
== X86_64_INTEGERSI_CLASS
&& class2
== X86_64_SSESF_CLASS
)
1996 || (class2
== X86_64_INTEGERSI_CLASS
&& class1
== X86_64_SSESF_CLASS
))
1997 return X86_64_INTEGERSI_CLASS
;
1998 if (class1
== X86_64_INTEGER_CLASS
|| class1
== X86_64_INTEGERSI_CLASS
1999 || class2
== X86_64_INTEGER_CLASS
|| class2
== X86_64_INTEGERSI_CLASS
)
2000 return X86_64_INTEGER_CLASS
;
2002 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
2003 if (class1
== X86_64_X87_CLASS
|| class1
== X86_64_X87UP_CLASS
2004 || class2
== X86_64_X87_CLASS
|| class2
== X86_64_X87UP_CLASS
)
2005 return X86_64_MEMORY_CLASS
;
2007 /* Rule #6: Otherwise class SSE is used. */
2008 return X86_64_SSE_CLASS
;
2011 /* Classify the argument of type TYPE and mode MODE.
2012 CLASSES will be filled by the register class used to pass each word
2013 of the operand. The number of words is returned. In case the parameter
2014 should be passed in memory, 0 is returned. As a special case for zero
2015 sized containers, classes[0] will be NO_CLASS and 1 is returned.
2017 BIT_OFFSET is used internally for handling records and specifies offset
2018 of the offset in bits modulo 256 to avoid overflow cases.
2020 See the x86-64 PS ABI for details.
2024 classify_argument (enum machine_mode mode
, tree type
,
2025 enum x86_64_reg_class classes
[MAX_CLASSES
], int bit_offset
)
2027 HOST_WIDE_INT bytes
=
2028 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
2029 int words
= (bytes
+ (bit_offset
% 64) / 8 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
2031 /* Variable sized entities are always passed/returned in memory. */
2035 if (mode
!= VOIDmode
2036 && MUST_PASS_IN_STACK (mode
, type
))
2039 if (type
&& AGGREGATE_TYPE_P (type
))
2043 enum x86_64_reg_class subclasses
[MAX_CLASSES
];
2045 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
2049 for (i
= 0; i
< words
; i
++)
2050 classes
[i
] = X86_64_NO_CLASS
;
2052 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
2053 signalize memory class, so handle it as special case. */
2056 classes
[0] = X86_64_NO_CLASS
;
2060 /* Classify each field of record and merge classes. */
2061 if (TREE_CODE (type
) == RECORD_TYPE
)
2063 /* For classes first merge in the field of the subclasses. */
2064 if (TYPE_BINFO (type
) && BINFO_BASE_BINFOS (TYPE_BINFO (type
)))
2066 tree bases
= BINFO_BASE_BINFOS (TYPE_BINFO (type
));
2067 int n_bases
= BINFO_N_BASE_BINFOS (TYPE_BINFO (type
));
2070 for (i
= 0; i
< n_bases
; ++i
)
2072 tree binfo
= TREE_VEC_ELT (bases
, i
);
2074 int offset
= tree_low_cst (BINFO_OFFSET (binfo
), 0) * 8;
2075 tree type
= BINFO_TYPE (binfo
);
2077 num
= classify_argument (TYPE_MODE (type
),
2079 (offset
+ bit_offset
) % 256);
2082 for (i
= 0; i
< num
; i
++)
2084 int pos
= (offset
+ (bit_offset
% 64)) / 8 / 8;
2086 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
2090 /* And now merge the fields of structure. */
2091 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
2093 if (TREE_CODE (field
) == FIELD_DECL
)
2097 /* Bitfields are always classified as integer. Handle them
2098 early, since later code would consider them to be
2099 misaligned integers. */
2100 if (DECL_BIT_FIELD (field
))
2102 for (i
= int_bit_position (field
) / 8 / 8;
2103 i
< (int_bit_position (field
)
2104 + tree_low_cst (DECL_SIZE (field
), 0)
2107 merge_classes (X86_64_INTEGER_CLASS
,
2112 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
2113 TREE_TYPE (field
), subclasses
,
2114 (int_bit_position (field
)
2115 + bit_offset
) % 256);
2118 for (i
= 0; i
< num
; i
++)
2121 (int_bit_position (field
) + (bit_offset
% 64)) / 8 / 8;
2123 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
2129 /* Arrays are handled as small records. */
2130 else if (TREE_CODE (type
) == ARRAY_TYPE
)
2133 num
= classify_argument (TYPE_MODE (TREE_TYPE (type
)),
2134 TREE_TYPE (type
), subclasses
, bit_offset
);
2138 /* The partial classes are now full classes. */
2139 if (subclasses
[0] == X86_64_SSESF_CLASS
&& bytes
!= 4)
2140 subclasses
[0] = X86_64_SSE_CLASS
;
2141 if (subclasses
[0] == X86_64_INTEGERSI_CLASS
&& bytes
!= 4)
2142 subclasses
[0] = X86_64_INTEGER_CLASS
;
2144 for (i
= 0; i
< words
; i
++)
2145 classes
[i
] = subclasses
[i
% num
];
2147 /* Unions are similar to RECORD_TYPE but offset is always 0. */
2148 else if (TREE_CODE (type
) == UNION_TYPE
2149 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
2151 /* For classes first merge in the field of the subclasses. */
2152 if (TYPE_BINFO (type
) && BINFO_BASE_BINFOS (TYPE_BINFO (type
)))
2154 tree bases
= BINFO_BASE_BINFOS (TYPE_BINFO (type
));
2155 int n_bases
= BINFO_N_BASE_BINFOS (TYPE_BINFO (type
));
2158 for (i
= 0; i
< n_bases
; ++i
)
2160 tree binfo
= TREE_VEC_ELT (bases
, i
);
2162 int offset
= tree_low_cst (BINFO_OFFSET (binfo
), 0) * 8;
2163 tree type
= BINFO_TYPE (binfo
);
2165 num
= classify_argument (TYPE_MODE (type
),
2167 (offset
+ (bit_offset
% 64)) % 256);
2170 for (i
= 0; i
< num
; i
++)
2172 int pos
= (offset
+ (bit_offset
% 64)) / 8 / 8;
2174 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
2178 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
2180 if (TREE_CODE (field
) == FIELD_DECL
)
2183 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
2184 TREE_TYPE (field
), subclasses
,
2188 for (i
= 0; i
< num
; i
++)
2189 classes
[i
] = merge_classes (subclasses
[i
], classes
[i
]);
2193 else if (TREE_CODE (type
) == SET_TYPE
)
2197 classes
[0] = X86_64_INTEGERSI_CLASS
;
2200 else if (bytes
<= 8)
2202 classes
[0] = X86_64_INTEGER_CLASS
;
2205 else if (bytes
<= 12)
2207 classes
[0] = X86_64_INTEGER_CLASS
;
2208 classes
[1] = X86_64_INTEGERSI_CLASS
;
2213 classes
[0] = X86_64_INTEGER_CLASS
;
2214 classes
[1] = X86_64_INTEGER_CLASS
;
2221 /* Final merger cleanup. */
2222 for (i
= 0; i
< words
; i
++)
2224 /* If one class is MEMORY, everything should be passed in
2226 if (classes
[i
] == X86_64_MEMORY_CLASS
)
2229 /* The X86_64_SSEUP_CLASS should be always preceded by
2230 X86_64_SSE_CLASS. */
2231 if (classes
[i
] == X86_64_SSEUP_CLASS
2232 && (i
== 0 || classes
[i
- 1] != X86_64_SSE_CLASS
))
2233 classes
[i
] = X86_64_SSE_CLASS
;
2235 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
2236 if (classes
[i
] == X86_64_X87UP_CLASS
2237 && (i
== 0 || classes
[i
- 1] != X86_64_X87_CLASS
))
2238 classes
[i
] = X86_64_SSE_CLASS
;
2243 /* Compute alignment needed. We align all types to natural boundaries with
2244 exception of XFmode that is aligned to 64bits. */
2245 if (mode
!= VOIDmode
&& mode
!= BLKmode
)
2247 int mode_alignment
= GET_MODE_BITSIZE (mode
);
2250 mode_alignment
= 128;
2251 else if (mode
== XCmode
)
2252 mode_alignment
= 256;
2253 if (COMPLEX_MODE_P (mode
))
2254 mode_alignment
/= 2;
2255 /* Misaligned fields are always returned in memory. */
2256 if (bit_offset
% mode_alignment
)
2260 /* Classification of atomic types. */
2270 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
2271 classes
[0] = X86_64_INTEGERSI_CLASS
;
2273 classes
[0] = X86_64_INTEGER_CLASS
;
2277 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
2280 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
2281 classes
[2] = classes
[3] = X86_64_INTEGER_CLASS
;
2284 if (!(bit_offset
% 64))
2285 classes
[0] = X86_64_SSESF_CLASS
;
2287 classes
[0] = X86_64_SSE_CLASS
;
2290 classes
[0] = X86_64_SSEDF_CLASS
;
2293 classes
[0] = X86_64_X87_CLASS
;
2294 classes
[1] = X86_64_X87UP_CLASS
;
2300 classes
[0] = X86_64_X87_CLASS
;
2301 classes
[1] = X86_64_X87UP_CLASS
;
2302 classes
[2] = X86_64_X87_CLASS
;
2303 classes
[3] = X86_64_X87UP_CLASS
;
2306 classes
[0] = X86_64_SSEDF_CLASS
;
2307 classes
[1] = X86_64_SSEDF_CLASS
;
2310 classes
[0] = X86_64_SSE_CLASS
;
2318 classes
[0] = X86_64_SSE_CLASS
;
2319 classes
[1] = X86_64_SSEUP_CLASS
;
2334 /* Examine the argument and return set number of register required in each
2335 class. Return 0 iff parameter should be passed in memory. */
2337 examine_argument (enum machine_mode mode
, tree type
, int in_return
,
2338 int *int_nregs
, int *sse_nregs
)
2340 enum x86_64_reg_class
class[MAX_CLASSES
];
2341 int n
= classify_argument (mode
, type
, class, 0);
2347 for (n
--; n
>= 0; n
--)
2350 case X86_64_INTEGER_CLASS
:
2351 case X86_64_INTEGERSI_CLASS
:
2354 case X86_64_SSE_CLASS
:
2355 case X86_64_SSESF_CLASS
:
2356 case X86_64_SSEDF_CLASS
:
2359 case X86_64_NO_CLASS
:
2360 case X86_64_SSEUP_CLASS
:
2362 case X86_64_X87_CLASS
:
2363 case X86_64_X87UP_CLASS
:
2367 case X86_64_MEMORY_CLASS
:
2372 /* Construct container for the argument used by GCC interface. See
2373 FUNCTION_ARG for the detailed description. */
2375 construct_container (enum machine_mode mode
, tree type
, int in_return
,
2376 int nintregs
, int nsseregs
, const int * intreg
,
2379 enum machine_mode tmpmode
;
2381 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
2382 enum x86_64_reg_class
class[MAX_CLASSES
];
2386 int needed_sseregs
, needed_intregs
;
2387 rtx exp
[MAX_CLASSES
];
2390 n
= classify_argument (mode
, type
, class, 0);
2391 if (TARGET_DEBUG_ARG
)
2394 fprintf (stderr
, "Memory class\n");
2397 fprintf (stderr
, "Classes:");
2398 for (i
= 0; i
< n
; i
++)
2400 fprintf (stderr
, " %s", x86_64_reg_class_name
[class[i
]]);
2402 fprintf (stderr
, "\n");
2407 if (!examine_argument (mode
, type
, in_return
, &needed_intregs
, &needed_sseregs
))
2409 if (needed_intregs
> nintregs
|| needed_sseregs
> nsseregs
)
2412 /* First construct simple cases. Avoid SCmode, since we want to use
2413 single register to pass this type. */
2414 if (n
== 1 && mode
!= SCmode
)
2417 case X86_64_INTEGER_CLASS
:
2418 case X86_64_INTEGERSI_CLASS
:
2419 return gen_rtx_REG (mode
, intreg
[0]);
2420 case X86_64_SSE_CLASS
:
2421 case X86_64_SSESF_CLASS
:
2422 case X86_64_SSEDF_CLASS
:
2423 return gen_rtx_REG (mode
, SSE_REGNO (sse_regno
));
2424 case X86_64_X87_CLASS
:
2425 return gen_rtx_REG (mode
, FIRST_STACK_REG
);
2426 case X86_64_NO_CLASS
:
2427 /* Zero sized array, struct or class. */
2432 if (n
== 2 && class[0] == X86_64_SSE_CLASS
&& class[1] == X86_64_SSEUP_CLASS
2434 return gen_rtx_REG (mode
, SSE_REGNO (sse_regno
));
2436 && class[0] == X86_64_X87_CLASS
&& class[1] == X86_64_X87UP_CLASS
)
2437 return gen_rtx_REG (XFmode
, FIRST_STACK_REG
);
2438 if (n
== 2 && class[0] == X86_64_INTEGER_CLASS
2439 && class[1] == X86_64_INTEGER_CLASS
2440 && (mode
== CDImode
|| mode
== TImode
|| mode
== TFmode
)
2441 && intreg
[0] + 1 == intreg
[1])
2442 return gen_rtx_REG (mode
, intreg
[0]);
2444 && class[0] == X86_64_X87_CLASS
&& class[1] == X86_64_X87UP_CLASS
2445 && class[2] == X86_64_X87_CLASS
&& class[3] == X86_64_X87UP_CLASS
2447 return gen_rtx_REG (XCmode
, FIRST_STACK_REG
);
2449 /* Otherwise figure out the entries of the PARALLEL. */
2450 for (i
= 0; i
< n
; i
++)
2454 case X86_64_NO_CLASS
:
2456 case X86_64_INTEGER_CLASS
:
2457 case X86_64_INTEGERSI_CLASS
:
2458 /* Merge TImodes on aligned occasions here too. */
2459 if (i
* 8 + 8 > bytes
)
2460 tmpmode
= mode_for_size ((bytes
- i
* 8) * BITS_PER_UNIT
, MODE_INT
, 0);
2461 else if (class[i
] == X86_64_INTEGERSI_CLASS
)
2465 /* We've requested 24 bytes we don't have mode for. Use DImode. */
2466 if (tmpmode
== BLKmode
)
2468 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
2469 gen_rtx_REG (tmpmode
, *intreg
),
2473 case X86_64_SSESF_CLASS
:
2474 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
2475 gen_rtx_REG (SFmode
,
2476 SSE_REGNO (sse_regno
)),
2480 case X86_64_SSEDF_CLASS
:
2481 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
2482 gen_rtx_REG (DFmode
,
2483 SSE_REGNO (sse_regno
)),
2487 case X86_64_SSE_CLASS
:
2488 if (i
< n
- 1 && class[i
+ 1] == X86_64_SSEUP_CLASS
)
2492 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
2493 gen_rtx_REG (tmpmode
,
2494 SSE_REGNO (sse_regno
)),
2496 if (tmpmode
== TImode
)
2504 ret
= gen_rtx_PARALLEL (mode
, rtvec_alloc (nexps
));
2505 for (i
= 0; i
< nexps
; i
++)
2506 XVECEXP (ret
, 0, i
) = exp
[i
];
2510 /* Update the data in CUM to advance over an argument
2511 of mode MODE and data type TYPE.
2512 (TYPE is null for libcalls where that information may not be available.) */
2515 function_arg_advance (CUMULATIVE_ARGS
*cum
, /* current arg information */
2516 enum machine_mode mode
, /* current arg mode */
2517 tree type
, /* type of the argument or 0 if lib support */
2518 int named
) /* whether or not the argument was named */
2521 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
2522 int words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
2524 if (TARGET_DEBUG_ARG
)
2526 "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, mode=%s, named=%d)\n\n",
2527 words
, cum
->words
, cum
->nregs
, cum
->sse_nregs
, GET_MODE_NAME (mode
), named
);
2530 int int_nregs
, sse_nregs
;
2531 if (!examine_argument (mode
, type
, 0, &int_nregs
, &sse_nregs
))
2532 cum
->words
+= words
;
2533 else if (sse_nregs
<= cum
->sse_nregs
&& int_nregs
<= cum
->nregs
)
2535 cum
->nregs
-= int_nregs
;
2536 cum
->sse_nregs
-= sse_nregs
;
2537 cum
->regno
+= int_nregs
;
2538 cum
->sse_regno
+= sse_nregs
;
2541 cum
->words
+= words
;
2545 if (TARGET_SSE
&& SSE_REG_MODE_P (mode
)
2546 && (!type
|| !AGGREGATE_TYPE_P (type
)))
2548 cum
->sse_words
+= words
;
2549 cum
->sse_nregs
-= 1;
2550 cum
->sse_regno
+= 1;
2551 if (cum
->sse_nregs
<= 0)
2557 else if (TARGET_MMX
&& MMX_REG_MODE_P (mode
)
2558 && (!type
|| !AGGREGATE_TYPE_P (type
)))
2560 cum
->mmx_words
+= words
;
2561 cum
->mmx_nregs
-= 1;
2562 cum
->mmx_regno
+= 1;
2563 if (cum
->mmx_nregs
<= 0)
2571 cum
->words
+= words
;
2572 cum
->nregs
-= words
;
2573 cum
->regno
+= words
;
2575 if (cum
->nregs
<= 0)
2585 /* Define where to put the arguments to a function.
2586 Value is zero to push the argument on the stack,
2587 or a hard register in which to store the argument.
2589 MODE is the argument's machine mode.
2590 TYPE is the data type of the argument (as a tree).
2591 This is null for libcalls where that information may
2593 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2594 the preceding args and about the function being called.
2595 NAMED is nonzero if this argument is a named parameter
2596 (otherwise it is an extra parameter matching an ellipsis). */
2599 function_arg (CUMULATIVE_ARGS
*cum
, /* current arg information */
2600 enum machine_mode mode
, /* current arg mode */
2601 tree type
, /* type of the argument or 0 if lib support */
2602 int named
) /* != 0 for normal args, == 0 for ... args */
2606 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
2607 int words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
2608 static bool warnedsse
, warnedmmx
;
2610 /* Handle a hidden AL argument containing number of registers for varargs
2611 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
2613 if (mode
== VOIDmode
)
2616 return GEN_INT (cum
->maybe_vaarg
2617 ? (cum
->sse_nregs
< 0
2625 ret
= construct_container (mode
, type
, 0, cum
->nregs
, cum
->sse_nregs
,
2626 &x86_64_int_parameter_registers
[cum
->regno
],
2631 /* For now, pass fp/complex values on the stack. */
2643 if (words
<= cum
->nregs
)
2645 int regno
= cum
->regno
;
2647 /* Fastcall allocates the first two DWORD (SImode) or
2648 smaller arguments to ECX and EDX. */
2651 if (mode
== BLKmode
|| mode
== DImode
)
2654 /* ECX not EAX is the first allocated register. */
2658 ret
= gen_rtx_REG (mode
, regno
);
2668 if (!type
|| !AGGREGATE_TYPE_P (type
))
2670 if (!TARGET_SSE
&& !warnedmmx
&& cum
->warn_sse
)
2673 warning ("SSE vector argument without SSE enabled "
2677 ret
= gen_rtx_REG (mode
, cum
->sse_regno
+ FIRST_SSE_REG
);
2684 if (!type
|| !AGGREGATE_TYPE_P (type
))
2686 if (!TARGET_MMX
&& !warnedmmx
&& cum
->warn_mmx
)
2689 warning ("MMX vector argument without MMX enabled "
2693 ret
= gen_rtx_REG (mode
, cum
->mmx_regno
+ FIRST_MMX_REG
);
2698 if (TARGET_DEBUG_ARG
)
2701 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2702 words
, cum
->words
, cum
->nregs
, GET_MODE_NAME (mode
), named
);
2705 print_simple_rtl (stderr
, ret
);
2707 fprintf (stderr
, ", stack");
2709 fprintf (stderr
, " )\n");
2715 /* A C expression that indicates when an argument must be passed by
2716 reference. If nonzero for an argument, a copy of that argument is
2717 made in memory and a pointer to the argument is passed instead of
2718 the argument itself. The pointer is passed in whatever way is
2719 appropriate for passing a pointer to that type. */
2722 function_arg_pass_by_reference (CUMULATIVE_ARGS
*cum ATTRIBUTE_UNUSED
,
2723 enum machine_mode mode ATTRIBUTE_UNUSED
,
2724 tree type
, int named ATTRIBUTE_UNUSED
)
2729 if (type
&& int_size_in_bytes (type
) == -1)
2731 if (TARGET_DEBUG_ARG
)
2732 fprintf (stderr
, "function_arg_pass_by_reference\n");
2739 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
2742 contains_128bit_aligned_vector_p (tree type
)
2744 enum machine_mode mode
= TYPE_MODE (type
);
2745 if (SSE_REG_MODE_P (mode
)
2746 && (!TYPE_USER_ALIGN (type
) || TYPE_ALIGN (type
) > 128))
2748 if (TYPE_ALIGN (type
) < 128)
2751 if (AGGREGATE_TYPE_P (type
))
2753 /* Walk the aggregates recursively. */
2754 if (TREE_CODE (type
) == RECORD_TYPE
2755 || TREE_CODE (type
) == UNION_TYPE
2756 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
2760 if (TYPE_BINFO (type
) && BINFO_BASE_BINFOS (TYPE_BINFO (type
)))
2762 tree bases
= BINFO_BASE_BINFOS (TYPE_BINFO (type
));
2763 int n_bases
= BINFO_N_BASE_BINFOS (TYPE_BINFO (type
));
2766 for (i
= 0; i
< n_bases
; ++i
)
2768 tree binfo
= TREE_VEC_ELT (bases
, i
);
2769 tree type
= BINFO_TYPE (binfo
);
2771 if (contains_128bit_aligned_vector_p (type
))
2775 /* And now merge the fields of structure. */
2776 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
2778 if (TREE_CODE (field
) == FIELD_DECL
2779 && contains_128bit_aligned_vector_p (TREE_TYPE (field
)))
2783 /* Just for use if some languages passes arrays by value. */
2784 else if (TREE_CODE (type
) == ARRAY_TYPE
)
2786 if (contains_128bit_aligned_vector_p (TREE_TYPE (type
)))
2795 /* Gives the alignment boundary, in bits, of an argument with the
2796 specified mode and type. */
2799 ix86_function_arg_boundary (enum machine_mode mode
, tree type
)
2803 align
= TYPE_ALIGN (type
);
2805 align
= GET_MODE_ALIGNMENT (mode
);
2806 if (align
< PARM_BOUNDARY
)
2807 align
= PARM_BOUNDARY
;
2810 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
2811 make an exception for SSE modes since these require 128bit
2814 The handling here differs from field_alignment. ICC aligns MMX
2815 arguments to 4 byte boundaries, while structure fields are aligned
2816 to 8 byte boundaries. */
2819 if (!SSE_REG_MODE_P (mode
))
2820 align
= PARM_BOUNDARY
;
2824 if (!contains_128bit_aligned_vector_p (type
))
2825 align
= PARM_BOUNDARY
;
2833 /* Return true if N is a possible register number of function value. */
2835 ix86_function_value_regno_p (int regno
)
2839 return ((regno
) == 0
2840 || ((regno
) == FIRST_FLOAT_REG
&& TARGET_FLOAT_RETURNS_IN_80387
)
2841 || ((regno
) == FIRST_SSE_REG
&& TARGET_SSE
));
2843 return ((regno
) == 0 || (regno
) == FIRST_FLOAT_REG
2844 || ((regno
) == FIRST_SSE_REG
&& TARGET_SSE
)
2845 || ((regno
) == FIRST_FLOAT_REG
&& TARGET_FLOAT_RETURNS_IN_80387
));
2848 /* Define how to find the value returned by a function.
2849 VALTYPE is the data type of the value (as a tree).
2850 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2851 otherwise, FUNC is 0. */
2853 ix86_function_value (tree valtype
)
2857 rtx ret
= construct_container (TYPE_MODE (valtype
), valtype
, 1,
2858 REGPARM_MAX
, SSE_REGPARM_MAX
,
2859 x86_64_int_return_registers
, 0);
2860 /* For zero sized structures, construct_container return NULL, but we need
2861 to keep rest of compiler happy by returning meaningful value. */
2863 ret
= gen_rtx_REG (TYPE_MODE (valtype
), 0);
2867 return gen_rtx_REG (TYPE_MODE (valtype
),
2868 ix86_value_regno (TYPE_MODE (valtype
)));
2871 /* Return false iff type is returned in memory. */
2873 ix86_return_in_memory (tree type
)
2875 int needed_intregs
, needed_sseregs
, size
;
2876 enum machine_mode mode
= TYPE_MODE (type
);
2879 return !examine_argument (mode
, type
, 1, &needed_intregs
, &needed_sseregs
);
2881 if (mode
== BLKmode
)
2884 size
= int_size_in_bytes (type
);
2886 if (MS_AGGREGATE_RETURN
&& AGGREGATE_TYPE_P (type
) && size
<= 8)
2889 if (VECTOR_MODE_P (mode
) || mode
== TImode
)
2891 /* User-created vectors small enough to fit in EAX. */
2895 /* MMX/3dNow values are returned on the stack, since we've
2896 got to EMMS/FEMMS before returning. */
2900 /* SSE values are returned in XMM0, except when it doesn't exist. */
2902 return (TARGET_SSE
? 0 : 1);
2913 /* When returning SSE vector types, we have a choice of either
2914 (1) being abi incompatible with a -march switch, or
2915 (2) generating an error.
2916 Given no good solution, I think the safest thing is one warning.
2917 The user won't be able to use -Werror, but....
2919 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
2920 called in response to actually generating a caller or callee that
2921 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
2922 via aggregate_value_p for general type probing from tree-ssa. */
2925 ix86_struct_value_rtx (tree type
, int incoming ATTRIBUTE_UNUSED
)
2929 if (!TARGET_SSE
&& type
&& !warned
)
2931 /* Look at the return type of the function, not the function type. */
2932 enum machine_mode mode
= TYPE_MODE (TREE_TYPE (type
));
2935 || (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
2938 warning ("SSE vector return without SSE enabled changes the ABI");
2945 /* Define how to find the value returned by a library function
2946 assuming the value has mode MODE. */
2948 ix86_libcall_value (enum machine_mode mode
)
2958 return gen_rtx_REG (mode
, FIRST_SSE_REG
);
2961 return gen_rtx_REG (mode
, FIRST_FLOAT_REG
);
2966 return gen_rtx_REG (mode
, 0);
2970 return gen_rtx_REG (mode
, ix86_value_regno (mode
));
2973 /* Given a mode, return the register to use for a return value. */
2976 ix86_value_regno (enum machine_mode mode
)
2978 /* Floating point return values in %st(0). */
2979 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
&& TARGET_FLOAT_RETURNS_IN_80387
)
2980 return FIRST_FLOAT_REG
;
2981 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
2982 we prevent this case when sse is not available. */
2983 if (mode
== TImode
|| (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
2984 return FIRST_SSE_REG
;
2985 /* Everything else in %eax. */
2989 /* Create the va_list data type. */
2992 ix86_build_builtin_va_list (void)
2994 tree f_gpr
, f_fpr
, f_ovf
, f_sav
, record
, type_decl
;
2996 /* For i386 we use plain pointer to argument area. */
2998 return build_pointer_type (char_type_node
);
3000 record
= (*lang_hooks
.types
.make_type
) (RECORD_TYPE
);
3001 type_decl
= build_decl (TYPE_DECL
, get_identifier ("__va_list_tag"), record
);
3003 f_gpr
= build_decl (FIELD_DECL
, get_identifier ("gp_offset"),
3004 unsigned_type_node
);
3005 f_fpr
= build_decl (FIELD_DECL
, get_identifier ("fp_offset"),
3006 unsigned_type_node
);
3007 f_ovf
= build_decl (FIELD_DECL
, get_identifier ("overflow_arg_area"),
3009 f_sav
= build_decl (FIELD_DECL
, get_identifier ("reg_save_area"),
3012 DECL_FIELD_CONTEXT (f_gpr
) = record
;
3013 DECL_FIELD_CONTEXT (f_fpr
) = record
;
3014 DECL_FIELD_CONTEXT (f_ovf
) = record
;
3015 DECL_FIELD_CONTEXT (f_sav
) = record
;
3017 TREE_CHAIN (record
) = type_decl
;
3018 TYPE_NAME (record
) = type_decl
;
3019 TYPE_FIELDS (record
) = f_gpr
;
3020 TREE_CHAIN (f_gpr
) = f_fpr
;
3021 TREE_CHAIN (f_fpr
) = f_ovf
;
3022 TREE_CHAIN (f_ovf
) = f_sav
;
3024 layout_type (record
);
3026 /* The correct type is an array type of one element. */
3027 return build_array_type (record
, build_index_type (size_zero_node
));
3030 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
3033 ix86_setup_incoming_varargs (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
3034 tree type
, int *pretend_size ATTRIBUTE_UNUSED
,
3037 CUMULATIVE_ARGS next_cum
;
3038 rtx save_area
= NULL_RTX
, mem
;
3051 /* Indicate to allocate space on the stack for varargs save area. */
3052 ix86_save_varrargs_registers
= 1;
3054 cfun
->stack_alignment_needed
= 128;
3056 fntype
= TREE_TYPE (current_function_decl
);
3057 stdarg_p
= (TYPE_ARG_TYPES (fntype
) != 0
3058 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype
)))
3059 != void_type_node
));
3061 /* For varargs, we do not want to skip the dummy va_dcl argument.
3062 For stdargs, we do want to skip the last named argument. */
3065 function_arg_advance (&next_cum
, mode
, type
, 1);
3068 save_area
= frame_pointer_rtx
;
3070 set
= get_varargs_alias_set ();
3072 for (i
= next_cum
.regno
; i
< ix86_regparm
; i
++)
3074 mem
= gen_rtx_MEM (Pmode
,
3075 plus_constant (save_area
, i
* UNITS_PER_WORD
));
3076 set_mem_alias_set (mem
, set
);
3077 emit_move_insn (mem
, gen_rtx_REG (Pmode
,
3078 x86_64_int_parameter_registers
[i
]));
3081 if (next_cum
.sse_nregs
)
3083 /* Now emit code to save SSE registers. The AX parameter contains number
3084 of SSE parameter registers used to call this function. We use
3085 sse_prologue_save insn template that produces computed jump across
3086 SSE saves. We need some preparation work to get this working. */
3088 label
= gen_label_rtx ();
3089 label_ref
= gen_rtx_LABEL_REF (Pmode
, label
);
3091 /* Compute address to jump to :
3092 label - 5*eax + nnamed_sse_arguments*5 */
3093 tmp_reg
= gen_reg_rtx (Pmode
);
3094 nsse_reg
= gen_reg_rtx (Pmode
);
3095 emit_insn (gen_zero_extendqidi2 (nsse_reg
, gen_rtx_REG (QImode
, 0)));
3096 emit_insn (gen_rtx_SET (VOIDmode
, tmp_reg
,
3097 gen_rtx_MULT (Pmode
, nsse_reg
,
3099 if (next_cum
.sse_regno
)
3102 gen_rtx_CONST (DImode
,
3103 gen_rtx_PLUS (DImode
,
3105 GEN_INT (next_cum
.sse_regno
* 4))));
3107 emit_move_insn (nsse_reg
, label_ref
);
3108 emit_insn (gen_subdi3 (nsse_reg
, nsse_reg
, tmp_reg
));
3110 /* Compute address of memory block we save into. We always use pointer
3111 pointing 127 bytes after first byte to store - this is needed to keep
3112 instruction size limited by 4 bytes. */
3113 tmp_reg
= gen_reg_rtx (Pmode
);
3114 emit_insn (gen_rtx_SET (VOIDmode
, tmp_reg
,
3115 plus_constant (save_area
,
3116 8 * REGPARM_MAX
+ 127)));
3117 mem
= gen_rtx_MEM (BLKmode
, plus_constant (tmp_reg
, -127));
3118 set_mem_alias_set (mem
, set
);
3119 set_mem_align (mem
, BITS_PER_WORD
);
3121 /* And finally do the dirty job! */
3122 emit_insn (gen_sse_prologue_save (mem
, nsse_reg
,
3123 GEN_INT (next_cum
.sse_regno
), label
));
3128 /* Implement va_start. */
3131 ix86_va_start (tree valist
, rtx nextarg
)
3133 HOST_WIDE_INT words
, n_gpr
, n_fpr
;
3134 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
3135 tree gpr
, fpr
, ovf
, sav
, t
;
3137 /* Only 64bit target needs something special. */
3140 std_expand_builtin_va_start (valist
, nextarg
);
3144 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
3145 f_fpr
= TREE_CHAIN (f_gpr
);
3146 f_ovf
= TREE_CHAIN (f_fpr
);
3147 f_sav
= TREE_CHAIN (f_ovf
);
3149 valist
= build1 (INDIRECT_REF
, TREE_TYPE (TREE_TYPE (valist
)), valist
);
3150 gpr
= build (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
, NULL_TREE
);
3151 fpr
= build (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
, NULL_TREE
);
3152 ovf
= build (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
, NULL_TREE
);
3153 sav
= build (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
, NULL_TREE
);
3155 /* Count number of gp and fp argument registers used. */
3156 words
= current_function_args_info
.words
;
3157 n_gpr
= current_function_args_info
.regno
;
3158 n_fpr
= current_function_args_info
.sse_regno
;
3160 if (TARGET_DEBUG_ARG
)
3161 fprintf (stderr
, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
3162 (int) words
, (int) n_gpr
, (int) n_fpr
);
3164 t
= build (MODIFY_EXPR
, TREE_TYPE (gpr
), gpr
,
3165 build_int_2 (n_gpr
* 8, 0));
3166 TREE_SIDE_EFFECTS (t
) = 1;
3167 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
3169 t
= build (MODIFY_EXPR
, TREE_TYPE (fpr
), fpr
,
3170 build_int_2 (n_fpr
* 16 + 8*REGPARM_MAX
, 0));
3171 TREE_SIDE_EFFECTS (t
) = 1;
3172 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
3174 /* Find the overflow area. */
3175 t
= make_tree (TREE_TYPE (ovf
), virtual_incoming_args_rtx
);
3177 t
= build (PLUS_EXPR
, TREE_TYPE (ovf
), t
,
3178 build_int_2 (words
* UNITS_PER_WORD
, 0));
3179 t
= build (MODIFY_EXPR
, TREE_TYPE (ovf
), ovf
, t
);
3180 TREE_SIDE_EFFECTS (t
) = 1;
3181 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
3183 /* Find the register save area.
3184 Prologue of the function save it right above stack frame. */
3185 t
= make_tree (TREE_TYPE (sav
), frame_pointer_rtx
);
3186 t
= build (MODIFY_EXPR
, TREE_TYPE (sav
), sav
, t
);
3187 TREE_SIDE_EFFECTS (t
) = 1;
3188 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
3191 /* Implement va_arg. */
3194 ix86_gimplify_va_arg (tree valist
, tree type
, tree
*pre_p
, tree
*post_p
)
3196 static const int intreg
[6] = { 0, 1, 2, 3, 4, 5 };
3197 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
3198 tree gpr
, fpr
, ovf
, sav
, t
;
3200 tree lab_false
, lab_over
= NULL_TREE
;
3206 /* Only 64bit target needs something special. */
3208 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
3210 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
3211 f_fpr
= TREE_CHAIN (f_gpr
);
3212 f_ovf
= TREE_CHAIN (f_fpr
);
3213 f_sav
= TREE_CHAIN (f_ovf
);
3215 valist
= build_fold_indirect_ref (valist
);
3216 gpr
= build (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
, NULL_TREE
);
3217 fpr
= build (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
, NULL_TREE
);
3218 ovf
= build (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
, NULL_TREE
);
3219 sav
= build (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
, NULL_TREE
);
3221 size
= int_size_in_bytes (type
);
3224 /* Variable-size types are passed by reference. */
3226 type
= build_pointer_type (type
);
3227 size
= int_size_in_bytes (type
);
3229 rsize
= (size
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
3231 container
= construct_container (TYPE_MODE (type
), type
, 0,
3232 REGPARM_MAX
, SSE_REGPARM_MAX
, intreg
, 0);
3234 * Pull the value out of the saved registers ...
3237 addr
= create_tmp_var (ptr_type_node
, "addr");
3238 DECL_POINTER_ALIAS_SET (addr
) = get_varargs_alias_set ();
3242 int needed_intregs
, needed_sseregs
;
3244 tree int_addr
, sse_addr
;
3246 lab_false
= create_artificial_label ();
3247 lab_over
= create_artificial_label ();
3249 examine_argument (TYPE_MODE (type
), type
, 0,
3250 &needed_intregs
, &needed_sseregs
);
3252 need_temp
= (!REG_P (container
)
3253 && ((needed_intregs
&& TYPE_ALIGN (type
) > 64)
3254 || TYPE_ALIGN (type
) > 128));
3256 /* In case we are passing structure, verify that it is consecutive block
3257 on the register save area. If not we need to do moves. */
3258 if (!need_temp
&& !REG_P (container
))
3260 /* Verify that all registers are strictly consecutive */
3261 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container
, 0, 0), 0))))
3265 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
3267 rtx slot
= XVECEXP (container
, 0, i
);
3268 if (REGNO (XEXP (slot
, 0)) != FIRST_SSE_REG
+ (unsigned int) i
3269 || INTVAL (XEXP (slot
, 1)) != i
* 16)
3277 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
3279 rtx slot
= XVECEXP (container
, 0, i
);
3280 if (REGNO (XEXP (slot
, 0)) != (unsigned int) i
3281 || INTVAL (XEXP (slot
, 1)) != i
* 8)
3293 int_addr
= create_tmp_var (ptr_type_node
, "int_addr");
3294 DECL_POINTER_ALIAS_SET (int_addr
) = get_varargs_alias_set ();
3295 sse_addr
= create_tmp_var (ptr_type_node
, "sse_addr");
3296 DECL_POINTER_ALIAS_SET (sse_addr
) = get_varargs_alias_set ();
3298 /* First ensure that we fit completely in registers. */
3301 t
= build_int_2 ((REGPARM_MAX
- needed_intregs
+ 1) * 8, 0);
3302 TREE_TYPE (t
) = TREE_TYPE (gpr
);
3303 t
= build2 (GE_EXPR
, boolean_type_node
, gpr
, t
);
3304 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
3305 t
= build (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
3306 gimplify_and_add (t
, pre_p
);
3310 t
= build_int_2 ((SSE_REGPARM_MAX
- needed_sseregs
+ 1) * 16
3311 + REGPARM_MAX
* 8, 0);
3312 TREE_TYPE (t
) = TREE_TYPE (fpr
);
3313 t
= build2 (GE_EXPR
, boolean_type_node
, fpr
, t
);
3314 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
3315 t
= build (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
3316 gimplify_and_add (t
, pre_p
);
3319 /* Compute index to start of area used for integer regs. */
3322 /* int_addr = gpr + sav; */
3323 t
= build2 (PLUS_EXPR
, ptr_type_node
, sav
, gpr
);
3324 t
= build2 (MODIFY_EXPR
, void_type_node
, int_addr
, t
);
3325 gimplify_and_add (t
, pre_p
);
3329 /* sse_addr = fpr + sav; */
3330 t
= build2 (PLUS_EXPR
, ptr_type_node
, sav
, fpr
);
3331 t
= build2 (MODIFY_EXPR
, void_type_node
, sse_addr
, t
);
3332 gimplify_and_add (t
, pre_p
);
3337 tree temp
= create_tmp_var (type
, "va_arg_tmp");
3340 t
= build1 (ADDR_EXPR
, build_pointer_type (type
), temp
);
3341 t
= build2 (MODIFY_EXPR
, void_type_node
, addr
, t
);
3342 gimplify_and_add (t
, pre_p
);
3344 for (i
= 0; i
< XVECLEN (container
, 0); i
++)
3346 rtx slot
= XVECEXP (container
, 0, i
);
3347 rtx reg
= XEXP (slot
, 0);
3348 enum machine_mode mode
= GET_MODE (reg
);
3349 tree piece_type
= lang_hooks
.types
.type_for_mode (mode
, 1);
3350 tree addr_type
= build_pointer_type (piece_type
);
3353 tree dest_addr
, dest
;
3355 if (SSE_REGNO_P (REGNO (reg
)))
3357 src_addr
= sse_addr
;
3358 src_offset
= (REGNO (reg
) - FIRST_SSE_REG
) * 16;
3362 src_addr
= int_addr
;
3363 src_offset
= REGNO (reg
) * 8;
3365 src_addr
= fold_convert (addr_type
, src_addr
);
3366 src_addr
= fold (build2 (PLUS_EXPR
, addr_type
, src_addr
,
3367 size_int (src_offset
)));
3368 src
= build_fold_indirect_ref (src_addr
);
3370 dest_addr
= fold_convert (addr_type
, addr
);
3371 dest_addr
= fold (build2 (PLUS_EXPR
, addr_type
, dest_addr
,
3372 size_int (INTVAL (XEXP (slot
, 1)))));
3373 dest
= build_fold_indirect_ref (dest_addr
);
3375 t
= build2 (MODIFY_EXPR
, void_type_node
, dest
, src
);
3376 gimplify_and_add (t
, pre_p
);
3382 t
= build2 (PLUS_EXPR
, TREE_TYPE (gpr
), gpr
,
3383 build_int_2 (needed_intregs
* 8, 0));
3384 t
= build2 (MODIFY_EXPR
, TREE_TYPE (gpr
), gpr
, t
);
3385 gimplify_and_add (t
, pre_p
);
3390 build2 (PLUS_EXPR
, TREE_TYPE (fpr
), fpr
,
3391 build_int_2 (needed_sseregs
* 16, 0));
3392 t
= build2 (MODIFY_EXPR
, TREE_TYPE (fpr
), fpr
, t
);
3393 gimplify_and_add (t
, pre_p
);
3396 t
= build1 (GOTO_EXPR
, void_type_node
, lab_over
);
3397 gimplify_and_add (t
, pre_p
);
3399 t
= build1 (LABEL_EXPR
, void_type_node
, lab_false
);
3400 append_to_statement_list (t
, pre_p
);
3403 /* ... otherwise out of the overflow area. */
3405 /* Care for on-stack alignment if needed. */
3406 if (FUNCTION_ARG_BOUNDARY (VOIDmode
, type
) <= 64)
3410 HOST_WIDE_INT align
= FUNCTION_ARG_BOUNDARY (VOIDmode
, type
) / 8;
3411 t
= build (PLUS_EXPR
, TREE_TYPE (ovf
), ovf
, build_int_2 (align
- 1, 0));
3412 t
= build (BIT_AND_EXPR
, TREE_TYPE (t
), t
, build_int_2 (-align
, -1));
3414 gimplify_expr (&t
, pre_p
, NULL
, is_gimple_val
, fb_rvalue
);
3416 t2
= build2 (MODIFY_EXPR
, void_type_node
, addr
, t
);
3417 gimplify_and_add (t2
, pre_p
);
3419 t
= build2 (PLUS_EXPR
, TREE_TYPE (t
), t
,
3420 build_int_2 (rsize
* UNITS_PER_WORD
, 0));
3421 t
= build2 (MODIFY_EXPR
, TREE_TYPE (ovf
), ovf
, t
);
3422 gimplify_and_add (t
, pre_p
);
3426 t
= build1 (LABEL_EXPR
, void_type_node
, lab_over
);
3427 append_to_statement_list (t
, pre_p
);
3430 ptrtype
= build_pointer_type (type
);
3431 addr
= fold_convert (ptrtype
, addr
);
3434 addr
= build_fold_indirect_ref (addr
);
3435 return build_fold_indirect_ref (addr
);
3438 /* Return nonzero if OP is either a i387 or SSE fp register. */
3440 any_fp_register_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3442 return ANY_FP_REG_P (op
);
3445 /* Return nonzero if OP is an i387 fp register. */
3447 fp_register_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3449 return FP_REG_P (op
);
3452 /* Return nonzero if OP is a non-fp register_operand. */
3454 register_and_not_any_fp_reg_operand (rtx op
, enum machine_mode mode
)
3456 return register_operand (op
, mode
) && !ANY_FP_REG_P (op
);
3459 /* Return nonzero if OP is a register operand other than an
3460 i387 fp register. */
3462 register_and_not_fp_reg_operand (rtx op
, enum machine_mode mode
)
3464 return register_operand (op
, mode
) && !FP_REG_P (op
);
3467 /* Return nonzero if OP is general operand representable on x86_64. */
3470 x86_64_general_operand (rtx op
, enum machine_mode mode
)
3473 return general_operand (op
, mode
);
3474 if (nonimmediate_operand (op
, mode
))
3476 return x86_64_sign_extended_value (op
);
3479 /* Return nonzero if OP is general operand representable on x86_64
3480 as either sign extended or zero extended constant. */
3483 x86_64_szext_general_operand (rtx op
, enum machine_mode mode
)
3486 return general_operand (op
, mode
);
3487 if (nonimmediate_operand (op
, mode
))
3489 return x86_64_sign_extended_value (op
) || x86_64_zero_extended_value (op
);
3492 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
3495 x86_64_nonmemory_operand (rtx op
, enum machine_mode mode
)
3498 return nonmemory_operand (op
, mode
);
3499 if (register_operand (op
, mode
))
3501 return x86_64_sign_extended_value (op
);
3504 /* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
3507 x86_64_movabs_operand (rtx op
, enum machine_mode mode
)
3509 if (!TARGET_64BIT
|| !flag_pic
)
3510 return nonmemory_operand (op
, mode
);
3511 if (register_operand (op
, mode
) || x86_64_sign_extended_value (op
))
3513 if (CONSTANT_P (op
) && !symbolic_reference_mentioned_p (op
))
3518 /* Return nonzero if OPNUM's MEM should be matched
3519 in movabs* patterns. */
3522 ix86_check_movabs (rtx insn
, int opnum
)
3526 set
= PATTERN (insn
);
3527 if (GET_CODE (set
) == PARALLEL
)
3528 set
= XVECEXP (set
, 0, 0);
3529 if (GET_CODE (set
) != SET
)
3531 mem
= XEXP (set
, opnum
);
3532 while (GET_CODE (mem
) == SUBREG
)
3533 mem
= SUBREG_REG (mem
);
3534 if (GET_CODE (mem
) != MEM
)
3536 return (volatile_ok
|| !MEM_VOLATILE_P (mem
));
3539 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
3542 x86_64_szext_nonmemory_operand (rtx op
, enum machine_mode mode
)
3545 return nonmemory_operand (op
, mode
);
3546 if (register_operand (op
, mode
))
3548 return x86_64_sign_extended_value (op
) || x86_64_zero_extended_value (op
);
3551 /* Return nonzero if OP is immediate operand representable on x86_64. */
3554 x86_64_immediate_operand (rtx op
, enum machine_mode mode
)
3557 return immediate_operand (op
, mode
);
3558 return x86_64_sign_extended_value (op
);
3561 /* Return nonzero if OP is immediate operand representable on x86_64. */
3564 x86_64_zext_immediate_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3566 return x86_64_zero_extended_value (op
);
3569 /* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
3570 for shift & compare patterns, as shifting by 0 does not change flags),
3571 else return zero. */
3574 const_int_1_31_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3576 return (GET_CODE (op
) == CONST_INT
&& INTVAL (op
) >= 1 && INTVAL (op
) <= 31);
3579 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
3580 reference and a constant. */
3583 symbolic_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3585 switch (GET_CODE (op
))
3593 if (GET_CODE (op
) == SYMBOL_REF
3594 || GET_CODE (op
) == LABEL_REF
3595 || (GET_CODE (op
) == UNSPEC
3596 && (XINT (op
, 1) == UNSPEC_GOT
3597 || XINT (op
, 1) == UNSPEC_GOTOFF
3598 || XINT (op
, 1) == UNSPEC_GOTPCREL
)))
3600 if (GET_CODE (op
) != PLUS
3601 || GET_CODE (XEXP (op
, 1)) != CONST_INT
)
3605 if (GET_CODE (op
) == SYMBOL_REF
3606 || GET_CODE (op
) == LABEL_REF
)
3608 /* Only @GOTOFF gets offsets. */
3609 if (GET_CODE (op
) != UNSPEC
3610 || XINT (op
, 1) != UNSPEC_GOTOFF
)
3613 op
= XVECEXP (op
, 0, 0);
3614 if (GET_CODE (op
) == SYMBOL_REF
3615 || GET_CODE (op
) == LABEL_REF
)
3624 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
3627 pic_symbolic_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3629 if (GET_CODE (op
) != CONST
)
3634 if (GET_CODE (op
) == UNSPEC
3635 && XINT (op
, 1) == UNSPEC_GOTPCREL
)
3637 if (GET_CODE (op
) == PLUS
3638 && GET_CODE (XEXP (op
, 0)) == UNSPEC
3639 && XINT (XEXP (op
, 0), 1) == UNSPEC_GOTPCREL
)
3644 if (GET_CODE (op
) == UNSPEC
)
3646 if (GET_CODE (op
) != PLUS
3647 || GET_CODE (XEXP (op
, 1)) != CONST_INT
)
3650 if (GET_CODE (op
) == UNSPEC
)
3656 /* Return true if OP is a symbolic operand that resolves locally. */
3659 local_symbolic_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3661 if (GET_CODE (op
) == CONST
3662 && GET_CODE (XEXP (op
, 0)) == PLUS
3663 && GET_CODE (XEXP (XEXP (op
, 0), 1)) == CONST_INT
)
3664 op
= XEXP (XEXP (op
, 0), 0);
3666 if (GET_CODE (op
) == LABEL_REF
)
3669 if (GET_CODE (op
) != SYMBOL_REF
)
3672 if (SYMBOL_REF_LOCAL_P (op
))
3675 /* There is, however, a not insubstantial body of code in the rest of
3676 the compiler that assumes it can just stick the results of
3677 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
3678 /* ??? This is a hack. Should update the body of the compiler to
3679 always create a DECL an invoke targetm.encode_section_info. */
3680 if (strncmp (XSTR (op
, 0), internal_label_prefix
,
3681 internal_label_prefix_len
) == 0)
3687 /* Test for various thread-local symbols. */
3690 tls_symbolic_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3692 if (GET_CODE (op
) != SYMBOL_REF
)
3694 return SYMBOL_REF_TLS_MODEL (op
);
3698 tls_symbolic_operand_1 (rtx op
, enum tls_model kind
)
3700 if (GET_CODE (op
) != SYMBOL_REF
)
3702 return SYMBOL_REF_TLS_MODEL (op
) == kind
;
3706 global_dynamic_symbolic_operand (rtx op
,
3707 enum machine_mode mode ATTRIBUTE_UNUSED
)
3709 return tls_symbolic_operand_1 (op
, TLS_MODEL_GLOBAL_DYNAMIC
);
3713 local_dynamic_symbolic_operand (rtx op
,
3714 enum machine_mode mode ATTRIBUTE_UNUSED
)
3716 return tls_symbolic_operand_1 (op
, TLS_MODEL_LOCAL_DYNAMIC
);
3720 initial_exec_symbolic_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3722 return tls_symbolic_operand_1 (op
, TLS_MODEL_INITIAL_EXEC
);
3726 local_exec_symbolic_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3728 return tls_symbolic_operand_1 (op
, TLS_MODEL_LOCAL_EXEC
);
3731 /* Test for a valid operand for a call instruction. Don't allow the
3732 arg pointer register or virtual regs since they may decay into
3733 reg + const, which the patterns can't handle. */
3736 call_insn_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3738 /* Disallow indirect through a virtual register. This leads to
3739 compiler aborts when trying to eliminate them. */
3740 if (GET_CODE (op
) == REG
3741 && (op
== arg_pointer_rtx
3742 || op
== frame_pointer_rtx
3743 || (REGNO (op
) >= FIRST_PSEUDO_REGISTER
3744 && REGNO (op
) <= LAST_VIRTUAL_REGISTER
)))
3747 /* Disallow `call 1234'. Due to varying assembler lameness this
3748 gets either rejected or translated to `call .+1234'. */
3749 if (GET_CODE (op
) == CONST_INT
)
3752 /* Explicitly allow SYMBOL_REF even if pic. */
3753 if (GET_CODE (op
) == SYMBOL_REF
)
3756 /* Otherwise we can allow any general_operand in the address. */
3757 return general_operand (op
, Pmode
);
3760 /* Test for a valid operand for a call instruction. Don't allow the
3761 arg pointer register or virtual regs since they may decay into
3762 reg + const, which the patterns can't handle. */
3765 sibcall_insn_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3767 /* Disallow indirect through a virtual register. This leads to
3768 compiler aborts when trying to eliminate them. */
3769 if (GET_CODE (op
) == REG
3770 && (op
== arg_pointer_rtx
3771 || op
== frame_pointer_rtx
3772 || (REGNO (op
) >= FIRST_PSEUDO_REGISTER
3773 && REGNO (op
) <= LAST_VIRTUAL_REGISTER
)))
3776 /* Explicitly allow SYMBOL_REF even if pic. */
3777 if (GET_CODE (op
) == SYMBOL_REF
)
3780 /* Otherwise we can only allow register operands. */
3781 return register_operand (op
, Pmode
);
3785 constant_call_address_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3787 if (GET_CODE (op
) == CONST
3788 && GET_CODE (XEXP (op
, 0)) == PLUS
3789 && GET_CODE (XEXP (XEXP (op
, 0), 1)) == CONST_INT
)
3790 op
= XEXP (XEXP (op
, 0), 0);
3791 return GET_CODE (op
) == SYMBOL_REF
;
3794 /* Match exactly zero and one. */
3797 const0_operand (rtx op
, enum machine_mode mode
)
3799 return op
== CONST0_RTX (mode
);
3803 const1_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3805 return op
== const1_rtx
;
3808 /* Match 2, 4, or 8. Used for leal multiplicands. */
3811 const248_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3813 return (GET_CODE (op
) == CONST_INT
3814 && (INTVAL (op
) == 2 || INTVAL (op
) == 4 || INTVAL (op
) == 8));
3818 const_0_to_3_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3820 return (GET_CODE (op
) == CONST_INT
&& INTVAL (op
) >= 0 && INTVAL (op
) < 4);
3824 const_0_to_7_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3826 return (GET_CODE (op
) == CONST_INT
&& INTVAL (op
) >= 0 && INTVAL (op
) < 8);
3830 const_0_to_15_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3832 return (GET_CODE (op
) == CONST_INT
&& INTVAL (op
) >= 0 && INTVAL (op
) < 16);
3836 const_0_to_255_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3838 return (GET_CODE (op
) == CONST_INT
&& INTVAL (op
) >= 0 && INTVAL (op
) < 256);
3842 /* True if this is a constant appropriate for an increment or decrement. */
3845 incdec_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3847 /* On Pentium4, the inc and dec operations causes extra dependency on flag
3848 registers, since carry flag is not set. */
3849 if ((TARGET_PENTIUM4
|| TARGET_NOCONA
) && !optimize_size
)
3851 return op
== const1_rtx
|| op
== constm1_rtx
;
3854 /* Return nonzero if OP is acceptable as operand of DImode shift
3858 shiftdi_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3861 return nonimmediate_operand (op
, mode
);
3863 return register_operand (op
, mode
);
3866 /* Return false if this is the stack pointer, or any other fake
3867 register eliminable to the stack pointer. Otherwise, this is
3870 This is used to prevent esp from being used as an index reg.
3871 Which would only happen in pathological cases. */
3874 reg_no_sp_operand (rtx op
, enum machine_mode mode
)
3877 if (GET_CODE (t
) == SUBREG
)
3879 if (t
== stack_pointer_rtx
|| t
== arg_pointer_rtx
|| t
== frame_pointer_rtx
)
3882 return register_operand (op
, mode
);
3886 mmx_reg_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3888 return MMX_REG_P (op
);
3891 /* Return false if this is any eliminable register. Otherwise
3895 general_no_elim_operand (rtx op
, enum machine_mode mode
)
3898 if (GET_CODE (t
) == SUBREG
)
3900 if (t
== arg_pointer_rtx
|| t
== frame_pointer_rtx
3901 || t
== virtual_incoming_args_rtx
|| t
== virtual_stack_vars_rtx
3902 || t
== virtual_stack_dynamic_rtx
)
3905 && REGNO (t
) >= FIRST_VIRTUAL_REGISTER
3906 && REGNO (t
) <= LAST_VIRTUAL_REGISTER
)
3909 return general_operand (op
, mode
);
3912 /* Return false if this is any eliminable register. Otherwise
3913 register_operand or const_int. */
3916 nonmemory_no_elim_operand (rtx op
, enum machine_mode mode
)
3919 if (GET_CODE (t
) == SUBREG
)
3921 if (t
== arg_pointer_rtx
|| t
== frame_pointer_rtx
3922 || t
== virtual_incoming_args_rtx
|| t
== virtual_stack_vars_rtx
3923 || t
== virtual_stack_dynamic_rtx
)
3926 return GET_CODE (op
) == CONST_INT
|| register_operand (op
, mode
);
3929 /* Return false if this is any eliminable register or stack register,
3930 otherwise work like register_operand. */
3933 index_register_operand (rtx op
, enum machine_mode mode
)
3936 if (GET_CODE (t
) == SUBREG
)
3940 if (t
== arg_pointer_rtx
3941 || t
== frame_pointer_rtx
3942 || t
== virtual_incoming_args_rtx
3943 || t
== virtual_stack_vars_rtx
3944 || t
== virtual_stack_dynamic_rtx
3945 || REGNO (t
) == STACK_POINTER_REGNUM
)
3948 return general_operand (op
, mode
);
3951 /* Return true if op is a Q_REGS class register. */
3954 q_regs_operand (rtx op
, enum machine_mode mode
)
3956 if (mode
!= VOIDmode
&& GET_MODE (op
) != mode
)
3958 if (GET_CODE (op
) == SUBREG
)
3959 op
= SUBREG_REG (op
);
3960 return ANY_QI_REG_P (op
);
3963 /* Return true if op is an flags register. */
3966 flags_reg_operand (rtx op
, enum machine_mode mode
)
3968 if (mode
!= VOIDmode
&& GET_MODE (op
) != mode
)
3970 return REG_P (op
) && REGNO (op
) == FLAGS_REG
&& GET_MODE (op
) != VOIDmode
;
3973 /* Return true if op is a NON_Q_REGS class register. */
3976 non_q_regs_operand (rtx op
, enum machine_mode mode
)
3978 if (mode
!= VOIDmode
&& GET_MODE (op
) != mode
)
3980 if (GET_CODE (op
) == SUBREG
)
3981 op
= SUBREG_REG (op
);
3982 return NON_QI_REG_P (op
);
3986 zero_extended_scalar_load_operand (rtx op
,
3987 enum machine_mode mode ATTRIBUTE_UNUSED
)
3990 if (GET_CODE (op
) != MEM
)
3992 op
= maybe_get_pool_constant (op
);
3995 if (GET_CODE (op
) != CONST_VECTOR
)
3998 (GET_MODE_SIZE (GET_MODE (op
)) /
3999 GET_MODE_SIZE (GET_MODE_INNER (GET_MODE (op
))));
4000 for (n_elts
--; n_elts
> 0; n_elts
--)
4002 rtx elt
= CONST_VECTOR_ELT (op
, n_elts
);
4003 if (elt
!= CONST0_RTX (GET_MODE_INNER (GET_MODE (op
))))
4009 /* Return 1 when OP is operand acceptable for standard SSE move. */
4011 vector_move_operand (rtx op
, enum machine_mode mode
)
4013 if (nonimmediate_operand (op
, mode
))
4015 if (GET_MODE (op
) != mode
&& mode
!= VOIDmode
)
4017 return (op
== CONST0_RTX (GET_MODE (op
)));
4020 /* Return true if op if a valid address, and does not contain
4021 a segment override. */
4024 no_seg_address_operand (rtx op
, enum machine_mode mode
)
4026 struct ix86_address parts
;
4028 if (! address_operand (op
, mode
))
4031 if (! ix86_decompose_address (op
, &parts
))
4034 return parts
.seg
== SEG_DEFAULT
;
4037 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
4040 sse_comparison_operator (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
4042 enum rtx_code code
= GET_CODE (op
);
4045 /* Operations supported directly. */
4055 /* These are equivalent to ones above in non-IEEE comparisons. */
4062 return !TARGET_IEEE_FP
;
4067 /* Return 1 if OP is a valid comparison operator in valid mode. */
4069 ix86_comparison_operator (rtx op
, enum machine_mode mode
)
4071 enum machine_mode inmode
;
4072 enum rtx_code code
= GET_CODE (op
);
4073 if (mode
!= VOIDmode
&& GET_MODE (op
) != mode
)
4075 if (!COMPARISON_P (op
))
4077 inmode
= GET_MODE (XEXP (op
, 0));
4079 if (inmode
== CCFPmode
|| inmode
== CCFPUmode
)
4081 enum rtx_code second_code
, bypass_code
;
4082 ix86_fp_comparison_codes (code
, &bypass_code
, &code
, &second_code
);
4083 return (bypass_code
== NIL
&& second_code
== NIL
);
4090 if (inmode
== CCmode
|| inmode
== CCGCmode
4091 || inmode
== CCGOCmode
|| inmode
== CCNOmode
)
4094 case LTU
: case GTU
: case LEU
: case ORDERED
: case UNORDERED
: case GEU
:
4095 if (inmode
== CCmode
)
4099 if (inmode
== CCmode
|| inmode
== CCGCmode
|| inmode
== CCNOmode
)
4107 /* Return 1 if OP is a valid comparison operator testing carry flag
4110 ix86_carry_flag_operator (rtx op
, enum machine_mode mode
)
4112 enum machine_mode inmode
;
4113 enum rtx_code code
= GET_CODE (op
);
4115 if (mode
!= VOIDmode
&& GET_MODE (op
) != mode
)
4117 if (!COMPARISON_P (op
))
4119 inmode
= GET_MODE (XEXP (op
, 0));
4120 if (GET_CODE (XEXP (op
, 0)) != REG
4121 || REGNO (XEXP (op
, 0)) != 17
4122 || XEXP (op
, 1) != const0_rtx
)
4125 if (inmode
== CCFPmode
|| inmode
== CCFPUmode
)
4127 enum rtx_code second_code
, bypass_code
;
4129 ix86_fp_comparison_codes (code
, &bypass_code
, &code
, &second_code
);
4130 if (bypass_code
!= NIL
|| second_code
!= NIL
)
4132 code
= ix86_fp_compare_code_to_integer (code
);
4134 else if (inmode
!= CCmode
)
4139 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
4142 fcmov_comparison_operator (rtx op
, enum machine_mode mode
)
4144 enum machine_mode inmode
;
4145 enum rtx_code code
= GET_CODE (op
);
4147 if (mode
!= VOIDmode
&& GET_MODE (op
) != mode
)
4149 if (!COMPARISON_P (op
))
4151 inmode
= GET_MODE (XEXP (op
, 0));
4152 if (inmode
== CCFPmode
|| inmode
== CCFPUmode
)
4154 enum rtx_code second_code
, bypass_code
;
4156 ix86_fp_comparison_codes (code
, &bypass_code
, &code
, &second_code
);
4157 if (bypass_code
!= NIL
|| second_code
!= NIL
)
4159 code
= ix86_fp_compare_code_to_integer (code
);
4161 /* i387 supports just limited amount of conditional codes. */
4164 case LTU
: case GTU
: case LEU
: case GEU
:
4165 if (inmode
== CCmode
|| inmode
== CCFPmode
|| inmode
== CCFPUmode
)
4168 case ORDERED
: case UNORDERED
:
4176 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
4179 promotable_binary_operator (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
4181 switch (GET_CODE (op
))
4184 /* Modern CPUs have same latency for HImode and SImode multiply,
4185 but 386 and 486 do HImode multiply faster. */
4186 return ix86_tune
> PROCESSOR_I486
;
4198 /* Nearly general operand, but accept any const_double, since we wish
4199 to be able to drop them into memory rather than have them get pulled
4203 cmp_fp_expander_operand (rtx op
, enum machine_mode mode
)
4205 if (mode
!= VOIDmode
&& mode
!= GET_MODE (op
))
4207 if (GET_CODE (op
) == CONST_DOUBLE
)
4209 return general_operand (op
, mode
);
4212 /* Match an SI or HImode register for a zero_extract. */
4215 ext_register_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
4218 if ((!TARGET_64BIT
|| GET_MODE (op
) != DImode
)
4219 && GET_MODE (op
) != SImode
&& GET_MODE (op
) != HImode
)
4222 if (!register_operand (op
, VOIDmode
))
4225 /* Be careful to accept only registers having upper parts. */
4226 regno
= REG_P (op
) ? REGNO (op
) : REGNO (SUBREG_REG (op
));
4227 return (regno
> LAST_VIRTUAL_REGISTER
|| regno
< 4);
4230 /* Return 1 if this is a valid binary floating-point operation.
4231 OP is the expression matched, and MODE is its mode. */
4234 binary_fp_operator (rtx op
, enum machine_mode mode
)
4236 if (mode
!= VOIDmode
&& mode
!= GET_MODE (op
))
4239 switch (GET_CODE (op
))
4245 return GET_MODE_CLASS (GET_MODE (op
)) == MODE_FLOAT
;
4253 mult_operator (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
4255 return GET_CODE (op
) == MULT
;
4259 div_operator (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
4261 return GET_CODE (op
) == DIV
;
4265 arith_or_logical_operator (rtx op
, enum machine_mode mode
)
4267 return ((mode
== VOIDmode
|| GET_MODE (op
) == mode
)
4268 && ARITHMETIC_P (op
));
4271 /* Returns 1 if OP is memory operand with a displacement. */
4274 memory_displacement_operand (rtx op
, enum machine_mode mode
)
4276 struct ix86_address parts
;
4278 if (! memory_operand (op
, mode
))
4281 if (! ix86_decompose_address (XEXP (op
, 0), &parts
))
4284 return parts
.disp
!= NULL_RTX
;
4287 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
4288 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
4290 ??? It seems likely that this will only work because cmpsi is an
4291 expander, and no actual insns use this. */
4294 cmpsi_operand (rtx op
, enum machine_mode mode
)
4296 if (nonimmediate_operand (op
, mode
))
4299 if (GET_CODE (op
) == AND
4300 && GET_MODE (op
) == SImode
4301 && GET_CODE (XEXP (op
, 0)) == ZERO_EXTRACT
4302 && GET_CODE (XEXP (XEXP (op
, 0), 1)) == CONST_INT
4303 && GET_CODE (XEXP (XEXP (op
, 0), 2)) == CONST_INT
4304 && INTVAL (XEXP (XEXP (op
, 0), 1)) == 8
4305 && INTVAL (XEXP (XEXP (op
, 0), 2)) == 8
4306 && GET_CODE (XEXP (op
, 1)) == CONST_INT
)
4312 /* Returns 1 if OP is memory operand that can not be represented by the
4316 long_memory_operand (rtx op
, enum machine_mode mode
)
4318 if (! memory_operand (op
, mode
))
4321 return memory_address_length (op
) != 0;
4324 /* Return nonzero if the rtx is known aligned. */
4327 aligned_operand (rtx op
, enum machine_mode mode
)
4329 struct ix86_address parts
;
4331 if (!general_operand (op
, mode
))
4334 /* Registers and immediate operands are always "aligned". */
4335 if (GET_CODE (op
) != MEM
)
4338 /* Don't even try to do any aligned optimizations with volatiles. */
4339 if (MEM_VOLATILE_P (op
))
4344 /* Pushes and pops are only valid on the stack pointer. */
4345 if (GET_CODE (op
) == PRE_DEC
4346 || GET_CODE (op
) == POST_INC
)
4349 /* Decode the address. */
4350 if (! ix86_decompose_address (op
, &parts
))
4353 /* Look for some component that isn't known to be aligned. */
4357 && REGNO_POINTER_ALIGN (REGNO (parts
.index
)) < 32)
4362 if (REGNO_POINTER_ALIGN (REGNO (parts
.base
)) < 32)
4367 if (GET_CODE (parts
.disp
) != CONST_INT
4368 || (INTVAL (parts
.disp
) & 3) != 0)
4372 /* Didn't find one -- this must be an aligned address. */
4376 /* Initialize the table of extra 80387 mathematical constants. */
4379 init_ext_80387_constants (void)
4381 static const char * cst
[5] =
4383 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4384 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4385 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4386 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4387 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4391 for (i
= 0; i
< 5; i
++)
4393 real_from_string (&ext_80387_constants_table
[i
], cst
[i
]);
4394 /* Ensure each constant is rounded to XFmode precision. */
4395 real_convert (&ext_80387_constants_table
[i
],
4396 XFmode
, &ext_80387_constants_table
[i
]);
4399 ext_80387_constants_init
= 1;
4402 /* Return true if the constant is something that can be loaded with
4403 a special instruction. */
4406 standard_80387_constant_p (rtx x
)
4408 if (GET_CODE (x
) != CONST_DOUBLE
|| !FLOAT_MODE_P (GET_MODE (x
)))
4411 if (x
== CONST0_RTX (GET_MODE (x
)))
4413 if (x
== CONST1_RTX (GET_MODE (x
)))
4416 /* For XFmode constants, try to find a special 80387 instruction when
4417 optimizing for size or on those CPUs that benefit from them. */
4418 if (GET_MODE (x
) == XFmode
4419 && (optimize_size
|| x86_ext_80387_constants
& TUNEMASK
))
4424 if (! ext_80387_constants_init
)
4425 init_ext_80387_constants ();
4427 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
4428 for (i
= 0; i
< 5; i
++)
4429 if (real_identical (&r
, &ext_80387_constants_table
[i
]))
4436 /* Return the opcode of the special instruction to be used to load
4440 standard_80387_constant_opcode (rtx x
)
4442 switch (standard_80387_constant_p (x
))
4462 /* Return the CONST_DOUBLE representing the 80387 constant that is
4463 loaded by the specified special instruction. The argument IDX
4464 matches the return value from standard_80387_constant_p. */
4467 standard_80387_constant_rtx (int idx
)
4471 if (! ext_80387_constants_init
)
4472 init_ext_80387_constants ();
4488 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table
[i
],
4492 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
4495 standard_sse_constant_p (rtx x
)
4497 if (x
== const0_rtx
)
4499 return (x
== CONST0_RTX (GET_MODE (x
)));
4502 /* Returns 1 if OP contains a symbol reference */
4505 symbolic_reference_mentioned_p (rtx op
)
4510 if (GET_CODE (op
) == SYMBOL_REF
|| GET_CODE (op
) == LABEL_REF
)
4513 fmt
= GET_RTX_FORMAT (GET_CODE (op
));
4514 for (i
= GET_RTX_LENGTH (GET_CODE (op
)) - 1; i
>= 0; i
--)
4520 for (j
= XVECLEN (op
, i
) - 1; j
>= 0; j
--)
4521 if (symbolic_reference_mentioned_p (XVECEXP (op
, i
, j
)))
4525 else if (fmt
[i
] == 'e' && symbolic_reference_mentioned_p (XEXP (op
, i
)))
4532 /* Return 1 if it is appropriate to emit `ret' instructions in the
4533 body of a function. Do this only if the epilogue is simple, needing a
4534 couple of insns. Prior to reloading, we can't tell how many registers
4535 must be saved, so return 0 then. Return 0 if there is no frame
4536 marker to de-allocate.
4538 If NON_SAVING_SETJMP is defined and true, then it is not possible
4539 for the epilogue to be simple, so return 0. This is a special case
4540 since NON_SAVING_SETJMP will not cause regs_ever_live to change
4541 until final, but jump_optimize may need to know sooner if a
4545 ix86_can_use_return_insn_p (void)
4547 struct ix86_frame frame
;
4549 #ifdef NON_SAVING_SETJMP
4550 if (NON_SAVING_SETJMP
&& current_function_calls_setjmp
)
4554 if (! reload_completed
|| frame_pointer_needed
)
4557 /* Don't allow more than 32 pop, since that's all we can do
4558 with one instruction. */
4559 if (current_function_pops_args
4560 && current_function_args_size
>= 32768)
4563 ix86_compute_frame_layout (&frame
);
4564 return frame
.to_allocate
== 0 && frame
.nregs
== 0;
4567 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
4569 x86_64_sign_extended_value (rtx value
)
4571 switch (GET_CODE (value
))
4573 /* CONST_DOUBLES never match, since HOST_BITS_PER_WIDE_INT is known
4574 to be at least 32 and this all acceptable constants are
4575 represented as CONST_INT. */
4577 if (HOST_BITS_PER_WIDE_INT
== 32)
4581 HOST_WIDE_INT val
= trunc_int_for_mode (INTVAL (value
), DImode
);
4582 return trunc_int_for_mode (val
, SImode
) == val
;
4586 /* For certain code models, the symbolic references are known to fit.
4587 in CM_SMALL_PIC model we know it fits if it is local to the shared
4588 library. Don't count TLS SYMBOL_REFs here, since they should fit
4589 only if inside of UNSPEC handled below. */
4591 /* TLS symbols are not constant. */
4592 if (tls_symbolic_operand (value
, Pmode
))
4594 return (ix86_cmodel
== CM_SMALL
|| ix86_cmodel
== CM_KERNEL
);
4596 /* For certain code models, the code is near as well. */
4598 return (ix86_cmodel
== CM_SMALL
|| ix86_cmodel
== CM_MEDIUM
4599 || ix86_cmodel
== CM_KERNEL
);
4601 /* We also may accept the offsetted memory references in certain special
4604 if (GET_CODE (XEXP (value
, 0)) == UNSPEC
)
4605 switch (XINT (XEXP (value
, 0), 1))
4607 case UNSPEC_GOTPCREL
:
4609 case UNSPEC_GOTNTPOFF
:
4615 if (GET_CODE (XEXP (value
, 0)) == PLUS
)
4617 rtx op1
= XEXP (XEXP (value
, 0), 0);
4618 rtx op2
= XEXP (XEXP (value
, 0), 1);
4619 HOST_WIDE_INT offset
;
4621 if (ix86_cmodel
== CM_LARGE
)
4623 if (GET_CODE (op2
) != CONST_INT
)
4625 offset
= trunc_int_for_mode (INTVAL (op2
), DImode
);
4626 switch (GET_CODE (op1
))
4629 /* For CM_SMALL assume that latest object is 16MB before
4630 end of 31bits boundary. We may also accept pretty
4631 large negative constants knowing that all objects are
4632 in the positive half of address space. */
4633 if (ix86_cmodel
== CM_SMALL
4634 && offset
< 16*1024*1024
4635 && trunc_int_for_mode (offset
, SImode
) == offset
)
4637 /* For CM_KERNEL we know that all object resist in the
4638 negative half of 32bits address space. We may not
4639 accept negative offsets, since they may be just off
4640 and we may accept pretty large positive ones. */
4641 if (ix86_cmodel
== CM_KERNEL
4643 && trunc_int_for_mode (offset
, SImode
) == offset
)
4647 /* These conditions are similar to SYMBOL_REF ones, just the
4648 constraints for code models differ. */
4649 if ((ix86_cmodel
== CM_SMALL
|| ix86_cmodel
== CM_MEDIUM
)
4650 && offset
< 16*1024*1024
4651 && trunc_int_for_mode (offset
, SImode
) == offset
)
4653 if (ix86_cmodel
== CM_KERNEL
4655 && trunc_int_for_mode (offset
, SImode
) == offset
)
4659 switch (XINT (op1
, 1))
4664 && trunc_int_for_mode (offset
, SImode
) == offset
)
4678 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
4680 x86_64_zero_extended_value (rtx value
)
4682 switch (GET_CODE (value
))
4685 if (HOST_BITS_PER_WIDE_INT
== 32)
4686 return (GET_MODE (value
) == VOIDmode
4687 && !CONST_DOUBLE_HIGH (value
));
4691 if (HOST_BITS_PER_WIDE_INT
== 32)
4692 return INTVAL (value
) >= 0;
4694 return !(INTVAL (value
) & ~(HOST_WIDE_INT
) 0xffffffff);
4697 /* For certain code models, the symbolic references are known to fit. */
4699 /* TLS symbols are not constant. */
4700 if (tls_symbolic_operand (value
, Pmode
))
4702 return ix86_cmodel
== CM_SMALL
;
4704 /* For certain code models, the code is near as well. */
4706 return ix86_cmodel
== CM_SMALL
|| ix86_cmodel
== CM_MEDIUM
;
4708 /* We also may accept the offsetted memory references in certain special
4711 if (GET_CODE (XEXP (value
, 0)) == PLUS
)
4713 rtx op1
= XEXP (XEXP (value
, 0), 0);
4714 rtx op2
= XEXP (XEXP (value
, 0), 1);
4716 if (ix86_cmodel
== CM_LARGE
)
4718 switch (GET_CODE (op1
))
4722 /* For small code model we may accept pretty large positive
4723 offsets, since one bit is available for free. Negative
4724 offsets are limited by the size of NULL pointer area
4725 specified by the ABI. */
4726 if (ix86_cmodel
== CM_SMALL
4727 && GET_CODE (op2
) == CONST_INT
4728 && trunc_int_for_mode (INTVAL (op2
), DImode
) > -0x10000
4729 && (trunc_int_for_mode (INTVAL (op2
), SImode
)
4732 /* ??? For the kernel, we may accept adjustment of
4733 -0x10000000, since we know that it will just convert
4734 negative address space to positive, but perhaps this
4735 is not worthwhile. */
4738 /* These conditions are similar to SYMBOL_REF ones, just the
4739 constraints for code models differ. */
4740 if ((ix86_cmodel
== CM_SMALL
|| ix86_cmodel
== CM_MEDIUM
)
4741 && GET_CODE (op2
) == CONST_INT
4742 && trunc_int_for_mode (INTVAL (op2
), DImode
) > -0x10000
4743 && (trunc_int_for_mode (INTVAL (op2
), SImode
)
4757 /* Value should be nonzero if functions must have frame pointers.
4758 Zero means the frame pointer need not be set up (and parms may
4759 be accessed via the stack pointer) in functions that seem suitable. */
4762 ix86_frame_pointer_required (void)
4764 /* If we accessed previous frames, then the generated code expects
4765 to be able to access the saved ebp value in our frame. */
4766 if (cfun
->machine
->accesses_prev_frame
)
4769 /* Several x86 os'es need a frame pointer for other reasons,
4770 usually pertaining to setjmp. */
4771 if (SUBTARGET_FRAME_POINTER_REQUIRED
)
4774 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4775 the frame pointer by default. Turn it back on now if we've not
4776 got a leaf function. */
4777 if (TARGET_OMIT_LEAF_FRAME_POINTER
4778 && (!current_function_is_leaf
))
4781 if (current_function_profile
)
4787 /* Record that the current function accesses previous call frames. */
4790 ix86_setup_frame_addresses (void)
4792 cfun
->machine
->accesses_prev_frame
= 1;
4795 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
4796 # define USE_HIDDEN_LINKONCE 1
4798 # define USE_HIDDEN_LINKONCE 0
4801 static int pic_labels_used
;
4803 /* Fills in the label name that should be used for a pc thunk for
4804 the given register. */
4807 get_pc_thunk_name (char name
[32], unsigned int regno
)
4809 if (USE_HIDDEN_LINKONCE
)
4810 sprintf (name
, "__i686.get_pc_thunk.%s", reg_names
[regno
]);
4812 ASM_GENERATE_INTERNAL_LABEL (name
, "LPR", regno
);
4816 /* This function generates code for -fpic that loads %ebx with
4817 the return address of the caller and then returns. */
4820 ix86_file_end (void)
4825 for (regno
= 0; regno
< 8; ++regno
)
4829 if (! ((pic_labels_used
>> regno
) & 1))
4832 get_pc_thunk_name (name
, regno
);
4834 if (USE_HIDDEN_LINKONCE
)
4838 decl
= build_decl (FUNCTION_DECL
, get_identifier (name
),
4840 TREE_PUBLIC (decl
) = 1;
4841 TREE_STATIC (decl
) = 1;
4842 DECL_ONE_ONLY (decl
) = 1;
4844 (*targetm
.asm_out
.unique_section
) (decl
, 0);
4845 named_section (decl
, NULL
, 0);
4847 (*targetm
.asm_out
.globalize_label
) (asm_out_file
, name
);
4848 fputs ("\t.hidden\t", asm_out_file
);
4849 assemble_name (asm_out_file
, name
);
4850 fputc ('\n', asm_out_file
);
4851 ASM_DECLARE_FUNCTION_NAME (asm_out_file
, name
, decl
);
4856 ASM_OUTPUT_LABEL (asm_out_file
, name
);
4859 xops
[0] = gen_rtx_REG (SImode
, regno
);
4860 xops
[1] = gen_rtx_MEM (SImode
, stack_pointer_rtx
);
4861 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops
);
4862 output_asm_insn ("ret", xops
);
4865 if (NEED_INDICATE_EXEC_STACK
)
4866 file_end_indicate_exec_stack ();
4869 /* Emit code for the SET_GOT patterns. */
4872 output_set_got (rtx dest
)
4877 xops
[1] = gen_rtx_SYMBOL_REF (Pmode
, GOT_SYMBOL_NAME
);
4879 if (! TARGET_DEEP_BRANCH_PREDICTION
|| !flag_pic
)
4881 xops
[2] = gen_rtx_LABEL_REF (Pmode
, gen_label_rtx ());
4884 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops
);
4886 output_asm_insn ("call\t%a2", xops
);
4889 /* Output the "canonical" label name ("Lxx$pb") here too. This
4890 is what will be referred to by the Mach-O PIC subsystem. */
4891 ASM_OUTPUT_LABEL (asm_out_file
, machopic_function_base_name ());
4893 (*targetm
.asm_out
.internal_label
) (asm_out_file
, "L",
4894 CODE_LABEL_NUMBER (XEXP (xops
[2], 0)));
4897 output_asm_insn ("pop{l}\t%0", xops
);
4902 get_pc_thunk_name (name
, REGNO (dest
));
4903 pic_labels_used
|= 1 << REGNO (dest
);
4905 xops
[2] = gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (name
));
4906 xops
[2] = gen_rtx_MEM (QImode
, xops
[2]);
4907 output_asm_insn ("call\t%X2", xops
);
4910 if (!flag_pic
|| TARGET_DEEP_BRANCH_PREDICTION
)
4911 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops
);
4912 else if (!TARGET_MACHO
)
4913 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops
);
4918 /* Generate an "push" pattern for input ARG. */
4923 return gen_rtx_SET (VOIDmode
,
4925 gen_rtx_PRE_DEC (Pmode
,
4926 stack_pointer_rtx
)),
4930 /* Return >= 0 if there is an unused call-clobbered register available
4931 for the entire function. */
4934 ix86_select_alt_pic_regnum (void)
4936 if (current_function_is_leaf
&& !current_function_profile
)
4939 for (i
= 2; i
>= 0; --i
)
4940 if (!regs_ever_live
[i
])
4944 return INVALID_REGNUM
;
4947 /* Return 1 if we need to save REGNO. */
4949 ix86_save_reg (unsigned int regno
, int maybe_eh_return
)
4951 if (pic_offset_table_rtx
4952 && regno
== REAL_PIC_OFFSET_TABLE_REGNUM
4953 && (regs_ever_live
[REAL_PIC_OFFSET_TABLE_REGNUM
]
4954 || current_function_profile
4955 || current_function_calls_eh_return
4956 || current_function_uses_const_pool
))
4958 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM
)
4963 if (current_function_calls_eh_return
&& maybe_eh_return
)
4968 unsigned test
= EH_RETURN_DATA_REGNO (i
);
4969 if (test
== INVALID_REGNUM
)
4976 return (regs_ever_live
[regno
]
4977 && !call_used_regs
[regno
]
4978 && !fixed_regs
[regno
]
4979 && (regno
!= HARD_FRAME_POINTER_REGNUM
|| !frame_pointer_needed
));
4982 /* Return number of registers to be saved on the stack. */
4985 ix86_nsaved_regs (void)
4990 for (regno
= FIRST_PSEUDO_REGISTER
- 1; regno
>= 0; regno
--)
4991 if (ix86_save_reg (regno
, true))
4996 /* Return the offset between two registers, one to be eliminated, and the other
4997 its replacement, at the start of a routine. */
5000 ix86_initial_elimination_offset (int from
, int to
)
5002 struct ix86_frame frame
;
5003 ix86_compute_frame_layout (&frame
);
5005 if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
5006 return frame
.hard_frame_pointer_offset
;
5007 else if (from
== FRAME_POINTER_REGNUM
5008 && to
== HARD_FRAME_POINTER_REGNUM
)
5009 return frame
.hard_frame_pointer_offset
- frame
.frame_pointer_offset
;
5012 if (to
!= STACK_POINTER_REGNUM
)
5014 else if (from
== ARG_POINTER_REGNUM
)
5015 return frame
.stack_pointer_offset
;
5016 else if (from
!= FRAME_POINTER_REGNUM
)
5019 return frame
.stack_pointer_offset
- frame
.frame_pointer_offset
;
5023 /* Fill structure ix86_frame about frame of currently computed function. */
5026 ix86_compute_frame_layout (struct ix86_frame
*frame
)
5028 HOST_WIDE_INT total_size
;
5029 int stack_alignment_needed
= cfun
->stack_alignment_needed
/ BITS_PER_UNIT
;
5030 HOST_WIDE_INT offset
;
5031 int preferred_alignment
= cfun
->preferred_stack_boundary
/ BITS_PER_UNIT
;
5032 HOST_WIDE_INT size
= get_frame_size ();
5034 frame
->nregs
= ix86_nsaved_regs ();
5037 /* During reload iteration the amount of registers saved can change.
5038 Recompute the value as needed. Do not recompute when amount of registers
5039 didn't change as reload does mutiple calls to the function and does not
5040 expect the decision to change within single iteration. */
5042 && cfun
->machine
->use_fast_prologue_epilogue_nregs
!= frame
->nregs
)
5044 int count
= frame
->nregs
;
5046 cfun
->machine
->use_fast_prologue_epilogue_nregs
= count
;
5047 /* The fast prologue uses move instead of push to save registers. This
5048 is significantly longer, but also executes faster as modern hardware
5049 can execute the moves in parallel, but can't do that for push/pop.
5051 Be careful about choosing what prologue to emit: When function takes
5052 many instructions to execute we may use slow version as well as in
5053 case function is known to be outside hot spot (this is known with
5054 feedback only). Weight the size of function by number of registers
5055 to save as it is cheap to use one or two push instructions but very
5056 slow to use many of them. */
5058 count
= (count
- 1) * FAST_PROLOGUE_INSN_COUNT
;
5059 if (cfun
->function_frequency
< FUNCTION_FREQUENCY_NORMAL
5060 || (flag_branch_probabilities
5061 && cfun
->function_frequency
< FUNCTION_FREQUENCY_HOT
))
5062 cfun
->machine
->use_fast_prologue_epilogue
= false;
5064 cfun
->machine
->use_fast_prologue_epilogue
5065 = !expensive_function_p (count
);
5067 if (TARGET_PROLOGUE_USING_MOVE
5068 && cfun
->machine
->use_fast_prologue_epilogue
)
5069 frame
->save_regs_using_mov
= true;
5071 frame
->save_regs_using_mov
= false;
5074 /* Skip return address and saved base pointer. */
5075 offset
= frame_pointer_needed
? UNITS_PER_WORD
* 2 : UNITS_PER_WORD
;
5077 frame
->hard_frame_pointer_offset
= offset
;
5079 /* Do some sanity checking of stack_alignment_needed and
5080 preferred_alignment, since i386 port is the only using those features
5081 that may break easily. */
5083 if (size
&& !stack_alignment_needed
)
5085 if (preferred_alignment
< STACK_BOUNDARY
/ BITS_PER_UNIT
)
5087 if (preferred_alignment
> PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
)
5089 if (stack_alignment_needed
> PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
)
5092 if (stack_alignment_needed
< STACK_BOUNDARY
/ BITS_PER_UNIT
)
5093 stack_alignment_needed
= STACK_BOUNDARY
/ BITS_PER_UNIT
;
5095 /* Register save area */
5096 offset
+= frame
->nregs
* UNITS_PER_WORD
;
5099 if (ix86_save_varrargs_registers
)
5101 offset
+= X86_64_VARARGS_SIZE
;
5102 frame
->va_arg_size
= X86_64_VARARGS_SIZE
;
5105 frame
->va_arg_size
= 0;
5107 /* Align start of frame for local function. */
5108 frame
->padding1
= ((offset
+ stack_alignment_needed
- 1)
5109 & -stack_alignment_needed
) - offset
;
5111 offset
+= frame
->padding1
;
5113 /* Frame pointer points here. */
5114 frame
->frame_pointer_offset
= offset
;
5118 /* Add outgoing arguments area. Can be skipped if we eliminated
5119 all the function calls as dead code.
5120 Skipping is however impossible when function calls alloca. Alloca
5121 expander assumes that last current_function_outgoing_args_size
5122 of stack frame are unused. */
5123 if (ACCUMULATE_OUTGOING_ARGS
5124 && (!current_function_is_leaf
|| current_function_calls_alloca
))
5126 offset
+= current_function_outgoing_args_size
;
5127 frame
->outgoing_arguments_size
= current_function_outgoing_args_size
;
5130 frame
->outgoing_arguments_size
= 0;
5132 /* Align stack boundary. Only needed if we're calling another function
5134 if (!current_function_is_leaf
|| current_function_calls_alloca
)
5135 frame
->padding2
= ((offset
+ preferred_alignment
- 1)
5136 & -preferred_alignment
) - offset
;
5138 frame
->padding2
= 0;
5140 offset
+= frame
->padding2
;
5142 /* We've reached end of stack frame. */
5143 frame
->stack_pointer_offset
= offset
;
5145 /* Size prologue needs to allocate. */
5146 frame
->to_allocate
=
5147 (size
+ frame
->padding1
+ frame
->padding2
5148 + frame
->outgoing_arguments_size
+ frame
->va_arg_size
);
5150 if ((!frame
->to_allocate
&& frame
->nregs
<= 1)
5151 || (TARGET_64BIT
&& frame
->to_allocate
>= (HOST_WIDE_INT
) 0x80000000))
5152 frame
->save_regs_using_mov
= false;
5154 if (TARGET_RED_ZONE
&& current_function_sp_is_unchanging
5155 && current_function_is_leaf
)
5157 frame
->red_zone_size
= frame
->to_allocate
;
5158 if (frame
->save_regs_using_mov
)
5159 frame
->red_zone_size
+= frame
->nregs
* UNITS_PER_WORD
;
5160 if (frame
->red_zone_size
> RED_ZONE_SIZE
- RED_ZONE_RESERVE
)
5161 frame
->red_zone_size
= RED_ZONE_SIZE
- RED_ZONE_RESERVE
;
5164 frame
->red_zone_size
= 0;
5165 frame
->to_allocate
-= frame
->red_zone_size
;
5166 frame
->stack_pointer_offset
-= frame
->red_zone_size
;
5168 fprintf (stderr
, "nregs: %i\n", frame
->nregs
);
5169 fprintf (stderr
, "size: %i\n", size
);
5170 fprintf (stderr
, "alignment1: %i\n", stack_alignment_needed
);
5171 fprintf (stderr
, "padding1: %i\n", frame
->padding1
);
5172 fprintf (stderr
, "va_arg: %i\n", frame
->va_arg_size
);
5173 fprintf (stderr
, "padding2: %i\n", frame
->padding2
);
5174 fprintf (stderr
, "to_allocate: %i\n", frame
->to_allocate
);
5175 fprintf (stderr
, "red_zone_size: %i\n", frame
->red_zone_size
);
5176 fprintf (stderr
, "frame_pointer_offset: %i\n", frame
->frame_pointer_offset
);
5177 fprintf (stderr
, "hard_frame_pointer_offset: %i\n",
5178 frame
->hard_frame_pointer_offset
);
5179 fprintf (stderr
, "stack_pointer_offset: %i\n", frame
->stack_pointer_offset
);
5183 /* Emit code to save registers in the prologue. */
5186 ix86_emit_save_regs (void)
5191 for (regno
= FIRST_PSEUDO_REGISTER
- 1; regno
>= 0; regno
--)
5192 if (ix86_save_reg (regno
, true))
5194 insn
= emit_insn (gen_push (gen_rtx_REG (Pmode
, regno
)));
5195 RTX_FRAME_RELATED_P (insn
) = 1;
5199 /* Emit code to save registers using MOV insns. First register
5200 is restored from POINTER + OFFSET. */
5202 ix86_emit_save_regs_using_mov (rtx pointer
, HOST_WIDE_INT offset
)
5207 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
5208 if (ix86_save_reg (regno
, true))
5210 insn
= emit_move_insn (adjust_address (gen_rtx_MEM (Pmode
, pointer
),
5212 gen_rtx_REG (Pmode
, regno
));
5213 RTX_FRAME_RELATED_P (insn
) = 1;
5214 offset
+= UNITS_PER_WORD
;
5218 /* Expand prologue or epilogue stack adjustment.
5219 The pattern exist to put a dependency on all ebp-based memory accesses.
5220 STYLE should be negative if instructions should be marked as frame related,
5221 zero if %r11 register is live and cannot be freely used and positive
5225 pro_epilogue_adjust_stack (rtx dest
, rtx src
, rtx offset
, int style
)
5230 insn
= emit_insn (gen_pro_epilogue_adjust_stack_1 (dest
, src
, offset
));
5231 else if (x86_64_immediate_operand (offset
, DImode
))
5232 insn
= emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest
, src
, offset
));
5236 /* r11 is used by indirect sibcall return as well, set before the
5237 epilogue and used after the epilogue. ATM indirect sibcall
5238 shouldn't be used together with huge frame sizes in one
5239 function because of the frame_size check in sibcall.c. */
5242 r11
= gen_rtx_REG (DImode
, FIRST_REX_INT_REG
+ 3 /* R11 */);
5243 insn
= emit_insn (gen_rtx_SET (DImode
, r11
, offset
));
5245 RTX_FRAME_RELATED_P (insn
) = 1;
5246 insn
= emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest
, src
, r11
,
5250 RTX_FRAME_RELATED_P (insn
) = 1;
5253 /* Expand the prologue into a bunch of separate insns. */
5256 ix86_expand_prologue (void)
5260 struct ix86_frame frame
;
5261 HOST_WIDE_INT allocate
;
5263 ix86_compute_frame_layout (&frame
);
5265 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5266 slower on all targets. Also sdb doesn't like it. */
5268 if (frame_pointer_needed
)
5270 insn
= emit_insn (gen_push (hard_frame_pointer_rtx
));
5271 RTX_FRAME_RELATED_P (insn
) = 1;
5273 insn
= emit_move_insn (hard_frame_pointer_rtx
, stack_pointer_rtx
);
5274 RTX_FRAME_RELATED_P (insn
) = 1;
5277 allocate
= frame
.to_allocate
;
5279 if (!frame
.save_regs_using_mov
)
5280 ix86_emit_save_regs ();
5282 allocate
+= frame
.nregs
* UNITS_PER_WORD
;
5284 /* When using red zone we may start register saving before allocating
5285 the stack frame saving one cycle of the prologue. */
5286 if (TARGET_RED_ZONE
&& frame
.save_regs_using_mov
)
5287 ix86_emit_save_regs_using_mov (frame_pointer_needed
? hard_frame_pointer_rtx
5288 : stack_pointer_rtx
,
5289 -frame
.nregs
* UNITS_PER_WORD
);
5293 else if (! TARGET_STACK_PROBE
|| allocate
< CHECK_STACK_LIMIT
)
5294 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
5295 GEN_INT (-allocate
), -1);
5298 /* Only valid for Win32. */
5299 rtx eax
= gen_rtx_REG (SImode
, 0);
5300 bool eax_live
= ix86_eax_live_at_start_p ();
5307 emit_insn (gen_push (eax
));
5311 insn
= emit_move_insn (eax
, GEN_INT (allocate
));
5312 RTX_FRAME_RELATED_P (insn
) = 1;
5314 insn
= emit_insn (gen_allocate_stack_worker (eax
));
5315 RTX_FRAME_RELATED_P (insn
) = 1;
5319 rtx t
= plus_constant (stack_pointer_rtx
, allocate
);
5320 emit_move_insn (eax
, gen_rtx_MEM (SImode
, t
));
5324 if (frame
.save_regs_using_mov
&& !TARGET_RED_ZONE
)
5326 if (!frame_pointer_needed
|| !frame
.to_allocate
)
5327 ix86_emit_save_regs_using_mov (stack_pointer_rtx
, frame
.to_allocate
);
5329 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx
,
5330 -frame
.nregs
* UNITS_PER_WORD
);
5333 pic_reg_used
= false;
5334 if (pic_offset_table_rtx
5335 && (regs_ever_live
[REAL_PIC_OFFSET_TABLE_REGNUM
]
5336 || current_function_profile
))
5338 unsigned int alt_pic_reg_used
= ix86_select_alt_pic_regnum ();
5340 if (alt_pic_reg_used
!= INVALID_REGNUM
)
5341 REGNO (pic_offset_table_rtx
) = alt_pic_reg_used
;
5343 pic_reg_used
= true;
5348 insn
= emit_insn (gen_set_got (pic_offset_table_rtx
));
5350 /* Even with accurate pre-reload life analysis, we can wind up
5351 deleting all references to the pic register after reload.
5352 Consider if cross-jumping unifies two sides of a branch
5353 controlled by a comparison vs the only read from a global.
5354 In which case, allow the set_got to be deleted, though we're
5355 too late to do anything about the ebx save in the prologue. */
5356 REG_NOTES (insn
) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD
, const0_rtx
, NULL
);
5359 /* Prevent function calls from be scheduled before the call to mcount.
5360 In the pic_reg_used case, make sure that the got load isn't deleted. */
5361 if (current_function_profile
)
5362 emit_insn (gen_blockage (pic_reg_used
? pic_offset_table_rtx
: const0_rtx
));
5365 /* Emit code to restore saved registers using MOV insns. First register
5366 is restored from POINTER + OFFSET. */
5368 ix86_emit_restore_regs_using_mov (rtx pointer
, HOST_WIDE_INT offset
,
5369 int maybe_eh_return
)
5372 rtx base_address
= gen_rtx_MEM (Pmode
, pointer
);
5374 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
5375 if (ix86_save_reg (regno
, maybe_eh_return
))
5377 /* Ensure that adjust_address won't be forced to produce pointer
5378 out of range allowed by x86-64 instruction set. */
5379 if (TARGET_64BIT
&& offset
!= trunc_int_for_mode (offset
, SImode
))
5383 r11
= gen_rtx_REG (DImode
, FIRST_REX_INT_REG
+ 3 /* R11 */);
5384 emit_move_insn (r11
, GEN_INT (offset
));
5385 emit_insn (gen_adddi3 (r11
, r11
, pointer
));
5386 base_address
= gen_rtx_MEM (Pmode
, r11
);
5389 emit_move_insn (gen_rtx_REG (Pmode
, regno
),
5390 adjust_address (base_address
, Pmode
, offset
));
5391 offset
+= UNITS_PER_WORD
;
5395 /* Restore function stack, frame, and registers. */
5398 ix86_expand_epilogue (int style
)
5401 int sp_valid
= !frame_pointer_needed
|| current_function_sp_is_unchanging
;
5402 struct ix86_frame frame
;
5403 HOST_WIDE_INT offset
;
5405 ix86_compute_frame_layout (&frame
);
5407 /* Calculate start of saved registers relative to ebp. Special care
5408 must be taken for the normal return case of a function using
5409 eh_return: the eax and edx registers are marked as saved, but not
5410 restored along this path. */
5411 offset
= frame
.nregs
;
5412 if (current_function_calls_eh_return
&& style
!= 2)
5414 offset
*= -UNITS_PER_WORD
;
5416 /* If we're only restoring one register and sp is not valid then
5417 using a move instruction to restore the register since it's
5418 less work than reloading sp and popping the register.
5420 The default code result in stack adjustment using add/lea instruction,
5421 while this code results in LEAVE instruction (or discrete equivalent),
5422 so it is profitable in some other cases as well. Especially when there
5423 are no registers to restore. We also use this code when TARGET_USE_LEAVE
5424 and there is exactly one register to pop. This heuristic may need some
5425 tuning in future. */
5426 if ((!sp_valid
&& frame
.nregs
<= 1)
5427 || (TARGET_EPILOGUE_USING_MOVE
5428 && cfun
->machine
->use_fast_prologue_epilogue
5429 && (frame
.nregs
> 1 || frame
.to_allocate
))
5430 || (frame_pointer_needed
&& !frame
.nregs
&& frame
.to_allocate
)
5431 || (frame_pointer_needed
&& TARGET_USE_LEAVE
5432 && cfun
->machine
->use_fast_prologue_epilogue
5433 && frame
.nregs
== 1)
5434 || current_function_calls_eh_return
)
5436 /* Restore registers. We can use ebp or esp to address the memory
5437 locations. If both are available, default to ebp, since offsets
5438 are known to be small. Only exception is esp pointing directly to the
5439 end of block of saved registers, where we may simplify addressing
5442 if (!frame_pointer_needed
|| (sp_valid
&& !frame
.to_allocate
))
5443 ix86_emit_restore_regs_using_mov (stack_pointer_rtx
,
5444 frame
.to_allocate
, style
== 2);
5446 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx
,
5447 offset
, style
== 2);
5449 /* eh_return epilogues need %ecx added to the stack pointer. */
5452 rtx tmp
, sa
= EH_RETURN_STACKADJ_RTX
;
5454 if (frame_pointer_needed
)
5456 tmp
= gen_rtx_PLUS (Pmode
, hard_frame_pointer_rtx
, sa
);
5457 tmp
= plus_constant (tmp
, UNITS_PER_WORD
);
5458 emit_insn (gen_rtx_SET (VOIDmode
, sa
, tmp
));
5460 tmp
= gen_rtx_MEM (Pmode
, hard_frame_pointer_rtx
);
5461 emit_move_insn (hard_frame_pointer_rtx
, tmp
);
5463 pro_epilogue_adjust_stack (stack_pointer_rtx
, sa
,
5468 tmp
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, sa
);
5469 tmp
= plus_constant (tmp
, (frame
.to_allocate
5470 + frame
.nregs
* UNITS_PER_WORD
));
5471 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, tmp
));
5474 else if (!frame_pointer_needed
)
5475 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
5476 GEN_INT (frame
.to_allocate
5477 + frame
.nregs
* UNITS_PER_WORD
),
5479 /* If not an i386, mov & pop is faster than "leave". */
5480 else if (TARGET_USE_LEAVE
|| optimize_size
5481 || !cfun
->machine
->use_fast_prologue_epilogue
)
5482 emit_insn (TARGET_64BIT
? gen_leave_rex64 () : gen_leave ());
5485 pro_epilogue_adjust_stack (stack_pointer_rtx
,
5486 hard_frame_pointer_rtx
,
5489 emit_insn (gen_popdi1 (hard_frame_pointer_rtx
));
5491 emit_insn (gen_popsi1 (hard_frame_pointer_rtx
));
5496 /* First step is to deallocate the stack frame so that we can
5497 pop the registers. */
5500 if (!frame_pointer_needed
)
5502 pro_epilogue_adjust_stack (stack_pointer_rtx
,
5503 hard_frame_pointer_rtx
,
5504 GEN_INT (offset
), style
);
5506 else if (frame
.to_allocate
)
5507 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
5508 GEN_INT (frame
.to_allocate
), style
);
5510 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
5511 if (ix86_save_reg (regno
, false))
5514 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode
, regno
)));
5516 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode
, regno
)));
5518 if (frame_pointer_needed
)
5520 /* Leave results in shorter dependency chains on CPUs that are
5521 able to grok it fast. */
5522 if (TARGET_USE_LEAVE
)
5523 emit_insn (TARGET_64BIT
? gen_leave_rex64 () : gen_leave ());
5524 else if (TARGET_64BIT
)
5525 emit_insn (gen_popdi1 (hard_frame_pointer_rtx
));
5527 emit_insn (gen_popsi1 (hard_frame_pointer_rtx
));
5531 /* Sibcall epilogues don't want a return instruction. */
5535 if (current_function_pops_args
&& current_function_args_size
)
5537 rtx popc
= GEN_INT (current_function_pops_args
);
5539 /* i386 can only pop 64K bytes. If asked to pop more, pop
5540 return address, do explicit add, and jump indirectly to the
5543 if (current_function_pops_args
>= 65536)
5545 rtx ecx
= gen_rtx_REG (SImode
, 2);
5547 /* There is no "pascal" calling convention in 64bit ABI. */
5551 emit_insn (gen_popsi1 (ecx
));
5552 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, popc
));
5553 emit_jump_insn (gen_return_indirect_internal (ecx
));
5556 emit_jump_insn (gen_return_pop_internal (popc
));
5559 emit_jump_insn (gen_return_internal ());
5562 /* Reset from the function's potential modifications. */
5565 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED
,
5566 HOST_WIDE_INT size ATTRIBUTE_UNUSED
)
5568 if (pic_offset_table_rtx
)
5569 REGNO (pic_offset_table_rtx
) = REAL_PIC_OFFSET_TABLE_REGNUM
;
5572 /* Extract the parts of an RTL expression that is a valid memory address
5573 for an instruction. Return 0 if the structure of the address is
5574 grossly off. Return -1 if the address contains ASHIFT, so it is not
5575 strictly valid, but still used for computing length of lea instruction. */
5578 ix86_decompose_address (rtx addr
, struct ix86_address
*out
)
5580 rtx base
= NULL_RTX
;
5581 rtx index
= NULL_RTX
;
5582 rtx disp
= NULL_RTX
;
5583 HOST_WIDE_INT scale
= 1;
5584 rtx scale_rtx
= NULL_RTX
;
5586 enum ix86_address_seg seg
= SEG_DEFAULT
;
5588 if (GET_CODE (addr
) == REG
|| GET_CODE (addr
) == SUBREG
)
5590 else if (GET_CODE (addr
) == PLUS
)
5600 addends
[n
++] = XEXP (op
, 1);
5603 while (GET_CODE (op
) == PLUS
);
5608 for (i
= n
; i
>= 0; --i
)
5611 switch (GET_CODE (op
))
5616 index
= XEXP (op
, 0);
5617 scale_rtx
= XEXP (op
, 1);
5621 if (XINT (op
, 1) == UNSPEC_TP
5622 && TARGET_TLS_DIRECT_SEG_REFS
5623 && seg
== SEG_DEFAULT
)
5624 seg
= TARGET_64BIT
? SEG_FS
: SEG_GS
;
5653 else if (GET_CODE (addr
) == MULT
)
5655 index
= XEXP (addr
, 0); /* index*scale */
5656 scale_rtx
= XEXP (addr
, 1);
5658 else if (GET_CODE (addr
) == ASHIFT
)
5662 /* We're called for lea too, which implements ashift on occasion. */
5663 index
= XEXP (addr
, 0);
5664 tmp
= XEXP (addr
, 1);
5665 if (GET_CODE (tmp
) != CONST_INT
)
5667 scale
= INTVAL (tmp
);
5668 if ((unsigned HOST_WIDE_INT
) scale
> 3)
5674 disp
= addr
; /* displacement */
5676 /* Extract the integral value of scale. */
5679 if (GET_CODE (scale_rtx
) != CONST_INT
)
5681 scale
= INTVAL (scale_rtx
);
5684 /* Allow arg pointer and stack pointer as index if there is not scaling. */
5685 if (base
&& index
&& scale
== 1
5686 && (index
== arg_pointer_rtx
5687 || index
== frame_pointer_rtx
5688 || (REG_P (index
) && REGNO (index
) == STACK_POINTER_REGNUM
)))
5695 /* Special case: %ebp cannot be encoded as a base without a displacement. */
5696 if ((base
== hard_frame_pointer_rtx
5697 || base
== frame_pointer_rtx
5698 || base
== arg_pointer_rtx
) && !disp
)
5701 /* Special case: on K6, [%esi] makes the instruction vector decoded.
5702 Avoid this by transforming to [%esi+0]. */
5703 if (ix86_tune
== PROCESSOR_K6
&& !optimize_size
5704 && base
&& !index
&& !disp
5706 && REGNO_REG_CLASS (REGNO (base
)) == SIREG
)
5709 /* Special case: encode reg+reg instead of reg*2. */
5710 if (!base
&& index
&& scale
&& scale
== 2)
5711 base
= index
, scale
= 1;
5713 /* Special case: scaling cannot be encoded without base or displacement. */
5714 if (!base
&& !disp
&& index
&& scale
!= 1)
5726 /* Return cost of the memory address x.
5727 For i386, it is better to use a complex address than let gcc copy
5728 the address into a reg and make a new pseudo. But not if the address
5729 requires to two regs - that would mean more pseudos with longer
5732 ix86_address_cost (rtx x
)
5734 struct ix86_address parts
;
5737 if (!ix86_decompose_address (x
, &parts
))
5740 /* More complex memory references are better. */
5741 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
5743 if (parts
.seg
!= SEG_DEFAULT
)
5746 /* Attempt to minimize number of registers in the address. */
5748 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
))
5750 && (!REG_P (parts
.index
)
5751 || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)))
5755 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
)
5757 && (!REG_P (parts
.index
) || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)
5758 && parts
.base
!= parts
.index
)
5761 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
5762 since it's predecode logic can't detect the length of instructions
5763 and it degenerates to vector decoded. Increase cost of such
5764 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
5765 to split such addresses or even refuse such addresses at all.
5767 Following addressing modes are affected:
5772 The first and last case may be avoidable by explicitly coding the zero in
5773 memory address, but I don't have AMD-K6 machine handy to check this
5777 && ((!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
!= 1)
5778 || (parts
.disp
&& !parts
.base
&& parts
.index
&& parts
.scale
!= 1)
5779 || (!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
== 1)))
5785 /* If X is a machine specific address (i.e. a symbol or label being
5786 referenced as a displacement from the GOT implemented using an
5787 UNSPEC), then return the base term. Otherwise return X. */
5790 ix86_find_base_term (rtx x
)
5796 if (GET_CODE (x
) != CONST
)
5799 if (GET_CODE (term
) == PLUS
5800 && (GET_CODE (XEXP (term
, 1)) == CONST_INT
5801 || GET_CODE (XEXP (term
, 1)) == CONST_DOUBLE
))
5802 term
= XEXP (term
, 0);
5803 if (GET_CODE (term
) != UNSPEC
5804 || XINT (term
, 1) != UNSPEC_GOTPCREL
)
5807 term
= XVECEXP (term
, 0, 0);
5809 if (GET_CODE (term
) != SYMBOL_REF
5810 && GET_CODE (term
) != LABEL_REF
)
5816 term
= ix86_delegitimize_address (x
);
5818 if (GET_CODE (term
) != SYMBOL_REF
5819 && GET_CODE (term
) != LABEL_REF
)
5825 /* Determine if a given RTX is a valid constant. We already know this
5826 satisfies CONSTANT_P. */
5829 legitimate_constant_p (rtx x
)
5833 switch (GET_CODE (x
))
5836 /* TLS symbols are not constant. */
5837 if (tls_symbolic_operand (x
, Pmode
))
5842 inner
= XEXP (x
, 0);
5844 /* Offsets of TLS symbols are never valid.
5845 Discourage CSE from creating them. */
5846 if (GET_CODE (inner
) == PLUS
5847 && tls_symbolic_operand (XEXP (inner
, 0), Pmode
))
5850 if (GET_CODE (inner
) == PLUS
5851 || GET_CODE (inner
) == MINUS
)
5853 if (GET_CODE (XEXP (inner
, 1)) != CONST_INT
)
5855 inner
= XEXP (inner
, 0);
5858 /* Only some unspecs are valid as "constants". */
5859 if (GET_CODE (inner
) == UNSPEC
)
5860 switch (XINT (inner
, 1))
5864 return local_exec_symbolic_operand (XVECEXP (inner
, 0, 0), Pmode
);
5866 return local_dynamic_symbolic_operand (XVECEXP (inner
, 0, 0), Pmode
);
5876 /* Otherwise we handle everything else in the move patterns. */
5880 /* Determine if it's legal to put X into the constant pool. This
5881 is not possible for the address of thread-local symbols, which
5882 is checked above. */
5885 ix86_cannot_force_const_mem (rtx x
)
5887 return !legitimate_constant_p (x
);
5890 /* Determine if a given RTX is a valid constant address. */
5893 constant_address_p (rtx x
)
5895 return CONSTANT_P (x
) && legitimate_address_p (Pmode
, x
, 1);
5898 /* Nonzero if the constant value X is a legitimate general operand
5899 when generating PIC code. It is given that flag_pic is on and
5900 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
5903 legitimate_pic_operand_p (rtx x
)
5907 switch (GET_CODE (x
))
5910 inner
= XEXP (x
, 0);
5912 /* Only some unspecs are valid as "constants". */
5913 if (GET_CODE (inner
) == UNSPEC
)
5914 switch (XINT (inner
, 1))
5917 return local_exec_symbolic_operand (XVECEXP (inner
, 0, 0), Pmode
);
5925 return legitimate_pic_address_disp_p (x
);
5932 /* Determine if a given CONST RTX is a valid memory displacement
5936 legitimate_pic_address_disp_p (rtx disp
)
5940 /* In 64bit mode we can allow direct addresses of symbols and labels
5941 when they are not dynamic symbols. */
5944 /* TLS references should always be enclosed in UNSPEC. */
5945 if (tls_symbolic_operand (disp
, GET_MODE (disp
)))
5947 if (GET_CODE (disp
) == SYMBOL_REF
5948 && ix86_cmodel
== CM_SMALL_PIC
5949 && SYMBOL_REF_LOCAL_P (disp
))
5951 if (GET_CODE (disp
) == LABEL_REF
)
5953 if (GET_CODE (disp
) == CONST
5954 && GET_CODE (XEXP (disp
, 0)) == PLUS
)
5956 rtx op0
= XEXP (XEXP (disp
, 0), 0);
5957 rtx op1
= XEXP (XEXP (disp
, 0), 1);
5959 /* TLS references should always be enclosed in UNSPEC. */
5960 if (tls_symbolic_operand (op0
, GET_MODE (op0
)))
5962 if (((GET_CODE (op0
) == SYMBOL_REF
5963 && ix86_cmodel
== CM_SMALL_PIC
5964 && SYMBOL_REF_LOCAL_P (op0
))
5965 || GET_CODE (op0
) == LABEL_REF
)
5966 && GET_CODE (op1
) == CONST_INT
5967 && INTVAL (op1
) < 16*1024*1024
5968 && INTVAL (op1
) >= -16*1024*1024)
5972 if (GET_CODE (disp
) != CONST
)
5974 disp
= XEXP (disp
, 0);
5978 /* We are unsafe to allow PLUS expressions. This limit allowed distance
5979 of GOT tables. We should not need these anyway. */
5980 if (GET_CODE (disp
) != UNSPEC
5981 || XINT (disp
, 1) != UNSPEC_GOTPCREL
)
5984 if (GET_CODE (XVECEXP (disp
, 0, 0)) != SYMBOL_REF
5985 && GET_CODE (XVECEXP (disp
, 0, 0)) != LABEL_REF
)
5991 if (GET_CODE (disp
) == PLUS
)
5993 if (GET_CODE (XEXP (disp
, 1)) != CONST_INT
)
5995 disp
= XEXP (disp
, 0);
5999 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O. */
6000 if (TARGET_MACHO
&& GET_CODE (disp
) == MINUS
)
6002 if (GET_CODE (XEXP (disp
, 0)) == LABEL_REF
6003 || GET_CODE (XEXP (disp
, 0)) == SYMBOL_REF
)
6004 if (GET_CODE (XEXP (disp
, 1)) == SYMBOL_REF
)
6006 const char *sym_name
= XSTR (XEXP (disp
, 1), 0);
6007 if (! strcmp (sym_name
, "<pic base>"))
6012 if (GET_CODE (disp
) != UNSPEC
)
6015 switch (XINT (disp
, 1))
6020 return GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
;
6022 if (GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
6023 || GET_CODE (XVECEXP (disp
, 0, 0)) == LABEL_REF
)
6024 return local_symbolic_operand (XVECEXP (disp
, 0, 0), Pmode
);
6026 case UNSPEC_GOTTPOFF
:
6027 case UNSPEC_GOTNTPOFF
:
6028 case UNSPEC_INDNTPOFF
:
6031 return initial_exec_symbolic_operand (XVECEXP (disp
, 0, 0), Pmode
);
6033 return local_exec_symbolic_operand (XVECEXP (disp
, 0, 0), Pmode
);
6035 return local_dynamic_symbolic_operand (XVECEXP (disp
, 0, 0), Pmode
);
6041 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6042 memory address for an instruction. The MODE argument is the machine mode
6043 for the MEM expression that wants to use this address.
6045 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
6046 convert common non-canonical forms to canonical form so that they will
6050 legitimate_address_p (enum machine_mode mode
, rtx addr
, int strict
)
6052 struct ix86_address parts
;
6053 rtx base
, index
, disp
;
6054 HOST_WIDE_INT scale
;
6055 const char *reason
= NULL
;
6056 rtx reason_rtx
= NULL_RTX
;
6058 if (TARGET_DEBUG_ADDR
)
6061 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
6062 GET_MODE_NAME (mode
), strict
);
6066 if (ix86_decompose_address (addr
, &parts
) <= 0)
6068 reason
= "decomposition failed";
6073 index
= parts
.index
;
6075 scale
= parts
.scale
;
6077 /* Validate base register.
6079 Don't allow SUBREG's here, it can lead to spill failures when the base
6080 is one word out of a two word structure, which is represented internally
6087 if (GET_CODE (base
) != REG
)
6089 reason
= "base is not a register";
6093 if (GET_MODE (base
) != Pmode
)
6095 reason
= "base is not in Pmode";
6099 if ((strict
&& ! REG_OK_FOR_BASE_STRICT_P (base
))
6100 || (! strict
&& ! REG_OK_FOR_BASE_NONSTRICT_P (base
)))
6102 reason
= "base is not valid";
6107 /* Validate index register.
6109 Don't allow SUBREG's here, it can lead to spill failures when the index
6110 is one word out of a two word structure, which is represented internally
6117 if (GET_CODE (index
) != REG
)
6119 reason
= "index is not a register";
6123 if (GET_MODE (index
) != Pmode
)
6125 reason
= "index is not in Pmode";
6129 if ((strict
&& ! REG_OK_FOR_INDEX_STRICT_P (index
))
6130 || (! strict
&& ! REG_OK_FOR_INDEX_NONSTRICT_P (index
)))
6132 reason
= "index is not valid";
6137 /* Validate scale factor. */
6140 reason_rtx
= GEN_INT (scale
);
6143 reason
= "scale without index";
6147 if (scale
!= 2 && scale
!= 4 && scale
!= 8)
6149 reason
= "scale is not a valid multiplier";
6154 /* Validate displacement. */
6159 if (GET_CODE (disp
) == CONST
6160 && GET_CODE (XEXP (disp
, 0)) == UNSPEC
)
6161 switch (XINT (XEXP (disp
, 0), 1))
6165 case UNSPEC_GOTPCREL
:
6168 goto is_legitimate_pic
;
6170 case UNSPEC_GOTTPOFF
:
6171 case UNSPEC_GOTNTPOFF
:
6172 case UNSPEC_INDNTPOFF
:
6178 reason
= "invalid address unspec";
6182 else if (flag_pic
&& (SYMBOLIC_CONST (disp
)
6184 && !machopic_operand_p (disp
)
6189 if (TARGET_64BIT
&& (index
|| base
))
6191 /* foo@dtpoff(%rX) is ok. */
6192 if (GET_CODE (disp
) != CONST
6193 || GET_CODE (XEXP (disp
, 0)) != PLUS
6194 || GET_CODE (XEXP (XEXP (disp
, 0), 0)) != UNSPEC
6195 || GET_CODE (XEXP (XEXP (disp
, 0), 1)) != CONST_INT
6196 || (XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_DTPOFF
6197 && XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_NTPOFF
))
6199 reason
= "non-constant pic memory reference";
6203 else if (! legitimate_pic_address_disp_p (disp
))
6205 reason
= "displacement is an invalid pic construct";
6209 /* This code used to verify that a symbolic pic displacement
6210 includes the pic_offset_table_rtx register.
6212 While this is good idea, unfortunately these constructs may
6213 be created by "adds using lea" optimization for incorrect
6222 This code is nonsensical, but results in addressing
6223 GOT table with pic_offset_table_rtx base. We can't
6224 just refuse it easily, since it gets matched by
6225 "addsi3" pattern, that later gets split to lea in the
6226 case output register differs from input. While this
6227 can be handled by separate addsi pattern for this case
6228 that never results in lea, this seems to be easier and
6229 correct fix for crash to disable this test. */
6231 else if (GET_CODE (disp
) != LABEL_REF
6232 && GET_CODE (disp
) != CONST_INT
6233 && (GET_CODE (disp
) != CONST
6234 || !legitimate_constant_p (disp
))
6235 && (GET_CODE (disp
) != SYMBOL_REF
6236 || !legitimate_constant_p (disp
)))
6238 reason
= "displacement is not constant";
6241 else if (TARGET_64BIT
&& !x86_64_sign_extended_value (disp
))
6243 reason
= "displacement is out of range";
6248 /* Everything looks valid. */
6249 if (TARGET_DEBUG_ADDR
)
6250 fprintf (stderr
, "Success.\n");
6254 if (TARGET_DEBUG_ADDR
)
6256 fprintf (stderr
, "Error: %s\n", reason
);
6257 debug_rtx (reason_rtx
);
6262 /* Return an unique alias set for the GOT. */
6264 static HOST_WIDE_INT
6265 ix86_GOT_alias_set (void)
6267 static HOST_WIDE_INT set
= -1;
6269 set
= new_alias_set ();
6273 /* Return a legitimate reference for ORIG (an address) using the
6274 register REG. If REG is 0, a new pseudo is generated.
6276 There are two types of references that must be handled:
6278 1. Global data references must load the address from the GOT, via
6279 the PIC reg. An insn is emitted to do this load, and the reg is
6282 2. Static data references, constant pool addresses, and code labels
6283 compute the address as an offset from the GOT, whose base is in
6284 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
6285 differentiate them from global data objects. The returned
6286 address is the PIC reg + an unspec constant.
6288 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6289 reg also appears in the address. */
6292 legitimize_pic_address (rtx orig
, rtx reg
)
6300 reg
= gen_reg_rtx (Pmode
);
6301 /* Use the generic Mach-O PIC machinery. */
6302 return machopic_legitimize_pic_address (orig
, GET_MODE (orig
), reg
);
6305 if (TARGET_64BIT
&& legitimate_pic_address_disp_p (addr
))
6307 else if (!TARGET_64BIT
&& local_symbolic_operand (addr
, Pmode
))
6309 /* This symbol may be referenced via a displacement from the PIC
6310 base address (@GOTOFF). */
6312 if (reload_in_progress
)
6313 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
6314 if (GET_CODE (addr
) == CONST
)
6315 addr
= XEXP (addr
, 0);
6316 if (GET_CODE (addr
) == PLUS
)
6318 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)), UNSPEC_GOTOFF
);
6319 new = gen_rtx_PLUS (Pmode
, new, XEXP (addr
, 1));
6322 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
6323 new = gen_rtx_CONST (Pmode
, new);
6324 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
6328 emit_move_insn (reg
, new);
6332 else if (GET_CODE (addr
) == SYMBOL_REF
)
6336 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTPCREL
);
6337 new = gen_rtx_CONST (Pmode
, new);
6338 new = gen_rtx_MEM (Pmode
, new);
6339 RTX_UNCHANGING_P (new) = 1;
6340 set_mem_alias_set (new, ix86_GOT_alias_set ());
6343 reg
= gen_reg_rtx (Pmode
);
6344 /* Use directly gen_movsi, otherwise the address is loaded
6345 into register for CSE. We don't want to CSE this addresses,
6346 instead we CSE addresses from the GOT table, so skip this. */
6347 emit_insn (gen_movsi (reg
, new));
6352 /* This symbol must be referenced via a load from the
6353 Global Offset Table (@GOT). */
6355 if (reload_in_progress
)
6356 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
6357 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOT
);
6358 new = gen_rtx_CONST (Pmode
, new);
6359 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
6360 new = gen_rtx_MEM (Pmode
, new);
6361 RTX_UNCHANGING_P (new) = 1;
6362 set_mem_alias_set (new, ix86_GOT_alias_set ());
6365 reg
= gen_reg_rtx (Pmode
);
6366 emit_move_insn (reg
, new);
6372 if (GET_CODE (addr
) == CONST
)
6374 addr
= XEXP (addr
, 0);
6376 /* We must match stuff we generate before. Assume the only
6377 unspecs that can get here are ours. Not that we could do
6378 anything with them anyway.... */
6379 if (GET_CODE (addr
) == UNSPEC
6380 || (GET_CODE (addr
) == PLUS
6381 && GET_CODE (XEXP (addr
, 0)) == UNSPEC
))
6383 if (GET_CODE (addr
) != PLUS
)
6386 if (GET_CODE (addr
) == PLUS
)
6388 rtx op0
= XEXP (addr
, 0), op1
= XEXP (addr
, 1);
6390 /* Check first to see if this is a constant offset from a @GOTOFF
6391 symbol reference. */
6392 if (local_symbolic_operand (op0
, Pmode
)
6393 && GET_CODE (op1
) == CONST_INT
)
6397 if (reload_in_progress
)
6398 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
6399 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, op0
),
6401 new = gen_rtx_PLUS (Pmode
, new, op1
);
6402 new = gen_rtx_CONST (Pmode
, new);
6403 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
6407 emit_move_insn (reg
, new);
6413 if (INTVAL (op1
) < -16*1024*1024
6414 || INTVAL (op1
) >= 16*1024*1024)
6415 new = gen_rtx_PLUS (Pmode
, op0
, force_reg (Pmode
, op1
));
6420 base
= legitimize_pic_address (XEXP (addr
, 0), reg
);
6421 new = legitimize_pic_address (XEXP (addr
, 1),
6422 base
== reg
? NULL_RTX
: reg
);
6424 if (GET_CODE (new) == CONST_INT
)
6425 new = plus_constant (base
, INTVAL (new));
6428 if (GET_CODE (new) == PLUS
&& CONSTANT_P (XEXP (new, 1)))
6430 base
= gen_rtx_PLUS (Pmode
, base
, XEXP (new, 0));
6431 new = XEXP (new, 1);
6433 new = gen_rtx_PLUS (Pmode
, base
, new);
6441 /* Load the thread pointer. If TO_REG is true, force it into a register. */
6444 get_thread_pointer (int to_reg
)
6448 tp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
), UNSPEC_TP
);
6452 reg
= gen_reg_rtx (Pmode
);
6453 insn
= gen_rtx_SET (VOIDmode
, reg
, tp
);
6454 insn
= emit_insn (insn
);
6459 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
6460 false if we expect this to be used for a memory address and true if
6461 we expect to load the address into a register. */
6464 legitimize_tls_address (rtx x
, enum tls_model model
, int for_mov
)
6466 rtx dest
, base
, off
, pic
;
6471 case TLS_MODEL_GLOBAL_DYNAMIC
:
6472 dest
= gen_reg_rtx (Pmode
);
6475 rtx rax
= gen_rtx_REG (Pmode
, 0), insns
;
6478 emit_call_insn (gen_tls_global_dynamic_64 (rax
, x
));
6479 insns
= get_insns ();
6482 emit_libcall_block (insns
, dest
, rax
, x
);
6485 emit_insn (gen_tls_global_dynamic_32 (dest
, x
));
6488 case TLS_MODEL_LOCAL_DYNAMIC
:
6489 base
= gen_reg_rtx (Pmode
);
6492 rtx rax
= gen_rtx_REG (Pmode
, 0), insns
, note
;
6495 emit_call_insn (gen_tls_local_dynamic_base_64 (rax
));
6496 insns
= get_insns ();
6499 note
= gen_rtx_EXPR_LIST (VOIDmode
, const0_rtx
, NULL
);
6500 note
= gen_rtx_EXPR_LIST (VOIDmode
, ix86_tls_get_addr (), note
);
6501 emit_libcall_block (insns
, base
, rax
, note
);
6504 emit_insn (gen_tls_local_dynamic_base_32 (base
));
6506 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), UNSPEC_DTPOFF
);
6507 off
= gen_rtx_CONST (Pmode
, off
);
6509 return gen_rtx_PLUS (Pmode
, base
, off
);
6511 case TLS_MODEL_INITIAL_EXEC
:
6515 type
= UNSPEC_GOTNTPOFF
;
6519 if (reload_in_progress
)
6520 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
6521 pic
= pic_offset_table_rtx
;
6522 type
= TARGET_GNU_TLS
? UNSPEC_GOTNTPOFF
: UNSPEC_GOTTPOFF
;
6524 else if (!TARGET_GNU_TLS
)
6526 pic
= gen_reg_rtx (Pmode
);
6527 emit_insn (gen_set_got (pic
));
6528 type
= UNSPEC_GOTTPOFF
;
6533 type
= UNSPEC_INDNTPOFF
;
6536 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), type
);
6537 off
= gen_rtx_CONST (Pmode
, off
);
6539 off
= gen_rtx_PLUS (Pmode
, pic
, off
);
6540 off
= gen_rtx_MEM (Pmode
, off
);
6541 RTX_UNCHANGING_P (off
) = 1;
6542 set_mem_alias_set (off
, ix86_GOT_alias_set ());
6544 if (TARGET_64BIT
|| TARGET_GNU_TLS
)
6546 base
= get_thread_pointer (for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
6547 off
= force_reg (Pmode
, off
);
6548 return gen_rtx_PLUS (Pmode
, base
, off
);
6552 base
= get_thread_pointer (true);
6553 dest
= gen_reg_rtx (Pmode
);
6554 emit_insn (gen_subsi3 (dest
, base
, off
));
6558 case TLS_MODEL_LOCAL_EXEC
:
6559 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
),
6560 (TARGET_64BIT
|| TARGET_GNU_TLS
)
6561 ? UNSPEC_NTPOFF
: UNSPEC_TPOFF
);
6562 off
= gen_rtx_CONST (Pmode
, off
);
6564 if (TARGET_64BIT
|| TARGET_GNU_TLS
)
6566 base
= get_thread_pointer (for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
6567 return gen_rtx_PLUS (Pmode
, base
, off
);
6571 base
= get_thread_pointer (true);
6572 dest
= gen_reg_rtx (Pmode
);
6573 emit_insn (gen_subsi3 (dest
, base
, off
));
6584 /* Try machine-dependent ways of modifying an illegitimate address
6585 to be legitimate. If we find one, return the new, valid address.
6586 This macro is used in only one place: `memory_address' in explow.c.
6588 OLDX is the address as it was before break_out_memory_refs was called.
6589 In some cases it is useful to look at this to decide what needs to be done.
6591 MODE and WIN are passed so that this macro can use
6592 GO_IF_LEGITIMATE_ADDRESS.
6594 It is always safe for this macro to do nothing. It exists to recognize
6595 opportunities to optimize the output.
6597 For the 80386, we handle X+REG by loading X into a register R and
6598 using R+REG. R will go in a general reg and indexing will be used.
6599 However, if REG is a broken-out memory address or multiplication,
6600 nothing needs to be done because REG can certainly go in a general reg.
6602 When -fpic is used, special handling is needed for symbolic references.
6603 See comments by legitimize_pic_address in i386.c for details. */
6606 legitimize_address (rtx x
, rtx oldx ATTRIBUTE_UNUSED
, enum machine_mode mode
)
6611 if (TARGET_DEBUG_ADDR
)
6613 fprintf (stderr
, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
6614 GET_MODE_NAME (mode
));
6618 log
= tls_symbolic_operand (x
, mode
);
6620 return legitimize_tls_address (x
, log
, false);
6621 if (GET_CODE (x
) == CONST
6622 && GET_CODE (XEXP (x
, 0)) == PLUS
6623 && (log
= tls_symbolic_operand (XEXP (XEXP (x
, 0), 0), Pmode
)))
6625 rtx t
= legitimize_tls_address (XEXP (XEXP (x
, 0), 0), log
, false);
6626 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (x
, 0), 1));
6629 if (flag_pic
&& SYMBOLIC_CONST (x
))
6630 return legitimize_pic_address (x
, 0);
6632 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
6633 if (GET_CODE (x
) == ASHIFT
6634 && GET_CODE (XEXP (x
, 1)) == CONST_INT
6635 && (log
= (unsigned) exact_log2 (INTVAL (XEXP (x
, 1)))) < 4)
6638 x
= gen_rtx_MULT (Pmode
, force_reg (Pmode
, XEXP (x
, 0)),
6639 GEN_INT (1 << log
));
6642 if (GET_CODE (x
) == PLUS
)
6644 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
6646 if (GET_CODE (XEXP (x
, 0)) == ASHIFT
6647 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == CONST_INT
6648 && (log
= (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x
, 0), 1)))) < 4)
6651 XEXP (x
, 0) = gen_rtx_MULT (Pmode
,
6652 force_reg (Pmode
, XEXP (XEXP (x
, 0), 0)),
6653 GEN_INT (1 << log
));
6656 if (GET_CODE (XEXP (x
, 1)) == ASHIFT
6657 && GET_CODE (XEXP (XEXP (x
, 1), 1)) == CONST_INT
6658 && (log
= (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x
, 1), 1)))) < 4)
6661 XEXP (x
, 1) = gen_rtx_MULT (Pmode
,
6662 force_reg (Pmode
, XEXP (XEXP (x
, 1), 0)),
6663 GEN_INT (1 << log
));
6666 /* Put multiply first if it isn't already. */
6667 if (GET_CODE (XEXP (x
, 1)) == MULT
)
6669 rtx tmp
= XEXP (x
, 0);
6670 XEXP (x
, 0) = XEXP (x
, 1);
6675 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
6676 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
6677 created by virtual register instantiation, register elimination, and
6678 similar optimizations. */
6679 if (GET_CODE (XEXP (x
, 0)) == MULT
&& GET_CODE (XEXP (x
, 1)) == PLUS
)
6682 x
= gen_rtx_PLUS (Pmode
,
6683 gen_rtx_PLUS (Pmode
, XEXP (x
, 0),
6684 XEXP (XEXP (x
, 1), 0)),
6685 XEXP (XEXP (x
, 1), 1));
6689 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
6690 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
6691 else if (GET_CODE (x
) == PLUS
&& GET_CODE (XEXP (x
, 0)) == PLUS
6692 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
6693 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == PLUS
6694 && CONSTANT_P (XEXP (x
, 1)))
6697 rtx other
= NULL_RTX
;
6699 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
6701 constant
= XEXP (x
, 1);
6702 other
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
6704 else if (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 1), 1)) == CONST_INT
)
6706 constant
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
6707 other
= XEXP (x
, 1);
6715 x
= gen_rtx_PLUS (Pmode
,
6716 gen_rtx_PLUS (Pmode
, XEXP (XEXP (x
, 0), 0),
6717 XEXP (XEXP (XEXP (x
, 0), 1), 0)),
6718 plus_constant (other
, INTVAL (constant
)));
6722 if (changed
&& legitimate_address_p (mode
, x
, FALSE
))
6725 if (GET_CODE (XEXP (x
, 0)) == MULT
)
6728 XEXP (x
, 0) = force_operand (XEXP (x
, 0), 0);
6731 if (GET_CODE (XEXP (x
, 1)) == MULT
)
6734 XEXP (x
, 1) = force_operand (XEXP (x
, 1), 0);
6738 && GET_CODE (XEXP (x
, 1)) == REG
6739 && GET_CODE (XEXP (x
, 0)) == REG
)
6742 if (flag_pic
&& SYMBOLIC_CONST (XEXP (x
, 1)))
6745 x
= legitimize_pic_address (x
, 0);
6748 if (changed
&& legitimate_address_p (mode
, x
, FALSE
))
6751 if (GET_CODE (XEXP (x
, 0)) == REG
)
6753 rtx temp
= gen_reg_rtx (Pmode
);
6754 rtx val
= force_operand (XEXP (x
, 1), temp
);
6756 emit_move_insn (temp
, val
);
6762 else if (GET_CODE (XEXP (x
, 1)) == REG
)
6764 rtx temp
= gen_reg_rtx (Pmode
);
6765 rtx val
= force_operand (XEXP (x
, 0), temp
);
6767 emit_move_insn (temp
, val
);
6777 /* Print an integer constant expression in assembler syntax. Addition
6778 and subtraction are the only arithmetic that may appear in these
6779 expressions. FILE is the stdio stream to write to, X is the rtx, and
6780 CODE is the operand print code from the output string. */
6783 output_pic_addr_const (FILE *file
, rtx x
, int code
)
6787 switch (GET_CODE (x
))
6797 /* Mark the decl as referenced so that cgraph will output the function. */
6798 if (SYMBOL_REF_DECL (x
))
6799 mark_decl_referenced (SYMBOL_REF_DECL (x
));
6801 assemble_name (file
, XSTR (x
, 0));
6802 if (!TARGET_MACHO
&& code
== 'P' && ! SYMBOL_REF_LOCAL_P (x
))
6803 fputs ("@PLT", file
);
6810 ASM_GENERATE_INTERNAL_LABEL (buf
, "L", CODE_LABEL_NUMBER (x
));
6811 assemble_name (asm_out_file
, buf
);
6815 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
6819 /* This used to output parentheses around the expression,
6820 but that does not work on the 386 (either ATT or BSD assembler). */
6821 output_pic_addr_const (file
, XEXP (x
, 0), code
);
6825 if (GET_MODE (x
) == VOIDmode
)
6827 /* We can use %d if the number is <32 bits and positive. */
6828 if (CONST_DOUBLE_HIGH (x
) || CONST_DOUBLE_LOW (x
) < 0)
6829 fprintf (file
, "0x%lx%08lx",
6830 (unsigned long) CONST_DOUBLE_HIGH (x
),
6831 (unsigned long) CONST_DOUBLE_LOW (x
));
6833 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, CONST_DOUBLE_LOW (x
));
6836 /* We can't handle floating point constants;
6837 PRINT_OPERAND must handle them. */
6838 output_operand_lossage ("floating constant misused");
6842 /* Some assemblers need integer constants to appear first. */
6843 if (GET_CODE (XEXP (x
, 0)) == CONST_INT
)
6845 output_pic_addr_const (file
, XEXP (x
, 0), code
);
6847 output_pic_addr_const (file
, XEXP (x
, 1), code
);
6849 else if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
6851 output_pic_addr_const (file
, XEXP (x
, 1), code
);
6853 output_pic_addr_const (file
, XEXP (x
, 0), code
);
6861 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? '(' : '[', file
);
6862 output_pic_addr_const (file
, XEXP (x
, 0), code
);
6864 output_pic_addr_const (file
, XEXP (x
, 1), code
);
6866 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? ')' : ']', file
);
6870 if (XVECLEN (x
, 0) != 1)
6872 output_pic_addr_const (file
, XVECEXP (x
, 0, 0), code
);
6873 switch (XINT (x
, 1))
6876 fputs ("@GOT", file
);
6879 fputs ("@GOTOFF", file
);
6881 case UNSPEC_GOTPCREL
:
6882 fputs ("@GOTPCREL(%rip)", file
);
6884 case UNSPEC_GOTTPOFF
:
6885 /* FIXME: This might be @TPOFF in Sun ld too. */
6886 fputs ("@GOTTPOFF", file
);
6889 fputs ("@TPOFF", file
);
6893 fputs ("@TPOFF", file
);
6895 fputs ("@NTPOFF", file
);
6898 fputs ("@DTPOFF", file
);
6900 case UNSPEC_GOTNTPOFF
:
6902 fputs ("@GOTTPOFF(%rip)", file
);
6904 fputs ("@GOTNTPOFF", file
);
6906 case UNSPEC_INDNTPOFF
:
6907 fputs ("@INDNTPOFF", file
);
6910 output_operand_lossage ("invalid UNSPEC as operand");
6916 output_operand_lossage ("invalid expression as operand");
6920 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
6921 We need to handle our special PIC relocations. */
6924 i386_dwarf_output_addr_const (FILE *file
, rtx x
)
6927 fprintf (file
, "%s", TARGET_64BIT
? ASM_QUAD
: ASM_LONG
);
6931 fprintf (file
, "%s", ASM_LONG
);
6934 output_pic_addr_const (file
, x
, '\0');
6936 output_addr_const (file
, x
);
6940 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
6941 We need to emit DTP-relative relocations. */
6944 i386_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
6946 fputs (ASM_LONG
, file
);
6947 output_addr_const (file
, x
);
6948 fputs ("@DTPOFF", file
);
6954 fputs (", 0", file
);
6961 /* In the name of slightly smaller debug output, and to cater to
6962 general assembler losage, recognize PIC+GOTOFF and turn it back
6963 into a direct symbol reference. */
6966 ix86_delegitimize_address (rtx orig_x
)
6970 if (GET_CODE (x
) == MEM
)
6975 if (GET_CODE (x
) != CONST
6976 || GET_CODE (XEXP (x
, 0)) != UNSPEC
6977 || XINT (XEXP (x
, 0), 1) != UNSPEC_GOTPCREL
6978 || GET_CODE (orig_x
) != MEM
)
6980 return XVECEXP (XEXP (x
, 0), 0, 0);
6983 if (GET_CODE (x
) != PLUS
6984 || GET_CODE (XEXP (x
, 1)) != CONST
)
6987 if (GET_CODE (XEXP (x
, 0)) == REG
6988 && REGNO (XEXP (x
, 0)) == PIC_OFFSET_TABLE_REGNUM
)
6989 /* %ebx + GOT/GOTOFF */
6991 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
6993 /* %ebx + %reg * scale + GOT/GOTOFF */
6995 if (GET_CODE (XEXP (y
, 0)) == REG
6996 && REGNO (XEXP (y
, 0)) == PIC_OFFSET_TABLE_REGNUM
)
6998 else if (GET_CODE (XEXP (y
, 1)) == REG
6999 && REGNO (XEXP (y
, 1)) == PIC_OFFSET_TABLE_REGNUM
)
7003 if (GET_CODE (y
) != REG
7004 && GET_CODE (y
) != MULT
7005 && GET_CODE (y
) != ASHIFT
)
7011 x
= XEXP (XEXP (x
, 1), 0);
7012 if (GET_CODE (x
) == UNSPEC
7013 && ((XINT (x
, 1) == UNSPEC_GOT
&& GET_CODE (orig_x
) == MEM
)
7014 || (XINT (x
, 1) == UNSPEC_GOTOFF
&& GET_CODE (orig_x
) != MEM
)))
7017 return gen_rtx_PLUS (Pmode
, y
, XVECEXP (x
, 0, 0));
7018 return XVECEXP (x
, 0, 0);
7021 if (GET_CODE (x
) == PLUS
7022 && GET_CODE (XEXP (x
, 0)) == UNSPEC
7023 && GET_CODE (XEXP (x
, 1)) == CONST_INT
7024 && ((XINT (XEXP (x
, 0), 1) == UNSPEC_GOT
&& GET_CODE (orig_x
) == MEM
)
7025 || (XINT (XEXP (x
, 0), 1) == UNSPEC_GOTOFF
7026 && GET_CODE (orig_x
) != MEM
)))
7028 x
= gen_rtx_PLUS (VOIDmode
, XVECEXP (XEXP (x
, 0), 0, 0), XEXP (x
, 1));
7030 return gen_rtx_PLUS (Pmode
, y
, x
);
7038 put_condition_code (enum rtx_code code
, enum machine_mode mode
, int reverse
,
7043 if (mode
== CCFPmode
|| mode
== CCFPUmode
)
7045 enum rtx_code second_code
, bypass_code
;
7046 ix86_fp_comparison_codes (code
, &bypass_code
, &code
, &second_code
);
7047 if (bypass_code
!= NIL
|| second_code
!= NIL
)
7049 code
= ix86_fp_compare_code_to_integer (code
);
7053 code
= reverse_condition (code
);
7064 if (mode
!= CCmode
&& mode
!= CCNOmode
&& mode
!= CCGCmode
)
7069 /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
7070 Those same assemblers have the same but opposite losage on cmov. */
7073 suffix
= fp
? "nbe" : "a";
7076 if (mode
== CCNOmode
|| mode
== CCGOCmode
)
7078 else if (mode
== CCmode
|| mode
== CCGCmode
)
7089 if (mode
== CCNOmode
|| mode
== CCGOCmode
)
7091 else if (mode
== CCmode
|| mode
== CCGCmode
)
7100 suffix
= fp
? "nb" : "ae";
7103 if (mode
!= CCmode
&& mode
!= CCGCmode
&& mode
!= CCNOmode
)
7113 suffix
= fp
? "u" : "p";
7116 suffix
= fp
? "nu" : "np";
7121 fputs (suffix
, file
);
7124 /* Print the name of register X to FILE based on its machine mode and number.
7125 If CODE is 'w', pretend the mode is HImode.
7126 If CODE is 'b', pretend the mode is QImode.
7127 If CODE is 'k', pretend the mode is SImode.
7128 If CODE is 'q', pretend the mode is DImode.
7129 If CODE is 'h', pretend the reg is the `high' byte register.
7130 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
7133 print_reg (rtx x
, int code
, FILE *file
)
7135 if (REGNO (x
) == ARG_POINTER_REGNUM
7136 || REGNO (x
) == FRAME_POINTER_REGNUM
7137 || REGNO (x
) == FLAGS_REG
7138 || REGNO (x
) == FPSR_REG
)
7141 if (ASSEMBLER_DIALECT
== ASM_ATT
|| USER_LABEL_PREFIX
[0] == 0)
7144 if (code
== 'w' || MMX_REG_P (x
))
7146 else if (code
== 'b')
7148 else if (code
== 'k')
7150 else if (code
== 'q')
7152 else if (code
== 'y')
7154 else if (code
== 'h')
7157 code
= GET_MODE_SIZE (GET_MODE (x
));
7159 /* Irritatingly, AMD extended registers use different naming convention
7160 from the normal registers. */
7161 if (REX_INT_REG_P (x
))
7168 error ("extended registers have no high halves");
7171 fprintf (file
, "r%ib", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
7174 fprintf (file
, "r%iw", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
7177 fprintf (file
, "r%id", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
7180 fprintf (file
, "r%i", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
7183 error ("unsupported operand size for extended register");
7191 if (STACK_TOP_P (x
))
7193 fputs ("st(0)", file
);
7200 if (! ANY_FP_REG_P (x
))
7201 putc (code
== 8 && TARGET_64BIT
? 'r' : 'e', file
);
7206 fputs (hi_reg_name
[REGNO (x
)], file
);
7209 if (REGNO (x
) >= ARRAY_SIZE (qi_reg_name
))
7211 fputs (qi_reg_name
[REGNO (x
)], file
);
7214 if (REGNO (x
) >= ARRAY_SIZE (qi_high_reg_name
))
7216 fputs (qi_high_reg_name
[REGNO (x
)], file
);
7223 /* Locate some local-dynamic symbol still in use by this function
7224 so that we can print its name in some tls_local_dynamic_base
7228 get_some_local_dynamic_name (void)
7232 if (cfun
->machine
->some_ld_name
)
7233 return cfun
->machine
->some_ld_name
;
7235 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
7237 && for_each_rtx (&PATTERN (insn
), get_some_local_dynamic_name_1
, 0))
7238 return cfun
->machine
->some_ld_name
;
7244 get_some_local_dynamic_name_1 (rtx
*px
, void *data ATTRIBUTE_UNUSED
)
7248 if (GET_CODE (x
) == SYMBOL_REF
7249 && local_dynamic_symbolic_operand (x
, Pmode
))
7251 cfun
->machine
->some_ld_name
= XSTR (x
, 0);
7259 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
7260 C -- print opcode suffix for set/cmov insn.
7261 c -- like C, but print reversed condition
7262 F,f -- likewise, but for floating-point.
7263 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
7265 R -- print the prefix for register names.
7266 z -- print the opcode suffix for the size of the current operand.
7267 * -- print a star (in certain assembler syntax)
7268 A -- print an absolute memory reference.
7269 w -- print the operand as if it's a "word" (HImode) even if it isn't.
7270 s -- print a shift double count, followed by the assemblers argument
7272 b -- print the QImode name of the register for the indicated operand.
7273 %b0 would print %al if operands[0] is reg 0.
7274 w -- likewise, print the HImode name of the register.
7275 k -- likewise, print the SImode name of the register.
7276 q -- likewise, print the DImode name of the register.
7277 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
7278 y -- print "st(0)" instead of "st" as a register.
7279 D -- print condition for SSE cmp instruction.
7280 P -- if PIC, print an @PLT suffix.
7281 X -- don't print any sort of PIC '@' suffix for a symbol.
7282 & -- print some in-use local-dynamic symbol name.
7286 print_operand (FILE *file
, rtx x
, int code
)
7293 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7298 assemble_name (file
, get_some_local_dynamic_name ());
7302 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7304 else if (ASSEMBLER_DIALECT
== ASM_INTEL
)
7306 /* Intel syntax. For absolute addresses, registers should not
7307 be surrounded by braces. */
7308 if (GET_CODE (x
) != REG
)
7311 PRINT_OPERAND (file
, x
, 0);
7319 PRINT_OPERAND (file
, x
, 0);
7324 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7329 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7334 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7339 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7344 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7349 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7354 /* 387 opcodes don't get size suffixes if the operands are
7356 if (STACK_REG_P (x
))
7359 /* Likewise if using Intel opcodes. */
7360 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
7363 /* This is the size of op from size of operand. */
7364 switch (GET_MODE_SIZE (GET_MODE (x
)))
7367 #ifdef HAVE_GAS_FILDS_FISTS
7373 if (GET_MODE (x
) == SFmode
)
7388 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
7390 #ifdef GAS_MNEMONICS
7416 if (GET_CODE (x
) == CONST_INT
|| ! SHIFT_DOUBLE_OMITS_COUNT
)
7418 PRINT_OPERAND (file
, x
, 0);
7424 /* Little bit of braindamage here. The SSE compare instructions
7425 does use completely different names for the comparisons that the
7426 fp conditional moves. */
7427 switch (GET_CODE (x
))
7442 fputs ("unord", file
);
7446 fputs ("neq", file
);
7450 fputs ("nlt", file
);
7454 fputs ("nle", file
);
7457 fputs ("ord", file
);
7465 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7466 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7468 switch (GET_MODE (x
))
7470 case HImode
: putc ('w', file
); break;
7472 case SFmode
: putc ('l', file
); break;
7474 case DFmode
: putc ('q', file
); break;
7482 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 0, file
);
7485 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7486 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7489 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 1, file
);
7492 /* Like above, but reverse condition */
7494 /* Check to see if argument to %c is really a constant
7495 and not a condition code which needs to be reversed. */
7496 if (!COMPARISON_P (x
))
7498 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7501 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 0, file
);
7504 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7505 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7508 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 1, file
);
7514 if (!optimize
|| optimize_size
|| !TARGET_BRANCH_PREDICTION_HINTS
)
7517 x
= find_reg_note (current_output_insn
, REG_BR_PROB
, 0);
7520 int pred_val
= INTVAL (XEXP (x
, 0));
7522 if (pred_val
< REG_BR_PROB_BASE
* 45 / 100
7523 || pred_val
> REG_BR_PROB_BASE
* 55 / 100)
7525 int taken
= pred_val
> REG_BR_PROB_BASE
/ 2;
7526 int cputaken
= final_forward_branch_p (current_output_insn
) == 0;
7528 /* Emit hints only in the case default branch prediction
7529 heuristics would fail. */
7530 if (taken
!= cputaken
)
7532 /* We use 3e (DS) prefix for taken branches and
7533 2e (CS) prefix for not taken branches. */
7535 fputs ("ds ; ", file
);
7537 fputs ("cs ; ", file
);
7544 output_operand_lossage ("invalid operand code `%c'", code
);
7548 if (GET_CODE (x
) == REG
)
7549 print_reg (x
, code
, file
);
7551 else if (GET_CODE (x
) == MEM
)
7553 /* No `byte ptr' prefix for call instructions. */
7554 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& code
!= 'X' && code
!= 'P')
7557 switch (GET_MODE_SIZE (GET_MODE (x
)))
7559 case 1: size
= "BYTE"; break;
7560 case 2: size
= "WORD"; break;
7561 case 4: size
= "DWORD"; break;
7562 case 8: size
= "QWORD"; break;
7563 case 12: size
= "XWORD"; break;
7564 case 16: size
= "XMMWORD"; break;
7569 /* Check for explicit size override (codes 'b', 'w' and 'k') */
7572 else if (code
== 'w')
7574 else if (code
== 'k')
7578 fputs (" PTR ", file
);
7582 /* Avoid (%rip) for call operands. */
7583 if (CONSTANT_ADDRESS_P (x
) && code
== 'P'
7584 && GET_CODE (x
) != CONST_INT
)
7585 output_addr_const (file
, x
);
7586 else if (this_is_asm_operands
&& ! address_operand (x
, VOIDmode
))
7587 output_operand_lossage ("invalid constraints for operand");
7592 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == SFmode
)
7597 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
7598 REAL_VALUE_TO_TARGET_SINGLE (r
, l
);
7600 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7602 fprintf (file
, "0x%08lx", l
);
7605 /* These float cases don't actually occur as immediate operands. */
7606 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == DFmode
)
7610 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
7611 fprintf (file
, "%s", dstr
);
7614 else if (GET_CODE (x
) == CONST_DOUBLE
7615 && GET_MODE (x
) == XFmode
)
7619 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
7620 fprintf (file
, "%s", dstr
);
7627 if (GET_CODE (x
) == CONST_INT
|| GET_CODE (x
) == CONST_DOUBLE
)
7629 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7632 else if (GET_CODE (x
) == CONST
|| GET_CODE (x
) == SYMBOL_REF
7633 || GET_CODE (x
) == LABEL_REF
)
7635 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7638 fputs ("OFFSET FLAT:", file
);
7641 if (GET_CODE (x
) == CONST_INT
)
7642 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
7644 output_pic_addr_const (file
, x
, code
);
7646 output_addr_const (file
, x
);
7650 /* Print a memory operand whose address is ADDR. */
7653 print_operand_address (FILE *file
, rtx addr
)
7655 struct ix86_address parts
;
7656 rtx base
, index
, disp
;
7659 if (! ix86_decompose_address (addr
, &parts
))
7663 index
= parts
.index
;
7665 scale
= parts
.scale
;
7673 if (USER_LABEL_PREFIX
[0] == 0)
7675 fputs ((parts
.seg
== SEG_FS
? "fs:" : "gs:"), file
);
7681 if (!base
&& !index
)
7683 /* Displacement only requires special attention. */
7685 if (GET_CODE (disp
) == CONST_INT
)
7687 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& parts
.seg
== SEG_DEFAULT
)
7689 if (USER_LABEL_PREFIX
[0] == 0)
7691 fputs ("ds:", file
);
7693 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (disp
));
7696 output_pic_addr_const (file
, disp
, 0);
7698 output_addr_const (file
, disp
);
7700 /* Use one byte shorter RIP relative addressing for 64bit mode. */
7702 && ((GET_CODE (disp
) == SYMBOL_REF
7703 && ! tls_symbolic_operand (disp
, GET_MODE (disp
)))
7704 || GET_CODE (disp
) == LABEL_REF
7705 || (GET_CODE (disp
) == CONST
7706 && GET_CODE (XEXP (disp
, 0)) == PLUS
7707 && (GET_CODE (XEXP (XEXP (disp
, 0), 0)) == SYMBOL_REF
7708 || GET_CODE (XEXP (XEXP (disp
, 0), 0)) == LABEL_REF
)
7709 && GET_CODE (XEXP (XEXP (disp
, 0), 1)) == CONST_INT
)))
7710 fputs ("(%rip)", file
);
7714 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7719 output_pic_addr_const (file
, disp
, 0);
7720 else if (GET_CODE (disp
) == LABEL_REF
)
7721 output_asm_label (disp
);
7723 output_addr_const (file
, disp
);
7728 print_reg (base
, 0, file
);
7732 print_reg (index
, 0, file
);
7734 fprintf (file
, ",%d", scale
);
7740 rtx offset
= NULL_RTX
;
7744 /* Pull out the offset of a symbol; print any symbol itself. */
7745 if (GET_CODE (disp
) == CONST
7746 && GET_CODE (XEXP (disp
, 0)) == PLUS
7747 && GET_CODE (XEXP (XEXP (disp
, 0), 1)) == CONST_INT
)
7749 offset
= XEXP (XEXP (disp
, 0), 1);
7750 disp
= gen_rtx_CONST (VOIDmode
,
7751 XEXP (XEXP (disp
, 0), 0));
7755 output_pic_addr_const (file
, disp
, 0);
7756 else if (GET_CODE (disp
) == LABEL_REF
)
7757 output_asm_label (disp
);
7758 else if (GET_CODE (disp
) == CONST_INT
)
7761 output_addr_const (file
, disp
);
7767 print_reg (base
, 0, file
);
7770 if (INTVAL (offset
) >= 0)
7772 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
7776 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
7783 print_reg (index
, 0, file
);
7785 fprintf (file
, "*%d", scale
);
7793 output_addr_const_extra (FILE *file
, rtx x
)
7797 if (GET_CODE (x
) != UNSPEC
)
7800 op
= XVECEXP (x
, 0, 0);
7801 switch (XINT (x
, 1))
7803 case UNSPEC_GOTTPOFF
:
7804 output_addr_const (file
, op
);
7805 /* FIXME: This might be @TPOFF in Sun ld. */
7806 fputs ("@GOTTPOFF", file
);
7809 output_addr_const (file
, op
);
7810 fputs ("@TPOFF", file
);
7813 output_addr_const (file
, op
);
7815 fputs ("@TPOFF", file
);
7817 fputs ("@NTPOFF", file
);
7820 output_addr_const (file
, op
);
7821 fputs ("@DTPOFF", file
);
7823 case UNSPEC_GOTNTPOFF
:
7824 output_addr_const (file
, op
);
7826 fputs ("@GOTTPOFF(%rip)", file
);
7828 fputs ("@GOTNTPOFF", file
);
7830 case UNSPEC_INDNTPOFF
:
7831 output_addr_const (file
, op
);
7832 fputs ("@INDNTPOFF", file
);
7842 /* Split one or more DImode RTL references into pairs of SImode
7843 references. The RTL can be REG, offsettable MEM, integer constant, or
7844 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7845 split and "num" is its length. lo_half and hi_half are output arrays
7846 that parallel "operands". */
7849 split_di (rtx operands
[], int num
, rtx lo_half
[], rtx hi_half
[])
7853 rtx op
= operands
[num
];
7855 /* simplify_subreg refuse to split volatile memory addresses,
7856 but we still have to handle it. */
7857 if (GET_CODE (op
) == MEM
)
7859 lo_half
[num
] = adjust_address (op
, SImode
, 0);
7860 hi_half
[num
] = adjust_address (op
, SImode
, 4);
7864 lo_half
[num
] = simplify_gen_subreg (SImode
, op
,
7865 GET_MODE (op
) == VOIDmode
7866 ? DImode
: GET_MODE (op
), 0);
7867 hi_half
[num
] = simplify_gen_subreg (SImode
, op
,
7868 GET_MODE (op
) == VOIDmode
7869 ? DImode
: GET_MODE (op
), 4);
7873 /* Split one or more TImode RTL references into pairs of SImode
7874 references. The RTL can be REG, offsettable MEM, integer constant, or
7875 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7876 split and "num" is its length. lo_half and hi_half are output arrays
7877 that parallel "operands". */
7880 split_ti (rtx operands
[], int num
, rtx lo_half
[], rtx hi_half
[])
7884 rtx op
= operands
[num
];
7886 /* simplify_subreg refuse to split volatile memory addresses, but we
7887 still have to handle it. */
7888 if (GET_CODE (op
) == MEM
)
7890 lo_half
[num
] = adjust_address (op
, DImode
, 0);
7891 hi_half
[num
] = adjust_address (op
, DImode
, 8);
7895 lo_half
[num
] = simplify_gen_subreg (DImode
, op
, TImode
, 0);
7896 hi_half
[num
] = simplify_gen_subreg (DImode
, op
, TImode
, 8);
7901 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
7902 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
7903 is the expression of the binary operation. The output may either be
7904 emitted here, or returned to the caller, like all output_* functions.
7906 There is no guarantee that the operands are the same mode, as they
7907 might be within FLOAT or FLOAT_EXTEND expressions. */
7909 #ifndef SYSV386_COMPAT
7910 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
7911 wants to fix the assemblers because that causes incompatibility
7912 with gcc. No-one wants to fix gcc because that causes
7913 incompatibility with assemblers... You can use the option of
7914 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
7915 #define SYSV386_COMPAT 1
7919 output_387_binary_op (rtx insn
, rtx
*operands
)
7921 static char buf
[30];
7924 int is_sse
= SSE_REG_P (operands
[0]) | SSE_REG_P (operands
[1]) | SSE_REG_P (operands
[2]);
7926 #ifdef ENABLE_CHECKING
7927 /* Even if we do not want to check the inputs, this documents input
7928 constraints. Which helps in understanding the following code. */
7929 if (STACK_REG_P (operands
[0])
7930 && ((REG_P (operands
[1])
7931 && REGNO (operands
[0]) == REGNO (operands
[1])
7932 && (STACK_REG_P (operands
[2]) || GET_CODE (operands
[2]) == MEM
))
7933 || (REG_P (operands
[2])
7934 && REGNO (operands
[0]) == REGNO (operands
[2])
7935 && (STACK_REG_P (operands
[1]) || GET_CODE (operands
[1]) == MEM
)))
7936 && (STACK_TOP_P (operands
[1]) || STACK_TOP_P (operands
[2])))
7942 switch (GET_CODE (operands
[3]))
7945 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
7946 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
7954 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
7955 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
7963 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
7964 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
7972 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
7973 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
7987 if (GET_MODE (operands
[0]) == SFmode
)
7988 strcat (buf
, "ss\t{%2, %0|%0, %2}");
7990 strcat (buf
, "sd\t{%2, %0|%0, %2}");
7995 switch (GET_CODE (operands
[3]))
7999 if (REG_P (operands
[2]) && REGNO (operands
[0]) == REGNO (operands
[2]))
8001 rtx temp
= operands
[2];
8002 operands
[2] = operands
[1];
8006 /* know operands[0] == operands[1]. */
8008 if (GET_CODE (operands
[2]) == MEM
)
8014 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
8016 if (STACK_TOP_P (operands
[0]))
8017 /* How is it that we are storing to a dead operand[2]?
8018 Well, presumably operands[1] is dead too. We can't
8019 store the result to st(0) as st(0) gets popped on this
8020 instruction. Instead store to operands[2] (which I
8021 think has to be st(1)). st(1) will be popped later.
8022 gcc <= 2.8.1 didn't have this check and generated
8023 assembly code that the Unixware assembler rejected. */
8024 p
= "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8026 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8030 if (STACK_TOP_P (operands
[0]))
8031 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8033 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8038 if (GET_CODE (operands
[1]) == MEM
)
8044 if (GET_CODE (operands
[2]) == MEM
)
8050 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
8053 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
8054 derived assemblers, confusingly reverse the direction of
8055 the operation for fsub{r} and fdiv{r} when the
8056 destination register is not st(0). The Intel assembler
8057 doesn't have this brain damage. Read !SYSV386_COMPAT to
8058 figure out what the hardware really does. */
8059 if (STACK_TOP_P (operands
[0]))
8060 p
= "{p\t%0, %2|rp\t%2, %0}";
8062 p
= "{rp\t%2, %0|p\t%0, %2}";
8064 if (STACK_TOP_P (operands
[0]))
8065 /* As above for fmul/fadd, we can't store to st(0). */
8066 p
= "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8068 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8073 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
8076 if (STACK_TOP_P (operands
[0]))
8077 p
= "{rp\t%0, %1|p\t%1, %0}";
8079 p
= "{p\t%1, %0|rp\t%0, %1}";
8081 if (STACK_TOP_P (operands
[0]))
8082 p
= "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
8084 p
= "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
8089 if (STACK_TOP_P (operands
[0]))
8091 if (STACK_TOP_P (operands
[1]))
8092 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8094 p
= "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
8097 else if (STACK_TOP_P (operands
[1]))
8100 p
= "{\t%1, %0|r\t%0, %1}";
8102 p
= "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
8108 p
= "{r\t%2, %0|\t%0, %2}";
8110 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8123 /* Output code to initialize control word copies used by
8124 trunc?f?i patterns. NORMAL is set to current control word, while ROUND_DOWN
8125 is set to control word rounding downwards. */
8127 emit_i387_cw_initialization (rtx normal
, rtx round_down
)
8129 rtx reg
= gen_reg_rtx (HImode
);
8131 emit_insn (gen_x86_fnstcw_1 (normal
));
8132 emit_move_insn (reg
, normal
);
8133 if (!TARGET_PARTIAL_REG_STALL
&& !optimize_size
8135 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0xc)));
8137 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0xc00)));
8138 emit_move_insn (round_down
, reg
);
8141 /* Output code for INSN to convert a float to a signed int. OPERANDS
8142 are the insn operands. The output may be [HSD]Imode and the input
8143 operand may be [SDX]Fmode. */
8146 output_fix_trunc (rtx insn
, rtx
*operands
)
8148 int stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
8149 int dimode_p
= GET_MODE (operands
[0]) == DImode
;
8151 /* Jump through a hoop or two for DImode, since the hardware has no
8152 non-popping instruction. We used to do this a different way, but
8153 that was somewhat fragile and broke with post-reload splitters. */
8154 if (dimode_p
&& !stack_top_dies
)
8155 output_asm_insn ("fld\t%y1", operands
);
8157 if (!STACK_TOP_P (operands
[1]))
8160 if (GET_CODE (operands
[0]) != MEM
)
8163 output_asm_insn ("fldcw\t%3", operands
);
8164 if (stack_top_dies
|| dimode_p
)
8165 output_asm_insn ("fistp%z0\t%0", operands
);
8167 output_asm_insn ("fist%z0\t%0", operands
);
8168 output_asm_insn ("fldcw\t%2", operands
);
8173 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
8174 should be used and 2 when fnstsw should be used. UNORDERED_P is true
8175 when fucom should be used. */
8178 output_fp_compare (rtx insn
, rtx
*operands
, int eflags_p
, int unordered_p
)
8181 rtx cmp_op0
= operands
[0];
8182 rtx cmp_op1
= operands
[1];
8183 int is_sse
= SSE_REG_P (operands
[0]) | SSE_REG_P (operands
[1]);
8188 cmp_op1
= operands
[2];
8192 if (GET_MODE (operands
[0]) == SFmode
)
8194 return "ucomiss\t{%1, %0|%0, %1}";
8196 return "comiss\t{%1, %0|%0, %1}";
8199 return "ucomisd\t{%1, %0|%0, %1}";
8201 return "comisd\t{%1, %0|%0, %1}";
8204 if (! STACK_TOP_P (cmp_op0
))
8207 stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
8209 if (STACK_REG_P (cmp_op1
)
8211 && find_regno_note (insn
, REG_DEAD
, REGNO (cmp_op1
))
8212 && REGNO (cmp_op1
) != FIRST_STACK_REG
)
8214 /* If both the top of the 387 stack dies, and the other operand
8215 is also a stack register that dies, then this must be a
8216 `fcompp' float compare */
8220 /* There is no double popping fcomi variant. Fortunately,
8221 eflags is immune from the fstp's cc clobbering. */
8223 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands
);
8225 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands
);
8233 return "fucompp\n\tfnstsw\t%0";
8235 return "fcompp\n\tfnstsw\t%0";
8248 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
8250 static const char * const alt
[24] =
8262 "fcomi\t{%y1, %0|%0, %y1}",
8263 "fcomip\t{%y1, %0|%0, %y1}",
8264 "fucomi\t{%y1, %0|%0, %y1}",
8265 "fucomip\t{%y1, %0|%0, %y1}",
8272 "fcom%z2\t%y2\n\tfnstsw\t%0",
8273 "fcomp%z2\t%y2\n\tfnstsw\t%0",
8274 "fucom%z2\t%y2\n\tfnstsw\t%0",
8275 "fucomp%z2\t%y2\n\tfnstsw\t%0",
8277 "ficom%z2\t%y2\n\tfnstsw\t%0",
8278 "ficomp%z2\t%y2\n\tfnstsw\t%0",
8286 mask
= eflags_p
<< 3;
8287 mask
|= (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
) << 2;
8288 mask
|= unordered_p
<< 1;
8289 mask
|= stack_top_dies
;
8302 ix86_output_addr_vec_elt (FILE *file
, int value
)
8304 const char *directive
= ASM_LONG
;
8309 directive
= ASM_QUAD
;
8315 fprintf (file
, "%s%s%d\n", directive
, LPREFIX
, value
);
8319 ix86_output_addr_diff_elt (FILE *file
, int value
, int rel
)
8322 fprintf (file
, "%s%s%d-%s%d\n",
8323 ASM_LONG
, LPREFIX
, value
, LPREFIX
, rel
);
8324 else if (HAVE_AS_GOTOFF_IN_DATA
)
8325 fprintf (file
, "%s%s%d@GOTOFF\n", ASM_LONG
, LPREFIX
, value
);
8327 else if (TARGET_MACHO
)
8329 fprintf (file
, "%s%s%d-", ASM_LONG
, LPREFIX
, value
);
8330 machopic_output_function_base_name (file
);
8331 fprintf(file
, "\n");
8335 asm_fprintf (file
, "%s%U%s+[.-%s%d]\n",
8336 ASM_LONG
, GOT_SYMBOL_NAME
, LPREFIX
, value
);
8339 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
8343 ix86_expand_clear (rtx dest
)
8347 /* We play register width games, which are only valid after reload. */
8348 if (!reload_completed
)
8351 /* Avoid HImode and its attendant prefix byte. */
8352 if (GET_MODE_SIZE (GET_MODE (dest
)) < 4)
8353 dest
= gen_rtx_REG (SImode
, REGNO (dest
));
8355 tmp
= gen_rtx_SET (VOIDmode
, dest
, const0_rtx
);
8357 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
8358 if (reload_completed
&& (!TARGET_USE_MOV0
|| optimize_size
))
8360 rtx clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, 17));
8361 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, clob
));
8367 /* X is an unchanging MEM. If it is a constant pool reference, return
8368 the constant pool rtx, else NULL. */
8371 maybe_get_pool_constant (rtx x
)
8373 x
= ix86_delegitimize_address (XEXP (x
, 0));
8375 if (GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
8376 return get_pool_constant (x
);
8382 ix86_expand_move (enum machine_mode mode
, rtx operands
[])
8384 int strict
= (reload_in_progress
|| reload_completed
);
8386 enum tls_model model
;
8391 model
= tls_symbolic_operand (op1
, Pmode
);
8394 op1
= legitimize_tls_address (op1
, model
, true);
8395 op1
= force_operand (op1
, op0
);
8400 if (flag_pic
&& mode
== Pmode
&& symbolic_operand (op1
, Pmode
))
8405 rtx temp
= ((reload_in_progress
8406 || ((op0
&& GET_CODE (op0
) == REG
)
8408 ? op0
: gen_reg_rtx (Pmode
));
8409 op1
= machopic_indirect_data_reference (op1
, temp
);
8410 op1
= machopic_legitimize_pic_address (op1
, mode
,
8411 temp
== op1
? 0 : temp
);
8413 else if (MACHOPIC_INDIRECT
)
8414 op1
= machopic_indirect_data_reference (op1
, 0);
8418 if (GET_CODE (op0
) == MEM
)
8419 op1
= force_reg (Pmode
, op1
);
8421 op1
= legitimize_address (op1
, op1
, Pmode
);
8422 #endif /* TARGET_MACHO */
8426 if (GET_CODE (op0
) == MEM
8427 && (PUSH_ROUNDING (GET_MODE_SIZE (mode
)) != GET_MODE_SIZE (mode
)
8428 || !push_operand (op0
, mode
))
8429 && GET_CODE (op1
) == MEM
)
8430 op1
= force_reg (mode
, op1
);
8432 if (push_operand (op0
, mode
)
8433 && ! general_no_elim_operand (op1
, mode
))
8434 op1
= copy_to_mode_reg (mode
, op1
);
8436 /* Force large constants in 64bit compilation into register
8437 to get them CSEed. */
8438 if (TARGET_64BIT
&& mode
== DImode
8439 && immediate_operand (op1
, mode
)
8440 && !x86_64_zero_extended_value (op1
)
8441 && !register_operand (op0
, mode
)
8442 && optimize
&& !reload_completed
&& !reload_in_progress
)
8443 op1
= copy_to_mode_reg (mode
, op1
);
8445 if (FLOAT_MODE_P (mode
))
8447 /* If we are loading a floating point constant to a register,
8448 force the value to memory now, since we'll get better code
8449 out the back end. */
8453 else if (GET_CODE (op1
) == CONST_DOUBLE
)
8455 op1
= validize_mem (force_const_mem (mode
, op1
));
8456 if (!register_operand (op0
, mode
))
8458 rtx temp
= gen_reg_rtx (mode
);
8459 emit_insn (gen_rtx_SET (VOIDmode
, temp
, op1
));
8460 emit_move_insn (op0
, temp
);
8467 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
8471 ix86_expand_vector_move (enum machine_mode mode
, rtx operands
[])
8473 /* Force constants other than zero into memory. We do not know how
8474 the instructions used to build constants modify the upper 64 bits
8475 of the register, once we have that information we may be able
8476 to handle some of them more efficiently. */
8477 if ((reload_in_progress
| reload_completed
) == 0
8478 && register_operand (operands
[0], mode
)
8479 && CONSTANT_P (operands
[1]) && operands
[1] != CONST0_RTX (mode
))
8480 operands
[1] = validize_mem (force_const_mem (mode
, operands
[1]));
8482 /* Make operand1 a register if it isn't already. */
8484 && !register_operand (operands
[0], mode
)
8485 && !register_operand (operands
[1], mode
))
8487 rtx temp
= force_reg (GET_MODE (operands
[1]), operands
[1]);
8488 emit_move_insn (operands
[0], temp
);
8492 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0], operands
[1]));
8495 /* Attempt to expand a binary operator. Make the expansion closer to the
8496 actual machine, then just general_operand, which will allow 3 separate
8497 memory references (one output, two input) in a single insn. */
8500 ix86_expand_binary_operator (enum rtx_code code
, enum machine_mode mode
,
8503 int matching_memory
;
8504 rtx src1
, src2
, dst
, op
, clob
;
8510 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
8511 if (GET_RTX_CLASS (code
) == RTX_COMM_ARITH
8512 && (rtx_equal_p (dst
, src2
)
8513 || immediate_operand (src1
, mode
)))
8520 /* If the destination is memory, and we do not have matching source
8521 operands, do things in registers. */
8522 matching_memory
= 0;
8523 if (GET_CODE (dst
) == MEM
)
8525 if (rtx_equal_p (dst
, src1
))
8526 matching_memory
= 1;
8527 else if (GET_RTX_CLASS (code
) == RTX_COMM_ARITH
8528 && rtx_equal_p (dst
, src2
))
8529 matching_memory
= 2;
8531 dst
= gen_reg_rtx (mode
);
8534 /* Both source operands cannot be in memory. */
8535 if (GET_CODE (src1
) == MEM
&& GET_CODE (src2
) == MEM
)
8537 if (matching_memory
!= 2)
8538 src2
= force_reg (mode
, src2
);
8540 src1
= force_reg (mode
, src1
);
8543 /* If the operation is not commutable, source 1 cannot be a constant
8544 or non-matching memory. */
8545 if ((CONSTANT_P (src1
)
8546 || (!matching_memory
&& GET_CODE (src1
) == MEM
))
8547 && GET_RTX_CLASS (code
) != RTX_COMM_ARITH
)
8548 src1
= force_reg (mode
, src1
);
8550 /* If optimizing, copy to regs to improve CSE */
8551 if (optimize
&& ! no_new_pseudos
)
8553 if (GET_CODE (dst
) == MEM
)
8554 dst
= gen_reg_rtx (mode
);
8555 if (GET_CODE (src1
) == MEM
)
8556 src1
= force_reg (mode
, src1
);
8557 if (GET_CODE (src2
) == MEM
)
8558 src2
= force_reg (mode
, src2
);
8561 /* Emit the instruction. */
8563 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_ee (code
, mode
, src1
, src2
));
8564 if (reload_in_progress
)
8566 /* Reload doesn't know about the flags register, and doesn't know that
8567 it doesn't want to clobber it. We can only do this with PLUS. */
8574 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
8575 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
8578 /* Fix up the destination if needed. */
8579 if (dst
!= operands
[0])
8580 emit_move_insn (operands
[0], dst
);
8583 /* Return TRUE or FALSE depending on whether the binary operator meets the
8584 appropriate constraints. */
8587 ix86_binary_operator_ok (enum rtx_code code
,
8588 enum machine_mode mode ATTRIBUTE_UNUSED
,
8591 /* Both source operands cannot be in memory. */
8592 if (GET_CODE (operands
[1]) == MEM
&& GET_CODE (operands
[2]) == MEM
)
8594 /* If the operation is not commutable, source 1 cannot be a constant. */
8595 if (CONSTANT_P (operands
[1]) && GET_RTX_CLASS (code
) != RTX_COMM_ARITH
)
8597 /* If the destination is memory, we must have a matching source operand. */
8598 if (GET_CODE (operands
[0]) == MEM
8599 && ! (rtx_equal_p (operands
[0], operands
[1])
8600 || (GET_RTX_CLASS (code
) == RTX_COMM_ARITH
8601 && rtx_equal_p (operands
[0], operands
[2]))))
8603 /* If the operation is not commutable and the source 1 is memory, we must
8604 have a matching destination. */
8605 if (GET_CODE (operands
[1]) == MEM
8606 && GET_RTX_CLASS (code
) != RTX_COMM_ARITH
8607 && ! rtx_equal_p (operands
[0], operands
[1]))
8612 /* Attempt to expand a unary operator. Make the expansion closer to the
8613 actual machine, then just general_operand, which will allow 2 separate
8614 memory references (one output, one input) in a single insn. */
8617 ix86_expand_unary_operator (enum rtx_code code
, enum machine_mode mode
,
8620 int matching_memory
;
8621 rtx src
, dst
, op
, clob
;
8626 /* If the destination is memory, and we do not have matching source
8627 operands, do things in registers. */
8628 matching_memory
= 0;
8629 if (GET_CODE (dst
) == MEM
)
8631 if (rtx_equal_p (dst
, src
))
8632 matching_memory
= 1;
8634 dst
= gen_reg_rtx (mode
);
8637 /* When source operand is memory, destination must match. */
8638 if (!matching_memory
&& GET_CODE (src
) == MEM
)
8639 src
= force_reg (mode
, src
);
8641 /* If optimizing, copy to regs to improve CSE */
8642 if (optimize
&& ! no_new_pseudos
)
8644 if (GET_CODE (dst
) == MEM
)
8645 dst
= gen_reg_rtx (mode
);
8646 if (GET_CODE (src
) == MEM
)
8647 src
= force_reg (mode
, src
);
8650 /* Emit the instruction. */
8652 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_e (code
, mode
, src
));
8653 if (reload_in_progress
|| code
== NOT
)
8655 /* Reload doesn't know about the flags register, and doesn't know that
8656 it doesn't want to clobber it. */
8663 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
8664 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
8667 /* Fix up the destination if needed. */
8668 if (dst
!= operands
[0])
8669 emit_move_insn (operands
[0], dst
);
8672 /* Return TRUE or FALSE depending on whether the unary operator meets the
8673 appropriate constraints. */
8676 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED
,
8677 enum machine_mode mode ATTRIBUTE_UNUSED
,
8678 rtx operands
[2] ATTRIBUTE_UNUSED
)
8680 /* If one of operands is memory, source and destination must match. */
8681 if ((GET_CODE (operands
[0]) == MEM
8682 || GET_CODE (operands
[1]) == MEM
)
8683 && ! rtx_equal_p (operands
[0], operands
[1]))
8688 /* Return TRUE or FALSE depending on whether the first SET in INSN
8689 has source and destination with matching CC modes, and that the
8690 CC mode is at least as constrained as REQ_MODE. */
8693 ix86_match_ccmode (rtx insn
, enum machine_mode req_mode
)
8696 enum machine_mode set_mode
;
8698 set
= PATTERN (insn
);
8699 if (GET_CODE (set
) == PARALLEL
)
8700 set
= XVECEXP (set
, 0, 0);
8701 if (GET_CODE (set
) != SET
)
8703 if (GET_CODE (SET_SRC (set
)) != COMPARE
)
8706 set_mode
= GET_MODE (SET_DEST (set
));
8710 if (req_mode
!= CCNOmode
8711 && (req_mode
!= CCmode
8712 || XEXP (SET_SRC (set
), 1) != const0_rtx
))
8716 if (req_mode
== CCGCmode
)
8720 if (req_mode
== CCGOCmode
|| req_mode
== CCNOmode
)
8724 if (req_mode
== CCZmode
)
8734 return (GET_MODE (SET_SRC (set
)) == set_mode
);
8737 /* Generate insn patterns to do an integer compare of OPERANDS. */
8740 ix86_expand_int_compare (enum rtx_code code
, rtx op0
, rtx op1
)
8742 enum machine_mode cmpmode
;
8745 cmpmode
= SELECT_CC_MODE (code
, op0
, op1
);
8746 flags
= gen_rtx_REG (cmpmode
, FLAGS_REG
);
8748 /* This is very simple, but making the interface the same as in the
8749 FP case makes the rest of the code easier. */
8750 tmp
= gen_rtx_COMPARE (cmpmode
, op0
, op1
);
8751 emit_insn (gen_rtx_SET (VOIDmode
, flags
, tmp
));
8753 /* Return the test that should be put into the flags user, i.e.
8754 the bcc, scc, or cmov instruction. */
8755 return gen_rtx_fmt_ee (code
, VOIDmode
, flags
, const0_rtx
);
8758 /* Figure out whether to use ordered or unordered fp comparisons.
8759 Return the appropriate mode to use. */
8762 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED
)
8764 /* ??? In order to make all comparisons reversible, we do all comparisons
8765 non-trapping when compiling for IEEE. Once gcc is able to distinguish
8766 all forms trapping and nontrapping comparisons, we can make inequality
8767 comparisons trapping again, since it results in better code when using
8768 FCOM based compares. */
8769 return TARGET_IEEE_FP
? CCFPUmode
: CCFPmode
;
8773 ix86_cc_mode (enum rtx_code code
, rtx op0
, rtx op1
)
8775 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_FLOAT
)
8776 return ix86_fp_compare_mode (code
);
8779 /* Only zero flag is needed. */
8781 case NE
: /* ZF!=0 */
8783 /* Codes needing carry flag. */
8784 case GEU
: /* CF=0 */
8785 case GTU
: /* CF=0 & ZF=0 */
8786 case LTU
: /* CF=1 */
8787 case LEU
: /* CF=1 | ZF=1 */
8789 /* Codes possibly doable only with sign flag when
8790 comparing against zero. */
8791 case GE
: /* SF=OF or SF=0 */
8792 case LT
: /* SF<>OF or SF=1 */
8793 if (op1
== const0_rtx
)
8796 /* For other cases Carry flag is not required. */
8798 /* Codes doable only with sign flag when comparing
8799 against zero, but we miss jump instruction for it
8800 so we need to use relational tests against overflow
8801 that thus needs to be zero. */
8802 case GT
: /* ZF=0 & SF=OF */
8803 case LE
: /* ZF=1 | SF<>OF */
8804 if (op1
== const0_rtx
)
8808 /* strcmp pattern do (use flags) and combine may ask us for proper
8817 /* Return the fixed registers used for condition codes. */
8820 ix86_fixed_condition_code_regs (unsigned int *p1
, unsigned int *p2
)
8827 /* If two condition code modes are compatible, return a condition code
8828 mode which is compatible with both. Otherwise, return
8831 static enum machine_mode
8832 ix86_cc_modes_compatible (enum machine_mode m1
, enum machine_mode m2
)
8837 if (GET_MODE_CLASS (m1
) != MODE_CC
|| GET_MODE_CLASS (m2
) != MODE_CC
)
8840 if ((m1
== CCGCmode
&& m2
== CCGOCmode
)
8841 || (m1
== CCGOCmode
&& m2
== CCGCmode
))
8869 /* These are only compatible with themselves, which we already
8875 /* Return true if we should use an FCOMI instruction for this fp comparison. */
8878 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED
)
8880 enum rtx_code swapped_code
= swap_condition (code
);
8881 return ((ix86_fp_comparison_cost (code
) == ix86_fp_comparison_fcomi_cost (code
))
8882 || (ix86_fp_comparison_cost (swapped_code
)
8883 == ix86_fp_comparison_fcomi_cost (swapped_code
)));
8886 /* Swap, force into registers, or otherwise massage the two operands
8887 to a fp comparison. The operands are updated in place; the new
8888 comparison code is returned. */
8890 static enum rtx_code
8891 ix86_prepare_fp_compare_args (enum rtx_code code
, rtx
*pop0
, rtx
*pop1
)
8893 enum machine_mode fpcmp_mode
= ix86_fp_compare_mode (code
);
8894 rtx op0
= *pop0
, op1
= *pop1
;
8895 enum machine_mode op_mode
= GET_MODE (op0
);
8896 int is_sse
= SSE_REG_P (op0
) | SSE_REG_P (op1
);
8898 /* All of the unordered compare instructions only work on registers.
8899 The same is true of the XFmode compare instructions. The same is
8900 true of the fcomi compare instructions. */
8903 && (fpcmp_mode
== CCFPUmode
8904 || op_mode
== XFmode
8905 || ix86_use_fcomi_compare (code
)))
8907 op0
= force_reg (op_mode
, op0
);
8908 op1
= force_reg (op_mode
, op1
);
8912 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
8913 things around if they appear profitable, otherwise force op0
8916 if (standard_80387_constant_p (op0
) == 0
8917 || (GET_CODE (op0
) == MEM
8918 && ! (standard_80387_constant_p (op1
) == 0
8919 || GET_CODE (op1
) == MEM
)))
8922 tmp
= op0
, op0
= op1
, op1
= tmp
;
8923 code
= swap_condition (code
);
8926 if (GET_CODE (op0
) != REG
)
8927 op0
= force_reg (op_mode
, op0
);
8929 if (CONSTANT_P (op1
))
8931 if (standard_80387_constant_p (op1
))
8932 op1
= force_reg (op_mode
, op1
);
8934 op1
= validize_mem (force_const_mem (op_mode
, op1
));
8938 /* Try to rearrange the comparison to make it cheaper. */
8939 if (ix86_fp_comparison_cost (code
)
8940 > ix86_fp_comparison_cost (swap_condition (code
))
8941 && (GET_CODE (op1
) == REG
|| !no_new_pseudos
))
8944 tmp
= op0
, op0
= op1
, op1
= tmp
;
8945 code
= swap_condition (code
);
8946 if (GET_CODE (op0
) != REG
)
8947 op0
= force_reg (op_mode
, op0
);
8955 /* Convert comparison codes we use to represent FP comparison to integer
8956 code that will result in proper branch. Return UNKNOWN if no such code
8958 static enum rtx_code
8959 ix86_fp_compare_code_to_integer (enum rtx_code code
)
8988 /* Split comparison code CODE into comparisons we can do using branch
8989 instructions. BYPASS_CODE is comparison code for branch that will
8990 branch around FIRST_CODE and SECOND_CODE. If some of branches
8991 is not required, set value to NIL.
8992 We never require more than two branches. */
8994 ix86_fp_comparison_codes (enum rtx_code code
, enum rtx_code
*bypass_code
,
8995 enum rtx_code
*first_code
,
8996 enum rtx_code
*second_code
)
9002 /* The fcomi comparison sets flags as follows:
9012 case GT
: /* GTU - CF=0 & ZF=0 */
9013 case GE
: /* GEU - CF=0 */
9014 case ORDERED
: /* PF=0 */
9015 case UNORDERED
: /* PF=1 */
9016 case UNEQ
: /* EQ - ZF=1 */
9017 case UNLT
: /* LTU - CF=1 */
9018 case UNLE
: /* LEU - CF=1 | ZF=1 */
9019 case LTGT
: /* EQ - ZF=0 */
9021 case LT
: /* LTU - CF=1 - fails on unordered */
9023 *bypass_code
= UNORDERED
;
9025 case LE
: /* LEU - CF=1 | ZF=1 - fails on unordered */
9027 *bypass_code
= UNORDERED
;
9029 case EQ
: /* EQ - ZF=1 - fails on unordered */
9031 *bypass_code
= UNORDERED
;
9033 case NE
: /* NE - ZF=0 - fails on unordered */
9035 *second_code
= UNORDERED
;
9037 case UNGE
: /* GEU - CF=0 - fails on unordered */
9039 *second_code
= UNORDERED
;
9041 case UNGT
: /* GTU - CF=0 & ZF=0 - fails on unordered */
9043 *second_code
= UNORDERED
;
9048 if (!TARGET_IEEE_FP
)
9055 /* Return cost of comparison done fcom + arithmetics operations on AX.
9056 All following functions do use number of instructions as a cost metrics.
9057 In future this should be tweaked to compute bytes for optimize_size and
9058 take into account performance of various instructions on various CPUs. */
9060 ix86_fp_comparison_arithmetics_cost (enum rtx_code code
)
9062 if (!TARGET_IEEE_FP
)
9064 /* The cost of code output by ix86_expand_fp_compare. */
9092 /* Return cost of comparison done using fcomi operation.
9093 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9095 ix86_fp_comparison_fcomi_cost (enum rtx_code code
)
9097 enum rtx_code bypass_code
, first_code
, second_code
;
9098 /* Return arbitrarily high cost when instruction is not supported - this
9099 prevents gcc from using it. */
9102 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
9103 return (bypass_code
!= NIL
|| second_code
!= NIL
) + 2;
9106 /* Return cost of comparison done using sahf operation.
9107 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9109 ix86_fp_comparison_sahf_cost (enum rtx_code code
)
9111 enum rtx_code bypass_code
, first_code
, second_code
;
9112 /* Return arbitrarily high cost when instruction is not preferred - this
9113 avoids gcc from using it. */
9114 if (!TARGET_USE_SAHF
&& !optimize_size
)
9116 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
9117 return (bypass_code
!= NIL
|| second_code
!= NIL
) + 3;
9120 /* Compute cost of the comparison done using any method.
9121 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9123 ix86_fp_comparison_cost (enum rtx_code code
)
9125 int fcomi_cost
, sahf_cost
, arithmetics_cost
= 1024;
9128 fcomi_cost
= ix86_fp_comparison_fcomi_cost (code
);
9129 sahf_cost
= ix86_fp_comparison_sahf_cost (code
);
9131 min
= arithmetics_cost
= ix86_fp_comparison_arithmetics_cost (code
);
9132 if (min
> sahf_cost
)
9134 if (min
> fcomi_cost
)
9139 /* Generate insn patterns to do a floating point compare of OPERANDS. */
9142 ix86_expand_fp_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx scratch
,
9143 rtx
*second_test
, rtx
*bypass_test
)
9145 enum machine_mode fpcmp_mode
, intcmp_mode
;
9147 int cost
= ix86_fp_comparison_cost (code
);
9148 enum rtx_code bypass_code
, first_code
, second_code
;
9150 fpcmp_mode
= ix86_fp_compare_mode (code
);
9151 code
= ix86_prepare_fp_compare_args (code
, &op0
, &op1
);
9154 *second_test
= NULL_RTX
;
9156 *bypass_test
= NULL_RTX
;
9158 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
9160 /* Do fcomi/sahf based test when profitable. */
9161 if ((bypass_code
== NIL
|| bypass_test
)
9162 && (second_code
== NIL
|| second_test
)
9163 && ix86_fp_comparison_arithmetics_cost (code
) > cost
)
9167 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
9168 tmp
= gen_rtx_SET (VOIDmode
, gen_rtx_REG (fpcmp_mode
, FLAGS_REG
),
9174 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
9175 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), UNSPEC_FNSTSW
);
9177 scratch
= gen_reg_rtx (HImode
);
9178 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
9179 emit_insn (gen_x86_sahf_1 (scratch
));
9182 /* The FP codes work out to act like unsigned. */
9183 intcmp_mode
= fpcmp_mode
;
9185 if (bypass_code
!= NIL
)
9186 *bypass_test
= gen_rtx_fmt_ee (bypass_code
, VOIDmode
,
9187 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
9189 if (second_code
!= NIL
)
9190 *second_test
= gen_rtx_fmt_ee (second_code
, VOIDmode
,
9191 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
9196 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
9197 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
9198 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), UNSPEC_FNSTSW
);
9200 scratch
= gen_reg_rtx (HImode
);
9201 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
9203 /* In the unordered case, we have to check C2 for NaN's, which
9204 doesn't happen to work out to anything nice combination-wise.
9205 So do some bit twiddling on the value we've got in AH to come
9206 up with an appropriate set of condition codes. */
9208 intcmp_mode
= CCNOmode
;
9213 if (code
== GT
|| !TARGET_IEEE_FP
)
9215 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
9220 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
9221 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
9222 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x44)));
9223 intcmp_mode
= CCmode
;
9229 if (code
== LT
&& TARGET_IEEE_FP
)
9231 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
9232 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x01)));
9233 intcmp_mode
= CCmode
;
9238 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x01)));
9244 if (code
== GE
|| !TARGET_IEEE_FP
)
9246 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x05)));
9251 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
9252 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
9259 if (code
== LE
&& TARGET_IEEE_FP
)
9261 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
9262 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
9263 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
9264 intcmp_mode
= CCmode
;
9269 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
9275 if (code
== EQ
&& TARGET_IEEE_FP
)
9277 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
9278 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
9279 intcmp_mode
= CCmode
;
9284 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
9291 if (code
== NE
&& TARGET_IEEE_FP
)
9293 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
9294 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
9300 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
9306 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
9310 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
9319 /* Return the test that should be put into the flags user, i.e.
9320 the bcc, scc, or cmov instruction. */
9321 return gen_rtx_fmt_ee (code
, VOIDmode
,
9322 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
9327 ix86_expand_compare (enum rtx_code code
, rtx
*second_test
, rtx
*bypass_test
)
9330 op0
= ix86_compare_op0
;
9331 op1
= ix86_compare_op1
;
9334 *second_test
= NULL_RTX
;
9336 *bypass_test
= NULL_RTX
;
9338 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_FLOAT
)
9339 ret
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
,
9340 second_test
, bypass_test
);
9342 ret
= ix86_expand_int_compare (code
, op0
, op1
);
9347 /* Return true if the CODE will result in nontrivial jump sequence. */
9349 ix86_fp_jump_nontrivial_p (enum rtx_code code
)
9351 enum rtx_code bypass_code
, first_code
, second_code
;
9354 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
9355 return bypass_code
!= NIL
|| second_code
!= NIL
;
9359 ix86_expand_branch (enum rtx_code code
, rtx label
)
9363 switch (GET_MODE (ix86_compare_op0
))
9369 tmp
= ix86_expand_compare (code
, NULL
, NULL
);
9370 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
9371 gen_rtx_LABEL_REF (VOIDmode
, label
),
9373 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
9382 enum rtx_code bypass_code
, first_code
, second_code
;
9384 code
= ix86_prepare_fp_compare_args (code
, &ix86_compare_op0
,
9387 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
9389 /* Check whether we will use the natural sequence with one jump. If
9390 so, we can expand jump early. Otherwise delay expansion by
9391 creating compound insn to not confuse optimizers. */
9392 if (bypass_code
== NIL
&& second_code
== NIL
9395 ix86_split_fp_branch (code
, ix86_compare_op0
, ix86_compare_op1
,
9396 gen_rtx_LABEL_REF (VOIDmode
, label
),
9401 tmp
= gen_rtx_fmt_ee (code
, VOIDmode
,
9402 ix86_compare_op0
, ix86_compare_op1
);
9403 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
9404 gen_rtx_LABEL_REF (VOIDmode
, label
),
9406 tmp
= gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
);
9408 use_fcomi
= ix86_use_fcomi_compare (code
);
9409 vec
= rtvec_alloc (3 + !use_fcomi
);
9410 RTVEC_ELT (vec
, 0) = tmp
;
9412 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCFPmode
, 18));
9414 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCFPmode
, 17));
9417 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (HImode
));
9419 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode
, vec
));
9427 /* Expand DImode branch into multiple compare+branch. */
9429 rtx lo
[2], hi
[2], label2
;
9430 enum rtx_code code1
, code2
, code3
;
9432 if (CONSTANT_P (ix86_compare_op0
) && ! CONSTANT_P (ix86_compare_op1
))
9434 tmp
= ix86_compare_op0
;
9435 ix86_compare_op0
= ix86_compare_op1
;
9436 ix86_compare_op1
= tmp
;
9437 code
= swap_condition (code
);
9439 split_di (&ix86_compare_op0
, 1, lo
+0, hi
+0);
9440 split_di (&ix86_compare_op1
, 1, lo
+1, hi
+1);
9442 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
9443 avoid two branches. This costs one extra insn, so disable when
9444 optimizing for size. */
9446 if ((code
== EQ
|| code
== NE
)
9448 || hi
[1] == const0_rtx
|| lo
[1] == const0_rtx
))
9453 if (hi
[1] != const0_rtx
)
9454 xor1
= expand_binop (SImode
, xor_optab
, xor1
, hi
[1],
9455 NULL_RTX
, 0, OPTAB_WIDEN
);
9458 if (lo
[1] != const0_rtx
)
9459 xor0
= expand_binop (SImode
, xor_optab
, xor0
, lo
[1],
9460 NULL_RTX
, 0, OPTAB_WIDEN
);
9462 tmp
= expand_binop (SImode
, ior_optab
, xor1
, xor0
,
9463 NULL_RTX
, 0, OPTAB_WIDEN
);
9465 ix86_compare_op0
= tmp
;
9466 ix86_compare_op1
= const0_rtx
;
9467 ix86_expand_branch (code
, label
);
9471 /* Otherwise, if we are doing less-than or greater-or-equal-than,
9472 op1 is a constant and the low word is zero, then we can just
9473 examine the high word. */
9475 if (GET_CODE (hi
[1]) == CONST_INT
&& lo
[1] == const0_rtx
)
9478 case LT
: case LTU
: case GE
: case GEU
:
9479 ix86_compare_op0
= hi
[0];
9480 ix86_compare_op1
= hi
[1];
9481 ix86_expand_branch (code
, label
);
9487 /* Otherwise, we need two or three jumps. */
9489 label2
= gen_label_rtx ();
9492 code2
= swap_condition (code
);
9493 code3
= unsigned_condition (code
);
9497 case LT
: case GT
: case LTU
: case GTU
:
9500 case LE
: code1
= LT
; code2
= GT
; break;
9501 case GE
: code1
= GT
; code2
= LT
; break;
9502 case LEU
: code1
= LTU
; code2
= GTU
; break;
9503 case GEU
: code1
= GTU
; code2
= LTU
; break;
9505 case EQ
: code1
= NIL
; code2
= NE
; break;
9506 case NE
: code2
= NIL
; break;
9514 * if (hi(a) < hi(b)) goto true;
9515 * if (hi(a) > hi(b)) goto false;
9516 * if (lo(a) < lo(b)) goto true;
9520 ix86_compare_op0
= hi
[0];
9521 ix86_compare_op1
= hi
[1];
9524 ix86_expand_branch (code1
, label
);
9526 ix86_expand_branch (code2
, label2
);
9528 ix86_compare_op0
= lo
[0];
9529 ix86_compare_op1
= lo
[1];
9530 ix86_expand_branch (code3
, label
);
9533 emit_label (label2
);
9542 /* Split branch based on floating point condition. */
9544 ix86_split_fp_branch (enum rtx_code code
, rtx op1
, rtx op2
,
9545 rtx target1
, rtx target2
, rtx tmp
)
9548 rtx label
= NULL_RTX
;
9550 int bypass_probability
= -1, second_probability
= -1, probability
= -1;
9553 if (target2
!= pc_rtx
)
9556 code
= reverse_condition_maybe_unordered (code
);
9561 condition
= ix86_expand_fp_compare (code
, op1
, op2
,
9562 tmp
, &second
, &bypass
);
9564 if (split_branch_probability
>= 0)
9566 /* Distribute the probabilities across the jumps.
9567 Assume the BYPASS and SECOND to be always test
9569 probability
= split_branch_probability
;
9571 /* Value of 1 is low enough to make no need for probability
9572 to be updated. Later we may run some experiments and see
9573 if unordered values are more frequent in practice. */
9575 bypass_probability
= 1;
9577 second_probability
= 1;
9579 if (bypass
!= NULL_RTX
)
9581 label
= gen_label_rtx ();
9582 i
= emit_jump_insn (gen_rtx_SET
9584 gen_rtx_IF_THEN_ELSE (VOIDmode
,
9586 gen_rtx_LABEL_REF (VOIDmode
,
9589 if (bypass_probability
>= 0)
9591 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
9592 GEN_INT (bypass_probability
),
9595 i
= emit_jump_insn (gen_rtx_SET
9597 gen_rtx_IF_THEN_ELSE (VOIDmode
,
9598 condition
, target1
, target2
)));
9599 if (probability
>= 0)
9601 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
9602 GEN_INT (probability
),
9604 if (second
!= NULL_RTX
)
9606 i
= emit_jump_insn (gen_rtx_SET
9608 gen_rtx_IF_THEN_ELSE (VOIDmode
, second
, target1
,
9610 if (second_probability
>= 0)
9612 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
9613 GEN_INT (second_probability
),
9616 if (label
!= NULL_RTX
)
9621 ix86_expand_setcc (enum rtx_code code
, rtx dest
)
9623 rtx ret
, tmp
, tmpreg
, equiv
;
9624 rtx second_test
, bypass_test
;
9626 if (GET_MODE (ix86_compare_op0
) == DImode
9628 return 0; /* FAIL */
9630 if (GET_MODE (dest
) != QImode
)
9633 ret
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
9634 PUT_MODE (ret
, QImode
);
9639 emit_insn (gen_rtx_SET (VOIDmode
, tmp
, ret
));
9640 if (bypass_test
|| second_test
)
9642 rtx test
= second_test
;
9644 rtx tmp2
= gen_reg_rtx (QImode
);
9651 PUT_CODE (test
, reverse_condition_maybe_unordered (GET_CODE (test
)));
9653 PUT_MODE (test
, QImode
);
9654 emit_insn (gen_rtx_SET (VOIDmode
, tmp2
, test
));
9657 emit_insn (gen_andqi3 (tmp
, tmpreg
, tmp2
));
9659 emit_insn (gen_iorqi3 (tmp
, tmpreg
, tmp2
));
9662 /* Attach a REG_EQUAL note describing the comparison result. */
9663 equiv
= simplify_gen_relational (code
, QImode
,
9664 GET_MODE (ix86_compare_op0
),
9665 ix86_compare_op0
, ix86_compare_op1
);
9666 set_unique_reg_note (get_last_insn (), REG_EQUAL
, equiv
);
9668 return 1; /* DONE */
9671 /* Expand comparison setting or clearing carry flag. Return true when
9672 successful and set pop for the operation. */
9674 ix86_expand_carry_flag_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx
*pop
)
9676 enum machine_mode mode
=
9677 GET_MODE (op0
) != VOIDmode
? GET_MODE (op0
) : GET_MODE (op1
);
9679 /* Do not handle DImode compares that go trought special path. Also we can't
9680 deal with FP compares yet. This is possible to add. */
9681 if ((mode
== DImode
&& !TARGET_64BIT
))
9683 if (FLOAT_MODE_P (mode
))
9685 rtx second_test
= NULL
, bypass_test
= NULL
;
9686 rtx compare_op
, compare_seq
;
9688 /* Shortcut: following common codes never translate into carry flag compares. */
9689 if (code
== EQ
|| code
== NE
|| code
== UNEQ
|| code
== LTGT
9690 || code
== ORDERED
|| code
== UNORDERED
)
9693 /* These comparisons require zero flag; swap operands so they won't. */
9694 if ((code
== GT
|| code
== UNLE
|| code
== LE
|| code
== UNGT
)
9700 code
= swap_condition (code
);
9703 /* Try to expand the comparison and verify that we end up with carry flag
9704 based comparison. This is fails to be true only when we decide to expand
9705 comparison using arithmetic that is not too common scenario. */
9707 compare_op
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
,
9708 &second_test
, &bypass_test
);
9709 compare_seq
= get_insns ();
9712 if (second_test
|| bypass_test
)
9714 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
9715 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
9716 code
= ix86_fp_compare_code_to_integer (GET_CODE (compare_op
));
9718 code
= GET_CODE (compare_op
);
9719 if (code
!= LTU
&& code
!= GEU
)
9721 emit_insn (compare_seq
);
9725 if (!INTEGRAL_MODE_P (mode
))
9733 /* Convert a==0 into (unsigned)a<1. */
9736 if (op1
!= const0_rtx
)
9739 code
= (code
== EQ
? LTU
: GEU
);
9742 /* Convert a>b into b<a or a>=b-1. */
9745 if (GET_CODE (op1
) == CONST_INT
)
9747 op1
= gen_int_mode (INTVAL (op1
) + 1, GET_MODE (op0
));
9748 /* Bail out on overflow. We still can swap operands but that
9749 would force loading of the constant into register. */
9750 if (op1
== const0_rtx
9751 || !x86_64_immediate_operand (op1
, GET_MODE (op1
)))
9753 code
= (code
== GTU
? GEU
: LTU
);
9760 code
= (code
== GTU
? LTU
: GEU
);
9764 /* Convert a>=0 into (unsigned)a<0x80000000. */
9767 if (mode
== DImode
|| op1
!= const0_rtx
)
9769 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
9770 code
= (code
== LT
? GEU
: LTU
);
9774 if (mode
== DImode
|| op1
!= constm1_rtx
)
9776 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
9777 code
= (code
== LE
? GEU
: LTU
);
9783 /* Swapping operands may cause constant to appear as first operand. */
9784 if (!nonimmediate_operand (op0
, VOIDmode
))
9788 op0
= force_reg (mode
, op0
);
9790 ix86_compare_op0
= op0
;
9791 ix86_compare_op1
= op1
;
9792 *pop
= ix86_expand_compare (code
, NULL
, NULL
);
9793 if (GET_CODE (*pop
) != LTU
&& GET_CODE (*pop
) != GEU
)
9799 ix86_expand_int_movcc (rtx operands
[])
9801 enum rtx_code code
= GET_CODE (operands
[1]), compare_code
;
9802 rtx compare_seq
, compare_op
;
9803 rtx second_test
, bypass_test
;
9804 enum machine_mode mode
= GET_MODE (operands
[0]);
9805 bool sign_bit_compare_p
= false;;
9808 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
9809 compare_seq
= get_insns ();
9812 compare_code
= GET_CODE (compare_op
);
9814 if ((ix86_compare_op1
== const0_rtx
&& (code
== GE
|| code
== LT
))
9815 || (ix86_compare_op1
== constm1_rtx
&& (code
== GT
|| code
== LE
)))
9816 sign_bit_compare_p
= true;
9818 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9819 HImode insns, we'd be swallowed in word prefix ops. */
9821 if ((mode
!= HImode
|| TARGET_FAST_PREFIX
)
9822 && (mode
!= DImode
|| TARGET_64BIT
)
9823 && GET_CODE (operands
[2]) == CONST_INT
9824 && GET_CODE (operands
[3]) == CONST_INT
)
9826 rtx out
= operands
[0];
9827 HOST_WIDE_INT ct
= INTVAL (operands
[2]);
9828 HOST_WIDE_INT cf
= INTVAL (operands
[3]);
9832 /* Sign bit compares are better done using shifts than we do by using
9834 if (sign_bit_compare_p
9835 || ix86_expand_carry_flag_compare (code
, ix86_compare_op0
,
9836 ix86_compare_op1
, &compare_op
))
9838 /* Detect overlap between destination and compare sources. */
9841 if (!sign_bit_compare_p
)
9845 compare_code
= GET_CODE (compare_op
);
9847 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
9848 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
9851 compare_code
= ix86_fp_compare_code_to_integer (compare_code
);
9854 /* To simplify rest of code, restrict to the GEU case. */
9855 if (compare_code
== LTU
)
9857 HOST_WIDE_INT tmp
= ct
;
9860 compare_code
= reverse_condition (compare_code
);
9861 code
= reverse_condition (code
);
9866 PUT_CODE (compare_op
,
9867 reverse_condition_maybe_unordered
9868 (GET_CODE (compare_op
)));
9870 PUT_CODE (compare_op
, reverse_condition (GET_CODE (compare_op
)));
9874 if (reg_overlap_mentioned_p (out
, ix86_compare_op0
)
9875 || reg_overlap_mentioned_p (out
, ix86_compare_op1
))
9876 tmp
= gen_reg_rtx (mode
);
9879 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp
, compare_op
));
9881 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode
, tmp
), compare_op
));
9885 if (code
== GT
|| code
== GE
)
9886 code
= reverse_condition (code
);
9889 HOST_WIDE_INT tmp
= ct
;
9894 tmp
= emit_store_flag (tmp
, code
, ix86_compare_op0
,
9895 ix86_compare_op1
, VOIDmode
, 0, -1);
9908 tmp
= expand_simple_binop (mode
, PLUS
,
9910 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
9921 tmp
= expand_simple_binop (mode
, IOR
,
9923 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
9925 else if (diff
== -1 && ct
)
9935 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
9937 tmp
= expand_simple_binop (mode
, PLUS
,
9938 copy_rtx (tmp
), GEN_INT (cf
),
9939 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
9947 * andl cf - ct, dest
9957 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
9960 tmp
= expand_simple_binop (mode
, AND
,
9962 gen_int_mode (cf
- ct
, mode
),
9963 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
9965 tmp
= expand_simple_binop (mode
, PLUS
,
9966 copy_rtx (tmp
), GEN_INT (ct
),
9967 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
9970 if (!rtx_equal_p (tmp
, out
))
9971 emit_move_insn (copy_rtx (out
), copy_rtx (tmp
));
9973 return 1; /* DONE */
9979 tmp
= ct
, ct
= cf
, cf
= tmp
;
9981 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0
)))
9983 /* We may be reversing unordered compare to normal compare, that
9984 is not valid in general (we may convert non-trapping condition
9985 to trapping one), however on i386 we currently emit all
9986 comparisons unordered. */
9987 compare_code
= reverse_condition_maybe_unordered (compare_code
);
9988 code
= reverse_condition_maybe_unordered (code
);
9992 compare_code
= reverse_condition (compare_code
);
9993 code
= reverse_condition (code
);
9998 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0
)) == MODE_INT
9999 && GET_CODE (ix86_compare_op1
) == CONST_INT
)
10001 if (ix86_compare_op1
== const0_rtx
10002 && (code
== LT
|| code
== GE
))
10003 compare_code
= code
;
10004 else if (ix86_compare_op1
== constm1_rtx
)
10008 else if (code
== GT
)
10013 /* Optimize dest = (op0 < 0) ? -1 : cf. */
10014 if (compare_code
!= NIL
10015 && GET_MODE (ix86_compare_op0
) == GET_MODE (out
)
10016 && (cf
== -1 || ct
== -1))
10018 /* If lea code below could be used, only optimize
10019 if it results in a 2 insn sequence. */
10021 if (! (diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
10022 || diff
== 3 || diff
== 5 || diff
== 9)
10023 || (compare_code
== LT
&& ct
== -1)
10024 || (compare_code
== GE
&& cf
== -1))
10027 * notl op1 (if necessary)
10035 code
= reverse_condition (code
);
10038 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
10039 ix86_compare_op1
, VOIDmode
, 0, -1);
10041 out
= expand_simple_binop (mode
, IOR
,
10043 out
, 1, OPTAB_DIRECT
);
10044 if (out
!= operands
[0])
10045 emit_move_insn (operands
[0], out
);
10047 return 1; /* DONE */
10052 if ((diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
10053 || diff
== 3 || diff
== 5 || diff
== 9)
10054 && ((mode
!= QImode
&& mode
!= HImode
) || !TARGET_PARTIAL_REG_STALL
)
10055 && (mode
!= DImode
|| x86_64_sign_extended_value (GEN_INT (cf
))))
10061 * lea cf(dest*(ct-cf)),dest
10065 * This also catches the degenerate setcc-only case.
10071 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
10072 ix86_compare_op1
, VOIDmode
, 0, 1);
10075 /* On x86_64 the lea instruction operates on Pmode, so we need
10076 to get arithmetics done in proper mode to match. */
10078 tmp
= copy_rtx (out
);
10082 out1
= copy_rtx (out
);
10083 tmp
= gen_rtx_MULT (mode
, out1
, GEN_INT (diff
& ~1));
10087 tmp
= gen_rtx_PLUS (mode
, tmp
, out1
);
10093 tmp
= gen_rtx_PLUS (mode
, tmp
, GEN_INT (cf
));
10096 if (!rtx_equal_p (tmp
, out
))
10099 out
= force_operand (tmp
, copy_rtx (out
));
10101 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (out
), copy_rtx (tmp
)));
10103 if (!rtx_equal_p (out
, operands
[0]))
10104 emit_move_insn (operands
[0], copy_rtx (out
));
10106 return 1; /* DONE */
10110 * General case: Jumpful:
10111 * xorl dest,dest cmpl op1, op2
10112 * cmpl op1, op2 movl ct, dest
10113 * setcc dest jcc 1f
10114 * decl dest movl cf, dest
10115 * andl (cf-ct),dest 1:
10118 * Size 20. Size 14.
10120 * This is reasonably steep, but branch mispredict costs are
10121 * high on modern cpus, so consider failing only if optimizing
10125 if ((!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
10126 && BRANCH_COST
>= 2)
10132 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0
)))
10133 /* We may be reversing unordered compare to normal compare,
10134 that is not valid in general (we may convert non-trapping
10135 condition to trapping one), however on i386 we currently
10136 emit all comparisons unordered. */
10137 code
= reverse_condition_maybe_unordered (code
);
10140 code
= reverse_condition (code
);
10141 if (compare_code
!= NIL
)
10142 compare_code
= reverse_condition (compare_code
);
10146 if (compare_code
!= NIL
)
10148 /* notl op1 (if needed)
10153 For x < 0 (resp. x <= -1) there will be no notl,
10154 so if possible swap the constants to get rid of the
10156 True/false will be -1/0 while code below (store flag
10157 followed by decrement) is 0/-1, so the constants need
10158 to be exchanged once more. */
10160 if (compare_code
== GE
|| !cf
)
10162 code
= reverse_condition (code
);
10167 HOST_WIDE_INT tmp
= cf
;
10172 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
10173 ix86_compare_op1
, VOIDmode
, 0, -1);
10177 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
10178 ix86_compare_op1
, VOIDmode
, 0, 1);
10180 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
), constm1_rtx
,
10181 copy_rtx (out
), 1, OPTAB_DIRECT
);
10184 out
= expand_simple_binop (mode
, AND
, copy_rtx (out
),
10185 gen_int_mode (cf
- ct
, mode
),
10186 copy_rtx (out
), 1, OPTAB_DIRECT
);
10188 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
), GEN_INT (ct
),
10189 copy_rtx (out
), 1, OPTAB_DIRECT
);
10190 if (!rtx_equal_p (out
, operands
[0]))
10191 emit_move_insn (operands
[0], copy_rtx (out
));
10193 return 1; /* DONE */
10197 if (!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
10199 /* Try a few things more with specific constants and a variable. */
10202 rtx var
, orig_out
, out
, tmp
;
10204 if (BRANCH_COST
<= 2)
10205 return 0; /* FAIL */
10207 /* If one of the two operands is an interesting constant, load a
10208 constant with the above and mask it in with a logical operation. */
10210 if (GET_CODE (operands
[2]) == CONST_INT
)
10213 if (INTVAL (operands
[2]) == 0 && operands
[3] != constm1_rtx
)
10214 operands
[3] = constm1_rtx
, op
= and_optab
;
10215 else if (INTVAL (operands
[2]) == -1 && operands
[3] != const0_rtx
)
10216 operands
[3] = const0_rtx
, op
= ior_optab
;
10218 return 0; /* FAIL */
10220 else if (GET_CODE (operands
[3]) == CONST_INT
)
10223 if (INTVAL (operands
[3]) == 0 && operands
[2] != constm1_rtx
)
10224 operands
[2] = constm1_rtx
, op
= and_optab
;
10225 else if (INTVAL (operands
[3]) == -1 && operands
[3] != const0_rtx
)
10226 operands
[2] = const0_rtx
, op
= ior_optab
;
10228 return 0; /* FAIL */
10231 return 0; /* FAIL */
10233 orig_out
= operands
[0];
10234 tmp
= gen_reg_rtx (mode
);
10237 /* Recurse to get the constant loaded. */
10238 if (ix86_expand_int_movcc (operands
) == 0)
10239 return 0; /* FAIL */
10241 /* Mask in the interesting variable. */
10242 out
= expand_binop (mode
, op
, var
, tmp
, orig_out
, 0,
10244 if (!rtx_equal_p (out
, orig_out
))
10245 emit_move_insn (copy_rtx (orig_out
), copy_rtx (out
));
10247 return 1; /* DONE */
10251 * For comparison with above,
10261 if (! nonimmediate_operand (operands
[2], mode
))
10262 operands
[2] = force_reg (mode
, operands
[2]);
10263 if (! nonimmediate_operand (operands
[3], mode
))
10264 operands
[3] = force_reg (mode
, operands
[3]);
10266 if (bypass_test
&& reg_overlap_mentioned_p (operands
[0], operands
[3]))
10268 rtx tmp
= gen_reg_rtx (mode
);
10269 emit_move_insn (tmp
, operands
[3]);
10272 if (second_test
&& reg_overlap_mentioned_p (operands
[0], operands
[2]))
10274 rtx tmp
= gen_reg_rtx (mode
);
10275 emit_move_insn (tmp
, operands
[2]);
10279 if (! register_operand (operands
[2], VOIDmode
)
10281 || ! register_operand (operands
[3], VOIDmode
)))
10282 operands
[2] = force_reg (mode
, operands
[2]);
10285 && ! register_operand (operands
[3], VOIDmode
))
10286 operands
[3] = force_reg (mode
, operands
[3]);
10288 emit_insn (compare_seq
);
10289 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
10290 gen_rtx_IF_THEN_ELSE (mode
,
10291 compare_op
, operands
[2],
10294 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (operands
[0]),
10295 gen_rtx_IF_THEN_ELSE (mode
,
10297 copy_rtx (operands
[3]),
10298 copy_rtx (operands
[0]))));
10300 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (operands
[0]),
10301 gen_rtx_IF_THEN_ELSE (mode
,
10303 copy_rtx (operands
[2]),
10304 copy_rtx (operands
[0]))));
10306 return 1; /* DONE */
10310 ix86_expand_fp_movcc (rtx operands
[])
10312 enum rtx_code code
;
10314 rtx compare_op
, second_test
, bypass_test
;
10316 /* For SF/DFmode conditional moves based on comparisons
10317 in same mode, we may want to use SSE min/max instructions. */
10318 if (((TARGET_SSE_MATH
&& GET_MODE (operands
[0]) == SFmode
)
10319 || (TARGET_SSE2
&& TARGET_SSE_MATH
&& GET_MODE (operands
[0]) == DFmode
))
10320 && GET_MODE (ix86_compare_op0
) == GET_MODE (operands
[0])
10321 /* The SSE comparisons does not support the LTGT/UNEQ pair. */
10322 && (!TARGET_IEEE_FP
10323 || (GET_CODE (operands
[1]) != LTGT
&& GET_CODE (operands
[1]) != UNEQ
))
10324 /* We may be called from the post-reload splitter. */
10325 && (!REG_P (operands
[0])
10326 || SSE_REG_P (operands
[0])
10327 || REGNO (operands
[0]) >= FIRST_PSEUDO_REGISTER
))
10329 rtx op0
= ix86_compare_op0
, op1
= ix86_compare_op1
;
10330 code
= GET_CODE (operands
[1]);
10332 /* See if we have (cross) match between comparison operands and
10333 conditional move operands. */
10334 if (rtx_equal_p (operands
[2], op1
))
10339 code
= reverse_condition_maybe_unordered (code
);
10341 if (rtx_equal_p (operands
[2], op0
) && rtx_equal_p (operands
[3], op1
))
10343 /* Check for min operation. */
10344 if (code
== LT
|| code
== UNLE
)
10352 operands
[0] = force_reg (GET_MODE (operands
[0]), operands
[0]);
10353 if (memory_operand (op0
, VOIDmode
))
10354 op0
= force_reg (GET_MODE (operands
[0]), op0
);
10355 if (GET_MODE (operands
[0]) == SFmode
)
10356 emit_insn (gen_minsf3 (operands
[0], op0
, op1
));
10358 emit_insn (gen_mindf3 (operands
[0], op0
, op1
));
10361 /* Check for max operation. */
10362 if (code
== GT
|| code
== UNGE
)
10370 operands
[0] = force_reg (GET_MODE (operands
[0]), operands
[0]);
10371 if (memory_operand (op0
, VOIDmode
))
10372 op0
= force_reg (GET_MODE (operands
[0]), op0
);
10373 if (GET_MODE (operands
[0]) == SFmode
)
10374 emit_insn (gen_maxsf3 (operands
[0], op0
, op1
));
10376 emit_insn (gen_maxdf3 (operands
[0], op0
, op1
));
10380 /* Manage condition to be sse_comparison_operator. In case we are
10381 in non-ieee mode, try to canonicalize the destination operand
10382 to be first in the comparison - this helps reload to avoid extra
10384 if (!sse_comparison_operator (operands
[1], VOIDmode
)
10385 || (rtx_equal_p (operands
[0], ix86_compare_op1
) && !TARGET_IEEE_FP
))
10387 rtx tmp
= ix86_compare_op0
;
10388 ix86_compare_op0
= ix86_compare_op1
;
10389 ix86_compare_op1
= tmp
;
10390 operands
[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands
[1])),
10391 VOIDmode
, ix86_compare_op0
,
10394 /* Similarly try to manage result to be first operand of conditional
10395 move. We also don't support the NE comparison on SSE, so try to
10397 if ((rtx_equal_p (operands
[0], operands
[3])
10398 && (!TARGET_IEEE_FP
|| GET_CODE (operands
[1]) != EQ
))
10399 || (GET_CODE (operands
[1]) == NE
&& TARGET_IEEE_FP
))
10401 rtx tmp
= operands
[2];
10402 operands
[2] = operands
[3];
10404 operands
[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
10405 (GET_CODE (operands
[1])),
10406 VOIDmode
, ix86_compare_op0
,
10409 if (GET_MODE (operands
[0]) == SFmode
)
10410 emit_insn (gen_sse_movsfcc (operands
[0], operands
[1],
10411 operands
[2], operands
[3],
10412 ix86_compare_op0
, ix86_compare_op1
));
10414 emit_insn (gen_sse_movdfcc (operands
[0], operands
[1],
10415 operands
[2], operands
[3],
10416 ix86_compare_op0
, ix86_compare_op1
));
10420 /* The floating point conditional move instructions don't directly
10421 support conditions resulting from a signed integer comparison. */
10423 code
= GET_CODE (operands
[1]);
10424 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
10426 /* The floating point conditional move instructions don't directly
10427 support signed integer comparisons. */
10429 if (!fcmov_comparison_operator (compare_op
, VOIDmode
))
10431 if (second_test
!= NULL
|| bypass_test
!= NULL
)
10433 tmp
= gen_reg_rtx (QImode
);
10434 ix86_expand_setcc (code
, tmp
);
10436 ix86_compare_op0
= tmp
;
10437 ix86_compare_op1
= const0_rtx
;
10438 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
10440 if (bypass_test
&& reg_overlap_mentioned_p (operands
[0], operands
[3]))
10442 tmp
= gen_reg_rtx (GET_MODE (operands
[0]));
10443 emit_move_insn (tmp
, operands
[3]);
10446 if (second_test
&& reg_overlap_mentioned_p (operands
[0], operands
[2]))
10448 tmp
= gen_reg_rtx (GET_MODE (operands
[0]));
10449 emit_move_insn (tmp
, operands
[2]);
10453 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
10454 gen_rtx_IF_THEN_ELSE (GET_MODE (operands
[0]),
10459 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
10460 gen_rtx_IF_THEN_ELSE (GET_MODE (operands
[0]),
10465 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
10466 gen_rtx_IF_THEN_ELSE (GET_MODE (operands
[0]),
10474 /* Expand conditional increment or decrement using adb/sbb instructions.
10475 The default case using setcc followed by the conditional move can be
10476 done by generic code. */
10478 ix86_expand_int_addcc (rtx operands
[])
10480 enum rtx_code code
= GET_CODE (operands
[1]);
10482 rtx val
= const0_rtx
;
10483 bool fpcmp
= false;
10484 enum machine_mode mode
= GET_MODE (operands
[0]);
10486 if (operands
[3] != const1_rtx
10487 && operands
[3] != constm1_rtx
)
10489 if (!ix86_expand_carry_flag_compare (code
, ix86_compare_op0
,
10490 ix86_compare_op1
, &compare_op
))
10492 code
= GET_CODE (compare_op
);
10494 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
10495 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
10498 code
= ix86_fp_compare_code_to_integer (code
);
10505 PUT_CODE (compare_op
,
10506 reverse_condition_maybe_unordered
10507 (GET_CODE (compare_op
)));
10509 PUT_CODE (compare_op
, reverse_condition (GET_CODE (compare_op
)));
10511 PUT_MODE (compare_op
, mode
);
10513 /* Construct either adc or sbb insn. */
10514 if ((code
== LTU
) == (operands
[3] == constm1_rtx
))
10516 switch (GET_MODE (operands
[0]))
10519 emit_insn (gen_subqi3_carry (operands
[0], operands
[2], val
, compare_op
));
10522 emit_insn (gen_subhi3_carry (operands
[0], operands
[2], val
, compare_op
));
10525 emit_insn (gen_subsi3_carry (operands
[0], operands
[2], val
, compare_op
));
10528 emit_insn (gen_subdi3_carry_rex64 (operands
[0], operands
[2], val
, compare_op
));
10536 switch (GET_MODE (operands
[0]))
10539 emit_insn (gen_addqi3_carry (operands
[0], operands
[2], val
, compare_op
));
10542 emit_insn (gen_addhi3_carry (operands
[0], operands
[2], val
, compare_op
));
10545 emit_insn (gen_addsi3_carry (operands
[0], operands
[2], val
, compare_op
));
10548 emit_insn (gen_adddi3_carry_rex64 (operands
[0], operands
[2], val
, compare_op
));
10554 return 1; /* DONE */
10558 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
10559 works for floating pointer parameters and nonoffsetable memories.
10560 For pushes, it returns just stack offsets; the values will be saved
10561 in the right order. Maximally three parts are generated. */
10564 ix86_split_to_parts (rtx operand
, rtx
*parts
, enum machine_mode mode
)
10569 size
= mode
==XFmode
? 3 : GET_MODE_SIZE (mode
) / 4;
10571 size
= (GET_MODE_SIZE (mode
) + 4) / 8;
10573 if (GET_CODE (operand
) == REG
&& MMX_REGNO_P (REGNO (operand
)))
10575 if (size
< 2 || size
> 3)
10578 /* Optimize constant pool reference to immediates. This is used by fp
10579 moves, that force all constants to memory to allow combining. */
10580 if (GET_CODE (operand
) == MEM
&& RTX_UNCHANGING_P (operand
))
10582 rtx tmp
= maybe_get_pool_constant (operand
);
10587 if (GET_CODE (operand
) == MEM
&& !offsettable_memref_p (operand
))
10589 /* The only non-offsetable memories we handle are pushes. */
10590 if (! push_operand (operand
, VOIDmode
))
10593 operand
= copy_rtx (operand
);
10594 PUT_MODE (operand
, Pmode
);
10595 parts
[0] = parts
[1] = parts
[2] = operand
;
10597 else if (!TARGET_64BIT
)
10599 if (mode
== DImode
)
10600 split_di (&operand
, 1, &parts
[0], &parts
[1]);
10603 if (REG_P (operand
))
10605 if (!reload_completed
)
10607 parts
[0] = gen_rtx_REG (SImode
, REGNO (operand
) + 0);
10608 parts
[1] = gen_rtx_REG (SImode
, REGNO (operand
) + 1);
10610 parts
[2] = gen_rtx_REG (SImode
, REGNO (operand
) + 2);
10612 else if (offsettable_memref_p (operand
))
10614 operand
= adjust_address (operand
, SImode
, 0);
10615 parts
[0] = operand
;
10616 parts
[1] = adjust_address (operand
, SImode
, 4);
10618 parts
[2] = adjust_address (operand
, SImode
, 8);
10620 else if (GET_CODE (operand
) == CONST_DOUBLE
)
10625 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
10629 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r
, l
);
10630 parts
[2] = gen_int_mode (l
[2], SImode
);
10633 REAL_VALUE_TO_TARGET_DOUBLE (r
, l
);
10638 parts
[1] = gen_int_mode (l
[1], SImode
);
10639 parts
[0] = gen_int_mode (l
[0], SImode
);
10647 if (mode
== TImode
)
10648 split_ti (&operand
, 1, &parts
[0], &parts
[1]);
10649 if (mode
== XFmode
|| mode
== TFmode
)
10651 enum machine_mode upper_mode
= mode
==XFmode
? SImode
: DImode
;
10652 if (REG_P (operand
))
10654 if (!reload_completed
)
10656 parts
[0] = gen_rtx_REG (DImode
, REGNO (operand
) + 0);
10657 parts
[1] = gen_rtx_REG (upper_mode
, REGNO (operand
) + 1);
10659 else if (offsettable_memref_p (operand
))
10661 operand
= adjust_address (operand
, DImode
, 0);
10662 parts
[0] = operand
;
10663 parts
[1] = adjust_address (operand
, upper_mode
, 8);
10665 else if (GET_CODE (operand
) == CONST_DOUBLE
)
10670 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
10671 real_to_target (l
, &r
, mode
);
10672 /* Do not use shift by 32 to avoid warning on 32bit systems. */
10673 if (HOST_BITS_PER_WIDE_INT
>= 64)
10676 ((l
[0] & (((HOST_WIDE_INT
) 2 << 31) - 1))
10677 + ((((HOST_WIDE_INT
) l
[1]) << 31) << 1),
10680 parts
[0] = immed_double_const (l
[0], l
[1], DImode
);
10681 if (upper_mode
== SImode
)
10682 parts
[1] = gen_int_mode (l
[2], SImode
);
10683 else if (HOST_BITS_PER_WIDE_INT
>= 64)
10686 ((l
[2] & (((HOST_WIDE_INT
) 2 << 31) - 1))
10687 + ((((HOST_WIDE_INT
) l
[3]) << 31) << 1),
10690 parts
[1] = immed_double_const (l
[2], l
[3], DImode
);
10700 /* Emit insns to perform a move or push of DI, DF, and XF values.
10701 Return false when normal moves are needed; true when all required
10702 insns have been emitted. Operands 2-4 contain the input values
10703 int the correct order; operands 5-7 contain the output values. */
10706 ix86_split_long_move (rtx operands
[])
10711 int collisions
= 0;
10712 enum machine_mode mode
= GET_MODE (operands
[0]);
10714 /* The DFmode expanders may ask us to move double.
10715 For 64bit target this is single move. By hiding the fact
10716 here we simplify i386.md splitters. */
10717 if (GET_MODE_SIZE (GET_MODE (operands
[0])) == 8 && TARGET_64BIT
)
10719 /* Optimize constant pool reference to immediates. This is used by
10720 fp moves, that force all constants to memory to allow combining. */
10722 if (GET_CODE (operands
[1]) == MEM
10723 && GET_CODE (XEXP (operands
[1], 0)) == SYMBOL_REF
10724 && CONSTANT_POOL_ADDRESS_P (XEXP (operands
[1], 0)))
10725 operands
[1] = get_pool_constant (XEXP (operands
[1], 0));
10726 if (push_operand (operands
[0], VOIDmode
))
10728 operands
[0] = copy_rtx (operands
[0]);
10729 PUT_MODE (operands
[0], Pmode
);
10732 operands
[0] = gen_lowpart (DImode
, operands
[0]);
10733 operands
[1] = gen_lowpart (DImode
, operands
[1]);
10734 emit_move_insn (operands
[0], operands
[1]);
10738 /* The only non-offsettable memory we handle is push. */
10739 if (push_operand (operands
[0], VOIDmode
))
10741 else if (GET_CODE (operands
[0]) == MEM
10742 && ! offsettable_memref_p (operands
[0]))
10745 nparts
= ix86_split_to_parts (operands
[1], part
[1], GET_MODE (operands
[0]));
10746 ix86_split_to_parts (operands
[0], part
[0], GET_MODE (operands
[0]));
10748 /* When emitting push, take care for source operands on the stack. */
10749 if (push
&& GET_CODE (operands
[1]) == MEM
10750 && reg_overlap_mentioned_p (stack_pointer_rtx
, operands
[1]))
10753 part
[1][1] = change_address (part
[1][1], GET_MODE (part
[1][1]),
10754 XEXP (part
[1][2], 0));
10755 part
[1][0] = change_address (part
[1][0], GET_MODE (part
[1][0]),
10756 XEXP (part
[1][1], 0));
10759 /* We need to do copy in the right order in case an address register
10760 of the source overlaps the destination. */
10761 if (REG_P (part
[0][0]) && GET_CODE (part
[1][0]) == MEM
)
10763 if (reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0)))
10765 if (reg_overlap_mentioned_p (part
[0][1], XEXP (part
[1][0], 0)))
10768 && reg_overlap_mentioned_p (part
[0][2], XEXP (part
[1][0], 0)))
10771 /* Collision in the middle part can be handled by reordering. */
10772 if (collisions
== 1 && nparts
== 3
10773 && reg_overlap_mentioned_p (part
[0][1], XEXP (part
[1][0], 0)))
10776 tmp
= part
[0][1]; part
[0][1] = part
[0][2]; part
[0][2] = tmp
;
10777 tmp
= part
[1][1]; part
[1][1] = part
[1][2]; part
[1][2] = tmp
;
10780 /* If there are more collisions, we can't handle it by reordering.
10781 Do an lea to the last part and use only one colliding move. */
10782 else if (collisions
> 1)
10788 base
= part
[0][nparts
- 1];
10790 /* Handle the case when the last part isn't valid for lea.
10791 Happens in 64-bit mode storing the 12-byte XFmode. */
10792 if (GET_MODE (base
) != Pmode
)
10793 base
= gen_rtx_REG (Pmode
, REGNO (base
));
10795 emit_insn (gen_rtx_SET (VOIDmode
, base
, XEXP (part
[1][0], 0)));
10796 part
[1][0] = replace_equiv_address (part
[1][0], base
);
10797 part
[1][1] = replace_equiv_address (part
[1][1],
10798 plus_constant (base
, UNITS_PER_WORD
));
10800 part
[1][2] = replace_equiv_address (part
[1][2],
10801 plus_constant (base
, 8));
10811 if (TARGET_128BIT_LONG_DOUBLE
&& mode
== XFmode
)
10812 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, GEN_INT (-4)));
10813 emit_move_insn (part
[0][2], part
[1][2]);
10818 /* In 64bit mode we don't have 32bit push available. In case this is
10819 register, it is OK - we will just use larger counterpart. We also
10820 retype memory - these comes from attempt to avoid REX prefix on
10821 moving of second half of TFmode value. */
10822 if (GET_MODE (part
[1][1]) == SImode
)
10824 if (GET_CODE (part
[1][1]) == MEM
)
10825 part
[1][1] = adjust_address (part
[1][1], DImode
, 0);
10826 else if (REG_P (part
[1][1]))
10827 part
[1][1] = gen_rtx_REG (DImode
, REGNO (part
[1][1]));
10830 if (GET_MODE (part
[1][0]) == SImode
)
10831 part
[1][0] = part
[1][1];
10834 emit_move_insn (part
[0][1], part
[1][1]);
10835 emit_move_insn (part
[0][0], part
[1][0]);
10839 /* Choose correct order to not overwrite the source before it is copied. */
10840 if ((REG_P (part
[0][0])
10841 && REG_P (part
[1][1])
10842 && (REGNO (part
[0][0]) == REGNO (part
[1][1])
10844 && REGNO (part
[0][0]) == REGNO (part
[1][2]))))
10846 && reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0))))
10850 operands
[2] = part
[0][2];
10851 operands
[3] = part
[0][1];
10852 operands
[4] = part
[0][0];
10853 operands
[5] = part
[1][2];
10854 operands
[6] = part
[1][1];
10855 operands
[7] = part
[1][0];
10859 operands
[2] = part
[0][1];
10860 operands
[3] = part
[0][0];
10861 operands
[5] = part
[1][1];
10862 operands
[6] = part
[1][0];
10869 operands
[2] = part
[0][0];
10870 operands
[3] = part
[0][1];
10871 operands
[4] = part
[0][2];
10872 operands
[5] = part
[1][0];
10873 operands
[6] = part
[1][1];
10874 operands
[7] = part
[1][2];
10878 operands
[2] = part
[0][0];
10879 operands
[3] = part
[0][1];
10880 operands
[5] = part
[1][0];
10881 operands
[6] = part
[1][1];
10884 emit_move_insn (operands
[2], operands
[5]);
10885 emit_move_insn (operands
[3], operands
[6]);
10887 emit_move_insn (operands
[4], operands
[7]);
10893 ix86_split_ashldi (rtx
*operands
, rtx scratch
)
10895 rtx low
[2], high
[2];
10898 if (GET_CODE (operands
[2]) == CONST_INT
)
10900 split_di (operands
, 2, low
, high
);
10901 count
= INTVAL (operands
[2]) & 63;
10905 emit_move_insn (high
[0], low
[1]);
10906 emit_move_insn (low
[0], const0_rtx
);
10909 emit_insn (gen_ashlsi3 (high
[0], high
[0], GEN_INT (count
- 32)));
10913 if (!rtx_equal_p (operands
[0], operands
[1]))
10914 emit_move_insn (operands
[0], operands
[1]);
10915 emit_insn (gen_x86_shld_1 (high
[0], low
[0], GEN_INT (count
)));
10916 emit_insn (gen_ashlsi3 (low
[0], low
[0], GEN_INT (count
)));
10921 if (!rtx_equal_p (operands
[0], operands
[1]))
10922 emit_move_insn (operands
[0], operands
[1]);
10924 split_di (operands
, 1, low
, high
);
10926 emit_insn (gen_x86_shld_1 (high
[0], low
[0], operands
[2]));
10927 emit_insn (gen_ashlsi3 (low
[0], low
[0], operands
[2]));
10929 if (TARGET_CMOVE
&& (! no_new_pseudos
|| scratch
))
10931 if (! no_new_pseudos
)
10932 scratch
= force_reg (SImode
, const0_rtx
);
10934 emit_move_insn (scratch
, const0_rtx
);
10936 emit_insn (gen_x86_shift_adj_1 (high
[0], low
[0], operands
[2],
10940 emit_insn (gen_x86_shift_adj_2 (high
[0], low
[0], operands
[2]));
10945 ix86_split_ashrdi (rtx
*operands
, rtx scratch
)
10947 rtx low
[2], high
[2];
10950 if (GET_CODE (operands
[2]) == CONST_INT
)
10952 split_di (operands
, 2, low
, high
);
10953 count
= INTVAL (operands
[2]) & 63;
10957 emit_move_insn (high
[0], high
[1]);
10958 emit_insn (gen_ashrsi3 (high
[0], high
[0], GEN_INT (31)));
10959 emit_move_insn (low
[0], high
[0]);
10962 else if (count
>= 32)
10964 emit_move_insn (low
[0], high
[1]);
10966 if (! reload_completed
)
10967 emit_insn (gen_ashrsi3 (high
[0], low
[0], GEN_INT (31)));
10970 emit_move_insn (high
[0], low
[0]);
10971 emit_insn (gen_ashrsi3 (high
[0], high
[0], GEN_INT (31)));
10975 emit_insn (gen_ashrsi3 (low
[0], low
[0], GEN_INT (count
- 32)));
10979 if (!rtx_equal_p (operands
[0], operands
[1]))
10980 emit_move_insn (operands
[0], operands
[1]);
10981 emit_insn (gen_x86_shrd_1 (low
[0], high
[0], GEN_INT (count
)));
10982 emit_insn (gen_ashrsi3 (high
[0], high
[0], GEN_INT (count
)));
10987 if (!rtx_equal_p (operands
[0], operands
[1]))
10988 emit_move_insn (operands
[0], operands
[1]);
10990 split_di (operands
, 1, low
, high
);
10992 emit_insn (gen_x86_shrd_1 (low
[0], high
[0], operands
[2]));
10993 emit_insn (gen_ashrsi3 (high
[0], high
[0], operands
[2]));
10995 if (TARGET_CMOVE
&& (! no_new_pseudos
|| scratch
))
10997 if (! no_new_pseudos
)
10998 scratch
= gen_reg_rtx (SImode
);
10999 emit_move_insn (scratch
, high
[0]);
11000 emit_insn (gen_ashrsi3 (scratch
, scratch
, GEN_INT (31)));
11001 emit_insn (gen_x86_shift_adj_1 (low
[0], high
[0], operands
[2],
11005 emit_insn (gen_x86_shift_adj_3 (low
[0], high
[0], operands
[2]));
11010 ix86_split_lshrdi (rtx
*operands
, rtx scratch
)
11012 rtx low
[2], high
[2];
11015 if (GET_CODE (operands
[2]) == CONST_INT
)
11017 split_di (operands
, 2, low
, high
);
11018 count
= INTVAL (operands
[2]) & 63;
11022 emit_move_insn (low
[0], high
[1]);
11023 emit_move_insn (high
[0], const0_rtx
);
11026 emit_insn (gen_lshrsi3 (low
[0], low
[0], GEN_INT (count
- 32)));
11030 if (!rtx_equal_p (operands
[0], operands
[1]))
11031 emit_move_insn (operands
[0], operands
[1]);
11032 emit_insn (gen_x86_shrd_1 (low
[0], high
[0], GEN_INT (count
)));
11033 emit_insn (gen_lshrsi3 (high
[0], high
[0], GEN_INT (count
)));
11038 if (!rtx_equal_p (operands
[0], operands
[1]))
11039 emit_move_insn (operands
[0], operands
[1]);
11041 split_di (operands
, 1, low
, high
);
11043 emit_insn (gen_x86_shrd_1 (low
[0], high
[0], operands
[2]));
11044 emit_insn (gen_lshrsi3 (high
[0], high
[0], operands
[2]));
11046 /* Heh. By reversing the arguments, we can reuse this pattern. */
11047 if (TARGET_CMOVE
&& (! no_new_pseudos
|| scratch
))
11049 if (! no_new_pseudos
)
11050 scratch
= force_reg (SImode
, const0_rtx
);
11052 emit_move_insn (scratch
, const0_rtx
);
11054 emit_insn (gen_x86_shift_adj_1 (low
[0], high
[0], operands
[2],
11058 emit_insn (gen_x86_shift_adj_2 (low
[0], high
[0], operands
[2]));
11062 /* Helper function for the string operations below. Dest VARIABLE whether
11063 it is aligned to VALUE bytes. If true, jump to the label. */
11065 ix86_expand_aligntest (rtx variable
, int value
)
11067 rtx label
= gen_label_rtx ();
11068 rtx tmpcount
= gen_reg_rtx (GET_MODE (variable
));
11069 if (GET_MODE (variable
) == DImode
)
11070 emit_insn (gen_anddi3 (tmpcount
, variable
, GEN_INT (value
)));
11072 emit_insn (gen_andsi3 (tmpcount
, variable
, GEN_INT (value
)));
11073 emit_cmp_and_jump_insns (tmpcount
, const0_rtx
, EQ
, 0, GET_MODE (variable
),
11078 /* Adjust COUNTER by the VALUE. */
11080 ix86_adjust_counter (rtx countreg
, HOST_WIDE_INT value
)
11082 if (GET_MODE (countreg
) == DImode
)
11083 emit_insn (gen_adddi3 (countreg
, countreg
, GEN_INT (-value
)));
11085 emit_insn (gen_addsi3 (countreg
, countreg
, GEN_INT (-value
)));
11088 /* Zero extend possibly SImode EXP to Pmode register. */
11090 ix86_zero_extend_to_Pmode (rtx exp
)
11093 if (GET_MODE (exp
) == VOIDmode
)
11094 return force_reg (Pmode
, exp
);
11095 if (GET_MODE (exp
) == Pmode
)
11096 return copy_to_mode_reg (Pmode
, exp
);
11097 r
= gen_reg_rtx (Pmode
);
11098 emit_insn (gen_zero_extendsidi2 (r
, exp
));
11102 /* Expand string move (memcpy) operation. Use i386 string operations when
11103 profitable. expand_clrmem contains similar code. */
11105 ix86_expand_movmem (rtx dst
, rtx src
, rtx count_exp
, rtx align_exp
)
11107 rtx srcreg
, destreg
, countreg
, srcexp
, destexp
;
11108 enum machine_mode counter_mode
;
11109 HOST_WIDE_INT align
= 0;
11110 unsigned HOST_WIDE_INT count
= 0;
11112 if (GET_CODE (align_exp
) == CONST_INT
)
11113 align
= INTVAL (align_exp
);
11115 /* Can't use any of this if the user has appropriated esi or edi. */
11116 if (global_regs
[4] || global_regs
[5])
11119 /* This simple hack avoids all inlining code and simplifies code below. */
11120 if (!TARGET_ALIGN_STRINGOPS
)
11123 if (GET_CODE (count_exp
) == CONST_INT
)
11125 count
= INTVAL (count_exp
);
11126 if (!TARGET_INLINE_ALL_STRINGOPS
&& count
> 64)
11130 /* Figure out proper mode for counter. For 32bits it is always SImode,
11131 for 64bits use SImode when possible, otherwise DImode.
11132 Set count to number of bytes copied when known at compile time. */
11133 if (!TARGET_64BIT
|| GET_MODE (count_exp
) == SImode
11134 || x86_64_zero_extended_value (count_exp
))
11135 counter_mode
= SImode
;
11137 counter_mode
= DImode
;
11139 if (counter_mode
!= SImode
&& counter_mode
!= DImode
)
11142 destreg
= copy_to_mode_reg (Pmode
, XEXP (dst
, 0));
11143 if (destreg
!= XEXP (dst
, 0))
11144 dst
= replace_equiv_address_nv (dst
, destreg
);
11145 srcreg
= copy_to_mode_reg (Pmode
, XEXP (src
, 0));
11146 if (srcreg
!= XEXP (src
, 0))
11147 src
= replace_equiv_address_nv (src
, srcreg
);
11149 /* When optimizing for size emit simple rep ; movsb instruction for
11150 counts not divisible by 4. */
11152 if ((!optimize
|| optimize_size
) && (count
== 0 || (count
& 0x03)))
11154 emit_insn (gen_cld ());
11155 countreg
= ix86_zero_extend_to_Pmode (count_exp
);
11156 destexp
= gen_rtx_PLUS (Pmode
, destreg
, countreg
);
11157 srcexp
= gen_rtx_PLUS (Pmode
, srcreg
, countreg
);
11158 emit_insn (gen_rep_mov (destreg
, dst
, srcreg
, src
, countreg
,
11162 /* For constant aligned (or small unaligned) copies use rep movsl
11163 followed by code copying the rest. For PentiumPro ensure 8 byte
11164 alignment to allow rep movsl acceleration. */
11166 else if (count
!= 0
11168 || (!TARGET_PENTIUMPRO
&& !TARGET_64BIT
&& align
>= 4)
11169 || optimize_size
|| count
< (unsigned int) 64))
11171 unsigned HOST_WIDE_INT offset
= 0;
11172 int size
= TARGET_64BIT
&& !optimize_size
? 8 : 4;
11173 rtx srcmem
, dstmem
;
11175 emit_insn (gen_cld ());
11176 if (count
& ~(size
- 1))
11178 countreg
= copy_to_mode_reg (counter_mode
,
11179 GEN_INT ((count
>> (size
== 4 ? 2 : 3))
11180 & (TARGET_64BIT
? -1 : 0x3fffffff)));
11181 countreg
= ix86_zero_extend_to_Pmode (countreg
);
11183 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
11184 GEN_INT (size
== 4 ? 2 : 3));
11185 srcexp
= gen_rtx_PLUS (Pmode
, destexp
, srcreg
);
11186 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destreg
);
11188 emit_insn (gen_rep_mov (destreg
, dst
, srcreg
, src
,
11189 countreg
, destexp
, srcexp
));
11190 offset
= count
& ~(size
- 1);
11192 if (size
== 8 && (count
& 0x04))
11194 srcmem
= adjust_automodify_address_nv (src
, SImode
, srcreg
,
11196 dstmem
= adjust_automodify_address_nv (dst
, SImode
, destreg
,
11198 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
11203 srcmem
= adjust_automodify_address_nv (src
, HImode
, srcreg
,
11205 dstmem
= adjust_automodify_address_nv (dst
, HImode
, destreg
,
11207 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
11212 srcmem
= adjust_automodify_address_nv (src
, QImode
, srcreg
,
11214 dstmem
= adjust_automodify_address_nv (dst
, QImode
, destreg
,
11216 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
11219 /* The generic code based on the glibc implementation:
11220 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
11221 allowing accelerated copying there)
11222 - copy the data using rep movsl
11223 - copy the rest. */
11228 rtx srcmem
, dstmem
;
11229 int desired_alignment
= (TARGET_PENTIUMPRO
11230 && (count
== 0 || count
>= (unsigned int) 260)
11231 ? 8 : UNITS_PER_WORD
);
11232 /* Get rid of MEM_OFFSETs, they won't be accurate. */
11233 dst
= change_address (dst
, BLKmode
, destreg
);
11234 src
= change_address (src
, BLKmode
, srcreg
);
11236 /* In case we don't know anything about the alignment, default to
11237 library version, since it is usually equally fast and result in
11240 Also emit call when we know that the count is large and call overhead
11241 will not be important. */
11242 if (!TARGET_INLINE_ALL_STRINGOPS
11243 && (align
< UNITS_PER_WORD
|| !TARGET_REP_MOVL_OPTIMAL
))
11246 if (TARGET_SINGLE_STRINGOP
)
11247 emit_insn (gen_cld ());
11249 countreg2
= gen_reg_rtx (Pmode
);
11250 countreg
= copy_to_mode_reg (counter_mode
, count_exp
);
11252 /* We don't use loops to align destination and to copy parts smaller
11253 than 4 bytes, because gcc is able to optimize such code better (in
11254 the case the destination or the count really is aligned, gcc is often
11255 able to predict the branches) and also it is friendlier to the
11256 hardware branch prediction.
11258 Using loops is beneficial for generic case, because we can
11259 handle small counts using the loops. Many CPUs (such as Athlon)
11260 have large REP prefix setup costs.
11262 This is quite costly. Maybe we can revisit this decision later or
11263 add some customizability to this code. */
11265 if (count
== 0 && align
< desired_alignment
)
11267 label
= gen_label_rtx ();
11268 emit_cmp_and_jump_insns (countreg
, GEN_INT (desired_alignment
- 1),
11269 LEU
, 0, counter_mode
, 1, label
);
11273 rtx label
= ix86_expand_aligntest (destreg
, 1);
11274 srcmem
= change_address (src
, QImode
, srcreg
);
11275 dstmem
= change_address (dst
, QImode
, destreg
);
11276 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
11277 ix86_adjust_counter (countreg
, 1);
11278 emit_label (label
);
11279 LABEL_NUSES (label
) = 1;
11283 rtx label
= ix86_expand_aligntest (destreg
, 2);
11284 srcmem
= change_address (src
, HImode
, srcreg
);
11285 dstmem
= change_address (dst
, HImode
, destreg
);
11286 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
11287 ix86_adjust_counter (countreg
, 2);
11288 emit_label (label
);
11289 LABEL_NUSES (label
) = 1;
11291 if (align
<= 4 && desired_alignment
> 4)
11293 rtx label
= ix86_expand_aligntest (destreg
, 4);
11294 srcmem
= change_address (src
, SImode
, srcreg
);
11295 dstmem
= change_address (dst
, SImode
, destreg
);
11296 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
11297 ix86_adjust_counter (countreg
, 4);
11298 emit_label (label
);
11299 LABEL_NUSES (label
) = 1;
11302 if (label
&& desired_alignment
> 4 && !TARGET_64BIT
)
11304 emit_label (label
);
11305 LABEL_NUSES (label
) = 1;
11308 if (!TARGET_SINGLE_STRINGOP
)
11309 emit_insn (gen_cld ());
11312 emit_insn (gen_lshrdi3 (countreg2
, ix86_zero_extend_to_Pmode (countreg
),
11314 destexp
= gen_rtx_ASHIFT (Pmode
, countreg2
, GEN_INT (3));
11318 emit_insn (gen_lshrsi3 (countreg2
, countreg
, const2_rtx
));
11319 destexp
= gen_rtx_ASHIFT (Pmode
, countreg2
, const2_rtx
);
11321 srcexp
= gen_rtx_PLUS (Pmode
, destexp
, srcreg
);
11322 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destreg
);
11323 emit_insn (gen_rep_mov (destreg
, dst
, srcreg
, src
,
11324 countreg2
, destexp
, srcexp
));
11328 emit_label (label
);
11329 LABEL_NUSES (label
) = 1;
11331 if (TARGET_64BIT
&& align
> 4 && count
!= 0 && (count
& 4))
11333 srcmem
= change_address (src
, SImode
, srcreg
);
11334 dstmem
= change_address (dst
, SImode
, destreg
);
11335 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
11337 if ((align
<= 4 || count
== 0) && TARGET_64BIT
)
11339 rtx label
= ix86_expand_aligntest (countreg
, 4);
11340 srcmem
= change_address (src
, SImode
, srcreg
);
11341 dstmem
= change_address (dst
, SImode
, destreg
);
11342 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
11343 emit_label (label
);
11344 LABEL_NUSES (label
) = 1;
11346 if (align
> 2 && count
!= 0 && (count
& 2))
11348 srcmem
= change_address (src
, HImode
, srcreg
);
11349 dstmem
= change_address (dst
, HImode
, destreg
);
11350 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
11352 if (align
<= 2 || count
== 0)
11354 rtx label
= ix86_expand_aligntest (countreg
, 2);
11355 srcmem
= change_address (src
, HImode
, srcreg
);
11356 dstmem
= change_address (dst
, HImode
, destreg
);
11357 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
11358 emit_label (label
);
11359 LABEL_NUSES (label
) = 1;
11361 if (align
> 1 && count
!= 0 && (count
& 1))
11363 srcmem
= change_address (src
, QImode
, srcreg
);
11364 dstmem
= change_address (dst
, QImode
, destreg
);
11365 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
11367 if (align
<= 1 || count
== 0)
11369 rtx label
= ix86_expand_aligntest (countreg
, 1);
11370 srcmem
= change_address (src
, QImode
, srcreg
);
11371 dstmem
= change_address (dst
, QImode
, destreg
);
11372 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
11373 emit_label (label
);
11374 LABEL_NUSES (label
) = 1;
11381 /* Expand string clear operation (bzero). Use i386 string operations when
11382 profitable. expand_movmem contains similar code. */
11384 ix86_expand_clrmem (rtx dst
, rtx count_exp
, rtx align_exp
)
11386 rtx destreg
, zeroreg
, countreg
, destexp
;
11387 enum machine_mode counter_mode
;
11388 HOST_WIDE_INT align
= 0;
11389 unsigned HOST_WIDE_INT count
= 0;
11391 if (GET_CODE (align_exp
) == CONST_INT
)
11392 align
= INTVAL (align_exp
);
11394 /* Can't use any of this if the user has appropriated esi. */
11395 if (global_regs
[4])
11398 /* This simple hack avoids all inlining code and simplifies code below. */
11399 if (!TARGET_ALIGN_STRINGOPS
)
11402 if (GET_CODE (count_exp
) == CONST_INT
)
11404 count
= INTVAL (count_exp
);
11405 if (!TARGET_INLINE_ALL_STRINGOPS
&& count
> 64)
11408 /* Figure out proper mode for counter. For 32bits it is always SImode,
11409 for 64bits use SImode when possible, otherwise DImode.
11410 Set count to number of bytes copied when known at compile time. */
11411 if (!TARGET_64BIT
|| GET_MODE (count_exp
) == SImode
11412 || x86_64_zero_extended_value (count_exp
))
11413 counter_mode
= SImode
;
11415 counter_mode
= DImode
;
11417 destreg
= copy_to_mode_reg (Pmode
, XEXP (dst
, 0));
11418 if (destreg
!= XEXP (dst
, 0))
11419 dst
= replace_equiv_address_nv (dst
, destreg
);
11421 emit_insn (gen_cld ());
11423 /* When optimizing for size emit simple rep ; movsb instruction for
11424 counts not divisible by 4. */
11426 if ((!optimize
|| optimize_size
) && (count
== 0 || (count
& 0x03)))
11428 countreg
= ix86_zero_extend_to_Pmode (count_exp
);
11429 zeroreg
= copy_to_mode_reg (QImode
, const0_rtx
);
11430 destexp
= gen_rtx_PLUS (Pmode
, destreg
, countreg
);
11431 emit_insn (gen_rep_stos (destreg
, countreg
, dst
, zeroreg
, destexp
));
11433 else if (count
!= 0
11435 || (!TARGET_PENTIUMPRO
&& !TARGET_64BIT
&& align
>= 4)
11436 || optimize_size
|| count
< (unsigned int) 64))
11438 int size
= TARGET_64BIT
&& !optimize_size
? 8 : 4;
11439 unsigned HOST_WIDE_INT offset
= 0;
11441 zeroreg
= copy_to_mode_reg (size
== 4 ? SImode
: DImode
, const0_rtx
);
11442 if (count
& ~(size
- 1))
11444 countreg
= copy_to_mode_reg (counter_mode
,
11445 GEN_INT ((count
>> (size
== 4 ? 2 : 3))
11446 & (TARGET_64BIT
? -1 : 0x3fffffff)));
11447 countreg
= ix86_zero_extend_to_Pmode (countreg
);
11448 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
, GEN_INT (size
== 4 ? 2 : 3));
11449 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destreg
);
11450 emit_insn (gen_rep_stos (destreg
, countreg
, dst
, zeroreg
, destexp
));
11451 offset
= count
& ~(size
- 1);
11453 if (size
== 8 && (count
& 0x04))
11455 rtx mem
= adjust_automodify_address_nv (dst
, SImode
, destreg
,
11457 emit_insn (gen_strset (destreg
, mem
,
11458 gen_rtx_SUBREG (SImode
, zeroreg
, 0)));
11463 rtx mem
= adjust_automodify_address_nv (dst
, HImode
, destreg
,
11465 emit_insn (gen_strset (destreg
, mem
,
11466 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
11471 rtx mem
= adjust_automodify_address_nv (dst
, QImode
, destreg
,
11473 emit_insn (gen_strset (destreg
, mem
,
11474 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
11481 /* Compute desired alignment of the string operation. */
11482 int desired_alignment
= (TARGET_PENTIUMPRO
11483 && (count
== 0 || count
>= (unsigned int) 260)
11484 ? 8 : UNITS_PER_WORD
);
11486 /* In case we don't know anything about the alignment, default to
11487 library version, since it is usually equally fast and result in
11490 Also emit call when we know that the count is large and call overhead
11491 will not be important. */
11492 if (!TARGET_INLINE_ALL_STRINGOPS
11493 && (align
< UNITS_PER_WORD
|| !TARGET_REP_MOVL_OPTIMAL
))
11496 if (TARGET_SINGLE_STRINGOP
)
11497 emit_insn (gen_cld ());
11499 countreg2
= gen_reg_rtx (Pmode
);
11500 countreg
= copy_to_mode_reg (counter_mode
, count_exp
);
11501 zeroreg
= copy_to_mode_reg (Pmode
, const0_rtx
);
11502 /* Get rid of MEM_OFFSET, it won't be accurate. */
11503 dst
= change_address (dst
, BLKmode
, destreg
);
11505 if (count
== 0 && align
< desired_alignment
)
11507 label
= gen_label_rtx ();
11508 emit_cmp_and_jump_insns (countreg
, GEN_INT (desired_alignment
- 1),
11509 LEU
, 0, counter_mode
, 1, label
);
11513 rtx label
= ix86_expand_aligntest (destreg
, 1);
11514 emit_insn (gen_strset (destreg
, dst
,
11515 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
11516 ix86_adjust_counter (countreg
, 1);
11517 emit_label (label
);
11518 LABEL_NUSES (label
) = 1;
11522 rtx label
= ix86_expand_aligntest (destreg
, 2);
11523 emit_insn (gen_strset (destreg
, dst
,
11524 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
11525 ix86_adjust_counter (countreg
, 2);
11526 emit_label (label
);
11527 LABEL_NUSES (label
) = 1;
11529 if (align
<= 4 && desired_alignment
> 4)
11531 rtx label
= ix86_expand_aligntest (destreg
, 4);
11532 emit_insn (gen_strset (destreg
, dst
,
11534 ? gen_rtx_SUBREG (SImode
, zeroreg
, 0)
11536 ix86_adjust_counter (countreg
, 4);
11537 emit_label (label
);
11538 LABEL_NUSES (label
) = 1;
11541 if (label
&& desired_alignment
> 4 && !TARGET_64BIT
)
11543 emit_label (label
);
11544 LABEL_NUSES (label
) = 1;
11548 if (!TARGET_SINGLE_STRINGOP
)
11549 emit_insn (gen_cld ());
11552 emit_insn (gen_lshrdi3 (countreg2
, ix86_zero_extend_to_Pmode (countreg
),
11554 destexp
= gen_rtx_ASHIFT (Pmode
, countreg2
, GEN_INT (3));
11558 emit_insn (gen_lshrsi3 (countreg2
, countreg
, const2_rtx
));
11559 destexp
= gen_rtx_ASHIFT (Pmode
, countreg2
, const2_rtx
);
11561 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destreg
);
11562 emit_insn (gen_rep_stos (destreg
, countreg2
, dst
, zeroreg
, destexp
));
11566 emit_label (label
);
11567 LABEL_NUSES (label
) = 1;
11570 if (TARGET_64BIT
&& align
> 4 && count
!= 0 && (count
& 4))
11571 emit_insn (gen_strset (destreg
, dst
,
11572 gen_rtx_SUBREG (SImode
, zeroreg
, 0)));
11573 if (TARGET_64BIT
&& (align
<= 4 || count
== 0))
11575 rtx label
= ix86_expand_aligntest (countreg
, 4);
11576 emit_insn (gen_strset (destreg
, dst
,
11577 gen_rtx_SUBREG (SImode
, zeroreg
, 0)));
11578 emit_label (label
);
11579 LABEL_NUSES (label
) = 1;
11581 if (align
> 2 && count
!= 0 && (count
& 2))
11582 emit_insn (gen_strset (destreg
, dst
,
11583 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
11584 if (align
<= 2 || count
== 0)
11586 rtx label
= ix86_expand_aligntest (countreg
, 2);
11587 emit_insn (gen_strset (destreg
, dst
,
11588 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
11589 emit_label (label
);
11590 LABEL_NUSES (label
) = 1;
11592 if (align
> 1 && count
!= 0 && (count
& 1))
11593 emit_insn (gen_strset (destreg
, dst
,
11594 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
11595 if (align
<= 1 || count
== 0)
11597 rtx label
= ix86_expand_aligntest (countreg
, 1);
11598 emit_insn (gen_strset (destreg
, dst
,
11599 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
11600 emit_label (label
);
11601 LABEL_NUSES (label
) = 1;
11607 /* Expand strlen. */
11609 ix86_expand_strlen (rtx out
, rtx src
, rtx eoschar
, rtx align
)
11611 rtx addr
, scratch1
, scratch2
, scratch3
, scratch4
;
11613 /* The generic case of strlen expander is long. Avoid it's
11614 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
11616 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
11617 && !TARGET_INLINE_ALL_STRINGOPS
11619 && (GET_CODE (align
) != CONST_INT
|| INTVAL (align
) < 4))
11622 addr
= force_reg (Pmode
, XEXP (src
, 0));
11623 scratch1
= gen_reg_rtx (Pmode
);
11625 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
11628 /* Well it seems that some optimizer does not combine a call like
11629 foo(strlen(bar), strlen(bar));
11630 when the move and the subtraction is done here. It does calculate
11631 the length just once when these instructions are done inside of
11632 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
11633 often used and I use one fewer register for the lifetime of
11634 output_strlen_unroll() this is better. */
11636 emit_move_insn (out
, addr
);
11638 ix86_expand_strlensi_unroll_1 (out
, src
, align
);
11640 /* strlensi_unroll_1 returns the address of the zero at the end of
11641 the string, like memchr(), so compute the length by subtracting
11642 the start address. */
11644 emit_insn (gen_subdi3 (out
, out
, addr
));
11646 emit_insn (gen_subsi3 (out
, out
, addr
));
11651 scratch2
= gen_reg_rtx (Pmode
);
11652 scratch3
= gen_reg_rtx (Pmode
);
11653 scratch4
= force_reg (Pmode
, constm1_rtx
);
11655 emit_move_insn (scratch3
, addr
);
11656 eoschar
= force_reg (QImode
, eoschar
);
11658 emit_insn (gen_cld ());
11659 src
= replace_equiv_address_nv (src
, scratch3
);
11661 /* If .md starts supporting :P, this can be done in .md. */
11662 unspec
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (4, src
, eoschar
, align
,
11663 scratch4
), UNSPEC_SCAS
);
11664 emit_insn (gen_strlenqi_1 (scratch1
, scratch3
, unspec
));
11667 emit_insn (gen_one_cmpldi2 (scratch2
, scratch1
));
11668 emit_insn (gen_adddi3 (out
, scratch2
, constm1_rtx
));
11672 emit_insn (gen_one_cmplsi2 (scratch2
, scratch1
));
11673 emit_insn (gen_addsi3 (out
, scratch2
, constm1_rtx
));
11679 /* Expand the appropriate insns for doing strlen if not just doing
11682 out = result, initialized with the start address
11683 align_rtx = alignment of the address.
11684 scratch = scratch register, initialized with the startaddress when
11685 not aligned, otherwise undefined
11687 This is just the body. It needs the initializations mentioned above and
11688 some address computing at the end. These things are done in i386.md. */
11691 ix86_expand_strlensi_unroll_1 (rtx out
, rtx src
, rtx align_rtx
)
11695 rtx align_2_label
= NULL_RTX
;
11696 rtx align_3_label
= NULL_RTX
;
11697 rtx align_4_label
= gen_label_rtx ();
11698 rtx end_0_label
= gen_label_rtx ();
11700 rtx tmpreg
= gen_reg_rtx (SImode
);
11701 rtx scratch
= gen_reg_rtx (SImode
);
11705 if (GET_CODE (align_rtx
) == CONST_INT
)
11706 align
= INTVAL (align_rtx
);
11708 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
11710 /* Is there a known alignment and is it less than 4? */
11713 rtx scratch1
= gen_reg_rtx (Pmode
);
11714 emit_move_insn (scratch1
, out
);
11715 /* Is there a known alignment and is it not 2? */
11718 align_3_label
= gen_label_rtx (); /* Label when aligned to 3-byte */
11719 align_2_label
= gen_label_rtx (); /* Label when aligned to 2-byte */
11721 /* Leave just the 3 lower bits. */
11722 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, GEN_INT (3),
11723 NULL_RTX
, 0, OPTAB_WIDEN
);
11725 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
11726 Pmode
, 1, align_4_label
);
11727 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, EQ
, NULL
,
11728 Pmode
, 1, align_2_label
);
11729 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, GTU
, NULL
,
11730 Pmode
, 1, align_3_label
);
11734 /* Since the alignment is 2, we have to check 2 or 0 bytes;
11735 check if is aligned to 4 - byte. */
11737 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, const2_rtx
,
11738 NULL_RTX
, 0, OPTAB_WIDEN
);
11740 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
11741 Pmode
, 1, align_4_label
);
11744 mem
= change_address (src
, QImode
, out
);
11746 /* Now compare the bytes. */
11748 /* Compare the first n unaligned byte on a byte per byte basis. */
11749 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
,
11750 QImode
, 1, end_0_label
);
11752 /* Increment the address. */
11754 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
11756 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
11758 /* Not needed with an alignment of 2 */
11761 emit_label (align_2_label
);
11763 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
11767 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
11769 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
11771 emit_label (align_3_label
);
11774 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
11778 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
11780 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
11783 /* Generate loop to check 4 bytes at a time. It is not a good idea to
11784 align this loop. It gives only huge programs, but does not help to
11786 emit_label (align_4_label
);
11788 mem
= change_address (src
, SImode
, out
);
11789 emit_move_insn (scratch
, mem
);
11791 emit_insn (gen_adddi3 (out
, out
, GEN_INT (4)));
11793 emit_insn (gen_addsi3 (out
, out
, GEN_INT (4)));
11795 /* This formula yields a nonzero result iff one of the bytes is zero.
11796 This saves three branches inside loop and many cycles. */
11798 emit_insn (gen_addsi3 (tmpreg
, scratch
, GEN_INT (-0x01010101)));
11799 emit_insn (gen_one_cmplsi2 (scratch
, scratch
));
11800 emit_insn (gen_andsi3 (tmpreg
, tmpreg
, scratch
));
11801 emit_insn (gen_andsi3 (tmpreg
, tmpreg
,
11802 gen_int_mode (0x80808080, SImode
)));
11803 emit_cmp_and_jump_insns (tmpreg
, const0_rtx
, EQ
, 0, SImode
, 1,
11808 rtx reg
= gen_reg_rtx (SImode
);
11809 rtx reg2
= gen_reg_rtx (Pmode
);
11810 emit_move_insn (reg
, tmpreg
);
11811 emit_insn (gen_lshrsi3 (reg
, reg
, GEN_INT (16)));
11813 /* If zero is not in the first two bytes, move two bytes forward. */
11814 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
11815 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
11816 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
11817 emit_insn (gen_rtx_SET (VOIDmode
, tmpreg
,
11818 gen_rtx_IF_THEN_ELSE (SImode
, tmp
,
11821 /* Emit lea manually to avoid clobbering of flags. */
11822 emit_insn (gen_rtx_SET (SImode
, reg2
,
11823 gen_rtx_PLUS (Pmode
, out
, const2_rtx
)));
11825 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
11826 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
11827 emit_insn (gen_rtx_SET (VOIDmode
, out
,
11828 gen_rtx_IF_THEN_ELSE (Pmode
, tmp
,
11835 rtx end_2_label
= gen_label_rtx ();
11836 /* Is zero in the first two bytes? */
11838 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
11839 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
11840 tmp
= gen_rtx_NE (VOIDmode
, tmp
, const0_rtx
);
11841 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
11842 gen_rtx_LABEL_REF (VOIDmode
, end_2_label
),
11844 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
11845 JUMP_LABEL (tmp
) = end_2_label
;
11847 /* Not in the first two. Move two bytes forward. */
11848 emit_insn (gen_lshrsi3 (tmpreg
, tmpreg
, GEN_INT (16)));
11850 emit_insn (gen_adddi3 (out
, out
, const2_rtx
));
11852 emit_insn (gen_addsi3 (out
, out
, const2_rtx
));
11854 emit_label (end_2_label
);
11858 /* Avoid branch in fixing the byte. */
11859 tmpreg
= gen_lowpart (QImode
, tmpreg
);
11860 emit_insn (gen_addqi3_cc (tmpreg
, tmpreg
, tmpreg
));
11861 cmp
= gen_rtx_LTU (Pmode
, gen_rtx_REG (CCmode
, 17), const0_rtx
);
11863 emit_insn (gen_subdi3_carry_rex64 (out
, out
, GEN_INT (3), cmp
));
11865 emit_insn (gen_subsi3_carry (out
, out
, GEN_INT (3), cmp
));
11867 emit_label (end_0_label
);
11871 ix86_expand_call (rtx retval
, rtx fnaddr
, rtx callarg1
,
11872 rtx callarg2 ATTRIBUTE_UNUSED
,
11873 rtx pop
, int sibcall
)
11875 rtx use
= NULL
, call
;
11877 if (pop
== const0_rtx
)
11879 if (TARGET_64BIT
&& pop
)
11883 if (flag_pic
&& GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
)
11884 fnaddr
= machopic_indirect_call_target (fnaddr
);
11886 /* Static functions and indirect calls don't need the pic register. */
11887 if (! TARGET_64BIT
&& flag_pic
11888 && GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
11889 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr
, 0)))
11890 use_reg (&use
, pic_offset_table_rtx
);
11892 if (TARGET_64BIT
&& INTVAL (callarg2
) >= 0)
11894 rtx al
= gen_rtx_REG (QImode
, 0);
11895 emit_move_insn (al
, callarg2
);
11896 use_reg (&use
, al
);
11898 #endif /* TARGET_MACHO */
11900 if (! call_insn_operand (XEXP (fnaddr
, 0), Pmode
))
11902 fnaddr
= copy_to_mode_reg (Pmode
, XEXP (fnaddr
, 0));
11903 fnaddr
= gen_rtx_MEM (QImode
, fnaddr
);
11905 if (sibcall
&& TARGET_64BIT
11906 && !constant_call_address_operand (XEXP (fnaddr
, 0), Pmode
))
11909 addr
= copy_to_mode_reg (Pmode
, XEXP (fnaddr
, 0));
11910 fnaddr
= gen_rtx_REG (Pmode
, FIRST_REX_INT_REG
+ 3 /* R11 */);
11911 emit_move_insn (fnaddr
, addr
);
11912 fnaddr
= gen_rtx_MEM (QImode
, fnaddr
);
11915 call
= gen_rtx_CALL (VOIDmode
, fnaddr
, callarg1
);
11917 call
= gen_rtx_SET (VOIDmode
, retval
, call
);
11920 pop
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, pop
);
11921 pop
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, pop
);
11922 call
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, call
, pop
));
11925 call
= emit_call_insn (call
);
11927 CALL_INSN_FUNCTION_USAGE (call
) = use
;
11931 /* Clear stack slot assignments remembered from previous functions.
11932 This is called from INIT_EXPANDERS once before RTL is emitted for each
11935 static struct machine_function
*
11936 ix86_init_machine_status (void)
11938 struct machine_function
*f
;
11940 f
= ggc_alloc_cleared (sizeof (struct machine_function
));
11941 f
->use_fast_prologue_epilogue_nregs
= -1;
11946 /* Return a MEM corresponding to a stack slot with mode MODE.
11947 Allocate a new slot if necessary.
11949 The RTL for a function can have several slots available: N is
11950 which slot to use. */
11953 assign_386_stack_local (enum machine_mode mode
, int n
)
11955 struct stack_local_entry
*s
;
11957 if (n
< 0 || n
>= MAX_386_STACK_LOCALS
)
11960 for (s
= ix86_stack_locals
; s
; s
= s
->next
)
11961 if (s
->mode
== mode
&& s
->n
== n
)
11964 s
= (struct stack_local_entry
*)
11965 ggc_alloc (sizeof (struct stack_local_entry
));
11968 s
->rtl
= assign_stack_local (mode
, GET_MODE_SIZE (mode
), 0);
11970 s
->next
= ix86_stack_locals
;
11971 ix86_stack_locals
= s
;
11975 /* Construct the SYMBOL_REF for the tls_get_addr function. */
11977 static GTY(()) rtx ix86_tls_symbol
;
11979 ix86_tls_get_addr (void)
11982 if (!ix86_tls_symbol
)
11984 ix86_tls_symbol
= gen_rtx_SYMBOL_REF (Pmode
,
11985 (TARGET_GNU_TLS
&& !TARGET_64BIT
)
11986 ? "___tls_get_addr"
11987 : "__tls_get_addr");
11990 return ix86_tls_symbol
;
11993 /* Calculate the length of the memory address in the instruction
11994 encoding. Does not include the one-byte modrm, opcode, or prefix. */
11997 memory_address_length (rtx addr
)
11999 struct ix86_address parts
;
12000 rtx base
, index
, disp
;
12003 if (GET_CODE (addr
) == PRE_DEC
12004 || GET_CODE (addr
) == POST_INC
12005 || GET_CODE (addr
) == PRE_MODIFY
12006 || GET_CODE (addr
) == POST_MODIFY
)
12009 if (! ix86_decompose_address (addr
, &parts
))
12013 index
= parts
.index
;
12018 - esp as the base always wants an index,
12019 - ebp as the base always wants a displacement. */
12021 /* Register Indirect. */
12022 if (base
&& !index
&& !disp
)
12024 /* esp (for its index) and ebp (for its displacement) need
12025 the two-byte modrm form. */
12026 if (addr
== stack_pointer_rtx
12027 || addr
== arg_pointer_rtx
12028 || addr
== frame_pointer_rtx
12029 || addr
== hard_frame_pointer_rtx
)
12033 /* Direct Addressing. */
12034 else if (disp
&& !base
&& !index
)
12039 /* Find the length of the displacement constant. */
12042 if (GET_CODE (disp
) == CONST_INT
12043 && CONST_OK_FOR_LETTER_P (INTVAL (disp
), 'K')
12049 /* ebp always wants a displacement. */
12050 else if (base
== hard_frame_pointer_rtx
)
12053 /* An index requires the two-byte modrm form.... */
12055 /* ...like esp, which always wants an index. */
12056 || base
== stack_pointer_rtx
12057 || base
== arg_pointer_rtx
12058 || base
== frame_pointer_rtx
)
12065 /* Compute default value for "length_immediate" attribute. When SHORTFORM
12066 is set, expect that insn have 8bit immediate alternative. */
12068 ix86_attr_length_immediate_default (rtx insn
, int shortform
)
12072 extract_insn_cached (insn
);
12073 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
12074 if (CONSTANT_P (recog_data
.operand
[i
]))
12079 && GET_CODE (recog_data
.operand
[i
]) == CONST_INT
12080 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data
.operand
[i
]), 'K'))
12084 switch (get_attr_mode (insn
))
12095 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
12100 fatal_insn ("unknown insn mode", insn
);
12106 /* Compute default value for "length_address" attribute. */
12108 ix86_attr_length_address_default (rtx insn
)
12112 if (get_attr_type (insn
) == TYPE_LEA
)
12114 rtx set
= PATTERN (insn
);
12115 if (GET_CODE (set
) == SET
)
12117 else if (GET_CODE (set
) == PARALLEL
12118 && GET_CODE (XVECEXP (set
, 0, 0)) == SET
)
12119 set
= XVECEXP (set
, 0, 0);
12122 #ifdef ENABLE_CHECKING
12128 return memory_address_length (SET_SRC (set
));
12131 extract_insn_cached (insn
);
12132 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
12133 if (GET_CODE (recog_data
.operand
[i
]) == MEM
)
12135 return memory_address_length (XEXP (recog_data
.operand
[i
], 0));
12141 /* Return the maximum number of instructions a cpu can issue. */
12144 ix86_issue_rate (void)
12148 case PROCESSOR_PENTIUM
:
12152 case PROCESSOR_PENTIUMPRO
:
12153 case PROCESSOR_PENTIUM4
:
12154 case PROCESSOR_ATHLON
:
12156 case PROCESSOR_NOCONA
:
12164 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
12165 by DEP_INSN and nothing set by DEP_INSN. */
12168 ix86_flags_dependant (rtx insn
, rtx dep_insn
, enum attr_type insn_type
)
12172 /* Simplify the test for uninteresting insns. */
12173 if (insn_type
!= TYPE_SETCC
12174 && insn_type
!= TYPE_ICMOV
12175 && insn_type
!= TYPE_FCMOV
12176 && insn_type
!= TYPE_IBR
)
12179 if ((set
= single_set (dep_insn
)) != 0)
12181 set
= SET_DEST (set
);
12184 else if (GET_CODE (PATTERN (dep_insn
)) == PARALLEL
12185 && XVECLEN (PATTERN (dep_insn
), 0) == 2
12186 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 0)) == SET
12187 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 1)) == SET
)
12189 set
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
12190 set2
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
12195 if (GET_CODE (set
) != REG
|| REGNO (set
) != FLAGS_REG
)
12198 /* This test is true if the dependent insn reads the flags but
12199 not any other potentially set register. */
12200 if (!reg_overlap_mentioned_p (set
, PATTERN (insn
)))
12203 if (set2
&& reg_overlap_mentioned_p (set2
, PATTERN (insn
)))
12209 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
12210 address with operands set by DEP_INSN. */
12213 ix86_agi_dependant (rtx insn
, rtx dep_insn
, enum attr_type insn_type
)
12217 if (insn_type
== TYPE_LEA
12220 addr
= PATTERN (insn
);
12221 if (GET_CODE (addr
) == SET
)
12223 else if (GET_CODE (addr
) == PARALLEL
12224 && GET_CODE (XVECEXP (addr
, 0, 0)) == SET
)
12225 addr
= XVECEXP (addr
, 0, 0);
12228 addr
= SET_SRC (addr
);
12233 extract_insn_cached (insn
);
12234 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
12235 if (GET_CODE (recog_data
.operand
[i
]) == MEM
)
12237 addr
= XEXP (recog_data
.operand
[i
], 0);
12244 return modified_in_p (addr
, dep_insn
);
12248 ix86_adjust_cost (rtx insn
, rtx link
, rtx dep_insn
, int cost
)
12250 enum attr_type insn_type
, dep_insn_type
;
12251 enum attr_memory memory
;
12253 int dep_insn_code_number
;
12255 /* Anti and output dependencies have zero cost on all CPUs. */
12256 if (REG_NOTE_KIND (link
) != 0)
12259 dep_insn_code_number
= recog_memoized (dep_insn
);
12261 /* If we can't recognize the insns, we can't really do anything. */
12262 if (dep_insn_code_number
< 0 || recog_memoized (insn
) < 0)
12265 insn_type
= get_attr_type (insn
);
12266 dep_insn_type
= get_attr_type (dep_insn
);
12270 case PROCESSOR_PENTIUM
:
12271 /* Address Generation Interlock adds a cycle of latency. */
12272 if (ix86_agi_dependant (insn
, dep_insn
, insn_type
))
12275 /* ??? Compares pair with jump/setcc. */
12276 if (ix86_flags_dependant (insn
, dep_insn
, insn_type
))
12279 /* Floating point stores require value to be ready one cycle earlier. */
12280 if (insn_type
== TYPE_FMOV
12281 && get_attr_memory (insn
) == MEMORY_STORE
12282 && !ix86_agi_dependant (insn
, dep_insn
, insn_type
))
12286 case PROCESSOR_PENTIUMPRO
:
12287 memory
= get_attr_memory (insn
);
12289 /* INT->FP conversion is expensive. */
12290 if (get_attr_fp_int_src (dep_insn
))
12293 /* There is one cycle extra latency between an FP op and a store. */
12294 if (insn_type
== TYPE_FMOV
12295 && (set
= single_set (dep_insn
)) != NULL_RTX
12296 && (set2
= single_set (insn
)) != NULL_RTX
12297 && rtx_equal_p (SET_DEST (set
), SET_SRC (set2
))
12298 && GET_CODE (SET_DEST (set2
)) == MEM
)
12301 /* Show ability of reorder buffer to hide latency of load by executing
12302 in parallel with previous instruction in case
12303 previous instruction is not needed to compute the address. */
12304 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
12305 && !ix86_agi_dependant (insn
, dep_insn
, insn_type
))
12307 /* Claim moves to take one cycle, as core can issue one load
12308 at time and the next load can start cycle later. */
12309 if (dep_insn_type
== TYPE_IMOV
12310 || dep_insn_type
== TYPE_FMOV
)
12318 memory
= get_attr_memory (insn
);
12320 /* The esp dependency is resolved before the instruction is really
12322 if ((insn_type
== TYPE_PUSH
|| insn_type
== TYPE_POP
)
12323 && (dep_insn_type
== TYPE_PUSH
|| dep_insn_type
== TYPE_POP
))
12326 /* INT->FP conversion is expensive. */
12327 if (get_attr_fp_int_src (dep_insn
))
12330 /* Show ability of reorder buffer to hide latency of load by executing
12331 in parallel with previous instruction in case
12332 previous instruction is not needed to compute the address. */
12333 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
12334 && !ix86_agi_dependant (insn
, dep_insn
, insn_type
))
12336 /* Claim moves to take one cycle, as core can issue one load
12337 at time and the next load can start cycle later. */
12338 if (dep_insn_type
== TYPE_IMOV
12339 || dep_insn_type
== TYPE_FMOV
)
12348 case PROCESSOR_ATHLON
:
12350 memory
= get_attr_memory (insn
);
12352 /* Show ability of reorder buffer to hide latency of load by executing
12353 in parallel with previous instruction in case
12354 previous instruction is not needed to compute the address. */
12355 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
12356 && !ix86_agi_dependant (insn
, dep_insn
, insn_type
))
12358 enum attr_unit unit
= get_attr_unit (insn
);
12361 /* Because of the difference between the length of integer and
12362 floating unit pipeline preparation stages, the memory operands
12363 for floating point are cheaper.
12365 ??? For Athlon it the difference is most probably 2. */
12366 if (unit
== UNIT_INTEGER
|| unit
== UNIT_UNKNOWN
)
12369 loadcost
= TARGET_ATHLON
? 2 : 0;
12371 if (cost
>= loadcost
)
12384 /* How many alternative schedules to try. This should be as wide as the
12385 scheduling freedom in the DFA, but no wider. Making this value too
12386 large results extra work for the scheduler. */
12389 ia32_multipass_dfa_lookahead (void)
12391 if (ix86_tune
== PROCESSOR_PENTIUM
)
12394 if (ix86_tune
== PROCESSOR_PENTIUMPRO
12395 || ix86_tune
== PROCESSOR_K6
)
12403 /* Compute the alignment given to a constant that is being placed in memory.
12404 EXP is the constant and ALIGN is the alignment that the object would
12406 The value of this function is used instead of that alignment to align
12410 ix86_constant_alignment (tree exp
, int align
)
12412 if (TREE_CODE (exp
) == REAL_CST
)
12414 if (TYPE_MODE (TREE_TYPE (exp
)) == DFmode
&& align
< 64)
12416 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp
))) && align
< 128)
12419 else if (!optimize_size
&& TREE_CODE (exp
) == STRING_CST
12420 && TREE_STRING_LENGTH (exp
) >= 31 && align
< BITS_PER_WORD
)
12421 return BITS_PER_WORD
;
12426 /* Compute the alignment for a static variable.
12427 TYPE is the data type, and ALIGN is the alignment that
12428 the object would ordinarily have. The value of this function is used
12429 instead of that alignment to align the object. */
12432 ix86_data_alignment (tree type
, int align
)
12434 if (AGGREGATE_TYPE_P (type
)
12435 && TYPE_SIZE (type
)
12436 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
12437 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 256
12438 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 256)
12441 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12442 to 16byte boundary. */
12445 if (AGGREGATE_TYPE_P (type
)
12446 && TYPE_SIZE (type
)
12447 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
12448 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 128
12449 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
12453 if (TREE_CODE (type
) == ARRAY_TYPE
)
12455 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
12457 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
12460 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
12463 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
12465 if (TYPE_MODE (type
) == XCmode
&& align
< 128)
12468 else if ((TREE_CODE (type
) == RECORD_TYPE
12469 || TREE_CODE (type
) == UNION_TYPE
12470 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
12471 && TYPE_FIELDS (type
))
12473 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
12475 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
12478 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
12479 || TREE_CODE (type
) == INTEGER_TYPE
)
12481 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
12483 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
12490 /* Compute the alignment for a local variable.
12491 TYPE is the data type, and ALIGN is the alignment that
12492 the object would ordinarily have. The value of this macro is used
12493 instead of that alignment to align the object. */
12496 ix86_local_alignment (tree type
, int align
)
12498 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12499 to 16byte boundary. */
12502 if (AGGREGATE_TYPE_P (type
)
12503 && TYPE_SIZE (type
)
12504 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
12505 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 16
12506 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
12509 if (TREE_CODE (type
) == ARRAY_TYPE
)
12511 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
12513 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
12516 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
12518 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
12520 if (TYPE_MODE (type
) == XCmode
&& align
< 128)
12523 else if ((TREE_CODE (type
) == RECORD_TYPE
12524 || TREE_CODE (type
) == UNION_TYPE
12525 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
12526 && TYPE_FIELDS (type
))
12528 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
12530 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
12533 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
12534 || TREE_CODE (type
) == INTEGER_TYPE
)
12537 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
12539 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
12545 /* Emit RTL insns to initialize the variable parts of a trampoline.
12546 FNADDR is an RTX for the address of the function's pure code.
12547 CXT is an RTX for the static chain value for the function. */
12549 x86_initialize_trampoline (rtx tramp
, rtx fnaddr
, rtx cxt
)
12553 /* Compute offset from the end of the jmp to the target function. */
12554 rtx disp
= expand_binop (SImode
, sub_optab
, fnaddr
,
12555 plus_constant (tramp
, 10),
12556 NULL_RTX
, 1, OPTAB_DIRECT
);
12557 emit_move_insn (gen_rtx_MEM (QImode
, tramp
),
12558 gen_int_mode (0xb9, QImode
));
12559 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, 1)), cxt
);
12560 emit_move_insn (gen_rtx_MEM (QImode
, plus_constant (tramp
, 5)),
12561 gen_int_mode (0xe9, QImode
));
12562 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, 6)), disp
);
12567 /* Try to load address using shorter movl instead of movabs.
12568 We may want to support movq for kernel mode, but kernel does not use
12569 trampolines at the moment. */
12570 if (x86_64_zero_extended_value (fnaddr
))
12572 fnaddr
= copy_to_mode_reg (DImode
, fnaddr
);
12573 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
12574 gen_int_mode (0xbb41, HImode
));
12575 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, offset
+ 2)),
12576 gen_lowpart (SImode
, fnaddr
));
12581 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
12582 gen_int_mode (0xbb49, HImode
));
12583 emit_move_insn (gen_rtx_MEM (DImode
, plus_constant (tramp
, offset
+ 2)),
12587 /* Load static chain using movabs to r10. */
12588 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
12589 gen_int_mode (0xba49, HImode
));
12590 emit_move_insn (gen_rtx_MEM (DImode
, plus_constant (tramp
, offset
+ 2)),
12593 /* Jump to the r11 */
12594 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
12595 gen_int_mode (0xff49, HImode
));
12596 emit_move_insn (gen_rtx_MEM (QImode
, plus_constant (tramp
, offset
+2)),
12597 gen_int_mode (0xe3, QImode
));
12599 if (offset
> TRAMPOLINE_SIZE
)
12603 #ifdef TRANSFER_FROM_TRAMPOLINE
12604 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__enable_execute_stack"),
12605 LCT_NORMAL
, VOIDmode
, 1, tramp
, Pmode
);
/* Register the md builtin NAME of type TYPE, implemented by insn CODE,
   but only when the target flags in MASK are enabled, and (for builtins
   carrying MASK_64BIT) only when compiling for 64-bit.  */
#define def_builtin(MASK, NAME, TYPE, CODE)			\
do {								\
  if ((MASK) & target_flags					\
      && (!((MASK) & MASK_64BIT) || TARGET_64BIT))		\
    builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD,	\
		      NULL, NULL_TREE);				\
} while (0)
12617 struct builtin_description
12619 const unsigned int mask
;
12620 const enum insn_code icode
;
12621 const char *const name
;
12622 const enum ix86_builtins code
;
12623 const enum rtx_code comparison
;
12624 const unsigned int flag
;
12627 static const struct builtin_description bdesc_comi
[] =
12629 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS
, UNEQ
, 0 },
12630 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS
, UNLT
, 0 },
12631 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS
, UNLE
, 0 },
12632 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS
, GT
, 0 },
12633 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS
, GE
, 0 },
12634 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS
, LTGT
, 0 },
12635 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS
, UNEQ
, 0 },
12636 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS
, UNLT
, 0 },
12637 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS
, UNLE
, 0 },
12638 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS
, GT
, 0 },
12639 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS
, GE
, 0 },
12640 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS
, LTGT
, 0 },
12641 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD
, UNEQ
, 0 },
12642 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD
, UNLT
, 0 },
12643 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD
, UNLE
, 0 },
12644 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD
, GT
, 0 },
12645 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD
, GE
, 0 },
12646 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD
, LTGT
, 0 },
12647 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD
, UNEQ
, 0 },
12648 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD
, UNLT
, 0 },
12649 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD
, UNLE
, 0 },
12650 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD
, GT
, 0 },
12651 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD
, GE
, 0 },
12652 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD
, LTGT
, 0 },
12655 static const struct builtin_description bdesc_2arg
[] =
12658 { MASK_SSE
, CODE_FOR_addv4sf3
, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS
, 0, 0 },
12659 { MASK_SSE
, CODE_FOR_subv4sf3
, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS
, 0, 0 },
12660 { MASK_SSE
, CODE_FOR_mulv4sf3
, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS
, 0, 0 },
12661 { MASK_SSE
, CODE_FOR_divv4sf3
, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS
, 0, 0 },
12662 { MASK_SSE
, CODE_FOR_vmaddv4sf3
, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS
, 0, 0 },
12663 { MASK_SSE
, CODE_FOR_vmsubv4sf3
, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS
, 0, 0 },
12664 { MASK_SSE
, CODE_FOR_vmmulv4sf3
, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS
, 0, 0 },
12665 { MASK_SSE
, CODE_FOR_vmdivv4sf3
, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS
, 0, 0 },
12667 { MASK_SSE
, CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS
, EQ
, 0 },
12668 { MASK_SSE
, CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS
, LT
, 0 },
12669 { MASK_SSE
, CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS
, LE
, 0 },
12670 { MASK_SSE
, CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS
, LT
, 1 },
12671 { MASK_SSE
, CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS
, LE
, 1 },
12672 { MASK_SSE
, CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS
, UNORDERED
, 0 },
12673 { MASK_SSE
, CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS
, EQ
, 0 },
12674 { MASK_SSE
, CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS
, LT
, 0 },
12675 { MASK_SSE
, CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS
, LE
, 0 },
12676 { MASK_SSE
, CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS
, LT
, 1 },
12677 { MASK_SSE
, CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS
, LE
, 1 },
12678 { MASK_SSE
, CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS
, UNORDERED
, 0 },
12679 { MASK_SSE
, CODE_FOR_vmmaskcmpv4sf3
, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS
, EQ
, 0 },
12680 { MASK_SSE
, CODE_FOR_vmmaskcmpv4sf3
, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS
, LT
, 0 },
12681 { MASK_SSE
, CODE_FOR_vmmaskcmpv4sf3
, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS
, LE
, 0 },
12682 { MASK_SSE
, CODE_FOR_vmmaskcmpv4sf3
, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS
, UNORDERED
, 0 },
12683 { MASK_SSE
, CODE_FOR_vmmaskncmpv4sf3
, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS
, EQ
, 0 },
12684 { MASK_SSE
, CODE_FOR_vmmaskncmpv4sf3
, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS
, LT
, 0 },
12685 { MASK_SSE
, CODE_FOR_vmmaskncmpv4sf3
, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS
, LE
, 0 },
12686 { MASK_SSE
, CODE_FOR_vmmaskncmpv4sf3
, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS
, UNORDERED
, 0 },
12688 { MASK_SSE
, CODE_FOR_sminv4sf3
, "__builtin_ia32_minps", IX86_BUILTIN_MINPS
, 0, 0 },
12689 { MASK_SSE
, CODE_FOR_smaxv4sf3
, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS
, 0, 0 },
12690 { MASK_SSE
, CODE_FOR_vmsminv4sf3
, "__builtin_ia32_minss", IX86_BUILTIN_MINSS
, 0, 0 },
12691 { MASK_SSE
, CODE_FOR_vmsmaxv4sf3
, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS
, 0, 0 },
12693 { MASK_SSE
, CODE_FOR_sse_andv4sf3
, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS
, 0, 0 },
12694 { MASK_SSE
, CODE_FOR_sse_nandv4sf3
, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS
, 0, 0 },
12695 { MASK_SSE
, CODE_FOR_sse_iorv4sf3
, "__builtin_ia32_orps", IX86_BUILTIN_ORPS
, 0, 0 },
12696 { MASK_SSE
, CODE_FOR_sse_xorv4sf3
, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS
, 0, 0 },
12698 { MASK_SSE
, CODE_FOR_sse_movss
, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS
, 0, 0 },
12699 { MASK_SSE
, CODE_FOR_sse_movhlps
, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS
, 0, 0 },
12700 { MASK_SSE
, CODE_FOR_sse_movlhps
, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS
, 0, 0 },
12701 { MASK_SSE
, CODE_FOR_sse_unpckhps
, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS
, 0, 0 },
12702 { MASK_SSE
, CODE_FOR_sse_unpcklps
, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS
, 0, 0 },
12705 { MASK_MMX
, CODE_FOR_addv8qi3
, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB
, 0, 0 },
12706 { MASK_MMX
, CODE_FOR_addv4hi3
, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW
, 0, 0 },
12707 { MASK_MMX
, CODE_FOR_addv2si3
, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD
, 0, 0 },
12708 { MASK_MMX
, CODE_FOR_mmx_adddi3
, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ
, 0, 0 },
12709 { MASK_MMX
, CODE_FOR_subv8qi3
, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB
, 0, 0 },
12710 { MASK_MMX
, CODE_FOR_subv4hi3
, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW
, 0, 0 },
12711 { MASK_MMX
, CODE_FOR_subv2si3
, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD
, 0, 0 },
12712 { MASK_MMX
, CODE_FOR_mmx_subdi3
, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ
, 0, 0 },
12714 { MASK_MMX
, CODE_FOR_ssaddv8qi3
, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB
, 0, 0 },
12715 { MASK_MMX
, CODE_FOR_ssaddv4hi3
, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW
, 0, 0 },
12716 { MASK_MMX
, CODE_FOR_sssubv8qi3
, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB
, 0, 0 },
12717 { MASK_MMX
, CODE_FOR_sssubv4hi3
, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW
, 0, 0 },
12718 { MASK_MMX
, CODE_FOR_usaddv8qi3
, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB
, 0, 0 },
12719 { MASK_MMX
, CODE_FOR_usaddv4hi3
, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW
, 0, 0 },
12720 { MASK_MMX
, CODE_FOR_ussubv8qi3
, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB
, 0, 0 },
12721 { MASK_MMX
, CODE_FOR_ussubv4hi3
, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW
, 0, 0 },
12723 { MASK_MMX
, CODE_FOR_mulv4hi3
, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW
, 0, 0 },
12724 { MASK_MMX
, CODE_FOR_smulv4hi3_highpart
, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW
, 0, 0 },
12725 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_umulv4hi3_highpart
, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW
, 0, 0 },
12727 { MASK_MMX
, CODE_FOR_mmx_anddi3
, "__builtin_ia32_pand", IX86_BUILTIN_PAND
, 0, 0 },
12728 { MASK_MMX
, CODE_FOR_mmx_nanddi3
, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN
, 0, 0 },
12729 { MASK_MMX
, CODE_FOR_mmx_iordi3
, "__builtin_ia32_por", IX86_BUILTIN_POR
, 0, 0 },
12730 { MASK_MMX
, CODE_FOR_mmx_xordi3
, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR
, 0, 0 },
12732 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_uavgv8qi3
, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB
, 0, 0 },
12733 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_uavgv4hi3
, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW
, 0, 0 },
12735 { MASK_MMX
, CODE_FOR_eqv8qi3
, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB
, 0, 0 },
12736 { MASK_MMX
, CODE_FOR_eqv4hi3
, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW
, 0, 0 },
12737 { MASK_MMX
, CODE_FOR_eqv2si3
, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD
, 0, 0 },
12738 { MASK_MMX
, CODE_FOR_gtv8qi3
, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB
, 0, 0 },
12739 { MASK_MMX
, CODE_FOR_gtv4hi3
, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW
, 0, 0 },
12740 { MASK_MMX
, CODE_FOR_gtv2si3
, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD
, 0, 0 },
12742 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_umaxv8qi3
, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB
, 0, 0 },
12743 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_smaxv4hi3
, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW
, 0, 0 },
12744 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_uminv8qi3
, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB
, 0, 0 },
12745 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_sminv4hi3
, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW
, 0, 0 },
12747 { MASK_MMX
, CODE_FOR_mmx_punpckhbw
, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW
, 0, 0 },
12748 { MASK_MMX
, CODE_FOR_mmx_punpckhwd
, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD
, 0, 0 },
12749 { MASK_MMX
, CODE_FOR_mmx_punpckhdq
, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ
, 0, 0 },
12750 { MASK_MMX
, CODE_FOR_mmx_punpcklbw
, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW
, 0, 0 },
12751 { MASK_MMX
, CODE_FOR_mmx_punpcklwd
, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD
, 0, 0 },
12752 { MASK_MMX
, CODE_FOR_mmx_punpckldq
, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ
, 0, 0 },
12755 { MASK_MMX
, CODE_FOR_mmx_packsswb
, 0, IX86_BUILTIN_PACKSSWB
, 0, 0 },
12756 { MASK_MMX
, CODE_FOR_mmx_packssdw
, 0, IX86_BUILTIN_PACKSSDW
, 0, 0 },
12757 { MASK_MMX
, CODE_FOR_mmx_packuswb
, 0, IX86_BUILTIN_PACKUSWB
, 0, 0 },
12759 { MASK_SSE
, CODE_FOR_cvtpi2ps
, 0, IX86_BUILTIN_CVTPI2PS
, 0, 0 },
12760 { MASK_SSE
, CODE_FOR_cvtsi2ss
, 0, IX86_BUILTIN_CVTSI2SS
, 0, 0 },
12761 { MASK_SSE
| MASK_64BIT
, CODE_FOR_cvtsi2ssq
, 0, IX86_BUILTIN_CVTSI642SS
, 0, 0 },
12763 { MASK_MMX
, CODE_FOR_ashlv4hi3
, 0, IX86_BUILTIN_PSLLW
, 0, 0 },
12764 { MASK_MMX
, CODE_FOR_ashlv4hi3
, 0, IX86_BUILTIN_PSLLWI
, 0, 0 },
12765 { MASK_MMX
, CODE_FOR_ashlv2si3
, 0, IX86_BUILTIN_PSLLD
, 0, 0 },
12766 { MASK_MMX
, CODE_FOR_ashlv2si3
, 0, IX86_BUILTIN_PSLLDI
, 0, 0 },
12767 { MASK_MMX
, CODE_FOR_mmx_ashldi3
, 0, IX86_BUILTIN_PSLLQ
, 0, 0 },
12768 { MASK_MMX
, CODE_FOR_mmx_ashldi3
, 0, IX86_BUILTIN_PSLLQI
, 0, 0 },
12770 { MASK_MMX
, CODE_FOR_lshrv4hi3
, 0, IX86_BUILTIN_PSRLW
, 0, 0 },
12771 { MASK_MMX
, CODE_FOR_lshrv4hi3
, 0, IX86_BUILTIN_PSRLWI
, 0, 0 },
12772 { MASK_MMX
, CODE_FOR_lshrv2si3
, 0, IX86_BUILTIN_PSRLD
, 0, 0 },
12773 { MASK_MMX
, CODE_FOR_lshrv2si3
, 0, IX86_BUILTIN_PSRLDI
, 0, 0 },
12774 { MASK_MMX
, CODE_FOR_mmx_lshrdi3
, 0, IX86_BUILTIN_PSRLQ
, 0, 0 },
12775 { MASK_MMX
, CODE_FOR_mmx_lshrdi3
, 0, IX86_BUILTIN_PSRLQI
, 0, 0 },
12777 { MASK_MMX
, CODE_FOR_ashrv4hi3
, 0, IX86_BUILTIN_PSRAW
, 0, 0 },
12778 { MASK_MMX
, CODE_FOR_ashrv4hi3
, 0, IX86_BUILTIN_PSRAWI
, 0, 0 },
12779 { MASK_MMX
, CODE_FOR_ashrv2si3
, 0, IX86_BUILTIN_PSRAD
, 0, 0 },
12780 { MASK_MMX
, CODE_FOR_ashrv2si3
, 0, IX86_BUILTIN_PSRADI
, 0, 0 },
12782 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_psadbw
, 0, IX86_BUILTIN_PSADBW
, 0, 0 },
12783 { MASK_MMX
, CODE_FOR_mmx_pmaddwd
, 0, IX86_BUILTIN_PMADDWD
, 0, 0 },
12786 { MASK_SSE2
, CODE_FOR_addv2df3
, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD
, 0, 0 },
12787 { MASK_SSE2
, CODE_FOR_subv2df3
, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD
, 0, 0 },
12788 { MASK_SSE2
, CODE_FOR_mulv2df3
, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD
, 0, 0 },
12789 { MASK_SSE2
, CODE_FOR_divv2df3
, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD
, 0, 0 },
12790 { MASK_SSE2
, CODE_FOR_vmaddv2df3
, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD
, 0, 0 },
12791 { MASK_SSE2
, CODE_FOR_vmsubv2df3
, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD
, 0, 0 },
12792 { MASK_SSE2
, CODE_FOR_vmmulv2df3
, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD
, 0, 0 },
12793 { MASK_SSE2
, CODE_FOR_vmdivv2df3
, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD
, 0, 0 },
12795 { MASK_SSE2
, CODE_FOR_maskcmpv2df3
, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD
, EQ
, 0 },
12796 { MASK_SSE2
, CODE_FOR_maskcmpv2df3
, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD
, LT
, 0 },
12797 { MASK_SSE2
, CODE_FOR_maskcmpv2df3
, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD
, LE
, 0 },
12798 { MASK_SSE2
, CODE_FOR_maskcmpv2df3
, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD
, LT
, 1 },
12799 { MASK_SSE2
, CODE_FOR_maskcmpv2df3
, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD
, LE
, 1 },
12800 { MASK_SSE2
, CODE_FOR_maskcmpv2df3
, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD
, UNORDERED
, 0 },
12801 { MASK_SSE2
, CODE_FOR_maskncmpv2df3
, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD
, EQ
, 0 },
12802 { MASK_SSE2
, CODE_FOR_maskncmpv2df3
, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD
, LT
, 0 },
12803 { MASK_SSE2
, CODE_FOR_maskncmpv2df3
, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD
, LE
, 0 },
12804 { MASK_SSE2
, CODE_FOR_maskncmpv2df3
, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD
, LT
, 1 },
12805 { MASK_SSE2
, CODE_FOR_maskncmpv2df3
, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD
, LE
, 1 },
12806 { MASK_SSE2
, CODE_FOR_maskncmpv2df3
, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD
, UNORDERED
, 0 },
12807 { MASK_SSE2
, CODE_FOR_vmmaskcmpv2df3
, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD
, EQ
, 0 },
12808 { MASK_SSE2
, CODE_FOR_vmmaskcmpv2df3
, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD
, LT
, 0 },
12809 { MASK_SSE2
, CODE_FOR_vmmaskcmpv2df3
, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD
, LE
, 0 },
12810 { MASK_SSE2
, CODE_FOR_vmmaskcmpv2df3
, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD
, UNORDERED
, 0 },
12811 { MASK_SSE2
, CODE_FOR_vmmaskncmpv2df3
, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD
, EQ
, 0 },
12812 { MASK_SSE2
, CODE_FOR_vmmaskncmpv2df3
, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD
, LT
, 0 },
12813 { MASK_SSE2
, CODE_FOR_vmmaskncmpv2df3
, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD
, LE
, 0 },
12814 { MASK_SSE2
, CODE_FOR_vmmaskncmpv2df3
, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD
, UNORDERED
, 0 },
12816 { MASK_SSE2
, CODE_FOR_sminv2df3
, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD
, 0, 0 },
12817 { MASK_SSE2
, CODE_FOR_smaxv2df3
, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD
, 0, 0 },
12818 { MASK_SSE2
, CODE_FOR_vmsminv2df3
, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD
, 0, 0 },
12819 { MASK_SSE2
, CODE_FOR_vmsmaxv2df3
, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD
, 0, 0 },
12821 { MASK_SSE2
, CODE_FOR_sse2_andv2df3
, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD
, 0, 0 },
12822 { MASK_SSE2
, CODE_FOR_sse2_nandv2df3
, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD
, 0, 0 },
12823 { MASK_SSE2
, CODE_FOR_sse2_iorv2df3
, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD
, 0, 0 },
12824 { MASK_SSE2
, CODE_FOR_sse2_xorv2df3
, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD
, 0, 0 },
12826 { MASK_SSE2
, CODE_FOR_sse2_movsd
, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD
, 0, 0 },
12827 { MASK_SSE2
, CODE_FOR_sse2_unpckhpd
, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD
, 0, 0 },
12828 { MASK_SSE2
, CODE_FOR_sse2_unpcklpd
, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD
, 0, 0 },
12831 { MASK_SSE2
, CODE_FOR_addv16qi3
, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128
, 0, 0 },
12832 { MASK_SSE2
, CODE_FOR_addv8hi3
, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128
, 0, 0 },
12833 { MASK_SSE2
, CODE_FOR_addv4si3
, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128
, 0, 0 },
12834 { MASK_SSE2
, CODE_FOR_addv2di3
, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128
, 0, 0 },
12835 { MASK_SSE2
, CODE_FOR_subv16qi3
, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128
, 0, 0 },
12836 { MASK_SSE2
, CODE_FOR_subv8hi3
, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128
, 0, 0 },
12837 { MASK_SSE2
, CODE_FOR_subv4si3
, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128
, 0, 0 },
12838 { MASK_SSE2
, CODE_FOR_subv2di3
, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128
, 0, 0 },
12840 { MASK_MMX
, CODE_FOR_ssaddv16qi3
, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128
, 0, 0 },
12841 { MASK_MMX
, CODE_FOR_ssaddv8hi3
, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128
, 0, 0 },
12842 { MASK_MMX
, CODE_FOR_sssubv16qi3
, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128
, 0, 0 },
12843 { MASK_MMX
, CODE_FOR_sssubv8hi3
, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128
, 0, 0 },
12844 { MASK_MMX
, CODE_FOR_usaddv16qi3
, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128
, 0, 0 },
12845 { MASK_MMX
, CODE_FOR_usaddv8hi3
, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128
, 0, 0 },
12846 { MASK_MMX
, CODE_FOR_ussubv16qi3
, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128
, 0, 0 },
12847 { MASK_MMX
, CODE_FOR_ussubv8hi3
, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128
, 0, 0 },
12849 { MASK_SSE2
, CODE_FOR_mulv8hi3
, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128
, 0, 0 },
12850 { MASK_SSE2
, CODE_FOR_smulv8hi3_highpart
, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128
, 0, 0 },
12851 { MASK_SSE2
, CODE_FOR_sse2_umulsidi3
, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ
, 0, 0 },
12852 { MASK_SSE2
, CODE_FOR_sse2_umulv2siv2di3
, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128
, 0, 0 },
12854 { MASK_SSE2
, CODE_FOR_sse2_andv2di3
, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128
, 0, 0 },
12855 { MASK_SSE2
, CODE_FOR_sse2_nandv2di3
, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128
, 0, 0 },
12856 { MASK_SSE2
, CODE_FOR_sse2_iorv2di3
, "__builtin_ia32_por128", IX86_BUILTIN_POR128
, 0, 0 },
12857 { MASK_SSE2
, CODE_FOR_sse2_xorv2di3
, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128
, 0, 0 },
12859 { MASK_SSE2
, CODE_FOR_sse2_uavgv16qi3
, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128
, 0, 0 },
12860 { MASK_SSE2
, CODE_FOR_sse2_uavgv8hi3
, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128
, 0, 0 },
12862 { MASK_SSE2
, CODE_FOR_eqv16qi3
, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128
, 0, 0 },
12863 { MASK_SSE2
, CODE_FOR_eqv8hi3
, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128
, 0, 0 },
12864 { MASK_SSE2
, CODE_FOR_eqv4si3
, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128
, 0, 0 },
12865 { MASK_SSE2
, CODE_FOR_gtv16qi3
, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128
, 0, 0 },
12866 { MASK_SSE2
, CODE_FOR_gtv8hi3
, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128
, 0, 0 },
12867 { MASK_SSE2
, CODE_FOR_gtv4si3
, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128
, 0, 0 },
12869 { MASK_SSE2
, CODE_FOR_umaxv16qi3
, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128
, 0, 0 },
12870 { MASK_SSE2
, CODE_FOR_smaxv8hi3
, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128
, 0, 0 },
12871 { MASK_SSE2
, CODE_FOR_uminv16qi3
, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128
, 0, 0 },
12872 { MASK_SSE2
, CODE_FOR_sminv8hi3
, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128
, 0, 0 },
12874 { MASK_SSE2
, CODE_FOR_sse2_punpckhbw
, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128
, 0, 0 },
12875 { MASK_SSE2
, CODE_FOR_sse2_punpckhwd
, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128
, 0, 0 },
12876 { MASK_SSE2
, CODE_FOR_sse2_punpckhdq
, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128
, 0, 0 },
12877 { MASK_SSE2
, CODE_FOR_sse2_punpckhqdq
, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128
, 0, 0 },
12878 { MASK_SSE2
, CODE_FOR_sse2_punpcklbw
, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128
, 0, 0 },
12879 { MASK_SSE2
, CODE_FOR_sse2_punpcklwd
, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128
, 0, 0 },
12880 { MASK_SSE2
, CODE_FOR_sse2_punpckldq
, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128
, 0, 0 },
12881 { MASK_SSE2
, CODE_FOR_sse2_punpcklqdq
, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128
, 0, 0 },
12883 { MASK_SSE2
, CODE_FOR_sse2_packsswb
, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128
, 0, 0 },
12884 { MASK_SSE2
, CODE_FOR_sse2_packssdw
, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128
, 0, 0 },
12885 { MASK_SSE2
, CODE_FOR_sse2_packuswb
, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128
, 0, 0 },
12887 { MASK_SSE2
, CODE_FOR_umulv8hi3_highpart
, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128
, 0, 0 },
12888 { MASK_SSE2
, CODE_FOR_sse2_psadbw
, 0, IX86_BUILTIN_PSADBW128
, 0, 0 },
12890 { MASK_SSE2
, CODE_FOR_ashlv8hi3_ti
, 0, IX86_BUILTIN_PSLLW128
, 0, 0 },
12891 { MASK_SSE2
, CODE_FOR_ashlv8hi3
, 0, IX86_BUILTIN_PSLLWI128
, 0, 0 },
12892 { MASK_SSE2
, CODE_FOR_ashlv4si3_ti
, 0, IX86_BUILTIN_PSLLD128
, 0, 0 },
12893 { MASK_SSE2
, CODE_FOR_ashlv4si3
, 0, IX86_BUILTIN_PSLLDI128
, 0, 0 },
12894 { MASK_SSE2
, CODE_FOR_ashlv2di3_ti
, 0, IX86_BUILTIN_PSLLQ128
, 0, 0 },
12895 { MASK_SSE2
, CODE_FOR_ashlv2di3
, 0, IX86_BUILTIN_PSLLQI128
, 0, 0 },
12897 { MASK_SSE2
, CODE_FOR_lshrv8hi3_ti
, 0, IX86_BUILTIN_PSRLW128
, 0, 0 },
12898 { MASK_SSE2
, CODE_FOR_lshrv8hi3
, 0, IX86_BUILTIN_PSRLWI128
, 0, 0 },
12899 { MASK_SSE2
, CODE_FOR_lshrv4si3_ti
, 0, IX86_BUILTIN_PSRLD128
, 0, 0 },
12900 { MASK_SSE2
, CODE_FOR_lshrv4si3
, 0, IX86_BUILTIN_PSRLDI128
, 0, 0 },
12901 { MASK_SSE2
, CODE_FOR_lshrv2di3_ti
, 0, IX86_BUILTIN_PSRLQ128
, 0, 0 },
12902 { MASK_SSE2
, CODE_FOR_lshrv2di3
, 0, IX86_BUILTIN_PSRLQI128
, 0, 0 },
12904 { MASK_SSE2
, CODE_FOR_ashrv8hi3_ti
, 0, IX86_BUILTIN_PSRAW128
, 0, 0 },
12905 { MASK_SSE2
, CODE_FOR_ashrv8hi3
, 0, IX86_BUILTIN_PSRAWI128
, 0, 0 },
12906 { MASK_SSE2
, CODE_FOR_ashrv4si3_ti
, 0, IX86_BUILTIN_PSRAD128
, 0, 0 },
12907 { MASK_SSE2
, CODE_FOR_ashrv4si3
, 0, IX86_BUILTIN_PSRADI128
, 0, 0 },
12909 { MASK_SSE2
, CODE_FOR_sse2_pmaddwd
, 0, IX86_BUILTIN_PMADDWD128
, 0, 0 },
12911 { MASK_SSE2
, CODE_FOR_cvtsi2sd
, 0, IX86_BUILTIN_CVTSI2SD
, 0, 0 },
12912 { MASK_SSE2
| MASK_64BIT
, CODE_FOR_cvtsi2sdq
, 0, IX86_BUILTIN_CVTSI642SD
, 0, 0 },
12913 { MASK_SSE2
, CODE_FOR_cvtsd2ss
, 0, IX86_BUILTIN_CVTSD2SS
, 0, 0 },
12914 { MASK_SSE2
, CODE_FOR_cvtss2sd
, 0, IX86_BUILTIN_CVTSS2SD
, 0, 0 },
12917 { MASK_SSE3
, CODE_FOR_addsubv4sf3
, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS
, 0, 0 },
12918 { MASK_SSE3
, CODE_FOR_addsubv2df3
, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD
, 0, 0 },
12919 { MASK_SSE3
, CODE_FOR_haddv4sf3
, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS
, 0, 0 },
12920 { MASK_SSE3
, CODE_FOR_haddv2df3
, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD
, 0, 0 },
12921 { MASK_SSE3
, CODE_FOR_hsubv4sf3
, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS
, 0, 0 },
12922 { MASK_SSE3
, CODE_FOR_hsubv2df3
, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD
, 0, 0 }
12925 static const struct builtin_description bdesc_1arg
[] =
12927 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_pmovmskb
, 0, IX86_BUILTIN_PMOVMSKB
, 0, 0 },
12928 { MASK_SSE
, CODE_FOR_sse_movmskps
, 0, IX86_BUILTIN_MOVMSKPS
, 0, 0 },
12930 { MASK_SSE
, CODE_FOR_sqrtv4sf2
, 0, IX86_BUILTIN_SQRTPS
, 0, 0 },
12931 { MASK_SSE
, CODE_FOR_rsqrtv4sf2
, 0, IX86_BUILTIN_RSQRTPS
, 0, 0 },
12932 { MASK_SSE
, CODE_FOR_rcpv4sf2
, 0, IX86_BUILTIN_RCPPS
, 0, 0 },
12934 { MASK_SSE
, CODE_FOR_cvtps2pi
, 0, IX86_BUILTIN_CVTPS2PI
, 0, 0 },
12935 { MASK_SSE
, CODE_FOR_cvtss2si
, 0, IX86_BUILTIN_CVTSS2SI
, 0, 0 },
12936 { MASK_SSE
| MASK_64BIT
, CODE_FOR_cvtss2siq
, 0, IX86_BUILTIN_CVTSS2SI64
, 0, 0 },
12937 { MASK_SSE
, CODE_FOR_cvttps2pi
, 0, IX86_BUILTIN_CVTTPS2PI
, 0, 0 },
12938 { MASK_SSE
, CODE_FOR_cvttss2si
, 0, IX86_BUILTIN_CVTTSS2SI
, 0, 0 },
12939 { MASK_SSE
| MASK_64BIT
, CODE_FOR_cvttss2siq
, 0, IX86_BUILTIN_CVTTSS2SI64
, 0, 0 },
12941 { MASK_SSE2
, CODE_FOR_sse2_pmovmskb
, 0, IX86_BUILTIN_PMOVMSKB128
, 0, 0 },
12942 { MASK_SSE2
, CODE_FOR_sse2_movmskpd
, 0, IX86_BUILTIN_MOVMSKPD
, 0, 0 },
12943 { MASK_SSE2
, CODE_FOR_sse2_movq2dq
, 0, IX86_BUILTIN_MOVQ2DQ
, 0, 0 },
12944 { MASK_SSE2
, CODE_FOR_sse2_movdq2q
, 0, IX86_BUILTIN_MOVDQ2Q
, 0, 0 },
12946 { MASK_SSE2
, CODE_FOR_sqrtv2df2
, 0, IX86_BUILTIN_SQRTPD
, 0, 0 },
12948 { MASK_SSE2
, CODE_FOR_cvtdq2pd
, 0, IX86_BUILTIN_CVTDQ2PD
, 0, 0 },
12949 { MASK_SSE2
, CODE_FOR_cvtdq2ps
, 0, IX86_BUILTIN_CVTDQ2PS
, 0, 0 },
12951 { MASK_SSE2
, CODE_FOR_cvtpd2dq
, 0, IX86_BUILTIN_CVTPD2DQ
, 0, 0 },
12952 { MASK_SSE2
, CODE_FOR_cvtpd2pi
, 0, IX86_BUILTIN_CVTPD2PI
, 0, 0 },
12953 { MASK_SSE2
, CODE_FOR_cvtpd2ps
, 0, IX86_BUILTIN_CVTPD2PS
, 0, 0 },
12954 { MASK_SSE2
, CODE_FOR_cvttpd2dq
, 0, IX86_BUILTIN_CVTTPD2DQ
, 0, 0 },
12955 { MASK_SSE2
, CODE_FOR_cvttpd2pi
, 0, IX86_BUILTIN_CVTTPD2PI
, 0, 0 },
12957 { MASK_SSE2
, CODE_FOR_cvtpi2pd
, 0, IX86_BUILTIN_CVTPI2PD
, 0, 0 },
12959 { MASK_SSE2
, CODE_FOR_cvtsd2si
, 0, IX86_BUILTIN_CVTSD2SI
, 0, 0 },
12960 { MASK_SSE2
, CODE_FOR_cvttsd2si
, 0, IX86_BUILTIN_CVTTSD2SI
, 0, 0 },
12961 { MASK_SSE2
| MASK_64BIT
, CODE_FOR_cvtsd2siq
, 0, IX86_BUILTIN_CVTSD2SI64
, 0, 0 },
12962 { MASK_SSE2
| MASK_64BIT
, CODE_FOR_cvttsd2siq
, 0, IX86_BUILTIN_CVTTSD2SI64
, 0, 0 },
12964 { MASK_SSE2
, CODE_FOR_cvtps2dq
, 0, IX86_BUILTIN_CVTPS2DQ
, 0, 0 },
12965 { MASK_SSE2
, CODE_FOR_cvtps2pd
, 0, IX86_BUILTIN_CVTPS2PD
, 0, 0 },
12966 { MASK_SSE2
, CODE_FOR_cvttps2dq
, 0, IX86_BUILTIN_CVTTPS2DQ
, 0, 0 },
12968 { MASK_SSE2
, CODE_FOR_sse2_movq
, 0, IX86_BUILTIN_MOVQ
, 0, 0 },
12971 { MASK_SSE3
, CODE_FOR_movshdup
, 0, IX86_BUILTIN_MOVSHDUP
, 0, 0 },
12972 { MASK_SSE3
, CODE_FOR_movsldup
, 0, IX86_BUILTIN_MOVSLDUP
, 0, 0 },
12973 { MASK_SSE3
, CODE_FOR_movddup
, 0, IX86_BUILTIN_MOVDDUP
, 0, 0 }
12977 ix86_init_builtins (void)
12980 ix86_init_mmx_sse_builtins ();
12983 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
12984 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
12987 ix86_init_mmx_sse_builtins (void)
12989 const struct builtin_description
* d
;
12992 tree V16QI_type_node
= build_vector_type_for_mode (intQI_type_node
, V16QImode
);
12993 tree V2SI_type_node
= build_vector_type_for_mode (intSI_type_node
, V2SImode
);
12994 tree V2SF_type_node
= build_vector_type_for_mode (float_type_node
, V2SFmode
);
12995 tree V2DI_type_node
= build_vector_type_for_mode (intDI_type_node
, V2DImode
);
12996 tree V2DF_type_node
= build_vector_type_for_mode (double_type_node
, V2DFmode
);
12997 tree V4SF_type_node
= build_vector_type_for_mode (float_type_node
, V4SFmode
);
12998 tree V4SI_type_node
= build_vector_type_for_mode (intSI_type_node
, V4SImode
);
12999 tree V4HI_type_node
= build_vector_type_for_mode (intHI_type_node
, V4HImode
);
13000 tree V8QI_type_node
= build_vector_type_for_mode (intQI_type_node
, V8QImode
);
13001 tree V8HI_type_node
= build_vector_type_for_mode (intHI_type_node
, V8HImode
);
13003 tree pchar_type_node
= build_pointer_type (char_type_node
);
13004 tree pcchar_type_node
= build_pointer_type (
13005 build_type_variant (char_type_node
, 1, 0));
13006 tree pfloat_type_node
= build_pointer_type (float_type_node
);
13007 tree pcfloat_type_node
= build_pointer_type (
13008 build_type_variant (float_type_node
, 1, 0));
13009 tree pv2si_type_node
= build_pointer_type (V2SI_type_node
);
13010 tree pv2di_type_node
= build_pointer_type (V2DI_type_node
);
13011 tree pdi_type_node
= build_pointer_type (long_long_unsigned_type_node
);
13014 tree int_ftype_v4sf_v4sf
13015 = build_function_type_list (integer_type_node
,
13016 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
13017 tree v4si_ftype_v4sf_v4sf
13018 = build_function_type_list (V4SI_type_node
,
13019 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
13020 /* MMX/SSE/integer conversions. */
13021 tree int_ftype_v4sf
13022 = build_function_type_list (integer_type_node
,
13023 V4SF_type_node
, NULL_TREE
);
13024 tree int64_ftype_v4sf
13025 = build_function_type_list (long_long_integer_type_node
,
13026 V4SF_type_node
, NULL_TREE
);
13027 tree int_ftype_v8qi
13028 = build_function_type_list (integer_type_node
, V8QI_type_node
, NULL_TREE
);
13029 tree v4sf_ftype_v4sf_int
13030 = build_function_type_list (V4SF_type_node
,
13031 V4SF_type_node
, integer_type_node
, NULL_TREE
);
13032 tree v4sf_ftype_v4sf_int64
13033 = build_function_type_list (V4SF_type_node
,
13034 V4SF_type_node
, long_long_integer_type_node
,
13036 tree v4sf_ftype_v4sf_v2si
13037 = build_function_type_list (V4SF_type_node
,
13038 V4SF_type_node
, V2SI_type_node
, NULL_TREE
);
13039 tree int_ftype_v4hi_int
13040 = build_function_type_list (integer_type_node
,
13041 V4HI_type_node
, integer_type_node
, NULL_TREE
);
13042 tree v4hi_ftype_v4hi_int_int
13043 = build_function_type_list (V4HI_type_node
, V4HI_type_node
,
13044 integer_type_node
, integer_type_node
,
13046 /* Miscellaneous. */
13047 tree v8qi_ftype_v4hi_v4hi
13048 = build_function_type_list (V8QI_type_node
,
13049 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
13050 tree v4hi_ftype_v2si_v2si
13051 = build_function_type_list (V4HI_type_node
,
13052 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
13053 tree v4sf_ftype_v4sf_v4sf_int
13054 = build_function_type_list (V4SF_type_node
,
13055 V4SF_type_node
, V4SF_type_node
,
13056 integer_type_node
, NULL_TREE
);
13057 tree v2si_ftype_v4hi_v4hi
13058 = build_function_type_list (V2SI_type_node
,
13059 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
13060 tree v4hi_ftype_v4hi_int
13061 = build_function_type_list (V4HI_type_node
,
13062 V4HI_type_node
, integer_type_node
, NULL_TREE
);
13063 tree v4hi_ftype_v4hi_di
13064 = build_function_type_list (V4HI_type_node
,
13065 V4HI_type_node
, long_long_unsigned_type_node
,
13067 tree v2si_ftype_v2si_di
13068 = build_function_type_list (V2SI_type_node
,
13069 V2SI_type_node
, long_long_unsigned_type_node
,
13071 tree void_ftype_void
13072 = build_function_type (void_type_node
, void_list_node
);
13073 tree void_ftype_unsigned
13074 = build_function_type_list (void_type_node
, unsigned_type_node
, NULL_TREE
);
13075 tree void_ftype_unsigned_unsigned
13076 = build_function_type_list (void_type_node
, unsigned_type_node
,
13077 unsigned_type_node
, NULL_TREE
);
13078 tree void_ftype_pcvoid_unsigned_unsigned
13079 = build_function_type_list (void_type_node
, const_ptr_type_node
,
13080 unsigned_type_node
, unsigned_type_node
,
13082 tree unsigned_ftype_void
13083 = build_function_type (unsigned_type_node
, void_list_node
);
13085 = build_function_type (long_long_unsigned_type_node
, void_list_node
);
13086 tree v4sf_ftype_void
13087 = build_function_type (V4SF_type_node
, void_list_node
);
13088 tree v2si_ftype_v4sf
13089 = build_function_type_list (V2SI_type_node
, V4SF_type_node
, NULL_TREE
);
13090 /* Loads/stores. */
13091 tree void_ftype_v8qi_v8qi_pchar
13092 = build_function_type_list (void_type_node
,
13093 V8QI_type_node
, V8QI_type_node
,
13094 pchar_type_node
, NULL_TREE
);
13095 tree v4sf_ftype_pcfloat
13096 = build_function_type_list (V4SF_type_node
, pcfloat_type_node
, NULL_TREE
);
13097 /* @@@ the type is bogus */
13098 tree v4sf_ftype_v4sf_pv2si
13099 = build_function_type_list (V4SF_type_node
,
13100 V4SF_type_node
, pv2si_type_node
, NULL_TREE
);
13101 tree void_ftype_pv2si_v4sf
13102 = build_function_type_list (void_type_node
,
13103 pv2si_type_node
, V4SF_type_node
, NULL_TREE
);
13104 tree void_ftype_pfloat_v4sf
13105 = build_function_type_list (void_type_node
,
13106 pfloat_type_node
, V4SF_type_node
, NULL_TREE
);
13107 tree void_ftype_pdi_di
13108 = build_function_type_list (void_type_node
,
13109 pdi_type_node
, long_long_unsigned_type_node
,
13111 tree void_ftype_pv2di_v2di
13112 = build_function_type_list (void_type_node
,
13113 pv2di_type_node
, V2DI_type_node
, NULL_TREE
);
13114 /* Normal vector unops. */
13115 tree v4sf_ftype_v4sf
13116 = build_function_type_list (V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
13118 /* Normal vector binops. */
13119 tree v4sf_ftype_v4sf_v4sf
13120 = build_function_type_list (V4SF_type_node
,
13121 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
13122 tree v8qi_ftype_v8qi_v8qi
13123 = build_function_type_list (V8QI_type_node
,
13124 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
13125 tree v4hi_ftype_v4hi_v4hi
13126 = build_function_type_list (V4HI_type_node
,
13127 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
13128 tree v2si_ftype_v2si_v2si
13129 = build_function_type_list (V2SI_type_node
,
13130 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
13131 tree di_ftype_di_di
13132 = build_function_type_list (long_long_unsigned_type_node
,
13133 long_long_unsigned_type_node
,
13134 long_long_unsigned_type_node
, NULL_TREE
);
13136 tree v2si_ftype_v2sf
13137 = build_function_type_list (V2SI_type_node
, V2SF_type_node
, NULL_TREE
);
13138 tree v2sf_ftype_v2si
13139 = build_function_type_list (V2SF_type_node
, V2SI_type_node
, NULL_TREE
);
13140 tree v2si_ftype_v2si
13141 = build_function_type_list (V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
13142 tree v2sf_ftype_v2sf
13143 = build_function_type_list (V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
13144 tree v2sf_ftype_v2sf_v2sf
13145 = build_function_type_list (V2SF_type_node
,
13146 V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
13147 tree v2si_ftype_v2sf_v2sf
13148 = build_function_type_list (V2SI_type_node
,
13149 V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
13150 tree pint_type_node
= build_pointer_type (integer_type_node
);
13151 tree pcint_type_node
= build_pointer_type (
13152 build_type_variant (integer_type_node
, 1, 0));
13153 tree pdouble_type_node
= build_pointer_type (double_type_node
);
13154 tree pcdouble_type_node
= build_pointer_type (
13155 build_type_variant (double_type_node
, 1, 0));
13156 tree int_ftype_v2df_v2df
13157 = build_function_type_list (integer_type_node
,
13158 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
13161 = build_function_type (intTI_type_node
, void_list_node
);
13162 tree v2di_ftype_void
13163 = build_function_type (V2DI_type_node
, void_list_node
);
13164 tree ti_ftype_ti_ti
13165 = build_function_type_list (intTI_type_node
,
13166 intTI_type_node
, intTI_type_node
, NULL_TREE
);
13167 tree void_ftype_pcvoid
13168 = build_function_type_list (void_type_node
, const_ptr_type_node
, NULL_TREE
);
13170 = build_function_type_list (V2DI_type_node
,
13171 long_long_unsigned_type_node
, NULL_TREE
);
13173 = build_function_type_list (long_long_unsigned_type_node
,
13174 V2DI_type_node
, NULL_TREE
);
13175 tree v4sf_ftype_v4si
13176 = build_function_type_list (V4SF_type_node
, V4SI_type_node
, NULL_TREE
);
13177 tree v4si_ftype_v4sf
13178 = build_function_type_list (V4SI_type_node
, V4SF_type_node
, NULL_TREE
);
13179 tree v2df_ftype_v4si
13180 = build_function_type_list (V2DF_type_node
, V4SI_type_node
, NULL_TREE
);
13181 tree v4si_ftype_v2df
13182 = build_function_type_list (V4SI_type_node
, V2DF_type_node
, NULL_TREE
);
13183 tree v2si_ftype_v2df
13184 = build_function_type_list (V2SI_type_node
, V2DF_type_node
, NULL_TREE
);
13185 tree v4sf_ftype_v2df
13186 = build_function_type_list (V4SF_type_node
, V2DF_type_node
, NULL_TREE
);
13187 tree v2df_ftype_v2si
13188 = build_function_type_list (V2DF_type_node
, V2SI_type_node
, NULL_TREE
);
13189 tree v2df_ftype_v4sf
13190 = build_function_type_list (V2DF_type_node
, V4SF_type_node
, NULL_TREE
);
13191 tree int_ftype_v2df
13192 = build_function_type_list (integer_type_node
, V2DF_type_node
, NULL_TREE
);
13193 tree int64_ftype_v2df
13194 = build_function_type_list (long_long_integer_type_node
,
13195 V2DF_type_node
, NULL_TREE
);
13196 tree v2df_ftype_v2df_int
13197 = build_function_type_list (V2DF_type_node
,
13198 V2DF_type_node
, integer_type_node
, NULL_TREE
);
13199 tree v2df_ftype_v2df_int64
13200 = build_function_type_list (V2DF_type_node
,
13201 V2DF_type_node
, long_long_integer_type_node
,
13203 tree v4sf_ftype_v4sf_v2df
13204 = build_function_type_list (V4SF_type_node
,
13205 V4SF_type_node
, V2DF_type_node
, NULL_TREE
);
13206 tree v2df_ftype_v2df_v4sf
13207 = build_function_type_list (V2DF_type_node
,
13208 V2DF_type_node
, V4SF_type_node
, NULL_TREE
);
13209 tree v2df_ftype_v2df_v2df_int
13210 = build_function_type_list (V2DF_type_node
,
13211 V2DF_type_node
, V2DF_type_node
,
13214 tree v2df_ftype_v2df_pv2si
13215 = build_function_type_list (V2DF_type_node
,
13216 V2DF_type_node
, pv2si_type_node
, NULL_TREE
);
13217 tree void_ftype_pv2si_v2df
13218 = build_function_type_list (void_type_node
,
13219 pv2si_type_node
, V2DF_type_node
, NULL_TREE
);
13220 tree void_ftype_pdouble_v2df
13221 = build_function_type_list (void_type_node
,
13222 pdouble_type_node
, V2DF_type_node
, NULL_TREE
);
13223 tree void_ftype_pint_int
13224 = build_function_type_list (void_type_node
,
13225 pint_type_node
, integer_type_node
, NULL_TREE
);
13226 tree void_ftype_v16qi_v16qi_pchar
13227 = build_function_type_list (void_type_node
,
13228 V16QI_type_node
, V16QI_type_node
,
13229 pchar_type_node
, NULL_TREE
);
13230 tree v2df_ftype_pcdouble
13231 = build_function_type_list (V2DF_type_node
, pcdouble_type_node
, NULL_TREE
);
13232 tree v2df_ftype_v2df_v2df
13233 = build_function_type_list (V2DF_type_node
,
13234 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
13235 tree v16qi_ftype_v16qi_v16qi
13236 = build_function_type_list (V16QI_type_node
,
13237 V16QI_type_node
, V16QI_type_node
, NULL_TREE
);
13238 tree v8hi_ftype_v8hi_v8hi
13239 = build_function_type_list (V8HI_type_node
,
13240 V8HI_type_node
, V8HI_type_node
, NULL_TREE
);
13241 tree v4si_ftype_v4si_v4si
13242 = build_function_type_list (V4SI_type_node
,
13243 V4SI_type_node
, V4SI_type_node
, NULL_TREE
);
13244 tree v2di_ftype_v2di_v2di
13245 = build_function_type_list (V2DI_type_node
,
13246 V2DI_type_node
, V2DI_type_node
, NULL_TREE
);
13247 tree v2di_ftype_v2df_v2df
13248 = build_function_type_list (V2DI_type_node
,
13249 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
13250 tree v2df_ftype_v2df
13251 = build_function_type_list (V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
13252 tree v2df_ftype_double
13253 = build_function_type_list (V2DF_type_node
, double_type_node
, NULL_TREE
);
13254 tree v2df_ftype_double_double
13255 = build_function_type_list (V2DF_type_node
,
13256 double_type_node
, double_type_node
, NULL_TREE
);
13257 tree int_ftype_v8hi_int
13258 = build_function_type_list (integer_type_node
,
13259 V8HI_type_node
, integer_type_node
, NULL_TREE
);
13260 tree v8hi_ftype_v8hi_int_int
13261 = build_function_type_list (V8HI_type_node
,
13262 V8HI_type_node
, integer_type_node
,
13263 integer_type_node
, NULL_TREE
);
13264 tree v2di_ftype_v2di_int
13265 = build_function_type_list (V2DI_type_node
,
13266 V2DI_type_node
, integer_type_node
, NULL_TREE
);
13267 tree v4si_ftype_v4si_int
13268 = build_function_type_list (V4SI_type_node
,
13269 V4SI_type_node
, integer_type_node
, NULL_TREE
);
13270 tree v8hi_ftype_v8hi_int
13271 = build_function_type_list (V8HI_type_node
,
13272 V8HI_type_node
, integer_type_node
, NULL_TREE
);
13273 tree v8hi_ftype_v8hi_v2di
13274 = build_function_type_list (V8HI_type_node
,
13275 V8HI_type_node
, V2DI_type_node
, NULL_TREE
);
13276 tree v4si_ftype_v4si_v2di
13277 = build_function_type_list (V4SI_type_node
,
13278 V4SI_type_node
, V2DI_type_node
, NULL_TREE
);
13279 tree v4si_ftype_v8hi_v8hi
13280 = build_function_type_list (V4SI_type_node
,
13281 V8HI_type_node
, V8HI_type_node
, NULL_TREE
);
13282 tree di_ftype_v8qi_v8qi
13283 = build_function_type_list (long_long_unsigned_type_node
,
13284 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
13285 tree v2di_ftype_v16qi_v16qi
13286 = build_function_type_list (V2DI_type_node
,
13287 V16QI_type_node
, V16QI_type_node
, NULL_TREE
);
13288 tree int_ftype_v16qi
13289 = build_function_type_list (integer_type_node
, V16QI_type_node
, NULL_TREE
);
13290 tree v16qi_ftype_pcchar
13291 = build_function_type_list (V16QI_type_node
, pcchar_type_node
, NULL_TREE
);
13292 tree void_ftype_pchar_v16qi
13293 = build_function_type_list (void_type_node
,
13294 pchar_type_node
, V16QI_type_node
, NULL_TREE
);
13295 tree v4si_ftype_pcint
13296 = build_function_type_list (V4SI_type_node
, pcint_type_node
, NULL_TREE
);
13297 tree void_ftype_pcint_v4si
13298 = build_function_type_list (void_type_node
,
13299 pcint_type_node
, V4SI_type_node
, NULL_TREE
);
13300 tree v2di_ftype_v2di
13301 = build_function_type_list (V2DI_type_node
, V2DI_type_node
, NULL_TREE
);
13304 tree float128_type
;
13306 /* The __float80 type. */
13307 if (TYPE_MODE (long_double_type_node
) == XFmode
)
13308 (*lang_hooks
.types
.register_builtin_type
) (long_double_type_node
,
13312 /* The __float80 type. */
13313 float80_type
= make_node (REAL_TYPE
);
13314 TYPE_PRECISION (float80_type
) = 96;
13315 layout_type (float80_type
);
13316 (*lang_hooks
.types
.register_builtin_type
) (float80_type
, "__float80");
13319 float128_type
= make_node (REAL_TYPE
);
13320 TYPE_PRECISION (float128_type
) = 128;
13321 layout_type (float128_type
);
13322 (*lang_hooks
.types
.register_builtin_type
) (float128_type
, "__float128");
13324 /* Add all builtins that are more or less simple operations on two
13326 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
13328 /* Use one of the operands; the target can have a different mode for
13329 mask-generating compares. */
13330 enum machine_mode mode
;
13335 mode
= insn_data
[d
->icode
].operand
[1].mode
;
13340 type
= v16qi_ftype_v16qi_v16qi
;
13343 type
= v8hi_ftype_v8hi_v8hi
;
13346 type
= v4si_ftype_v4si_v4si
;
13349 type
= v2di_ftype_v2di_v2di
;
13352 type
= v2df_ftype_v2df_v2df
;
13355 type
= ti_ftype_ti_ti
;
13358 type
= v4sf_ftype_v4sf_v4sf
;
13361 type
= v8qi_ftype_v8qi_v8qi
;
13364 type
= v4hi_ftype_v4hi_v4hi
;
13367 type
= v2si_ftype_v2si_v2si
;
13370 type
= di_ftype_di_di
;
13377 /* Override for comparisons. */
13378 if (d
->icode
== CODE_FOR_maskcmpv4sf3
13379 || d
->icode
== CODE_FOR_maskncmpv4sf3
13380 || d
->icode
== CODE_FOR_vmmaskcmpv4sf3
13381 || d
->icode
== CODE_FOR_vmmaskncmpv4sf3
)
13382 type
= v4si_ftype_v4sf_v4sf
;
13384 if (d
->icode
== CODE_FOR_maskcmpv2df3
13385 || d
->icode
== CODE_FOR_maskncmpv2df3
13386 || d
->icode
== CODE_FOR_vmmaskcmpv2df3
13387 || d
->icode
== CODE_FOR_vmmaskncmpv2df3
)
13388 type
= v2di_ftype_v2df_v2df
;
13390 def_builtin (d
->mask
, d
->name
, type
, d
->code
);
13393 /* Add the remaining MMX insns with somewhat more complicated types. */
13394 def_builtin (MASK_MMX
, "__builtin_ia32_mmx_zero", di_ftype_void
, IX86_BUILTIN_MMX_ZERO
);
13395 def_builtin (MASK_MMX
, "__builtin_ia32_emms", void_ftype_void
, IX86_BUILTIN_EMMS
);
13396 def_builtin (MASK_MMX
, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSLLW
);
13397 def_builtin (MASK_MMX
, "__builtin_ia32_pslld", v2si_ftype_v2si_di
, IX86_BUILTIN_PSLLD
);
13398 def_builtin (MASK_MMX
, "__builtin_ia32_psllq", di_ftype_di_di
, IX86_BUILTIN_PSLLQ
);
13400 def_builtin (MASK_MMX
, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSRLW
);
13401 def_builtin (MASK_MMX
, "__builtin_ia32_psrld", v2si_ftype_v2si_di
, IX86_BUILTIN_PSRLD
);
13402 def_builtin (MASK_MMX
, "__builtin_ia32_psrlq", di_ftype_di_di
, IX86_BUILTIN_PSRLQ
);
13404 def_builtin (MASK_MMX
, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSRAW
);
13405 def_builtin (MASK_MMX
, "__builtin_ia32_psrad", v2si_ftype_v2si_di
, IX86_BUILTIN_PSRAD
);
13407 def_builtin (MASK_MMX
, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int
, IX86_BUILTIN_PSHUFW
);
13408 def_builtin (MASK_MMX
, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi
, IX86_BUILTIN_PMADDWD
);
13410 /* comi/ucomi insns. */
13411 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
13412 if (d
->mask
== MASK_SSE2
)
13413 def_builtin (d
->mask
, d
->name
, int_ftype_v2df_v2df
, d
->code
);
13415 def_builtin (d
->mask
, d
->name
, int_ftype_v4sf_v4sf
, d
->code
);
13417 def_builtin (MASK_MMX
, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi
, IX86_BUILTIN_PACKSSWB
);
13418 def_builtin (MASK_MMX
, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si
, IX86_BUILTIN_PACKSSDW
);
13419 def_builtin (MASK_MMX
, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi
, IX86_BUILTIN_PACKUSWB
);
13421 def_builtin (MASK_SSE
, "__builtin_ia32_ldmxcsr", void_ftype_unsigned
, IX86_BUILTIN_LDMXCSR
);
13422 def_builtin (MASK_SSE
, "__builtin_ia32_stmxcsr", unsigned_ftype_void
, IX86_BUILTIN_STMXCSR
);
13423 def_builtin (MASK_SSE
, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si
, IX86_BUILTIN_CVTPI2PS
);
13424 def_builtin (MASK_SSE
, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf
, IX86_BUILTIN_CVTPS2PI
);
13425 def_builtin (MASK_SSE
, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int
, IX86_BUILTIN_CVTSI2SS
);
13426 def_builtin (MASK_SSE
| MASK_64BIT
, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64
, IX86_BUILTIN_CVTSI642SS
);
13427 def_builtin (MASK_SSE
, "__builtin_ia32_cvtss2si", int_ftype_v4sf
, IX86_BUILTIN_CVTSS2SI
);
13428 def_builtin (MASK_SSE
| MASK_64BIT
, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf
, IX86_BUILTIN_CVTSS2SI64
);
13429 def_builtin (MASK_SSE
, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf
, IX86_BUILTIN_CVTTPS2PI
);
13430 def_builtin (MASK_SSE
, "__builtin_ia32_cvttss2si", int_ftype_v4sf
, IX86_BUILTIN_CVTTSS2SI
);
13431 def_builtin (MASK_SSE
| MASK_64BIT
, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf
, IX86_BUILTIN_CVTTSS2SI64
);
13433 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_pextrw", int_ftype_v4hi_int
, IX86_BUILTIN_PEXTRW
);
13434 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int
, IX86_BUILTIN_PINSRW
);
13436 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar
, IX86_BUILTIN_MASKMOVQ
);
13438 def_builtin (MASK_SSE
, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat
, IX86_BUILTIN_LOADAPS
);
13439 def_builtin (MASK_SSE
, "__builtin_ia32_loadups", v4sf_ftype_pcfloat
, IX86_BUILTIN_LOADUPS
);
13440 def_builtin (MASK_SSE
, "__builtin_ia32_loadss", v4sf_ftype_pcfloat
, IX86_BUILTIN_LOADSS
);
13441 def_builtin (MASK_SSE
, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf
, IX86_BUILTIN_STOREAPS
);
13442 def_builtin (MASK_SSE
, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf
, IX86_BUILTIN_STOREUPS
);
13443 def_builtin (MASK_SSE
, "__builtin_ia32_storess", void_ftype_pfloat_v4sf
, IX86_BUILTIN_STORESS
);
13445 def_builtin (MASK_SSE
, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si
, IX86_BUILTIN_LOADHPS
);
13446 def_builtin (MASK_SSE
, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si
, IX86_BUILTIN_LOADLPS
);
13447 def_builtin (MASK_SSE
, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf
, IX86_BUILTIN_STOREHPS
);
13448 def_builtin (MASK_SSE
, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf
, IX86_BUILTIN_STORELPS
);
13450 def_builtin (MASK_SSE
, "__builtin_ia32_movmskps", int_ftype_v4sf
, IX86_BUILTIN_MOVMSKPS
);
13451 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_pmovmskb", int_ftype_v8qi
, IX86_BUILTIN_PMOVMSKB
);
13452 def_builtin (MASK_SSE
, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf
, IX86_BUILTIN_MOVNTPS
);
13453 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_movntq", void_ftype_pdi_di
, IX86_BUILTIN_MOVNTQ
);
13455 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_sfence", void_ftype_void
, IX86_BUILTIN_SFENCE
);
13457 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi
, IX86_BUILTIN_PSADBW
);
13459 def_builtin (MASK_SSE
, "__builtin_ia32_rcpps", v4sf_ftype_v4sf
, IX86_BUILTIN_RCPPS
);
13460 def_builtin (MASK_SSE
, "__builtin_ia32_rcpss", v4sf_ftype_v4sf
, IX86_BUILTIN_RCPSS
);
13461 def_builtin (MASK_SSE
, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf
, IX86_BUILTIN_RSQRTPS
);
13462 def_builtin (MASK_SSE
, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf
, IX86_BUILTIN_RSQRTSS
);
13463 def_builtin (MASK_SSE
, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf
, IX86_BUILTIN_SQRTPS
);
13464 def_builtin (MASK_SSE
, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf
, IX86_BUILTIN_SQRTSS
);
13466 def_builtin (MASK_SSE
, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int
, IX86_BUILTIN_SHUFPS
);
13468 /* Original 3DNow! */
13469 def_builtin (MASK_3DNOW
, "__builtin_ia32_femms", void_ftype_void
, IX86_BUILTIN_FEMMS
);
13470 def_builtin (MASK_3DNOW
, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi
, IX86_BUILTIN_PAVGUSB
);
13471 def_builtin (MASK_3DNOW
, "__builtin_ia32_pf2id", v2si_ftype_v2sf
, IX86_BUILTIN_PF2ID
);
13472 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFACC
);
13473 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFADD
);
13474 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPEQ
);
13475 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPGE
);
13476 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPGT
);
13477 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMAX
);
13478 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMIN
);
13479 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMUL
);
13480 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf
, IX86_BUILTIN_PFRCP
);
13481 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRCPIT1
);
13482 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRCPIT2
);
13483 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf
, IX86_BUILTIN_PFRSQRT
);
13484 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRSQIT1
);
13485 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFSUB
);
13486 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFSUBR
);
13487 def_builtin (MASK_3DNOW
, "__builtin_ia32_pi2fd", v2sf_ftype_v2si
, IX86_BUILTIN_PI2FD
);
13488 def_builtin (MASK_3DNOW
, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi
, IX86_BUILTIN_PMULHRW
);
13490 /* 3DNow! extension as used in the Athlon CPU. */
13491 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pf2iw", v2si_ftype_v2sf
, IX86_BUILTIN_PF2IW
);
13492 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFNACC
);
13493 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFPNACC
);
13494 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pi2fw", v2sf_ftype_v2si
, IX86_BUILTIN_PI2FW
);
13495 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf
, IX86_BUILTIN_PSWAPDSF
);
13496 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pswapdsi", v2si_ftype_v2si
, IX86_BUILTIN_PSWAPDSI
);
13498 def_builtin (MASK_SSE
, "__builtin_ia32_setzerops", v4sf_ftype_void
, IX86_BUILTIN_SSE_ZERO
);
13501 def_builtin (MASK_SSE2
, "__builtin_ia32_pextrw128", int_ftype_v8hi_int
, IX86_BUILTIN_PEXTRW128
);
13502 def_builtin (MASK_SSE2
, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int
, IX86_BUILTIN_PINSRW128
);
13504 def_builtin (MASK_SSE2
, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar
, IX86_BUILTIN_MASKMOVDQU
);
13505 def_builtin (MASK_SSE2
, "__builtin_ia32_movq2dq", v2di_ftype_di
, IX86_BUILTIN_MOVQ2DQ
);
13506 def_builtin (MASK_SSE2
, "__builtin_ia32_movdq2q", di_ftype_v2di
, IX86_BUILTIN_MOVDQ2Q
);
13508 def_builtin (MASK_SSE2
, "__builtin_ia32_loadapd", v2df_ftype_pcdouble
, IX86_BUILTIN_LOADAPD
);
13509 def_builtin (MASK_SSE2
, "__builtin_ia32_loadupd", v2df_ftype_pcdouble
, IX86_BUILTIN_LOADUPD
);
13510 def_builtin (MASK_SSE2
, "__builtin_ia32_loadsd", v2df_ftype_pcdouble
, IX86_BUILTIN_LOADSD
);
13511 def_builtin (MASK_SSE2
, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df
, IX86_BUILTIN_STOREAPD
);
13512 def_builtin (MASK_SSE2
, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df
, IX86_BUILTIN_STOREUPD
);
13513 def_builtin (MASK_SSE2
, "__builtin_ia32_storesd", void_ftype_pdouble_v2df
, IX86_BUILTIN_STORESD
);
13515 def_builtin (MASK_SSE2
, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si
, IX86_BUILTIN_LOADHPD
);
13516 def_builtin (MASK_SSE2
, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si
, IX86_BUILTIN_LOADLPD
);
13517 def_builtin (MASK_SSE2
, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df
, IX86_BUILTIN_STOREHPD
);
13518 def_builtin (MASK_SSE2
, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df
, IX86_BUILTIN_STORELPD
);
13520 def_builtin (MASK_SSE2
, "__builtin_ia32_movmskpd", int_ftype_v2df
, IX86_BUILTIN_MOVMSKPD
);
13521 def_builtin (MASK_SSE2
, "__builtin_ia32_pmovmskb128", int_ftype_v16qi
, IX86_BUILTIN_PMOVMSKB128
);
13522 def_builtin (MASK_SSE2
, "__builtin_ia32_movnti", void_ftype_pint_int
, IX86_BUILTIN_MOVNTI
);
13523 def_builtin (MASK_SSE2
, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df
, IX86_BUILTIN_MOVNTPD
);
13524 def_builtin (MASK_SSE2
, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di
, IX86_BUILTIN_MOVNTDQ
);
13526 def_builtin (MASK_SSE2
, "__builtin_ia32_pshufd", v4si_ftype_v4si_int
, IX86_BUILTIN_PSHUFD
);
13527 def_builtin (MASK_SSE2
, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSHUFLW
);
13528 def_builtin (MASK_SSE2
, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSHUFHW
);
13529 def_builtin (MASK_SSE2
, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi
, IX86_BUILTIN_PSADBW128
);
13531 def_builtin (MASK_SSE2
, "__builtin_ia32_sqrtpd", v2df_ftype_v2df
, IX86_BUILTIN_SQRTPD
);
13532 def_builtin (MASK_SSE2
, "__builtin_ia32_sqrtsd", v2df_ftype_v2df
, IX86_BUILTIN_SQRTSD
);
13534 def_builtin (MASK_SSE2
, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int
, IX86_BUILTIN_SHUFPD
);
13536 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si
, IX86_BUILTIN_CVTDQ2PD
);
13537 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si
, IX86_BUILTIN_CVTDQ2PS
);
13539 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df
, IX86_BUILTIN_CVTPD2DQ
);
13540 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df
, IX86_BUILTIN_CVTPD2PI
);
13541 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df
, IX86_BUILTIN_CVTPD2PS
);
13542 def_builtin (MASK_SSE2
, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df
, IX86_BUILTIN_CVTTPD2DQ
);
13543 def_builtin (MASK_SSE2
, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df
, IX86_BUILTIN_CVTTPD2PI
);
13545 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si
, IX86_BUILTIN_CVTPI2PD
);
13547 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtsd2si", int_ftype_v2df
, IX86_BUILTIN_CVTSD2SI
);
13548 def_builtin (MASK_SSE2
, "__builtin_ia32_cvttsd2si", int_ftype_v2df
, IX86_BUILTIN_CVTTSD2SI
);
13549 def_builtin (MASK_SSE2
| MASK_64BIT
, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df
, IX86_BUILTIN_CVTSD2SI64
);
13550 def_builtin (MASK_SSE2
| MASK_64BIT
, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df
, IX86_BUILTIN_CVTTSD2SI64
);
13552 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf
, IX86_BUILTIN_CVTPS2DQ
);
13553 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf
, IX86_BUILTIN_CVTPS2PD
);
13554 def_builtin (MASK_SSE2
, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf
, IX86_BUILTIN_CVTTPS2DQ
);
13556 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int
, IX86_BUILTIN_CVTSI2SD
);
13557 def_builtin (MASK_SSE2
| MASK_64BIT
, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64
, IX86_BUILTIN_CVTSI642SD
);
13558 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df
, IX86_BUILTIN_CVTSD2SS
);
13559 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf
, IX86_BUILTIN_CVTSS2SD
);
13561 def_builtin (MASK_SSE2
, "__builtin_ia32_setpd1", v2df_ftype_double
, IX86_BUILTIN_SETPD1
);
13562 def_builtin (MASK_SSE2
, "__builtin_ia32_setpd", v2df_ftype_double_double
, IX86_BUILTIN_SETPD
);
13563 def_builtin (MASK_SSE2
, "__builtin_ia32_setzeropd", ti_ftype_void
, IX86_BUILTIN_CLRPD
);
13564 def_builtin (MASK_SSE2
, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble
, IX86_BUILTIN_LOADPD1
);
13565 def_builtin (MASK_SSE2
, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble
, IX86_BUILTIN_LOADRPD
);
13566 def_builtin (MASK_SSE2
, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df
, IX86_BUILTIN_STOREPD1
);
13567 def_builtin (MASK_SSE2
, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df
, IX86_BUILTIN_STORERPD
);
13569 def_builtin (MASK_SSE2
, "__builtin_ia32_clflush", void_ftype_pcvoid
, IX86_BUILTIN_CLFLUSH
);
13570 def_builtin (MASK_SSE2
, "__builtin_ia32_lfence", void_ftype_void
, IX86_BUILTIN_LFENCE
);
13571 def_builtin (MASK_SSE2
, "__builtin_ia32_mfence", void_ftype_void
, IX86_BUILTIN_MFENCE
);
13573 def_builtin (MASK_SSE2
, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar
, IX86_BUILTIN_LOADDQA
);
13574 def_builtin (MASK_SSE2
, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar
, IX86_BUILTIN_LOADDQU
);
13575 def_builtin (MASK_SSE2
, "__builtin_ia32_loadd", v4si_ftype_pcint
, IX86_BUILTIN_LOADD
);
13576 def_builtin (MASK_SSE2
, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi
, IX86_BUILTIN_STOREDQA
);
13577 def_builtin (MASK_SSE2
, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi
, IX86_BUILTIN_STOREDQU
);
13578 def_builtin (MASK_SSE2
, "__builtin_ia32_stored", void_ftype_pcint_v4si
, IX86_BUILTIN_STORED
);
13579 def_builtin (MASK_SSE2
, "__builtin_ia32_movq", v2di_ftype_v2di
, IX86_BUILTIN_MOVQ
);
13581 def_builtin (MASK_SSE
, "__builtin_ia32_setzero128", v2di_ftype_void
, IX86_BUILTIN_CLRTI
);
13583 def_builtin (MASK_SSE2
, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di
, IX86_BUILTIN_PSLLW128
);
13584 def_builtin (MASK_SSE2
, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di
, IX86_BUILTIN_PSLLD128
);
13585 def_builtin (MASK_SSE2
, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di
, IX86_BUILTIN_PSLLQ128
);
13587 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di
, IX86_BUILTIN_PSRLW128
);
13588 def_builtin (MASK_SSE2
, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di
, IX86_BUILTIN_PSRLD128
);
13589 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di
, IX86_BUILTIN_PSRLQ128
);
13591 def_builtin (MASK_SSE2
, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di
, IX86_BUILTIN_PSRAW128
);
13592 def_builtin (MASK_SSE2
, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di
, IX86_BUILTIN_PSRAD128
);
13594 def_builtin (MASK_SSE2
, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSLLDQI128
);
13595 def_builtin (MASK_SSE2
, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSLLWI128
);
13596 def_builtin (MASK_SSE2
, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSLLDI128
);
13597 def_builtin (MASK_SSE2
, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSLLQI128
);
13599 def_builtin (MASK_SSE2
, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSRLDQI128
);
13600 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSRLWI128
);
13601 def_builtin (MASK_SSE2
, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSRLDI128
);
13602 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSRLQI128
);
13604 def_builtin (MASK_SSE2
, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSRAWI128
);
13605 def_builtin (MASK_SSE2
, "__builtin_ia32_psradi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSRADI128
);
13607 def_builtin (MASK_SSE2
, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi
, IX86_BUILTIN_PMADDWD128
);
13609 /* Prescott New Instructions. */
13610 def_builtin (MASK_SSE3
, "__builtin_ia32_monitor",
13611 void_ftype_pcvoid_unsigned_unsigned
,
13612 IX86_BUILTIN_MONITOR
);
13613 def_builtin (MASK_SSE3
, "__builtin_ia32_mwait",
13614 void_ftype_unsigned_unsigned
,
13615 IX86_BUILTIN_MWAIT
);
13616 def_builtin (MASK_SSE3
, "__builtin_ia32_movshdup",
13618 IX86_BUILTIN_MOVSHDUP
);
13619 def_builtin (MASK_SSE3
, "__builtin_ia32_movsldup",
13621 IX86_BUILTIN_MOVSLDUP
);
13622 def_builtin (MASK_SSE3
, "__builtin_ia32_lddqu",
13623 v16qi_ftype_pcchar
, IX86_BUILTIN_LDDQU
);
13624 def_builtin (MASK_SSE3
, "__builtin_ia32_loadddup",
13625 v2df_ftype_pcdouble
, IX86_BUILTIN_LOADDDUP
);
13626 def_builtin (MASK_SSE3
, "__builtin_ia32_movddup",
13627 v2df_ftype_v2df
, IX86_BUILTIN_MOVDDUP
);
13630 /* Errors in the source file can cause expand_expr to return const0_rtx
13631 where we expect a vector. To avoid crashing, use one of the vector
13632 clear instructions. */
13634 safe_vector_operand (rtx x
, enum machine_mode mode
)
13636 if (x
!= const0_rtx
)
13638 x
= gen_reg_rtx (mode
);
13640 if (VALID_MMX_REG_MODE (mode
) || VALID_MMX_REG_MODE_3DNOW (mode
))
13641 emit_insn (gen_mmx_clrdi (mode
== DImode
? x
13642 : gen_rtx_SUBREG (DImode
, x
, 0)));
13644 emit_insn (gen_sse_clrv4sf (mode
== V4SFmode
? x
13645 : gen_rtx_SUBREG (V4SFmode
, x
, 0),
13646 CONST0_RTX (V4SFmode
)));
13650 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
13653 ix86_expand_binop_builtin (enum insn_code icode
, tree arglist
, rtx target
)
13656 tree arg0
= TREE_VALUE (arglist
);
13657 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
13658 rtx op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13659 rtx op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
13660 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
13661 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
13662 enum machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
13664 if (VECTOR_MODE_P (mode0
))
13665 op0
= safe_vector_operand (op0
, mode0
);
13666 if (VECTOR_MODE_P (mode1
))
13667 op1
= safe_vector_operand (op1
, mode1
);
13670 || GET_MODE (target
) != tmode
13671 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
13672 target
= gen_reg_rtx (tmode
);
13674 if (GET_MODE (op1
) == SImode
&& mode1
== TImode
)
13676 rtx x
= gen_reg_rtx (V4SImode
);
13677 emit_insn (gen_sse2_loadd (x
, op1
));
13678 op1
= gen_lowpart (TImode
, x
);
13681 /* In case the insn wants input operands in modes different from
13682 the result, abort. */
13683 if ((GET_MODE (op0
) != mode0
&& GET_MODE (op0
) != VOIDmode
)
13684 || (GET_MODE (op1
) != mode1
&& GET_MODE (op1
) != VOIDmode
))
13687 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
13688 op0
= copy_to_mode_reg (mode0
, op0
);
13689 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
13690 op1
= copy_to_mode_reg (mode1
, op1
);
13692 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
13693 yet one of the two must not be a memory. This is normally enforced
13694 by expanders, but we didn't bother to create one here. */
13695 if (GET_CODE (op0
) == MEM
&& GET_CODE (op1
) == MEM
)
13696 op0
= copy_to_mode_reg (mode0
, op0
);
13698 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
13705 /* Subroutine of ix86_expand_builtin to take care of stores. */
13708 ix86_expand_store_builtin (enum insn_code icode
, tree arglist
)
13711 tree arg0
= TREE_VALUE (arglist
);
13712 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
13713 rtx op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13714 rtx op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
13715 enum machine_mode mode0
= insn_data
[icode
].operand
[0].mode
;
13716 enum machine_mode mode1
= insn_data
[icode
].operand
[1].mode
;
13718 if (VECTOR_MODE_P (mode1
))
13719 op1
= safe_vector_operand (op1
, mode1
);
13721 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
13722 op1
= copy_to_mode_reg (mode1
, op1
);
13724 pat
= GEN_FCN (icode
) (op0
, op1
);
13730 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
13733 ix86_expand_unop_builtin (enum insn_code icode
, tree arglist
,
13734 rtx target
, int do_load
)
13737 tree arg0
= TREE_VALUE (arglist
);
13738 rtx op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13739 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
13740 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
13743 || GET_MODE (target
) != tmode
13744 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
13745 target
= gen_reg_rtx (tmode
);
13747 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
13750 if (VECTOR_MODE_P (mode0
))
13751 op0
= safe_vector_operand (op0
, mode0
);
13753 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
13754 op0
= copy_to_mode_reg (mode0
, op0
);
13757 pat
= GEN_FCN (icode
) (target
, op0
);
13764 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
13765 sqrtss, rsqrtss, rcpss. */
13768 ix86_expand_unop1_builtin (enum insn_code icode
, tree arglist
, rtx target
)
13771 tree arg0
= TREE_VALUE (arglist
);
13772 rtx op1
, op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13773 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
13774 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
13777 || GET_MODE (target
) != tmode
13778 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
13779 target
= gen_reg_rtx (tmode
);
13781 if (VECTOR_MODE_P (mode0
))
13782 op0
= safe_vector_operand (op0
, mode0
);
13784 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
13785 op0
= copy_to_mode_reg (mode0
, op0
);
13788 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode0
))
13789 op1
= copy_to_mode_reg (mode0
, op1
);
13791 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
13798 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
13801 ix86_expand_sse_compare (const struct builtin_description
*d
, tree arglist
,
13805 tree arg0
= TREE_VALUE (arglist
);
13806 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
13807 rtx op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13808 rtx op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
13810 enum machine_mode tmode
= insn_data
[d
->icode
].operand
[0].mode
;
13811 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[1].mode
;
13812 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[2].mode
;
13813 enum rtx_code comparison
= d
->comparison
;
13815 if (VECTOR_MODE_P (mode0
))
13816 op0
= safe_vector_operand (op0
, mode0
);
13817 if (VECTOR_MODE_P (mode1
))
13818 op1
= safe_vector_operand (op1
, mode1
);
13820 /* Swap operands if we have a comparison that isn't available in
13824 rtx tmp
= gen_reg_rtx (mode1
);
13825 emit_move_insn (tmp
, op1
);
13831 || GET_MODE (target
) != tmode
13832 || ! (*insn_data
[d
->icode
].operand
[0].predicate
) (target
, tmode
))
13833 target
= gen_reg_rtx (tmode
);
13835 if (! (*insn_data
[d
->icode
].operand
[1].predicate
) (op0
, mode0
))
13836 op0
= copy_to_mode_reg (mode0
, op0
);
13837 if (! (*insn_data
[d
->icode
].operand
[2].predicate
) (op1
, mode1
))
13838 op1
= copy_to_mode_reg (mode1
, op1
);
13840 op2
= gen_rtx_fmt_ee (comparison
, mode0
, op0
, op1
);
13841 pat
= GEN_FCN (d
->icode
) (target
, op0
, op1
, op2
);
13848 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
13851 ix86_expand_sse_comi (const struct builtin_description
*d
, tree arglist
,
13855 tree arg0
= TREE_VALUE (arglist
);
13856 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
13857 rtx op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13858 rtx op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
13860 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[0].mode
;
13861 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[1].mode
;
13862 enum rtx_code comparison
= d
->comparison
;
13864 if (VECTOR_MODE_P (mode0
))
13865 op0
= safe_vector_operand (op0
, mode0
);
13866 if (VECTOR_MODE_P (mode1
))
13867 op1
= safe_vector_operand (op1
, mode1
);
13869 /* Swap operands if we have a comparison that isn't available in
13878 target
= gen_reg_rtx (SImode
);
13879 emit_move_insn (target
, const0_rtx
);
13880 target
= gen_rtx_SUBREG (QImode
, target
, 0);
13882 if (! (*insn_data
[d
->icode
].operand
[0].predicate
) (op0
, mode0
))
13883 op0
= copy_to_mode_reg (mode0
, op0
);
13884 if (! (*insn_data
[d
->icode
].operand
[1].predicate
) (op1
, mode1
))
13885 op1
= copy_to_mode_reg (mode1
, op1
);
13887 op2
= gen_rtx_fmt_ee (comparison
, mode0
, op0
, op1
);
13888 pat
= GEN_FCN (d
->icode
) (op0
, op1
);
13892 emit_insn (gen_rtx_SET (VOIDmode
,
13893 gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
13894 gen_rtx_fmt_ee (comparison
, QImode
,
13898 return SUBREG_REG (target
);
13901 /* Expand an expression EXP that calls a built-in function,
13902 with result going to TARGET if that's convenient
13903 (and in mode MODE if that's convenient).
13904 SUBTARGET may be used as the target for computing one of EXP's operands.
13905 IGNORE is nonzero if the value is to be ignored. */
13908 ix86_expand_builtin (tree exp
, rtx target
, rtx subtarget ATTRIBUTE_UNUSED
,
13909 enum machine_mode mode ATTRIBUTE_UNUSED
,
13910 int ignore ATTRIBUTE_UNUSED
)
13912 const struct builtin_description
*d
;
13914 enum insn_code icode
;
13915 tree fndecl
= TREE_OPERAND (TREE_OPERAND (exp
, 0), 0);
13916 tree arglist
= TREE_OPERAND (exp
, 1);
13917 tree arg0
, arg1
, arg2
;
13918 rtx op0
, op1
, op2
, pat
;
13919 enum machine_mode tmode
, mode0
, mode1
, mode2
;
13920 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
13924 case IX86_BUILTIN_EMMS
:
13925 emit_insn (gen_emms ());
13928 case IX86_BUILTIN_SFENCE
:
13929 emit_insn (gen_sfence ());
13932 case IX86_BUILTIN_PEXTRW
:
13933 case IX86_BUILTIN_PEXTRW128
:
13934 icode
= (fcode
== IX86_BUILTIN_PEXTRW
13935 ? CODE_FOR_mmx_pextrw
13936 : CODE_FOR_sse2_pextrw
);
13937 arg0
= TREE_VALUE (arglist
);
13938 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
13939 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13940 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
13941 tmode
= insn_data
[icode
].operand
[0].mode
;
13942 mode0
= insn_data
[icode
].operand
[1].mode
;
13943 mode1
= insn_data
[icode
].operand
[2].mode
;
13945 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
13946 op0
= copy_to_mode_reg (mode0
, op0
);
13947 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
13949 error ("selector must be an integer constant in the range 0..%i",
13950 fcode
== IX86_BUILTIN_PEXTRW
? 3:7);
13951 return gen_reg_rtx (tmode
);
13954 || GET_MODE (target
) != tmode
13955 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
13956 target
= gen_reg_rtx (tmode
);
13957 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
13963 case IX86_BUILTIN_PINSRW
:
13964 case IX86_BUILTIN_PINSRW128
:
13965 icode
= (fcode
== IX86_BUILTIN_PINSRW
13966 ? CODE_FOR_mmx_pinsrw
13967 : CODE_FOR_sse2_pinsrw
);
13968 arg0
= TREE_VALUE (arglist
);
13969 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
13970 arg2
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
13971 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13972 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
13973 op2
= expand_expr (arg2
, NULL_RTX
, VOIDmode
, 0);
13974 tmode
= insn_data
[icode
].operand
[0].mode
;
13975 mode0
= insn_data
[icode
].operand
[1].mode
;
13976 mode1
= insn_data
[icode
].operand
[2].mode
;
13977 mode2
= insn_data
[icode
].operand
[3].mode
;
13979 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
13980 op0
= copy_to_mode_reg (mode0
, op0
);
13981 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
13982 op1
= copy_to_mode_reg (mode1
, op1
);
13983 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
13985 error ("selector must be an integer constant in the range 0..%i",
13986 fcode
== IX86_BUILTIN_PINSRW
? 15:255);
13990 || GET_MODE (target
) != tmode
13991 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
13992 target
= gen_reg_rtx (tmode
);
13993 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
13999 case IX86_BUILTIN_MASKMOVQ
:
14000 case IX86_BUILTIN_MASKMOVDQU
:
14001 icode
= (fcode
== IX86_BUILTIN_MASKMOVQ
14002 ? (TARGET_64BIT
? CODE_FOR_mmx_maskmovq_rex
: CODE_FOR_mmx_maskmovq
)
14003 : (TARGET_64BIT
? CODE_FOR_sse2_maskmovdqu_rex64
14004 : CODE_FOR_sse2_maskmovdqu
));
14005 /* Note the arg order is different from the operand order. */
14006 arg1
= TREE_VALUE (arglist
);
14007 arg2
= TREE_VALUE (TREE_CHAIN (arglist
));
14008 arg0
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
14009 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
14010 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
14011 op2
= expand_expr (arg2
, NULL_RTX
, VOIDmode
, 0);
14012 mode0
= insn_data
[icode
].operand
[0].mode
;
14013 mode1
= insn_data
[icode
].operand
[1].mode
;
14014 mode2
= insn_data
[icode
].operand
[2].mode
;
14016 if (! (*insn_data
[icode
].operand
[0].predicate
) (op0
, mode0
))
14017 op0
= copy_to_mode_reg (mode0
, op0
);
14018 if (! (*insn_data
[icode
].operand
[1].predicate
) (op1
, mode1
))
14019 op1
= copy_to_mode_reg (mode1
, op1
);
14020 if (! (*insn_data
[icode
].operand
[2].predicate
) (op2
, mode2
))
14021 op2
= copy_to_mode_reg (mode2
, op2
);
14022 pat
= GEN_FCN (icode
) (op0
, op1
, op2
);
14028 case IX86_BUILTIN_SQRTSS
:
14029 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2
, arglist
, target
);
14030 case IX86_BUILTIN_RSQRTSS
:
14031 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2
, arglist
, target
);
14032 case IX86_BUILTIN_RCPSS
:
14033 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2
, arglist
, target
);
14035 case IX86_BUILTIN_LOADAPS
:
14036 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps
, arglist
, target
, 1);
14038 case IX86_BUILTIN_LOADUPS
:
14039 return ix86_expand_unop_builtin (CODE_FOR_sse_movups
, arglist
, target
, 1);
14041 case IX86_BUILTIN_STOREAPS
:
14042 return ix86_expand_store_builtin (CODE_FOR_sse_movaps
, arglist
);
14044 case IX86_BUILTIN_STOREUPS
:
14045 return ix86_expand_store_builtin (CODE_FOR_sse_movups
, arglist
);
14047 case IX86_BUILTIN_LOADSS
:
14048 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss
, arglist
, target
, 1);
14050 case IX86_BUILTIN_STORESS
:
14051 return ix86_expand_store_builtin (CODE_FOR_sse_storess
, arglist
);
14053 case IX86_BUILTIN_LOADHPS
:
14054 case IX86_BUILTIN_LOADLPS
:
14055 case IX86_BUILTIN_LOADHPD
:
14056 case IX86_BUILTIN_LOADLPD
:
14057 icode
= (fcode
== IX86_BUILTIN_LOADHPS
? CODE_FOR_sse_movhps
14058 : fcode
== IX86_BUILTIN_LOADLPS
? CODE_FOR_sse_movlps
14059 : fcode
== IX86_BUILTIN_LOADHPD
? CODE_FOR_sse2_movhpd
14060 : CODE_FOR_sse2_movsd
);
14061 arg0
= TREE_VALUE (arglist
);
14062 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
14063 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
14064 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
14065 tmode
= insn_data
[icode
].operand
[0].mode
;
14066 mode0
= insn_data
[icode
].operand
[1].mode
;
14067 mode1
= insn_data
[icode
].operand
[2].mode
;
14069 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
14070 op0
= copy_to_mode_reg (mode0
, op0
);
14071 op1
= gen_rtx_MEM (mode1
, copy_to_mode_reg (Pmode
, op1
));
14073 || GET_MODE (target
) != tmode
14074 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
14075 target
= gen_reg_rtx (tmode
);
14076 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
14082 case IX86_BUILTIN_STOREHPS
:
14083 case IX86_BUILTIN_STORELPS
:
14084 case IX86_BUILTIN_STOREHPD
:
14085 case IX86_BUILTIN_STORELPD
:
14086 icode
= (fcode
== IX86_BUILTIN_STOREHPS
? CODE_FOR_sse_movhps
14087 : fcode
== IX86_BUILTIN_STORELPS
? CODE_FOR_sse_movlps
14088 : fcode
== IX86_BUILTIN_STOREHPD
? CODE_FOR_sse2_movhpd
14089 : CODE_FOR_sse2_movsd
);
14090 arg0
= TREE_VALUE (arglist
);
14091 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
14092 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
14093 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
14094 mode0
= insn_data
[icode
].operand
[1].mode
;
14095 mode1
= insn_data
[icode
].operand
[2].mode
;
14097 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
14098 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
14099 op1
= copy_to_mode_reg (mode1
, op1
);
14101 pat
= GEN_FCN (icode
) (op0
, op0
, op1
);
14107 case IX86_BUILTIN_MOVNTPS
:
14108 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf
, arglist
);
14109 case IX86_BUILTIN_MOVNTQ
:
14110 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi
, arglist
);
14112 case IX86_BUILTIN_LDMXCSR
:
14113 op0
= expand_expr (TREE_VALUE (arglist
), NULL_RTX
, VOIDmode
, 0);
14114 target
= assign_386_stack_local (SImode
, 0);
14115 emit_move_insn (target
, op0
);
14116 emit_insn (gen_ldmxcsr (target
));
14119 case IX86_BUILTIN_STMXCSR
:
14120 target
= assign_386_stack_local (SImode
, 0);
14121 emit_insn (gen_stmxcsr (target
));
14122 return copy_to_mode_reg (SImode
, target
);
14124 case IX86_BUILTIN_SHUFPS
:
14125 case IX86_BUILTIN_SHUFPD
:
14126 icode
= (fcode
== IX86_BUILTIN_SHUFPS
14127 ? CODE_FOR_sse_shufps
14128 : CODE_FOR_sse2_shufpd
);
14129 arg0
= TREE_VALUE (arglist
);
14130 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
14131 arg2
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
14132 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
14133 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
14134 op2
= expand_expr (arg2
, NULL_RTX
, VOIDmode
, 0);
14135 tmode
= insn_data
[icode
].operand
[0].mode
;
14136 mode0
= insn_data
[icode
].operand
[1].mode
;
14137 mode1
= insn_data
[icode
].operand
[2].mode
;
14138 mode2
= insn_data
[icode
].operand
[3].mode
;
14140 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
14141 op0
= copy_to_mode_reg (mode0
, op0
);
14142 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
14143 op1
= copy_to_mode_reg (mode1
, op1
);
14144 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
14146 /* @@@ better error message */
14147 error ("mask must be an immediate");
14148 return gen_reg_rtx (tmode
);
14151 || GET_MODE (target
) != tmode
14152 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
14153 target
= gen_reg_rtx (tmode
);
14154 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
14160 case IX86_BUILTIN_PSHUFW
:
14161 case IX86_BUILTIN_PSHUFD
:
14162 case IX86_BUILTIN_PSHUFHW
:
14163 case IX86_BUILTIN_PSHUFLW
:
14164 icode
= ( fcode
== IX86_BUILTIN_PSHUFHW
? CODE_FOR_sse2_pshufhw
14165 : fcode
== IX86_BUILTIN_PSHUFLW
? CODE_FOR_sse2_pshuflw
14166 : fcode
== IX86_BUILTIN_PSHUFD
? CODE_FOR_sse2_pshufd
14167 : CODE_FOR_mmx_pshufw
);
14168 arg0
= TREE_VALUE (arglist
);
14169 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
14170 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
14171 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
14172 tmode
= insn_data
[icode
].operand
[0].mode
;
14173 mode1
= insn_data
[icode
].operand
[1].mode
;
14174 mode2
= insn_data
[icode
].operand
[2].mode
;
14176 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
14177 op0
= copy_to_mode_reg (mode1
, op0
);
14178 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
14180 /* @@@ better error message */
14181 error ("mask must be an immediate");
14185 || GET_MODE (target
) != tmode
14186 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
14187 target
= gen_reg_rtx (tmode
);
14188 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
14194 case IX86_BUILTIN_PSLLDQI128
:
14195 case IX86_BUILTIN_PSRLDQI128
:
14196 icode
= ( fcode
== IX86_BUILTIN_PSLLDQI128
? CODE_FOR_sse2_ashlti3
14197 : CODE_FOR_sse2_lshrti3
);
14198 arg0
= TREE_VALUE (arglist
);
14199 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
14200 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
14201 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
14202 tmode
= insn_data
[icode
].operand
[0].mode
;
14203 mode1
= insn_data
[icode
].operand
[1].mode
;
14204 mode2
= insn_data
[icode
].operand
[2].mode
;
14206 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
14208 op0
= copy_to_reg (op0
);
14209 op0
= simplify_gen_subreg (mode1
, op0
, GET_MODE (op0
), 0);
14211 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
14213 error ("shift must be an immediate");
14216 target
= gen_reg_rtx (V2DImode
);
14217 pat
= GEN_FCN (icode
) (simplify_gen_subreg (tmode
, target
, V2DImode
, 0), op0
, op1
);
14223 case IX86_BUILTIN_FEMMS
:
14224 emit_insn (gen_femms ());
14227 case IX86_BUILTIN_PAVGUSB
:
14228 return ix86_expand_binop_builtin (CODE_FOR_pavgusb
, arglist
, target
);
14230 case IX86_BUILTIN_PF2ID
:
14231 return ix86_expand_unop_builtin (CODE_FOR_pf2id
, arglist
, target
, 0);
14233 case IX86_BUILTIN_PFACC
:
14234 return ix86_expand_binop_builtin (CODE_FOR_pfacc
, arglist
, target
);
14236 case IX86_BUILTIN_PFADD
:
14237 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3
, arglist
, target
);
14239 case IX86_BUILTIN_PFCMPEQ
:
14240 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3
, arglist
, target
);
14242 case IX86_BUILTIN_PFCMPGE
:
14243 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3
, arglist
, target
);
14245 case IX86_BUILTIN_PFCMPGT
:
14246 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3
, arglist
, target
);
14248 case IX86_BUILTIN_PFMAX
:
14249 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3
, arglist
, target
);
14251 case IX86_BUILTIN_PFMIN
:
14252 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3
, arglist
, target
);
14254 case IX86_BUILTIN_PFMUL
:
14255 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3
, arglist
, target
);
14257 case IX86_BUILTIN_PFRCP
:
14258 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2
, arglist
, target
, 0);
14260 case IX86_BUILTIN_PFRCPIT1
:
14261 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3
, arglist
, target
);
14263 case IX86_BUILTIN_PFRCPIT2
:
14264 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3
, arglist
, target
);
14266 case IX86_BUILTIN_PFRSQIT1
:
14267 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3
, arglist
, target
);
14269 case IX86_BUILTIN_PFRSQRT
:
14270 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2
, arglist
, target
, 0);
14272 case IX86_BUILTIN_PFSUB
:
14273 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3
, arglist
, target
);
14275 case IX86_BUILTIN_PFSUBR
:
14276 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3
, arglist
, target
);
14278 case IX86_BUILTIN_PI2FD
:
14279 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2
, arglist
, target
, 0);
14281 case IX86_BUILTIN_PMULHRW
:
14282 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3
, arglist
, target
);
14284 case IX86_BUILTIN_PF2IW
:
14285 return ix86_expand_unop_builtin (CODE_FOR_pf2iw
, arglist
, target
, 0);
14287 case IX86_BUILTIN_PFNACC
:
14288 return ix86_expand_binop_builtin (CODE_FOR_pfnacc
, arglist
, target
);
14290 case IX86_BUILTIN_PFPNACC
:
14291 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc
, arglist
, target
);
14293 case IX86_BUILTIN_PI2FW
:
14294 return ix86_expand_unop_builtin (CODE_FOR_pi2fw
, arglist
, target
, 0);
14296 case IX86_BUILTIN_PSWAPDSI
:
14297 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2
, arglist
, target
, 0);
14299 case IX86_BUILTIN_PSWAPDSF
:
14300 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2
, arglist
, target
, 0);
14302 case IX86_BUILTIN_SSE_ZERO
:
14303 target
= gen_reg_rtx (V4SFmode
);
14304 emit_insn (gen_sse_clrv4sf (target
, CONST0_RTX (V4SFmode
)));
14307 case IX86_BUILTIN_MMX_ZERO
:
14308 target
= gen_reg_rtx (DImode
);
14309 emit_insn (gen_mmx_clrdi (target
));
14312 case IX86_BUILTIN_CLRTI
:
14313 target
= gen_reg_rtx (V2DImode
);
14314 emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode
, target
, V2DImode
, 0)));
14318 case IX86_BUILTIN_SQRTSD
:
14319 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2
, arglist
, target
);
14320 case IX86_BUILTIN_LOADAPD
:
14321 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd
, arglist
, target
, 1);
14322 case IX86_BUILTIN_LOADUPD
:
14323 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd
, arglist
, target
, 1);
14325 case IX86_BUILTIN_STOREAPD
:
14326 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd
, arglist
);
14327 case IX86_BUILTIN_STOREUPD
:
14328 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd
, arglist
);
14330 case IX86_BUILTIN_LOADSD
:
14331 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd
, arglist
, target
, 1);
14333 case IX86_BUILTIN_STORESD
:
14334 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd
, arglist
);
14336 case IX86_BUILTIN_SETPD1
:
14337 target
= assign_386_stack_local (DFmode
, 0);
14338 arg0
= TREE_VALUE (arglist
);
14339 emit_move_insn (adjust_address (target
, DFmode
, 0),
14340 expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0));
14341 op0
= gen_reg_rtx (V2DFmode
);
14342 emit_insn (gen_sse2_loadsd (op0
, adjust_address (target
, V2DFmode
, 0)));
14343 emit_insn (gen_sse2_shufpd (op0
, op0
, op0
, const0_rtx
));
14346 case IX86_BUILTIN_SETPD
:
14347 target
= assign_386_stack_local (V2DFmode
, 0);
14348 arg0
= TREE_VALUE (arglist
);
14349 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
14350 emit_move_insn (adjust_address (target
, DFmode
, 0),
14351 expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0));
14352 emit_move_insn (adjust_address (target
, DFmode
, 8),
14353 expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0));
14354 op0
= gen_reg_rtx (V2DFmode
);
14355 emit_insn (gen_sse2_movapd (op0
, target
));
14358 case IX86_BUILTIN_LOADRPD
:
14359 target
= ix86_expand_unop_builtin (CODE_FOR_sse2_movapd
, arglist
,
14360 gen_reg_rtx (V2DFmode
), 1);
14361 emit_insn (gen_sse2_shufpd (target
, target
, target
, const1_rtx
));
14364 case IX86_BUILTIN_LOADPD1
:
14365 target
= ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd
, arglist
,
14366 gen_reg_rtx (V2DFmode
), 1);
14367 emit_insn (gen_sse2_shufpd (target
, target
, target
, const0_rtx
));
14370 case IX86_BUILTIN_STOREPD1
:
14371 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd
, arglist
);
14372 case IX86_BUILTIN_STORERPD
:
14373 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd
, arglist
);
14375 case IX86_BUILTIN_CLRPD
:
14376 target
= gen_reg_rtx (V2DFmode
);
14377 emit_insn (gen_sse_clrv2df (target
));
14380 case IX86_BUILTIN_MFENCE
:
14381 emit_insn (gen_sse2_mfence ());
14383 case IX86_BUILTIN_LFENCE
:
14384 emit_insn (gen_sse2_lfence ());
14387 case IX86_BUILTIN_CLFLUSH
:
14388 arg0
= TREE_VALUE (arglist
);
14389 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
14390 icode
= CODE_FOR_sse2_clflush
;
14391 if (! (*insn_data
[icode
].operand
[0].predicate
) (op0
, Pmode
))
14392 op0
= copy_to_mode_reg (Pmode
, op0
);
14394 emit_insn (gen_sse2_clflush (op0
));
14397 case IX86_BUILTIN_MOVNTPD
:
14398 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df
, arglist
);
14399 case IX86_BUILTIN_MOVNTDQ
:
14400 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di
, arglist
);
14401 case IX86_BUILTIN_MOVNTI
:
14402 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi
, arglist
);
14404 case IX86_BUILTIN_LOADDQA
:
14405 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa
, arglist
, target
, 1);
14406 case IX86_BUILTIN_LOADDQU
:
14407 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu
, arglist
, target
, 1);
14408 case IX86_BUILTIN_LOADD
:
14409 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd
, arglist
, target
, 1);
14411 case IX86_BUILTIN_STOREDQA
:
14412 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa
, arglist
);
14413 case IX86_BUILTIN_STOREDQU
:
14414 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu
, arglist
);
14415 case IX86_BUILTIN_STORED
:
14416 return ix86_expand_store_builtin (CODE_FOR_sse2_stored
, arglist
);
14418 case IX86_BUILTIN_MONITOR
:
14419 arg0
= TREE_VALUE (arglist
);
14420 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
14421 arg2
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
14422 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
14423 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
14424 op2
= expand_expr (arg2
, NULL_RTX
, VOIDmode
, 0);
14426 op0
= copy_to_mode_reg (SImode
, op0
);
14428 op1
= copy_to_mode_reg (SImode
, op1
);
14430 op2
= copy_to_mode_reg (SImode
, op2
);
14431 emit_insn (gen_monitor (op0
, op1
, op2
));
14434 case IX86_BUILTIN_MWAIT
:
14435 arg0
= TREE_VALUE (arglist
);
14436 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
14437 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
14438 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
14440 op0
= copy_to_mode_reg (SImode
, op0
);
14442 op1
= copy_to_mode_reg (SImode
, op1
);
14443 emit_insn (gen_mwait (op0
, op1
));
14446 case IX86_BUILTIN_LOADDDUP
:
14447 return ix86_expand_unop_builtin (CODE_FOR_loadddup
, arglist
, target
, 1);
14449 case IX86_BUILTIN_LDDQU
:
14450 return ix86_expand_unop_builtin (CODE_FOR_lddqu
, arglist
, target
,
14457 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
14458 if (d
->code
== fcode
)
14460 /* Compares are treated specially. */
14461 if (d
->icode
== CODE_FOR_maskcmpv4sf3
14462 || d
->icode
== CODE_FOR_vmmaskcmpv4sf3
14463 || d
->icode
== CODE_FOR_maskncmpv4sf3
14464 || d
->icode
== CODE_FOR_vmmaskncmpv4sf3
14465 || d
->icode
== CODE_FOR_maskcmpv2df3
14466 || d
->icode
== CODE_FOR_vmmaskcmpv2df3
14467 || d
->icode
== CODE_FOR_maskncmpv2df3
14468 || d
->icode
== CODE_FOR_vmmaskncmpv2df3
)
14469 return ix86_expand_sse_compare (d
, arglist
, target
);
14471 return ix86_expand_binop_builtin (d
->icode
, arglist
, target
);
14474 for (i
= 0, d
= bdesc_1arg
; i
< ARRAY_SIZE (bdesc_1arg
); i
++, d
++)
14475 if (d
->code
== fcode
)
14476 return ix86_expand_unop_builtin (d
->icode
, arglist
, target
, 0);
14478 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
14479 if (d
->code
== fcode
)
14480 return ix86_expand_sse_comi (d
, arglist
, target
);
14482 /* @@@ Should really do something sensible here. */
14486 /* Store OPERAND to the memory after reload is completed. This means
14487 that we can't easily use assign_stack_local. */
14489 ix86_force_to_memory (enum machine_mode mode
, rtx operand
)
14492 if (!reload_completed
)
14494 if (TARGET_RED_ZONE
)
14496 result
= gen_rtx_MEM (mode
,
14497 gen_rtx_PLUS (Pmode
,
14499 GEN_INT (-RED_ZONE_SIZE
)));
14500 emit_move_insn (result
, operand
);
14502 else if (!TARGET_RED_ZONE
&& TARGET_64BIT
)
14508 operand
= gen_lowpart (DImode
, operand
);
14512 gen_rtx_SET (VOIDmode
,
14513 gen_rtx_MEM (DImode
,
14514 gen_rtx_PRE_DEC (DImode
,
14515 stack_pointer_rtx
)),
14521 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
14530 split_di (&operand
, 1, operands
, operands
+ 1);
14532 gen_rtx_SET (VOIDmode
,
14533 gen_rtx_MEM (SImode
,
14534 gen_rtx_PRE_DEC (Pmode
,
14535 stack_pointer_rtx
)),
14538 gen_rtx_SET (VOIDmode
,
14539 gen_rtx_MEM (SImode
,
14540 gen_rtx_PRE_DEC (Pmode
,
14541 stack_pointer_rtx
)),
14546 /* It is better to store HImodes as SImodes. */
14547 if (!TARGET_PARTIAL_REG_STALL
)
14548 operand
= gen_lowpart (SImode
, operand
);
14552 gen_rtx_SET (VOIDmode
,
14553 gen_rtx_MEM (GET_MODE (operand
),
14554 gen_rtx_PRE_DEC (SImode
,
14555 stack_pointer_rtx
)),
14561 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
14566 /* Free operand from the memory. */
14568 ix86_free_from_memory (enum machine_mode mode
)
14570 if (!TARGET_RED_ZONE
)
14574 if (mode
== DImode
|| TARGET_64BIT
)
14576 else if (mode
== HImode
&& TARGET_PARTIAL_REG_STALL
)
14580 /* Use LEA to deallocate stack space. In peephole2 it will be converted
14581 to pop or add instruction if registers are available. */
14582 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
14583 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
14588 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
14589 QImode must go into class Q_REGS.
14590 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
14591 movdf to do mem-to-mem moves through integer regs. */
14593 ix86_preferred_reload_class (rtx x
, enum reg_class
class)
14595 if (GET_CODE (x
) == CONST_VECTOR
&& x
!= CONST0_RTX (GET_MODE (x
)))
14597 if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) != VOIDmode
)
14599 /* SSE can't load any constant directly yet. */
14600 if (SSE_CLASS_P (class))
14602 /* Floats can load 0 and 1. */
14603 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x
))
14605 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
14606 if (MAYBE_SSE_CLASS_P (class))
14607 return (reg_class_subset_p (class, GENERAL_REGS
)
14608 ? GENERAL_REGS
: FLOAT_REGS
);
14612 /* General regs can load everything. */
14613 if (reg_class_subset_p (class, GENERAL_REGS
))
14614 return GENERAL_REGS
;
14615 /* In case we haven't resolved FLOAT or SSE yet, give up. */
14616 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
14619 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x
))
14621 if (GET_MODE (x
) == QImode
&& ! reg_class_subset_p (class, Q_REGS
))
14626 /* If we are copying between general and FP registers, we need a memory
14627 location. The same is true for SSE and MMX registers.
14629 The macro can't work reliably when one of the CLASSES is class containing
14630 registers from multiple units (SSE, MMX, integer). We avoid this by never
14631 combining those units in single alternative in the machine description.
14632 Ensure that this constraint holds to avoid unexpected surprises.
14634 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
14635 enforce these sanity checks. */
14637 ix86_secondary_memory_needed (enum reg_class class1
, enum reg_class class2
,
14638 enum machine_mode mode
, int strict
)
14640 if (MAYBE_FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class1
)
14641 || MAYBE_FLOAT_CLASS_P (class2
) != FLOAT_CLASS_P (class2
)
14642 || MAYBE_SSE_CLASS_P (class1
) != SSE_CLASS_P (class1
)
14643 || MAYBE_SSE_CLASS_P (class2
) != SSE_CLASS_P (class2
)
14644 || MAYBE_MMX_CLASS_P (class1
) != MMX_CLASS_P (class1
)
14645 || MAYBE_MMX_CLASS_P (class2
) != MMX_CLASS_P (class2
))
14652 return (FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class2
)
14653 || ((SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
)
14654 || MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
))
14655 && ((mode
!= SImode
&& (mode
!= DImode
|| !TARGET_64BIT
))
14656 || (!TARGET_INTER_UNIT_MOVES
&& !optimize_size
))));
14658 /* Return the cost of moving data from a register in class CLASS1 to
14659 one in class CLASS2.
14661 It is not required that the cost always equal 2 when FROM is the same as TO;
14662 on some machines it is expensive to move between registers if they are not
14663 general registers. */
14665 ix86_register_move_cost (enum machine_mode mode
, enum reg_class class1
,
14666 enum reg_class class2
)
14668 /* In case we require secondary memory, compute cost of the store followed
14669 by load. In order to avoid bad register allocation choices, we need
14670 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
14672 if (ix86_secondary_memory_needed (class1
, class2
, mode
, 0))
14676 cost
+= MAX (MEMORY_MOVE_COST (mode
, class1
, 0),
14677 MEMORY_MOVE_COST (mode
, class1
, 1));
14678 cost
+= MAX (MEMORY_MOVE_COST (mode
, class2
, 0),
14679 MEMORY_MOVE_COST (mode
, class2
, 1));
14681 /* In case of copying from general_purpose_register we may emit multiple
14682 stores followed by single load causing memory size mismatch stall.
14683 Count this as arbitrarily high cost of 20. */
14684 if (CLASS_MAX_NREGS (class1
, mode
) > CLASS_MAX_NREGS (class2
, mode
))
14687 /* In the case of FP/MMX moves, the registers actually overlap, and we
14688 have to switch modes in order to treat them differently. */
14689 if ((MMX_CLASS_P (class1
) && MAYBE_FLOAT_CLASS_P (class2
))
14690 || (MMX_CLASS_P (class2
) && MAYBE_FLOAT_CLASS_P (class1
)))
14696 /* Moves between SSE/MMX and integer unit are expensive. */
14697 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
)
14698 || SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
14699 return ix86_cost
->mmxsse_to_integer
;
14700 if (MAYBE_FLOAT_CLASS_P (class1
))
14701 return ix86_cost
->fp_move
;
14702 if (MAYBE_SSE_CLASS_P (class1
))
14703 return ix86_cost
->sse_move
;
14704 if (MAYBE_MMX_CLASS_P (class1
))
14705 return ix86_cost
->mmx_move
;
14709 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
14711 ix86_hard_regno_mode_ok (int regno
, enum machine_mode mode
)
14713 /* Flags and only flags can only hold CCmode values. */
14714 if (CC_REGNO_P (regno
))
14715 return GET_MODE_CLASS (mode
) == MODE_CC
;
14716 if (GET_MODE_CLASS (mode
) == MODE_CC
14717 || GET_MODE_CLASS (mode
) == MODE_RANDOM
14718 || GET_MODE_CLASS (mode
) == MODE_PARTIAL_INT
)
14720 if (FP_REGNO_P (regno
))
14721 return VALID_FP_MODE_P (mode
);
14722 if (SSE_REGNO_P (regno
))
14723 return (TARGET_SSE
? VALID_SSE_REG_MODE (mode
) : 0);
14724 if (MMX_REGNO_P (regno
))
14726 ? VALID_MMX_REG_MODE (mode
) || VALID_MMX_REG_MODE_3DNOW (mode
) : 0);
14727 /* We handle both integer and floats in the general purpose registers.
14728 In future we should be able to handle vector modes as well. */
14729 if (!VALID_INT_MODE_P (mode
) && !VALID_FP_MODE_P (mode
))
14731 /* Take care for QImode values - they can be in non-QI regs, but then
14732 they do cause partial register stalls. */
14733 if (regno
< 4 || mode
!= QImode
|| TARGET_64BIT
)
14735 return reload_in_progress
|| reload_completed
|| !TARGET_PARTIAL_REG_STALL
;
14738 /* Return the cost of moving data of mode M between a
14739 register and memory. A value of 2 is the default; this cost is
14740 relative to those in `REGISTER_MOVE_COST'.
14742 If moving between registers and memory is more expensive than
14743 between two registers, you should define this macro to express the
14746 Model also increased moving costs of QImode registers in non
14750 ix86_memory_move_cost (enum machine_mode mode
, enum reg_class
class, int in
)
14752 if (FLOAT_CLASS_P (class))
14769 return in
? ix86_cost
->fp_load
[index
] : ix86_cost
->fp_store
[index
];
14771 if (SSE_CLASS_P (class))
14774 switch (GET_MODE_SIZE (mode
))
14788 return in
? ix86_cost
->sse_load
[index
] : ix86_cost
->sse_store
[index
];
14790 if (MMX_CLASS_P (class))
14793 switch (GET_MODE_SIZE (mode
))
14804 return in
? ix86_cost
->mmx_load
[index
] : ix86_cost
->mmx_store
[index
];
14806 switch (GET_MODE_SIZE (mode
))
14810 return (Q_CLASS_P (class) ? ix86_cost
->int_load
[0]
14811 : ix86_cost
->movzbl_load
);
14813 return (Q_CLASS_P (class) ? ix86_cost
->int_store
[0]
14814 : ix86_cost
->int_store
[0] + 4);
14817 return in
? ix86_cost
->int_load
[1] : ix86_cost
->int_store
[1];
14819 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
14820 if (mode
== TFmode
)
14822 return ((in
? ix86_cost
->int_load
[2] : ix86_cost
->int_store
[2])
14823 * (((int) GET_MODE_SIZE (mode
)
14824 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
));
14828 /* Compute a (partial) cost for rtx X. Return true if the complete
14829 cost has been computed, and false if subexpressions should be
14830 scanned. In either case, *TOTAL contains the cost result. */
14833 ix86_rtx_costs (rtx x
, int code
, int outer_code
, int *total
)
14835 enum machine_mode mode
= GET_MODE (x
);
14843 if (TARGET_64BIT
&& !x86_64_sign_extended_value (x
))
14845 else if (TARGET_64BIT
&& !x86_64_zero_extended_value (x
))
14847 else if (flag_pic
&& SYMBOLIC_CONST (x
)
14849 || (!GET_CODE (x
) != LABEL_REF
14850 && (GET_CODE (x
) != SYMBOL_REF
14851 || !SYMBOL_REF_LOCAL_P (x
)))))
14858 if (mode
== VOIDmode
)
14861 switch (standard_80387_constant_p (x
))
14866 default: /* Other constants */
14871 /* Start with (MEM (SYMBOL_REF)), since that's where
14872 it'll probably end up. Add a penalty for size. */
14873 *total
= (COSTS_N_INSNS (1)
14874 + (flag_pic
!= 0 && !TARGET_64BIT
)
14875 + (mode
== SFmode
? 0 : mode
== DFmode
? 1 : 2));
14881 /* The zero extensions is often completely free on x86_64, so make
14882 it as cheap as possible. */
14883 if (TARGET_64BIT
&& mode
== DImode
14884 && GET_MODE (XEXP (x
, 0)) == SImode
)
14886 else if (TARGET_ZERO_EXTEND_WITH_AND
)
14887 *total
= COSTS_N_INSNS (ix86_cost
->add
);
14889 *total
= COSTS_N_INSNS (ix86_cost
->movzx
);
14893 *total
= COSTS_N_INSNS (ix86_cost
->movsx
);
14897 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
14898 && (GET_MODE (XEXP (x
, 0)) != DImode
|| TARGET_64BIT
))
14900 HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
14903 *total
= COSTS_N_INSNS (ix86_cost
->add
);
14906 if ((value
== 2 || value
== 3)
14907 && ix86_cost
->lea
<= ix86_cost
->shift_const
)
14909 *total
= COSTS_N_INSNS (ix86_cost
->lea
);
14919 if (!TARGET_64BIT
&& GET_MODE (XEXP (x
, 0)) == DImode
)
14921 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
14923 if (INTVAL (XEXP (x
, 1)) > 32)
14924 *total
= COSTS_N_INSNS(ix86_cost
->shift_const
+ 2);
14926 *total
= COSTS_N_INSNS(ix86_cost
->shift_const
* 2);
14930 if (GET_CODE (XEXP (x
, 1)) == AND
)
14931 *total
= COSTS_N_INSNS(ix86_cost
->shift_var
* 2);
14933 *total
= COSTS_N_INSNS(ix86_cost
->shift_var
* 6 + 2);
14938 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
14939 *total
= COSTS_N_INSNS (ix86_cost
->shift_const
);
14941 *total
= COSTS_N_INSNS (ix86_cost
->shift_var
);
14946 if (FLOAT_MODE_P (mode
))
14948 *total
= COSTS_N_INSNS (ix86_cost
->fmul
);
14953 rtx op0
= XEXP (x
, 0);
14954 rtx op1
= XEXP (x
, 1);
14956 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
14958 unsigned HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
14959 for (nbits
= 0; value
!= 0; value
&= value
- 1)
14963 /* This is arbitrary. */
14966 /* Compute costs correctly for widening multiplication. */
14967 if ((GET_CODE (op0
) == SIGN_EXTEND
|| GET_CODE (op1
) == ZERO_EXTEND
)
14968 && GET_MODE_SIZE (GET_MODE (XEXP (op0
, 0))) * 2
14969 == GET_MODE_SIZE (mode
))
14971 int is_mulwiden
= 0;
14972 enum machine_mode inner_mode
= GET_MODE (op0
);
14974 if (GET_CODE (op0
) == GET_CODE (op1
))
14975 is_mulwiden
= 1, op1
= XEXP (op1
, 0);
14976 else if (GET_CODE (op1
) == CONST_INT
)
14978 if (GET_CODE (op0
) == SIGN_EXTEND
)
14979 is_mulwiden
= trunc_int_for_mode (INTVAL (op1
), inner_mode
)
14982 is_mulwiden
= !(INTVAL (op1
) & ~GET_MODE_MASK (inner_mode
));
14986 op0
= XEXP (op0
, 0), mode
= GET_MODE (op0
);
14989 *total
= COSTS_N_INSNS (ix86_cost
->mult_init
[MODE_INDEX (mode
)]
14990 + nbits
* ix86_cost
->mult_bit
)
14991 + rtx_cost (op0
, outer_code
) + rtx_cost (op1
, outer_code
);
15000 if (FLOAT_MODE_P (mode
))
15001 *total
= COSTS_N_INSNS (ix86_cost
->fdiv
);
15003 *total
= COSTS_N_INSNS (ix86_cost
->divide
[MODE_INDEX (mode
)]);
15007 if (FLOAT_MODE_P (mode
))
15008 *total
= COSTS_N_INSNS (ix86_cost
->fadd
);
15009 else if (GET_MODE_CLASS (mode
) == MODE_INT
15010 && GET_MODE_BITSIZE (mode
) <= GET_MODE_BITSIZE (Pmode
))
15012 if (GET_CODE (XEXP (x
, 0)) == PLUS
15013 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
15014 && GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1)) == CONST_INT
15015 && CONSTANT_P (XEXP (x
, 1)))
15017 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (XEXP (x
, 0), 0), 1));
15018 if (val
== 2 || val
== 4 || val
== 8)
15020 *total
= COSTS_N_INSNS (ix86_cost
->lea
);
15021 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1), outer_code
);
15022 *total
+= rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0),
15024 *total
+= rtx_cost (XEXP (x
, 1), outer_code
);
15028 else if (GET_CODE (XEXP (x
, 0)) == MULT
15029 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == CONST_INT
)
15031 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (x
, 0), 1));
15032 if (val
== 2 || val
== 4 || val
== 8)
15034 *total
= COSTS_N_INSNS (ix86_cost
->lea
);
15035 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
);
15036 *total
+= rtx_cost (XEXP (x
, 1), outer_code
);
15040 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
15042 *total
= COSTS_N_INSNS (ix86_cost
->lea
);
15043 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
);
15044 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1), outer_code
);
15045 *total
+= rtx_cost (XEXP (x
, 1), outer_code
);
15052 if (FLOAT_MODE_P (mode
))
15054 *total
= COSTS_N_INSNS (ix86_cost
->fadd
);
15062 if (!TARGET_64BIT
&& mode
== DImode
)
15064 *total
= (COSTS_N_INSNS (ix86_cost
->add
) * 2
15065 + (rtx_cost (XEXP (x
, 0), outer_code
)
15066 << (GET_MODE (XEXP (x
, 0)) != DImode
))
15067 + (rtx_cost (XEXP (x
, 1), outer_code
)
15068 << (GET_MODE (XEXP (x
, 1)) != DImode
)));
15074 if (FLOAT_MODE_P (mode
))
15076 *total
= COSTS_N_INSNS (ix86_cost
->fchs
);
15082 if (!TARGET_64BIT
&& mode
== DImode
)
15083 *total
= COSTS_N_INSNS (ix86_cost
->add
* 2);
15085 *total
= COSTS_N_INSNS (ix86_cost
->add
);
15089 if (!TARGET_SSE_MATH
|| !VALID_SSE_REG_MODE (mode
))
15094 if (FLOAT_MODE_P (mode
))
15095 *total
= COSTS_N_INSNS (ix86_cost
->fabs
);
15099 if (FLOAT_MODE_P (mode
))
15100 *total
= COSTS_N_INSNS (ix86_cost
->fsqrt
);
15104 if (XINT (x
, 1) == UNSPEC_TP
)
#if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
/* Emit code in the .init section pushing the address of constructor
   SYMBOL so the SVR3 startup code invokes it.  PRIORITY is ignored.  */
static void
ix86_svr3_asm_out_constructor (rtx symbol, int priority ATTRIBUTE_UNUSED)
{
  init_section ();
  fputs ("\tpushl $", asm_out_file);
  assemble_name (asm_out_file, XSTR (symbol, 0));
  fputc ('\n', asm_out_file);
}
#endif
15126 static int current_machopic_label_num
;
15128 /* Given a symbol name and its associated stub, write out the
15129 definition of the stub. */
15132 machopic_output_stub (FILE *file
, const char *symb
, const char *stub
)
15134 unsigned int length
;
15135 char *binder_name
, *symbol_name
, lazy_ptr_name
[32];
15136 int label
= ++current_machopic_label_num
;
15138 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
15139 symb
= (*targetm
.strip_name_encoding
) (symb
);
15141 length
= strlen (stub
);
15142 binder_name
= alloca (length
+ 32);
15143 GEN_BINDER_NAME_FOR_STUB (binder_name
, stub
, length
);
15145 length
= strlen (symb
);
15146 symbol_name
= alloca (length
+ 32);
15147 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name
, symb
, length
);
15149 sprintf (lazy_ptr_name
, "L%d$lz", label
);
15152 machopic_picsymbol_stub_section ();
15154 machopic_symbol_stub_section ();
15156 fprintf (file
, "%s:\n", stub
);
15157 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
15161 fprintf (file
, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label
, label
);
15162 fprintf (file
, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name
, label
);
15163 fprintf (file
, "\tjmp %%edx\n");
15166 fprintf (file
, "\tjmp *%s\n", lazy_ptr_name
);
15168 fprintf (file
, "%s:\n", binder_name
);
15172 fprintf (file
, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name
, label
);
15173 fprintf (file
, "\tpushl %%eax\n");
15176 fprintf (file
, "\t pushl $%s\n", lazy_ptr_name
);
15178 fprintf (file
, "\tjmp dyld_stub_binding_helper\n");
15180 machopic_lazy_symbol_ptr_section ();
15181 fprintf (file
, "%s:\n", lazy_ptr_name
);
15182 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
15183 fprintf (file
, "\t.long %s\n", binder_name
);
15185 #endif /* TARGET_MACHO */
15187 /* Order the registers for register allocator. */
15190 x86_order_regs_for_local_alloc (void)
15195 /* First allocate the local general purpose registers. */
15196 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
15197 if (GENERAL_REGNO_P (i
) && call_used_regs
[i
])
15198 reg_alloc_order
[pos
++] = i
;
15200 /* Global general purpose registers. */
15201 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
15202 if (GENERAL_REGNO_P (i
) && !call_used_regs
[i
])
15203 reg_alloc_order
[pos
++] = i
;
15205 /* x87 registers come first in case we are doing FP math
15207 if (!TARGET_SSE_MATH
)
15208 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
15209 reg_alloc_order
[pos
++] = i
;
15211 /* SSE registers. */
15212 for (i
= FIRST_SSE_REG
; i
<= LAST_SSE_REG
; i
++)
15213 reg_alloc_order
[pos
++] = i
;
15214 for (i
= FIRST_REX_SSE_REG
; i
<= LAST_REX_SSE_REG
; i
++)
15215 reg_alloc_order
[pos
++] = i
;
15217 /* x87 registers. */
15218 if (TARGET_SSE_MATH
)
15219 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
15220 reg_alloc_order
[pos
++] = i
;
15222 for (i
= FIRST_MMX_REG
; i
<= LAST_MMX_REG
; i
++)
15223 reg_alloc_order
[pos
++] = i
;
15225 /* Initialize the rest of array as we do not allocate some registers
15227 while (pos
< FIRST_PSEUDO_REGISTER
)
15228 reg_alloc_order
[pos
++] = 0;
15231 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
15232 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
15235 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
15236 struct attribute_spec.handler. */
15238 ix86_handle_struct_attribute (tree
*node
, tree name
,
15239 tree args ATTRIBUTE_UNUSED
,
15240 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
15243 if (DECL_P (*node
))
15245 if (TREE_CODE (*node
) == TYPE_DECL
)
15246 type
= &TREE_TYPE (*node
);
15251 if (!(type
&& (TREE_CODE (*type
) == RECORD_TYPE
15252 || TREE_CODE (*type
) == UNION_TYPE
)))
15254 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name
));
15255 *no_add_attrs
= true;
15258 else if ((is_attribute_p ("ms_struct", name
)
15259 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type
)))
15260 || ((is_attribute_p ("gcc_struct", name
)
15261 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type
)))))
15263 warning ("`%s' incompatible attribute ignored",
15264 IDENTIFIER_POINTER (name
));
15265 *no_add_attrs
= true;
15272 ix86_ms_bitfield_layout_p (tree record_type
)
15274 return (TARGET_USE_MS_BITFIELD_LAYOUT
&&
15275 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type
)))
15276 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type
));
15279 /* Returns an expression indicating where the this parameter is
15280 located on entry to the FUNCTION. */
15283 x86_this_parameter (tree function
)
15285 tree type
= TREE_TYPE (function
);
15289 int n
= aggregate_value_p (TREE_TYPE (type
), type
) != 0;
15290 return gen_rtx_REG (DImode
, x86_64_int_parameter_registers
[n
]);
15293 if (ix86_function_regparm (type
, function
) > 0)
15297 parm
= TYPE_ARG_TYPES (type
);
15298 /* Figure out whether or not the function has a variable number of
15300 for (; parm
; parm
= TREE_CHAIN (parm
))
15301 if (TREE_VALUE (parm
) == void_type_node
)
15303 /* If not, the this parameter is in the first argument. */
15307 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type
)))
15309 return gen_rtx_REG (SImode
, regno
);
15313 if (aggregate_value_p (TREE_TYPE (type
), type
))
15314 return gen_rtx_MEM (SImode
, plus_constant (stack_pointer_rtx
, 8));
15316 return gen_rtx_MEM (SImode
, plus_constant (stack_pointer_rtx
, 4));
15319 /* Determine whether x86_output_mi_thunk can succeed. */
15322 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED
,
15323 HOST_WIDE_INT delta ATTRIBUTE_UNUSED
,
15324 HOST_WIDE_INT vcall_offset
, tree function
)
15326 /* 64-bit can handle anything. */
15330 /* For 32-bit, everything's fine if we have one free register. */
15331 if (ix86_function_regparm (TREE_TYPE (function
), function
) < 3)
15334 /* Need a free register for vcall_offset. */
15338 /* Need a free register for GOT references. */
15339 if (flag_pic
&& !(*targetm
.binds_local_p
) (function
))
15342 /* Otherwise ok. */
15346 /* Output the assembler code for a thunk function. THUNK_DECL is the
15347 declaration for the thunk function itself, FUNCTION is the decl for
15348 the target function. DELTA is an immediate constant offset to be
15349 added to THIS. If VCALL_OFFSET is nonzero, the word at
15350 *(*this + vcall_offset) should be added to THIS. */
15353 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED
,
15354 tree thunk ATTRIBUTE_UNUSED
, HOST_WIDE_INT delta
,
15355 HOST_WIDE_INT vcall_offset
, tree function
)
15358 rtx
this = x86_this_parameter (function
);
15361 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
15362 pull it in now and let DELTA benefit. */
15365 else if (vcall_offset
)
15367 /* Put the this parameter into %eax. */
15369 xops
[1] = this_reg
= gen_rtx_REG (Pmode
, 0);
15370 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
15373 this_reg
= NULL_RTX
;
15375 /* Adjust the this parameter by a fixed constant. */
15378 xops
[0] = GEN_INT (delta
);
15379 xops
[1] = this_reg
? this_reg
: this;
15382 if (!x86_64_general_operand (xops
[0], DImode
))
15384 tmp
= gen_rtx_REG (DImode
, FIRST_REX_INT_REG
+ 2 /* R10 */);
15386 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops
);
15390 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops
);
15393 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops
);
15396 /* Adjust the this parameter by a value stored in the vtable. */
15400 tmp
= gen_rtx_REG (DImode
, FIRST_REX_INT_REG
+ 2 /* R10 */);
15403 int tmp_regno
= 2 /* ECX */;
15404 if (lookup_attribute ("fastcall",
15405 TYPE_ATTRIBUTES (TREE_TYPE (function
))))
15406 tmp_regno
= 0 /* EAX */;
15407 tmp
= gen_rtx_REG (SImode
, tmp_regno
);
15410 xops
[0] = gen_rtx_MEM (Pmode
, this_reg
);
15413 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops
);
15415 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
15417 /* Adjust the this parameter. */
15418 xops
[0] = gen_rtx_MEM (Pmode
, plus_constant (tmp
, vcall_offset
));
15419 if (TARGET_64BIT
&& !memory_operand (xops
[0], Pmode
))
15421 rtx tmp2
= gen_rtx_REG (DImode
, FIRST_REX_INT_REG
+ 3 /* R11 */);
15422 xops
[0] = GEN_INT (vcall_offset
);
15424 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops
);
15425 xops
[0] = gen_rtx_MEM (Pmode
, gen_rtx_PLUS (Pmode
, tmp
, tmp2
));
15427 xops
[1] = this_reg
;
15429 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops
);
15431 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops
);
15434 /* If necessary, drop THIS back to its stack slot. */
15435 if (this_reg
&& this_reg
!= this)
15437 xops
[0] = this_reg
;
15439 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
15442 xops
[0] = XEXP (DECL_RTL (function
), 0);
15445 if (!flag_pic
|| (*targetm
.binds_local_p
) (function
))
15446 output_asm_insn ("jmp\t%P0", xops
);
15449 tmp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, xops
[0]), UNSPEC_GOTPCREL
);
15450 tmp
= gen_rtx_CONST (Pmode
, tmp
);
15451 tmp
= gen_rtx_MEM (QImode
, tmp
);
15453 output_asm_insn ("jmp\t%A0", xops
);
15458 if (!flag_pic
|| (*targetm
.binds_local_p
) (function
))
15459 output_asm_insn ("jmp\t%P0", xops
);
15464 const char *ip
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (function
));
15465 tmp
= gen_rtx_SYMBOL_REF (Pmode
, machopic_stub_name (ip
));
15466 tmp
= gen_rtx_MEM (QImode
, tmp
);
15468 output_asm_insn ("jmp\t%0", xops
);
15471 #endif /* TARGET_MACHO */
15473 tmp
= gen_rtx_REG (SImode
, 2 /* ECX */);
15474 output_set_got (tmp
);
15477 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops
);
15478 output_asm_insn ("jmp\t{*}%1", xops
);
15484 x86_file_start (void)
15486 default_file_start ();
15487 if (X86_FILE_START_VERSION_DIRECTIVE
)
15488 fputs ("\t.version\t\"01.01\"\n", asm_out_file
);
15489 if (X86_FILE_START_FLTUSED
)
15490 fputs ("\t.global\t__fltused\n", asm_out_file
);
15491 if (ix86_asm_dialect
== ASM_INTEL
)
15492 fputs ("\t.intel_syntax\n", asm_out_file
);
15496 x86_field_alignment (tree field
, int computed
)
15498 enum machine_mode mode
;
15499 tree type
= TREE_TYPE (field
);
15501 if (TARGET_64BIT
|| TARGET_ALIGN_DOUBLE
)
15503 mode
= TYPE_MODE (TREE_CODE (type
) == ARRAY_TYPE
15504 ? get_inner_array_type (type
) : type
);
15505 if (mode
== DFmode
|| mode
== DCmode
15506 || GET_MODE_CLASS (mode
) == MODE_INT
15507 || GET_MODE_CLASS (mode
) == MODE_COMPLEX_INT
)
15508 return MIN (32, computed
);
15512 /* Output assembler code to FILE to increment profiler label # LABELNO
15513 for profiling a function entry. */
15515 x86_function_profiler (FILE *file
, int labelno ATTRIBUTE_UNUSED
)
15520 #ifndef NO_PROFILE_COUNTERS
15521 fprintf (file
, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX
, labelno
);
15523 fprintf (file
, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME
);
15527 #ifndef NO_PROFILE_COUNTERS
15528 fprintf (file
, "\tmovq\t$%sP%d,%%r11\n", LPREFIX
, labelno
);
15530 fprintf (file
, "\tcall\t%s\n", MCOUNT_NAME
);
15534 #ifndef NO_PROFILE_COUNTERS
15535 fprintf (file
, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
15536 LPREFIX
, labelno
, PROFILE_COUNT_REGISTER
);
15538 fprintf (file
, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME
);
15542 #ifndef NO_PROFILE_COUNTERS
15543 fprintf (file
, "\tmovl\t$%sP%d,%%%s\n", LPREFIX
, labelno
,
15544 PROFILE_COUNT_REGISTER
);
15546 fprintf (file
, "\tcall\t%s\n", MCOUNT_NAME
);
15550 /* We don't have exact information about the insn sizes, but we may assume
15551 quite safely that we are informed about all 1 byte insns and memory
15552 address sizes. This is enough to eliminate unnecessary padding in
15556 min_insn_size (rtx insn
)
15560 if (!INSN_P (insn
) || !active_insn_p (insn
))
15563 /* Discard alignments we've emit and jump instructions. */
15564 if (GET_CODE (PATTERN (insn
)) == UNSPEC_VOLATILE
15565 && XINT (PATTERN (insn
), 1) == UNSPECV_ALIGN
)
15567 if (GET_CODE (insn
) == JUMP_INSN
15568 && (GET_CODE (PATTERN (insn
)) == ADDR_VEC
15569 || GET_CODE (PATTERN (insn
)) == ADDR_DIFF_VEC
))
15572 /* Important case - calls are always 5 bytes.
15573 It is common to have many calls in the row. */
15574 if (GET_CODE (insn
) == CALL_INSN
15575 && symbolic_reference_mentioned_p (PATTERN (insn
))
15576 && !SIBLING_CALL_P (insn
))
15578 if (get_attr_length (insn
) <= 1)
15581 /* For normal instructions we may rely on the sizes of addresses
15582 and the presence of symbol to require 4 bytes of encoding.
15583 This is not the case for jumps where references are PC relative. */
15584 if (GET_CODE (insn
) != JUMP_INSN
)
15586 l
= get_attr_length_address (insn
);
15587 if (l
< 4 && symbolic_reference_mentioned_p (PATTERN (insn
)))
15596 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
15600 ix86_avoid_jump_misspredicts (void)
15602 rtx insn
, start
= get_insns ();
15603 int nbytes
= 0, njumps
= 0;
15606 /* Look for all minimal intervals of instructions containing 4 jumps.
15607 The intervals are bounded by START and INSN. NBYTES is the total
15608 size of instructions in the interval including INSN and not including
15609 START. When the NBYTES is smaller than 16 bytes, it is possible
15610 that the end of START and INSN ends up in the same 16byte page.
15612 The smallest offset in the page INSN can start is the case where START
15613 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
15614 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
15616 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
15619 nbytes
+= min_insn_size (insn
);
15621 fprintf(dump_file
, "Insn %i estimated to %i bytes\n",
15622 INSN_UID (insn
), min_insn_size (insn
));
15623 if ((GET_CODE (insn
) == JUMP_INSN
15624 && GET_CODE (PATTERN (insn
)) != ADDR_VEC
15625 && GET_CODE (PATTERN (insn
)) != ADDR_DIFF_VEC
)
15626 || GET_CODE (insn
) == CALL_INSN
)
15633 start
= NEXT_INSN (start
);
15634 if ((GET_CODE (start
) == JUMP_INSN
15635 && GET_CODE (PATTERN (start
)) != ADDR_VEC
15636 && GET_CODE (PATTERN (start
)) != ADDR_DIFF_VEC
)
15637 || GET_CODE (start
) == CALL_INSN
)
15638 njumps
--, isjump
= 1;
15641 nbytes
-= min_insn_size (start
);
15646 fprintf (dump_file
, "Interval %i to %i has %i bytes\n",
15647 INSN_UID (start
), INSN_UID (insn
), nbytes
);
15649 if (njumps
== 3 && isjump
&& nbytes
< 16)
15651 int padsize
= 15 - nbytes
+ min_insn_size (insn
);
15654 fprintf (dump_file
, "Padding insn %i by %i bytes!\n",
15655 INSN_UID (insn
), padsize
);
15656 emit_insn_before (gen_align (GEN_INT (padsize
)), insn
);
15661 /* AMD Athlon works faster
15662 when RET is not destination of conditional jump or directly preceded
15663 by other jump instruction. We avoid the penalty by inserting NOP just
15664 before the RET instructions in such cases. */
15666 ix86_pad_returns (void)
15670 for (e
= EXIT_BLOCK_PTR
->pred
; e
; e
= e
->pred_next
)
15672 basic_block bb
= e
->src
;
15673 rtx ret
= BB_END (bb
);
15675 bool replace
= false;
15677 if (GET_CODE (ret
) != JUMP_INSN
|| GET_CODE (PATTERN (ret
)) != RETURN
15678 || !maybe_hot_bb_p (bb
))
15680 for (prev
= PREV_INSN (ret
); prev
; prev
= PREV_INSN (prev
))
15681 if (active_insn_p (prev
) || GET_CODE (prev
) == CODE_LABEL
)
15683 if (prev
&& GET_CODE (prev
) == CODE_LABEL
)
15686 for (e
= bb
->pred
; e
; e
= e
->pred_next
)
15687 if (EDGE_FREQUENCY (e
) && e
->src
->index
>= 0
15688 && !(e
->flags
& EDGE_FALLTHRU
))
15693 prev
= prev_active_insn (ret
);
15695 && ((GET_CODE (prev
) == JUMP_INSN
&& any_condjump_p (prev
))
15696 || GET_CODE (prev
) == CALL_INSN
))
15698 /* Empty functions get branch mispredict even when the jump destination
15699 is not visible to us. */
15700 if (!prev
&& cfun
->function_frequency
> FUNCTION_FREQUENCY_UNLIKELY_EXECUTED
)
15705 emit_insn_before (gen_return_internal_long (), ret
);
15711 /* Implement machine specific optimizations. We implement padding of returns
15712 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
15716 if (TARGET_ATHLON_K8
&& optimize
&& !optimize_size
)
15717 ix86_pad_returns ();
15718 if (TARGET_FOUR_JUMP_LIMIT
&& optimize
&& !optimize_size
)
15719 ix86_avoid_jump_misspredicts ();
15722 /* Return nonzero when QImode register that must be represented via REX prefix
15725 x86_extended_QIreg_mentioned_p (rtx insn
)
15728 extract_insn_cached (insn
);
15729 for (i
= 0; i
< recog_data
.n_operands
; i
++)
15730 if (REG_P (recog_data
.operand
[i
])
15731 && REGNO (recog_data
.operand
[i
]) >= 4)
15736 /* Return nonzero when P points to register encoded via REX prefix.
15737 Called via for_each_rtx. */
15739 extended_reg_mentioned_1 (rtx
*p
, void *data ATTRIBUTE_UNUSED
)
15741 unsigned int regno
;
15744 regno
= REGNO (*p
);
15745 return REX_INT_REGNO_P (regno
) || REX_SSE_REGNO_P (regno
);
15748 /* Return true when INSN mentions register that must be encoded using REX
15751 x86_extended_reg_mentioned_p (rtx insn
)
15753 return for_each_rtx (&PATTERN (insn
), extended_reg_mentioned_1
, NULL
);
15756 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
15757 optabs would emit if we didn't have TFmode patterns. */
15760 x86_emit_floatuns (rtx operands
[2])
15762 rtx neglab
, donelab
, i0
, i1
, f0
, in
, out
;
15763 enum machine_mode mode
, inmode
;
15765 inmode
= GET_MODE (operands
[1]);
15766 if (inmode
!= SImode
15767 && inmode
!= DImode
)
15771 in
= force_reg (inmode
, operands
[1]);
15772 mode
= GET_MODE (out
);
15773 neglab
= gen_label_rtx ();
15774 donelab
= gen_label_rtx ();
15775 i1
= gen_reg_rtx (Pmode
);
15776 f0
= gen_reg_rtx (mode
);
15778 emit_cmp_and_jump_insns (in
, const0_rtx
, LT
, const0_rtx
, Pmode
, 0, neglab
);
15780 emit_insn (gen_rtx_SET (VOIDmode
, out
, gen_rtx_FLOAT (mode
, in
)));
15781 emit_jump_insn (gen_jump (donelab
));
15784 emit_label (neglab
);
15786 i0
= expand_simple_binop (Pmode
, LSHIFTRT
, in
, const1_rtx
, NULL
, 1, OPTAB_DIRECT
);
15787 i1
= expand_simple_binop (Pmode
, AND
, in
, const1_rtx
, NULL
, 1, OPTAB_DIRECT
);
15788 i0
= expand_simple_binop (Pmode
, IOR
, i0
, i1
, i0
, 1, OPTAB_DIRECT
);
15789 expand_float (f0
, i0
, 0);
15790 emit_insn (gen_rtx_SET (VOIDmode
, out
, gen_rtx_PLUS (mode
, f0
, f0
)));
15792 emit_label (donelab
);
15795 /* Return if we do not know how to pass TYPE solely in registers. */
15797 ix86_must_pass_in_stack (enum machine_mode mode
, tree type
)
15799 if (default_must_pass_in_stack (mode
, type
))
15801 return (!TARGET_64BIT
&& type
&& mode
== TImode
);
15804 /* Initialize vector TARGET via VALS. */
15806 ix86_expand_vector_init (rtx target
, rtx vals
)
15808 enum machine_mode mode
= GET_MODE (target
);
15809 int elt_size
= GET_MODE_SIZE (GET_MODE_INNER (mode
));
15810 int n_elts
= (GET_MODE_SIZE (mode
) / elt_size
);
15813 for (i
= n_elts
- 1; i
>= 0; i
--)
15814 if (GET_CODE (XVECEXP (vals
, 0, i
)) != CONST_INT
15815 && GET_CODE (XVECEXP (vals
, 0, i
)) != CONST_DOUBLE
)
15818 /* Few special cases first...
15819 ... constants are best loaded from constant pool. */
15822 emit_move_insn (target
, gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0)));
15826 /* ... values where only first field is non-constant are best loaded
15827 from the pool and overwritten via move later. */
15830 rtx op
= simplify_gen_subreg (mode
, XVECEXP (vals
, 0, 0),
15831 GET_MODE_INNER (mode
), 0);
15833 op
= force_reg (mode
, op
);
15834 XVECEXP (vals
, 0, 0) = CONST0_RTX (GET_MODE_INNER (mode
));
15835 emit_move_insn (target
, gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0)));
15836 switch (GET_MODE (target
))
15839 emit_insn (gen_sse2_movsd (target
, target
, op
));
15842 emit_insn (gen_sse_movss (target
, target
, op
));
15850 /* And the busy sequence doing rotations. */
15851 switch (GET_MODE (target
))
15856 simplify_gen_subreg (V2DFmode
, XVECEXP (vals
, 0, 0), DFmode
, 0);
15858 simplify_gen_subreg (V2DFmode
, XVECEXP (vals
, 0, 1), DFmode
, 0);
15860 vecop0
= force_reg (V2DFmode
, vecop0
);
15861 vecop1
= force_reg (V2DFmode
, vecop1
);
15862 emit_insn (gen_sse2_unpcklpd (target
, vecop0
, vecop1
));
15868 simplify_gen_subreg (V4SFmode
, XVECEXP (vals
, 0, 0), SFmode
, 0);
15870 simplify_gen_subreg (V4SFmode
, XVECEXP (vals
, 0, 1), SFmode
, 0);
15872 simplify_gen_subreg (V4SFmode
, XVECEXP (vals
, 0, 2), SFmode
, 0);
15874 simplify_gen_subreg (V4SFmode
, XVECEXP (vals
, 0, 3), SFmode
, 0);
15875 rtx tmp1
= gen_reg_rtx (V4SFmode
);
15876 rtx tmp2
= gen_reg_rtx (V4SFmode
);
15878 vecop0
= force_reg (V4SFmode
, vecop0
);
15879 vecop1
= force_reg (V4SFmode
, vecop1
);
15880 vecop2
= force_reg (V4SFmode
, vecop2
);
15881 vecop3
= force_reg (V4SFmode
, vecop3
);
15882 emit_insn (gen_sse_unpcklps (tmp1
, vecop1
, vecop3
));
15883 emit_insn (gen_sse_unpcklps (tmp2
, vecop0
, vecop2
));
15884 emit_insn (gen_sse_unpcklps (target
, tmp2
, tmp1
));
15892 /* Worker function for TARGET_MD_ASM_CLOBBERS.
15894 We do this in the new i386 backend to maintain source compatibility
15895 with the old cc0-based compiler. */
15898 ix86_md_asm_clobbers (tree clobbers
)
15900 clobbers
= tree_cons (NULL_TREE
, build_string (5, "flags"),
15902 clobbers
= tree_cons (NULL_TREE
, build_string (4, "fpsr"),
15904 clobbers
= tree_cons (NULL_TREE
, build_string (7, "dirflag"),
15909 /* Worker function for REVERSE_CONDITION. */
15912 ix86_reverse_condition (enum rtx_code code
, enum machine_mode mode
)
15914 return (mode
!= CCFPmode
&& mode
!= CCFPUmode
15915 ? reverse_condition (code
)
15916 : reverse_condition_maybe_unordered (code
));
15919 /* Output code to perform an x87 FP register move, from OPERANDS[1]
15923 output_387_reg_move (rtx insn
, rtx
*operands
)
15925 if (REG_P (operands
[1])
15926 && find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
15928 if (REGNO (operands
[0]) == FIRST_STACK_REG
15929 && TARGET_USE_FFREEP
)
15930 return "ffreep\t%y0";
15931 return "fstp\t%y0";
15933 if (STACK_TOP_P (operands
[0]))
15934 return "fld%z1\t%y1";
15938 /* Output code to perform a conditional jump to LABEL, if C2 flag in
15939 FP status register is set. */
15942 ix86_emit_fp_unordered_jump (rtx label
)
15944 rtx reg
= gen_reg_rtx (HImode
);
15947 emit_insn (gen_x86_fnstsw_1 (reg
));
15949 if (TARGET_USE_SAHF
)
15951 emit_insn (gen_x86_sahf_1 (reg
));
15953 temp
= gen_rtx_REG (CCmode
, FLAGS_REG
);
15954 temp
= gen_rtx_UNORDERED (VOIDmode
, temp
, const0_rtx
);
15958 emit_insn (gen_testqi_ext_ccno_0 (reg
, GEN_INT (0x04)));
15960 temp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
15961 temp
= gen_rtx_NE (VOIDmode
, temp
, const0_rtx
);
15964 temp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, temp
,
15965 gen_rtx_LABEL_REF (VOIDmode
, label
),
15967 temp
= gen_rtx_SET (VOIDmode
, pc_rtx
, temp
);
15968 emit_jump_insn (temp
);
15971 /* Output code to perform a log1p XFmode calculation. */
15973 void ix86_emit_i387_log1p (rtx op0
, rtx op1
)
15975 rtx label1
= gen_label_rtx ();
15976 rtx label2
= gen_label_rtx ();
15978 rtx tmp
= gen_reg_rtx (XFmode
);
15979 rtx tmp2
= gen_reg_rtx (XFmode
);
15981 emit_insn (gen_absxf2 (tmp
, op1
));
15982 emit_insn (gen_cmpxf (tmp
,
15983 CONST_DOUBLE_FROM_REAL_VALUE (
15984 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode
),
15986 emit_jump_insn (gen_bge (label1
));
15988 emit_move_insn (tmp2
, standard_80387_constant_rtx (4)); /* fldln2 */
15989 emit_insn (gen_fyl2xp1_xf3 (op0
, tmp2
, op1
));
15990 emit_jump (label2
);
15992 emit_label (label1
);
15993 emit_move_insn (tmp
, CONST1_RTX (XFmode
));
15994 emit_insn (gen_addxf3 (tmp
, op1
, tmp
));
15995 emit_move_insn (tmp2
, standard_80387_constant_rtx (4)); /* fldln2 */
15996 emit_insn (gen_fyl2x_xf3 (op0
, tmp2
, tmp
));
15998 emit_label (label2
);
16001 #include "gt-i386.h"