/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "flags.h"
#include "except.h"
#include "function.h"
#include "recog.h"
#include "expr.h"
#include "optabs.h"
#include "diagnostic-core.h"
#include "toplev.h"
#include "basic-block.h"
#include "ggc.h"
#include "target.h"
#include "target-def.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "cgraph.h"
#include "gimple.h"
#include "dwarf2.h"
#include "df.h"
#include "tm-constrs.h"
#include "params.h"
#include "cselib.h"
#include "debug.h"
#include "sched-int.h"
#include "sbitmap.h"
#include "fibheap.h"
#include "opts.h"
#include "diagnostic.h"
/* State of the upper 128 bits of AVX registers.  */

enum upper_128bits_state
{
  unknown = 0,
  unused,
  used
};

typedef struct block_info_def
{
  /* State of the upper 128bits of AVX registers at exit.  */
  enum upper_128bits_state state;
  /* TRUE if state of the upper 128bits of AVX registers is unchanged
     in this block.  */
  bool unchanged;
  /* TRUE if block has been processed.  */
  bool processed;
  /* TRUE if block has been scanned.  */
  bool scanned;
  /* Previous state of the upper 128bits of AVX registers at entry.  */
  enum upper_128bits_state prev;
} *block_info;

#define BLOCK_INFO(B) ((block_info) (B)->aux)
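
/* The aux field is allocated for every basic block by
   alloc_aux_for_blocks and released by free_aux_for_blocks in
   move_or_delete_vzeroupper below; BLOCK_INFO is only meaningful in
   between those two calls.  */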
enum call_avx256_state
{
  /* Callee returns 256bit AVX register.  */
  callee_return_avx256 = -1,
  /* Callee returns and passes 256bit AVX register.  */
  callee_return_pass_avx256,
  /* Callee passes 256bit AVX register.  */
  callee_pass_avx256,
  /* Callee doesn't return nor pass 256bit AVX register, or no
     256bit AVX register in function return.  */
  call_no_avx256,
  /* vzeroupper intrinsic.  */
  vzeroupper_intrinsic
};
/* Check if a 256bit AVX register is referenced in stores.  */

static void
check_avx256_stores (rtx dest, const_rtx set, void *data)
{
  if ((REG_P (dest)
       && VALID_AVX256_REG_MODE (GET_MODE (dest)))
      || (GET_CODE (set) == SET
	  && REG_P (SET_SRC (set))
	  && VALID_AVX256_REG_MODE (GET_MODE (SET_SRC (set)))))
    {
      enum upper_128bits_state *state
	= (enum upper_128bits_state *) data;
      *state = used;
    }
}
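
/* Illustrative usage, mirroring move_or_delete_vzeroupper_2 below:

     enum upper_128bits_state state = unused;
     note_stores (PATTERN (insn), check_avx256_stores, &state);

   After the call, STATE has become USED if the insn stores to a
   256bit AVX register.  */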
/* Helper function for move_or_delete_vzeroupper_1.  Look for vzeroupper
   in basic block BB.  Delete it if upper 128bit AVX registers are
   unused.  If it isn't deleted, move it to just before a jump insn.

   STATE is state of the upper 128bits of AVX registers at entry.  */

static void
move_or_delete_vzeroupper_2 (basic_block bb,
			     enum upper_128bits_state state)
{
  rtx insn, bb_end;
  rtx vzeroupper_insn = NULL_RTX;
  rtx pat;
  int avx256;
  bool unchanged;

  if (BLOCK_INFO (bb)->unchanged)
    {
      if (dump_file)
	fprintf (dump_file, " [bb %i] unchanged: upper 128bits: %d\n",
		 bb->index, state);

      BLOCK_INFO (bb)->state = state;
      return;
    }

  if (BLOCK_INFO (bb)->scanned && BLOCK_INFO (bb)->prev == state)
    {
      if (dump_file)
	fprintf (dump_file, " [bb %i] scanned: upper 128bits: %d\n",
		 bb->index, BLOCK_INFO (bb)->state);
      return;
    }

  BLOCK_INFO (bb)->prev = state;

  if (dump_file)
    fprintf (dump_file, " [bb %i] entry: upper 128bits: %d\n",
	     bb->index, state);

  unchanged = true;

  /* BB_END changes when it is deleted.  */
  bb_end = BB_END (bb);
  insn = BB_HEAD (bb);
  while (insn != bb_end)
    {
      insn = NEXT_INSN (insn);
      if (!NONDEBUG_INSN_P (insn))
	continue;

      /* Move vzeroupper before jump/call.  */
      if (JUMP_P (insn) || CALL_P (insn))
	{
	  if (!vzeroupper_insn)
	    continue;

	  if (PREV_INSN (insn) != vzeroupper_insn)
	    {
	      if (dump_file)
		{
		  fprintf (dump_file, "Move vzeroupper after:\n");
		  print_rtl_single (dump_file, PREV_INSN (insn));
		  fprintf (dump_file, "before:\n");
		  print_rtl_single (dump_file, insn);
		}
	      reorder_insns_nobb (vzeroupper_insn, vzeroupper_insn,
				  PREV_INSN (insn));
	    }
	  vzeroupper_insn = NULL_RTX;
	  continue;
	}

      pat = PATTERN (insn);

      /* Check insn for vzeroupper intrinsic.  */
      if (GET_CODE (pat) == UNSPEC_VOLATILE
	  && XINT (pat, 1) == UNSPECV_VZEROUPPER)
	{
	  if (dump_file)
	    {
	      /* Found vzeroupper intrinsic.  */
	      fprintf (dump_file, "Found vzeroupper:\n");
	      print_rtl_single (dump_file, insn);
	    }
	}
      else
	{
	  /* Check insn for vzeroall intrinsic.  */
	  if (GET_CODE (pat) == PARALLEL
	      && GET_CODE (XVECEXP (pat, 0, 0)) == UNSPEC_VOLATILE
	      && XINT (XVECEXP (pat, 0, 0), 1) == UNSPECV_VZEROALL)
	    {
	      state = unused;
	      unchanged = false;

	      /* Delete pending vzeroupper insertion.  */
	      if (vzeroupper_insn)
		{
		  delete_insn (vzeroupper_insn);
		  vzeroupper_insn = NULL_RTX;
		}
	    }
	  else if (state != used)
	    {
	      note_stores (pat, check_avx256_stores, &state);
	      if (state == used)
		unchanged = false;
	    }
	  continue;
	}

      /* Process vzeroupper intrinsic.  */
      avx256 = INTVAL (XVECEXP (pat, 0, 0));

      if (state == unused)
	{
	  /* Since the upper 128bits are cleared, callee must not pass
	     256bit AVX register.  We only need to check if callee
	     returns 256bit AVX register.  */
	  if (avx256 == callee_return_avx256)
	    {
	      state = used;
	      unchanged = false;
	    }

	  /* Remove unnecessary vzeroupper since upper 128bits are
	     cleared.  */
	  if (dump_file)
	    {
	      fprintf (dump_file, "Delete redundant vzeroupper:\n");
	      print_rtl_single (dump_file, insn);
	    }
	  delete_insn (insn);
	}
      else
	{
	  /* Set state to UNUSED if callee doesn't return 256bit AVX
	     register.  */
	  if (avx256 != callee_return_pass_avx256)
	    state = unused;

	  if (avx256 == callee_return_pass_avx256
	      || avx256 == callee_pass_avx256)
	    {
	      /* Must remove vzeroupper since callee passes in 256bit
		 AVX register.  */
	      if (dump_file)
		{
		  fprintf (dump_file, "Delete callee pass vzeroupper:\n");
		  print_rtl_single (dump_file, insn);
		}
	      delete_insn (insn);
	    }
	  else
	    {
	      vzeroupper_insn = insn;
	      unchanged = false;
	    }
	}
    }

  BLOCK_INFO (bb)->state = state;
  BLOCK_INFO (bb)->unchanged = unchanged;
  BLOCK_INFO (bb)->scanned = true;

  if (dump_file)
    fprintf (dump_file, " [bb %i] exit: %s: upper 128bits: %d\n",
	     bb->index, unchanged ? "unchanged" : "changed",
	     state);
}
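
/* To summarize the scan above: a vzeroall or a deleted vzeroupper
   drives STATE to UNUSED, a store into a 256bit AVX register (seen
   via check_avx256_stores) drives it to USED, and the resulting
   STATE is recorded as this block's exit state for the dataflow
   walk below.  */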
/* Helper function for move_or_delete_vzeroupper.  Process vzeroupper
   in BLOCK and check its predecessor blocks.  Treat UNKNOWN state
   as UNUSED if UNKNOWN_IS_UNUSED is true.  Return TRUE if the exit
   state is changed.  */

static bool
move_or_delete_vzeroupper_1 (basic_block block, bool unknown_is_unused)
{
  edge e;
  edge_iterator ei;
  enum upper_128bits_state state, old_state, new_state;
  bool seen_unknown;

  if (dump_file)
    fprintf (dump_file, " Process [bb %i]: status: %d\n",
	     block->index, BLOCK_INFO (block)->processed);

  if (BLOCK_INFO (block)->processed)
    return false;

  state = unused;

  /* Check all predecessor edges of this block.  */
  seen_unknown = false;
  FOR_EACH_EDGE (e, ei, block->preds)
    {
      if (e->src == block)
	continue;
      switch (BLOCK_INFO (e->src)->state)
	{
	case unknown:
	  if (!unknown_is_unused)
	    seen_unknown = true;
	  break;
	case unused:
	  break;
	case used:
	  state = used;
	  break;
	}
    }

  if (seen_unknown)
    state = unknown;

  old_state = BLOCK_INFO (block)->state;
  move_or_delete_vzeroupper_2 (block, state);
  new_state = BLOCK_INFO (block)->state;

  if (state != unknown || new_state == used)
    BLOCK_INFO (block)->processed = true;

  /* Need to rescan if the upper 128bits of AVX registers are changed
     to USED at exit.  */
  if (new_state != old_state)
    {
      if (new_state == used)
	cfun->machine->rescan_vzeroupper_p = 1;
      return true;
    }
  else
    return false;
}
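
/* A block whose exit state changes to USED forces another dataflow
   round: rescan_vzeroupper_p makes the caller swap its PENDING and
   WORKLIST heaps and revisit the affected successors until a fixed
   point is reached.  */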
/* Go through the instruction stream looking for vzeroupper.  Delete
   it if upper 128bit AVX registers are unused.  If it isn't deleted,
   move it to just before a jump insn.  */

static void
move_or_delete_vzeroupper (void)
{
  edge e;
  edge_iterator ei;
  basic_block bb;
  fibheap_t worklist, pending, fibheap_swap;
  sbitmap visited, in_worklist, in_pending, sbitmap_swap;
  int *bb_order;
  int *rc_order;
  int i;

  /* Set up block info for each basic block.  */
  alloc_aux_for_blocks (sizeof (struct block_info_def));

  /* Process outgoing edges of entry point.  */
  if (dump_file)
    fprintf (dump_file, "Process outgoing edges of entry point\n");

  FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR->succs)
    {
      move_or_delete_vzeroupper_2 (e->dest,
				   cfun->machine->caller_pass_avx256_p
				   ? used : unused);
      BLOCK_INFO (e->dest)->processed = true;
    }

  /* Compute reverse completion order of depth first search of the CFG
     so that the data-flow runs faster.  */
  rc_order = XNEWVEC (int, n_basic_blocks - NUM_FIXED_BLOCKS);
  bb_order = XNEWVEC (int, last_basic_block);
  pre_and_rev_post_order_compute (NULL, rc_order, false);
  for (i = 0; i < n_basic_blocks - NUM_FIXED_BLOCKS; i++)
    bb_order[rc_order[i]] = i;
  free (rc_order);

  worklist = fibheap_new ();
  pending = fibheap_new ();
  visited = sbitmap_alloc (last_basic_block);
  in_worklist = sbitmap_alloc (last_basic_block);
  in_pending = sbitmap_alloc (last_basic_block);
  sbitmap_zero (in_worklist);

  /* Don't check outgoing edges of entry point.  */
  sbitmap_ones (in_pending);
  FOR_EACH_BB (bb)
    if (BLOCK_INFO (bb)->processed)
      RESET_BIT (in_pending, bb->index);
    else
      {
	move_or_delete_vzeroupper_1 (bb, false);
	fibheap_insert (pending, bb_order[bb->index], bb);
      }

  if (dump_file)
    fprintf (dump_file, "Check remaining basic blocks\n");

  while (!fibheap_empty (pending))
    {
      fibheap_swap = pending;
      pending = worklist;
      worklist = fibheap_swap;
      sbitmap_swap = in_pending;
      in_pending = in_worklist;
      in_worklist = sbitmap_swap;

      sbitmap_zero (visited);

      cfun->machine->rescan_vzeroupper_p = 0;

      while (!fibheap_empty (worklist))
	{
	  bb = (basic_block) fibheap_extract_min (worklist);
	  RESET_BIT (in_worklist, bb->index);
	  gcc_assert (!TEST_BIT (visited, bb->index));
	  if (!TEST_BIT (visited, bb->index))
	    {
	      edge_iterator ei;

	      SET_BIT (visited, bb->index);

	      if (move_or_delete_vzeroupper_1 (bb, false))
		FOR_EACH_EDGE (e, ei, bb->succs)
		  {
		    if (e->dest == EXIT_BLOCK_PTR
			|| BLOCK_INFO (e->dest)->processed)
		      continue;

		    if (TEST_BIT (visited, e->dest->index))
		      {
			if (!TEST_BIT (in_pending, e->dest->index))
			  {
			    /* Send E->DEST to next round.  */
			    SET_BIT (in_pending, e->dest->index);
			    fibheap_insert (pending,
					    bb_order[e->dest->index],
					    e->dest);
			  }
		      }
		    else if (!TEST_BIT (in_worklist, e->dest->index))
		      {
			/* Add E->DEST to current round.  */
			SET_BIT (in_worklist, e->dest->index);
			fibheap_insert (worklist, bb_order[e->dest->index],
					e->dest);
		      }
		  }
	    }
	}

      if (!cfun->machine->rescan_vzeroupper_p)
	break;
    }

  free (bb_order);
  fibheap_delete (worklist);
  fibheap_delete (pending);
  sbitmap_free (visited);
  sbitmap_free (in_worklist);
  sbitmap_free (in_pending);

  if (dump_file)
    fprintf (dump_file, "Process remaining basic blocks\n");

  FOR_EACH_BB (bb)
    move_or_delete_vzeroupper_1 (bb, true);

  free_aux_for_blocks ();
}
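
/* The pass thus runs in three phases: seed the successors of the
   entry block from caller_pass_avx256_p, iterate the worklist in
   reverse completion order until no block requests a rescan, and
   finally sweep any remaining blocks treating UNKNOWN as UNUSED.  */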
static rtx legitimize_dllimport_symbol (rtx, bool);
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
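
/* For example, MODE_INDEX (SImode) is 2 and selects the SI entry of
   the multiply and divide cost arrays below; any mode other than
   QImode/HImode/SImode/DImode falls through to index 4, the "other"
   slot.  */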
/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)

#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
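
/* Each stringop_algs initializer below gives the algorithm for
   unknown block sizes, followed by {max-size, algorithm} pairs where
   a max of -1 covers all remaining sizes.  DUMMY_STRINGOP_ALGS thus
   means "always use a libcall"; it fills the second (64bit) slot of
   tunings where that slot is never exercised.  */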
static const
struct processor_costs ix86_size_cost = {/* costs for tuning for size */
  COSTS_N_BYTES (2), /* cost of an add instruction */
  COSTS_N_BYTES (3), /* cost of a lea instruction */
  COSTS_N_BYTES (2), /* variable shift costs */
  COSTS_N_BYTES (3), /* constant shift costs */
  {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
   COSTS_N_BYTES (3), /* HI */
   COSTS_N_BYTES (3), /* SI */
   COSTS_N_BYTES (3), /* DI */
   COSTS_N_BYTES (5)}, /* other */
  0, /* cost of multiply per each bit set */
  {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
   COSTS_N_BYTES (3), /* HI */
   COSTS_N_BYTES (3), /* SI */
   COSTS_N_BYTES (3), /* DI */
   COSTS_N_BYTES (5)}, /* other */
  COSTS_N_BYTES (3), /* cost of movsx */
  COSTS_N_BYTES (3), /* cost of movzx */
  0, /* "large" insn */
  2, /* MOVE_RATIO */
  2, /* cost for loading QImode using movzbl */
  {2, 2, 2}, /* cost of loading integer registers
                in QImode, HImode and SImode.
                Relative to reg-reg move (2).  */
  {2, 2, 2}, /* cost of storing integer registers */
  2, /* cost of reg,reg fld/fst */
  {2, 2, 2}, /* cost of loading fp registers
                in SFmode, DFmode and XFmode */
  {2, 2, 2}, /* cost of storing fp registers
                in SFmode, DFmode and XFmode */
  3, /* cost of moving MMX register */
  {3, 3}, /* cost of loading MMX registers
             in SImode and DImode */
  {3, 3}, /* cost of storing MMX registers
             in SImode and DImode */
  3, /* cost of moving SSE register */
  {3, 3, 3}, /* cost of loading SSE registers
                in SImode, DImode and TImode */
  {3, 3, 3}, /* cost of storing SSE registers
                in SImode, DImode and TImode */
  3, /* MMX or SSE register to integer */
  0, /* size of l1 cache */
  0, /* size of l2 cache */
  0, /* size of prefetch block */
  0, /* number of parallel prefetches */
  1, /* Branch cost */
  COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
  COSTS_N_BYTES (2), /* cost of FMUL instruction. */
  COSTS_N_BYTES (2), /* cost of FDIV instruction. */
  COSTS_N_BYTES (2), /* cost of FABS instruction. */
  COSTS_N_BYTES (2), /* cost of FCHS instruction. */
  COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  1, /* scalar_stmt_cost. */
  1, /* scalar load_cost. */
  1, /* scalar_store_cost. */
  1, /* vec_stmt_cost. */
  1, /* vec_to_scalar_cost. */
  1, /* scalar_to_vec_cost. */
  1, /* vec_align_load_cost. */
  1, /* vec_unalign_load_cost. */
  1, /* vec_store_cost. */
  1, /* cond_taken_branch_cost. */
  1, /* cond_not_taken_branch_cost. */
};
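
/* The tables that follow use the same layout, one per tuning target.
   Unlike them, ix86_size_cost above is selected when optimizing for
   size, which is why its entries are measured in bytes rather than
   in instruction latencies.  */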
/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = { /* 386 specific costs */
  COSTS_N_INSNS (1), /* cost of an add instruction */
  COSTS_N_INSNS (1), /* cost of a lea instruction */
  COSTS_N_INSNS (3), /* variable shift costs */
  COSTS_N_INSNS (2), /* constant shift costs */
  {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
   COSTS_N_INSNS (6), /* HI */
   COSTS_N_INSNS (6), /* SI */
   COSTS_N_INSNS (6), /* DI */
   COSTS_N_INSNS (6)}, /* other */
  COSTS_N_INSNS (1), /* cost of multiply per each bit set */
  {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (23), /* HI */
   COSTS_N_INSNS (23), /* SI */
   COSTS_N_INSNS (23), /* DI */
   COSTS_N_INSNS (23)}, /* other */
  COSTS_N_INSNS (3), /* cost of movsx */
  COSTS_N_INSNS (2), /* cost of movzx */
  15, /* "large" insn */
  3, /* MOVE_RATIO */
  4, /* cost for loading QImode using movzbl */
  {2, 4, 2}, /* cost of loading integer registers
                in QImode, HImode and SImode.
                Relative to reg-reg move (2).  */
  {2, 4, 2}, /* cost of storing integer registers */
  2, /* cost of reg,reg fld/fst */
  {8, 8, 8}, /* cost of loading fp registers
                in SFmode, DFmode and XFmode */
  {8, 8, 8}, /* cost of storing fp registers
                in SFmode, DFmode and XFmode */
  2, /* cost of moving MMX register */
  {4, 8}, /* cost of loading MMX registers
             in SImode and DImode */
  {4, 8}, /* cost of storing MMX registers
             in SImode and DImode */
  2, /* cost of moving SSE register */
  {4, 8, 16}, /* cost of loading SSE registers
                 in SImode, DImode and TImode */
  {4, 8, 16}, /* cost of storing SSE registers
                 in SImode, DImode and TImode */
  3, /* MMX or SSE register to integer */
  0, /* size of l1 cache */
  0, /* size of l2 cache */
  0, /* size of prefetch block */
  0, /* number of parallel prefetches */
  1, /* Branch cost */
  COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (27), /* cost of FMUL instruction. */
  COSTS_N_INSNS (88), /* cost of FDIV instruction. */
  COSTS_N_INSNS (22), /* cost of FABS instruction. */
  COSTS_N_INSNS (24), /* cost of FCHS instruction. */
  COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  1, /* scalar_stmt_cost. */
  1, /* scalar load_cost. */
  1, /* scalar_store_cost. */
  1, /* vec_stmt_cost. */
  1, /* vec_to_scalar_cost. */
  1, /* scalar_to_vec_cost. */
  1, /* vec_align_load_cost. */
  2, /* vec_unalign_load_cost. */
  1, /* vec_store_cost. */
  3, /* cond_taken_branch_cost. */
  1, /* cond_not_taken_branch_cost. */
};
static const
struct processor_costs i486_cost = { /* 486 specific costs */
  COSTS_N_INSNS (1), /* cost of an add instruction */
  COSTS_N_INSNS (1), /* cost of a lea instruction */
  COSTS_N_INSNS (3), /* variable shift costs */
  COSTS_N_INSNS (2), /* constant shift costs */
  {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
   COSTS_N_INSNS (12), /* HI */
   COSTS_N_INSNS (12), /* SI */
   COSTS_N_INSNS (12), /* DI */
   COSTS_N_INSNS (12)}, /* other */
  1, /* cost of multiply per each bit set */
  {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (40), /* HI */
   COSTS_N_INSNS (40), /* SI */
   COSTS_N_INSNS (40), /* DI */
   COSTS_N_INSNS (40)}, /* other */
  COSTS_N_INSNS (3), /* cost of movsx */
  COSTS_N_INSNS (2), /* cost of movzx */
  15, /* "large" insn */
  3, /* MOVE_RATIO */
  4, /* cost for loading QImode using movzbl */
  {2, 4, 2}, /* cost of loading integer registers
                in QImode, HImode and SImode.
                Relative to reg-reg move (2).  */
  {2, 4, 2}, /* cost of storing integer registers */
  2, /* cost of reg,reg fld/fst */
  {8, 8, 8}, /* cost of loading fp registers
                in SFmode, DFmode and XFmode */
  {8, 8, 8}, /* cost of storing fp registers
                in SFmode, DFmode and XFmode */
  2, /* cost of moving MMX register */
  {4, 8}, /* cost of loading MMX registers
             in SImode and DImode */
  {4, 8}, /* cost of storing MMX registers
             in SImode and DImode */
  2, /* cost of moving SSE register */
  {4, 8, 16}, /* cost of loading SSE registers
                 in SImode, DImode and TImode */
  {4, 8, 16}, /* cost of storing SSE registers
                 in SImode, DImode and TImode */
  3, /* MMX or SSE register to integer */
  4, /* size of l1 cache.  486 has 8kB cache
        shared for code and data, so 4kB is
        not really precise.  */
  4, /* size of l2 cache */
  0, /* size of prefetch block */
  0, /* number of parallel prefetches */
  1, /* Branch cost */
  COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (16), /* cost of FMUL instruction. */
  COSTS_N_INSNS (73), /* cost of FDIV instruction. */
  COSTS_N_INSNS (3), /* cost of FABS instruction. */
  COSTS_N_INSNS (3), /* cost of FCHS instruction. */
  COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1, /* scalar_stmt_cost. */
  1, /* scalar load_cost. */
  1, /* scalar_store_cost. */
  1, /* vec_stmt_cost. */
  1, /* vec_to_scalar_cost. */
  1, /* scalar_to_vec_cost. */
  1, /* vec_align_load_cost. */
  2, /* vec_unalign_load_cost. */
  1, /* vec_store_cost. */
  3, /* cond_taken_branch_cost. */
  1, /* cond_not_taken_branch_cost. */
};
static const
struct processor_costs pentium_cost = {
  COSTS_N_INSNS (1), /* cost of an add instruction */
  COSTS_N_INSNS (1), /* cost of a lea instruction */
  COSTS_N_INSNS (4), /* variable shift costs */
  COSTS_N_INSNS (1), /* constant shift costs */
  {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
   COSTS_N_INSNS (11), /* HI */
   COSTS_N_INSNS (11), /* SI */
   COSTS_N_INSNS (11), /* DI */
   COSTS_N_INSNS (11)}, /* other */
  0, /* cost of multiply per each bit set */
  {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (25), /* HI */
   COSTS_N_INSNS (25), /* SI */
   COSTS_N_INSNS (25), /* DI */
   COSTS_N_INSNS (25)}, /* other */
  COSTS_N_INSNS (3), /* cost of movsx */
  COSTS_N_INSNS (2), /* cost of movzx */
  8, /* "large" insn */
  6, /* MOVE_RATIO */
  6, /* cost for loading QImode using movzbl */
  {2, 4, 2}, /* cost of loading integer registers
                in QImode, HImode and SImode.
                Relative to reg-reg move (2).  */
  {2, 4, 2}, /* cost of storing integer registers */
  2, /* cost of reg,reg fld/fst */
  {2, 2, 6}, /* cost of loading fp registers
                in SFmode, DFmode and XFmode */
  {4, 4, 6}, /* cost of storing fp registers
                in SFmode, DFmode and XFmode */
  8, /* cost of moving MMX register */
  {8, 8}, /* cost of loading MMX registers
             in SImode and DImode */
  {8, 8}, /* cost of storing MMX registers
             in SImode and DImode */
  2, /* cost of moving SSE register */
  {4, 8, 16}, /* cost of loading SSE registers
                 in SImode, DImode and TImode */
  {4, 8, 16}, /* cost of storing SSE registers
                 in SImode, DImode and TImode */
  3, /* MMX or SSE register to integer */
  8, /* size of l1 cache. */
  8, /* size of l2 cache */
  0, /* size of prefetch block */
  0, /* number of parallel prefetches */
  2, /* Branch cost */
  COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (3), /* cost of FMUL instruction. */
  COSTS_N_INSNS (39), /* cost of FDIV instruction. */
  COSTS_N_INSNS (1), /* cost of FABS instruction. */
  COSTS_N_INSNS (1), /* cost of FCHS instruction. */
  COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1, /* scalar_stmt_cost. */
  1, /* scalar load_cost. */
  1, /* scalar_store_cost. */
  1, /* vec_stmt_cost. */
  1, /* vec_to_scalar_cost. */
  1, /* scalar_to_vec_cost. */
  1, /* vec_align_load_cost. */
  2, /* vec_unalign_load_cost. */
  1, /* vec_store_cost. */
  3, /* cond_taken_branch_cost. */
  1, /* cond_not_taken_branch_cost. */
};
static const
struct processor_costs pentiumpro_cost = {
  COSTS_N_INSNS (1), /* cost of an add instruction */
  COSTS_N_INSNS (1), /* cost of a lea instruction */
  COSTS_N_INSNS (1), /* variable shift costs */
  COSTS_N_INSNS (1), /* constant shift costs */
  {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
   COSTS_N_INSNS (4), /* HI */
   COSTS_N_INSNS (4), /* SI */
   COSTS_N_INSNS (4), /* DI */
   COSTS_N_INSNS (4)}, /* other */
  0, /* cost of multiply per each bit set */
  {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (17), /* HI */
   COSTS_N_INSNS (17), /* SI */
   COSTS_N_INSNS (17), /* DI */
   COSTS_N_INSNS (17)}, /* other */
  COSTS_N_INSNS (1), /* cost of movsx */
  COSTS_N_INSNS (1), /* cost of movzx */
  8, /* "large" insn */
  6, /* MOVE_RATIO */
  2, /* cost for loading QImode using movzbl */
  {4, 4, 4}, /* cost of loading integer registers
                in QImode, HImode and SImode.
                Relative to reg-reg move (2).  */
  {2, 2, 2}, /* cost of storing integer registers */
  2, /* cost of reg,reg fld/fst */
  {2, 2, 6}, /* cost of loading fp registers
                in SFmode, DFmode and XFmode */
  {4, 4, 6}, /* cost of storing fp registers
                in SFmode, DFmode and XFmode */
  2, /* cost of moving MMX register */
  {2, 2}, /* cost of loading MMX registers
             in SImode and DImode */
  {2, 2}, /* cost of storing MMX registers
             in SImode and DImode */
  2, /* cost of moving SSE register */
  {2, 2, 8}, /* cost of loading SSE registers
                in SImode, DImode and TImode */
  {2, 2, 8}, /* cost of storing SSE registers
                in SImode, DImode and TImode */
  3, /* MMX or SSE register to integer */
  8, /* size of l1 cache. */
  256, /* size of l2 cache */
  32, /* size of prefetch block */
  6, /* number of parallel prefetches */
  2, /* Branch cost */
  COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (5), /* cost of FMUL instruction. */
  COSTS_N_INSNS (56), /* cost of FDIV instruction. */
  COSTS_N_INSNS (2), /* cost of FABS instruction. */
  COSTS_N_INSNS (2), /* cost of FCHS instruction. */
  COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
  /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
     (we ensure the alignment).  For small blocks inline loop is still a
     noticeable win, for bigger blocks either rep movsl or rep movsb is
     way to go.  Rep movsb has apparently more expensive startup time in CPU,
     but after 4K the difference is down in the noise.  */
  {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
			{8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{1024, unrolled_loop},
			{8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1, /* scalar_stmt_cost. */
  1, /* scalar load_cost. */
  1, /* scalar_store_cost. */
  1, /* vec_stmt_cost. */
  1, /* vec_to_scalar_cost. */
  1, /* scalar_to_vec_cost. */
  1, /* vec_align_load_cost. */
  2, /* vec_unalign_load_cost. */
  1, /* vec_store_cost. */
  3, /* cond_taken_branch_cost. */
  1, /* cond_not_taken_branch_cost. */
};
static const
struct processor_costs geode_cost = {
  COSTS_N_INSNS (1), /* cost of an add instruction */
  COSTS_N_INSNS (1), /* cost of a lea instruction */
  COSTS_N_INSNS (2), /* variable shift costs */
  COSTS_N_INSNS (1), /* constant shift costs */
  {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
   COSTS_N_INSNS (4), /* HI */
   COSTS_N_INSNS (7), /* SI */
   COSTS_N_INSNS (7), /* DI */
   COSTS_N_INSNS (7)}, /* other */
  0, /* cost of multiply per each bit set */
  {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (23), /* HI */
   COSTS_N_INSNS (39), /* SI */
   COSTS_N_INSNS (39), /* DI */
   COSTS_N_INSNS (39)}, /* other */
  COSTS_N_INSNS (1), /* cost of movsx */
  COSTS_N_INSNS (1), /* cost of movzx */
  8, /* "large" insn */
  4, /* MOVE_RATIO */
  1, /* cost for loading QImode using movzbl */
  {1, 1, 1}, /* cost of loading integer registers
                in QImode, HImode and SImode.
                Relative to reg-reg move (2).  */
  {1, 1, 1}, /* cost of storing integer registers */
  1, /* cost of reg,reg fld/fst */
  {1, 1, 1}, /* cost of loading fp registers
                in SFmode, DFmode and XFmode */
  {4, 6, 6}, /* cost of storing fp registers
                in SFmode, DFmode and XFmode */

  1, /* cost of moving MMX register */
  {1, 1}, /* cost of loading MMX registers
             in SImode and DImode */
  {1, 1}, /* cost of storing MMX registers
             in SImode and DImode */
  1, /* cost of moving SSE register */
  {1, 1, 1}, /* cost of loading SSE registers
                in SImode, DImode and TImode */
  {1, 1, 1}, /* cost of storing SSE registers
                in SImode, DImode and TImode */
  1, /* MMX or SSE register to integer */
  64, /* size of l1 cache. */
  128, /* size of l2 cache. */
  32, /* size of prefetch block */
  1, /* number of parallel prefetches */
  1, /* Branch cost */
  COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (11), /* cost of FMUL instruction. */
  COSTS_N_INSNS (47), /* cost of FDIV instruction. */
  COSTS_N_INSNS (1), /* cost of FABS instruction. */
  COSTS_N_INSNS (1), /* cost of FCHS instruction. */
  COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1, /* scalar_stmt_cost. */
  1, /* scalar load_cost. */
  1, /* scalar_store_cost. */
  1, /* vec_stmt_cost. */
  1, /* vec_to_scalar_cost. */
  1, /* scalar_to_vec_cost. */
  1, /* vec_align_load_cost. */
  2, /* vec_unalign_load_cost. */
  1, /* vec_store_cost. */
  3, /* cond_taken_branch_cost. */
  1, /* cond_not_taken_branch_cost. */
};
static const
struct processor_costs k6_cost = {
  COSTS_N_INSNS (1), /* cost of an add instruction */
  COSTS_N_INSNS (2), /* cost of a lea instruction */
  COSTS_N_INSNS (1), /* variable shift costs */
  COSTS_N_INSNS (1), /* constant shift costs */
  {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
   COSTS_N_INSNS (3), /* HI */
   COSTS_N_INSNS (3), /* SI */
   COSTS_N_INSNS (3), /* DI */
   COSTS_N_INSNS (3)}, /* other */
  0, /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (18), /* HI */
   COSTS_N_INSNS (18), /* SI */
   COSTS_N_INSNS (18), /* DI */
   COSTS_N_INSNS (18)}, /* other */
  COSTS_N_INSNS (2), /* cost of movsx */
  COSTS_N_INSNS (2), /* cost of movzx */
  8, /* "large" insn */
  4, /* MOVE_RATIO */
  3, /* cost for loading QImode using movzbl */
  {4, 5, 4}, /* cost of loading integer registers
                in QImode, HImode and SImode.
                Relative to reg-reg move (2).  */
  {2, 3, 2}, /* cost of storing integer registers */
  4, /* cost of reg,reg fld/fst */
  {6, 6, 6}, /* cost of loading fp registers
                in SFmode, DFmode and XFmode */
  {4, 4, 4}, /* cost of storing fp registers
                in SFmode, DFmode and XFmode */
  2, /* cost of moving MMX register */
  {2, 2}, /* cost of loading MMX registers
             in SImode and DImode */
  {2, 2}, /* cost of storing MMX registers
             in SImode and DImode */
  2, /* cost of moving SSE register */
  {2, 2, 8}, /* cost of loading SSE registers
                in SImode, DImode and TImode */
  {2, 2, 8}, /* cost of storing SSE registers
                in SImode, DImode and TImode */
  6, /* MMX or SSE register to integer */
  32, /* size of l1 cache. */
  32, /* size of l2 cache.  Some models
         have integrated l2 cache, but
         optimizing for k6 is not important
         enough to worry about that. */
  32, /* size of prefetch block */
  1, /* number of parallel prefetches */
  1, /* Branch cost */
  COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (2), /* cost of FMUL instruction. */
  COSTS_N_INSNS (56), /* cost of FDIV instruction. */
  COSTS_N_INSNS (2), /* cost of FABS instruction. */
  COSTS_N_INSNS (2), /* cost of FCHS instruction. */
  COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1, /* scalar_stmt_cost. */
  1, /* scalar load_cost. */
  1, /* scalar_store_cost. */
  1, /* vec_stmt_cost. */
  1, /* vec_to_scalar_cost. */
  1, /* scalar_to_vec_cost. */
  1, /* vec_align_load_cost. */
  2, /* vec_unalign_load_cost. */
  1, /* vec_store_cost. */
  3, /* cond_taken_branch_cost. */
  1, /* cond_not_taken_branch_cost. */
};
static const
struct processor_costs athlon_cost = {
  COSTS_N_INSNS (1), /* cost of an add instruction */
  COSTS_N_INSNS (2), /* cost of a lea instruction */
  COSTS_N_INSNS (1), /* variable shift costs */
  COSTS_N_INSNS (1), /* constant shift costs */
  {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
   COSTS_N_INSNS (5), /* HI */
   COSTS_N_INSNS (5), /* SI */
   COSTS_N_INSNS (5), /* DI */
   COSTS_N_INSNS (5)}, /* other */
  0, /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26), /* HI */
   COSTS_N_INSNS (42), /* SI */
   COSTS_N_INSNS (74), /* DI */
   COSTS_N_INSNS (74)}, /* other */
  COSTS_N_INSNS (1), /* cost of movsx */
  COSTS_N_INSNS (1), /* cost of movzx */
  8, /* "large" insn */
  9, /* MOVE_RATIO */
  4, /* cost for loading QImode using movzbl */
  {3, 4, 3}, /* cost of loading integer registers
                in QImode, HImode and SImode.
                Relative to reg-reg move (2).  */
  {3, 4, 3}, /* cost of storing integer registers */
  4, /* cost of reg,reg fld/fst */
  {4, 4, 12}, /* cost of loading fp registers
                 in SFmode, DFmode and XFmode */
  {6, 6, 8}, /* cost of storing fp registers
                in SFmode, DFmode and XFmode */
  2, /* cost of moving MMX register */
  {4, 4}, /* cost of loading MMX registers
             in SImode and DImode */
  {4, 4}, /* cost of storing MMX registers
             in SImode and DImode */
  2, /* cost of moving SSE register */
  {4, 4, 6}, /* cost of loading SSE registers
                in SImode, DImode and TImode */
  {4, 4, 5}, /* cost of storing SSE registers
                in SImode, DImode and TImode */
  5, /* MMX or SSE register to integer */
  64, /* size of l1 cache. */
  256, /* size of l2 cache. */
  64, /* size of prefetch block */
  6, /* number of parallel prefetches */
  5, /* Branch cost */
  COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (4), /* cost of FMUL instruction. */
  COSTS_N_INSNS (24), /* cost of FDIV instruction. */
  COSTS_N_INSNS (2), /* cost of FABS instruction. */
  COSTS_N_INSNS (2), /* cost of FCHS instruction. */
  COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
  /* For some reason, Athlon deals better with REP prefix (relative to loops)
     compared to K8. Alignment becomes important after 8 bytes for memcpy and
     128 bytes for memset.  */
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1, /* scalar_stmt_cost. */
  1, /* scalar load_cost. */
  1, /* scalar_store_cost. */
  1, /* vec_stmt_cost. */
  1, /* vec_to_scalar_cost. */
  1, /* scalar_to_vec_cost. */
  1, /* vec_align_load_cost. */
  2, /* vec_unalign_load_cost. */
  1, /* vec_store_cost. */
  3, /* cond_taken_branch_cost. */
  1, /* cond_not_taken_branch_cost. */
};
static const
struct processor_costs k8_cost = {
  COSTS_N_INSNS (1), /* cost of an add instruction */
  COSTS_N_INSNS (2), /* cost of a lea instruction */
  COSTS_N_INSNS (1), /* variable shift costs */
  COSTS_N_INSNS (1), /* constant shift costs */
  {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
   COSTS_N_INSNS (4), /* HI */
   COSTS_N_INSNS (3), /* SI */
   COSTS_N_INSNS (4), /* DI */
   COSTS_N_INSNS (5)}, /* other */
  0, /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26), /* HI */
   COSTS_N_INSNS (42), /* SI */
   COSTS_N_INSNS (74), /* DI */
   COSTS_N_INSNS (74)}, /* other */
  COSTS_N_INSNS (1), /* cost of movsx */
  COSTS_N_INSNS (1), /* cost of movzx */
  8, /* "large" insn */
  9, /* MOVE_RATIO */
  4, /* cost for loading QImode using movzbl */
  {3, 4, 3}, /* cost of loading integer registers
                in QImode, HImode and SImode.
                Relative to reg-reg move (2).  */
  {3, 4, 3}, /* cost of storing integer registers */
  4, /* cost of reg,reg fld/fst */
  {4, 4, 12}, /* cost of loading fp registers
                 in SFmode, DFmode and XFmode */
  {6, 6, 8}, /* cost of storing fp registers
                in SFmode, DFmode and XFmode */
  2, /* cost of moving MMX register */
  {3, 3}, /* cost of loading MMX registers
             in SImode and DImode */
  {4, 4}, /* cost of storing MMX registers
             in SImode and DImode */
  2, /* cost of moving SSE register */
  {4, 3, 6}, /* cost of loading SSE registers
                in SImode, DImode and TImode */
  {4, 4, 5}, /* cost of storing SSE registers
                in SImode, DImode and TImode */
  5, /* MMX or SSE register to integer */
  64, /* size of l1 cache. */
  512, /* size of l2 cache. */
  64, /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100, /* number of parallel prefetches */
  3, /* Branch cost */
  COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (4), /* cost of FMUL instruction. */
  COSTS_N_INSNS (19), /* cost of FDIV instruction. */
  COSTS_N_INSNS (2), /* cost of FABS instruction. */
  COSTS_N_INSNS (2), /* cost of FCHS instruction. */
  COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
  /* K8 has optimized REP instruction for medium sized blocks, but for very
     small blocks it is better to use loop.  For large blocks, libcall can
     do nontemporary accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4, /* scalar_stmt_cost. */
  2, /* scalar load_cost. */
  2, /* scalar_store_cost. */
  5, /* vec_stmt_cost. */
  0, /* vec_to_scalar_cost. */
  2, /* scalar_to_vec_cost. */
  2, /* vec_align_load_cost. */
  3, /* vec_unalign_load_cost. */
  3, /* vec_store_cost. */
  3, /* cond_taken_branch_cost. */
  2, /* cond_not_taken_branch_cost. */
};
static const
struct processor_costs amdfam10_cost = {
  COSTS_N_INSNS (1), /* cost of an add instruction */
  COSTS_N_INSNS (2), /* cost of a lea instruction */
  COSTS_N_INSNS (1), /* variable shift costs */
  COSTS_N_INSNS (1), /* constant shift costs */
  {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
   COSTS_N_INSNS (4), /* HI */
   COSTS_N_INSNS (3), /* SI */
   COSTS_N_INSNS (4), /* DI */
   COSTS_N_INSNS (5)}, /* other */
  0, /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35), /* HI */
   COSTS_N_INSNS (51), /* SI */
   COSTS_N_INSNS (83), /* DI */
   COSTS_N_INSNS (83)}, /* other */
  COSTS_N_INSNS (1), /* cost of movsx */
  COSTS_N_INSNS (1), /* cost of movzx */
  8, /* "large" insn */
  9, /* MOVE_RATIO */
  4, /* cost for loading QImode using movzbl */
  {3, 4, 3}, /* cost of loading integer registers
                in QImode, HImode and SImode.
                Relative to reg-reg move (2).  */
  {3, 4, 3}, /* cost of storing integer registers */
  4, /* cost of reg,reg fld/fst */
  {4, 4, 12}, /* cost of loading fp registers
                 in SFmode, DFmode and XFmode */
  {6, 6, 8}, /* cost of storing fp registers
                in SFmode, DFmode and XFmode */
  2, /* cost of moving MMX register */
  {3, 3}, /* cost of loading MMX registers
             in SImode and DImode */
  {4, 4}, /* cost of storing MMX registers
             in SImode and DImode */
  2, /* cost of moving SSE register */
  {4, 4, 3}, /* cost of loading SSE registers
                in SImode, DImode and TImode */
  {4, 4, 5}, /* cost of storing SSE registers
                in SImode, DImode and TImode */
  3, /* MMX or SSE register to integer */
  /* On K8:
      MOVD reg64, xmmreg Double FSTORE 4
      MOVD reg32, xmmreg Double FSTORE 4
     On AMDFAM10:
      MOVD reg64, xmmreg Double FADD 3
					1/1  1/1
      MOVD reg32, xmmreg Double FADD 3
					1/1  1/1 */
  64, /* size of l1 cache. */
  512, /* size of l2 cache. */
  64, /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100, /* number of parallel prefetches */
  2, /* Branch cost */
  COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (4), /* cost of FMUL instruction. */
  COSTS_N_INSNS (19), /* cost of FDIV instruction. */
  COSTS_N_INSNS (2), /* cost of FABS instruction. */
  COSTS_N_INSNS (2), /* cost of FCHS instruction. */
  COSTS_N_INSNS (35), /* cost of FSQRT instruction. */

  /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use loop.  For large blocks, libcall can
     do nontemporary accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4, /* scalar_stmt_cost. */
  2, /* scalar load_cost. */
  2, /* scalar_store_cost. */
  6, /* vec_stmt_cost. */
  0, /* vec_to_scalar_cost. */
  2, /* scalar_to_vec_cost. */
  2, /* vec_align_load_cost. */
  2, /* vec_unalign_load_cost. */
  2, /* vec_store_cost. */
  2, /* cond_taken_branch_cost. */
  1, /* cond_not_taken_branch_cost. */
};
static const
struct processor_costs bdver1_cost = {
  COSTS_N_INSNS (1), /* cost of an add instruction */
  COSTS_N_INSNS (1), /* cost of a lea instruction */
  COSTS_N_INSNS (1), /* variable shift costs */
  COSTS_N_INSNS (1), /* constant shift costs */
  {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
   COSTS_N_INSNS (4), /* HI */
   COSTS_N_INSNS (4), /* SI */
   COSTS_N_INSNS (6), /* DI */
   COSTS_N_INSNS (6)}, /* other */
  0, /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35), /* HI */
   COSTS_N_INSNS (51), /* SI */
   COSTS_N_INSNS (83), /* DI */
   COSTS_N_INSNS (83)}, /* other */
  COSTS_N_INSNS (1), /* cost of movsx */
  COSTS_N_INSNS (1), /* cost of movzx */
  8, /* "large" insn */
  9, /* MOVE_RATIO */
  4, /* cost for loading QImode using movzbl */
  {5, 5, 4}, /* cost of loading integer registers
                in QImode, HImode and SImode.
                Relative to reg-reg move (2).  */
  {4, 4, 4}, /* cost of storing integer registers */
  2, /* cost of reg,reg fld/fst */
  {5, 5, 12}, /* cost of loading fp registers
                 in SFmode, DFmode and XFmode */
  {4, 4, 8}, /* cost of storing fp registers
                in SFmode, DFmode and XFmode */
  2, /* cost of moving MMX register */
  {4, 4}, /* cost of loading MMX registers
             in SImode and DImode */
  {4, 4}, /* cost of storing MMX registers
             in SImode and DImode */
  2, /* cost of moving SSE register */
  {4, 4, 4}, /* cost of loading SSE registers
                in SImode, DImode and TImode */
  {4, 4, 4}, /* cost of storing SSE registers
                in SImode, DImode and TImode */
  2, /* MMX or SSE register to integer */
  /* On K8:
      MOVD reg64, xmmreg Double FSTORE 4
      MOVD reg32, xmmreg Double FSTORE 4
     On AMDFAM10:
      MOVD reg64, xmmreg Double FADD 3
					1/1  1/1
      MOVD reg32, xmmreg Double FADD 3
					1/1  1/1 */
  16, /* size of l1 cache. */
  2048, /* size of l2 cache. */
  64, /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100, /* number of parallel prefetches */
  2, /* Branch cost */
  COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (6), /* cost of FMUL instruction. */
  COSTS_N_INSNS (42), /* cost of FDIV instruction. */
  COSTS_N_INSNS (2), /* cost of FABS instruction. */
  COSTS_N_INSNS (2), /* cost of FCHS instruction. */
  COSTS_N_INSNS (52), /* cost of FSQRT instruction. */

  /* BDVER1 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use loop.  For large blocks, libcall
     can do nontemporary accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  6, /* scalar_stmt_cost. */
  4, /* scalar load_cost. */
  4, /* scalar_store_cost. */
  6, /* vec_stmt_cost. */
  0, /* vec_to_scalar_cost. */
  2, /* scalar_to_vec_cost. */
  4, /* vec_align_load_cost. */
  4, /* vec_unalign_load_cost. */
  4, /* vec_store_cost. */
  2, /* cond_taken_branch_cost. */
  1, /* cond_not_taken_branch_cost. */
};
static const
struct processor_costs bdver2_cost = {
  COSTS_N_INSNS (1), /* cost of an add instruction */
  COSTS_N_INSNS (1), /* cost of a lea instruction */
  COSTS_N_INSNS (1), /* variable shift costs */
  COSTS_N_INSNS (1), /* constant shift costs */
  {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
   COSTS_N_INSNS (4), /* HI */
   COSTS_N_INSNS (4), /* SI */
   COSTS_N_INSNS (6), /* DI */
   COSTS_N_INSNS (6)}, /* other */
  0, /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35), /* HI */
   COSTS_N_INSNS (51), /* SI */
   COSTS_N_INSNS (83), /* DI */
   COSTS_N_INSNS (83)}, /* other */
  COSTS_N_INSNS (1), /* cost of movsx */
  COSTS_N_INSNS (1), /* cost of movzx */
  8, /* "large" insn */
  9, /* MOVE_RATIO */
  4, /* cost for loading QImode using movzbl */
  {5, 5, 4}, /* cost of loading integer registers
                in QImode, HImode and SImode.
                Relative to reg-reg move (2).  */
  {4, 4, 4}, /* cost of storing integer registers */
  2, /* cost of reg,reg fld/fst */
  {5, 5, 12}, /* cost of loading fp registers
                 in SFmode, DFmode and XFmode */
  {4, 4, 8}, /* cost of storing fp registers
                in SFmode, DFmode and XFmode */
  2, /* cost of moving MMX register */
  {4, 4}, /* cost of loading MMX registers
             in SImode and DImode */
  {4, 4}, /* cost of storing MMX registers
             in SImode and DImode */
  2, /* cost of moving SSE register */
  {4, 4, 4}, /* cost of loading SSE registers
                in SImode, DImode and TImode */
  {4, 4, 4}, /* cost of storing SSE registers
                in SImode, DImode and TImode */
  2, /* MMX or SSE register to integer */
  /* On K8:
      MOVD reg64, xmmreg Double FSTORE 4
      MOVD reg32, xmmreg Double FSTORE 4
     On AMDFAM10:
      MOVD reg64, xmmreg Double FADD 3
					1/1  1/1
      MOVD reg32, xmmreg Double FADD 3
					1/1  1/1 */
  16, /* size of l1 cache. */
  2048, /* size of l2 cache. */
  64, /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100, /* number of parallel prefetches */
  2, /* Branch cost */
  COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (6), /* cost of FMUL instruction. */
  COSTS_N_INSNS (42), /* cost of FDIV instruction. */
  COSTS_N_INSNS (2), /* cost of FABS instruction. */
  COSTS_N_INSNS (2), /* cost of FCHS instruction. */
  COSTS_N_INSNS (52), /* cost of FSQRT instruction. */

  /* BDVER2 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use loop.  For large blocks, libcall
     can do nontemporary accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  6, /* scalar_stmt_cost. */
  4, /* scalar load_cost. */
  4, /* scalar_store_cost. */
  6, /* vec_stmt_cost. */
  0, /* vec_to_scalar_cost. */
  2, /* scalar_to_vec_cost. */
  4, /* vec_align_load_cost. */
  4, /* vec_unalign_load_cost. */
  4, /* vec_store_cost. */
  2, /* cond_taken_branch_cost. */
  1, /* cond_not_taken_branch_cost. */
};
static const
struct processor_costs btver1_cost = {
  COSTS_N_INSNS (1), /* cost of an add instruction */
  COSTS_N_INSNS (2), /* cost of a lea instruction */
  COSTS_N_INSNS (1), /* variable shift costs */
  COSTS_N_INSNS (1), /* constant shift costs */
  {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
   COSTS_N_INSNS (4), /* HI */
   COSTS_N_INSNS (3), /* SI */
   COSTS_N_INSNS (4), /* DI */
   COSTS_N_INSNS (5)}, /* other */
  0, /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35), /* HI */
   COSTS_N_INSNS (51), /* SI */
   COSTS_N_INSNS (83), /* DI */
   COSTS_N_INSNS (83)}, /* other */
  COSTS_N_INSNS (1), /* cost of movsx */
  COSTS_N_INSNS (1), /* cost of movzx */
  8, /* "large" insn */
  9, /* MOVE_RATIO */
  4, /* cost for loading QImode using movzbl */
  {3, 4, 3}, /* cost of loading integer registers
                in QImode, HImode and SImode.
                Relative to reg-reg move (2).  */
  {3, 4, 3}, /* cost of storing integer registers */
  4, /* cost of reg,reg fld/fst */
  {4, 4, 12}, /* cost of loading fp registers
                 in SFmode, DFmode and XFmode */
  {6, 6, 8}, /* cost of storing fp registers
                in SFmode, DFmode and XFmode */
  2, /* cost of moving MMX register */
  {3, 3}, /* cost of loading MMX registers
             in SImode and DImode */
  {4, 4}, /* cost of storing MMX registers
             in SImode and DImode */
  2, /* cost of moving SSE register */
  {4, 4, 3}, /* cost of loading SSE registers
                in SImode, DImode and TImode */
  {4, 4, 5}, /* cost of storing SSE registers
                in SImode, DImode and TImode */
  3, /* MMX or SSE register to integer */
  /* On K8:
      MOVD reg64, xmmreg Double FSTORE 4
      MOVD reg32, xmmreg Double FSTORE 4
     On AMDFAM10:
      MOVD reg64, xmmreg Double FADD 3
					1/1  1/1
      MOVD reg32, xmmreg Double FADD 3
					1/1  1/1 */
  32, /* size of l1 cache. */
  512, /* size of l2 cache. */
  64, /* size of prefetch block */
  100, /* number of parallel prefetches */
  2, /* Branch cost */
  COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (4), /* cost of FMUL instruction. */
  COSTS_N_INSNS (19), /* cost of FDIV instruction. */
  COSTS_N_INSNS (2), /* cost of FABS instruction. */
  COSTS_N_INSNS (2), /* cost of FCHS instruction. */
  COSTS_N_INSNS (35), /* cost of FSQRT instruction. */

  /* BTVER1 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use loop.  For large blocks, libcall can
     do nontemporary accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4, /* scalar_stmt_cost. */
  2, /* scalar load_cost. */
  2, /* scalar_store_cost. */
  6, /* vec_stmt_cost. */
  0, /* vec_to_scalar_cost. */
  2, /* scalar_to_vec_cost. */
  2, /* vec_align_load_cost. */
  2, /* vec_unalign_load_cost. */
  2, /* vec_store_cost. */
  2, /* cond_taken_branch_cost. */
  1, /* cond_not_taken_branch_cost. */
};
static const
struct processor_costs pentium4_cost = {
  COSTS_N_INSNS (1), /* cost of an add instruction */
  COSTS_N_INSNS (3), /* cost of a lea instruction */
  COSTS_N_INSNS (4), /* variable shift costs */
  COSTS_N_INSNS (4), /* constant shift costs */
  {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
   COSTS_N_INSNS (15), /* HI */
   COSTS_N_INSNS (15), /* SI */
   COSTS_N_INSNS (15), /* DI */
   COSTS_N_INSNS (15)}, /* other */
  0, /* cost of multiply per each bit set */
  {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (56), /* HI */
   COSTS_N_INSNS (56), /* SI */
   COSTS_N_INSNS (56), /* DI */
   COSTS_N_INSNS (56)}, /* other */
  COSTS_N_INSNS (1), /* cost of movsx */
  COSTS_N_INSNS (1), /* cost of movzx */
  16, /* "large" insn */
  6, /* MOVE_RATIO */
  2, /* cost for loading QImode using movzbl */
  {4, 5, 4}, /* cost of loading integer registers
                in QImode, HImode and SImode.
                Relative to reg-reg move (2).  */
  {2, 3, 2}, /* cost of storing integer registers */
  2, /* cost of reg,reg fld/fst */
  {2, 2, 6}, /* cost of loading fp registers
                in SFmode, DFmode and XFmode */
  {4, 4, 6}, /* cost of storing fp registers
                in SFmode, DFmode and XFmode */
  2, /* cost of moving MMX register */
  {2, 2}, /* cost of loading MMX registers
             in SImode and DImode */
  {2, 2}, /* cost of storing MMX registers
             in SImode and DImode */
  12, /* cost of moving SSE register */
  {12, 12, 12}, /* cost of loading SSE registers
                   in SImode, DImode and TImode */
  {2, 2, 8}, /* cost of storing SSE registers
                in SImode, DImode and TImode */
  10, /* MMX or SSE register to integer */
  8, /* size of l1 cache. */
  256, /* size of l2 cache. */
  64, /* size of prefetch block */
  6, /* number of parallel prefetches */
  2, /* Branch cost */
  COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (7), /* cost of FMUL instruction. */
  COSTS_N_INSNS (43), /* cost of FDIV instruction. */
  COSTS_N_INSNS (2), /* cost of FABS instruction. */
  COSTS_N_INSNS (2), /* cost of FCHS instruction. */
  COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
	      {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1, /* scalar_stmt_cost. */
  1, /* scalar load_cost. */
  1, /* scalar_store_cost. */
  1, /* vec_stmt_cost. */
  1, /* vec_to_scalar_cost. */
  1, /* scalar_to_vec_cost. */
  1, /* vec_align_load_cost. */
  2, /* vec_unalign_load_cost. */
  1, /* vec_store_cost. */
  3, /* cond_taken_branch_cost. */
  1, /* cond_not_taken_branch_cost. */
};
static const
struct processor_costs nocona_cost = {
  COSTS_N_INSNS (1), /* cost of an add instruction */
  COSTS_N_INSNS (1), /* cost of a lea instruction */
  COSTS_N_INSNS (1), /* variable shift costs */
  COSTS_N_INSNS (1), /* constant shift costs */
  {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
   COSTS_N_INSNS (10), /* HI */
   COSTS_N_INSNS (10), /* SI */
   COSTS_N_INSNS (10), /* DI */
   COSTS_N_INSNS (10)}, /* other */
  0, /* cost of multiply per each bit set */
  {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (66), /* HI */
   COSTS_N_INSNS (66), /* SI */
   COSTS_N_INSNS (66), /* DI */
   COSTS_N_INSNS (66)}, /* other */
  COSTS_N_INSNS (1), /* cost of movsx */
  COSTS_N_INSNS (1), /* cost of movzx */
  16, /* "large" insn */
  17, /* MOVE_RATIO */
  4, /* cost for loading QImode using movzbl */
  {4, 4, 4}, /* cost of loading integer registers
                in QImode, HImode and SImode.
                Relative to reg-reg move (2).  */
  {4, 4, 4}, /* cost of storing integer registers */
  3, /* cost of reg,reg fld/fst */
  {12, 12, 12}, /* cost of loading fp registers
                   in SFmode, DFmode and XFmode */
  {4, 4, 4}, /* cost of storing fp registers
                in SFmode, DFmode and XFmode */
  6, /* cost of moving MMX register */
  {12, 12}, /* cost of loading MMX registers
               in SImode and DImode */
  {12, 12}, /* cost of storing MMX registers
               in SImode and DImode */
  6, /* cost of moving SSE register */
  {12, 12, 12}, /* cost of loading SSE registers
                   in SImode, DImode and TImode */
  {12, 12, 12}, /* cost of storing SSE registers
                   in SImode, DImode and TImode */
  8, /* MMX or SSE register to integer */
  8, /* size of l1 cache. */
  1024, /* size of l2 cache. */
  128, /* size of prefetch block */
  8, /* number of parallel prefetches */
  1, /* Branch cost */
  COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (8), /* cost of FMUL instruction. */
  COSTS_N_INSNS (40), /* cost of FDIV instruction. */
  COSTS_N_INSNS (3), /* cost of FABS instruction. */
  COSTS_N_INSNS (3), /* cost of FCHS instruction. */
  COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
	      {100000, unrolled_loop}, {-1, libcall}}}},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
	      {-1, libcall}}},
   {libcall, {{24, loop}, {64, unrolled_loop},
	      {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1, /* scalar_stmt_cost. */
  1, /* scalar load_cost. */
  1, /* scalar_store_cost. */
  1, /* vec_stmt_cost. */
  1, /* vec_to_scalar_cost. */
  1, /* scalar_to_vec_cost. */
  1, /* vec_align_load_cost. */
  2, /* vec_unalign_load_cost. */
  1, /* vec_store_cost. */
  3, /* cond_taken_branch_cost. */
  1, /* cond_not_taken_branch_cost. */
};
static const struct processor_costs atom_cost = {
  COSTS_N_INSNS (1),    /* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,    /* cost of a lea instruction */
  COSTS_N_INSNS (1),    /* variable shift costs */
  COSTS_N_INSNS (1),    /* constant shift costs */
  {COSTS_N_INSNS (3),   /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),   /*  HI */
   COSTS_N_INSNS (3),   /*  SI */
   COSTS_N_INSNS (4),   /*  DI */
   COSTS_N_INSNS (2)},  /*  other */
  0,                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),  /*  HI */
   COSTS_N_INSNS (42),  /*  SI */
   COSTS_N_INSNS (74),  /*  DI */
   COSTS_N_INSNS (74)}, /*  other */
  COSTS_N_INSNS (1),    /* cost of movsx */
  COSTS_N_INSNS (1),    /* cost of movzx */
  8,                    /* "large" insn */
  17,                   /* MOVE_RATIO */
  2,                    /* cost for loading QImode using movzbl */
  {4, 4, 4},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {4, 4, 4},            /* cost of storing integer registers */
  4,                    /* cost of reg,reg fld/fst */
  {12, 12, 12},         /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {6, 6, 8},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  2,                    /* cost of moving MMX register */
  {8, 8},               /* cost of loading MMX registers
                           in SImode and DImode */
  {8, 8},               /* cost of storing MMX registers
                           in SImode and DImode */
  2,                    /* cost of moving SSE register */
  {8, 8, 8},            /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {8, 8, 8},            /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  5,                    /* MMX or SSE register to integer */
  32,                   /* size of l1 cache.  */
  256,                  /* size of l2 cache.  */
  64,                   /* size of prefetch block */
  6,                    /* number of parallel prefetches */
  3,                    /* Branch cost */
  COSTS_N_INSNS (8),    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),    /* cost of FABS instruction.  */
  COSTS_N_INSNS (8),    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),   /* cost of FSQRT instruction.  */
  {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {64, rep_prefix_4_byte},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {15, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{24, loop}, {32, unrolled_loop},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,                    /* scalar_stmt_cost.  */
  1,                    /* scalar_load_cost.  */
  1,                    /* scalar_store_cost.  */
  1,                    /* vec_stmt_cost.  */
  1,                    /* vec_to_scalar_cost.  */
  1,                    /* scalar_to_vec_cost.  */
  1,                    /* vec_align_load_cost.  */
  2,                    /* vec_unalign_load_cost.  */
  1,                    /* vec_store_cost.  */
  3,                    /* cond_taken_branch_cost.  */
  1,                    /* cond_not_taken_branch_cost.  */
};
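/* Unit note (added commentary, not original code): COSTS_N_INSNS (N)
   converts N "average instructions" into the cost units used throughout
   these tables, so the Atom FDIV entry above, COSTS_N_INSNS (20), charges
   a divide as twenty times the COSTS_N_INSNS (1) add.  The bare integers,
   such as the {8, 8, 8} SSE load rows, use a different scale: they are
   relative to a reg-reg move of cost 2, as the per-field comments say.  */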
/* Generic64 should produce code tuned for Nocona and K8.  */
static const struct processor_costs generic64_cost = {
  COSTS_N_INSNS (1),    /* cost of an add instruction */
  /* On all chips taken into consideration lea is 2 cycles and more.  With
     this cost however our current implementation of synth_mult results in
     use of unnecessary temporary registers causing regression on several
     SPECfp benchmarks.  */
  COSTS_N_INSNS (1) + 1,    /* cost of a lea instruction */
  COSTS_N_INSNS (1),    /* variable shift costs */
  COSTS_N_INSNS (1),    /* constant shift costs */
  {COSTS_N_INSNS (3),   /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),   /*  HI */
   COSTS_N_INSNS (3),   /*  SI */
   COSTS_N_INSNS (4),   /*  DI */
   COSTS_N_INSNS (2)},  /*  other */
  0,                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),  /*  HI */
   COSTS_N_INSNS (42),  /*  SI */
   COSTS_N_INSNS (74),  /*  DI */
   COSTS_N_INSNS (74)}, /*  other */
  COSTS_N_INSNS (1),    /* cost of movsx */
  COSTS_N_INSNS (1),    /* cost of movzx */
  8,                    /* "large" insn */
  17,                   /* MOVE_RATIO */
  4,                    /* cost for loading QImode using movzbl */
  {4, 4, 4},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {4, 4, 4},            /* cost of storing integer registers */
  4,                    /* cost of reg,reg fld/fst */
  {12, 12, 12},         /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {6, 6, 8},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  2,                    /* cost of moving MMX register */
  {8, 8},               /* cost of loading MMX registers
                           in SImode and DImode */
  {8, 8},               /* cost of storing MMX registers
                           in SImode and DImode */
  2,                    /* cost of moving SSE register */
  {8, 8, 8},            /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {8, 8, 8},            /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  5,                    /* MMX or SSE register to integer */
  32,                   /* size of l1 cache.  */
  512,                  /* size of l2 cache.  */
  64,                   /* size of prefetch block */
  6,                    /* number of parallel prefetches */
  /* Benchmarks show large regressions on K8 sixtrack benchmark when this
     value is increased to perhaps more appropriate value of 5.  */
  3,                    /* Branch cost */
  COSTS_N_INSNS (8),    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),    /* cost of FABS instruction.  */
  COSTS_N_INSNS (8),    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),   /* cost of FSQRT instruction.  */
  {DUMMY_STRINGOP_ALGS,
   {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {DUMMY_STRINGOP_ALGS,
   {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,                    /* scalar_stmt_cost.  */
  1,                    /* scalar_load_cost.  */
  1,                    /* scalar_store_cost.  */
  1,                    /* vec_stmt_cost.  */
  1,                    /* vec_to_scalar_cost.  */
  1,                    /* scalar_to_vec_cost.  */
  1,                    /* vec_align_load_cost.  */
  2,                    /* vec_unalign_load_cost.  */
  1,                    /* vec_store_cost.  */
  3,                    /* cond_taken_branch_cost.  */
  1,                    /* cond_not_taken_branch_cost.  */
};
/* Generic32 should produce code tuned for PPro, Pentium4, Nocona,
   Athlon and K8.  */
static const struct processor_costs generic32_cost = {
  COSTS_N_INSNS (1),    /* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,    /* cost of a lea instruction */
  COSTS_N_INSNS (1),    /* variable shift costs */
  COSTS_N_INSNS (1),    /* constant shift costs */
  {COSTS_N_INSNS (3),   /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),   /*  HI */
   COSTS_N_INSNS (3),   /*  SI */
   COSTS_N_INSNS (4),   /*  DI */
   COSTS_N_INSNS (2)},  /*  other */
  0,                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),  /*  HI */
   COSTS_N_INSNS (42),  /*  SI */
   COSTS_N_INSNS (74),  /*  DI */
   COSTS_N_INSNS (74)}, /*  other */
  COSTS_N_INSNS (1),    /* cost of movsx */
  COSTS_N_INSNS (1),    /* cost of movzx */
  8,                    /* "large" insn */
  17,                   /* MOVE_RATIO */
  4,                    /* cost for loading QImode using movzbl */
  {4, 4, 4},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {4, 4, 4},            /* cost of storing integer registers */
  4,                    /* cost of reg,reg fld/fst */
  {12, 12, 12},         /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {6, 6, 8},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  2,                    /* cost of moving MMX register */
  {8, 8},               /* cost of loading MMX registers
                           in SImode and DImode */
  {8, 8},               /* cost of storing MMX registers
                           in SImode and DImode */
  2,                    /* cost of moving SSE register */
  {8, 8, 8},            /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {8, 8, 8},            /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  5,                    /* MMX or SSE register to integer */
  32,                   /* size of l1 cache.  */
  256,                  /* size of l2 cache.  */
  64,                   /* size of prefetch block */
  6,                    /* number of parallel prefetches */
  3,                    /* Branch cost */
  COSTS_N_INSNS (8),    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),    /* cost of FABS instruction.  */
  COSTS_N_INSNS (8),    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),   /* cost of FSQRT instruction.  */
  {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                    /* scalar_stmt_cost.  */
  1,                    /* scalar_load_cost.  */
  1,                    /* scalar_store_cost.  */
  1,                    /* vec_stmt_cost.  */
  1,                    /* vec_to_scalar_cost.  */
  1,                    /* scalar_to_vec_cost.  */
  1,                    /* vec_align_load_cost.  */
  2,                    /* vec_unalign_load_cost.  */
  1,                    /* vec_store_cost.  */
  3,                    /* cond_taken_branch_cost.  */
  1,                    /* cond_not_taken_branch_cost.  */
};
const struct processor_costs *ix86_cost = &pentium_cost;
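/* Illustrative use (added commentary, not original code): passes read the
   active cost table through this pointer, e.g. an rtx cost hook can charge
   ix86_cost->add for an addition; the option-override code later in this
   file repoints ix86_cost at either the size-optimizing table or at
   processor_target_table[ix86_tune].cost.  The field name "add" is assumed
   here from the "cost of an add instruction" slot in the tables above.  */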
/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_NOCONA (1<<PROCESSOR_NOCONA)
#define m_P4_NOCONA (m_PENT4 | m_NOCONA)
#define m_CORE2_32 (1<<PROCESSOR_CORE2_32)
#define m_CORE2_64 (1<<PROCESSOR_CORE2_64)
#define m_COREI7_32 (1<<PROCESSOR_COREI7_32)
#define m_COREI7_64 (1<<PROCESSOR_COREI7_64)
#define m_COREI7 (m_COREI7_32 | m_COREI7_64)
#define m_CORE2I7_32 (m_CORE2_32 | m_COREI7_32)
#define m_CORE2I7_64 (m_CORE2_64 | m_COREI7_64)
#define m_CORE2I7 (m_CORE2I7_32 | m_CORE2I7_64)
#define m_ATOM (1<<PROCESSOR_ATOM)

#define m_GEODE (1<<PROCESSOR_GEODE)
#define m_K6 (1<<PROCESSOR_K6)
#define m_K6_GEODE (m_K6 | m_GEODE)
#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
#define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
#define m_BDVER1 (1<<PROCESSOR_BDVER1)
#define m_BDVER2 (1<<PROCESSOR_BDVER2)
#define m_BDVER (m_BDVER1 | m_BDVER2)
#define m_BTVER1 (1<<PROCESSOR_BTVER1)
#define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1)

#define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
#define m_GENERIC64 (1<<PROCESSOR_GENERIC64)

/* Generic instruction choice should be common subset of supported CPUs
   (PPro/PENT4/NOCONA/CORE2/Athlon/K8).  */
#define m_GENERIC (m_GENERIC32 | m_GENERIC64)
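/* Worked example (added commentary, not original code): each m_* macro is
   a one-hot mask over enum processor_type, so every row of the tuning
   table below is simply the set of processors a knob is enabled for.
   Testing a knob against the active -mtune CPU is one AND:

     unsigned int tune_mask = 1u << ix86_tune;
     bool on = (initial_ix86_tune_features[X86_TUNE_USE_LEAVE]
                & tune_mask) != 0;

   which is the same computation used to fill ix86_tune_features[] in the
   option-override code later in this file.  */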
/* Feature tests against the various tunings.  */
unsigned char ix86_tune_features[X86_TUNE_LAST];

/* Feature tests against the various tunings used to create ix86_tune_features
   based on the processor mask.  */
static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
  /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
     negatively, so enabling for Generic64 seems like good code size
     tradeoff.  We can't enable it for 32bit generic because it does not
     work well with PPro base chips.  */
  m_386 | m_CORE2I7_64 | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC64,

  /* X86_TUNE_PUSH_MEMORY */
  m_386 | m_P4_NOCONA | m_CORE2I7 | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC,

  /* X86_TUNE_ZERO_EXTEND_WITH_AND */

  /* X86_TUNE_UNROLL_STRLEN */
  m_486 | m_PENT | m_PPRO | m_ATOM | m_CORE2I7 | m_K6 | m_AMD_MULTIPLE
  | m_GENERIC,

  /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
     on simulation result.  But after P4 was made, no performance benefit
     was observed with branch hints.  It also increases the code size.
     As a result, icc never generates branch hints.  */

  /* X86_TUNE_DOUBLE_WITH_ADD */

  /* X86_TUNE_USE_SAHF */
  m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_K8
  | m_AMDFAM10 | m_BDVER | m_BTVER1 | m_GENERIC,

  /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
     partial dependencies.  */
  m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_GEODE | m_AMD_MULTIPLE
  | m_GENERIC,

  /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
     register stalls on Generic32 compilation setting as well.  However
     in current implementation the partial register stalls are not eliminated
     very well - they can be introduced via subregs synthesized by combine
     and can happen in caller/callee saving sequences.  Because this option
     pays back little on PPro based chips and is in conflict with partial reg
     dependencies used by Athlon/P4 based chips, it is better to leave it off
     for generic32 for now.  */

  /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
  m_CORE2I7 | m_GENERIC,

  /* X86_TUNE_USE_HIMODE_FIOP */
  m_386 | m_486 | m_K6_GEODE,

  /* X86_TUNE_USE_SIMODE_FIOP */
  ~(m_PENT | m_PPRO | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC),
  /* X86_TUNE_USE_MOV0 */

  /* X86_TUNE_USE_CLTD */
  ~(m_PENT | m_CORE2I7 | m_ATOM | m_K6 | m_GENERIC),

  /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx.  */

  /* X86_TUNE_SPLIT_LONG_MOVES */

  /* X86_TUNE_READ_MODIFY_WRITE */

  /* X86_TUNE_READ_MODIFY */

  /* X86_TUNE_PROMOTE_QIMODE */
  m_386 | m_486 | m_PENT | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_AMD_MULTIPLE
  | m_GENERIC,

  /* X86_TUNE_FAST_PREFIX */
  ~(m_386 | m_486 | m_PENT),

  /* X86_TUNE_SINGLE_STRINGOP */
  m_386 | m_P4_NOCONA,

  /* X86_TUNE_QIMODE_MATH */

  /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
     register stalls.  Just like X86_TUNE_PARTIAL_REG_STALL this option
     might be considered for Generic32 if our scheme for avoiding partial
     stalls was more effective.  */

  /* X86_TUNE_PROMOTE_QI_REGS */

  /* X86_TUNE_PROMOTE_HI_REGS */

  /* X86_TUNE_SINGLE_POP: Enable if single pop insn is preferred
     over esp addition.  */
  m_386 | m_486 | m_PENT | m_PPRO,

  /* X86_TUNE_DOUBLE_POP: Enable if double pop insn is preferred
     over esp addition.  */

  /* X86_TUNE_SINGLE_PUSH: Enable if single push insn is preferred
     over esp subtraction.  */
  m_386 | m_486 | m_PENT | m_K6_GEODE,

  /* X86_TUNE_DOUBLE_PUSH: Enable if double push insn is preferred
     over esp subtraction.  */
  m_PENT | m_K6_GEODE,

  /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
     for DFmode copies */
  ~(m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_GEODE | m_AMD_MULTIPLE
    | m_GENERIC),

  /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
  m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,

  /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
     conflict here in between PPro/Pentium4 based chips that treat 128bit
     SSE registers as single units versus K8 based chips that divide SSE
     registers into two 64bit halves.  This knob promotes all store
     destinations to be 128bit to allow register renaming on 128bit SSE
     units, but usually results in one extra microop on 64bit SSE units.
     Experimental results show that disabling this option on P4 brings
     over 20% SPECfp regression, while enabling it on K8 brings roughly
     2.4% regression that can be partly masked by careful scheduling of
     moves.  */
  m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMDFAM10 | m_BDVER
  | m_GENERIC,

  /* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL */
  m_COREI7 | m_AMDFAM10 | m_BDVER | m_BTVER1,
  /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL */

  /* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL */

  /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
     are resolved on SSE register parts instead of whole registers, so we may
     maintain just lower part of scalar values in proper format leaving the
     upper part undefined.  */

  /* X86_TUNE_SSE_TYPELESS_STORES */

  /* X86_TUNE_SSE_LOAD0_BY_PXOR */
  m_PPRO | m_P4_NOCONA,

  /* X86_TUNE_MEMORY_MISMATCH_STALL */
  m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,

  /* X86_TUNE_PROLOGUE_USING_MOVE */
  m_PPRO | m_CORE2I7 | m_ATOM | m_ATHLON_K8 | m_GENERIC,

  /* X86_TUNE_EPILOGUE_USING_MOVE */
  m_PPRO | m_CORE2I7 | m_ATOM | m_ATHLON_K8 | m_GENERIC,

  /* X86_TUNE_SHIFT1 */

  /* X86_TUNE_USE_FFREEP */

  /* X86_TUNE_INTER_UNIT_MOVES */
  ~(m_AMD_MULTIPLE | m_GENERIC),

  /* X86_TUNE_INTER_UNIT_CONVERSIONS */
  ~(m_AMDFAM10 | m_BDVER),

  /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
     than 4 branch instructions in the 16 byte window.  */
  m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,

  /* X86_TUNE_SCHEDULE */
  m_PENT | m_PPRO | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_AMD_MULTIPLE
  | m_GENERIC,

  /* X86_TUNE_USE_BT */
  m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,

  /* X86_TUNE_USE_INCDEC */
  ~(m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_GENERIC),

  /* X86_TUNE_PAD_RETURNS */
  m_CORE2I7 | m_AMD_MULTIPLE | m_GENERIC,

  /* X86_TUNE_PAD_SHORT_FUNCTION: Pad short function.  */

  /* X86_TUNE_EXT_80387_CONSTANTS */
  m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_ATHLON_K8
  | m_GENERIC,

  /* X86_TUNE_SHORTEN_X87_SSE */

  /* X86_TUNE_AVOID_VECTOR_DECODE */
  m_CORE2I7_64 | m_K8 | m_GENERIC64,

  /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have same latency for HImode
     and SImode multiply, but 386 and 486 do HImode multiply faster.  */

  /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
     vector path on AMD machines.  */
  m_CORE2I7_64 | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1 | m_GENERIC64,
  /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
     machines.  */
  m_CORE2I7_64 | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1 | m_GENERIC64,

  /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
     than a MOV.  */

  /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
     but one byte longer.  */

  /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
     operand that cannot be represented using a modRM byte.  The XOR
     replacement is long decoded, so this split helps here as well.  */

  /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
     from FP to FP.  */
  m_CORE2I7 | m_AMDFAM10 | m_GENERIC,

  /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
     from integer to FP.  */

  /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
     with a subsequent conditional jump instruction into a single
     compare-and-branch uop.  */

  /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit.  This flag
     will impact LEA instruction selection.  */

  /* X86_TUNE_VECTORIZE_DOUBLE: Enable double precision vector
     operations.  */

  /* X86_TUNE_SOFTWARE_PREFETCHING_BENEFICIAL: Enable software prefetching
     at -O3.  For the moment, the prefetching seems badly tuned for Intel
     chips.  */
  m_K6_GEODE | m_AMD_MULTIPLE,

  /* X86_TUNE_AVX128_OPTIMAL: Enable 128-bit AVX instruction generation for
     the auto-vectorizer.  */
};

/* Feature tests against the various architecture variations.  */
unsigned char ix86_arch_features[X86_ARCH_LAST];

/* Feature tests against the various architecture variations, used to create
   ix86_arch_features based on the processor mask.  */
static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
  /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro.  */
  ~(m_386 | m_486 | m_PENT | m_K6),

  /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486.  */
  ~m_386,

  /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium.  */
  ~(m_386 | m_486),

  /* X86_ARCH_XADD: Exchange and add was added for 80486.  */
  ~m_386,

  /* X86_ARCH_BSWAP: Byteswap was added for 80486.  */
  ~m_386,
};
static const unsigned int x86_accumulate_outgoing_args
  = m_PPRO | m_P4_NOCONA | m_ATOM | m_CORE2I7 | m_AMD_MULTIPLE | m_GENERIC;

static const unsigned int x86_arch_always_fancy_math_387
  = m_PENT | m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE
    | m_GENERIC;

static const unsigned int x86_avx256_split_unaligned_load
  = m_COREI7 | m_GENERIC;

static const unsigned int x86_avx256_split_unaligned_store
  = m_COREI7 | m_BDVER | m_GENERIC;
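/* Illustrative use (added commentary, not original code): these masks gate
   -mtune-dependent defaults during option override, along the lines of

     if (ix86_tune_mask & x86_avx256_split_unaligned_load)
       target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;

   so unaligned 256-bit AVX loads default to being split into two 128-bit
   halves when tuning for Core i7 or generic (plus Bulldozer for stores).
   The real guard also respects flags the user set explicitly; the two-line
   form above is a simplification.  */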
/* In case the average insn count for single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 20

/* Names for 8 (low), 8 (high), and 16-bit registers, respectively.  */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */

enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  /* ax, dx, cx, bx */
  AREG, DREG, CREG, BREG,
  /* si, di, bp, sp */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  /* FP registers */
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* arg pointer */
  NON_Q_REGS,
  /* flags, fpsr, fpcr, frame */
  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  /* SSE registers */
  SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
  /* MMX registers */
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  MMX_REGS, MMX_REGS,
  /* extended integer registers */
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  /* SSE REX registers */
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
};
/* The "default" register map used in 32bit mode.  */

int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 4, 5,               /* general regs */
  12, 13, 14, 15, 16, 17, 18, 19,       /* fp regs */
  -1, -1, -1, -1, -1,                   /* arg, flags, fpsr, fpcr, frame */
  21, 22, 23, 24, 25, 26, 27, 28,       /* SSE */
  29, 30, 31, 32, 33, 34, 35, 36,       /* MMX */
  -1, -1, -1, -1, -1, -1, -1, -1,       /* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,       /* extended SSE registers */
};

/* The "default" register map used in 64bit mode.  */

int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 1, 2, 3, 4, 5, 6, 7,               /* general regs */
  33, 34, 35, 36, 37, 38, 39, 40,       /* fp regs */
  -1, -1, -1, -1, -1,                   /* arg, flags, fpsr, fpcr, frame */
  17, 18, 19, 20, 21, 22, 23, 24,       /* SSE */
  41, 42, 43, 44, 45, 46, 47, 48,       /* MMX */
  8, 9, 10, 11, 12, 13, 14, 15,         /* extended integer registers */
  25, 26, 27, 28, 29, 30, 31, 32,       /* extended SSE registers */
};
/* Define the register numbers to be used in Dwarf debugging information.
   The SVR4 reference port C compiler uses the following register numbers
   in its Dwarf output code:
        0 for %eax (gcc regno = 0)
        1 for %ecx (gcc regno = 2)
        2 for %edx (gcc regno = 1)
        3 for %ebx (gcc regno = 3)
        4 for %esp (gcc regno = 7)
        5 for %ebp (gcc regno = 6)
        6 for %esi (gcc regno = 4)
        7 for %edi (gcc regno = 5)
   The following three DWARF register numbers are never generated by
   the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
   believes these numbers have these meanings.
        8 for %eip (no gcc equivalent)
        9 for %eflags (gcc regno = 17)
        10 for %trapno (no gcc equivalent)
   It is not at all clear how we should number the FP stack registers
   for the x86 architecture.  If the version of SDB on x86/svr4 were
   a bit less brain dead with respect to floating-point then we would
   have a precedent to follow with respect to DWARF register numbers
   for x86 FP registers, but the SDB on x86/svr4 is so completely
   broken with respect to FP registers that it is hardly worth thinking
   of it as something to strive for compatibility with.
   The version of x86/svr4 SDB I have at the moment does (partially)
   seem to believe that DWARF register number 11 is associated with
   the x86 register %st(0), but that's about all.  Higher DWARF
   register numbers don't seem to be associated with anything in
   particular, and even for DWARF regno 11, SDB only seems to
   understand that it should say that a variable lives in %st(0) (when
   asked via an `=' command) if we said it was in DWARF regno 11,
   but SDB still prints garbage when asked for the value of the
   variable in question (via a `/' command).
   (Also note that the labels SDB prints for various FP stack regs
   when doing an `x' command are all wrong.)
   Note that these problems generally don't affect the native SVR4
   C compiler because it doesn't allow the use of -O with -g and
   because when it is *not* optimizing, it allocates a memory
   location for each floating-point variable, and the memory
   location is what gets described in the DWARF AT_location
   attribute for the variable in question.
   Regardless of the severe mental illness of the x86/svr4 SDB, we
   do something sensible here and we use the following DWARF
   register numbers.  Note that these are all stack-top-relative
   numbers.
        11 for %st(0) (gcc regno = 8)
        12 for %st(1) (gcc regno = 9)
        13 for %st(2) (gcc regno = 10)
        14 for %st(3) (gcc regno = 11)
        15 for %st(4) (gcc regno = 12)
        16 for %st(5) (gcc regno = 13)
        17 for %st(6) (gcc regno = 14)
        18 for %st(7) (gcc regno = 15)  */

int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 5, 4,               /* general regs */
  11, 12, 13, 14, 15, 16, 17, 18,       /* fp regs */
  -1, 9, -1, -1, -1,                    /* arg, flags, fpsr, fpcr, frame */
  21, 22, 23, 24, 25, 26, 27, 28,       /* SSE registers */
  29, 30, 31, 32, 33, 34, 35, 36,       /* MMX registers */
  -1, -1, -1, -1, -1, -1, -1, -1,       /* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,       /* extended SSE registers */
};
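/* Example reading (added commentary, not original code): the maps are
   indexed by gcc regno and yield the debugger's register number.  Under
   the SVR4 numbering above, gcc regno 6 (%ebp) maps to DWARF register 5
   and gcc regno 7 (%esp) to DWARF register 4, i.e.
   svr4_dbx_register_map[6] == 5 and svr4_dbx_register_map[7] == 4,
   matching the %esp/%ebp rows of the big comment.  */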
/* Define parameter passing and return registers.  */

static int const x86_64_int_parameter_registers[6] =
{
  DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
};

static int const x86_64_ms_abi_int_parameter_registers[4] =
{
  CX_REG, DX_REG, R8_REG, R9_REG
};

static int const x86_64_int_return_registers[4] =
{
  AX_REG, DX_REG, DI_REG, SI_REG
};
/* Define the structure for the machine field in struct function.  */

struct GTY(()) stack_local_entry {
  unsigned short mode;
  unsigned short n;
  rtx rtl;
  struct stack_local_entry *next;
};
/* Structure describing stack frame layout.
   Stack grows downward:

   saved static chain                   if ix86_static_chain_on_stack

   saved frame pointer                  if frame_pointer_needed
                                        <- HARD_FRAME_POINTER
   [saved SSE regs]
                                        <- sse_regs_save_offset
   [va_arg registers]  |
   [padding2]          | = to_allocate  */

struct ix86_frame
{
  int outgoing_arguments_size;
  HOST_WIDE_INT frame;

  /* The offsets relative to ARG_POINTER.  */
  HOST_WIDE_INT frame_pointer_offset;
  HOST_WIDE_INT hard_frame_pointer_offset;
  HOST_WIDE_INT stack_pointer_offset;
  HOST_WIDE_INT hfp_save_offset;
  HOST_WIDE_INT reg_save_offset;
  HOST_WIDE_INT sse_reg_save_offset;

  /* When save_regs_using_mov is set, emit prologue using
     move instead of push instructions.  */
  bool save_regs_using_mov;
};
/* Which cpu are we scheduling for.  */
enum attr_cpu ix86_schedule;

/* Which cpu are we optimizing for.  */
enum processor_type ix86_tune;

/* Which instruction set architecture to use.  */
enum processor_type ix86_arch;

/* true if sse prefetch instruction is not NOOP.  */
int x86_prefetch_sse;
/* -mstackrealign option */
static const char ix86_force_align_arg_pointer_string[]
  = "force_align_arg_pointer";

static rtx (*ix86_gen_leave) (void);
static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
/* Preferred alignment for stack boundary in bits.  */
unsigned int ix86_preferred_stack_boundary;

/* Alignment for incoming stack boundary in bits specified at
   command line.  */
static unsigned int ix86_user_incoming_stack_boundary;

/* Default alignment for incoming stack boundary in bits.  */
static unsigned int ix86_default_incoming_stack_boundary;

/* Alignment for incoming stack boundary in bits.  */
unsigned int ix86_incoming_stack_boundary;

/* Calling abi specific va_list type nodes.  */
static GTY(()) tree sysv_va_list_type_node;
static GTY(()) tree ms_va_list_type_node;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
char internal_label_prefix[16];
int internal_label_prefix_len;

/* Fence to use after loop using movnt.  */
tree x86_mfence;
/* Register class used for passing given 64bit part of the argument.
   These represent classes as documented by the PS ABI, with the exception
   of SSESF, SSEDF classes, that are basically SSE class, just gcc will
   use SF or DFmode move instead of DImode to avoid reformatting penalties.

   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
   whenever possible (upper half does contain padding).  */
enum x86_64_reg_class
  {
    X86_64_NO_CLASS,
    X86_64_INTEGER_CLASS,
    X86_64_INTEGERSI_CLASS,
    X86_64_SSE_CLASS,
    X86_64_SSESF_CLASS,
    X86_64_SSEDF_CLASS,
    X86_64_SSEUP_CLASS,
    X86_64_X87_CLASS,
    X86_64_X87UP_CLASS,
    X86_64_COMPLEX_X87_CLASS,
    X86_64_MEMORY_CLASS
  };

#define MAX_CLASSES 4

/* Table of constants used by fldpi, fldln2, etc....  */
static REAL_VALUE_TYPE ext_80387_constants_table[5];
static bool ext_80387_constants_init = 0;
static struct machine_function * ix86_init_machine_status (void);
static rtx ix86_function_value (const_tree, const_tree, bool);
static bool ix86_function_value_regno_p (const unsigned int);
static unsigned int ix86_function_arg_boundary (enum machine_mode,
                                                const_tree);
static rtx ix86_static_chain (const_tree, bool);
static int ix86_function_regparm (const_tree, const_tree);
static void ix86_compute_frame_layout (struct ix86_frame *);
static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
                                                 rtx, rtx);
static void ix86_add_new_builtins (int);
static rtx ix86_expand_vec_perm_builtin (tree);
static tree ix86_canonical_va_list_type (tree);
static void predict_jump (int);
static unsigned int split_stack_prologue_scratch_regno (void);
static bool i386_asm_output_addr_const_extra (FILE *, rtx);

enum ix86_function_specific_strings
{
  IX86_FUNCTION_SPECIFIC_ARCH,
  IX86_FUNCTION_SPECIFIC_TUNE,
  IX86_FUNCTION_SPECIFIC_MAX
};

static char *ix86_target_string (int, int, const char *, const char *,
                                 enum fpmath_unit, bool);
static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
static void ix86_function_specific_save (struct cl_target_option *);
static void ix86_function_specific_restore (struct cl_target_option *);
static void ix86_function_specific_print (FILE *, int,
                                          struct cl_target_option *);
static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
static bool ix86_valid_target_attribute_inner_p (tree, char *[],
                                                 struct gcc_options *);
static bool ix86_can_inline_p (tree, tree);
static void ix86_set_current_function (tree);
static unsigned int ix86_minimum_incoming_stack_boundary (bool);

static enum calling_abi ix86_function_abi (const_tree);
#ifndef SUBTARGET32_DEFAULT_CPU
#define SUBTARGET32_DEFAULT_CPU "i386"
#endif

/* The svr4 ABI for the i386 says that records and unions are returned
   in memory.  */
#ifndef DEFAULT_PCC_STRUCT_RETURN
#define DEFAULT_PCC_STRUCT_RETURN 1
#endif

/* Whether -mtune= or -march= were specified */
static int ix86_tune_defaulted;
static int ix86_arch_specified;
/* Vectorization library interface and handlers.  */
static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);

static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
/* Processor target table, indexed by processor number */
struct ptt
{
  const struct processor_costs *cost;   /* Processor costs */
  const int align_loop;                 /* Default alignments.  */
  const int align_loop_max_skip;
  const int align_jump;
  const int align_jump_max_skip;
  const int align_func;
};

static const struct ptt processor_target_table[PROCESSOR_max] =
{
  {&i386_cost, 4, 3, 4, 3, 4},
  {&i486_cost, 16, 15, 16, 15, 16},
  {&pentium_cost, 16, 7, 16, 7, 16},
  {&pentiumpro_cost, 16, 15, 16, 10, 16},
  {&geode_cost, 0, 0, 0, 0, 0},
  {&k6_cost, 32, 7, 32, 7, 32},
  {&athlon_cost, 16, 7, 16, 7, 16},
  {&pentium4_cost, 0, 0, 0, 0, 0},
  {&k8_cost, 16, 7, 16, 7, 16},
  {&nocona_cost, 0, 0, 0, 0, 0},
  /* Core 2 32-bit.  */
  {&generic32_cost, 16, 10, 16, 10, 16},
  /* Core 2 64-bit.  */
  {&generic64_cost, 16, 10, 16, 10, 16},
  /* Core i7 32-bit.  */
  {&generic32_cost, 16, 10, 16, 10, 16},
  /* Core i7 64-bit.  */
  {&generic64_cost, 16, 10, 16, 10, 16},
  {&generic32_cost, 16, 7, 16, 7, 16},
  {&generic64_cost, 16, 10, 16, 10, 16},
  {&amdfam10_cost, 32, 24, 32, 7, 32},
  {&bdver1_cost, 32, 24, 32, 7, 32},
  {&bdver2_cost, 32, 24, 32, 7, 32},
  {&btver1_cost, 32, 24, 32, 7, 32},
  {&atom_cost, 16, 7, 16, 7, 16}
};
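/* Example reading (added commentary, not original code): each row pairs a
   cost table with the default alignment parameters applied when no
   -falign-* option is given.  The K6 row {&k6_cost, 32, 7, 32, 7, 32} asks
   for 32-byte loop, jump and function alignment with a maximum skip of 7,
   while the all-zero Pentium 4 and Nocona rows disable alignment padding.
   Fields correspond one-to-one with struct ptt above.  */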
static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
/* Return true if a red-zone is in use.  */

static inline bool
ix86_using_red_zone (void)
{
  return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
}
/* Return a string that documents the current -m options.  The caller is
   responsible for freeing the string.  */

static char *
ix86_target_string (int isa, int flags, const char *arch, const char *tune,
                    enum fpmath_unit fpmath, bool add_nl_p)
{
  struct ix86_target_opts
  {
    const char *option;         /* option string */
    int mask;                   /* isa mask options */
  };

  /* This table is ordered so that options like -msse4.2, which imply
     the preceding options, match first.  */
  static struct ix86_target_opts isa_opts[] =
  {
    { "-m64",           OPTION_MASK_ISA_64BIT },
    { "-mfma4",         OPTION_MASK_ISA_FMA4 },
    { "-mfma",          OPTION_MASK_ISA_FMA },
    { "-mxop",          OPTION_MASK_ISA_XOP },
    { "-mlwp",          OPTION_MASK_ISA_LWP },
    { "-msse4a",        OPTION_MASK_ISA_SSE4A },
    { "-msse4.2",       OPTION_MASK_ISA_SSE4_2 },
    { "-msse4.1",       OPTION_MASK_ISA_SSE4_1 },
    { "-mssse3",        OPTION_MASK_ISA_SSSE3 },
    { "-msse3",         OPTION_MASK_ISA_SSE3 },
    { "-msse2",         OPTION_MASK_ISA_SSE2 },
    { "-msse",          OPTION_MASK_ISA_SSE },
    { "-m3dnow",        OPTION_MASK_ISA_3DNOW },
    { "-m3dnowa",       OPTION_MASK_ISA_3DNOW_A },
    { "-mmmx",          OPTION_MASK_ISA_MMX },
    { "-mabm",          OPTION_MASK_ISA_ABM },
    { "-mbmi",          OPTION_MASK_ISA_BMI },
    { "-mtbm",          OPTION_MASK_ISA_TBM },
    { "-mpopcnt",       OPTION_MASK_ISA_POPCNT },
    { "-mmovbe",        OPTION_MASK_ISA_MOVBE },
    { "-mcrc32",        OPTION_MASK_ISA_CRC32 },
    { "-maes",          OPTION_MASK_ISA_AES },
    { "-mpclmul",       OPTION_MASK_ISA_PCLMUL },
    { "-mfsgsbase",     OPTION_MASK_ISA_FSGSBASE },
    { "-mrdrnd",        OPTION_MASK_ISA_RDRND },
    { "-mf16c",         OPTION_MASK_ISA_F16C },
  };

  static struct ix86_target_opts flag_opts[] =
  {
    { "-m128bit-long-double",           MASK_128BIT_LONG_DOUBLE },
    { "-m80387",                        MASK_80387 },
    { "-maccumulate-outgoing-args",     MASK_ACCUMULATE_OUTGOING_ARGS },
    { "-malign-double",                 MASK_ALIGN_DOUBLE },
    { "-mcld",                          MASK_CLD },
    { "-mfp-ret-in-387",                MASK_FLOAT_RETURNS },
    { "-mieee-fp",                      MASK_IEEE_FP },
    { "-minline-all-stringops",         MASK_INLINE_ALL_STRINGOPS },
    { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
    { "-mms-bitfields",                 MASK_MS_BITFIELD_LAYOUT },
    { "-mno-align-stringops",           MASK_NO_ALIGN_STRINGOPS },
    { "-mno-fancy-math-387",            MASK_NO_FANCY_MATH_387 },
    { "-mno-push-args",                 MASK_NO_PUSH_ARGS },
    { "-mno-red-zone",                  MASK_NO_RED_ZONE },
    { "-momit-leaf-frame-pointer",      MASK_OMIT_LEAF_FRAME_POINTER },
    { "-mrecip",                        MASK_RECIP },
    { "-mrtd",                          MASK_RTD },
    { "-msseregparm",                   MASK_SSEREGPARM },
    { "-mstack-arg-probe",              MASK_STACK_PROBE },
    { "-mtls-direct-seg-refs",          MASK_TLS_DIRECT_SEG_REFS },
    { "-mvect8-ret-in-mem",             MASK_VECT8_RETURNS },
    { "-m8bit-idiv",                    MASK_USE_8BIT_IDIV },
    { "-mvzeroupper",                   MASK_VZEROUPPER },
    { "-mavx256-split-unaligned-load",  MASK_AVX256_SPLIT_UNALIGNED_LOAD },
    { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE },
    { "-mprefer-avx128",                MASK_PREFER_AVX128 },
  };

  const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];

  char isa_other[40];
  char target_other[40];
  size_t num = 0;
  size_t i, j;
  char *ret;
  char *ptr;
  size_t len;
  size_t line_len;
  size_t sep_len;
  memset (opts, '\0', sizeof (opts));

  /* Add -march= option.  */
  if (arch)
    {
      opts[num][0] = "-march=";
      opts[num++][1] = arch;
    }

  /* Add -mtune= option.  */
  if (tune)
    {
      opts[num][0] = "-mtune=";
      opts[num++][1] = tune;
    }

  /* Pick out the options in isa options.  */
  for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
    if ((isa & isa_opts[i].mask) != 0)
      {
        opts[num++][0] = isa_opts[i].option;
        isa &= ~ isa_opts[i].mask;
      }

  if (isa && add_nl_p)
    {
      opts[num++][0] = isa_other;
      sprintf (isa_other, "(other isa: %#x)", isa);
    }

  /* Add flag options.  */
  for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
    if ((flags & flag_opts[i].mask) != 0)
      {
        opts[num++][0] = flag_opts[i].option;
        flags &= ~ flag_opts[i].mask;
      }

  if (flags && add_nl_p)
    {
      opts[num++][0] = target_other;
      sprintf (target_other, "(other flags: %#x)", flags);
    }

  /* Add -mfpmath= option.  */
  if (fpmath)
    {
      opts[num][0] = "-mfpmath=";
      switch ((int) fpmath)
        {
        case FPMATH_387:
          opts[num++][1] = "387";
          break;
        case FPMATH_SSE:
          opts[num++][1] = "sse";
          break;
        case FPMATH_387 | FPMATH_SSE:
          opts[num++][1] = "sse+387";
          break;
        default:
          gcc_unreachable ();
        }
    }

  gcc_assert (num < ARRAY_SIZE (opts));
  /* Size the string.  */
  len = 0;
  sep_len = (add_nl_p) ? 3 : 1;
  for (i = 0; i < num; i++)
    {
      len += sep_len;
      for (j = 0; j < 2; j++)
        if (opts[i][j])
          len += strlen (opts[i][j]);
    }

  /* Build the string.  */
  ret = ptr = (char *) xmalloc (len);
  line_len = 0;

  for (i = 0; i < num; i++)
    {
      size_t len2[2];

      for (j = 0; j < 2; j++)
        len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;

      if (i != 0)
        {
          *ptr++ = ' ';
          line_len++;

          if (add_nl_p && line_len + len2[0] + len2[1] > 70)
            {
              *ptr++ = '\n';
              line_len = 0;
            }
        }

      for (j = 0; j < 2; j++)
        if (opts[i][j])
          {
            memcpy (ptr, opts[i][j], len2[j]);
            ptr += len2[j];
            line_len += len2[j];
          }
    }

  *ptr = '\0';
  gcc_assert (ret + len >= ptr);

  return ret;
}
/* Return true if profiling code should be emitted before the prologue,
   false otherwise.  On x86 this is the case when -mfentry ("hotfix"
   style profiling) is in effect.  */

static bool
ix86_profile_before_prologue (void)
{
  return flag_fentry != 0;
}
/* Function that is callable from the debugger to print the current
   options.  */

static void
ix86_debug_options (void)
{
  char *opts = ix86_target_string (ix86_isa_flags, target_flags,
                                   ix86_arch_string, ix86_tune_string,
                                   ix86_fpmath, true);

  if (opts)
    {
      fprintf (stderr, "%s\n\n", opts);
      free (opts);
    }
  else
    fputs ("<no options>\n\n", stderr);
}
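/* Usage note (added commentary, not original code): ix86_debug_options is
   intended to be called by hand from a debugger rather than from compiler
   code, e.g.

     (gdb) call ix86_debug_options ()
     -m64 -msse2 -msse -mmmx -march=x86-64 -mtune=generic

   where the printed line is whatever ix86_target_string assembles from the
   current ISA and flag words (the sample output is indicative only).  */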
/* Override various settings based on options.  If MAIN_ARGS_P, the
   options are from the command line, otherwise they are from
   attributes.  */

static void
ix86_option_override_internal (bool main_args_p)
{
  int i;
  unsigned int ix86_arch_mask, ix86_tune_mask;
  const bool ix86_tune_specified = (ix86_tune_string != NULL);
  const char *prefix;
  const char *suffix;
  const char *sw;

  enum pta_flags
    {
      PTA_64BIT = 1 << 0,
      PTA_SSE = 1 << 1,
      PTA_SSE2 = 1 << 2,
      PTA_SSE3 = 1 << 3,
      PTA_PREFETCH_SSE = 1 << 4,
      PTA_3DNOW = 1 << 5,
      PTA_3DNOW_A = 1 << 6,
      PTA_MMX = 1 << 7,
      PTA_CX16 = 1 << 8,
      PTA_ABM = 1 << 9,
      PTA_POPCNT = 1 << 10,
      PTA_AVX = 1 << 11,
      PTA_SSE4A = 1 << 12,
      PTA_NO_SAHF = 1 << 13,
      PTA_SSE4_1 = 1 << 14,
      PTA_SSE4_2 = 1 << 15,
      PTA_AES = 1 << 16,
      PTA_PCLMUL = 1 << 17,
      PTA_FMA = 1 << 18,
      PTA_SSSE3 = 1 << 19,
      PTA_MOVBE = 1 << 20,
      PTA_FMA4 = 1 << 21,
      PTA_XOP = 1 << 22,
      PTA_LWP = 1 << 23,
      PTA_FSGSBASE = 1 << 24,
      PTA_RDRND = 1 << 25,
      PTA_F16C = 1 << 26,
      PTA_BMI = 1 << 27,
      PTA_TBM = 1 << 28
      /* if this reaches 32, need to widen struct pta flags below */
    };

  static struct pta
    {
      const char *const name;           /* processor name or nickname.  */
      const enum processor_type processor;
      const enum attr_cpu schedule;
      const unsigned /*enum pta_flags*/ flags;
    }
  const processor_alias_table[] =
    {
      {"i386", PROCESSOR_I386, CPU_NONE, 0},
      {"i486", PROCESSOR_I486, CPU_NONE, 0},
      {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
      {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
      {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
      {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
      {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
      {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
      {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_SSE},
      {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
      {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
      {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX},
      {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
       PTA_MMX | PTA_SSE},
      {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
       PTA_MMX | PTA_SSE},
      {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
       PTA_MMX | PTA_SSE | PTA_SSE2},
      {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
       PTA_MMX | PTA_SSE | PTA_SSE2},
      {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
       PTA_MMX | PTA_SSE | PTA_SSE2},
      {"prescott", PROCESSOR_NOCONA, CPU_NONE,
       PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
      {"nocona", PROCESSOR_NOCONA, CPU_NONE,
       PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
       | PTA_CX16 | PTA_NO_SAHF},
      {"core2", PROCESSOR_CORE2_64, CPU_CORE2,
       PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
       | PTA_SSSE3 | PTA_CX16},
      {"corei7", PROCESSOR_COREI7_64, CPU_COREI7,
       PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
       | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_CX16},
      {"corei7-avx", PROCESSOR_COREI7_64, CPU_COREI7,
       PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
       | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX
       | PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL},
      {"atom", PROCESSOR_ATOM, CPU_ATOM,
       PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
       | PTA_SSSE3 | PTA_CX16 | PTA_MOVBE},
      {"geode", PROCESSOR_GEODE, CPU_GEODE,
       PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
      {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
      {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
      {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
      {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
       PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
      {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
       PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
      {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
       PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
      {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
       PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
      {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
       PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
      {"x86-64", PROCESSOR_K8, CPU_K8,
       PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
      {"k8", PROCESSOR_K8, CPU_K8,
       PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
       | PTA_SSE2 | PTA_NO_SAHF},
      {"k8-sse3", PROCESSOR_K8, CPU_K8,
       PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
       | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
      {"opteron", PROCESSOR_K8, CPU_K8,
       PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
       | PTA_SSE2 | PTA_NO_SAHF},
      {"opteron-sse3", PROCESSOR_K8, CPU_K8,
       PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
       | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
      {"athlon64", PROCESSOR_K8, CPU_K8,
       PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
       | PTA_SSE2 | PTA_NO_SAHF},
      {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
       PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
       | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
      {"athlon-fx", PROCESSOR_K8, CPU_K8,
       PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
       | PTA_SSE2 | PTA_NO_SAHF},
      {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
       PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
       | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
      {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
       PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
       | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
      {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
       PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
       | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
       | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
       | PTA_XOP | PTA_LWP},
      {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
       PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
       | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
       | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
       | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
       | PTA_FMA},
      {"btver1", PROCESSOR_BTVER1, CPU_GENERIC64,
       PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
       | PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16},
      {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
       0 /* flags are only used for -march switch.  */ },
      {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
       PTA_64BIT /* flags are only used for -march switch.  */ },
    };

  int const pta_size = ARRAY_SIZE (processor_alias_table);
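  /* Worked example (added commentary, not original code): one -march value
     fans out through this table.  "-march=core2" matches the {"core2",
     PROCESSOR_CORE2_64, CPU_CORE2, ...} row above, so the lookup loops
     below set the scheduling model and the arch/tune processor ids from
     that row and turn on every ISA bit its PTA flags imply, unless the
     user overrode that bit explicitly.  */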
  /* Set up prefix/suffix so the error messages refer to either the command
     line argument, or the attribute(target).  */
  if (main_args_p)
    {
      prefix = "-m";
      suffix = "";
      sw = "switch";
    }
  else
    {
      prefix = "option(\"";
      suffix = "\")";
      sw = "attribute";
    }

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

#ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
  SUBSUBTARGET_OVERRIDE_OPTIONS;
#endif

  if (TARGET_X32)
    ix86_isa_flags |= OPTION_MASK_ISA_64BIT;

  /* -fPIC is the default for x86_64.  */
  if (TARGET_MACHO && TARGET_64BIT)
    flag_pic = 2;
  /* Need to check -mtune=generic first.  */
  if (ix86_tune_string)
    {
      if (!strcmp (ix86_tune_string, "generic")
          || !strcmp (ix86_tune_string, "i686")
          /* As special support for cross compilers we read -mtune=native
             as -mtune=generic.  With native compilers we won't see the
             -mtune=native, as it was changed by the driver.  */
          || !strcmp (ix86_tune_string, "native"))
        {
          if (TARGET_64BIT)
            ix86_tune_string = "generic64";
          else
            ix86_tune_string = "generic32";
        }
      /* If this call is for setting the option attribute, allow the
         generic32/generic64 that was previously set.  */
      else if (!main_args_p
               && (!strcmp (ix86_tune_string, "generic32")
                   || !strcmp (ix86_tune_string, "generic64")))
        ;
      else if (!strncmp (ix86_tune_string, "generic", 7))
        error ("bad value (%s) for %stune=%s %s",
               ix86_tune_string, prefix, suffix, sw);
      else if (!strcmp (ix86_tune_string, "x86-64"))
        warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
                 "%stune=k8%s or %stune=generic%s instead as appropriate",
                 prefix, suffix, prefix, suffix, prefix, suffix);
    }
  else
    {
      if (ix86_arch_string)
        ix86_tune_string = ix86_arch_string;
      if (!ix86_tune_string)
        {
          ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
          ix86_tune_defaulted = 1;
        }

      /* ix86_tune_string is set to ix86_arch_string or defaulted.  We
         need to use a sensible tune option.  */
      if (!strcmp (ix86_tune_string, "generic")
          || !strcmp (ix86_tune_string, "x86-64")
          || !strcmp (ix86_tune_string, "i686"))
        {
          if (TARGET_64BIT)
            ix86_tune_string = "generic64";
          else
            ix86_tune_string = "generic32";
        }
    }
  if (ix86_stringop_alg == rep_prefix_8_byte && !TARGET_64BIT)
    {
      /* rep; movq isn't available in 32-bit code.  */
      error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
      ix86_stringop_alg = no_stringop;
    }
  if (!ix86_arch_string)
    ix86_arch_string = TARGET_64BIT ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
  else
    ix86_arch_specified = 1;

  if (!global_options_set.x_ix86_abi)
    ix86_abi = DEFAULT_ABI;
  if (global_options_set.x_ix86_cmodel)
    {
      switch (ix86_cmodel)
        {
        case CM_SMALL:
        case CM_SMALL_PIC:
          if (flag_pic)
            ix86_cmodel = CM_SMALL_PIC;
          if (!TARGET_64BIT)
            error ("code model %qs not supported in the %s bit mode",
                   "small", "32");
          break;

        case CM_MEDIUM:
        case CM_MEDIUM_PIC:
          if (flag_pic)
            ix86_cmodel = CM_MEDIUM_PIC;
          if (!TARGET_64BIT)
            error ("code model %qs not supported in the %s bit mode",
                   "medium", "32");
          else if (TARGET_X32)
            error ("code model %qs not supported in x32 mode",
                   "medium");
          break;

        case CM_LARGE:
        case CM_LARGE_PIC:
          if (flag_pic)
            ix86_cmodel = CM_LARGE_PIC;
          if (!TARGET_64BIT)
            error ("code model %qs not supported in the %s bit mode",
                   "large", "32");
          else if (TARGET_X32)
            error ("code model %qs not supported in x32 mode",
                   "large");
          break;

        case CM_32:
          if (flag_pic)
            error ("code model %s does not support PIC mode", "32");
          if (TARGET_64BIT)
            error ("code model %qs not supported in the %s bit mode",
                   "32", "64");
          break;

        case CM_KERNEL:
          if (flag_pic)
            {
              error ("code model %s does not support PIC mode", "kernel");
              ix86_cmodel = CM_32;
            }
          if (!TARGET_64BIT)
            error ("code model %qs not supported in the %s bit mode",
                   "kernel", "32");
          break;

        default:
          gcc_unreachable ();
        }
    }
  else
    {
      /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
         use of rip-relative addressing.  This eliminates fixups that
         would otherwise be needed if this object is to be placed in a
         DLL, and is essentially just as efficient as direct addressing.  */
      if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
        ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
      else if (TARGET_64BIT)
        ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
      else
        ix86_cmodel = CM_32;
    }
  if (TARGET_MACHO && ix86_asm_dialect == ASM_INTEL)
    {
      error ("-masm=intel not supported in this configuration");
      ix86_asm_dialect = ASM_ATT;
    }
  if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
    sorry ("%i-bit mode not compiled in",
           (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
  for (i = 0; i < pta_size; i++)
    if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
      {
        ix86_schedule = processor_alias_table[i].schedule;
        ix86_arch = processor_alias_table[i].processor;
        /* Default cpu tuning to the architecture.  */
        ix86_tune = ix86_arch;

        if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
          error ("CPU you selected does not support x86-64 "
                 "instruction set");

        if (processor_alias_table[i].flags & PTA_MMX
            && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
          ix86_isa_flags |= OPTION_MASK_ISA_MMX;
        if (processor_alias_table[i].flags & PTA_3DNOW
            && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
          ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
        if (processor_alias_table[i].flags & PTA_3DNOW_A
            && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
          ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
        if (processor_alias_table[i].flags & PTA_SSE
            && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
          ix86_isa_flags |= OPTION_MASK_ISA_SSE;
        if (processor_alias_table[i].flags & PTA_SSE2
            && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
          ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
        if (processor_alias_table[i].flags & PTA_SSE3
            && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
          ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
        if (processor_alias_table[i].flags & PTA_SSSE3
            && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
          ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
        if (processor_alias_table[i].flags & PTA_SSE4_1
            && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
          ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
        if (processor_alias_table[i].flags & PTA_SSE4_2
            && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
          ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
        if (processor_alias_table[i].flags & PTA_AVX
            && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
          ix86_isa_flags |= OPTION_MASK_ISA_AVX;
        if (processor_alias_table[i].flags & PTA_FMA
            && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
          ix86_isa_flags |= OPTION_MASK_ISA_FMA;
        if (processor_alias_table[i].flags & PTA_SSE4A
            && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
          ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
        if (processor_alias_table[i].flags & PTA_FMA4
            && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
          ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
        if (processor_alias_table[i].flags & PTA_XOP
            && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
          ix86_isa_flags |= OPTION_MASK_ISA_XOP;
        if (processor_alias_table[i].flags & PTA_LWP
            && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
          ix86_isa_flags |= OPTION_MASK_ISA_LWP;
        if (processor_alias_table[i].flags & PTA_ABM
            && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
          ix86_isa_flags |= OPTION_MASK_ISA_ABM;
        if (processor_alias_table[i].flags & PTA_BMI
            && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
          ix86_isa_flags |= OPTION_MASK_ISA_BMI;
        if (processor_alias_table[i].flags & PTA_TBM
            && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
          ix86_isa_flags |= OPTION_MASK_ISA_TBM;
        if (processor_alias_table[i].flags & PTA_CX16
            && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
          ix86_isa_flags |= OPTION_MASK_ISA_CX16;
        if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
            && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
          ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
        if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
            && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
          ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
        if (processor_alias_table[i].flags & PTA_MOVBE
            && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
          ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
        if (processor_alias_table[i].flags & PTA_AES
            && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
          ix86_isa_flags |= OPTION_MASK_ISA_AES;
        if (processor_alias_table[i].flags & PTA_PCLMUL
            && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
          ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
        if (processor_alias_table[i].flags & PTA_FSGSBASE
            && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
          ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
        if (processor_alias_table[i].flags & PTA_RDRND
            && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
          ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
        if (processor_alias_table[i].flags & PTA_F16C
            && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
          ix86_isa_flags |= OPTION_MASK_ISA_F16C;
        if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
          x86_prefetch_sse = true;

        break;
      }

  if (!strcmp (ix86_arch_string, "generic"))
    error ("generic CPU can be used only for %stune=%s %s",
           prefix, suffix, sw);
  else if (!strncmp (ix86_arch_string, "generic", 7) || i == pta_size)
    error ("bad value (%s) for %sarch=%s %s",
           ix86_arch_string, prefix, suffix, sw);
  ix86_arch_mask = 1u << ix86_arch;
  for (i = 0; i < X86_ARCH_LAST; ++i)
    ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
  for (i = 0; i < pta_size; i++)
    if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
      {
        ix86_schedule = processor_alias_table[i].schedule;
        ix86_tune = processor_alias_table[i].processor;
        if (TARGET_64BIT)
          {
            if (!(processor_alias_table[i].flags & PTA_64BIT))
              {
                if (ix86_tune_defaulted)
                  {
                    ix86_tune_string = "x86-64";
                    for (i = 0; i < pta_size; i++)
                      if (! strcmp (ix86_tune_string,
                                    processor_alias_table[i].name))
                        break;
                    ix86_schedule = processor_alias_table[i].schedule;
                    ix86_tune = processor_alias_table[i].processor;
                  }
                else
                  error ("CPU you selected does not support x86-64 "
                         "instruction set");
              }
          }
        else
          {
            /* Adjust tuning when compiling for 32-bit ABI.  */
            switch (ix86_tune)
              {
              case PROCESSOR_GENERIC64:
                ix86_tune = PROCESSOR_GENERIC32;
                ix86_schedule = CPU_PENTIUMPRO;
                break;

              case PROCESSOR_CORE2_64:
                ix86_tune = PROCESSOR_CORE2_32;
                break;

              case PROCESSOR_COREI7_64:
                ix86_tune = PROCESSOR_COREI7_32;
                break;

              default:
                break;
              }
          }
        /* Intel CPUs have always interpreted SSE prefetch instructions as
           NOPs; so, we can enable SSE prefetch instructions even when
           -mtune (rather than -march) points us to a processor that has them.
           However, the VIA C3 gives a SIGILL, so we only do that for i686 and
           higher processors.  */
        if (TARGET_CMOVE
            && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
          x86_prefetch_sse = true;
        break;
      }
  if (ix86_tune_specified && i == pta_size)
    error ("bad value (%s) for %stune=%s %s",
	   ix86_tune_string, prefix, suffix, sw);

  ix86_tune_mask = 1u << ix86_tune;
  for (i = 0; i < X86_TUNE_LAST; ++i)
    ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
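
  /* Illustrative sketch (not from the original source): after this loop a
     tuning knob is a plain array load.  With ix86_tune equal to some
     processor with bit N in the masks, e.g.

       ix86_tune_features[X86_TUNE_USE_LEAVE]
	 = !!(initial_ix86_tune_features[X86_TUNE_USE_LEAVE] & (1u << N));

     so the TARGET_USE_LEAVE-style macros used elsewhere in this file reduce
     to cheap table lookups rather than repeated switch statements.  */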
#ifndef USE_IX86_FRAME_POINTER
#define USE_IX86_FRAME_POINTER 0
#endif

#ifndef USE_X86_64_FRAME_POINTER
#define USE_X86_64_FRAME_POINTER 0
#endif

  /* Set the default values for switches whose default depends on TARGET_64BIT
     in case they weren't overwritten by command line options.  */
  if (TARGET_64BIT)
    {
      if (optimize > 1 && !global_options_set.x_flag_zee)
	flag_zee = 1;
      if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
	flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
      if (flag_asynchronous_unwind_tables == 2)
	flag_unwind_tables = flag_asynchronous_unwind_tables = 1;
      if (flag_pcc_struct_return == 2)
	flag_pcc_struct_return = 0;
    }
  else
    {
      if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
	flag_omit_frame_pointer = !(USE_IX86_FRAME_POINTER || optimize_size);
      if (flag_asynchronous_unwind_tables == 2)
	flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
      if (flag_pcc_struct_return == 2)
	flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
    }
  if (optimize_size)
    ix86_cost = &ix86_size_cost;
  else
    ix86_cost = processor_target_table[ix86_tune].cost;

  /* Arrange to set up i386_stack_locals for all functions.  */
  init_machine_status = ix86_init_machine_status;
  /* Validate -mregparm= value.  */
  if (global_options_set.x_ix86_regparm)
    {
      if (TARGET_64BIT)
	warning (0, "-mregparm is ignored in 64-bit mode");
      if (ix86_regparm > REGPARM_MAX)
	{
	  error ("-mregparm=%d is not between 0 and %d",
		 ix86_regparm, REGPARM_MAX);
	  ix86_regparm = 0;
	}
    }
  if (TARGET_64BIT)
    ix86_regparm = REGPARM_MAX;
  /* Default align_* from the processor table.  */
  if (align_loops == 0)
    {
      align_loops = processor_target_table[ix86_tune].align_loop;
      align_loops_max_skip
	= processor_target_table[ix86_tune].align_loop_max_skip;
    }
  if (align_jumps == 0)
    {
      align_jumps = processor_target_table[ix86_tune].align_jump;
      align_jumps_max_skip
	= processor_target_table[ix86_tune].align_jump_max_skip;
    }
  if (align_functions == 0)
    align_functions = processor_target_table[ix86_tune].align_func;
  /* Provide default for -mbranch-cost= value.  */
  if (!global_options_set.x_ix86_branch_cost)
    ix86_branch_cost = ix86_cost->branch_cost;
  if (TARGET_64BIT)
    {
      target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;

      /* Enable by default the SSE and MMX builtins.  Do allow the user to
	 explicitly disable any of these.  In particular, disabling SSE and
	 MMX for kernel code is extremely useful.  */
      if (!ix86_arch_specified)
	ix86_isa_flags
	  |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
	       | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);

      if (TARGET_RTD)
	warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
    }
  else
    {
      target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;

      if (!ix86_arch_specified)
	ix86_isa_flags
	  |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
      /* i386 ABI does not specify red zone.  It still makes sense to use it
	 when the programmer takes care to keep the stack from being
	 destroyed.  */
      if (!(target_flags_explicit & MASK_NO_RED_ZONE))
	target_flags |= MASK_NO_RED_ZONE;
    }
  /* Keep nonleaf frame pointers.  */
  if (flag_omit_frame_pointer)
    target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
  else if (TARGET_OMIT_LEAF_FRAME_POINTER)
    flag_omit_frame_pointer = 1;

  /* If we're doing fast math, we don't care about comparison order
     wrt NaNs.  This lets us use a shorter comparison sequence.  */
  if (flag_finite_math_only)
    target_flags &= ~MASK_IEEE_FP;
  /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
     since the insns won't need emulation.  */
  if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
    target_flags &= ~MASK_NO_FANCY_MATH_387;

  /* Likewise, if the target doesn't have a 387, or we've specified
     software floating point, don't use 387 inline intrinsics.  */
  if (!TARGET_80387)
    target_flags |= MASK_NO_FANCY_MATH_387;
  /* Turn on MMX builtins for -msse.  */
  if (TARGET_SSE)
    {
      ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
      x86_prefetch_sse = true;
    }

  /* Turn on popcnt instruction for -msse4.2 or -mabm.  */
  if (TARGET_SSE4_2 || TARGET_ABM)
    ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;
  /* Validate -mpreferred-stack-boundary= value or default it to
     PREFERRED_STACK_BOUNDARY_DEFAULT.  */
  ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
  if (global_options_set.x_ix86_preferred_stack_boundary_arg)
    {
      int min = (TARGET_64BIT ? 4 : 2);
      int max = (TARGET_SEH ? 4 : 12);

      if (ix86_preferred_stack_boundary_arg < min
	  || ix86_preferred_stack_boundary_arg > max)
	{
	  if (min == max)
	    error ("-mpreferred-stack-boundary is not supported "
		   "for this target");
	  else
	    error ("-mpreferred-stack-boundary=%d is not between %d and %d",
		   ix86_preferred_stack_boundary_arg, min, max);
	}
      else
	ix86_preferred_stack_boundary
	  = (1 << ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
    }
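
  /* For illustration: the option value is the log2 of the boundary in bytes,
     so -mpreferred-stack-boundary=4 yields
     (1 << 4) * BITS_PER_UNIT = 16 * 8 = 128 bits, i.e. a 16-byte aligned
     stack, which is what 16-byte SSE memory operands want.  */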
  /* Set the default value for -mstackrealign.  */
  if (ix86_force_align_arg_pointer == -1)
    ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;

  ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;

  /* Validate -mincoming-stack-boundary= value or default it to
     MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY.  */
  ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
  if (global_options_set.x_ix86_incoming_stack_boundary_arg)
    {
      if (ix86_incoming_stack_boundary_arg < (TARGET_64BIT ? 4 : 2)
	  || ix86_incoming_stack_boundary_arg > 12)
	error ("-mincoming-stack-boundary=%d is not between %d and 12",
	       ix86_incoming_stack_boundary_arg, TARGET_64BIT ? 4 : 2);
      else
	{
	  ix86_user_incoming_stack_boundary
	    = (1 << ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
	  ix86_incoming_stack_boundary
	    = ix86_user_incoming_stack_boundary;
	}
    }
  /* Accept -msseregparm only if at least SSE support is enabled.  */
  if (TARGET_SSEREGPARM
      && ! TARGET_SSE)
    error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
  if (global_options_set.x_ix86_fpmath)
    {
      if (ix86_fpmath & FPMATH_SSE)
	{
	  if (!TARGET_SSE)
	    {
	      warning (0, "SSE instruction set disabled, using 387 arithmetics");
	      ix86_fpmath = FPMATH_387;
	    }
	  else if ((ix86_fpmath & FPMATH_387) && !TARGET_80387)
	    {
	      warning (0, "387 instruction set disabled, using SSE arithmetics");
	      ix86_fpmath = FPMATH_SSE;
	    }
	}
    }
  else
    ix86_fpmath = TARGET_FPMATH_DEFAULT;

  /* If the i387 is disabled, then do not return values in it.  */
  if (!TARGET_80387)
    target_flags &= ~MASK_FLOAT_RETURNS;
  /* Use external vectorized library in vectorizing intrinsics.  */
  if (global_options_set.x_ix86_veclibabi_type)
    switch (ix86_veclibabi_type)
      {
      case ix86_veclibabi_type_svml:
	ix86_veclib_handler = ix86_veclibabi_svml;
	break;

      case ix86_veclibabi_type_acml:
	ix86_veclib_handler = ix86_veclibabi_acml;
	break;

      default:
	gcc_unreachable ();
      }
  if ((!USE_IX86_FRAME_POINTER
       || (x86_accumulate_outgoing_args & ix86_tune_mask))
      && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
      && !optimize_size)
    target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;

  /* ??? Unwind info is not correct around the CFG unless either a frame
     pointer is present or M_A_O_A is set.  Fixing this requires rewriting
     unwind info generation to be aware of the CFG and propagating states
     around edges.  */
  if ((flag_unwind_tables || flag_asynchronous_unwind_tables
       || flag_exceptions || flag_non_call_exceptions)
      && flag_omit_frame_pointer
      && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
    {
      if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
	warning (0, "unwind tables currently require either a frame pointer "
		 "or %saccumulate-outgoing-args%s for correctness",
		 prefix, suffix);
      target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
    }
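
  /* For illustration: a 32-bit compile such as
       gcc -m32 -O2 -fomit-frame-pointer -fasynchronous-unwind-tables
     lands in the block above; without an explicit
     -mno-accumulate-outgoing-args the flag is forced on silently, while with
     one the warning fires first and the flag is forced on anyway, since
     correct unwind info depends on it.  */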
  /* If stack probes are required, the space used for large function
     arguments on the stack must also be probed, so enable
     -maccumulate-outgoing-args so this happens in the prologue.  */
  if (TARGET_STACK_PROBE
      && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
    {
      if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
	warning (0, "stack probing requires %saccumulate-outgoing-args%s "
		 "for correctness", prefix, suffix);
      target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
    }
  /* For sane SSE instruction set generation we need fcomi instruction.
     It is safe to enable all CMOVE instructions.  Also, RDRAND intrinsic
     expands to a sequence that includes conditional move. */
  if (TARGET_SSE || TARGET_RDRND)
    TARGET_CMOVE = 1;

  /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix.  */
  {
    char *p;
    ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
    p = strchr (internal_label_prefix, 'X');
    internal_label_prefix_len = p - internal_label_prefix;
    *p = '\0';
  }
  /* When scheduling description is not available, disable scheduler pass
     so it won't slow down the compilation and make x87 code slower.  */
  if (!TARGET_SCHEDULE)
    flag_schedule_insns_after_reload = flag_schedule_insns = 0;
  maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
			 ix86_cost->simultaneous_prefetches,
			 global_options.x_param_values,
			 global_options_set.x_param_values);
  maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, ix86_cost->prefetch_block,
			 global_options.x_param_values,
			 global_options_set.x_param_values);
  maybe_set_param_value (PARAM_L1_CACHE_SIZE, ix86_cost->l1_cache_size,
			 global_options.x_param_values,
			 global_options_set.x_param_values);
  maybe_set_param_value (PARAM_L2_CACHE_SIZE, ix86_cost->l2_cache_size,
			 global_options.x_param_values,
			 global_options_set.x_param_values);
  /* Enable software prefetching at -O3 for CPUs where prefetching is
     helpful.  */
  if (flag_prefetch_loop_arrays < 0
      && HAVE_prefetch
      && optimize >= 3
      && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
    flag_prefetch_loop_arrays = 1;

  /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
     can be optimized to ap = __builtin_next_arg (0).  */
  if (!TARGET_64BIT && !flag_split_stack)
    targetm.expand_builtin_va_start = NULL;
  if (TARGET_64BIT)
    {
      ix86_gen_leave = gen_leave_rex64;
      ix86_gen_add3 = gen_adddi3;
      ix86_gen_sub3 = gen_subdi3;
      ix86_gen_sub3_carry = gen_subdi3_carry;
      ix86_gen_one_cmpl2 = gen_one_cmpldi2;
      ix86_gen_monitor = gen_sse3_monitor64;
      ix86_gen_andsp = gen_anddi3;
      ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
      ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
      ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
    }
  else
    {
      ix86_gen_leave = gen_leave;
      ix86_gen_add3 = gen_addsi3;
      ix86_gen_sub3 = gen_subsi3;
      ix86_gen_sub3_carry = gen_subsi3_carry;
      ix86_gen_one_cmpl2 = gen_one_cmplsi2;
      ix86_gen_monitor = gen_sse3_monitor;
      ix86_gen_andsp = gen_andsi3;
      ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
      ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
      ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
    }
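
  /* Illustrative sketch (hedged): with the function pointers above set up,
     callers elsewhere in this file can emit pointer-width arithmetic without
     re-checking TARGET_64BIT, e.g. prologue code along the lines of

       emit_insn (ix86_gen_add3 (stack_pointer_rtx, stack_pointer_rtx,
				 GEN_INT (-allocate)));

     which expands through gen_adddi3 or gen_addsi3 as appropriate.  */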
#ifdef USE_IX86_CLD
  /* Use -mcld by default for 32-bit code if configured with --enable-cld.  */
  if (!TARGET_64BIT)
    target_flags |= MASK_CLD & ~target_flags_explicit;
#endif
  if (!TARGET_64BIT && flag_pic)
    {
      if (flag_fentry > 0)
	sorry ("-mfentry isn%'t supported for 32-bit in combination "
	       "with -fpic");
      flag_fentry = 0;
    }
  else if (TARGET_SEH)
    {
      if (flag_fentry == 0)
	sorry ("-mno-fentry isn%'t compatible with SEH");
      flag_fentry = 1;
    }
  else if (flag_fentry < 0)
    {
#if defined(PROFILE_BEFORE_PROLOGUE)
      flag_fentry = 1;
#else
      flag_fentry = 0;
#endif
    }
  if (TARGET_AVX)
    {
      /* When not optimize for size, enable vzeroupper optimization for
	 TARGET_AVX with -fexpensive-optimizations and split 32-byte
	 AVX unaligned load/store.  */
      if (!optimize_size)
	{
	  if (flag_expensive_optimizations
	      && !(target_flags_explicit & MASK_VZEROUPPER))
	    target_flags |= MASK_VZEROUPPER;
	  if ((x86_avx256_split_unaligned_load & ix86_tune_mask)
	      && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
	    target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
	  if ((x86_avx256_split_unaligned_store & ix86_tune_mask)
	      && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_STORE))
	    target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
	  /* Enable 128-bit AVX instruction generation for the
	     auto-vectorizer.  */
	  if (TARGET_AVX128_OPTIMAL
	      && !(target_flags_explicit & MASK_PREFER_AVX128))
	    target_flags |= MASK_PREFER_AVX128;
	}
    }
  else
    {
      /* Disable vzeroupper pass if TARGET_AVX is disabled.  */
      target_flags &= ~MASK_VZEROUPPER;
    }
  /* Save the initial options in case the user does function specific
     options.  */
  if (main_args_p)
    target_option_default_node = target_option_current_node
      = build_target_option_node ();
}
/* Return TRUE if VAL is passed in register with 256bit AVX modes.  */

static bool
function_pass_avx256_p (const_rtx val)
{
  if (!val)
    return false;

  if (REG_P (val) && VALID_AVX256_REG_MODE (GET_MODE (val)))
    return true;

  if (GET_CODE (val) == PARALLEL)
    {
      int i;
      rtx r;

      for (i = XVECLEN (val, 0) - 1; i >= 0; i--)
	{
	  r = XVECEXP (val, 0, i);
	  if (GET_CODE (r) == EXPR_LIST
	      && XEXP (r, 0)
	      && REG_P (XEXP (r, 0))
	      && (GET_MODE (XEXP (r, 0)) == OImode
		  || VALID_AVX256_REG_MODE (GET_MODE (XEXP (r, 0)))))
	    return true;
	}
    }

  return false;
}
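
/* Illustrative note (hedged): a 256-bit return value may show up either as a
   bare hard register such as (reg:V8SF 21 xmm0), caught by the REG_P test
   above, or wrapped in a location list of the shape

     (parallel [(expr_list (reg:V8SF 21 xmm0) (const_int 0))])

   which the PARALLEL walk above unpacks element by element.  */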
/* Implement the TARGET_OPTION_OVERRIDE hook.  */

static void
ix86_option_override (void)
{
  ix86_option_override_internal (true);
}
/* Update register usage after having seen the compiler flags.  */

void
ix86_conditional_register_usage (void)
{
  int i;
  unsigned int j;

  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    {
      if (fixed_regs[i] > 1)
	fixed_regs[i] = (fixed_regs[i] == (TARGET_64BIT ? 3 : 2));
      if (call_used_regs[i] > 1)
	call_used_regs[i] = (call_used_regs[i] == (TARGET_64BIT ? 3 : 2));
    }

  /* The PIC register, if it exists, is fixed.  */
  j = PIC_OFFSET_TABLE_REGNUM;
  if (j != INVALID_REGNUM)
    fixed_regs[j] = call_used_regs[j] = 1;

  /* The 64-bit MS_ABI changes the set of call-used registers.  */
  if (TARGET_64BIT_MS_ABI)
    {
      call_used_regs[SI_REG] = 0;
      call_used_regs[DI_REG] = 0;
      call_used_regs[XMM6_REG] = 0;
      call_used_regs[XMM7_REG] = 0;
      for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
	call_used_regs[i] = 0;
    }

  /* The default setting of CLOBBERED_REGS is for 32-bit; add in the
     other call-clobbered regs for 64-bit.  */
  if (TARGET_64BIT)
    {
      CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);

      for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
	if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
	    && call_used_regs[i])
	  SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
    }

  /* If MMX is disabled, squash the registers.  */
  if (! TARGET_MMX)
    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
      if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";

  /* If SSE is disabled, squash the registers.  */
  if (! TARGET_SSE)
    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
      if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";

  /* If the FPU is disabled, squash the registers.  */
  if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
      if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";

  /* If 32-bit, squash the 64-bit registers.  */
  if (! TARGET_64BIT)
    {
      for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
	reg_names[i] = "";
      for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
	reg_names[i] = "";
    }
}
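
/* For illustration (hedged): with -m32 the final loops above blank out
   r8-r15 and xmm8-xmm15, so user code like

     asm volatile ("" : : : "r8");

   is rejected with an unknown-register-name error, because the name no
   longer appears in reg_names[].  */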
/* Save the current options */

static void
ix86_function_specific_save (struct cl_target_option *ptr)
{
  ptr->arch = ix86_arch;
  ptr->schedule = ix86_schedule;
  ptr->tune = ix86_tune;
  ptr->branch_cost = ix86_branch_cost;
  ptr->tune_defaulted = ix86_tune_defaulted;
  ptr->arch_specified = ix86_arch_specified;
  ptr->x_ix86_isa_flags_explicit = ix86_isa_flags_explicit;
  ptr->ix86_target_flags_explicit = target_flags_explicit;

  /* The fields are char but the variables are not; make sure the
     values fit in the fields.  */
  gcc_assert (ptr->arch == ix86_arch);
  gcc_assert (ptr->schedule == ix86_schedule);
  gcc_assert (ptr->tune == ix86_tune);
  gcc_assert (ptr->branch_cost == ix86_branch_cost);
}
/* Restore the current options */

static void
ix86_function_specific_restore (struct cl_target_option *ptr)
{
  enum processor_type old_tune = ix86_tune;
  enum processor_type old_arch = ix86_arch;
  unsigned int ix86_arch_mask, ix86_tune_mask;
  int i;

  ix86_arch = (enum processor_type) ptr->arch;
  ix86_schedule = (enum attr_cpu) ptr->schedule;
  ix86_tune = (enum processor_type) ptr->tune;
  ix86_branch_cost = ptr->branch_cost;
  ix86_tune_defaulted = ptr->tune_defaulted;
  ix86_arch_specified = ptr->arch_specified;
  ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
  target_flags_explicit = ptr->ix86_target_flags_explicit;

  /* Recreate the arch feature tests if the arch changed */
  if (old_arch != ix86_arch)
    {
      ix86_arch_mask = 1u << ix86_arch;
      for (i = 0; i < X86_ARCH_LAST; ++i)
	ix86_arch_features[i]
	  = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
    }

  /* Recreate the tune optimization tests */
  if (old_tune != ix86_tune)
    {
      ix86_tune_mask = 1u << ix86_tune;
      for (i = 0; i < X86_TUNE_LAST; ++i)
	ix86_tune_features[i]
	  = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
    }
}
/* Print the current options */

static void
ix86_function_specific_print (FILE *file, int indent,
			      struct cl_target_option *ptr)
{
  char *target_string
    = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
			  NULL, NULL, ptr->x_ix86_fpmath, false);

  fprintf (file, "%*sarch = %d (%s)\n",
	   indent, "",
	   ptr->arch,
	   ((ptr->arch < TARGET_CPU_DEFAULT_max)
	    ? cpu_names[ptr->arch]
	    : "<unknown>"));

  fprintf (file, "%*stune = %d (%s)\n",
	   indent, "",
	   ptr->tune,
	   ((ptr->tune < TARGET_CPU_DEFAULT_max)
	    ? cpu_names[ptr->tune]
	    : "<unknown>"));

  fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);

  if (target_string)
    {
      fprintf (file, "%*s%s\n", indent, "", target_string);
      free (target_string);
    }
}
/* Inner function to process the attribute((target(...))), take an argument and
   set the current options from the argument.  If we have a list, recursively
   go over the list.  */

static bool
ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
				     struct gcc_options *enum_opts_set)
{
  char *next_optstr;
  bool ret = true;

#define IX86_ATTR_ISA(S,O)   { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
#define IX86_ATTR_STR(S,O)   { S, sizeof (S)-1, ix86_opt_str, O, 0 }
#define IX86_ATTR_ENUM(S,O)  { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
#define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
#define IX86_ATTR_NO(S,O,M)  { S, sizeof (S)-1, ix86_opt_no,  O, M }

  enum ix86_opt_type
  {
    ix86_opt_unknown,
    ix86_opt_yes,
    ix86_opt_no,
    ix86_opt_str,
    ix86_opt_enum,
    ix86_opt_isa
  };

  static const struct
  {
    const char *string;
    size_t len;
    enum ix86_opt_type type;
    int opt;
    int mask;
  } attrs[] = {
    /* isa options */
    IX86_ATTR_ISA ("3dnow",	OPT_m3dnow),
    IX86_ATTR_ISA ("abm",	OPT_mabm),
    IX86_ATTR_ISA ("bmi",	OPT_mbmi),
    IX86_ATTR_ISA ("tbm",	OPT_mtbm),
    IX86_ATTR_ISA ("aes",	OPT_maes),
    IX86_ATTR_ISA ("avx",	OPT_mavx),
    IX86_ATTR_ISA ("mmx",	OPT_mmmx),
    IX86_ATTR_ISA ("pclmul",	OPT_mpclmul),
    IX86_ATTR_ISA ("popcnt",	OPT_mpopcnt),
    IX86_ATTR_ISA ("sse",	OPT_msse),
    IX86_ATTR_ISA ("sse2",	OPT_msse2),
    IX86_ATTR_ISA ("sse3",	OPT_msse3),
    IX86_ATTR_ISA ("sse4",	OPT_msse4),
    IX86_ATTR_ISA ("sse4.1",	OPT_msse4_1),
    IX86_ATTR_ISA ("sse4.2",	OPT_msse4_2),
    IX86_ATTR_ISA ("sse4a",	OPT_msse4a),
    IX86_ATTR_ISA ("ssse3",	OPT_mssse3),
    IX86_ATTR_ISA ("fma4",	OPT_mfma4),
    IX86_ATTR_ISA ("xop",	OPT_mxop),
    IX86_ATTR_ISA ("lwp",	OPT_mlwp),
    IX86_ATTR_ISA ("fsgsbase",	OPT_mfsgsbase),
    IX86_ATTR_ISA ("rdrnd",	OPT_mrdrnd),
    IX86_ATTR_ISA ("f16c",	OPT_mf16c),

    /* enum options */
    IX86_ATTR_ENUM ("fpmath=",	OPT_mfpmath_),

    /* string options */
    IX86_ATTR_STR ("arch=",	IX86_FUNCTION_SPECIFIC_ARCH),
    IX86_ATTR_STR ("tune=",	IX86_FUNCTION_SPECIFIC_TUNE),

    /* flag options */
    IX86_ATTR_YES ("cld",
		   OPT_mcld,
		   MASK_CLD),

    IX86_ATTR_NO ("fancy-math-387",
		  OPT_mfancy_math_387,
		  MASK_NO_FANCY_MATH_387),

    IX86_ATTR_YES ("ieee-fp",
		   OPT_mieee_fp,
		   MASK_IEEE_FP),

    IX86_ATTR_YES ("inline-all-stringops",
		   OPT_minline_all_stringops,
		   MASK_INLINE_ALL_STRINGOPS),

    IX86_ATTR_YES ("inline-stringops-dynamically",
		   OPT_minline_stringops_dynamically,
		   MASK_INLINE_STRINGOPS_DYNAMICALLY),

    IX86_ATTR_NO ("align-stringops",
		  OPT_mno_align_stringops,
		  MASK_NO_ALIGN_STRINGOPS),

    IX86_ATTR_YES ("recip",
		   OPT_mrecip,
		   MASK_RECIP),
  };

  /* If this is a list, recurse to get the options.  */
  if (TREE_CODE (args) == TREE_LIST)
    {
      bool ret = true;

      for (; args; args = TREE_CHAIN (args))
	if (TREE_VALUE (args)
	    && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
						     p_strings, enum_opts_set))
	  ret = false;

      return ret;
    }

  else if (TREE_CODE (args) != STRING_CST)
    gcc_unreachable ();

  /* Handle multiple arguments separated by commas.  */
  next_optstr = ASTRDUP (TREE_STRING_POINTER (args));

  while (next_optstr && *next_optstr != '\0')
    {
      char *p = next_optstr;
      char *orig_p = p;
      char *comma = strchr (next_optstr, ',');
      const char *opt_string;
      size_t len, opt_len;
      int opt;
      bool opt_set_p;
      char ch;
      unsigned i;
      enum ix86_opt_type type = ix86_opt_unknown;
      int mask = 0;

      if (comma)
	{
	  *comma = '\0';
	  len = comma - next_optstr;
	  next_optstr = comma + 1;
	}
      else
	{
	  len = strlen (p);
	  next_optstr = NULL;
	}

      /* Recognize no-xxx.  */
      if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
	{
	  opt_set_p = false;
	  p += 3;
	  len -= 3;
	}
      else
	opt_set_p = true;

      /* Find the option.  */
      ch = *p;
      opt = N_OPTS;
      for (i = 0; i < ARRAY_SIZE (attrs); i++)
	{
	  type = attrs[i].type;
	  opt_len = attrs[i].len;
	  if (ch == attrs[i].string[0]
	      && ((type != ix86_opt_str && type != ix86_opt_enum)
		  ? len == opt_len
		  : len > opt_len)
	      && memcmp (p, attrs[i].string, opt_len) == 0)
	    {
	      opt = attrs[i].opt;
	      mask = attrs[i].mask;
	      opt_string = attrs[i].string;
	      break;
	    }
	}

      /* Process the option.  */
      if (opt == N_OPTS)
	{
	  error ("attribute(target(\"%s\")) is unknown", orig_p);
	  ret = false;
	}

      else if (type == ix86_opt_isa)
	{
	  struct cl_decoded_option decoded;

	  generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
	  ix86_handle_option (&global_options, &global_options_set,
			      &decoded, input_location);
	}

      else if (type == ix86_opt_yes || type == ix86_opt_no)
	{
	  if (type == ix86_opt_no)
	    opt_set_p = !opt_set_p;

	  if (opt_set_p)
	    target_flags |= mask;
	  else
	    target_flags &= ~mask;
	}

      else if (type == ix86_opt_str)
	{
	  if (p_strings[opt])
	    {
	      error ("option(\"%s\") was already specified", opt_string);
	      ret = false;
	    }
	  else
	    p_strings[opt] = xstrdup (p + opt_len);
	}

      else if (type == ix86_opt_enum)
	{
	  bool arg_ok;
	  int value;

	  arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
	  if (arg_ok)
	    set_option (&global_options, enum_opts_set, opt, value,
			p + opt_len, DK_UNSPECIFIED, input_location,
			global_dc);
	  else
	    {
	      error ("attribute(target(\"%s\")) is unknown", orig_p);
	      ret = false;
	    }
	}

      else
	gcc_unreachable ();
    }

  return ret;
}
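
/* For illustration (hedged), the strings parsed above come from user code
   such as

     int popcount_fast (int x)
       __attribute__ ((target ("popcnt,arch=corei7")));

   Each comma-separated item is matched against the attrs[] table: "popcnt"
   hits an IX86_ATTR_ISA entry and is routed through ix86_handle_option,
   while "arch=corei7" hits the IX86_ATTR_STR "arch=" entry and is stashed
   in p_strings for the caller to apply.  */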
/* Return a TARGET_OPTION_NODE tree of the target options listed or NULL.  */

tree
ix86_valid_target_attribute_tree (tree args)
{
  const char *orig_arch_string = ix86_arch_string;
  const char *orig_tune_string = ix86_tune_string;
  enum fpmath_unit orig_fpmath_set = global_options_set.x_ix86_fpmath;
  int orig_tune_defaulted = ix86_tune_defaulted;
  int orig_arch_specified = ix86_arch_specified;
  char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
  tree t = NULL_TREE;
  int i;
  struct cl_target_option *def
    = TREE_TARGET_OPTION (target_option_default_node);
  struct gcc_options enum_opts_set;

  memset (&enum_opts_set, 0, sizeof (enum_opts_set));

  /* Process each of the options on the chain.  */
  if (! ix86_valid_target_attribute_inner_p (args, option_strings,
					     &enum_opts_set))
    return NULL_TREE;

  /* If the changed options are different from the default, rerun
     ix86_option_override_internal, and then save the options away.
     The string options are attribute options, and will be undone
     when we copy the save structure.  */
  if (ix86_isa_flags != def->x_ix86_isa_flags
      || target_flags != def->x_target_flags
      || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
      || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
      || enum_opts_set.x_ix86_fpmath)
    {
      /* If we are using the default tune= or arch=, undo the string assigned,
	 and use the default.  */
      if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
	ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
      else if (!orig_arch_specified)
	ix86_arch_string = NULL;

      if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
	ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
      else if (orig_tune_defaulted)
	ix86_tune_string = NULL;

      /* If fpmath= is not set, and we now have sse2 on 32-bit, use it.  */
      if (enum_opts_set.x_ix86_fpmath)
	global_options_set.x_ix86_fpmath = (enum fpmath_unit) 1;
      else if (!TARGET_64BIT && TARGET_SSE)
	{
	  ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
	  global_options_set.x_ix86_fpmath = (enum fpmath_unit) 1;
	}

      /* Do any overrides, such as arch=xxx, or tune=xxx support.  */
      ix86_option_override_internal (false);

      /* Add any builtin functions with the new isa if any.  */
      ix86_add_new_builtins (ix86_isa_flags);

      /* Save the current options unless we are validating options for
	 #pragma.  */
      t = build_target_option_node ();

      ix86_arch_string = orig_arch_string;
      ix86_tune_string = orig_tune_string;
      global_options_set.x_ix86_fpmath = orig_fpmath_set;

      /* Free up memory allocated to hold the strings */
      for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
	free (option_strings[i]);
    }

  return t;
}
/* Hook to validate attribute((target("string"))).  */

static bool
ix86_valid_target_attribute_p (tree fndecl,
			       tree ARG_UNUSED (name),
			       tree args,
			       int ARG_UNUSED (flags))
{
  struct cl_target_option cur_target;
  bool ret = true;
  tree old_optimize = build_optimization_node ();
  tree new_target, new_optimize;
  tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);

  /* If the function changed the optimization levels as well as setting target
     options, start with the optimizations specified.  */
  if (func_optimize && func_optimize != old_optimize)
    cl_optimization_restore (&global_options,
			     TREE_OPTIMIZATION (func_optimize));

  /* The target attributes may also change some optimization flags, so update
     the optimization options if necessary.  */
  cl_target_option_save (&cur_target, &global_options);
  new_target = ix86_valid_target_attribute_tree (args);
  new_optimize = build_optimization_node ();

  if (!new_target)
    ret = false;

  else if (fndecl)
    {
      DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;

      if (old_optimize != new_optimize)
	DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
    }

  cl_target_option_restore (&global_options, &cur_target);

  if (old_optimize != new_optimize)
    cl_optimization_restore (&global_options,
			     TREE_OPTIMIZATION (old_optimize));

  return ret;
}
/* Hook to determine if one function can safely inline another.  */

static bool
ix86_can_inline_p (tree caller, tree callee)
{
  bool ret = false;
  tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
  tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);

  /* If callee has no option attributes, then it is ok to inline.  */
  if (!callee_tree)
    ret = true;

  /* If caller has no option attributes, but callee does then it is not ok to
     inline.  */
  else if (!caller_tree)
    ret = false;

  else
    {
      struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
      struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);

      /* Callee's isa options should be a subset of the caller's, i.e. an SSE4
	 function can inline an SSE2 function but an SSE2 function can't
	 inline an SSE4 function.  */
      if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
	  != callee_opts->x_ix86_isa_flags)
	ret = false;

      /* See if we have the same non-isa options.  */
      else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
	ret = false;

      /* See if arch, tune, etc. are the same.  */
      else if (caller_opts->arch != callee_opts->arch)
	ret = false;

      else if (caller_opts->tune != callee_opts->tune)
	ret = false;

      else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
	ret = false;

      else if (caller_opts->branch_cost != callee_opts->branch_cost)
	ret = false;

      else
	ret = true;
    }

  return ret;
}
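
/* For illustration: under the subset rule above, a caller declared
   __attribute__ ((target ("avx"))) may inline a plain -msse2 callee (the
   caller's ISA mask contains the callee's), but an -mavx callee is never
   inlined into an -msse2 caller, since the inlined body could then execute
   AVX instructions the caller's context does not guarantee.  */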
/* Remember the last target of ix86_set_current_function.  */
static GTY(()) tree ix86_previous_fndecl;

/* Establish appropriate back-end context for processing the function
   FNDECL.  The argument might be NULL to indicate processing at top
   level, outside of any function scope.  */
static void
ix86_set_current_function (tree fndecl)
{
  /* Only change the context if the function changes.  This hook is called
     several times in the course of compiling a function, and we don't want to
     slow things down too much or call target_reinit when it isn't safe.  */
  if (fndecl && fndecl != ix86_previous_fndecl)
    {
      tree old_tree = (ix86_previous_fndecl
		       ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
		       : NULL_TREE);

      tree new_tree = (fndecl
		       ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
		       : NULL_TREE);

      ix86_previous_fndecl = fndecl;
      if (old_tree == new_tree)
	;

      else if (new_tree)
	{
	  cl_target_option_restore (&global_options,
				    TREE_TARGET_OPTION (new_tree));
	  target_reinit ();
	}

      else if (old_tree)
	{
	  struct cl_target_option *def
	    = TREE_TARGET_OPTION (target_option_current_node);

	  cl_target_option_restore (&global_options, def);
	  target_reinit ();
	}
    }
}
/* Return true if this goes in large data/bss.  */

static bool
ix86_in_large_data_p (tree exp)
{
  if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
    return false;

  /* Functions are never large data.  */
  if (TREE_CODE (exp) == FUNCTION_DECL)
    return false;

  if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
    {
      const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
      if (strcmp (section, ".ldata") == 0
	  || strcmp (section, ".lbss") == 0)
	return true;
      return false;
    }
  else
    {
      HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));

      /* If this is an incomplete type with size 0, then we can't put it
	 in data because it might be too big when completed.  */
      if (!size || size > ix86_section_threshold)
	return true;
    }

  return false;
}
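
/* For illustration: under -mcmodel=medium the test above sends objects
   larger than -mlarge-data-threshold (the option behind
   ix86_section_threshold) into .ldata/.lbss, so a file-scope
   "static char big[1 << 20];" lands in .lbss and gets 64-bit relocations,
   while small objects stay in the ordinary .data/.bss sections.  */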
/* Switch to the appropriate section for output of DECL.
   DECL is either a `VAR_DECL' node or a constant of some sort.
   RELOC indicates whether forming the initial value of DECL requires
   link-time relocations.  */

static section *x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
     ATTRIBUTE_UNUSED;

static section *
x86_64_elf_select_section (tree decl, int reloc,
			   unsigned HOST_WIDE_INT align)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && ix86_in_large_data_p (decl))
    {
      const char *sname = NULL;
      unsigned int flags = SECTION_WRITE;
      switch (categorize_decl_for_section (decl, reloc))
	{
	case SECCAT_DATA:
	  sname = ".ldata";
	  break;
	case SECCAT_DATA_REL:
	  sname = ".ldata.rel";
	  break;
	case SECCAT_DATA_REL_LOCAL:
	  sname = ".ldata.rel.local";
	  break;
	case SECCAT_DATA_REL_RO:
	  sname = ".ldata.rel.ro";
	  break;
	case SECCAT_DATA_REL_RO_LOCAL:
	  sname = ".ldata.rel.ro.local";
	  break;
	case SECCAT_BSS:
	  sname = ".lbss";
	  flags |= SECTION_BSS;
	  break;
	case SECCAT_RODATA:
	case SECCAT_RODATA_MERGE_STR:
	case SECCAT_RODATA_MERGE_STR_INIT:
	case SECCAT_RODATA_MERGE_CONST:
	  sname = ".lrodata";
	  flags = 0;
	  break;
	case SECCAT_SRODATA:
	case SECCAT_SDATA:
	case SECCAT_SBSS:
	  gcc_unreachable ();
	case SECCAT_TEXT:
	case SECCAT_TDATA:
	case SECCAT_TBSS:
	  /* We don't split these for medium model.  Place them into
	     default sections and hope for best.  */
	  break;
	}
      if (sname)
	{
	  /* We might get called with string constants, but get_named_section
	     doesn't like them as they are not DECLs.  Also, we need to set
	     flags in that case.  */
	  if (!DECL_P (decl))
	    return get_section (sname, flags, NULL);
	  return get_named_section (decl, sname, reloc);
	}
    }
  return default_elf_select_section (decl, reloc, align);
}
/* Build up a unique section name, expressed as a
   STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
   RELOC indicates whether the initial value of EXP requires
   link-time relocations.  */

static void ATTRIBUTE_UNUSED
x86_64_elf_unique_section (tree decl, int reloc)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && ix86_in_large_data_p (decl))
    {
      const char *prefix = NULL;
      /* We only need to use .gnu.linkonce if we don't have COMDAT groups.  */
      bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;

      switch (categorize_decl_for_section (decl, reloc))
	{
	case SECCAT_DATA:
	case SECCAT_DATA_REL:
	case SECCAT_DATA_REL_LOCAL:
	case SECCAT_DATA_REL_RO:
	case SECCAT_DATA_REL_RO_LOCAL:
	  prefix = one_only ? ".ld" : ".ldata";
	  break;
	case SECCAT_BSS:
	  prefix = one_only ? ".lb" : ".lbss";
	  break;
	case SECCAT_RODATA:
	case SECCAT_RODATA_MERGE_STR:
	case SECCAT_RODATA_MERGE_STR_INIT:
	case SECCAT_RODATA_MERGE_CONST:
	  prefix = one_only ? ".lr" : ".lrodata";
	  break;
	case SECCAT_SRODATA:
	case SECCAT_SDATA:
	case SECCAT_SBSS:
	  gcc_unreachable ();
	case SECCAT_TEXT:
	case SECCAT_TDATA:
	case SECCAT_TBSS:
	  /* We don't split these for medium model.  Place them into
	     default sections and hope for best.  */
	  break;
	}
      if (prefix)
	{
	  const char *name, *linkonce;
	  char *string;

	  name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
	  name = targetm.strip_name_encoding (name);

	  /* If we're using one_only, then there needs to be a .gnu.linkonce
	     prefix to the section name.  */
	  linkonce = one_only ? ".gnu.linkonce" : "";

	  string = ACONCAT ((linkonce, prefix, ".", name, NULL));

	  DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
	  return;
	}
    }
  default_unique_section (decl, reloc);
}
#ifdef COMMON_ASM_OP
/* This says how to output assembler code to declare an
   uninitialized external linkage data object.

   For medium model x86-64 we need to use .largecomm opcode for
   large objects.  */
void
x86_elf_aligned_common (FILE *file,
			const char *name, unsigned HOST_WIDE_INT size,
			int align)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && size > (unsigned int)ix86_section_threshold)
    fputs (".largecomm\t", file);
  else
    fputs (COMMON_ASM_OP, file);
  assemble_name (file, name);
  fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
	   size, align / BITS_PER_UNIT);
}
#endif
/* Utility function for targets to use in implementing
   ASM_OUTPUT_ALIGNED_BSS.  */

void
x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
			const char *name, unsigned HOST_WIDE_INT size,
			int align)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && size > (unsigned int)ix86_section_threshold)
    switch_to_section (get_named_section (decl, ".lbss", 0));
  else
    switch_to_section (bss_section);
  ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
#ifdef ASM_DECLARE_OBJECT_NAME
  last_assemble_variable_decl = decl;
  ASM_DECLARE_OBJECT_NAME (file, name, decl);
#else
  /* Standard thing is just output label for the object.  */
  ASM_OUTPUT_LABEL (file, name);
#endif /* ASM_DECLARE_OBJECT_NAME */
  ASM_OUTPUT_SKIP (file, size ? size : 1);
}
/* Decide whether we must probe the stack before any space allocation
   on this target.  It's essentially TARGET_STACK_PROBE except when
   -fstack-check causes the stack to be already probed differently.  */

bool
ix86_target_stack_probe (void)
{
  /* Do not probe the stack twice if static stack checking is enabled.  */
  if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
    return false;

  return TARGET_STACK_PROBE;
}
/* Decide whether we can make a sibling call to a function.  DECL is the
   declaration of the function being targeted by the call and EXP is the
   CALL_EXPR representing the call.  */

static bool
ix86_function_ok_for_sibcall (tree decl, tree exp)
{
  tree type, decl_or_type;
  rtx a, b;

  /* If we are generating position-independent code, we cannot sibcall
     optimize any indirect call, or a direct call to a global function,
     as the PLT requires %ebx be live. (Darwin does not have a PLT.)  */
  if (!TARGET_MACHO
      && !TARGET_64BIT
      && flag_pic
      && (!decl || !targetm.binds_local_p (decl)))
    return false;

  /* If we need to align the outgoing stack, then sibcalling would
     unalign the stack, which may break the called function.  */
  if (ix86_minimum_incoming_stack_boundary (true)
      < PREFERRED_STACK_BOUNDARY)
    return false;

  if (decl)
    {
      decl_or_type = decl;
      type = TREE_TYPE (decl);
    }
  else
    {
      /* We're looking at the CALL_EXPR, we need the type of the function.  */
      type = CALL_EXPR_FN (exp);		/* pointer expression */
      type = TREE_TYPE (type);			/* pointer type */
      type = TREE_TYPE (type);			/* function type */
      decl_or_type = type;
    }

  /* Check that the return value locations are the same.  Like
     if we are returning floats on the 80387 register stack, we cannot
     make a sibcall from a function that doesn't return a float to a
     function that does or, conversely, from a function that does return
     a float to a function that doesn't; the necessary stack adjustment
     would not be executed.  This is also the place we notice
     differences in the return value ABI.  Note that it is ok for one
     of the functions to have void return type as long as the return
     value of the other is passed in a register.  */
  a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
  b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
			   cfun->decl, false);
  if (STACK_REG_P (a) || STACK_REG_P (b))
    {
      if (!rtx_equal_p (a, b))
	return false;
    }
  else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
    {
      /* Disable sibcall if we need to generate vzeroupper after
	 callee returns.  */
      if (TARGET_VZEROUPPER
	  && cfun->machine->callee_return_avx256_p
	  && !cfun->machine->caller_return_avx256_p)
	return false;
    }
  else if (!rtx_equal_p (a, b))
    return false;

  if (TARGET_64BIT)
    {
      /* The SYSV ABI has more call-clobbered registers;
	 disallow sibcalls from MS to SYSV.  */
      if (cfun->machine->call_abi == MS_ABI
	  && ix86_function_type_abi (type) == SYSV_ABI)
	return false;
    }
  else
    {
      /* If this call is indirect, we'll need to be able to use a
	 call-clobbered register for the address of the target function.
	 Make sure that all such registers are not used for passing
	 parameters.  Note that DLLIMPORT functions are indirect.  */
      if (!decl
	  || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
	{
	  if (ix86_function_regparm (type, NULL) >= 3)
	    {
	      /* ??? Need to count the actual number of registers to be used,
		 not the possible number of registers.  Fix later.  */
	      return false;
	    }
	}
    }

  /* Otherwise okay.  That also includes certain types of indirect calls.  */
  return true;
}
/* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
   and "sseregparm" calling convention attributes;
   arguments as in struct attribute_spec.handler.  */

static tree
ix86_handle_cconv_attribute (tree *node, tree name,
			     tree args,
			     int flags ATTRIBUTE_UNUSED,
			     bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_TYPE
      && TREE_CODE (*node) != METHOD_TYPE
      && TREE_CODE (*node) != FIELD_DECL
      && TREE_CODE (*node) != TYPE_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
      return NULL_TREE;
    }

  /* Can combine regparm with all attributes but fastcall, and thiscall.  */
  if (is_attribute_p ("regparm", name))
    {
      tree cst;

      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
	{
	  error ("fastcall and regparm attributes are not compatible");
	}

      if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
	{
	  error ("regparam and thiscall attributes are not compatible");
	}

      cst = TREE_VALUE (args);
      if (TREE_CODE (cst) != INTEGER_CST)
	{
	  warning (OPT_Wattributes,
		   "%qE attribute requires an integer constant argument",
		   name);
	  *no_add_attrs = true;
	}
      else if (compare_tree_int (cst, REGPARM_MAX) > 0)
	{
	  warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
		   name, REGPARM_MAX);
	  *no_add_attrs = true;
	}

      return NULL_TREE;
    }

  if (TARGET_64BIT)
    {
      /* Do not warn when emulating the MS ABI.  */
      if ((TREE_CODE (*node) != FUNCTION_TYPE
	   && TREE_CODE (*node) != METHOD_TYPE)
	  || ix86_function_type_abi (*node) != MS_ABI)
	warning (OPT_Wattributes, "%qE attribute ignored",
		 name);
      *no_add_attrs = true;
      return NULL_TREE;
    }

  /* Can combine fastcall with stdcall (redundant) and sseregparm.  */
  if (is_attribute_p ("fastcall", name))
    {
      if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
	{
	  error ("fastcall and cdecl attributes are not compatible");
	}
      if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
	{
	  error ("fastcall and stdcall attributes are not compatible");
	}
      if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
	{
	  error ("fastcall and regparm attributes are not compatible");
	}
      if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
	{
	  error ("fastcall and thiscall attributes are not compatible");
	}
    }

  /* Can combine stdcall with fastcall (redundant), regparm and
     sseregparm.  */
  else if (is_attribute_p ("stdcall", name))
    {
      if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
	{
	  error ("stdcall and cdecl attributes are not compatible");
	}
      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
	{
	  error ("stdcall and fastcall attributes are not compatible");
	}
      if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
	{
	  error ("stdcall and thiscall attributes are not compatible");
	}
    }

  /* Can combine cdecl with regparm and sseregparm.  */
  else if (is_attribute_p ("cdecl", name))
    {
      if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
	{
	  error ("stdcall and cdecl attributes are not compatible");
	}
      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
	{
	  error ("fastcall and cdecl attributes are not compatible");
	}
      if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
	{
	  error ("cdecl and thiscall attributes are not compatible");
	}
    }
  else if (is_attribute_p ("thiscall", name))
    {
      if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
	warning (OPT_Wattributes, "%qE attribute is used for none class-method",
		 name);
      if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
	{
	  error ("stdcall and thiscall attributes are not compatible");
	}
      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
	{
	  error ("fastcall and thiscall attributes are not compatible");
	}
      if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
	{
	  error ("cdecl and thiscall attributes are not compatible");
	}
    }

  /* Can combine sseregparm with all attributes.  */

  return NULL_TREE;
}
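
/* For illustration, the attributes validated above appear in user code as

     int __attribute__ ((fastcall)) f (int a, int b);      first two args in
							    ECX/EDX, rest on
							    the stack
     int __attribute__ ((regparm (3))) g (int, int, int);  up to 3 register
							    args
     int __attribute__ ((stdcall)) h (int);                callee pops args

   and the checks above reject contradictory mixes such as
   __attribute__ ((fastcall, cdecl)).  */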
/* This function determines from TYPE the calling-convention.  */

unsigned int
ix86_get_callcvt (const_tree type)
{
  unsigned int ret = 0;
  bool is_stdarg;
  tree attrs;

  if (TARGET_64BIT)
    return IX86_CALLCVT_CDECL;

  attrs = TYPE_ATTRIBUTES (type);
  if (attrs != NULL_TREE)
    {
      if (lookup_attribute ("cdecl", attrs))
	ret |= IX86_CALLCVT_CDECL;
      else if (lookup_attribute ("stdcall", attrs))
	ret |= IX86_CALLCVT_STDCALL;
      else if (lookup_attribute ("fastcall", attrs))
	ret |= IX86_CALLCVT_FASTCALL;
      else if (lookup_attribute ("thiscall", attrs))
	ret |= IX86_CALLCVT_THISCALL;

      /* Regparam isn't allowed for thiscall and fastcall.  */
      if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
	{
	  if (lookup_attribute ("regparm", attrs))
	    ret |= IX86_CALLCVT_REGPARM;
	  if (lookup_attribute ("sseregparm", attrs))
	    ret |= IX86_CALLCVT_SSEREGPARM;
	}

      if (IX86_BASE_CALLCVT(ret) != 0)
	return ret;
    }

  is_stdarg = stdarg_p (type);
  if (TARGET_RTD && !is_stdarg)
    return IX86_CALLCVT_STDCALL | ret;

  if (ret != 0
      || is_stdarg
      || TREE_CODE (type) != METHOD_TYPE
      || ix86_function_type_abi (type) != MS_ABI)
    return IX86_CALLCVT_CDECL | ret;

  return IX86_CALLCVT_THISCALL;
}
/* Return 0 if the attributes for two types are incompatible, 1 if they
   are compatible, and 2 if they are nearly compatible (which causes a
   warning to be generated).  */

static int
ix86_comp_type_attributes (const_tree type1, const_tree type2)
{
  unsigned int ccvt1, ccvt2;

  if (TREE_CODE (type1) != FUNCTION_TYPE
      && TREE_CODE (type1) != METHOD_TYPE)
    return 1;

  ccvt1 = ix86_get_callcvt (type1);
  ccvt2 = ix86_get_callcvt (type2);
  if (ccvt1 != ccvt2)
    return 0;
  if (ix86_function_regparm (type1, NULL)
      != ix86_function_regparm (type2, NULL))
    return 0;

  return 1;
}
/* Return the regparm value for a function with the indicated TYPE and DECL.
   DECL may be NULL when calling function indirectly
   or considering a libcall.  */

static int
ix86_function_regparm (const_tree type, const_tree decl)
{
  tree attr;
  int regparm;
  unsigned int ccvt;

  if (TARGET_64BIT)
    return (ix86_function_type_abi (type) == SYSV_ABI
	    ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
  ccvt = ix86_get_callcvt (type);
  regparm = ix86_regparm;

  if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
    {
      attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
      if (attr)
	{
	  regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
	  return regparm;
	}
    }
  else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
    return 2;
  else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
    return 1;

  /* Use register calling convention for local functions when possible.  */
  if (decl
      && TREE_CODE (decl) == FUNCTION_DECL
      && optimize
      && !(profile_flag && !flag_fentry))
    {
      /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified.  */
      struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
      if (i && i->local && i->can_change_signature)
	{
	  int local_regparm, globals = 0, regno;

	  /* Make sure no regparm register is taken by a
	     fixed register variable.  */
	  for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
	    if (fixed_regs[local_regparm])
	      break;

	  /* We don't want to use regparm(3) for nested functions as
	     these use a static chain pointer in the third argument.  */
	  if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
	    local_regparm = 2;

	  /* In 32-bit mode save a register for the split stack.  */
	  if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack)
	    local_regparm = 2;

	  /* Each fixed register usage increases register pressure,
	     so less registers should be used for argument passing.
	     This functionality can be overriden by an explicit
	     regparm value.  */
	  for (regno = 0; regno <= DI_REG; regno++)
	    if (fixed_regs[regno])
	      globals++;

	  local_regparm
	    = globals < local_regparm ? local_regparm - globals : 0;

	  if (local_regparm > regparm)
	    regparm = local_regparm;
	}
    }

  return regparm;
}
/* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
   DFmode (2) arguments in SSE registers for a function with the
   indicated TYPE and DECL.  DECL may be NULL when calling function
   indirectly or considering a libcall.  Otherwise return 0.  */

static int
ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
{
  gcc_assert (!TARGET_64BIT);

  /* Use SSE registers to pass SFmode and DFmode arguments if requested
     by the sseregparm attribute.  */
  if (TARGET_SSEREGPARM
      || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
    {
      if (!TARGET_SSE)
	{
	  if (warn)
	    {
	      if (decl)
		error ("calling %qD with attribute sseregparm without "
		       "SSE/SSE2 enabled", decl);
	      else
		error ("calling %qT with attribute sseregparm without "
		       "SSE/SSE2 enabled", type);
	    }
	  return 0;
	}

      return 2;
    }

  /* For local functions, pass up to SSE_REGPARM_MAX SFmode
     (and DFmode for SSE2) arguments in SSE registers.  */
  if (decl && TARGET_SSE_MATH && optimize
      && !(profile_flag && !flag_fentry))
    {
      /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified.  */
      struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
      if (i && i->local && i->can_change_signature)
	return TARGET_SSE2 ? 2 : 1;
    }

  return 0;
}
/* Return true if EAX is live at the start of the function.  Used by
   ix86_expand_prologue to determine if we need special help before
   calling allocate_stack_worker.  */

static bool
ix86_eax_live_at_start_p (void)
{
  /* Cheat.  Don't bother working forward from ix86_function_regparm
     to the function type to whether an actual argument is located in
     eax.  Instead just look at cfg info, which is still close enough
     to correct at this point.  This gives false positives for broken
     functions that might use uninitialized data that happens to be
     allocated in eax, but who cares?  */
  return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
}
static bool
ix86_keep_aggregate_return_pointer (tree fntype)
{
  tree attr;

  if (!TARGET_64BIT)
    {
      attr = lookup_attribute ("callee_pop_aggregate_return",
			       TYPE_ATTRIBUTES (fntype));
      if (attr)
	return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);

      /* For 32-bit MS-ABI the default is to keep aggregate
	 return pointer.  */
      if (ix86_function_type_abi (fntype) == MS_ABI)
	return true;
    }
  return KEEP_AGGREGATE_RETURN_POINTER != 0;
}
/* Value is the number of bytes of arguments automatically
   popped when returning from a subroutine call.
   FUNDECL is the declaration node of the function (as a tree),
   FUNTYPE is the data type of the function (as a tree),
   or for a library call it is an identifier node for the subroutine name.
   SIZE is the number of bytes of arguments passed on the stack.

   On the 80386, the RTD insn may be used to pop them if the number
     of args is fixed, but if the number is variable then the caller
     must pop them all.  RTD can't be used for library calls now
     because the library is compiled with the Unix compiler.
   Use of RTD is a selectable option, since it is incompatible with
   standard Unix calling sequences.  If the option is not selected,
   the caller must always pop the args.

   The attribute stdcall is equivalent to RTD on a per module basis.  */

static int
ix86_return_pops_args (tree fundecl, tree funtype, int size)
{
  unsigned int ccvt;

  /* None of the 64-bit ABIs pop arguments.  */
  if (TARGET_64BIT)
    return 0;

  ccvt = ix86_get_callcvt (funtype);

  if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
	       | IX86_CALLCVT_THISCALL)) != 0
      && ! stdarg_p (funtype))
    return size;

  /* Lose any fake structure return argument if it is passed on the stack.  */
  if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
      && !ix86_keep_aggregate_return_pointer (funtype))
    {
      int nregs = ix86_function_regparm (funtype, fundecl);
      if (nregs == 0)
	return GET_MODE_SIZE (Pmode);
    }

  return 0;
}
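
/* For illustration: for "int __attribute__ ((stdcall)) f (int, int);" the
   hook above returns 8, so the callee's epilogue pops both word-sized
   arguments with "ret $8"; a varargs or plain cdecl function returns 0 and
   leaves the popping to the caller.  */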
/* Argument support functions.  */

/* Return true when register may be used to pass function parameters.  */

bool
ix86_function_arg_regno_p (int regno)
{
  int i;
  const int *parm_regs;

  if (!TARGET_64BIT)
    {
      if (TARGET_MACHO)
	return (regno < REGPARM_MAX
		|| (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
      else
	return (regno < REGPARM_MAX
		|| (TARGET_MMX && MMX_REGNO_P (regno)
		    && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
		|| (TARGET_SSE && SSE_REGNO_P (regno)
		    && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
    }

  if (TARGET_MACHO)
    {
      if (SSE_REGNO_P (regno) && TARGET_SSE)
	return true;
    }
  else
    {
      if (TARGET_SSE && SSE_REGNO_P (regno)
	  && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
	return true;
    }

  /* TODO: The function should depend on current function ABI but
     builtins.c would need updating then.  Therefore we use the
     default ABI.  */

  /* RAX is used as hidden argument to va_arg functions.  */
  if (ix86_abi == SYSV_ABI && regno == AX_REG)
    return true;

  if (ix86_abi == MS_ABI)
    parm_regs = x86_64_ms_abi_int_parameter_registers;
  else
    parm_regs = x86_64_int_parameter_registers;
  for (i = 0; i < (ix86_abi == MS_ABI
		   ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
    if (regno == parm_regs[i])
      return true;
  return false;
}
/* Return if we do not know how to pass TYPE solely in registers.  */

static bool
ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
{
  if (must_pass_in_stack_var_size_or_pad (mode, type))
    return true;

  /* For 32-bit, we want TImode aggregates to go on the stack.  But watch out!
     The layout_type routine is crafty and tries to trick us into passing
     currently unsupported vector types on the stack by using TImode.  */
  return (!TARGET_64BIT && mode == TImode
	  && type && TREE_CODE (type) != VECTOR_TYPE);
}
/* It returns the size, in bytes, of the area reserved for arguments passed
   in registers for the function represented by fndecl dependent to the used
   abi format.  */
int
ix86_reg_parm_stack_space (const_tree fndecl)
{
  enum calling_abi call_abi = SYSV_ABI;
  if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
    call_abi = ix86_function_abi (fndecl);
  else
    call_abi = ix86_function_type_abi (fndecl);
  if (TARGET_64BIT && call_abi == MS_ABI)
    return 32;
  return 0;
}
/* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the
   call abi used.  */
enum calling_abi
ix86_function_type_abi (const_tree fntype)
{
  if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE)
    {
      enum calling_abi abi = ix86_abi;
      if (abi == SYSV_ABI)
	{
	  if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
	    abi = MS_ABI;
	}
      else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
	abi = SYSV_ABI;
      return abi;
    }
  return ix86_abi;
}
static bool
ix86_function_ms_hook_prologue (const_tree fn)
{
  if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
    {
      if (decl_function_context (fn) != NULL_TREE)
	error_at (DECL_SOURCE_LOCATION (fn),
		  "ms_hook_prologue is not compatible with nested function");
      else
	return true;
    }
  return false;
}

static enum calling_abi
ix86_function_abi (const_tree fndecl)
{
  if (! fndecl)
    return ix86_abi;
  return ix86_function_type_abi (TREE_TYPE (fndecl));
}

/* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the
   call abi used.  */
enum calling_abi
ix86_cfun_abi (void)
{
  if (! cfun)
    return ix86_abi;
  return cfun->machine->call_abi;
}
/* Write the extra assembler code needed to declare a function properly.  */

void
ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
				tree decl)
{
  bool is_ms_hook = ix86_function_ms_hook_prologue (decl);

  if (is_ms_hook)
    {
      int i, filler_count = (TARGET_64BIT ? 32 : 16);
      unsigned int filler_cc = 0xcccccccc;

      for (i = 0; i < filler_count; i += 4)
	fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
    }

#ifdef SUBTARGET_ASM_UNWIND_INIT
  SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
#endif

  ASM_OUTPUT_LABEL (asm_out_file, fname);

  /* Output magic byte marker, if hot-patch attribute is set.  */
  if (is_ms_hook)
    {
      if (TARGET_64BIT)
	{
	  /* leaq [%rsp + 0], %rsp  */
	  asm_fprintf (asm_out_file, ASM_BYTE
		       "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
	}
      else
	{
	  /* movl.s %edi, %edi
	     push   %ebp
	     movl.s %esp, %ebp  */
	  asm_fprintf (asm_out_file, ASM_BYTE
		       "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
	}
    }
}
extern void init_regs (void);

/* Implementation of call abi switching target hook.  Specific to FNDECL
   the specific call register sets are set.  See also
   ix86_conditional_register_usage for more details.  */
void
ix86_call_abi_override (const_tree fndecl)
{
  if (fndecl == NULL_TREE)
    cfun->machine->call_abi = ix86_abi;
  else
    cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
}

/* 64-bit MS and SYSV ABI have different set of call used registers.  Avoid
   expensive re-initialization of init_regs each time we switch function
   context since this is needed only during RTL expansion.  */
static void
ix86_maybe_switch_abi (void)
{
  if (TARGET_64BIT &&
      call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
    reinit_regs ();
}
/* Initialize a variable CUM of type CUMULATIVE_ARGS
   for a call to a function whose data type is FNTYPE.
   For a library call, FNTYPE is 0.  */

void
init_cumulative_args (CUMULATIVE_ARGS *cum,  /* Argument info to initialize */
		      tree fntype,	/* tree ptr for function decl */
		      rtx libname,	/* SYMBOL_REF of library name or 0 */
		      tree fndecl,
		      int caller)
{
  struct cgraph_local_info *i;
  tree fnret_type;

  memset (cum, 0, sizeof (*cum));

  /* Initialize for the current callee.  */
  if (caller)
    {
      cfun->machine->callee_pass_avx256_p = false;
      cfun->machine->callee_return_avx256_p = false;
    }

  if (fndecl)
    {
      i = cgraph_local_info (fndecl);
      cum->call_abi = ix86_function_abi (fndecl);
      fnret_type = TREE_TYPE (TREE_TYPE (fndecl));
    }
  else
    {
      i = NULL;
      cum->call_abi = ix86_function_type_abi (fntype);
      if (fntype)
	fnret_type = TREE_TYPE (fntype);
      else
	fnret_type = NULL;
    }

  if (TARGET_VZEROUPPER && fnret_type)
    {
      rtx fnret_value = ix86_function_value (fnret_type, fntype,
					     false);
      if (function_pass_avx256_p (fnret_value))
	{
	  /* The return value of this function uses 256bit AVX modes.  */
	  if (caller)
	    cfun->machine->callee_return_avx256_p = true;
	  else
	    cfun->machine->caller_return_avx256_p = true;
	}
    }

  cum->caller = caller;

  /* Set up the number of registers to use for passing arguments.  */

  if (TARGET_64BIT && cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
    sorry ("ms_abi attribute requires -maccumulate-outgoing-args "
	   "or subtarget optimization implying it");
  cum->nregs = ix86_regparm;
  if (TARGET_64BIT)
    {
      cum->nregs = (cum->call_abi == SYSV_ABI
		    ? X86_64_REGPARM_MAX
		    : X86_64_MS_REGPARM_MAX);
    }
  if (TARGET_SSE)
    {
      cum->sse_nregs = SSE_REGPARM_MAX;
      if (TARGET_64BIT)
	{
	  cum->sse_nregs = (cum->call_abi == SYSV_ABI
			    ? X86_64_SSE_REGPARM_MAX
			    : X86_64_MS_SSE_REGPARM_MAX);
	}
    }
  if (TARGET_MMX)
    cum->mmx_nregs = MMX_REGPARM_MAX;
  cum->warn_avx = true;
  cum->warn_sse = true;
  cum->warn_mmx = true;

  /* Because types might mismatch between caller and callee, we need to
     use the actual type of the function for local calls.
     FIXME: cgraph_analyze can be told to actually record if function uses
     va_start so for local functions maybe_vaarg can be made aggressive
     basing on the variadic type of function.

     FIXME: once the typesystem is fixed, we won't need this code anymore.  */
  if (i && i->local && i->can_change_signature)
    fntype = TREE_TYPE (fndecl);
  cum->maybe_vaarg = (fntype
		      ? (!prototype_p (fntype) || stdarg_p (fntype))
		      : !libname);

  if (!TARGET_64BIT)
    {
      /* If there are variable arguments, then we won't pass anything
	 in registers in 32-bit mode. */
      if (stdarg_p (fntype))
	{
	  cum->nregs = 0;
	  cum->sse_nregs = 0;
	  cum->mmx_nregs = 0;
	  return;
	}

      /* Use ecx and edx registers if function has fastcall attribute,
	 else look for regparm information.  */
      if (fntype)
	{
	  unsigned int ccvt = ix86_get_callcvt (fntype);
	  if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
	    {
	      cum->nregs = 1;
	      cum->fastcall = 1; /* Same first register as in fastcall.  */
	    }
	  else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
	    {
	      cum->nregs = 2;
	      cum->fastcall = 1;
	    }
	  else
	    cum->nregs = ix86_function_regparm (fntype, fndecl);
	}

      /* Set up the number of SSE registers used for passing SFmode
	 and DFmode arguments.  Warn for mismatching ABI.  */
      cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
    }
}
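#if 0
/* Illustrative sketch (hypothetical, not part of GCC): the generic calling
   code drives the argument-passing hooks in this file roughly like this --
   initialize CUM once, then query and advance it per argument.  NARGS and
   ARG_TYPES are placeholder names, not real variables.  */
{
  CUMULATIVE_ARGS cum;
  int n;

  init_cumulative_args (&cum, fntype, libname, fndecl, /*caller=*/true);
  for (n = 0; n < nargs; n++)
    {
      enum machine_mode mode = TYPE_MODE (arg_types[n]);

      /* Where does argument N live?  A REG/PARALLEL, or NULL for stack.  */
      rtx loc = ix86_function_arg (pack_cumulative_args (&cum), mode,
				   arg_types[n], /*named=*/true);
      /* Consume the registers that argument used.  */
      ix86_function_arg_advance (pack_cumulative_args (&cum), mode,
				 arg_types[n], /*named=*/true);
    }
}
#endif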
/* Return the "natural" mode for TYPE.  In most cases, this is just TYPE_MODE.
   But in the case of vector types, it is some vector mode.

   When we have only some of our vector isa extensions enabled, then there
   are some modes for which vector_mode_supported_p is false.  For these
   modes, the generic vector support in gcc will choose some non-vector mode
   in order to implement the type.  By computing the natural mode, we'll
   select the proper ABI location for the operand and not depend on whatever
   the middle-end decides to do with these vector types.

   The middle-end can't deal with vector types > 16 bytes.  In this
   case, we return the original mode and warn about the ABI change if CUM
   isn't NULL.  */

static enum machine_mode
type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum)
{
  enum machine_mode mode = TYPE_MODE (type);

  if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      if ((size == 8 || size == 16 || size == 32)
	  /* ??? Generic code allows us to create width 1 vectors.  Ignore.  */
	  && TYPE_VECTOR_SUBPARTS (type) > 1)
	{
	  enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));

	  if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
	    mode = MIN_MODE_VECTOR_FLOAT;
	  else
	    mode = MIN_MODE_VECTOR_INT;

	  /* Get the mode which has this inner mode and number of units.  */
	  for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
	    if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
		&& GET_MODE_INNER (mode) == innermode)
	      {
		if (size == 32 && !TARGET_AVX)
		  {
		    static bool warnedavx;

		    if (cum && !warnedavx && cum->warn_avx)
		      {
			warnedavx = true;
			warning (0, "AVX vector argument without AVX "
				 "enabled changes the ABI");
		      }
		    return TYPE_MODE (type);
		  }

		return mode;
	      }

	  gcc_unreachable ();
	}
    }

  return mode;
}
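#if 0
/* Illustrative user-level example (hypothetical, not part of GCC): for the
   32-byte vector type below, type_natural_mode recomputes V8SImode from the
   element type and the subpart count, so the psABI slot does not depend on
   whatever fallback mode the middle-end picked; with !TARGET_AVX the
   function instead returns TYPE_MODE and warns once that the ABI changes.  */
typedef int v8si __attribute__ ((vector_size (32)));

v8si
vadd (v8si a, v8si b)		/* with AVX: a in %ymm0, b in %ymm1 */
{
  return a + b;
}
#endif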
/* We want to pass a value in REGNO whose "natural" mode is MODE.  However,
   this may not agree with the mode that the type system has chosen for the
   register, which is ORIG_MODE.  If ORIG_MODE is not BLKmode, then we can
   go ahead and use it.  Otherwise we have to build a PARALLEL instead.  */

static rtx
gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
		     unsigned int regno)
{
  rtx tmp;

  if (orig_mode != BLKmode)
    tmp = gen_rtx_REG (orig_mode, regno);
  else
    {
      tmp = gen_rtx_REG (mode, regno);
      tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
      tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
    }

  return tmp;
}
/* x86-64 register passing implementation.  See x86-64 ABI for details.  Goal
   of this code is to classify each 8bytes of incoming argument by the register
   class and assign registers accordingly.  */

/* Return the union class of CLASS1 and CLASS2.
   See the x86-64 PS ABI for details.  */

static enum x86_64_reg_class
merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
{
  /* Rule #1: If both classes are equal, this is the resulting class.  */
  if (class1 == class2)
    return class1;

  /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
     the other class.  */
  if (class1 == X86_64_NO_CLASS)
    return class2;
  if (class2 == X86_64_NO_CLASS)
    return class1;

  /* Rule #3: If one of the classes is MEMORY, the result is MEMORY.  */
  if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
    return X86_64_MEMORY_CLASS;

  /* Rule #4: If one of the classes is INTEGER, the result is INTEGER.  */
  if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
      || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
    return X86_64_INTEGERSI_CLASS;
  if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
      || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
    return X86_64_INTEGER_CLASS;

  /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
     MEMORY is used.  */
  if (class1 == X86_64_X87_CLASS
      || class1 == X86_64_X87UP_CLASS
      || class1 == X86_64_COMPLEX_X87_CLASS
      || class2 == X86_64_X87_CLASS
      || class2 == X86_64_X87UP_CLASS
      || class2 == X86_64_COMPLEX_X87_CLASS)
    return X86_64_MEMORY_CLASS;

  /* Rule #6: Otherwise class SSE is used.  */
  return X86_64_SSE_CLASS;
}
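#if 0
/* Standalone sketch of the merge rules above, runnable outside GCC; the
   class names are abbreviated stand-ins for enum x86_64_reg_class.  For
   union { int i; float f; } both fields classify into the same eightbyte
   (INTEGERSI and SSESF), and rule #4 picks INTEGERSI, i.e. a GPR.  */
#include <assert.h>

enum cls { NO_CLASS, INTEGERSI, INTEGER, SSESF, SSE, MEMORY };

static enum cls
merge (enum cls c1, enum cls c2)
{
  if (c1 == c2)
    return c1;					/* rule #1 */
  if (c1 == NO_CLASS)
    return c2;					/* rule #2 */
  if (c2 == NO_CLASS)
    return c1;
  if (c1 == MEMORY || c2 == MEMORY)
    return MEMORY;				/* rule #3 */
  if ((c1 == INTEGERSI && c2 == SSESF)
      || (c2 == INTEGERSI && c1 == SSESF))
    return INTEGERSI;				/* rule #4, SImode + SFmode */
  if (c1 == INTEGER || c1 == INTEGERSI
      || c2 == INTEGER || c2 == INTEGERSI)
    return INTEGER;				/* rule #4 */
  return SSE;					/* rule #6 (x87 omitted here) */
}

int
main (void)
{
  assert (merge (INTEGERSI, SSESF) == INTEGERSI);  /* union { int; float; } */
  assert (merge (NO_CLASS, SSE) == SSE);	   /* padding + vector */
  return 0;
}
#endif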
/* Classify the argument of type TYPE and mode MODE.
   CLASSES will be filled by the register class used to pass each word
   of the operand.  The number of words is returned.  In case the parameter
   should be passed in memory, 0 is returned. As a special case for zero
   sized containers, classes[0] will be NO_CLASS and 1 is returned.

   BIT_OFFSET is used internally for handling records and specifies offset
   of the offset in bits modulo 256 to avoid overflow cases.

   See the x86-64 PS ABI for details.  */

static int
classify_argument (enum machine_mode mode, const_tree type,
		   enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
{
  HOST_WIDE_INT bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  /* Variable sized entities are always passed/returned in memory.  */
  if (bytes < 0)
    return 0;

  if (mode != VOIDmode
      && targetm.calls.must_pass_in_stack (mode, type))
    return 0;

  if (type && AGGREGATE_TYPE_P (type))
    {
      int i;
      tree field;
      enum x86_64_reg_class subclasses[MAX_CLASSES];

      /* On x86-64 we pass structures larger than 32 bytes on the stack.  */
      if (bytes > 32)
	return 0;

      for (i = 0; i < words; i++)
	classes[i] = X86_64_NO_CLASS;

      /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
	 signalize memory class, so handle it as special case.  */
      if (!words)
	{
	  classes[0] = X86_64_NO_CLASS;
	  return 1;
	}

      /* Classify each field of record and merge classes.  */
      switch (TREE_CODE (type))
	{
	case RECORD_TYPE:
	  /* And now merge the fields of structure.  */
	  for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
	    {
	      if (TREE_CODE (field) == FIELD_DECL)
		{
		  int num;

		  if (TREE_TYPE (field) == error_mark_node)
		    continue;

		  /* Bitfields are always classified as integer.  Handle them
		     early, since later code would consider them to be
		     misaligned integers.  */
		  if (DECL_BIT_FIELD (field))
		    {
		      for (i = (int_bit_position (field)
				+ (bit_offset % 64)) / 8 / 8;
			   i < ((int_bit_position (field) + (bit_offset % 64))
				+ tree_low_cst (DECL_SIZE (field), 0)
				+ 63) / 8 / 8; i++)
			classes[i] =
			  merge_classes (X86_64_INTEGER_CLASS,
					 classes[i]);
		    }
		  else
		    {
		      int pos;

		      type = TREE_TYPE (field);

		      /* Flexible array member is ignored.  */
		      if (TYPE_MODE (type) == BLKmode
			  && TREE_CODE (type) == ARRAY_TYPE
			  && TYPE_SIZE (type) == NULL_TREE
			  && TYPE_DOMAIN (type) != NULL_TREE
			  && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
			      == NULL_TREE))
			{
			  static bool warned;

			  if (!warned && warn_psabi)
			    {
			      warned = true;
			      inform (input_location,
				      "the ABI of passing struct with"
				      " a flexible array member has"
				      " changed in GCC 4.4");
			    }
			  continue;
			}
		      num = classify_argument (TYPE_MODE (type), type,
					       subclasses,
					       (int_bit_position (field)
						+ bit_offset) % 256);
		      if (!num)
			return 0;
		      pos = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
		      for (i = 0; i < num && (i + pos) < words; i++)
			classes[i + pos] =
			  merge_classes (subclasses[i], classes[i + pos]);
		    }
		}
	    }
	  break;

	case ARRAY_TYPE:
	  /* Arrays are handled as small records.  */
	  {
	    int num;
	    num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
				     TREE_TYPE (type), subclasses, bit_offset);
	    if (!num)
	      return 0;

	    /* The partial classes are now full classes.  */
	    if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
	      subclasses[0] = X86_64_SSE_CLASS;
	    if (subclasses[0] == X86_64_INTEGERSI_CLASS
		&& !((bit_offset % 64) == 0 && bytes == 4))
	      subclasses[0] = X86_64_INTEGER_CLASS;

	    for (i = 0; i < words; i++)
	      classes[i] = subclasses[i % num];

	    break;
	  }
	case UNION_TYPE:
	case QUAL_UNION_TYPE:
	  /* Unions are similar to RECORD_TYPE but offset is always 0.  */
	  for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
	    {
	      if (TREE_CODE (field) == FIELD_DECL)
		{
		  int num;

		  if (TREE_TYPE (field) == error_mark_node)
		    continue;

		  num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
					   TREE_TYPE (field), subclasses,
					   bit_offset);
		  if (!num)
		    return 0;
		  for (i = 0; i < num; i++)
		    classes[i] = merge_classes (subclasses[i], classes[i]);
		}
	    }
	  break;

	default:
	  gcc_unreachable ();
	}

      if (words > 2)
	{
	  /* When size > 16 bytes, if the first one isn't
	     X86_64_SSE_CLASS or any other ones aren't
	     X86_64_SSEUP_CLASS, everything should be passed in
	     memory.  */
	  if (classes[0] != X86_64_SSE_CLASS)
	    return 0;

	  for (i = 1; i < words; i++)
	    if (classes[i] != X86_64_SSEUP_CLASS)
	      return 0;
	}

      /* Final merger cleanup.  */
      for (i = 0; i < words; i++)
	{
	  /* If one class is MEMORY, everything should be passed in
	     memory.  */
	  if (classes[i] == X86_64_MEMORY_CLASS)
	    return 0;

	  /* The X86_64_SSEUP_CLASS should be always preceded by
	     X86_64_SSE_CLASS or X86_64_SSEUP_CLASS.  */
	  if (classes[i] == X86_64_SSEUP_CLASS
	      && classes[i - 1] != X86_64_SSE_CLASS
	      && classes[i - 1] != X86_64_SSEUP_CLASS)
	    {
	      /* The first one should never be X86_64_SSEUP_CLASS.  */
	      gcc_assert (i != 0);
	      classes[i] = X86_64_SSE_CLASS;
	    }

	  /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
	     everything should be passed in memory.  */
	  if (classes[i] == X86_64_X87UP_CLASS
	      && (classes[i - 1] != X86_64_X87_CLASS))
	    {
	      static bool warned;

	      /* The first one should never be X86_64_X87UP_CLASS.  */
	      gcc_assert (i != 0);
	      if (!warned && warn_psabi)
		{
		  warned = true;
		  inform (input_location,
			  "the ABI of passing union with long double"
			  " has changed in GCC 4.4");
		}
	      return 0;
	    }
	}
      return words;
    }

  /* Compute alignment needed.  We align all types to natural boundaries with
     exception of XFmode that is aligned to 64bits.  */
  if (mode != VOIDmode && mode != BLKmode)
    {
      int mode_alignment = GET_MODE_BITSIZE (mode);

      if (mode == XFmode)
	mode_alignment = 128;
      else if (mode == XCmode)
	mode_alignment = 256;
      if (COMPLEX_MODE_P (mode))
	mode_alignment /= 2;
      /* Misaligned fields are always returned in memory.  */
      if (bit_offset % mode_alignment)
	return 0;
    }

  /* for V1xx modes, just use the base mode */
  if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
      && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
    mode = GET_MODE_INNER (mode);

  /* Classification of atomic types.  */
  switch (mode)
    {
    case SDmode:
    case DDmode:
      classes[0] = X86_64_SSE_CLASS;
      return 1;
    case TDmode:
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      return 2;
    case DImode:
    case SImode:
    case HImode:
    case QImode:
    case CSImode:
    case CHImode:
    case CQImode:
      {
	int size = (bit_offset % 64) + (int) GET_MODE_BITSIZE (mode);

	if (size <= 32)
	  {
	    classes[0] = X86_64_INTEGERSI_CLASS;
	    return 1;
	  }
	else if (size <= 64)
	  {
	    classes[0] = X86_64_INTEGER_CLASS;
	    return 1;
	  }
	else if (size <= 64+32)
	  {
	    classes[0] = X86_64_INTEGER_CLASS;
	    classes[1] = X86_64_INTEGERSI_CLASS;
	    return 2;
	  }
	else if (size <= 64+64)
	  {
	    classes[0] = classes[1] = X86_64_INTEGER_CLASS;
	    return 2;
	  }
	else
	  gcc_unreachable ();
      }
    case CDImode:
    case TImode:
      classes[0] = classes[1] = X86_64_INTEGER_CLASS;
      return 2;
    case COImode:
    case OImode:
      /* OImode shouldn't be used directly.  */
      gcc_unreachable ();
    case CTImode:
      return 0;
    case SFmode:
      if (!(bit_offset % 64))
	classes[0] = X86_64_SSESF_CLASS;
      else
	classes[0] = X86_64_SSE_CLASS;
      return 1;
    case DFmode:
      classes[0] = X86_64_SSEDF_CLASS;
      return 1;
    case XFmode:
      classes[0] = X86_64_X87_CLASS;
      classes[1] = X86_64_X87UP_CLASS;
      return 2;
    case TFmode:
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      return 2;
    case SCmode:
      classes[0] = X86_64_SSE_CLASS;
      if (!(bit_offset % 64))
	return 1;
      else
	{
	  static bool warned;

	  if (!warned && warn_psabi)
	    {
	      warned = true;
	      inform (input_location,
		      "the ABI of passing structure with complex float"
		      " member has changed in GCC 4.4");
	    }
	  classes[1] = X86_64_SSESF_CLASS;
	  return 2;
	}
    case DCmode:
      classes[0] = X86_64_SSEDF_CLASS;
      classes[1] = X86_64_SSEDF_CLASS;
      return 2;
    case XCmode:
      classes[0] = X86_64_COMPLEX_X87_CLASS;
      return 1;
    case TCmode:
      /* This mode is larger than 16 bytes.  */
      return 0;
    case V8SFmode:
    case V8SImode:
    case V32QImode:
    case V16HImode:
    case V4DFmode:
    case V4DImode:
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      classes[2] = X86_64_SSEUP_CLASS;
      classes[3] = X86_64_SSEUP_CLASS;
      return 4;
    case V4SFmode:
    case V4SImode:
    case V16QImode:
    case V8HImode:
    case V2DFmode:
    case V2DImode:
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      return 2;
    case V1TImode:
    case V1DImode:
    case V2SFmode:
    case V2SImode:
    case V4HImode:
    case V8QImode:
      classes[0] = X86_64_SSE_CLASS;
      return 1;
    case BLKmode:
    case VOIDmode:
      return 0;

    default:
      gcc_assert (VECTOR_MODE_P (mode));

      if (bytes > 16)
	return 0;

      gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);

      if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
	classes[0] = X86_64_INTEGERSI_CLASS;
      else
	classes[0] = X86_64_INTEGER_CLASS;
      classes[1] = X86_64_INTEGER_CLASS;
      return 1 + (bytes > 8);
    }
}
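#if 0
/* Illustrative user-level types (hypothetical, not part of GCC) and the
   eightbyte classes the routine above assigns under the SysV x86-64 ABI.  */
struct a { int i; float f; };	/* 1 eightbyte : INTEGER (INTEGERSI+SSE)  */
struct b { double d; long l; };	/* 2 eightbytes: SSEDF, INTEGER		  */
struct c { long double ld; };	/* 2 eightbytes: X87, X87UP		  */
struct d { char buf[40]; };	/* over 32 bytes: memory, i.e. returns 0  */
#endif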
/* Examine the argument and set the number of registers required in each
   class.  Return 0 iff the parameter should be passed in memory.  */

static int
examine_argument (enum machine_mode mode, const_tree type, int in_return,
		  int *int_nregs, int *sse_nregs)
{
  enum x86_64_reg_class regclass[MAX_CLASSES];
  int n = classify_argument (mode, type, regclass, 0);

  *int_nregs = 0;
  *sse_nregs = 0;
  if (!n)
    return 0;
  for (n--; n >= 0; n--)
    switch (regclass[n])
      {
      case X86_64_INTEGER_CLASS:
      case X86_64_INTEGERSI_CLASS:
	(*int_nregs)++;
	break;
      case X86_64_SSE_CLASS:
      case X86_64_SSESF_CLASS:
      case X86_64_SSEDF_CLASS:
	(*sse_nregs)++;
	break;
      case X86_64_NO_CLASS:
      case X86_64_SSEUP_CLASS:
	break;
      case X86_64_X87_CLASS:
      case X86_64_X87UP_CLASS:
	if (!in_return)
	  return 0;
	break;
      case X86_64_COMPLEX_X87_CLASS:
	return in_return ? 2 : 0;
      case X86_64_MEMORY_CLASS:
	gcc_unreachable ();
      }
  return 1;
}
/* Construct container for the argument used by GCC interface.  See
   FUNCTION_ARG for the detailed description.  */

static rtx
construct_container (enum machine_mode mode, enum machine_mode orig_mode,
		     const_tree type, int in_return, int nintregs, int nsseregs,
		     const int *intreg, int sse_regno)
{
  /* The following variables hold the static issued_error state.  */
  static bool issued_sse_arg_error;
  static bool issued_sse_ret_error;
  static bool issued_x87_ret_error;

  enum machine_mode tmpmode;
  int bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  enum x86_64_reg_class regclass[MAX_CLASSES];
  int n;
  int i;
  int nexps = 0;
  int needed_sseregs, needed_intregs;
  rtx exp[MAX_CLASSES];
  rtx ret;

  n = classify_argument (mode, type, regclass, 0);
  if (!n)
    return NULL;
  if (!examine_argument (mode, type, in_return, &needed_intregs,
			 &needed_sseregs))
    return NULL;
  if (needed_intregs > nintregs || needed_sseregs > nsseregs)
    return NULL;

  /* We allowed the user to turn off SSE for kernel mode.  Don't crash if
     some less clueful developer tries to use floating-point anyway.  */
  if (needed_sseregs && !TARGET_SSE)
    {
      if (in_return)
	{
	  if (!issued_sse_ret_error)
	    {
	      error ("SSE register return with SSE disabled");
	      issued_sse_ret_error = true;
	    }
	}
      else if (!issued_sse_arg_error)
	{
	  error ("SSE register argument with SSE disabled");
	  issued_sse_arg_error = true;
	}
      return NULL;
    }

  /* Likewise, error if the ABI requires us to return values in the
     x87 registers and the user specified -mno-80387.  */
  if (!TARGET_80387 && in_return)
    for (i = 0; i < n; i++)
      if (regclass[i] == X86_64_X87_CLASS
	  || regclass[i] == X86_64_X87UP_CLASS
	  || regclass[i] == X86_64_COMPLEX_X87_CLASS)
	{
	  if (!issued_x87_ret_error)
	    {
	      error ("x87 register return with x87 disabled");
	      issued_x87_ret_error = true;
	    }
	  return NULL;
	}

  /* First construct simple cases.  Avoid SCmode, since we want to use
     single register to pass this type.  */
  if (n == 1 && mode != SCmode)
    switch (regclass[0])
      {
      case X86_64_INTEGER_CLASS:
      case X86_64_INTEGERSI_CLASS:
	return gen_rtx_REG (mode, intreg[0]);
      case X86_64_SSE_CLASS:
      case X86_64_SSESF_CLASS:
      case X86_64_SSEDF_CLASS:
	if (mode != BLKmode)
	  return gen_reg_or_parallel (mode, orig_mode,
				      SSE_REGNO (sse_regno));
	break;
      case X86_64_X87_CLASS:
      case X86_64_COMPLEX_X87_CLASS:
	return gen_rtx_REG (mode, FIRST_STACK_REG);
      case X86_64_NO_CLASS:
	/* Zero sized array, struct or class.  */
	return NULL;
      default:
	gcc_unreachable ();
      }
  if (n == 2 && regclass[0] == X86_64_SSE_CLASS
      && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
    return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
  if (n == 4
      && regclass[0] == X86_64_SSE_CLASS
      && regclass[1] == X86_64_SSEUP_CLASS
      && regclass[2] == X86_64_SSEUP_CLASS
      && regclass[3] == X86_64_SSEUP_CLASS
      && mode != BLKmode)
    return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
  if (n == 2
      && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
    return gen_rtx_REG (XFmode, FIRST_STACK_REG);
  if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
      && regclass[1] == X86_64_INTEGER_CLASS
      && (mode == CDImode || mode == TImode || mode == TFmode)
      && intreg[0] + 1 == intreg[1])
    return gen_rtx_REG (mode, intreg[0]);

  /* Otherwise figure out the entries of the PARALLEL.  */
  for (i = 0; i < n; i++)
    {
      int pos;

      switch (regclass[i])
	{
	case X86_64_NO_CLASS:
	  break;
	case X86_64_INTEGER_CLASS:
	case X86_64_INTEGERSI_CLASS:
	  /* Merge TImodes on aligned occasions here too.  */
	  if (i * 8 + 8 > bytes)
	    tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
	  else if (regclass[i] == X86_64_INTEGERSI_CLASS)
	    tmpmode = SImode;
	  else
	    tmpmode = DImode;
	  /* We've requested 24 bytes we don't have mode for.  Use DImode.  */
	  if (tmpmode == BLKmode)
	    tmpmode = DImode;
	  exp[nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
					    gen_rtx_REG (tmpmode, *intreg),
					    GEN_INT (i * 8));
	  intreg++;
	  break;
	case X86_64_SSESF_CLASS:
	  exp[nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
					    gen_rtx_REG (SFmode,
							 SSE_REGNO (sse_regno)),
					    GEN_INT (i * 8));
	  sse_regno++;
	  break;
	case X86_64_SSEDF_CLASS:
	  exp[nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
					    gen_rtx_REG (DFmode,
							 SSE_REGNO (sse_regno)),
					    GEN_INT (i * 8));
	  sse_regno++;
	  break;
	case X86_64_SSE_CLASS:
	  pos = i;
	  switch (n)
	    {
	    case 1:
	      tmpmode = DImode;
	      break;
	    case 2:
	      if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
		{
		  tmpmode = TImode;
		  i++;
		}
	      else
		tmpmode = DImode;
	      break;
	    case 4:
	      gcc_assert (i == 0
			  && regclass[1] == X86_64_SSEUP_CLASS
			  && regclass[2] == X86_64_SSEUP_CLASS
			  && regclass[3] == X86_64_SSEUP_CLASS);
	      tmpmode = OImode;
	      i += 3;
	      break;
	    default:
	      gcc_unreachable ();
	    }
	  exp[nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
					    gen_rtx_REG (tmpmode,
							 SSE_REGNO (sse_regno)),
					    GEN_INT (pos * 8));
	  sse_regno++;
	  break;
	default:
	  gcc_unreachable ();
	}
    }

  /* Empty aligned struct, union or class.  */
  if (nexps == 0)
    return NULL;

  ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
  for (i = 0; i < nexps; i++)
    XVECEXP (ret, 0, i) = exp[i];
  return ret;
}
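/* An illustrative sketch of the result (not taken from a real dump): for
   struct b { double d; long l; }, classified as SSEDF + INTEGER, the
   routine above builds a PARALLEL along the lines of

     (parallel [(expr_list (reg:DF xmm0) (const_int 0))
		(expr_list (reg:DI di)   (const_int 8))])

   i.e. bytes 0-7 travel in %xmm0 and bytes 8-15 in %rdi.  */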
/* Update the data in CUM to advance over an argument of mode MODE
   and data type TYPE.  (TYPE is null for libcalls where that information
   may not be available.)  */

static void
function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
			 const_tree type, HOST_WIDE_INT bytes,
			 HOST_WIDE_INT words)
{
  switch (mode)
    {
    default:
      break;

    case BLKmode:
      if (bytes < 0)
	break;
      /* FALLTHRU */

    case DImode:
    case SImode:
    case HImode:
    case QImode:
      cum->words += words;
      cum->nregs -= words;
      cum->regno += words;

      if (cum->nregs <= 0)
	{
	  cum->nregs = 0;
	  cum->regno = 0;
	}
      break;

    case OImode:
      /* OImode shouldn't be used directly.  */
      gcc_unreachable ();

    case DFmode:
      if (cum->float_in_sse < 2)
	break;
    case SFmode:
      if (cum->float_in_sse < 1)
	break;
      /* FALLTHRU */

    case V8SFmode:
    case V8SImode:
    case V32QImode:
    case V16HImode:
    case V4DFmode:
    case V4DImode:
    case TImode:
    case V16QImode:
    case V8HImode:
    case V4SImode:
    case V2DImode:
    case V4SFmode:
    case V2DFmode:
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  cum->sse_words += words;
	  cum->sse_nregs -= 1;
	  cum->sse_regno += 1;
	  if (cum->sse_nregs <= 0)
	    {
	      cum->sse_nregs = 0;
	      cum->sse_regno = 0;
	    }
	}
      break;

    case V8QImode:
    case V4HImode:
    case V2SImode:
    case V2SFmode:
    case V1TImode:
    case V1DImode:
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  cum->mmx_words += words;
	  cum->mmx_nregs -= 1;
	  cum->mmx_regno += 1;
	  if (cum->mmx_nregs <= 0)
	    {
	      cum->mmx_nregs = 0;
	      cum->mmx_regno = 0;
	    }
	}
      break;
    }
}
static void
function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
			 const_tree type, HOST_WIDE_INT words, bool named)
{
  int int_nregs, sse_nregs;

  /* Unnamed 256bit vector mode parameters are passed on stack.  */
  if (!named && VALID_AVX256_REG_MODE (mode))
    return;

  if (examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
      && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
    {
      cum->nregs -= int_nregs;
      cum->sse_nregs -= sse_nregs;
      cum->regno += int_nregs;
      cum->sse_regno += sse_nregs;
    }
  else
    {
      int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
      cum->words = (cum->words + align - 1) & ~(align - 1);
      cum->words += words;
    }
}
static void
function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
			    HOST_WIDE_INT words)
{
  /* Otherwise, this should be passed indirect.  */
  gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);

  cum->words += words;
  if (cum->nregs > 0)
    {
      cum->nregs -= 1;
      cum->regno += 1;
    }
}
/* Update the data in CUM to advance over an argument of mode MODE and
   data type TYPE.  (TYPE is null for libcalls where that information
   may not be available.)  */

static void
ix86_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
			   const_tree type, bool named)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  HOST_WIDE_INT bytes, words;

  if (mode == BLKmode)
    bytes = int_size_in_bytes (type);
  else
    bytes = GET_MODE_SIZE (mode);
  words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  if (type)
    mode = type_natural_mode (type, NULL);

  if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
    function_arg_advance_ms_64 (cum, bytes, words);
  else if (TARGET_64BIT)
    function_arg_advance_64 (cum, mode, type, words, named);
  else
    function_arg_advance_32 (cum, mode, type, bytes, words);
}
/* Define where to put the arguments to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   MODE is the argument's machine mode.
   TYPE is the data type of the argument (as a tree).
    This is null for libcalls where that information may
    not be available.
   CUM is a variable of type CUMULATIVE_ARGS which gives info about
    the preceding args and about the function being called.
   NAMED is nonzero if this argument is a named parameter
    (otherwise it is an extra parameter matching an ellipsis).  */

static rtx
function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
		 enum machine_mode orig_mode, const_tree type,
		 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
{
  static bool warnedsse, warnedmmx;

  /* Avoid the AL settings for the Unix64 ABI.  */
  if (mode == VOIDmode)
    return constm1_rtx;

  switch (mode)
    {
    default:
      break;

    case BLKmode:
      if (bytes < 0)
	break;
      /* FALLTHRU */
    case DImode:
    case SImode:
    case HImode:
    case QImode:
      if (words <= cum->nregs)
	{
	  int regno = cum->regno;

	  /* Fastcall allocates the first two DWORD (SImode) or
	     smaller arguments to ECX and EDX if it isn't an
	     aggregate type.  */
	  if (cum->fastcall)
	    {
	      if (mode == BLKmode
		  || mode == DImode
		  || (type && AGGREGATE_TYPE_P (type)))
		break;

	      /* ECX not EAX is the first allocated register.  */
	      if (regno == AX_REG)
		regno = CX_REG;
	    }
	  return gen_rtx_REG (mode, regno);
	}
      break;

    case DFmode:
      if (cum->float_in_sse < 2)
	break;
    case SFmode:
      if (cum->float_in_sse < 1)
	break;
      /* FALLTHRU */
    case TImode:
      /* In 32bit, we pass TImode in xmm registers.  */
    case V16QImode:
    case V8HImode:
    case V4SImode:
    case V2DImode:
    case V4SFmode:
    case V2DFmode:
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  if (!TARGET_SSE && !warnedsse && cum->warn_sse)
	    {
	      warnedsse = true;
	      warning (0, "SSE vector argument without SSE enabled "
		       "changes the ABI");
	    }
	  if (cum->sse_nregs)
	    return gen_reg_or_parallel (mode, orig_mode,
					cum->sse_regno + FIRST_SSE_REG);
	}
      break;

    case OImode:
      /* OImode shouldn't be used directly.  */
      gcc_unreachable ();

    case V8SFmode:
    case V8SImode:
    case V32QImode:
    case V16HImode:
    case V4DFmode:
    case V4DImode:
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  if (cum->sse_nregs)
	    return gen_reg_or_parallel (mode, orig_mode,
					cum->sse_regno + FIRST_SSE_REG);
	}
      break;

    case V8QImode:
    case V4HImode:
    case V2SImode:
    case V2SFmode:
    case V1TImode:
    case V1DImode:
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
	    {
	      warnedmmx = true;
	      warning (0, "MMX vector argument without MMX enabled "
		       "changes the ABI");
	    }
	  if (cum->mmx_nregs)
	    return gen_reg_or_parallel (mode, orig_mode,
					cum->mmx_regno + FIRST_MMX_REG);
	}
      break;
    }

  return NULL_RTX;
}
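#if 0
/* Illustrative user-level example (hypothetical, not part of GCC) of the
   fastcall handling above: the first two dword arguments go to %ecx and
   %edx, while an aggregate breaks out of the register path.  */
struct pair { int x, y; };

int __attribute__ ((fastcall))
f (int a, int b, struct pair p)	/* a -> %ecx, b -> %edx, p -> stack */
{
  return a + b + p.x + p.y;
}
#endif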
static rtx
function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
		 enum machine_mode orig_mode, const_tree type, bool named)
{
  /* Handle a hidden AL argument containing number of registers
     for varargs x86-64 functions.  */
  if (mode == VOIDmode)
    return GEN_INT (cum->maybe_vaarg
		    ? (cum->sse_nregs < 0
		       ? X86_64_SSE_REGPARM_MAX
		       : cum->sse_regno)
		    : -1);

  switch (mode)
    {
    default:
      break;

    case V8SFmode:
    case V8SImode:
    case V32QImode:
    case V16HImode:
    case V4DFmode:
    case V4DImode:
      /* Unnamed 256bit vector mode parameters are passed on stack.  */
      if (!named)
	return NULL;
      break;
    }

  return construct_container (mode, orig_mode, type, 0, cum->nregs,
			      cum->sse_nregs,
			      &x86_64_int_parameter_registers[cum->regno],
			      cum->sse_regno);
}
static rtx
function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
		    enum machine_mode orig_mode, bool named,
		    HOST_WIDE_INT bytes)
{
  unsigned int regno;

  /* We need to add clobber for MS_ABI->SYSV ABI calls in expand_call.
     We use value of -2 to specify that current function call is MSABI.  */
  if (mode == VOIDmode)
    return GEN_INT (-2);

  /* If we've run out of registers, it goes on the stack.  */
  if (cum->nregs == 0)
    return NULL_RTX;

  regno = x86_64_ms_abi_int_parameter_registers[cum->regno];

  /* Only floating point modes are passed in anything but integer regs.  */
  if (TARGET_SSE && (mode == SFmode || mode == DFmode))
    {
      if (named)
	regno = cum->regno + FIRST_SSE_REG;
      else
	{
	  rtx t1, t2;

	  /* Unnamed floating parameters are passed in both the
	     SSE and integer registers.  */
	  t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
	  t2 = gen_rtx_REG (mode, regno);
	  t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
	  t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
	  return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
	}
    }
  /* Handle aggregated types passed in register.  */
  if (orig_mode == BLKmode)
    {
      if (bytes > 0 && bytes <= 8)
	mode = (bytes > 4 ? DImode : SImode);
      if (mode == BLKmode)
	mode = DImode;
    }

  return gen_reg_or_parallel (mode, orig_mode, regno);
}
/* Return where to put the arguments to a function.
   Return zero to push the argument on the stack, or a hard register in
   which to store the argument.

   MODE is the argument's machine mode.  TYPE is the data type of the
   argument.  It is null for libcalls where that information may not be
   available.  CUM gives information about the preceding args and about
   the function being called.  NAMED is nonzero if this argument is a
   named parameter (otherwise it is an extra parameter matching an
   ellipsis).  */

static rtx
ix86_function_arg (cumulative_args_t cum_v, enum machine_mode omode,
		   const_tree type, bool named)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  enum machine_mode mode = omode;
  HOST_WIDE_INT bytes, words;
  rtx arg;

  if (mode == BLKmode)
    bytes = int_size_in_bytes (type);
  else
    bytes = GET_MODE_SIZE (mode);
  words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  /* To simplify the code below, represent vector types with a vector mode
     even if MMX/SSE are not active.  */
  if (type && TREE_CODE (type) == VECTOR_TYPE)
    mode = type_natural_mode (type, cum);

  if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
    arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
  else if (TARGET_64BIT)
    arg = function_arg_64 (cum, mode, omode, type, named);
  else
    arg = function_arg_32 (cum, mode, omode, type, bytes, words);

  if (TARGET_VZEROUPPER && function_pass_avx256_p (arg))
    {
      /* This argument uses 256bit AVX modes.  */
      if (cum->caller)
	cfun->machine->callee_pass_avx256_p = true;
      else
	cfun->machine->caller_pass_avx256_p = true;
    }

  return arg;
}
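#if 0
/* Illustrative user-level example (hypothetical, not part of GCC): a named
   256-bit vector argument is passed in a %ymm register, while the same
   value matching the ellipsis is forced onto the stack by the code above.  */
typedef float v8sf __attribute__ ((vector_size (32)));

extern float g (v8sf named, ...);

float
h (v8sf a, v8sf b)
{
  return g (a, b);		/* a -> %ymm0; b is unnamed -> stack */
}
#endif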
/* A C expression that indicates when an argument must be passed by
   reference.  If nonzero for an argument, a copy of that argument is
   made in memory and a pointer to the argument is passed instead of
   the argument itself.  The pointer is passed in whatever way is
   appropriate for passing a pointer to that type.  */

static bool
ix86_pass_by_reference (cumulative_args_t cum_v ATTRIBUTE_UNUSED,
			enum machine_mode mode ATTRIBUTE_UNUSED,
			const_tree type, bool named ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);

  /* See Windows x64 Software Convention.  */
  if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
    {
      int msize = (int) GET_MODE_SIZE (mode);
      if (type)
	{
	  /* Arrays are passed by reference.  */
	  if (TREE_CODE (type) == ARRAY_TYPE)
	    return true;

	  if (AGGREGATE_TYPE_P (type))
	    {
	      /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
		 are passed by reference.  */
	      msize = int_size_in_bytes (type);
	    }
	}

      /* __m128 is passed by reference.  */
      switch (msize)
	{
	case 1: case 2: case 4: case 8:
	  break;
	default:
	  return true;
	}
    }
  else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
    return true;

  return false;
}
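#if 0
/* Illustrative user-level example (hypothetical, not part of GCC): under
   the Windows x64 convention checked above, only objects of exactly 1, 2,
   4 or 8 bytes travel by value; everything else is copied and a pointer
   is passed instead.  */
struct s8  { long long v; };	/* 8 bytes : passed by value in a GPR */
struct s12 { int a, b, c; };	/* 12 bytes: copied, pointer passed   */
typedef float v4sf __attribute__ ((vector_size (16)));
				/* 16 bytes: passed by reference, as the
				   __m128 comment above notes.	      */
#endif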
/* Return true when TYPE should be 128bit aligned for 32bit argument
   passing ABI.  XXX: This function is obsolete and is only used for
   checking psABI compatibility with previous versions of GCC.  */

static bool
ix86_compat_aligned_value_p (const_tree type)
{
  enum machine_mode mode = TYPE_MODE (type);
  if (((TARGET_SSE && SSE_REG_MODE_P (mode))
       || mode == TDmode
       || mode == TFmode
       || mode == TCmode)
      && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
    return true;
  if (TYPE_ALIGN (type) < 128)
    return false;

  if (AGGREGATE_TYPE_P (type))
    {
      /* Walk the aggregates recursively.  */
      switch (TREE_CODE (type))
	{
	case RECORD_TYPE:
	case UNION_TYPE:
	case QUAL_UNION_TYPE:
	  {
	    tree field;

	    /* Walk all the structure fields.  */
	    for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
	      {
		if (TREE_CODE (field) == FIELD_DECL
		    && ix86_compat_aligned_value_p (TREE_TYPE (field)))
		  return true;
	      }
	    break;
	  }

	case ARRAY_TYPE:
	  /* Just for use if some languages pass arrays by value.  */
	  if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
	    return true;
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  return false;
}
/* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
   XXX: This function is obsolete and is only used for checking psABI
   compatibility with previous versions of GCC.  */

static unsigned int
ix86_compat_function_arg_boundary (enum machine_mode mode,
				   const_tree type, unsigned int align)
{
  /* In 32bit, only _Decimal128 and __float128 are aligned to their
     natural boundaries.  */
  if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
    {
      /* i386 ABI defines all arguments to be 4 byte aligned.  We have to
	 make an exception for SSE modes since these require 128bit
	 alignment.

	 The handling here differs from field_alignment.  ICC aligns MMX
	 arguments to 4 byte boundaries, while structure fields are aligned
	 to 8 byte boundaries.  */
      if (!type)
	{
	  if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
	    align = PARM_BOUNDARY;
	}
      else
	{
	  if (!ix86_compat_aligned_value_p (type))
	    align = PARM_BOUNDARY;
	}
    }
  if (align > BIGGEST_ALIGNMENT)
    align = BIGGEST_ALIGNMENT;
  return align;
}
/* Return true when TYPE should be 128bit aligned for 32bit argument
   passing ABI.  */

static bool
ix86_contains_aligned_value_p (const_tree type)
{
  enum machine_mode mode = TYPE_MODE (type);

  if (mode == XFmode || mode == XCmode)
    return false;

  if (TYPE_ALIGN (type) < 128)
    return false;

  if (AGGREGATE_TYPE_P (type))
    {
      /* Walk the aggregates recursively.  */
      switch (TREE_CODE (type))
	{
	case RECORD_TYPE:
	case UNION_TYPE:
	case QUAL_UNION_TYPE:
	  {
	    tree field;

	    /* Walk all the structure fields.  */
	    for (field = TYPE_FIELDS (type);
		 field;
		 field = DECL_CHAIN (field))
	      {
		if (TREE_CODE (field) == FIELD_DECL
		    && ix86_contains_aligned_value_p (TREE_TYPE (field)))
		  return true;
	      }
	    break;
	  }

	case ARRAY_TYPE:
	  /* Just for use if some languages pass arrays by value.  */
	  if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
	    return true;
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  else
    return TYPE_ALIGN (type) >= 128;

  return false;
}
/* Gives the alignment boundary, in bits, of an argument with the
   specified mode and type.  */

static unsigned int
ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
{
  unsigned int align;
  if (type)
    {
      /* Since the main variant type is used for call, we convert it to
	 the main variant type.  */
      type = TYPE_MAIN_VARIANT (type);
      align = TYPE_ALIGN (type);
    }
  else
    align = GET_MODE_ALIGNMENT (mode);
  if (align < PARM_BOUNDARY)
    align = PARM_BOUNDARY;
  else
    {
      static bool warned;
      unsigned int saved_align = align;

      if (!TARGET_64BIT)
	{
	  /* i386 ABI defines XFmode arguments to be 4 byte aligned.  */
	  if (!type)
	    {
	      if (mode == XFmode || mode == XCmode)
		align = PARM_BOUNDARY;
	    }
	  else if (!ix86_contains_aligned_value_p (type))
	    align = PARM_BOUNDARY;

	  if (align < 128)
	    align = PARM_BOUNDARY;
	}

      if (warn_psabi
	  && !warned
	  && align != ix86_compat_function_arg_boundary (mode, type,
							 saved_align))
	{
	  warned = true;
	  inform (input_location,
		  "The ABI for passing parameters with %d-byte"
		  " alignment has changed in GCC 4.6",
		  align / BITS_PER_UNIT);
	}
    }

  return align;
}
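#if 0
/* Illustrative user-level declarations (hypothetical, not part of GCC) of
   the 32-bit boundaries computed above: long double drops to the 4-byte
   PARM_BOUNDARY, while a 16-byte SSE type keeps 128-bit alignment.  */
typedef float v4sf __attribute__ ((vector_size (16)));

void f (long double x);		/* argument slot aligned to 4 bytes  */
void g (v4sf x);		/* argument slot aligned to 16 bytes */
#endif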
/* Return true if N is a possible register number of function value.  */

static bool
ix86_function_value_regno_p (const unsigned int regno)
{
  switch (regno)
    {
    case AX_REG:
      return true;

    case FIRST_FLOAT_REG:
      /* TODO: The function should depend on current function ABI but
	 builtins.c would need updating then.  Therefore we use the
	 default ABI.  */
      if (TARGET_64BIT && ix86_abi == MS_ABI)
	return false;
      return TARGET_FLOAT_RETURNS_IN_80387;

    case FIRST_SSE_REG:
      return TARGET_SSE;

    case FIRST_MMX_REG:
      if (TARGET_MACHO || TARGET_64BIT)
	return false;
      return TARGET_MMX;
    }

  return false;
}
/* Define how to find the value returned by a function.
   VALTYPE is the data type of the value (as a tree).
   If the precise function being called is known, FUNC is its FUNCTION_DECL;
   otherwise, FUNC is 0.  */

static rtx
function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
		   const_tree fntype, const_tree fn)
{
  unsigned int regno;

  /* 8-byte vector modes in %mm0.  See ix86_return_in_memory for where
     we normally prevent this case when mmx is not available.  However
     some ABIs may require the result to be returned like DImode.  */
  if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
    regno = TARGET_MMX ? FIRST_MMX_REG : 0;

  /* 16-byte vector modes in %xmm0.  See ix86_return_in_memory for where
     we prevent this case when sse is not available.  However some ABIs
     may require the result to be returned like integer TImode.  */
  else if (mode == TImode
	   || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
    regno = TARGET_SSE ? FIRST_SSE_REG : 0;

  /* 32-byte vector modes in %ymm0.  */
  else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
    regno = TARGET_AVX ? FIRST_SSE_REG : 0;

  /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387).  */
  else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
    regno = FIRST_FLOAT_REG;
  else
    /* Most things go in %eax.  */
    regno = AX_REG;

  /* Override FP return register with %xmm0 for local functions when
     SSE math is enabled or for functions with sseregparm attribute.  */
  if ((fn || fntype) && (mode == SFmode || mode == DFmode))
    {
      int sse_level = ix86_function_sseregparm (fntype, fn, false);
      if ((sse_level >= 1 && mode == SFmode)
	  || (sse_level == 2 && mode == DFmode))
	regno = FIRST_SSE_REG;
    }

  /* OImode shouldn't be used directly.  */
  gcc_assert (mode != OImode);

  return gen_rtx_REG (orig_mode, regno);
}
static rtx
function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
		   const_tree valtype)
{
  rtx ret;

  /* Handle libcalls, which don't provide a type node.  */
  if (valtype == NULL)
    {
      switch (mode)
	{
	case SFmode:
	case SCmode:
	case DFmode:
	case DCmode:
	case TFmode:
	case SDmode:
	case DDmode:
	case TDmode:
	  return gen_rtx_REG (mode, FIRST_SSE_REG);
	case XFmode:
	case XCmode:
	  return gen_rtx_REG (mode, FIRST_FLOAT_REG);
	case TCmode:
	  return NULL;
	default:
	  return gen_rtx_REG (mode, AX_REG);
	}
    }
  else if (POINTER_TYPE_P (valtype))
    {
      /* Pointers are always returned in Pmode.  */
      mode = Pmode;
    }

  ret = construct_container (mode, orig_mode, valtype, 1,
			     X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
			     x86_64_int_return_registers, 0);

  /* For zero sized structures, construct_container returns NULL, but we
     need to keep rest of compiler happy by returning meaningful value.  */
  if (!ret)
    ret = gen_rtx_REG (orig_mode, AX_REG);

  return ret;
}
static rtx
function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
{
  unsigned int regno = AX_REG;

  if (TARGET_SSE)
    {
      switch (GET_MODE_SIZE (mode))
	{
	case 16:
	  if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
	      && !COMPLEX_MODE_P (mode))
	    regno = FIRST_SSE_REG;
	  break;
	case 8:
	case 4:
	  if (mode == SFmode || mode == DFmode)
	    regno = FIRST_SSE_REG;
	  break;
	default:
	  break;
	}
    }
  return gen_rtx_REG (orig_mode, regno);
}
static rtx
ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
		       enum machine_mode orig_mode, enum machine_mode mode)
{
  const_tree fn, fntype;

  fn = NULL_TREE;
  if (fntype_or_decl && DECL_P (fntype_or_decl))
    fn = fntype_or_decl;
  fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;

  if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
    return function_value_ms_64 (orig_mode, mode);
  else if (TARGET_64BIT)
    return function_value_64 (orig_mode, mode, valtype);
  else
    return function_value_32 (orig_mode, mode, fntype, fn);
}

static rtx
ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
		     bool outgoing ATTRIBUTE_UNUSED)
{
  enum machine_mode mode, orig_mode;

  orig_mode = TYPE_MODE (valtype);
  mode = type_natural_mode (valtype, NULL);
  return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
}
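#if 0
/* Illustrative user-level declarations (hypothetical, not part of GCC)
   showing where the routines above place 64-bit SysV return values.  */
long ri (void);			/* %rax			    */
double rd (void);		/* %xmm0		    */
long double rx (void);		/* %st(0), via X87/X87UP    */
struct b { double d; long l; };
struct b rs (void);		/* PARALLEL: %xmm0 and %rax */
#endif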
/* Pointer function arguments and return values are promoted to Pmode.  */

static enum machine_mode
ix86_promote_function_mode (const_tree type, enum machine_mode mode,
			    int *punsignedp, const_tree fntype,
			    int for_return)
{
  if (type != NULL_TREE && POINTER_TYPE_P (type))
    {
      *punsignedp = POINTERS_EXTEND_UNSIGNED;
      return Pmode;
    }
  return default_promote_function_mode (type, mode, punsignedp, fntype,
					for_return);
}

static rtx
ix86_libcall_value (enum machine_mode mode)
{
  return ix86_function_value_1 (NULL, NULL, mode, mode);
}
/* Return true iff type is returned in memory.  */

static bool ATTRIBUTE_UNUSED
return_in_memory_32 (const_tree type, enum machine_mode mode)
{
  HOST_WIDE_INT size;

  if (mode == BLKmode)
    return true;

  size = int_size_in_bytes (type);

  if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
    return false;

  if (VECTOR_MODE_P (mode) || mode == TImode)
    {
      /* User-created vectors small enough to fit in EAX.  */
      if (size < 8)
	return false;

      /* MMX/3dNow values are returned in MM0,
	 except when it doesn't exist or the ABI prescribes otherwise.  */
      if (size == 8)
	return !TARGET_MMX || TARGET_VECT8_RETURNS;

      /* SSE values are returned in XMM0, except when it doesn't exist.  */
      if (size == 16)
	return !TARGET_SSE;

      /* AVX values are returned in YMM0, except when it doesn't exist.  */
      if (size == 32)
	return !TARGET_AVX;
    }

  if (mode == XFmode)
    return false;

  if (size > 12)
    return true;

  /* OImode shouldn't be used directly.  */
  gcc_assert (mode != OImode);

  return false;
}

static bool ATTRIBUTE_UNUSED
return_in_memory_64 (const_tree type, enum machine_mode mode)
{
  int needed_intregs, needed_sseregs;
  return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
}

static bool ATTRIBUTE_UNUSED
return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
{
  HOST_WIDE_INT size = int_size_in_bytes (type);

  /* __m128 is returned in xmm0.  */
  if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
      && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
    return false;

  /* Otherwise, the size must be exactly 1, 2, 4, or 8 bytes.  */
  return size != 1 && size != 2 && size != 4 && size != 8;
}

static bool
ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
{
#ifdef SUBTARGET_RETURN_IN_MEMORY
  return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
#else
  const enum machine_mode mode = type_natural_mode (type, NULL);

  if (TARGET_64BIT)
    {
      if (ix86_function_type_abi (fntype) == MS_ABI)
	return return_in_memory_ms_64 (type, mode);
      else
	return return_in_memory_64 (type, mode);
    }
  else
    return return_in_memory_32 (type, mode);
#endif
}
/* When returning SSE vector types, we have a choice of either
     (1) being abi incompatible with a -march switch, or
     (2) generating an error.
   Given no good solution, I think the safest thing is one warning.
   The user won't be able to use -Werror, but....

   Choose the STRUCT_VALUE_RTX hook because that's (at present) only
   called in response to actually generating a caller or callee that
   uses such a type.  As opposed to TARGET_RETURN_IN_MEMORY, which is called
   via aggregate_value_p for general type probing from tree-ssa.  */

static rtx
ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
{
  static bool warnedsse, warnedmmx;

  if (!TARGET_64BIT && type)
    {
      /* Look at the return type of the function, not the function type.  */
      enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));

      if (!TARGET_SSE && !warnedsse)
	{
	  if (mode == TImode
	      || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
	    {
	      warnedsse = true;
	      warning (0, "SSE vector return without SSE enabled "
		       "changes the ABI");
	    }
	}

      if (!TARGET_MMX && !warnedmmx)
	{
	  if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
	    {
	      warnedmmx = true;
	      warning (0, "MMX vector return without MMX enabled "
		       "changes the ABI");
	    }
	}
    }

  return NULL;
}
/* Create the va_list data type.  */

/* Returns the calling convention specific va_list data type.
   The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI.  */

static tree
ix86_build_builtin_va_list_abi (enum calling_abi abi)
{
  tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;

  /* For i386 we use plain pointer to argument area.  */
  if (!TARGET_64BIT || abi == MS_ABI)
    return build_pointer_type (char_type_node);

  record = lang_hooks.types.make_type (RECORD_TYPE);
  type_decl = build_decl (BUILTINS_LOCATION,
			  TYPE_DECL, get_identifier ("__va_list_tag"), record);

  f_gpr = build_decl (BUILTINS_LOCATION,
		      FIELD_DECL, get_identifier ("gp_offset"),
		      unsigned_type_node);
  f_fpr = build_decl (BUILTINS_LOCATION,
		      FIELD_DECL, get_identifier ("fp_offset"),
		      unsigned_type_node);
  f_ovf = build_decl (BUILTINS_LOCATION,
		      FIELD_DECL, get_identifier ("overflow_arg_area"),
		      ptr_type_node);
  f_sav = build_decl (BUILTINS_LOCATION,
		      FIELD_DECL, get_identifier ("reg_save_area"),
		      ptr_type_node);

  va_list_gpr_counter_field = f_gpr;
  va_list_fpr_counter_field = f_fpr;

  DECL_FIELD_CONTEXT (f_gpr) = record;
  DECL_FIELD_CONTEXT (f_fpr) = record;
  DECL_FIELD_CONTEXT (f_ovf) = record;
  DECL_FIELD_CONTEXT (f_sav) = record;

  TYPE_STUB_DECL (record) = type_decl;
  TYPE_NAME (record) = type_decl;
  TYPE_FIELDS (record) = f_gpr;
  DECL_CHAIN (f_gpr) = f_fpr;
  DECL_CHAIN (f_fpr) = f_ovf;
  DECL_CHAIN (f_ovf) = f_sav;

  layout_type (record);

  /* The correct type is an array type of one element.  */
  return build_array_type (record, build_index_type (size_zero_node));
}
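#if 0
/* C-level picture of the record built above; this is the va_list layout
   the x86-64 psABI documents (shown for illustration only).  */
typedef struct __va_list_tag
{
  unsigned int gp_offset;	/* byte offset of next saved GPR	*/
  unsigned int fp_offset;	/* byte offset of next saved SSE reg	*/
  void *overflow_arg_area;	/* next stack-passed argument		*/
  void *reg_save_area;		/* base of the register save area	*/
} __va_list_tag;

typedef __va_list_tag __builtin_va_list[1];  /* array type of one element */
#endif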
/* Setup the builtin va_list data type and for 64-bit the additional
   calling convention specific va_list data types.  */

static tree
ix86_build_builtin_va_list (void)
{
  tree ret = ix86_build_builtin_va_list_abi (ix86_abi);

  /* Initialize abi specific va_list builtin types.  */
  if (TARGET_64BIT)
    {
      tree t;
      if (ix86_abi == MS_ABI)
	{
	  t = ix86_build_builtin_va_list_abi (SYSV_ABI);
	  if (TREE_CODE (t) != RECORD_TYPE)
	    t = build_variant_type_copy (t);
	  sysv_va_list_type_node = t;
	}
      else
	{
	  t = ret;
	  if (TREE_CODE (t) != RECORD_TYPE)
	    t = build_variant_type_copy (t);
	  sysv_va_list_type_node = t;
	}
      if (ix86_abi != MS_ABI)
	{
	  t = ix86_build_builtin_va_list_abi (MS_ABI);
	  if (TREE_CODE (t) != RECORD_TYPE)
	    t = build_variant_type_copy (t);
	  ms_va_list_type_node = t;
	}
      else
	{
	  t = ret;
	  if (TREE_CODE (t) != RECORD_TYPE)
	    t = build_variant_type_copy (t);
	  ms_va_list_type_node = t;
	}
    }

  return ret;
}
/* Worker function for TARGET_SETUP_INCOMING_VARARGS.  */

static void
setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
{
  rtx save_area, mem;
  alias_set_type set;
  int i, max;

  /* GPR size of varargs save area.  */
  if (cfun->va_list_gpr_size)
    ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
  else
    ix86_varargs_gpr_size = 0;

  /* FPR size of varargs save area.  We don't need it if we don't pass
     anything in SSE registers.  */
  if (TARGET_SSE && cfun->va_list_fpr_size)
    ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
  else
    ix86_varargs_fpr_size = 0;

  if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
    return;

  save_area = frame_pointer_rtx;
  set = get_varargs_alias_set ();

  max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
  if (max > X86_64_REGPARM_MAX)
    max = X86_64_REGPARM_MAX;

  for (i = cum->regno; i < max; i++)
    {
      mem = gen_rtx_MEM (Pmode,
			 plus_constant (save_area, i * UNITS_PER_WORD));
      MEM_NOTRAP_P (mem) = 1;
      set_mem_alias_set (mem, set);
      emit_move_insn (mem, gen_rtx_REG (Pmode,
					x86_64_int_parameter_registers[i]));
    }

  if (ix86_varargs_fpr_size)
    {
      enum machine_mode smode;
      rtx label, test;

      /* Now emit code to save SSE registers.  The AX parameter contains number
	 of SSE parameter registers used to call this function, though all we
	 actually check here is the zero/non-zero status.  */

      label = gen_label_rtx ();
      test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
      emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
				      label));

      /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
	 we used movdqa (i.e. TImode) instead?  Perhaps even better would
	 be if we could determine the real mode of the data, via a hook
	 into pass_stdarg.  Ignore all that for now.  */
      smode = V4SFmode;
      if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
	crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);

      max = cum->sse_regno + cfun->va_list_fpr_size / 16;
      if (max > X86_64_SSE_REGPARM_MAX)
	max = X86_64_SSE_REGPARM_MAX;

      for (i = cum->sse_regno; i < max; ++i)
	{
	  mem = plus_constant (save_area, i * 16 + ix86_varargs_gpr_size);
	  mem = gen_rtx_MEM (smode, mem);
	  MEM_NOTRAP_P (mem) = 1;
	  set_mem_alias_set (mem, set);
	  set_mem_align (mem, GET_MODE_ALIGNMENT (smode));

	  emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
	}

      emit_label (label);
    }
}
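#if 0
/* Illustrative sketch (hypothetical, not part of GCC) of how va_arg later
   consumes the save area filled above: six 8-byte GPR slots at offsets
   0..40, then 16-byte SSE slots starting at offset 48
   (= 8 * X86_64_REGPARM_MAX).  Uses the __va_list_tag sketch above.  */
static long
next_gpr_arg (struct __va_list_tag *ap)
{
  long v;

  if (ap->gp_offset <= 40)	/* a saved GPR slot is still available */
    {
      v = *(long *) ((char *) ap->reg_save_area + ap->gp_offset);
      ap->gp_offset += 8;
    }
  else				/* registers exhausted: read the stack */
    {
      v = *(long *) ap->overflow_arg_area;
      ap->overflow_arg_area = (char *) ap->overflow_arg_area + 8;
    }
  return v;
}
#endif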
static void
setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
{
  alias_set_type set = get_varargs_alias_set ();
  int i;

  for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
    {
      rtx reg, mem;

      mem = gen_rtx_MEM (Pmode,
			 plus_constant (virtual_incoming_args_rtx,
					i * UNITS_PER_WORD));
      MEM_NOTRAP_P (mem) = 1;
      set_mem_alias_set (mem, set);

      reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
      emit_move_insn (mem, reg);
    }
}
static void
ix86_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
			     tree type, int *pretend_size ATTRIBUTE_UNUSED,
			     int no_rtl)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  CUMULATIVE_ARGS next_cum;
  tree fntype;

  /* This argument doesn't appear to be used anymore.  Which is good,
     because the old code here didn't suppress rtl generation.  */
  gcc_assert (!no_rtl);

  if (!TARGET_64BIT)
    return;

  fntype = TREE_TYPE (current_function_decl);

  /* For varargs, we do not want to skip the dummy va_dcl argument.
     For stdargs, we do want to skip the last named argument.  */
  next_cum = *cum;
  if (stdarg_p (fntype))
    ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
			       true);

  if (cum->call_abi == MS_ABI)
    setup_incoming_varargs_ms_64 (&next_cum);
  else
    setup_incoming_varargs_64 (&next_cum);
}
/* Checks if TYPE is of kind va_list char *.  */

static bool
is_va_list_char_pointer (tree type)
{
  tree canonic;

  /* For 32-bit it is always true.  */
  if (!TARGET_64BIT)
    return true;
  canonic = ix86_canonical_va_list_type (type);
  return (canonic == ms_va_list_type_node
	  || (ix86_abi == MS_ABI && canonic == va_list_type_node));
}
/* Implement va_start.  */

static void
ix86_va_start (tree valist, rtx nextarg)
{
  HOST_WIDE_INT words, n_gpr, n_fpr;
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;
  tree type;
  rtx ovf_rtx;

  if (flag_split_stack
      && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
    {
      unsigned int scratch_regno;

      /* When we are splitting the stack, we can't refer to the stack
	 arguments using internal_arg_pointer, because they may be on
	 the old stack.  The split stack prologue will arrange to
	 leave a pointer to the old stack arguments in a scratch
	 register, which we here copy to a pseudo-register.  The split
	 stack prologue can't set the pseudo-register directly because
	 it (the prologue) runs before any registers have been saved.  */

      scratch_regno = split_stack_prologue_scratch_regno ();
      if (scratch_regno != INVALID_REGNUM)
	{
	  rtx reg, seq;

	  reg = gen_reg_rtx (Pmode);
	  cfun->machine->split_stack_varargs_pointer = reg;

	  start_sequence ();
	  emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
	  seq = get_insns ();
	  end_sequence ();

	  push_topmost_sequence ();
	  emit_insn_after (seq, entry_of_function ());
	  pop_topmost_sequence ();
	}
    }

  /* Only 64bit target needs something special.  */
  if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
    {
      if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
	std_expand_builtin_va_start (valist, nextarg);
      else
	{
	  rtx va_r, next;

	  va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
	  next = expand_binop (ptr_mode, add_optab,
			       cfun->machine->split_stack_varargs_pointer,
			       crtl->args.arg_offset_rtx,
			       NULL_RTX, 0, OPTAB_LIB_WIDEN);
	  convert_move (va_r, next, 0);
	}
      return;
    }

  f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
  f_fpr = DECL_CHAIN (f_gpr);
  f_ovf = DECL_CHAIN (f_fpr);
  f_sav = DECL_CHAIN (f_ovf);

  valist = build_simple_mem_ref (valist);
  TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
  /* The following should be folded into the MEM_REF offset.  */
  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
		f_gpr, NULL_TREE);
  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
		f_fpr, NULL_TREE);
  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
		f_ovf, NULL_TREE);
  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
		f_sav, NULL_TREE);

  /* Count number of gp and fp argument registers used.  */
  words = crtl->args.info.words;
  n_gpr = crtl->args.info.regno;
  n_fpr = crtl->args.info.sse_regno;

  if (cfun->va_list_gpr_size)
    {
      type = TREE_TYPE (gpr);
      t = build2 (MODIFY_EXPR, type,
		  gpr, build_int_cst (type, n_gpr * 8));
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }

  if (TARGET_SSE && cfun->va_list_fpr_size)
    {
      type = TREE_TYPE (fpr);
      t = build2 (MODIFY_EXPR, type, fpr,
		  build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }

  /* Find the overflow area.  */
  type = TREE_TYPE (ovf);
  if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
    ovf_rtx = crtl->args.internal_arg_pointer;
  else
    ovf_rtx = cfun->machine->split_stack_varargs_pointer;
  t = make_tree (type, ovf_rtx);
  if (words != 0)
    t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
  t = build2 (MODIFY_EXPR, type, ovf, t);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
    {
      /* Find the register save area.
	 The prologue of the function saves it right above the stack frame.  */
      type = TREE_TYPE (sav);
      t = make_tree (type, frame_pointer_rtx);
      if (!ix86_varargs_gpr_size)
	t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
      t = build2 (MODIFY_EXPR, type, sav, t);
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }
}
/* Implement va_arg.  */

static tree
ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
		      gimple_seq *post_p)
{
  static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;
  int size, rsize;
  tree lab_false, lab_over = NULL_TREE;
  tree addr, t2;
  rtx container;
  int indirect_p = 0;
  tree ptrtype;
  enum machine_mode nat_mode;
  unsigned int arg_boundary;

  /* Only 64bit target needs something special.  */
  if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
    return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);

  f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
  f_fpr = DECL_CHAIN (f_gpr);
  f_ovf = DECL_CHAIN (f_fpr);
  f_sav = DECL_CHAIN (f_ovf);

  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
		build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
  valist = build_va_arg_indirect_ref (valist);
  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);

  indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
  if (indirect_p)
    type = build_pointer_type (type);
  size = int_size_in_bytes (type);
  rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  nat_mode = type_natural_mode (type, NULL);
  switch (nat_mode)
    {
    case V8SFmode:
    case V8SImode:
    case V32QImode:
    case V16HImode:
    case V4DFmode:
    case V4DImode:
      /* Unnamed 256bit vector mode parameters are passed on stack.  */
      if (!TARGET_64BIT_MS_ABI)
	{
	  container = NULL;
	  break;
	}

    default:
      container = construct_container (nat_mode, TYPE_MODE (type),
				       type, 0, X86_64_REGPARM_MAX,
				       X86_64_SSE_REGPARM_MAX, intreg,
				       0);
      break;
    }

  /* Pull the value out of the saved registers.  */

  addr = create_tmp_var (ptr_type_node, "addr");

  if (container)
    {
      int needed_intregs, needed_sseregs;
      bool need_temp;
      tree int_addr, sse_addr;

      lab_false = create_artificial_label (UNKNOWN_LOCATION);
      lab_over = create_artificial_label (UNKNOWN_LOCATION);

      examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);

      need_temp = (!REG_P (container)
		   && ((needed_intregs && TYPE_ALIGN (type) > 64)
		       || TYPE_ALIGN (type) > 128));

      /* In case we are passing structure, verify that it is consecutive block
	 on the register save area.  If not we need to do moves.  */
      if (!need_temp && !REG_P (container))
	{
	  /* Verify that all registers are strictly consecutive  */
	  if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
	    {
	      int i;

	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
		{
		  rtx slot = XVECEXP (container, 0, i);
		  if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
		      || INTVAL (XEXP (slot, 1)) != i * 16)
		    need_temp = 1;
		}
	    }
	  else
	    {
	      int i;

	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
		{
		  rtx slot = XVECEXP (container, 0, i);
		  if (REGNO (XEXP (slot, 0)) != (unsigned int) i
		      || INTVAL (XEXP (slot, 1)) != i * 8)
		    need_temp = 1;
		}
	    }
	}
      if (!need_temp)
	{
	  int_addr = addr;
	  sse_addr = addr;
	}
      else
	{
	  int_addr = create_tmp_var (ptr_type_node, "int_addr");
	  sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
	}

      /* First ensure that we fit completely in registers.  */
      if (needed_intregs)
	{
	  t = build_int_cst (TREE_TYPE (gpr),
			     (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
	  t = build2 (GE_EXPR, boolean_type_node, gpr, t);
	  t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
	  t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
	  gimplify_and_add (t, pre_p);
	}
      if (needed_sseregs)
	{
	  t = build_int_cst (TREE_TYPE (fpr),
			     (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
			     + X86_64_REGPARM_MAX * 8);
	  t = build2 (GE_EXPR, boolean_type_node, fpr, t);
	  t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
	  t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
	  gimplify_and_add (t, pre_p);
	}

      /* Compute index to start of area used for integer regs.  */
      if (needed_intregs)
	{
	  /* int_addr = gpr + sav; */
	  t = fold_build_pointer_plus (sav, gpr);
	  gimplify_assign (int_addr, t, pre_p);
	}
      if (needed_sseregs)
	{
	  /* sse_addr = fpr + sav; */
	  t = fold_build_pointer_plus (sav, fpr);
	  gimplify_assign (sse_addr, t, pre_p);
	}
      if (need_temp)
	{
	  int i, prev_size = 0;
	  tree temp = create_tmp_var (type, "va_arg_tmp");

	  /* addr = &temp; */
	  t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
	  gimplify_assign (addr, t, pre_p);

	  for (i = 0; i < XVECLEN (container, 0); i++)
	    {
	      rtx slot = XVECEXP (container, 0, i);
	      rtx reg = XEXP (slot, 0);
	      enum machine_mode mode = GET_MODE (reg);
	      tree piece_type;
	      tree addr_type;
	      tree daddr_type;
	      tree src_addr, src;
	      int src_offset;
	      tree dest_addr, dest;
	      int cur_size = GET_MODE_SIZE (mode);

	      gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
	      prev_size = INTVAL (XEXP (slot, 1));
	      if (prev_size + cur_size > size)
		{
		  cur_size = size - prev_size;
		  mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
		  if (mode == BLKmode)
		    mode = QImode;
		}
	      piece_type = lang_hooks.types.type_for_mode (mode, 1);
	      if (mode == GET_MODE (reg))
		addr_type = build_pointer_type (piece_type);
	      else
		addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
							 true);
	      daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
							true);

	      if (SSE_REGNO_P (REGNO (reg)))
		{
		  src_addr = sse_addr;
		  src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
		}
	      else
		{
		  src_addr = int_addr;
		  src_offset = REGNO (reg) * 8;
		}
	      src_addr = fold_convert (addr_type, src_addr);
	      src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);

	      dest_addr = fold_convert (daddr_type, addr);
	      dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
	      if (cur_size == GET_MODE_SIZE (mode))
		{
		  src = build_va_arg_indirect_ref (src_addr);
		  dest = build_va_arg_indirect_ref (dest_addr);

		  gimplify_assign (dest, src, pre_p);
		}
	      else
		{
		  tree copy
		    = build_call_expr (implicit_built_in_decls[BUILT_IN_MEMCPY],
				       3, dest_addr, src_addr,
				       size_int (cur_size));
		  gimplify_and_add (copy, pre_p);
		}
	      prev_size += cur_size;
	    }
	}

      if (needed_intregs)
	{
	  t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
		      build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
	  gimplify_assign (gpr, t, pre_p);
	}

      if (needed_sseregs)
	{
	  t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
		      build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
	  gimplify_assign (fpr, t, pre_p);
	}

      gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));

      gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
    }

  /* ... otherwise out of the overflow area.  */

  /* When we align parameter on stack for caller, if the parameter
     alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
     aligned at MAX_SUPPORTED_STACK_ALIGNMENT.  We will match callee
     here with caller.  */
  arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
  if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
    arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;

  /* Care for on-stack alignment if needed.  */
  if (arg_boundary <= 64 || size == 0)
    t = ovf;
  else
    {
      HOST_WIDE_INT align = arg_boundary / 8;
      t = fold_build_pointer_plus_hwi (ovf, align - 1);
      t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
		  build_int_cst (TREE_TYPE (t), -align));
    }

  gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
  gimplify_assign (addr, t, pre_p);

  t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
  gimplify_assign (unshare_expr (ovf), t, pre_p);

  if (container)
    gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));

  ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
  addr = fold_convert (ptrtype, addr);

  if (indirect_p)
    addr = build_va_arg_indirect_ref (addr);
  return build_va_arg_indirect_ref (addr);
}

/* Return true if OPNUM's MEM should be matched
   in movabs* patterns.  */

bool
ix86_check_movabs (rtx insn, int opnum)
{
  rtx set, mem;

  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  gcc_assert (GET_CODE (set) == SET);
  mem = XEXP (set, opnum);
  while (GET_CODE (mem) == SUBREG)
    mem = SUBREG_REG (mem);
  gcc_assert (MEM_P (mem));
  return volatile_ok || !MEM_VOLATILE_P (mem);
}

/* Initialize the table of extra 80387 mathematical constants.  */

static void
init_ext_80387_constants (void)
{
  static const char * cst[5] =
  {
    "0.3010299956639811952256464283594894482",  /* 0: fldlg2  */
    "0.6931471805599453094286904741849753009",  /* 1: fldln2  */
    "1.4426950408889634073876517827983434472",  /* 2: fldl2e  */
    "3.3219280948873623478083405569094566090",  /* 3: fldl2t  */
    "3.1415926535897932385128089594061862044",  /* 4: fldpi   */
  };
  int i;

  for (i = 0; i < 5; i++)
    {
      real_from_string (&ext_80387_constants_table[i], cst[i]);
      /* Ensure each constant is rounded to XFmode precision.  */
      real_convert (&ext_80387_constants_table[i],
		    XFmode, &ext_80387_constants_table[i]);
    }

  ext_80387_constants_init = 1;
}

/* Return non-zero if the constant is something that
   can be loaded with a special instruction.  */

int
standard_80387_constant_p (rtx x)
{
  enum machine_mode mode = GET_MODE (x);

  REAL_VALUE_TYPE r;

  if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
    return -1;

  if (x == CONST0_RTX (mode))
    return 1;
  if (x == CONST1_RTX (mode))
    return 2;

  REAL_VALUE_FROM_CONST_DOUBLE (r, x);

  /* For XFmode constants, try to find a special 80387 instruction when
     optimizing for size or on those CPUs that benefit from them.  */
  if (mode == XFmode
      && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
    {
      int i;

      if (! ext_80387_constants_init)
	init_ext_80387_constants ();

      for (i = 0; i < 5; i++)
	if (real_identical (&r, &ext_80387_constants_table[i]))
	  return i + 3;
    }

  /* Load of the constant -0.0 or -1.0 will be split as
     fldz;fchs or fld1;fchs sequence.  */
  if (real_isnegzero (&r))
    return 8;
  if (real_identical (&r, &dconstm1))
    return 9;

  return 0;
}
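
/* For reference, the return values of standard_80387_constant_p used by
   the functions below encode: 0 = not a special constant, 1 = fldz,
   2 = fld1, 3..7 = the ext_80387_constants_table entries above (fldlg2,
   fldln2, fldl2e, fldl2t, fldpi in that order), 8 = -0.0 (split as
   fldz;fchs) and 9 = -1.0 (split as fld1;fchs).  */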

/* Return the opcode of the special instruction to be used to load
   the constant X.  */

const char *
standard_80387_constant_opcode (rtx x)
{
  switch (standard_80387_constant_p (x))
    {
    case 1:
      return "fldz";
    case 2:
      return "fld1";
    case 3:
      return "fldlg2";
    case 4:
      return "fldln2";
    case 5:
      return "fldl2e";
    case 6:
      return "fldl2t";
    case 7:
      return "fldpi";
    case 8:
    case 9:
      return "#";
    default:
      gcc_unreachable ();
    }
}

/* Return the CONST_DOUBLE representing the 80387 constant that is
   loaded by the specified special instruction.  The argument IDX
   matches the return value from standard_80387_constant_p.  */

rtx
standard_80387_constant_rtx (int idx)
{
  int i;

  if (! ext_80387_constants_init)
    init_ext_80387_constants ();

  switch (idx)
    {
    case 3:
    case 4:
    case 5:
    case 6:
    case 7:
      i = idx - 3;
      break;

    default:
      gcc_unreachable ();
    }

  return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
				       XFmode);
}

/* Return 1 if X is all 0s and 2 if X is all 1s
   in supported SSE vector mode.  */

int
standard_sse_constant_p (rtx x)
{
  enum machine_mode mode = GET_MODE (x);

  if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
    return 1;
  if (vector_all_ones_operand (x, mode))
    switch (mode)
      {
      case V16QImode:
      case V8HImode:
      case V4SImode:
      case V2DImode:
	if (TARGET_SSE2)
	  return 2;
      default:
	break;
      }

  return 0;
}

/* Return the opcode of the special instruction to be used to load
   the constant X.  */

const char *
standard_sse_constant_opcode (rtx insn, rtx x)
{
  switch (standard_sse_constant_p (x))
    {
    case 1:
      switch (get_attr_mode (insn))
	{
	case MODE_TI:
	  if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
	    return "%vpxor\t%0, %d0";
	case MODE_V2DF:
	  if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
	    return "%vxorpd\t%0, %d0";
	case MODE_V4SF:
	  return "%vxorps\t%0, %d0";

	case MODE_OI:
	  if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
	    return "vpxor\t%x0, %x0, %x0";
	case MODE_V4DF:
	  if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
	    return "vxorpd\t%x0, %x0, %x0";
	case MODE_V8SF:
	  return "vxorps\t%x0, %x0, %x0";

	default:
	  break;
	}

    case 2:
      return "%vpcmpeqd\t%0, %d0";
    default:
      break;
    }
  gcc_unreachable ();
}

/* Returns true if OP contains a symbol reference */

bool
symbolic_reference_mentioned_p (rtx op)
{
  const char *fmt;
  int i;

  if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
    return true;

  fmt = GET_RTX_FORMAT (GET_CODE (op));
  for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  int j;

	  for (j = XVECLEN (op, i) - 1; j >= 0; j--)
	    if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
	      return true;
	}

      else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
	return true;
    }

  return false;
}

/* Return true if it is appropriate to emit `ret' instructions in the
   body of a function.  Do this only if the epilogue is simple, needing a
   couple of insns.  Prior to reloading, we can't tell how many registers
   must be saved, so return false then.  Return false if there is no frame
   marker to de-allocate.  */

bool
ix86_can_use_return_insn_p (void)
{
  struct ix86_frame frame;

  if (! reload_completed || frame_pointer_needed)
    return false;

  /* Don't allow more than 32k pop, since that's all we can do
     with one instruction.  */
  if (crtl->args.pops_args && crtl->args.size >= 32768)
    return false;

  ix86_compute_frame_layout (&frame);
  return (frame.stack_pointer_offset == UNITS_PER_WORD
	  && (frame.nregs + frame.nsseregs) == 0);
}

/* Value should be nonzero if functions must have frame pointers.
   Zero means the frame pointer need not be set up (and parms may
   be accessed via the stack pointer) in functions that seem suitable.  */

static bool
ix86_frame_pointer_required (void)
{
  /* If we accessed previous frames, then the generated code expects
     to be able to access the saved ebp value in our frame.  */
  if (cfun->machine->accesses_prev_frame)
    return true;

  /* Several x86 os'es need a frame pointer for other reasons,
     usually pertaining to setjmp.  */
  if (SUBTARGET_FRAME_POINTER_REQUIRED)
    return true;

  /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
     turns off the frame pointer by default.  Turn it back on now if
     we've not got a leaf function.  */
  if (TARGET_OMIT_LEAF_FRAME_POINTER
      && (!current_function_is_leaf
	  || ix86_current_function_calls_tls_descriptor))
    return true;

  if (crtl->profile && !flag_fentry)
    return true;

  return false;
}

/* Record that the current function accesses previous call frames.  */

void
ix86_setup_frame_addresses (void)
{
  cfun->machine->accesses_prev_frame = 1;
}

#ifndef USE_HIDDEN_LINKONCE
# if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
#  define USE_HIDDEN_LINKONCE 1
# else
#  define USE_HIDDEN_LINKONCE 0
# endif
#endif

static int pic_labels_used;

/* Fills in the label name that should be used for a pc thunk for
   the given register.  */

static void
get_pc_thunk_name (char name[32], unsigned int regno)
{
  gcc_assert (!TARGET_64BIT);

  if (USE_HIDDEN_LINKONCE)
    sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
  else
    ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
}
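
/* As a sketch of the mechanism (see output_set_got and ix86_code_end
   below): for -fpic on ia32 the GOT pointer is typically set up with a
   sequence along the lines of

	call	__i686.get_pc_thunk.bx
	addl	$_GLOBAL_OFFSET_TABLE_, %ebx

   where the thunk body emitted by ix86_code_end is simply

	movl	(%esp), %ebx
	ret

   so that the register ends up holding the address of the add insn
   plus the GOT displacement.  */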

/* This function generates code for -fpic that loads %ebx with
   the return address of the caller and then returns.  */

static void
ix86_code_end (void)
{
  rtx xops[2];
  int regno;

#ifdef TARGET_SOLARIS
  solaris_code_end ();
#endif

  for (regno = AX_REG; regno <= SP_REG; regno++)
    {
      char name[32];
      tree decl;

      if (!(pic_labels_used & (1 << regno)))
	continue;

      get_pc_thunk_name (name, regno);

      decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
			 get_identifier (name),
			 build_function_type_list (void_type_node, NULL_TREE));
      DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
				       NULL_TREE, void_type_node);
      TREE_PUBLIC (decl) = 1;
      TREE_STATIC (decl) = 1;

#if TARGET_MACHO
      if (TARGET_MACHO)
	{
	  switch_to_section (darwin_sections[text_coal_section]);
	  fputs ("\t.weak_definition\t", asm_out_file);
	  assemble_name (asm_out_file, name);
	  fputs ("\n\t.private_extern\t", asm_out_file);
	  assemble_name (asm_out_file, name);
	  putc ('\n', asm_out_file);
	  ASM_OUTPUT_LABEL (asm_out_file, name);
	  DECL_WEAK (decl) = 1;
	}
      else
#endif
      if (USE_HIDDEN_LINKONCE)
	{
	  DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl);

	  targetm.asm_out.unique_section (decl, 0);
	  switch_to_section (get_named_section (decl, NULL, 0));

	  targetm.asm_out.globalize_label (asm_out_file, name);
	  fputs ("\t.hidden\t", asm_out_file);
	  assemble_name (asm_out_file, name);
	  putc ('\n', asm_out_file);
	  ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
	}
      else
	{
	  switch_to_section (text_section);
	  ASM_OUTPUT_LABEL (asm_out_file, name);
	}

      DECL_INITIAL (decl) = make_node (BLOCK);
      current_function_decl = decl;
      init_function_start (decl);
      first_function_block_is_cold = false;
      /* Make sure unwind info is emitted for the thunk if needed.  */
      final_start_function (emit_barrier (), asm_out_file, 1);

      /* Pad stack IP move with 4 instructions (two NOPs count
	 as one instruction).  */
      if (TARGET_PAD_SHORT_FUNCTION)
	{
	  int i = 8;

	  while (i--)
	    fputs ("\tnop\n", asm_out_file);
	}

      xops[0] = gen_rtx_REG (Pmode, regno);
      xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
      output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
      fputs ("\tret\n", asm_out_file);
      final_end_function ();
      init_insn_lengths ();
      free_after_compilation (cfun);
      set_cfun (NULL);
      current_function_decl = NULL;
    }

  if (flag_split_stack)
    file_end_indicate_split_stack ();
}

/* Emit code for the SET_GOT patterns.  */

const char *
output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
{
  rtx xops[3];

  xops[0] = dest;

  if (TARGET_VXWORKS_RTP && flag_pic)
    {
      /* Load (*VXWORKS_GOTT_BASE) into the PIC register.  */
      xops[2] = gen_rtx_MEM (Pmode,
			     gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
      output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);

      /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
	 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
	 an unadorned address.  */
      xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
      SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
      output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
      return "";
    }

  xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);

  if (!flag_pic)
    {
      xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());

      output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);

#if TARGET_MACHO
      /* Output the Mach-O "canonical" label name ("Lxx$pb") here too.  This
	 is what will be referenced by the Mach-O PIC subsystem.  */
      if (!label)
	ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
#endif

      targetm.asm_out.internal_label (asm_out_file, "L",
				      CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
    }
  else
    {
      char name[32];
      get_pc_thunk_name (name, REGNO (dest));
      pic_labels_used |= 1 << REGNO (dest);

      xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
      xops[2] = gen_rtx_MEM (QImode, xops[2]);
      output_asm_insn ("call\t%X2", xops);
      /* Output the Mach-O "canonical" label name ("Lxx$pb") here too.  This
	 is what will be referenced by the Mach-O PIC subsystem.  */
#if TARGET_MACHO
      if (!label)
	ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
      else
	targetm.asm_out.internal_label (asm_out_file, "L",
					CODE_LABEL_NUMBER (label));
#endif
    }

  if (!TARGET_MACHO)
    output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);

  return "";
}

/* Generate a "push" pattern for input ARG.  */

static rtx
gen_push (rtx arg)
{
  struct machine_function *m = cfun->machine;

  if (m->fs.cfa_reg == stack_pointer_rtx)
    m->fs.cfa_offset += UNITS_PER_WORD;
  m->fs.sp_offset += UNITS_PER_WORD;

  return gen_rtx_SET (VOIDmode,
		      gen_rtx_MEM (Pmode,
				   gen_rtx_PRE_DEC (Pmode,
						    stack_pointer_rtx)),
		      arg);
}

/* Generate a "pop" pattern for input ARG.  */

static rtx
gen_pop (rtx arg)
{
  return gen_rtx_SET (VOIDmode,
		      arg,
		      gen_rtx_MEM (Pmode,
				   gen_rtx_POST_INC (Pmode,
						     stack_pointer_rtx)));
}
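
/* For illustration, the RTL produced by the two helpers above has the
   shape (set (mem (pre_dec sp)) arg) for a push and
   (set arg (mem (post_inc sp))) for a pop.  Only gen_push updates the
   m->fs bookkeeping; the epilogue code tracks the stack pointer state
   for pops separately.  */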

/* Return >= 0 if there is an unused call-clobbered register available
   for the entire function.  */

static unsigned int
ix86_select_alt_pic_regnum (void)
{
  if (current_function_is_leaf
      && !crtl->profile
      && !ix86_current_function_calls_tls_descriptor)
    {
      int i, drap;
      /* Can't use the same register for both PIC and DRAP.  */
      if (crtl->drap_reg)
	drap = REGNO (crtl->drap_reg);
      else
	drap = -1;
      for (i = 2; i >= 0; --i)
	if (i != drap && !df_regs_ever_live_p (i))
	  return i;
    }

  return INVALID_REGNUM;
}

/* Return TRUE if we need to save REGNO.  */

static bool
ix86_save_reg (unsigned int regno, bool maybe_eh_return)
{
  if (pic_offset_table_rtx
      && regno == REAL_PIC_OFFSET_TABLE_REGNUM
      && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
	  || crtl->profile
	  || crtl->calls_eh_return
	  || crtl->uses_const_pool))
    return ix86_select_alt_pic_regnum () == INVALID_REGNUM;

  if (crtl->calls_eh_return && maybe_eh_return)
    {
      unsigned i;
      for (i = 0; ; i++)
	{
	  unsigned test = EH_RETURN_DATA_REGNO (i);
	  if (test == INVALID_REGNUM)
	    break;
	  if (test == regno)
	    return true;
	}
    }

  if (crtl->drap_reg && regno == REGNO (crtl->drap_reg))
    return true;

  return (df_regs_ever_live_p (regno)
	  && !call_used_regs[regno]
	  && !fixed_regs[regno]
	  && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
}

/* Return number of saved general purpose registers.  */

static int
ix86_nsaved_regs (void)
{
  int nregs = 0;
  int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
      nregs ++;
  return nregs;
}

/* Return number of saved SSE registers.  */

static int
ix86_nsaved_sseregs (void)
{
  int nregs = 0;
  int regno;

  if (!TARGET_64BIT_MS_ABI)
    return 0;
  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
      nregs ++;
  return nregs;
}

/* Given FROM and TO register numbers, say whether this elimination is
   allowed.  If stack alignment is needed, we can only replace argument
   pointer with hard frame pointer, or replace frame pointer with stack
   pointer.  Otherwise, frame pointer elimination is automatically
   handled and all other eliminations are valid.  */

static bool
ix86_can_eliminate (const int from, const int to)
{
  if (stack_realign_fp)
    return ((from == ARG_POINTER_REGNUM
	     && to == HARD_FRAME_POINTER_REGNUM)
	    || (from == FRAME_POINTER_REGNUM
		&& to == STACK_POINTER_REGNUM));
  else
    return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
}

/* Return the offset between two registers, one to be eliminated, and the other
   its replacement, at the start of a routine.  */

HOST_WIDE_INT
ix86_initial_elimination_offset (int from, int to)
{
  struct ix86_frame frame;
  ix86_compute_frame_layout (&frame);

  if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    return frame.hard_frame_pointer_offset;
  else if (from == FRAME_POINTER_REGNUM
	   && to == HARD_FRAME_POINTER_REGNUM)
    return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
  else
    {
      gcc_assert (to == STACK_POINTER_REGNUM);

      if (from == ARG_POINTER_REGNUM)
	return frame.stack_pointer_offset;

      gcc_assert (from == FRAME_POINTER_REGNUM);
      return frame.stack_pointer_offset - frame.frame_pointer_offset;
    }
}

/* In a dynamically-aligned function, we can't know the offset from
   stack pointer to frame pointer, so we must ensure that setjmp
   eliminates fp against the hard fp (%ebp) rather than trying to
   index from %esp up to the top of the frame across a gap that is
   of unknown (at compile-time) size.  */
static rtx
ix86_builtin_setjmp_frame_value (void)
{
  return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
}

/* When using -fsplit-stack, the allocation routines set a field in
   the TCB to the bottom of the stack plus this much space, measured
   in bytes.  */

#define SPLIT_STACK_AVAILABLE 256

/* Fill structure ix86_frame about frame of currently computed function.  */

static void
ix86_compute_frame_layout (struct ix86_frame *frame)
{
  unsigned int stack_alignment_needed;
  HOST_WIDE_INT offset;
  unsigned int preferred_alignment;
  HOST_WIDE_INT size = get_frame_size ();
  HOST_WIDE_INT to_allocate;

  frame->nregs = ix86_nsaved_regs ();
  frame->nsseregs = ix86_nsaved_sseregs ();

  stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
  preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;

  /* The 64-bit MS ABI seems to require stack alignment to be always 16,
     except for function prologues and leaf functions.  */
  if ((TARGET_64BIT_MS_ABI && preferred_alignment < 16)
      && (!current_function_is_leaf || cfun->calls_alloca != 0
	  || ix86_current_function_calls_tls_descriptor))
    {
      preferred_alignment = 16;
      stack_alignment_needed = 16;
      crtl->preferred_stack_boundary = 128;
      crtl->stack_alignment_needed = 128;
    }

  gcc_assert (!size || stack_alignment_needed);
  gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
  gcc_assert (preferred_alignment <= stack_alignment_needed);

  /* For SEH we have to limit the amount of code movement into the prologue.
     At present we do this via a BLOCKAGE, at which point there's very little
     scheduling that can be done, which means that there's very little point
     in doing anything except PUSHs.  */
  if (TARGET_SEH)
    cfun->machine->use_fast_prologue_epilogue = false;

  /* During reload iteration the amount of registers saved can change.
     Recompute the value as needed.  Do not recompute when amount of registers
     didn't change as reload does multiple calls to the function and does not
     expect the decision to change within single iteration.  */
  else if (!optimize_function_for_size_p (cfun)
	   && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
    {
      int count = frame->nregs;
      struct cgraph_node *node = cgraph_get_node (current_function_decl);

      cfun->machine->use_fast_prologue_epilogue_nregs = count;

      /* The fast prologue uses move instead of push to save registers.  This
	 is significantly longer, but also executes faster as modern hardware
	 can execute the moves in parallel, but can't do that for push/pop.

	 Be careful about choosing what prologue to emit:  when the function
	 takes many instructions to execute we may use the slow version, as
	 well as when the function is known to be outside a hot spot (this is
	 known with feedback only).  Weight the size of the function by the
	 number of registers to save, as it is cheap to use one or two push
	 instructions but very slow to use many of them.  */
      if (count)
	count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
      if (node->frequency < NODE_FREQUENCY_NORMAL
	  || (flag_branch_probabilities
	      && node->frequency < NODE_FREQUENCY_HOT))
	cfun->machine->use_fast_prologue_epilogue = false;
      else
	cfun->machine->use_fast_prologue_epilogue
	  = !expensive_function_p (count);
    }

  if (TARGET_PROLOGUE_USING_MOVE
      && cfun->machine->use_fast_prologue_epilogue)
    frame->save_regs_using_mov = true;
  else
    frame->save_regs_using_mov = false;

  /* If static stack checking is enabled and done with probes, the registers
     need to be saved before allocating the frame.  */
  if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
    frame->save_regs_using_mov = false;

  /* Skip return address.  */
  offset = UNITS_PER_WORD;

  /* Skip pushed static chain.  */
  if (ix86_static_chain_on_stack)
    offset += UNITS_PER_WORD;

  /* Skip saved base pointer.  */
  if (frame_pointer_needed)
    offset += UNITS_PER_WORD;
  frame->hfp_save_offset = offset;

  /* The traditional frame pointer location is at the top of the frame.  */
  frame->hard_frame_pointer_offset = offset;

  /* Register save area */
  offset += frame->nregs * UNITS_PER_WORD;
  frame->reg_save_offset = offset;

  /* Align and set SSE register save area.  */
  if (frame->nsseregs)
    {
      /* The only ABI that has saved SSE registers (Win64) also has a
	 16-byte aligned default stack, and thus we don't need to be
	 within the re-aligned local stack frame to save them.  */
      gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
      offset = (offset + 16 - 1) & -16;
      offset += frame->nsseregs * 16;
    }
  frame->sse_reg_save_offset = offset;
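
  /* Worked example of the rounding above: (offset + 16 - 1) & -16 rounds
     offset up to the next multiple of 16, e.g. an offset of 40 becomes
     (40 + 15) & ~15 = 48, after which each saved SSE register consumes
     a further 16 bytes.  */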

  /* The re-aligned stack starts here.  Values before this point are not
     directly comparable with values below this point.  In order to make
     sure that no value happens to be the same before and after, force
     the alignment computation below to add a non-zero value.  */
  if (stack_realign_fp)
    offset = (offset + stack_alignment_needed) & -stack_alignment_needed;

  /* Va-arg area */
  frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
  offset += frame->va_arg_size;

  /* Align start of frame for local function.  */
  if (stack_realign_fp
      || offset != frame->sse_reg_save_offset
      || size != 0
      || !current_function_is_leaf
      || cfun->calls_alloca
      || ix86_current_function_calls_tls_descriptor)
    offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;

  /* Frame pointer points here.  */
  frame->frame_pointer_offset = offset;

  offset += size;

  /* Add outgoing arguments area.  Can be skipped if we eliminated
     all the function calls as dead code.
     Skipping is however impossible when function calls alloca.  Alloca
     expander assumes that last crtl->outgoing_args_size
     of stack frame are unused.  */
  if (ACCUMULATE_OUTGOING_ARGS
      && (!current_function_is_leaf || cfun->calls_alloca
	  || ix86_current_function_calls_tls_descriptor))
    {
      offset += crtl->outgoing_args_size;
      frame->outgoing_arguments_size = crtl->outgoing_args_size;
    }
  else
    frame->outgoing_arguments_size = 0;

  /* Align stack boundary.  Only needed if we're calling another function
     or using alloca.  */
  if (!current_function_is_leaf || cfun->calls_alloca
      || ix86_current_function_calls_tls_descriptor)
    offset = (offset + preferred_alignment - 1) & -preferred_alignment;

  /* We've reached end of stack frame.  */
  frame->stack_pointer_offset = offset;

  /* Size prologue needs to allocate.  */
  to_allocate = offset - frame->sse_reg_save_offset;

  if ((!to_allocate && frame->nregs <= 1)
      || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
    frame->save_regs_using_mov = false;

  if (ix86_using_red_zone ()
      && current_function_sp_is_unchanging
      && current_function_is_leaf
      && !ix86_current_function_calls_tls_descriptor)
    {
      frame->red_zone_size = to_allocate;
      if (frame->save_regs_using_mov)
	frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
      if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
	frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
    }
  else
    frame->red_zone_size = 0;
  frame->stack_pointer_offset -= frame->red_zone_size;
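
  /* The red zone is the fixed-size area below the stack pointer that
     the 64-bit SysV ABI guarantees will not be clobbered by signal or
     interrupt handlers; leaf functions may use it without adjusting
     %rsp, which is why the allocation above can be folded into it.
     RED_ZONE_SIZE is 128 bytes under that ABI, less a small
     RED_ZONE_RESERVE kept free.  */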

  /* The SEH frame pointer location is near the bottom of the frame.
     This is enforced by the fact that the difference between the
     stack pointer and the frame pointer is limited to 240 bytes in
     the unwind data structure.  */
  if (TARGET_SEH)
    {
      HOST_WIDE_INT diff;

      /* If we can leave the frame pointer where it is, do so.  */
      diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
      if (diff > 240 || (diff & 15) != 0)
	{
	  /* Ideally we'd determine what portion of the local stack frame
	     (within the constraint of the lowest 240) is most heavily used.
	     But without that complication, simply bias the frame pointer
	     by 128 bytes so as to maximize the amount of the local stack
	     frame that is addressable with 8-bit offsets.  */
	  frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
	}
    }
}

/* This is semi-inlined memory_address_length, but simplified
   since we know that we're always dealing with reg+offset, and
   to avoid having to create and discard all that rtl.  */

static unsigned int
choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
{
  unsigned int len;

  if (offset == 0)
    {
      /* EBP and R13 cannot be encoded without an offset.  */
      len = (regno == BP_REG || regno == R13_REG);
    }
  else if (IN_RANGE (offset, -128, 127))
    len = 1;
  else
    len = 4;

  /* ESP and R12 must be encoded with a SIB byte.  */
  if (regno == SP_REG || regno == R12_REG)
    len += 1;

  return len;
}
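
/* A few worked examples of the encoding lengths computed above:
   (SP, 0)   -> 1  (no displacement, but a SIB byte is required),
   (BP, 0)   -> 1  (an explicit 8-bit zero displacement is required),
   (AX, 0)   -> 0  (plain register-indirect),
   (AX, -64) -> 1  (8-bit displacement),
   (AX, 200) -> 4  (32-bit displacement),
   (R12, 64) -> 2  (8-bit displacement plus SIB byte).  */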

/* Return an RTX that points to CFA_OFFSET within the stack frame.
   The valid base registers are taken from CFUN->MACHINE->FS.  */

static rtx
choose_baseaddr (HOST_WIDE_INT cfa_offset)
{
  const struct machine_function *m = cfun->machine;
  rtx base_reg = NULL;
  HOST_WIDE_INT base_offset = 0;

  if (m->use_fast_prologue_epilogue)
    {
      /* Choose the base register most likely to allow the most scheduling
	 opportunities.  Generally FP is valid throughout the function,
	 while DRAP must be reloaded within the epilogue.  But choose either
	 over the SP due to increased encoding size.  */

      if (m->fs.fp_valid)
	{
	  base_reg = hard_frame_pointer_rtx;
	  base_offset = m->fs.fp_offset - cfa_offset;
	}
      else if (m->fs.drap_valid)
	{
	  base_reg = crtl->drap_reg;
	  base_offset = 0 - cfa_offset;
	}
      else if (m->fs.sp_valid)
	{
	  base_reg = stack_pointer_rtx;
	  base_offset = m->fs.sp_offset - cfa_offset;
	}
    }
  else
    {
      HOST_WIDE_INT toffset;
      unsigned int len = 16, tlen;

      /* Choose the base register with the smallest address encoding.
	 With a tie, choose FP > DRAP > SP.  */
      if (m->fs.sp_valid)
	{
	  base_reg = stack_pointer_rtx;
	  base_offset = m->fs.sp_offset - cfa_offset;
	  len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
	}
      if (m->fs.drap_valid)
	{
	  toffset = 0 - cfa_offset;
	  tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
	  if (tlen <= len)
	    {
	      base_reg = crtl->drap_reg;
	      base_offset = toffset;
	      len = tlen;
	    }
	}
      if (m->fs.fp_valid)
	{
	  toffset = m->fs.fp_offset - cfa_offset;
	  tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
	  if (tlen <= len)
	    {
	      base_reg = hard_frame_pointer_rtx;
	      base_offset = toffset;
	      len = tlen;
	    }
	}
    }
  gcc_assert (base_reg != NULL);

  return plus_constant (base_reg, base_offset);
}

/* Emit code to save registers in the prologue.  */

static void
ix86_emit_save_regs (void)
{
  unsigned int regno;
  rtx insn;

  for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
    if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
      {
	insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
	RTX_FRAME_RELATED_P (insn) = 1;
      }
}

/* Emit a single register save at CFA - CFA_OFFSET.  */

static void
ix86_emit_save_reg_using_mov (enum machine_mode mode, unsigned int regno,
			      HOST_WIDE_INT cfa_offset)
{
  struct machine_function *m = cfun->machine;
  rtx reg = gen_rtx_REG (mode, regno);
  rtx mem, addr, base, insn;

  addr = choose_baseaddr (cfa_offset);
  mem = gen_frame_mem (mode, addr);

  /* For SSE saves, we need to indicate the 128-bit alignment.  */
  set_mem_align (mem, GET_MODE_ALIGNMENT (mode));

  insn = emit_move_insn (mem, reg);
  RTX_FRAME_RELATED_P (insn) = 1;

  base = addr;
  if (GET_CODE (base) == PLUS)
    base = XEXP (base, 0);
  gcc_checking_assert (REG_P (base));

  /* When saving registers into a re-aligned local stack frame, avoid
     any tricky guessing by dwarf2out.  */
  if (m->fs.realigned)
    {
      gcc_checking_assert (stack_realign_drap);

      if (regno == REGNO (crtl->drap_reg))
	{
	  /* A bit of a hack.  We force the DRAP register to be saved in
	     the re-aligned stack frame, which provides us with a copy
	     of the CFA that will last past the prologue.  Install it.  */
	  gcc_checking_assert (cfun->machine->fs.fp_valid);
	  addr = plus_constant (hard_frame_pointer_rtx,
				cfun->machine->fs.fp_offset - cfa_offset);
	  mem = gen_rtx_MEM (mode, addr);
	  add_reg_note (insn, REG_CFA_DEF_CFA, mem);
	}
      else
	{
	  /* The frame pointer is a stable reference within the
	     aligned frame.  Use it.  */
	  gcc_checking_assert (cfun->machine->fs.fp_valid);
	  addr = plus_constant (hard_frame_pointer_rtx,
				cfun->machine->fs.fp_offset - cfa_offset);
	  mem = gen_rtx_MEM (mode, addr);
	  add_reg_note (insn, REG_CFA_EXPRESSION,
			gen_rtx_SET (VOIDmode, mem, reg));
	}
    }

  /* The memory may not be relative to the current CFA register,
     which means that we may need to generate a new pattern for
     use by the unwind info.  */
  else if (base != m->fs.cfa_reg)
    {
      addr = plus_constant (m->fs.cfa_reg, m->fs.cfa_offset - cfa_offset);
      mem = gen_rtx_MEM (mode, addr);
      add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
    }
}

/* Emit code to save registers using MOV insns.
   First register is stored at CFA - CFA_OFFSET.  */
static void
ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
{
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
      {
	ix86_emit_save_reg_using_mov (Pmode, regno, cfa_offset);
	cfa_offset -= UNITS_PER_WORD;
      }
}

/* Emit code to save SSE registers using MOV insns.
   First register is stored at CFA - CFA_OFFSET.  */
static void
ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
{
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
      {
	ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
	cfa_offset -= 16;
      }
}

static GTY(()) rtx queued_cfa_restores;

/* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack
   manipulation insn.  The value is on the stack at CFA - CFA_OFFSET.
   Don't add the note if the previously saved value will be left untouched
   within stack red-zone till return, as unwinders can find the same value
   in the register and on the stack.  */

static void
ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
{
  if (cfa_offset <= cfun->machine->fs.red_zone_offset)
    return;

  if (insn)
    {
      add_reg_note (insn, REG_CFA_RESTORE, reg);
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  else
    queued_cfa_restores
      = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
}

/* Add queued REG_CFA_RESTORE notes if any to INSN.  */

static void
ix86_add_queued_cfa_restore_notes (rtx insn)
{
  rtx last;
  if (!queued_cfa_restores)
    return;
  for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
    ;
  XEXP (last, 1) = REG_NOTES (insn);
  REG_NOTES (insn) = queued_cfa_restores;
  queued_cfa_restores = NULL_RTX;
  RTX_FRAME_RELATED_P (insn) = 1;
}

/* Expand prologue or epilogue stack adjustment.
   The pattern exists to put a dependency on all ebp-based memory accesses.
   STYLE should be negative if instructions should be marked as frame related,
   zero if %r11 register is live and cannot be freely used and positive
   otherwise.  */

static void
pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
			   int style, bool set_cfa)
{
  struct machine_function *m = cfun->machine;
  rtx insn;
  bool add_frame_related_expr = false;

  if (! TARGET_64BIT)
    insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
  else if (x86_64_immediate_operand (offset, DImode))
    insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
  else
    {
      rtx tmp;
      /* r11 is used by indirect sibcall return as well, set before the
	 epilogue and used after the epilogue.  */
      if (style)
	tmp = gen_rtx_REG (DImode, R11_REG);
      else
	{
	  gcc_assert (src != hard_frame_pointer_rtx
		      && dest != hard_frame_pointer_rtx);
	  tmp = hard_frame_pointer_rtx;
	}
      insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
      if (style < 0)
	add_frame_related_expr = true;

      insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
    }

  insn = emit_insn (insn);
  if (style >= 0)
    ix86_add_queued_cfa_restore_notes (insn);

  if (set_cfa)
    {
      rtx r;

      gcc_assert (m->fs.cfa_reg == src);
      m->fs.cfa_offset += INTVAL (offset);
      m->fs.cfa_reg = dest;

      r = gen_rtx_PLUS (Pmode, src, offset);
      r = gen_rtx_SET (VOIDmode, dest, r);
      add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  else if (style < 0)
    {
      RTX_FRAME_RELATED_P (insn) = 1;
      if (add_frame_related_expr)
	{
	  rtx r = gen_rtx_PLUS (Pmode, src, offset);
	  r = gen_rtx_SET (VOIDmode, dest, r);
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
	}
    }

  if (dest == stack_pointer_rtx)
    {
      HOST_WIDE_INT ooffset = m->fs.sp_offset;
      bool valid = m->fs.sp_valid;

      if (src == hard_frame_pointer_rtx)
	{
	  valid = m->fs.fp_valid;
	  ooffset = m->fs.fp_offset;
	}
      else if (src == crtl->drap_reg)
	{
	  valid = m->fs.drap_valid;
	  ooffset = 0;
	}
      else
	{
	  /* Else there are two possibilities: SP itself, which we set
	     up as the default above.  Or EH_RETURN_STACKADJ_RTX, which is
	     taken care of by hand along the eh_return path.  */
	  gcc_checking_assert (src == stack_pointer_rtx
			       || offset == const0_rtx);
	}

      m->fs.sp_offset = ooffset - INTVAL (offset);
      m->fs.sp_valid = valid;
    }
}

/* Find an available register to be used as dynamic realign argument
   pointer register.  Such a register will be written in prologue and
   used at the beginning of the body, so it must not be
	1. a parameter passing register.
	2. the GOT pointer.
   We reuse the static-chain register if it is available.  Otherwise, we
   use DI for i386 and R13 for x86-64.  We chose R13 since it has
   shorter encoding.

   Return: the regno of chosen register.  */

static unsigned int
find_drap_reg (void)
{
  tree decl = cfun->decl;

  if (TARGET_64BIT)
    {
      /* Use R13 for nested functions or functions that need a static
	 chain.  Since a function with a tail call may use any
	 caller-saved registers in the epilogue, DRAP must not use
	 caller-saved registers in such a case.  */
      if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
	return R13_REG;

      return R10_REG;
    }
  else
    {
      /* Use DI for nested functions or functions that need a static
	 chain.  Since a function with a tail call may use any
	 caller-saved registers in the epilogue, DRAP must not use
	 caller-saved registers in such a case.  */
      if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
	return DI_REG;

      /* Reuse static chain register if it isn't used for parameter
	 passing.  */
      if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
	{
	  unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
	  if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
	    return CX_REG;
	}
      return DI_REG;
    }
}

/* Return minimum incoming stack alignment.  */

static unsigned int
ix86_minimum_incoming_stack_boundary (bool sibcall)
{
  unsigned int incoming_stack_boundary;

  /* Prefer the one specified at command line. */
  if (ix86_user_incoming_stack_boundary)
    incoming_stack_boundary = ix86_user_incoming_stack_boundary;
  /* In 32bit, use MIN_STACK_BOUNDARY for incoming stack boundary
     if -mstackrealign is used, it isn't used for sibcall check and
     estimated stack alignment is 128bit.  */
  else if (!sibcall
	   && !TARGET_64BIT
	   && ix86_force_align_arg_pointer
	   && crtl->stack_alignment_estimated == 128)
    incoming_stack_boundary = MIN_STACK_BOUNDARY;
  else
    incoming_stack_boundary = ix86_default_incoming_stack_boundary;

  /* Incoming stack alignment can be changed on individual functions
     via force_align_arg_pointer attribute.  We use the smallest
     incoming stack boundary.  */
  if (incoming_stack_boundary > MIN_STACK_BOUNDARY
      && lookup_attribute (ix86_force_align_arg_pointer_string,
			   TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
    incoming_stack_boundary = MIN_STACK_BOUNDARY;

  /* The incoming stack frame has to be aligned at least at
     parm_stack_boundary.  */
  if (incoming_stack_boundary < crtl->parm_stack_boundary)
    incoming_stack_boundary = crtl->parm_stack_boundary;

  /* Stack at entrance of main is aligned by runtime.  We use the
     smallest incoming stack boundary. */
  if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
      && DECL_NAME (current_function_decl)
      && MAIN_NAME_P (DECL_NAME (current_function_decl))
      && DECL_FILE_SCOPE_P (current_function_decl))
    incoming_stack_boundary = MAIN_STACK_BOUNDARY;

  return incoming_stack_boundary;
}

/* Update incoming stack boundary and estimated stack alignment.  */

static void
ix86_update_stack_boundary (void)
{
  ix86_incoming_stack_boundary
    = ix86_minimum_incoming_stack_boundary (false);

  /* x86_64 vararg needs 16byte stack alignment for register save
     area.  */
  if (TARGET_64BIT
      && cfun->stdarg
      && crtl->stack_alignment_estimated < 128)
    crtl->stack_alignment_estimated = 128;
}

/* Handle the TARGET_GET_DRAP_RTX hook.  Return NULL if no DRAP is
   needed or an rtx for DRAP otherwise.  */

static rtx
ix86_get_drap_rtx (void)
{
  if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
    crtl->need_drap = true;

  if (stack_realign_drap)
    {
      /* Assign DRAP to vDRAP and return vDRAP.  */
      unsigned int regno = find_drap_reg ();
      rtx drap_vreg;
      rtx arg_ptr;
      rtx seq, insn;

      arg_ptr = gen_rtx_REG (Pmode, regno);
      crtl->drap_reg = arg_ptr;

      start_sequence ();
      drap_vreg = copy_to_reg (arg_ptr);
      seq = get_insns ();
      end_sequence ();

      insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
      if (!optimize)
	{
	  add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
      return drap_vreg;
    }
  else
    return NULL;
}

/* Handle the TARGET_INTERNAL_ARG_POINTER hook.  */

static rtx
ix86_internal_arg_pointer (void)
{
  return virtual_incoming_args_rtx;
}

struct scratch_reg {
  rtx reg;
  bool saved;
};

/* Return a short-lived scratch register for use on function entry.
   In 32-bit mode, it is valid only after the registers are saved
   in the prologue.  This register must be released by means of
   release_scratch_register_on_entry once it is dead.  */

static void
get_scratch_register_on_entry (struct scratch_reg *sr)
{
  int regno;

  sr->saved = false;

  if (TARGET_64BIT)
    {
      /* We always use R11 in 64-bit mode.  */
      regno = R11_REG;
    }
  else
    {
      tree decl = current_function_decl, fntype = TREE_TYPE (decl);
      bool fastcall_p
	= lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
      bool static_chain_p = DECL_STATIC_CHAIN (decl);
      int regparm = ix86_function_regparm (fntype, decl);
      unsigned int drap_regno
	= crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;

      /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
	 for the static chain register.  */
      if ((regparm < 1 || (fastcall_p && !static_chain_p))
	  && drap_regno != AX_REG)
	regno = AX_REG;
      else if (regparm < 2 && drap_regno != DX_REG)
	regno = DX_REG;
      /* ecx is the static chain register.  */
      else if (regparm < 3 && !fastcall_p && !static_chain_p
	       && drap_regno != CX_REG)
	regno = CX_REG;
      else if (ix86_save_reg (BX_REG, true))
	regno = BX_REG;
      /* esi is the static chain register.  */
      else if (!(regparm == 3 && static_chain_p)
	       && ix86_save_reg (SI_REG, true))
	regno = SI_REG;
      else if (ix86_save_reg (DI_REG, true))
	regno = DI_REG;
      else
	{
	  regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
	  sr->saved = true;
	}
    }

  sr->reg = gen_rtx_REG (Pmode, regno);
  if (sr->saved)
    {
      rtx insn = emit_insn (gen_push (sr->reg));
      RTX_FRAME_RELATED_P (insn) = 1;
    }
}

/* Release a scratch register obtained from the preceding function.  */

static void
release_scratch_register_on_entry (struct scratch_reg *sr)
{
  if (sr->saved)
    {
      rtx x, insn = emit_insn (gen_pop (sr->reg));

      /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop.  */
      RTX_FRAME_RELATED_P (insn) = 1;
      x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
      x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
    }
}

#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
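
/* With the usual default of STACK_CHECK_PROBE_INTERVAL_EXP (12, from
   defaults.h), PROBE_INTERVAL is 4096 bytes, i.e. one probe per page.  */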

/* Emit code to adjust the stack pointer by SIZE bytes while probing it.  */

static void
ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
{
  /* We skip the probe for the first interval + a small dope of 4 words and
     probe that many bytes past the specified size to maintain a protection
     area at the bottom of the stack.  */
  const int dope = 4 * UNITS_PER_WORD;
  rtx size_rtx = GEN_INT (size), last;

  /* See if we have a constant small number of probes to generate.  If so,
     that's the easy case.  The run-time loop is made up of 11 insns in the
     generic case while the compile-time loop is made up of 3+2*(n-1) insns
     for n # of intervals.  */
  if (size <= 5 * PROBE_INTERVAL)
    {
      HOST_WIDE_INT i, adjust;
      bool first_probe = true;

      /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
	 values of N from 1 until it exceeds SIZE.  If only one probe is
	 needed, this will not generate any code.  Then adjust and probe
	 to PROBE_INTERVAL + SIZE.  */
      for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
	{
	  if (first_probe)
	    {
	      adjust = 2 * PROBE_INTERVAL + dope;
	      first_probe = false;
	    }
	  else
	    adjust = PROBE_INTERVAL;

	  emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
				  plus_constant (stack_pointer_rtx, -adjust)));
	  emit_stack_probe (stack_pointer_rtx);
	}

      if (first_probe)
	adjust = size + PROBE_INTERVAL + dope;
      else
	adjust = size + PROBE_INTERVAL - i;

      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
			      plus_constant (stack_pointer_rtx, -adjust)));
      emit_stack_probe (stack_pointer_rtx);

      /* Adjust back to account for the additional first interval.  */
      last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
				     plus_constant (stack_pointer_rtx,
						    PROBE_INTERVAL + dope)));
    }

  /* Otherwise, do the same as above, but in a loop.  Note that we must be
     extra careful with variables wrapping around because we might be at
     the very top (or the very bottom) of the address space and we have
     to be able to handle this case properly; in particular, we use an
     equality test for the loop condition.  */
  else
    {
      HOST_WIDE_INT rounded_size;
      struct scratch_reg sr;

      get_scratch_register_on_entry (&sr);


      /* Step 1: round SIZE to the previous multiple of the interval.  */

      rounded_size = size & -PROBE_INTERVAL;
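
      /* Example: for size = 10000 with a 4096-byte interval,
	 rounded_size = 10000 & -4096 = 8192; the loop below covers the
	 first 8192 bytes and step 4 handles the remaining 1808.  */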

      /* Step 2: compute initial and final value of the loop counter.  */

      /* SP = SP_0 + PROBE_INTERVAL.  */
      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
			      plus_constant (stack_pointer_rtx,
					     - (PROBE_INTERVAL + dope))));

      /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE.  */
      emit_move_insn (sr.reg, GEN_INT (-rounded_size));
      emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
			      gen_rtx_PLUS (Pmode, sr.reg,
					    stack_pointer_rtx)));


      /* Step 3: the loop

	 while (SP != LAST_ADDR)
	   {
	     SP = SP + PROBE_INTERVAL
	     probe at SP
	   }

	 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
	 values of N from 1 until it is equal to ROUNDED_SIZE.  */

      emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));


      /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
	 assert at compile-time that SIZE is equal to ROUNDED_SIZE.  */

      if (size != rounded_size)
	{
	  emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
				  plus_constant (stack_pointer_rtx,
						 rounded_size - size)));
	  emit_stack_probe (stack_pointer_rtx);
	}

      /* Adjust back to account for the additional first interval.  */
      last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
				     plus_constant (stack_pointer_rtx,
						    PROBE_INTERVAL + dope)));

      release_scratch_register_on_entry (&sr);
    }

  gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);

  /* Even if the stack pointer isn't the CFA register, we need to correctly
     describe the adjustments made to it, in particular differentiate the
     frame-related ones from the frame-unrelated ones.  */
  if (size > 0)
    {
      rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
      XVECEXP (expr, 0, 0)
	= gen_rtx_SET (VOIDmode, stack_pointer_rtx,
		       plus_constant (stack_pointer_rtx, -size));
      XVECEXP (expr, 0, 1)
	= gen_rtx_SET (VOIDmode, stack_pointer_rtx,
		       plus_constant (stack_pointer_rtx,
				      PROBE_INTERVAL + dope + size));
      add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
      RTX_FRAME_RELATED_P (last) = 1;

      cfun->machine->fs.sp_offset += size;
    }

  /* Make sure nothing is scheduled before we are done.  */
  emit_insn (gen_blockage ());
}

/* Adjust the stack pointer up to REG while probing it.  */

const char *
output_adjust_stack_and_probe (rtx reg)
{
  static int labelno = 0;
  char loop_lab[32], end_lab[32];
  rtx xops[2];

  ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
  ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);

  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);

  /* Jump to END_LAB if SP == LAST_ADDR.  */
  xops[0] = stack_pointer_rtx;
  xops[1] = reg;
  output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
  fputs ("\tje\t", asm_out_file);
  assemble_name_raw (asm_out_file, end_lab);
  fputc ('\n', asm_out_file);

  /* SP = SP + PROBE_INTERVAL.  */
  xops[1] = GEN_INT (PROBE_INTERVAL);
  output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);

  /* Probe at SP.  */
  xops[1] = const0_rtx;
  output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);

  fprintf (asm_out_file, "\tjmp\t");
  assemble_name_raw (asm_out_file, loop_lab);
  fputc ('\n', asm_out_file);

  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);

  return "";
}

/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
   inclusive.  These are offsets from the current stack pointer.  */

static void
ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
{
  /* See if we have a constant small number of probes to generate.  If so,
     that's the easy case.  The run-time loop is made up of 7 insns in the
     generic case while the compile-time loop is made up of n insns for n #
     of intervals.  */
  if (size <= 7 * PROBE_INTERVAL)
    {
      HOST_WIDE_INT i;

      /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
	 it exceeds SIZE.  If only one probe is needed, this will not
	 generate any code.  Then probe at FIRST + SIZE.  */
      for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
	emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + i)));

      emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + size)));
    }

  /* Otherwise, do the same as above, but in a loop.  Note that we must be
     extra careful with variables wrapping around because we might be at
     the very top (or the very bottom) of the address space and we have
     to be able to handle this case properly; in particular, we use an
     equality test for the loop condition.  */
  else
    {
      HOST_WIDE_INT rounded_size, last;
      struct scratch_reg sr;

      get_scratch_register_on_entry (&sr);


      /* Step 1: round SIZE to the previous multiple of the interval.  */

      rounded_size = size & -PROBE_INTERVAL;


      /* Step 2: compute initial and final value of the loop counter.  */

      /* TEST_OFFSET = FIRST.  */
      emit_move_insn (sr.reg, GEN_INT (-first));

      /* LAST_OFFSET = FIRST + ROUNDED_SIZE.  */
      last = first + rounded_size;


      /* Step 3: the loop

	 while (TEST_ADDR != LAST_ADDR)
	   {
	     TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
	     probe at TEST_ADDR
	   }

	 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
	 until it is equal to ROUNDED_SIZE.  */

      emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));


      /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
	 that SIZE is equal to ROUNDED_SIZE.  */

      if (size != rounded_size)
	emit_stack_probe (plus_constant (gen_rtx_PLUS (Pmode,
						       stack_pointer_rtx,
						       sr.reg),
					 rounded_size - size));

      release_scratch_register_on_entry (&sr);
    }

  /* Make sure nothing is scheduled before we are done.  */
  emit_insn (gen_blockage ());
}

/* Probe a range of stack addresses from REG to END, inclusive.  These are
   offsets from the current stack pointer.  */

const char *
output_probe_stack_range (rtx reg, rtx end)
{
  static int labelno = 0;
  char loop_lab[32], end_lab[32];
  rtx xops[3];

  ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
  ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);

  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);

  /* Jump to END_LAB if TEST_ADDR == LAST_ADDR.  */
  xops[0] = reg;
  xops[1] = end;
  output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
  fputs ("\tje\t", asm_out_file);
  assemble_name_raw (asm_out_file, end_lab);
  fputc ('\n', asm_out_file);

  /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL.  */
  xops[1] = GEN_INT (PROBE_INTERVAL);
  output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);

  /* Probe at TEST_ADDR.  */
  xops[0] = stack_pointer_rtx;
  xops[1] = reg;
  xops[2] = const0_rtx;
  output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);

  fprintf (asm_out_file, "\tjmp\t");
  assemble_name_raw (asm_out_file, loop_lab);
  fputc ('\n', asm_out_file);

  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);

  return "";
}
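
/* For reference, the loop emitted above looks roughly like

	.LPSRL0:
		cmp	%<end>, %<reg>
		je	.LPSRE0
		sub	$PROBE_INTERVAL, %<reg>
		orl	$0, (%esp,%<reg>)
		jmp	.LPSRL0
	.LPSRE0:

   with the register names, operand sizes and probe interval depending
   on context.  */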

/* Finalize stack_realign_needed flag, which will guide prologue/epilogue
   to be generated in correct form.  */
static void
ix86_finalize_stack_realign_flags (void)
{
  /* Check if stack realign is really needed after reload, and
     store the result in cfun.  */
  unsigned int incoming_stack_boundary
    = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
       ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
  unsigned int stack_realign = (incoming_stack_boundary
				< (current_function_is_leaf
				   ? crtl->max_used_stack_slot_alignment
				   : crtl->stack_alignment_needed));

  if (crtl->stack_realign_finalized)
    {
      /* After stack_realign_needed is finalized, we can no longer
	 change it.  */
      gcc_assert (crtl->stack_realign_needed == stack_realign);
      return;
    }

  crtl->stack_realign_needed = stack_realign;
  crtl->stack_realign_finalized = true;
}

/* Expand the prologue into a bunch of separate insns.  */

void
ix86_expand_prologue (void)
{
  struct machine_function *m = cfun->machine;
  rtx insn, t;
  struct ix86_frame frame;
  HOST_WIDE_INT allocate;
  bool int_registers_saved;

  ix86_finalize_stack_realign_flags ();

  /* DRAP should not coexist with stack_realign_fp */
  gcc_assert (!(crtl->drap_reg && stack_realign_fp));

  memset (&m->fs, 0, sizeof (m->fs));

  /* Initialize CFA state for before the prologue.  */
  m->fs.cfa_reg = stack_pointer_rtx;
  m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;

  /* Track SP offset to the CFA.  We continue tracking this after we've
     swapped the CFA register away from SP.  In the case of re-alignment
     this is fudged; we're interested in offsets within the local frame.  */
  m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
  m->fs.sp_valid = true;
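
  /* The m->fs structure manipulated throughout the prologue records,
     at each point of emission, which registers can currently be used
     to address the frame (sp_valid, fp_valid, drap_valid), their
     distances from the CFA (sp_offset, fp_offset), and which register
     holds the CFA (cfa_reg/cfa_offset); the code below keeps it in
     sync with every push, move and stack adjustment it emits.  */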

  ix86_compute_frame_layout (&frame);

  if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
    {
      /* We should have already generated an error for any use of
	 ms_hook on a nested function.  */
      gcc_checking_assert (!ix86_static_chain_on_stack);

      /* Check if profiling is active and we shall use profiling before
	 prologue variant.  If so, sorry.  */
      if (crtl->profile && flag_fentry != 0)
	sorry ("ms_hook_prologue attribute isn%'t compatible "
	       "with -mfentry for 32-bit");

      /* In ix86_asm_output_function_label we emitted:
	 8b ff     movl.s %edi,%edi
	 55        push   %ebp
	 8b ec     movl.s %esp,%ebp

	 This matches the hookable function prologue in Win32 API
	 functions in Microsoft Windows XP Service Pack 2 and newer.
	 Wine uses this to enable Windows apps to hook the Win32 API
	 functions provided by Wine.

	 What that means is that we've already set up the frame pointer.  */

      if (frame_pointer_needed
	  && !(crtl->drap_reg && crtl->stack_realign_needed))
	{
	  rtx push, mov;

	  /* We've decided to use the frame pointer already set up.
	     Describe this to the unwinder by pretending that both
	     push and mov insns happen right here.

	     Putting the unwind info here at the end of the ms_hook
	     is done so that we can make absolutely certain we get
	     the required byte sequence at the start of the function,
	     rather than relying on an assembler that can produce
	     the exact encoding required.

	     However it does mean (in the unpatched case) that we have
	     a 1 insn window where the asynchronous unwind info is
	     incorrect.  However, if we placed the unwind info at
	     its correct location we would have incorrect unwind info
	     in the patched case.  Which is probably all moot since
	     I don't expect Wine generates dwarf2 unwind info for the
	     system libraries that use this feature.  */

	  insn = emit_insn (gen_blockage ());

	  push = gen_push (hard_frame_pointer_rtx);
	  mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
			     stack_pointer_rtx);
	  RTX_FRAME_RELATED_P (push) = 1;
	  RTX_FRAME_RELATED_P (mov) = 1;

	  RTX_FRAME_RELATED_P (insn) = 1;
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
			gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));

	  /* Note that gen_push incremented m->fs.cfa_offset, even
	     though we didn't emit the push insn here.  */
	  m->fs.cfa_reg = hard_frame_pointer_rtx;
	  m->fs.fp_offset = m->fs.cfa_offset;
	  m->fs.fp_valid = true;
	}
      else
	{
	  /* The frame pointer is not needed so pop %ebp again.
	     This leaves us with a pristine state.  */
	  emit_insn (gen_pop (hard_frame_pointer_rtx));
	}
    }

  /* The first insn of a function that accepts its static chain on the
     stack is to push the register that would be filled in by a direct
     call.  This insn will be skipped by the trampoline.  */
  else if (ix86_static_chain_on_stack)
    {
      insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
      emit_insn (gen_blockage ());

      /* We don't want to interpret this push insn as a register save,
	 only as a stack adjustment.  The real copy of the register as
	 a save will be done later, if needed.  */
      t = plus_constant (stack_pointer_rtx, -UNITS_PER_WORD);
      t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
      add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  /* Emit prologue code to adjust stack alignment and setup DRAP, in case
     DRAP is needed and stack realignment is really needed after reload.  */
  if (stack_realign_drap)
    {
      int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;

      /* Only need to push parameter pointer reg if it is caller saved.  */
      if (!call_used_regs[REGNO (crtl->drap_reg)])
	{
	  /* Push arg pointer reg */
	  insn = emit_insn (gen_push (crtl->drap_reg));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}

      /* Grab the argument pointer.  */
      t = plus_constant (stack_pointer_rtx, m->fs.sp_offset);
      insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
      RTX_FRAME_RELATED_P (insn) = 1;
      m->fs.cfa_reg = crtl->drap_reg;
      m->fs.cfa_offset = 0;

      /* Align the stack.  */
      insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
					stack_pointer_rtx,
					GEN_INT (-align_bytes)));
      RTX_FRAME_RELATED_P (insn) = 1;

      /* Replicate the return address on the stack so that return
	 address can be reached via (argp - 1) slot.  This is needed
	 to implement macro RETURN_ADDR_RTX and intrinsic function
	 expand_builtin_return_addr etc.  */
      t = plus_constant (crtl->drap_reg, -UNITS_PER_WORD);
      t = gen_frame_mem (Pmode, t);
      insn = emit_insn (gen_push (t));
      RTX_FRAME_RELATED_P (insn) = 1;

      /* For the purposes of frame and register save area addressing,
	 we've started over with a new frame.  */
      m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
      m->fs.realigned = true;
    }
&& !m
->fs
.fp_valid
)
9945 /* Note: AT&T enter does NOT have reversed args. Enter is probably
9946 slower on all targets. Also sdb doesn't like it. */
9947 insn
= emit_insn (gen_push (hard_frame_pointer_rtx
));
9948 RTX_FRAME_RELATED_P (insn
) = 1;
9950 if (m
->fs
.sp_offset
== frame
.hard_frame_pointer_offset
)
9952 insn
= emit_move_insn (hard_frame_pointer_rtx
, stack_pointer_rtx
);
9953 RTX_FRAME_RELATED_P (insn
) = 1;
9955 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
9956 m
->fs
.cfa_reg
= hard_frame_pointer_rtx
;
9957 m
->fs
.fp_offset
= m
->fs
.sp_offset
;
9958 m
->fs
.fp_valid
= true;

  int_registers_saved = (frame.nregs == 0);

  if (!int_registers_saved)
    {
      /* If saving registers via PUSH, do so now.  */
      if (!frame.save_regs_using_mov)
	{
	  ix86_emit_save_regs ();
	  int_registers_saved = true;
	  gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
	}

      /* When using red zone we may start register saving before allocating
	 the stack frame saving one cycle of the prologue.  However, avoid
	 doing this if we have to probe the stack; at least on x86_64 the
	 stack probe can turn into a call that clobbers a red zone location. */
      else if (ix86_using_red_zone ()
	       && (! TARGET_STACK_PROBE
		   || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
	{
	  ix86_emit_save_regs_using_mov (frame.reg_save_offset);
	  int_registers_saved = true;
	}
    }
9987 if (stack_realign_fp
)
9989 int align_bytes
= crtl
->stack_alignment_needed
/ BITS_PER_UNIT
;
9990 gcc_assert (align_bytes
> MIN_STACK_BOUNDARY
/ BITS_PER_UNIT
);
9992 /* The computation of the size of the re-aligned stack frame means
9993 that we must allocate the size of the register save area before
9994 performing the actual alignment. Otherwise we cannot guarantee
9995 that there's enough storage above the realignment point. */
9996 if (m
->fs
.sp_offset
!= frame
.sse_reg_save_offset
)
9997 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
9998 GEN_INT (m
->fs
.sp_offset
9999 - frame
.sse_reg_save_offset
),
10002 /* Align the stack. */
10003 insn
= emit_insn (ix86_gen_andsp (stack_pointer_rtx
,
10005 GEN_INT (-align_bytes
)));
10007 /* For the purposes of register save area addressing, the stack
10008 pointer is no longer valid. As for the value of sp_offset,
10009 see ix86_compute_frame_layout, which we need to match in order
10010 to pass verification of stack_pointer_offset at the end. */
10011 m
->fs
.sp_offset
= (m
->fs
.sp_offset
+ align_bytes
) & -align_bytes
;
10012 m
->fs
.sp_valid
= false;
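  /* For instance, with m->fs.sp_offset == 12 and align_bytes == 16 the
     rounding above yields (12 + 16) & -16 == 16, while an already-aligned
     offset advances by a full align_bytes; either way the result matches
     the layout computed in ix86_compute_frame_layout.  (Illustrative
     values only.)  */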
  allocate = frame.stack_pointer_offset - m->fs.sp_offset;

  if (flag_stack_usage_info)
    {
      /* We start to count from ARG_POINTER.  */
      HOST_WIDE_INT stack_size = frame.stack_pointer_offset;

      /* If it was realigned, take into account the fake frame.  */
      if (stack_realign_drap)
	{
	  if (ix86_static_chain_on_stack)
	    stack_size += UNITS_PER_WORD;

	  if (!call_used_regs[REGNO (crtl->drap_reg)])
	    stack_size += UNITS_PER_WORD;

	  /* This over-estimates by 1 minimal-stack-alignment-unit but
	     mitigates that by counting in the new return address slot.  */
	  current_function_dynamic_stack_size
	    += crtl->stack_alignment_needed / BITS_PER_UNIT;
	}

      current_function_static_stack_size = stack_size;
    }

  /* The stack has already been decremented by the instruction calling us
     so probe if the size is non-negative to preserve the protection area.  */
  if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
    {
      /* We expect the registers to be saved when probes are used.  */
      gcc_assert (int_registers_saved);

      if (STACK_CHECK_MOVING_SP)
	{
	  ix86_adjust_stack_and_probe (allocate);
	  allocate = 0;
	}
      else
	{
	  HOST_WIDE_INT size = allocate;

	  if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
	    size = 0x80000000 - STACK_CHECK_PROTECT - 1;

	  if (TARGET_STACK_PROBE)
	    ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
	  else
	    ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
	}
    }

  if (allocate == 0)
    ;
  else if (!ix86_target_stack_probe ()
	   || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
    {
      pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				 GEN_INT (-allocate), -1,
				 m->fs.cfa_reg == stack_pointer_rtx);
    }
  else
    {
      rtx eax = gen_rtx_REG (Pmode, AX_REG);
      rtx r10 = NULL;
      rtx (*adjust_stack_insn) (rtx, rtx, rtx);

      bool eax_live = false;
      bool r10_live = false;

      if (TARGET_64BIT)
	r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
      if (!TARGET_64BIT_MS_ABI)
	eax_live = ix86_eax_live_at_start_p ();

      if (eax_live)
	{
	  emit_insn (gen_push (eax));
	  allocate -= UNITS_PER_WORD;
	}
      if (r10_live)
	{
	  r10 = gen_rtx_REG (Pmode, R10_REG);
	  emit_insn (gen_push (r10));
	  allocate -= UNITS_PER_WORD;
	}

      emit_move_insn (eax, GEN_INT (allocate));
      emit_insn (ix86_gen_allocate_stack_worker (eax, eax));

      /* Use the fact that AX still contains ALLOCATE.  */
      adjust_stack_insn = (TARGET_64BIT
			   ? gen_pro_epilogue_adjust_stack_di_sub
			   : gen_pro_epilogue_adjust_stack_si_sub);

      insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
					   stack_pointer_rtx, eax));

      /* Note that SEH directives need to continue tracking the stack
	 pointer even after the frame pointer has been set up.  */
      if (m->fs.cfa_reg == stack_pointer_rtx || TARGET_SEH)
	{
	  if (m->fs.cfa_reg == stack_pointer_rtx)
	    m->fs.cfa_offset += allocate;

	  RTX_FRAME_RELATED_P (insn) = 1;
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
			gen_rtx_SET (VOIDmode, stack_pointer_rtx,
				     plus_constant (stack_pointer_rtx,
						    -allocate)));
	}
      m->fs.sp_offset += allocate;

      if (r10_live && eax_live)
	{
	  t = choose_baseaddr (m->fs.sp_offset - allocate);
	  emit_move_insn (r10, gen_frame_mem (Pmode, t));
	  t = choose_baseaddr (m->fs.sp_offset - allocate - UNITS_PER_WORD);
	  emit_move_insn (eax, gen_frame_mem (Pmode, t));
	}
      else if (eax_live || r10_live)
	{
	  t = choose_baseaddr (m->fs.sp_offset - allocate);
	  emit_move_insn ((eax_live ? eax : r10), gen_frame_mem (Pmode, t));
	}
    }
  gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
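  /* A sketch of the probed allocation path above: the emitted sequence on
     a Windows target is roughly (illustrative only; the actual probe
     worker routine, e.g. a chkstk variant, is target-specific):

	movq	$allocate, %rax
	call	probe_worker		# touches each page of the new area
	subq	%rax, %rsp		# eax still contains ALLOCATE  */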
  /* If we haven't already set up the frame pointer, do so now.  */
  if (frame_pointer_needed && !m->fs.fp_valid)
    {
      insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
			    GEN_INT (frame.stack_pointer_offset
				     - frame.hard_frame_pointer_offset));
      insn = emit_insn (insn);
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);

      if (m->fs.cfa_reg == stack_pointer_rtx)
	m->fs.cfa_reg = hard_frame_pointer_rtx;
      m->fs.fp_offset = frame.hard_frame_pointer_offset;
      m->fs.fp_valid = true;
    }

  if (!int_registers_saved)
    ix86_emit_save_regs_using_mov (frame.reg_save_offset);
  if (frame.nsseregs)
    ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);

  pic_reg_used = false;
  if (pic_offset_table_rtx
      && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
	  || crtl->profile))
    {
      unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();

      if (alt_pic_reg_used != INVALID_REGNUM)
	SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);

      pic_reg_used = true;
    }

  if (pic_reg_used)
    {
      if (TARGET_64BIT)
	{
	  if (ix86_cmodel == CM_LARGE_PIC)
	    {
	      rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
	      rtx label = gen_label_rtx ();
	      emit_label (label);
	      LABEL_PRESERVE_P (label) = 1;
	      gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
	      insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
	      insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
	      insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
					    pic_offset_table_rtx, tmp_reg));
	    }
	  else
	    insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
	}
      else
	{
	  insn = emit_insn (gen_set_got (pic_offset_table_rtx));
	  RTX_FRAME_RELATED_P (insn) = 1;
	  add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
	}
    }

  /* In the pic_reg_used case, make sure that the got load isn't deleted
     when mcount needs it.  Blockage to avoid call movement across mcount
     call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
     note.  */
  if (crtl->profile && !flag_fentry && pic_reg_used)
    emit_insn (gen_prologue_use (pic_offset_table_rtx));

  if (crtl->drap_reg && !crtl->stack_realign_needed)
    {
      /* vDRAP is set up, but after reload it turns out that stack
	 realignment isn't necessary; here we emit the prologue to set up
	 DRAP without the stack realign adjustment.  */
      t = choose_baseaddr (0);
      emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
    }

  /* Prevent instructions from being scheduled into register save push
     sequence when access to the redzone area is done through frame pointer.
     The offset between the frame pointer and the stack pointer is calculated
     relative to the value of the stack pointer at the end of the function
     prologue, and moving instructions that access redzone area via frame
     pointer inside push sequence violates this assumption.  */
  if (frame_pointer_needed && frame.red_zone_size)
    emit_insn (gen_memory_blockage ());

  /* Emit cld instruction if stringops are used in the function.  */
  if (TARGET_CLD && ix86_current_function_needs_cld)
    emit_insn (gen_cld ());

  /* SEH requires that the prologue end within 256 bytes of the start of
     the function.  Prevent instruction schedules that would extend that.  */
  if (TARGET_SEH)
    emit_insn (gen_blockage ());
}
/* Emit code to restore REG using a POP insn.  */

static void
ix86_emit_restore_reg_using_pop (rtx reg)
{
  struct machine_function *m = cfun->machine;
  rtx insn = emit_insn (gen_pop (reg));

  ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
  m->fs.sp_offset -= UNITS_PER_WORD;

  if (m->fs.cfa_reg == crtl->drap_reg
      && REGNO (reg) == REGNO (crtl->drap_reg))
    {
      /* Previously we'd represented the CFA as an expression
	 like *(%ebp - 8).  We've just popped that value from
	 the stack, which means we need to reset the CFA to
	 the drap register.  This will remain until we restore
	 the stack pointer.  */
      add_reg_note (insn, REG_CFA_DEF_CFA, reg);
      RTX_FRAME_RELATED_P (insn) = 1;

      /* This means that the DRAP register is valid for addressing too.  */
      m->fs.drap_valid = true;
      return;
    }

  if (m->fs.cfa_reg == stack_pointer_rtx)
    {
      rtx x = plus_constant (stack_pointer_rtx, UNITS_PER_WORD);
      x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
      add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
      RTX_FRAME_RELATED_P (insn) = 1;

      m->fs.cfa_offset -= UNITS_PER_WORD;
    }

  /* When the frame pointer is the CFA, and we pop it, we are
     swapping back to the stack pointer as the CFA.  This happens
     for stack frames that don't allocate other data, so we assume
     the stack pointer is now pointing at the return address, i.e.
     the function entry state, which makes the offset be 1 word.  */
  if (reg == hard_frame_pointer_rtx)
    {
      m->fs.fp_valid = false;
      if (m->fs.cfa_reg == hard_frame_pointer_rtx)
	{
	  m->fs.cfa_reg = stack_pointer_rtx;
	  m->fs.cfa_offset -= UNITS_PER_WORD;

	  add_reg_note (insn, REG_CFA_DEF_CFA,
			gen_rtx_PLUS (Pmode, stack_pointer_rtx,
				      GEN_INT (m->fs.cfa_offset)));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }
}
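/* For example, on a 64-bit target a pop emitted while the CFA is the
   stack pointer carries a REG_CFA_ADJUST_CFA note of the form
   (illustrative RTL):

	(set (reg:DI sp) (plus:DI (reg:DI sp) (const_int 8)))

   so the unwinder sees the CFA offset shrink by one word as the pop
   retires.  */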
/* Emit code to restore saved registers using POP insns.  */

static void
ix86_emit_restore_regs_using_pop (void)
{
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
      ix86_emit_restore_reg_using_pop (gen_rtx_REG (Pmode, regno));
}
/* Emit code and notes for the LEAVE instruction.  */

static void
ix86_emit_leave (void)
{
  struct machine_function *m = cfun->machine;
  rtx insn = emit_insn (ix86_gen_leave ());

  ix86_add_queued_cfa_restore_notes (insn);

  gcc_assert (m->fs.fp_valid);
  m->fs.sp_valid = true;
  m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
  m->fs.fp_valid = false;

  if (m->fs.cfa_reg == hard_frame_pointer_rtx)
    {
      m->fs.cfa_reg = stack_pointer_rtx;
      m->fs.cfa_offset = m->fs.sp_offset;

      add_reg_note (insn, REG_CFA_DEF_CFA,
		    plus_constant (stack_pointer_rtx, m->fs.sp_offset));
      RTX_FRAME_RELATED_P (insn) = 1;
      ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
				 m->fs.fp_offset);
    }
}
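/* The bookkeeping above mirrors the hardware semantics of LEAVE, which
   behaves like the two-instruction sequence (64-bit mnemonics shown for
   illustration):

	movq	%rbp, %rsp		# sp now points at the saved fp
	popq	%rbp			# hence sp_offset = fp_offset - word  */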
/* Emit code to restore saved registers using MOV insns.
   First register is restored from CFA - CFA_OFFSET.  */
static void
ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
				  bool maybe_eh_return)
{
  struct machine_function *m = cfun->machine;
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
      {
	rtx reg = gen_rtx_REG (Pmode, regno);
	rtx insn, mem;

	mem = choose_baseaddr (cfa_offset);
	mem = gen_frame_mem (Pmode, mem);
	insn = emit_move_insn (reg, mem);

	if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
	  {
	    /* Previously we'd represented the CFA as an expression
	       like *(%ebp - 8).  We've just popped that value from
	       the stack, which means we need to reset the CFA to
	       the drap register.  This will remain until we restore
	       the stack pointer.  */
	    add_reg_note (insn, REG_CFA_DEF_CFA, reg);
	    RTX_FRAME_RELATED_P (insn) = 1;

	    /* This means that the DRAP register is valid for addressing.  */
	    m->fs.drap_valid = true;
	  }
	else
	  ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);

	cfa_offset -= UNITS_PER_WORD;
      }
}
/* Emit code to restore saved SSE registers using MOV insns.
   First register is restored from CFA - CFA_OFFSET.  */
static void
ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
				      bool maybe_eh_return)
{
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
      {
	rtx reg = gen_rtx_REG (V4SFmode, regno);
	rtx mem;

	mem = choose_baseaddr (cfa_offset);
	mem = gen_rtx_MEM (V4SFmode, mem);
	set_mem_align (mem, 128);
	emit_move_insn (reg, mem);

	ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);

	cfa_offset -= 16;
      }
}
/* Restore function stack, frame, and registers.  */

void
ix86_expand_epilogue (int style)
{
  struct machine_function *m = cfun->machine;
  struct machine_frame_state frame_state_save = m->fs;
  struct ix86_frame frame;
  bool restore_regs_via_mov;
  bool using_drap;

  ix86_finalize_stack_realign_flags ();
  ix86_compute_frame_layout (&frame);

  m->fs.sp_valid = (!frame_pointer_needed
		    || (current_function_sp_is_unchanging
			&& !stack_realign_fp));
  gcc_assert (!m->fs.sp_valid
	      || m->fs.sp_offset == frame.stack_pointer_offset);

  /* The FP must be valid if the frame pointer is present.  */
  gcc_assert (frame_pointer_needed == m->fs.fp_valid);
  gcc_assert (!m->fs.fp_valid
	      || m->fs.fp_offset == frame.hard_frame_pointer_offset);

  /* We must have *some* valid pointer to the stack frame.  */
  gcc_assert (m->fs.sp_valid || m->fs.fp_valid);

  /* The DRAP is never valid at this point.  */
  gcc_assert (!m->fs.drap_valid);

  /* See the comment about red zone and frame
     pointer usage in ix86_expand_prologue.  */
  if (frame_pointer_needed && frame.red_zone_size)
    emit_insn (gen_memory_blockage ());

  using_drap = crtl->drap_reg && crtl->stack_realign_needed;
  gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);

  /* Determine the CFA offset of the end of the red-zone.  */
  m->fs.red_zone_offset = 0;
  if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
    {
      /* The red-zone begins below the return address.  */
      m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;

      /* When the register save area is in the aligned portion of
	 the stack, determine the maximum runtime displacement that
	 matches up with the aligned frame.  */
      if (stack_realign_drap)
	m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
				  + UNITS_PER_WORD);
    }

  /* Special care must be taken for the normal return case of a function
     using eh_return: the eax and edx registers are marked as saved, but
     not restored along this path.  Adjust the save location to match.  */
  if (crtl->calls_eh_return && style != 2)
    frame.reg_save_offset -= 2 * UNITS_PER_WORD;

  /* EH_RETURN requires the use of moves to function properly.  */
  if (crtl->calls_eh_return)
    restore_regs_via_mov = true;
  /* SEH requires the use of pops to identify the epilogue.  */
  else if (TARGET_SEH)
    restore_regs_via_mov = false;
  /* If we're only restoring one register and sp is not valid, then
     use a move instruction to restore the register, since it's
     less work than reloading sp and popping the register.  */
  else if (!m->fs.sp_valid && frame.nregs <= 1)
    restore_regs_via_mov = true;
  else if (TARGET_EPILOGUE_USING_MOVE
	   && cfun->machine->use_fast_prologue_epilogue
	   && (frame.nregs > 1
	       || m->fs.sp_offset != frame.reg_save_offset))
    restore_regs_via_mov = true;
  else if (frame_pointer_needed
	   && !frame.nregs
	   && m->fs.sp_offset != frame.reg_save_offset)
    restore_regs_via_mov = true;
  else if (frame_pointer_needed
	   && TARGET_USE_LEAVE
	   && cfun->machine->use_fast_prologue_epilogue
	   && frame.nregs == 1)
    restore_regs_via_mov = true;
  else
    restore_regs_via_mov = false;
  if (restore_regs_via_mov || frame.nsseregs)
    {
      /* Ensure that the entire register save area is addressable via
	 the stack pointer, if we will restore via sp.  */
      if (TARGET_64BIT
	  && m->fs.sp_offset > 0x7fffffff
	  && !(m->fs.fp_valid || m->fs.drap_valid)
	  && (frame.nsseregs + frame.nregs) != 0)
	{
	  pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				     GEN_INT (m->fs.sp_offset
					      - frame.sse_reg_save_offset),
				     style,
				     m->fs.cfa_reg == stack_pointer_rtx);
	}
    }

  /* If there are any SSE registers to restore, then we have to do it
     via moves, since there's obviously no pop for SSE regs.  */
  if (frame.nsseregs)
    ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
					  style == 2);

  if (restore_regs_via_mov)
    {
      rtx t;

      if (frame.nregs)
	ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);

      /* eh_return epilogues need %ecx added to the stack pointer.  */
      if (style == 2)
	{
	  rtx insn, sa = EH_RETURN_STACKADJ_RTX;

	  /* Stack align doesn't work with eh_return.  */
	  gcc_assert (!stack_realign_drap);
	  /* Neither do regparm nested functions.  */
	  gcc_assert (!ix86_static_chain_on_stack);

	  if (frame_pointer_needed)
	    {
	      t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
	      t = plus_constant (t, m->fs.fp_offset - UNITS_PER_WORD);
	      emit_insn (gen_rtx_SET (VOIDmode, sa, t));

	      t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
	      insn = emit_move_insn (hard_frame_pointer_rtx, t);

	      /* Note that we use SA as a temporary CFA, as the return
		 address is at the proper place relative to it.  We
		 pretend this happens at the FP restore insn because
		 prior to this insn the FP would be stored at the wrong
		 offset relative to SA, and after this insn we have no
		 other reasonable register to use for the CFA.  We don't
		 bother resetting the CFA to the SP for the duration of
		 the return insn.  */
	      add_reg_note (insn, REG_CFA_DEF_CFA,
			    plus_constant (sa, UNITS_PER_WORD));
	      ix86_add_queued_cfa_restore_notes (insn);
	      add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
	      RTX_FRAME_RELATED_P (insn) = 1;

	      m->fs.cfa_reg = sa;
	      m->fs.cfa_offset = UNITS_PER_WORD;
	      m->fs.fp_valid = false;

	      pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
					 const0_rtx, style, false);
	    }
	  else
	    {
	      t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
	      t = plus_constant (t, m->fs.sp_offset - UNITS_PER_WORD);
	      insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
	      ix86_add_queued_cfa_restore_notes (insn);

	      gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
	      if (m->fs.cfa_offset != UNITS_PER_WORD)
		{
		  m->fs.cfa_offset = UNITS_PER_WORD;
		  add_reg_note (insn, REG_CFA_DEF_CFA,
				plus_constant (stack_pointer_rtx,
					       UNITS_PER_WORD));
		  RTX_FRAME_RELATED_P (insn) = 1;
		}
	    }
	  m->fs.sp_offset = UNITS_PER_WORD;
	  m->fs.sp_valid = true;
	}
    }
  else
    {
      /* SEH requires that the function end with (1) a stack adjustment
	 if necessary, (2) a sequence of pops, and (3) a return or
	 jump instruction.  Prevent insns from the function body from
	 being scheduled into this sequence.  */
      if (TARGET_SEH)
	{
	  /* Prevent a catch region from being adjacent to the standard
	     epilogue sequence.  Unfortunately, neither crtl->uses_eh_lsda
	     nor several other flags that would be interesting to test
	     are set up yet.  */
	  if (flag_non_call_exceptions)
	    emit_insn (gen_nops (const1_rtx));
	  else
	    emit_insn (gen_blockage ());
	}

      /* First step is to deallocate the stack frame so that we can
	 pop the registers.  */
      if (!m->fs.sp_valid)
	{
	  pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
				     GEN_INT (m->fs.fp_offset
					      - frame.reg_save_offset),
				     style, false);
	}
      else if (m->fs.sp_offset != frame.reg_save_offset)
	{
	  pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				     GEN_INT (m->fs.sp_offset
					      - frame.reg_save_offset),
				     style,
				     m->fs.cfa_reg == stack_pointer_rtx);
	}

      ix86_emit_restore_regs_using_pop ();
    }

  /* If we used a frame pointer and haven't already got rid of it,
     then do so now.  */
  if (m->fs.fp_valid)
    {
      /* If the stack pointer is valid and pointing at the frame
	 pointer store address, then we only need a pop.  */
      if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
	ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
      /* Leave results in shorter dependency chains on CPUs that are
	 able to grok it fast.  */
      else if (TARGET_USE_LEAVE
	       || optimize_function_for_size_p (cfun)
	       || !cfun->machine->use_fast_prologue_epilogue)
	ix86_emit_leave ();
      else
	{
	  pro_epilogue_adjust_stack (stack_pointer_rtx,
				     hard_frame_pointer_rtx,
				     const0_rtx, style, !using_drap);
	  ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
	}
    }

  if (using_drap)
    {
      int param_ptr_offset = UNITS_PER_WORD;
      rtx insn;

      gcc_assert (stack_realign_drap);

      if (ix86_static_chain_on_stack)
	param_ptr_offset += UNITS_PER_WORD;
      if (!call_used_regs[REGNO (crtl->drap_reg)])
	param_ptr_offset += UNITS_PER_WORD;

      insn = emit_insn (gen_rtx_SET
			(VOIDmode, stack_pointer_rtx,
			 gen_rtx_PLUS (Pmode,
				       crtl->drap_reg,
				       GEN_INT (-param_ptr_offset))));
      m->fs.cfa_reg = stack_pointer_rtx;
      m->fs.cfa_offset = param_ptr_offset;
      m->fs.sp_offset = param_ptr_offset;
      m->fs.realigned = false;

      add_reg_note (insn, REG_CFA_DEF_CFA,
		    gen_rtx_PLUS (Pmode, stack_pointer_rtx,
				  GEN_INT (param_ptr_offset)));
      RTX_FRAME_RELATED_P (insn) = 1;

      if (!call_used_regs[REGNO (crtl->drap_reg)])
	ix86_emit_restore_reg_using_pop (crtl->drap_reg);
    }

  /* At this point the stack pointer must be valid, and we must have
     restored all of the registers.  We may not have deallocated the
     entire stack frame.  We've delayed this until now because it may
     be possible to merge the local stack deallocation with the
     deallocation forced by ix86_static_chain_on_stack.  */
  gcc_assert (m->fs.sp_valid);
  gcc_assert (!m->fs.fp_valid);
  gcc_assert (!m->fs.realigned);
  if (m->fs.sp_offset != UNITS_PER_WORD)
    {
      pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
				 style, true);
    }

  /* Sibcall epilogues don't want a return instruction.  */
  if (style == 0)
    {
      m->fs = frame_state_save;
      return;
    }

  /* Emit vzeroupper if needed.  */
  if (TARGET_VZEROUPPER
      && !TREE_THIS_VOLATILE (cfun->decl)
      && !cfun->machine->caller_return_avx256_p)
    emit_insn (gen_avx_vzeroupper (GEN_INT (call_no_avx256)));

  if (crtl->args.pops_args && crtl->args.size)
    {
      rtx popc = GEN_INT (crtl->args.pops_args);

      /* i386 can only pop 64K bytes.  If asked to pop more, pop return
	 address, do explicit add, and jump indirectly to the caller.  */
      if (crtl->args.pops_args >= 65536)
	{
	  rtx ecx = gen_rtx_REG (SImode, CX_REG);
	  rtx insn;

	  /* There is no "pascal" calling convention in any 64bit ABI.  */
	  gcc_assert (!TARGET_64BIT);

	  insn = emit_insn (gen_pop (ecx));
	  m->fs.cfa_offset -= UNITS_PER_WORD;
	  m->fs.sp_offset -= UNITS_PER_WORD;

	  add_reg_note (insn, REG_CFA_ADJUST_CFA,
			copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
	  add_reg_note (insn, REG_CFA_REGISTER,
			gen_rtx_SET (VOIDmode, ecx, pc_rtx));
	  RTX_FRAME_RELATED_P (insn) = 1;

	  pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				     popc, -1, true);
	  emit_jump_insn (gen_return_indirect_internal (ecx));
	}
      else
	emit_jump_insn (gen_return_pop_internal (popc));
    }
  else
    emit_jump_insn (gen_return_internal ());

  /* Restore the state back to the state from the prologue,
     so that it's correct for the next epilogue.  */
  m->fs = frame_state_save;
}
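/* As an illustration of the pops_args handling above: a 32-bit
   callee-pops function asked to pop 64K or more returns through the
   indirect sequence, roughly (approximate assembly, not verbatim
   output):

	popl	%ecx			# pop the return address
	addl	$N, %esp		# explicitly pop the arguments
	jmp	*%ecx			# jump back to the caller

   while the common case simply uses "ret $N".  */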
/* Reset from the function's potential modifications.  */

static void
ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
			       HOST_WIDE_INT size ATTRIBUTE_UNUSED)
{
  if (pic_offset_table_rtx)
    SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
#if TARGET_MACHO
  /* Mach-O doesn't support labels at the end of objects, so if
     it looks like we might want one, insert a NOP.  */
  {
    rtx insn = get_last_insn ();
    while (insn
	   && NOTE_P (insn)
	   && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
      insn = PREV_INSN (insn);
    if (insn
	&& (LABEL_P (insn)
	    || (NOTE_P (insn)
		&& NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
      fputs ("\tnop\n", file);
  }
#endif
}
/* Return a scratch register to use in the split stack prologue.  The
   split stack prologue is used for -fsplit-stack.  It is the first
   instructions in the function, even before the regular prologue.
   The scratch register can be any caller-saved register which is not
   used for parameters or for the static chain.  */

static unsigned int
split_stack_prologue_scratch_regno (void)
{
  if (TARGET_64BIT)
    return R11_REG;
  else
    {
      bool is_fastcall;
      int regparm;

      is_fastcall = (lookup_attribute ("fastcall",
				       TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
		     != NULL);
      regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);

      if (is_fastcall)
	{
	  if (DECL_STATIC_CHAIN (cfun->decl))
	    {
	      sorry ("-fsplit-stack does not support fastcall with "
		     "nested function");
	      return INVALID_REGNUM;
	    }
	  return AX_REG;
	}
      else if (regparm < 3)
	{
	  if (!DECL_STATIC_CHAIN (cfun->decl))
	    return CX_REG;
	  else
	    {
	      if (regparm >= 2)
		{
		  sorry ("-fsplit-stack does not support 2 register "
			 "parameters for a nested function");
		  return INVALID_REGNUM;
		}
	      return DX_REG;
	    }
	}
      else
	{
	  /* FIXME: We could make this work by pushing a register
	     around the addition and comparison.  */
	  sorry ("-fsplit-stack does not support 3 register parameters");
	  return INVALID_REGNUM;
	}
    }
}

/* A SYMBOL_REF for the function which allocates new stackspace for
   -fsplit-stack.  */

static GTY(()) rtx split_stack_fn;

/* A SYMBOL_REF for the more stack function when using the large
   model.  */

static GTY(()) rtx split_stack_fn_large;
/* Handle -fsplit-stack.  These are the first instructions in the
   function, even before the regular prologue.  */

void
ix86_expand_split_stack_prologue (void)
{
  struct ix86_frame frame;
  HOST_WIDE_INT allocate;
  unsigned HOST_WIDE_INT args_size;
  rtx label, limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
  rtx scratch_reg = NULL_RTX;
  rtx varargs_label = NULL_RTX;
  rtx fn;

  gcc_assert (flag_split_stack && reload_completed);

  ix86_finalize_stack_realign_flags ();
  ix86_compute_frame_layout (&frame);
  allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;

  /* This is the label we will branch to if we have enough stack
     space.  We expect the basic block reordering pass to reverse this
     branch if optimizing, so that we branch in the unlikely case.  */
  label = gen_label_rtx ();

  /* We need to compare the stack pointer minus the frame size with
     the stack boundary in the TCB.  The stack boundary always gives
     us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
     can compare directly.  Otherwise we need to do an addition.  */

  limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
			  UNSPEC_STACK_CHECK);
  limit = gen_rtx_CONST (Pmode, limit);
  limit = gen_rtx_MEM (Pmode, limit);
  if (allocate < SPLIT_STACK_AVAILABLE)
    current = stack_pointer_rtx;
  else
    {
      unsigned int scratch_regno;
      rtx offset;

      /* We need a scratch register to hold the stack pointer minus
	 the required frame size.  Since this is the very start of the
	 function, the scratch register can be any caller-saved
	 register which is not used for parameters.  */
      offset = GEN_INT (- allocate);
      scratch_regno = split_stack_prologue_scratch_regno ();
      if (scratch_regno == INVALID_REGNUM)
	return;
      scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
      if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
	{
	  /* We don't use ix86_gen_add3 in this case because it will
	     want to split to lea, but when not optimizing the insn
	     will not be split after this point.  */
	  emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
				  gen_rtx_PLUS (Pmode, stack_pointer_rtx,
						offset)));
	}
      else
	{
	  emit_move_insn (scratch_reg, offset);
	  emit_insn (gen_adddi3 (scratch_reg, scratch_reg,
				 stack_pointer_rtx));
	}
      current = scratch_reg;
    }

  ix86_expand_branch (GEU, current, limit, label);
  jump_insn = get_last_insn ();
  JUMP_LABEL (jump_insn) = label;

  /* Mark the jump as very likely to be taken.  */
  add_reg_note (jump_insn, REG_BR_PROB,
		GEN_INT (REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100));

  if (split_stack_fn == NULL_RTX)
    split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
  fn = split_stack_fn;

  /* Get more stack space.  We pass in the desired stack space and the
     size of the arguments to copy to the new stack.  In 32-bit mode
     we push the parameters; __morestack will return on a new stack
     anyhow.  In 64-bit mode we pass the parameters in r10 and
     r11.  */
  allocate_rtx = GEN_INT (allocate);
  args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
  call_fusage = NULL_RTX;
= gen_rtx_REG (Pmode
, R10_REG
);
10926 reg11
= gen_rtx_REG (Pmode
, R11_REG
);
10928 /* If this function uses a static chain, it will be in %r10.
10929 Preserve it across the call to __morestack. */
10930 if (DECL_STATIC_CHAIN (cfun
->decl
))
10934 rax
= gen_rtx_REG (Pmode
, AX_REG
);
10935 emit_move_insn (rax
, reg10
);
10936 use_reg (&call_fusage
, rax
);
10939 if (ix86_cmodel
== CM_LARGE
|| ix86_cmodel
== CM_LARGE_PIC
)
10941 HOST_WIDE_INT argval
;
10943 /* When using the large model we need to load the address
10944 into a register, and we've run out of registers. So we
10945 switch to a different calling convention, and we call a
10946 different function: __morestack_large. We pass the
10947 argument size in the upper 32 bits of r10 and pass the
10948 frame size in the lower 32 bits. */
10949 gcc_assert ((allocate
& (HOST_WIDE_INT
) 0xffffffff) == allocate
);
10950 gcc_assert ((args_size
& 0xffffffff) == args_size
);
10952 if (split_stack_fn_large
== NULL_RTX
)
10953 split_stack_fn_large
=
10954 gen_rtx_SYMBOL_REF (Pmode
, "__morestack_large_model");
10956 if (ix86_cmodel
== CM_LARGE_PIC
)
10960 label
= gen_label_rtx ();
10961 emit_label (label
);
10962 LABEL_PRESERVE_P (label
) = 1;
10963 emit_insn (gen_set_rip_rex64 (reg10
, label
));
10964 emit_insn (gen_set_got_offset_rex64 (reg11
, label
));
10965 emit_insn (gen_adddi3 (reg10
, reg10
, reg11
));
10966 x
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, split_stack_fn_large
),
10968 x
= gen_rtx_CONST (Pmode
, x
);
10969 emit_move_insn (reg11
, x
);
10970 x
= gen_rtx_PLUS (Pmode
, reg10
, reg11
);
10971 x
= gen_const_mem (Pmode
, x
);
10972 emit_move_insn (reg11
, x
);
10975 emit_move_insn (reg11
, split_stack_fn_large
);
10979 argval
= ((args_size
<< 16) << 16) + allocate
;
10980 emit_move_insn (reg10
, GEN_INT (argval
));
10984 emit_move_insn (reg10
, allocate_rtx
);
10985 emit_move_insn (reg11
, GEN_INT (args_size
));
10986 use_reg (&call_fusage
, reg11
);
10989 use_reg (&call_fusage
, reg10
);
10993 emit_insn (gen_push (GEN_INT (args_size
)));
10994 emit_insn (gen_push (allocate_rtx
));
10996 call_insn
= ix86_expand_call (NULL_RTX
, gen_rtx_MEM (QImode
, fn
),
10997 GEN_INT (UNITS_PER_WORD
), constm1_rtx
,
10999 add_function_usage_to (call_insn
, call_fusage
);
11001 /* In order to make call/return prediction work right, we now need
11002 to execute a return instruction. See
11003 libgcc/config/i386/morestack.S for the details on how this works.
11005 For flow purposes gcc must not see this as a return
11006 instruction--we need control flow to continue at the subsequent
11007 label. Therefore, we use an unspec. */
11008 gcc_assert (crtl
->args
.pops_args
< 65536);
11009 emit_insn (gen_split_stack_return (GEN_INT (crtl
->args
.pops_args
)));
11011 /* If we are in 64-bit mode and this function uses a static chain,
11012 we saved %r10 in %rax before calling _morestack. */
11013 if (TARGET_64BIT
&& DECL_STATIC_CHAIN (cfun
->decl
))
11014 emit_move_insn (gen_rtx_REG (Pmode
, R10_REG
),
11015 gen_rtx_REG (Pmode
, AX_REG
));
11017 /* If this function calls va_start, we need to store a pointer to
11018 the arguments on the old stack, because they may not have been
11019 all copied to the new stack. At this point the old stack can be
11020 found at the frame pointer value used by __morestack, because
11021 __morestack has set that up before calling back to us. Here we
11022 store that pointer in a scratch register, and in
11023 ix86_expand_prologue we store the scratch register in a stack
11025 if (cfun
->machine
->split_stack_varargs_pointer
!= NULL_RTX
)
11027 unsigned int scratch_regno
;
11031 scratch_regno
= split_stack_prologue_scratch_regno ();
11032 scratch_reg
= gen_rtx_REG (Pmode
, scratch_regno
);
11033 frame_reg
= gen_rtx_REG (Pmode
, BP_REG
);
11037 return address within this function
11038 return address of caller of this function
11040 So we add three words to get to the stack arguments.
11044 return address within this function
11045 first argument to __morestack
11046 second argument to __morestack
11047 return address of caller of this function
11049 So we add five words to get to the stack arguments.
11051 words
= TARGET_64BIT
? 3 : 5;
11052 emit_insn (gen_rtx_SET (VOIDmode
, scratch_reg
,
11053 gen_rtx_PLUS (Pmode
, frame_reg
,
11054 GEN_INT (words
* UNITS_PER_WORD
))));
11056 varargs_label
= gen_label_rtx ();
11057 emit_jump_insn (gen_jump (varargs_label
));
11058 JUMP_LABEL (get_last_insn ()) = varargs_label
;
11063 emit_label (label
);
11064 LABEL_NUSES (label
) = 1;
11066 /* If this function calls va_start, we now have to set the scratch
11067 register for the case where we do not call __morestack. In this
11068 case we need to set it based on the stack pointer. */
11069 if (cfun
->machine
->split_stack_varargs_pointer
!= NULL_RTX
)
11071 emit_insn (gen_rtx_SET (VOIDmode
, scratch_reg
,
11072 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
11073 GEN_INT (UNITS_PER_WORD
))));
11075 emit_label (varargs_label
);
11076 LABEL_NUSES (varargs_label
) = 1;
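/* To illustrate the parameter passing described above (approximate,
   not verbatim compiler output): in 32-bit mode the call is roughly

	pushl	$args_size
	pushl	$allocate
	call	__morestack

   while in 64-bit mode the two values travel in registers:

	movq	$allocate, %r10
	movq	$args_size, %r11
	call	__morestack  */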
/* We may have to tell the dataflow pass that the split stack prologue
   is initializing a scratch register.  */

static void
ix86_live_on_entry (bitmap regs)
{
  if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
    {
      gcc_assert (flag_split_stack);
      bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
    }
}
/* Extract the parts of an RTL expression that is a valid memory address
   for an instruction.  Return 0 if the structure of the address is
   grossly off.  Return -1 if the address contains ASHIFT, so it is not
   strictly valid, but is still used for computing the length of the lea
   instruction.  */

static int
ix86_decompose_address (rtx addr, struct ix86_address *out)
{
  rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
  rtx base_reg, index_reg;
  HOST_WIDE_INT scale = 1;
  rtx scale_rtx = NULL_RTX;
  rtx tmp;
  int retval = 1;
  enum ix86_address_seg seg = SEG_DEFAULT;

  if (REG_P (addr))
    base = addr;
  else if (GET_CODE (addr) == SUBREG)
    {
      /* Allow only subregs of DImode hard regs.  */
      if (register_no_elim_operand (SUBREG_REG (addr), DImode))
	base = addr;
      else
	return 0;
    }
  else if (GET_CODE (addr) == PLUS)
    {
      rtx addends[4], op;
      int n = 0, i;

      op = addr;
      do
	{
	  if (n >= 4)
	    return 0;
	  addends[n++] = XEXP (op, 1);
	  op = XEXP (op, 0);
	}
      while (GET_CODE (op) == PLUS);
      if (n >= 4)
	return 0;
      addends[n] = op;

      for (i = n; i >= 0; --i)
	{
	  op = addends[i];
	  switch (GET_CODE (op))
	    {
	    case MULT:
	      if (index)
		return 0;
	      index = XEXP (op, 0);
	      scale_rtx = XEXP (op, 1);
	      break;

	    case ASHIFT:
	      if (index)
		return 0;
	      index = XEXP (op, 0);
	      tmp = XEXP (op, 1);
	      if (!CONST_INT_P (tmp))
		return 0;
	      scale = INTVAL (tmp);
	      if ((unsigned HOST_WIDE_INT) scale > 3)
		return 0;
	      scale = 1 << scale;
	      break;

	    case UNSPEC:
	      if (XINT (op, 1) == UNSPEC_TP
		  && TARGET_TLS_DIRECT_SEG_REFS
		  && seg == SEG_DEFAULT)
		seg = TARGET_64BIT ? SEG_FS : SEG_GS;
	      else
		return 0;
	      break;

	    case SUBREG:
	      /* Allow only subregs of DImode hard regs in PLUS chains.  */
	      if (!register_no_elim_operand (SUBREG_REG (op), DImode))
		return 0;
	      /* FALLTHRU */

	    case REG:
	      if (!base)
		base = op;
	      else if (!index)
		index = op;
	      else
		return 0;
	      break;

	    case CONST:
	    case CONST_INT:
	    case SYMBOL_REF:
	    case LABEL_REF:
	      if (disp)
		return 0;
	      disp = op;
	      break;

	    default:
	      return 0;
	    }
	}
    }
  else if (GET_CODE (addr) == MULT)
    {
      index = XEXP (addr, 0);		/* index*scale */
      scale_rtx = XEXP (addr, 1);
    }
  else if (GET_CODE (addr) == ASHIFT)
    {
      /* We're called for lea too, which implements ashift on occasion.  */
      index = XEXP (addr, 0);
      tmp = XEXP (addr, 1);
      if (!CONST_INT_P (tmp))
	return 0;
      scale = INTVAL (tmp);
      if ((unsigned HOST_WIDE_INT) scale > 3)
	return 0;
      scale = 1 << scale;
      retval = -1;
    }
  else
    disp = addr;			/* displacement */

  if (index)
    {
      if (REG_P (index))
	;
      /* Allow only subregs of DImode hard regs.  */
      else if (GET_CODE (index) == SUBREG
	       && register_no_elim_operand (SUBREG_REG (index), DImode))
	;
      else
	return 0;
    }

  /* Extract the integral value of scale.  */
  if (scale_rtx)
    {
      if (!CONST_INT_P (scale_rtx))
	return 0;
      scale = INTVAL (scale_rtx);
    }

  base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
  index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;

  /* Avoid useless 0 displacement.  */
  if (disp == const0_rtx && (base || index))
    disp = NULL_RTX;

  /* Allow arg pointer and stack pointer as index if there is no scaling.  */
  if (base_reg && index_reg && scale == 1
      && (index_reg == arg_pointer_rtx
	  || index_reg == frame_pointer_rtx
	  || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
    {
      tmp = base, base = index, index = tmp;
      tmp = base_reg, base_reg = index_reg, index_reg = tmp;
    }

  /* Special case: %ebp cannot be encoded as a base without a displacement.
     Similarly %r13.  */
  if (!disp
      && base_reg
      && (base_reg == hard_frame_pointer_rtx
	  || base_reg == frame_pointer_rtx
	  || base_reg == arg_pointer_rtx
	  || (REG_P (base_reg)
	      && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
		  || REGNO (base_reg) == R13_REG))))
    disp = const0_rtx;

  /* Special case: on K6, [%esi] makes the instruction vector decoded.
     Avoid this by transforming to [%esi+0].
     Reload calls address legitimization without cfun defined, so we need
     to test cfun for being non-NULL.  */
  if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
      && base_reg && !index_reg && !disp
      && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
    disp = const0_rtx;

  /* Special case: encode reg+reg instead of reg*2.  */
  if (!base && index && scale == 2)
    base = index, base_reg = index_reg, scale = 1;

  /* Special case: scaling cannot be encoded without base or displacement.  */
  if (!base && !disp && index && scale != 1)
    disp = const0_rtx;

  out->base = base;
  out->index = index;
  out->disp = disp;
  out->scale = scale;
  out->seg = seg;

  return retval;
}
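/* For example, the 32-bit address 12(%ebx,%eax,4), i.e.
   (plus (plus (mult (reg %eax) (const_int 4)) (reg %ebx)) (const_int 12)),
   decomposes into base = %ebx, index = %eax, scale = 4, disp = 12,
   seg = SEG_DEFAULT, with a return value of 1.  (Illustrative only.)  */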
/* Return cost of the memory address x.
   For i386, it is better to use a complex address than let gcc copy
   the address into a reg and make a new pseudo.  But not if the address
   requires two regs - that would mean more pseudos with longer
   lifetimes.  */
static int
ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
{
  struct ix86_address parts;
  int cost = 1;
  int ok = ix86_decompose_address (x, &parts);

  gcc_assert (ok);

  if (parts.base && GET_CODE (parts.base) == SUBREG)
    parts.base = SUBREG_REG (parts.base);
  if (parts.index && GET_CODE (parts.index) == SUBREG)
    parts.index = SUBREG_REG (parts.index);

  /* Attempt to minimize number of registers in the address.  */
  if ((parts.base
       && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
      || (parts.index
	  && (!REG_P (parts.index)
	      || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
    cost++;

  if (parts.base
      && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
      && parts.index
      && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
      && parts.base != parts.index)
    cost++;

  /* AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
     since its predecode logic can't detect the length of instructions
     and it degenerates to vector decoded.  Increase cost of such
     addresses here.  The penalty is minimally 2 cycles.  It may be worthwhile
     to split such addresses or even refuse such addresses at all.

     The following addressing modes are affected:
      [base+scale*index]
      [scale*index+disp]
      [base+index]

     The first and last case may be avoidable by explicitly coding the zero in
     memory address, but I don't have AMD-K6 machine handy to check this
     theory.  */

  if (TARGET_K6
      && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
	  || (parts.disp && !parts.base && parts.index && parts.scale != 1)
	  || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
    cost += 10;

  return cost;
}
/* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
   this is used to form addresses to local data when -fPIC is in
   use.  */

static bool
darwin_local_data_pic (rtx disp)
{
  return (GET_CODE (disp) == UNSPEC
	  && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
}
/* Determine if a given RTX is a valid constant.  We already know this
   satisfies CONSTANT_P.  */

static bool
ix86_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST:
      x = XEXP (x, 0);

      if (GET_CODE (x) == PLUS)
	{
	  if (!CONST_INT_P (XEXP (x, 1)))
	    return false;
	  x = XEXP (x, 0);
	}

      if (TARGET_MACHO && darwin_local_data_pic (x))
	return true;

      /* Only some unspecs are valid as "constants".  */
      if (GET_CODE (x) == UNSPEC)
	switch (XINT (x, 1))
	  {
	  case UNSPEC_GOT:
	  case UNSPEC_GOTOFF:
	  case UNSPEC_PLTOFF:
	    return TARGET_64BIT;
	  case UNSPEC_TPOFF:
	  case UNSPEC_NTPOFF:
	    x = XVECEXP (x, 0, 0);
	    return (GET_CODE (x) == SYMBOL_REF
		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
	  case UNSPEC_DTPOFF:
	    x = XVECEXP (x, 0, 0);
	    return (GET_CODE (x) == SYMBOL_REF
		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
	  default:
	    return false;
	  }

      /* We must have drilled down to a symbol.  */
      if (GET_CODE (x) == LABEL_REF)
	return true;
      if (GET_CODE (x) != SYMBOL_REF)
	return false;
      /* FALLTHRU */

    case SYMBOL_REF:
      /* TLS symbols are never valid.  */
      if (SYMBOL_REF_TLS_MODEL (x))
	return false;

      /* DLLIMPORT symbols are never valid.  */
      if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
	  && SYMBOL_REF_DLLIMPORT_P (x))
	return false;

#if TARGET_MACHO
      /* mdynamic-no-pic */
      if (MACHO_DYNAMIC_NO_PIC_P)
	return machopic_symbol_defined_p (x);
#endif
      break;

    case CONST_DOUBLE:
      if (GET_MODE (x) == TImode
	  && x != CONST0_RTX (TImode)
	  && !TARGET_64BIT)
	return false;
      break;

    case CONST_VECTOR:
      if (!standard_sse_constant_p (x))
	return false;

    default:
      break;
    }

  /* Otherwise we handle everything else in the move patterns.  */
  return true;
}
/* Determine if it's legal to put X into the constant pool.  This
   is not possible for the address of thread-local symbols, which
   is checked above.  */

static bool
ix86_cannot_force_const_mem (enum machine_mode mode, rtx x)
{
  /* We can always put integral constants and vectors in memory.  */
  switch (GET_CODE (x))
    {
    case CONST_INT:
    case CONST_DOUBLE:
    case CONST_VECTOR:
      return false;

    default:
      break;
    }
  return !ix86_legitimate_constant_p (mode, x);
}
/* Nonzero if the constant value X is a legitimate general operand
   when generating PIC code.  It is given that flag_pic is on and
   that X satisfies CONSTANT_P or is a CONST_DOUBLE.  */

bool
legitimate_pic_operand_p (rtx x)
{
  rtx inner;

  switch (GET_CODE (x))
    {
    case CONST:
      inner = XEXP (x, 0);
      if (GET_CODE (inner) == PLUS
	  && CONST_INT_P (XEXP (inner, 1)))
	inner = XEXP (inner, 0);

      /* Only some unspecs are valid as "constants".  */
      if (GET_CODE (inner) == UNSPEC)
	switch (XINT (inner, 1))
	  {
	  case UNSPEC_GOT:
	  case UNSPEC_GOTOFF:
	  case UNSPEC_PLTOFF:
	    return TARGET_64BIT;
	  case UNSPEC_TPOFF:
	    x = XVECEXP (inner, 0, 0);
	    return (GET_CODE (x) == SYMBOL_REF
		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
	  case UNSPEC_MACHOPIC_OFFSET:
	    return legitimate_pic_address_disp_p (x);
	  default:
	    return false;
	  }
      /* FALLTHRU */

    case SYMBOL_REF:
    case LABEL_REF:
      return legitimate_pic_address_disp_p (x);

    default:
      return true;
    }
}
/* Determine if a given CONST RTX is a valid memory displacement
   in PIC mode.  */

bool
legitimate_pic_address_disp_p (rtx disp)
{
  bool saw_plus;

  /* In 64bit mode we can allow direct addresses of symbols and labels
     when they are not dynamic symbols.  */
  if (TARGET_64BIT)
    {
      rtx op0 = disp, op1;

      switch (GET_CODE (disp))
	{
	case LABEL_REF:
	  return true;

	case CONST:
	  if (GET_CODE (XEXP (disp, 0)) != PLUS)
	    break;
	  op0 = XEXP (XEXP (disp, 0), 0);
	  op1 = XEXP (XEXP (disp, 0), 1);
	  if (!CONST_INT_P (op1)
	      || INTVAL (op1) >= 16*1024*1024
	      || INTVAL (op1) < -16*1024*1024)
	    break;
	  if (GET_CODE (op0) == LABEL_REF)
	    return true;
	  if (GET_CODE (op0) != SYMBOL_REF)
	    break;
	  /* FALLTHRU */

	case SYMBOL_REF:
	  /* TLS references should always be enclosed in UNSPEC.  */
	  if (SYMBOL_REF_TLS_MODEL (op0))
	    return false;
	  if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
	      && ix86_cmodel != CM_LARGE_PIC)
	    return true;
	  break;

	default:
	  break;
	}
    }
  if (GET_CODE (disp) != CONST)
    return false;
  disp = XEXP (disp, 0);

  if (TARGET_64BIT)
    {
      /* It is unsafe to allow PLUS expressions; this limits the allowed
	 distance of GOT tables.  We should not need these anyway.  */
      if (GET_CODE (disp) != UNSPEC
	  || (XINT (disp, 1) != UNSPEC_GOTPCREL
	      && XINT (disp, 1) != UNSPEC_GOTOFF
	      && XINT (disp, 1) != UNSPEC_PCREL
	      && XINT (disp, 1) != UNSPEC_PLTOFF))
	return false;

      if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
	  && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
	return false;
      return true;
    }

  saw_plus = false;
  if (GET_CODE (disp) == PLUS)
    {
      if (!CONST_INT_P (XEXP (disp, 1)))
	return false;
      disp = XEXP (disp, 0);
      saw_plus = true;
    }

  if (TARGET_MACHO && darwin_local_data_pic (disp))
    return true;

  if (GET_CODE (disp) != UNSPEC)
    return false;

  switch (XINT (disp, 1))
    {
    case UNSPEC_GOT:
      if (saw_plus)
	return false;
      /* We need to check for both symbols and labels because VxWorks loads
	 text labels with @GOT rather than @GOTOFF.  See gotoff_operand for
	 details.  */
      return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
	      || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
    case UNSPEC_GOTOFF:
      /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
	 While the ABI also specifies a 32-bit relocation, we don't produce
	 it in the small PIC model at all.  */
      if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
	   || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
	  && !TARGET_64BIT)
	return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
      return false;
    case UNSPEC_GOTTPOFF:
    case UNSPEC_GOTNTPOFF:
    case UNSPEC_INDNTPOFF:
      if (saw_plus)
	return false;
      disp = XVECEXP (disp, 0, 0);
      return (GET_CODE (disp) == SYMBOL_REF
	      && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
    case UNSPEC_NTPOFF:
      disp = XVECEXP (disp, 0, 0);
      return (GET_CODE (disp) == SYMBOL_REF
	      && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
    case UNSPEC_DTPOFF:
      disp = XVECEXP (disp, 0, 0);
      return (GET_CODE (disp) == SYMBOL_REF
	      && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
    }

  return false;
}
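/* For example, in 32-bit PIC code the displacement
   (const (unspec [(symbol_ref "x")] UNSPEC_GOTOFF)) is accepted above,
   corresponding to an x@GOTOFF(%ebx) style reference to local data.
   (Illustrative RTL only.)  */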
/* Recognizes RTL expressions that are valid memory addresses for an
   instruction.  The MODE argument is the machine mode for the MEM
   expression that wants to use this address.

   It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS should
   convert common non-canonical forms to canonical form so that they will
   be recognized.  */

static bool
ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
			   rtx addr, bool strict)
{
  struct ix86_address parts;
  rtx base, index, disp;
  HOST_WIDE_INT scale;

  if (ix86_decompose_address (addr, &parts) <= 0)
    /* Decomposition failed.  */
    return false;

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;

  /* Validate base register.  */
  if (base)
    {
      rtx reg;

      if (REG_P (base))
	reg = base;
      else if (GET_CODE (base) == SUBREG && REG_P (SUBREG_REG (base)))
	{
	  reg = SUBREG_REG (base);
	  gcc_assert (register_no_elim_operand (reg, DImode));
	}
      else
	/* Base is not a register.  */
	return false;

      if (GET_MODE (base) != SImode && GET_MODE (base) != DImode)
	return false;

      if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
	  || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
	/* Base is not valid.  */
	return false;
    }

  /* Validate index register.  */
  if (index)
    {
      rtx reg;

      if (REG_P (index))
	reg = index;
      else if (GET_CODE (index) == SUBREG && REG_P (SUBREG_REG (index)))
	{
	  reg = SUBREG_REG (index);
	  gcc_assert (register_no_elim_operand (reg, DImode));
	}
      else
	/* Index is not a register.  */
	return false;

      if (GET_MODE (index) != SImode && GET_MODE (index) != DImode)
	return false;

      if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
	  || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
	/* Index is not valid.  */
	return false;
    }

  /* Index and base should have the same mode.  */
  if (base && index
      && GET_MODE (base) != GET_MODE (index))
    return false;

  /* Validate scale factor.  */
  if (scale != 1)
    {
      if (!index)
	/* Scale without index.  */
	return false;

      if (scale != 2 && scale != 4 && scale != 8)
	/* Scale is not a valid multiplier.  */
	return false;
    }

  /* Validate displacement.  */
  if (disp)
    {
      if (GET_CODE (disp) == CONST
	  && GET_CODE (XEXP (disp, 0)) == UNSPEC
	  && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
	switch (XINT (XEXP (disp, 0), 1))
	  {
	  /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit
	     when used.  While the ABI also specifies 32-bit relocations, we
	     don't produce them at all and use IP-relative addressing
	     instead.  */
	  case UNSPEC_GOT:
	  case UNSPEC_GOTOFF:
	    gcc_assert (flag_pic);
	    if (!TARGET_64BIT)
	      goto is_legitimate_pic;

	    /* 64bit address unspec.  */
	    return false;

	  case UNSPEC_GOTPCREL:
	  case UNSPEC_PCREL:
	    gcc_assert (flag_pic);
	    goto is_legitimate_pic;

	  case UNSPEC_GOTTPOFF:
	  case UNSPEC_GOTNTPOFF:
	  case UNSPEC_INDNTPOFF:
	  case UNSPEC_NTPOFF:
	  case UNSPEC_DTPOFF:
	    break;

	  case UNSPEC_STACK_CHECK:
	    gcc_assert (flag_split_stack);
	    break;

	  default:
	    /* Invalid address unspec.  */
	    return false;
	  }

      else if (SYMBOLIC_CONST (disp)
	       && (flag_pic
		   || (TARGET_MACHO
#if TARGET_MACHO
		       && MACHOPIC_INDIRECT
		       && !machopic_operand_p (disp)
#endif
	       )))
	{

	is_legitimate_pic:
	  if (TARGET_64BIT && (index || base))
	    {
	      /* foo@dtpoff(%rX) is ok.  */
	      if (GET_CODE (disp) != CONST
		  || GET_CODE (XEXP (disp, 0)) != PLUS
		  || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
		  || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
		  || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
		      && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
		/* Non-constant pic memory reference.  */
		return false;
	    }
	  else if ((!TARGET_MACHO || flag_pic)
		   && ! legitimate_pic_address_disp_p (disp))
	    /* Displacement is an invalid pic construct.  */
	    return false;
#if TARGET_MACHO
	  else if (MACHO_DYNAMIC_NO_PIC_P
		   && !ix86_legitimate_constant_p (Pmode, disp))
	    /* displacement must be referenced via non_lazy_pointer */
	    return false;
#endif

	  /* This code used to verify that a symbolic pic displacement
	     includes the pic_offset_table_rtx register.

	     While this is a good idea, unfortunately these constructs may
	     be created by "adds using lea" optimization for incorrect
	     code like:

	     int a;
	     int foo (int i)
	       {
		 return *(&a + i);
	       }

	     This code is nonsensical, but results in addressing
	     GOT table with pic_offset_table_rtx base.  We can't
	     just refuse it easily, since it gets matched by
	     "addsi3" pattern, that later gets split to lea in the
	     case output register differs from input.  While this
	     can be handled by separate addsi pattern for this case
	     that never results in lea, this seems to be easier and
	     correct fix for crash to disable this test.  */
	}
      else if (GET_CODE (disp) != LABEL_REF
	       && !CONST_INT_P (disp)
	       && (GET_CODE (disp) != CONST
		   || !ix86_legitimate_constant_p (Pmode, disp))
	       && (GET_CODE (disp) != SYMBOL_REF
		   || !ix86_legitimate_constant_p (Pmode, disp)))
	/* Displacement is not constant.  */
	return false;
      else if (TARGET_64BIT
	       && !x86_64_immediate_operand (disp, VOIDmode))
	/* Displacement is out of range.  */
	return false;
    }

  /* Everything looks valid.  */
  return true;
}

/* Determine if a given RTX is a valid constant address.  */

bool
constant_address_p (rtx x)
{
  return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
}
/* Return a unique alias set for the GOT.  */

static alias_set_type
ix86_GOT_alias_set (void)
{
  static alias_set_type set = -1;
  if (set == -1)
    set = new_alias_set ();
  return set;
}
/* Return a legitimate reference for ORIG (an address) using the
   register REG.  If REG is 0, a new pseudo is generated.

   There are two types of references that must be handled:

   1. Global data references must load the address from the GOT, via
      the PIC reg.  An insn is emitted to do this load, and the reg is
      returned.

   2. Static data references, constant pool addresses, and code labels
      compute the address as an offset from the GOT, whose base is in
      the PIC reg.  Static data objects have SYMBOL_FLAG_LOCAL set to
      differentiate them from global data objects.  The returned
      address is the PIC reg + an unspec constant.

   TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
   reg also appears in the address.  */

static rtx
legitimize_pic_address (rtx orig, rtx reg)
{
  rtx addr = orig;
  rtx new_rtx = orig;
  rtx base;

#if TARGET_MACHO
  if (TARGET_MACHO && !TARGET_64BIT)
    {
      if (reg == 0)
	reg = gen_reg_rtx (Pmode);
      /* Use the generic Mach-O PIC machinery.  */
      return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
    }
#endif

  if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
    new_rtx = addr;
  else if (TARGET_64BIT
	   && ix86_cmodel != CM_SMALL_PIC
	   && gotoff_operand (addr, Pmode))
    {
      rtx tmpreg;

      /* This symbol may be referenced via a displacement from the PIC
	 base address (@GOTOFF).  */

      if (reload_in_progress)
	df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
      if (GET_CODE (addr) == CONST)
	addr = XEXP (addr, 0);
      if (GET_CODE (addr) == PLUS)
	{
	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
				    UNSPEC_GOTOFF);
	  new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
	}
      else
	new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
      new_rtx = gen_rtx_CONST (Pmode, new_rtx);
      if (!reg)
	tmpreg = gen_reg_rtx (Pmode);
      else
	tmpreg = reg;
      emit_move_insn (tmpreg, new_rtx);

      if (reg != 0)
	{
	  new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
					 tmpreg, 1, OPTAB_DIRECT);
	  new_rtx = reg;
	}
      else
	new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
    }
  else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
    {
      /* This symbol may be referenced via a displacement from the PIC
	 base address (@GOTOFF).  */

      if (reload_in_progress)
	df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
      if (GET_CODE (addr) == CONST)
	addr = XEXP (addr, 0);
      if (GET_CODE (addr) == PLUS)
	{
	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
				    UNSPEC_GOTOFF);
	  new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
	}
      else
	new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
      new_rtx = gen_rtx_CONST (Pmode, new_rtx);
      new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);

      if (reg != 0)
	{
	  emit_move_insn (reg, new_rtx);
	  new_rtx = reg;
	}
    }
  else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
	   /* We can't use @GOTOFF for text labels on VxWorks;
	      see gotoff_operand.  */
	   || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
    {
      if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
	{
	  if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
	    return legitimize_dllimport_symbol (addr, true);
	  if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
	      && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
	    {
	      rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
	      return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
	    }
	}

      /* For x64 PE-COFF there is no GOT table.  So we use address
	 directly.  */
      if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
	{
	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
	  new_rtx = gen_rtx_CONST (Pmode, new_rtx);

	  if (reg == 0)
	    reg = gen_reg_rtx (Pmode);
	  emit_move_insn (reg, new_rtx);
	  new_rtx = reg;
	}
      else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
	{
	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
	  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
	  new_rtx = gen_const_mem (Pmode, new_rtx);
	  set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());

	  if (reg == 0)
	    reg = gen_reg_rtx (Pmode);
	  /* Use gen_movsi directly, otherwise the address is loaded
	     into a register for CSE.  We don't want to CSE these addresses;
	     instead we CSE addresses from the GOT table, so skip this.  */
	  emit_insn (gen_movsi (reg, new_rtx));
	  new_rtx = reg;
	}
      else
	{
	  /* This symbol must be referenced via a load from the
	     Global Offset Table (@GOT).  */

	  if (reload_in_progress)
	    df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
	  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
	  if (TARGET_64BIT)
	    new_rtx = force_reg (Pmode, new_rtx);
	  new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
	  new_rtx = gen_const_mem (Pmode, new_rtx);
	  set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());

	  if (reg == 0)
	    reg = gen_reg_rtx (Pmode);
	  emit_move_insn (reg, new_rtx);
	  new_rtx = reg;
	}
    }
  else
    {
      if (CONST_INT_P (addr)
	  && !x86_64_immediate_operand (addr, VOIDmode))
	{
	  if (reg)
	    {
	      emit_move_insn (reg, addr);
	      new_rtx = reg;
	    }
	  else
	    new_rtx = force_reg (Pmode, addr);
	}
      else if (GET_CODE (addr) == CONST)
	{
	  addr = XEXP (addr, 0);

	  /* We must match stuff we generate before.  Assume the only
	     unspecs that can get here are ours.  Not that we could do
	     anything with them anyway....  */
	  if (GET_CODE (addr) == UNSPEC
	      || (GET_CODE (addr) == PLUS
		  && GET_CODE (XEXP (addr, 0)) == UNSPEC))
	    return orig;
	  gcc_assert (GET_CODE (addr) == PLUS);
	}
      if (GET_CODE (addr) == PLUS)
	{
	  rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);

	  /* Check first to see if this is a constant offset from a @GOTOFF
	     symbol reference.  */
	  if (gotoff_operand (op0, Pmode)
	      && CONST_INT_P (op1))
	    {
	      if (!TARGET_64BIT)
		{
		  if (reload_in_progress)
		    df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
		  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
					    UNSPEC_GOTOFF);
		  new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
		  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
		  new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);

		  if (reg != 0)
		    {
		      emit_move_insn (reg, new_rtx);
		      new_rtx = reg;
		    }
		}
	      else
		{
		  if (INTVAL (op1) < -16*1024*1024
		      || INTVAL (op1) >= 16*1024*1024)
		    {
		      if (!x86_64_immediate_operand (op1, Pmode))
			op1 = force_reg (Pmode, op1);
		      new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
		    }
		}
	    }
	  else
	    {
	      base = legitimize_pic_address (XEXP (addr, 0), reg);
	      new_rtx = legitimize_pic_address (XEXP (addr, 1),
						base == reg ? NULL_RTX : reg);

	      if (CONST_INT_P (new_rtx))
		new_rtx = plus_constant (base, INTVAL (new_rtx));
	      else
		{
		  if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
		    {
		      base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
		      new_rtx = XEXP (new_rtx, 1);
		    }
		  new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
		}
	    }
	}
    }
  return new_rtx;
}
/* Load the thread pointer.  If TO_REG is true, force it into a register.  */

static rtx
get_thread_pointer (bool to_reg)
{
  rtx tp, reg, insn;

  tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
  if (!to_reg)
    return tp;

  reg = gen_reg_rtx (Pmode);
  insn = gen_rtx_SET (VOIDmode, reg, tp);
  insn = emit_insn (insn);

  return reg;
}
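/* Illustrative note (an assumption about typical GNU/Linux targets, not
   something this file guarantees for every OS): the UNSPEC_TP pattern
   above usually materializes the thread pointer from a segment register,
   roughly

       movl %gs:0, %eax        # 32-bit
       movq %fs:0, %rax        # 64-bit

   Other targets may use a different segment or a helper routine.  */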
/* Construct the SYMBOL_REF for the tls_get_addr function.  */

static GTY(()) rtx ix86_tls_symbol;

static rtx
ix86_tls_get_addr (void)
{
  if (!ix86_tls_symbol)
    {
      const char *sym
	= ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
	   ? "___tls_get_addr" : "__tls_get_addr");

      ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
    }

  return ix86_tls_symbol;
}
/* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol.  */

static GTY(()) rtx ix86_tls_module_base_symbol;

static rtx
ix86_tls_module_base (void)
{
  if (!ix86_tls_module_base_symbol)
    {
      ix86_tls_module_base_symbol
	= gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");

      SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
	|= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
    }

  return ix86_tls_module_base_symbol;
}
/* A subroutine of ix86_legitimize_address and ix86_expand_move.  FOR_MOV is
   false if we expect this to be used for a memory address and true if
   we expect to load the address into a register.  */

static rtx
legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
{
  rtx dest, base, off;
  rtx pic = NULL_RTX, tp = NULL_RTX;
  int type;

  switch (model)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      dest = gen_reg_rtx (Pmode);

      if (!TARGET_64BIT)
	{
	  if (flag_pic)
	    pic = pic_offset_table_rtx;
	  else
	    {
	      pic = gen_reg_rtx (Pmode);
	      emit_insn (gen_set_got (pic));
	    }
	}

      if (TARGET_GNU2_TLS)
	{
	  if (TARGET_64BIT)
	    emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
	  else
	    emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));

	  tp = get_thread_pointer (true);
	  dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));

	  set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
	}
      else
	{
	  rtx caddr = ix86_tls_get_addr ();

	  if (TARGET_64BIT)
	    {
	      rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;

	      start_sequence ();
	      emit_call_insn (gen_tls_global_dynamic_64 (rax, x, caddr));
	      insns = get_insns ();
	      end_sequence ();

	      RTL_CONST_CALL_P (insns) = 1;
	      emit_libcall_block (insns, dest, rax, x);
	    }
	  else
	    emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
	}
      break;

    case TLS_MODEL_LOCAL_DYNAMIC:
      base = gen_reg_rtx (Pmode);

      if (!TARGET_64BIT)
	{
	  if (flag_pic)
	    pic = pic_offset_table_rtx;
	  else
	    {
	      pic = gen_reg_rtx (Pmode);
	      emit_insn (gen_set_got (pic));
	    }
	}

      if (TARGET_GNU2_TLS)
	{
	  rtx tmp = ix86_tls_module_base ();

	  if (TARGET_64BIT)
	    emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
	  else
	    emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));

	  tp = get_thread_pointer (true);
	  set_unique_reg_note (get_last_insn (), REG_EQUIV,
			       gen_rtx_MINUS (Pmode, tmp, tp));
	}
      else
	{
	  rtx caddr = ix86_tls_get_addr ();

	  if (TARGET_64BIT)
	    {
	      rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, eqv;

	      start_sequence ();
	      emit_call_insn (gen_tls_local_dynamic_base_64 (rax, caddr));
	      insns = get_insns ();
	      end_sequence ();

	      /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
		 share the LD_BASE result with other LD model accesses.  */
	      eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
				    UNSPEC_TLS_LD_BASE);

	      RTL_CONST_CALL_P (insns) = 1;
	      emit_libcall_block (insns, base, rax, eqv);
	    }
	  else
	    emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
	}

      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
      off = gen_rtx_CONST (Pmode, off);

      dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));

      if (TARGET_GNU2_TLS)
	{
	  dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));

	  set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
	}
      break;

    case TLS_MODEL_INITIAL_EXEC:
      if (TARGET_64BIT)
	{
	  if (TARGET_SUN_TLS)
	    {
	      /* The Sun linker took the AMD64 TLS spec literally
		 and can only handle %rax as destination of the
		 initial executable code sequence.  */

	      dest = gen_reg_rtx (Pmode);
	      emit_insn (gen_tls_initial_exec_64_sun (dest, x));
	      return dest;
	    }

	  pic = NULL;
	  type = UNSPEC_GOTNTPOFF;
	}
      else if (flag_pic)
	{
	  if (reload_in_progress)
	    df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
	  pic = pic_offset_table_rtx;
	  type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
	}
      else if (!TARGET_ANY_GNU_TLS)
	{
	  pic = gen_reg_rtx (Pmode);
	  emit_insn (gen_set_got (pic));
	  type = UNSPEC_GOTTPOFF;
	}
      else
	{
	  pic = NULL;
	  type = UNSPEC_INDNTPOFF;
	}

      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
      off = gen_rtx_CONST (Pmode, off);
      if (pic)
	off = gen_rtx_PLUS (Pmode, pic, off);
      off = gen_const_mem (Pmode, off);
      set_mem_alias_set (off, ix86_GOT_alias_set ());

      if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
	{
	  base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
	  off = force_reg (Pmode, off);
	  return gen_rtx_PLUS (Pmode, base, off);
	}
      else
	{
	  base = get_thread_pointer (true);
	  dest = gen_reg_rtx (Pmode);
	  emit_insn (gen_subsi3 (dest, base, off));
	}
      break;

    case TLS_MODEL_LOCAL_EXEC:
      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
			    (TARGET_64BIT || TARGET_ANY_GNU_TLS)
			    ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
      off = gen_rtx_CONST (Pmode, off);

      if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
	{
	  base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
	  return gen_rtx_PLUS (Pmode, base, off);
	}
      else
	{
	  base = get_thread_pointer (true);
	  dest = gen_reg_rtx (Pmode);
	  emit_insn (gen_subsi3 (dest, base, off));
	}
      break;

    default:
      gcc_unreachable ();
    }

  return dest;
}
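/* Rough sketch of the access sequences the four TLS models above map to
   on a typical 32-bit GNU/Linux target (simplified; exact relocations,
   registers and call names vary with -fpic, TARGET_GNU2_TLS and the OS):

     global-dynamic: leal x@tlsgd(,%ebx,1), %eax; call ___tls_get_addr
     local-dynamic:  leal x@tlsldm(%ebx), %eax;   call ___tls_get_addr
                     ... leal x@dtpoff(%eax), %edx
     initial-exec:   movl x@gotntpoff(%ebx), %eax; movl %gs:(%eax), %eax
     local-exec:     movl %gs:x@ntpoff, %eax  */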
/* Create or return the unique __imp_DECL dllimport symbol corresponding
   to symbol DECL.  */

static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
  htab_t dllimport_map;

static tree
get_dllimport_decl (tree decl)
{
  struct tree_map *h, in;
  void **loc;
  const char *name;
  const char *prefix;
  size_t namelen, prefixlen;
  char *imp_name;
  tree to;
  rtx rtl;

  if (!dllimport_map)
    dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);

  in.hash = htab_hash_pointer (decl);
  in.base.from = decl;
  loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
  h = (struct tree_map *) *loc;
  if (h)
    return h->to;

  *loc = h = ggc_alloc_tree_map ();
  h->hash = in.hash;
  h->base.from = decl;
  h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
			   VAR_DECL, NULL, ptr_type_node);
  DECL_ARTIFICIAL (to) = 1;
  DECL_IGNORED_P (to) = 1;
  DECL_EXTERNAL (to) = 1;
  TREE_READONLY (to) = 1;

  name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
  name = targetm.strip_name_encoding (name);
  prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
    ? "*__imp_" : "*__imp__";
  namelen = strlen (name);
  prefixlen = strlen (prefix);
  imp_name = (char *) alloca (namelen + prefixlen + 1);
  memcpy (imp_name, prefix, prefixlen);
  memcpy (imp_name + prefixlen, name, namelen + 1);

  name = ggc_alloc_string (imp_name, namelen + prefixlen);
  rtl = gen_rtx_SYMBOL_REF (Pmode, name);
  SET_SYMBOL_REF_DECL (rtl, to);
  SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;

  rtl = gen_const_mem (Pmode, rtl);
  set_mem_alias_set (rtl, ix86_GOT_alias_set ());

  SET_DECL_RTL (to, rtl);
  SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));

  return to;
}
/* Expand SYMBOL into its corresponding dllimport symbol.  WANT_REG is
   true if we require the result be a register.  */

static rtx
legitimize_dllimport_symbol (rtx symbol, bool want_reg)
{
  tree imp_decl;
  rtx x;

  gcc_assert (SYMBOL_REF_DECL (symbol));
  imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));

  x = DECL_RTL (imp_decl);
  if (want_reg)
    x = force_reg (Pmode, x);
  return x;
}
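/* For illustration (hypothetical symbol "foo"; the exact underscore
   prefixing depends on the target's user_label_prefix): a dllimport
   reference is rewritten from "foo" into a load through "__imp__foo",
   the pointer slot the linker fills in from the DLL import table, so a
   call becomes roughly

       call *__imp__foo

   rather than a direct "call foo".  */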
/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the 80386, we handle X+REG by loading X into a register R and
   using R+REG.  R will go in a general reg and indexing will be used.
   However, if REG is a broken-out memory address or multiplication,
   nothing needs to be done because REG can certainly go in a general reg.

   When -fpic is used, special handling is needed for symbolic references.
   See comments by legitimize_pic_address in i386.c for details.  */

static rtx
ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
			 enum machine_mode mode)
{
  int changed = 0;
  unsigned log;

  log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
  if (log)
    return legitimize_tls_address (x, (enum tls_model) log, false);
  if (GET_CODE (x) == CONST
      && GET_CODE (XEXP (x, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
      && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
    {
      rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
				      (enum tls_model) log, false);
      return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
    }

  if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
    {
      if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
	return legitimize_dllimport_symbol (x, true);
      if (GET_CODE (x) == CONST
	  && GET_CODE (XEXP (x, 0)) == PLUS
	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
	  && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
	{
	  rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
	  return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
	}
    }

  if (flag_pic && SYMBOLIC_CONST (x))
    return legitimize_pic_address (x, 0);

#if TARGET_MACHO
  if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
    return machopic_indirect_data_reference (x, 0);
#endif

  /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
  if (GET_CODE (x) == ASHIFT
      && CONST_INT_P (XEXP (x, 1))
      && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
    {
      changed = 1;
      log = INTVAL (XEXP (x, 1));
      x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
			GEN_INT (1 << log));
    }

  if (GET_CODE (x) == PLUS)
    {
      /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */

      if (GET_CODE (XEXP (x, 0)) == ASHIFT
	  && CONST_INT_P (XEXP (XEXP (x, 0), 1))
	  && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
	{
	  changed = 1;
	  log = INTVAL (XEXP (XEXP (x, 0), 1));
	  XEXP (x, 0) = gen_rtx_MULT (Pmode,
				      force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
				      GEN_INT (1 << log));
	}

      if (GET_CODE (XEXP (x, 1)) == ASHIFT
	  && CONST_INT_P (XEXP (XEXP (x, 1), 1))
	  && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
	{
	  changed = 1;
	  log = INTVAL (XEXP (XEXP (x, 1), 1));
	  XEXP (x, 1) = gen_rtx_MULT (Pmode,
				      force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
				      GEN_INT (1 << log));
	}

      /* Put multiply first if it isn't already.  */
      if (GET_CODE (XEXP (x, 1)) == MULT)
	{
	  rtx tmp = XEXP (x, 0);
	  XEXP (x, 0) = XEXP (x, 1);
	  XEXP (x, 1) = tmp;
	  changed = 1;
	}

      /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
	 created by virtual register instantiation, register elimination, and
	 similar optimizations.  */
      if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
	{
	  changed = 1;
	  x = gen_rtx_PLUS (Pmode,
			    gen_rtx_PLUS (Pmode, XEXP (x, 0),
					  XEXP (XEXP (x, 1), 0)),
			    XEXP (XEXP (x, 1), 1));
	}

      /* Canonicalize
	 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
      else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
	       && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	       && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
	       && CONSTANT_P (XEXP (x, 1)))
	{
	  rtx constant;
	  rtx other = NULL_RTX;

	  if (CONST_INT_P (XEXP (x, 1)))
	    {
	      constant = XEXP (x, 1);
	      other = XEXP (XEXP (XEXP (x, 0), 1), 1);
	    }
	  else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
	    {
	      constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
	      other = XEXP (x, 1);
	    }
	  else
	    constant = 0;

	  if (constant)
	    {
	      changed = 1;
	      x = gen_rtx_PLUS (Pmode,
				gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
					      XEXP (XEXP (XEXP (x, 0), 1), 0)),
				plus_constant (other, INTVAL (constant)));
	    }
	}

      if (changed && ix86_legitimate_address_p (mode, x, false))
	return x;

      if (GET_CODE (XEXP (x, 0)) == MULT)
	{
	  changed = 1;
	  XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
	}

      if (GET_CODE (XEXP (x, 1)) == MULT)
	{
	  changed = 1;
	  XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
	}

      if (changed
	  && REG_P (XEXP (x, 1))
	  && REG_P (XEXP (x, 0)))
	return x;

      if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
	{
	  changed = 1;
	  x = legitimize_pic_address (x, 0);
	}

      if (changed && ix86_legitimate_address_p (mode, x, false))
	return x;

      if (REG_P (XEXP (x, 0)))
	{
	  rtx temp = gen_reg_rtx (Pmode);
	  rtx val  = force_operand (XEXP (x, 1), temp);
	  if (val != temp)
	    {
	      if (GET_MODE (val) != Pmode)
		val = convert_to_mode (Pmode, val, 1);
	      emit_move_insn (temp, val);
	    }

	  XEXP (x, 1) = temp;
	  return x;
	}

      else if (REG_P (XEXP (x, 1)))
	{
	  rtx temp = gen_reg_rtx (Pmode);
	  rtx val  = force_operand (XEXP (x, 0), temp);
	  if (val != temp)
	    {
	      if (GET_MODE (val) != Pmode)
		val = convert_to_mode (Pmode, val, 1);
	      emit_move_insn (temp, val);
	    }

	  XEXP (x, 0) = temp;
	  return x;
	}
    }

  return x;
}
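/* Worked example of the canonicalizations above (addresses shown as RTL;
   a sketch, not literal compiler output):

     (plus (ashift (reg A) (const_int 2)) (reg B))
       -> (plus (mult (reg A) (const_int 4)) (reg B))

   which matches the x86 scaled-index addressing form B + A*4 and so
   needs no further legitimization.  */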
/* Print an integer constant expression in assembler syntax.  Addition
   and subtraction are the only arithmetic that may appear in these
   expressions.  FILE is the stdio stream to write to, X is the rtx, and
   CODE is the operand print code from the output string.  */

static void
output_pic_addr_const (FILE *file, rtx x, int code)
{
  char buf[256];

  switch (GET_CODE (x))
    {
    case PC:
      gcc_assert (flag_pic);
      putc ('.', file);
      break;

    case SYMBOL_REF:
      if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
	output_addr_const (file, x);
      else
	{
	  const char *name = XSTR (x, 0);

	  /* Mark the decl as referenced so that cgraph will
	     output the function.  */
	  if (SYMBOL_REF_DECL (x))
	    mark_decl_referenced (SYMBOL_REF_DECL (x));

#if TARGET_MACHO
	  if (MACHOPIC_INDIRECT
	      && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
	    name = machopic_indirection_name (x, /*stub_p=*/true);
#endif
	  assemble_name (file, name);
	}
      if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
	  && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
	fputs ("@PLT", file);
      break;

    case LABEL_REF:
      x = XEXP (x, 0);
      /* FALLTHRU */
    case CODE_LABEL:
      ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
      assemble_name (asm_out_file, buf);
      break;

    case CONST_INT:
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
      break;

    case CONST:
      /* This used to output parentheses around the expression,
	 but that does not work on the 386 (either ATT or BSD assembler).  */
      output_pic_addr_const (file, XEXP (x, 0), code);
      break;

    case CONST_DOUBLE:
      if (GET_MODE (x) == VOIDmode)
	{
	  /* We can use %d if the number is <32 bits and positive.  */
	  if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
	    fprintf (file, "0x%lx%08lx",
		     (unsigned long) CONST_DOUBLE_HIGH (x),
		     (unsigned long) CONST_DOUBLE_LOW (x));
	  else
	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
	}
      else
	/* We can't handle floating point constants;
	   TARGET_PRINT_OPERAND must handle them.  */
	output_operand_lossage ("floating constant misused");
      break;

    case PLUS:
      /* Some assemblers need integer constants to appear first.  */
      if (CONST_INT_P (XEXP (x, 0)))
	{
	  output_pic_addr_const (file, XEXP (x, 0), code);
	  putc ('+', file);
	  output_pic_addr_const (file, XEXP (x, 1), code);
	}
      else
	{
	  gcc_assert (CONST_INT_P (XEXP (x, 1)));
	  output_pic_addr_const (file, XEXP (x, 1), code);
	  putc ('+', file);
	  output_pic_addr_const (file, XEXP (x, 0), code);
	}
      break;

    case MINUS:
      if (!TARGET_MACHO)
	putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
      output_pic_addr_const (file, XEXP (x, 0), code);
      putc ('-', file);
      output_pic_addr_const (file, XEXP (x, 1), code);
      if (!TARGET_MACHO)
	putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
      break;

    case UNSPEC:
      if (XINT (x, 1) == UNSPEC_STACK_CHECK)
	{
	  bool f = i386_asm_output_addr_const_extra (file, x);
	  gcc_assert (f);
	  break;
	}

      gcc_assert (XVECLEN (x, 0) == 1);
      output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
      switch (XINT (x, 1))
	{
	case UNSPEC_GOT:
	  fputs ("@GOT", file);
	  break;
	case UNSPEC_GOTOFF:
	  fputs ("@GOTOFF", file);
	  break;
	case UNSPEC_PLTOFF:
	  fputs ("@PLTOFF", file);
	  break;
	case UNSPEC_PCREL:
	  fputs (ASSEMBLER_DIALECT == ASM_ATT ?
		 "(%rip)" : "[rip]", file);
	  break;
	case UNSPEC_GOTPCREL:
	  fputs (ASSEMBLER_DIALECT == ASM_ATT ?
		 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
	  break;
	case UNSPEC_GOTTPOFF:
	  /* FIXME: This might be @TPOFF in Sun ld too.  */
	  fputs ("@gottpoff", file);
	  break;
	case UNSPEC_TPOFF:
	  fputs ("@tpoff", file);
	  break;
	case UNSPEC_NTPOFF:
	  if (TARGET_64BIT)
	    fputs ("@tpoff", file);
	  else
	    fputs ("@ntpoff", file);
	  break;
	case UNSPEC_DTPOFF:
	  fputs ("@dtpoff", file);
	  break;
	case UNSPEC_GOTNTPOFF:
	  if (TARGET_64BIT)
	    fputs (ASSEMBLER_DIALECT == ASM_ATT ?
		   "@gottpoff(%rip)": "@gottpoff[rip]", file);
	  else
	    fputs ("@gotntpoff", file);
	  break;
	case UNSPEC_INDNTPOFF:
	  fputs ("@indntpoff", file);
	  break;
#if TARGET_MACHO
	case UNSPEC_MACHOPIC_OFFSET:
	  putc ('-', file);
	  machopic_output_function_base_name (file);
	  break;
#endif
	default:
	  output_operand_lossage ("invalid UNSPEC as operand");
	  break;
	}
      break;

    default:
      output_operand_lossage ("invalid expression as operand");
    }
}
/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
   We need to emit DTP-relative relocations.  */

static void ATTRIBUTE_UNUSED
i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
{
  fputs (ASM_LONG, file);
  output_addr_const (file, x);
  fputs ("@dtpoff", file);
  switch (size)
    {
    case 4:
      break;
    case 8:
      fputs (", 0", file);
      break;
    default:
      gcc_unreachable ();
   }
}
/* Return true if X is a representation of the PIC register.  This copes
   with calls from ix86_find_base_term, where the register might have
   been replaced by a cselib value.  */

static bool
ix86_pic_register_p (rtx x)
{
  if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
    return (pic_offset_table_rtx
	    && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
  else
    return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
}
/* Helper function for ix86_delegitimize_address.
   Attempt to delegitimize TLS local-exec accesses.  */

static rtx
ix86_delegitimize_tls_address (rtx orig_x)
{
  rtx x = orig_x, unspec;
  struct ix86_address addr;

  if (!TARGET_TLS_DIRECT_SEG_REFS)
    return orig_x;
  if (MEM_P (x))
    x = XEXP (x, 0);
  if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
    return orig_x;
  if (ix86_decompose_address (x, &addr) == 0
      || addr.seg != (TARGET_64BIT ? SEG_FS : SEG_GS)
      || addr.disp == NULL_RTX
      || GET_CODE (addr.disp) != CONST)
    return orig_x;
  unspec = XEXP (addr.disp, 0);
  if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
    unspec = XEXP (unspec, 0);
  if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
    return orig_x;
  x = XVECEXP (unspec, 0, 0);
  gcc_assert (GET_CODE (x) == SYMBOL_REF);
  if (unspec != XEXP (addr.disp, 0))
    x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
  if (addr.index)
    {
      rtx idx = addr.index;
      if (addr.scale != 1)
	idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
      x = gen_rtx_PLUS (Pmode, idx, x);
    }
  if (addr.base)
    x = gen_rtx_PLUS (Pmode, addr.base, x);
  if (MEM_P (orig_x))
    x = replace_equiv_address_nv (orig_x, x);
  return x;
}
/* In the name of slightly smaller debug output, and to cater to
   general assembler lossage, recognize PIC+GOTOFF and turn it back
   into a direct symbol reference.

   On Darwin, this is necessary to avoid a crash, because Darwin
   has a different PIC label for each routine but the DWARF debugging
   information is not associated with any particular routine, so it's
   necessary to remove references to the PIC label from RTL stored by
   the DWARF output code.  */

static rtx
ix86_delegitimize_address (rtx x)
{
  rtx orig_x = delegitimize_mem_from_attrs (x);
  /* addend is NULL or some rtx if x is something+GOTOFF where
     something doesn't include the PIC register.  */
  rtx addend = NULL_RTX;
  /* reg_addend is NULL or a multiple of some register.  */
  rtx reg_addend = NULL_RTX;
  /* const_addend is NULL or a const_int.  */
  rtx const_addend = NULL_RTX;
  /* This is the result, or NULL.  */
  rtx result = NULL_RTX;

  x = orig_x;

  if (MEM_P (x))
    x = XEXP (x, 0);

  if (TARGET_64BIT)
    {
      if (GET_CODE (x) != CONST
	  || GET_CODE (XEXP (x, 0)) != UNSPEC
	  || (XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
	      && XINT (XEXP (x, 0), 1) != UNSPEC_PCREL)
	  || !MEM_P (orig_x))
	return ix86_delegitimize_tls_address (orig_x);
      x = XVECEXP (XEXP (x, 0), 0, 0);
      if (GET_MODE (orig_x) != GET_MODE (x))
	{
	  x = simplify_gen_subreg (GET_MODE (orig_x), x,
				   GET_MODE (x), 0);
	  if (x == NULL_RTX)
	    return orig_x;
	}
      return x;
    }

  if (GET_CODE (x) != PLUS
      || GET_CODE (XEXP (x, 1)) != CONST)
    return ix86_delegitimize_tls_address (orig_x);

  if (ix86_pic_register_p (XEXP (x, 0)))
    /* %ebx + GOT/GOTOFF */
    ;
  else if (GET_CODE (XEXP (x, 0)) == PLUS)
    {
      /* %ebx + %reg * scale + GOT/GOTOFF */
      reg_addend = XEXP (x, 0);
      if (ix86_pic_register_p (XEXP (reg_addend, 0)))
	reg_addend = XEXP (reg_addend, 1);
      else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
	reg_addend = XEXP (reg_addend, 0);
      else
	{
	  reg_addend = NULL_RTX;
	  addend = XEXP (x, 0);
	}
    }
  else
    addend = XEXP (x, 0);

  x = XEXP (XEXP (x, 1), 0);
  if (GET_CODE (x) == PLUS
      && CONST_INT_P (XEXP (x, 1)))
    {
      const_addend = XEXP (x, 1);
      x = XEXP (x, 0);
    }

  if (GET_CODE (x) == UNSPEC
      && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
	  || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
    result = XVECEXP (x, 0, 0);

  if (TARGET_MACHO && darwin_local_data_pic (x)
      && !MEM_P (orig_x))
    result = XVECEXP (x, 0, 0);

  if (! result)
    return ix86_delegitimize_tls_address (orig_x);

  if (const_addend)
    result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result,
						 const_addend));
  if (reg_addend)
    result = gen_rtx_PLUS (Pmode, reg_addend, result);
  if (addend)
    {
      /* If the rest of original X doesn't involve the PIC register, add
	 addend and subtract pic_offset_table_rtx.  This can happen e.g.
	 for code like:
	 leal (%ebx, %ecx, 4), %ecx
	 ...
	 movl foo@GOTOFF(%ecx), %edx
	 in which case we return (%ecx - %ebx) + foo.  */
      if (pic_offset_table_rtx)
	result = gen_rtx_PLUS (Pmode,
			       gen_rtx_MINUS (Pmode, copy_rtx (addend),
					      pic_offset_table_rtx),
			       result);
      else
	return orig_x;
    }
  if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
    {
      result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
      if (result == NULL_RTX)
	return orig_x;
    }
  return result;
}
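/* Example of what delegitimization undoes (32-bit PIC; "foo" is a
   hypothetical symbol): the legitimized address

     (plus (reg:SI ebx)
           (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF)))

   is folded back to plain (symbol_ref "foo"), so debug output and alias
   analysis see the symbol itself instead of the PIC arithmetic.  */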
/* If X is a machine specific address (i.e. a symbol or label being
   referenced as a displacement from the GOT implemented using an
   UNSPEC), then return the base term.  Otherwise return X.  */

rtx
ix86_find_base_term (rtx x)
{
  rtx term;

  if (TARGET_64BIT)
    {
      if (GET_CODE (x) != CONST)
	return x;
      term = XEXP (x, 0);
      if (GET_CODE (term) == PLUS
	  && (CONST_INT_P (XEXP (term, 1))
	      || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
	term = XEXP (term, 0);
      if (GET_CODE (term) != UNSPEC
	  || (XINT (term, 1) != UNSPEC_GOTPCREL
	      && XINT (term, 1) != UNSPEC_PCREL))
	return x;

      return XVECEXP (term, 0, 0);
    }

  return ix86_delegitimize_address (x);
}
/* Print to FILE the condition-code suffix implied by CODE and MODE.
   REVERSE prints the reversed condition; FP selects the spelling used
   by fcmov-style instructions.  */

static void
put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
		    int fp, FILE *file)
{
  const char *suffix;

  if (mode == CCFPmode || mode == CCFPUmode)
    {
      code = ix86_fp_compare_code_to_integer (code);
      mode = CCmode;
    }
  if (reverse)
    code = reverse_condition (code);

  switch (code)
    {
    case EQ:
      suffix = "e";
      break;
    case NE:
      suffix = "ne";
      break;
    case GT:
      gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
      suffix = "g";
      break;
    case GTU:
      /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
	 Those same assemblers have the same but opposite lossage on cmov.  */
      if (mode == CCmode)
	suffix = fp ? "nbe" : "a";
      else if (mode == CCCmode)
	suffix = "b";
      else
	gcc_unreachable ();
      break;
    case LT:
      if (mode == CCNOmode || mode == CCGOCmode)
	suffix = "s";
      else if (mode == CCmode || mode == CCGCmode)
	suffix = "l";
      else
	gcc_unreachable ();
      break;
    case LTU:
      gcc_assert (mode == CCmode || mode == CCCmode);
      suffix = "b";
      break;
    case GE:
      if (mode == CCNOmode || mode == CCGOCmode)
	suffix = "ns";
      else if (mode == CCmode || mode == CCGCmode)
	suffix = "ge";
      else
	gcc_unreachable ();
      break;
    case GEU:
      /* ??? As above.  */
      gcc_assert (mode == CCmode || mode == CCCmode);
      suffix = fp ? "nb" : "ae";
      break;
    case LE:
      gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
      suffix = "le";
      break;
    case LEU:
      /* ??? As above.  */
      if (mode == CCmode)
	suffix = "be";
      else if (mode == CCCmode)
	suffix = fp ? "nb" : "ae";
      else
	gcc_unreachable ();
      break;
    case UNORDERED:
      suffix = fp ? "u" : "p";
      break;
    case ORDERED:
      suffix = fp ? "nu" : "np";
      break;
    default:
      gcc_unreachable ();
    }
  fputs (suffix, file);
}
/* Print the name of register X to FILE based on its machine mode and number.
   If CODE is 'w', pretend the mode is HImode.
   If CODE is 'b', pretend the mode is QImode.
   If CODE is 'k', pretend the mode is SImode.
   If CODE is 'q', pretend the mode is DImode.
   If CODE is 'x', pretend the mode is V4SFmode.
   If CODE is 't', pretend the mode is V8SFmode.
   If CODE is 'h', pretend the reg is the 'high' byte register.
   If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
   If CODE is 'd', duplicate the operand for AVX instruction.
 */

void
print_reg (rtx x, int code, FILE *file)
{
  const char *reg;
  bool duplicated = code == 'd' && TARGET_AVX;

  gcc_assert (x == pc_rtx
	      || (REGNO (x) != ARG_POINTER_REGNUM
		  && REGNO (x) != FRAME_POINTER_REGNUM
		  && REGNO (x) != FLAGS_REG
		  && REGNO (x) != FPSR_REG
		  && REGNO (x) != FPCR_REG));

  if (ASSEMBLER_DIALECT == ASM_ATT)
    putc ('%', file);

  if (x == pc_rtx)
    {
      gcc_assert (TARGET_64BIT);
      fputs ("rip", file);
      return;
    }

  if (code == 'w' || MMX_REG_P (x))
    code = 2;
  else if (code == 'b')
    code = 1;
  else if (code == 'k')
    code = 4;
  else if (code == 'q')
    code = 8;
  else if (code == 'y')
    code = 3;
  else if (code == 'h')
    code = 0;
  else if (code == 'x')
    code = 16;
  else if (code == 't')
    code = 32;
  else
    code = GET_MODE_SIZE (GET_MODE (x));

  /* Irritatingly, AMD extended registers use different naming convention
     from the normal registers.  */
  if (REX_INT_REG_P (x))
    {
      gcc_assert (TARGET_64BIT);
      switch (code)
	{
	case 0:
	  error ("extended registers have no high halves");
	  break;
	case 1:
	  fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
	  break;
	case 2:
	  fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
	  break;
	case 4:
	  fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
	  break;
	case 8:
	  fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
	  break;
	default:
	  error ("unsupported operand size for extended register");
	  break;
	}
      return;
    }

  reg = NULL;
  switch (code)
    {
    case 3:
      if (STACK_TOP_P (x))
	{
	  reg = "st(0)";
	  break;
	}
      /* FALLTHRU */
    case 8:
    case 4:
    case 12:
      if (! ANY_FP_REG_P (x))
	putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
      /* FALLTHRU */
    case 16:
    case 2:
    normal:
      reg = hi_reg_name[REGNO (x)];
      break;
    case 1:
      if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
	goto normal;
      reg = qi_reg_name[REGNO (x)];
      break;
    case 0:
      if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
	goto normal;
      reg = qi_high_reg_name[REGNO (x)];
      break;
    case 32:
      if (SSE_REG_P (x))
	{
	  gcc_assert (!duplicated);
	  putc ('y', file);
	  fputs (hi_reg_name[REGNO (x)] + 1, file);
	  return;
	}
      break;
    default:
      gcc_unreachable ();
    }

  fputs (reg, file);
  if (duplicated)
    {
      if (ASSEMBLER_DIALECT == ASM_ATT)
	fprintf (file, ", %%%s", reg);
      else
	fprintf (file, ", %s", reg);
    }
}
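/* Illustration of the width codes handled above for one hard register
   (AT&T names): with REGNO (x) == AX_REG,
     code 'b' -> %al, 'h' -> %ah, 'w' -> %ax, 'k' -> %eax, 'q' -> %rax.
   The REX registers instead print as %r8b / %r8w / %r8d / %r8.  */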
/* Locate some local-dynamic symbol still in use by this function
   so that we can print its name in some tls_local_dynamic_base
   pattern.  */

static int
get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
{
  rtx x = *px;

  if (GET_CODE (x) == SYMBOL_REF
      && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
    {
      cfun->machine->some_ld_name = XSTR (x, 0);
      return 1;
    }

  return 0;
}

static const char *
get_some_local_dynamic_name (void)
{
  rtx insn;

  if (cfun->machine->some_ld_name)
    return cfun->machine->some_ld_name;

  for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
    if (NONDEBUG_INSN_P (insn)
	&& for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
      return cfun->machine->some_ld_name;

  return NULL;
}
/* Meaning of CODE:
   L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
   C -- print opcode suffix for set/cmov insn.
   c -- like C, but print reversed condition
   F,f -- likewise, but for floating-point.
   O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
	otherwise nothing
   R -- print the prefix for register names.
   z -- print the opcode suffix for the size of the current operand.
   Z -- likewise, with special suffixes for x87 instructions.
   * -- print a star (in certain assembler syntax)
   A -- print an absolute memory reference.
   w -- print the operand as if it's a "word" (HImode) even if it isn't.
   s -- print a shift double count, followed by the assemblers argument
	delimiter.
   b -- print the QImode name of the register for the indicated operand.
	%b0 would print %al if operands[0] is reg 0.
   w --  likewise, print the HImode name of the register.
   k -- likewise, print the SImode name of the register.
   q -- likewise, print the DImode name of the register.
   x -- likewise, print the V4SFmode name of the register.
   t -- likewise, print the V8SFmode name of the register.
   h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
   y -- print "st(0)" instead of "st" as a register.
   d -- print duplicated register operand for AVX instruction.
   D -- print condition for SSE cmp instruction.
   P -- if PIC, print an @PLT suffix.
   p -- print raw symbol name.
   X -- don't print any sort of PIC '@' suffix for a symbol.
   & -- print some in-use local-dynamic symbol name.
   H -- print a memory address offset by 8; used for sse high-parts
   Y -- print condition for XOP pcom* instruction.
   + -- print a branch hint as 'cs' or 'ds' prefix
   ; -- print a semicolon (after prefixes due to bug in older gas).
   @ -- print a segment register of thread base pointer load
 */

static void
ix86_print_operand (FILE *file, rtx x, int code)
{
  if (code)
    {
      switch (code)
	{
	case '*':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('*', file);
	  return;

	case '&':
	  {
	    const char *name = get_some_local_dynamic_name ();
	    if (name == NULL)
	      output_operand_lossage ("'%%&' used without any "
				      "local dynamic TLS references");
	    else
	      assemble_name (file, name);
	    return;
	  }

	case 'A':
	  switch (ASSEMBLER_DIALECT)
	    {
	    case ASM_ATT:
	      putc ('*', file);
	      break;

	    case ASM_INTEL:
	      /* Intel syntax. For absolute addresses, registers should not
		 be surrounded by braces.  */
	      if (!REG_P (x))
		{
		  putc ('[', file);
		  ix86_print_operand (file, x, 0);
		  putc (']', file);
		  return;
		}
	      break;

	    default:
	      gcc_unreachable ();
	    }

	  ix86_print_operand (file, x, 0);
	  return;

	case 'L':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('l', file);
	  return;

	case 'W':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('w', file);
	  return;

	case 'B':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('b', file);
	  return;

	case 'Q':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('l', file);
	  return;

	case 'S':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('s', file);
	  return;

	case 'T':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('t', file);
	  return;

	case 'z':
	  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
	    {
	      /* Opcodes don't get size suffixes if using Intel opcodes.  */
	      if (ASSEMBLER_DIALECT == ASM_INTEL)
		return;

	      switch (GET_MODE_SIZE (GET_MODE (x)))
		{
		case 1:
		  putc ('b', file);
		  return;
		case 2:
		  putc ('w', file);
		  return;
		case 4:
		  putc ('l', file);
		  return;
		case 8:
		  putc ('q', file);
		  return;
		default:
		  output_operand_lossage
		    ("invalid operand size for operand code '%c'", code);
		  return;
		}
	    }

	  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
	    warning
	      (0, "non-integer operand used with operand code '%c'", code);
	  /* FALLTHRU */

	case 'Z':
	  /* 387 opcodes don't get size suffixes if using Intel opcodes.  */
	  if (ASSEMBLER_DIALECT == ASM_INTEL)
	    return;

	  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
	    {
	      switch (GET_MODE_SIZE (GET_MODE (x)))
		{
		case 2:
#ifdef HAVE_AS_IX86_FILDS
		  putc ('s', file);
#endif
		  return;
		case 4:
		  putc ('l', file);
		  return;
		case 8:
#ifdef HAVE_AS_IX86_FILDQ
		  putc ('q', file);
#else
		  fputs ("ll", file);
#endif
		  return;
		default:
		  break;
		}
	    }
	  else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
	    {
	      /* 387 opcodes don't get size suffixes
		 if the operands are registers.  */
	      if (STACK_REG_P (x))
		return;

	      switch (GET_MODE_SIZE (GET_MODE (x)))
		{
		case 4:
		  putc ('s', file);
		  return;
		case 8:
		  putc ('l', file);
		  return;
		case 12:
		case 16:
		  putc ('t', file);
		  return;
		default:
		  break;
		}
	    }
	  else
	    {
	      output_operand_lossage
		("invalid operand type used with operand code '%c'", code);
	      return;
	    }

	  output_operand_lossage
	    ("invalid operand size for operand code '%c'", code);
	  return;

	case 'd':
	case 'b':
	case 'w':
	case 'k':
	case 'q':
	case 'h':
	case 't':
	case 'y':
	case 'x':
	case 'X':
	case 'P':
	case 'p':
	  break;

	case 's':
	  if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
	    {
	      ix86_print_operand (file, x, 0);
	      fputs (", ", file);
	    }
	  return;

	case 'D':
	  /* Little bit of braindamage here.  The SSE compare instructions
	     does use completely different names for the comparisons that the
	     fp conditional moves.  */
	  if (TARGET_AVX)
	    {
	      switch (GET_CODE (x))
		{
		case EQ:
		  fputs ("eq", file);
		  break;
		case UNEQ:
		  fputs ("eq_us", file);
		  break;
		case LT:
		  fputs ("lt", file);
		  break;
		case UNLT:
		  fputs ("nge", file);
		  break;
		case LE:
		  fputs ("le", file);
		  break;
		case UNLE:
		  fputs ("ngt", file);
		  break;
		case UNORDERED:
		  fputs ("unord", file);
		  break;
		case NE:
		  fputs ("neq", file);
		  break;
		case LTGT:
		  fputs ("neq_oq", file);
		  break;
		case GE:
		  fputs ("ge", file);
		  break;
		case UNGE:
		  fputs ("nlt", file);
		  break;
		case GT:
		  fputs ("gt", file);
		  break;
		case UNGT:
		  fputs ("nle", file);
		  break;
		case ORDERED:
		  fputs ("ord", file);
		  break;
		default:
		  output_operand_lossage ("operand is not a condition code, "
					  "invalid operand code 'D'");
		  return;
		}
	    }
	  else
	    {
	      switch (GET_CODE (x))
		{
		case EQ:
		case UNEQ:
		  fputs ("eq", file);
		  break;
		case LT:
		case UNLT:
		  fputs ("lt", file);
		  break;
		case LE:
		case UNLE:
		  fputs ("le", file);
		  break;
		case UNORDERED:
		  fputs ("unord", file);
		  break;
		case NE:
		case LTGT:
		  fputs ("neq", file);
		  break;
		case UNGE:
		case GE:
		  fputs ("nlt", file);
		  break;
		case UNGT:
		case GT:
		  fputs ("nle", file);
		  break;
		case ORDERED:
		  fputs ("ord", file);
		  break;
		default:
		  output_operand_lossage ("operand is not a condition code, "
					  "invalid operand code 'D'");
		  return;
		}
	    }
	  return;

	case 'O':
#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    {
	      switch (GET_MODE (x))
		{
		case HImode: putc ('w', file); break;
		case SImode:
		case SFmode: putc ('l', file); break;
		case DImode:
		case DFmode: putc ('q', file); break;
		default: gcc_unreachable ();
		}
	      putc ('.', file);
	    }
#endif
	  return;

	case 'C':
	  if (!COMPARISON_P (x))
	    {
	      output_operand_lossage ("operand is neither a constant nor a "
				      "condition code, invalid operand code "
				      "'C'");
	      return;
	    }
	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
			      0, 0, file);
	  return;

	case 'F':
	  if (!COMPARISON_P (x))
	    {
	      output_operand_lossage ("operand is neither a constant nor a "
				      "condition code, invalid operand code "
				      "'F'");
	      return;
	    }
#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('.', file);
#endif
	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
			      0, 1, file);
	  return;

	  /* Like above, but reverse condition */
	case 'c':
	  /* Check to see if argument to %c is really a constant
	     and not a condition code which needs to be reversed.  */
	  if (!COMPARISON_P (x))
	    {
	      output_operand_lossage ("operand is neither a constant nor a "
				      "condition code, invalid operand "
				      "code 'c'");
	      return;
	    }
	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
			      1, 0, file);
	  return;

	case 'f':
	  if (!COMPARISON_P (x))
	    {
	      output_operand_lossage ("operand is neither a constant nor a "
				      "condition code, invalid operand "
				      "code 'f'");
	      return;
	    }
#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('.', file);
#endif
	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
			      1, 1, file);
	  return;

	case 'H':
	  /* It doesn't actually matter what mode we use here, as we're
	     only going to use this for printing.  */
	  x = adjust_address_nv (x, DImode, 8);
	  break;

	case '+':
	  {
	    rtx x;

	    if (!optimize
		|| optimize_function_for_size_p (cfun)
		|| !TARGET_BRANCH_PREDICTION_HINTS)
	      return;

	    x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
	    if (x)
	      {
		int pred_val = INTVAL (XEXP (x, 0));

		if (pred_val < REG_BR_PROB_BASE * 45 / 100
		    || pred_val > REG_BR_PROB_BASE * 55 / 100)
		  {
		    int taken = pred_val > REG_BR_PROB_BASE / 2;
		    int cputaken
		      = final_forward_branch_p (current_output_insn) == 0;

		    /* Emit hints only in the case default branch prediction
		       heuristics would fail.  */
		    if (taken != cputaken)
		      {
			/* We use 3e (DS) prefix for taken branches and
			   2e (CS) prefix for not taken branches.  */
			if (taken)
			  fputs ("ds ; ", file);
			else
			  fputs ("cs ; ", file);
		      }
		  }
	      }
	    return;
	  }

	case 'Y':
	  switch (GET_CODE (x))
	    {
	    case NE:
	      fputs ("neq", file);
	      break;
	    case EQ:
	      fputs ("eq", file);
	      break;
	    case GE:
	    case GEU:
	      fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
	      break;
	    case GT:
	    case GTU:
	      fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
	      break;
	    case LE:
	    case LEU:
	      fputs ("le", file);
	      break;
	    case LT:
	    case LTU:
	      fputs ("lt", file);
	      break;
	    case UNORDERED:
	      fputs ("unord", file);
	      break;
	    case ORDERED:
	      fputs ("ord", file);
	      break;
	    case UNEQ:
	      fputs ("ueq", file);
	      break;
	    case UNGE:
	      fputs ("nlt", file);
	      break;
	    case UNGT:
	      fputs ("nle", file);
	      break;
	    case UNLE:
	      fputs ("ule", file);
	      break;
	    case UNLT:
	      fputs ("ult", file);
	      break;
	    case LTGT:
	      fputs ("une", file);
	      break;
	    default:
	      output_operand_lossage ("operand is not a condition code, "
				      "invalid operand code 'Y'");
	      return;
	    }
	  return;

	case ';':
#ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
	  putc (';', file);
#endif
	  return;

	case '@':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('%', file);

	  /* The kernel uses a different segment register for performance
	     reasons; a system call would not have to trash the userspace
	     segment register, which would be expensive.  */
	  if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
	    fputs ("fs", file);
	  else
	    fputs ("gs", file);
	  return;

	default:
	  output_operand_lossage ("invalid operand code '%c'", code);
	}
    }

  if (REG_P (x))
    print_reg (x, code, file);

  else if (MEM_P (x))
    {
      /* No `byte ptr' prefix for call instructions or BLKmode operands.  */
      if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
	  && GET_MODE (x) != BLKmode)
	{
	  const char *size;
	  switch (GET_MODE_SIZE (GET_MODE (x)))
	    {
	    case 1: size = "BYTE"; break;
	    case 2: size = "WORD"; break;
	    case 4: size = "DWORD"; break;
	    case 8: size = "QWORD"; break;
	    case 12: size = "TBYTE"; break;
	    case 16:
	      if (GET_MODE (x) == XFmode)
		size = "TBYTE";
	      else
		size = "XMMWORD";
	      break;
	    case 32: size = "YMMWORD"; break;
	    default:
	      gcc_unreachable ();
	    }

	  /* Check for explicit size override (codes 'b', 'w' and 'k')  */
	  if (code == 'b')
	    size = "BYTE";
	  else if (code == 'w')
	    size = "WORD";
	  else if (code == 'k')
	    size = "DWORD";

	  fputs (size, file);
	  fputs (" PTR ", file);
	}

      x = XEXP (x, 0);
      /* Avoid (%rip) for call operands.  */
      if (CONSTANT_ADDRESS_P (x) && code == 'P'
	  && !CONST_INT_P (x))
	output_addr_const (file, x);
      else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
	output_operand_lossage ("invalid constraints for operand");
      else
	output_address (x);
    }

  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
    {
      REAL_VALUE_TYPE r;
      long l;

      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
      REAL_VALUE_TO_TARGET_SINGLE (r, l);

      if (ASSEMBLER_DIALECT == ASM_ATT)
	putc ('$', file);
      /* Sign extend 32bit SFmode immediate to 8 bytes.  */
      if (code == 'q')
	fprintf (file, "0x%08llx", (unsigned long long) (int) l);
      else
	fprintf (file, "0x%08x", (unsigned int) l);
    }

  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
    {
      REAL_VALUE_TYPE r;
      long l[2];

      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
      REAL_VALUE_TO_TARGET_DOUBLE (r, l);

      if (ASSEMBLER_DIALECT == ASM_ATT)
	putc ('$', file);
      fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
    }

  /* These float cases don't actually occur as immediate operands.  */
  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
    {
      char dstr[30];

      real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr),
		       0, 1);
      fputs (dstr, file);
    }

  else
    {
      /* We have patterns that allow zero sets of memory, for instance.
	 In 64-bit mode, we should probably support all 8-byte vectors,
	 since we can in fact encode that into an immediate.  */
      if (GET_CODE (x) == CONST_VECTOR)
	{
	  gcc_assert (x == CONST0_RTX (GET_MODE (x)));
	  x = const0_rtx;
	}

      if (code != 'P' && code != 'p')
	{
	  if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
	    {
	      if (ASSEMBLER_DIALECT == ASM_ATT)
		putc ('$', file);
	    }
	  else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
		   || GET_CODE (x) == LABEL_REF)
	    {
	      if (ASSEMBLER_DIALECT == ASM_ATT)
		putc ('$', file);
	      else
		fputs ("OFFSET FLAT:", file);
	    }
	}
      if (CONST_INT_P (x))
	fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
      else if (flag_pic || MACHOPIC_INDIRECT)
	output_pic_addr_const (file, x, code);
      else
	output_addr_const (file, x);
    }
}
/* Implements TARGET_PRINT_OPERAND_PUNCT_VALID_P.  */

static bool
ix86_print_operand_punct_valid_p (unsigned char code)
{
  return (code == '@' || code == '*' || code == '+'
	  || code == '&' || code == ';');
}
/* Print a memory operand whose address is ADDR.  */

static void
ix86_print_operand_address (FILE *file, rtx addr)
{
  struct ix86_address parts;
  rtx base, index, disp;
  int scale;
  int ok = ix86_decompose_address (addr, &parts);

  gcc_assert (ok);

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;

  switch (parts.seg)
    {
    case SEG_DEFAULT:
      break;
    case SEG_FS:
    case SEG_GS:
      if (ASSEMBLER_DIALECT == ASM_ATT)
	putc ('%', file);
      fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
      break;
    default:
      gcc_unreachable ();
    }

  /* Use one byte shorter RIP relative addressing for 64bit mode.  */
  if (TARGET_64BIT && !base && !index)
    {
      rtx symbol = disp;

      if (GET_CODE (disp) == CONST
	  && GET_CODE (XEXP (disp, 0)) == PLUS
	  && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
	symbol = XEXP (XEXP (disp, 0), 0);

      if (GET_CODE (symbol) == LABEL_REF
	  || (GET_CODE (symbol) == SYMBOL_REF
	      && SYMBOL_REF_TLS_MODEL (symbol) == 0))
	base = pc_rtx;
    }
  if (!base && !index)
    {
      /* Displacement only requires special attention.  */

      if (CONST_INT_P (disp))
	{
	  if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
	    fputs ("ds:", file);
	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
	}
      else if (flag_pic)
	output_pic_addr_const (file, disp, 0);
      else
	output_addr_const (file, disp);
    }
  else
    {
      /* Print DImode registers on 64bit targets to avoid addr32 prefixes.  */
      int code = TARGET_64BIT ? 'q' : 0;

      if (ASSEMBLER_DIALECT == ASM_ATT)
	{
	  if (disp)
	    {
	      if (flag_pic)
		output_pic_addr_const (file, disp, 0);
	      else if (GET_CODE (disp) == LABEL_REF)
		output_asm_label (disp);
	      else
		output_addr_const (file, disp);
	    }

	  putc ('(', file);
	  if (base)
	    print_reg (base, code, file);
	  if (index)
	    {
	      putc (',', file);
	      print_reg (index, code, file);
	      if (scale != 1)
		fprintf (file, ",%d", scale);
	    }
	  putc (')', file);
	}
      else
	{
	  rtx offset = NULL_RTX;

	  if (disp)
	    {
	      /* Pull out the offset of a symbol; print any symbol itself.  */
	      if (GET_CODE (disp) == CONST
		  && GET_CODE (XEXP (disp, 0)) == PLUS
		  && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
		{
		  offset = XEXP (XEXP (disp, 0), 1);
		  disp = gen_rtx_CONST (VOIDmode,
					XEXP (XEXP (disp, 0), 0));
		}

	      if (flag_pic)
		output_pic_addr_const (file, disp, 0);
	      else if (GET_CODE (disp) == LABEL_REF)
		output_asm_label (disp);
	      else if (CONST_INT_P (disp))
		offset = disp;
	      else
		output_addr_const (file, disp);
	    }

	  putc ('[', file);
	  if (base)
	    {
	      print_reg (base, code, file);
	      if (offset)
		{
		  if (INTVAL (offset) >= 0)
		    putc ('+', file);
		  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
		}
	    }
	  else if (offset)
	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
	  else
	    putc ('0', file);

	  if (index)
	    {
	      putc ('+', file);
	      print_reg (index, code, file);
	      if (scale != 1)
		fprintf (file, "*%d", scale);
	    }
	  putc (']', file);
	}
    }
}
/* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA.  */

static bool
i386_asm_output_addr_const_extra (FILE *file, rtx x)
{
  rtx op;

  if (GET_CODE (x) != UNSPEC)
    return false;

  op = XVECEXP (x, 0, 0);
  switch (XINT (x, 1))
    {
    case UNSPEC_GOTTPOFF:
      output_addr_const (file, op);
      /* FIXME: This might be @TPOFF in Sun ld.  */
      fputs ("@gottpoff", file);
      break;
    case UNSPEC_TPOFF:
      output_addr_const (file, op);
      fputs ("@tpoff", file);
      break;
    case UNSPEC_NTPOFF:
      output_addr_const (file, op);
      if (TARGET_64BIT)
	fputs ("@tpoff", file);
      else
	fputs ("@ntpoff", file);
      break;
    case UNSPEC_DTPOFF:
      output_addr_const (file, op);
      fputs ("@dtpoff", file);
      break;
    case UNSPEC_GOTNTPOFF:
      output_addr_const (file, op);
      if (TARGET_64BIT)
	fputs (ASSEMBLER_DIALECT == ASM_ATT ?
	       "@gottpoff(%rip)" : "@gottpoff[rip]", file);
      else
	fputs ("@gotntpoff", file);
      break;
    case UNSPEC_INDNTPOFF:
      output_addr_const (file, op);
      fputs ("@indntpoff", file);
      break;
#if TARGET_MACHO
    case UNSPEC_MACHOPIC_OFFSET:
      output_addr_const (file, op);
      putc ('-', file);
      machopic_output_function_base_name (file);
      break;
#endif

    case UNSPEC_STACK_CHECK:
      {
	int offset;

	gcc_assert (flag_split_stack);

#ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
	offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
#else
	gcc_unreachable ();
#endif

	fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
      }
      break;

    default:
      return false;
    }

  return true;
}
/* Split one or more double-mode RTL references into pairs of half-mode
   references.  The RTL can be REG, offsettable MEM, integer constant, or
   CONST_DOUBLE.  "operands" is a pointer to an array of double-mode RTLs to
   split and "num" is its length.  lo_half and hi_half are output arrays
   that parallel "operands".  */

void
split_double_mode (enum machine_mode mode, rtx operands[],
		   int num, rtx lo_half[], rtx hi_half[])
{
  enum machine_mode half_mode;
  unsigned int byte;

  switch (mode)
    {
    case TImode:
      half_mode = DImode;
      break;
    case DImode:
      half_mode = SImode;
      break;
    default:
      gcc_unreachable ();
    }

  byte = GET_MODE_SIZE (half_mode);

  while (num--)
    {
      rtx op = operands[num];

      /* simplify_subreg refuse to split volatile memory addresses,
	 but we still have to handle it.  */
      if (MEM_P (op))
	{
	  lo_half[num] = adjust_address (op, half_mode, 0);
	  hi_half[num] = adjust_address (op, half_mode, byte);
	}
      else
	{
	  lo_half[num] = simplify_gen_subreg (half_mode, op,
					      GET_MODE (op) == VOIDmode
					      ? mode : GET_MODE (op), 0);
	  hi_half[num] = simplify_gen_subreg (half_mode, op,
					      GET_MODE (op) == VOIDmode
					      ? mode : GET_MODE (op), byte);
	}
    }
}
/* Output code to perform a 387 binary operation in INSN, one of PLUS,
   MINUS, MULT or DIV.  OPERANDS are the insn operands, where operands[3]
   is the expression of the binary operation.  The output may either be
   emitted here, or returned to the caller, like all output_* functions.

   There is no guarantee that the operands are the same mode, as they
   might be within FLOAT or FLOAT_EXTEND expressions.  */

#ifndef SYSV386_COMPAT
/* Set to 1 for compatibility with brain-damaged assemblers.  No-one
   wants to fix the assemblers because that causes incompatibility
   with gcc.  No-one wants to fix gcc because that causes
   incompatibility with assemblers...  You can use the option of
   -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way.  */
#define SYSV386_COMPAT 1
#endif

const char *
output_387_binary_op (rtx insn, rtx *operands)
{
  static char buf[40];
  const char *p;
  const char *ssep;
  int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1])
    || SSE_REG_P (operands[2]);

#ifdef ENABLE_CHECKING
  /* Even if we do not want to check the inputs, this documents input
     constraints.  Which helps in understanding the following code.  */
  if (STACK_REG_P (operands[0])
      && ((REG_P (operands[1])
	   && REGNO (operands[0]) == REGNO (operands[1])
	   && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
	  || (REG_P (operands[2])
	      && REGNO (operands[0]) == REGNO (operands[2])
	      && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
      && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
    ; /* ok */
  else
    gcc_assert (is_sse);
#endif

  switch (GET_CODE (operands[3]))
    {
    case PLUS:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fiadd";
      else
	p = "fadd";
      ssep = "vadd";
      break;

    case MINUS:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fisub";
      else
	p = "fsub";
      ssep = "vsub";
      break;

    case MULT:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fimul";
      else
	p = "fmul";
      ssep = "vmul";
      break;

    case DIV:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fidiv";
      else
	p = "fdiv";
      ssep = "vdiv";
      break;

    default:
      gcc_unreachable ();
    }

  if (is_sse)
    {
      if (TARGET_AVX)
	{
	  strcpy (buf, ssep);
	  if (GET_MODE (operands[0]) == SFmode)
	    strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
	  else
	    strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
	}
      else
	{
	  strcpy (buf, ssep + 1);
	  if (GET_MODE (operands[0]) == SFmode)
	    strcat (buf, "ss\t{%2, %0|%0, %2}");
	  else
	    strcat (buf, "sd\t{%2, %0|%0, %2}");
	}
      return buf;
    }
  strcpy (buf, p);

  switch (GET_CODE (operands[3]))
    {
    case MULT:
    case PLUS:
      if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
	{
	  rtx temp = operands[2];
	  operands[2] = operands[1];
	  operands[1] = temp;
	}

      /* know operands[0] == operands[1].  */

      if (MEM_P (operands[2]))
	{
	  p = "%Z2\t%2";
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
	{
	  if (STACK_TOP_P (operands[0]))
	    /* How is it that we are storing to a dead operand[2]?
	       Well, presumably operands[1] is dead too.  We can't
	       store the result to st(0) as st(0) gets popped on this
	       instruction.  Instead store to operands[2] (which I
	       think has to be st(1)).  st(1) will be popped later.
	       gcc <= 2.8.1 didn't have this check and generated
	       assembly code that the Unixware assembler rejected.  */
	    p = "p\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
	  else
	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
	  break;
	}

      if (STACK_TOP_P (operands[0]))
	p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
      else
	p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
      break;

    case MINUS:
    case DIV:
      if (MEM_P (operands[1]))
	{
	  p = "r%Z1\t%1";
	  break;
	}

      if (MEM_P (operands[2]))
	{
	  p = "%Z2\t%2";
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
	{
#if SYSV386_COMPAT
	  /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
	     derived assemblers, confusingly reverse the direction of
	     the operation for fsub{r} and fdiv{r} when the
	     destination register is not st(0).  The Intel assembler
	     doesn't have this brain damage.  Read !SYSV386_COMPAT to
	     figure out what the hardware really does.  */
	  if (STACK_TOP_P (operands[0]))
	    p = "{p\t%0, %2|rp\t%2, %0}";
	  else
	    p = "{rp\t%2, %0|p\t%0, %2}";
#else
	  if (STACK_TOP_P (operands[0]))
	    /* As above for fmul/fadd, we can't store to st(0).  */
	    p = "rp\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
	  else
	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
#endif
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
	{
#if SYSV386_COMPAT
	  if (STACK_TOP_P (operands[0]))
	    p = "{rp\t%0, %1|p\t%1, %0}";
	  else
	    p = "{p\t%1, %0|rp\t%0, %1}";
#else
	  if (STACK_TOP_P (operands[0]))
	    p = "p\t{%0, %1|%1, %0}";	/* st(1) = st(1) op st(0); pop */
	  else
	    p = "rp\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2); pop */
#endif
	  break;
	}

      if (STACK_TOP_P (operands[0]))
	{
	  if (STACK_TOP_P (operands[1]))
	    p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
	  else
	    p = "r\t{%y1, %0|%0, %y1}";	/* st(0) = st(r1) op st(0) */
	  break;
	}
      else if (STACK_TOP_P (operands[1]))
	{
#if SYSV386_COMPAT
	  p = "{\t%1, %0|r\t%0, %1}";
#else
	  p = "r\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2) */
#endif
	}
      else
	{
#if SYSV386_COMPAT
	  p = "{r\t%2, %0|\t%0, %2}";
#else
	  p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
#endif
	}
      break;

    default:
      gcc_unreachable ();
    }

  strcat (buf, p);
  return buf;
}
/* Return needed mode for entity in optimize_mode_switching pass.  */

int
ix86_mode_needed (int entity, rtx insn)
{
  enum attr_i387_cw mode;

  /* The mode UNINITIALIZED is used to store control word after a
     function call or ASM pattern.  The mode ANY specify that function
     has no requirements on the control word and make no changes in the
     bits we are interested in.  */

  if (CALL_P (insn)
      || (NONJUMP_INSN_P (insn)
	  && (asm_noperands (PATTERN (insn)) >= 0
	      || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
    return I387_CW_UNINITIALIZED;

  if (recog_memoized (insn) < 0)
    return I387_CW_ANY;

  mode = get_attr_i387_cw (insn);

  switch (entity)
    {
    case I387_TRUNC:
      if (mode == I387_CW_TRUNC)
	return mode;
      break;

    case I387_FLOOR:
      if (mode == I387_CW_FLOOR)
	return mode;
      break;

    case I387_CEIL:
      if (mode == I387_CW_CEIL)
	return mode;
      break;

    case I387_MASK_PM:
      if (mode == I387_CW_MASK_PM)
	return mode;
      break;

    default:
      gcc_unreachable ();
    }

  return I387_CW_ANY;
}
/* Output code to initialize control word copies used by trunc?f?i and
   rounding patterns.  CURRENT_MODE is set to current control word,
   while NEW_MODE is set to new control word.  */

void
emit_i387_cw_initialization (int mode)
{
  rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
  rtx new_mode;

  enum ix86_stack_slot slot;

  rtx reg = gen_reg_rtx (HImode);

  emit_insn (gen_x86_fnstcw_1 (stored_mode));
  emit_move_insn (reg, copy_rtx (stored_mode));

  if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
      || optimize_function_for_size_p (cfun))
    {
      switch (mode)
	{
	case I387_CW_TRUNC:
	  /* round toward zero (truncate) */
	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
	  slot = SLOT_CW_TRUNC;
	  break;

	case I387_CW_FLOOR:
	  /* round down toward -oo */
	  emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
	  slot = SLOT_CW_FLOOR;
	  break;

	case I387_CW_CEIL:
	  /* round up toward +oo */
	  emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
	  slot = SLOT_CW_CEIL;
	  break;

	case I387_CW_MASK_PM:
	  /* mask precision exception for nearbyint() */
	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
	  slot = SLOT_CW_MASK_PM;
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  else
    {
      switch (mode)
	{
	case I387_CW_TRUNC:
	  /* round toward zero (truncate) */
	  emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
	  slot = SLOT_CW_TRUNC;
	  break;

	case I387_CW_FLOOR:
	  /* round down toward -oo */
	  emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
	  slot = SLOT_CW_FLOOR;
	  break;

	case I387_CW_CEIL:
	  /* round up toward +oo */
	  emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
	  slot = SLOT_CW_CEIL;
	  break;

	case I387_CW_MASK_PM:
	  /* mask precision exception for nearbyint() */
	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
	  slot = SLOT_CW_MASK_PM;
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  gcc_assert (slot < MAX_386_STACK_LOCALS);

  new_mode = assign_386_stack_local (HImode, slot);
  emit_move_insn (new_mode, reg);
}
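/* Background on the constants above: the x87 control word encodes the
   rounding mode in bits 10-11 (mask 0x0c00): 00 = to nearest, 01 = down
   (floor), 10 = up (ceil), 11 = toward zero (truncate); bit 5 (0x0020)
   masks the precision exception.  The or/and sequences set exactly
   those fields while preserving the rest of the caller's word.  */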
/* Output code for INSN to convert a float to a signed int.  OPERANDS
   are the insn operands.  The output may be [HSD]Imode and the input
   operand may be [SDX]Fmode.  */

const char *
output_fix_trunc (rtx insn, rtx *operands, bool fisttp)
{
  int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
  int dimode_p = GET_MODE (operands[0]) == DImode;
  int round_mode = get_attr_i387_cw (insn);

  /* Jump through a hoop or two for DImode, since the hardware has no
     non-popping instruction.  We used to do this a different way, but
     that was somewhat fragile and broke with post-reload splitters.  */
  if ((dimode_p || fisttp) && !stack_top_dies)
    output_asm_insn ("fld\t%y1", operands);

  gcc_assert (STACK_TOP_P (operands[1]));
  gcc_assert (MEM_P (operands[0]));
  gcc_assert (GET_MODE (operands[1]) != TFmode);

  if (fisttp)
      output_asm_insn ("fisttp%Z0\t%0", operands);
  else
    {
      if (round_mode != I387_CW_ANY)
	output_asm_insn ("fldcw\t%3", operands);
      if (stack_top_dies || dimode_p)
	output_asm_insn ("fistp%Z0\t%0", operands);
      else
	output_asm_insn ("fist%Z0\t%0", operands);
      if (round_mode != I387_CW_ANY)
	output_asm_insn ("fldcw\t%2", operands);
    }

  return "";
}
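/* Typical emitted sequence for the non-fisttp path above (a sketch;
   operand numbers resolve to stack slots holding the two control
   words):

       fldcw   new_cw      ; switch rounding mode to truncate
       fistpl  dest        ; store converted value, pop st(0)
       fldcw   old_cw      ; restore the caller's control word

   SSE3's fisttp truncates regardless of the control word, which is why
   the fisttp path skips the fldcw dance entirely.  */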
/* Output code for x87 ffreep insn.  The OPNO argument, which may only
   have the values zero or one, indicates the ffreep insn's operand
   from the OPERANDS array.  */

static const char *
output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
{
  if (TARGET_USE_FFREEP)
#ifdef HAVE_AS_IX86_FFREEP
    return opno ? "ffreep\t%y1" : "ffreep\t%y0";
#else
    {
      static char retval[32];
      int regno = REGNO (operands[opno]);

      gcc_assert (FP_REGNO_P (regno));

      regno -= FIRST_STACK_REG;

      snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
      return retval;
    }
#endif

  return opno ? "fstp\t%y1" : "fstp\t%y0";
}
/* Output code for INSN to compare OPERANDS.  EFLAGS_P is 1 when fcomi
   should be used.  UNORDERED_P is true when fucom should be used.  */

const char *
output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
{
  int stack_top_dies;
  rtx cmp_op0, cmp_op1;
  int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);

  if (eflags_p)
    {
      cmp_op0 = operands[0];
      cmp_op1 = operands[1];
    }
  else
    {
      cmp_op0 = operands[1];
      cmp_op1 = operands[2];
    }

  if (is_sse)
    {
      static const char ucomiss[] = "vucomiss\t{%1, %0|%0, %1}";
      static const char ucomisd[] = "vucomisd\t{%1, %0|%0, %1}";
      static const char comiss[] = "vcomiss\t{%1, %0|%0, %1}";
      static const char comisd[] = "vcomisd\t{%1, %0|%0, %1}";

      if (GET_MODE (operands[0]) == SFmode)
	{
	  if (unordered_p)
	    return &ucomiss[TARGET_AVX ? 0 : 1];
	  else
	    return &comiss[TARGET_AVX ? 0 : 1];
	}
      else
	{
	  if (unordered_p)
	    return &ucomisd[TARGET_AVX ? 0 : 1];
	  else
	    return &comisd[TARGET_AVX ? 0 : 1];
	}
    }

  gcc_assert (STACK_TOP_P (cmp_op0));

  stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;

  if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
    {
      if (stack_top_dies)
	{
	  output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
	  return output_387_ffreep (operands, 1);
	}
      else
	return "ftst\n\tfnstsw\t%0";
    }

  if (STACK_REG_P (cmp_op1)
      && stack_top_dies
      && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
      && REGNO (cmp_op1) != FIRST_STACK_REG)
    {
      /* If both the top of the 387 stack dies, and the other operand
	 is also a stack register that dies, then this must be a
	 `fcompp' float compare */

      if (eflags_p)
	{
	  /* There is no double popping fcomi variant.  Fortunately,
	     eflags is immune from the fstp's cc clobbering.  */
	  if (unordered_p)
	    output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
	  else
	    output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
	  return output_387_ffreep (operands, 0);
	}
      else
	{
	  if (unordered_p)
	    return "fucompp\n\tfnstsw\t%0";
	  else
	    return "fcompp\n\tfnstsw\t%0";
	}
    }
  else
    {
      /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies.  */

      static const char * const alt[16] =
      {
	"fcom%Z2\t%y2\n\tfnstsw\t%0",
	"fcomp%Z2\t%y2\n\tfnstsw\t%0",
	"fucom%Z2\t%y2\n\tfnstsw\t%0",
	"fucomp%Z2\t%y2\n\tfnstsw\t%0",

	"ficom%Z2\t%y2\n\tfnstsw\t%0",
	"ficomp%Z2\t%y2\n\tfnstsw\t%0",
	NULL,
	NULL,

	"fcomi\t{%y1, %0|%0, %y1}",
	"fcomip\t{%y1, %0|%0, %y1}",
	"fucomi\t{%y1, %0|%0, %y1}",
	"fucomip\t{%y1, %0|%0, %y1}",

	NULL,
	NULL,
	NULL,
	NULL
      };

      int mask;
      const char *ret;

      mask  = eflags_p << 3;
      mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
      mask |= unordered_p << 1;
      mask |= stack_top_dies;

      gcc_assert (mask < 16);
      ret = alt[mask];
      gcc_assert (ret);

      return ret;
    }
}
void
ix86_output_addr_vec_elt (FILE *file, int value)
{
  const char *directive = ASM_LONG;

#ifdef ASM_QUAD
  if (TARGET_64BIT)
    directive = ASM_QUAD;
#else
  gcc_assert (!TARGET_64BIT);
#endif

  fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
}
void
ix86_output_addr_diff_elt (FILE *file, int value, int rel)
{
  const char *directive = ASM_LONG;

#ifdef ASM_QUAD
  if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
    directive = ASM_QUAD;
#else
  gcc_assert (!TARGET_64BIT);
#endif
  /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand.  */
  if (TARGET_64BIT || TARGET_VXWORKS_RTP)
    fprintf (file, "%s%s%d-%s%d\n",
	     directive, LPREFIX, value, LPREFIX, rel);
  else if (HAVE_AS_GOTOFF_IN_DATA)
    fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
#if TARGET_MACHO
  else if (TARGET_MACHO)
    {
      fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
      machopic_output_function_base_name (file);
      putc ('\n', file);
    }
#endif
  else
    asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
		 GOT_SYMBOL_NAME, LPREFIX, value);
}
/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
   for the target.  */

void
ix86_expand_clear (rtx dest)
{
  rtx tmp;

  /* We play register width games, which are only valid after reload.  */
  gcc_assert (reload_completed);

  /* Avoid HImode and its attendant prefix byte.  */
  if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
    dest = gen_rtx_REG (SImode, REGNO (dest));
  tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);

  /* This predicate should match that for movsi_xor and movdi_xor_rex64.  */
  if (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ())
    {
      rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
    }

  emit_insn (tmp);
}
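/* Example (illustrative only; not part of the original source): clearing
   %eax with the PARALLEL above matches the movsi_xor pattern and emits
   "xorl %eax, %eax" (2 bytes, clobbers the flags); the plain SET instead
   becomes "movl $0, %eax" (5 bytes, flags preserved), which is why the
   xor form is skipped when TARGET_USE_MOV0 asks for mov and we are not
   optimizing the insn for speed.  */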
/* X is an unchanging MEM.  If it is a constant pool reference, return
   the constant pool rtx, else NULL.  */

rtx
maybe_get_pool_constant (rtx x)
{
  x = ix86_delegitimize_address (XEXP (x, 0));

  if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
    return get_pool_constant (x);

  return NULL_RTX;
}
void
ix86_expand_move (enum machine_mode mode, rtx operands[])
{
  rtx op0, op1;
  enum tls_model model;

  op0 = operands[0];
  op1 = operands[1];

  if (GET_CODE (op1) == SYMBOL_REF)
    {
      model = SYMBOL_REF_TLS_MODEL (op1);
      if (model)
	{
	  op1 = legitimize_tls_address (op1, model, true);
	  op1 = force_operand (op1, op0);
	  if (op1 == op0)
	    return;
	  if (GET_MODE (op1) != mode)
	    op1 = convert_to_mode (mode, op1, 1);
	}
      else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
	       && SYMBOL_REF_DLLIMPORT_P (op1))
	op1 = legitimize_dllimport_symbol (op1, false);
    }
  else if (GET_CODE (op1) == CONST
	   && GET_CODE (XEXP (op1, 0)) == PLUS
	   && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
    {
      rtx addend = XEXP (XEXP (op1, 0), 1);
      rtx symbol = XEXP (XEXP (op1, 0), 0);
      rtx tmp = NULL;

      model = SYMBOL_REF_TLS_MODEL (symbol);
      if (model)
	tmp = legitimize_tls_address (symbol, model, true);
      else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
	       && SYMBOL_REF_DLLIMPORT_P (symbol))
	tmp = legitimize_dllimport_symbol (symbol, true);

      if (tmp)
	{
	  tmp = force_operand (tmp, NULL);
	  tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
				     op0, 1, OPTAB_DIRECT);
	  if (tmp == op0)
	    return;
	  if (GET_MODE (tmp) != mode)
	    op1 = convert_to_mode (mode, tmp, 1);
	}
    }

  if ((flag_pic || MACHOPIC_INDIRECT)
      && (mode == SImode || mode == DImode)
      && symbolic_operand (op1, mode))
    {
#if TARGET_MACHO
      if (TARGET_MACHO && !TARGET_64BIT)
	{
	  /* dynamic-no-pic */
	  if (MACHOPIC_INDIRECT)
	    {
	      rtx temp = ((reload_in_progress
			   || ((op0 && REG_P (op0))
			       && mode == Pmode))
			  ? op0 : gen_reg_rtx (Pmode));
	      op1 = machopic_indirect_data_reference (op1, temp);
	      if (MACHOPIC_PURE)
		op1 = machopic_legitimize_pic_address (op1, mode,
						       temp == op1 ? 0 : temp);
	    }
	  if (op0 != op1 && GET_CODE (op0) != MEM)
	    {
	      rtx insn = gen_rtx_SET (VOIDmode, op0, op1);
	      emit_insn (insn);
	      return;
	    }
	  if (GET_CODE (op0) == MEM)
	    op1 = force_reg (Pmode, op1);
	  else
	    {
	      rtx temp = op0;
	      if (GET_CODE (temp) != REG)
		temp = gen_reg_rtx (Pmode);
	      temp = legitimize_pic_address (op1, temp);
	      if (temp == op0)
		return;
	      op1 = temp;
	    }
	  /* dynamic-no-pic */
	}
      else
#endif
	{
	  if (MEM_P (op0))
	    op1 = force_reg (mode, op1);
	  else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, mode))
	    {
	      rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
	      op1 = legitimize_pic_address (op1, reg);
	      if (op0 == op1)
		return;
	      if (GET_MODE (op1) != mode)
		op1 = convert_to_mode (mode, op1, 1);
	    }
	}
    }
  else
    {
      if (MEM_P (op0)
	  && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
	      || !push_operand (op0, mode))
	  && MEM_P (op1))
	op1 = force_reg (mode, op1);

      if (push_operand (op0, mode)
	  && ! general_no_elim_operand (op1, mode))
	op1 = copy_to_mode_reg (mode, op1);

      /* Force large constants in 64bit compilation into register
	 to get them CSEed.  */
      if (can_create_pseudo_p ()
	  && (mode == DImode) && TARGET_64BIT
	  && immediate_operand (op1, mode)
	  && !x86_64_zext_immediate_operand (op1, VOIDmode)
	  && !register_operand (op0, mode)
	  && optimize)
	op1 = copy_to_mode_reg (mode, op1);

      if (can_create_pseudo_p ()
	  && FLOAT_MODE_P (mode)
	  && GET_CODE (op1) == CONST_DOUBLE)
	{
	  /* If we are loading a floating point constant to a register,
	     force the value to memory now, since we'll get better code
	     out the back end.  */

	  op1 = validize_mem (force_const_mem (mode, op1));
	  if (!register_operand (op0, mode))
	    {
	      rtx temp = gen_reg_rtx (mode);
	      emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
	      emit_move_insn (op0, temp);
	      return;
	    }
	}
    }

  emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
}
void
ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
{
  rtx op0 = operands[0], op1 = operands[1];
  unsigned int align = GET_MODE_ALIGNMENT (mode);

  /* Force constants other than zero into memory.  We do not know how
     the instructions used to build constants modify the upper 64 bits
     of the register, once we have that information we may be able
     to handle some of them more efficiently.  */
  if (can_create_pseudo_p ()
      && register_operand (op0, mode)
      && (CONSTANT_P (op1)
	  || (GET_CODE (op1) == SUBREG
	      && CONSTANT_P (SUBREG_REG (op1))))
      && !standard_sse_constant_p (op1))
    op1 = validize_mem (force_const_mem (mode, op1));

  /* We need to check memory alignment for SSE mode since attribute
     can make operands unaligned.  */
  if (can_create_pseudo_p ()
      && SSE_REG_MODE_P (mode)
      && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
	  || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
    {
      rtx tmp[2];

      /* ix86_expand_vector_move_misalign() does not like constants ... */
      if (CONSTANT_P (op1)
	  || (GET_CODE (op1) == SUBREG
	      && CONSTANT_P (SUBREG_REG (op1))))
	op1 = validize_mem (force_const_mem (mode, op1));

      /* ... nor both arguments in memory.  */
      if (!register_operand (op0, mode)
	  && !register_operand (op1, mode))
	op1 = force_reg (mode, op1);

      tmp[0] = op0; tmp[1] = op1;
      ix86_expand_vector_move_misalign (mode, tmp);
      return;
    }

  /* Make operand1 a register if it isn't already.  */
  if (can_create_pseudo_p ()
      && !register_operand (op0, mode)
      && !register_operand (op1, mode))
    {
      emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
      return;
    }

  emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
}
/* Split 32-byte AVX unaligned load and store if needed.  */

static void
ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
{
  rtx m;
  rtx (*extract) (rtx, rtx, rtx);
  rtx (*move_unaligned) (rtx, rtx);
  enum machine_mode mode;

  switch (GET_MODE (op0))
    {
    default:
      gcc_unreachable ();
    case V32QImode:
      extract = gen_avx_vextractf128v32qi;
      move_unaligned = gen_avx_movdqu256;
      mode = V16QImode;
      break;
    case V8SFmode:
      extract = gen_avx_vextractf128v8sf;
      move_unaligned = gen_avx_movups256;
      mode = V4SFmode;
      break;
    case V4DFmode:
      extract = gen_avx_vextractf128v4df;
      move_unaligned = gen_avx_movupd256;
      mode = V2DFmode;
      break;
    }

  if (MEM_P (op1) && TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
    {
      rtx r = gen_reg_rtx (mode);
      m = adjust_address (op1, mode, 0);
      emit_move_insn (r, m);
      m = adjust_address (op1, mode, 16);
      r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
      emit_move_insn (op0, r);
    }
  else if (MEM_P (op0) && TARGET_AVX256_SPLIT_UNALIGNED_STORE)
    {
      m = adjust_address (op0, mode, 0);
      emit_insn (extract (m, op1, const0_rtx));
      m = adjust_address (op0, mode, 16);
      emit_insn (extract (m, op1, const1_rtx));
    }
  else
    emit_insn (move_unaligned (op0, op1));
}
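/* Example (illustrative only; not part of the original source, register
   names invented): a split unaligned V8SFmode load becomes roughly

	vmovups	(%rax), %xmm0			# low 16 bytes
	vinsertf128 $1, 16(%rax), %ymm0, %ymm0	# high 16 bytes

   while a split store uses vextractf128 $0 and $1 to write the two
   16-byte halves of the destination separately.  */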
/* Implement the movmisalign patterns for SSE.  Non-SSE modes go
   straight to ix86_expand_vector_move.  */
/* Code generation for scalar reg-reg moves of single and double precision data:
     if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
       movaps reg, reg
     else
       movss reg, reg
     if (x86_sse_partial_reg_dependency == true)
       movapd reg, reg
     else
       movsd reg, reg

   Code generation for scalar loads of double precision data:
     if (x86_sse_split_regs == true)
       movlpd mem, reg      (gas syntax)
     else
       movsd mem, reg

   Code generation for unaligned packed loads of single precision data
   (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
     if (x86_sse_unaligned_move_optimal)
       movups mem, reg

     if (x86_sse_partial_reg_dependency == true)
       {
         xorps  reg, reg
         movlps mem, reg
         movhps mem+8, reg
       }
     else
       {
         movlps mem, reg
         movhps mem+8, reg
       }

   Code generation for unaligned packed loads of double precision data
   (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
     if (x86_sse_unaligned_move_optimal)
       movupd mem, reg

     if (x86_sse_split_regs == true)
       {
         movlpd mem, reg
         movhpd mem+8, reg
       }
     else
       {
         movsd  mem, reg
         movhpd mem+8, reg
       }
 */

void
ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
{
  rtx op0, op1, m;

  op0 = operands[0];
  op1 = operands[1];

  if (TARGET_AVX)
    {
      switch (GET_MODE_CLASS (mode))
	{
	case MODE_VECTOR_INT:
	case MODE_INT:
	  switch (GET_MODE_SIZE (mode))
	    {
	    case 16:
	      /*  If we're optimizing for size, movups is the smallest.  */
	      if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
		{
		  op0 = gen_lowpart (V4SFmode, op0);
		  op1 = gen_lowpart (V4SFmode, op1);
		  emit_insn (gen_sse_movups (op0, op1));
		  return;
		}
	      op0 = gen_lowpart (V16QImode, op0);
	      op1 = gen_lowpart (V16QImode, op1);
	      emit_insn (gen_sse2_movdqu (op0, op1));
	      break;
	    case 32:
	      op0 = gen_lowpart (V32QImode, op0);
	      op1 = gen_lowpart (V32QImode, op1);
	      ix86_avx256_split_vector_move_misalign (op0, op1);
	      break;
	    default:
	      gcc_unreachable ();
	    }
	  break;
	case MODE_VECTOR_FLOAT:
	  op0 = gen_lowpart (mode, op0);
	  op1 = gen_lowpart (mode, op1);

	  switch (mode)
	    {
	    case V4SFmode:
	      emit_insn (gen_sse_movups (op0, op1));
	      break;
	    case V8SFmode:
	      ix86_avx256_split_vector_move_misalign (op0, op1);
	      break;
	    case V2DFmode:
	      if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
		{
		  op0 = gen_lowpart (V4SFmode, op0);
		  op1 = gen_lowpart (V4SFmode, op1);
		  emit_insn (gen_sse_movups (op0, op1));
		  return;
		}
	      emit_insn (gen_sse2_movupd (op0, op1));
	      break;
	    case V4DFmode:
	      ix86_avx256_split_vector_move_misalign (op0, op1);
	      break;
	    default:
	      gcc_unreachable ();
	    }
	  break;

	default:
	  gcc_unreachable ();
	}

      return;
    }

  if (MEM_P (op1))
    {
      /* If we're optimizing for size, movups is the smallest.  */
      if (optimize_insn_for_size_p ()
	  || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
	{
	  op0 = gen_lowpart (V4SFmode, op0);
	  op1 = gen_lowpart (V4SFmode, op1);
	  emit_insn (gen_sse_movups (op0, op1));
	  return;
	}

      /* ??? If we have typed data, then it would appear that using
	 movdqu is the only way to get unaligned data loaded with
	 integer type.  */
      if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
	{
	  op0 = gen_lowpart (V16QImode, op0);
	  op1 = gen_lowpart (V16QImode, op1);
	  emit_insn (gen_sse2_movdqu (op0, op1));
	  return;
	}

      if (TARGET_SSE2 && mode == V2DFmode)
	{
	  rtx zero;

	  if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
	    {
	      op0 = gen_lowpart (V2DFmode, op0);
	      op1 = gen_lowpart (V2DFmode, op1);
	      emit_insn (gen_sse2_movupd (op0, op1));
	      return;
	    }

	  /* When SSE registers are split into halves, we can avoid
	     writing to the top half twice.  */
	  if (TARGET_SSE_SPLIT_REGS)
	    {
	      emit_clobber (op0);
	      zero = op0;
	    }
	  else
	    {
	      /* ??? Not sure about the best option for the Intel chips.
		 The following would seem to satisfy; the register is
		 entirely cleared, breaking the dependency chain.  We
		 then store to the upper half, with a dependency depth
		 of one.  A rumor has it that Intel recommends two movsd
		 followed by an unpacklpd, but this is unconfirmed.  And
		 given that the dependency depth of the unpacklpd would
		 still be one, I'm not sure why this would be better.  */
	      zero = CONST0_RTX (V2DFmode);
	    }

	  m = adjust_address (op1, DFmode, 0);
	  emit_insn (gen_sse2_loadlpd (op0, zero, m));
	  m = adjust_address (op1, DFmode, 8);
	  emit_insn (gen_sse2_loadhpd (op0, op0, m));
	}
      else
	{
	  if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
	    {
	      op0 = gen_lowpart (V4SFmode, op0);
	      op1 = gen_lowpart (V4SFmode, op1);
	      emit_insn (gen_sse_movups (op0, op1));
	      return;
	    }

	  if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
	    emit_move_insn (op0, CONST0_RTX (mode));
	  else
	    emit_clobber (op0);

	  if (mode != V4SFmode)
	    op0 = gen_lowpart (V4SFmode, op0);
	  m = adjust_address (op1, V2SFmode, 0);
	  emit_insn (gen_sse_loadlps (op0, op0, m));
	  m = adjust_address (op1, V2SFmode, 8);
	  emit_insn (gen_sse_loadhps (op0, op0, m));
	}
    }
  else if (MEM_P (op0))
    {
      /* If we're optimizing for size, movups is the smallest.  */
      if (optimize_insn_for_size_p ()
	  || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
	{
	  op0 = gen_lowpart (V4SFmode, op0);
	  op1 = gen_lowpart (V4SFmode, op1);
	  emit_insn (gen_sse_movups (op0, op1));
	  return;
	}

      /* ??? Similar to above, only less clear because of quote
	 typeless stores unquote.  */
      if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
	  && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
	{
	  op0 = gen_lowpart (V16QImode, op0);
	  op1 = gen_lowpart (V16QImode, op1);
	  emit_insn (gen_sse2_movdqu (op0, op1));
	  return;
	}

      if (TARGET_SSE2 && mode == V2DFmode)
	{
	  if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
	    {
	      op0 = gen_lowpart (V2DFmode, op0);
	      op1 = gen_lowpart (V2DFmode, op1);
	      emit_insn (gen_sse2_movupd (op0, op1));
	    }
	  else
	    {
	      m = adjust_address (op0, DFmode, 0);
	      emit_insn (gen_sse2_storelpd (m, op1));
	      m = adjust_address (op0, DFmode, 8);
	      emit_insn (gen_sse2_storehpd (m, op1));
	    }
	}
      else
	{
	  if (mode != V4SFmode)
	    op1 = gen_lowpart (V4SFmode, op1);

	  if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
	    {
	      op0 = gen_lowpart (V4SFmode, op0);
	      emit_insn (gen_sse_movups (op0, op1));
	    }
	  else
	    {
	      m = adjust_address (op0, V2SFmode, 0);
	      emit_insn (gen_sse_storelps (m, op1));
	      m = adjust_address (op0, V2SFmode, 8);
	      emit_insn (gen_sse_storehps (m, op1));
	    }
	}
    }
  else
    gcc_unreachable ();
}
/* Expand a push in MODE.  This is some mode for which we do not support
   proper push instructions, at least from the registers that we expect
   the value to live in.  */

void
ix86_expand_push (enum machine_mode mode, rtx x)
{
  rtx tmp;

  tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
			     GEN_INT (-GET_MODE_SIZE (mode)),
			     stack_pointer_rtx, 1, OPTAB_DIRECT);
  if (tmp != stack_pointer_rtx)
    emit_move_insn (stack_pointer_rtx, tmp);

  tmp = gen_rtx_MEM (mode, stack_pointer_rtx);

  /* When we push an operand onto stack, it has to be aligned at least
     at the function argument boundary.  However since we don't have
     the argument type, we can't determine the actual argument
     boundary.  */
  emit_move_insn (tmp, x);
}
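/* Example (illustrative only; not part of the original source): pushing
   an 8-byte value this way on x86_32 expands to the equivalent of

	subl	$8, %esp
	movl	..., (%esp)	# store through the MEM created above

   rather than a real push, since the hardware push cannot handle the
   mode directly from the registers the value lives in.  */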
/* Helper function of ix86_fixup_binary_operands to canonicalize
   operand order.  Returns true if the operands should be swapped.  */

static bool
ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
			     rtx operands[])
{
  rtx dst = operands[0];
  rtx src1 = operands[1];
  rtx src2 = operands[2];

  /* If the operation is not commutative, we can't do anything.  */
  if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
    return false;

  /* Highest priority is that src1 should match dst.  */
  if (rtx_equal_p (dst, src1))
    return false;
  if (rtx_equal_p (dst, src2))
    return true;

  /* Next highest priority is that immediate constants come second.  */
  if (immediate_operand (src2, mode))
    return false;
  if (immediate_operand (src1, mode))
    return true;

  /* Lowest priority is that memory references should come second.  */
  if (MEM_P (src2))
    return false;
  if (MEM_P (src1))
    return true;

  return false;
}
/* Fix up OPERANDS to satisfy ix86_binary_operator_ok.  Return the
   destination to use for the operation.  If different from the true
   destination in operands[0], a copy operation will be required.  */

rtx
ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
			    rtx operands[])
{
  rtx dst = operands[0];
  rtx src1 = operands[1];
  rtx src2 = operands[2];

  /* Canonicalize operand order.  */
  if (ix86_swap_binary_operands_p (code, mode, operands))
    {
      rtx temp;

      /* It is invalid to swap operands of different modes.  */
      gcc_assert (GET_MODE (src1) == GET_MODE (src2));

      temp = src1;
      src1 = src2;
      src2 = temp;
    }

  /* Both source operands cannot be in memory.  */
  if (MEM_P (src1) && MEM_P (src2))
    {
      /* Optimization: Only read from memory once.  */
      if (rtx_equal_p (src1, src2))
	{
	  src2 = force_reg (mode, src2);
	  src1 = src2;
	}
      else
	src2 = force_reg (mode, src2);
    }

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  if (MEM_P (dst) && !rtx_equal_p (dst, src1))
    dst = gen_reg_rtx (mode);

  /* Source 1 cannot be a constant.  */
  if (CONSTANT_P (src1))
    src1 = force_reg (mode, src1);

  /* Source 1 cannot be a non-matching memory.  */
  if (MEM_P (src1) && !rtx_equal_p (dst, src1))
    src1 = force_reg (mode, src1);

  operands[1] = src1;
  operands[2] = src2;
  return dst;
}
/* Similarly, but assume that the destination has already been
   set up properly.  */

void
ix86_fixup_binary_operands_no_copy (enum rtx_code code,
				    enum machine_mode mode, rtx operands[])
{
  rtx dst = ix86_fixup_binary_operands (code, mode, operands);
  gcc_assert (dst == operands[0]);
}
/* Attempt to expand a binary operator.  Make the expansion closer to the
   actual machine, than just general_operand, which will allow 3 separate
   memory references (one output, two input) in a single insn.  */

void
ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
			     rtx operands[])
{
  rtx src1, src2, dst, op, clob;

  dst = ix86_fixup_binary_operands (code, mode, operands);
  src1 = operands[1];
  src2 = operands[2];

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
  if (reload_in_progress)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
	 it doesn't want to clobber it.  We can only do this with PLUS.  */
      gcc_assert (code == PLUS);
      emit_insn (op);
    }
  else if (reload_completed
	   && code == PLUS
	   && !rtx_equal_p (dst, src1))
    {
      /* This is going to be an LEA; avoid splitting it later.  */
      emit_insn (op);
    }
  else
    {
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}
/* Return TRUE or FALSE depending on whether the binary operator meets the
   appropriate constraints.  */

bool
ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
			 rtx operands[3])
{
  rtx dst = operands[0];
  rtx src1 = operands[1];
  rtx src2 = operands[2];

  /* Both source operands cannot be in memory.  */
  if (MEM_P (src1) && MEM_P (src2))
    return false;

  /* Canonicalize operand order for commutative operators.  */
  if (ix86_swap_binary_operands_p (code, mode, operands))
    {
      rtx temp = src1;
      src1 = src2;
      src2 = temp;
    }

  /* If the destination is memory, we must have a matching source operand.  */
  if (MEM_P (dst) && !rtx_equal_p (dst, src1))
    return false;

  /* Source 1 cannot be a constant.  */
  if (CONSTANT_P (src1))
    return false;

  /* Source 1 cannot be a non-matching memory.  */
  if (MEM_P (src1) && !rtx_equal_p (dst, src1))
    /* Support "andhi/andsi/anddi" as a zero-extending move.  */
    return (code == AND
	    && (mode == HImode
		|| mode == SImode
		|| (TARGET_64BIT && mode == DImode))
	    && CONST_INT_P (src2)
	    && (INTVAL (src2) == 0xff
		|| INTVAL (src2) == 0xffff));

  return true;
}
/* Attempt to expand a unary operator.  Make the expansion closer to the
   actual machine, than just general_operand, which will allow 2 separate
   memory references (one output, one input) in a single insn.  */

void
ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
			    rtx operands[])
{
  int matching_memory;
  rtx src, dst, op, clob;

  dst = operands[0];
  src = operands[1];

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  matching_memory = 0;
  if (MEM_P (dst))
    {
      if (rtx_equal_p (dst, src))
	matching_memory = 1;
      else
	dst = gen_reg_rtx (mode);
    }

  /* When source operand is memory, destination must match.  */
  if (MEM_P (src) && !matching_memory)
    src = force_reg (mode, src);

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
  if (reload_in_progress || code == NOT)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
	 it doesn't want to clobber it.  */
      gcc_assert (code == NOT);
      emit_insn (op);
    }
  else
    {
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}
/* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
   divisor are within the range [0-255].  */

void
ix86_split_idivmod (enum machine_mode mode, rtx operands[],
		    bool signed_p)
{
  rtx end_label, qimode_label;
  rtx insn, div, mod;
  rtx scratch, tmp0, tmp1, tmp2;
  rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
  rtx (*gen_zero_extend) (rtx, rtx);
  rtx (*gen_test_ccno_1) (rtx, rtx);

  switch (mode)
    {
    case SImode:
      gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
      gen_test_ccno_1 = gen_testsi_ccno_1;
      gen_zero_extend = gen_zero_extendqisi2;
      break;
    case DImode:
      gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
      gen_test_ccno_1 = gen_testdi_ccno_1;
      gen_zero_extend = gen_zero_extendqidi2;
      break;
    default:
      gcc_unreachable ();
    }

  end_label = gen_label_rtx ();
  qimode_label = gen_label_rtx ();

  scratch = gen_reg_rtx (mode);

  /* Use 8bit unsigned divmod if dividend and divisor are within
     the range [0-255].  */
  emit_move_insn (scratch, operands[2]);
  scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
				 scratch, 1, OPTAB_DIRECT);
  emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
  tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
  tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
  tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
			       gen_rtx_LABEL_REF (VOIDmode, qimode_label),
			       pc_rtx);
  insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
  predict_jump (REG_BR_PROB_BASE * 50 / 100);
  JUMP_LABEL (insn) = qimode_label;

  /* Generate original signed/unsigned divmod.  */
  div = gen_divmod4_1 (operands[0], operands[1],
		       operands[2], operands[3]);
  emit_insn (div);

  /* Branch to the end.  */
  emit_jump_insn (gen_jump (end_label));
  emit_barrier ();

  /* Generate 8bit unsigned divide.  */
  emit_label (qimode_label);
  /* Don't use operands[0] for result of 8bit divide since not all
     registers support QImode ZERO_EXTRACT.  */
  tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
  tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
  tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
  emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));

  if (signed_p)
    {
      div = gen_rtx_DIV (SImode, operands[2], operands[3]);
      mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
    }
  else
    {
      div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
      mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
    }

  /* Extract remainder from AH.  */
  tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
  if (REG_P (operands[1]))
    insn = emit_move_insn (operands[1], tmp1);
  else
    {
      /* Need a new scratch register since the old one has result
	 of 8bit divide.  */
      scratch = gen_reg_rtx (mode);
      emit_move_insn (scratch, tmp1);
      insn = emit_move_insn (operands[1], scratch);
    }
  set_unique_reg_note (insn, REG_EQUAL, mod);

  /* Zero extend quotient from AL.  */
  tmp1 = gen_lowpart (QImode, tmp0);
  insn = emit_insn (gen_zero_extend (operands[0], tmp1));
  set_unique_reg_note (insn, REG_EQUAL, div);

  emit_label (end_label);
}
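/* Example (illustrative only; not part of the original source, register
   choices invented): for a 32-bit unsigned division the split above
   corresponds roughly to

	movl	%eax, %ecx	# scratch = dividend
	orl	%ebx, %ecx	# fold in the divisor
	testl	$-0x100, %ecx	# any bits above the low 8 set?
	je	.Lqimode
	xorl	%edx, %edx
	divl	%ebx		# full-width path
	jmp	.Ldone
   .Lqimode:
	divb	%bl		# AX / divisor: AL = quotient, AH = remainder
   .Ldone:

   The 8-bit divide is much cheaper on current hardware when both
   operands are known to fit in a byte.  */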
#define LEA_SEARCH_THRESHOLD 12

/* Search backward for non-agu definition of register number REGNO1
   or register number REGNO2 in INSN's basic block until
   1. Pass LEA_SEARCH_THRESHOLD instructions, or
   2. Reach BB boundary, or
   3. Reach agu definition.
   Returns the distance between the non-agu definition point and INSN.
   If no definition point, returns -1.  */

static int
distance_non_agu_define (unsigned int regno1, unsigned int regno2,
			 rtx insn)
{
  basic_block bb = BLOCK_FOR_INSN (insn);
  int distance = 0;
  df_ref *def_rec;
  enum attr_type insn_type;

  if (insn != BB_HEAD (bb))
    {
      rtx prev = PREV_INSN (insn);
      while (prev && distance < LEA_SEARCH_THRESHOLD)
	{
	  if (NONDEBUG_INSN_P (prev))
	    {
	      distance++;
	      for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
		if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
		    && !DF_REF_IS_ARTIFICIAL (*def_rec)
		    && (regno1 == DF_REF_REGNO (*def_rec)
			|| regno2 == DF_REF_REGNO (*def_rec)))
		  {
		    insn_type = get_attr_type (prev);
		    if (insn_type != TYPE_LEA)
		      goto done;
		  }
	    }
	  if (prev == BB_HEAD (bb))
	    break;
	  prev = PREV_INSN (prev);
	}
    }

  if (distance < LEA_SEARCH_THRESHOLD)
    {
      edge e;
      edge_iterator ei;
      bool simple_loop = false;

      FOR_EACH_EDGE (e, ei, bb->preds)
	if (e->src == bb)
	  {
	    simple_loop = true;
	    break;
	  }

      if (simple_loop)
	{
	  rtx prev = BB_END (bb);
	  while (prev
		 && prev != insn
		 && distance < LEA_SEARCH_THRESHOLD)
	    {
	      if (NONDEBUG_INSN_P (prev))
		{
		  distance++;
		  for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
		    if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
			&& !DF_REF_IS_ARTIFICIAL (*def_rec)
			&& (regno1 == DF_REF_REGNO (*def_rec)
			    || regno2 == DF_REF_REGNO (*def_rec)))
		      {
			insn_type = get_attr_type (prev);
			if (insn_type != TYPE_LEA)
			  goto done;
		      }
		}
	      prev = PREV_INSN (prev);
	    }
	}
    }

  distance = -1;

done:
  /* get_attr_type may modify recog data.  We want to make sure
     that recog data is valid for instruction INSN, on which
     distance_non_agu_define is called.  INSN is unchanged here.  */
  extract_insn_cached (insn);
  return distance;
}
/* Return the distance between INSN and the next insn that uses
   register number REGNO0 in memory address.  Return -1 if no such
   use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set.  */

static int
distance_agu_use (unsigned int regno0, rtx insn)
{
  basic_block bb = BLOCK_FOR_INSN (insn);
  int distance = 0;
  df_ref *def_rec;
  df_ref *use_rec;

  if (insn != BB_END (bb))
    {
      rtx next = NEXT_INSN (insn);
      while (next && distance < LEA_SEARCH_THRESHOLD)
	{
	  if (NONDEBUG_INSN_P (next))
	    {
	      distance++;

	      for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
		if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
		     || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
		    && regno0 == DF_REF_REGNO (*use_rec))
		  /* Return DISTANCE if OP0 is used in memory
		     address in NEXT.  */
		  return distance;

	      for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
		if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
		    && !DF_REF_IS_ARTIFICIAL (*def_rec)
		    && regno0 == DF_REF_REGNO (*def_rec))
		  /* Return -1 if OP0 is set in NEXT.  */
		  return -1;
	    }
	  if (next == BB_END (bb))
	    break;
	  next = NEXT_INSN (next);
	}
    }

  if (distance < LEA_SEARCH_THRESHOLD)
    {
      edge e;
      edge_iterator ei;
      bool simple_loop = false;

      FOR_EACH_EDGE (e, ei, bb->succs)
	if (e->dest == bb)
	  {
	    simple_loop = true;
	    break;
	  }

      if (simple_loop)
	{
	  rtx next = BB_HEAD (bb);
	  while (next
		 && next != insn
		 && distance < LEA_SEARCH_THRESHOLD)
	    {
	      if (NONDEBUG_INSN_P (next))
		{
		  distance++;

		  for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
		    if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
			 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
			&& regno0 == DF_REF_REGNO (*use_rec))
		      /* Return DISTANCE if OP0 is used in memory
			 address in NEXT.  */
		      return distance;

		  for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
		    if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
			&& !DF_REF_IS_ARTIFICIAL (*def_rec)
			&& regno0 == DF_REF_REGNO (*def_rec))
		      /* Return -1 if OP0 is set in NEXT.  */
		      return -1;
		}
	      next = NEXT_INSN (next);
	    }
	}
    }

  return -1;
}
/* Define this macro to tune LEA priority vs ADD; it takes effect when
   there is a dilemma of choosing LEA or ADD.
   Negative value: ADD is more preferred than LEA
   Zero: Neutral
   Positive value: LEA is more preferred than ADD.  */
#define IX86_LEA_PRIORITY 2

/* Return true if it is OK to optimize an ADD operation to an LEA
   operation to avoid flag register consumption.  For most processors,
   ADD is faster than LEA.  For the processors like ATOM, if the
   destination register of LEA holds an actual address which will be
   used soon, LEA is better and otherwise ADD is better.  */

bool
ix86_lea_for_add_ok (rtx insn, rtx operands[])
{
  unsigned int regno0 = true_regnum (operands[0]);
  unsigned int regno1 = true_regnum (operands[1]);
  unsigned int regno2 = true_regnum (operands[2]);

  /* If a = b + c, (a!=b && a!=c), must use lea form.  */
  if (regno0 != regno1 && regno0 != regno2)
    return true;

  if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
    return false;
  else
    {
      int dist_define, dist_use;

      /* Return false if REGNO0 isn't used in memory address.  */
      dist_use = distance_agu_use (regno0, insn);
      if (dist_use <= 0)
	return false;

      dist_define = distance_non_agu_define (regno1, regno2, insn);
      if (dist_define <= 0)
	return true;

      /* If this insn has both backward non-agu dependence and forward
	 agu dependence, the one with short distance takes effect.  */
      if ((dist_define + IX86_LEA_PRIORITY) < dist_use)
	return false;

      return true;
    }
}
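/* Example (illustrative only; not part of the original source): on Atom,
   given

	addl	%ebx, %eax	# candidate: a = a + b
	movl	(%eax), %ecx	# %eax used as an address shortly after

   distance_agu_use is small, so the add is emitted as
   "leal (%eax,%ebx), %eax", keeping the result on the AGU side and
   leaving the flags untouched; if the result instead feeds ordinary
   ALU uses, the faster add form is kept.  */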
/* Return true if destination reg of SET_BODY is shift count of
   USE_BODY.  */

static bool
ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
{
  rtx set_dest;
  rtx shift_rtx;
  int i;

  /* Retrieve destination of SET_BODY.  */
  switch (GET_CODE (set_body))
    {
    case SET:
      set_dest = SET_DEST (set_body);
      if (!set_dest || !REG_P (set_dest))
	return false;
      break;
    case PARALLEL:
      for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
	if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
					  use_body))
	  return true;
    default:
      return false;
    }

  /* Retrieve shift count of USE_BODY.  */
  switch (GET_CODE (use_body))
    {
    case SET:
      shift_rtx = XEXP (use_body, 1);
      break;
    case PARALLEL:
      for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
	if (ix86_dep_by_shift_count_body (set_body,
					  XVECEXP (use_body, 0, i)))
	  return true;
    default:
      return false;
    }

  if (shift_rtx
      && (GET_CODE (shift_rtx) == ASHIFT
	  || GET_CODE (shift_rtx) == LSHIFTRT
	  || GET_CODE (shift_rtx) == ASHIFTRT
	  || GET_CODE (shift_rtx) == ROTATE
	  || GET_CODE (shift_rtx) == ROTATERT))
    {
      rtx shift_count = XEXP (shift_rtx, 1);

      /* Return true if shift count is dest of SET_BODY.  */
      if (REG_P (shift_count)
	  && true_regnum (set_dest) == true_regnum (shift_count))
	return true;
    }

  return false;
}

/* Return true if destination reg of SET_INSN is shift count of
   USE_INSN.  */

bool
ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
{
  return ix86_dep_by_shift_count_body (PATTERN (set_insn),
				       PATTERN (use_insn));
}
/* Return TRUE or FALSE depending on whether the unary operator meets the
   appropriate constraints.  */

bool
ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
			enum machine_mode mode ATTRIBUTE_UNUSED,
			rtx operands[2] ATTRIBUTE_UNUSED)
{
  /* If one of operands is memory, source and destination must match.  */
  if ((MEM_P (operands[0])
       || MEM_P (operands[1]))
      && ! rtx_equal_p (operands[0], operands[1]))
    return false;
  return true;
}
/* Return TRUE if the operands to a vec_interleave_{high,low}v2df
   are ok, keeping in mind the possible movddup alternative.  */

bool
ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
{
  if (MEM_P (operands[0]))
    return rtx_equal_p (operands[0], operands[1 + high]);
  if (MEM_P (operands[1]) && MEM_P (operands[2]))
    return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
  return true;
}
/* Post-reload splitter for converting an SF or DFmode value in an
   SSE register into an unsigned SImode.  */

void
ix86_split_convert_uns_si_sse (rtx operands[])
{
  enum machine_mode vecmode;
  rtx value, large, zero_or_two31, input, two31, x;

  large = operands[1];
  zero_or_two31 = operands[2];
  input = operands[3];
  two31 = operands[4];
  vecmode = GET_MODE (large);
  value = gen_rtx_REG (vecmode, REGNO (operands[0]));

  /* Load up the value into the low element.  We must ensure that the other
     elements are valid floats -- zero is the easiest such value.  */
  if (MEM_P (input))
    {
      if (vecmode == V4SFmode)
	emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
      else
	emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
    }
  else
    {
      input = gen_rtx_REG (vecmode, REGNO (input));
      emit_move_insn (value, CONST0_RTX (vecmode));
      if (vecmode == V4SFmode)
	emit_insn (gen_sse_movss (value, value, input));
      else
	emit_insn (gen_sse2_movsd (value, value, input));
    }

  emit_move_insn (large, two31);
  emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);

  x = gen_rtx_fmt_ee (LE, vecmode, large, value);
  emit_insn (gen_rtx_SET (VOIDmode, large, x));

  x = gen_rtx_AND (vecmode, zero_or_two31, large);
  emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));

  x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
  emit_insn (gen_rtx_SET (VOIDmode, value, x));

  large = gen_rtx_REG (V4SImode, REGNO (large));
  emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));

  x = gen_rtx_REG (V4SImode, REGNO (value));
  if (vecmode == V4SFmode)
    emit_insn (gen_sse2_cvttps2dq (x, value));
  else
    emit_insn (gen_sse2_cvttpd2dq (x, value));
  value = x;

  emit_insn (gen_xorv4si3 (value, value, large));
}
/* Convert an unsigned DImode value into a DFmode, using only SSE.
   Expects the 64-bit DImode to be supplied in a pair of integral
   registers.  Requires SSE2; will use SSE3 if available.  For x86_32,
   -mfpmath=sse, !optimize_size only.  */

void
ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
{
  REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
  rtx int_xmm, fp_xmm;
  rtx biases, exponents;
  rtx x;

  int_xmm = gen_reg_rtx (V4SImode);
  if (TARGET_INTER_UNIT_MOVES)
    emit_insn (gen_movdi_to_sse (int_xmm, input));
  else if (TARGET_SSE_SPLIT_REGS)
    {
      emit_clobber (int_xmm);
      emit_move_insn (gen_lowpart (DImode, int_xmm), input);
    }
  else
    {
      x = gen_reg_rtx (V2DImode);
      ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
      emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
    }

  x = gen_rtx_CONST_VECTOR (V4SImode,
			    gen_rtvec (4, GEN_INT (0x43300000UL),
				       GEN_INT (0x45300000UL),
				       const0_rtx, const0_rtx));
  exponents = validize_mem (force_const_mem (V4SImode, x));

  /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
  emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));

  /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
     yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
     Similarly (0x45300000UL ## fp_value_hi_xmm) yields
     (0x1.0p84 + double(fp_value_hi_xmm)).
     Note these exponents differ by 32.  */

  fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));

  /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
     in [0,2**32-1] and [0]+[2**32,2**64-1] respectively.  */
  real_ldexp (&bias_lo_rvt, &dconst1, 52);
  real_ldexp (&bias_hi_rvt, &dconst1, 84);
  biases = const_double_from_real_value (bias_lo_rvt, DFmode);
  x = const_double_from_real_value (bias_hi_rvt, DFmode);
  biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
  biases = validize_mem (force_const_mem (V2DFmode, biases));
  emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));

  /* Add the upper and lower DFmode values together.  */
  if (TARGET_SSE3)
    emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
  else
    {
      x = copy_to_mode_reg (V2DFmode, fp_xmm);
      emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
      emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
    }

  ix86_expand_vector_extract (false, target, fp_xmm, 0);
}
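/* Worked example (editorial; not part of the original source): an IEEE
   double with biased exponent 0x433 is 2^52 times its significand, so
   the bit pattern 0x43300000_XXXXXXXX equals 2^52 + lo32 exactly, and
   0x45300000_YYYYYYYY equals 2^84 + hi32 * 2^32.  For the input
   0x00000001_00000002:

	low  half:  0x43300000_00000002 = 2^52 + 2
	high half:  0x45300000_00000001 = 2^84 + 2^32

   Subtracting the 2^52 and 2^84 biases and adding the two halves gives
   2^32 + 2 = 4294967298.0, the exact result.  */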
/* Not used, but eases macroization of patterns.  */
void
ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
				  rtx input ATTRIBUTE_UNUSED)
{
  gcc_unreachable ();
}
/* Convert an unsigned SImode value into a DFmode.  Only currently used
   for SSE, but applicable anywhere.  */

void
ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
{
  REAL_VALUE_TYPE TWO31r;
  rtx x, fp;

  x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
			   NULL, 1, OPTAB_DIRECT);

  fp = gen_reg_rtx (DFmode);
  emit_insn (gen_floatsidf2 (fp, x));

  real_ldexp (&TWO31r, &dconst1, 31);
  x = const_double_from_real_value (TWO31r, DFmode);

  x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
  if (x != target)
    emit_move_insn (target, x);
}
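/* Worked example (editorial; not part of the original source): for the
   input 0xffffffff the PLUS of -2^31 wraps to 0x7fffffff = 2147483647,
   which converts exactly as a signed SImode value; adding the 2^31
   constant back in DFmode yields 2147483647.0 + 2147483648.0
   = 4294967295.0.  The modulo-2^32 wrap is harmless because it is
   undone exactly in the FP domain.  */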
/* Convert a signed DImode value into a DFmode.  Only used for SSE in
   32-bit mode; otherwise we have a direct convert instruction.  */

void
ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
{
  REAL_VALUE_TYPE TWO32r;
  rtx fp_lo, fp_hi, x;

  fp_lo = gen_reg_rtx (DFmode);
  fp_hi = gen_reg_rtx (DFmode);

  emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));

  real_ldexp (&TWO32r, &dconst1, 32);
  x = const_double_from_real_value (TWO32r, DFmode);
  fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);

  ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));

  x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
			   0, OPTAB_DIRECT);
  if (x != target)
    emit_move_insn (target, x);
}
/* Convert an unsigned SImode value into a SFmode, using only SSE.
   For x86_32, -mfpmath=sse, !optimize_size only.  */

void
ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
{
  REAL_VALUE_TYPE ONE16r;
  rtx fp_hi, fp_lo, int_hi, int_lo, x;

  real_ldexp (&ONE16r, &dconst1, 16);
  x = const_double_from_real_value (ONE16r, SFmode);
  int_lo = expand_simple_binop (SImode, AND, input, GEN_INT (0xffff),
				NULL, 0, OPTAB_DIRECT);
  int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT (16),
				NULL, 0, OPTAB_DIRECT);
  fp_hi = gen_reg_rtx (SFmode);
  fp_lo = gen_reg_rtx (SFmode);
  emit_insn (gen_floatsisf2 (fp_hi, int_hi));
  emit_insn (gen_floatsisf2 (fp_lo, int_lo));
  fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
			       0, OPTAB_DIRECT);
  fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
			       0, OPTAB_DIRECT);
  if (!rtx_equal_p (target, fp_hi))
    emit_move_insn (target, fp_hi);
}
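/* Worked example (editorial; not part of the original source): each
   16-bit half fits the 24-bit SFmode significand, so both conversions
   are exact.  For input 0x12345678:

	int_hi = 0x1234 -> 4660.0f, scaled by 2^16 -> 305397760.0f
	int_lo = 0x5678 -> 22136.0f

   and only the final PLUS rounds, producing the correctly rounded
   single-precision value of the full 32-bit input.  */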
/* A subroutine of ix86_build_signbit_mask.  If VECT is true,
   then replicate the value for all elements of the vector
   register.  */

rtx
ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
{
  rtvec v;
  switch (mode)
    {
    case V4SImode:
      gcc_assert (vect);
      v = gen_rtvec (4, value, value, value, value);
      return gen_rtx_CONST_VECTOR (V4SImode, v);

    case V2DImode:
      gcc_assert (vect);
      v = gen_rtvec (2, value, value);
      return gen_rtx_CONST_VECTOR (V2DImode, v);

    case V8SFmode:
      if (vect)
	v = gen_rtvec (8, value, value, value, value,
		       value, value, value, value);
      else
	v = gen_rtvec (8, value, CONST0_RTX (SFmode),
		       CONST0_RTX (SFmode), CONST0_RTX (SFmode),
		       CONST0_RTX (SFmode), CONST0_RTX (SFmode),
		       CONST0_RTX (SFmode), CONST0_RTX (SFmode));
      return gen_rtx_CONST_VECTOR (V8SFmode, v);

    case V4SFmode:
      if (vect)
	v = gen_rtvec (4, value, value, value, value);
      else
	v = gen_rtvec (4, value, CONST0_RTX (SFmode),
		       CONST0_RTX (SFmode), CONST0_RTX (SFmode));
      return gen_rtx_CONST_VECTOR (V4SFmode, v);

    case V4DFmode:
      if (vect)
	v = gen_rtvec (4, value, value, value, value);
      else
	v = gen_rtvec (4, value, CONST0_RTX (DFmode),
		       CONST0_RTX (DFmode), CONST0_RTX (DFmode));
      return gen_rtx_CONST_VECTOR (V4DFmode, v);

    case V2DFmode:
      if (vect)
	v = gen_rtvec (2, value, value);
      else
	v = gen_rtvec (2, value, CONST0_RTX (DFmode));
      return gen_rtx_CONST_VECTOR (V2DFmode, v);

    default:
      gcc_unreachable ();
    }
}
/* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
   and ix86_expand_int_vcond.  Create a mask for the sign bit in MODE
   for an SSE register.  If VECT is true, then replicate the mask for
   all elements of the vector register.  If INVERT is true, then create
   a mask excluding the sign bit.  */

rtx
ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
{
  enum machine_mode vec_mode, imode;
  HOST_WIDE_INT hi, lo;
  int shift = 63;
  rtx v;
  rtx mask;

  /* Find the sign bit, sign extended to 2*HWI.  */
  switch (mode)
    {
    case V4SImode:
    case V8SFmode:
    case V4SFmode:
      vec_mode = mode;
      mode = GET_MODE_INNER (mode);
      imode = SImode;
      lo = 0x80000000, hi = lo < 0;
      break;

    case V2DImode:
    case V4DFmode:
    case V2DFmode:
      vec_mode = mode;
      mode = GET_MODE_INNER (mode);
      imode = DImode;
      if (HOST_BITS_PER_WIDE_INT >= 64)
	lo = (HOST_WIDE_INT)1 << shift, hi = -1;
      else
	lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
      break;

    case TImode:
    case TFmode:
      vec_mode = VOIDmode;
      if (HOST_BITS_PER_WIDE_INT >= 64)
	{
	  imode = TImode;
	  lo = 0, hi = (HOST_WIDE_INT)1 << shift;
	}
      else
	{
	  rtvec vec;

	  imode = DImode;
	  lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);

	  if (invert)
	    {
	      lo = ~lo, hi = ~hi;
	      v = constm1_rtx;
	    }
	  else
	    v = const0_rtx;

	  mask = immed_double_const (lo, hi, imode);

	  vec = gen_rtvec (2, v, mask);
	  v = gen_rtx_CONST_VECTOR (V2DImode, vec);
	  v = copy_to_mode_reg (mode, gen_lowpart (mode, v));

	  return v;
	}
      break;

    default:
      gcc_unreachable ();
    }

  if (invert)
    lo = ~lo, hi = ~hi;

  /* Force this value into the low part of a fp vector constant.  */
  mask = immed_double_const (lo, hi, imode);
  mask = gen_lowpart (mode, mask);

  if (vec_mode == VOIDmode)
    return force_reg (mode, mask);

  v = ix86_build_const_vector (vec_mode, vect, mask);
  return force_reg (vec_mode, v);
}
16582 ix86_expand_fp_absneg_operator (enum rtx_code code
, enum machine_mode mode
,
16585 rtx mask
, set
, dst
, src
;
16586 bool use_sse
= false;
16587 bool vector_mode
= VECTOR_MODE_P (mode
);
16588 enum machine_mode vmode
= mode
;
16592 else if (mode
== TFmode
)
16594 else if (TARGET_SSE_MATH
)
16596 use_sse
= SSE_FLOAT_MODE_P (mode
);
16597 if (mode
== SFmode
)
16599 else if (mode
== DFmode
)
16603 /* NEG and ABS performed with SSE use bitwise mask operations.
16604 Create the appropriate mask now. */
16606 mask
= ix86_build_signbit_mask (vmode
, vector_mode
, code
== ABS
);
16613 set
= gen_rtx_fmt_e (code
, mode
, src
);
16614 set
= gen_rtx_SET (VOIDmode
, dst
, set
);
16621 use
= gen_rtx_USE (VOIDmode
, mask
);
16623 par
= gen_rtvec (2, set
, use
);
16626 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
16627 par
= gen_rtvec (3, set
, use
, clob
);
16629 emit_insn (gen_rtx_PARALLEL (VOIDmode
, par
));
/* Expand a copysign operation.  Special case operand 0 being a constant.  */

void
ix86_expand_copysign (rtx operands[])
{
  enum machine_mode mode, vmode;
  rtx dest, op0, op1, mask, nmask;

  dest = operands[0];
  op0 = operands[1];
  op1 = operands[2];

  mode = GET_MODE (dest);

  if (mode == SFmode)
    vmode = V4SFmode;
  else if (mode == DFmode)
    vmode = V2DFmode;
  else
    vmode = mode;

  if (GET_CODE (op0) == CONST_DOUBLE)
    {
      rtx (*copysign_insn)(rtx, rtx, rtx, rtx);

      if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
	op0 = simplify_unary_operation (ABS, mode, op0, mode);

      if (mode == SFmode || mode == DFmode)
	{
	  if (op0 == CONST0_RTX (mode))
	    op0 = CONST0_RTX (vmode);
	  else
	    {
	      rtx v = ix86_build_const_vector (vmode, false, op0);

	      op0 = force_reg (vmode, v);
	    }
	}
      else if (op0 != CONST0_RTX (mode))
	op0 = force_reg (mode, op0);

      mask = ix86_build_signbit_mask (vmode, 0, 0);

      if (mode == SFmode)
	copysign_insn = gen_copysignsf3_const;
      else if (mode == DFmode)
	copysign_insn = gen_copysigndf3_const;
      else
	copysign_insn = gen_copysigntf3_const;

      emit_insn (copysign_insn (dest, op0, op1, mask));
    }
  else
    {
      rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);

      nmask = ix86_build_signbit_mask (vmode, 0, 1);
      mask = ix86_build_signbit_mask (vmode, 0, 0);

      if (mode == SFmode)
	copysign_insn = gen_copysignsf3_var;
      else if (mode == DFmode)
	copysign_insn = gen_copysigndf3_var;
      else
	copysign_insn = gen_copysigntf3_var;

      emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
    }
}
/* Deconstruct a copysign operation into bit masks.  Operand 0 is known to
   be a constant, and so has already been expanded into a vector constant.  */

void
ix86_split_copysign_const (rtx operands[])
{
  enum machine_mode mode, vmode;
  rtx dest, op0, mask, x;

  dest = operands[0];
  op0 = operands[1];
  mask = operands[3];

  mode = GET_MODE (dest);
  vmode = GET_MODE (mask);

  dest = simplify_gen_subreg (vmode, dest, mode, 0);
  x = gen_rtx_AND (vmode, dest, mask);
  emit_insn (gen_rtx_SET (VOIDmode, dest, x));

  if (op0 != CONST0_RTX (vmode))
    {
      x = gen_rtx_IOR (vmode, dest, op0);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
}
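/* Example (illustrative only; not part of the original source): for
   DFmode the mask is { 1<<63, 0 }, and DEST -- which the pattern has
   already loaded with the sign source -- is rewritten as roughly

	andpd	mask(%rip), %xmm0	# keep only the sign bit
	orpd	magnitude, %xmm0	# merge in the constant |op0|

   the usual bitwise copysign decomposition.  */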
/* Deconstruct a copysign operation into bit masks.  Operand 0 is variable,
   so we have to do two masks.  */

void
ix86_split_copysign_var (rtx operands[])
{
  enum machine_mode mode, vmode;
  rtx dest, scratch, op0, op1, mask, nmask, x;

  dest = operands[0];
  scratch = operands[1];
  op0 = operands[2];
  op1 = operands[3];
  nmask = operands[4];
  mask = operands[5];

  mode = GET_MODE (dest);
  vmode = GET_MODE (mask);

  if (rtx_equal_p (op0, op1))
    {
      /* Shouldn't happen often (it's useless, obviously), but when it does
	 we'd generate incorrect code if we continue below.  */
      emit_move_insn (dest, op0);
      return;
    }

  if (REG_P (mask) && REGNO (dest) == REGNO (mask))	/* alternative 0 */
    {
      gcc_assert (REGNO (op1) == REGNO (scratch));

      x = gen_rtx_AND (vmode, scratch, mask);
      emit_insn (gen_rtx_SET (VOIDmode, scratch, x));

      dest = mask;
      op0 = simplify_gen_subreg (vmode, op0, mode, 0);
      x = gen_rtx_NOT (vmode, dest);
      x = gen_rtx_AND (vmode, x, op0);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
  else
    {
      if (REGNO (op1) == REGNO (scratch))		/* alternative 1,3 */
	{
	  x = gen_rtx_AND (vmode, scratch, mask);
	}
      else						/* alternative 2,4 */
	{
	  gcc_assert (REGNO (mask) == REGNO (scratch));
	  op1 = simplify_gen_subreg (vmode, op1, mode, 0);
	  x = gen_rtx_AND (vmode, scratch, op1);
	}
      emit_insn (gen_rtx_SET (VOIDmode, scratch, x));

      if (REGNO (op0) == REGNO (dest))			/* alternative 1,2 */
	{
	  dest = simplify_gen_subreg (vmode, op0, mode, 0);
	  x = gen_rtx_AND (vmode, dest, nmask);
	}
      else						/* alternative 3,4 */
	{
	  gcc_assert (REGNO (nmask) == REGNO (dest));
	  dest = nmask;
	  op0 = simplify_gen_subreg (vmode, op0, mode, 0);
	  x = gen_rtx_AND (vmode, dest, op0);
	}
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }

  x = gen_rtx_IOR (vmode, dest, scratch);
  emit_insn (gen_rtx_SET (VOIDmode, dest, x));
}
/* Return TRUE or FALSE depending on whether the first SET in INSN
   has source and destination with matching CC modes, and that the
   CC mode is at least as constrained as REQ_MODE.  */

bool
ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
{
  rtx set;
  enum machine_mode set_mode;

  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  gcc_assert (GET_CODE (set) == SET);
  gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);

  set_mode = GET_MODE (SET_DEST (set));
  switch (set_mode)
    {
    case CCNOmode:
      if (req_mode != CCNOmode
	  && (req_mode != CCmode
	      || XEXP (SET_SRC (set), 1) != const0_rtx))
	return false;
      break;
    case CCmode:
      if (req_mode == CCGCmode)
	return false;
      /* FALLTHRU */
    case CCGCmode:
      if (req_mode == CCGOCmode || req_mode == CCNOmode)
	return false;
      /* FALLTHRU */
    case CCGOCmode:
      if (req_mode == CCZmode)
	return false;
      /* FALLTHRU */
    case CCZmode:
      break;

    case CCAmode:
    case CCCmode:
    case CCOmode:
    case CCSmode:
      if (set_mode != req_mode)
	return false;
      break;

    default:
      gcc_unreachable ();
    }

  return GET_MODE (SET_SRC (set)) == set_mode;
}
/* Generate insn patterns to do an integer compare of OPERANDS.  */

static rtx
ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
{
  enum machine_mode cmpmode;
  rtx tmp, flags;

  cmpmode = SELECT_CC_MODE (code, op0, op1);
  flags = gen_rtx_REG (cmpmode, FLAGS_REG);

  /* This is very simple, but making the interface the same as in the
     FP case makes the rest of the code easier.  */
  tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
  emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
}
/* Figure out whether to use ordered or unordered fp comparisons.
   Return the appropriate mode to use.  */

enum machine_mode
ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
{
  /* ??? In order to make all comparisons reversible, we do all comparisons
     non-trapping when compiling for IEEE.  Once gcc is able to distinguish
     all forms trapping and nontrapping comparisons, we can make inequality
     comparisons trapping again, since it results in better code when using
     FCOM based compares.  */
  return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
}
enum machine_mode
ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
{
  enum machine_mode mode = GET_MODE (op0);

  if (SCALAR_FLOAT_MODE_P (mode))
    {
      gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
      return ix86_fp_compare_mode (code);
    }

  switch (code)
    {
      /* Only zero flag is needed.  */
    case EQ:			/* ZF=0 */
    case NE:			/* ZF!=0 */
      return CCZmode;
      /* Codes needing carry flag.  */
    case GEU:			/* CF=0 */
    case LTU:			/* CF=1 */
      /* Detect overflow checks.  They need just the carry flag.  */
      if (GET_CODE (op0) == PLUS
	  && rtx_equal_p (op1, XEXP (op0, 0)))
	return CCCmode;
      else
	return CCmode;
    case GTU:			/* CF=0 & ZF=0 */
    case LEU:			/* CF=1 | ZF=1 */
      /* Detect overflow checks.  They need just the carry flag.  */
      if (GET_CODE (op0) == MINUS
	  && rtx_equal_p (op1, XEXP (op0, 0)))
	return CCCmode;
      else
	return CCmode;
      /* Codes possibly doable only with sign flag when
	 comparing against zero.  */
    case GE:			/* SF=OF   or   SF=0 */
    case LT:			/* SF<>OF  or   SF=1 */
      if (op1 == const0_rtx)
	return CCGOCmode;
      else
	/* For other cases Carry flag is not required.  */
	return CCGCmode;
      /* Codes doable only with sign flag when comparing
	 against zero, but we miss jump instruction for it
	 so we need to use relational tests against overflow
	 that thus needs to be zero.  */
    case GT:			/* ZF=0 & SF=OF */
    case LE:			/* ZF=1 | SF<>OF */
      if (op1 == const0_rtx)
	return CCNOmode;
      else
	return CCGCmode;
      /* The strcmp pattern does (use flags), and combine may ask us
	 for a proper mode.  */
    case USE:
      return CCmode;
    default:
      gcc_unreachable ();
    }
}
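/* Example (editorial; not part of the original source): the CCCmode
   overflow-check case matches code such as

	unsigned int sum = a + b;
	if (sum < a)		LTU with op0 = (plus a b), op1 = a
	  ...			carry set iff the addition wrapped

   where only the carry flag matters, so returning a mode that promises
   just CF lets more flag-setting instruction forms satisfy the use.  */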
/* Return the fixed registers used for condition codes.  */

static bool
ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
{
  *p1 = FLAGS_REG;
  *p2 = FPSR_REG;
  return true;
}
/* If two condition code modes are compatible, return a condition code
   mode which is compatible with both.  Otherwise, return
   VOIDmode.  */

static enum machine_mode
ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
{
  if (m1 == m2)
    return m1;

  if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
    return VOIDmode;

  if ((m1 == CCGCmode && m2 == CCGOCmode)
      || (m1 == CCGOCmode && m2 == CCGCmode))
    return CCGCmode;

  switch (m1)
    {
    default:
      gcc_unreachable ();

    case CCmode:
    case CCGCmode:
    case CCGOCmode:
    case CCNOmode:
    case CCAmode:
    case CCCmode:
    case CCOmode:
    case CCSmode:
    case CCZmode:
      switch (m2)
	{
	default:
	  return VOIDmode;

	case CCmode:
	case CCGCmode:
	case CCGOCmode:
	case CCNOmode:
	case CCAmode:
	case CCCmode:
	case CCOmode:
	case CCSmode:
	case CCZmode:
	  return CCmode;
	}

    case CCFPmode:
    case CCFPUmode:
      /* These are only compatible with themselves, which we already
	 checked above.  */
      return VOIDmode;
    }
}
/* Return a comparison we can do and that it is equivalent to
   swap_condition (code) apart possibly from orderedness.
   But, never change orderedness if TARGET_IEEE_FP, returning
   UNKNOWN in that case if necessary.  */

static enum rtx_code
ix86_fp_swap_condition (enum rtx_code code)
{
  switch (code)
    {
    case GT:			/* GTU - CF=0 & ZF=0 */
      return TARGET_IEEE_FP ? UNKNOWN : UNLT;
    case GE:			/* GEU - CF=0 */
      return TARGET_IEEE_FP ? UNKNOWN : UNLE;
    case UNLT:			/* LTU - CF=1 */
      return TARGET_IEEE_FP ? UNKNOWN : GT;
    case UNLE:			/* LEU - CF=1 | ZF=1 */
      return TARGET_IEEE_FP ? UNKNOWN : GE;
    default:
      return swap_condition (code);
    }
}
/* Return cost of comparison CODE using the best strategy for performance.
   All following functions do use number of instructions as a cost metric.
   In future this should be tweaked to compute bytes for optimize_size and
   take into account performance of various instructions on various CPUs.  */

static int
ix86_fp_comparison_cost (enum rtx_code code)
{
  int arith_cost;

  /* The cost of code using bit-twiddling on %ah.  */
  switch (code)
    {
    case UNLE:
    case UNLT:
    case LTGT:
    case GT:
    case GE:
    case UNORDERED:
    case ORDERED:
    case UNEQ:
      arith_cost = 4;
      break;
    case LT:
    case NE:
    case EQ:
    case UNGE:
      arith_cost = TARGET_IEEE_FP ? 5 : 4;
      break;
    case LE:
    case UNGT:
      arith_cost = TARGET_IEEE_FP ? 6 : 4;
      break;
    default:
      gcc_unreachable ();
    }

  switch (ix86_fp_comparison_strategy (code))
    {
    case IX86_FPCMP_COMI:
      return arith_cost > 4 ? 3 : 2;
    case IX86_FPCMP_SAHF:
      return arith_cost > 4 ? 4 : 3;
    default:
      return arith_cost;
    }
}
/* Return strategy to use for floating-point.  We assume that fcomi is always
   preferable where available, since that is also true when looking at size
   (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test).  */

enum ix86_fpcmp_strategy
ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED)
{
  /* Do fcomi/sahf based test when profitable.  */

  if (TARGET_CMOVE)
    return IX86_FPCMP_COMI;

  if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_function_for_size_p (cfun)))
    return IX86_FPCMP_SAHF;

  return IX86_FPCMP_ARITH;
}
/* Swap, force into registers, or otherwise massage the two operands
   to a fp comparison.  The operands are updated in place; the new
   comparison code is returned.  */

static enum rtx_code
ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
{
  enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
  rtx op0 = *pop0, op1 = *pop1;
  enum machine_mode op_mode = GET_MODE (op0);
  int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);

  /* All of the unordered compare instructions only work on registers.
     The same is true of the fcomi compare instructions.  The XFmode
     compare instructions require registers except when comparing
     against zero or when converting operand 1 from fixed point to
     floating point.  */

  if (!is_sse
      && (fpcmp_mode == CCFPUmode
	  || (op_mode == XFmode
	      && ! (standard_80387_constant_p (op0) == 1
		    || standard_80387_constant_p (op1) == 1)
	      && GET_CODE (op1) != FLOAT)
	  || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
    {
      op0 = force_reg (op_mode, op0);
      op1 = force_reg (op_mode, op1);
    }
  else
    {
      /* %%% We only allow op1 in memory; op0 must be st(0).  So swap
	 things around if they appear profitable, otherwise force op0
	 into a register.  */

      if (standard_80387_constant_p (op0) == 0
	  || (MEM_P (op0)
	      && ! (standard_80387_constant_p (op1) == 0
		    || MEM_P (op1))))
	{
	  enum rtx_code new_code = ix86_fp_swap_condition (code);
	  if (new_code != UNKNOWN)
	    {
	      rtx tmp;
	      tmp = op0, op0 = op1, op1 = tmp;
	      code = new_code;
	    }
	}

      if (!REG_P (op0))
	op0 = force_reg (op_mode, op0);

      if (CONSTANT_P (op1))
	{
	  int tmp = standard_80387_constant_p (op1);
	  if (tmp == 0)
	    op1 = validize_mem (force_const_mem (op_mode, op1));
	  else if (tmp == 1)
	    {
	      if (TARGET_CMOVE)
		op1 = force_reg (op_mode, op1);
	    }
	  else
	    op1 = force_reg (op_mode, op1);
	}
    }

  /* Try to rearrange the comparison to make it cheaper.  */
  if (ix86_fp_comparison_cost (code)
      > ix86_fp_comparison_cost (swap_condition (code))
      && (REG_P (op1) || can_create_pseudo_p ()))
    {
      rtx tmp;
      tmp = op0, op0 = op1, op1 = tmp;
      code = swap_condition (code);
      if (!REG_P (op0))
	op0 = force_reg (op_mode, op0);
    }

  *pop0 = op0;
  *pop1 = op1;
  return code;
}
/* Convert comparison codes we use to represent FP comparison to integer
   code that will result in proper branch.  Return UNKNOWN if no such code
   is available.  */

static enum rtx_code
ix86_fp_compare_code_to_integer (enum rtx_code code)
{
  switch (code)
    {
    case GT:
      return GTU;
    case GE:
      return GEU;
    case ORDERED:
    case UNORDERED:
      return code;
    case UNEQ:
      return EQ;
    case UNLT:
      return LTU;
    case UNLE:
      return LEU;
    case LTGT:
      return NE;
    default:
      return UNKNOWN;
    }
}
/* Generate insn patterns to do a floating point compare of OPERANDS.  */

static rtx
ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
{
  enum machine_mode fpcmp_mode, intcmp_mode;
  rtx tmp, tmp2;

  fpcmp_mode = ix86_fp_compare_mode (code);
  code = ix86_prepare_fp_compare_args (code, &op0, &op1);

  /* Do fcomi/sahf based test when profitable.  */
  switch (ix86_fp_comparison_strategy (code))
    {
    case IX86_FPCMP_COMI:
      intcmp_mode = fpcmp_mode;
      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
      tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
                         tmp);
      emit_insn (tmp);
      break;

    case IX86_FPCMP_SAHF:
      intcmp_mode = fpcmp_mode;
      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
      tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
                         tmp);

      if (!scratch)
        scratch = gen_reg_rtx (HImode);
      tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
      break;

    case IX86_FPCMP_ARITH:
      /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first.  */
      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
      tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
      if (!scratch)
        scratch = gen_reg_rtx (HImode);
      emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));

      /* In the unordered case, we have to check C2 for NaN's, which
         doesn't happen to work out to anything nice combination-wise.
         So do some bit twiddling on the value we've got in AH to come
         up with an appropriate set of condition codes.  */

      intcmp_mode = CCNOmode;
      switch (code)
        {
        case GT:
        case UNGT:
          if (code == GT || !TARGET_IEEE_FP)
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
              code = EQ;
            }
          else
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
              emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
              intcmp_mode = CCmode;
              code = GEU;
            }
          break;
        case LT:
        case UNLT:
          if (code == LT && TARGET_IEEE_FP)
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
              intcmp_mode = CCmode;
              code = EQ;
            }
          else
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
              code = NE;
            }
          break;
        case GE:
        case UNGE:
          if (code == GE || !TARGET_IEEE_FP)
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
              code = EQ;
            }
          else
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
              code = NE;
            }
          break;
        case LE:
        case UNLE:
          if (code == LE && TARGET_IEEE_FP)
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
              emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
              intcmp_mode = CCmode;
              code = LTU;
            }
          else
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
              code = NE;
            }
          break;
        case EQ:
        case UNEQ:
          if (code == EQ && TARGET_IEEE_FP)
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
              intcmp_mode = CCmode;
              code = EQ;
            }
          else
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
              code = NE;
            }
          break;
        case NE:
        case LTGT:
          if (code == NE && TARGET_IEEE_FP)
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
                                             GEN_INT (0x40)));
              code = NE;
            }
          else
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
              code = EQ;
            }
          break;

        case UNORDERED:
          emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
          code = NE;
          break;
        case ORDERED:
          emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
          code = EQ;
          break;

        default:
          gcc_unreachable ();
        }
      break;

    default:
      gcc_unreachable ();
    }

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode,
                         gen_rtx_REG (intcmp_mode, FLAGS_REG),
                         const0_rtx);
}
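/* Background for the magic masks above (exposition only): after fnstsw,
   the x87 condition bits land in %ah as C0 = bit 0 (0x01), C2 = bit 2
   (0x04) and C3 = bit 6 (0x40), so 0x45 selects all three.  For an
   ordered compare C0 acts like carry and C3 like zero, while C2 flags
   an unordered result.  E.g. the GE test above

       testb $0x05, %ah ; jz ...

   accepts only C0 = C2 = 0, i.e. "not less and not unordered".  */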
static rtx
ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
{
  rtx ret;

  if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
    ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
  else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
    {
      gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
      ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
    }
  else
    ret = ix86_expand_int_compare (code, op0, op1);

  return ret;
}
void
ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
{
  enum machine_mode mode = GET_MODE (op0);
  rtx tmp;

  switch (mode)
    {
    case SFmode:
    case DFmode:
    case XFmode:
    case QImode:
    case HImode:
    case SImode:
      simple:
      tmp = ix86_expand_compare (code, op0, op1);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
                                  gen_rtx_LABEL_REF (VOIDmode, label),
                                  pc_rtx);
      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
      return;

    case DImode:
      if (TARGET_64BIT)
        goto simple;
    case TImode:
      /* Expand DImode branch into multiple compare+branch.  */
      {
        rtx lo[2], hi[2], label2;
        enum rtx_code code1, code2, code3;
        enum machine_mode submode;

        if (CONSTANT_P (op0) && !CONSTANT_P (op1))
          {
            tmp = op0, op0 = op1, op1 = tmp;
            code = swap_condition (code);
          }

        split_double_mode (mode, &op0, 1, lo+0, hi+0);
        split_double_mode (mode, &op1, 1, lo+1, hi+1);

        submode = mode == DImode ? SImode : DImode;

        /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
           avoid two branches.  This costs one extra insn, so disable when
           optimizing for size.  */

        if ((code == EQ || code == NE)
            && (!optimize_insn_for_size_p ()
                || hi[1] == const0_rtx || lo[1] == const0_rtx))
          {
            rtx xor0, xor1;

            xor1 = hi[0];
            if (hi[1] != const0_rtx)
              xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
                                   NULL_RTX, 0, OPTAB_WIDEN);

            xor0 = lo[0];
            if (lo[1] != const0_rtx)
              xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
                                   NULL_RTX, 0, OPTAB_WIDEN);

            tmp = expand_binop (submode, ior_optab, xor1, xor0,
                                NULL_RTX, 0, OPTAB_WIDEN);

            ix86_expand_branch (code, tmp, const0_rtx, label);
            return;
          }

        /* Otherwise, if we are doing less-than or greater-or-equal-than,
           op1 is a constant and the low word is zero, then we can just
           examine the high word.  Similarly for low word -1 and
           less-or-equal-than or greater-than.  */

        if (CONST_INT_P (hi[1]))
          switch (code)
            {
            case LT: case LTU: case GE: case GEU:
              if (lo[1] == const0_rtx)
                {
                  ix86_expand_branch (code, hi[0], hi[1], label);
                  return;
                }
              break;
            case LE: case LEU: case GT: case GTU:
              if (lo[1] == constm1_rtx)
                {
                  ix86_expand_branch (code, hi[0], hi[1], label);
                  return;
                }
              break;
            default:
              break;
            }

        /* Otherwise, we need two or three jumps.  */

        label2 = gen_label_rtx ();

        code1 = code;
        code2 = swap_condition (code);
        code3 = unsigned_condition (code);

        switch (code)
          {
          case LT: case GT: case LTU: case GTU:
            break;

          case LE:   code1 = LT;  code2 = GT;  break;
          case GE:   code1 = GT;  code2 = LT;  break;
          case LEU:  code1 = LTU; code2 = GTU; break;
          case GEU:  code1 = GTU; code2 = LTU; break;

          case EQ:   code1 = UNKNOWN; code2 = NE;  break;
          case NE:   code2 = UNKNOWN; break;

          default:
            gcc_unreachable ();
          }

        /*
         * a < b =>
         *    if (hi(a) < hi(b)) goto true;
         *    if (hi(a) > hi(b)) goto false;
         *    if (lo(a) < lo(b)) goto true;
         *  false:
         */

        if (code1 != UNKNOWN)
          ix86_expand_branch (code1, hi[0], hi[1], label);
        if (code2 != UNKNOWN)
          ix86_expand_branch (code2, hi[0], hi[1], label2);

        ix86_expand_branch (code3, lo[0], lo[1], label);

        if (code2 != UNKNOWN)
          emit_label (label2);
        return;
      }

    default:
      gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
      goto simple;
    }
}
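/* Illustrative expansion (added for exposition, not literal output):
   for a DImode equality on a 32-bit target the branchless path above
   computes

       t1 = hi(a) ^ hi(b);
       t2 = lo(a) ^ lo(b);
       if ((t1 | t2) == 0) goto label;

   trading one extra ALU insn for the second conditional branch that
   the ordered codes need.  */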
/* Split branch based on floating point condition.  */
void
ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
                      rtx target1, rtx target2, rtx tmp, rtx pushed)
{
  rtx condition;
  rtx i;

  if (target2 != pc_rtx)
    {
      rtx tmp = target2;
      code = reverse_condition_maybe_unordered (code);
      target2 = target1;
      target1 = tmp;
    }

  condition = ix86_expand_fp_compare (code, op1, op2,
                                      tmp);

  /* Remove pushed operand from stack.  */
  if (pushed)
    ix86_free_from_memory (GET_MODE (pushed));

  i = emit_jump_insn (gen_rtx_SET
                      (VOIDmode, pc_rtx,
                       gen_rtx_IF_THEN_ELSE (VOIDmode,
                                             condition, target1, target2)));
  if (split_branch_probability >= 0)
    add_reg_note (i, REG_BR_PROB, GEN_INT (split_branch_probability));
}
void
ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
{
  rtx ret;

  gcc_assert (GET_MODE (dest) == QImode);

  ret = ix86_expand_compare (code, op0, op1);
  PUT_MODE (ret, QImode);
  emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
}
/* Expand comparison setting or clearing carry flag.  Return true when
   successful and set pop for the operation.  */
static bool
ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
{
  enum machine_mode mode =
    GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);

  /* Do not handle double-mode compares that go through special path.  */
  if (mode == (TARGET_64BIT ? TImode : DImode))
    return false;

  if (SCALAR_FLOAT_MODE_P (mode))
    {
      rtx compare_op, compare_seq;

      gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));

      /* Shortcut:  following common codes never translate
         into carry flag compares.  */
      if (code == EQ || code == NE || code == UNEQ || code == LTGT
          || code == ORDERED || code == UNORDERED)
        return false;

      /* These comparisons require zero flag; swap operands so they won't.  */
      if ((code == GT || code == UNLE || code == LE || code == UNGT)
          && !TARGET_IEEE_FP)
        {
          rtx tmp = op0;
          op0 = op1;
          op1 = tmp;
          code = swap_condition (code);
        }

      /* Try to expand the comparison and verify that we end up with
         a carry flag based comparison.  This fails to be true only when
         we decide to expand comparison using arithmetic, which is not
         a common scenario.  */
      start_sequence ();
      compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
      compare_seq = get_insns ();
      end_sequence ();

      if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
          || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
        code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
      else
        code = GET_CODE (compare_op);

      if (code != LTU && code != GEU)
        return false;

      emit_insn (compare_seq);
      *pop = compare_op;
      return true;
    }

  if (!INTEGRAL_MODE_P (mode))
    return false;

  switch (code)
    {
    case LTU:
    case GEU:
      break;

    /* Convert a==0 into (unsigned)a<1.  */
    case EQ:
    case NE:
      if (op1 != const0_rtx)
        return false;
      op1 = const1_rtx;
      code = (code == EQ ? LTU : GEU);
      break;

    /* Convert a>b into b<a or a>=b-1.  */
    case GTU:
    case LEU:
      if (CONST_INT_P (op1))
        {
          op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
          /* Bail out on overflow.  We still can swap operands but that
             would force loading of the constant into register.  */
          if (op1 == const0_rtx
              || !x86_64_immediate_operand (op1, GET_MODE (op1)))
            return false;
          code = (code == GTU ? GEU : LTU);
        }
      else
        {
          rtx tmp = op1;
          op1 = op0;
          op0 = tmp;
          code = (code == GTU ? LTU : GEU);
        }
      break;

    /* Convert a>=0 into (unsigned)a<0x80000000.  */
    case LT:
    case GE:
      if (mode == DImode || op1 != const0_rtx)
        return false;
      op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
      code = (code == LT ? GEU : LTU);
      break;
    case LE:
    case GT:
      if (mode == DImode || op1 != constm1_rtx)
        return false;
      op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
      code = (code == LE ? GEU : LTU);
      break;

    default:
      return false;
    }
  /* Swapping operands may cause constant to appear as first operand.  */
  if (!nonimmediate_operand (op0, VOIDmode))
    {
      if (!can_create_pseudo_p ())
        return false;
      op0 = force_reg (mode, op0);
    }
  *pop = ix86_expand_compare (code, op0, op1);
  gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
  return true;
}
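/* Worked example (added for exposition): a LEU compare against a
   constant, say "a <= 41", is rewritten above as "a < 42", i.e. LTU
   against op1 + 1.  LTU is exactly the carry flag after

       cmpl $42, %eax

   so the caller can consume the result with sbb/adc without any setcc
   or branch.  */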
bool
ix86_expand_int_movcc (rtx operands[])
{
  enum rtx_code code = GET_CODE (operands[1]), compare_code;
  rtx compare_seq, compare_op;
  enum machine_mode mode = GET_MODE (operands[0]);
  bool sign_bit_compare_p = false;
  rtx op0 = XEXP (operands[1], 0);
  rtx op1 = XEXP (operands[1], 1);

  start_sequence ();
  compare_op = ix86_expand_compare (code, op0, op1);
  compare_seq = get_insns ();
  end_sequence ();

  compare_code = GET_CODE (compare_op);

  if ((op1 == const0_rtx && (code == GE || code == LT))
      || (op1 == constm1_rtx && (code == GT || code == LE)))
    sign_bit_compare_p = true;

  /* Don't attempt mode expansion here -- if we had to expand 5 or 6
     HImode insns, we'd be swallowed in word prefix ops.  */

  if ((mode != HImode || TARGET_FAST_PREFIX)
      && (mode != (TARGET_64BIT ? TImode : DImode))
      && CONST_INT_P (operands[2])
      && CONST_INT_P (operands[3]))
    {
      rtx out = operands[0];
      HOST_WIDE_INT ct = INTVAL (operands[2]);
      HOST_WIDE_INT cf = INTVAL (operands[3]);
      HOST_WIDE_INT diff;

      diff = ct - cf;
      /* Sign bit compares are better done using shifts than we do by using
         sbb.  */
      if (sign_bit_compare_p
          || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
        {
          /* Detect overlap between destination and compare sources.  */
          rtx tmp = out;

          if (!sign_bit_compare_p)
            {
              rtx flags;
              bool fpcmp = false;

              compare_code = GET_CODE (compare_op);

              flags = XEXP (compare_op, 0);

              if (GET_MODE (flags) == CCFPmode
                  || GET_MODE (flags) == CCFPUmode)
                {
                  fpcmp = true;
                  compare_code
                    = ix86_fp_compare_code_to_integer (compare_code);
                }

              /* To simplify rest of code, restrict to the GEU case.  */
              if (compare_code == LTU)
                {
                  HOST_WIDE_INT tmp = ct;
                  ct = cf;
                  cf = tmp;
                  compare_code = reverse_condition (compare_code);
                  code = reverse_condition (code);
                }
              else
                {
                  if (fpcmp)
                    PUT_CODE (compare_op,
                              reverse_condition_maybe_unordered
                                (GET_CODE (compare_op)));
                  else
                    PUT_CODE (compare_op,
                              reverse_condition (GET_CODE (compare_op)));
                }
              diff = ct - cf;

              if (reg_overlap_mentioned_p (out, op0)
                  || reg_overlap_mentioned_p (out, op1))
                tmp = gen_reg_rtx (mode);

              if (mode == DImode)
                emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
              else
                emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
                                                 flags, compare_op));
            }
          else
            {
              if (code == GT || code == GE)
                code = reverse_condition (code);
              else
                {
                  HOST_WIDE_INT tmp = ct;
                  ct = cf;
                  cf = tmp;
                  diff = ct - cf;
                }
              tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
            }

          if (diff == 1)
            {
              /*
               * cmpl op0,op1
               * sbbl dest,dest
               * [addl dest, ct]
               *
               * Size 5 - 8.
               */
              if (ct)
                tmp = expand_simple_binop (mode, PLUS,
                                           tmp, GEN_INT (ct),
                                           copy_rtx (tmp), 1, OPTAB_DIRECT);
            }
          else if (cf == -1)
            {
              /*
               * cmpl op0,op1
               * sbbl dest,dest
               * orl $ct, dest
               *
               * Size 8.
               */
              tmp = expand_simple_binop (mode, IOR,
                                         tmp, GEN_INT (ct),
                                         copy_rtx (tmp), 1, OPTAB_DIRECT);
            }
          else if (diff == -1 && ct)
            {
              /*
               * cmpl op0,op1
               * sbbl dest,dest
               * notl dest
               * [addl dest, cf]
               *
               * Size 8 - 11.
               */
              tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
              if (cf)
                tmp = expand_simple_binop (mode, PLUS,
                                           copy_rtx (tmp), GEN_INT (cf),
                                           copy_rtx (tmp), 1, OPTAB_DIRECT);
            }
          else
            {
              /*
               * cmpl op0,op1
               * sbbl dest,dest
               * [notl dest]
               * andl cf - ct, dest
               * [addl dest, ct]
               *
               * Size 8 - 11.
               */

              if (cf == 0)
                {
                  cf = ct;
                  ct = 0;
                  tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
                }

              tmp = expand_simple_binop (mode, AND,
                                         copy_rtx (tmp),
                                         gen_int_mode (cf - ct, mode),
                                         copy_rtx (tmp), 1, OPTAB_DIRECT);
              if (ct)
                tmp = expand_simple_binop (mode, PLUS,
                                           copy_rtx (tmp), GEN_INT (ct),
                                           copy_rtx (tmp), 1, OPTAB_DIRECT);
            }

          if (!rtx_equal_p (tmp, out))
            emit_move_insn (copy_rtx (out), copy_rtx (tmp));

          return true;
        }

      if (diff < 0)
        {
          enum machine_mode cmp_mode = GET_MODE (op0);
          HOST_WIDE_INT tmp;

          tmp = ct, ct = cf, cf = tmp;
          diff = -diff;

          if (SCALAR_FLOAT_MODE_P (cmp_mode))
            {
              gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));

              /* We may be reversing unordered compare to normal compare, that
                 is not valid in general (we may convert non-trapping condition
                 to trapping one), however on i386 we currently emit all
                 comparisons unordered.  */
              compare_code = reverse_condition_maybe_unordered (compare_code);
              code = reverse_condition_maybe_unordered (code);
            }
          else
            {
              compare_code = reverse_condition (compare_code);
              code = reverse_condition (code);
            }
        }

      compare_code = UNKNOWN;
      if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
          && CONST_INT_P (op1))
        {
          if (op1 == const0_rtx
              && (code == LT || code == GE))
            compare_code = code;
          else if (op1 == constm1_rtx)
            {
              if (code == LE)
                compare_code = LT;
              else if (code == GT)
                compare_code = GE;
            }
        }

      /* Optimize dest = (op0 < 0) ? -1 : cf.  */
      if (compare_code != UNKNOWN
          && GET_MODE (op0) == GET_MODE (out)
          && (cf == -1 || ct == -1))
        {
          /* If lea code below could be used, only optimize
             if it results in a 2 insn sequence.  */

          if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
                 || diff == 3 || diff == 5 || diff == 9)
              || (compare_code == LT && ct == -1)
              || (compare_code == GE && cf == -1))
            {
              /*
               * notl op1 (if necessary)
               * sarl $31, op1
               * orl cf, op1
               */
              if (ct != -1)
                {
                  cf = ct;
                  ct = -1;
                  code = reverse_condition (code);
                }

              out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);

              out = expand_simple_binop (mode, IOR,
                                         out, GEN_INT (cf),
                                         out, 1, OPTAB_DIRECT);
              if (out != operands[0])
                emit_move_insn (operands[0], out);

              return true;
            }
        }

      if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
           || diff == 3 || diff == 5 || diff == 9)
          && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
          && (mode != DImode
              || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
        {
          /*
           * xorl dest,dest
           * cmpl op1,op2
           * setcc dest
           * lea cf(dest*(ct-cf)),dest
           *
           * Size 14.
           *
           * This also catches the degenerate setcc-only case.
           */

          rtx tmp;
          int nops;

          out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);

          nops = 0;
          /* On x86_64 the lea instruction operates on Pmode, so we need
             to get arithmetics done in proper mode to match.  */
          if (diff == 1)
            tmp = copy_rtx (out);
          else
            {
              rtx out1;
              out1 = copy_rtx (out);
              tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
              nops++;
              if (diff & 1)
                {
                  tmp = gen_rtx_PLUS (mode, tmp, out1);
                  nops++;
                }
            }
          if (cf != 0)
            {
              tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
              nops++;
            }
          if (!rtx_equal_p (tmp, out))
            {
              if (nops == 1)
                out = force_operand (tmp, copy_rtx (out));
              else
                emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out),
                                        copy_rtx (tmp)));
            }
          if (!rtx_equal_p (out, operands[0]))
            emit_move_insn (operands[0], copy_rtx (out));

          return true;
        }

      /*
       * General case:                  Jumpful:
       *   xorl dest,dest               cmpl op1, op2
       *   cmpl op1, op2                movl ct, dest
       *   setcc dest                   jcc 1f
       *   decl dest                    movl cf, dest
       *   andl (cf-ct),dest            1:
       *   addl ct,dest
       *
       * Size 20.                       Size 14.
       *
       * This is reasonably steep, but branch mispredict costs are
       * high on modern cpus, so consider failing only if optimizing
       * for space.
       */

      if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
          && BRANCH_COST (optimize_insn_for_speed_p (),
                          false) >= 2)
        {
          if (cf == 0)
            {
              enum machine_mode cmp_mode = GET_MODE (op0);

              cf = ct;
              ct = 0;

              if (SCALAR_FLOAT_MODE_P (cmp_mode))
                {
                  gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));

                  /* We may be reversing unordered compare to normal compare,
                     that is not valid in general (we may convert non-trapping
                     condition to trapping one), however on i386 we currently
                     emit all comparisons unordered.  */
                  code = reverse_condition_maybe_unordered (code);
                }
              else
                {
                  code = reverse_condition (code);
                  if (compare_code != UNKNOWN)
                    compare_code = reverse_condition (compare_code);
                }
            }

          if (compare_code != UNKNOWN)
            {
              /* notl op1       (if needed)
                 sarl $31, op1
                 andl (cf-ct), op1
                 addl ct, op1

                 For x < 0 (resp. x <= -1) there will be no notl,
                 so if possible swap the constants to get rid of the
                 complement.
                 True/false will be -1/0 while code below (store flag
                 followed by decrement) is 0/-1, so the constants need
                 to be exchanged once more.  */

              if (compare_code == GE || !cf)
                {
                  code = reverse_condition (code);
                  compare_code = LT;
                }
              else
                {
                  HOST_WIDE_INT tmp = cf;
                  cf = ct;
                  ct = tmp;
                }

              out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
            }
          else
            {
              out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);

              out = expand_simple_binop (mode, PLUS, copy_rtx (out),
                                         constm1_rtx,
                                         copy_rtx (out), 1, OPTAB_DIRECT);
            }

          out = expand_simple_binop (mode, AND, copy_rtx (out),
                                     gen_int_mode (cf - ct, mode),
                                     copy_rtx (out), 1, OPTAB_DIRECT);
          if (ct)
            out = expand_simple_binop (mode, PLUS, copy_rtx (out),
                                       GEN_INT (ct),
                                       copy_rtx (out), 1, OPTAB_DIRECT);
          if (!rtx_equal_p (out, operands[0]))
            emit_move_insn (operands[0], copy_rtx (out));

          return true;
        }
    }

  if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
    {
      /* Try a few things more with specific constants and a variable.  */

      optab op;
      rtx var, orig_out, out, tmp;

      if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
        return false;

      /* If one of the two operands is an interesting constant, load a
         constant with the above and mask it in with a logical operation.  */

      if (CONST_INT_P (operands[2]))
        {
          var = operands[3];
          if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
            operands[3] = constm1_rtx, op = and_optab;
          else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
            operands[3] = const0_rtx, op = ior_optab;
          else
            return false;
        }
      else if (CONST_INT_P (operands[3]))
        {
          var = operands[2];
          if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
            operands[2] = constm1_rtx, op = and_optab;
          else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
            operands[2] = const0_rtx, op = ior_optab;
          else
            return false;
        }
      else
        return false;

      orig_out = operands[0];
      tmp = gen_reg_rtx (mode);
      operands[0] = tmp;

      /* Recurse to get the constant loaded.  */
      if (ix86_expand_int_movcc (operands) == 0)
        return false;

      /* Mask in the interesting variable.  */
      out = expand_binop (mode, op, var, tmp, orig_out, 0,
                          OPTAB_WIDEN);
      if (!rtx_equal_p (out, orig_out))
        emit_move_insn (copy_rtx (orig_out), copy_rtx (out));

      return true;
    }

  /*
   * For comparison with above,
   *
   * movl cf,dest
   * movl ct,tmp
   * cmpl op1,op2
   * cmovcc tmp,dest
   *
   * Size 15.
   */

  if (! nonimmediate_operand (operands[2], mode))
    operands[2] = force_reg (mode, operands[2]);
  if (! nonimmediate_operand (operands[3], mode))
    operands[3] = force_reg (mode, operands[3]);

  if (! register_operand (operands[2], VOIDmode)
      && (mode == QImode
          || ! register_operand (operands[3], VOIDmode)))
    operands[2] = force_reg (mode, operands[2]);

  if (mode == QImode
      && ! register_operand (operands[3], VOIDmode))
    operands[3] = force_reg (mode, operands[3]);

  emit_insn (compare_seq);
  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
                          gen_rtx_IF_THEN_ELSE (mode,
                                                compare_op, operands[2],
                                                operands[3])));
  return true;
}
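/* Worked example (added for exposition, not literal output): for
   dest = (a < b) ? 17 : 12 the diff == 5 lea path above builds roughly

       xorl  %eax, %eax
       cmpl  %ebx, %ecx
       setcc %al                    ; dest = 0 or 1
       leal  12(%eax,%eax,4), %eax  ; dest = 12 + dest * 5

   i.e. a single lea scales the 0/1 flag by ct - cf and adds cf, with
   no branch and no cmov.  */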
/* Swap, force into registers, or otherwise massage the two operands
   to an sse comparison with a mask result.  Thus we differ a bit from
   ix86_prepare_fp_compare_args which expects to produce a flags result.

   The DEST operand exists to help determine whether to commute commutative
   operators.  The POP0/POP1 operands are updated in place.  The new
   comparison code is returned, or UNKNOWN if not implementable.  */

static enum rtx_code
ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
                                  rtx *pop0, rtx *pop1)
{
  rtx tmp;

  switch (code)
    {
    case LTGT:
    case UNEQ:
      /* We have no LTGT as an operator.  We could implement it with
         NE & ORDERED, but this requires an extra temporary.  It's
         not clear that it's worth it.  */
      return UNKNOWN;

    case LT:
    case LE:
    case UNGT:
    case UNGE:
      /* These are supported directly.  */
      break;

    case EQ:
    case NE:
    case UNORDERED:
    case ORDERED:
      /* For commutative operators, try to canonicalize the destination
         operand to be first in the comparison - this helps reload to
         avoid extra moves.  */
      if (!dest || !rtx_equal_p (dest, *pop1))
        break;
      /* FALLTHRU */

    case GE:
    case GT:
    case UNLE:
    case UNLT:
      /* These are not supported directly.  Swap the comparison operands
         to transform into something that is supported.  */
      tmp = *pop0;
      *pop0 = *pop1;
      *pop1 = tmp;
      code = swap_condition (code);
      break;

    default:
      gcc_unreachable ();
    }

  return code;
}
/* Detect conditional moves that exactly match min/max operational
   semantics.  Note that this is IEEE safe, as long as we don't
   interchange the operands.

   Returns FALSE if this conditional move doesn't match a MIN/MAX,
   and TRUE if the operation is successful and instructions are emitted.  */

static bool
ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
                           rtx cmp_op1, rtx if_true, rtx if_false)
{
  enum machine_mode mode;
  bool is_min;
  rtx tmp;

  if (code == LT)
    ;
  else if (code == UNGE)
    {
      tmp = if_true;
      if_true = if_false;
      if_false = tmp;
    }
  else
    return false;

  if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
    is_min = true;
  else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
    is_min = false;
  else
    return false;

  mode = GET_MODE (dest);

  /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
     but MODE may be a vector mode and thus not appropriate.  */
  if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
    {
      int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
      rtvec v;

      if_true = force_reg (mode, if_true);
      v = gen_rtvec (2, if_true, if_false);
      tmp = gen_rtx_UNSPEC (mode, v, u);
    }
  else
    {
      code = is_min ? SMIN : SMAX;
      tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
    }

  emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
  return true;
}
/* Expand an sse vector comparison.  Return the register with the result.  */

static rtx
ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
                     rtx op_true, rtx op_false)
{
  enum machine_mode mode = GET_MODE (dest);
  rtx x;

  cmp_op0 = force_reg (mode, cmp_op0);
  if (!nonimmediate_operand (cmp_op1, mode))
    cmp_op1 = force_reg (mode, cmp_op1);

  if (optimize
      || reg_overlap_mentioned_p (dest, op_true)
      || reg_overlap_mentioned_p (dest, op_false))
    dest = gen_reg_rtx (mode);

  x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
  emit_insn (gen_rtx_SET (VOIDmode, dest, x));

  return dest;
}
/* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
   operations.  This is used for both scalar and vector conditional moves.  */

static void
ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
{
  enum machine_mode mode = GET_MODE (dest);
  rtx x;

  if (op_false == CONST0_RTX (mode))
    {
      op_true = force_reg (mode, op_true);
      x = gen_rtx_AND (mode, cmp, op_true);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
  else if (op_true == CONST0_RTX (mode))
    {
      op_false = force_reg (mode, op_false);
      x = gen_rtx_NOT (mode, cmp);
      x = gen_rtx_AND (mode, x, op_false);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
  else if (TARGET_XOP)
    {
      rtx pcmov = gen_rtx_SET (mode, dest,
                               gen_rtx_IF_THEN_ELSE (mode, cmp,
                                                     op_true,
                                                     op_false));
      emit_insn (pcmov);
    }
  else
    {
      rtx t2, t3;

      op_true = force_reg (mode, op_true);
      op_false = force_reg (mode, op_false);

      t2 = gen_reg_rtx (mode);
      if (optimize)
        t3 = gen_reg_rtx (mode);
      else
        t3 = dest;

      x = gen_rtx_AND (mode, op_true, cmp);
      emit_insn (gen_rtx_SET (VOIDmode, t2, x));

      x = gen_rtx_NOT (mode, cmp);
      x = gen_rtx_AND (mode, x, op_false);
      emit_insn (gen_rtx_SET (VOIDmode, t3, x));

      x = gen_rtx_IOR (mode, t3, t2);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
}
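/* The general case above computes, in vector terms (exposition only):

       dest = (cmp & op_true) | (~cmp & op_false)

   which is correct because an SSE compare produces an all-ones or
   all-zeros mask per element.  XOP's vpcmov collapses the three logic
   ops into one instruction, and a constant-zero arm reduces the
   sequence to a single and/andn.  */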
/* Expand a floating-point conditional move.  Return true if successful.  */

bool
ix86_expand_fp_movcc (rtx operands[])
{
  enum machine_mode mode = GET_MODE (operands[0]);
  enum rtx_code code = GET_CODE (operands[1]);
  rtx tmp, compare_op;
  rtx op0 = XEXP (operands[1], 0);
  rtx op1 = XEXP (operands[1], 1);

  if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
    {
      enum machine_mode cmode;

      /* Since we've no cmove for sse registers, don't force bad register
         allocation just to gain access to it.  Deny movcc when the
         comparison mode doesn't match the move mode.  */
      cmode = GET_MODE (op0);
      if (cmode == VOIDmode)
        cmode = GET_MODE (op1);
      if (cmode != mode)
        return false;

      code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
      if (code == UNKNOWN)
        return false;

      if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
                                     operands[2], operands[3]))
        return true;

      tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
                                 operands[2], operands[3]);
      ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
      return true;
    }

  /* The floating point conditional move instructions don't directly
     support conditions resulting from a signed integer comparison.  */

  compare_op = ix86_expand_compare (code, op0, op1);
  if (!fcmov_comparison_operator (compare_op, VOIDmode))
    {
      tmp = gen_reg_rtx (QImode);
      ix86_expand_setcc (tmp, code, op0, op1);

      compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
    }

  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
                          gen_rtx_IF_THEN_ELSE (mode, compare_op,
                                                operands[2], operands[3])));

  return true;
}
/* Expand a floating-point vector conditional move; a vcond operation
   rather than a movcc operation.  */

bool
ix86_expand_fp_vcond (rtx operands[])
{
  enum rtx_code code = GET_CODE (operands[3]);
  rtx cmp;

  code = ix86_prepare_sse_fp_compare_args (operands[0], code,
                                           &operands[4], &operands[5]);
  if (code == UNKNOWN)
    return false;

  if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
                                 operands[5], operands[1], operands[2]))
    return true;

  cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
                             operands[1], operands[2]);
  ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
  return true;
}
/* Expand a signed/unsigned integral vector conditional move.  */

bool
ix86_expand_int_vcond (rtx operands[])
{
  enum machine_mode mode = GET_MODE (operands[0]);
  enum rtx_code code = GET_CODE (operands[3]);
  bool negate = false;
  rtx x, cop0, cop1;

  cop0 = operands[4];
  cop1 = operands[5];

  /* XOP supports all of the comparisons on all vector int types.  */
  if (!TARGET_XOP)
    {
      /* Canonicalize the comparison to EQ, GT, GTU.  */
      switch (code)
        {
        case EQ:
        case GT:
        case GTU:
          break;

        case NE:
        case LE:
        case LEU:
          code = reverse_condition (code);
          negate = true;
          break;

        case GE:
        case GEU:
          code = reverse_condition (code);
          negate = true;
          /* FALLTHRU */

        case LT:
        case LTU:
          code = swap_condition (code);
          x = cop0, cop0 = cop1, cop1 = x;
          break;

        default:
          gcc_unreachable ();
        }

      /* Only SSE4.1/SSE4.2 supports V2DImode.  */
      if (mode == V2DImode)
        {
          switch (code)
            {
            case EQ:
              /* SSE4.1 supports EQ.  */
              if (!TARGET_SSE4_1)
                return false;
              break;

            case GT:
            case GTU:
              /* SSE4.2 supports GT/GTU.  */
              if (!TARGET_SSE4_2)
                return false;
              break;

            default:
              gcc_unreachable ();
            }
        }

      /* Unsigned parallel compare is not supported by the hardware.
         Play some tricks to turn this into a signed comparison
         against 0.  */
      if (code == GTU)
        {
          cop0 = force_reg (mode, cop0);

          switch (mode)
            {
            case V4SImode:
            case V2DImode:
              {
                rtx t1, t2, mask;
                rtx (*gen_sub3) (rtx, rtx, rtx);

                /* Subtract (-(INT MAX) - 1) from both operands to make
                   them signed.  */
                mask = ix86_build_signbit_mask (mode, true, false);
                gen_sub3 = (mode == V4SImode
                            ? gen_subv4si3 : gen_subv2di3);
                t1 = gen_reg_rtx (mode);
                emit_insn (gen_sub3 (t1, cop0, mask));

                t2 = gen_reg_rtx (mode);
                emit_insn (gen_sub3 (t2, cop1, mask));

                cop0 = t1;
                cop1 = t2;
                code = GT;
              }
              break;

            case V16QImode:
            case V8HImode:
              /* Perform a parallel unsigned saturating subtraction.  */
              x = gen_reg_rtx (mode);
              emit_insn (gen_rtx_SET (VOIDmode, x,
                                      gen_rtx_US_MINUS (mode, cop0, cop1)));

              cop0 = x;
              cop1 = CONST0_RTX (mode);
              code = EQ;
              negate = !negate;
              break;

            default:
              gcc_unreachable ();
            }
        }
    }

  x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
                           operands[1+negate], operands[2-negate]);

  ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
                         operands[2-negate]);
  return true;
}
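/* Why the bias trick above works (exposition only): unsigned x > y
   holds iff (x - 0x80000000) > (y - 0x80000000) as signed values,
   because subtracting the sign-bit constant flips the sign bit of both
   operands while leaving their relative order intact.  So the signed
   pcmpgt instructions can stand in for the missing unsigned compares
   once both inputs are offset by the mask built with
   ix86_build_signbit_mask.  */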
/* Unpack OP[1] into the next wider integer vector type.  UNSIGNED_P is
   true if we should do zero extension, else sign extension.  HIGH_P is
   true if we want the N/2 high elements, else the low elements.  */

void
ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
{
  enum machine_mode imode = GET_MODE (operands[1]);
  rtx tmp, dest;

  if (TARGET_SSE4_1)
    {
      rtx (*unpack)(rtx, rtx);

      switch (imode)
        {
        case V16QImode:
          if (unsigned_p)
            unpack = gen_sse4_1_zero_extendv8qiv8hi2;
          else
            unpack = gen_sse4_1_sign_extendv8qiv8hi2;
          break;
        case V8HImode:
          if (unsigned_p)
            unpack = gen_sse4_1_zero_extendv4hiv4si2;
          else
            unpack = gen_sse4_1_sign_extendv4hiv4si2;
          break;
        case V4SImode:
          if (unsigned_p)
            unpack = gen_sse4_1_zero_extendv2siv2di2;
          else
            unpack = gen_sse4_1_sign_extendv2siv2di2;
          break;
        default:
          gcc_unreachable ();
        }

      if (high_p)
        {
          /* Shift higher 8 bytes to lower 8 bytes.  */
          tmp = gen_reg_rtx (imode);
          emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, tmp),
                                         gen_lowpart (V1TImode, operands[1]),
                                         GEN_INT (64)));
        }
      else
        tmp = operands[1];

      emit_insn (unpack (operands[0], tmp));
    }
  else
    {
      rtx (*unpack)(rtx, rtx, rtx);

      switch (imode)
        {
        case V16QImode:
          if (high_p)
            unpack = gen_vec_interleave_highv16qi;
          else
            unpack = gen_vec_interleave_lowv16qi;
          break;
        case V8HImode:
          if (high_p)
            unpack = gen_vec_interleave_highv8hi;
          else
            unpack = gen_vec_interleave_lowv8hi;
          break;
        case V4SImode:
          if (high_p)
            unpack = gen_vec_interleave_highv4si;
          else
            unpack = gen_vec_interleave_lowv4si;
          break;
        default:
          gcc_unreachable ();
        }

      dest = gen_lowpart (imode, operands[0]);

      if (unsigned_p)
        tmp = force_reg (imode, CONST0_RTX (imode));
      else
        tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
                                   operands[1], pc_rtx, pc_rtx);

      emit_insn (unpack (dest, operands[1], tmp));
    }
}
/* Expand conditional increment or decrement using adc/sbb instructions.
   The default case using setcc followed by the conditional move can be
   done by generic code.  */
bool
ix86_expand_int_addcc (rtx operands[])
{
  enum rtx_code code = GET_CODE (operands[1]);
  rtx flags;
  rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
  rtx compare_op;
  rtx val = const0_rtx;
  bool fpcmp = false;
  enum machine_mode mode;
  rtx op0 = XEXP (operands[1], 0);
  rtx op1 = XEXP (operands[1], 1);

  if (operands[3] != const1_rtx
      && operands[3] != constm1_rtx)
    return false;
  if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
    return false;
  code = GET_CODE (compare_op);

  flags = XEXP (compare_op, 0);

  if (GET_MODE (flags) == CCFPmode
      || GET_MODE (flags) == CCFPUmode)
    {
      fpcmp = true;
      code = ix86_fp_compare_code_to_integer (code);
    }

  if (code != LTU)
    {
      val = constm1_rtx;
      if (fpcmp)
        PUT_CODE (compare_op,
                  reverse_condition_maybe_unordered
                    (GET_CODE (compare_op)));
      else
        PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
    }

  mode = GET_MODE (operands[0]);

  /* Construct either adc or sbb insn.  */
  if ((code == LTU) == (operands[3] == constm1_rtx))
    {
      switch (mode)
        {
        case QImode:
          insn = gen_subqi3_carry;
          break;
        case HImode:
          insn = gen_subhi3_carry;
          break;
        case SImode:
          insn = gen_subsi3_carry;
          break;
        case DImode:
          insn = gen_subdi3_carry;
          break;
        default:
          gcc_unreachable ();
        }
    }
  else
    {
      switch (mode)
        {
        case QImode:
          insn = gen_addqi3_carry;
          break;
        case HImode:
          insn = gen_addhi3_carry;
          break;
        case SImode:
          insn = gen_addsi3_carry;
          break;
        case DImode:
          insn = gen_adddi3_carry;
          break;
        default:
          gcc_unreachable ();
        }
    }
  emit_insn (insn (operands[0], operands[2], val, flags, compare_op));

  return true;
}
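/* Illustrative output (added for exposition, not literal): for
   dest = a + ((unsigned) b < c) the carry-flag compare turns into

       cmpl  %ecx, %ebx      ; CF = (b < c) unsigned
       adcl  $0, %eax        ; dest = a + CF

   and the gen_sub*3_carry variants cover the conditional-decrement
   direction.  */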
/* Split operands 0 and 1 into half-mode parts.  Similar to split_double_mode,
   but works for floating point parameters and non-offsettable memories.
   For pushes, it returns just stack offsets; the values will be saved
   in the right order.  Maximally three parts are generated.  */

static int
ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
{
  int size;

  if (!TARGET_64BIT)
    size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
  else
    size = (GET_MODE_SIZE (mode) + 4) / 8;

  gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
  gcc_assert (size >= 2 && size <= 4);

  /* Optimize constant pool reference to immediates.  This is used by fp
     moves, that force all constants to memory to allow combining.  */
  if (MEM_P (operand) && MEM_READONLY_P (operand))
    {
      rtx tmp = maybe_get_pool_constant (operand);
      if (tmp)
        operand = tmp;
    }

  if (MEM_P (operand) && !offsettable_memref_p (operand))
    {
      /* The only non-offsetable memories we handle are pushes.  */
      int ok = push_operand (operand, VOIDmode);

      gcc_assert (ok);

      operand = copy_rtx (operand);
      PUT_MODE (operand, Pmode);
      parts[0] = parts[1] = parts[2] = parts[3] = operand;
      return size;
    }

  if (GET_CODE (operand) == CONST_VECTOR)
    {
      enum machine_mode imode = int_mode_for_mode (mode);
      /* Caution: if we looked through a constant pool memory above,
         the operand may actually have a different mode now.  That's
         ok, since we want to pun this all the way back to an integer.  */
      operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
      gcc_assert (operand != NULL);
      mode = imode;
    }

  if (!TARGET_64BIT)
    {
      if (mode == DImode)
        split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
      else
        {
          int i;

          if (REG_P (operand))
            {
              gcc_assert (reload_completed);
              for (i = 0; i < size; i++)
                parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
            }
          else if (offsettable_memref_p (operand))
            {
              operand = adjust_address (operand, SImode, 0);
              parts[0] = operand;
              for (i = 1; i < size; i++)
                parts[i] = adjust_address (operand, SImode, 4 * i);
            }
          else if (GET_CODE (operand) == CONST_DOUBLE)
            {
              REAL_VALUE_TYPE r;
              long l[4];

              REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
              switch (mode)
                {
                case TFmode:
                  real_to_target (l, &r, mode);
                  parts[3] = gen_int_mode (l[3], SImode);
                  parts[2] = gen_int_mode (l[2], SImode);
                  break;
                case XFmode:
                  REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
                  parts[2] = gen_int_mode (l[2], SImode);
                  break;
                case DFmode:
                  REAL_VALUE_TO_TARGET_DOUBLE (r, l);
                  break;
                default:
                  gcc_unreachable ();
                }
              parts[1] = gen_int_mode (l[1], SImode);
              parts[0] = gen_int_mode (l[0], SImode);
            }
          else
            gcc_unreachable ();
        }
    }
  else
    {
      if (mode == TImode)
        split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
      if (mode == XFmode || mode == TFmode)
        {
          enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
          if (REG_P (operand))
            {
              gcc_assert (reload_completed);
              parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
              parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
            }
          else if (offsettable_memref_p (operand))
            {
              operand = adjust_address (operand, DImode, 0);
              parts[0] = operand;
              parts[1] = adjust_address (operand, upper_mode, 8);
            }
          else if (GET_CODE (operand) == CONST_DOUBLE)
            {
              REAL_VALUE_TYPE r;
              long l[4];

              REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
              real_to_target (l, &r, mode);

              /* Do not use shift by 32 to avoid warning on 32bit systems.  */
              if (HOST_BITS_PER_WIDE_INT >= 64)
                parts[0]
                  = gen_int_mode
                      ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
                       + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
                       DImode);
              else
                parts[0] = immed_double_const (l[0], l[1], DImode);

              if (upper_mode == SImode)
                parts[1] = gen_int_mode (l[2], SImode);
              else if (HOST_BITS_PER_WIDE_INT >= 64)
                parts[1]
                  = gen_int_mode
                      ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
                       + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
                       DImode);
              else
                parts[1] = immed_double_const (l[2], l[3], DImode);
            }
          else
            gcc_unreachable ();
        }
    }

  return size;
}
/* Emit insns to perform a move or push of DI, DF, XF, and TF values.
   Return false when normal moves are needed; true when all required
   insns have been emitted.  Operands 2-4 contain the input values
   in the correct order; operands 5-7 contain the output values.  */

void
ix86_split_long_move (rtx operands[])
{
  rtx part[2][4];
  int nparts, i, j;
  int push = 0;
  int collisions = 0;
  enum machine_mode mode = GET_MODE (operands[0]);
  bool collisionparts[4];

  /* The DFmode expanders may ask us to move double.
     For 64bit target this is single move.  By hiding the fact
     here we simplify i386.md splitters.  */
  if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
    {
      /* Optimize constant pool reference to immediates.  This is used by
         fp moves, that force all constants to memory to allow combining.  */

      if (MEM_P (operands[1])
          && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
          && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
        operands[1] = get_pool_constant (XEXP (operands[1], 0));
      if (push_operand (operands[0], VOIDmode))
        {
          operands[0] = copy_rtx (operands[0]);
          PUT_MODE (operands[0], Pmode);
        }
      else
        operands[0] = gen_lowpart (DImode, operands[0]);
      operands[1] = gen_lowpart (DImode, operands[1]);
      emit_move_insn (operands[0], operands[1]);
      return;
    }

  /* The only non-offsettable memory we handle is push.  */
  if (push_operand (operands[0], VOIDmode))
    push = 1;
  else
    gcc_assert (!MEM_P (operands[0])
                || offsettable_memref_p (operands[0]));

  nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
  ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));

  /* When emitting push, take care for source operands on the stack.  */
  if (push && MEM_P (operands[1])
      && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
    {
      rtx src_base = XEXP (part[1][nparts - 1], 0);

      /* Compensate for the stack decrement by 4.  */
      if (!TARGET_64BIT && nparts == 3
          && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
        src_base = plus_constant (src_base, 4);

      /* src_base refers to the stack pointer and is
         automatically decreased by emitted push.  */
      for (i = 0; i < nparts; i++)
        part[1][i] = change_address (part[1][i],
                                     GET_MODE (part[1][i]), src_base);
    }

  /* We need to do copy in the right order in case an address register
     of the source overlaps the destination.  */
  if (REG_P (part[0][0]) && MEM_P (part[1][0]))
    {
      rtx tmp;

      for (i = 0; i < nparts; i++)
        {
          collisionparts[i]
            = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
          if (collisionparts[i])
            collisions++;
        }

      /* Collision in the middle part can be handled by reordering.  */
      if (collisions == 1 && nparts == 3 && collisionparts[1])
        {
          tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
          tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
        }
      else if (collisions == 1
               && nparts == 4
               && (collisionparts[1] || collisionparts[2]))
        {
          if (collisionparts[1])
            {
              tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
              tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
            }
          else
            {
              tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
              tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
            }
        }

      /* If there are more collisions, we can't handle it by reordering.
         Do an lea to the last part and use only one colliding move.  */
      else if (collisions > 1)
        {
          rtx base;

          collisions = 1;

          base = part[0][nparts - 1];

          /* Handle the case when the last part isn't valid for lea.
             Happens in 64-bit mode storing the 12-byte XFmode.  */
          if (GET_MODE (base) != Pmode)
            base = gen_rtx_REG (Pmode, REGNO (base));

          emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
          part[1][0] = replace_equiv_address (part[1][0], base);
          for (i = 1; i < nparts; i++)
            {
              tmp = plus_constant (base, UNITS_PER_WORD * i);
              part[1][i] = replace_equiv_address (part[1][i], tmp);
            }
        }
    }

  if (push)
    {
      if (!TARGET_64BIT)
        {
          if (nparts == 3)
            {
              if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
                emit_insn (gen_addsi3 (stack_pointer_rtx,
                                       stack_pointer_rtx, GEN_INT (-4)));
              emit_move_insn (part[0][2], part[1][2]);
            }
          else if (nparts == 4)
            {
              emit_move_insn (part[0][3], part[1][3]);
              emit_move_insn (part[0][2], part[1][2]);
            }
        }
      else
        {
          /* In 64bit mode we don't have 32bit push available.  In case this is
             register, it is OK - we will just use larger counterpart.  We also
             retype memory - these come from an attempt to avoid REX prefix on
             moving of second half of TFmode value.  */
          if (GET_MODE (part[1][1]) == SImode)
            {
              switch (GET_CODE (part[1][1]))
                {
                case MEM:
                  part[1][1] = adjust_address (part[1][1], DImode, 0);
                  break;

                case REG:
                  part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
                  break;

                default:
                  gcc_unreachable ();
                }

              if (GET_MODE (part[1][0]) == SImode)
                part[1][0] = part[1][1];
            }
        }
      emit_move_insn (part[0][1], part[1][1]);
      emit_move_insn (part[0][0], part[1][0]);
      return;
    }

  /* Choose correct order to not overwrite the source before it is copied.  */
  if ((REG_P (part[0][0])
       && REG_P (part[1][1])
       && (REGNO (part[0][0]) == REGNO (part[1][1])
           || (nparts == 3
               && REGNO (part[0][0]) == REGNO (part[1][2]))
           || (nparts == 4
               && REGNO (part[0][0]) == REGNO (part[1][3]))))
      || (collisions > 0
          && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
    {
      for (i = 0, j = nparts - 1; i < nparts; i++, j--)
        {
          operands[2 + i] = part[0][j];
          operands[6 + i] = part[1][j];
        }
    }
  else
    {
      for (i = 0; i < nparts; i++)
        {
          operands[2 + i] = part[0][i];
          operands[6 + i] = part[1][i];
        }
    }

  /* If optimizing for size, attempt to locally unCSE nonzero constants.  */
  if (optimize_insn_for_size_p ())
    {
      for (j = 0; j < nparts - 1; j++)
        if (CONST_INT_P (operands[6 + j])
            && operands[6 + j] != const0_rtx
            && REG_P (operands[2 + j]))
          for (i = j; i < nparts - 1; i++)
            if (CONST_INT_P (operands[7 + i])
                && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
              operands[7 + i] = operands[2 + j];
    }

  for (i = 0; i < nparts; i++)
    emit_move_insn (operands[2 + i], operands[6 + i]);

  return;
}
/* Helper function of ix86_split_ashl used to generate an SImode/DImode
   left shift by a constant, either using a single shift or
   a sequence of add instructions.  */

static void
ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
{
  rtx (*insn)(rtx, rtx, rtx);

  if (count == 1
      || (count * ix86_cost->add <= ix86_cost->shift_const
          && !optimize_insn_for_size_p ()))
    {
      insn = mode == DImode ? gen_addsi3 : gen_adddi3;
      while (count-- > 0)
        emit_insn (insn (operand, operand, operand));
    }
  else
    {
      insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
      emit_insn (insn (operand, operand, GEN_INT (count)));
    }
}
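/* Design note (added for exposition): "addl %eax, %eax" is a cheap
   left-shift-by-one on essentially every microarchitecture, so for
   small counts a run of adds can beat an immediate shift whenever
   count * add-cost <= shift-cost, e.g.

       shll $2, %eax   ~   addl %eax, %eax ; addl %eax, %eax

   When optimizing for size the single shift always wins, hence the
   optimize_insn_for_size_p () guard above.  */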
void
ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
{
  rtx (*gen_ashl3)(rtx, rtx, rtx);
  rtx (*gen_shld)(rtx, rtx, rtx);
  int half_width = GET_MODE_BITSIZE (mode) >> 1;

  rtx low[2], high[2];
  int count;

  if (CONST_INT_P (operands[2]))
    {
      split_double_mode (mode, operands, 2, low, high);
      count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);

      if (count >= half_width)
        {
          emit_move_insn (high[0], low[1]);
          emit_move_insn (low[0], const0_rtx);

          if (count > half_width)
            ix86_expand_ashl_const (high[0], count - half_width, mode);
        }
      else
        {
          gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;

          if (!rtx_equal_p (operands[0], operands[1]))
            emit_move_insn (operands[0], operands[1]);

          emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
          ix86_expand_ashl_const (low[0], count, mode);
        }
      return;
    }

  split_double_mode (mode, operands, 1, low, high);

  gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;

  if (operands[1] == const1_rtx)
    {
      /* Assuming we've chosen a QImode capable registers, then 1 << N
         can be done with two 32/64-bit shifts, no branches, no cmoves.  */
      if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
        {
          rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);

          ix86_expand_clear (low[0]);
          ix86_expand_clear (high[0]);
          emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));

          d = gen_lowpart (QImode, low[0]);
          d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
          s = gen_rtx_EQ (QImode, flags, const0_rtx);
          emit_insn (gen_rtx_SET (VOIDmode, d, s));

          d = gen_lowpart (QImode, high[0]);
          d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
          s = gen_rtx_NE (QImode, flags, const0_rtx);
          emit_insn (gen_rtx_SET (VOIDmode, d, s));
        }

      /* Otherwise, we can get the same results by manually performing
         a bit extract operation on bit 5/6, and then performing the two
         shifts.  The two methods of getting 0/1 into low/high are exactly
         the same size.  Avoiding the shift in the bit extract case helps
         pentium4 a bit; no one else seems to care much either way.  */
      else
        {
          enum machine_mode half_mode;
          rtx (*gen_lshr3)(rtx, rtx, rtx);
          rtx (*gen_and3)(rtx, rtx, rtx);
          rtx (*gen_xor3)(rtx, rtx, rtx);
          HOST_WIDE_INT bits;
          rtx x;

          if (mode == DImode)
            {
              half_mode = SImode;
              gen_lshr3 = gen_lshrsi3;
              gen_and3 = gen_andsi3;
              gen_xor3 = gen_xorsi3;
              bits = 5;
            }
          else
            {
              half_mode = DImode;
              gen_lshr3 = gen_lshrdi3;
              gen_and3 = gen_anddi3;
              gen_xor3 = gen_xordi3;
              bits = 6;
            }

          if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
            x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
          else
            x = gen_lowpart (half_mode, operands[2]);
          emit_insn (gen_rtx_SET (VOIDmode, high[0], x));

          emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
          emit_insn (gen_and3 (high[0], high[0], const1_rtx));
          emit_move_insn (low[0], high[0]);
          emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
        }

      emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
      emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
      return;
    }

  if (operands[1] == constm1_rtx)
    {
      /* For -1 << N, we can avoid the shld instruction, because we
         know that we're shifting 0...31/63 ones into a -1.  */
      emit_move_insn (low[0], constm1_rtx);
      if (optimize_insn_for_size_p ())
        emit_move_insn (high[0], low[0]);
      else
        emit_move_insn (high[0], constm1_rtx);
    }
  else
    {
      gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;

      if (!rtx_equal_p (operands[0], operands[1]))
        emit_move_insn (operands[0], operands[1]);

      split_double_mode (mode, operands, 1, low, high);
      emit_insn (gen_shld (high[0], low[0], operands[2]));
    }

  emit_insn (gen_ashl3 (low[0], low[0], operands[2]));

  if (TARGET_CMOVE && scratch)
    {
      rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
        = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;

      ix86_expand_clear (scratch);
      emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
    }
  else
    {
      rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
        = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;

      emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
    }
}
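/* Shape of the variable-count path above (illustrative sketch for
   32-bit DImode, added for exposition):

       shld  %cl, %eax, %edx    ; high = (high << cl) | (low >> (32-cl))
       sall  %cl, %eax          ; low <<= cl
       ;; cl counts modulo 32, so for cl >= 32 fix up:
       testb $32, %cl
       cmovne %eax, %edx        ; and clear low (x86_shift*_adj_1)

   Without cmov (or without a scratch register) the adjustment is done
   with a short branch instead (x86_shift*_adj_2).  */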
void
ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
{
  rtx (*gen_ashr3)(rtx, rtx, rtx)
    = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
  rtx (*gen_shrd)(rtx, rtx, rtx);
  int half_width = GET_MODE_BITSIZE (mode) >> 1;

  rtx low[2], high[2];
  int count;

  if (CONST_INT_P (operands[2]))
    {
      split_double_mode (mode, operands, 2, low, high);
      count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);

      if (count == GET_MODE_BITSIZE (mode) - 1)
        {
          emit_move_insn (high[0], high[1]);
          emit_insn (gen_ashr3 (high[0], high[0],
                                GEN_INT (half_width - 1)));
          emit_move_insn (low[0], high[0]);
        }
      else if (count >= half_width)
        {
          emit_move_insn (low[0], high[1]);
          emit_move_insn (high[0], low[0]);
          emit_insn (gen_ashr3 (high[0], high[0],
                                GEN_INT (half_width - 1)));

          if (count > half_width)
            emit_insn (gen_ashr3 (low[0], low[0],
                                  GEN_INT (count - half_width)));
        }
      else
        {
          gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;

          if (!rtx_equal_p (operands[0], operands[1]))
            emit_move_insn (operands[0], operands[1]);

          emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
          emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
        }
    }
  else
    {
      gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;

      if (!rtx_equal_p (operands[0], operands[1]))
        emit_move_insn (operands[0], operands[1]);

      split_double_mode (mode, operands, 1, low, high);

      emit_insn (gen_shrd (low[0], high[0], operands[2]));
      emit_insn (gen_ashr3 (high[0], high[0], operands[2]));

      if (TARGET_CMOVE && scratch)
        {
          rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
            = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;

          emit_move_insn (scratch, high[0]);
          emit_insn (gen_ashr3 (scratch, scratch,
                                GEN_INT (half_width - 1)));
          emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
                                          scratch));
        }
      else
        {
          rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
            = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;

          emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
        }
    }
}
void
ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
{
  rtx (*gen_lshr3)(rtx, rtx, rtx)
    = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
  rtx (*gen_shrd)(rtx, rtx, rtx);
  int half_width = GET_MODE_BITSIZE (mode) >> 1;

  rtx low[2], high[2];
  int count;

  if (CONST_INT_P (operands[2]))
    {
      split_double_mode (mode, operands, 2, low, high);
      count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);

      if (count >= half_width)
        {
          emit_move_insn (low[0], high[1]);
          ix86_expand_clear (high[0]);

          if (count > half_width)
            emit_insn (gen_lshr3 (low[0], low[0],
                                  GEN_INT (count - half_width)));
        }
      else
        {
          gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;

          if (!rtx_equal_p (operands[0], operands[1]))
            emit_move_insn (operands[0], operands[1]);

          emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
          emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
        }
    }
  else
    {
      gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;

      if (!rtx_equal_p (operands[0], operands[1]))
        emit_move_insn (operands[0], operands[1]);

      split_double_mode (mode, operands, 1, low, high);

      emit_insn (gen_shrd (low[0], high[0], operands[2]));
      emit_insn (gen_lshr3 (high[0], high[0], operands[2]));

      if (TARGET_CMOVE && scratch)
        {
          rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
            = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;

          ix86_expand_clear (scratch);
          emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
                                          scratch));
        }
      else
        {
          rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
            = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;

          emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
        }
    }
}
/* Predict just emitted jump instruction to be taken with probability PROB.  */
static void
predict_jump (int prob)
{
  rtx insn = get_last_insn ();
  gcc_assert (JUMP_P (insn));
  add_reg_note (insn, REG_BR_PROB, GEN_INT (prob));
}
/* Helper function for the string operations below.  Test VARIABLE whether
   it is aligned to VALUE bytes.  If true, jump to the label.  */
static rtx
ix86_expand_aligntest (rtx variable, int value, bool epilogue)
{
  rtx label = gen_label_rtx ();
  rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
  if (GET_MODE (variable) == DImode)
    emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
  else
    emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
  emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
                           1, label);
  if (epilogue)
    predict_jump (REG_BR_PROB_BASE * 50 / 100);
  else
    predict_jump (REG_BR_PROB_BASE * 90 / 100);
  return label;
}
/* Adjust COUNTER by the VALUE.  */
static void
ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
{
  rtx (*gen_add)(rtx, rtx, rtx)
    = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;

  emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
}
/* Zero extend possibly SImode EXP to Pmode register.  */
rtx
ix86_zero_extend_to_Pmode (rtx exp)
{
  rtx r;
  if (GET_MODE (exp) == VOIDmode)
    return force_reg (Pmode, exp);
  if (GET_MODE (exp) == Pmode)
    return copy_to_mode_reg (Pmode, exp);
  r = gen_reg_rtx (Pmode);
  emit_insn (gen_zero_extendsidi2 (r, exp));
  return r;
}
/* Divide COUNTREG by SCALE.  */
static rtx
scale_counter (rtx countreg, int scale)
{
  rtx sc;

  if (scale == 1)
    return countreg;
  if (CONST_INT_P (countreg))
    return GEN_INT (INTVAL (countreg) / scale);
  gcc_assert (REG_P (countreg));

  sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
                            GEN_INT (exact_log2 (scale)),
                            NULL, 1, OPTAB_DIRECT);
  return sc;
}
/* Return mode for the memcpy/memset loop counter.  Prefer SImode over
   DImode for constant loop counts.  */
static enum machine_mode
counter_mode (rtx count_exp)
{
  if (GET_MODE (count_exp) != VOIDmode)
    return GET_MODE (count_exp);
  if (!CONST_INT_P (count_exp))
    return Pmode;
  if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
    return DImode;
  return SImode;
}
/* When SRCPTR is non-NULL, output a simple loop that moves the memory
   pointed to by SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times;
   the overall size is COUNT, specified in bytes.  When SRCPTR is NULL,
   output the equivalent loop to set memory by VALUE (supposed to be in
   MODE).

   The size is rounded down to a whole number of chunks moved at once.
   SRCMEM and DESTMEM provide MEM rtxes to feed proper aliasing info.  */

static void
expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
                               rtx destptr, rtx srcptr, rtx value,
                               rtx count, enum machine_mode mode, int unroll,
                               int expected_size)
{
  rtx out_label, top_label, iter, tmp;
  enum machine_mode iter_mode = counter_mode (count);
  rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
  rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
  rtx size;
  rtx x_addr;
  rtx y_addr;
  int i;

  top_label = gen_label_rtx ();
  out_label = gen_label_rtx ();
  iter = gen_reg_rtx (iter_mode);

  size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
                              NULL, 1, OPTAB_DIRECT);
  /* Those two should combine.  */
  if (piece_size == const1_rtx)
    {
      emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
                               true, out_label);
      predict_jump (REG_BR_PROB_BASE * 10 / 100);
    }
  emit_move_insn (iter, const0_rtx);

  emit_label (top_label);

  tmp = convert_modes (Pmode, iter_mode, iter, true);
  x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
  destmem = change_address (destmem, mode, x_addr);

  if (srcmem)
    {
      y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
      srcmem = change_address (srcmem, mode, y_addr);

      /* When unrolling for chips that reorder memory reads and writes,
         we can save registers by using single temporary.
         Also using 4 temporaries is overkill in 32bit mode.  */
      if (!TARGET_64BIT && 0)
        {
          for (i = 0; i < unroll; i++)
            {
              if (i)
                {
                  destmem =
                    adjust_address (copy_rtx (destmem), mode,
                                    GET_MODE_SIZE (mode));
                  srcmem =
                    adjust_address (copy_rtx (srcmem), mode,
                                    GET_MODE_SIZE (mode));
                }
              emit_move_insn (destmem, srcmem);
            }
        }
      else
        {
          rtx tmpreg[4];
          gcc_assert (unroll <= 4);
          for (i = 0; i < unroll; i++)
            {
              tmpreg[i] = gen_reg_rtx (mode);
              if (i)
                srcmem =
                  adjust_address (copy_rtx (srcmem), mode,
                                  GET_MODE_SIZE (mode));
              emit_move_insn (tmpreg[i], srcmem);
            }
          for (i = 0; i < unroll; i++)
            {
              if (i)
                destmem =
                  adjust_address (copy_rtx (destmem), mode,
                                  GET_MODE_SIZE (mode));
              emit_move_insn (destmem, tmpreg[i]);
            }
        }
    }
  else
    for (i = 0; i < unroll; i++)
      {
        if (i)
          destmem =
            adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
        emit_move_insn (destmem, value);
      }

  tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
                             true, OPTAB_LIB_WIDEN);
  if (tmp != iter)
    emit_move_insn (iter, tmp);

  emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
                           true, top_label);
  if (expected_size != -1)
    {
      expected_size /= GET_MODE_SIZE (mode) * unroll;
      if (expected_size == 0)
        predict_jump (0);
      else if (expected_size > REG_BR_PROB_BASE)
        predict_jump (REG_BR_PROB_BASE - 1);
      else
        predict_jump (REG_BR_PROB_BASE
                      - (REG_BR_PROB_BASE + expected_size / 2)
                        / expected_size);
    }
  else
    predict_jump (REG_BR_PROB_BASE * 80 / 100);
  iter = ix86_zero_extend_to_Pmode (iter);
  tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
                             true, OPTAB_LIB_WIDEN);
  if (tmp != destptr)
    emit_move_insn (destptr, tmp);
  if (srcptr)
    {
      tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
                                 true, OPTAB_LIB_WIDEN);
      if (tmp != srcptr)
        emit_move_insn (srcptr, tmp);
    }
  emit_label (out_label);
}
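/* Shape of the code emitted above, sketched in C for a move with
   UNROLL == 2 and MODE == DImode (names illustrative, not part of the
   generated RTL; callers guard against a zero-sized body):

     size = count & ~15;
     iter = 0;
     do
       {
         ((long *) (dest + iter))[0] = ((long *) (src + iter))[0];
         ((long *) (dest + iter))[1] = ((long *) (src + iter))[1];
         iter += 16;
       }
     while (iter < size);
     dest += iter;  src += iter;

   The remaining count & 15 bytes are left for the epilogue.  */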
/* Output "rep; mov" instruction.
   Arguments have same meaning as for previous function.  */
static void
expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
                           rtx destptr, rtx srcptr,
                           rtx count,
                           enum machine_mode mode)
{
  rtx destexp;
  rtx srcexp;
  rtx countreg;
  HOST_WIDE_INT rounded_count;

  /* If the size is known, it is shorter to use rep movs.  */
  if (mode == QImode && CONST_INT_P (count)
      && !(INTVAL (count) & 3))
    mode = SImode;

  if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
    destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
  if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
    srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
  countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
                                                       GET_MODE_SIZE (mode)));
  if (mode != QImode)
    {
      destexp = gen_rtx_ASHIFT (Pmode, countreg,
                                GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
      destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
      srcexp = gen_rtx_ASHIFT (Pmode, countreg,
                               GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
      srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
    }
  else
    {
      destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
      srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
    }
  if (CONST_INT_P (count))
    {
      rounded_count = (INTVAL (count)
                       & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
      destmem = shallow_copy_rtx (destmem);
      srcmem = shallow_copy_rtx (srcmem);
      set_mem_size (destmem, rounded_count);
      set_mem_size (srcmem, rounded_count);
    }
  else
    {
      if (MEM_SIZE_KNOWN_P (destmem))
        clear_mem_size (destmem);
      if (MEM_SIZE_KNOWN_P (srcmem))
        clear_mem_size (srcmem);
    }
  emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
                          destexp, srcexp));
}
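/* Illustration: copying 100 bytes with MODE == SImode scales the count
   to 25 (see scale_counter) and emits the equivalent of

       mov  $25, %ecx
       rep  movsl

   leaving 100 & 3 == 0 tail bytes.  Had the count been 102, the QImode
   to SImode promotion at the top would have been skipped and a plain
   rep movsb used.  The assembly is illustrative of the pattern, not
   literal compiler output.  */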
/* Output "rep; stos" instruction.
   Arguments have same meaning as for previous function.  */
static void
expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
                            rtx count, enum machine_mode mode,
                            rtx orig_value)
{
  rtx destexp;
  rtx countreg;
  HOST_WIDE_INT rounded_count;

  if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
    destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
  value = force_reg (mode, gen_lowpart (mode, value));
  countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
                                                       GET_MODE_SIZE (mode)));
  if (mode != QImode)
    {
      destexp = gen_rtx_ASHIFT (Pmode, countreg,
                                GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
      destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
    }
  else
    destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
  if (orig_value == const0_rtx && CONST_INT_P (count))
    {
      rounded_count = (INTVAL (count)
                       & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
      destmem = shallow_copy_rtx (destmem);
      set_mem_size (destmem, rounded_count);
    }
  else if (MEM_SIZE_KNOWN_P (destmem))
    clear_mem_size (destmem);
  emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
}
/* Emit a single "movs"-style move of one chunk of MODE size from
   SRCMEM + OFFSET to DESTMEM + OFFSET, advancing both pointers.  */
static void
emit_strmov (rtx destmem, rtx srcmem,
             rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
{
  rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
  rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
  emit_insn (gen_strmov (destptr, dest, srcptr, src));
}
/* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST.  */
static void
expand_movmem_epilogue (rtx destmem, rtx srcmem,
                        rtx destptr, rtx srcptr, rtx count, int max_size)
{
  rtx src, dest;
  if (CONST_INT_P (count))
    {
      HOST_WIDE_INT countval = INTVAL (count);
      int offset = 0;

      if ((countval & 0x10) && max_size > 16)
        {
          if (TARGET_64BIT)
            {
              emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
              emit_strmov (destmem, srcmem, destptr, srcptr, DImode,
                           offset + 8);
            }
          else
            gcc_unreachable ();
          offset += 16;
        }
      if ((countval & 0x08) && max_size > 8)
        {
          if (TARGET_64BIT)
            emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
          else
            {
              emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
              emit_strmov (destmem, srcmem, destptr, srcptr, SImode,
                           offset + 4);
            }
          offset += 8;
        }
      if ((countval & 0x04) && max_size > 4)
        {
          emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
          offset += 4;
        }
      if ((countval & 0x02) && max_size > 2)
        {
          emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
          offset += 2;
        }
      if ((countval & 0x01) && max_size > 1)
        {
          emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
          offset += 1;
        }
      return;
    }
  if (max_size > 8)
    {
      count = expand_simple_binop (GET_MODE (count), AND, count,
                                   GEN_INT (max_size - 1),
                                   count, 1, OPTAB_DIRECT);
      expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
                                     count, QImode, 1, 4);
      return;
    }

  /* When there are stringops, we can cheaply increase dest and src pointers.
     Otherwise we save code size by maintaining offset (zero is readily
     available from preceding rep operation) and using x86 addressing modes.
   */
  if (TARGET_SINGLE_STRINGOP)
    {
      if (max_size > 4)
        {
          rtx label = ix86_expand_aligntest (count, 4, true);
          src = change_address (srcmem, SImode, srcptr);
          dest = change_address (destmem, SImode, destptr);
          emit_insn (gen_strmov (destptr, dest, srcptr, src));
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (max_size > 2)
        {
          rtx label = ix86_expand_aligntest (count, 2, true);
          src = change_address (srcmem, HImode, srcptr);
          dest = change_address (destmem, HImode, destptr);
          emit_insn (gen_strmov (destptr, dest, srcptr, src));
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (max_size > 1)
        {
          rtx label = ix86_expand_aligntest (count, 1, true);
          src = change_address (srcmem, QImode, srcptr);
          dest = change_address (destmem, QImode, destptr);
          emit_insn (gen_strmov (destptr, dest, srcptr, src));
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
    }
  else
    {
      rtx offset = force_reg (Pmode, const0_rtx);
      rtx tmp;

      if (max_size > 4)
        {
          rtx label = ix86_expand_aligntest (count, 4, true);
          src = change_address (srcmem, SImode, srcptr);
          dest = change_address (destmem, SImode, destptr);
          emit_move_insn (dest, src);
          tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
                                     true, OPTAB_LIB_WIDEN);
          if (tmp != offset)
            emit_move_insn (offset, tmp);
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (max_size > 2)
        {
          rtx label = ix86_expand_aligntest (count, 2, true);
          tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
          src = change_address (srcmem, HImode, tmp);
          tmp = gen_rtx_PLUS (Pmode, destptr, offset);
          dest = change_address (destmem, HImode, tmp);
          emit_move_insn (dest, src);
          tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
                                     true, OPTAB_LIB_WIDEN);
          if (tmp != offset)
            emit_move_insn (offset, tmp);
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (max_size > 1)
        {
          rtx label = ix86_expand_aligntest (count, 1, true);
          tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
          src = change_address (srcmem, QImode, tmp);
          tmp = gen_rtx_PLUS (Pmode, destptr, offset);
          dest = change_address (destmem, QImode, tmp);
          emit_move_insn (dest, src);
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
    }
}
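/* Worked example of the constant-count path above: for countval == 23
   (binary 10111) and max_size == 32 the epilogue emits a 16-byte move
   (bit 4), a 4-byte move (bit 2), a 2-byte move (bit 1) and a 1-byte
   move (bit 0) at offsets 0, 16, 20 and 22 respectively -- exactly one
   straight-line move per set bit, with no loops or branches.  */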
/* Output code to set at most count & (max_size - 1) bytes starting by DEST.  */
static void
expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
                                 rtx count, int max_size)
{
  count =
    expand_simple_binop (counter_mode (count), AND, count,
                         GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
  expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
                                 gen_lowpart (QImode, value), count, QImode,
                                 1, max_size / 2);
}
/* Output code to set at most count & (max_size - 1) bytes starting by DEST.  */
static void
expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count,
                        int max_size)
{
  rtx dest;

  if (CONST_INT_P (count))
    {
      HOST_WIDE_INT countval = INTVAL (count);
      int offset = 0;

      if ((countval & 0x10) && max_size > 16)
        {
          if (TARGET_64BIT)
            {
              dest = adjust_automodify_address_nv (destmem, DImode, destptr,
                                                   offset);
              emit_insn (gen_strset (destptr, dest, value));
              dest = adjust_automodify_address_nv (destmem, DImode, destptr,
                                                   offset + 8);
              emit_insn (gen_strset (destptr, dest, value));
            }
          else
            gcc_unreachable ();
          offset += 16;
        }
      if ((countval & 0x08) && max_size > 8)
        {
          if (TARGET_64BIT)
            {
              dest = adjust_automodify_address_nv (destmem, DImode, destptr,
                                                   offset);
              emit_insn (gen_strset (destptr, dest, value));
            }
          else
            {
              dest = adjust_automodify_address_nv (destmem, SImode, destptr,
                                                   offset);
              emit_insn (gen_strset (destptr, dest, value));
              dest = adjust_automodify_address_nv (destmem, SImode, destptr,
                                                   offset + 4);
              emit_insn (gen_strset (destptr, dest, value));
            }
          offset += 8;
        }
      if ((countval & 0x04) && max_size > 4)
        {
          dest = adjust_automodify_address_nv (destmem, SImode, destptr,
                                               offset);
          emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
          offset += 4;
        }
      if ((countval & 0x02) && max_size > 2)
        {
          dest = adjust_automodify_address_nv (destmem, HImode, destptr,
                                               offset);
          emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
          offset += 2;
        }
      if ((countval & 0x01) && max_size > 1)
        {
          dest = adjust_automodify_address_nv (destmem, QImode, destptr,
                                               offset);
          emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
          offset += 1;
        }
      return;
    }
  if (max_size > 32)
    {
      expand_setmem_epilogue_via_loop (destmem, destptr, value, count,
                                       max_size);
      return;
    }
  if (max_size > 16)
    {
      rtx label = ix86_expand_aligntest (count, 16, true);
      if (TARGET_64BIT)
        {
          dest = change_address (destmem, DImode, destptr);
          emit_insn (gen_strset (destptr, dest, value));
          emit_insn (gen_strset (destptr, dest, value));
        }
      else
        {
          dest = change_address (destmem, SImode, destptr);
          emit_insn (gen_strset (destptr, dest, value));
          emit_insn (gen_strset (destptr, dest, value));
          emit_insn (gen_strset (destptr, dest, value));
          emit_insn (gen_strset (destptr, dest, value));
        }
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (max_size > 8)
    {
      rtx label = ix86_expand_aligntest (count, 8, true);
      if (TARGET_64BIT)
        {
          dest = change_address (destmem, DImode, destptr);
          emit_insn (gen_strset (destptr, dest, value));
        }
      else
        {
          dest = change_address (destmem, SImode, destptr);
          emit_insn (gen_strset (destptr, dest, value));
          emit_insn (gen_strset (destptr, dest, value));
        }
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (max_size > 4)
    {
      rtx label = ix86_expand_aligntest (count, 4, true);
      dest = change_address (destmem, SImode, destptr);
      emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (max_size > 2)
    {
      rtx label = ix86_expand_aligntest (count, 2, true);
      dest = change_address (destmem, HImode, destptr);
      emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (max_size > 1)
    {
      rtx label = ix86_expand_aligntest (count, 1, true);
      dest = change_address (destmem, QImode, destptr);
      emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
}
/* Copy enough bytes from SRC to DEST to align DEST, known to be aligned
   to ALIGN bytes, to DESIRED_ALIGNMENT.  */
static void
expand_movmem_prologue (rtx destmem, rtx srcmem,
                        rtx destptr, rtx srcptr, rtx count,
                        int align, int desired_alignment)
{
  if (align <= 1 && desired_alignment > 1)
    {
      rtx label = ix86_expand_aligntest (destptr, 1, false);
      srcmem = change_address (srcmem, QImode, srcptr);
      destmem = change_address (destmem, QImode, destptr);
      emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
      ix86_adjust_counter (count, 1);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (align <= 2 && desired_alignment > 2)
    {
      rtx label = ix86_expand_aligntest (destptr, 2, false);
      srcmem = change_address (srcmem, HImode, srcptr);
      destmem = change_address (destmem, HImode, destptr);
      emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
      ix86_adjust_counter (count, 2);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (align <= 4 && desired_alignment > 4)
    {
      rtx label = ix86_expand_aligntest (destptr, 4, false);
      srcmem = change_address (srcmem, SImode, srcptr);
      destmem = change_address (destmem, SImode, destptr);
      emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
      ix86_adjust_counter (count, 4);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  gcc_assert (desired_alignment <= 8);
}
/* Copy enough bytes from SRC (*SRCP) to DST to align DST to DESIRED_ALIGN.
   ALIGN_BYTES is how many bytes need to be copied.  */
static rtx
expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
                                 int desired_align, int align_bytes)
{
  rtx src = *srcp;
  rtx orig_dst = dst;
  rtx orig_src = src;
  int off = 0;
  int src_align_bytes
    = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
  if (src_align_bytes >= 0)
    src_align_bytes = desired_align - src_align_bytes;
  if (align_bytes & 1)
    {
      dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
      src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
      off = 1;
      emit_insn (gen_strmov (destreg, dst, srcreg, src));
    }
  if (align_bytes & 2)
    {
      dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
      src = adjust_automodify_address_nv (src, HImode, srcreg, off);
      if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
        set_mem_align (dst, 2 * BITS_PER_UNIT);
      if (src_align_bytes >= 0
          && (src_align_bytes & 1) == (align_bytes & 1)
          && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
        set_mem_align (src, 2 * BITS_PER_UNIT);
      off = 2;
      emit_insn (gen_strmov (destreg, dst, srcreg, src));
    }
  if (align_bytes & 4)
    {
      dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
      src = adjust_automodify_address_nv (src, SImode, srcreg, off);
      if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
        set_mem_align (dst, 4 * BITS_PER_UNIT);
      if (src_align_bytes >= 0)
        {
          unsigned int src_align = 0;
          if ((src_align_bytes & 3) == (align_bytes & 3))
            src_align = 4;
          else if ((src_align_bytes & 1) == (align_bytes & 1))
            src_align = 2;
          if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
            set_mem_align (src, src_align * BITS_PER_UNIT);
        }
      off = 4;
      emit_insn (gen_strmov (destreg, dst, srcreg, src));
    }
  dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
  src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
  if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
    set_mem_align (dst, desired_align * BITS_PER_UNIT);
  if (src_align_bytes >= 0)
    {
      unsigned int src_align = 0;
      if ((src_align_bytes & 7) == (align_bytes & 7))
        src_align = 8;
      else if ((src_align_bytes & 3) == (align_bytes & 3))
        src_align = 4;
      else if ((src_align_bytes & 1) == (align_bytes & 1))
        src_align = 2;
      if (src_align > (unsigned int) desired_align)
        src_align = desired_align;
      if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
        set_mem_align (src, src_align * BITS_PER_UNIT);
    }
  if (MEM_SIZE_KNOWN_P (orig_dst))
    set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
  if (MEM_SIZE_KNOWN_P (orig_src))
    set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
  *srcp = src;
  return dst;
}
/* Store enough bytes of VALUE at DEST to align DEST, known to be aligned
   to ALIGN bytes, to DESIRED_ALIGNMENT.  */
static void
expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
                        int align, int desired_alignment)
{
  if (align <= 1 && desired_alignment > 1)
    {
      rtx label = ix86_expand_aligntest (destptr, 1, false);
      destmem = change_address (destmem, QImode, destptr);
      emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
      ix86_adjust_counter (count, 1);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (align <= 2 && desired_alignment > 2)
    {
      rtx label = ix86_expand_aligntest (destptr, 2, false);
      destmem = change_address (destmem, HImode, destptr);
      emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
      ix86_adjust_counter (count, 2);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (align <= 4 && desired_alignment > 4)
    {
      rtx label = ix86_expand_aligntest (destptr, 4, false);
      destmem = change_address (destmem, SImode, destptr);
      emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
      ix86_adjust_counter (count, 4);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  gcc_assert (desired_alignment <= 8);
}
/* Store enough bytes of VALUE at DST to align DST, known to be aligned
   to ALIGN bytes, to DESIRED_ALIGN.  ALIGN_BYTES is how many bytes need
   to be stored.  */
static rtx
expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
                                 int desired_align, int align_bytes)
{
  int off = 0;
  rtx orig_dst = dst;
  if (align_bytes & 1)
    {
      dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
      off = 1;
      emit_insn (gen_strset (destreg, dst,
                             gen_lowpart (QImode, value)));
    }
  if (align_bytes & 2)
    {
      dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
      if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
        set_mem_align (dst, 2 * BITS_PER_UNIT);
      off = 2;
      emit_insn (gen_strset (destreg, dst,
                             gen_lowpart (HImode, value)));
    }
  if (align_bytes & 4)
    {
      dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
      if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
        set_mem_align (dst, 4 * BITS_PER_UNIT);
      off = 4;
      emit_insn (gen_strset (destreg, dst,
                             gen_lowpart (SImode, value)));
    }
  dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
  if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
    set_mem_align (dst, desired_align * BITS_PER_UNIT);
  if (MEM_SIZE_KNOWN_P (orig_dst))
    set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
  return dst;
}
/* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation.  */
static enum stringop_alg
decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
            int *dynamic_check)
{
  const struct stringop_algs * algs;
  bool optimize_for_speed;
  /* Algorithms using the rep prefix want at least edi and ecx;
     additionally, memset wants eax and memcpy wants esi.  Don't
     consider such algorithms if the user has appropriated those
     registers for their own purposes.  */
  bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
                             || (memset
                                 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));

#define ALG_USABLE_P(alg) (rep_prefix_usable                    \
                           || (alg != rep_prefix_1_byte         \
                               && alg != rep_prefix_4_byte      \
                               && alg != rep_prefix_8_byte))
  const struct processor_costs *cost;

  /* Even if the string operation call is cold, we still might spend a lot
     of time processing large blocks.  */
  if (optimize_function_for_size_p (cfun)
      || (optimize_insn_for_size_p ()
          && expected_size != -1 && expected_size < 256))
    optimize_for_speed = false;
  else
    optimize_for_speed = true;

  cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;

  *dynamic_check = -1;
  if (memset)
    algs = &cost->memset[TARGET_64BIT != 0];
  else
    algs = &cost->memcpy[TARGET_64BIT != 0];
  if (ix86_stringop_alg != no_stringop && ALG_USABLE_P (ix86_stringop_alg))
    return ix86_stringop_alg;
  /* rep; movq or rep; movl is the smallest variant.  */
  else if (!optimize_for_speed)
    {
      if (!count || (count & 3))
        return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
      else
        return rep_prefix_usable ? rep_prefix_4_byte : loop;
    }
  /* Very tiny blocks are best handled via the loop, REP is expensive to
     set up.  */
  else if (expected_size != -1 && expected_size < 4)
    return loop_1_byte;
  else if (expected_size != -1)
    {
      unsigned int i;
      enum stringop_alg alg = libcall;
      for (i = 0; i < MAX_STRINGOP_ALGS; i++)
        {
          /* We get here if the algorithms that were not libcall-based
             were rep-prefix based and we are unable to use rep prefixes
             based on global register usage.  Break out of the loop and
             use the heuristic below.  */
          if (algs->size[i].max == 0)
            break;
          if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
            {
              enum stringop_alg candidate = algs->size[i].alg;

              if (candidate != libcall && ALG_USABLE_P (candidate))
                alg = candidate;
              /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
                 last non-libcall inline algorithm.  */
              if (TARGET_INLINE_ALL_STRINGOPS)
                {
                  /* When the current size is best to be copied by a libcall,
                     but we are still forced to inline, run the heuristic below
                     that will pick code for medium sized blocks.  */
                  if (alg != libcall)
                    return alg;
                  break;
                }
              else if (ALG_USABLE_P (candidate))
                return candidate;
            }
        }
      gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
    }
  /* When asked to inline the call anyway, try to pick meaningful choice.
     We look for maximal size of block that is faster to copy by hand and
     take blocks of at most of that size guessing that average size will
     be roughly half of the block.

     If this turns out to be bad, we might simply specify the preferred
     choice in ix86_costs.  */
  if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
      && (algs->unknown_size == libcall
          || !ALG_USABLE_P (algs->unknown_size)))
    {
      int max = -1;
      enum stringop_alg alg;
      int i;
      bool any_alg_usable_p = true;

      for (i = 0; i < MAX_STRINGOP_ALGS; i++)
        {
          enum stringop_alg candidate = algs->size[i].alg;
          any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);

          if (candidate != libcall && candidate
              && ALG_USABLE_P (candidate))
            max = algs->size[i].max;
        }
      /* If there aren't any usable algorithms, then recursing on
         smaller sizes isn't going to find anything.  Just return the
         simple byte-at-a-time copy loop.  */
      if (!any_alg_usable_p)
        {
          /* Pick something reasonable.  */
          if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
            *dynamic_check = 128;
          return loop_1_byte;
        }
      if (max == -1)
        max = 4096;
      alg = decide_alg (count, max / 2, memset, dynamic_check);
      gcc_assert (*dynamic_check == -1);
      gcc_assert (alg != libcall);
      if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
        *dynamic_check = max;
      return alg;
    }
  return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
#undef ALG_USABLE_P
}
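/* Sketch of how the per-CPU size table drives the choice above.  A cost
   table entry of the form (values illustrative, not from any real
   tuning)

     {{256, loop}, {1024, rep_prefix_4_byte}, {-1, libcall}}

   makes decide_alg return "loop" for expected sizes up to 256 bytes,
   "rep_prefix_4_byte" up to 1024, and fall back to a library call
   beyond that -- unless -minline-all-stringops forces an inline
   expansion, in which case the recursion above picks the largest
   hand-codable block size instead.  */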
/* Decide on alignment.  We know that the operand is already aligned to ALIGN
   (ALIGN can be based on profile feedback and thus it is not 100%
   guaranteed).  */
static int
decide_alignment (int align,
                  enum stringop_alg alg,
                  int expected_size)
{
  int desired_align = 0;
  switch (alg)
    {
      case no_stringop:
        gcc_unreachable ();
      case loop:
      case unrolled_loop:
        desired_align = GET_MODE_SIZE (Pmode);
        break;
      case rep_prefix_8_byte:
        desired_align = 8;
        break;
      case rep_prefix_4_byte:
        /* PentiumPro has special logic triggering for 8 byte aligned blocks,
           copying whole cachelines at once.  */
        if (TARGET_PENTIUMPRO)
          desired_align = 8;
        else
          desired_align = 4;
        break;
      case rep_prefix_1_byte:
        /* PentiumPro has special logic triggering for 8 byte aligned blocks,
           copying whole cachelines at once.  */
        if (TARGET_PENTIUMPRO)
          desired_align = 8;
        else
          desired_align = 1;
        break;
      case loop_1_byte:
        desired_align = 1;
        break;
      case libcall:
        return 0;
    }

  if (optimize_size)
    desired_align = 1;
  if (desired_align < align)
    desired_align = align;
  if (expected_size != -1 && expected_size < 4)
    desired_align = align;
  return desired_align;
}
/* Return the smallest power of 2 greater than VAL.  */
static int
smallest_pow2_greater_than (int val)
{
  int ret = 1;
  while (ret <= val)
    ret <<= 1;
  return ret;
}
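/* E.g. smallest_pow2_greater_than (0) == 1, (1) == 2, (7) == 8 and
   (8) == 16 -- strictly greater than VAL, which is what the
   epilogue-size computations below rely on.  */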
/* Expand string move (memcpy) operation.  Use i386 string operations
   when profitable.  expand_setmem contains similar code.  The code
   depends upon architecture, block size and alignment, but always has
   the same overall structure:

   1) Prologue guard: Conditional that jumps up to epilogues for small
      blocks that can be handled by epilogue alone.  This is faster
      but also needed for correctness, since the prologue assumes the
      block is larger than the desired alignment.

      Optional dynamic check for size and libcall for large
      blocks is emitted here too, with -minline-stringops-dynamically.

   2) Prologue: copy first few bytes in order to get destination
      aligned to DESIRED_ALIGN.  It is emitted only when ALIGN is less
      than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
      copied.  We emit either a jump tree on power of two sized
      blocks, or a byte loop.

   3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
      with specified algorithm.

   4) Epilogue: code copying tail of the block that is too small to be
      handled by main body (or up to size guarded by prologue guard).  */

bool
ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
                    rtx expected_align_exp, rtx expected_size_exp)
{
  rtx destreg;
  rtx srcreg;
  rtx label = NULL;
  rtx tmp;
  rtx jump_around_label = NULL;
  HOST_WIDE_INT align = 1;
  unsigned HOST_WIDE_INT count = 0;
  HOST_WIDE_INT expected_size = -1;
  int size_needed = 0, epilogue_size_needed;
  int desired_align = 0, align_bytes = 0;
  enum stringop_alg alg;
  int dynamic_check;
  bool need_zero_guard = false;

  if (CONST_INT_P (align_exp))
    align = INTVAL (align_exp);
  /* i386 can do misaligned access on reasonably increased cost.  */
  if (CONST_INT_P (expected_align_exp)
      && INTVAL (expected_align_exp) > align)
    align = INTVAL (expected_align_exp);
  /* ALIGN is the minimum of destination and source alignment, but we care here
     just about destination alignment.  */
  else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
    align = MEM_ALIGN (dst) / BITS_PER_UNIT;

  if (CONST_INT_P (count_exp))
    count = expected_size = INTVAL (count_exp);
  if (CONST_INT_P (expected_size_exp) && count == 0)
    expected_size = INTVAL (expected_size_exp);

  /* Make sure we don't need to care about overflow later on.  */
  if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
    return false;

  /* Step 0: Decide on preferred algorithm, desired alignment and
     size of chunks to be copied by main loop.  */

  alg = decide_alg (count, expected_size, false, &dynamic_check);
  desired_align = decide_alignment (align, alg, expected_size);

  if (!TARGET_ALIGN_STRINGOPS)
    align = desired_align;

  if (alg == libcall)
    return false;
  gcc_assert (alg != no_stringop);
  if (!count)
    count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
  destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
  srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
  switch (alg)
    {
    case libcall:
    case no_stringop:
      gcc_unreachable ();
    case loop:
      need_zero_guard = true;
      size_needed = GET_MODE_SIZE (Pmode);
      break;
    case unrolled_loop:
      need_zero_guard = true;
      size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
      break;
    case rep_prefix_8_byte:
      size_needed = 8;
      break;
    case rep_prefix_4_byte:
      size_needed = 4;
      break;
    case rep_prefix_1_byte:
      size_needed = 1;
      break;
    case loop_1_byte:
      need_zero_guard = true;
      size_needed = 1;
      break;
    }

  epilogue_size_needed = size_needed;

  /* Step 1: Prologue guard.  */

  /* Alignment code needs count to be in register.  */
  if (CONST_INT_P (count_exp) && desired_align > align)
    {
      if (INTVAL (count_exp) > desired_align
          && INTVAL (count_exp) > size_needed)
        {
          align_bytes
            = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
          if (align_bytes <= 0)
            align_bytes = 0;
          else
            align_bytes = desired_align - align_bytes;
        }
      if (align_bytes == 0)
        count_exp = force_reg (counter_mode (count_exp), count_exp);
    }
  gcc_assert (desired_align >= 1 && align >= 1);

  /* Ensure that alignment prologue won't copy past end of block.  */
  if (size_needed > 1 || (desired_align > 1 && desired_align > align))
    {
      epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
      /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
         Make sure it is power of 2.  */
      epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);

      if (count)
        {
          if (count < (unsigned HOST_WIDE_INT) epilogue_size_needed)
            {
              /* If main algorithm works on QImode, no epilogue is needed.
                 For small sizes just don't align anything.  */
              if (size_needed == 1)
                desired_align = align;
              else
                goto epilogue;
            }
        }
      else
        {
          label = gen_label_rtx ();
          emit_cmp_and_jump_insns (count_exp,
                                   GEN_INT (epilogue_size_needed),
                                   LTU, 0, counter_mode (count_exp), 1, label);
          if (expected_size == -1 || expected_size < epilogue_size_needed)
            predict_jump (REG_BR_PROB_BASE * 60 / 100);
          else
            predict_jump (REG_BR_PROB_BASE * 20 / 100);
        }
    }

  /* Emit code to decide on runtime whether library call or inline should be
     used.  */
  if (dynamic_check != -1)
    {
      if (CONST_INT_P (count_exp))
        {
          if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT) dynamic_check)
            {
              emit_block_move_via_libcall (dst, src, count_exp, false);
              count_exp = const0_rtx;
              goto epilogue;
            }
        }
      else
        {
          rtx hot_label = gen_label_rtx ();
          jump_around_label = gen_label_rtx ();
          emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
                                   LEU, 0, GET_MODE (count_exp), 1, hot_label);
          predict_jump (REG_BR_PROB_BASE * 90 / 100);
          emit_block_move_via_libcall (dst, src, count_exp, false);
          emit_jump (jump_around_label);
          emit_label (hot_label);
        }
    }

  /* Step 2: Alignment prologue.  */

  if (desired_align > align)
    {
      if (align_bytes == 0)
        {
          /* Except for the first move in epilogue, we no longer know
             constant offset in aliasing info.  It doesn't seem worth
             the pain to maintain it for the first move, so throw away
             the info early.  */
          src = change_address (src, BLKmode, srcreg);
          dst = change_address (dst, BLKmode, destreg);
          expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
                                  desired_align);
        }
      else
        {
          /* If we know how many bytes need to be stored before dst is
             sufficiently aligned, maintain aliasing info accurately.  */
          dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg,
                                                 desired_align, align_bytes);
          count_exp = plus_constant (count_exp, -align_bytes);
          count -= align_bytes;
        }
      if (need_zero_guard
          && (count < (unsigned HOST_WIDE_INT) size_needed
              || (align_bytes == 0
                  && count < ((unsigned HOST_WIDE_INT) size_needed
                              + desired_align - align))))
        {
          /* It is possible that we copied enough so the main loop will not
             execute.  */
          gcc_assert (size_needed > 1);
          if (label == NULL_RTX)
            label = gen_label_rtx ();
          emit_cmp_and_jump_insns (count_exp,
                                   GEN_INT (size_needed),
                                   LTU, 0, counter_mode (count_exp), 1, label);
          if (expected_size == -1
              || expected_size < (desired_align - align) / 2 + size_needed)
            predict_jump (REG_BR_PROB_BASE * 20 / 100);
          else
            predict_jump (REG_BR_PROB_BASE * 60 / 100);
        }
    }
  if (label && size_needed == 1)
    {
      emit_label (label);
      LABEL_NUSES (label) = 1;
      label = NULL;
      epilogue_size_needed = 1;
    }
  else if (label == NULL_RTX)
    epilogue_size_needed = size_needed;

  /* Step 3: Main loop.  */

  switch (alg)
    {
    case libcall:
    case no_stringop:
      gcc_unreachable ();
    case loop_1_byte:
      expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
                                     count_exp, QImode, 1, expected_size);
      break;
    case loop:
      expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
                                     count_exp, Pmode, 1, expected_size);
      break;
    case unrolled_loop:
      /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
         registers for 4 temporaries anyway.  */
      expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
                                     count_exp, Pmode, TARGET_64BIT ? 4 : 2,
                                     expected_size);
      break;
    case rep_prefix_8_byte:
      expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
                                 DImode);
      break;
    case rep_prefix_4_byte:
      expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
                                 SImode);
      break;
    case rep_prefix_1_byte:
      expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
                                 QImode);
      break;
    }
  /* Properly adjust the offset of src and dest memory for aliasing.  */
  if (CONST_INT_P (count_exp))
    {
      src = adjust_automodify_address_nv (src, BLKmode, srcreg,
                                          (count / size_needed) * size_needed);
      dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
                                          (count / size_needed) * size_needed);
    }
  else
    {
      src = change_address (src, BLKmode, srcreg);
      dst = change_address (dst, BLKmode, destreg);
    }

  /* Step 4: Epilogue to copy the remaining bytes.  */
 epilogue:
  if (label)
    {
      /* When the main loop is done, COUNT_EXP might hold original count,
         while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
         Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
         bytes.  Compensate if needed.  */

      if (size_needed < epilogue_size_needed)
        {
          tmp =
            expand_simple_binop (counter_mode (count_exp), AND, count_exp,
                                 GEN_INT (size_needed - 1), count_exp, 1,
                                 OPTAB_DIRECT);
          if (tmp != count_exp)
            emit_move_insn (count_exp, tmp);
        }
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }

  if (count_exp != const0_rtx && epilogue_size_needed > 1)
    expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
                            epilogue_size_needed);
  if (jump_around_label)
    emit_label (jump_around_label);
  return true;
}
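/* A concrete walk-through of the four steps, assuming a 64-bit target
   where decide_alg picked unrolled_loop (so size_needed == 32 and
   desired_align == 8) and the byte count N is only known at run time:

     if (N < 32) goto epilogue;                   step 1, guard
     while (dst & 7) { *dst++ = *src++; N--; }    step 2 (really a
                                                  jump tree, not a loop)
     copy N & ~31 bytes, 32 per iteration;        step 3, main loop
     copy the N & 31 remaining bytes;             step 4, epilogue

   Numbers are illustrative; the actual guard threshold is the
   epilogue_size_needed value computed above.  */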
/* Helper function for memset expansion.  For a QImode value 0xXY produce
   0xXYXYXYXY of the width specified by MODE.  This is essentially
   a * 0x10101010, but we can do slightly better than
   synth_mult by unwinding the sequence by hand on CPUs with
   slow multiply.  */
static rtx
promote_duplicated_reg (enum machine_mode mode, rtx val)
{
  enum machine_mode valmode = GET_MODE (val);
  rtx tmp;
  int nops = mode == DImode ? 3 : 2;

  gcc_assert (mode == SImode || mode == DImode);
  if (val == const0_rtx)
    return copy_to_mode_reg (mode, const0_rtx);
  if (CONST_INT_P (val))
    {
      HOST_WIDE_INT v = INTVAL (val) & 255;

      v |= v << 8;
      v |= v << 16;
      if (mode == DImode)
        v |= (v << 16) << 16;
      return copy_to_mode_reg (mode, gen_int_mode (v, mode));
    }

  if (valmode == VOIDmode)
    valmode = QImode;
  if (valmode != QImode)
    val = gen_lowpart (QImode, val);
  if (mode == QImode)
    return val;
  if (!TARGET_PARTIAL_REG_STALL)
    nops--;
  if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
      + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
      <= (ix86_cost->shift_const + ix86_cost->add) * nops
          + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
    {
      rtx reg = convert_modes (mode, QImode, val, true);
      tmp = promote_duplicated_reg (mode, const1_rtx);
      return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
                                  OPTAB_DIRECT);
    }
  else
    {
      rtx reg = convert_modes (mode, QImode, val, true);

      if (!TARGET_PARTIAL_REG_STALL)
        if (mode == SImode)
          emit_insn (gen_movsi_insv_1 (reg, reg));
        else
          emit_insn (gen_movdi_insv_1 (reg, reg));
      else
        {
          tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
                                     NULL, 1, OPTAB_DIRECT);
          reg =
            expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
        }
      tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
                                 NULL, 1, OPTAB_DIRECT);
      reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
      if (mode == SImode)
        return reg;
      tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
                                 NULL, 1, OPTAB_DIRECT);
      reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
      return reg;
    }
}
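/* Worked example of the shift/or unwinding above for val == 0xAB and
   MODE == SImode (values illustrative):

     reg  = 0x000000AB
     reg |= reg << 8;    ->  0x0000ABAB   (or one insv insn on CPUs
                                           without partial-reg stalls)
     reg |= reg << 16;   ->  0xABABABAB

   DImode adds one more step, reg |= reg << 32.  When the cost tables
   say multiply is cheap, the same result comes from reg * 0x01010101
   instead.  */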
/* Duplicate value VAL using promote_duplicated_reg into maximal size that will
   be needed by main loop copying SIZE_NEEDED chunks and prologue getting
   alignment from ALIGN to DESIRED_ALIGN.  */
static rtx
promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align,
                                int align)
{
  rtx promoted_val;

  if (TARGET_64BIT
      && (size_needed > 4 || (desired_align > align && desired_align > 4)))
    promoted_val = promote_duplicated_reg (DImode, val);
  else if (size_needed > 2 || (desired_align > align && desired_align > 2))
    promoted_val = promote_duplicated_reg (SImode, val);
  else if (size_needed > 1 || (desired_align > align && desired_align > 1))
    promoted_val = promote_duplicated_reg (HImode, val);
  else
    promoted_val = val;

  return promoted_val;
}
/* Expand string clear operation (bzero).  Use i386 string operations when
   profitable.  See expand_movmem comment for explanation of individual
   steps performed.  */
bool
ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
                    rtx expected_align_exp, rtx expected_size_exp)
{
  rtx destreg;
  rtx label = NULL;
  rtx tmp;
  rtx jump_around_label = NULL;
  HOST_WIDE_INT align = 1;
  unsigned HOST_WIDE_INT count = 0;
  HOST_WIDE_INT expected_size = -1;
  int size_needed = 0, epilogue_size_needed;
  int desired_align = 0, align_bytes = 0;
  enum stringop_alg alg;
  rtx promoted_val = NULL;
  bool force_loopy_epilogue = false;
  int dynamic_check;
  bool need_zero_guard = false;

  if (CONST_INT_P (align_exp))
    align = INTVAL (align_exp);
  /* i386 can do misaligned access on reasonably increased cost.  */
  if (CONST_INT_P (expected_align_exp)
      && INTVAL (expected_align_exp) > align)
    align = INTVAL (expected_align_exp);
  if (CONST_INT_P (count_exp))
    count = expected_size = INTVAL (count_exp);
  if (CONST_INT_P (expected_size_exp) && count == 0)
    expected_size = INTVAL (expected_size_exp);

  /* Make sure we don't need to care about overflow later on.  */
  if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
    return false;

  /* Step 0: Decide on preferred algorithm, desired alignment and
     size of chunks to be copied by main loop.  */

  alg = decide_alg (count, expected_size, true, &dynamic_check);
  desired_align = decide_alignment (align, alg, expected_size);

  if (!TARGET_ALIGN_STRINGOPS)
    align = desired_align;

  if (alg == libcall)
    return false;
  gcc_assert (alg != no_stringop);
  if (!count)
    count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
  destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
  switch (alg)
    {
    case libcall:
    case no_stringop:
      gcc_unreachable ();
    case loop:
      need_zero_guard = true;
      size_needed = GET_MODE_SIZE (Pmode);
      break;
    case unrolled_loop:
      need_zero_guard = true;
      size_needed = GET_MODE_SIZE (Pmode) * 4;
      break;
    case rep_prefix_8_byte:
      size_needed = 8;
      break;
    case rep_prefix_4_byte:
      size_needed = 4;
      break;
    case rep_prefix_1_byte:
      size_needed = 1;
      break;
    case loop_1_byte:
      need_zero_guard = true;
      size_needed = 1;
      break;
    }
  epilogue_size_needed = size_needed;

  /* Step 1: Prologue guard.  */

  /* Alignment code needs count to be in register.  */
  if (CONST_INT_P (count_exp) && desired_align > align)
    {
      if (INTVAL (count_exp) > desired_align
          && INTVAL (count_exp) > size_needed)
        {
          align_bytes
            = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
          if (align_bytes <= 0)
            align_bytes = 0;
          else
            align_bytes = desired_align - align_bytes;
        }
      if (align_bytes == 0)
        {
          enum machine_mode mode = SImode;
          if (TARGET_64BIT && (count & ~0xffffffff))
            mode = DImode;
          count_exp = force_reg (mode, count_exp);
        }
    }
  /* Do the cheap promotion to allow better CSE across the
     main loop and epilogue (i.e. one load of the big constant in
     front of all code).  */
  if (CONST_INT_P (val_exp))
    promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
                                                   desired_align, align);
  /* Ensure that alignment prologue won't copy past end of block.  */
  if (size_needed > 1 || (desired_align > 1 && desired_align > align))
    {
      epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
      /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
         Make sure it is power of 2.  */
      epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);

      /* To improve performance of small blocks, we jump around the VAL
         promoting mode.  This means that if the promoted VAL is not constant,
         we might not use it in the epilogue and have to use byte
         loop variant.  */
      if (epilogue_size_needed > 2 && !promoted_val)
        force_loopy_epilogue = true;
      if (count)
        {
          if (count < (unsigned HOST_WIDE_INT) epilogue_size_needed)
            {
              /* If main algorithm works on QImode, no epilogue is needed.
                 For small sizes just don't align anything.  */
              if (size_needed == 1)
                desired_align = align;
              else
                goto epilogue;
            }
        }
      else
        {
          label = gen_label_rtx ();
          emit_cmp_and_jump_insns (count_exp,
                                   GEN_INT (epilogue_size_needed),
                                   LTU, 0, counter_mode (count_exp), 1, label);
          if (expected_size == -1 || expected_size <= epilogue_size_needed)
            predict_jump (REG_BR_PROB_BASE * 60 / 100);
          else
            predict_jump (REG_BR_PROB_BASE * 20 / 100);
        }
    }
  if (dynamic_check != -1)
    {
      rtx hot_label = gen_label_rtx ();
      jump_around_label = gen_label_rtx ();
      emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
                               LEU, 0, counter_mode (count_exp), 1, hot_label);
      predict_jump (REG_BR_PROB_BASE * 90 / 100);
      set_storage_via_libcall (dst, count_exp, val_exp, false);
      emit_jump (jump_around_label);
      emit_label (hot_label);
    }

  /* Step 2: Alignment prologue.  */

  /* Do the expensive promotion once we branched off the small blocks.  */
  if (!promoted_val)
    promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
                                                   desired_align, align);
  gcc_assert (desired_align >= 1 && align >= 1);

  if (desired_align > align)
    {
      if (align_bytes == 0)
        {
          /* Except for the first move in epilogue, we no longer know
             constant offset in aliasing info.  It doesn't seem worth
             the pain to maintain it for the first move, so throw away
             the info early.  */
          dst = change_address (dst, BLKmode, destreg);
          expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
                                  desired_align);
        }
      else
        {
          /* If we know how many bytes need to be stored before dst is
             sufficiently aligned, maintain aliasing info accurately.  */
          dst = expand_constant_setmem_prologue (dst, destreg, promoted_val,
                                                 desired_align, align_bytes);
          count_exp = plus_constant (count_exp, -align_bytes);
          count -= align_bytes;
        }
      if (need_zero_guard
          && (count < (unsigned HOST_WIDE_INT) size_needed
              || (align_bytes == 0
                  && count < ((unsigned HOST_WIDE_INT) size_needed
                              + desired_align - align))))
        {
          /* It is possible that we copied enough so the main loop will not
             execute.  */
          gcc_assert (size_needed > 1);
          if (label == NULL_RTX)
            label = gen_label_rtx ();
          emit_cmp_and_jump_insns (count_exp,
                                   GEN_INT (size_needed),
                                   LTU, 0, counter_mode (count_exp), 1, label);
          if (expected_size == -1
              || expected_size < (desired_align - align) / 2 + size_needed)
            predict_jump (REG_BR_PROB_BASE * 20 / 100);
          else
            predict_jump (REG_BR_PROB_BASE * 60 / 100);
        }
    }
  if (label && size_needed == 1)
    {
      emit_label (label);
      LABEL_NUSES (label) = 1;
      label = NULL;
      promoted_val = val_exp;
      epilogue_size_needed = 1;
    }
  else if (label == NULL_RTX)
    epilogue_size_needed = size_needed;

  /* Step 3: Main loop.  */

  switch (alg)
    {
    case libcall:
    case no_stringop:
      gcc_unreachable ();
    case loop_1_byte:
      expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
                                     count_exp, QImode, 1, expected_size);
      break;
    case loop:
      expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
                                     count_exp, Pmode, 1, expected_size);
      break;
    case unrolled_loop:
      expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
                                     count_exp, Pmode, 4, expected_size);
      break;
    case rep_prefix_8_byte:
      expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
                                  DImode, val_exp);
      break;
    case rep_prefix_4_byte:
      expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
                                  SImode, val_exp);
      break;
    case rep_prefix_1_byte:
      expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
                                  QImode, val_exp);
      break;
    }
  /* Properly adjust the offset of the dest memory for aliasing.  */
  if (CONST_INT_P (count_exp))
    dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
                                        (count / size_needed) * size_needed);
  else
    dst = change_address (dst, BLKmode, destreg);

  /* Step 4: Epilogue to copy the remaining bytes.  */

 epilogue:
  if (label)
    {
      /* When the main loop is done, COUNT_EXP might hold original count,
         while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
         Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
         bytes.  Compensate if needed.  */

      if (size_needed < epilogue_size_needed)
        {
          tmp =
            expand_simple_binop (counter_mode (count_exp), AND, count_exp,
                                 GEN_INT (size_needed - 1), count_exp, 1,
                                 OPTAB_DIRECT);
          if (tmp != count_exp)
            emit_move_insn (count_exp, tmp);
        }
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }

  if (count_exp != const0_rtx && epilogue_size_needed > 1)
    {
      if (force_loopy_epilogue)
        expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
                                         epilogue_size_needed);
      else
        expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
                                epilogue_size_needed);
    }
  if (jump_around_label)
    emit_label (jump_around_label);
  return true;
}
/* Expand the appropriate insns for doing strlen if not just doing
   repnz; scasb

   out = result, initialized with the start address
   align_rtx = alignment of the address.
   scratch = scratch register, initialized with the startaddress when
        not aligned, otherwise undefined

   This is just the body.  It needs the initializations mentioned above and
   some address computing at the end.  These things are done in i386.md.  */

static void
ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
{
  int align;
  rtx tmp;
  rtx align_2_label = NULL_RTX;
  rtx align_3_label = NULL_RTX;
  rtx align_4_label = gen_label_rtx ();
  rtx end_0_label = gen_label_rtx ();
  rtx mem;
  rtx tmpreg = gen_reg_rtx (SImode);
  rtx scratch = gen_reg_rtx (SImode);
  rtx cmp;

  align = 0;
  if (CONST_INT_P (align_rtx))
    align = INTVAL (align_rtx);

  /* Loop to check 1..3 bytes for null to get an aligned pointer.  */

  /* Is there a known alignment and is it less than 4?  */
  if (align < 4)
    {
      rtx scratch1 = gen_reg_rtx (Pmode);
      emit_move_insn (scratch1, out);
      /* Is there a known alignment and is it not 2? */
      if (align != 2)
        {
          align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
          align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */

          /* Leave just the 3 lower bits.  */
          align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
                                    NULL_RTX, 0, OPTAB_WIDEN);

          emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
                                   Pmode, 1, align_4_label);
          emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
                                   Pmode, 1, align_2_label);
          emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
                                   Pmode, 1, align_3_label);
        }
      else
        {
          /* Since the alignment is 2, we have to check 2 or 0 bytes;
             check whether it is aligned to 4 bytes.  */

          align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
                                    NULL_RTX, 0, OPTAB_WIDEN);

          emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
                                   Pmode, 1, align_4_label);
        }

      mem = change_address (src, QImode, out);

      /* Now compare the bytes.  */

      /* Compare the first n unaligned byte on a byte per byte basis.  */
      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
                               QImode, 1, end_0_label);

      /* Increment the address.  */
      emit_insn (ix86_gen_add3 (out, out, const1_rtx));

      /* Not needed with an alignment of 2 */
      if (align != 2)
        {
          emit_label (align_2_label);

          emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
                                   end_0_label);

          emit_insn (ix86_gen_add3 (out, out, const1_rtx));

          emit_label (align_3_label);
        }

      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
                               end_0_label);

      emit_insn (ix86_gen_add3 (out, out, const1_rtx));
    }

  /* Generate loop to check 4 bytes at a time.  It is not a good idea to
     align this loop; it only enlarges the program without speeding it
     up.  */
  emit_label (align_4_label);

  mem = change_address (src, SImode, out);
  emit_move_insn (scratch, mem);
  emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));

  /* This formula yields a nonzero result iff one of the bytes is zero.
     This saves three branches inside loop and many cycles.  */

  emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
  emit_insn (gen_one_cmplsi2 (scratch, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg,
                         gen_int_mode (0x80808080, SImode)));
  emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
                           align_4_label);

  if (TARGET_CMOVE)
    {
      rtx reg = gen_reg_rtx (SImode);
      rtx reg2 = gen_reg_rtx (Pmode);
      emit_move_insn (reg, tmpreg);
      emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));

      /* If zero is not in the first two bytes, move two bytes forward.  */
      emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
                              gen_rtx_IF_THEN_ELSE (SImode, tmp,
                                                    reg,
                                                    tmpreg)));
      /* Emit lea manually to avoid clobbering of flags.  */
      emit_insn (gen_rtx_SET (SImode, reg2,
                              gen_rtx_PLUS (Pmode, out, const2_rtx)));

      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, out,
                              gen_rtx_IF_THEN_ELSE (Pmode, tmp,
                                                    reg2,
                                                    out)));
    }
  else
    {
      rtx end_2_label = gen_label_rtx ();
      /* Is zero in the first two bytes? */

      emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
                                  gen_rtx_LABEL_REF (VOIDmode, end_2_label),
                                  pc_rtx);
      tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
      JUMP_LABEL (tmp) = end_2_label;

      /* Not in the first two.  Move two bytes forward.  */
      emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
      emit_insn (ix86_gen_add3 (out, out, const2_rtx));

      emit_label (end_2_label);
    }

  /* Avoid branch in fixing the byte.  */
  tmpreg = gen_lowpart (QImode, tmpreg);
  emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
  tmp = gen_rtx_REG (CCmode, FLAGS_REG);
  cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
  emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));

  emit_label (end_0_label);
}
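/* Worked example of the zero-byte test above.  For a loaded word w,
   the value (w - 0x01010101) & ~w & 0x80808080 is nonzero exactly when
   some byte of w is zero.  E.g. for w == 0x41420043 (a zero byte in
   the second-lowest position):

     w - 0x01010101   =  0x4040FF42
     ~w               =  0xBEBDFFBC
     and'ed together  =  0x0000FF00
     & 0x80808080     =  0x00008000   -> nonzero, zero byte flagged

   For w == 0x41424344 (no zero byte) the result is 0.  Values are
   illustrative.  */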
/* Expand strlen.  */

bool
ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
{
  rtx addr, scratch1, scratch2, scratch3, scratch4;

  /* The generic case of strlen expander is long.  Avoid expanding it
     unless TARGET_INLINE_ALL_STRINGOPS.  */

  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
      && !TARGET_INLINE_ALL_STRINGOPS
      && !optimize_insn_for_size_p ()
      && (!CONST_INT_P (align) || INTVAL (align) < 4))
    return false;

  addr = force_reg (Pmode, XEXP (src, 0));
  scratch1 = gen_reg_rtx (Pmode);

  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
      && !optimize_insn_for_size_p ())
    {
      /* Well it seems that some optimizer does not combine a call like
         foo(strlen(bar), strlen(bar));
         when the move and the subtraction is done here.  It does calculate
         the length just once when these instructions are done inside of
         output_strlen_unroll().  But I think since &bar[strlen(bar)] is
         often used and I use one fewer register for the lifetime of
         output_strlen_unroll() this is better.  */

      emit_move_insn (out, addr);

      ix86_expand_strlensi_unroll_1 (out, src, align);

      /* strlensi_unroll_1 returns the address of the zero at the end of
         the string, like memchr(), so compute the length by subtracting
         the start address.  */
      emit_insn (ix86_gen_sub3 (out, out, addr));
    }
  else
    {
      rtx unspec;

      /* Can't use this if the user has appropriated eax, ecx, or edi.  */
      if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
        return false;

      scratch2 = gen_reg_rtx (Pmode);
      scratch3 = gen_reg_rtx (Pmode);
      scratch4 = force_reg (Pmode, constm1_rtx);

      emit_move_insn (scratch3, addr);
      eoschar = force_reg (QImode, eoschar);

      src = replace_equiv_address_nv (src, scratch3);

      /* If .md starts supporting :P, this can be done in .md.  */
      unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
                                                 scratch4), UNSPEC_SCAS);
      emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
      emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
      emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
    }
  return true;
}
/* For given symbol (function) construct code to compute address of its PLT
   entry in large x86-64 PIC model.  */
static rtx
construct_plt_address (rtx symbol)
{
  rtx tmp = gen_reg_rtx (Pmode);
  rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);

  gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
  gcc_assert (ix86_cmodel == CM_LARGE_PIC);

  emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
  emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
  return tmp;
}
rtx
ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
                  rtx callarg2,
                  rtx pop, bool sibcall)
{
  rtx use = NULL, call;

  if (pop == const0_rtx)
    pop = NULL;
  gcc_assert (!TARGET_64BIT || !pop);

  if (TARGET_MACHO && !TARGET_64BIT)
    {
#if TARGET_MACHO
      if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
        fnaddr = machopic_indirect_call_target (fnaddr);
#endif
    }
  else
    {
      /* Static functions and indirect calls don't need the pic register.  */
      if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
          && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
          && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
        use_reg (&use, pic_offset_table_rtx);
    }

  if (TARGET_64BIT && INTVAL (callarg2) >= 0)
    {
      rtx al = gen_rtx_REG (QImode, AX_REG);
      emit_move_insn (al, callarg2);
      use_reg (&use, al);
    }

  if (ix86_cmodel == CM_LARGE_PIC
      && MEM_P (fnaddr)
      && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
      && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
    fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
  else if (sibcall
           ? !sibcall_insn_operand (XEXP (fnaddr, 0), Pmode)
           : !call_insn_operand (XEXP (fnaddr, 0), Pmode))
    {
      fnaddr = XEXP (fnaddr, 0);
      if (GET_MODE (fnaddr) != Pmode)
        fnaddr = convert_to_mode (Pmode, fnaddr, 1);
      fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (Pmode, fnaddr));
    }

  call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
  if (retval)
    call = gen_rtx_SET (VOIDmode, retval, call);
  if (pop)
    {
      pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
      pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
      call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
    }
  if (TARGET_64BIT_MS_ABI
      && (!callarg2 || INTVAL (callarg2) != -2))
    {
      /* We need to represent that SI and DI registers are clobbered
         by SYSV calls.  */
      static int clobbered_registers[] = {
        XMM6_REG, XMM7_REG, XMM8_REG,
        XMM9_REG, XMM10_REG, XMM11_REG,
        XMM12_REG, XMM13_REG, XMM14_REG,
        XMM15_REG, SI_REG, DI_REG
      };
      unsigned int i;
      rtx vec[ARRAY_SIZE (clobbered_registers) + 2];
      rtx unspec = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
                                   UNSPEC_MS_TO_SYSV_CALL);

      vec[0] = call;
      vec[1] = unspec;
      for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
        vec[i + 2]
          = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
                             ? TImode : DImode,
                             gen_rtx_REG
                               (SSE_REGNO_P (clobbered_registers[i])
                                ? TImode : DImode,
                                clobbered_registers[i]));

      call = gen_rtx_PARALLEL (VOIDmode,
                               gen_rtvec_v (ARRAY_SIZE (clobbered_registers)
                                            + 2, vec));
    }

  /* Add UNSPEC_CALL_NEEDS_VZEROUPPER decoration.  */
  if (TARGET_VZEROUPPER)
    {
      int avx256;
      if (cfun->machine->callee_pass_avx256_p)
        {
          if (cfun->machine->callee_return_avx256_p)
            avx256 = callee_return_pass_avx256;
          else
            avx256 = callee_pass_avx256;
        }
      else if (cfun->machine->callee_return_avx256_p)
        avx256 = callee_return_avx256;
      else
        avx256 = call_no_avx256;

      if (reload_completed)
        emit_insn (gen_avx_vzeroupper (GEN_INT (avx256)));
      else
        {
          rtx unspec;
          unspec = gen_rtx_UNSPEC (VOIDmode,
                                   gen_rtvec (1, GEN_INT (avx256)),
                                   UNSPEC_CALL_NEEDS_VZEROUPPER);
          call = gen_rtx_PARALLEL (VOIDmode,
                                   gen_rtvec (2, call, unspec));
        }
    }

  call = emit_call_insn (call);
  if (use)
    CALL_INSN_FUNCTION_USAGE (call) = use;

  return call;
}
void
ix86_split_call_vzeroupper (rtx insn, rtx vzeroupper)
{
  rtx call = XVECEXP (PATTERN (insn), 0, 0);
  emit_insn (gen_avx_vzeroupper (vzeroupper));
  emit_call_insn (call);
}
/* Output the assembly for a call instruction.  */

const char *
ix86_output_call_insn (rtx insn, rtx call_op)
{
  bool direct_p = constant_call_address_operand (call_op, Pmode);
  bool seh_nop_p = false;
  const char *xasm;

  if (SIBLING_CALL_P (insn))
    {
      if (direct_p)
	xasm = "jmp\t%P0";
      /* SEH epilogue detection requires the indirect branch case
	 to include REX.W.  */
      else if (TARGET_SEH)
	xasm = "rex.W jmp %A0";
      else
	xasm = "jmp\t%A0";

      output_asm_insn (xasm, &call_op);
      return "";
    }

  /* SEH unwinding can require an extra nop to be emitted in several
     circumstances.  Determine if we have one of those.  */
  if (TARGET_SEH)
    {
      rtx i;

      for (i = NEXT_INSN (insn); i; i = NEXT_INSN (i))
	{
	  /* If we get to another real insn, we don't need the nop.  */
	  if (INSN_P (i))
	    break;

	  /* If we get to the epilogue note, prevent a catch region from
	     being adjacent to the standard epilogue sequence.  If non-
	     call-exceptions, we'll have done this during epilogue emission.  */
	  if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
	      && !flag_non_call_exceptions
	      && !can_throw_internal (insn))
	    {
	      seh_nop_p = true;
	      break;
	    }
	}

      /* If we didn't find a real insn following the call, prevent the
	 unwinder from looking into the next function.  */
      if (i == NULL)
	seh_nop_p = true;
    }

  if (direct_p)
    xasm = "call\t%P0";
  else
    xasm = "call\t%A0";

  output_asm_insn (xasm, &call_op);

  if (seh_nop_p)
    return "nop";

  return "";
}
/* Clear stack slot assignments remembered from previous functions.
   This is called from INIT_EXPANDERS once before RTL is emitted for each
   function.  */

static struct machine_function *
ix86_init_machine_status (void)
{
  struct machine_function *f;

  f = ggc_alloc_cleared_machine_function ();
  f->use_fast_prologue_epilogue_nregs = -1;
  f->tls_descriptor_call_expanded_p = 0;
  f->call_abi = ix86_abi;

  return f;
}
/* Return a MEM corresponding to a stack slot with mode MODE.
   Allocate a new slot if necessary.

   The RTL for a function can have several slots available: N is
   which slot to use.  */

rtx
assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
{
  struct stack_local_entry *s;

  gcc_assert (n < MAX_386_STACK_LOCALS);

  /* Virtual slot is valid only before vregs are instantiated.  */
  gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);

  for (s = ix86_stack_locals; s; s = s->next)
    if (s->mode == mode && s->n == n)
      return copy_rtx (s->rtl);

  s = ggc_alloc_stack_local_entry ();
  s->n = n;
  s->mode = mode;
  s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);

  s->next = ix86_stack_locals;
  ix86_stack_locals = s;
  return s->rtl;
}
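/* A standalone sketch (not part of GCC) of the caching discipline above,
   with plain integers standing in for modes and RTL.  One slot is created
   per (mode, slot-number) pair and remembered on a list; repeated requests
   return the remembered slot.  All names here are hypothetical.  Guarded
   with #if 0 so it takes no part in the build.  */
#if 0
#include <stdio.h>
#include <stdlib.h>

struct slot { int mode, n, id; struct slot *next; };
static struct slot *slots;
static int next_id;

static int
get_slot (int mode, int n)
{
  struct slot *s;

  for (s = slots; s; s = s->next)
    if (s->mode == mode && s->n == n)
      return s->id;		/* Reuse the remembered slot.  */

  s = malloc (sizeof *s);
  s->mode = mode;
  s->n = n;
  s->id = next_id++;		/* Stand-in for assign_stack_local.  */
  s->next = slots;
  slots = s;
  return s->id;
}

int
main (void)
{
  /* Prints "0 1 0": the third request hits the cache.  */
  printf ("%d %d %d\n", get_slot (0, 1), get_slot (2, 1), get_slot (0, 1));
  return 0;
}
#endif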
/* Calculate the length of the memory address in the instruction
   encoding.  Does not include the one-byte modrm, opcode, or prefix.  */
int
memory_address_length (rtx addr)
{
  struct ix86_address parts;
  rtx base, index, disp;
  int len;
  int ok;

  if (GET_CODE (addr) == PRE_DEC
      || GET_CODE (addr) == POST_INC
      || GET_CODE (addr) == PRE_MODIFY
      || GET_CODE (addr) == POST_MODIFY)
    return 0;

  ok = ix86_decompose_address (addr, &parts);
  gcc_assert (ok);

  if (parts.base && GET_CODE (parts.base) == SUBREG)
    parts.base = SUBREG_REG (parts.base);
  if (parts.index && GET_CODE (parts.index) == SUBREG)
    parts.index = SUBREG_REG (parts.index);

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  len = 0;

  /* Rule of thumb:
       - esp as the base always wants an index,
       - ebp as the base always wants a displacement,
       - r12 as the base always wants an index,
       - r13 as the base always wants a displacement.  */

  /* Register Indirect.  */
  if (base && !index && !disp)
    {
      /* esp (for its index) and ebp (for its displacement) need
	 the two-byte modrm form.  Similarly for r12 and r13 in 64-bit
	 code.  */
      if (REG_P (addr)
	  && (addr == arg_pointer_rtx
	      || addr == frame_pointer_rtx
	      || REGNO (addr) == SP_REG
	      || REGNO (addr) == BP_REG
	      || REGNO (addr) == R12_REG
	      || REGNO (addr) == R13_REG))
	len = 1;
    }

  /* Direct Addressing.  In 64-bit mode mod 00 r/m 5
     is not disp32, but disp32(%rip), so for disp32
     SIB byte is needed, unless print_operand_address
     optimizes it into disp32(%rip) or (%rip) is implied
     by UNSPEC.  */
  else if (disp && !base && !index)
    {
      len = 4;
      if (TARGET_64BIT)
	{
	  rtx symbol = disp;

	  if (GET_CODE (disp) == CONST)
	    symbol = XEXP (disp, 0);
	  if (GET_CODE (symbol) == PLUS
	      && CONST_INT_P (XEXP (symbol, 1)))
	    symbol = XEXP (symbol, 0);

	  if (GET_CODE (symbol) != LABEL_REF
	      && (GET_CODE (symbol) != SYMBOL_REF
		  || SYMBOL_REF_TLS_MODEL (symbol) != 0)
	      && (GET_CODE (symbol) != UNSPEC
		  || (XINT (symbol, 1) != UNSPEC_GOTPCREL
		      && XINT (symbol, 1) != UNSPEC_PCREL
		      && XINT (symbol, 1) != UNSPEC_GOTNTPOFF)))
	    len += 1;
	}
    }
  else
    {
      /* Find the length of the displacement constant.  */
      if (disp)
	{
	  if (base && satisfies_constraint_K (disp))
	    len = 1;
	  else
	    len = 4;
	}
      /* ebp always wants a displacement.  Similarly r13.  */
      else if (base && REG_P (base)
	       && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
	len = 1;

      /* An index requires the two-byte modrm form....  */
      if (index
	  /* ...like esp (or r12), which always wants an index.  */
	  || base == arg_pointer_rtx
	  || base == frame_pointer_rtx
	  || (base && REG_P (base)
	      && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
	len += 1;
    }

  return len;
}
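/* A standalone sketch (not part of GCC) of the common 32-bit cases of the
   rules above: ebp/r13 as a bare base still needs a disp8, an index (or
   esp/r12 as base) needs a SIB byte, and an explicit displacement costs
   1 or 4 bytes depending on whether it fits in a signed 8-bit field.
   addr_extra_bytes is a hypothetical helper for illustration only.
   Guarded with #if 0 so it takes no part in the build.  */
#if 0
#include <stdbool.h>
#include <stdio.h>

static int
addr_extra_bytes (bool has_base, bool base_is_bp_or_r13,
		  bool base_is_sp_or_r12, bool has_index,
		  bool has_disp, long disp)
{
  int len = 0;

  if (has_disp)
    len = (has_base && disp >= -128 && disp <= 127) ? 1 : 4;
  else if (has_base && base_is_bp_or_r13)
    len = 1;			/* ebp/r13 always want a displacement.  */

  if (has_index || (has_base && base_is_sp_or_r12))
    len += 1;			/* SIB byte.  */

  return len;
}

int
main (void)
{
  /* 4(%eax) -> 1; 4(%eax,%ebx) -> 2; (%ebp) -> 1; 1000(%eax) -> 4.  */
  printf ("%d %d %d %d\n",
	  addr_extra_bytes (true, false, false, false, true, 4),
	  addr_extra_bytes (true, false, false, true, true, 4),
	  addr_extra_bytes (true, true, false, false, false, 0),
	  addr_extra_bytes (true, false, false, false, true, 1000));
  return 0;
}
#endif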
/* Compute default value for "length_immediate" attribute.  When SHORTFORM
   is set, expect that the insn has an 8-bit immediate alternative.  */
int
ix86_attr_length_immediate_default (rtx insn, bool shortform)
{
  int len = 0;
  int i;

  extract_insn_cached (insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (CONSTANT_P (recog_data.operand[i]))
      {
	enum attr_mode mode = get_attr_mode (insn);

	gcc_assert (!len);
	if (shortform && CONST_INT_P (recog_data.operand[i]))
	  {
	    HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);

	    switch (mode)
	      {
	      case MODE_QI:
		len = 1;
		continue;
	      case MODE_HI:
		ival = trunc_int_for_mode (ival, HImode);
		break;
	      case MODE_SI:
		ival = trunc_int_for_mode (ival, SImode);
		break;
	      default:
		break;
	      }
	    if (IN_RANGE (ival, -128, 127))
	      {
		len = 1;
		continue;
	      }
	  }
	switch (mode)
	  {
	  case MODE_QI:
	    len = 1;
	    break;
	  case MODE_HI:
	    len = 2;
	    break;
	  case MODE_SI:
	    len = 4;
	    break;
	  /* Immediates for DImode instructions are encoded as 32bit sign
	     extended values.  */
	  case MODE_DI:
	    len = 4;
	    break;
	  default:
	    fatal_insn ("unknown insn mode", insn);
	  }
      }
  return len;
}
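/* A standalone sketch (not part of GCC) of the short-immediate test above
   for a HImode operand: the constant is first truncated to the operand
   mode and then range-checked, so e.g. 0xffff truncates to -1 on a
   two's-complement host and still earns the one-byte encoding.
   imm_len_hi is a hypothetical name.  Guarded with #if 0 so it takes no
   part in the build.  */
#if 0
#include <stdint.h>
#include <stdio.h>

static int
imm_len_hi (long ival)
{
  int16_t t = (int16_t) ival;	/* trunc_int_for_mode (ival, HImode).  */

  return (t >= -128 && t <= 127) ? 1 : 2;
}

int
main (void)
{
  /* Prints "1 1 2".  */
  printf ("%d %d %d\n", imm_len_hi (100), imm_len_hi (0xffff),
	  imm_len_hi (300));
  return 0;
}
#endif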
/* Compute default value for "length_address" attribute.  */
int
ix86_attr_length_address_default (rtx insn)
{
  int i;

  if (get_attr_type (insn) == TYPE_LEA)
    {
      rtx set = PATTERN (insn), addr;

      if (GET_CODE (set) == PARALLEL)
	set = XVECEXP (set, 0, 0);

      gcc_assert (GET_CODE (set) == SET);

      addr = SET_SRC (set);
      if (TARGET_64BIT && get_attr_mode (insn) == MODE_SI)
	{
	  if (GET_CODE (addr) == ZERO_EXTEND)
	    addr = XEXP (addr, 0);
	  if (GET_CODE (addr) == SUBREG)
	    addr = SUBREG_REG (addr);
	}

      return memory_address_length (addr);
    }

  extract_insn_cached (insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (MEM_P (recog_data.operand[i]))
      {
	constrain_operands_cached (reload_completed);
	if (which_alternative != -1)
	  {
	    const char *constraints = recog_data.constraints[i];
	    int alt = which_alternative;

	    while (*constraints == '=' || *constraints == '+')
	      constraints++;
	    while (alt-- > 0)
	      while (*constraints++ != ',')
		;
	    /* Skip ignored operands.  */
	    if (*constraints == 'X')
	      continue;
	  }
	return memory_address_length (XEXP (recog_data.operand[i], 0));
      }
  return 0;
}
/* Compute default value for "length_vex" attribute.  It includes
   2 or 3 byte VEX prefix and 1 opcode byte.  */

int
ix86_attr_length_vex_default (rtx insn, bool has_0f_opcode, bool has_vex_w)
{
  int i;

  /* Only 0f opcode can use 2 byte VEX prefix and VEX W bit uses 3
     byte VEX prefix.  */
  if (!has_0f_opcode || has_vex_w)
    return 3 + 1;

  /* We can always use 2 byte VEX prefix in 32bit.  */
  if (!TARGET_64BIT)
    return 2 + 1;

  extract_insn_cached (insn);

  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (REG_P (recog_data.operand[i]))
      {
	/* REX.W bit uses 3 byte VEX prefix.  */
	if (GET_MODE (recog_data.operand[i]) == DImode
	    && GENERAL_REG_P (recog_data.operand[i]))
	  return 3 + 1;
      }
    else
      {
	/* REX.X or REX.B bits use 3 byte VEX prefix.  */
	if (MEM_P (recog_data.operand[i])
	    && x86_extended_reg_mentioned_p (recog_data.operand[i]))
	  return 3 + 1;
      }

  return 2 + 1;
}
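/* A standalone sketch (not part of GCC) of the decision above: anything
   that needs the three-byte VEX form (a non-0f opcode map, VEX.W, or a
   REX.X/REX.B-style extended-register reference) costs 3 + 1 bytes,
   everything else gets the two-byte form.  The predicate flags are
   hypothetical simplifications of the operand scan.  Guarded with #if 0
   so it takes no part in the build.  */
#if 0
#include <stdbool.h>
#include <stdio.h>

static int
vex_plus_opcode_len (bool has_0f_opcode, bool has_vex_w, bool needs_rex_xbw)
{
  if (!has_0f_opcode || has_vex_w)
    return 3 + 1;
  if (needs_rex_xbw)
    return 3 + 1;		/* Only 3-byte VEX can carry REX.W/X/B.  */
  return 2 + 1;
}

int
main (void)
{
  /* Prints "3 4 4".  */
  printf ("%d %d %d\n",
	  vex_plus_opcode_len (true, false, false),
	  vex_plus_opcode_len (false, false, false),
	  vex_plus_opcode_len (true, true, false));
  return 0;
}
#endif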
/* Return the maximum number of instructions a cpu can issue.  */

static int
ix86_issue_rate (void)
{
  switch (ix86_tune)
    {
    case PROCESSOR_PENTIUM:
    case PROCESSOR_ATOM:
    case PROCESSOR_K6:
      return 2;

    case PROCESSOR_PENTIUMPRO:
    case PROCESSOR_PENTIUM4:
    case PROCESSOR_CORE2_32:
    case PROCESSOR_CORE2_64:
    case PROCESSOR_COREI7_32:
    case PROCESSOR_COREI7_64:
    case PROCESSOR_ATHLON:
    case PROCESSOR_K8:
    case PROCESSOR_AMDFAM10:
    case PROCESSOR_NOCONA:
    case PROCESSOR_GENERIC32:
    case PROCESSOR_GENERIC64:
    case PROCESSOR_BDVER1:
    case PROCESSOR_BDVER2:
    case PROCESSOR_BTVER1:
      return 3;

    default:
      return 1;
    }
}
/* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads flags set
   by DEP_INSN and nothing set by DEP_INSN.  */

static bool
ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
{
  rtx set, set2;

  /* Simplify the test for uninteresting insns.  */
  if (insn_type != TYPE_SETCC
      && insn_type != TYPE_ICMOV
      && insn_type != TYPE_FCMOV
      && insn_type != TYPE_IBR)
    return false;

  if ((set = single_set (dep_insn)) != 0)
    {
      set = SET_DEST (set);
      set2 = NULL_RTX;
    }
  else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
	   && XVECLEN (PATTERN (dep_insn), 0) == 2
	   && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
	   && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
    {
      set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
      set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
    }
  else
    return false;

  if (!REG_P (set) || REGNO (set) != FLAGS_REG)
    return false;

  /* This test is true if the dependent insn reads the flags but
     not any other potentially set register.  */
  if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
    return false;

  if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
    return false;

  return true;
}
/* Return true iff USE_INSN has a memory address with operands set by
   SET_INSN.  */

static bool
ix86_agi_dependent (rtx set_insn, rtx use_insn)
{
  int i;

  extract_insn_cached (use_insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (MEM_P (recog_data.operand[i]))
      {
	rtx addr = XEXP (recog_data.operand[i], 0);
	return modified_in_p (addr, set_insn) != 0;
      }
  return false;
}
static int
ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
{
  enum attr_type insn_type, dep_insn_type;
  enum attr_memory memory;
  rtx set, set2;
  int dep_insn_code_number;

  /* Anti and output dependencies have zero cost on all CPUs.  */
  if (REG_NOTE_KIND (link) != 0)
    return 0;

  dep_insn_code_number = recog_memoized (dep_insn);

  /* If we can't recognize the insns, we can't really do anything.  */
  if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
    return cost;

  insn_type = get_attr_type (insn);
  dep_insn_type = get_attr_type (dep_insn);

  switch (ix86_tune)
    {
    case PROCESSOR_PENTIUM:
      /* Address Generation Interlock adds a cycle of latency.  */
      if (insn_type == TYPE_LEA)
	{
	  rtx addr = PATTERN (insn);

	  if (GET_CODE (addr) == PARALLEL)
	    addr = XVECEXP (addr, 0, 0);

	  gcc_assert (GET_CODE (addr) == SET);

	  addr = SET_SRC (addr);
	  if (modified_in_p (addr, dep_insn))
	    cost += 1;
	}
      else if (ix86_agi_dependent (dep_insn, insn))
	cost += 1;

      /* ??? Compares pair with jump/setcc.  */
      if (ix86_flags_dependent (insn, dep_insn, insn_type))
	cost = 0;

      /* Floating point stores require value to be ready one cycle earlier.  */
      if (insn_type == TYPE_FMOV
	  && get_attr_memory (insn) == MEMORY_STORE
	  && !ix86_agi_dependent (dep_insn, insn))
	cost += 1;
      break;

    case PROCESSOR_PENTIUMPRO:
      memory = get_attr_memory (insn);

      /* INT->FP conversion is expensive.  */
      if (get_attr_fp_int_src (dep_insn))
	cost += 5;

      /* There is one cycle extra latency between an FP op and a store.  */
      if (insn_type == TYPE_FMOV
	  && (set = single_set (dep_insn)) != NULL_RTX
	  && (set2 = single_set (insn)) != NULL_RTX
	  && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
	  && MEM_P (SET_DEST (set2)))
	cost += 1;

      /* Show ability of reorder buffer to hide latency of load by executing
	 in parallel with previous instruction in case
	 previous instruction is not needed to compute the address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
	  && !ix86_agi_dependent (dep_insn, insn))
	{
	  /* Claim moves to take one cycle, as the core can issue one load
	     at a time and the next load can start a cycle later.  */
	  if (dep_insn_type == TYPE_IMOV
	      || dep_insn_type == TYPE_FMOV)
	    cost = 1;
	  else if (cost > 1)
	    cost--;
	}
      break;

    case PROCESSOR_K6:
      memory = get_attr_memory (insn);

      /* The esp dependency is resolved before the instruction is really
	 finished.  */
      if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
	  && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
	return 1;

      /* INT->FP conversion is expensive.  */
      if (get_attr_fp_int_src (dep_insn))
	cost += 5;

      /* Show ability of reorder buffer to hide latency of load by executing
	 in parallel with previous instruction in case
	 previous instruction is not needed to compute the address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
	  && !ix86_agi_dependent (dep_insn, insn))
	{
	  /* Claim moves to take one cycle, as the core can issue one load
	     at a time and the next load can start a cycle later.  */
	  if (dep_insn_type == TYPE_IMOV
	      || dep_insn_type == TYPE_FMOV)
	    cost = 1;
	  else if (cost > 2)
	    cost -= 2;
	  else
	    cost = 1;
	}
      break;

    case PROCESSOR_ATHLON:
    case PROCESSOR_K8:
    case PROCESSOR_AMDFAM10:
    case PROCESSOR_BDVER1:
    case PROCESSOR_BDVER2:
    case PROCESSOR_BTVER1:
    case PROCESSOR_ATOM:
    case PROCESSOR_GENERIC32:
    case PROCESSOR_GENERIC64:
      memory = get_attr_memory (insn);

      /* Show ability of reorder buffer to hide latency of load by executing
	 in parallel with previous instruction in case
	 previous instruction is not needed to compute the address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
	  && !ix86_agi_dependent (dep_insn, insn))
	{
	  enum attr_unit unit = get_attr_unit (insn);
	  int loadcost;

	  /* Because of the difference between the length of integer and
	     floating unit pipeline preparation stages, the memory operands
	     for floating point are cheaper.

	     ??? For Athlon the difference is most probably 2.  */
	  if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
	    loadcost = 3;
	  else
	    loadcost = TARGET_ATHLON ? 2 : 0;

	  if (cost >= loadcost)
	    cost -= loadcost;
	  else
	    cost = 0;
	}
      break;

    default:
      break;
    }

  return cost;
}
/* How many alternative schedules to try.  This should be as wide as the
   scheduling freedom in the DFA, but no wider.  Making this value too
   large results in extra work for the scheduler.  */

static int
ia32_multipass_dfa_lookahead (void)
{
  switch (ix86_tune)
    {
    case PROCESSOR_PENTIUM:
      return 2;

    case PROCESSOR_PENTIUMPRO:
    case PROCESSOR_K6:
      return 1;

    case PROCESSOR_CORE2_32:
    case PROCESSOR_CORE2_64:
    case PROCESSOR_COREI7_32:
    case PROCESSOR_COREI7_64:
      /* Generally, we want haifa-sched:max_issue() to look ahead as far
	 as the number of instructions that can be executed on a cycle,
	 i.e., issue_rate.  I wonder why tuning for many CPUs does not
	 do this.  */
      return ix86_issue_rate ();

    default:
      return 0;
    }
}
/* Model decoder of Core 2/i7.
   Below hooks for multipass scheduling (see haifa-sched.c:max_issue)
   track the instruction fetch block boundaries and make sure that long
   (9+ bytes) instructions are assigned to D0.  */

/* Maximum length of an insn that can be handled by
   a secondary decoder unit.  '8' for Core 2/i7.  */
static int core2i7_secondary_decoder_max_insn_size;

/* Ifetch block size, i.e., number of bytes decoder reads per cycle.
   '16' for Core 2/i7.  */
static int core2i7_ifetch_block_size;

/* Maximum number of instructions decoder can handle per cycle.
   '6' for Core 2/i7.  */
static int core2i7_ifetch_block_max_insns;

typedef struct ix86_first_cycle_multipass_data_ *
  ix86_first_cycle_multipass_data_t;
typedef const struct ix86_first_cycle_multipass_data_ *
  const_ix86_first_cycle_multipass_data_t;

/* A variable to store target state across calls to max_issue within
   one cycle.  */
static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
  *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;

/* Initialize DATA.  */
static void
core2i7_first_cycle_multipass_init (void *_data)
{
  ix86_first_cycle_multipass_data_t data
    = (ix86_first_cycle_multipass_data_t) _data;

  data->ifetch_block_len = 0;
  data->ifetch_block_n_insns = 0;
  data->ready_try_change = NULL;
  data->ready_try_change_size = 0;
}

/* Advancing the cycle; reset ifetch block counts.  */
static void
core2i7_dfa_post_advance_cycle (void)
{
  ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;

  gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);

  data->ifetch_block_len = 0;
  data->ifetch_block_n_insns = 0;
}

static int min_insn_size (rtx);

/* Filter out insns from ready_try that the core will not be able to issue
   on current cycle due to decoder.  */
static void
core2i7_first_cycle_multipass_filter_ready_try
(const_ix86_first_cycle_multipass_data_t data,
 char *ready_try, int n_ready, bool first_cycle_insn_p)
{
  while (n_ready--)
    {
      rtx insn;
      int insn_size;

      if (ready_try[n_ready])
	continue;

      insn = get_ready_element (n_ready);
      insn_size = min_insn_size (insn);

      if (/* If this is too long an insn for a secondary decoder ...  */
	  (!first_cycle_insn_p
	   && insn_size > core2i7_secondary_decoder_max_insn_size)
	  /* ... or it would not fit into the ifetch block ...  */
	  || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
	  /* ... or the decoder is full already ...  */
	  || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
	/* ... mask the insn out.  */
	{
	  ready_try[n_ready] = 1;

	  if (data->ready_try_change)
	    SET_BIT (data->ready_try_change, n_ready);
	}
    }
}

/* Prepare for a new round of multipass lookahead scheduling.  */
static void
core2i7_first_cycle_multipass_begin (void *_data, char *ready_try, int n_ready,
				     bool first_cycle_insn_p)
{
  ix86_first_cycle_multipass_data_t data
    = (ix86_first_cycle_multipass_data_t) _data;
  const_ix86_first_cycle_multipass_data_t prev_data
    = ix86_first_cycle_multipass_data;

  /* Restore the state from the end of the previous round.  */
  data->ifetch_block_len = prev_data->ifetch_block_len;
  data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;

  /* Filter instructions that cannot be issued on current cycle due to
     decoder restrictions.  */
  core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
						  first_cycle_insn_p);
}

/* INSN is being issued in current solution.  Account for its impact on
   the decoder model.  */
static void
core2i7_first_cycle_multipass_issue (void *_data, char *ready_try, int n_ready,
				     rtx insn, const void *_prev_data)
{
  ix86_first_cycle_multipass_data_t data
    = (ix86_first_cycle_multipass_data_t) _data;
  const_ix86_first_cycle_multipass_data_t prev_data
    = (const_ix86_first_cycle_multipass_data_t) _prev_data;

  int insn_size = min_insn_size (insn);

  data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
  data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
  gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
	      && data->ifetch_block_n_insns
	      <= core2i7_ifetch_block_max_insns);

  /* Allocate or resize the bitmap for storing INSN's effect on ready_try.  */
  if (!data->ready_try_change)
    {
      data->ready_try_change = sbitmap_alloc (n_ready);
      data->ready_try_change_size = n_ready;
    }
  else if (data->ready_try_change_size < n_ready)
    {
      data->ready_try_change = sbitmap_resize (data->ready_try_change,
					       n_ready, 0);
      data->ready_try_change_size = n_ready;
    }
  sbitmap_zero (data->ready_try_change);

  /* Filter out insns from ready_try that the core will not be able to issue
     on current cycle due to decoder.  */
  core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
						  false);
}

/* Revert the effect on ready_try.  */
static void
core2i7_first_cycle_multipass_backtrack (const void *_data,
					 char *ready_try,
					 int n_ready ATTRIBUTE_UNUSED)
{
  const_ix86_first_cycle_multipass_data_t data
    = (const_ix86_first_cycle_multipass_data_t) _data;
  unsigned int i = 0;
  sbitmap_iterator sbi;

  gcc_assert (sbitmap_last_set_bit (data->ready_try_change) < n_ready);
  EXECUTE_IF_SET_IN_SBITMAP (data->ready_try_change, 0, i, sbi)
    {
      ready_try[i] = 0;
    }
}

/* Save the result of multipass lookahead scheduling for the next round.  */
static void
core2i7_first_cycle_multipass_end (const void *_data)
{
  const_ix86_first_cycle_multipass_data_t data
    = (const_ix86_first_cycle_multipass_data_t) _data;
  ix86_first_cycle_multipass_data_t next_data
    = ix86_first_cycle_multipass_data;

  if (data != NULL)
    {
      next_data->ifetch_block_len = data->ifetch_block_len;
      next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
    }
}

/* Deallocate target data.  */
static void
core2i7_first_cycle_multipass_fini (void *_data)
{
  ix86_first_cycle_multipass_data_t data
    = (ix86_first_cycle_multipass_data_t) _data;

  if (data->ready_try_change)
    {
      sbitmap_free (data->ready_try_change);
      data->ready_try_change = NULL;
      data->ready_try_change_size = 0;
    }
}
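/* A standalone sketch (not part of GCC) of the decoder filter above:
   walking the ready list, anything that would overflow the 16-byte fetch
   block, exceed six decoded insns per cycle, or (when it is not the first
   insn of the cycle) exceed the 8-byte secondary-decoder limit is masked
   out.  Insn lengths are given directly instead of via min_insn_size.
   Guarded with #if 0 so it takes no part in the build.  */
#if 0
#include <stdbool.h>
#include <stdio.h>

#define IFETCH_BLOCK_SIZE 16
#define IFETCH_BLOCK_MAX_INSNS 6
#define SECONDARY_MAX_INSN_SIZE 8

static void
filter_ready (const int *insn_size, char *masked, int n_ready,
	      int block_len, int block_n_insns, bool first_cycle_insn_p)
{
  while (n_ready--)
    {
      if (masked[n_ready])
	continue;
      if ((!first_cycle_insn_p
	   && insn_size[n_ready] > SECONDARY_MAX_INSN_SIZE)
	  || block_len + insn_size[n_ready] > IFETCH_BLOCK_SIZE
	  || block_n_insns + 1 > IFETCH_BLOCK_MAX_INSNS)
	masked[n_ready] = 1;
    }
}

int
main (void)
{
  int size[3] = { 9, 4, 2 };
  char masked[3] = { 0, 0, 0 };

  /* 13 bytes already fetched, 2 insns decoded, not the first insn of the
     cycle: the 9-byte insn trips both limits, the 4-byte one no longer
     fits the block, the 2-byte one still does.  Prints "1 1 0".  */
  filter_ready (size, masked, 3, 13, 2, false);
  printf ("%d %d %d\n", masked[0], masked[1], masked[2]);
  return 0;
}
#endif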
/* Prepare for scheduling pass.  */
static void
ix86_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
			int verbose ATTRIBUTE_UNUSED,
			int max_uid ATTRIBUTE_UNUSED)
{
  /* Install scheduling hooks for current CPU.  Some of these hooks are used
     in time-critical parts of the scheduler, so we only set them up when
     they are actually used.  */
  switch (ix86_tune)
    {
    case PROCESSOR_CORE2_32:
    case PROCESSOR_CORE2_64:
    case PROCESSOR_COREI7_32:
    case PROCESSOR_COREI7_64:
      targetm.sched.dfa_post_advance_cycle
	= core2i7_dfa_post_advance_cycle;
      targetm.sched.first_cycle_multipass_init
	= core2i7_first_cycle_multipass_init;
      targetm.sched.first_cycle_multipass_begin
	= core2i7_first_cycle_multipass_begin;
      targetm.sched.first_cycle_multipass_issue
	= core2i7_first_cycle_multipass_issue;
      targetm.sched.first_cycle_multipass_backtrack
	= core2i7_first_cycle_multipass_backtrack;
      targetm.sched.first_cycle_multipass_end
	= core2i7_first_cycle_multipass_end;
      targetm.sched.first_cycle_multipass_fini
	= core2i7_first_cycle_multipass_fini;

      /* Set decoder parameters.  */
      core2i7_secondary_decoder_max_insn_size = 8;
      core2i7_ifetch_block_size = 16;
      core2i7_ifetch_block_max_insns = 6;
      break;

    default:
      targetm.sched.dfa_post_advance_cycle = NULL;
      targetm.sched.first_cycle_multipass_init = NULL;
      targetm.sched.first_cycle_multipass_begin = NULL;
      targetm.sched.first_cycle_multipass_issue = NULL;
      targetm.sched.first_cycle_multipass_backtrack = NULL;
      targetm.sched.first_cycle_multipass_end = NULL;
      targetm.sched.first_cycle_multipass_fini = NULL;
      break;
    }
}
/* Compute the alignment given to a constant that is being placed in memory.
   EXP is the constant and ALIGN is the alignment that the object would
   ordinarily have.
   The value of this function is used instead of that alignment to align
   the object.  */

int
ix86_constant_alignment (tree exp, int align)
{
  if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
      || TREE_CODE (exp) == INTEGER_CST)
    {
      if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
	return 64;
      else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
	return 128;
    }
  else if (!optimize_size && TREE_CODE (exp) == STRING_CST
	   && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
    return BITS_PER_WORD;

  return align;
}
/* Compute the alignment for a static variable.
   TYPE is the data type, and ALIGN is the alignment that
   the object would ordinarily have.  The value of this function is used
   instead of that alignment to align the object.  */

int
ix86_data_alignment (tree type, int align)
{
  int max_align
    = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);

  if (AGGREGATE_TYPE_P (type)
      && TYPE_SIZE (type)
      && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
      && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
	  || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
      && align < max_align)
    align = max_align;

  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to 16byte boundary.  */
  if (TARGET_64BIT)
    {
      if (AGGREGATE_TYPE_P (type)
	  && TYPE_SIZE (type)
	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	  && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
	      || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
	return 128;
    }

  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
	return 64;
      if ((TYPE_MODE (type) == XCmode
	   || TYPE_MODE (type) == TCmode) && align < 128)
	return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
	    || TREE_CODE (type) == UNION_TYPE
	    || TREE_CODE (type) == QUAL_UNION_TYPE)
	   && TYPE_FIELDS (type))
    {
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
	   || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
	return 128;
    }

  return align;
}
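/* A standalone sketch (not part of GCC) of the x86-64 rule applied above:
   an aggregate of compile-time size of at least 16 bytes has its
   alignment raised to 128 bits; smaller objects keep their natural
   alignment.  Guarded with #if 0 so it takes no part in the build.  */
#if 0
#include <stdio.h>

static int
x86_64_data_align (unsigned long size_in_bytes, int align_in_bits)
{
  if (size_in_bytes >= 16 && align_in_bits < 128)
    return 128;
  return align_in_bits;
}

int
main (void)
{
  /* Prints "8 128".  */
  printf ("%d %d\n", x86_64_data_align (15, 8), x86_64_data_align (16, 8));
  return 0;
}
#endif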
/* Compute the alignment for a local variable or a stack slot.  EXP is
   the data type or decl itself, MODE is the widest mode available and
   ALIGN is the alignment that the object would ordinarily have.  The
   value of this macro is used instead of that alignment to align the
   object.  */

unsigned int
ix86_local_alignment (tree exp, enum machine_mode mode,
		      unsigned int align)
{
  tree type, decl;

  if (exp && DECL_P (exp))
    {
      type = TREE_TYPE (exp);
      decl = exp;
    }
  else
    {
      type = exp;
      decl = NULL;
    }

  /* Don't do dynamic stack realignment for long long objects with
     -mpreferred-stack-boundary=2.  */
  if (!TARGET_64BIT
      && align == 64
      && ix86_preferred_stack_boundary < 64
      && (mode == DImode || (type && TYPE_MODE (type) == DImode))
      && (!type || !TYPE_USER_ALIGN (type))
      && (!decl || !DECL_USER_ALIGN (decl)))
    align = 32;

  /* If TYPE is NULL, we are allocating a stack slot for caller-save
     register in MODE.  We will return the largest alignment of XF
     and DF.  */
  if (!type)
    {
      if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
	align = GET_MODE_ALIGNMENT (DFmode);
      return align;
    }

  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to 16byte boundary.  Exact wording is:

     An array uses the same alignment as its elements, except that a local or
     global array variable of length at least 16 bytes or
     a C99 variable-length array variable always has alignment of at least 16 bytes.

     This was added to allow use of aligned SSE instructions at arrays.  This
     rule is meant for static storage (where compiler can not do the analysis
     by itself).  We follow it for automatic variables only when convenient.
     We fully control everything in the function compiled and functions from
     other unit can not rely on the alignment.

     Exclude va_list type.  It is the common case of local array where
     we can not benefit from the alignment.  */
  if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
      && TARGET_SSE)
    {
      if (AGGREGATE_TYPE_P (type)
	  && (va_list_type_node == NULL_TREE
	      || (TYPE_MAIN_VARIANT (type)
		  != TYPE_MAIN_VARIANT (va_list_type_node)))
	  && TYPE_SIZE (type)
	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	  && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
	      || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
	return 128;
    }
  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
	return 64;
      if ((TYPE_MODE (type) == XCmode
	   || TYPE_MODE (type) == TCmode) && align < 128)
	return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
	    || TREE_CODE (type) == UNION_TYPE
	    || TREE_CODE (type) == QUAL_UNION_TYPE)
	   && TYPE_FIELDS (type))
    {
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
	   || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
	return 128;
    }
  return align;
}
/* Compute the minimum required alignment for dynamic stack realignment
   purposes for a local variable, parameter or a stack slot.  EXP is
   the data type or decl itself, MODE is its mode and ALIGN is the
   alignment that the object would ordinarily have.  */

unsigned int
ix86_minimum_alignment (tree exp, enum machine_mode mode,
			unsigned int align)
{
  tree type, decl;

  if (exp && DECL_P (exp))
    {
      type = TREE_TYPE (exp);
      decl = exp;
    }
  else
    {
      type = exp;
      decl = NULL;
    }

  if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
    return align;

  /* Don't do dynamic stack realignment for long long objects with
     -mpreferred-stack-boundary=2.  */
  if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
      && (!type || !TYPE_USER_ALIGN (type))
      && (!decl || !DECL_USER_ALIGN (decl)))
    return 32;

  return align;
}
/* Find a location for the static chain incoming to a nested function.
   This is a register, unless all free registers are used by arguments.  */

static rtx
ix86_static_chain (const_tree fndecl, bool incoming_p)
{
  unsigned regno;

  if (!DECL_STATIC_CHAIN (fndecl))
    return NULL;

  if (TARGET_64BIT)
    {
      /* We always use R10 in 64-bit mode.  */
      regno = R10_REG;
    }
  else
    {
      tree fntype;
      unsigned int ccvt;

      /* By default in 32-bit mode we use ECX to pass the static chain.  */
      regno = CX_REG;

      fntype = TREE_TYPE (fndecl);
      ccvt = ix86_get_callcvt (fntype);
      if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) != 0)
	{
	  /* Fastcall functions use ecx/edx for arguments, which leaves
	     us with EAX for the static chain.
	     Thiscall functions use ecx for arguments, which also
	     leaves us with EAX for the static chain.  */
	  regno = AX_REG;
	}
      else if (ix86_function_regparm (fntype, fndecl) == 3)
	{
	  /* For regparm 3, we have no free call-clobbered registers in
	     which to store the static chain.  In order to implement this,
	     we have the trampoline push the static chain to the stack.
	     However, we can't push a value below the return address when
	     we call the nested function directly, so we have to use an
	     alternate entry point.  For this we use ESI, and have the
	     alternate entry point push ESI, so that things appear the
	     same once we're executing the nested function.  */
	  if (incoming_p)
	    {
	      if (fndecl == current_function_decl)
		ix86_static_chain_on_stack = true;
	      return gen_frame_mem (SImode,
				    plus_constant (arg_pointer_rtx, -8));
	    }
	  regno = SI_REG;
	}
    }

  return gen_rtx_REG (Pmode, regno);
}
/* Emit RTL insns to initialize the variable parts of a trampoline.
   FNDECL is the decl of the target address; M_TRAMP is a MEM for
   the trampoline, and CHAIN_VALUE is an RTX for the static chain
   to be passed to the target function.  */

static void
ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
  rtx mem, fnaddr;
  int opcode;
  int offset = 0;

  fnaddr = XEXP (DECL_RTL (fndecl), 0);

  if (TARGET_64BIT)
    {
      int size;

      /* Load the function address to r11.  Try to load address using
	 the shorter movl instead of movabs.  We may want to support
	 movq for kernel mode, but kernel does not use trampolines at
	 the moment.  */
      if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
	{
	  fnaddr = copy_to_mode_reg (DImode, fnaddr);

	  mem = adjust_address (m_tramp, HImode, offset);
	  emit_move_insn (mem, gen_int_mode (0xbb41, HImode));

	  mem = adjust_address (m_tramp, SImode, offset + 2);
	  emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
	  offset += 6;
	}
      else
	{
	  mem = adjust_address (m_tramp, HImode, offset);
	  emit_move_insn (mem, gen_int_mode (0xbb49, HImode));

	  mem = adjust_address (m_tramp, DImode, offset + 2);
	  emit_move_insn (mem, fnaddr);

	  offset += 10;
	}

      /* Load static chain using movabs to r10.  Use the
	 shorter movl instead of movabs for x32.  */
      if (TARGET_X32)
	{
	  opcode = 0xba41;
	  size = 6;
	}
      else
	{
	  opcode = 0xba49;
	  size = 10;
	}

      mem = adjust_address (m_tramp, HImode, offset);
      emit_move_insn (mem, gen_int_mode (opcode, HImode));

      mem = adjust_address (m_tramp, ptr_mode, offset + 2);
      emit_move_insn (mem, chain_value);
      offset += size;

      /* Jump to r11; the last (unused) byte is a nop, only there to
	 pad the write out to a single 32-bit store.  */
      mem = adjust_address (m_tramp, SImode, offset);
      emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
      offset += 4;
    }
  else
    {
      rtx disp, chain;

      /* Depending on the static chain location, either load a register
	 with a constant, or push the constant to the stack.  All of the
	 instructions are the same size.  */
      chain = ix86_static_chain (fndecl, true);
      if (REG_P (chain))
	switch (REGNO (chain))
	  {
	  case AX_REG:
	    opcode = 0xb8; break;
	  case CX_REG:
	    opcode = 0xb9; break;
	  default:
	    gcc_unreachable ();
	  }
      else
	opcode = 0x68;

      mem = adjust_address (m_tramp, QImode, offset);
      emit_move_insn (mem, gen_int_mode (opcode, QImode));

      mem = adjust_address (m_tramp, SImode, offset + 1);
      emit_move_insn (mem, chain_value);
      offset += 5;

      mem = adjust_address (m_tramp, QImode, offset);
      emit_move_insn (mem, gen_int_mode (0xe9, QImode));

      mem = adjust_address (m_tramp, SImode, offset + 1);

      /* Compute offset from the end of the jmp to the target function.
	 In the case in which the trampoline stores the static chain on
	 the stack, we need to skip the first insn which pushes the
	 (call-saved) register static chain; this push is 1 byte.  */
      offset += 5;
      disp = expand_binop (SImode, sub_optab, fnaddr,
			   plus_constant (XEXP (m_tramp, 0),
					  offset - (MEM_P (chain) ? 1 : 0)),
			   NULL_RTX, 1, OPTAB_DIRECT);
      emit_move_insn (mem, disp);
    }

  gcc_assert (offset <= TRAMPOLINE_SIZE);

#ifdef HAVE_ENABLE_EXECUTE_STACK
#ifdef CHECK_EXECUTE_STACK_ENABLED
  if (CHECK_EXECUTE_STACK_ENABLED)
#endif
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
		     LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
#endif
}
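/* A standalone sketch (not part of GCC) of the 24-byte 64-bit trampoline
   image assembled above (the movabs variant): movabs $fn,%r11;
   movabs $chain,%r10; rex.W jmp *%r11; nop.  Assumes a little-endian
   host, as on x86; build_tramp is a hypothetical name.  Guarded with
   #if 0 so it takes no part in the build.  */
#if 0
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static void
build_tramp (unsigned char tramp[24], uint64_t fnaddr, uint64_t chain)
{
  tramp[0] = 0x49; tramp[1] = 0xbb;	/* movabs $fnaddr, %r11 */
  memcpy (tramp + 2, &fnaddr, 8);	/* little-endian immediate */
  tramp[10] = 0x49; tramp[11] = 0xba;	/* movabs $chain, %r10 */
  memcpy (tramp + 12, &chain, 8);
  tramp[20] = 0x49; tramp[21] = 0xff;	/* rex.W jmp *%r11 */
  tramp[22] = 0xe3;
  tramp[23] = 0x90;			/* nop pads the final 32-bit store */
}

int
main (void)
{
  unsigned char t[24];
  int i;

  build_tramp (t, 0x1122334455667788ull, 0x99aabbccddeeff00ull);
  for (i = 0; i < 24; i++)
    printf ("%02x%c", t[i], i == 23 ? '\n' : ' ');
  return 0;
}
#endif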
/* The following file contains several enumerations and data structures
   built from the definitions in i386-builtin-types.def.  */

#include "i386-builtin-types.inc"

/* Table for the ix86 builtin non-function types.  */
static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];

/* Retrieve an element from the above table, building some of
   the types lazily.  */

static tree
ix86_get_builtin_type (enum ix86_builtin_type tcode)
{
  unsigned int index;
  tree type, itype;

  gcc_assert ((unsigned) tcode < ARRAY_SIZE (ix86_builtin_type_tab));

  type = ix86_builtin_type_tab[(int) tcode];
  if (type != NULL)
    return type;

  gcc_assert (tcode > IX86_BT_LAST_PRIM);
  if (tcode <= IX86_BT_LAST_VECT)
    {
      enum machine_mode mode;

      index = tcode - IX86_BT_LAST_PRIM - 1;
      itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
      mode = ix86_builtin_type_vect_mode[index];

      type = build_vector_type_for_mode (itype, mode);
    }
  else
    {
      int quals;

      index = tcode - IX86_BT_LAST_VECT - 1;
      if (tcode <= IX86_BT_LAST_PTR)
	quals = TYPE_UNQUALIFIED;
      else
	quals = TYPE_QUAL_CONST;

      itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
      if (quals != TYPE_UNQUALIFIED)
	itype = build_qualified_type (itype, quals);

      type = build_pointer_type (itype);
    }

  ix86_builtin_type_tab[(int) tcode] = type;
  return type;
}
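/* A standalone sketch (not part of GCC) of the lazy memoization pattern
   used by this lookup function and by the function-type lookup that
   follows: the table starts empty and each entry is built exactly once,
   on first request.  The names below are hypothetical.  Guarded with
   #if 0 so it takes no part in the build.  */
#if 0
#include <stdio.h>

#define N_CODES 8
static long type_tab[N_CODES];	/* 0 means "not built yet".  */
static int n_builds;

static long
get_type (int tcode)
{
  if (type_tab[tcode] != 0)
    return type_tab[tcode];

  n_builds++;			/* Build the entry exactly once.  */
  type_tab[tcode] = tcode + 100;
  return type_tab[tcode];
}

int
main (void)
{
  get_type (3);
  get_type (3);
  printf ("%d\n", n_builds);	/* Prints "1".  */
  return 0;
}
#endif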
/* Table for the ix86 builtin function types.  */
static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];

/* Retrieve an element from the above table, building some of
   the types lazily.  */

static tree
ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
{
  tree type;

  gcc_assert ((unsigned) tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));

  type = ix86_builtin_func_type_tab[(int) tcode];
  if (type != NULL)
    return type;

  if (tcode <= IX86_BT_LAST_FUNC)
    {
      unsigned start = ix86_builtin_func_start[(int) tcode];
      unsigned after = ix86_builtin_func_start[(int) tcode + 1];
      tree rtype, atype, args = void_list_node;
      unsigned i;

      rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
      for (i = after - 1; i > start; --i)
	{
	  atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
	  args = tree_cons (NULL, atype, args);
	}

      type = build_function_type (rtype, args);
    }
  else
    {
      unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
      enum ix86_builtin_func_type icode;

      icode = ix86_builtin_func_alias_base[index];
      type = ix86_get_builtin_func_type (icode);
    }

  ix86_builtin_func_type_tab[(int) tcode] = type;
  return type;
}
/* Codes for all the SSE/MMX builtins.  */
enum ix86_builtins
{
  IX86_BUILTIN_ADDPS,
  IX86_BUILTIN_ADDSS,
  IX86_BUILTIN_DIVPS,
  IX86_BUILTIN_DIVSS,
  IX86_BUILTIN_MULPS,
  IX86_BUILTIN_MULSS,
  IX86_BUILTIN_SUBPS,
  IX86_BUILTIN_SUBSS,

  IX86_BUILTIN_CMPEQPS,
  IX86_BUILTIN_CMPLTPS,
  IX86_BUILTIN_CMPLEPS,
  IX86_BUILTIN_CMPGTPS,
  IX86_BUILTIN_CMPGEPS,
  IX86_BUILTIN_CMPNEQPS,
  IX86_BUILTIN_CMPNLTPS,
  IX86_BUILTIN_CMPNLEPS,
  IX86_BUILTIN_CMPNGTPS,
  IX86_BUILTIN_CMPNGEPS,
  IX86_BUILTIN_CMPORDPS,
  IX86_BUILTIN_CMPUNORDPS,
  IX86_BUILTIN_CMPEQSS,
  IX86_BUILTIN_CMPLTSS,
  IX86_BUILTIN_CMPLESS,
  IX86_BUILTIN_CMPNEQSS,
  IX86_BUILTIN_CMPNLTSS,
  IX86_BUILTIN_CMPNLESS,
  IX86_BUILTIN_CMPNGTSS,
  IX86_BUILTIN_CMPNGESS,
  IX86_BUILTIN_CMPORDSS,
  IX86_BUILTIN_CMPUNORDSS,

  IX86_BUILTIN_COMIEQSS,
  IX86_BUILTIN_COMILTSS,
  IX86_BUILTIN_COMILESS,
  IX86_BUILTIN_COMIGTSS,
  IX86_BUILTIN_COMIGESS,
  IX86_BUILTIN_COMINEQSS,
  IX86_BUILTIN_UCOMIEQSS,
  IX86_BUILTIN_UCOMILTSS,
  IX86_BUILTIN_UCOMILESS,
  IX86_BUILTIN_UCOMIGTSS,
  IX86_BUILTIN_UCOMIGESS,
  IX86_BUILTIN_UCOMINEQSS,

  IX86_BUILTIN_CVTPI2PS,
  IX86_BUILTIN_CVTPS2PI,
  IX86_BUILTIN_CVTSI2SS,
  IX86_BUILTIN_CVTSI642SS,
  IX86_BUILTIN_CVTSS2SI,
  IX86_BUILTIN_CVTSS2SI64,
  IX86_BUILTIN_CVTTPS2PI,
  IX86_BUILTIN_CVTTSS2SI,
  IX86_BUILTIN_CVTTSS2SI64,

  IX86_BUILTIN_MAXPS,
  IX86_BUILTIN_MAXSS,
  IX86_BUILTIN_MINPS,
  IX86_BUILTIN_MINSS,

  IX86_BUILTIN_LOADUPS,
  IX86_BUILTIN_STOREUPS,
  IX86_BUILTIN_MOVSS,

  IX86_BUILTIN_MOVHLPS,
  IX86_BUILTIN_MOVLHPS,
  IX86_BUILTIN_LOADHPS,
  IX86_BUILTIN_LOADLPS,
  IX86_BUILTIN_STOREHPS,
  IX86_BUILTIN_STORELPS,

  IX86_BUILTIN_MASKMOVQ,
  IX86_BUILTIN_MOVMSKPS,
  IX86_BUILTIN_PMOVMSKB,

  IX86_BUILTIN_MOVNTPS,
  IX86_BUILTIN_MOVNTQ,

  IX86_BUILTIN_LOADDQU,
  IX86_BUILTIN_STOREDQU,

  IX86_BUILTIN_PACKSSWB,
  IX86_BUILTIN_PACKSSDW,
  IX86_BUILTIN_PACKUSWB,

  IX86_BUILTIN_PADDB,
  IX86_BUILTIN_PADDW,
  IX86_BUILTIN_PADDD,
  IX86_BUILTIN_PADDQ,
  IX86_BUILTIN_PADDSB,
  IX86_BUILTIN_PADDSW,
  IX86_BUILTIN_PADDUSB,
  IX86_BUILTIN_PADDUSW,
  IX86_BUILTIN_PSUBB,
  IX86_BUILTIN_PSUBW,
  IX86_BUILTIN_PSUBD,
  IX86_BUILTIN_PSUBQ,
  IX86_BUILTIN_PSUBSB,
  IX86_BUILTIN_PSUBSW,
  IX86_BUILTIN_PSUBUSB,
  IX86_BUILTIN_PSUBUSW,

  IX86_BUILTIN_PAND,
  IX86_BUILTIN_PANDN,
  IX86_BUILTIN_POR,
  IX86_BUILTIN_PXOR,

  IX86_BUILTIN_PAVGB,
  IX86_BUILTIN_PAVGW,

  IX86_BUILTIN_PCMPEQB,
  IX86_BUILTIN_PCMPEQW,
  IX86_BUILTIN_PCMPEQD,
  IX86_BUILTIN_PCMPGTB,
  IX86_BUILTIN_PCMPGTW,
  IX86_BUILTIN_PCMPGTD,

  IX86_BUILTIN_PMADDWD,

  IX86_BUILTIN_PMAXSW,
  IX86_BUILTIN_PMAXUB,
  IX86_BUILTIN_PMINSW,
  IX86_BUILTIN_PMINUB,

  IX86_BUILTIN_PMULHUW,
  IX86_BUILTIN_PMULHW,
  IX86_BUILTIN_PMULLW,

  IX86_BUILTIN_PSADBW,
  IX86_BUILTIN_PSHUFW,

  IX86_BUILTIN_PSLLW,
  IX86_BUILTIN_PSLLD,
  IX86_BUILTIN_PSLLQ,
  IX86_BUILTIN_PSRAW,
  IX86_BUILTIN_PSRAD,
  IX86_BUILTIN_PSRLW,
  IX86_BUILTIN_PSRLD,
  IX86_BUILTIN_PSRLQ,
  IX86_BUILTIN_PSLLWI,
  IX86_BUILTIN_PSLLDI,
  IX86_BUILTIN_PSLLQI,
  IX86_BUILTIN_PSRAWI,
  IX86_BUILTIN_PSRADI,
  IX86_BUILTIN_PSRLWI,
  IX86_BUILTIN_PSRLDI,
  IX86_BUILTIN_PSRLQI,

  IX86_BUILTIN_PUNPCKHBW,
  IX86_BUILTIN_PUNPCKHWD,
  IX86_BUILTIN_PUNPCKHDQ,
  IX86_BUILTIN_PUNPCKLBW,
  IX86_BUILTIN_PUNPCKLWD,
  IX86_BUILTIN_PUNPCKLDQ,

  IX86_BUILTIN_SHUFPS,

  IX86_BUILTIN_RCPPS,
  IX86_BUILTIN_RCPSS,
  IX86_BUILTIN_RSQRTPS,
  IX86_BUILTIN_RSQRTPS_NR,
  IX86_BUILTIN_RSQRTSS,
  IX86_BUILTIN_RSQRTF,
  IX86_BUILTIN_SQRTPS,
  IX86_BUILTIN_SQRTPS_NR,
  IX86_BUILTIN_SQRTSS,

  IX86_BUILTIN_UNPCKHPS,
  IX86_BUILTIN_UNPCKLPS,

  IX86_BUILTIN_ANDPS,
  IX86_BUILTIN_ANDNPS,
  IX86_BUILTIN_ORPS,
  IX86_BUILTIN_XORPS,

  IX86_BUILTIN_EMMS,
  IX86_BUILTIN_LDMXCSR,
  IX86_BUILTIN_STMXCSR,
  IX86_BUILTIN_SFENCE,

  /* 3DNow! Original */
  IX86_BUILTIN_FEMMS,
  IX86_BUILTIN_PAVGUSB,
  IX86_BUILTIN_PF2ID,
  IX86_BUILTIN_PFACC,
  IX86_BUILTIN_PFADD,
  IX86_BUILTIN_PFCMPEQ,
  IX86_BUILTIN_PFCMPGE,
  IX86_BUILTIN_PFCMPGT,
  IX86_BUILTIN_PFMAX,
  IX86_BUILTIN_PFMIN,
  IX86_BUILTIN_PFMUL,
  IX86_BUILTIN_PFRCP,
  IX86_BUILTIN_PFRCPIT1,
  IX86_BUILTIN_PFRCPIT2,
  IX86_BUILTIN_PFRSQIT1,
  IX86_BUILTIN_PFRSQRT,
  IX86_BUILTIN_PFSUB,
  IX86_BUILTIN_PFSUBR,
  IX86_BUILTIN_PI2FD,
  IX86_BUILTIN_PMULHRW,

  /* 3DNow! Athlon Extensions */
  IX86_BUILTIN_PF2IW,
  IX86_BUILTIN_PFNACC,
  IX86_BUILTIN_PFPNACC,
  IX86_BUILTIN_PI2FW,
  IX86_BUILTIN_PSWAPDSI,
  IX86_BUILTIN_PSWAPDSF,

  /* SSE2 */
  IX86_BUILTIN_ADDPD,
  IX86_BUILTIN_ADDSD,
  IX86_BUILTIN_DIVPD,
  IX86_BUILTIN_DIVSD,
  IX86_BUILTIN_MULPD,
  IX86_BUILTIN_MULSD,
  IX86_BUILTIN_SUBPD,
  IX86_BUILTIN_SUBSD,

  IX86_BUILTIN_CMPEQPD,
  IX86_BUILTIN_CMPLTPD,
  IX86_BUILTIN_CMPLEPD,
  IX86_BUILTIN_CMPGTPD,
  IX86_BUILTIN_CMPGEPD,
  IX86_BUILTIN_CMPNEQPD,
  IX86_BUILTIN_CMPNLTPD,
  IX86_BUILTIN_CMPNLEPD,
  IX86_BUILTIN_CMPNGTPD,
  IX86_BUILTIN_CMPNGEPD,
  IX86_BUILTIN_CMPORDPD,
  IX86_BUILTIN_CMPUNORDPD,
  IX86_BUILTIN_CMPEQSD,
  IX86_BUILTIN_CMPLTSD,
  IX86_BUILTIN_CMPLESD,
  IX86_BUILTIN_CMPNEQSD,
  IX86_BUILTIN_CMPNLTSD,
  IX86_BUILTIN_CMPNLESD,
  IX86_BUILTIN_CMPORDSD,
  IX86_BUILTIN_CMPUNORDSD,

  IX86_BUILTIN_COMIEQSD,
  IX86_BUILTIN_COMILTSD,
  IX86_BUILTIN_COMILESD,
  IX86_BUILTIN_COMIGTSD,
  IX86_BUILTIN_COMIGESD,
  IX86_BUILTIN_COMINEQSD,
  IX86_BUILTIN_UCOMIEQSD,
  IX86_BUILTIN_UCOMILTSD,
  IX86_BUILTIN_UCOMILESD,
  IX86_BUILTIN_UCOMIGTSD,
  IX86_BUILTIN_UCOMIGESD,
  IX86_BUILTIN_UCOMINEQSD,

  IX86_BUILTIN_MAXPD,
  IX86_BUILTIN_MAXSD,
  IX86_BUILTIN_MINPD,
  IX86_BUILTIN_MINSD,

  IX86_BUILTIN_ANDPD,
  IX86_BUILTIN_ANDNPD,
  IX86_BUILTIN_ORPD,
  IX86_BUILTIN_XORPD,

  IX86_BUILTIN_SQRTPD,
  IX86_BUILTIN_SQRTSD,

  IX86_BUILTIN_UNPCKHPD,
  IX86_BUILTIN_UNPCKLPD,

  IX86_BUILTIN_SHUFPD,

  IX86_BUILTIN_LOADUPD,
  IX86_BUILTIN_STOREUPD,
  IX86_BUILTIN_MOVSD,

  IX86_BUILTIN_LOADHPD,
  IX86_BUILTIN_LOADLPD,

  IX86_BUILTIN_CVTDQ2PD,
  IX86_BUILTIN_CVTDQ2PS,

  IX86_BUILTIN_CVTPD2DQ,
  IX86_BUILTIN_CVTPD2PI,
  IX86_BUILTIN_CVTPD2PS,
  IX86_BUILTIN_CVTTPD2DQ,
  IX86_BUILTIN_CVTTPD2PI,

  IX86_BUILTIN_CVTPI2PD,
  IX86_BUILTIN_CVTSI2SD,
  IX86_BUILTIN_CVTSI642SD,

  IX86_BUILTIN_CVTSD2SI,
  IX86_BUILTIN_CVTSD2SI64,
  IX86_BUILTIN_CVTSD2SS,
  IX86_BUILTIN_CVTSS2SD,
  IX86_BUILTIN_CVTTSD2SI,
  IX86_BUILTIN_CVTTSD2SI64,

  IX86_BUILTIN_CVTPS2DQ,
  IX86_BUILTIN_CVTPS2PD,
  IX86_BUILTIN_CVTTPS2DQ,

  IX86_BUILTIN_MOVNTI,
  IX86_BUILTIN_MOVNTPD,
  IX86_BUILTIN_MOVNTDQ,

  IX86_BUILTIN_MOVQ128,

  /* SSE2 MMX */
  IX86_BUILTIN_MASKMOVDQU,
  IX86_BUILTIN_MOVMSKPD,
  IX86_BUILTIN_PMOVMSKB128,

  IX86_BUILTIN_PACKSSWB128,
  IX86_BUILTIN_PACKSSDW128,
  IX86_BUILTIN_PACKUSWB128,

  IX86_BUILTIN_PADDB128,
  IX86_BUILTIN_PADDW128,
  IX86_BUILTIN_PADDD128,
  IX86_BUILTIN_PADDQ128,
  IX86_BUILTIN_PADDSB128,
  IX86_BUILTIN_PADDSW128,
  IX86_BUILTIN_PADDUSB128,
  IX86_BUILTIN_PADDUSW128,
  IX86_BUILTIN_PSUBB128,
  IX86_BUILTIN_PSUBW128,
  IX86_BUILTIN_PSUBD128,
  IX86_BUILTIN_PSUBQ128,
  IX86_BUILTIN_PSUBSB128,
  IX86_BUILTIN_PSUBSW128,
  IX86_BUILTIN_PSUBUSB128,
  IX86_BUILTIN_PSUBUSW128,

  IX86_BUILTIN_PAND128,
  IX86_BUILTIN_PANDN128,
  IX86_BUILTIN_POR128,
  IX86_BUILTIN_PXOR128,

  IX86_BUILTIN_PAVGB128,
  IX86_BUILTIN_PAVGW128,

  IX86_BUILTIN_PCMPEQB128,
  IX86_BUILTIN_PCMPEQW128,
  IX86_BUILTIN_PCMPEQD128,
  IX86_BUILTIN_PCMPGTB128,
  IX86_BUILTIN_PCMPGTW128,
  IX86_BUILTIN_PCMPGTD128,

  IX86_BUILTIN_PMADDWD128,

  IX86_BUILTIN_PMAXSW128,
  IX86_BUILTIN_PMAXUB128,
  IX86_BUILTIN_PMINSW128,
  IX86_BUILTIN_PMINUB128,

  IX86_BUILTIN_PMULUDQ,
  IX86_BUILTIN_PMULUDQ128,
  IX86_BUILTIN_PMULHUW128,
  IX86_BUILTIN_PMULHW128,
  IX86_BUILTIN_PMULLW128,

  IX86_BUILTIN_PSADBW128,
  IX86_BUILTIN_PSHUFHW,
  IX86_BUILTIN_PSHUFLW,
  IX86_BUILTIN_PSHUFD,

  IX86_BUILTIN_PSLLDQI128,
  IX86_BUILTIN_PSLLWI128,
  IX86_BUILTIN_PSLLDI128,
  IX86_BUILTIN_PSLLQI128,
  IX86_BUILTIN_PSRAWI128,
  IX86_BUILTIN_PSRADI128,
  IX86_BUILTIN_PSRLDQI128,
  IX86_BUILTIN_PSRLWI128,
  IX86_BUILTIN_PSRLDI128,
  IX86_BUILTIN_PSRLQI128,

  IX86_BUILTIN_PSLLDQ128,
  IX86_BUILTIN_PSLLW128,
  IX86_BUILTIN_PSLLD128,
  IX86_BUILTIN_PSLLQ128,
  IX86_BUILTIN_PSRAW128,
  IX86_BUILTIN_PSRAD128,
  IX86_BUILTIN_PSRLW128,
  IX86_BUILTIN_PSRLD128,
  IX86_BUILTIN_PSRLQ128,

  IX86_BUILTIN_PUNPCKHBW128,
  IX86_BUILTIN_PUNPCKHWD128,
  IX86_BUILTIN_PUNPCKHDQ128,
  IX86_BUILTIN_PUNPCKHQDQ128,
  IX86_BUILTIN_PUNPCKLBW128,
  IX86_BUILTIN_PUNPCKLWD128,
  IX86_BUILTIN_PUNPCKLDQ128,
  IX86_BUILTIN_PUNPCKLQDQ128,

  IX86_BUILTIN_CLFLUSH,
  IX86_BUILTIN_MFENCE,
  IX86_BUILTIN_LFENCE,
  IX86_BUILTIN_PAUSE,

  IX86_BUILTIN_BSRSI,
  IX86_BUILTIN_BSRDI,
  IX86_BUILTIN_RDPMC,
  IX86_BUILTIN_RDTSC,
  IX86_BUILTIN_RDTSCP,
  IX86_BUILTIN_ROLQI,
  IX86_BUILTIN_ROLHI,
  IX86_BUILTIN_RORQI,
  IX86_BUILTIN_RORHI,

  /* SSE3.  */
  IX86_BUILTIN_ADDSUBPS,
  IX86_BUILTIN_HADDPS,
  IX86_BUILTIN_HSUBPS,
  IX86_BUILTIN_MOVSHDUP,
  IX86_BUILTIN_MOVSLDUP,
  IX86_BUILTIN_ADDSUBPD,
  IX86_BUILTIN_HADDPD,
  IX86_BUILTIN_HSUBPD,
  IX86_BUILTIN_LDDQU,

  IX86_BUILTIN_MONITOR,
  IX86_BUILTIN_MWAIT,

  /* SSSE3.  */
  IX86_BUILTIN_PHADDW,
  IX86_BUILTIN_PHADDD,
  IX86_BUILTIN_PHADDSW,
  IX86_BUILTIN_PHSUBW,
  IX86_BUILTIN_PHSUBD,
  IX86_BUILTIN_PHSUBSW,
  IX86_BUILTIN_PMADDUBSW,
  IX86_BUILTIN_PMULHRSW,
  IX86_BUILTIN_PSHUFB,
  IX86_BUILTIN_PSIGNB,
  IX86_BUILTIN_PSIGNW,
  IX86_BUILTIN_PSIGND,
  IX86_BUILTIN_PALIGNR,
  IX86_BUILTIN_PABSB,
  IX86_BUILTIN_PABSW,
  IX86_BUILTIN_PABSD,

  IX86_BUILTIN_PHADDW128,
  IX86_BUILTIN_PHADDD128,
  IX86_BUILTIN_PHADDSW128,
  IX86_BUILTIN_PHSUBW128,
  IX86_BUILTIN_PHSUBD128,
  IX86_BUILTIN_PHSUBSW128,
  IX86_BUILTIN_PMADDUBSW128,
  IX86_BUILTIN_PMULHRSW128,
  IX86_BUILTIN_PSHUFB128,
  IX86_BUILTIN_PSIGNB128,
  IX86_BUILTIN_PSIGNW128,
  IX86_BUILTIN_PSIGND128,
  IX86_BUILTIN_PALIGNR128,
  IX86_BUILTIN_PABSB128,
  IX86_BUILTIN_PABSW128,
  IX86_BUILTIN_PABSD128,

  /* AMDFAM10 - SSE4A New Instructions.  */
  IX86_BUILTIN_MOVNTSD,
  IX86_BUILTIN_MOVNTSS,
  IX86_BUILTIN_EXTRQI,
  IX86_BUILTIN_EXTRQ,
  IX86_BUILTIN_INSERTQI,
  IX86_BUILTIN_INSERTQ,

  /* SSE4.1.  */
  IX86_BUILTIN_BLENDPD,
  IX86_BUILTIN_BLENDPS,
  IX86_BUILTIN_BLENDVPD,
  IX86_BUILTIN_BLENDVPS,
  IX86_BUILTIN_PBLENDVB128,
  IX86_BUILTIN_PBLENDW128,

  IX86_BUILTIN_DPPD,
  IX86_BUILTIN_DPPS,

  IX86_BUILTIN_INSERTPS128,

  IX86_BUILTIN_MOVNTDQA,
  IX86_BUILTIN_MPSADBW128,
  IX86_BUILTIN_PACKUSDW128,
  IX86_BUILTIN_PCMPEQQ,
  IX86_BUILTIN_PHMINPOSUW128,

  IX86_BUILTIN_PMAXSB128,
  IX86_BUILTIN_PMAXSD128,
  IX86_BUILTIN_PMAXUD128,
  IX86_BUILTIN_PMAXUW128,

  IX86_BUILTIN_PMINSB128,
  IX86_BUILTIN_PMINSD128,
  IX86_BUILTIN_PMINUD128,
  IX86_BUILTIN_PMINUW128,

  IX86_BUILTIN_PMOVSXBW128,
  IX86_BUILTIN_PMOVSXBD128,
  IX86_BUILTIN_PMOVSXBQ128,
  IX86_BUILTIN_PMOVSXWD128,
  IX86_BUILTIN_PMOVSXWQ128,
  IX86_BUILTIN_PMOVSXDQ128,

  IX86_BUILTIN_PMOVZXBW128,
  IX86_BUILTIN_PMOVZXBD128,
  IX86_BUILTIN_PMOVZXBQ128,
  IX86_BUILTIN_PMOVZXWD128,
  IX86_BUILTIN_PMOVZXWQ128,
  IX86_BUILTIN_PMOVZXDQ128,

  IX86_BUILTIN_PMULDQ128,
  IX86_BUILTIN_PMULLD128,

  IX86_BUILTIN_ROUNDPD,
  IX86_BUILTIN_ROUNDPS,
  IX86_BUILTIN_ROUNDSD,
  IX86_BUILTIN_ROUNDSS,

  IX86_BUILTIN_FLOORPD,
  IX86_BUILTIN_CEILPD,
  IX86_BUILTIN_TRUNCPD,
  IX86_BUILTIN_RINTPD,
  IX86_BUILTIN_FLOORPS,
  IX86_BUILTIN_CEILPS,
  IX86_BUILTIN_TRUNCPS,
  IX86_BUILTIN_RINTPS,

  IX86_BUILTIN_PTESTZ,
  IX86_BUILTIN_PTESTC,
  IX86_BUILTIN_PTESTNZC,

  IX86_BUILTIN_VEC_INIT_V2SI,
  IX86_BUILTIN_VEC_INIT_V4HI,
  IX86_BUILTIN_VEC_INIT_V8QI,
  IX86_BUILTIN_VEC_EXT_V2DF,
  IX86_BUILTIN_VEC_EXT_V2DI,
  IX86_BUILTIN_VEC_EXT_V4SF,
  IX86_BUILTIN_VEC_EXT_V4SI,
  IX86_BUILTIN_VEC_EXT_V8HI,
  IX86_BUILTIN_VEC_EXT_V2SI,
  IX86_BUILTIN_VEC_EXT_V4HI,
  IX86_BUILTIN_VEC_EXT_V16QI,
  IX86_BUILTIN_VEC_SET_V2DI,
  IX86_BUILTIN_VEC_SET_V4SF,
  IX86_BUILTIN_VEC_SET_V4SI,
  IX86_BUILTIN_VEC_SET_V8HI,
  IX86_BUILTIN_VEC_SET_V4HI,
  IX86_BUILTIN_VEC_SET_V16QI,

  IX86_BUILTIN_VEC_PACK_SFIX,

  /* SSE4.2.  */
  IX86_BUILTIN_CRC32QI,
  IX86_BUILTIN_CRC32HI,
  IX86_BUILTIN_CRC32SI,
  IX86_BUILTIN_CRC32DI,

  IX86_BUILTIN_PCMPESTRI128,
  IX86_BUILTIN_PCMPESTRM128,
  IX86_BUILTIN_PCMPESTRA128,
  IX86_BUILTIN_PCMPESTRC128,
  IX86_BUILTIN_PCMPESTRO128,
  IX86_BUILTIN_PCMPESTRS128,
  IX86_BUILTIN_PCMPESTRZ128,
  IX86_BUILTIN_PCMPISTRI128,
  IX86_BUILTIN_PCMPISTRM128,
  IX86_BUILTIN_PCMPISTRA128,
  IX86_BUILTIN_PCMPISTRC128,
  IX86_BUILTIN_PCMPISTRO128,
  IX86_BUILTIN_PCMPISTRS128,
  IX86_BUILTIN_PCMPISTRZ128,

  IX86_BUILTIN_PCMPGTQ,

  /* AES instructions */
  IX86_BUILTIN_AESENC128,
  IX86_BUILTIN_AESENCLAST128,
  IX86_BUILTIN_AESDEC128,
  IX86_BUILTIN_AESDECLAST128,
  IX86_BUILTIN_AESIMC128,
  IX86_BUILTIN_AESKEYGENASSIST128,

  /* PCLMUL instruction */
  IX86_BUILTIN_PCLMULQDQ128,

  /* AVX */
  IX86_BUILTIN_ADDPD256,
  IX86_BUILTIN_ADDPS256,
  IX86_BUILTIN_ADDSUBPD256,
  IX86_BUILTIN_ADDSUBPS256,
  IX86_BUILTIN_ANDPD256,
  IX86_BUILTIN_ANDPS256,
  IX86_BUILTIN_ANDNPD256,
  IX86_BUILTIN_ANDNPS256,
  IX86_BUILTIN_BLENDPD256,
  IX86_BUILTIN_BLENDPS256,
  IX86_BUILTIN_BLENDVPD256,
  IX86_BUILTIN_BLENDVPS256,
  IX86_BUILTIN_DIVPD256,
  IX86_BUILTIN_DIVPS256,
  IX86_BUILTIN_DPPS256,
  IX86_BUILTIN_HADDPD256,
  IX86_BUILTIN_HADDPS256,
  IX86_BUILTIN_HSUBPD256,
  IX86_BUILTIN_HSUBPS256,
  IX86_BUILTIN_MAXPD256,
  IX86_BUILTIN_MAXPS256,
  IX86_BUILTIN_MINPD256,
  IX86_BUILTIN_MINPS256,
  IX86_BUILTIN_MULPD256,
  IX86_BUILTIN_MULPS256,
  IX86_BUILTIN_ORPD256,
  IX86_BUILTIN_ORPS256,
  IX86_BUILTIN_SHUFPD256,
  IX86_BUILTIN_SHUFPS256,
  IX86_BUILTIN_SUBPD256,
  IX86_BUILTIN_SUBPS256,
  IX86_BUILTIN_XORPD256,
  IX86_BUILTIN_XORPS256,
  IX86_BUILTIN_CMPSD,
  IX86_BUILTIN_CMPSS,
  IX86_BUILTIN_CMPPD,
  IX86_BUILTIN_CMPPS,
  IX86_BUILTIN_CMPPD256,
  IX86_BUILTIN_CMPPS256,
  IX86_BUILTIN_CVTDQ2PD256,
  IX86_BUILTIN_CVTDQ2PS256,
  IX86_BUILTIN_CVTPD2PS256,
  IX86_BUILTIN_CVTPS2DQ256,
  IX86_BUILTIN_CVTPS2PD256,
  IX86_BUILTIN_CVTTPD2DQ256,
  IX86_BUILTIN_CVTPD2DQ256,
  IX86_BUILTIN_CVTTPS2DQ256,
  IX86_BUILTIN_EXTRACTF128PD256,
  IX86_BUILTIN_EXTRACTF128PS256,
  IX86_BUILTIN_EXTRACTF128SI256,
  IX86_BUILTIN_VZEROALL,
  IX86_BUILTIN_VZEROUPPER,
  IX86_BUILTIN_VPERMILVARPD,
  IX86_BUILTIN_VPERMILVARPS,
  IX86_BUILTIN_VPERMILVARPD256,
  IX86_BUILTIN_VPERMILVARPS256,
  IX86_BUILTIN_VPERMILPD,
  IX86_BUILTIN_VPERMILPS,
  IX86_BUILTIN_VPERMILPD256,
  IX86_BUILTIN_VPERMILPS256,
  IX86_BUILTIN_VPERMIL2PD,
  IX86_BUILTIN_VPERMIL2PS,
  IX86_BUILTIN_VPERMIL2PD256,
  IX86_BUILTIN_VPERMIL2PS256,
  IX86_BUILTIN_VPERM2F128PD256,
  IX86_BUILTIN_VPERM2F128PS256,
  IX86_BUILTIN_VPERM2F128SI256,
  IX86_BUILTIN_VBROADCASTSS,
  IX86_BUILTIN_VBROADCASTSD256,
  IX86_BUILTIN_VBROADCASTSS256,
  IX86_BUILTIN_VBROADCASTPD256,
  IX86_BUILTIN_VBROADCASTPS256,
  IX86_BUILTIN_VINSERTF128PD256,
  IX86_BUILTIN_VINSERTF128PS256,
  IX86_BUILTIN_VINSERTF128SI256,
  IX86_BUILTIN_LOADUPD256,
  IX86_BUILTIN_LOADUPS256,
  IX86_BUILTIN_STOREUPD256,
  IX86_BUILTIN_STOREUPS256,
  IX86_BUILTIN_LDDQU256,
  IX86_BUILTIN_MOVNTDQ256,
  IX86_BUILTIN_MOVNTPD256,
  IX86_BUILTIN_MOVNTPS256,
  IX86_BUILTIN_LOADDQU256,
  IX86_BUILTIN_STOREDQU256,
  IX86_BUILTIN_MASKLOADPD,
  IX86_BUILTIN_MASKLOADPS,
  IX86_BUILTIN_MASKSTOREPD,
  IX86_BUILTIN_MASKSTOREPS,
  IX86_BUILTIN_MASKLOADPD256,
  IX86_BUILTIN_MASKLOADPS256,
  IX86_BUILTIN_MASKSTOREPD256,
  IX86_BUILTIN_MASKSTOREPS256,
  IX86_BUILTIN_MOVSHDUP256,
  IX86_BUILTIN_MOVSLDUP256,
  IX86_BUILTIN_MOVDDUP256,

  IX86_BUILTIN_SQRTPD256,
  IX86_BUILTIN_SQRTPS256,
  IX86_BUILTIN_SQRTPS_NR256,
  IX86_BUILTIN_RSQRTPS256,
  IX86_BUILTIN_RSQRTPS_NR256,

  IX86_BUILTIN_RCPPS256,

  IX86_BUILTIN_ROUNDPD256,
  IX86_BUILTIN_ROUNDPS256,

  IX86_BUILTIN_FLOORPD256,
  IX86_BUILTIN_CEILPD256,
  IX86_BUILTIN_TRUNCPD256,
  IX86_BUILTIN_RINTPD256,
  IX86_BUILTIN_FLOORPS256,
  IX86_BUILTIN_CEILPS256,
  IX86_BUILTIN_TRUNCPS256,
  IX86_BUILTIN_RINTPS256,

  IX86_BUILTIN_UNPCKHPD256,
  IX86_BUILTIN_UNPCKLPD256,
  IX86_BUILTIN_UNPCKHPS256,
  IX86_BUILTIN_UNPCKLPS256,

  IX86_BUILTIN_SI256_SI,
  IX86_BUILTIN_PS256_PS,
  IX86_BUILTIN_PD256_PD,
  IX86_BUILTIN_SI_SI256,
  IX86_BUILTIN_PS_PS256,
  IX86_BUILTIN_PD_PD256,

  IX86_BUILTIN_VTESTZPD,
  IX86_BUILTIN_VTESTCPD,
  IX86_BUILTIN_VTESTNZCPD,
  IX86_BUILTIN_VTESTZPS,
  IX86_BUILTIN_VTESTCPS,
  IX86_BUILTIN_VTESTNZCPS,
  IX86_BUILTIN_VTESTZPD256,
  IX86_BUILTIN_VTESTCPD256,
  IX86_BUILTIN_VTESTNZCPD256,
  IX86_BUILTIN_VTESTZPS256,
  IX86_BUILTIN_VTESTCPS256,
  IX86_BUILTIN_VTESTNZCPS256,
  IX86_BUILTIN_PTESTZ256,
  IX86_BUILTIN_PTESTC256,
  IX86_BUILTIN_PTESTNZC256,

  IX86_BUILTIN_MOVMSKPD256,
  IX86_BUILTIN_MOVMSKPS256,

  /* TFmode support builtins.  */
  IX86_BUILTIN_INFQ,
  IX86_BUILTIN_HUGE_VALQ,
  IX86_BUILTIN_FABSQ,
  IX86_BUILTIN_COPYSIGNQ,

  /* Vectorizer support builtins.  */
  IX86_BUILTIN_CPYSGNPS,
  IX86_BUILTIN_CPYSGNPD,
  IX86_BUILTIN_CPYSGNPS256,
  IX86_BUILTIN_CPYSGNPD256,

  IX86_BUILTIN_CVTUDQ2PS,

  IX86_BUILTIN_VEC_PERM_V2DF,
  IX86_BUILTIN_VEC_PERM_V4SF,
  IX86_BUILTIN_VEC_PERM_V2DI,
  IX86_BUILTIN_VEC_PERM_V4SI,
  IX86_BUILTIN_VEC_PERM_V8HI,
  IX86_BUILTIN_VEC_PERM_V16QI,
  IX86_BUILTIN_VEC_PERM_V2DI_U,
  IX86_BUILTIN_VEC_PERM_V4SI_U,
  IX86_BUILTIN_VEC_PERM_V8HI_U,
  IX86_BUILTIN_VEC_PERM_V16QI_U,
  IX86_BUILTIN_VEC_PERM_V4DF,
  IX86_BUILTIN_VEC_PERM_V8SF,

  /* FMA4 and XOP instructions.  */
  IX86_BUILTIN_VFMADDSS,
  IX86_BUILTIN_VFMADDSD,
  IX86_BUILTIN_VFMADDPS,
  IX86_BUILTIN_VFMADDPD,
  IX86_BUILTIN_VFMADDPS256,
  IX86_BUILTIN_VFMADDPD256,
  IX86_BUILTIN_VFMADDSUBPS,
  IX86_BUILTIN_VFMADDSUBPD,
  IX86_BUILTIN_VFMADDSUBPS256,
  IX86_BUILTIN_VFMADDSUBPD256,

  IX86_BUILTIN_VPCMOV,
  IX86_BUILTIN_VPCMOV_V2DI,
  IX86_BUILTIN_VPCMOV_V4SI,
  IX86_BUILTIN_VPCMOV_V8HI,
  IX86_BUILTIN_VPCMOV_V16QI,
  IX86_BUILTIN_VPCMOV_V4SF,
  IX86_BUILTIN_VPCMOV_V2DF,
  IX86_BUILTIN_VPCMOV256,
  IX86_BUILTIN_VPCMOV_V4DI256,
  IX86_BUILTIN_VPCMOV_V8SI256,
  IX86_BUILTIN_VPCMOV_V16HI256,
  IX86_BUILTIN_VPCMOV_V32QI256,
  IX86_BUILTIN_VPCMOV_V8SF256,
  IX86_BUILTIN_VPCMOV_V4DF256,

  IX86_BUILTIN_VPPERM,

  IX86_BUILTIN_VPMACSSWW,
  IX86_BUILTIN_VPMACSWW,
  IX86_BUILTIN_VPMACSSWD,
  IX86_BUILTIN_VPMACSWD,
  IX86_BUILTIN_VPMACSSDD,
  IX86_BUILTIN_VPMACSDD,
  IX86_BUILTIN_VPMACSSDQL,
  IX86_BUILTIN_VPMACSSDQH,
  IX86_BUILTIN_VPMACSDQL,
  IX86_BUILTIN_VPMACSDQH,
  IX86_BUILTIN_VPMADCSSWD,
  IX86_BUILTIN_VPMADCSWD,

  IX86_BUILTIN_VPHADDBW,
  IX86_BUILTIN_VPHADDBD,
  IX86_BUILTIN_VPHADDBQ,
  IX86_BUILTIN_VPHADDWD,
,
23860 IX86_BUILTIN_VPHADDWQ
,
23861 IX86_BUILTIN_VPHADDDQ
,
23862 IX86_BUILTIN_VPHADDUBW
,
23863 IX86_BUILTIN_VPHADDUBD
,
23864 IX86_BUILTIN_VPHADDUBQ
,
23865 IX86_BUILTIN_VPHADDUWD
,
23866 IX86_BUILTIN_VPHADDUWQ
,
23867 IX86_BUILTIN_VPHADDUDQ
,
23868 IX86_BUILTIN_VPHSUBBW
,
23869 IX86_BUILTIN_VPHSUBWD
,
23870 IX86_BUILTIN_VPHSUBDQ
,
23872 IX86_BUILTIN_VPROTB
,
23873 IX86_BUILTIN_VPROTW
,
23874 IX86_BUILTIN_VPROTD
,
23875 IX86_BUILTIN_VPROTQ
,
23876 IX86_BUILTIN_VPROTB_IMM
,
23877 IX86_BUILTIN_VPROTW_IMM
,
23878 IX86_BUILTIN_VPROTD_IMM
,
23879 IX86_BUILTIN_VPROTQ_IMM
,
23881 IX86_BUILTIN_VPSHLB
,
23882 IX86_BUILTIN_VPSHLW
,
23883 IX86_BUILTIN_VPSHLD
,
23884 IX86_BUILTIN_VPSHLQ
,
23885 IX86_BUILTIN_VPSHAB
,
23886 IX86_BUILTIN_VPSHAW
,
23887 IX86_BUILTIN_VPSHAD
,
23888 IX86_BUILTIN_VPSHAQ
,
23890 IX86_BUILTIN_VFRCZSS
,
23891 IX86_BUILTIN_VFRCZSD
,
23892 IX86_BUILTIN_VFRCZPS
,
23893 IX86_BUILTIN_VFRCZPD
,
23894 IX86_BUILTIN_VFRCZPS256
,
23895 IX86_BUILTIN_VFRCZPD256
,
23897 IX86_BUILTIN_VPCOMEQUB
,
23898 IX86_BUILTIN_VPCOMNEUB
,
23899 IX86_BUILTIN_VPCOMLTUB
,
23900 IX86_BUILTIN_VPCOMLEUB
,
23901 IX86_BUILTIN_VPCOMGTUB
,
23902 IX86_BUILTIN_VPCOMGEUB
,
23903 IX86_BUILTIN_VPCOMFALSEUB
,
23904 IX86_BUILTIN_VPCOMTRUEUB
,
23906 IX86_BUILTIN_VPCOMEQUW
,
23907 IX86_BUILTIN_VPCOMNEUW
,
23908 IX86_BUILTIN_VPCOMLTUW
,
23909 IX86_BUILTIN_VPCOMLEUW
,
23910 IX86_BUILTIN_VPCOMGTUW
,
23911 IX86_BUILTIN_VPCOMGEUW
,
23912 IX86_BUILTIN_VPCOMFALSEUW
,
23913 IX86_BUILTIN_VPCOMTRUEUW
,
23915 IX86_BUILTIN_VPCOMEQUD
,
23916 IX86_BUILTIN_VPCOMNEUD
,
23917 IX86_BUILTIN_VPCOMLTUD
,
23918 IX86_BUILTIN_VPCOMLEUD
,
23919 IX86_BUILTIN_VPCOMGTUD
,
23920 IX86_BUILTIN_VPCOMGEUD
,
23921 IX86_BUILTIN_VPCOMFALSEUD
,
23922 IX86_BUILTIN_VPCOMTRUEUD
,
23924 IX86_BUILTIN_VPCOMEQUQ
,
23925 IX86_BUILTIN_VPCOMNEUQ
,
23926 IX86_BUILTIN_VPCOMLTUQ
,
23927 IX86_BUILTIN_VPCOMLEUQ
,
23928 IX86_BUILTIN_VPCOMGTUQ
,
23929 IX86_BUILTIN_VPCOMGEUQ
,
23930 IX86_BUILTIN_VPCOMFALSEUQ
,
23931 IX86_BUILTIN_VPCOMTRUEUQ
,
23933 IX86_BUILTIN_VPCOMEQB
,
23934 IX86_BUILTIN_VPCOMNEB
,
23935 IX86_BUILTIN_VPCOMLTB
,
23936 IX86_BUILTIN_VPCOMLEB
,
23937 IX86_BUILTIN_VPCOMGTB
,
23938 IX86_BUILTIN_VPCOMGEB
,
23939 IX86_BUILTIN_VPCOMFALSEB
,
23940 IX86_BUILTIN_VPCOMTRUEB
,
23942 IX86_BUILTIN_VPCOMEQW
,
23943 IX86_BUILTIN_VPCOMNEW
,
23944 IX86_BUILTIN_VPCOMLTW
,
23945 IX86_BUILTIN_VPCOMLEW
,
23946 IX86_BUILTIN_VPCOMGTW
,
23947 IX86_BUILTIN_VPCOMGEW
,
23948 IX86_BUILTIN_VPCOMFALSEW
,
23949 IX86_BUILTIN_VPCOMTRUEW
,
23951 IX86_BUILTIN_VPCOMEQD
,
23952 IX86_BUILTIN_VPCOMNED
,
23953 IX86_BUILTIN_VPCOMLTD
,
23954 IX86_BUILTIN_VPCOMLED
,
23955 IX86_BUILTIN_VPCOMGTD
,
23956 IX86_BUILTIN_VPCOMGED
,
23957 IX86_BUILTIN_VPCOMFALSED
,
23958 IX86_BUILTIN_VPCOMTRUED
,
23960 IX86_BUILTIN_VPCOMEQQ
,
23961 IX86_BUILTIN_VPCOMNEQ
,
23962 IX86_BUILTIN_VPCOMLTQ
,
23963 IX86_BUILTIN_VPCOMLEQ
,
23964 IX86_BUILTIN_VPCOMGTQ
,
23965 IX86_BUILTIN_VPCOMGEQ
,
23966 IX86_BUILTIN_VPCOMFALSEQ
,
23967 IX86_BUILTIN_VPCOMTRUEQ
,
23969 /* LWP instructions. */
23970 IX86_BUILTIN_LLWPCB
,
23971 IX86_BUILTIN_SLWPCB
,
23972 IX86_BUILTIN_LWPVAL32
,
23973 IX86_BUILTIN_LWPVAL64
,
23974 IX86_BUILTIN_LWPINS32
,
23975 IX86_BUILTIN_LWPINS64
,
23979 /* BMI instructions. */
23980 IX86_BUILTIN_BEXTR32
,
23981 IX86_BUILTIN_BEXTR64
,
23984 /* TBM instructions. */
23985 IX86_BUILTIN_BEXTRI32
,
23986 IX86_BUILTIN_BEXTRI64
,
23989 /* FSGSBASE instructions. */
23990 IX86_BUILTIN_RDFSBASE32
,
23991 IX86_BUILTIN_RDFSBASE64
,
23992 IX86_BUILTIN_RDGSBASE32
,
23993 IX86_BUILTIN_RDGSBASE64
,
23994 IX86_BUILTIN_WRFSBASE32
,
23995 IX86_BUILTIN_WRFSBASE64
,
23996 IX86_BUILTIN_WRGSBASE32
,
23997 IX86_BUILTIN_WRGSBASE64
,
23999 /* RDRND instructions. */
24000 IX86_BUILTIN_RDRAND16_STEP
,
24001 IX86_BUILTIN_RDRAND32_STEP
,
24002 IX86_BUILTIN_RDRAND64_STEP
,
24004 /* F16C instructions. */
24005 IX86_BUILTIN_CVTPH2PS
,
24006 IX86_BUILTIN_CVTPH2PS256
,
24007 IX86_BUILTIN_CVTPS2PH
,
24008 IX86_BUILTIN_CVTPS2PH256
,
  /* CFString built-in for darwin */
  IX86_BUILTIN_CFSTRING,

  IX86_BUILTIN_MAX
};
/* Table for the ix86 builtin decls.  */
static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
/* Table of all of the builtin functions that are possible with different ISA's
   but are waiting to be built until a function is declared to use that
   ISA.  */
struct builtin_isa
{
  const char *name;			/* function name */
  enum ix86_builtin_func_type tcode;	/* type to use in the declaration */
  int isa;				/* isa_flags this builtin is defined for */
  bool const_p;				/* true if the declaration is constant */
  bool set_and_not_built_p;		/* true if recorded here but the decl
					   has not been built yet */
};

static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
/* Add an ix86 target builtin function with CODE, NAME and TYPE.  Save the MASK
   of which isa_flags to use in the ix86_builtins_isa array.  Stores the
   function decl in the ix86_builtins array.  Returns the function decl or
   NULL_TREE, if the builtin was not added.

   If the front end has a special hook for builtin functions, delay adding
   builtin functions that aren't in the current ISA until the ISA is changed
   with function specific optimization.  Doing so can save about 300K for the
   default compiler.  When the builtin is expanded, check at that time whether
   it is valid.

   If the front end doesn't have a special hook, record all builtins, even if
   it isn't an instruction set in the current ISA in case the user uses
   function specific options for a different ISA, so that we don't get scope
   errors if a builtin is added in the middle of a function scope.  */
static inline tree
def_builtin (int mask, const char *name, enum ix86_builtin_func_type tcode,
	     enum ix86_builtins code)
{
  tree decl = NULL_TREE;

  if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
    {
      ix86_builtins_isa[(int) code].isa = mask;

      mask &= ~OPTION_MASK_ISA_64BIT;
      if (mask == 0
	  || (mask & ix86_isa_flags) != 0
	  || (lang_hooks.builtin_function
	      == lang_hooks.builtin_function_ext_scope))
	{
	  tree type = ix86_get_builtin_func_type (tcode);
	  decl = add_builtin_function (name, type, code, BUILT_IN_MD,
				       NULL, NULL_TREE);
	  ix86_builtins[(int) code] = decl;
	  ix86_builtins_isa[(int) code].set_and_not_built_p = false;
	}
      else
	{
	  ix86_builtins[(int) code] = NULL_TREE;
	  ix86_builtins_isa[(int) code].tcode = tcode;
	  ix86_builtins_isa[(int) code].name = name;
	  ix86_builtins_isa[(int) code].const_p = false;
	  ix86_builtins_isa[(int) code].set_and_not_built_p = true;
	}
    }

  return decl;
}
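
/* Illustrative usage sketch (not part of the excerpted code; the actual
   call sites live in the builtin-initialization routines elsewhere in
   this file).  A registration looks roughly like:

     def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_addpd",
		  V2DF_FTYPE_V2DF_V2DF, IX86_BUILTIN_ADDPD);

   If SSE2 is not in ix86_isa_flags and the front end distinguishes
   builtin_function from builtin_function_ext_scope, this only fills in
   ix86_builtins_isa[IX86_BUILTIN_ADDPD] and returns NULL_TREE; the real
   decl is created later by ix86_add_new_builtins.  */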
/* Like def_builtin, but also marks the function decl "const".  */

static inline tree
def_builtin_const (int mask, const char *name,
		   enum ix86_builtin_func_type tcode, enum ix86_builtins code)
{
  tree decl = def_builtin (mask, name, tcode, code);
  if (decl)
    TREE_READONLY (decl) = 1;
  else
    ix86_builtins_isa[(int) code].const_p = true;

  return decl;
}
/* Add any new builtin functions for a given ISA that may not have been
   declared.  This saves a bit of space compared to adding all of the
   declarations to the tree, even if we didn't use them.  */

static void
ix86_add_new_builtins (int isa)
{
  int i;

  for (i = 0; i < (int) IX86_BUILTIN_MAX; i++)
    {
      if ((ix86_builtins_isa[i].isa & isa) != 0
	  && ix86_builtins_isa[i].set_and_not_built_p)
	{
	  tree decl, type;

	  /* Don't define the builtin again.  */
	  ix86_builtins_isa[i].set_and_not_built_p = false;

	  type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
	  decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
						 type, i, BUILT_IN_MD, NULL,
						 NULL_TREE);

	  ix86_builtins[i] = decl;
	  if (ix86_builtins_isa[i].const_p)
	    TREE_READONLY (decl) = 1;
	}
    }
}
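
/* Sketch of the intended trigger (an assumption; the caller is outside
   this excerpt): when function-specific option handling enables extra
   ISA bits, e.g. for

     __attribute__ ((target ("avx"))) void f (void);

   the options code would call

     ix86_add_new_builtins (ix86_isa_flags);

   and every deferred builtin whose .isa mask overlaps the new flags is
   then declared at file scope via add_builtin_function_ext_scope.  */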
/* Bits for builtin_description.flag.  */

/* Set when we don't support the comparison natively, and should
   swap_comparison in order to support it.  */
#define BUILTIN_DESC_SWAP_OPERANDS 1

struct builtin_description
{
  const unsigned int mask;
  const enum insn_code icode;
  const char *const name;
  const enum ix86_builtins code;
  const enum rtx_code comparison;
  const int flag;
};
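
/* Decoded example: in bdesc_comi below, the entry

     { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq",
       IX86_BUILTIN_COMIEQSS, UNEQ, 0 }

   means: available under SSE, expanded through the sse_comi insn
   pattern, exposed to users as __builtin_ia32_comieq, indexed by
   IX86_BUILTIN_COMIEQSS, compared with rtx code UNEQ, with no flag bits
   set.  Other tables overload the final field: bdesc_pcmpestr and
   bdesc_pcmpistr store a CC mode there, and the argument tables store an
   ix86_builtin_func_type cast to int.  */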
static const struct builtin_description bdesc_comi[] =
{
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
};
static const struct builtin_description bdesc_pcmpestr[] =
{
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
};

static const struct builtin_description bdesc_pcmpistr[] =
{
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
};
/* Special builtins with variable number of arguments.  */
static const struct builtin_description bdesc_special_args[] =
{
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtsc, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtscp, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },

  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },

  /* SSE or 3DNow!A */
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },

  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },

  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },

  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },

  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },

  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
};
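
/* Reading the type tags (illustrative): an entry such as
   __builtin_ia32_movntps above carrying (int) VOID_FTYPE_PFLOAT_V4SF is
   declared, once registered, roughly as

     void __builtin_ia32_movntps (float *, __v4sf);

   i.e. the FTYPE name spells the return type first and the parameter
   types after it; ix86_get_builtin_func_type turns the tag into the
   actual function type tree.  (__v4sf here stands for the 16-byte float
   vector type; the exact typedef is outside this excerpt.)  */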
24299 /* Builtins with variable number of arguments. */
24300 static const struct builtin_description bdesc_args
[] =
24302 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_bsr
, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI
, UNKNOWN
, (int) INT_FTYPE_INT
},
24303 { OPTION_MASK_ISA_64BIT
, CODE_FOR_bsr_rex64
, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI
, UNKNOWN
, (int) INT64_FTYPE_INT64
},
24304 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rdpmc
, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC
, UNKNOWN
, (int) UINT64_FTYPE_INT
},
24305 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rotlqi3
, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI
, UNKNOWN
, (int) UINT8_FTYPE_UINT8_INT
},
24306 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rotlhi3
, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI
, UNKNOWN
, (int) UINT16_FTYPE_UINT16_INT
},
24307 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rotrqi3
, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI
, UNKNOWN
, (int) UINT8_FTYPE_UINT8_INT
},
24308 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rotrhi3
, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI
, UNKNOWN
, (int) UINT16_FTYPE_UINT16_INT
},
24311 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_addv8qi3
, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
24312 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_addv4hi3
, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
24313 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_addv2si3
, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
24314 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_subv8qi3
, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
24315 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_subv4hi3
, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
24316 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_subv2si3
, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
24318 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ssaddv8qi3
, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
24319 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ssaddv4hi3
, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
24320 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_sssubv8qi3
, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
24321 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_sssubv4hi3
, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
24322 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_usaddv8qi3
, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
24323 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_usaddv4hi3
, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
24324 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ussubv8qi3
, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
24325 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ussubv4hi3
, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
24327 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_mulv4hi3
, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
24328 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_smulv4hi3_highpart
, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
24330 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_andv2si3
, "__builtin_ia32_pand", IX86_BUILTIN_PAND
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
24331 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_andnotv2si3
, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
24332 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_iorv2si3
, "__builtin_ia32_por", IX86_BUILTIN_POR
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
24333 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_xorv2si3
, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
24335 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_eqv8qi3
, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
24336 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_eqv4hi3
, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
24337 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_eqv2si3
, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
24338 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_gtv8qi3
, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
24339 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_gtv4hi3
, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
24340 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_gtv2si3
, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
24342 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckhbw
, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
24343 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckhwd
, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
24344 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckhdq
, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
24345 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpcklbw
, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
24346 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpcklwd
, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
24347 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckldq
, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
24349 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_packsswb
, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB
, UNKNOWN
, (int) V8QI_FTYPE_V4HI_V4HI
},
24350 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_packssdw
, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW
, UNKNOWN
, (int) V4HI_FTYPE_V2SI_V2SI
},
24351 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_packuswb
, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB
, UNKNOWN
, (int) V8QI_FTYPE_V4HI_V4HI
},
24353 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_pmaddwd
, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD
, UNKNOWN
, (int) V2SI_FTYPE_V4HI_V4HI
},
24355 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv4hi3
, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_SI_COUNT
},
24356 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv2si3
, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_SI_COUNT
},
24357 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv1di3
, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_SI_COUNT
},
24358 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv4hi3
, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI_COUNT
},
24359 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv2si3
, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI_COUNT
},
24360 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv1di3
, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_V1DI_COUNT
},
24362 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv4hi3
, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_SI_COUNT
},
24363 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv2si3
, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_SI_COUNT
},
24364 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv1di3
, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_SI_COUNT
},
24365 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv4hi3
, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI_COUNT
},
24366 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv2si3
, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI_COUNT
},
24367 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv1di3
, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_V1DI_COUNT
},
24369 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv4hi3
, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_SI_COUNT
},
24370 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv2si3
, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_SI_COUNT
},
24371 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv4hi3
, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI_COUNT
},
24372 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv2si3
, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI_COUNT
},
24375 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_pf2id
, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID
, UNKNOWN
, (int) V2SI_FTYPE_V2SF
},
24376 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_floatv2si2
, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD
, UNKNOWN
, (int) V2SF_FTYPE_V2SI
},
24377 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rcpv2sf2
, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP
, UNKNOWN
, (int) V2SF_FTYPE_V2SF
},
24378 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rsqrtv2sf2
, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT
, UNKNOWN
, (int) V2SF_FTYPE_V2SF
},
24380 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_uavgv8qi3
, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
24381 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_haddv2sf3
, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
24382 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_addv2sf3
, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
24383 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_eqv2sf3
, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ
, UNKNOWN
, (int) V2SI_FTYPE_V2SF_V2SF
},
24384 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_gev2sf3
, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE
, UNKNOWN
, (int) V2SI_FTYPE_V2SF_V2SF
},
24385 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_gtv2sf3
, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT
, UNKNOWN
, (int) V2SI_FTYPE_V2SF_V2SF
},
24386 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_smaxv2sf3
, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
24387 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_sminv2sf3
, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
24388 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_mulv2sf3
, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
24389 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rcpit1v2sf3
, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
24390 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rcpit2v2sf3
, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
24391 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rsqit1v2sf3
, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
24392 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_subv2sf3
, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
24393 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_subrv2sf3
, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
24394 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_pmulhrwv4hi3
, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
24397 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pf2iw
, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW
, UNKNOWN
, (int) V2SI_FTYPE_V2SF
},
24398 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pi2fw
, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW
, UNKNOWN
, (int) V2SF_FTYPE_V2SI
},
24399 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pswapdv2si2
, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI
, UNKNOWN
, (int) V2SI_FTYPE_V2SI
},
24400 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pswapdv2sf2
, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF
, UNKNOWN
, (int) V2SF_FTYPE_V2SF
},
24401 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_hsubv2sf3
, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
24402 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_addsubv2sf3
, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
24405 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movmskps
, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS
, UNKNOWN
, (int) INT_FTYPE_V4SF
},
24406 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_sqrtv4sf2
, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
24407 { OPTION_MASK_ISA_SSE
, CODE_FOR_sqrtv4sf2
, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
24408 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_rsqrtv4sf2
, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
24409 { OPTION_MASK_ISA_SSE
, CODE_FOR_rsqrtv4sf2
, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
24410 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_rcpv4sf2
, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
24411 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtps2pi
, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI
, UNKNOWN
, (int) V2SI_FTYPE_V4SF
},
24412 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtss2si
, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI
, UNKNOWN
, (int) INT_FTYPE_V4SF
},
24413 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse_cvtss2siq
, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64
, UNKNOWN
, (int) INT64_FTYPE_V4SF
},
24414 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvttps2pi
, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI
, UNKNOWN
, (int) V2SI_FTYPE_V4SF
},
24415 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvttss2si
, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI
, UNKNOWN
, (int) INT_FTYPE_V4SF
},
24416 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse_cvttss2siq
, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64
, UNKNOWN
, (int) INT64_FTYPE_V4SF
},
24418 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_shufps
, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
24420 { OPTION_MASK_ISA_SSE
, CODE_FOR_addv4sf3
, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
24421 { OPTION_MASK_ISA_SSE
, CODE_FOR_subv4sf3
, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
24422 { OPTION_MASK_ISA_SSE
, CODE_FOR_mulv4sf3
, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
24423 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_divv4sf3
, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
24424 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmaddv4sf3
, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
24425 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsubv4sf3
, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
24426 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmulv4sf3
, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
24427 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmdivv4sf3
, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
24429 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS
, EQ
, (int) V4SF_FTYPE_V4SF_V4SF
},
24430 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS
, LT
, (int) V4SF_FTYPE_V4SF_V4SF
},
24431 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS
, LE
, (int) V4SF_FTYPE_V4SF_V4SF
},
24432 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS
, LT
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
24433 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS
, LE
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
24434 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS
, UNORDERED
, (int) V4SF_FTYPE_V4SF_V4SF
},
24435 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS
, NE
, (int) V4SF_FTYPE_V4SF_V4SF
},
24436 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS
, UNGE
, (int) V4SF_FTYPE_V4SF_V4SF
},
24437 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS
, UNGT
, (int) V4SF_FTYPE_V4SF_V4SF
},
24438 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS
, UNGE
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
24439 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS
, UNGT
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
24440 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS
, ORDERED
, (int) V4SF_FTYPE_V4SF_V4SF
},
24441 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS
, EQ
, (int) V4SF_FTYPE_V4SF_V4SF
},
24442 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS
, LT
, (int) V4SF_FTYPE_V4SF_V4SF
},
24443 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS
, LE
, (int) V4SF_FTYPE_V4SF_V4SF
},
24444 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS
, UNORDERED
, (int) V4SF_FTYPE_V4SF_V4SF
},
24445 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS
, NE
, (int) V4SF_FTYPE_V4SF_V4SF
},
24446 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS
, UNGE
, (int) V4SF_FTYPE_V4SF_V4SF
},
24447 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS
, UNGT
, (int) V4SF_FTYPE_V4SF_V4SF
},
24448 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS
, UNGE
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
24449 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS
, UNGT
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
24450 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS
, ORDERED
, (int) V4SF_FTYPE_V4SF_V4SF
},
24452 { OPTION_MASK_ISA_SSE
, CODE_FOR_sminv4sf3
, "__builtin_ia32_minps", IX86_BUILTIN_MINPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
24453 { OPTION_MASK_ISA_SSE
, CODE_FOR_smaxv4sf3
, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
24454 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsminv4sf3
, "__builtin_ia32_minss", IX86_BUILTIN_MINSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
24455 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsmaxv4sf3
, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
24457 { OPTION_MASK_ISA_SSE
, CODE_FOR_andv4sf3
, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
24458 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_andnotv4sf3
, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
24459 { OPTION_MASK_ISA_SSE
, CODE_FOR_iorv4sf3
, "__builtin_ia32_orps", IX86_BUILTIN_ORPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
24460 { OPTION_MASK_ISA_SSE
, CODE_FOR_xorv4sf3
, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
24462 { OPTION_MASK_ISA_SSE
, CODE_FOR_copysignv4sf3
, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
24464 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movss
, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
24465 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movhlps_exp
, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
24466 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movlhps_exp
, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
24467 { OPTION_MASK_ISA_SSE
, CODE_FOR_vec_interleave_highv4sf
, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
24468 { OPTION_MASK_ISA_SSE
, CODE_FOR_vec_interleave_lowv4sf
, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
24470 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtpi2ps
, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V2SI
},
24471 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtsi2ss
, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_SI
},
24472 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse_cvtsi2ssq
, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS
, UNKNOWN
, V4SF_FTYPE_V4SF_DI
},
24474 { OPTION_MASK_ISA_SSE
, CODE_FOR_rsqrtsf2
, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF
, UNKNOWN
, (int) FLOAT_FTYPE_FLOAT
},
24476 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsqrtv4sf2
, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_VEC_MERGE
},
24477 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmrsqrtv4sf2
, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_VEC_MERGE
},
24478 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmrcpv4sf2
, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_VEC_MERGE
},
24480 /* SSE MMX or 3Dnow!A */
24481 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_uavgv8qi3
, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
24482 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_uavgv4hi3
, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
24483 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_umulv4hi3_highpart
, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
24485 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_umaxv8qi3
, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
24486 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_smaxv4hi3
, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
24487 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_uminv8qi3
, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
24488 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_sminv4hi3
, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
24490 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_psadbw
, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW
, UNKNOWN
, (int) V1DI_FTYPE_V8QI_V8QI
},
24491 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pmovmskb
, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB
, UNKNOWN
, (int) INT_FTYPE_V8QI
},
24493 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pshufw
, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_INT
},
24496 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_shufpd
, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
24498 { OPTION_MASK_ISA_SSE2
, CODE_FOR_nothing
, "__builtin_ia32_vec_perm_v2df", IX86_BUILTIN_VEC_PERM_V2DF
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_V2DI
},
24499 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin_ia32_vec_perm_v4sf", IX86_BUILTIN_VEC_PERM_V4SF
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_V4SI
},
24500 { OPTION_MASK_ISA_SSE2
, CODE_FOR_nothing
, "__builtin_ia32_vec_perm_v2di", IX86_BUILTIN_VEC_PERM_V2DI
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI_V2DI
},
24501 { OPTION_MASK_ISA_SSE2
, CODE_FOR_nothing
, "__builtin_ia32_vec_perm_v4si", IX86_BUILTIN_VEC_PERM_V4SI
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI_V4SI
},
24502 { OPTION_MASK_ISA_SSE2
, CODE_FOR_nothing
, "__builtin_ia32_vec_perm_v8hi", IX86_BUILTIN_VEC_PERM_V8HI
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI_V8HI
},
24503 { OPTION_MASK_ISA_SSE2
, CODE_FOR_nothing
, "__builtin_ia32_vec_perm_v16qi", IX86_BUILTIN_VEC_PERM_V16QI
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI_V16QI
},
24504 { OPTION_MASK_ISA_SSE2
, CODE_FOR_nothing
, "__builtin_ia32_vec_perm_v2di_u", IX86_BUILTIN_VEC_PERM_V2DI_U
, UNKNOWN
, (int) V2UDI_FTYPE_V2UDI_V2UDI_V2UDI
},
24505 { OPTION_MASK_ISA_SSE2
, CODE_FOR_nothing
, "__builtin_ia32_vec_perm_v4si_u", IX86_BUILTIN_VEC_PERM_V4SI_U
, UNKNOWN
, (int) V4USI_FTYPE_V4USI_V4USI_V4USI
},
24506 { OPTION_MASK_ISA_SSE2
, CODE_FOR_nothing
, "__builtin_ia32_vec_perm_v8hi_u", IX86_BUILTIN_VEC_PERM_V8HI_U
, UNKNOWN
, (int) V8UHI_FTYPE_V8UHI_V8UHI_V8UHI
},
24507 { OPTION_MASK_ISA_SSE2
, CODE_FOR_nothing
, "__builtin_ia32_vec_perm_v16qi_u", IX86_BUILTIN_VEC_PERM_V16QI_U
, UNKNOWN
, (int) V16UQI_FTYPE_V16UQI_V16UQI_V16UQI
},
24508 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin_ia32_vec_perm_v4df", IX86_BUILTIN_VEC_PERM_V4DF
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF_V4DI
},
24509 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin_ia32_vec_perm_v8sf", IX86_BUILTIN_VEC_PERM_V8SF
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_V8SI
},
24511 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movmskpd
, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD
, UNKNOWN
, (int) INT_FTYPE_V2DF
},
24512 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pmovmskb
, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128
, UNKNOWN
, (int) INT_FTYPE_V16QI
},
24513 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sqrtv2df2
, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF
},
24514 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtdq2pd
, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD
, UNKNOWN
, (int) V2DF_FTYPE_V4SI
},
24515 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtdq2ps
, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS
, UNKNOWN
, (int) V4SF_FTYPE_V4SI
},
24516 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtudq2ps
, "__builtin_ia32_cvtudq2ps", IX86_BUILTIN_CVTUDQ2PS
, UNKNOWN
, (int) V4SF_FTYPE_V4SI
},
24518 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpd2dq
, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ
, UNKNOWN
, (int) V4SI_FTYPE_V2DF
},
24519 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpd2pi
, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI
, UNKNOWN
, (int) V2SI_FTYPE_V2DF
},
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
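
  /* SSE2 MMX */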
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
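
  /* SSE3 */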
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
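
  /* SSSE3 */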
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },

  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
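
  /* SSE4.1 */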
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
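
  /* SSE4.1 rounding and ptest.  */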
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
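
  /* SSE4.2 */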
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
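
  /* SSE4A */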
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
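
  /* AES */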
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
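
  /* PCLMUL */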
  { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
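
  /* AVX */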
  { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2pd256, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2ps256, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttpd2dq256, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttps2dq256, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
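
  /* ABM */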
  { OPTION_MASK_ISA_ABM, CODE_FOR_clzhi2_abm, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
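
  /* BMI */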
  { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
  { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
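
  /* TBM */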
  { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
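
  /* F16C */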
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
};

/* FMA4 and XOP.  */
#define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
#define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
#define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
#define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
#define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
#define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
#define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
#define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
#define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
#define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
#define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
#define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
#define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
#define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
#define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
#define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
#define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
#define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
#define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
#define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
#define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
#define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
#define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
#define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
#define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
#define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
#define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
#define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
#define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
#define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
#define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
#define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
#define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
#define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
#define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
#define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
#define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
#define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
#define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
#define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
#define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
#define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
#define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
#define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
#define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
#define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
#define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
#define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
#define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
#define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
#define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
#define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
static const struct builtin_description bdesc_multi_arg[] =
{
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
    "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
    UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
    "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
    UNKNOWN, (int)MULTI_ARG_3_DF },

  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
    "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
    UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
    "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
    UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
    "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
    UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
    "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
    UNKNOWN, (int)MULTI_ARG_3_DF2 },

  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
    "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
    UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
    "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
    UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
    "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
    UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
    "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
    UNKNOWN, (int)MULTI_ARG_3_DF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi", IX86_BUILTIN_VPCMOV_V16QI, UNKNOWN, (int)MULTI_ARG_3_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_2_SF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_2_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub", IX86_BUILTIN_VPCOMFALSEUB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw", IX86_BUILTIN_VPCOMFALSEUW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud", IX86_BUILTIN_VPCOMFALSEUD, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq", IX86_BUILTIN_VPCOMFALSEUQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
};
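/* Illustrative note (a sketch, not additional code): the final loop of
   ix86_init_mmx_sse_builtins below registers each row of this table,
   so an entry such as the vpperm one above amounts to

     def_builtin_const (OPTION_MASK_ISA_XOP, "__builtin_ia32_vpperm",
			MULTI_ARG_3_QI, IX86_BUILTIN_VPPERM);  */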
/* Set up all the MMX/SSE builtins, even builtins for instructions that are not
   in the current target ISA to allow the user to compile particular modules
   with different target specific options that differ from the command line
   options.  */
static void
ix86_init_mmx_sse_builtins (void)
{
  const struct builtin_description * d;
  enum ix86_builtin_func_type ftype;
  size_t i;

  /* Add all special builtins with variable number of operands.  */
  for (i = 0, d = bdesc_special_args;
       i < ARRAY_SIZE (bdesc_special_args);
       i++, d++)
    {
      if (d->name == 0)
	continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin (d->mask, d->name, ftype, d->code);
    }

  /* Add all builtins with variable number of operands.  */
  for (i = 0, d = bdesc_args;
       i < ARRAY_SIZE (bdesc_args);
       i++, d++)
    {
      if (d->name == 0)
	continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* pcmpestr[im] insns.  */
  for (i = 0, d = bdesc_pcmpestr;
       i < ARRAY_SIZE (bdesc_pcmpestr);
       i++, d++)
    {
      if (d->code == IX86_BUILTIN_PCMPESTRM128)
	ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
      else
	ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* pcmpistr[im] insns.  */
  for (i = 0, d = bdesc_pcmpistr;
       i < ARRAY_SIZE (bdesc_pcmpistr);
       i++, d++)
    {
      if (d->code == IX86_BUILTIN_PCMPISTRM128)
	ftype = V16QI_FTYPE_V16QI_V16QI_INT;
      else
	ftype = INT_FTYPE_V16QI_V16QI_INT;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* comi/ucomi insns.  */
  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    {
      if (d->mask == OPTION_MASK_ISA_SSE2)
	ftype = INT_FTYPE_V2DF_V2DF;
      else
	ftype = INT_FTYPE_V4SF_V4SF;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* SSE */
  def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
	       VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
  def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
	       UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);

  /* SSE or 3DNow!A */
  def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
	       "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
	       IX86_BUILTIN_MASKMOVQ);

  /* SSE2 */
  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
	       VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);

  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
	       VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
  x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
			    VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);

  /* SSE3.  */
  def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
	       VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
  def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
	       VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);

  /* AES */
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
		     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
		     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
		     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
		     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
		     V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
		     V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);

  /* PCLMUL */
  def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
		     V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);

  /* RDRND */
  def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
	       INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
  def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
	       INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
  def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
	       "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
	       IX86_BUILTIN_RDRAND64_STEP);

  /* MMX access to the vec_init patterns.  */
  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
		     V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);

  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
		     V4HI_FTYPE_HI_HI_HI_HI,
		     IX86_BUILTIN_VEC_INIT_V4HI);

  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
		     V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
		     IX86_BUILTIN_VEC_INIT_V8QI);

  /* Access to the vec_extract patterns.  */
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
		     DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
		     DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
  def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
		     FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
		     SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
		     HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);

  def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
		     "__builtin_ia32_vec_ext_v4hi",
		     HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);

  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
		     SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);

  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
		     QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);

  /* Access to the vec_set patterns.  */
  def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
		     "__builtin_ia32_vec_set_v2di",
		     V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);

  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
		     V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);

  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
		     V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);

  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
		     V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);

  def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
		     "__builtin_ia32_vec_set_v4hi",
		     V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);

  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
		     V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);

  /* Add FMA4 multi-arg argument instructions */
  for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
    {
      if (d->name == 0)
	continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }
}
/* Internal method for ix86_init_builtins.  */

static void
ix86_init_builtins_va_builtins_abi (void)
{
  tree ms_va_ref, sysv_va_ref;
  tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
  tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
  tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
  tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;

  if (!TARGET_64BIT)
    return;
  fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
  fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
  ms_va_ref = build_reference_type (ms_va_list_type_node);
  sysv_va_ref =
    build_pointer_type (TREE_TYPE (sysv_va_list_type_node));

  fnvoid_va_end_ms =
    build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
  fnvoid_va_start_ms =
    build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
  fnvoid_va_end_sysv =
    build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
  fnvoid_va_start_sysv =
    build_varargs_function_type_list (void_type_node, sysv_va_ref,
				      NULL_TREE);
  fnvoid_va_copy_ms =
    build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
			      NULL_TREE);
  fnvoid_va_copy_sysv =
    build_function_type_list (void_type_node, sysv_va_ref,
			      sysv_va_ref, NULL_TREE);

  add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
			BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
  add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
			BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
  add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
			BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
  add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
			BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
  add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
			BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
  add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
			BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
}
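/* Illustrative user-level sketch (not part of this file): in 64-bit
   code these builtins back cross-ABI varargs handling, roughly

     void f (int n, ...)
     {
       __builtin_ms_va_list ap;
       __builtin_ms_va_start (ap, n);
       ...
       __builtin_ms_va_end (ap);
     }

   where __builtin_ms_va_list is the type behind ms_va_list_type_node.  */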
static void
ix86_init_builtin_types (void)
{
  tree float128_type_node, float80_type_node;

  /* The __float80 type.  */
  float80_type_node = long_double_type_node;
  if (TYPE_MODE (float80_type_node) != XFmode)
    {
      /* The __float80 type.  */
      float80_type_node = make_node (REAL_TYPE);

      TYPE_PRECISION (float80_type_node) = 80;
      layout_type (float80_type_node);
    }
  lang_hooks.types.register_builtin_type (float80_type_node, "__float80");

  /* The __float128 type.  */
  float128_type_node = make_node (REAL_TYPE);
  TYPE_PRECISION (float128_type_node) = 128;
  layout_type (float128_type_node);
  lang_hooks.types.register_builtin_type (float128_type_node, "__float128");

  /* This macro is built by i386-builtin-types.awk.  */
  DEFINE_BUILTIN_PRIMITIVE_TYPES;
}
static void
ix86_init_builtins (void)
{
  tree t;

  ix86_init_builtin_types ();

  /* TFmode support builtins.  */
  def_builtin_const (0, "__builtin_infq",
		     FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
  def_builtin_const (0, "__builtin_huge_valq",
		     FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);

  /* We will expand them to normal call if SSE2 isn't available since
     they are used by libgcc.  */
  t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
  t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
			    BUILT_IN_MD, "__fabstf2", NULL_TREE);
  TREE_READONLY (t) = 1;
  ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;

  t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
  t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
			    BUILT_IN_MD, "__copysigntf3", NULL_TREE);
  TREE_READONLY (t) = 1;
  ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;

  ix86_init_mmx_sse_builtins ();

  if (TARGET_64BIT)
    ix86_init_builtins_va_builtins_abi ();

#ifdef SUBTARGET_INIT_BUILTINS
  SUBTARGET_INIT_BUILTINS;
#endif
}
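/* Illustrative user-level sketch: after initialization, code such as

     __float128 q = __builtin_huge_valq ();
     q = __builtin_fabsq (q);

   resolves to the builtins registered above; without SSE2 the fabsq and
   copysignq calls expand to the __fabstf2/__copysigntf3 libgcc routines
   named here.  */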
/* Return the ix86 builtin for CODE.  */

static tree
ix86_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
{
  if (code >= IX86_BUILTIN_MAX)
    return error_mark_node;

  return ix86_builtins[code];
}
/* Errors in the source file can cause expand_expr to return const0_rtx
   where we expect a vector.  To avoid crashing, use one of the vector
   clear instructions.  */

static rtx
safe_vector_operand (rtx x, enum machine_mode mode)
{
  if (x == const0_rtx)
    x = CONST0_RTX (mode);
  return x;
}
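/* For instance, if an erroneous source argument made expand_normal hand
   back (const_int 0) where a V4SI value was expected, the substitution
   above yields CONST0_RTX (V4SImode), which the vector insn patterns
   can still match.  */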
/* Subroutine of ix86_expand_builtin to take care of binop insns.  */

static rtx
ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if (GET_MODE (op1) == SImode && mode1 == TImode)
    {
      rtx x = gen_reg_rtx (V4SImode);
      emit_insn (gen_sse2_loadd (x, op1));
      op1 = gen_lowpart (TImode, x);
    }

  if (!insn_data[icode].operand[1].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (!insn_data[icode].operand[2].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;

  emit_insn (pat);

  return target;
}
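/* Illustrative sketch of the expansion above for a simple two-operand
   builtin such as __builtin_ia32_paddw128 (assuming its usual mapping
   to an addv8hi3-style pattern): after the predicate checks force both
   operands into acceptable registers, the emitted RTL is roughly

     (set (reg:V8HI target)
	  (plus:V8HI (reg:V8HI op0) (reg:V8HI op1)))  */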
/* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns.  */

static rtx
ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
			       enum ix86_builtin_func_type m_type,
			       enum rtx_code sub_code)
{
  rtx pat;
  int i;
  int nargs;
  bool comparison_p = false;
  bool tf_p = false;
  bool last_arg_constant = false;
  int num_memory = 0;
  struct {
    rtx op;
    enum machine_mode mode;
  } args[4];

  enum machine_mode tmode = insn_data[icode].operand[0].mode;

  switch (m_type)
    {
    case MULTI_ARG_4_DF2_DI_I:
    case MULTI_ARG_4_DF2_DI_I1:
    case MULTI_ARG_4_SF2_SI_I:
    case MULTI_ARG_4_SF2_SI_I1:
      nargs = 4;
      last_arg_constant = true;
      break;

    case MULTI_ARG_3_SF:
    case MULTI_ARG_3_DF:
    case MULTI_ARG_3_SF2:
    case MULTI_ARG_3_DF2:
    case MULTI_ARG_3_DI:
    case MULTI_ARG_3_SI:
    case MULTI_ARG_3_SI_DI:
    case MULTI_ARG_3_HI:
    case MULTI_ARG_3_HI_SI:
    case MULTI_ARG_3_QI:
    case MULTI_ARG_3_DI2:
    case MULTI_ARG_3_SI2:
    case MULTI_ARG_3_HI2:
    case MULTI_ARG_3_QI2:
      nargs = 3;
      break;

    case MULTI_ARG_2_SF:
    case MULTI_ARG_2_DF:
    case MULTI_ARG_2_DI:
    case MULTI_ARG_2_SI:
    case MULTI_ARG_2_HI:
    case MULTI_ARG_2_QI:
      nargs = 2;
      break;

    case MULTI_ARG_2_DI_IMM:
    case MULTI_ARG_2_SI_IMM:
    case MULTI_ARG_2_HI_IMM:
    case MULTI_ARG_2_QI_IMM:
      nargs = 2;
      last_arg_constant = true;
      break;

    case MULTI_ARG_1_SF:
    case MULTI_ARG_1_DF:
    case MULTI_ARG_1_SF2:
    case MULTI_ARG_1_DF2:
    case MULTI_ARG_1_DI:
    case MULTI_ARG_1_SI:
    case MULTI_ARG_1_HI:
    case MULTI_ARG_1_QI:
    case MULTI_ARG_1_SI_DI:
    case MULTI_ARG_1_HI_DI:
    case MULTI_ARG_1_HI_SI:
    case MULTI_ARG_1_QI_DI:
    case MULTI_ARG_1_QI_SI:
    case MULTI_ARG_1_QI_HI:
      nargs = 1;
      break;

    case MULTI_ARG_2_DI_CMP:
    case MULTI_ARG_2_SI_CMP:
    case MULTI_ARG_2_HI_CMP:
    case MULTI_ARG_2_QI_CMP:
      nargs = 2;
      comparison_p = true;
      break;

    case MULTI_ARG_2_SF_TF:
    case MULTI_ARG_2_DF_TF:
    case MULTI_ARG_2_DI_TF:
    case MULTI_ARG_2_SI_TF:
    case MULTI_ARG_2_HI_TF:
    case MULTI_ARG_2_QI_TF:
      nargs = 2;
      tf_p = true;
      break;

    default:
      gcc_unreachable ();
    }

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  gcc_assert (nargs <= 4);

  for (i = 0; i < nargs; i++)
    {
      tree arg = CALL_EXPR_ARG (exp, i);
      rtx op = expand_normal (arg);
      int adjust = (comparison_p) ? 1 : 0;
      enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;

      if (last_arg_constant && i == nargs - 1)
	{
	  if (!insn_data[icode].operand[i + 1].predicate (op, mode))
	    {
	      enum insn_code new_icode = icode;
	      switch (icode)
		{
		case CODE_FOR_xop_vpermil2v2df3:
		case CODE_FOR_xop_vpermil2v4sf3:
		case CODE_FOR_xop_vpermil2v4df3:
		case CODE_FOR_xop_vpermil2v8sf3:
		  error ("the last argument must be a 2-bit immediate");
		  return gen_reg_rtx (tmode);
		case CODE_FOR_xop_rotlv2di3:
		  new_icode = CODE_FOR_rotlv2di3;
		  goto xop_rotl;
		case CODE_FOR_xop_rotlv4si3:
		  new_icode = CODE_FOR_rotlv4si3;
		  goto xop_rotl;
		case CODE_FOR_xop_rotlv8hi3:
		  new_icode = CODE_FOR_rotlv8hi3;
		  goto xop_rotl;
		case CODE_FOR_xop_rotlv16qi3:
		  new_icode = CODE_FOR_rotlv16qi3;
		xop_rotl:
		  if (CONST_INT_P (op))
		    {
		      int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 1;
		      op = GEN_INT (INTVAL (op) & mask);
		      gcc_checking_assert
			(insn_data[icode].operand[i + 1].predicate (op, mode));
		    }
		  else
		    {
		      gcc_checking_assert
			(nargs == 2
			 && insn_data[new_icode].operand[0].mode == tmode
			 && insn_data[new_icode].operand[1].mode == tmode
			 && insn_data[new_icode].operand[2].mode == mode
			 && insn_data[new_icode].operand[0].predicate
			    == insn_data[icode].operand[0].predicate
			 && insn_data[new_icode].operand[1].predicate
			    == insn_data[icode].operand[1].predicate);
		      icode = new_icode;
		      goto non_constant;
		    }
		  break;
		default:
		  gcc_unreachable ();
		}
	    }
	}
      else
	{
	non_constant:
	  if (VECTOR_MODE_P (mode))
	    op = safe_vector_operand (op, mode);

	  /* If we aren't optimizing, only allow one memory operand to be
	     generated.  */
	  if (memory_operand (op, mode))
	    num_memory++;

	  gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);

	  if (optimize
	      || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
	      || num_memory > 1)
	    op = force_reg (mode, op);
	}

      args[i].op = op;
      args[i].mode = mode;
    }

  switch (nargs)
    {
    case 1:
      pat = GEN_FCN (icode) (target, args[0].op);
      break;

    case 2:
      if (tf_p)
	pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
			       GEN_INT ((int)sub_code));
      else if (! comparison_p)
	pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
      else
	{
	  rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
				       args[0].op,
				       args[1].op);

	  pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
	}
      break;

    case 3:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
      break;

    case 4:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
      break;

    default:
      gcc_unreachable ();
    }

  if (! pat)
    return 0;

  emit_insn (pat);
  return target;
}
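/* Illustrative sketch of the comparison_p path above: for
   __builtin_ia32_vpcomltb (MULTI_ARG_2_QI_CMP, sub_code == LT) the
   generator receives an extra comparison rtx built by gen_rtx_fmt_ee,
   giving roughly

     (set (reg:V16QI target)
	  (lt:V16QI (reg:V16QI op0) (reg:V16QI op1)))

   which CODE_FOR_xop_maskcmpv16qi3 matches.  */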
/* Subroutine of ix86_expand_args_builtin to take care of scalar unop
   insns with vec_merge.  */

static rtx
ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
				    rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  rtx op1, op0 = expand_normal (arg0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[icode].operand[1].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);

  op1 = op0;
  if (!insn_data[icode].operand[2].predicate (op1, mode0))
    op1 = copy_to_mode_reg (mode0, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
/* Subroutine of ix86_expand_builtin to take care of comparison insns.  */

static rtx
ix86_expand_sse_compare (const struct builtin_description *d,
			 tree exp, rtx target, bool swap)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2;
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (swap)
    {
      rtx tmp = gen_reg_rtx (mode1);
      emit_move_insn (tmp, op1);
      op1 = op0;
      op0 = tmp;
    }

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[d->icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[1].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_data[d->icode].operand[2].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
/* Subroutine of ix86_expand_builtin to take care of comi insns.  */

static rtx
ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
		      rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
    {
      rtx tmp = op1;
      op1 = op0;
      op0 = tmp;
    }

  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[0].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_data[d->icode].operand[1].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (d->icode) (op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  emit_insn (gen_rtx_SET (VOIDmode,
			  gen_rtx_STRICT_LOW_PART (VOIDmode, target),
			  gen_rtx_fmt_ee (comparison, QImode,
					  SET_DEST (pat),
					  const0_rtx)));

  return SUBREG_REG (target);
}
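/* A sketch of what the comi expansion above emits (assuming the usual
   UNEQ mapping of __builtin_ia32_comieq in bdesc_comi): the comi
   pattern sets the flags, then the setcc writes the low byte of a
   zeroed SImode register, roughly

     (set (strict_low_part (subreg:QI (reg:SI target) 0))
	  (uneq:QI (reg:CCFP flags) (const_int 0)))  */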
/* Subroutine of ix86_expand_args_builtin to take care of round insns.  */

static rtx
ix86_expand_sse_round (const struct builtin_description *d, tree exp,
		       rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  rtx op1, op0 = expand_normal (arg0);
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;

  if (optimize || target == 0
      || GET_MODE (target) != tmode
      || !insn_data[d->icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[0].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);

  op1 = GEN_INT (d->comparison);

  pat = GEN_FCN (d->icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
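/* Note the reuse above: for the ROUND builtins the bdesc 'comparison'
   field carries the rounding-mode immediate rather than an rtx code
   (e.g. ROUND_FLOOR for the floor builtins), which is why op1 is simply
   GEN_INT (d->comparison).  */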
/* Subroutine of ix86_expand_builtin to take care of ptest insns.  */

static rtx
ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
		       rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[0].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_data[d->icode].operand[1].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (d->icode) (op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  emit_insn (gen_rtx_SET (VOIDmode,
			  gen_rtx_STRICT_LOW_PART (VOIDmode, target),
			  gen_rtx_fmt_ee (comparison, QImode,
					  SET_DEST (pat),
					  const0_rtx)));

  return SUBREG_REG (target);
}
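/* A sketch for __builtin_ia32_ptestz128 (comparison == EQ): the ptest
   pattern leaves its result in the flags and the setcc reads it back,
   roughly

     (set (reg:CC flags) (unspec:CC [...] UNSPEC_PTEST))
     (set (strict_low_part (subreg:QI (reg:SI target) 0))
	  (eq:QI (reg:CC flags) (const_int 0)))  */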
/* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns.  */

static rtx
ix86_expand_sse_pcmpestr (const struct builtin_description *d,
			  tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  tree arg2 = CALL_EXPR_ARG (exp, 2);
  tree arg3 = CALL_EXPR_ARG (exp, 3);
  tree arg4 = CALL_EXPR_ARG (exp, 4);
  rtx scratch0, scratch1;
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2 = expand_normal (arg2);
  rtx op3 = expand_normal (arg3);
  rtx op4 = expand_normal (arg4);
  enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;

  tmode0 = insn_data[d->icode].operand[0].mode;
  tmode1 = insn_data[d->icode].operand[1].mode;
  modev2 = insn_data[d->icode].operand[2].mode;
  modei3 = insn_data[d->icode].operand[3].mode;
  modev4 = insn_data[d->icode].operand[4].mode;
  modei5 = insn_data[d->icode].operand[5].mode;
  modeimm = insn_data[d->icode].operand[6].mode;

  if (VECTOR_MODE_P (modev2))
    op0 = safe_vector_operand (op0, modev2);
  if (VECTOR_MODE_P (modev4))
    op2 = safe_vector_operand (op2, modev4);

  if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
    op0 = copy_to_mode_reg (modev2, op0);
  if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
    op1 = copy_to_mode_reg (modei3, op1);
  if ((optimize && !register_operand (op2, modev4))
      || !insn_data[d->icode].operand[4].predicate (op2, modev4))
    op2 = copy_to_mode_reg (modev4, op2);
  if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
    op3 = copy_to_mode_reg (modei5, op3);

  if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
    {
      error ("the fifth argument must be an 8-bit immediate");
      return const0_rtx;
    }

  if (d->code == IX86_BUILTIN_PCMPESTRI128)
    {
      if (optimize || !target
	  || GET_MODE (target) != tmode0
	  || !insn_data[d->icode].operand[0].predicate (target, tmode0))
	target = gen_reg_rtx (tmode0);

      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
    }
  else if (d->code == IX86_BUILTIN_PCMPESTRM128)
    {
      if (optimize || !target
	  || GET_MODE (target) != tmode1
	  || !insn_data[d->icode].operand[1].predicate (target, tmode1))
	target = gen_reg_rtx (tmode1);

      scratch0 = gen_reg_rtx (tmode0);

      pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
    }
  else
    {
      gcc_assert (d->flag);

      scratch0 = gen_reg_rtx (tmode0);
      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
    }

  if (! pat)
    return 0;

  emit_insn (pat);

  if (d->flag)
    {
      target = gen_reg_rtx (SImode);
      emit_move_insn (target, const0_rtx);
      target = gen_rtx_SUBREG (QImode, target, 0);

      emit_insn
	(gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
		      gen_rtx_fmt_ee (EQ, QImode,
				      gen_rtx_REG ((enum machine_mode) d->flag,
						   FLAGS_REG),
				      const0_rtx)));
      return SUBREG_REG (target);
    }
  else
    return target;
}
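/* The three branches above distinguish the result the user asked for:
   pcmpestri returns the index result, pcmpestrm returns the mask, and
   the flag-testing variants (where d->flag holds the CC mode to use)
   read FLAGS_REG via the setcc just emitted.  */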
/* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns.  */

static rtx
ix86_expand_sse_pcmpistr (const struct builtin_description *d,
			  tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  tree arg2 = CALL_EXPR_ARG (exp, 2);
  rtx scratch0, scratch1;
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2 = expand_normal (arg2);
  enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;

  tmode0 = insn_data[d->icode].operand[0].mode;
  tmode1 = insn_data[d->icode].operand[1].mode;
  modev2 = insn_data[d->icode].operand[2].mode;
  modev3 = insn_data[d->icode].operand[3].mode;
  modeimm = insn_data[d->icode].operand[4].mode;

  if (VECTOR_MODE_P (modev2))
    op0 = safe_vector_operand (op0, modev2);
  if (VECTOR_MODE_P (modev3))
    op1 = safe_vector_operand (op1, modev3);

  if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
    op0 = copy_to_mode_reg (modev2, op0);
  if ((optimize && !register_operand (op1, modev3))
      || !insn_data[d->icode].operand[3].predicate (op1, modev3))
    op1 = copy_to_mode_reg (modev3, op1);

  if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
    {
      error ("the third argument must be an 8-bit immediate");
      return const0_rtx;
    }

  if (d->code == IX86_BUILTIN_PCMPISTRI128)
    {
      if (optimize || !target
	  || GET_MODE (target) != tmode0
	  || !insn_data[d->icode].operand[0].predicate (target, tmode0))
	target = gen_reg_rtx (tmode0);

      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
    }
  else if (d->code == IX86_BUILTIN_PCMPISTRM128)
    {
      if (optimize || !target
	  || GET_MODE (target) != tmode1
	  || !insn_data[d->icode].operand[1].predicate (target, tmode1))
	target = gen_reg_rtx (tmode1);

      scratch0 = gen_reg_rtx (tmode0);

      pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
    }
  else
    {
      gcc_assert (d->flag);

      scratch0 = gen_reg_rtx (tmode0);
      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
    }

  if (! pat)
    return 0;

  emit_insn (pat);

  if (d->flag)
    {
      target = gen_reg_rtx (SImode);
      emit_move_insn (target, const0_rtx);
      target = gen_rtx_SUBREG (QImode, target, 0);

      emit_insn
	(gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
		      gen_rtx_fmt_ee (EQ, QImode,
				      gen_rtx_REG ((enum machine_mode) d->flag,
						   FLAGS_REG),
				      const0_rtx)));
      return SUBREG_REG (target);
    }
  else
    return target;
}
26231 /* Subroutine of ix86_expand_builtin to take care of insns with
26232 variable number of operands. */
26235 ix86_expand_args_builtin (const struct builtin_description
*d
,
26236 tree exp
, rtx target
)
26238 rtx pat
, real_target
;
26239 unsigned int i
, nargs
;
26240 unsigned int nargs_constant
= 0;
26241 int num_memory
= 0;
26245 enum machine_mode mode
;
26247 bool last_arg_count
= false;
26248 enum insn_code icode
= d
->icode
;
26249 const struct insn_data_d
*insn_p
= &insn_data
[icode
];
26250 enum machine_mode tmode
= insn_p
->operand
[0].mode
;
26251 enum machine_mode rmode
= VOIDmode
;
26253 enum rtx_code comparison
= d
->comparison
;
26255 switch ((enum ix86_builtin_func_type
) d
->flag
)
26257 case V2DF_FTYPE_V2DF_ROUND
:
26258 case V4DF_FTYPE_V4DF_ROUND
:
26259 case V4SF_FTYPE_V4SF_ROUND
:
26260 case V8SF_FTYPE_V8SF_ROUND
:
26261 return ix86_expand_sse_round (d
, exp
, target
);
26262 case INT_FTYPE_V8SF_V8SF_PTEST
:
26263 case INT_FTYPE_V4DI_V4DI_PTEST
:
26264 case INT_FTYPE_V4DF_V4DF_PTEST
:
26265 case INT_FTYPE_V4SF_V4SF_PTEST
:
26266 case INT_FTYPE_V2DI_V2DI_PTEST
:
26267 case INT_FTYPE_V2DF_V2DF_PTEST
:
26268 return ix86_expand_sse_ptest (d
, exp
, target
);
26269 case FLOAT128_FTYPE_FLOAT128
:
26270 case FLOAT_FTYPE_FLOAT
:
26271 case INT_FTYPE_INT
:
26272 case UINT64_FTYPE_INT
:
26273 case UINT16_FTYPE_UINT16
:
26274 case INT64_FTYPE_INT64
:
26275 case INT64_FTYPE_V4SF
:
26276 case INT64_FTYPE_V2DF
:
26277 case INT_FTYPE_V16QI
:
26278 case INT_FTYPE_V8QI
:
26279 case INT_FTYPE_V8SF
:
26280 case INT_FTYPE_V4DF
:
26281 case INT_FTYPE_V4SF
:
26282 case INT_FTYPE_V2DF
:
26283 case V16QI_FTYPE_V16QI
:
26284 case V8SI_FTYPE_V8SF
:
26285 case V8SI_FTYPE_V4SI
:
26286 case V8HI_FTYPE_V8HI
:
26287 case V8HI_FTYPE_V16QI
:
26288 case V8QI_FTYPE_V8QI
:
26289 case V8SF_FTYPE_V8SF
:
26290 case V8SF_FTYPE_V8SI
:
26291 case V8SF_FTYPE_V4SF
:
26292 case V8SF_FTYPE_V8HI
:
26293 case V4SI_FTYPE_V4SI
:
26294 case V4SI_FTYPE_V16QI
:
26295 case V4SI_FTYPE_V4SF
:
26296 case V4SI_FTYPE_V8SI
:
26297 case V4SI_FTYPE_V8HI
:
26298 case V4SI_FTYPE_V4DF
:
26299 case V4SI_FTYPE_V2DF
:
26300 case V4HI_FTYPE_V4HI
:
26301 case V4DF_FTYPE_V4DF
:
26302 case V4DF_FTYPE_V4SI
:
26303 case V4DF_FTYPE_V4SF
:
26304 case V4DF_FTYPE_V2DF
:
26305 case V4SF_FTYPE_V4SF
:
26306 case V4SF_FTYPE_V4SI
:
26307 case V4SF_FTYPE_V8SF
:
26308 case V4SF_FTYPE_V4DF
:
26309 case V4SF_FTYPE_V8HI
:
26310 case V4SF_FTYPE_V2DF
:
26311 case V2DI_FTYPE_V2DI
:
26312 case V2DI_FTYPE_V16QI
:
26313 case V2DI_FTYPE_V8HI
:
26314 case V2DI_FTYPE_V4SI
:
26315 case V2DF_FTYPE_V2DF
:
26316 case V2DF_FTYPE_V4SI
:
26317 case V2DF_FTYPE_V4DF
:
26318 case V2DF_FTYPE_V4SF
:
26319 case V2DF_FTYPE_V2SI
:
26320 case V2SI_FTYPE_V2SI
:
26321 case V2SI_FTYPE_V4SF
:
26322 case V2SI_FTYPE_V2SF
:
26323 case V2SI_FTYPE_V2DF
:
26324 case V2SF_FTYPE_V2SF
:
26325 case V2SF_FTYPE_V2SI
:
26328 case V4SF_FTYPE_V4SF_VEC_MERGE
:
26329 case V2DF_FTYPE_V2DF_VEC_MERGE
:
26330 return ix86_expand_unop_vec_merge_builtin (icode
, exp
, target
);
26331 case FLOAT128_FTYPE_FLOAT128_FLOAT128
:
26332 case V16QI_FTYPE_V16QI_V16QI
:
26333 case V16QI_FTYPE_V8HI_V8HI
:
26334 case V8QI_FTYPE_V8QI_V8QI
:
26335 case V8QI_FTYPE_V4HI_V4HI
:
26336 case V8HI_FTYPE_V8HI_V8HI
:
26337 case V8HI_FTYPE_V16QI_V16QI
:
26338 case V8HI_FTYPE_V4SI_V4SI
:
26339 case V8SF_FTYPE_V8SF_V8SF
:
26340 case V8SF_FTYPE_V8SF_V8SI
:
26341 case V4SI_FTYPE_V4SI_V4SI
:
26342 case V4SI_FTYPE_V8HI_V8HI
:
26343 case V4SI_FTYPE_V4SF_V4SF
:
26344 case V4SI_FTYPE_V2DF_V2DF
:
26345 case V4HI_FTYPE_V4HI_V4HI
:
26346 case V4HI_FTYPE_V8QI_V8QI
:
26347 case V4HI_FTYPE_V2SI_V2SI
:
26348 case V4DF_FTYPE_V4DF_V4DF
:
26349 case V4DF_FTYPE_V4DF_V4DI
:
26350 case V4SF_FTYPE_V4SF_V4SF
:
26351 case V4SF_FTYPE_V4SF_V4SI
:
26352 case V4SF_FTYPE_V4SF_V2SI
:
26353 case V4SF_FTYPE_V4SF_V2DF
:
26354 case V4SF_FTYPE_V4SF_DI
:
26355 case V4SF_FTYPE_V4SF_SI
:
26356 case V2DI_FTYPE_V2DI_V2DI
:
26357 case V2DI_FTYPE_V16QI_V16QI
:
26358 case V2DI_FTYPE_V4SI_V4SI
:
26359 case V2DI_FTYPE_V2DI_V16QI
:
26360 case V2DI_FTYPE_V2DF_V2DF
:
26361 case V2SI_FTYPE_V2SI_V2SI
:
26362 case V2SI_FTYPE_V4HI_V4HI
:
26363 case V2SI_FTYPE_V2SF_V2SF
:
26364 case V2DF_FTYPE_V2DF_V2DF
:
26365 case V2DF_FTYPE_V2DF_V4SF
:
26366 case V2DF_FTYPE_V2DF_V2DI
:
26367 case V2DF_FTYPE_V2DF_DI
:
26368 case V2DF_FTYPE_V2DF_SI
:
26369 case V2SF_FTYPE_V2SF_V2SF
:
26370 case V1DI_FTYPE_V1DI_V1DI
:
26371 case V1DI_FTYPE_V8QI_V8QI
:
26372 case V1DI_FTYPE_V2SI_V2SI
:
26373 if (comparison
== UNKNOWN
)
26374 return ix86_expand_binop_builtin (icode
, exp
, target
);
26377 case V4SF_FTYPE_V4SF_V4SF_SWAP
:
26378 case V2DF_FTYPE_V2DF_V2DF_SWAP
:
26379 gcc_assert (comparison
!= UNKNOWN
);
26383 case V8HI_FTYPE_V8HI_V8HI_COUNT
:
26384 case V8HI_FTYPE_V8HI_SI_COUNT
:
26385 case V4SI_FTYPE_V4SI_V4SI_COUNT
:
26386 case V4SI_FTYPE_V4SI_SI_COUNT
:
26387 case V4HI_FTYPE_V4HI_V4HI_COUNT
:
26388 case V4HI_FTYPE_V4HI_SI_COUNT
:
26389 case V2DI_FTYPE_V2DI_V2DI_COUNT
:
26390 case V2DI_FTYPE_V2DI_SI_COUNT
:
26391 case V2SI_FTYPE_V2SI_V2SI_COUNT
:
26392 case V2SI_FTYPE_V2SI_SI_COUNT
:
26393 case V1DI_FTYPE_V1DI_V1DI_COUNT
:
26394 case V1DI_FTYPE_V1DI_SI_COUNT
:
26396 last_arg_count
= true;
26398 case UINT64_FTYPE_UINT64_UINT64
:
26399 case UINT_FTYPE_UINT_UINT
:
26400 case UINT_FTYPE_UINT_USHORT
:
26401 case UINT_FTYPE_UINT_UCHAR
:
26402 case UINT16_FTYPE_UINT16_INT
:
26403 case UINT8_FTYPE_UINT8_INT
:
26406 case V2DI_FTYPE_V2DI_INT_CONVERT
:
26409 nargs_constant
= 1;
26411 case V8HI_FTYPE_V8HI_INT
:
26412 case V8HI_FTYPE_V8SF_INT
:
26413 case V8HI_FTYPE_V4SF_INT
:
26414 case V8SF_FTYPE_V8SF_INT
:
26415 case V4SI_FTYPE_V4SI_INT
:
26416 case V4SI_FTYPE_V8SI_INT
:
26417 case V4HI_FTYPE_V4HI_INT
:
26418 case V4DF_FTYPE_V4DF_INT
:
26419 case V4SF_FTYPE_V4SF_INT
:
26420 case V4SF_FTYPE_V8SF_INT
:
26421 case V2DI_FTYPE_V2DI_INT
:
26422 case V2DF_FTYPE_V2DF_INT
:
26423 case V2DF_FTYPE_V4DF_INT
:
26425 nargs_constant
= 1;
26427 case V16QI_FTYPE_V16QI_V16QI_V16QI
:
26428 case V8SF_FTYPE_V8SF_V8SF_V8SF
:
26429 case V4DF_FTYPE_V4DF_V4DF_V4DF
:
26430 case V4SF_FTYPE_V4SF_V4SF_V4SF
:
26431 case V2DF_FTYPE_V2DF_V2DF_V2DF
:
26434 case V16QI_FTYPE_V16QI_V16QI_INT
:
26435 case V8HI_FTYPE_V8HI_V8HI_INT
:
26436 case V8SI_FTYPE_V8SI_V8SI_INT
:
26437 case V8SI_FTYPE_V8SI_V4SI_INT
:
26438 case V8SF_FTYPE_V8SF_V8SF_INT
:
26439 case V8SF_FTYPE_V8SF_V4SF_INT
:
26440 case V4SI_FTYPE_V4SI_V4SI_INT
:
26441 case V4DF_FTYPE_V4DF_V4DF_INT
:
26442 case V4DF_FTYPE_V4DF_V2DF_INT
:
26443 case V4SF_FTYPE_V4SF_V4SF_INT
:
26444 case V2DI_FTYPE_V2DI_V2DI_INT
:
26445 case V2DF_FTYPE_V2DF_V2DF_INT
:
26447 nargs_constant
= 1;
26449 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT
:
26452 nargs_constant
= 1;
26454 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT
:
26457 nargs_constant
= 1;
26459 case V2DI_FTYPE_V2DI_UINT_UINT
:
26461 nargs_constant
= 2;
26463 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT
:
26464 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT
:
26465 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT
:
26466 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT
:
26468 nargs_constant
= 1;
26470 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT
:
26472 nargs_constant
= 2;
    default:
      gcc_unreachable ();
    }

  gcc_assert (nargs <= ARRAY_SIZE (args));

  if (comparison != UNKNOWN)
    {
      gcc_assert (nargs == 2);
      return ix86_expand_sse_compare (d, exp, target, swap);
    }

  if (rmode == VOIDmode || rmode == tmode)
    {
      if (optimize
	  || target == 0
	  || GET_MODE (target) != tmode
	  || !insn_p->operand[0].predicate (target, tmode))
	target = gen_reg_rtx (tmode);
      real_target = target;
    }
  else
    {
      target = gen_reg_rtx (rmode);
      real_target = simplify_gen_subreg (tmode, target, rmode, 0);
    }
  for (i = 0; i < nargs; i++)
    {
      tree arg = CALL_EXPR_ARG (exp, i);
      rtx op = expand_normal (arg);
      enum machine_mode mode = insn_p->operand[i + 1].mode;
      bool match = insn_p->operand[i + 1].predicate (op, mode);

      if (last_arg_count && (i + 1) == nargs)
	{
	  /* SIMD shift insns take either an 8-bit immediate or
	     register as count.  But builtin functions take int as
	     count.  If count doesn't match, we put it in a register.  */
	  if (!match)
	    {
	      op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
	      if (!insn_p->operand[i + 1].predicate (op, mode))
		op = copy_to_reg (op);
	    }
	}
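      /* An illustrative example (hypothetical user code, not taken from
	 this file): a shift intrinsic called with a constant count such
	 as 3 can match the 8-bit immediate form of pslld directly, while
	 the same intrinsic called with a runtime "int n" fails the
	 predicate above and is narrowed to SImode and copied into a
	 register so the count operand becomes valid.  */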
      else if ((nargs - i) <= nargs_constant)
	{
	  if (!match)
	    switch (icode)
	      {
	      case CODE_FOR_sse4_1_roundpd:
	      case CODE_FOR_sse4_1_roundps:
	      case CODE_FOR_sse4_1_roundsd:
	      case CODE_FOR_sse4_1_roundss:
	      case CODE_FOR_sse4_1_blendps:
	      case CODE_FOR_avx_blendpd256:
	      case CODE_FOR_avx_vpermilv4df:
	      case CODE_FOR_avx_roundpd256:
	      case CODE_FOR_avx_roundps256:
		error ("the last argument must be a 4-bit immediate");
		return const0_rtx;

	      case CODE_FOR_sse4_1_blendpd:
	      case CODE_FOR_avx_vpermilv2df:
	      case CODE_FOR_xop_vpermil2v2df3:
	      case CODE_FOR_xop_vpermil2v4sf3:
	      case CODE_FOR_xop_vpermil2v4df3:
	      case CODE_FOR_xop_vpermil2v8sf3:
		error ("the last argument must be a 2-bit immediate");
		return const0_rtx;

	      case CODE_FOR_avx_vextractf128v4df:
	      case CODE_FOR_avx_vextractf128v8sf:
	      case CODE_FOR_avx_vextractf128v8si:
	      case CODE_FOR_avx_vinsertf128v4df:
	      case CODE_FOR_avx_vinsertf128v8sf:
	      case CODE_FOR_avx_vinsertf128v8si:
		error ("the last argument must be a 1-bit immediate");
		return const0_rtx;

	      case CODE_FOR_avx_vmcmpv2df3:
	      case CODE_FOR_avx_vmcmpv4sf3:
	      case CODE_FOR_avx_cmpv2df3:
	      case CODE_FOR_avx_cmpv4sf3:
	      case CODE_FOR_avx_cmpv4df3:
	      case CODE_FOR_avx_cmpv8sf3:
		error ("the last argument must be a 5-bit immediate");
		return const0_rtx;

	      default:
		switch (nargs_constant)
		  {
		  case 2:
		    if ((nargs - i) == nargs_constant)
		      {
			error ("the next to last argument must be an 8-bit immediate");
			break;
		      }
		  case 1:
		    error ("the last argument must be an 8-bit immediate");
		    break;
		  default:
		    gcc_unreachable ();
		  }
		return const0_rtx;
	      }
	}
      else
	{
	  if (VECTOR_MODE_P (mode))
	    op = safe_vector_operand (op, mode);

	  /* If we aren't optimizing, only allow one memory operand to
	     be generated.  */
	  if (memory_operand (op, mode))
	    num_memory++;

	  if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
	    {
	      if (optimize || !match || num_memory > 1)
		op = copy_to_mode_reg (mode, op);
	    }
	  else
	    {
	      op = copy_to_reg (op);
	      op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
	    }
	}

      args[i].op = op;
      args[i].mode = mode;
    }
  switch (nargs)
    {
    case 1:
      pat = GEN_FCN (icode) (real_target, args[0].op);
      break;
    case 2:
      pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
      break;
    case 3:
      pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
			     args[2].op);
      break;
    case 4:
      pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
			     args[2].op, args[3].op);
      break;
    default:
      gcc_unreachable ();
    }

  if (! pat)
    return 0;

  emit_insn (pat);
  return target;
}
/* Subroutine of ix86_expand_builtin to take care of special insns
   with variable number of operands.  */

static rtx
ix86_expand_special_args_builtin (const struct builtin_description *d,
				  tree exp, rtx target)
{
  tree arg;
  rtx pat, op;
  unsigned int i, nargs, arg_adjust, memory;
  struct
    {
      rtx op;
      enum machine_mode mode;
    } args[3];
  enum insn_code icode = d->icode;
  bool last_arg_constant = false;
  const struct insn_data_d *insn_p = &insn_data[icode];
  enum machine_mode tmode = insn_p->operand[0].mode;
  enum { load, store } klass;

  switch ((enum ix86_builtin_func_type) d->flag)
    {
    case VOID_FTYPE_VOID:
      if (icode == CODE_FOR_avx_vzeroupper)
	target = GEN_INT (vzeroupper_intrinsic);
      emit_insn (GEN_FCN (icode) (target));
      return 0;
    case VOID_FTYPE_UINT64:
    case VOID_FTYPE_UNSIGNED:
      nargs = 0;
      klass = store;
      memory = 0;
      break;
    case UINT64_FTYPE_VOID:
    case UNSIGNED_FTYPE_VOID:
      nargs = 0;
      klass = load;
      memory = 0;
      break;
    case UINT64_FTYPE_PUNSIGNED:
    case V2DI_FTYPE_PV2DI:
    case V32QI_FTYPE_PCCHAR:
    case V16QI_FTYPE_PCCHAR:
    case V8SF_FTYPE_PCV4SF:
    case V8SF_FTYPE_PCFLOAT:
    case V4SF_FTYPE_PCFLOAT:
    case V4DF_FTYPE_PCV2DF:
    case V4DF_FTYPE_PCDOUBLE:
    case V2DF_FTYPE_PCDOUBLE:
    case VOID_FTYPE_PVOID:
      nargs = 1;
      klass = load;
      memory = 0;
      break;
    case VOID_FTYPE_PV2SF_V4SF:
    case VOID_FTYPE_PV4DI_V4DI:
    case VOID_FTYPE_PV2DI_V2DI:
    case VOID_FTYPE_PCHAR_V32QI:
    case VOID_FTYPE_PCHAR_V16QI:
    case VOID_FTYPE_PFLOAT_V8SF:
    case VOID_FTYPE_PFLOAT_V4SF:
    case VOID_FTYPE_PDOUBLE_V4DF:
    case VOID_FTYPE_PDOUBLE_V2DF:
    case VOID_FTYPE_PULONGLONG_ULONGLONG:
    case VOID_FTYPE_PINT_INT:
      nargs = 1;
      klass = store;
      /* Reserve memory operand for target.  */
      memory = ARRAY_SIZE (args);
      break;
    case V4SF_FTYPE_V4SF_PCV2SF:
    case V2DF_FTYPE_V2DF_PCDOUBLE:
      nargs = 2;
      klass = load;
      memory = 1;
      break;
    case V8SF_FTYPE_PCV8SF_V8SI:
    case V4DF_FTYPE_PCV4DF_V4DI:
    case V4SF_FTYPE_PCV4SF_V4SI:
    case V2DF_FTYPE_PCV2DF_V2DI:
      nargs = 2;
      klass = load;
      memory = 0;
      break;
    case VOID_FTYPE_PV8SF_V8SI_V8SF:
    case VOID_FTYPE_PV4DF_V4DI_V4DF:
    case VOID_FTYPE_PV4SF_V4SI_V4SF:
    case VOID_FTYPE_PV2DF_V2DI_V2DF:
      nargs = 2;
      klass = store;
      /* Reserve memory operand for target.  */
      memory = ARRAY_SIZE (args);
      break;
    case VOID_FTYPE_UINT_UINT_UINT:
    case VOID_FTYPE_UINT64_UINT_UINT:
    case UCHAR_FTYPE_UINT_UINT_UINT:
    case UCHAR_FTYPE_UINT64_UINT_UINT:
      nargs = 3;
      klass = load;
      memory = ARRAY_SIZE (args);
      last_arg_constant = true;
      break;
    default:
      gcc_unreachable ();
    }
  gcc_assert (nargs <= ARRAY_SIZE (args));

  if (klass == store)
    {
      arg = CALL_EXPR_ARG (exp, 0);
      op = expand_normal (arg);
      gcc_assert (target == 0);
      if (memory)
	{
	  if (GET_MODE (op) != Pmode)
	    op = convert_to_mode (Pmode, op, 1);
	  target = gen_rtx_MEM (tmode, force_reg (Pmode, op));
	}
      else
	target = force_reg (tmode, op);
      arg_adjust = 1;
    }
  else
    {
      arg_adjust = 0;
      if (optimize
	  || target == 0
	  || GET_MODE (target) != tmode
	  || !insn_p->operand[0].predicate (target, tmode))
	target = gen_reg_rtx (tmode);
    }
  for (i = 0; i < nargs; i++)
    {
      enum machine_mode mode = insn_p->operand[i + 1].mode;
      bool match;

      arg = CALL_EXPR_ARG (exp, i + arg_adjust);
      op = expand_normal (arg);
      match = insn_p->operand[i + 1].predicate (op, mode);

      if (last_arg_constant && (i + 1) == nargs)
	{
	  if (!match)
	    {
	      if (icode == CODE_FOR_lwp_lwpvalsi3
		  || icode == CODE_FOR_lwp_lwpinssi3
		  || icode == CODE_FOR_lwp_lwpvaldi3
		  || icode == CODE_FOR_lwp_lwpinsdi3)
		error ("the last argument must be a 32-bit immediate");
	      else
		error ("the last argument must be an 8-bit immediate");
	      return const0_rtx;
	    }
	}
      else
	{
	  if (i == memory)
	    {
	      /* This must be the memory operand.  */
	      if (GET_MODE (op) != Pmode)
		op = convert_to_mode (Pmode, op, 1);
	      op = gen_rtx_MEM (mode, force_reg (Pmode, op));
	      gcc_assert (GET_MODE (op) == mode
			  || GET_MODE (op) == VOIDmode);
	    }
	  else
	    {
	      /* This must be register.  */
	      if (VECTOR_MODE_P (mode))
		op = safe_vector_operand (op, mode);

	      gcc_assert (GET_MODE (op) == mode
			  || GET_MODE (op) == VOIDmode);
	      op = copy_to_mode_reg (mode, op);
	    }
	}

      args[i].op = op;
      args[i].mode = mode;
    }
  switch (nargs)
    {
    case 0:
      pat = GEN_FCN (icode) (target);
      break;
    case 1:
      pat = GEN_FCN (icode) (target, args[0].op);
      break;
    case 2:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
      break;
    case 3:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
      break;
    default:
      gcc_unreachable ();
    }

  if (! pat)
    return 0;
  emit_insn (pat);
  return klass == store ? 0 : target;
}
/* Return the integer constant in ARG.  Constrain it to be in the range
   of the subparts of VEC_TYPE; issue an error if not.  */

static int
get_element_number (tree vec_type, tree arg)
{
  unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;

  if (!host_integerp (arg, 1)
      || (elt = tree_low_cst (arg, 1), elt > max))
    {
      error ("selector must be an integer constant in the range 0..%wi", max);
      return 0;
    }

  return elt;
}
/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_init.  We DO have language-level syntax for this, in
   the form of (type){ init-list }.  Except that since we can't place emms
   instructions from inside the compiler, we can't allow the use of MMX
   registers unless the user explicitly asks for it.  So we do *not* define
   vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md.  Instead
   we have builtins invoked by mmintrin.h that give us license to emit
   these sorts of instructions.  */
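/* An illustrative example (hypothetical user code): an mmintrin.h
   intrinsic such as _mm_set_pi16 (a, b, c, d) expands to
   __builtin_ia32_vec_init_v4hi, which arrives here with four scalar
   arguments and is lowered through ix86_expand_vector_init rather than
   through a vec_init pattern in mmx.md.  */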
static rtx
ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
{
  enum machine_mode tmode = TYPE_MODE (type);
  enum machine_mode inner_mode = GET_MODE_INNER (tmode);
  int i, n_elt = GET_MODE_NUNITS (tmode);
  rtvec v = rtvec_alloc (n_elt);

  gcc_assert (VECTOR_MODE_P (tmode));
  gcc_assert (call_expr_nargs (exp) == n_elt);

  for (i = 0; i < n_elt; ++i)
    {
      rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
      RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
    }

  if (!target || !register_operand (target, tmode))
    target = gen_reg_rtx (tmode);

  ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
  return target;
}
/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_extract.  They would be redundant (for non-MMX) if we
   had a language-level syntax for referencing vector elements.  */
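/* An illustrative example (hypothetical user code): an intrinsic such as
   _mm_extract_pi16 (v, 2) reaches this wrapper as a call to
   __builtin_ia32_vec_ext_v4hi (v, 2); the selector is validated by
   get_element_number and the extraction is performed by
   ix86_expand_vector_extract.  */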
static rtx
ix86_expand_vec_ext_builtin (tree exp, rtx target)
{
  enum machine_mode tmode, mode0;
  tree arg0, arg1;
  int elt;
  rtx op0;

  arg0 = CALL_EXPR_ARG (exp, 0);
  arg1 = CALL_EXPR_ARG (exp, 1);

  op0 = expand_normal (arg0);
  elt = get_element_number (TREE_TYPE (arg0), arg1);

  tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
  mode0 = TYPE_MODE (TREE_TYPE (arg0));
  gcc_assert (VECTOR_MODE_P (mode0));

  op0 = force_reg (mode0, op0);

  if (optimize || !target || !register_operand (target, tmode))
    target = gen_reg_rtx (tmode);

  ix86_expand_vector_extract (true, target, op0, elt);

  return target;
}
/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_set.  They would be redundant (for non-MMX) if we had
   a language-level syntax for referencing vector elements.  */
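/* An illustrative example (hypothetical user code): an intrinsic such as
   _mm_insert_pi16 (v, x, 1) becomes __builtin_ia32_vec_set_v4hi (v, x, 1).
   Note that the expansion below copies the source vector first, so the
   builtin has value semantics and V itself is left unmodified.  */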
static rtx
ix86_expand_vec_set_builtin (tree exp)
{
  enum machine_mode tmode, mode1;
  tree arg0, arg1, arg2;
  int elt;
  rtx op0, op1, target;

  arg0 = CALL_EXPR_ARG (exp, 0);
  arg1 = CALL_EXPR_ARG (exp, 1);
  arg2 = CALL_EXPR_ARG (exp, 2);

  tmode = TYPE_MODE (TREE_TYPE (arg0));
  mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
  gcc_assert (VECTOR_MODE_P (tmode));

  op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
  op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
  elt = get_element_number (TREE_TYPE (arg0), arg2);

  if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
    op1 = convert_modes (mode1, GET_MODE (op1), op1, true);

  op0 = force_reg (tmode, op0);
  op1 = force_reg (mode1, op1);

  /* OP0 is the source of these builtin functions and shouldn't be
     modified.  Create a copy, use it and return it as target.  */
  target = gen_reg_rtx (tmode);
  emit_move_insn (target, op0);
  ix86_expand_vector_set (true, target, op1, elt);

  return target;
}
/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.  */
static rtx
ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
		     enum machine_mode mode ATTRIBUTE_UNUSED,
		     int ignore ATTRIBUTE_UNUSED)
{
  const struct builtin_description *d;
  size_t i;
  enum insn_code icode;
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  tree arg0, arg1, arg2;
  rtx op0, op1, op2, pat;
  enum machine_mode mode0, mode1, mode2;
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
  /* Determine whether the builtin function is available under the current ISA.
     Originally the builtin was not created if it wasn't applicable to the
     current ISA based on the command line switches.  With function specific
     options, we need to check in the context of the function making the call
     whether it is supported.  */
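  /* An illustrative example (hypothetical user code): a function carrying
     __attribute__ ((target ("avx"))) may use AVX builtins even when the
     translation unit is compiled without -mavx.  The builtins therefore
     exist globally, and the check below must consult the ISA flags of the
     current function rather than the command line alone, rejecting uses
     outside such functions.  */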
  if (ix86_builtins_isa[fcode].isa
      && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
    {
      char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
				       NULL, (enum fpmath_unit) 0, false);

      if (!opts)
	error ("%qE needs unknown isa option", fndecl);
      else
	{
	  gcc_assert (opts != NULL);
	  error ("%qE needs isa option %s", fndecl, opts);
	  free (opts);
	}
      return const0_rtx;
    }

  switch (fcode)
    {
    case IX86_BUILTIN_MASKMOVQ:
    case IX86_BUILTIN_MASKMOVDQU:
      icode = (fcode == IX86_BUILTIN_MASKMOVQ
	       ? CODE_FOR_mmx_maskmovq
	       : CODE_FOR_sse2_maskmovdqu);
      /* Note the arg order is different from the operand order.  */
      arg1 = CALL_EXPR_ARG (exp, 0);
      arg2 = CALL_EXPR_ARG (exp, 1);
      arg0 = CALL_EXPR_ARG (exp, 2);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      mode0 = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (GET_MODE (op0) != Pmode)
	op0 = convert_to_mode (Pmode, op0, 1);
      op0 = gen_rtx_MEM (mode1, force_reg (Pmode, op0));

      if (!insn_data[icode].operand[0].predicate (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (!insn_data[icode].operand[1].predicate (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (!insn_data[icode].operand[2].predicate (op2, mode2))
	op2 = copy_to_mode_reg (mode2, op2);
      pat = GEN_FCN (icode) (op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return 0;
    case IX86_BUILTIN_LDMXCSR:
      op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
      target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
      emit_move_insn (target, op0);
      emit_insn (gen_sse_ldmxcsr (target));
      return 0;

    case IX86_BUILTIN_STMXCSR:
      target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
      emit_insn (gen_sse_stmxcsr (target));
      return copy_to_mode_reg (SImode, target);

    case IX86_BUILTIN_CLFLUSH:
      arg0 = CALL_EXPR_ARG (exp, 0);
      op0 = expand_normal (arg0);
      icode = CODE_FOR_sse2_clflush;
      if (!insn_data[icode].operand[0].predicate (op0, Pmode))
	{
	  if (GET_MODE (op0) != Pmode)
	    op0 = convert_to_mode (Pmode, op0, 1);
	  op0 = force_reg (Pmode, op0);
	}

      emit_insn (gen_sse2_clflush (op0));
      return 0;
    case IX86_BUILTIN_MONITOR:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      arg2 = CALL_EXPR_ARG (exp, 2);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      if (!REG_P (op0))
	{
	  if (GET_MODE (op0) != Pmode)
	    op0 = convert_to_mode (Pmode, op0, 1);
	  op0 = force_reg (Pmode, op0);
	}
      if (!REG_P (op1))
	op1 = copy_to_mode_reg (SImode, op1);
      if (!REG_P (op2))
	op2 = copy_to_mode_reg (SImode, op2);
      emit_insn (ix86_gen_monitor (op0, op1, op2));
      return 0;

    case IX86_BUILTIN_MWAIT:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      if (!REG_P (op0))
	op0 = copy_to_mode_reg (SImode, op0);
      if (!REG_P (op1))
	op1 = copy_to_mode_reg (SImode, op1);
      emit_insn (gen_sse3_mwait (op0, op1));
      return 0;
    case IX86_BUILTIN_VEC_INIT_V2SI:
    case IX86_BUILTIN_VEC_INIT_V4HI:
    case IX86_BUILTIN_VEC_INIT_V8QI:
      return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);

    case IX86_BUILTIN_VEC_EXT_V2DF:
    case IX86_BUILTIN_VEC_EXT_V2DI:
    case IX86_BUILTIN_VEC_EXT_V4SF:
    case IX86_BUILTIN_VEC_EXT_V4SI:
    case IX86_BUILTIN_VEC_EXT_V8HI:
    case IX86_BUILTIN_VEC_EXT_V2SI:
    case IX86_BUILTIN_VEC_EXT_V4HI:
    case IX86_BUILTIN_VEC_EXT_V16QI:
      return ix86_expand_vec_ext_builtin (exp, target);

    case IX86_BUILTIN_VEC_SET_V2DI:
    case IX86_BUILTIN_VEC_SET_V4SF:
    case IX86_BUILTIN_VEC_SET_V4SI:
    case IX86_BUILTIN_VEC_SET_V8HI:
    case IX86_BUILTIN_VEC_SET_V4HI:
    case IX86_BUILTIN_VEC_SET_V16QI:
      return ix86_expand_vec_set_builtin (exp);

    case IX86_BUILTIN_VEC_PERM_V2DF:
    case IX86_BUILTIN_VEC_PERM_V4SF:
    case IX86_BUILTIN_VEC_PERM_V2DI:
    case IX86_BUILTIN_VEC_PERM_V4SI:
    case IX86_BUILTIN_VEC_PERM_V8HI:
    case IX86_BUILTIN_VEC_PERM_V16QI:
    case IX86_BUILTIN_VEC_PERM_V2DI_U:
    case IX86_BUILTIN_VEC_PERM_V4SI_U:
    case IX86_BUILTIN_VEC_PERM_V8HI_U:
    case IX86_BUILTIN_VEC_PERM_V16QI_U:
    case IX86_BUILTIN_VEC_PERM_V4DF:
    case IX86_BUILTIN_VEC_PERM_V8SF:
      return ix86_expand_vec_perm_builtin (exp);
    case IX86_BUILTIN_INFQ:
    case IX86_BUILTIN_HUGE_VALQ:
      {
	REAL_VALUE_TYPE inf;
	rtx tmp;

	real_inf (&inf);
	tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);

	tmp = validize_mem (force_const_mem (mode, tmp));

	if (target == 0)
	  target = gen_reg_rtx (mode);

	emit_move_insn (target, tmp);
	return target;
      }
    case IX86_BUILTIN_LLWPCB:
      arg0 = CALL_EXPR_ARG (exp, 0);
      op0 = expand_normal (arg0);
      icode = CODE_FOR_lwp_llwpcb;
      if (!insn_data[icode].operand[0].predicate (op0, Pmode))
	{
	  if (GET_MODE (op0) != Pmode)
	    op0 = convert_to_mode (Pmode, op0, 1);
	  op0 = force_reg (Pmode, op0);
	}
      emit_insn (gen_lwp_llwpcb (op0));
      return 0;

    case IX86_BUILTIN_SLWPCB:
      icode = CODE_FOR_lwp_slwpcb;
      if (!target
	  || !insn_data[icode].operand[0].predicate (target, Pmode))
	target = gen_reg_rtx (Pmode);
      emit_insn (gen_lwp_slwpcb (target));
      return target;
    case IX86_BUILTIN_BEXTRI32:
    case IX86_BUILTIN_BEXTRI64:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      icode = (fcode == IX86_BUILTIN_BEXTRI32
	       ? CODE_FOR_tbm_bextri_si
	       : CODE_FOR_tbm_bextri_di);
      if (!CONST_INT_P (op1))
	{
	  error ("last argument must be an immediate");
	  return const0_rtx;
	}
      else
	{
	  unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
	  unsigned char lsb_index = INTVAL (op1) & 0xFF;
	  op1 = GEN_INT (length);
	  op2 = GEN_INT (lsb_index);
	  pat = GEN_FCN (icode) (target, op0, op1, op2);
	  if (pat)
	    emit_insn (pat);
	  return target;
	}
    case IX86_BUILTIN_RDRAND16_STEP:
      icode = CODE_FOR_rdrandhi_1;
      mode0 = HImode;
      goto rdrand_step;

    case IX86_BUILTIN_RDRAND32_STEP:
      icode = CODE_FOR_rdrandsi_1;
      mode0 = SImode;
      goto rdrand_step;

    case IX86_BUILTIN_RDRAND64_STEP:
      icode = CODE_FOR_rdranddi_1;
      mode0 = DImode;
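      /* An illustrative sketch of the protocol being expanded
	 (hypothetical user code):

	   unsigned int r;
	   if (__builtin_ia32_rdrand32_step (&r))
	     use (r);

	 rdrand leaves the random number in its destination register and
	 reports success in the carry flag; the code below stores the
	 value through the pointer argument and then materializes CF as
	 the 0/1 return value with an SImode conditional move.  */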
rdrand_step:
      op0 = gen_reg_rtx (mode0);
      emit_insn (GEN_FCN (icode) (op0));

      arg0 = CALL_EXPR_ARG (exp, 0);
      op1 = expand_normal (arg0);
      if (!address_operand (op1, VOIDmode))
	{
	  op1 = convert_memory_address (Pmode, op1);
	  op1 = copy_addr_to_reg (op1);
	}
      emit_move_insn (gen_rtx_MEM (mode0, op1), op0);

      op1 = gen_reg_rtx (SImode);
      emit_move_insn (op1, CONST1_RTX (SImode));

      /* Emit SImode conditional move.  */
      if (mode0 == HImode)
	{
	  op2 = gen_reg_rtx (SImode);
	  emit_insn (gen_zero_extendhisi2 (op2, op0));
	}
      else if (mode0 == SImode)
	op2 = op0;
      else
	op2 = gen_rtx_SUBREG (SImode, op0, 0);

      if (target == 0)
	target = gen_reg_rtx (SImode);

      pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
			 const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, target,
			      gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
      return target;

    default:
      break;
    }
  for (i = 0, d = bdesc_special_args;
       i < ARRAY_SIZE (bdesc_special_args);
       i++, d++)
    if (d->code == fcode)
      return ix86_expand_special_args_builtin (d, exp, target);

  for (i = 0, d = bdesc_args;
       i < ARRAY_SIZE (bdesc_args);
       i++, d++)
    if (d->code == fcode)
      switch (fcode)
	{
	case IX86_BUILTIN_FABSQ:
	case IX86_BUILTIN_COPYSIGNQ:
	  if (!TARGET_SSE2)
	    /* Emit a normal call if SSE2 isn't available.  */
	    return expand_call (exp, target, ignore);
	default:
	  return ix86_expand_args_builtin (d, exp, target);
	}

  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_comi (d, exp, target);

  for (i = 0, d = bdesc_pcmpestr;
       i < ARRAY_SIZE (bdesc_pcmpestr);
       i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_pcmpestr (d, exp, target);

  for (i = 0, d = bdesc_pcmpistr;
       i < ARRAY_SIZE (bdesc_pcmpistr);
       i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_pcmpistr (d, exp, target);

  for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
    if (d->code == fcode)
      return ix86_expand_multi_arg_builtin (d->icode, exp, target,
					    (enum ix86_builtin_func_type)
					    d->flag, d->comparison);

  gcc_unreachable ();
}
/* Returns a function decl for a vectorized version of the builtin function
   with builtin function code FN and the result vector type TYPE, or NULL_TREE
   if it is not available.  */
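/* An illustrative example (hypothetical user code): for

     double a[256], b[256];
     for (i = 0; i < 256; i++)
       a[i] = sqrt (b[i]);

   compiled with vectorization and unsafe math on an SSE2 target, the
   vectorizer queries this hook with fn = BUILT_IN_SQRT and V2DF types;
   returning the IX86_BUILTIN_SQRTPD decl lets the loop body become a
   single sqrtpd per pair of elements.  */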
static tree
ix86_builtin_vectorized_function (tree fndecl, tree type_out,
				  tree type_in)
{
  enum machine_mode in_mode, out_mode;
  int in_n, out_n;
  enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);

  if (TREE_CODE (type_out) != VECTOR_TYPE
      || TREE_CODE (type_in) != VECTOR_TYPE
      || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
    return NULL_TREE;

  out_mode = TYPE_MODE (TREE_TYPE (type_out));
  out_n = TYPE_VECTOR_SUBPARTS (type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (type_in);

  switch (fn)
    {
    case BUILT_IN_SQRT:
      if (out_mode == DFmode && in_mode == DFmode)
	{
	  if (out_n == 2 && in_n == 2)
	    return ix86_builtins[IX86_BUILTIN_SQRTPD];
	  else if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_SQRTPD256];
	}
      break;

    case BUILT_IN_SQRTF:
      if (out_mode == SFmode && in_mode == SFmode)
	{
	  if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
	  else if (out_n == 8 && in_n == 8)
	    return ix86_builtins[IX86_BUILTIN_SQRTPS_NR256];
	}
      break;

    case BUILT_IN_LRINT:
      if (out_mode == SImode && out_n == 4
	  && in_mode == DFmode && in_n == 2)
	return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
      break;

    case BUILT_IN_LRINTF:
      if (out_mode == SImode && in_mode == SFmode)
	{
	  if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
	  else if (out_n == 8 && in_n == 8)
	    return ix86_builtins[IX86_BUILTIN_CVTPS2DQ256];
	}
      break;

    case BUILT_IN_COPYSIGN:
      if (out_mode == DFmode && in_mode == DFmode)
	{
	  if (out_n == 2 && in_n == 2)
	    return ix86_builtins[IX86_BUILTIN_CPYSGNPD];
	  else if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_CPYSGNPD256];
	}
      break;

    case BUILT_IN_COPYSIGNF:
      if (out_mode == SFmode && in_mode == SFmode)
	{
	  if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_CPYSGNPS];
	  else if (out_n == 8 && in_n == 8)
	    return ix86_builtins[IX86_BUILTIN_CPYSGNPS256];
	}
      break;

    case BUILT_IN_FLOOR:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == DFmode && in_mode == DFmode)
	{
	  if (out_n == 2 && in_n == 2)
	    return ix86_builtins[IX86_BUILTIN_FLOORPD];
	  else if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_FLOORPD256];
	}
      break;

    case BUILT_IN_FLOORF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == SFmode && in_mode == SFmode)
	{
	  if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_FLOORPS];
	  else if (out_n == 8 && in_n == 8)
	    return ix86_builtins[IX86_BUILTIN_FLOORPS256];
	}
      break;

    case BUILT_IN_CEIL:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == DFmode && in_mode == DFmode)
	{
	  if (out_n == 2 && in_n == 2)
	    return ix86_builtins[IX86_BUILTIN_CEILPD];
	  else if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_CEILPD256];
	}
      break;

    case BUILT_IN_CEILF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == SFmode && in_mode == SFmode)
	{
	  if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_CEILPS];
	  else if (out_n == 8 && in_n == 8)
	    return ix86_builtins[IX86_BUILTIN_CEILPS256];
	}
      break;

    case BUILT_IN_TRUNC:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == DFmode && in_mode == DFmode)
	{
	  if (out_n == 2 && in_n == 2)
	    return ix86_builtins[IX86_BUILTIN_TRUNCPD];
	  else if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_TRUNCPD256];
	}
      break;

    case BUILT_IN_TRUNCF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == SFmode && in_mode == SFmode)
	{
	  if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_TRUNCPS];
	  else if (out_n == 8 && in_n == 8)
	    return ix86_builtins[IX86_BUILTIN_TRUNCPS256];
	}
      break;

    case BUILT_IN_RINT:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == DFmode && in_mode == DFmode)
	{
	  if (out_n == 2 && in_n == 2)
	    return ix86_builtins[IX86_BUILTIN_RINTPD];
	  else if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_RINTPD256];
	}
      break;

    case BUILT_IN_RINTF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == SFmode && in_mode == SFmode)
	{
	  if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_RINTPS];
	  else if (out_n == 8 && in_n == 8)
	    return ix86_builtins[IX86_BUILTIN_RINTPS256];
	}
      break;

    case BUILT_IN_FMA:
      if (out_mode == DFmode && in_mode == DFmode)
	{
	  if (out_n == 2 && in_n == 2)
	    return ix86_builtins[IX86_BUILTIN_VFMADDPD];
	  if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_VFMADDPD256];
	}
      break;

    case BUILT_IN_FMAF:
      if (out_mode == SFmode && in_mode == SFmode)
	{
	  if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_VFMADDPS];
	  if (out_n == 8 && in_n == 8)
	    return ix86_builtins[IX86_BUILTIN_VFMADDPS256];
	}
      break;

    default:
      break;
    }

  /* Dispatch to a handler for a vectorization library.  */
  if (ix86_veclib_handler)
    return ix86_veclib_handler ((enum built_in_function) fn, type_out,
				type_in);

  return NULL_TREE;
}
/* Handler for an SVML-style interface to
   a library with vectorized intrinsics.  */
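/* An illustrative example of the name mangling implemented below
   (assuming -mveclibabi=svml and -funsafe-math-optimizations): a
   vectorized call to sin over V2DF becomes a call to the external
   routine vmldSin2, and sinf over V4SF becomes vmlsSin4; log and logf
   are special-cased to vmldLn2 and vmlsLn4.  */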
static tree
ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
{
  char name[20];
  tree fntype, new_fndecl, args;
  unsigned arity;
  const char *bname;
  enum machine_mode el_mode, in_mode;
  int n, in_n;

  /* The SVML is suitable for unsafe math only.  */
  if (!flag_unsafe_math_optimizations)
    return NULL_TREE;

  el_mode = TYPE_MODE (TREE_TYPE (type_out));
  n = TYPE_VECTOR_SUBPARTS (type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (type_in);
  if (el_mode != in_mode
      || n != in_n)
    return NULL_TREE;

  switch (fn)
    {
    case BUILT_IN_EXP:
    case BUILT_IN_LOG:
    case BUILT_IN_LOG10:
    case BUILT_IN_POW:
    case BUILT_IN_TANH:
    case BUILT_IN_TAN:
    case BUILT_IN_ATAN:
    case BUILT_IN_ATAN2:
    case BUILT_IN_ATANH:
    case BUILT_IN_CBRT:
    case BUILT_IN_SINH:
    case BUILT_IN_SIN:
    case BUILT_IN_ASINH:
    case BUILT_IN_ASIN:
    case BUILT_IN_COSH:
    case BUILT_IN_COS:
    case BUILT_IN_ACOSH:
    case BUILT_IN_ACOS:
      if (el_mode != DFmode || n != 2)
	return NULL_TREE;
      break;

    case BUILT_IN_EXPF:
    case BUILT_IN_LOGF:
    case BUILT_IN_LOG10F:
    case BUILT_IN_POWF:
    case BUILT_IN_TANHF:
    case BUILT_IN_TANF:
    case BUILT_IN_ATANF:
    case BUILT_IN_ATAN2F:
    case BUILT_IN_ATANHF:
    case BUILT_IN_CBRTF:
    case BUILT_IN_SINHF:
    case BUILT_IN_SINF:
    case BUILT_IN_ASINHF:
    case BUILT_IN_ASINF:
    case BUILT_IN_COSHF:
    case BUILT_IN_COSF:
    case BUILT_IN_ACOSHF:
    case BUILT_IN_ACOSF:
      if (el_mode != SFmode || n != 4)
	return NULL_TREE;
      break;

    default:
      return NULL_TREE;
    }
  bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));

  if (fn == BUILT_IN_LOGF)
    strcpy (name, "vmlsLn4");
  else if (fn == BUILT_IN_LOG)
    strcpy (name, "vmldLn2");
  else if (n == 4)
    {
      sprintf (name, "vmls%s", bname+10);
      name[strlen (name)-1] = '4';
    }
  else
    sprintf (name, "vmld%s2", bname+10);

  /* Convert to uppercase.  */
  name[4] &= ~0x20;

  arity = 0;
  for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
       args = TREE_CHAIN (args))
    arity++;

  if (arity == 1)
    fntype = build_function_type_list (type_out, type_in, NULL);
  else
    fntype = build_function_type_list (type_out, type_in, type_in, NULL);

  /* Build a function declaration for the vectorized function.  */
  new_fndecl = build_decl (BUILTINS_LOCATION,
			   FUNCTION_DECL, get_identifier (name), fntype);
  TREE_PUBLIC (new_fndecl) = 1;
  DECL_EXTERNAL (new_fndecl) = 1;
  DECL_IS_NOVOPS (new_fndecl) = 1;
  TREE_READONLY (new_fndecl) = 1;

  return new_fndecl;
}
/* Handler for an ACML-style interface to
   a library with vectorized intrinsics.  */
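/* An illustrative example of the name template below (assuming
   -mveclibabi=acml on a 64-bit target with unsafe math): the "__vr.._"
   skeleton is filled in so that sin over V2DF calls __vrd2_sin and sinf
   over V4SF calls __vrs4_sinf.  */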
static tree
ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
{
  char name[20] = "__vr.._";
  tree fntype, new_fndecl, args;
  unsigned arity;
  const char *bname;
  enum machine_mode el_mode, in_mode;
  int n, in_n;

  /* The ACML is 64bits only and suitable for unsafe math only as
     it does not correctly support parts of IEEE with the required
     precision such as denormals.  */
  if (!TARGET_64BIT
      || !flag_unsafe_math_optimizations)
    return NULL_TREE;

  el_mode = TYPE_MODE (TREE_TYPE (type_out));
  n = TYPE_VECTOR_SUBPARTS (type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (type_in);
  if (el_mode != in_mode
      || n != in_n)
    return NULL_TREE;

  switch (fn)
    {
    case BUILT_IN_SIN:
    case BUILT_IN_COS:
    case BUILT_IN_EXP:
    case BUILT_IN_POW:
    case BUILT_IN_LOG:
    case BUILT_IN_LOG2:
    case BUILT_IN_LOG10:
      name[4] = 'd';
      name[5] = '2';
      if (el_mode != DFmode
	  || n != 2)
	return NULL_TREE;
      break;
    case BUILT_IN_SINF:
    case BUILT_IN_COSF:
    case BUILT_IN_EXPF:
    case BUILT_IN_POWF:
    case BUILT_IN_LOGF:
    case BUILT_IN_LOG2F:
    case BUILT_IN_LOG10F:
      name[4] = 's';
      name[5] = '4';
      if (el_mode != SFmode
	  || n != 4)
	return NULL_TREE;
      break;

    default:
      return NULL_TREE;
    }
  bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
  sprintf (name + 7, "%s", bname+10);

  arity = 0;
  for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
       args = TREE_CHAIN (args))
    arity++;

  if (arity == 1)
    fntype = build_function_type_list (type_out, type_in, NULL);
  else
    fntype = build_function_type_list (type_out, type_in, type_in, NULL);

  /* Build a function declaration for the vectorized function.  */
  new_fndecl = build_decl (BUILTINS_LOCATION,
			   FUNCTION_DECL, get_identifier (name), fntype);
  TREE_PUBLIC (new_fndecl) = 1;
  DECL_EXTERNAL (new_fndecl) = 1;
  DECL_IS_NOVOPS (new_fndecl) = 1;
  TREE_READONLY (new_fndecl) = 1;

  return new_fndecl;
}
/* Returns a decl of a function that implements conversion of an integer vector
   into a floating-point vector, or vice-versa.  DEST_TYPE and SRC_TYPE
   are the types involved when converting according to CODE.
   Return NULL_TREE if it is not available.  */
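/* An illustrative example (hypothetical user code): for

     int a[4]; float b[4];
     for (i = 0; i < 4; i++)
       b[i] = a[i];

   the vectorizer asks for FLOAT_EXPR with a V4SI source and V4SF
   destination and receives IX86_BUILTIN_CVTDQ2PS, so the loop body
   becomes a single cvtdq2ps instruction.  */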
static tree
ix86_vectorize_builtin_conversion (unsigned int code,
				   tree dest_type, tree src_type)
{
  if (! TARGET_SSE2)
    return NULL_TREE;

  switch (code)
    {
    case FLOAT_EXPR:
      switch (TYPE_MODE (src_type))
	{
	case V4SImode:
	  switch (TYPE_MODE (dest_type))
	    {
	    case V4SFmode:
	      return (TYPE_UNSIGNED (src_type)
		      ? ix86_builtins[IX86_BUILTIN_CVTUDQ2PS]
		      : ix86_builtins[IX86_BUILTIN_CVTDQ2PS]);
	    case V4DFmode:
	      return (TYPE_UNSIGNED (src_type)
		      ? NULL_TREE
		      : ix86_builtins[IX86_BUILTIN_CVTDQ2PD256]);
	    default:
	      return NULL_TREE;
	    }

	case V8SImode:
	  switch (TYPE_MODE (dest_type))
	    {
	    case V8SFmode:
	      return (TYPE_UNSIGNED (src_type)
		      ? NULL_TREE
		      : ix86_builtins[IX86_BUILTIN_CVTDQ2PS256]);
	    default:
	      return NULL_TREE;
	    }

	default:
	  return NULL_TREE;
	}

    case FIX_TRUNC_EXPR:
      switch (TYPE_MODE (dest_type))
	{
	case V4SImode:
	  switch (TYPE_MODE (src_type))
	    {
	    case V4SFmode:
	      return (TYPE_UNSIGNED (dest_type)
		      ? NULL_TREE
		      : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ]);
	    case V4DFmode:
	      return (TYPE_UNSIGNED (dest_type)
		      ? NULL_TREE
		      : ix86_builtins[IX86_BUILTIN_CVTTPD2DQ256]);
	    default:
	      return NULL_TREE;
	    }

	case V8SImode:
	  switch (TYPE_MODE (src_type))
	    {
	    case V8SFmode:
	      return (TYPE_UNSIGNED (dest_type)
		      ? NULL_TREE
		      : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ256]);
	    default:
	      return NULL_TREE;
	    }

	default:
	  return NULL_TREE;
	}

    default:
      return NULL_TREE;
    }
}
/* Returns a code for a target-specific builtin that implements
   reciprocal of the function, or NULL_TREE if not available.  */
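/* An illustrative example: under -ffast-math, 1.0f / sqrtf (x) can be
   computed with rsqrtss plus a Newton-Raphson refinement step instead of
   a full sqrtss and a division, which is why BUILT_IN_SQRTF is mapped
   below to the IX86_BUILTIN_RSQRTF builtin.  */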
static tree
ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
			 bool sqrt ATTRIBUTE_UNUSED)
{
  if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
	 && flag_finite_math_only && !flag_trapping_math
	 && flag_unsafe_math_optimizations))
    return NULL_TREE;

  if (md_fn)
    /* Machine dependent builtins.  */
    switch (fn)
      {
	/* Vectorized version of sqrt to rsqrt conversion.  */
      case IX86_BUILTIN_SQRTPS_NR:
	return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];

      case IX86_BUILTIN_SQRTPS_NR256:
	return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR256];

      default:
	return NULL_TREE;
      }
  else
    /* Normal builtins.  */
    switch (fn)
      {
	/* Sqrt to rsqrt conversion.  */
      case BUILT_IN_SQRTF:
	return ix86_builtins[IX86_BUILTIN_RSQRTF];

      default:
	return NULL_TREE;
      }
}
/* Helper for avx_vpermilps256_operand et al.  This is also used by
   the expansion functions to turn the parallel back into a mask.
   The return value is 0 for no match and the imm8+1 for a match.  */
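/* An illustrative example: in V4SFmode the parallel [1 0 3 2] packs two
   bits per element into imm8 = (1 << 0) | (0 << 2) | (3 << 4) | (2 << 6)
   = 0xb1, so the function returns 0xb1 + 1.  */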
int
avx_vpermilp_parallel (rtx par, enum machine_mode mode)
{
  unsigned i, nelt = GET_MODE_NUNITS (mode);
  unsigned mask = 0;
  unsigned char ipar[8];

  if (XVECLEN (par, 0) != (int) nelt)
    return 0;

  /* Validate that all of the elements are constants, and not totally
     out of range.  Copy the data into an integral array to make the
     subsequent checks easier.  */
  for (i = 0; i < nelt; ++i)
    {
      rtx er = XVECEXP (par, 0, i);
      unsigned HOST_WIDE_INT ei;

      if (!CONST_INT_P (er))
	return 0;
      ei = INTVAL (er);
      if (ei >= nelt)
	return 0;
      ipar[i] = ei;
    }

  switch (mode)
    {
    case V4DFmode:
      /* In the 256-bit DFmode case, we can only move elements within
	 a 128-bit lane.  */
      for (i = 0; i < 2; ++i)
	{
	  if (ipar[i] >= 2)
	    return 0;
	  mask |= ipar[i] << i;
	}
      for (i = 2; i < 4; ++i)
	{
	  if (ipar[i] < 2)
	    return 0;
	  mask |= (ipar[i] - 2) << i;
	}
      break;

    case V8SFmode:
      /* In the 256-bit SFmode case, we have full freedom of movement
	 within the low 128-bit lane, but the high 128-bit lane must
	 mirror the exact same pattern.  */
      for (i = 0; i < 4; ++i)
	if (ipar[i] + 4 != ipar[i + 4])
	  return 0;
      nelt = 4;
      /* FALLTHRU */

    case V2DFmode:
    case V4SFmode:
      /* In the 128-bit case, we've full freedom in the placement of
	 the elements from the source operand.  */
      for (i = 0; i < nelt; ++i)
	mask |= ipar[i] << (i * (nelt / 2));
      break;

    default:
      gcc_unreachable ();
    }

  /* Make sure success has a non-zero value by adding one.  */
  return mask + 1;
}
/* Helper for avx_vperm2f128_v4df_operand et al.  This is also used by
   the expansion functions to turn the parallel back into a mask.
   The return value is 0 for no match and the imm8+1 for a match.  */
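/* An illustrative example: in V8SFmode the parallel [8 9 10 11 4 5 6 7]
   selects the low half of the second source operand (selector 2) and the
   high half of the first (selector 1), giving imm8 = 0x12 and a return
   value of 0x12 + 1.  */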
int
avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
{
  unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
  unsigned mask = 0;
  unsigned char ipar[8];

  if (XVECLEN (par, 0) != (int) nelt)
    return 0;

  /* Validate that all of the elements are constants, and not totally
     out of range.  Copy the data into an integral array to make the
     subsequent checks easier.  */
  for (i = 0; i < nelt; ++i)
    {
      rtx er = XVECEXP (par, 0, i);
      unsigned HOST_WIDE_INT ei;

      if (!CONST_INT_P (er))
	return 0;
      ei = INTVAL (er);
      if (ei >= 2 * nelt)
	return 0;
      ipar[i] = ei;
    }

  /* Validate that the halves of the permute are halves.  */
  for (i = 0; i < nelt2 - 1; ++i)
    if (ipar[i] + 1 != ipar[i + 1])
      return 0;
  for (i = nelt2; i < nelt - 1; ++i)
    if (ipar[i] + 1 != ipar[i + 1])
      return 0;

  /* Reconstruct the mask.  */
  for (i = 0; i < 2; ++i)
    {
      unsigned e = ipar[i * nelt2];
      if (e % nelt2)
	return 0;
      e /= nelt2;
      mask |= e << (i * 4);
    }

  /* Make sure success has a non-zero value by adding one.  */
  return mask + 1;
}
/* Store OPERAND to the memory after reload is completed.  This means
   that we can't easily use assign_stack_local.  */

rtx
ix86_force_to_memory (enum machine_mode mode, rtx operand)
{
  rtx result;

  gcc_assert (reload_completed);
  if (ix86_using_red_zone ())
    {
      result = gen_rtx_MEM (mode,
			    gen_rtx_PLUS (Pmode,
					  stack_pointer_rtx,
					  GEN_INT (-RED_ZONE_SIZE)));
      emit_move_insn (result, operand);
    }
  else if (TARGET_64BIT)
    {
      switch (mode)
	{
	case HImode:
	case SImode:
	  operand = gen_lowpart (DImode, operand);
	  /* FALLTHRU */
	case DImode:
	  emit_insn (gen_rtx_SET (VOIDmode,
				  gen_rtx_MEM (DImode,
					       gen_rtx_PRE_DEC (DImode,
								stack_pointer_rtx)),
				  operand));
	  break;
	default:
	  gcc_unreachable ();
	}
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  else
    {
      switch (mode)
	{
	case DImode:
	  {
	    rtx operands[2];
	    split_double_mode (mode, &operand, 1, operands, operands + 1);
	    emit_insn (gen_rtx_SET (VOIDmode,
				    gen_rtx_MEM (SImode,
						 gen_rtx_PRE_DEC (Pmode,
								  stack_pointer_rtx)),
				    operands[1]));
	    emit_insn (gen_rtx_SET (VOIDmode,
				    gen_rtx_MEM (SImode,
						 gen_rtx_PRE_DEC (Pmode,
								  stack_pointer_rtx)),
				    operands[0]));
	  }
	  break;
	case HImode:
	  /* Store HImodes as SImodes.  */
	  operand = gen_lowpart (SImode, operand);
	  /* FALLTHRU */
	case SImode:
	  emit_insn (gen_rtx_SET (VOIDmode,
				  gen_rtx_MEM (GET_MODE (operand),
					       gen_rtx_PRE_DEC (SImode,
								stack_pointer_rtx)),
				  operand));
	  break;
	default:
	  gcc_unreachable ();
	}
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  return result;
}
/* Free operand from the memory.  */
void
ix86_free_from_memory (enum machine_mode mode)
{
  if (!ix86_using_red_zone ())
    {
      int size;

      if (mode == DImode || TARGET_64BIT)
	size = 8;
      else
	size = 4;
      /* Use LEA to deallocate stack space.  In peephole2 it will be converted
	 to pop or add instruction if registers are available.  */
      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
			      gen_rtx_PLUS (Pmode, stack_pointer_rtx,
					    GEN_INT (size))));
    }
}
/* Implement TARGET_PREFERRED_RELOAD_CLASS.

   Put float CONST_DOUBLE in the constant pool instead of fp regs.
   QImode must go into class Q_REGS.
   Narrow ALL_REGS to GENERAL_REGS.  This supports allowing movsf and
   movdf to do mem-to-mem moves through integer regs.  */

static reg_class_t
ix86_preferred_reload_class (rtx x, reg_class_t regclass)
{
  enum machine_mode mode = GET_MODE (x);

  /* We're only allowed to return a subclass of CLASS.  Many of the
     following checks fail for NO_REGS, so eliminate that early.  */
  if (regclass == NO_REGS)
    return NO_REGS;

  /* All classes can load zeros.  */
  if (x == CONST0_RTX (mode))
    return regclass;

  /* Force constants into memory if we are loading a (nonzero) constant into
     an MMX or SSE register.  This is because there are no MMX/SSE instructions
     to load from a constant.  */
  if (CONSTANT_P (x)
      && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
    return NO_REGS;

  /* Prefer SSE regs only, if we can use them for math.  */
  if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
    return SSE_CLASS_P (regclass) ? regclass : NO_REGS;

  /* Floating-point constants need more complex checks.  */
  if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
    {
      /* General regs can load everything.  */
      if (reg_class_subset_p (regclass, GENERAL_REGS))
	return regclass;

      /* Floats can load 0 and 1 plus some others.  Note that we eliminated
	 zero above.  We only want to wind up preferring 80387 registers if
	 we plan on doing computation with them.  */
      if (TARGET_80387
	  && standard_80387_constant_p (x) > 0)
	{
	  /* Limit class to non-sse.  */
	  if (regclass == FLOAT_SSE_REGS)
	    return FLOAT_REGS;
	  if (regclass == FP_TOP_SSE_REGS)
	    return FP_TOP_REG;
	  if (regclass == FP_SECOND_SSE_REGS)
	    return FP_SECOND_REG;
	  if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
	    return regclass;
	}

      return NO_REGS;
    }

  /* Generally when we see PLUS here, it's the function invariant
     (plus soft-fp const_int).  Which can only be computed into general
     regs.  */
  if (GET_CODE (x) == PLUS)
    return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;

  /* QImode constants are easy to load, but non-constant QImode data
     must go into Q_REGS.  */
  if (GET_MODE (x) == QImode && !CONSTANT_P (x))
    {
      if (reg_class_subset_p (regclass, Q_REGS))
	return regclass;
      if (reg_class_subset_p (Q_REGS, regclass))
	return Q_REGS;
      return NO_REGS;
    }

  return regclass;
}
/* Discourage putting floating-point values in SSE registers unless
   SSE math is being used, and likewise for the 387 registers.  */
static reg_class_t
ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
{
  enum machine_mode mode = GET_MODE (x);

  /* Restrict the output reload class to the register bank that we are doing
     math on.  If we would like not to return a subset of CLASS, reject this
     alternative: if reload cannot do this, it will still use its choice.  */
  if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
    return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;

  if (X87_FLOAT_MODE_P (mode))
    {
      if (regclass == FP_TOP_SSE_REGS)
	return FP_TOP_REG;
      else if (regclass == FP_SECOND_SSE_REGS)
	return FP_SECOND_REG;
      else
	return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
    }

  return regclass;
}
static reg_class_t
ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
		       enum machine_mode mode,
		       secondary_reload_info *sri ATTRIBUTE_UNUSED)
{
  /* QImode spills from non-QI registers require
     intermediate register on 32bit targets.  */
  if (!TARGET_64BIT
      && !in_p && mode == QImode
      && (rclass == GENERAL_REGS
	  || rclass == LEGACY_REGS
	  || rclass == INDEX_REGS))
    {
      int regno;

      if (REG_P (x))
	regno = REGNO (x);
      else
	regno = -1;

      if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
	regno = true_regnum (x);

      /* Return Q_REGS if the operand is in memory.  */
      if (regno == -1)
	return Q_REGS;
    }

  /* This condition handles corner case where an expression involving
     pointers gets vectorized.  We're trying to use the address of a
     stack slot as a vector initializer.

       (set (reg:V2DI 74 [ vect_cst_.2 ])
	    (vec_duplicate:V2DI (reg/f:DI 20 frame)))

     Eventually frame gets turned into sp+offset like this:

       (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
	    (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
				         (const_int 392 [0x188]))))

     That later gets turned into:

       (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
	    (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
	      (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))

     We'll have the following reload recorded:

       Reload 0: reload_in (DI) =
	   (plus:DI (reg/f:DI 7 sp)
	    (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
       reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
       SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
       reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
       reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
       reload_reg_rtx: (reg:V2DI 22 xmm1)

     Which isn't going to work since SSE instructions can't handle scalar
     additions.  Returning GENERAL_REGS forces the addition into integer
     register and reload can handle subsequent reloads without problems.  */

  if (in_p && GET_CODE (x) == PLUS
      && SSE_CLASS_P (rclass)
      && SCALAR_INT_MODE_P (mode))
    return GENERAL_REGS;

  return NO_REGS;
}
/* Implement TARGET_CLASS_LIKELY_SPILLED_P.  */

static bool
ix86_class_likely_spilled_p (reg_class_t rclass)
{
  switch (rclass)
    {
      case AREG:
      case DREG:
      case CREG:
      case BREG:
      case AD_REGS:
      case CLOBBERED_REGS:
      case Q_REGS:
      case NON_Q_REGS:
      case SIREG:
      case DIREG:
      case SSE_FIRST_REG:
      case FP_TOP_REG:
      case FP_SECOND_REG:
	return true;

      default:
	break;
    }

  return false;
}
/* If we are copying between general and FP registers, we need a memory
   location.  The same is true for SSE and MMX registers.

   To optimize register_move_cost performance, allow inline variant.

   The macro can't work reliably when one of the CLASSES is class containing
   registers from multiple units (SSE, MMX, integer).  We avoid this by never
   combining those units in single alternative in the machine description.
   Ensure that this constraint holds to avoid unexpected surprises.

   When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
   enforce these sanity checks.  */

static inline bool
inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
				enum machine_mode mode, int strict)
{
  if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
      || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
      || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
      || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
      || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
      || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
    {
      gcc_assert (!strict);
      return true;
    }

  if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
    return true;

  /* ??? This is a lie.  We do have moves between mmx/general, and for
     mmx/sse2.  But by saying we need secondary memory we discourage the
     register allocator from using the mmx registers unless needed.  */
  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
    return true;

  if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
    {
      /* SSE1 doesn't have any direct moves from other classes.  */
      if (!TARGET_SSE2)
	return true;

      /* If the target says that inter-unit moves are more expensive
	 than moving through memory, then don't generate them.  */
      if (!TARGET_INTER_UNIT_MOVES)
	return true;

      /* Between SSE and general, we have moves no larger than word size.  */
      if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
	return true;
    }

  return false;
}
bool
ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
			      enum machine_mode mode, int strict)
{
  return inline_secondary_memory_needed (class1, class2, mode, strict);
}
/* Implement the TARGET_CLASS_MAX_NREGS hook.

   On the 80386, this is the size of MODE in words,
   except in the FP regs, where a single reg is always enough.  */

static unsigned char
ix86_class_max_nregs (reg_class_t rclass, enum machine_mode mode)
{
  if (MAYBE_INTEGER_CLASS_P (rclass))
    {
      if (mode == XFmode)
	return (TARGET_64BIT ? 2 : 3);
      else if (mode == XCmode)
	return (TARGET_64BIT ? 4 : 6);
      else
	return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
    }
  else
    {
      if (COMPLEX_MODE_P (mode))
	return 2;
      else
	return 1;
    }
}
/* Return true if the registers in CLASS cannot represent the change from
   modes FROM to TO.  */

bool
ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
			       enum reg_class regclass)
{
  if (from == to)
    return false;

  /* x87 registers can't do subreg at all, as all values are reformatted
     to extended precision.  */
  if (MAYBE_FLOAT_CLASS_P (regclass))
    return true;

  if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
    {
      /* Vector registers do not support QI or HImode loads.  If we don't
	 disallow a change to these modes, reload will assume it's ok to
	 drop the subreg from (subreg:SI (reg:HI 100) 0).  This affects
	 the vec_dupv4hi pattern.  */
      if (GET_MODE_SIZE (from) < 4)
	return true;

      /* Vector registers do not support subreg with nonzero offsets, which
	 are otherwise valid for integer registers.  Since we can't see
	 whether we have a nonzero offset from here, prohibit all
	 nonparadoxical subregs changing size.  */
      if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
	return true;
    }

  return false;
}
/* Return the cost of moving data of mode M between a
   register and memory.  A value of 2 is the default; this cost is
   relative to those in `REGISTER_MOVE_COST'.

   This function is used extensively by register_move_cost that is used to
   build tables at startup.  Make it inline in this case.
   When IN is 2, return maximum of in and out move cost.

   If moving between registers and memory is more expensive than
   between two registers, you should define this macro to express the
   relative cost.

   Model also increased moving costs of QImode registers in non
   Q_REGS classes.  */

static inline int
inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
			 int in)
{
  int cost;

  if (FLOAT_CLASS_P (regclass))
    {
      int index;
      switch (mode)
	{
	case SFmode:
	  index = 0;
	  break;
	case DFmode:
	  index = 1;
	  break;
	case XFmode:
	  index = 2;
	  break;
	default:
	  return 100;
	}
      if (in == 2)
	return MAX (ix86_cost->fp_load[index], ix86_cost->fp_store[index]);
      return in ? ix86_cost->fp_load[index] : ix86_cost->fp_store[index];
    }
  if (SSE_CLASS_P (regclass))
    {
      int index;
      switch (GET_MODE_SIZE (mode))
	{
	case 4:
	  index = 0;
	  break;
	case 8:
	  index = 1;
	  break;
	case 16:
	  index = 2;
	  break;
	default:
	  return 100;
	}
      if (in == 2)
	return MAX (ix86_cost->sse_load[index], ix86_cost->sse_store[index]);
      return in ? ix86_cost->sse_load[index] : ix86_cost->sse_store[index];
    }
  if (MMX_CLASS_P (regclass))
    {
      int index;
      switch (GET_MODE_SIZE (mode))
	{
	case 4:
	  index = 0;
	  break;
	case 8:
	  index = 1;
	  break;
	default:
	  return 100;
	}
      if (in == 2)
	return MAX (ix86_cost->mmx_load[index], ix86_cost->mmx_store[index]);
      return in ? ix86_cost->mmx_load[index] : ix86_cost->mmx_store[index];
    }
  switch (GET_MODE_SIZE (mode))
    {
    case 1:
      if (Q_CLASS_P (regclass) || TARGET_64BIT)
	{
	  if (!in)
	    return ix86_cost->int_store[0];
	  if (TARGET_PARTIAL_REG_DEPENDENCY
	      && optimize_function_for_speed_p (cfun))
	    cost = ix86_cost->movzbl_load;
	  else
	    cost = ix86_cost->int_load[0];
	  if (in == 2)
	    return MAX (cost, ix86_cost->int_store[0]);
	  return cost;
	}
      else
	{
	  if (in == 2)
	    return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
	  if (in)
	    return ix86_cost->movzbl_load;
	  else
	    return ix86_cost->int_store[0] + 4;
	}
      break;
    case 2:
      if (in == 2)
	return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
      return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
    default:
      /* Compute number of 32bit moves needed.  TFmode is moved as XFmode.  */
      if (mode == TFmode)
	mode = XFmode;
      if (in == 2)
	cost = MAX (ix86_cost->int_load[2], ix86_cost->int_store[2]);
      else if (in)
	cost = ix86_cost->int_load[2];
      else
	cost = ix86_cost->int_store[2];
      return (cost * (((int) GET_MODE_SIZE (mode)
		       + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
    }
}
static int
ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
		       bool in)
{
  return inline_memory_move_cost (mode, (enum reg_class) regclass,
				  in ? 1 : 0);
}
/* Return the cost of moving data from a register in class CLASS1 to
   one in class CLASS2.

   It is not required that the cost always equal 2 when FROM is the same as TO;
   on some machines it is expensive to move between registers if they are not
   general registers.  */

static int
ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
			 reg_class_t class2_i)
{
  enum reg_class class1 = (enum reg_class) class1_i;
  enum reg_class class2 = (enum reg_class) class2_i;

  /* In case we require secondary memory, compute cost of the store followed
     by load.  In order to avoid bad register allocation choices, we need
     for this to be *at least* as high as the symmetric MEMORY_MOVE_COST.  */
  if (inline_secondary_memory_needed (class1, class2, mode, 0))
    {
      int cost = 1;

      cost += inline_memory_move_cost (mode, class1, 2);
      cost += inline_memory_move_cost (mode, class2, 2);

      /* In case of copying from general_purpose_register we may emit multiple
	 stores followed by single load causing memory size mismatch stall.
	 Count this as arbitrarily high cost of 20.  */
      if (targetm.class_max_nregs (class1, mode)
	  > targetm.class_max_nregs (class2, mode))
	cost += 20;

      /* In the case of FP/MMX moves, the registers actually overlap, and we
	 have to switch modes in order to treat them differently.  */
      if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
	  || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
	cost += 20;

      return cost;
    }

  /* Moves between SSE/MMX and integer unit are expensive.  */
  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
      || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))

    /* ??? By keeping returned value relatively high, we limit the number
       of moves between integer and MMX/SSE registers for all targets.
       Additionally, high value prevents problem with x86_modes_tieable_p(),
       where integer modes in MMX/SSE registers are not tieable
       because of missing QImode and HImode moves to, from or between
       MMX/SSE registers.  */
    return MAX (8, ix86_cost->mmxsse_to_integer);

  if (MAYBE_FLOAT_CLASS_P (class1))
    return ix86_cost->fp_move;
  if (MAYBE_SSE_CLASS_P (class1))
    return ix86_cost->sse_move;
  if (MAYBE_MMX_CLASS_P (class1))
    return ix86_cost->mmx_move;
  return 2;
}
/* Return TRUE if hard register REGNO can hold a value of machine-mode
   MODE.  */

bool
ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
{
  /* Flags and only flags can only hold CCmode values.  */
  if (CC_REGNO_P (regno))
    return GET_MODE_CLASS (mode) == MODE_CC;
  if (GET_MODE_CLASS (mode) == MODE_CC
      || GET_MODE_CLASS (mode) == MODE_RANDOM
      || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
    return false;
  if (FP_REGNO_P (regno))
    return VALID_FP_MODE_P (mode);
  if (SSE_REGNO_P (regno))
    {
      /* We implement the move patterns for all vector modes into and
	 out of SSE registers, even when no operation instructions
	 are available.  OImode move is available only when AVX is
	 enabled.  */
      return ((TARGET_AVX && mode == OImode)
	      || VALID_AVX256_REG_MODE (mode)
	      || VALID_SSE_REG_MODE (mode)
	      || VALID_SSE2_REG_MODE (mode)
	      || VALID_MMX_REG_MODE (mode)
	      || VALID_MMX_REG_MODE_3DNOW (mode));
    }
  if (MMX_REGNO_P (regno))
    {
      /* We implement the move patterns for 3DNOW modes even in MMX mode,
	 so if the register is available at all, then we can move data of
	 the given mode into or out of it.  */
      return (VALID_MMX_REG_MODE (mode)
	      || VALID_MMX_REG_MODE_3DNOW (mode));
    }

  if (mode == QImode)
    {
      /* Take care for QImode values - they can be in non-QI regs,
	 but then they do cause partial register stalls.  */
      if (regno <= BX_REG || TARGET_64BIT)
	return true;
      if (!TARGET_PARTIAL_REG_STALL)
	return true;
      return !can_create_pseudo_p ();
    }
  /* We handle both integer and floats in the general purpose registers.  */
  else if (VALID_INT_MODE_P (mode))
    return true;
  else if (VALID_FP_MODE_P (mode))
    return true;
  else if (VALID_DFP_MODE_P (mode))
    return true;
  /* Lots of MMX code casts 8 byte vector modes to DImode.  If we then go
     on to use that value in smaller contexts, this can easily force a
     pseudo to be allocated to GENERAL_REGS.  Since this is no worse than
     supporting DImode, allow it.  */
  else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
    return true;

  return false;
}
/* A subroutine of ix86_modes_tieable_p.  Return true if MODE is a
   tieable integer mode.  */

static bool
ix86_tieable_integer_mode_p (enum machine_mode mode)
{
  switch (mode)
    {
    case HImode:
    case SImode:
      return true;

    case QImode:
      return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;

    case DImode:
      return TARGET_64BIT;

    default:
      return false;
    }
}

/* Return true if MODE1 is accessible in a register that can hold MODE2
   without copying.  That is, all register classes that can hold MODE2
   can also hold MODE1.  */

bool
ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
{
  if (mode1 == mode2)
    return true;

  if (ix86_tieable_integer_mode_p (mode1)
      && ix86_tieable_integer_mode_p (mode2))
    return true;

  /* MODE2 being XFmode implies fp stack or general regs, which means we
     can tie any smaller floating point modes to it.  Note that we do not
     tie this with TFmode.  */
  if (mode2 == XFmode)
    return mode1 == SFmode || mode1 == DFmode;

  /* MODE2 being DFmode implies fp stack, general or sse regs, which means
     that we can tie it with SFmode.  */
  if (mode2 == DFmode)
    return mode1 == SFmode;

  /* If MODE2 is only appropriate for an SSE register, then tie with
     any other mode acceptable to SSE registers.  */
  if (GET_MODE_SIZE (mode2) == 16
      && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
    return (GET_MODE_SIZE (mode1) == 16
	    && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));

  /* If MODE2 is appropriate for an MMX register, then tie
     with any other mode acceptable to MMX registers.  */
  if (GET_MODE_SIZE (mode2) == 8
      && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
    return (GET_MODE_SIZE (mode1) == 8
	    && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));

  return false;
}
/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.  */
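/* An illustrative example: (plus:SI (plus:SI (mult:SI (reg) (const_int 4))
   (reg)) (const_int 12)) matches the PLUS case below and is costed as a
   single lea 12(%base,%index,4) rather than a multiply followed by two
   additions.  */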
static bool
ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total, bool speed)
{
  enum rtx_code outer_code = (enum rtx_code) outer_code_i;
  enum machine_mode mode = GET_MODE (x);
  const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;

  switch (code)
    {
    case CONST_INT:
    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
	*total = 3;
      else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
	*total = 2;
      else if (flag_pic && SYMBOLIC_CONST (x)
	       && (!TARGET_64BIT
		   || (GET_CODE (x) != LABEL_REF
		       && (GET_CODE (x) != SYMBOL_REF
			   || !SYMBOL_REF_LOCAL_P (x)))))
	*total = 1;
      else
	*total = 0;
      return true;

    case CONST_DOUBLE:
      if (mode == VOIDmode)
	*total = 0;
      else
	switch (standard_80387_constant_p (x))
	  {
	  case 1: /* 0.0 */
	    *total = 1;
	    break;
	  default: /* Other constants */
	    *total = 2;
	    break;
	  case 0:
	  case -1:
	    /* Start with (MEM (SYMBOL_REF)), since that's where
	       it'll probably end up.  Add a penalty for size.  */
	    *total = (COSTS_N_INSNS (1)
		      + (flag_pic != 0 && !TARGET_64BIT)
		      + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
	    break;
	  }
      return true;

    case ZERO_EXTEND:
      /* The zero extensions is often completely free on x86_64, so make
	 it as cheap as possible.  */
      if (TARGET_64BIT && mode == DImode
	  && GET_MODE (XEXP (x, 0)) == SImode)
	*total = 1;
      else if (TARGET_ZERO_EXTEND_WITH_AND)
	*total = cost->add;
      else
	*total = cost->movzx;
      return false;

    case SIGN_EXTEND:
      *total = cost->movsx;
      return false;

    case ASHIFT:
      if (CONST_INT_P (XEXP (x, 1))
	  && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
	{
	  HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
	  if (value == 1)
	    {
	      *total = cost->add;
	      return false;
	    }
	  if ((value == 2 || value == 3)
	      && cost->lea <= cost->shift_const)
	    {
	      *total = cost->lea;
	      return false;
	    }
	}
      /* FALLTHRU */

    case ROTATE:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
	{
	  if (CONST_INT_P (XEXP (x, 1)))
	    {
	      if (INTVAL (XEXP (x, 1)) > 32)
		*total = cost->shift_const + COSTS_N_INSNS (2);
	      else
		*total = cost->shift_const * 2;
	    }
	  else
	    {
	      if (GET_CODE (XEXP (x, 1)) == AND)
		*total = cost->shift_var * 2;
	      else
		*total = cost->shift_var * 6 + COSTS_N_INSNS (2);
	    }
	}
      else
	{
	  if (CONST_INT_P (XEXP (x, 1)))
	    *total = cost->shift_const;
	  else
	    *total = cost->shift_var;
	}
      return false;

    case FMA:
      {
	rtx sub;

	gcc_assert (FLOAT_MODE_P (mode));
	gcc_assert (TARGET_FMA || TARGET_FMA4);

	/* ??? SSE scalar/vector cost should be used here.  */
	/* ??? Bald assumption that fma has the same cost as fmul.  */
	*total = cost->fmul;
	*total += rtx_cost (XEXP (x, 1), FMA, speed);

	/* Negate in op0 or op2 is free: FMS, FNMA, FNMS.  */
	sub = XEXP (x, 0);
	if (GET_CODE (sub) == NEG)
	  sub = XEXP (sub, 0);
	*total += rtx_cost (sub, FMA, speed);

	sub = XEXP (x, 2);
	if (GET_CODE (sub) == NEG)
	  sub = XEXP (sub, 0);
	*total += rtx_cost (sub, FMA, speed);
	return true;
      }

    case MULT:
      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
	{
	  /* ??? SSE scalar cost should be used here.  */
	  *total = cost->fmul;
	  return false;
	}
      else if (X87_FLOAT_MODE_P (mode))
	{
	  *total = cost->fmul;
	  return false;
	}
      else if (FLOAT_MODE_P (mode))
	{
	  /* ??? SSE vector cost should be used here.  */
	  *total = cost->fmul;
	  return false;
	}
      else
	{
	  rtx op0 = XEXP (x, 0);
	  rtx op1 = XEXP (x, 1);
	  int nbits;
	  if (CONST_INT_P (XEXP (x, 1)))
	    {
	      unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
	      for (nbits = 0; value != 0; value &= value - 1)
		nbits++;
	    }
	  else
	    /* This is arbitrary.  */
	    nbits = 7;

	  /* Compute costs correctly for widening multiplication.  */
	  if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
	      && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
		 == GET_MODE_SIZE (mode))
	    {
	      int is_mulwiden = 0;
	      enum machine_mode inner_mode = GET_MODE (op0);

	      if (GET_CODE (op0) == GET_CODE (op1))
		is_mulwiden = 1, op1 = XEXP (op1, 0);
	      else if (CONST_INT_P (op1))
		{
		  if (GET_CODE (op0) == SIGN_EXTEND)
		    is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
				  == INTVAL (op1);
		  else
		    is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
		}

	      if (is_mulwiden)
		op0 = XEXP (op0, 0), mode = GET_MODE (op0);
	    }

	  *total = (cost->mult_init[MODE_INDEX (mode)]
		    + nbits * cost->mult_bit
		    + rtx_cost (op0, outer_code, speed)
		    + rtx_cost (op1, outer_code, speed));

	  return true;
	}

    case DIV:
    case UDIV:
    case MOD:
    case UMOD:
      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
	/* ??? SSE cost should be used here.  */
	*total = cost->fdiv;
      else if (X87_FLOAT_MODE_P (mode))
	*total = cost->fdiv;
      else if (FLOAT_MODE_P (mode))
	/* ??? SSE vector cost should be used here.  */
	*total = cost->fdiv;
      else
	*total = cost->divide[MODE_INDEX (mode)];
      return false;

    case PLUS:
      if (GET_MODE_CLASS (mode) == MODE_INT
	  && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
	{
	  if (GET_CODE (XEXP (x, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	      && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
	      && CONSTANT_P (XEXP (x, 1)))
	    {
	      HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
	      if (val == 2 || val == 4 || val == 8)
		{
		  *total = cost->lea;
		  *total += rtx_cost (XEXP (XEXP (x, 0), 1),
				      outer_code, speed);
		  *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
				      outer_code, speed);
		  *total += rtx_cost (XEXP (x
, 1), outer_code
, speed
);
28973 else if (GET_CODE (XEXP (x
, 0)) == MULT
28974 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))
28976 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (x
, 0), 1));
28977 if (val
== 2 || val
== 4 || val
== 8)
28979 *total
= cost
->lea
;
28980 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
, speed
);
28981 *total
+= rtx_cost (XEXP (x
, 1), outer_code
, speed
);
28985 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
28987 *total
= cost
->lea
;
28988 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
, speed
);
28989 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1), outer_code
, speed
);
28990 *total
+= rtx_cost (XEXP (x
, 1), outer_code
, speed
);
28997 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
28999 /* ??? SSE cost should be used here. */
29000 *total
= cost
->fadd
;
29003 else if (X87_FLOAT_MODE_P (mode
))
29005 *total
= cost
->fadd
;
29008 else if (FLOAT_MODE_P (mode
))
29010 /* ??? SSE vector cost should be used here. */
29011 *total
= cost
->fadd
;
29019 if (!TARGET_64BIT
&& mode
== DImode
)
29021 *total
= (cost
->add
* 2
29022 + (rtx_cost (XEXP (x
, 0), outer_code
, speed
)
29023 << (GET_MODE (XEXP (x
, 0)) != DImode
))
29024 + (rtx_cost (XEXP (x
, 1), outer_code
, speed
)
29025 << (GET_MODE (XEXP (x
, 1)) != DImode
)));
29031 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
29033 /* ??? SSE cost should be used here. */
29034 *total
= cost
->fchs
;
29037 else if (X87_FLOAT_MODE_P (mode
))
29039 *total
= cost
->fchs
;
29042 else if (FLOAT_MODE_P (mode
))
29044 /* ??? SSE vector cost should be used here. */
29045 *total
= cost
->fchs
;
29051 if (!TARGET_64BIT
&& mode
== DImode
)
29052 *total
= cost
->add
* 2;
29054 *total
= cost
->add
;
29058 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTRACT
29059 && XEXP (XEXP (x
, 0), 1) == const1_rtx
29060 && CONST_INT_P (XEXP (XEXP (x
, 0), 2))
29061 && XEXP (x
, 1) == const0_rtx
)
29063 /* This kind of construct is implemented using test[bwl].
29064 Treat it as if we had an AND. */
29065 *total
= (cost
->add
29066 + rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
, speed
)
29067 + rtx_cost (const1_rtx
, outer_code
, speed
));
29073 if (!(SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
))
29078 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
29079 /* ??? SSE cost should be used here. */
29080 *total
= cost
->fabs
;
29081 else if (X87_FLOAT_MODE_P (mode
))
29082 *total
= cost
->fabs
;
29083 else if (FLOAT_MODE_P (mode
))
29084 /* ??? SSE vector cost should be used here. */
29085 *total
= cost
->fabs
;
29089 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
29090 /* ??? SSE cost should be used here. */
29091 *total
= cost
->fsqrt
;
29092 else if (X87_FLOAT_MODE_P (mode
))
29093 *total
= cost
->fsqrt
;
29094 else if (FLOAT_MODE_P (mode
))
29095 /* ??? SSE vector cost should be used here. */
29096 *total
= cost
->fsqrt
;
29100 if (XINT (x
, 1) == UNSPEC_TP
)
29107 case VEC_DUPLICATE
:
29108 /* ??? Assume all of these vector manipulation patterns are
29109 recognizable. In which case they all pretty much have the
29111 *total
= COSTS_N_INSNS (1);
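/* An illustrative, standalone sketch (not GCC code) of the bit-counting
   loop the MULT case above uses to price a multiply by a constant:
   "value &= value - 1" clears the lowest set bit per iteration, so the
   loop counts the set bits of the multiplier, each of which is charged
   cost->mult_bit.  */
#if 0
static int
count_set_bits (unsigned long long value)
{
  int nbits;
  for (nbits = 0; value != 0; value &= value - 1)
    nbits++;			/* one iteration per set bit */
  return nbits;			/* e.g. 0x14 -> 2 */
}
#endif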
#if TARGET_MACHO

static int current_machopic_label_num;

/* Given a symbol name and its associated stub, write out the
   definition of the stub.  */

void
machopic_output_stub (FILE *file, const char *symb, const char *stub)
{
  unsigned int length;
  char *binder_name, *symbol_name, lazy_ptr_name[32];
  int label = ++current_machopic_label_num;

  /* For 64-bit we shouldn't get here.  */
  gcc_assert (!TARGET_64BIT);

  /* Lose our funky encoding stuff so it doesn't contaminate the stub.  */
  symb = targetm.strip_name_encoding (symb);

  length = strlen (stub);
  binder_name = XALLOCAVEC (char, length + 32);
  GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);

  length = strlen (symb);
  symbol_name = XALLOCAVEC (char, length + 32);
  GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);

  sprintf (lazy_ptr_name, "L%d$lz", label);

  if (MACHOPIC_ATT_STUB)
    switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
  else if (MACHOPIC_PURE)
    switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
  else
    switch_to_section (darwin_sections[machopic_symbol_stub_section]);

  fprintf (file, "%s:\n", stub);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);

  if (MACHOPIC_ATT_STUB)
    {
      fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
    }
  else if (MACHOPIC_PURE)
    {
      /* 25-byte PIC stub using "CALL get_pc_thunk".  */
      rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
      output_set_got (tmp, NULL_RTX);	/* "CALL ___<cpu>.get_pc_thunk.cx".  */
      fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
	       label, lazy_ptr_name, label);
      fprintf (file, "\tjmp\t*%%ecx\n");
    }
  else
    fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);

  /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
     it needs no stub-binding-helper.  */
  if (MACHOPIC_ATT_STUB)
    return;

  fprintf (file, "%s:\n", binder_name);

  if (MACHOPIC_PURE)
    {
      fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
      fprintf (file, "\tpushl\t%%ecx\n");
    }
  else
    fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);

  fputs ("\tjmp\tdyld_stub_binding_helper\n", file);

  /* N.B. Keep the correspondence of these
     'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
     old-pic/new-pic/non-pic stubs; altering this will break
     compatibility with existing dylibs.  */
  if (MACHOPIC_PURE)
    {
      /* 25-byte PIC stub using "CALL get_pc_thunk".  */
      switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
    }
  else
    /* 16-byte -mdynamic-no-pic stub.  */
    switch_to_section (darwin_sections[machopic_lazy_symbol_ptr3_section]);

  fprintf (file, "%s:\n", lazy_ptr_name);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
  fprintf (file, ASM_LONG "%s\n", binder_name);
}
#endif /* TARGET_MACHO */
/* Order the registers for register allocator.  */

void
x86_order_regs_for_local_alloc (void)
{
  int pos = 0;
  int i;

  /* First allocate the local general purpose registers.  */
  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    if (GENERAL_REGNO_P (i) && call_used_regs[i])
      reg_alloc_order[pos++] = i;

  /* Global general purpose registers.  */
  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    if (GENERAL_REGNO_P (i) && !call_used_regs[i])
      reg_alloc_order[pos++] = i;

  /* x87 registers come first in case we are doing FP math
     using them.  */
  if (!TARGET_SSE_MATH)
    for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
      reg_alloc_order[pos++] = i;

  /* SSE registers.  */
  for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
    reg_alloc_order[pos++] = i;
  for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
    reg_alloc_order[pos++] = i;

  /* x87 registers.  */
  if (TARGET_SSE_MATH)
    for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
      reg_alloc_order[pos++] = i;

  for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
    reg_alloc_order[pos++] = i;

  /* Initialize the rest of array as we do not allocate some registers
     at all.  */
  while (pos < FIRST_PSEUDO_REGISTER)
    reg_alloc_order[pos++] = 0;
}
/* Handle a "callee_pop_aggregate_return" attribute; arguments as
   in struct attribute_spec handler.  */

static tree
ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
					 tree args,
					 int flags ATTRIBUTE_UNUSED,
					 bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_TYPE
      && TREE_CODE (*node) != METHOD_TYPE
      && TREE_CODE (*node) != FIELD_DECL
      && TREE_CODE (*node) != TYPE_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
      return NULL_TREE;
    }
  if (TARGET_64BIT)
    {
      warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
	       name);
      *no_add_attrs = true;
      return NULL_TREE;
    }
  if (is_attribute_p ("callee_pop_aggregate_return", name))
    {
      tree cst;

      cst = TREE_VALUE (args);
      if (TREE_CODE (cst) != INTEGER_CST)
	{
	  warning (OPT_Wattributes,
		   "%qE attribute requires an integer constant argument",
		   name);
	  *no_add_attrs = true;
	}
      else if (compare_tree_int (cst, 0) != 0
	       && compare_tree_int (cst, 1) != 0)
	{
	  warning (OPT_Wattributes,
		   "argument to %qE attribute is neither zero, nor one",
		   name);
	  *no_add_attrs = true;
	}
    }

  return NULL_TREE;
}
/* Handle a "ms_abi" or "sysv" attribute; arguments as in
   struct attribute_spec.handler.  */

static tree
ix86_handle_abi_attribute (tree *node, tree name,
			   tree args ATTRIBUTE_UNUSED,
			   int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_TYPE
      && TREE_CODE (*node) != METHOD_TYPE
      && TREE_CODE (*node) != FIELD_DECL
      && TREE_CODE (*node) != TYPE_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
      return NULL_TREE;
    }

  /* Can combine regparm with all attributes but fastcall.  */
  if (is_attribute_p ("ms_abi", name))
    {
      if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
	{
	  error ("ms_abi and sysv_abi attributes are not compatible");
	}

      return NULL_TREE;
    }
  else if (is_attribute_p ("sysv_abi", name))
    {
      if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
	{
	  error ("ms_abi and sysv_abi attributes are not compatible");
	}

      return NULL_TREE;
    }

  return NULL_TREE;
}
/* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
   struct attribute_spec.handler.  */

static tree
ix86_handle_struct_attribute (tree *node, tree name,
			      tree args ATTRIBUTE_UNUSED,
			      int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  tree *type = NULL;
  if (DECL_P (*node))
    {
      if (TREE_CODE (*node) == TYPE_DECL)
	type = &TREE_TYPE (*node);
    }
  else
    type = node;

  if (!(type && (TREE_CODE (*type) == RECORD_TYPE
		 || TREE_CODE (*type) == UNION_TYPE)))
    {
      warning (OPT_Wattributes, "%qE attribute ignored",
	       name);
      *no_add_attrs = true;
    }

  else if ((is_attribute_p ("ms_struct", name)
	    && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
	   || ((is_attribute_p ("gcc_struct", name)
		&& lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
    {
      warning (OPT_Wattributes, "%qE incompatible attribute ignored",
	       name);
      *no_add_attrs = true;
    }

  return NULL_TREE;
}
static tree
ix86_handle_fndecl_attribute (tree *node, tree name,
			      tree args ATTRIBUTE_UNUSED,
			      int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
    }
  return NULL_TREE;
}
static bool
ix86_ms_bitfield_layout_p (const_tree record_type)
{
  return ((TARGET_MS_BITFIELD_LAYOUT
	   && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
	  || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
}
/* Returns an expression indicating where the this parameter is
   located on entry to the FUNCTION.  */

static rtx
x86_this_parameter (tree function)
{
  tree type = TREE_TYPE (function);
  bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
  int nregs;

  if (TARGET_64BIT)
    {
      const int *parm_regs;

      if (ix86_function_type_abi (type) == MS_ABI)
	parm_regs = x86_64_ms_abi_int_parameter_registers;
      else
	parm_regs = x86_64_int_parameter_registers;
      return gen_rtx_REG (DImode, parm_regs[aggr]);
    }

  nregs = ix86_function_regparm (type, function);

  if (nregs > 0 && !stdarg_p (type))
    {
      int regno;
      unsigned int ccvt = ix86_get_callcvt (type);

      if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
	regno = aggr ? DX_REG : CX_REG;
      else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
	{
	  regno = CX_REG;
	  if (aggr)
	    return gen_rtx_MEM (SImode,
				plus_constant (stack_pointer_rtx, 4));
	}
      else
	{
	  regno = AX_REG;
	  if (aggr)
	    {
	      regno = DX_REG;
	      if (nregs == 1)
		return gen_rtx_MEM (SImode,
				    plus_constant (stack_pointer_rtx, 4));
	    }
	}
      return gen_rtx_REG (SImode, regno);
    }

  return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
}
/* Determine whether x86_output_mi_thunk can succeed.  */

static bool
x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
			 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
			 HOST_WIDE_INT vcall_offset, const_tree function)
{
  /* 64-bit can handle anything.  */
  if (TARGET_64BIT)
    return true;

  /* For 32-bit, everything's fine if we have one free register.  */
  if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
    return true;

  /* Need a free register for vcall_offset.  */
  if (vcall_offset)
    return false;

  /* Need a free register for GOT references.  */
  if (flag_pic && !targetm.binds_local_p (function))
    return false;

  /* Otherwise ok.  */
  return true;
}
/* Output the assembler code for a thunk function.  THUNK_DECL is the
   declaration for the thunk function itself, FUNCTION is the decl for
   the target function.  DELTA is an immediate constant offset to be
   added to THIS.  If VCALL_OFFSET is nonzero, the word at
   *(*this + vcall_offset) should be added to THIS.  */

static void
x86_output_mi_thunk (FILE *file,
		     tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
		     HOST_WIDE_INT vcall_offset, tree function)
{
  rtx this_param = x86_this_parameter (function);
  rtx this_reg, tmp, fnaddr;

  emit_note (NOTE_INSN_PROLOGUE_END);

  /* If VCALL_OFFSET, we'll need THIS in a register.  Might as well
     pull it in now and let DELTA benefit.  */
  if (REG_P (this_param))
    this_reg = this_param;
  else if (vcall_offset)
    {
      /* Put the this parameter into %eax.  */
      this_reg = gen_rtx_REG (Pmode, AX_REG);
      emit_move_insn (this_reg, this_param);
    }
  else
    this_reg = NULL_RTX;

  /* Adjust the this parameter by a fixed constant.  */
  if (delta)
    {
      rtx delta_rtx = GEN_INT (delta);
      rtx delta_dst = this_reg ? this_reg : this_param;

      if (TARGET_64BIT)
	{
	  if (!x86_64_general_operand (delta_rtx, Pmode))
	    {
	      tmp = gen_rtx_REG (Pmode, R10_REG);
	      emit_move_insn (tmp, delta_rtx);
	      delta_rtx = tmp;
	    }
	}

      emit_insn (ix86_gen_add3 (delta_dst, delta_dst, delta_rtx));
    }

  /* Adjust the this parameter by a value stored in the vtable.  */
  if (vcall_offset)
    {
      rtx vcall_addr, vcall_mem, this_mem;
      unsigned int tmp_regno;

      if (TARGET_64BIT)
	tmp_regno = R10_REG;
      else
	{
	  unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
	  if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) != 0)
	    tmp_regno = AX_REG;
	  else
	    tmp_regno = CX_REG;
	}
      tmp = gen_rtx_REG (Pmode, tmp_regno);

      this_mem = gen_rtx_MEM (ptr_mode, this_reg);
      if (Pmode != ptr_mode)
	this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
      emit_move_insn (tmp, this_mem);

      /* Adjust the this parameter.  */
      vcall_addr = plus_constant (tmp, vcall_offset);
      if (TARGET_64BIT
	  && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
	{
	  rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
	  emit_move_insn (tmp2, GEN_INT (vcall_offset));
	  vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
	}

      vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
      if (Pmode != ptr_mode)
	emit_insn (gen_addsi_1_zext (this_reg,
				     gen_rtx_REG (ptr_mode,
						  REGNO (this_reg)),
				     vcall_mem));
      else
	emit_insn (ix86_gen_add3 (this_reg, this_reg, vcall_mem));
    }

  /* If necessary, drop THIS back to its stack slot.  */
  if (this_reg && this_reg != this_param)
    emit_move_insn (this_param, this_reg);

  fnaddr = XEXP (DECL_RTL (function), 0);
  if (TARGET_64BIT)
    {
      if (!flag_pic || targetm.binds_local_p (function)
	  || cfun->machine->call_abi == MS_ABI)
	;
      else
	{
	  tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
	  tmp = gen_rtx_CONST (Pmode, tmp);
	  fnaddr = gen_rtx_MEM (Pmode, tmp);
	}
    }
  else
    {
      if (!flag_pic || targetm.binds_local_p (function))
	;
#if TARGET_MACHO
      else if (TARGET_MACHO)
	{
	  fnaddr = machopic_indirect_call_target (DECL_RTL (function));
	  fnaddr = XEXP (fnaddr, 0);
	}
#endif /* TARGET_MACHO */
      else
	{
	  tmp = gen_rtx_REG (Pmode, CX_REG);
	  output_set_got (tmp, NULL_RTX);

	  fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
	  fnaddr = gen_rtx_PLUS (Pmode, fnaddr, tmp);
	  fnaddr = gen_rtx_MEM (Pmode, fnaddr);
	}
    }

  /* Our sibling call patterns do not allow memories, because we have no
     predicate that can distinguish between frame and non-frame memory.
     For our purposes here, we can get away with (ab)using a jump pattern,
     because we're going to do no optimization.  */
  if (MEM_P (fnaddr))
    emit_jump_insn (gen_indirect_jump (fnaddr));
  else
    {
      tmp = gen_rtx_MEM (QImode, fnaddr);
      tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
      tmp = emit_call_insn (tmp);
      SIBLING_CALL_P (tmp) = 1;
    }
  emit_barrier ();

  /* Emit just enough of rest_of_compilation to get the insns emitted.
     Note that use_thunk calls assemble_start_function et al.  */
  tmp = get_insns ();
  insn_locators_alloc ();
  shorten_branches (tmp);
  final_start_function (tmp, file, 1);
  final (tmp, file, 1);
  final_end_function ();
}
static void
x86_file_start (void)
{
  default_file_start ();
#if TARGET_MACHO
  darwin_file_start ();
#endif
  if (X86_FILE_START_VERSION_DIRECTIVE)
    fputs ("\t.version\t\"01.01\"\n", asm_out_file);
  if (X86_FILE_START_FLTUSED)
    fputs ("\t.global\t__fltused\n", asm_out_file);
  if (ix86_asm_dialect == ASM_INTEL)
    fputs ("\t.intel_syntax noprefix\n", asm_out_file);
}
int
x86_field_alignment (tree field, int computed)
{
  enum machine_mode mode;
  tree type = TREE_TYPE (field);

  if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
    return computed;
  mode = TYPE_MODE (strip_array_types (type));
  if (mode == DFmode || mode == DCmode
      || GET_MODE_CLASS (mode) == MODE_INT
      || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
    return MIN (32, computed);
  return computed;
}
/* Output assembler code to FILE to increment profiler label # LABELNO
   for profiling a function entry.  */

void
x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
{
  const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
					 : MCOUNT_NAME);

  if (TARGET_64BIT)
    {
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
#endif

      if (DEFAULT_ABI == SYSV_ABI && flag_pic)
	fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
      else
	fprintf (file, "\tcall\t%s\n", mcount_name);
    }
  else if (flag_pic)
    {
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
	       LPREFIX, labelno);
#endif
      fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
    }
  else
    {
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
	       LPREFIX, labelno);
#endif
      fprintf (file, "\tcall\t%s\n", mcount_name);
    }
}
/* We don't have exact information about the insn sizes, but we may assume
   quite safely that we are informed about all 1 byte insns and memory
   address sizes.  This is enough to eliminate unnecessary padding in
   99% of cases.  */

static int
min_insn_size (rtx insn)
{
  int l = 0, len;

  if (!INSN_P (insn) || !active_insn_p (insn))
    return 0;

  /* Discard alignments we've emitted and jump instructions.  */
  if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
      && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
    return 0;
  if (JUMP_TABLE_DATA_P (insn))
    return 0;

  /* Important case - calls are always 5 bytes.
     It is common to have many calls in a row.  */
  if (CALL_P (insn)
      && symbolic_reference_mentioned_p (PATTERN (insn))
      && !SIBLING_CALL_P (insn))
    return 5;
  len = get_attr_length (insn);
  if (len <= 1)
    return 1;

  /* For normal instructions we rely on get_attr_length being exact,
     with a few exceptions.  */
  if (!JUMP_P (insn))
    {
      enum attr_type type = get_attr_type (insn);

      switch (type)
	{
	case TYPE_MULTI:
	  if (GET_CODE (PATTERN (insn)) == ASM_INPUT
	      || asm_noperands (PATTERN (insn)) >= 0)
	    return 0;
	  break;
	case TYPE_OTHER:
	case TYPE_FCMP:
	  break;
	default:
	  /* Otherwise trust get_attr_length.  */
	  return len;
	}

      l = get_attr_length_address (insn);
      if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
	l = 4;
    }
  if (l)
    return 1 + l;
  else
    return 2;
}
#ifdef ASM_OUTPUT_MAX_SKIP_PAD
/* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
   window.  */

static void
ix86_avoid_jump_mispredicts (void)
{
  rtx insn, start = get_insns ();
  int nbytes = 0, njumps = 0;
  int isjump = 0;

  /* Look for all minimal intervals of instructions containing 4 jumps.
     The intervals are bounded by START and INSN.  NBYTES is the total
     size of instructions in the interval including INSN and not including
     START.  When the NBYTES is smaller than 16 bytes, it is possible
     that the end of START and INSN ends up in the same 16byte page.

     The smallest offset in the page INSN can start is the case where START
     ends on the offset 0.  Offset of INSN is then NBYTES - sizeof (INSN).
     We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).
     */
  for (insn = start; insn; insn = NEXT_INSN (insn))
    {
      int min_size;

      if (LABEL_P (insn))
	{
	  int align = label_to_alignment (insn);
	  int max_skip = label_to_max_skip (insn);

	  if (max_skip > 15)
	    max_skip = 15;
	  /* If align > 3, only up to 16 - max_skip - 1 bytes can be
	     already in the current 16 byte page, because otherwise
	     ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
	     bytes to reach 16 byte boundary.  */
	  if (align <= 0
	      || (align <= 3 && max_skip != (1 << align) - 1))
	    max_skip = 0;
	  if (dump_file)
	    fprintf (dump_file, "Label %i with max_skip %i\n",
		     INSN_UID (insn), max_skip);
	  if (max_skip)
	    {
	      while (nbytes + max_skip >= 16)
		{
		  start = NEXT_INSN (start);
		  if ((JUMP_P (start)
		       && GET_CODE (PATTERN (start)) != ADDR_VEC
		       && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
		      || CALL_P (start))
		    njumps--, isjump = 1;
		  else
		    isjump = 0;
		  nbytes -= min_insn_size (start);
		}
	    }
	  continue;
	}

      min_size = min_insn_size (insn);
      nbytes += min_size;
      if (dump_file)
	fprintf (dump_file, "Insn %i estimated to %i bytes\n",
		 INSN_UID (insn), min_size);
      if ((JUMP_P (insn)
	   && GET_CODE (PATTERN (insn)) != ADDR_VEC
	   && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
	  || CALL_P (insn))
	njumps++;
      else
	continue;

      while (njumps > 3)
	{
	  start = NEXT_INSN (start);
	  if ((JUMP_P (start)
	       && GET_CODE (PATTERN (start)) != ADDR_VEC
	       && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
	      || CALL_P (start))
	    njumps--, isjump = 1;
	  else
	    isjump = 0;
	  nbytes -= min_insn_size (start);
	}
      gcc_assert (njumps >= 0);
      if (dump_file)
	fprintf (dump_file, "Interval %i to %i has %i bytes\n",
		 INSN_UID (start), INSN_UID (insn), nbytes);

      if (njumps == 3 && isjump && nbytes < 16)
	{
	  int padsize = 15 - nbytes + min_insn_size (insn);

	  if (dump_file)
	    fprintf (dump_file, "Padding insn %i by %i bytes!\n",
		     INSN_UID (insn), padsize);
	  emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
	}
    }
}
#endif
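/* Illustrative only: the padding arithmetic used above, in isolation.
   If a fourth jump ends NBYTES into the window (NBYTES includes the
   jump itself) and NBYTES < 16, aligning so that at most
   15 - NBYTES + sizeof (INSN) bytes are skipped pushes the jump out of
   the previous 16-byte page.  E.g. nbytes == 12 with a 2-byte jump
   gives a pad of 15 - 12 + 2 = 5 bytes.  */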
/* AMD Athlon works faster
   when RET is not destination of conditional jump or directly preceded
   by other jump instruction.  We avoid the penalty by inserting NOP just
   before the RET instructions in such cases.  */
static void
ix86_pad_returns (void)
{
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
    {
      basic_block bb = e->src;
      rtx ret = BB_END (bb);
      rtx prev;
      bool replace = false;

      if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
	  || optimize_bb_for_size_p (bb))
	continue;
      for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
	if (active_insn_p (prev) || LABEL_P (prev))
	  break;
      if (prev && LABEL_P (prev))
	{
	  edge e;
	  edge_iterator ei;

	  FOR_EACH_EDGE (e, ei, bb->preds)
	    if (EDGE_FREQUENCY (e) && e->src->index >= 0
		&& !(e->flags & EDGE_FALLTHRU))
	      replace = true;
	}
      if (!replace)
	{
	  prev = prev_active_insn (ret);
	  if (prev
	      && ((JUMP_P (prev) && any_condjump_p (prev))
		  || CALL_P (prev)))
	    replace = true;
	  /* Empty functions get branch mispredict even when
	     the jump destination is not visible to us.  */
	  if (!prev && !optimize_function_for_size_p (cfun))
	    replace = true;
	}
      if (replace)
	{
	  emit_jump_insn_before (gen_return_internal_long (), ret);
	  delete_insn (ret);
	}
    }
}
/* Count the minimum number of instructions in BB.  Return 4 if the
   number of instructions >= 4.  */

static int
ix86_count_insn_bb (basic_block bb)
{
  rtx insn;
  int insn_count = 0;

  /* Count number of instructions in this block.  Return 4 if the number
     of instructions >= 4.  */
  FOR_BB_INSNS (bb, insn)
    {
      /* Only happens in exit blocks.  */
      if (JUMP_P (insn)
	  && GET_CODE (PATTERN (insn)) == RETURN)
	break;

      if (NONDEBUG_INSN_P (insn)
	  && GET_CODE (PATTERN (insn)) != USE
	  && GET_CODE (PATTERN (insn)) != CLOBBER)
	{
	  insn_count++;
	  if (insn_count >= 4)
	    return insn_count;
	}
    }

  return insn_count;
}
/* Count the minimum number of instructions in code path in BB.
   Return 4 if the number of instructions >= 4.  */

static int
ix86_count_insn (basic_block bb)
{
  edge e;
  edge_iterator ei;
  int min_prev_count;

  /* Only bother counting instructions along paths with no
     more than 2 basic blocks between entry and exit.  Given
     that BB has an edge to exit, determine if a predecessor
     of BB has an edge from entry.  If so, compute the number
     of instructions in the predecessor block.  If there
     happen to be multiple such blocks, compute the minimum.  */
  min_prev_count = 4;
  FOR_EACH_EDGE (e, ei, bb->preds)
    {
      edge prev_e;
      edge_iterator prev_ei;

      if (e->src == ENTRY_BLOCK_PTR)
	{
	  min_prev_count = 0;
	  break;
	}
      FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
	{
	  if (prev_e->src == ENTRY_BLOCK_PTR)
	    {
	      int count = ix86_count_insn_bb (e->src);
	      if (count < min_prev_count)
		min_prev_count = count;
	      break;
	    }
	}
    }

  if (min_prev_count < 4)
    min_prev_count += ix86_count_insn_bb (bb);

  return min_prev_count;
}
/* Pad short function to 4 instructions.   */

static void
ix86_pad_short_function (void)
{
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
    {
      rtx ret = BB_END (e->src);
      if (JUMP_P (ret) && GET_CODE (PATTERN (ret)) == RETURN)
	{
	  int insn_count = ix86_count_insn (e->src);

	  /* Pad short function.  */
	  if (insn_count < 4)
	    {
	      rtx insn = ret;

	      /* Find epilogue.  */
	      while (insn
		     && (!NOTE_P (insn)
			 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
		insn = PREV_INSN (insn);

	      if (!insn)
		insn = ret;

	      /* Two NOPs count as one instruction.  */
	      insn_count = 2 * (4 - insn_count);
	      emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
	    }
	}
    }
}
/* Implement machine specific optimizations.  We implement padding of returns
   for K8 CPUs and a pass to avoid 4 jumps in a single 16 byte window.  */
static void
ix86_reorg (void)
{
  /* We are freeing block_for_insn in the toplev to keep compatibility
     with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
  compute_bb_for_insn ();

  /* Run the vzeroupper optimization if needed.  */
  if (TARGET_VZEROUPPER)
    move_or_delete_vzeroupper ();

  if (optimize && optimize_function_for_speed_p (cfun))
    {
      if (TARGET_PAD_SHORT_FUNCTION)
	ix86_pad_short_function ();
      else if (TARGET_PAD_RETURNS)
	ix86_pad_returns ();
#ifdef ASM_OUTPUT_MAX_SKIP_PAD
      if (TARGET_FOUR_JUMP_LIMIT)
	ix86_avoid_jump_mispredicts ();
#endif
    }
}
/* Return nonzero when QImode register that must be represented via REX prefix
   is used.  */
bool
x86_extended_QIreg_mentioned_p (rtx insn)
{
  int i;
  extract_insn_cached (insn);
  for (i = 0; i < recog_data.n_operands; i++)
    if (REG_P (recog_data.operand[i])
	&& REGNO (recog_data.operand[i]) > BX_REG)
      return true;
  return false;
}
/* Return nonzero when P points to register encoded via REX prefix.
   Called via for_each_rtx.  */
static int
extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
{
  unsigned int regno;
  if (!REG_P (*p))
    return 0;
  regno = REGNO (*p);
  return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
}

/* Return true when INSN mentions register that must be encoded using REX
   prefix.  */
bool
x86_extended_reg_mentioned_p (rtx insn)
{
  return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
		       extended_reg_mentioned_1, NULL);
}
/* If profitable, negate (without causing overflow) integer constant
   of mode MODE at location LOC.  Return true in this case.  */
bool
x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
{
  HOST_WIDE_INT val;

  if (!CONST_INT_P (*loc))
    return false;

  switch (mode)
    {
    case DImode:
      /* DImode x86_64 constants must fit in 32 bits.  */
      gcc_assert (x86_64_immediate_operand (*loc, mode));

      mode = SImode;
      break;

    case SImode:
    case HImode:
    case QImode:
      break;

    default:
      gcc_unreachable ();
    }

  /* Avoid overflows.  */
  if (mode_signbit_p (mode, *loc))
    return false;

  val = INTVAL (*loc);

  /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
     Exceptions: -128 encodes smaller than 128, so swap sign and op.  */
  if ((val < 0 && val != -128)
      || val == 128)
    {
      *loc = GEN_INT (-val);
      return true;
    }

  return false;
}
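/* A standalone sketch (not GCC code) of the sign-swap rule above: x86
   sign-extended 8-bit immediates cover [-128, 127], so "add $-128" fits
   in imm8 while the negated "sub $128" would need a wider immediate;
   conversely, +128 is worth negating into -128.  */
#if 0
static long
maybe_negate (long val, int *swapped_op)
{
  /* Prefer `subl $4,%eax' to `addl $-4,%eax', but leave -128 alone and
     do swap +128, mirroring the asymmetric imm8 range.  */
  if ((val < 0 && val != -128) || val == 128)
    {
      *swapped_op = 1;
      return -val;
    }
  *swapped_op = 0;
  return val;
}
#endif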
/* Generate an unsigned DImode/SImode to FP conversion.  This is the same code
   optabs would emit if we didn't have TFmode patterns.  */

void
x86_emit_floatuns (rtx operands[2])
{
  rtx neglab, donelab, i0, i1, f0, in, out;
  enum machine_mode mode, inmode;

  inmode = GET_MODE (operands[1]);
  gcc_assert (inmode == SImode || inmode == DImode);

  out = operands[0];
  in = force_reg (inmode, operands[1]);
  mode = GET_MODE (out);
  neglab = gen_label_rtx ();
  donelab = gen_label_rtx ();
  f0 = gen_reg_rtx (mode);

  emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);

  expand_float (out, in, 0);

  emit_jump_insn (gen_jump (donelab));
  emit_barrier ();

  emit_label (neglab);

  i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
			    1, OPTAB_DIRECT);
  i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
			    1, OPTAB_DIRECT);
  i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);

  expand_float (f0, i0, 0);

  emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));

  emit_label (donelab);
}
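/* A standalone C sketch (illustrative, not GCC code) of the sequence
   emitted above for inputs with the sign bit set: halve the value while
   folding the discarded low bit back in as a sticky bit (so the final
   rounding stays correct), convert as signed, then double.  */
#if 0
static double
floatuns_sketch (unsigned long long u)
{
  if ((long long) u >= 0)
    return (double) (long long) u;		/* fast path */
  unsigned long long half = (u >> 1) | (u & 1);	/* shift + sticky bit */
  double f = (double) (long long) half;
  return f + f;					/* undo the halving */
}
#endif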
/* AVX does not support 32-byte integer vector operations,
   thus the longest vector we are faced with is V16QImode.  */
#define MAX_VECT_LEN	16

struct expand_vec_perm_d
{
  rtx target, op0, op1;
  unsigned char perm[MAX_VECT_LEN];
  enum machine_mode vmode;
  unsigned char nelt;
  bool testing_p;
};

static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);

/* Get a vector mode of the same size as the original but with elements
   twice as wide.  This is only guaranteed to apply to integral vectors.  */

static inline enum machine_mode
get_mode_wider_vector (enum machine_mode o)
{
  /* ??? Rely on the ordering that genmodes.c gives to vectors.  */
  enum machine_mode n = GET_MODE_WIDER_MODE (o);
  gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
  gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
  return n;
}
/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   with all elements equal to VAR.  Return true if successful.  */

static bool
ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
				   rtx target, rtx val)
{
  bool ok;

  switch (mode)
    {
    case V2SImode:
    case V2SFmode:
      if (!mmx_ok)
	return false;
      /* FALLTHRU */

    case V4DFmode:
    case V4DImode:
    case V8SFmode:
    case V8SImode:
    case V2DFmode:
    case V2DImode:
    case V4SFmode:
    case V4SImode:
      {
	rtx insn, dup;

	/* First attempt to recognize VAL as-is.  */
	dup = gen_rtx_VEC_DUPLICATE (mode, val);
	insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
	if (recog_memoized (insn) < 0)
	  {
	    rtx seq;
	    /* If that fails, force VAL into a register.  */

	    start_sequence ();
	    XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
	    seq = get_insns ();
	    end_sequence ();
	    if (seq)
	      emit_insn_before (seq, insn);

	    ok = recog_memoized (insn) >= 0;
	    gcc_assert (ok);
	  }
      }
      return true;

    case V4HImode:
      if (!mmx_ok)
	return false;
      if (TARGET_SSE || TARGET_3DNOW_A)
	{
	  rtx x;

	  val = gen_lowpart (SImode, val);
	  x = gen_rtx_TRUNCATE (HImode, val);
	  x = gen_rtx_VEC_DUPLICATE (mode, x);
	  emit_insn (gen_rtx_SET (VOIDmode, target, x));
	  return true;
	}
      goto widen;

    case V8QImode:
      if (!mmx_ok)
	return false;
      goto widen;

    case V8HImode:
      if (TARGET_SSE2)
	{
	  struct expand_vec_perm_d dperm;
	  rtx tmp1, tmp2;

	permute:
	  memset (&dperm, 0, sizeof (dperm));
	  dperm.target = target;
	  dperm.vmode = mode;
	  dperm.nelt = GET_MODE_NUNITS (mode);
	  dperm.op0 = dperm.op1 = gen_reg_rtx (mode);

	  /* Extend to SImode using a paradoxical SUBREG.  */
	  tmp1 = gen_reg_rtx (SImode);
	  emit_move_insn (tmp1, gen_lowpart (SImode, val));

	  /* Insert the SImode value as low element of a V4SImode vector.  */
	  tmp2 = gen_lowpart (V4SImode, dperm.op0);
	  emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));

	  ok = (expand_vec_perm_1 (&dperm)
		|| expand_vec_perm_broadcast_1 (&dperm));
	  gcc_assert (ok);
	  return ok;
	}
      goto widen;

    case V16QImode:
      if (TARGET_SSE2)
	goto permute;
      goto widen;

    widen:
      /* Replicate the value once into the next wider mode and recurse.  */
      {
	enum machine_mode smode, wsmode, wvmode;
	rtx x;

	smode = GET_MODE_INNER (mode);
	wvmode = get_mode_wider_vector (mode);
	wsmode = GET_MODE_INNER (wvmode);

	val = convert_modes (wsmode, smode, val, true);
	x = expand_simple_binop (wsmode, ASHIFT, val,
				 GEN_INT (GET_MODE_BITSIZE (smode)),
				 NULL_RTX, 1, OPTAB_LIB_WIDEN);
	val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);

	x = gen_lowpart (wvmode, target);
	ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
	gcc_assert (ok);
	return ok;
      }

    case V16HImode:
    case V32QImode:
      {
	enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
	rtx x = gen_reg_rtx (hvmode);

	ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
	gcc_assert (ok);

	x = gen_rtx_VEC_CONCAT (mode, x, x);
	emit_insn (gen_rtx_SET (VOIDmode, target, x));
      }
      return true;

    default:
      return false;
    }
}
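/* Illustrative scalar analogue (not GCC code) of the "widen" step above:
   replicating an element into the next wider mode is a shift by the
   element width followed by an IOR, applied recursively until a mode
   with a native broadcast is reached.  */
#if 0
static unsigned int
replicate_byte (unsigned char b)
{
  unsigned int v = b;
  v |= v << 8;			/* QI -> HI */
  v |= v << 16;			/* HI -> SI */
  return v;			/* 0xAB -> 0xABABABAB */
}
#endif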
/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   whose ONE_VAR element is VAR, and other elements are zero.  Return true
   if successful.  */

static bool
ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
				     rtx target, rtx var, int one_var)
{
  enum machine_mode vsimode;
  rtx new_target;
  rtx x, tmp;
  bool use_vector_set = false;

  switch (mode)
    {
    case V2DImode:
      /* For SSE4.1, we normally use vector set.  But if the second
	 element is zero and inter-unit moves are OK, we use movq
	 instead.  */
      use_vector_set = (TARGET_64BIT
			&& TARGET_SSE4_1
			&& !(TARGET_INTER_UNIT_MOVES
			     && one_var == 0));
      break;
    case V16QImode:
    case V4SImode:
    case V4SFmode:
      use_vector_set = TARGET_SSE4_1;
      break;
    case V8HImode:
      use_vector_set = TARGET_SSE2;
      break;
    case V4HImode:
      use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
      break;
    case V32QImode:
    case V16HImode:
    case V8SImode:
    case V8SFmode:
    case V4DFmode:
      use_vector_set = TARGET_AVX;
      break;
    case V4DImode:
      /* Use ix86_expand_vector_set in 64bit mode only.  */
      use_vector_set = TARGET_AVX && TARGET_64BIT;
      break;
    default:
      break;
    }

  if (use_vector_set)
    {
      emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
      var = force_reg (GET_MODE_INNER (mode), var);
      ix86_expand_vector_set (mmx_ok, target, var, one_var);
      return true;
    }

  switch (mode)
    {
    case V2SFmode:
    case V2SImode:
      if (!mmx_ok)
	return false;
      /* FALLTHRU */

    case V2DFmode:
    case V2DImode:
      if (one_var != 0)
	return false;
      var = force_reg (GET_MODE_INNER (mode), var);
      x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
      emit_insn (gen_rtx_SET (VOIDmode, target, x));
      return true;

    case V4SFmode:
    case V4SImode:
      if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
	new_target = gen_reg_rtx (mode);
      else
	new_target = target;
      var = force_reg (GET_MODE_INNER (mode), var);
      x = gen_rtx_VEC_DUPLICATE (mode, var);
      x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
      if (one_var != 0)
	{
	  /* We need to shuffle the value to the correct position, so
	     create a new pseudo to store the intermediate result.  */

	  /* With SSE2, we can use the integer shuffle insns.  */
	  if (mode != V4SFmode && TARGET_SSE2)
	    {
	      emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
					    const1_rtx,
					    GEN_INT (one_var == 1 ? 0 : 1),
					    GEN_INT (one_var == 2 ? 0 : 1),
					    GEN_INT (one_var == 3 ? 0 : 1)));
	      if (target != new_target)
		emit_move_insn (target, new_target);
	      return true;
	    }

	  /* Otherwise convert the intermediate result to V4SFmode and
	     use the SSE1 shuffle instructions.  */
	  if (mode != V4SFmode)
	    {
	      tmp = gen_reg_rtx (V4SFmode);
	      emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
	    }
	  else
	    tmp = new_target;

	  emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
					  const1_rtx,
					  GEN_INT (one_var == 1 ? 0 : 1),
					  GEN_INT (one_var == 2 ? 0+4 : 1+4),
					  GEN_INT (one_var == 3 ? 0+4 : 1+4)));

	  if (mode != V4SFmode)
	    emit_move_insn (target, gen_lowpart (V4SImode, tmp));
	  else if (tmp != target)
	    emit_move_insn (target, tmp);
	}
      else if (target != new_target)
	emit_move_insn (target, new_target);
      return true;

    case V8HImode:
    case V16QImode:
      vsimode = V4SImode;
      goto widen;
    case V4HImode:
    case V8QImode:
      if (!mmx_ok)
	return false;
      vsimode = V2SImode;
      goto widen;
    widen:
      if (one_var != 0)
	return false;

      /* Zero extend the variable element to SImode and recurse.  */
      var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);

      x = gen_reg_rtx (vsimode);
      if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
						var, one_var))
	gcc_unreachable ();

      emit_move_insn (target, gen_lowpart (mode, x));
      return true;

    default:
      return false;
    }
}
/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   consisting of the values in VALS.  It is known that all elements
   except ONE_VAR are constants.  Return true if successful.  */

static bool
ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
				 rtx target, rtx vals, int one_var)
{
  rtx var = XVECEXP (vals, 0, one_var);
  enum machine_mode wmode;
  rtx const_vec, x;

  const_vec = copy_rtx (vals);
  XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
  const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));

  switch (mode)
    {
    case V2DFmode:
    case V2DImode:
    case V2SFmode:
    case V2SImode:
      /* For the two element vectors, it's just as easy to use
	 the general case.  */
      return false;

    case V4DImode:
      /* Use ix86_expand_vector_set in 64bit mode only.  */
      if (!TARGET_64BIT)
	return false;
    case V4DFmode:
    case V8SFmode:
    case V8SImode:
    case V16HImode:
    case V32QImode:
    case V4SFmode:
    case V4SImode:
    case V8HImode:
    case V4HImode:
      break;

    case V16QImode:
      if (TARGET_SSE4_1)
	break;
      wmode = V8HImode;
      goto widen;
    case V8QImode:
      wmode = V4HImode;
      goto widen;
    widen:
      /* There's no way to set one QImode entry easily.  Combine
	 the variable value with its adjacent constant value, and
	 promote to an HImode set.  */
      x = XVECEXP (vals, 0, one_var ^ 1);
      if (one_var & 1)
	{
	  var = convert_modes (HImode, QImode, var, true);
	  var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
				     NULL_RTX, 1, OPTAB_LIB_WIDEN);
	  x = GEN_INT (INTVAL (x) & 0xff);
	}
      else
	{
	  var = convert_modes (HImode, QImode, var, true);
	  x = gen_int_mode (INTVAL (x) << 8, HImode);
	}
      if (x != const0_rtx)
	var = expand_simple_binop (HImode, IOR, var, x, var,
				   1, OPTAB_LIB_WIDEN);

      x = gen_reg_rtx (wmode);
      emit_move_insn (x, gen_lowpart (wmode, const_vec));
      ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);

      emit_move_insn (target, gen_lowpart (mode, x));
      return true;

    default:
      return false;
    }

  emit_move_insn (target, const_vec);
  ix86_expand_vector_set (mmx_ok, target, var, one_var);
  return true;
}
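/* Illustrative only: the QImode widening above in scalar form.  A byte
   at an odd position is shifted into the high half of the containing
   HImode word and IORed with its even neighbour, so a single HImode
   vector set can update both lanes at once.  */
#if 0
static unsigned short
combine_qi_pair (unsigned char even, unsigned char odd)
{
  return (unsigned short) (even | ((unsigned short) odd << 8));
}
#endif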
/* A subroutine of ix86_expand_vector_init_general.  Use vector
   concatenate to handle the most general case: all values variable,
   and none identical.  */

static void
ix86_expand_vector_init_concat (enum machine_mode mode,
				rtx target, rtx *ops, int n)
{
  enum machine_mode cmode, hmode = VOIDmode;
  rtx first[8], second[4];
  rtvec v;
  int i, j;

  switch (n)
    {
    case 2:
      switch (mode)
	{
	case V8SImode:
	  cmode = V4SImode;
	  break;
	case V8SFmode:
	  cmode = V4SFmode;
	  break;
	case V4DImode:
	  cmode = V2DImode;
	  break;
	case V4DFmode:
	  cmode = V2DFmode;
	  break;
	case V4SImode:
	  cmode = V2SImode;
	  break;
	case V4SFmode:
	  cmode = V2SFmode;
	  break;
	case V2DImode:
	  cmode = DImode;
	  break;
	case V2SImode:
	  cmode = SImode;
	  break;
	case V2DFmode:
	  cmode = DFmode;
	  break;
	case V2SFmode:
	  cmode = SFmode;
	  break;
	default:
	  gcc_unreachable ();
	}

      if (!register_operand (ops[1], cmode))
	ops[1] = force_reg (cmode, ops[1]);
      if (!register_operand (ops[0], cmode))
	ops[0] = force_reg (cmode, ops[0]);
      emit_insn (gen_rtx_SET (VOIDmode, target,
			      gen_rtx_VEC_CONCAT (mode, ops[0],
						  ops[1])));
      break;

    case 4:
      switch (mode)
	{
	case V4DImode:
	  cmode = V2DImode;
	  break;
	case V4DFmode:
	  cmode = V2DFmode;
	  break;
	case V4SImode:
	  cmode = V2SImode;
	  break;
	case V4SFmode:
	  cmode = V2SFmode;
	  break;
	default:
	  gcc_unreachable ();
	}
      goto half;

    case 8:
      switch (mode)
	{
	case V8SImode:
	  cmode = V2SImode;
	  hmode = V4SImode;
	  break;
	case V8SFmode:
	  cmode = V2SFmode;
	  hmode = V4SFmode;
	  break;
	default:
	  gcc_unreachable ();
	}
      goto half;

half:
      /* FIXME: We process inputs backward to help RA.  PR 36222.  */
      i = n - 1;
      j = (n >> 1) - 1;
      for (; i > 0; i -= 2, j--)
	{
	  first[j] = gen_reg_rtx (cmode);
	  v = gen_rtvec (2, ops[i - 1], ops[i]);
	  ix86_expand_vector_init (false, first[j],
				   gen_rtx_PARALLEL (cmode, v));
	}

      n >>= 1;
      if (n > 2)
	{
	  gcc_assert (hmode != VOIDmode);
	  for (i = j = 0; i < n; i += 2, j++)
	    {
	      second[j] = gen_reg_rtx (hmode);
	      ix86_expand_vector_init_concat (hmode, second[j],
					      &first[i], 2);
	    }
	  n >>= 1;
	  ix86_expand_vector_init_concat (mode, target, second, n);
	}
      else
	ix86_expand_vector_init_concat (mode, target, first, n);
      break;

    default:
      gcc_unreachable ();
    }
}
/* A subroutine of ix86_expand_vector_init_general.  Use vector
   interleave to handle the most general case: all values variable,
   and none identical.  */

static void
ix86_expand_vector_init_interleave (enum machine_mode mode,
				    rtx target, rtx *ops, int n)
{
  enum machine_mode first_imode, second_imode, third_imode, inner_mode;
  int i, j;
  rtx op0, op1;
  rtx (*gen_load_even) (rtx, rtx, rtx);
  rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
  rtx (*gen_interleave_second_low) (rtx, rtx, rtx);

  switch (mode)
    {
    case V8HImode:
      gen_load_even = gen_vec_setv8hi;
      gen_interleave_first_low = gen_vec_interleave_lowv4si;
      gen_interleave_second_low = gen_vec_interleave_lowv2di;
      inner_mode = HImode;
      first_imode = V4SImode;
      second_imode = V2DImode;
      third_imode = VOIDmode;
      break;
    case V16QImode:
      gen_load_even = gen_vec_setv16qi;
      gen_interleave_first_low = gen_vec_interleave_lowv8hi;
      gen_interleave_second_low = gen_vec_interleave_lowv4si;
      inner_mode = QImode;
      first_imode = V8HImode;
      second_imode = V4SImode;
      third_imode = V2DImode;
      break;
    default:
      gcc_unreachable ();
    }

  for (i = 0; i < n; i++)
    {
      /* Extend the odd element to SImode using a paradoxical SUBREG.  */
      op0 = gen_reg_rtx (SImode);
      emit_move_insn (op0, gen_lowpart (SImode, ops[i + i]));

      /* Insert the SImode value as low element of V4SImode vector.  */
      op1 = gen_reg_rtx (V4SImode);
      op0 = gen_rtx_VEC_MERGE (V4SImode,
			       gen_rtx_VEC_DUPLICATE (V4SImode,
						      op0),
			       CONST0_RTX (V4SImode),
			       const1_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, op1, op0));

      /* Cast the V4SImode vector back to a vector in original mode.  */
      op0 = gen_reg_rtx (mode);
      emit_move_insn (op0, gen_lowpart (mode, op1));

      /* Load even elements into the second position.  */
      emit_insn (gen_load_even (op0,
				force_reg (inner_mode,
					   ops[i + i + 1]),
				const1_rtx));

      /* Cast vector to FIRST_IMODE vector.  */
      ops[i] = gen_reg_rtx (first_imode);
      emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
    }

  /* Interleave low FIRST_IMODE vectors.  */
  for (i = j = 0; i < n; i += 2, j++)
    {
      op0 = gen_reg_rtx (first_imode);
      emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));

      /* Cast FIRST_IMODE vector to SECOND_IMODE vector.  */
      ops[j] = gen_reg_rtx (second_imode);
      emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
    }

  /* Interleave low SECOND_IMODE vectors.  */
  switch (second_imode)
    {
    case V4SImode:
      for (i = j = 0; i < n / 2; i += 2, j++)
	{
	  op0 = gen_reg_rtx (second_imode);
	  emit_insn (gen_interleave_second_low (op0, ops[i],
						ops[i + 1]));

	  /* Cast the SECOND_IMODE vector to the THIRD_IMODE
	     vector.  */
	  ops[j] = gen_reg_rtx (third_imode);
	  emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
	}
      second_imode = V2DImode;
      gen_interleave_second_low = gen_vec_interleave_lowv2di;
      /* FALLTHRU */

    case V2DImode:
      op0 = gen_reg_rtx (second_imode);
      emit_insn (gen_interleave_second_low (op0, ops[0],
					    ops[1]));

      /* Cast the SECOND_IMODE vector back to a vector on original
	 mode.  */
      emit_insn (gen_rtx_SET (VOIDmode, target,
			      gen_lowpart (mode, op0)));
      break;

    default:
      gcc_unreachable ();
    }
}
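/* Illustrative only: the interleave cascade above in scalar form for
   four 16-bit elements.  Adjacent pairs are first zipped into 32-bit
   lanes, then the 32-bit lanes into a 64-bit lane, mirroring the
   low-interleave sequence the generators emit.  */
#if 0
static unsigned long long
zip4_u16 (unsigned short e0, unsigned short e1,
	  unsigned short e2, unsigned short e3)
{
  unsigned int lo = e0 | ((unsigned int) e1 << 16);	/* first low zip */
  unsigned int hi = e2 | ((unsigned int) e3 << 16);
  return lo | ((unsigned long long) hi << 32);		/* second low zip */
}
#endif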
/* A subroutine of ix86_expand_vector_init.  Handle the most general case:
   all values variable, and none identical.  */

static void
ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
				 rtx target, rtx vals)
{
  rtx ops[32], op0, op1;
  enum machine_mode half_mode = VOIDmode;
  int n, i;

  switch (mode)
    {
    case V2SFmode:
    case V2SImode:
      if (!mmx_ok && !TARGET_SSE)
	break;
      /* FALLTHRU */

    case V8SFmode:
    case V8SImode:
    case V4DFmode:
    case V4DImode:
    case V4SFmode:
    case V4SImode:
    case V2DFmode:
    case V2DImode:
      n = GET_MODE_NUNITS (mode);
      for (i = 0; i < n; i++)
	ops[i] = XVECEXP (vals, 0, i);
      ix86_expand_vector_init_concat (mode, target, ops, n);
      return;

    case V32QImode:
      half_mode = V16QImode;
      goto half;

    case V16HImode:
      half_mode = V8HImode;
      goto half;

half:
      n = GET_MODE_NUNITS (mode);
      for (i = 0; i < n; i++)
	ops[i] = XVECEXP (vals, 0, i);
      op0 = gen_reg_rtx (half_mode);
      op1 = gen_reg_rtx (half_mode);
      ix86_expand_vector_init_interleave (half_mode, op0, ops,
					  n >> 2);
      ix86_expand_vector_init_interleave (half_mode, op1,
					  &ops[n >> 1], n >> 2);
      emit_insn (gen_rtx_SET (VOIDmode, target,
			      gen_rtx_VEC_CONCAT (mode, op0, op1)));
      return;

    case V16QImode:
      if (!TARGET_SSE4_1)
	break;
      /* FALLTHRU */

    case V8HImode:
      if (!TARGET_SSE2)
	break;

      /* Don't use ix86_expand_vector_init_interleave if we can't
	 move from GPR to SSE register directly.  */
      if (!TARGET_INTER_UNIT_MOVES)
	break;

      n = GET_MODE_NUNITS (mode);
      for (i = 0; i < n; i++)
	ops[i] = XVECEXP (vals, 0, i);
      ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
      return;

    case V4HImode:
    case V8QImode:
      break;

    default:
      gcc_unreachable ();
    }

    {
      int i, j, n_elts, n_words, n_elt_per_word;
      enum machine_mode inner_mode;
      rtx words[4], shift;

      inner_mode = GET_MODE_INNER (mode);
      n_elts = GET_MODE_NUNITS (mode);
      n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
      n_elt_per_word = n_elts / n_words;
      shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));

      for (i = 0; i < n_words; ++i)
	{
	  rtx word = NULL_RTX;

	  for (j = 0; j < n_elt_per_word; ++j)
	    {
	      rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
	      elt = convert_modes (word_mode, inner_mode, elt, true);

	      if (j == 0)
		word = elt;
	      else
		{
		  word = expand_simple_binop (word_mode, ASHIFT, word, shift,
					      word, 1, OPTAB_LIB_WIDEN);
		  word = expand_simple_binop (word_mode, IOR, word, elt,
					      word, 1, OPTAB_LIB_WIDEN);
		}
	    }

	  words[i] = word;
	}

      if (n_words == 1)
	emit_move_insn (target, gen_lowpart (mode, words[0]));
      else if (n_words == 2)
	{
	  rtx tmp = gen_reg_rtx (mode);
	  emit_clobber (tmp);
	  emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
	  emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
	  emit_move_insn (target, tmp);
	}
      else if (n_words == 4)
	{
	  rtx tmp = gen_reg_rtx (V4SImode);
	  gcc_assert (word_mode == SImode);
	  vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
	  ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
	  emit_move_insn (target, gen_lowpart (mode, tmp));
	}
      else
	gcc_unreachable ();
    }
}
/* Initialize vector TARGET via VALS.  Suppress the use of MMX
   instructions unless MMX_OK is true.  */

void
ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
{
  enum machine_mode mode = GET_MODE (target);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  int n_var = 0, one_var = -1;
  bool all_same = true, all_const_zero = true;
  int i;
  rtx x;

  for (i = 0; i < n_elts; ++i)
    {
      x = XVECEXP (vals, 0, i);
      if (!(CONST_INT_P (x)
	    || GET_CODE (x) == CONST_DOUBLE
	    || GET_CODE (x) == CONST_FIXED))
	n_var++, one_var = i;
      else if (x != CONST0_RTX (inner_mode))
	all_const_zero = false;
      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
	all_same = false;
    }

  /* Constants are best loaded from the constant pool.  */
  if (n_var == 0)
    {
      emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
      return;
    }

  /* If all values are identical, broadcast the value.  */
  if (all_same
      && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
					    XVECEXP (vals, 0, 0)))
    return;

  /* Values where only one field is non-constant are best loaded from
     the pool and overwritten via move later.  */
  if (n_var == 1)
    {
      if (all_const_zero
	  && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
						  XVECEXP (vals, 0, one_var),
						  one_var))
	return;

      if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
	return;
    }

  ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
}
void
ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
{
  enum machine_mode mode = GET_MODE (target);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  enum machine_mode half_mode;
  bool use_vec_merge = false;
  rtx tmp;
  static rtx (*gen_extract[6][2]) (rtx, rtx)
    = {
	{ gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
	{ gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
	{ gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
	{ gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
	{ gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
	{ gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
      };
  static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
    = {
	{ gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
	{ gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
	{ gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
	{ gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
	{ gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
	{ gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
      };
  int i, j, n;

  switch (mode)
    {
    case V2SFmode:
    case V2SImode:
      if (mmx_ok)
	{
	  tmp = gen_reg_rtx (GET_MODE_INNER (mode));
	  ix86_expand_vector_extract (true, tmp, target, 1 - elt);
	  if (elt == 0)
	    tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
	  else
	    tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
	  emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
	  return;
	}
      break;

    case V2DImode:
      use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
      if (use_vec_merge)
	break;

      tmp = gen_reg_rtx (GET_MODE_INNER (mode));
      ix86_expand_vector_extract (false, tmp, target, 1 - elt);
      if (elt == 0)
	tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
      else
	tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
      return;

    case V2DFmode:
      {
	rtx op0, op1;

	/* For the two element vectors, we implement a VEC_CONCAT with
	   the extraction of the other element.  */

	tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
	tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);

	if (elt == 0)
	  op0 = val, op1 = tmp;
	else
	  op0 = tmp, op1 = val;

	tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
	emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
      }
      return;

    case V4SFmode:
      use_vec_merge = TARGET_SSE4_1;
      if (use_vec_merge)
	break;

      switch (elt)
	{
	case 0:
	  use_vec_merge = true;
	  break;

	case 1:
	  /* tmp = target = A B C D */
	  tmp = copy_to_reg (target);
	  /* target = A A B B */
	  emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
	  /* target = X A B B */
	  ix86_expand_vector_set (false, target, val, 0);
	  /* target = A X C D  */
	  emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
					  const1_rtx, const0_rtx,
					  GEN_INT (2+4), GEN_INT (3+4)));
	  return;

	case 2:
	  /* tmp = target = A B C D */
	  tmp = copy_to_reg (target);
	  /* tmp = X B C D */
	  ix86_expand_vector_set (false, tmp, val, 0);
	  /* target = A B X D */
	  emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
					  const0_rtx, const1_rtx,
					  GEN_INT (0+4), GEN_INT (3+4)));
	  return;

	case 3:
	  /* tmp = target = A B C D */
	  tmp = copy_to_reg (target);
	  /* tmp = X B C D */
	  ix86_expand_vector_set (false, tmp, val, 0);
	  /* target = A B C X */
	  emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
					  const0_rtx, const1_rtx,
					  GEN_INT (2+4), GEN_INT (0+4)));
	  return;

	default:
	  gcc_unreachable ();
	}
      break;

    case V4SImode:
      use_vec_merge = TARGET_SSE4_1;
      if (use_vec_merge)
	break;

      /* Element 0 handled by vec_merge below.  */
      if (elt == 0)
	{
	  use_vec_merge = true;
	  break;
	}

      if (TARGET_SSE2)
	{
	  /* With SSE2, use integer shuffles to swap element 0 and ELT,
	     store into element 0, then shuffle them back.  */

	  rtx order[4];

	  order[0] = GEN_INT (elt);
	  order[1] = const1_rtx;
	  order[2] = const2_rtx;
	  order[3] = GEN_INT (3);
	  order[elt] = const0_rtx;

	  emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
					order[1], order[2], order[3]));

	  ix86_expand_vector_set (false, target, val, 0);

	  emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
					order[1], order[2], order[3]));
	}
      else
	{
	  /* For SSE1, we have to reuse the V4SF code.  */
	  ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
				  gen_lowpart (SFmode, val), elt);
	}
      return;

    case V8HImode:
      use_vec_merge = TARGET_SSE2;
      break;
    case V4HImode:
      use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
      break;

    case V16QImode:
      use_vec_merge = TARGET_SSE4_1;
      break;

    case V8QImode:
      break;

    case V32QImode:
      half_mode = V16QImode;
      j = 0;
      n = 16;
      goto half;

    case V16HImode:
      half_mode = V8HImode;
      j = 1;
      n = 8;
      goto half;

    case V8SImode:
      half_mode = V4SImode;
      j = 2;
      n = 4;
      goto half;

    case V4DImode:
      half_mode = V2DImode;
      j = 3;
      n = 2;
      goto half;

    case V8SFmode:
      half_mode = V4SFmode;
      j = 4;
      n = 4;
      goto half;

    case V4DFmode:
      half_mode = V2DFmode;
      j = 5;
      n = 2;
      goto half;

half:
      /* Compute offset.  */
      i = elt / n;
      elt %= n;

      gcc_assert (i <= 1);

      /* Extract the half.  */
      tmp = gen_reg_rtx (half_mode);
      emit_insn (gen_extract[j][i] (tmp, target));

      /* Put val in tmp at elt.  */
      ix86_expand_vector_set (false, tmp, val, elt);

      /* Put it back.  */
      emit_insn (gen_insert[j][i] (target, target, tmp));
      return;

    default:
      break;
    }

  if (use_vec_merge)
    {
      tmp = gen_rtx_VEC_DUPLICATE (mode, val);
      tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
    }
  else
    {
      rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);

      emit_move_insn (mem, target);

      tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
      emit_move_insn (tmp, val);

      emit_move_insn (target, mem);
    }
}
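/* Illustrative only: the memory fallback at the end of the function,
   written as plain C.  When no insert pattern applies, the vector is
   spilled to a stack slot, one element is overwritten in memory, and
   the whole vector is reloaded.  */
#if 0
#include <string.h>
static void
set_element_via_memory (float vec[4], int elt, float val)
{
  float slot[4];
  memcpy (slot, vec, sizeof slot);	/* store the vector */
  slot[elt] = val;			/* poke the element */
  memcpy (vec, slot, sizeof slot);	/* reload the vector */
}
#endif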
void
ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
{
  enum machine_mode mode = GET_MODE (vec);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  bool use_vec_extr = false;
  rtx tmp;

  switch (mode)
    {
    case V2SImode:
    case V2SFmode:
      if (!mmx_ok)
	break;
      /* FALLTHRU */

    case V2DFmode:
    case V2DImode:
      use_vec_extr = true;
      break;

    case V4SFmode:
      use_vec_extr = TARGET_SSE4_1;
      if (use_vec_extr)
	break;

      switch (elt)
	{
	case 0:
	  tmp = vec;
	  break;

	case 1:
	case 3:
	  tmp = gen_reg_rtx (mode);
	  emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
					  GEN_INT (elt), GEN_INT (elt),
					  GEN_INT (elt+4), GEN_INT (elt+4)));
	  break;

	case 2:
	  tmp = gen_reg_rtx (mode);
	  emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
	  break;

	default:
	  gcc_unreachable ();
	}
      vec = tmp;
      use_vec_extr = true;
      elt = 0;
      break;

    case V4SImode:
      use_vec_extr = TARGET_SSE4_1;
      if (use_vec_extr)
	break;

      if (TARGET_SSE2)
	{
	  switch (elt)
	    {
	    case 0:
	      tmp = vec;
	      break;

	    case 1:
	    case 3:
	      tmp = gen_reg_rtx (mode);
	      emit_insn (gen_sse2_pshufd_1 (tmp, vec,
					    GEN_INT (elt), GEN_INT (elt),
					    GEN_INT (elt), GEN_INT (elt)));
	      break;

	    case 2:
	      tmp = gen_reg_rtx (mode);
	      emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
	      break;

	    default:
	      gcc_unreachable ();
	    }
	  vec = tmp;
	  use_vec_extr = true;
	  elt = 0;
	}
      else
	{
	  /* For SSE1, we have to reuse the V4SF code.  */
	  ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
				      gen_lowpart (V4SFmode, vec), elt);
	  return;
	}
      break;

    case V8HImode:
      use_vec_extr = TARGET_SSE2;
      break;
    case V4HImode:
      use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
      break;

    case V16QImode:
      use_vec_extr = TARGET_SSE4_1;
      break;

    case V8QImode:
      /* ??? Could extract the appropriate HImode element and shift.  */
    default:
      break;
    }

  if (use_vec_extr)
    {
      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
      tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);

      /* Let the rtl optimizers know about the zero extension performed.  */
      if (inner_mode == QImode || inner_mode == HImode)
	{
	  tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
	  target = gen_lowpart (SImode, target);
	}

      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
    }
  else
    {
      rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);

      emit_move_insn (mem, vec);

      tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
      emit_move_insn (target, tmp);
    }
}
/* Expand a vector reduction on V4SFmode for SSE1.  FN is the binary
   pattern to reduce; DEST is the destination; IN is the input vector.  */

void
ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
{
  rtx tmp1, tmp2, tmp3;

  tmp1 = gen_reg_rtx (V4SFmode);
  tmp2 = gen_reg_rtx (V4SFmode);
  tmp3 = gen_reg_rtx (V4SFmode);

  emit_insn (gen_sse_movhlps (tmp1, in, in));
  emit_insn (fn (tmp2, tmp1, in));

  emit_insn (gen_sse_shufps_v4sf (tmp3, tmp2, tmp2,
				  const1_rtx, const1_rtx,
				  GEN_INT (1+4), GEN_INT (1+4)));
  emit_insn (fn (dest, tmp2, tmp3));
}
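/* Editorial sketch (illustrative comment, not compiled): assuming FN is a
   pairwise pattern such as addv4sf3, the sequence above reduces the four
   lanes roughly like this scalar picture, with the fully reduced value
   landing in element 0 of DEST:

     tmp1 = movhlps (in, in)         -- in[2], in[3] moved to the low lanes
     tmp2 = fn (tmp1, in)            -- fn(in[2],in[0]), fn(in[3],in[1]), ...
     tmp3 = shufps (tmp2, tmp2, 1,1,5,5)  -- broadcast tmp2[1]
     dest = fn (tmp2, tmp3)          -- fn(fn(in2,in0), fn(in3,in1))  */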
/* Target hook for scalar_mode_supported_p.  */
static bool
ix86_scalar_mode_supported_p (enum machine_mode mode)
{
  if (DECIMAL_FLOAT_MODE_P (mode))
    return default_decimal_float_supported_p ();
  else if (mode == TFmode)
    return true;
  else
    return default_scalar_mode_supported_p (mode);
}
/* Implements target hook vector_mode_supported_p.  */
static bool
ix86_vector_mode_supported_p (enum machine_mode mode)
{
  if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
    return true;
  if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
    return true;
  if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
    return true;
  if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
    return true;
  if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
    return true;
  return false;
}
/* Target hook for c_mode_for_suffix.  */
static enum machine_mode
ix86_c_mode_for_suffix (char suffix)
{
  if (suffix == 'q')
    return TFmode;
  if (suffix == 'w')
    return XFmode;

  return VOIDmode;
}
/* Worker function for TARGET_MD_ASM_CLOBBERS.

   We do this in the new i386 backend to maintain source compatibility
   with the old cc0-based compiler.  */

static tree
ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
		      tree inputs ATTRIBUTE_UNUSED,
		      tree clobbers)
{
  clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
			clobbers);
  clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
			clobbers);
  return clobbers;
}
/* Implements the target hook targetm.asm.encode_section_info.  */

static void ATTRIBUTE_UNUSED
ix86_encode_section_info (tree decl, rtx rtl, int first)
{
  default_encode_section_info (decl, rtl, first);

  if (TREE_CODE (decl) == VAR_DECL
      && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
      && ix86_in_large_data_p (decl))
    SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
}
/* Worker function for REVERSE_CONDITION.  */

enum rtx_code
ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
{
  return (mode != CCFPmode && mode != CCFPUmode
	  ? reverse_condition (code)
	  : reverse_condition_maybe_unordered (code));
}
/* Output code to perform an x87 FP register move, from OPERANDS[1]
   to OPERANDS[0].  */

const char *
output_387_reg_move (rtx insn, rtx *operands)
{
  if (REG_P (operands[0]))
    {
      if (REG_P (operands[1])
	  && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
	{
	  if (REGNO (operands[0]) == FIRST_STACK_REG)
	    return output_387_ffreep (operands, 0);
	  return "fstp\t%y0";
	}
      if (STACK_TOP_P (operands[0]))
	return "fld%Z1\t%y1";
      return "fst\t%y0";
    }
  else if (MEM_P (operands[0]))
    {
      gcc_assert (REG_P (operands[1]));
      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
	return "fstp%Z0\t%y0";
      else
	{
	  /* There is no non-popping store to memory for XFmode.
	     So if we need one, follow the store with a load.  */
	  if (GET_MODE (operands[0]) == XFmode)
	    return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
	  else
	    return "fst%Z0\t%y0";
	}
    }
  else
    gcc_unreachable();
}
/* Output code to perform a conditional jump to LABEL, if C2 flag in
   FP status register is set.  */

void
ix86_emit_fp_unordered_jump (rtx label)
{
  rtx reg = gen_reg_rtx (HImode);
  rtx temp;

  emit_insn (gen_x86_fnstsw_1 (reg));

  if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
    {
      emit_insn (gen_x86_sahf_1 (reg));

      temp = gen_rtx_REG (CCmode, FLAGS_REG);
      temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
    }
  else
    {
      emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));

      temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
    }

  temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
			       gen_rtx_LABEL_REF (VOIDmode, label),
			       pc_rtx);
  temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);

  emit_jump_insn (temp);
  predict_jump (REG_BR_PROB_BASE * 10 / 100);
}
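/* Editorial note: the non-SAHF path relies on C2 being bit 10 of the x87
   status word, i.e. bit 2 of the byte that fnstsw leaves in the high half
   of REG -- hence the GEN_INT (0x04) in the testqi_ext pattern.  A rough
   assembly-level picture (illustrative only):

     fnstsw  %ax
     testb   $4, %ah
     jne     label          -- taken when C2 is set  */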
/* Output code to perform a log1p XFmode calculation.  */

void ix86_emit_i387_log1p (rtx op0, rtx op1)
{
  rtx label1 = gen_label_rtx ();
  rtx label2 = gen_label_rtx ();

  rtx tmp = gen_reg_rtx (XFmode);
  rtx tmp2 = gen_reg_rtx (XFmode);
  rtx test;

  emit_insn (gen_absxf2 (tmp, op1));
  test = gen_rtx_GE (VOIDmode, tmp,
    CONST_DOUBLE_FROM_REAL_VALUE (
       REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
       XFmode));
  emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));

  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
  emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
  emit_jump (label2);

  emit_label (label1);
  emit_move_insn (tmp, CONST1_RTX (XFmode));
  emit_insn (gen_addxf3 (tmp, op1, tmp));
  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
  emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));

  emit_label (label2);
}
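/* Editorial note: the magic constant above is 1 - sqrt(2)/2, the bound up
   to which the x87 fyl2xp1 instruction is specified.  Within that range we
   compute log1p(x) accurately as ln2 * log2(1 + x) via fyl2xp1; outside it
   the fallback forms 1 + x explicitly and uses fyl2x, where the loss of
   precision from the addition no longer matters.  */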
/* Output code to perform a Newton-Raphson approximation of a single precision
   floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm].  */

void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
{
  rtx x0, x1, e0, e1;

  x0 = gen_reg_rtx (mode);
  e0 = gen_reg_rtx (mode);
  e1 = gen_reg_rtx (mode);
  x1 = gen_reg_rtx (mode);

  /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */

  /* x0 = rcp(b) estimate */
  emit_insn (gen_rtx_SET (VOIDmode, x0,
			  gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
					  UNSPEC_RCP)));
  /* e0 = x0 * b */
  emit_insn (gen_rtx_SET (VOIDmode, e0,
			  gen_rtx_MULT (mode, x0, b)));

  /* e0 = x0 * e0 */
  emit_insn (gen_rtx_SET (VOIDmode, e0,
			  gen_rtx_MULT (mode, x0, e0)));

  /* e1 = x0 + x0 */
  emit_insn (gen_rtx_SET (VOIDmode, e1,
			  gen_rtx_PLUS (mode, x0, x0)));

  /* x1 = e1 - e0 */
  emit_insn (gen_rtx_SET (VOIDmode, x1,
			  gen_rtx_MINUS (mode, e1, e0)));

  /* res = a * x1 */
  emit_insn (gen_rtx_SET (VOIDmode, res,
			  gen_rtx_MULT (mode, a, x1)));
}
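/* Editorial sketch (illustrative comment, not compiled): the sequence above
   is one Newton-Raphson step for 1/b starting from the rcpss estimate x0:

     x1 = x0 * (2 - b * x0) = (x0 + x0) - (b * x0 * x0)

   written so the multiply chain and the x0 + x0 can issue independently.
   The hardware estimate is good to roughly 12 bits and one step about
   doubles the accurate bits, which is why a single iteration suffices for
   single precision.  */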
/* Output code to perform a Newton-Raphson approximation of a
   single precision floating point [reciprocal] square root.  */

void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
			 bool recip)
{
  rtx x0, e0, e1, e2, e3, mthree, mhalf;
  REAL_VALUE_TYPE r;

  x0 = gen_reg_rtx (mode);
  e0 = gen_reg_rtx (mode);
  e1 = gen_reg_rtx (mode);
  e2 = gen_reg_rtx (mode);
  e3 = gen_reg_rtx (mode);

  real_from_integer (&r, VOIDmode, -3, -1, 0);
  mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);

  real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
  mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);

  if (VECTOR_MODE_P (mode))
    {
      mthree = ix86_build_const_vector (mode, true, mthree);
      mhalf = ix86_build_const_vector (mode, true, mhalf);
    }

  /* sqrt(a)  = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
     rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */

  /* x0 = rsqrt(a) estimate */
  emit_insn (gen_rtx_SET (VOIDmode, x0,
			  gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
					  UNSPEC_RSQRT)));

  /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0).  */
  if (!recip)
    {
      rtx zero, mask;

      zero = gen_reg_rtx (mode);
      mask = gen_reg_rtx (mode);

      zero = force_reg (mode, CONST0_RTX(mode));
      emit_insn (gen_rtx_SET (VOIDmode, mask,
			      gen_rtx_NE (mode, zero, a)));

      emit_insn (gen_rtx_SET (VOIDmode, x0,
			      gen_rtx_AND (mode, x0, mask)));
    }

  /* e0 = x0 * a */
  emit_insn (gen_rtx_SET (VOIDmode, e0,
			  gen_rtx_MULT (mode, x0, a)));
  /* e1 = e0 * x0 */
  emit_insn (gen_rtx_SET (VOIDmode, e1,
			  gen_rtx_MULT (mode, e0, x0)));

  /* e2 = e1 - 3. */
  mthree = force_reg (mode, mthree);
  emit_insn (gen_rtx_SET (VOIDmode, e2,
			  gen_rtx_PLUS (mode, e1, mthree)));

  mhalf = force_reg (mode, mhalf);
  if (recip)
    /* e3 = -.5 * x0 */
    emit_insn (gen_rtx_SET (VOIDmode, e3,
			    gen_rtx_MULT (mode, x0, mhalf)));
  else
    /* e3 = -.5 * e0 */
    emit_insn (gen_rtx_SET (VOIDmode, e3,
			    gen_rtx_MULT (mode, e0, mhalf)));
  /* ret = e2 * e3 */
  emit_insn (gen_rtx_SET (VOIDmode, res,
			  gen_rtx_MULT (mode, e2, e3)));
}
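/* Editorial sketch (illustrative comment, not compiled): with
   x0 = rsqrtss(a), the code above evaluates the standard Newton-Raphson
   update for 1/sqrt(a)

     x1 = 0.5 * x0 * (3 - a * x0 * x0) = -0.5 * x0 * (a * x0 * x0 - 3)

   and, for the sqrt(a) case, folds the final multiply by a into the e0
   operand, since sqrt(a) = a * 1/sqrt(a).  The a == 0 mask zeroes the
   infinite rsqrt estimate so that 0 * inf cannot produce a NaN for
   sqrt(0.0).  */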
#ifdef TARGET_SOLARIS
/* Solaris implementation of TARGET_ASM_NAMED_SECTION.  */

static void
i386_solaris_elf_named_section (const char *name, unsigned int flags,
				tree decl)
{
  /* With Binutils 2.15, the "@unwind" marker must be specified on
     every occurrence of the ".eh_frame" section, not just the first
     one.  */
  if (TARGET_64BIT
      && strcmp (name, ".eh_frame") == 0)
    {
      fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
	       flags & SECTION_WRITE ? "aw" : "a");
      return;
    }

#ifndef USE_GAS
  if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
    {
      solaris_elf_asm_comdat_section (name, flags, decl);
      return;
    }
#endif

  default_elf_asm_named_section (name, flags, decl);
}
#endif /* TARGET_SOLARIS */
/* Return the mangling of TYPE if it is an extended fundamental type.  */

static const char *
ix86_mangle_type (const_tree type)
{
  type = TYPE_MAIN_VARIANT (type);

  if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
      && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
    return NULL;

  switch (TYPE_MODE (type))
    {
    case TFmode:
      /* __float128 is "g".  */
      return "g";
    case XFmode:
      /* "long double" or __float80 is "e".  */
      return "e";
    default:
      return NULL;
    }
}
/* For 32-bit code we can save PIC register setup by using
   __stack_chk_fail_local hidden function instead of calling
   __stack_chk_fail directly.  64-bit code doesn't need to setup any PIC
   register, so it is better to call __stack_chk_fail directly.  */

static tree ATTRIBUTE_UNUSED
ix86_stack_protect_fail (void)
{
  return TARGET_64BIT
	 ? default_external_stack_protect_fail ()
	 : default_hidden_stack_protect_fail ();
}
/* Select a format to encode pointers in exception handling data.  CODE
   is 0 for data, 1 for code labels, 2 for function pointers.  GLOBAL is
   true if the symbol may be affected by dynamic relocations.

   ??? All x86 object file formats are capable of representing this.
   After all, the relocation needed is the same as for the call insn.
   Whether or not a particular assembler allows us to enter such, I
   guess we'll have to see.  */
int
asm_preferred_eh_data_format (int code, int global)
{
  if (flag_pic)
    {
      int type = DW_EH_PE_sdata8;
      if (!TARGET_64BIT
	  || ix86_cmodel == CM_SMALL_PIC
	  || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
	type = DW_EH_PE_sdata4;
      return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
    }
  if (ix86_cmodel == CM_SMALL
      || (ix86_cmodel == CM_MEDIUM && code))
    return DW_EH_PE_udata4;
  return DW_EH_PE_absptr;
}
/* Expand copysign from SIGN to the positive value ABS_VALUE
   storing in RESULT.  If MASK is non-null, it shall be a mask to mask out
   the sign-bit.  */
static void
ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
{
  enum machine_mode mode = GET_MODE (sign);
  rtx sgn = gen_reg_rtx (mode);
  if (mask == NULL_RTX)
    {
      enum machine_mode vmode;

      if (mode == SFmode)
	vmode = V4SFmode;
      else if (mode == DFmode)
	vmode = V2DFmode;
      else
	vmode = mode;

      mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
      if (!VECTOR_MODE_P (mode))
	{
	  /* We need to generate a scalar mode mask in this case.  */
	  rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
	  tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
	  mask = gen_reg_rtx (mode);
	  emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
	}
    }
  else
    mask = gen_rtx_NOT (mode, mask);

  emit_insn (gen_rtx_SET (VOIDmode, sgn,
			  gen_rtx_AND (mode, mask, sign)));
  emit_insn (gen_rtx_SET (VOIDmode, result,
			  gen_rtx_IOR (mode, abs_value, sgn)));
}
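/* Editorial sketch (illustrative comment, not compiled): this is the usual
   sign-transfer bit trick; in SFmode terms it behaves like

     result = abs_value | (sign & 0x80000000)

   When MASK is passed in it is the fabs mask (sign bit clear, all other
   bits set), so the NOT above turns it back into a sign-bit-only mask
   before the AND.  */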
/* Expand fabs (OP0) and return a new rtx that holds the result.  The
   mask for masking out the sign-bit is stored in *SMASK, if that is
   non-null.  */
static rtx
ix86_expand_sse_fabs (rtx op0, rtx *smask)
{
  enum machine_mode vmode, mode = GET_MODE (op0);
  rtx xa, mask;

  xa = gen_reg_rtx (mode);
  if (mode == SFmode)
    vmode = V4SFmode;
  else if (mode == DFmode)
    vmode = V2DFmode;
  else
    vmode = mode;
  mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
  if (!VECTOR_MODE_P (mode))
    {
      /* We need to generate a scalar mode mask in this case.  */
      rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
      tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
      mask = gen_reg_rtx (mode);
      emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
    }
  emit_insn (gen_rtx_SET (VOIDmode, xa,
			  gen_rtx_AND (mode, op0, mask)));

  if (smask)
    *smask = mask;

  return xa;
}
/* Expands a comparison of OP0 with OP1 using comparison code CODE,
   swapping the operands if SWAP_OPERANDS is true.  The expanded
   code is a forward jump to a newly created label in case the
   comparison is true.  The generated label rtx is returned.  */
static rtx
ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
				  bool swap_operands)
{
  rtx label, tmp;

  if (swap_operands)
    {
      tmp = op0;
      op0 = op1;
      op1 = tmp;
    }

  label = gen_label_rtx ();
  tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
			  gen_rtx_COMPARE (CCFPUmode, op0, op1)));
  tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
			      gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
  tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
  JUMP_LABEL (tmp) = label;

  return label;
}
/* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
   using comparison code CODE.  Operands are swapped for the comparison if
   SWAP_OPERANDS is true.  Returns a rtx for the generated mask.  */
static rtx
ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
			      bool swap_operands)
{
  rtx (*insn)(rtx, rtx, rtx, rtx);
  enum machine_mode mode = GET_MODE (op0);
  rtx mask = gen_reg_rtx (mode);

  if (swap_operands)
    {
      rtx tmp = op0;
      op0 = op1;
      op1 = tmp;
    }

  insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;

  emit_insn (insn (mask, op0, op1,
		   gen_rtx_fmt_ee (code, mode, op0, op1)));
  return mask;
}
/* Generate and return a rtx of mode MODE for 2**n where n is the number
   of bits of the mantissa of MODE, which must be one of DFmode or SFmode.  */
static rtx
ix86_gen_TWO52 (enum machine_mode mode)
{
  REAL_VALUE_TYPE TWO52r;
  rtx TWO52;

  real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
  TWO52 = const_double_from_real_value (TWO52r, mode);
  TWO52 = force_reg (mode, TWO52);

  return TWO52;
}
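/* Editorial note: 2**52 (2**23 for SFmode) is the classic magic rounding
   constant.  For any |x| < 2**52, all fractional bits fall off the end of
   the DFmode mantissa in x + 2**52, so the addition rounds x to an integer
   in the current rounding mode and the subtraction recovers it; the
   sequences below all hinge on this, e.g. (x + 0x1p52) - 0x1p52 rounds a
   nonnegative double to the nearest integer.  */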
/* Expand SSE sequence for computing lround from OP1 storing
   into OP0.  */
void
ix86_expand_lround (rtx op0, rtx op1)
{
  /* C code for the stuff we're doing below:
       tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
       op0 = (long)tmp
   */
  enum machine_mode mode = GET_MODE (op1);
  const struct real_format *fmt;
  REAL_VALUE_TYPE pred_half, half_minus_pred_half;
  rtx adj;

  /* load nextafter (0.5, 0.0) */
  fmt = REAL_MODE_FORMAT (mode);
  real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
  REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);

  /* adj = copysign (0.5, op1) */
  adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
  ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);

  /* adj = op1 + adj */
  adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);

  /* op0 = (imode)adj */
  expand_fix (op0, adj, 0);
}
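/* Editorial note: the constant loaded above is nextafter (0.5, 0.0), i.e.
   0.5 - 2**(-p-1) for a p-bit mantissa.  Plain 0.5 would be wrong for
   inputs such as the largest double below 0.5 (0.49999999999999994), where
   x + 0.5 rounds up to exactly 1.0 and lround would return 1 instead of 0;
   adding the predecessor of 0.5 keeps that sum below 1.0.  */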
/* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1 storing
   into OPERAND0.  */
void
ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
{
  /* C code for the stuff we're doing below (for do_floor):
	xi = (long)op1;
	xi -= (double)xi > op1 ? 1 : 0;
	op0 = xi;
   */
  enum machine_mode fmode = GET_MODE (op1);
  enum machine_mode imode = GET_MODE (op0);
  rtx ireg, freg, label, tmp;

  /* reg = (long)op1 */
  ireg = gen_reg_rtx (imode);
  expand_fix (ireg, op1, 0);

  /* freg = (double)reg */
  freg = gen_reg_rtx (fmode);
  expand_float (freg, ireg, 0);

  /* ireg = (freg > op1) ? ireg - 1 : ireg */
  label = ix86_expand_sse_compare_and_jump (UNLE,
					    freg, op1, !do_floor);
  tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
			     ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (ireg, tmp);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (op0, ireg);
}
/* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
   result in OPERAND0.  */
void
ix86_expand_rint (rtx operand0, rtx operand1)
{
  /* C code for the stuff we're doing below:
	xa = fabs (operand1);
	if (!isless (xa, 2**52))
	  return operand1;
	xa = xa + 2**52 - 2**52;
	return copysign (xa, operand1);
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx res, xa, label, TWO52, mask;

  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  TWO52 = ix86_gen_TWO52 (mode);
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
  xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);

  ix86_sse_copysign_to_positive (res, xa, res, mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
/* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
   into OPERAND0.  Sequence that works without relying on DImode truncation
   via cvttsd2siq that is only available on 64bit targets.  */
void
ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
{
  /* C code for the stuff we expand below.
	double xa = fabs (x), x2;
	if (!isless (xa, TWO52))
	  return x;
	xa = xa + TWO52 - TWO52;
	x2 = copysign (xa, x);
     Compensate.  Floor:
	if (x2 > x)
	  x2 -= 1;
     Compensate.  Ceil:
	if (x2 < x)
	  x2 -= -1;
	return x2;
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, TWO52, tmp, label, one, res, mask;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* xa = xa + TWO52 - TWO52; */
  xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
  xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);

  /* xa = copysign (xa, operand1) */
  ix86_sse_copysign_to_positive (xa, xa, res, mask);

  /* generate 1.0 or -1.0 */
  one = force_reg (mode,
		   const_double_from_real_value (do_floor
						 ? dconst1 : dconstm1, mode));

  /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
  tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
			  gen_rtx_AND (mode, one, tmp)));
  /* We always need to subtract here to preserve signed zero.  */
  tmp = expand_simple_binop (mode, MINUS,
			     xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
/* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
   into OPERAND0.  */
void
ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
{
  /* C code for the stuff we expand below.
	double xa = fabs (x), x2;
	if (!isless (xa, TWO52))
	  return x;
	x2 = (double)(long)x;
     Compensate.  Floor:
	if (x2 > x)
	  x2 -= 1;
     Compensate.  Ceil:
	if (x2 < x)
	  x2 += 1;
	if (HONOR_SIGNED_ZEROS (mode))
	  return copysign (x2, x);
	return x2;
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, xi, TWO52, tmp, label, one, res, mask;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* xa = (double)(long)x */
  xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
  expand_fix (xi, res, 0);
  expand_float (xa, xi, 0);

  /* generate 1.0 */
  one = force_reg (mode, const_double_from_real_value (dconst1, mode));

  /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
  tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
			  gen_rtx_AND (mode, one, tmp)));
  tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
			     xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  if (HONOR_SIGNED_ZEROS (mode))
    ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
/* Expand SSE sequence for computing round from OPERAND1 storing
   into OPERAND0.  Sequence that works without relying on DImode truncation
   via cvttsd2siq that is only available on 64bit targets.  */
void
ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
{
  /* C code for the stuff we expand below.
	double xa = fabs (x), xa2, x2;
	if (!isless (xa, TWO52))
	  return x;
     Using the absolute value and copying back sign makes
     -0.0 -> -0.0 correct.
	xa2 = xa + TWO52 - TWO52;
     Compensate.
	dxa = xa2 - xa;
	if (dxa <= -0.5)
	  xa2 += 1;
	else if (dxa > 0.5)
	  xa2 -= 1;
	x2 = copysign (xa2, x);
	return x2;
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* xa2 = xa + TWO52 - TWO52; */
  xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
  xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);

  /* dxa = xa2 - xa; */
  dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);

  /* generate 0.5, 1.0 and -0.5 */
  half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
  one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
  mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
			       0, OPTAB_DIRECT);

  /* Compensate.  */
  tmp = gen_reg_rtx (mode);
  /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
  tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
			  gen_rtx_AND (mode, one, tmp)));
  xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
  /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
  tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
			  gen_rtx_AND (mode, one, tmp)));
  xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);

  /* res = copysign (xa2, operand1) */
  ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
/* Expand SSE sequence for computing trunc from OPERAND1 storing
   into OPERAND0.  */
void
ix86_expand_trunc (rtx operand0, rtx operand1)
{
  /* C code for SSE variant we expand below.
	double xa = fabs (x), x2;
	if (!isless (xa, TWO52))
	  return x;
	x2 = (double)(long)x;
	if (HONOR_SIGNED_ZEROS (mode))
	  return copysign (x2, x);
	return x2;
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, xi, TWO52, label, res, mask;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* x = (double)(long)x */
  xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
  expand_fix (xi, res, 0);
  expand_float (res, xi, 0);

  if (HONOR_SIGNED_ZEROS (mode))
    ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
/* Expand SSE sequence for computing trunc from OPERAND1 storing
   into OPERAND0.  */
void
ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
{
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, mask, TWO52, label, one, res, smask, tmp;

  /* C code for SSE variant we expand below.
	double xa = fabs (x), x2;
	if (!isless (xa, TWO52))
	  return x;
	xa2 = xa + TWO52 - TWO52;
     Compensate:
	if (xa2 > xa)
	  xa2 -= 1.0;
	x2 = copysign (xa2, x);
	return x2;
   */

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &smask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* res = xa + TWO52 - TWO52; */
  tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
  tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  /* generate 1.0 */
  one = force_reg (mode, const_double_from_real_value (dconst1, mode));

  /* Compensate: res = xa2 - (res > xa ? 1 : 0)  */
  mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
  emit_insn (gen_rtx_SET (VOIDmode, mask,
			  gen_rtx_AND (mode, mask, one)));
  tmp = expand_simple_binop (mode, MINUS,
			     res, mask, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  /* res = copysign (res, operand1) */
  ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
/* Expand SSE sequence for computing round from OPERAND1 storing
   into OPERAND0.  */
void
ix86_expand_round (rtx operand0, rtx operand1)
{
  /* C code for the stuff we're doing below:
	double xa = fabs (x);
	if (!isless (xa, TWO52))
	  return x;
	xa = (double)(long)(xa + nextafter (0.5, 0.0));
	return copysign (xa, x);
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx res, TWO52, xa, label, xi, half, mask;
  const struct real_format *fmt;
  REAL_VALUE_TYPE pred_half, half_minus_pred_half;

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  TWO52 = ix86_gen_TWO52 (mode);
  xa = ix86_expand_sse_fabs (res, &mask);
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* load nextafter (0.5, 0.0) */
  fmt = REAL_MODE_FORMAT (mode);
  real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
  REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);

  /* xa = xa + 0.5 */
  half = force_reg (mode, const_double_from_real_value (pred_half, mode));
  xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);

  /* xa = (double)(int64_t)xa */
  xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
  expand_fix (xi, xa, 0);
  expand_float (xa, xi, 0);

  /* res = copysign (xa, operand1) */
  ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
/* Table of valid machine attributes.  */
static const struct attribute_spec ix86_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       affects_type_identity } */
  /* Stdcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "stdcall",   0, 0, false, true,  true,  ix86_handle_cconv_attribute,
    true },
  /* Fastcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "fastcall",  0, 0, false, true,  true,  ix86_handle_cconv_attribute,
    true },
  /* Thiscall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "thiscall",  0, 0, false, true,  true,  ix86_handle_cconv_attribute,
    true },
  /* Cdecl attribute says the callee is a normal C declaration */
  { "cdecl",     0, 0, false, true,  true,  ix86_handle_cconv_attribute,
    true },
  /* Regparm attribute specifies how many integer arguments are to be
     passed in registers.  */
  { "regparm",   1, 1, false, true,  true,  ix86_handle_cconv_attribute,
    true },
  /* Sseregparm attribute says we are using x86_64 calling conventions
     for FP arguments.  */
  { "sseregparm", 0, 0, false, true, true,  ix86_handle_cconv_attribute,
    true },
  /* force_align_arg_pointer says this function realigns the stack at entry.  */
  { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
    false, true,  true, ix86_handle_cconv_attribute, false },
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
  { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
  { "shared",    0, 0, true,  false, false, ix86_handle_shared_attribute,
    false },
#endif
  { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
    false },
  { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
    false },
#ifdef SUBTARGET_ATTRIBUTE_TABLE
  SUBTARGET_ATTRIBUTE_TABLE,
#endif
  /* ms_abi and sysv_abi calling convention function attributes.  */
  { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
  { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
  { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
    false },
  { "callee_pop_aggregate_return", 1, 1, false, true, true,
    ix86_handle_callee_pop_aggregate_return, true },
  /* End element.  */
  { NULL, 0, 0, false, false, false, NULL, false }
};
/* Implement targetm.vectorize.builtin_vectorization_cost.  */
static int
ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
				 tree vectype ATTRIBUTE_UNUSED,
				 int misalign ATTRIBUTE_UNUSED)
{
  switch (type_of_cost)
    {
      case scalar_stmt:
	return ix86_cost->scalar_stmt_cost;

      case scalar_load:
	return ix86_cost->scalar_load_cost;

      case scalar_store:
	return ix86_cost->scalar_store_cost;

      case vector_stmt:
	return ix86_cost->vec_stmt_cost;

      case vector_load:
	return ix86_cost->vec_align_load_cost;

      case vector_store:
	return ix86_cost->vec_store_cost;

      case vec_to_scalar:
	return ix86_cost->vec_to_scalar_cost;

      case scalar_to_vec:
	return ix86_cost->scalar_to_vec_cost;

      case unaligned_load:
      case unaligned_store:
	return ix86_cost->vec_unalign_load_cost;

      case cond_branch_taken:
	return ix86_cost->cond_taken_branch_cost;

      case cond_branch_not_taken:
	return ix86_cost->cond_not_taken_branch_cost;

      default:
	gcc_unreachable ();
    }
}
/* Implement targetm.vectorize.builtin_vec_perm.  */
static tree
ix86_vectorize_builtin_vec_perm (tree vec_type, tree *mask_type)
{
  tree itype = TREE_TYPE (vec_type);
  bool u = TYPE_UNSIGNED (itype);
  enum machine_mode vmode = TYPE_MODE (vec_type);
  enum ix86_builtins fcode;
  bool ok = TARGET_SSE2;

  switch (vmode)
    {
    case V4DFmode:
      ok = TARGET_AVX;
      fcode = IX86_BUILTIN_VEC_PERM_V4DF;
      goto get_di;
    case V2DFmode:
      fcode = IX86_BUILTIN_VEC_PERM_V2DF;
    get_di:
      itype = ix86_get_builtin_type (IX86_BT_DI);
      break;

    case V8SFmode:
      ok = TARGET_AVX;
      fcode = IX86_BUILTIN_VEC_PERM_V8SF;
      goto get_si;
    case V4SFmode:
      ok = TARGET_SSE;
      fcode = IX86_BUILTIN_VEC_PERM_V4SF;
    get_si:
      itype = ix86_get_builtin_type (IX86_BT_SI);
      break;

    case V2DImode:
      fcode = u ? IX86_BUILTIN_VEC_PERM_V2DI_U : IX86_BUILTIN_VEC_PERM_V2DI;
      break;
    case V4SImode:
      fcode = u ? IX86_BUILTIN_VEC_PERM_V4SI_U : IX86_BUILTIN_VEC_PERM_V4SI;
      break;
    case V8HImode:
      fcode = u ? IX86_BUILTIN_VEC_PERM_V8HI_U : IX86_BUILTIN_VEC_PERM_V8HI;
      break;
    case V16QImode:
      fcode = u ? IX86_BUILTIN_VEC_PERM_V16QI_U : IX86_BUILTIN_VEC_PERM_V16QI;
      break;
    default:
      ok = false;
      break;
    }

  if (!ok)
    return NULL_TREE;

  *mask_type = itype;
  return ix86_builtins[(int) fcode];
}
/* Return a vector mode with twice as many elements as VMODE.  */
/* ??? Consider moving this to a table generated by genmodes.c.  */

static enum machine_mode
doublesize_vector_mode (enum machine_mode vmode)
{
  switch (vmode)
    {
    case V2SFmode:	return V4SFmode;
    case V1DImode:	return V2DImode;
    case V2SImode:	return V4SImode;
    case V4HImode:	return V8HImode;
    case V8QImode:	return V16QImode;

    case V2DFmode:	return V4DFmode;
    case V4SFmode:	return V8SFmode;
    case V2DImode:	return V4DImode;
    case V4SImode:	return V8SImode;
    case V8HImode:	return V16HImode;
    case V16QImode:	return V32QImode;

    case V4DFmode:	return V8DFmode;
    case V8SFmode:	return V16SFmode;
    case V4DImode:	return V8DImode;
    case V8SImode:	return V16SImode;
    case V16HImode:	return V32HImode;
    case V32QImode:	return V64QImode;

    default:
      gcc_unreachable ();
    }
}
/* Construct (set target (vec_select op0 (parallel perm))) and
   return true if that's a valid instruction in the active ISA.  */

static bool
expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt)
{
  rtx rperm[MAX_VECT_LEN], x;
  unsigned i;

  for (i = 0; i < nelt; ++i)
    rperm[i] = GEN_INT (perm[i]);

  x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm));
  x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x);
  x = gen_rtx_SET (VOIDmode, target, x);

  x = emit_insn (x);
  if (recog_memoized (x) < 0)
    {
      remove_insn (x);
      return false;
    }
  return true;
}
/* Similar, but generate a vec_concat from op0 and op1 as well.  */

static bool
expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
			const unsigned char *perm, unsigned nelt)
{
  enum machine_mode v2mode;
  rtx x;

  v2mode = doublesize_vector_mode (GET_MODE (op0));
  x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
  return expand_vselect (target, x, perm, nelt);
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
   in terms of blendp[sd] / pblendw / pblendvb.  */

static bool
expand_vec_perm_blend (struct expand_vec_perm_d *d)
{
  enum machine_mode vmode = d->vmode;
  unsigned i, mask, nelt = d->nelt;
  rtx target, op0, op1, x;

  if (!TARGET_SSE4_1 || d->op0 == d->op1)
    return false;
  if (!(GET_MODE_SIZE (vmode) == 16 || vmode == V4DFmode || vmode == V8SFmode))
    return false;

  /* This is a blend, not a permute.  Elements must stay in their
     respective lanes.  */
  for (i = 0; i < nelt; ++i)
    {
      unsigned e = d->perm[i];
      if (!(e == i || e == i + nelt))
	return false;
    }

  if (d->testing_p)
    return true;

  /* ??? Without SSE4.1, we could implement this with and/andn/or.  This
     decision should be extracted elsewhere, so that we only try that
     sequence once all budget==3 options have been tried.  */

  /* For bytes, see if bytes move in pairs so we can use pblendw with
     an immediate argument, rather than pblendvb with a vector argument.  */
  if (vmode == V16QImode)
    {
      bool pblendw_ok = true;
      for (i = 0; i < 16 && pblendw_ok; i += 2)
	pblendw_ok = (d->perm[i] + 1 == d->perm[i + 1]);

      if (!pblendw_ok)
	{
	  rtx rperm[16], vperm;

	  for (i = 0; i < nelt; ++i)
	    rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);

	  vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
	  vperm = force_reg (V16QImode, vperm);

	  emit_insn (gen_sse4_1_pblendvb (d->target, d->op0, d->op1, vperm));
	  return true;
	}
    }

  target = d->target;
  op0 = d->op0;
  op1 = d->op1;
  mask = 0;

  switch (vmode)
    {
    case V4DFmode:
    case V8SFmode:
    case V2DFmode:
    case V4SFmode:
    case V8HImode:
      for (i = 0; i < nelt; ++i)
	mask |= (d->perm[i] >= nelt) << i;
      break;

    case V2DImode:
      for (i = 0; i < 2; ++i)
	mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
      vmode = V8HImode;
      goto do_subreg;

    case V4SImode:
      for (i = 0; i < 4; ++i)
	mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
      vmode = V8HImode;
      goto do_subreg;

    case V16QImode:
      for (i = 0; i < 8; ++i)
	mask |= (d->perm[i * 2] >= 16) << i;
      vmode = V8HImode;
      /* FALLTHRU */

    do_subreg:
      target = gen_lowpart (vmode, target);
      op0 = gen_lowpart (vmode, op0);
      op1 = gen_lowpart (vmode, op1);
      break;

    default:
      gcc_unreachable ();
    }

  /* This matches five different patterns with the different modes.  */
  x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
  x = gen_rtx_SET (VOIDmode, target, x);
  emit_insn (x);

  return true;
}
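/* Editorial example (illustrative comment, not compiled): for V8HImode
   with perm = { 0, 9, 2, 11, 4, 13, 6, 15 }, the mask loop above sets bit
   i whenever element i is taken from op1, giving mask = 0b10101010 = 0xaa,
   i.e. exactly the immediate operand of pblendw $0xaa.  */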
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
   in terms of the variable form of vpermilps.

   Note that we will have already failed the immediate input vpermilps,
   which requires that the high and low part shuffle be identical; the
   variable form doesn't require that.  */

static bool
expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
{
  rtx rperm[8], vperm;
  unsigned i;

  if (!TARGET_AVX || d->vmode != V8SFmode || d->op0 != d->op1)
    return false;

  /* We can only permute within the 128-bit lane.  */
  for (i = 0; i < 8; ++i)
    {
      unsigned e = d->perm[i];
      if (i < 4 ? e >= 4 : e < 4)
	return false;
    }

  if (d->testing_p)
    return true;

  for (i = 0; i < 8; ++i)
    {
      unsigned e = d->perm[i];

      /* Within each 128-bit lane, the elements of op0 are numbered
	 from 0 and the elements of op1 are numbered from 4.  */
      if (e >= 8 + 4)
	e -= (8 + 4);
      else if (e >= 4)
	e -= 4;

      rperm[i] = GEN_INT (e);
    }

  vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
  vperm = force_reg (V8SImode, vperm);
  emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));

  return true;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
   in terms of pshufb or vpperm.  */

static bool
expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
{
  unsigned i, nelt, eltsz;
  rtx rperm[16], vperm, target, op0, op1;

  if (!(d->op0 == d->op1 ? TARGET_SSSE3 : TARGET_XOP))
    return false;
  if (GET_MODE_SIZE (d->vmode) != 16)
    return false;

  if (d->testing_p)
    return true;

  nelt = d->nelt;
  eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));

  for (i = 0; i < nelt; ++i)
    {
      unsigned j, e = d->perm[i];
      for (j = 0; j < eltsz; ++j)
	rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
    }

  vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
  vperm = force_reg (V16QImode, vperm);

  target = gen_lowpart (V16QImode, d->target);
  op0 = gen_lowpart (V16QImode, d->op0);
  if (d->op0 == d->op1)
    emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
  else
    {
      op1 = gen_lowpart (V16QImode, d->op1);
      emit_insn (gen_xop_pperm (target, op0, op1, vperm));
    }

  return true;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to instantiate D
   in a single instruction.  */

static bool
expand_vec_perm_1 (struct expand_vec_perm_d *d)
{
  unsigned i, nelt = d->nelt;
  unsigned char perm2[MAX_VECT_LEN];

  /* Check plain VEC_SELECT first, because AVX has instructions that could
     match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
     input where SEL+CONCAT may not.  */
  if (d->op0 == d->op1)
    {
      int mask = nelt - 1;

      for (i = 0; i < nelt; i++)
	perm2[i] = d->perm[i] & mask;

      if (expand_vselect (d->target, d->op0, perm2, nelt))
	return true;

      /* There are plenty of patterns in sse.md that are written for
	 SEL+CONCAT and are not replicated for a single op.  Perhaps
	 that should be changed, to avoid the nastiness here.  */

      /* Recognize interleave style patterns, which means incrementing
	 every other permutation operand.  */
      for (i = 0; i < nelt; i += 2)
	{
	  perm2[i] = d->perm[i] & mask;
	  perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
	}
      if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
	return true;

      /* Recognize shufps, which means adding {0, 0, nelt, nelt}.  */
      if (nelt >= 4)
	{
	  for (i = 0; i < nelt; i += 4)
	    {
	      perm2[i + 0] = d->perm[i + 0] & mask;
	      perm2[i + 1] = d->perm[i + 1] & mask;
	      perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
	      perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
	    }

	  if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
	    return true;
	}
    }

  /* Finally, try the fully general two operand permute.  */
  if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt))
    return true;

  /* Recognize interleave style patterns with reversed operands.  */
  if (d->op0 != d->op1)
    {
      for (i = 0; i < nelt; ++i)
	{
	  unsigned e = d->perm[i];
	  if (e >= nelt)
	    e -= nelt;
	  else
	    e += nelt;
	  perm2[i] = e;
	}

      if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt))
	return true;
    }

  /* Try the SSE4.1 blend variable merge instructions.  */
  if (expand_vec_perm_blend (d))
    return true;

  /* Try one of the AVX vpermil variable permutations.  */
  if (expand_vec_perm_vpermil (d))
    return true;

  /* Try the SSSE3 pshufb or XOP vpperm variable permutation.  */
  if (expand_vec_perm_pshufb (d))
    return true;

  return false;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
   in terms of a pair of pshuflw + pshufhw instructions.  */

static bool
expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
{
  unsigned char perm2[MAX_VECT_LEN];
  unsigned i;
  bool ok;

  if (d->vmode != V8HImode || d->op0 != d->op1)
    return false;

  /* The two permutations only operate in 64-bit lanes.  */
  for (i = 0; i < 4; ++i)
    if (d->perm[i] >= 4)
      return false;
  for (i = 4; i < 8; ++i)
    if (d->perm[i] < 4)
      return false;

  if (d->testing_p)
    return true;

  /* Emit the pshuflw.  */
  memcpy (perm2, d->perm, 4);
  for (i = 4; i < 8; ++i)
    perm2[i] = i;
  ok = expand_vselect (d->target, d->op0, perm2, 8);
  gcc_assert (ok);

  /* Emit the pshufhw.  */
  memcpy (perm2 + 4, d->perm + 4, 4);
  for (i = 0; i < 4; ++i)
    perm2[i] = i;
  ok = expand_vselect (d->target, d->target, perm2, 8);
  gcc_assert (ok);

  return true;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to simplify
   the permutation using the SSSE3 palignr instruction.  This succeeds
   when all of the elements in PERM fit within one vector and we merely
   need to shift them down so that a single vector permutation has a
   chance to succeed.  */

static bool
expand_vec_perm_palignr (struct expand_vec_perm_d *d)
{
  unsigned i, nelt = d->nelt;
  unsigned min, max;
  bool in_order, ok;
  rtx shift;

  /* Even with AVX, palignr only operates on 128-bit vectors.  */
  if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
    return false;

  min = nelt, max = 0;
  for (i = 0; i < nelt; ++i)
    {
      unsigned e = d->perm[i];
      if (e < min)
	min = e;
      if (e > max)
	max = e;
    }
  if (min == 0 || max - min >= nelt)
    return false;

  /* Given that we have SSSE3, we know we'll be able to implement the
     single operand permutation after the palignr with pshufb.  */
  if (d->testing_p)
    return true;

  shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
  emit_insn (gen_ssse3_palignrti (gen_lowpart (TImode, d->target),
				  gen_lowpart (TImode, d->op1),
				  gen_lowpart (TImode, d->op0), shift));

  d->op0 = d->op1 = d->target;

  in_order = true;
  for (i = 0; i < nelt; ++i)
    {
      unsigned e = d->perm[i] - min;
      if (e != i)
	in_order = false;
      d->perm[i] = e;
    }

  /* Test for the degenerate case where the alignment by itself
     produces the desired permutation.  */
  if (in_order)
    return true;

  ok = expand_vec_perm_1 (d);
  gcc_assert (ok);

  return ok;
}
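/* Editorial example (illustrative comment, not compiled): for V4SImode
   with perm = { 3, 4, 5, 6 }, min = 3, so the code emits palignr $12 on
   the op1:op0 byte pair, which already leaves elements { 3 4 5 6 } in
   order; the rebased permutation becomes { 0 1 2 3 }, the in_order test
   fires, and no follow-up single-operand shuffle is needed.  */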
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to simplify
   a two vector permutation into a single vector permutation by using
   an interleave operation to merge the vectors.  */

static bool
expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
{
  struct expand_vec_perm_d dremap, dfinal;
  unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
  unsigned contents, h1, h2, h3, h4;
  unsigned char remap[2 * MAX_VECT_LEN];
  bool ok;
  rtx seq;

  if (d->op0 == d->op1)
    return false;

  /* The 256-bit unpck[lh]p[sd] instructions only operate within the 128-bit
     lanes.  We can use similar techniques with the vperm2f128 instruction,
     but it requires slightly different logic.  */
  if (GET_MODE_SIZE (d->vmode) != 16)
    return false;

  /* Examine from whence the elements come.  */
  contents = 0;
  for (i = 0; i < nelt; ++i)
    contents |= 1u << d->perm[i];

  /* Split the two input vectors into 4 halves.  */
  h1 = (1u << nelt2) - 1;
  h2 = h1 << nelt2;
  h3 = h2 << nelt2;
  h4 = h3 << nelt2;

  memset (remap, 0xff, sizeof (remap));
  dremap = *d;

  /* If the elements from the low halves use interleave low, and similarly
     for interleave high.  If the elements are from mis-matched halves, we
     can use shufps for V4SF/V4SI or do a DImode shuffle.  */
  if ((contents & (h1 | h3)) == contents)
    {
      for (i = 0; i < nelt2; ++i)
	{
	  remap[i] = i * 2;
	  remap[i + nelt] = i * 2 + 1;
	  dremap.perm[i * 2] = i;
	  dremap.perm[i * 2 + 1] = i + nelt;
	}
    }
  else if ((contents & (h2 | h4)) == contents)
    {
      for (i = 0; i < nelt2; ++i)
	{
	  remap[i + nelt2] = i * 2;
	  remap[i + nelt + nelt2] = i * 2 + 1;
	  dremap.perm[i * 2] = i + nelt2;
	  dremap.perm[i * 2 + 1] = i + nelt + nelt2;
	}
    }
  else if ((contents & (h1 | h4)) == contents)
    {
      for (i = 0; i < nelt2; ++i)
	{
	  remap[i] = i;
	  remap[i + nelt + nelt2] = i + nelt2;
	  dremap.perm[i] = i;
	  dremap.perm[i + nelt2] = i + nelt + nelt2;
	}
      if (nelt != 4)
	{
	  dremap.vmode = V2DImode;
	  dremap.nelt = 2;
	  dremap.perm[0] = 0;
	  dremap.perm[1] = 3;
	}
    }
  else if ((contents & (h2 | h3)) == contents)
    {
      for (i = 0; i < nelt2; ++i)
	{
	  remap[i + nelt2] = i;
	  remap[i + nelt] = i + nelt2;
	  dremap.perm[i] = i + nelt2;
	  dremap.perm[i + nelt2] = i + nelt;
	}
      if (nelt != 4)
	{
	  dremap.vmode = V2DImode;
	  dremap.nelt = 2;
	  dremap.perm[0] = 1;
	  dremap.perm[1] = 2;
	}
    }
  else
    return false;

  /* Use the remapping array set up above to move the elements from their
     swizzled locations into their final destinations.  */
  dfinal = *d;
  for (i = 0; i < nelt; ++i)
    {
      unsigned e = remap[d->perm[i]];
      gcc_assert (e < nelt);
      dfinal.perm[i] = e;
    }
  dfinal.op0 = gen_reg_rtx (dfinal.vmode);
  dfinal.op1 = dfinal.op0;
  dremap.target = dfinal.op0;

  /* Test if the final remap can be done with a single insn.  For V4SFmode or
     V4SImode this *will* succeed.  For V8HImode or V16QImode it may not.  */
  start_sequence ();
  ok = expand_vec_perm_1 (&dfinal);
  seq = get_insns ();
  end_sequence ();

  if (!ok)
    return false;

  if (dremap.vmode != dfinal.vmode)
    {
      dremap.target = gen_lowpart (dremap.vmode, dremap.target);
      dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
      dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
    }

  ok = expand_vec_perm_1 (&dremap);
  gcc_assert (ok);

  emit_insn (seq);
  return true;
}
/* A subroutine of expand_vec_perm_even_odd_1.  Implement the double-word
   permutation with two pshufb insns and an ior.  We should have already
   failed all two instruction sequences.  */

static bool
expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
{
  rtx rperm[2][16], vperm, l, h, op, m128;
  unsigned int i, nelt, eltsz;

  if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
    return false;
  gcc_assert (d->op0 != d->op1);

  nelt = d->nelt;
  eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));

  /* Generate two permutation masks.  If the required element is within
     the given vector it is shuffled into the proper lane.  If the required
     element is in the other vector, force a zero into the lane by setting
     bit 7 in the permutation mask.  */
  m128 = GEN_INT (-128);
  for (i = 0; i < nelt; ++i)
    {
      unsigned j, e = d->perm[i];
      unsigned which = (e >= nelt);
      if (e >= nelt)
	e -= nelt;

      for (j = 0; j < eltsz; ++j)
	{
	  rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
	  rperm[1-which][i*eltsz + j] = m128;
	}
    }

  vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
  vperm = force_reg (V16QImode, vperm);

  l = gen_reg_rtx (V16QImode);
  op = gen_lowpart (V16QImode, d->op0);
  emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));

  vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
  vperm = force_reg (V16QImode, vperm);

  h = gen_reg_rtx (V16QImode);
  op = gen_lowpart (V16QImode, d->op1);
  emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));

  op = gen_lowpart (V16QImode, d->target);
  emit_insn (gen_iorv16qi3 (op, l, h));

  return true;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Implement extract-even
   and extract-odd permutations.  */

static bool
expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
{
  rtx t1, t2, t3;

  switch (d->vmode)
    {
    case V4DFmode:
      t1 = gen_reg_rtx (V4DFmode);
      t2 = gen_reg_rtx (V4DFmode);

      /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }.  */
      emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
      emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));

      /* Now an unpck[lh]pd will produce the result required.  */
      if (odd)
	t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
      else
	t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
      emit_insn (t3);
      break;

    case V8SFmode:
      {
	int mask = odd ? 0xdd : 0x88;

	t1 = gen_reg_rtx (V8SFmode);
	t2 = gen_reg_rtx (V8SFmode);
	t3 = gen_reg_rtx (V8SFmode);

	/* Shuffle within the 128-bit lanes to produce:
	   { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }.  */
	emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
				      GEN_INT (mask)));

	/* Shuffle the lanes around to produce:
	   { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }.  */
	emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
					    GEN_INT (0x3)));

	/* Shuffle within the 128-bit lanes to produce:
	   { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }.  */
	emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));

	/* Shuffle within the 128-bit lanes to produce:
	   { 8 a c e c e 8 a } | { 9 b d f d f 9 b }.  */
	emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));

	/* Shuffle the lanes around to produce:
	   { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }.  */
	emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
					    GEN_INT (0x20)));
      }
      break;

    case V2DFmode:
    case V4SFmode:
    case V2DImode:
    case V4SImode:
      /* These are always directly implementable by expand_vec_perm_1.  */
      gcc_unreachable ();

    case V8HImode:
      if (TARGET_SSSE3)
	return expand_vec_perm_pshufb2 (d);
      else
	{
	  /* We need 2*log2(N)-1 operations to achieve odd/even
	     with interleave. */
	  t1 = gen_reg_rtx (V8HImode);
	  t2 = gen_reg_rtx (V8HImode);
	  emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
	  emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
	  emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
	  emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
	  if (odd)
	    t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
	  else
	    t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
	  emit_insn (t3);
	}
      break;

    case V16QImode:
      if (TARGET_SSSE3)
	return expand_vec_perm_pshufb2 (d);
      else
	{
	  t1 = gen_reg_rtx (V16QImode);
	  t2 = gen_reg_rtx (V16QImode);
	  t3 = gen_reg_rtx (V16QImode);
	  emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
	  emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
	  emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
	  emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
	  emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
	  emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
	  if (odd)
	    t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
	  else
	    t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
	  emit_insn (t3);
	}
      break;

    default:
      gcc_unreachable ();
    }

  return true;
}
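/* Editorial sketch (illustrative comment, not compiled): the interleave
   fallback works because punpck[lh] performs a perfect shuffle, and
   repeating it regroups the even/odd subsequences.  For N = 8 halfwords it
   takes 2*log2(8)-1 = 5 interleaves to gather { 0 2 4 6 8 a c e } (or the
   odd counterpart) into one register, matching the five insns emitted in
   the V8HImode arm above; V16QImode needs 2*log2(16)-1 = 7.  */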
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Pattern match
   extract-even and extract-odd permutations.  */

static bool
expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
{
  unsigned i, odd, nelt = d->nelt;

  odd = d->perm[0];
  if (odd != 0 && odd != 1)
    return false;

  for (i = 1; i < nelt; ++i)
    if (d->perm[i] != 2 * i + odd)
      return false;

  return expand_vec_perm_even_odd_1 (d, odd);
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Implement broadcast
   permutations.  We assume that expand_vec_perm_1 has already failed.  */

static bool
expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
{
  unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
  enum machine_mode vmode = d->vmode;
  unsigned char perm2[4];
  rtx op0 = d->op0;
  bool ok;

  switch (vmode)
    {
    case V4DFmode:
    case V8SFmode:
      /* These are special-cased in sse.md so that we can optionally
	 use the vbroadcast instruction.  They expand to two insns
	 if the input happens to be in a register.  */
      gcc_unreachable ();

    case V2DFmode:
    case V2DImode:
    case V4SFmode:
    case V4SImode:
      /* These are always implementable using standard shuffle patterns.  */
      gcc_unreachable ();

    case V8HImode:
    case V16QImode:
      /* These can be implemented via interleave.  We save one insn by
	 stopping once we have promoted to V4SImode and then use pshufd.  */
      do
	{
	  optab otab = vec_interleave_low_optab;

	  if (elt >= nelt2)
	    {
	      otab = vec_interleave_high_optab;
	      elt -= nelt2;
	    }
	  nelt2 /= 2;

	  op0 = expand_binop (vmode, otab, op0, op0, NULL, 0, OPTAB_DIRECT);
	  vmode = get_mode_wider_vector (vmode);
	  op0 = gen_lowpart (vmode, op0);
	}
      while (vmode != V4SImode);

      memset (perm2, elt, 4);
      ok = expand_vselect (gen_lowpart (V4SImode, d->target), op0, perm2, 4);
      gcc_assert (ok);
      return true;

    default:
      gcc_unreachable ();
    }
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Pattern match
   broadcast permutations.  */

static bool
expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
{
  unsigned i, elt, nelt = d->nelt;

  if (d->op0 != d->op1)
    return false;

  elt = d->perm[0];
  for (i = 1; i < nelt; ++i)
    if (d->perm[i] != elt)
      return false;

  return expand_vec_perm_broadcast_1 (d);
}
/* The guts of ix86_expand_vec_perm_builtin, also used by the ok hook.
   With all of the interface bits taken care of, perform the expansion
   in D and return true on success.  */

static bool
ix86_expand_vec_perm_builtin_1 (struct expand_vec_perm_d *d)
{
  /* Try a single instruction expansion.  */
  if (expand_vec_perm_1 (d))
    return true;

  /* Try sequences of two instructions.  */

  if (expand_vec_perm_pshuflw_pshufhw (d))
    return true;

  if (expand_vec_perm_palignr (d))
    return true;

  if (expand_vec_perm_interleave2 (d))
    return true;

  if (expand_vec_perm_broadcast (d))
    return true;

  /* Try sequences of three instructions.  */

  if (expand_vec_perm_pshufb2 (d))
    return true;

  /* ??? Look for narrow permutations whose element orderings would
     allow the promotion to a wider mode.  */

  /* ??? Look for sequences of interleave or a wider permute that place
     the data into the correct lanes for a half-vector shuffle like
     pshuf[lh]w or vpermilps.  */

  /* ??? Look for sequences of interleave that produce the desired results.
     The combinatorics of punpck[lh] get pretty ugly... */

  if (expand_vec_perm_even_odd (d))
    return true;

  return false;
}
/* Extract the values from the vector CST into the permutation array in D.
   Return 0 on error, 1 if all values from the permutation come from the
   first vector, 2 if all values from the second vector, and 3 otherwise.  */

static int
extract_vec_perm_cst (struct expand_vec_perm_d *d, tree cst)
{
  tree list = TREE_VECTOR_CST_ELTS (cst);
  unsigned i, nelt = d->nelt;
  int ret = 0;

  for (i = 0; i < nelt; ++i, list = TREE_CHAIN (list))
    {
      unsigned HOST_WIDE_INT e;

      if (!host_integerp (TREE_VALUE (list), 1))
	return 0;
      e = tree_low_cst (TREE_VALUE (list), 1);
      if (e >= 2 * nelt)
	return 0;

      ret |= (e < nelt ? 1 : 2);
      d->perm[i] = e;
    }
  gcc_assert (list == NULL);

  /* For all elements from second vector, fold the elements to first.  */
  if (ret == 2)
    for (i = 0; i < nelt; ++i)
      d->perm[i] -= nelt;

  return ret;
}
/* Expand the vector permutation builtin call in EXP and return the
   register holding the result.  */

static rtx
ix86_expand_vec_perm_builtin (tree exp)
{
  struct expand_vec_perm_d d;
  tree arg0, arg1, arg2;

  arg0 = CALL_EXPR_ARG (exp, 0);
  arg1 = CALL_EXPR_ARG (exp, 1);
  arg2 = CALL_EXPR_ARG (exp, 2);

  d.vmode = TYPE_MODE (TREE_TYPE (arg0));
  d.nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = false;
  gcc_assert (VECTOR_MODE_P (d.vmode));

  if (TREE_CODE (arg2) != VECTOR_CST)
    {
      error_at (EXPR_LOCATION (exp),
                "vector permutation requires vector constant");
      goto exit_error;
    }

  switch (extract_vec_perm_cst (&d, arg2))
    {
    default:
      gcc_unreachable ();

    case 0:
      error_at (EXPR_LOCATION (exp), "invalid vector permutation constant");
      goto exit_error;

    case 3:
      if (!operand_equal_p (arg0, arg1, 0))
        {
          d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
          d.op0 = force_reg (d.vmode, d.op0);
          d.op1 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
          d.op1 = force_reg (d.vmode, d.op1);
          break;
        }

      /* The elements of PERM do not suggest that only the first operand
         is used, but both operands are identical.  Allow easier matching
         of the permutation by folding the permutation into the single
         operand case.  */
      {
        unsigned i, nelt = d.nelt;
        for (i = 0; i < nelt; ++i)
          if (d.perm[i] >= nelt)
            d.perm[i] -= nelt;
      }
      /* FALLTHRU */

    case 1:
      d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
      d.op0 = force_reg (d.vmode, d.op0);
      d.op1 = d.op0;
      break;

    case 2:
      d.op0 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
      d.op0 = force_reg (d.vmode, d.op0);
      d.op1 = d.op0;
      break;
    }

  d.target = gen_reg_rtx (d.vmode);
  if (ix86_expand_vec_perm_builtin_1 (&d))
    return d.target;

  /* For compiler generated permutations, we should never get here, because
     the compiler should also be checking the ok hook.  But since this is a
     builtin the user has access to, don't abort.  */
  switch (d.nelt)
    {
    case 2:
      sorry ("vector permutation (%d %d)", d.perm[0], d.perm[1]);
      break;
    case 4:
      sorry ("vector permutation (%d %d %d %d)",
             d.perm[0], d.perm[1], d.perm[2], d.perm[3]);
      break;
    case 8:
      sorry ("vector permutation (%d %d %d %d %d %d %d %d)",
             d.perm[0], d.perm[1], d.perm[2], d.perm[3],
             d.perm[4], d.perm[5], d.perm[6], d.perm[7]);
      break;
    case 16:
      sorry ("vector permutation "
             "(%d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d)",
             d.perm[0], d.perm[1], d.perm[2], d.perm[3],
             d.perm[4], d.perm[5], d.perm[6], d.perm[7],
             d.perm[8], d.perm[9], d.perm[10], d.perm[11],
             d.perm[12], d.perm[13], d.perm[14], d.perm[15]);
      break;
    default:
      gcc_unreachable ();
    }
 exit_error:
  return CONST0_RTX (d.vmode);
}
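
/* Example of the case-3 folding above (values chosen for illustration):
   when ARG0 and ARG1 compare equal, a V4SF selector {4, 1, 6, 3} becomes
   {0, 1, 2, 3}, which the single-operand matchers can recognize
   directly.  */
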
/* Implement targetm.vectorize.builtin_vec_perm_ok.  */

static bool
ix86_vectorize_builtin_vec_perm_ok (tree vec_type, tree mask)
{
  struct expand_vec_perm_d d;
  int vec_mask;
  bool ret, one_vec;

  d.vmode = TYPE_MODE (vec_type);
  d.nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = true;

  /* Given sufficient ISA support we can just return true here
     for selected vector modes.  */
  if (GET_MODE_SIZE (d.vmode) == 16)
    {
      /* All implementable with a single vpperm insn.  */
      if (TARGET_XOP)
        return true;
      /* All implementable with 2 pshufb + 1 ior.  */
      if (TARGET_SSSE3)
        return true;
      /* All implementable with shufpd or unpck[lh]pd.  */
      if (d.nelt == 2)
        return true;
    }

  vec_mask = extract_vec_perm_cst (&d, mask);

  /* This hook cannot be called in response to something that the
     user does (unlike the builtin expander) so we shouldn't ever see
     an error generated from the extract.  */
  gcc_assert (vec_mask > 0 && vec_mask <= 3);
  one_vec = (vec_mask != 3);

  /* Implementable with shufps or pshufd.  */
  if (one_vec && (d.vmode == V4SFmode || d.vmode == V4SImode))
    return true;

  /* Otherwise we have to go through the motions and see if we can
     figure out how to generate the requested permutation.  */
  d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
  d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
  if (!one_vec)
    d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);

  start_sequence ();
  ret = ix86_expand_vec_perm_builtin_1 (&d);
  end_sequence ();

  return ret;
}
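
/* Implementation note (inferred from the code above): testing_p plus raw
   registers numbered past LAST_VIRTUAL_REGISTER let the expanders be
   dry-run inside start_sequence ()/end_sequence () without allocating
   pseudos or emitting anything into the real insn stream.  */
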
/* Expand an extract of the even or odd elements of OP0 and OP1
   into TARG.  */

void
ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
{
  struct expand_vec_perm_d d;
  unsigned i, nelt;

  d.target = targ;
  d.op0 = op0;
  d.op1 = op1;
  d.vmode = GET_MODE (targ);
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = false;

  for (i = 0; i < nelt; ++i)
    d.perm[i] = i * 2 + odd;

  /* We'll either be able to implement the permutation directly...  */
  if (expand_vec_perm_1 (&d))
    return;

  /* ... or we use the special-case patterns.  */
  expand_vec_perm_even_odd_1 (&d, odd);
}
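
/* For example (illustrative): with nelt == 4 and ODD == 1 the loop above
   builds the selector {1, 3, 5, 7}, i.e. the odd elements of the
   double-width concatenation of OP0 and OP1.  */
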
/* Expand an insert into a vector register through pinsr insn.
   Return true if successful.  */

bool
ix86_expand_pinsr (rtx *operands)
{
  rtx dst = operands[0];
  rtx src = operands[3];

  unsigned int size = INTVAL (operands[1]);
  unsigned int pos = INTVAL (operands[2]);

  if (GET_CODE (dst) == SUBREG)
    {
      pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
      dst = SUBREG_REG (dst);
    }

  if (GET_CODE (src) == SUBREG)
    src = SUBREG_REG (src);

  switch (GET_MODE (dst))
    {
    case V16QImode:
    case V8HImode:
    case V4SImode:
    case V2DImode:
      {
        enum machine_mode srcmode, dstmode;
        rtx (*pinsr)(rtx, rtx, rtx, rtx);

        srcmode = mode_for_size (size, MODE_INT, 0);

        switch (srcmode)
          {
          case QImode:
            if (!TARGET_SSE4_1)
              return false;
            dstmode = V16QImode;
            pinsr = gen_sse4_1_pinsrb;
            break;

          case HImode:
            if (!TARGET_SSE2)
              return false;
            dstmode = V8HImode;
            pinsr = gen_sse2_pinsrw;
            break;

          case SImode:
            if (!TARGET_SSE4_1)
              return false;
            dstmode = V4SImode;
            pinsr = gen_sse4_1_pinsrd;
            break;

          case DImode:
            gcc_assert (TARGET_64BIT);
            if (!TARGET_SSE4_1)
              return false;
            dstmode = V2DImode;
            pinsr = gen_sse4_1_pinsrq;
            break;

          default:
            return false;
          }

        dst = gen_lowpart (dstmode, dst);
        src = gen_lowpart (srcmode, src);

        /* Convert the bit position into an element index.  */
        pos /= size;

        emit_insn (pinsr (dst, dst, src, GEN_INT (1 << pos)));
        return true;
      }

    default:
      return false;
    }
}
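
/* Worked example (illustrative): inserting a 16-bit value at bit
   position 32 of a V8HI destination gives srcmode == HImode,
   pos /= size == 2, and the emitted insn is pinsrw with selector
   GEN_INT (1 << 2).  */
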
/* This function returns the calling abi specific va_list type node.
   It returns the FNDECL specific va_list type.  */

static tree
ix86_fn_abi_va_list (tree fndecl)
{
  if (!TARGET_64BIT)
    return va_list_type_node;
  gcc_assert (fndecl != NULL_TREE);

  if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
    return ms_va_list_type_node;
  else
    return sysv_va_list_type_node;
}
/* Returns the canonical va_list type specified by TYPE.  If there
   is no valid TYPE provided, it returns NULL_TREE.  */

static tree
ix86_canonical_va_list_type (tree type)
{
  tree wtype, htype;

  /* Resolve references and pointers to va_list type.  */
  if (TREE_CODE (type) == MEM_REF)
    type = TREE_TYPE (type);
  else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE (type)))
    type = TREE_TYPE (type);
  else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
    type = TREE_TYPE (type);

  if (TARGET_64BIT && va_list_type_node != NULL_TREE)
    {
      wtype = va_list_type_node;
      gcc_assert (wtype != NULL_TREE);
      htype = type;
      if (TREE_CODE (wtype) == ARRAY_TYPE)
        {
          /* If va_list is an array type, the argument may have decayed
             to a pointer type, e.g. by being passed to another function.
             In that case, unwrap both types so that we can compare the
             underlying records.  */
          if (TREE_CODE (htype) == ARRAY_TYPE
              || POINTER_TYPE_P (htype))
            {
              wtype = TREE_TYPE (wtype);
              htype = TREE_TYPE (htype);
            }
        }
      if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
        return va_list_type_node;
      wtype = sysv_va_list_type_node;
      gcc_assert (wtype != NULL_TREE);
      htype = type;
      if (TREE_CODE (wtype) == ARRAY_TYPE)
        {
          /* If va_list is an array type, the argument may have decayed
             to a pointer type, e.g. by being passed to another function.
             In that case, unwrap both types so that we can compare the
             underlying records.  */
          if (TREE_CODE (htype) == ARRAY_TYPE
              || POINTER_TYPE_P (htype))
            {
              wtype = TREE_TYPE (wtype);
              htype = TREE_TYPE (htype);
            }
        }
      if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
        return sysv_va_list_type_node;
      wtype = ms_va_list_type_node;
      gcc_assert (wtype != NULL_TREE);
      htype = type;
      if (TREE_CODE (wtype) == ARRAY_TYPE)
        {
          /* If va_list is an array type, the argument may have decayed
             to a pointer type, e.g. by being passed to another function.
             In that case, unwrap both types so that we can compare the
             underlying records.  */
          if (TREE_CODE (htype) == ARRAY_TYPE
              || POINTER_TYPE_P (htype))
            {
              wtype = TREE_TYPE (wtype);
              htype = TREE_TYPE (htype);
            }
        }
      if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
        return ms_va_list_type_node;
      return NULL_TREE;
    }

  return std_canonical_va_list_type (type);
}
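
/* Example (illustrative): on 64-bit SysV targets va_list is an array of
   records, so a va_list object passed on to another function decays to
   a pointer; the unwrapping above makes the decayed pointer and the
   array spelling compare equal through their main variants.  */
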
/* Iterate through the target-specific builtin types for va_list.
   IDX denotes the iterator, *PTREE is set to the result type of
   the va_list builtin, and *PNAME to its internal type.
   Returns zero if there is no element for this index, otherwise
   IDX should be increased upon the next call.
   Note, do not iterate a base builtin's name like __builtin_va_list.
   Used from c_common_nodes_and_builtins.  */

static int
ix86_enum_va_list (int idx, const char **pname, tree *ptree)
{
  if (TARGET_64BIT)
    {
      switch (idx)
        {
        default:
          break;

        case 0:
          *ptree = ms_va_list_type_node;
          *pname = "__builtin_ms_va_list";
          return 1;

        case 1:
          *ptree = sysv_va_list_type_node;
          *pname = "__builtin_sysv_va_list";
          return 1;
        }
    }

  return 0;
}
#undef TARGET_SCHED_DISPATCH
#define TARGET_SCHED_DISPATCH has_dispatch
#undef TARGET_SCHED_DISPATCH_DO
#define TARGET_SCHED_DISPATCH_DO do_dispatch

/* The size of the dispatch window is the total number of bytes of
   object code allowed in a window.  */
#define DISPATCH_WINDOW_SIZE 16

/* Number of dispatch windows considered for scheduling.  */
#define MAX_DISPATCH_WINDOWS 3

/* Maximum number of instructions in a window.  */
#define MAX_INSN 4

/* Maximum number of immediate operands in a window.  */
#define MAX_IMM 4

/* Maximum number of immediate bits allowed in a window.  */
#define MAX_IMM_SIZE 128

/* Maximum number of 32 bit immediates allowed in a window.  */
#define MAX_IMM_32 4

/* Maximum number of 64 bit immediates allowed in a window.  */
#define MAX_IMM_64 2

/* Maximum total of loads or prefetches allowed in a window.  */
#define MAX_LOAD 2

/* Maximum total of stores allowed in a window.  */
#define MAX_STORE 1
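
/* Capacity example (illustrative, using the limits above): one window
   holds at most MAX_IMM_SIZE == 128 immediate bits, which corresponds to
   four 32-bit immediates (MAX_IMM_32 == 4) or two 64-bit immediates
   (MAX_IMM_64 == 2).  */
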
/* Dispatch groups.  Instructions that affect the mix in a dispatch window.  */
enum dispatch_group
{
  disp_no_group = 0,
  disp_load,
  disp_store,
  disp_load_store,
  disp_prefetch,
  disp_imm,
  disp_imm_32,
  disp_imm_64,
  disp_branch,
  disp_cmp,
  disp_jcc,
  disp_last
};

#define BIG 100

/* Number of allowable groups in a dispatch window.  It is an array
   indexed by the dispatch_group enum.  100 is used as a big number,
   because the count of these kinds of operations has no effect on the
   dispatch window, but we need them for other reasons in the table.  */
static unsigned int num_allowable_groups[disp_last] = {
  0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
};

char group_name[disp_last + 1][16] = {
  "disp_no_group", "disp_load", "disp_store", "disp_load_store",
  "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
  "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
};
/* Instruction path.  */
enum insn_path
{
  no_path = 0,
  path_single, /* Single micro op.  */
  path_double, /* Double micro op.  */
  path_multi,  /* Instructions with more than 2 micro ops.  */
  last_path
};
/* sched_insn_info defines a window to the instructions scheduled in
   the basic block.  It contains a pointer to the insn_info table and
   the instruction scheduled.

   Windows are allocated for each basic block and are linked
   together.  */
typedef struct sched_insn_info_s
{
  rtx insn;
  enum dispatch_group group;
  enum insn_path path;
  int byte_len;
  int imm_bytes;
} sched_insn_info;
/* Linked list of dispatch windows.  This is a two way list of
   dispatch windows of a basic block.  It contains information about
   the number of uops in the window and the total number of
   instructions and of bytes in the object code for this dispatch
   window.  */
typedef struct dispatch_windows_s
{
  int num_insn;             /* Number of insn in the window.  */
  int num_uops;             /* Number of uops in the window.  */
  int window_size;          /* Number of bytes in the window.  */
  int window_num;           /* Window number, either 0 or 1.  */
  int num_imm;              /* Number of immediates in an insn.  */
  int num_imm_32;           /* Number of 32 bit immediates in an insn.  */
  int num_imm_64;           /* Number of 64 bit immediates in an insn.  */
  int imm_size;             /* Total immediates in the window.  */
  int num_loads;            /* Total memory loads in the window.  */
  int num_stores;           /* Total memory stores in the window.  */
  int violation;            /* Violation exists in window.  */
  sched_insn_info *window;  /* Pointer to the window.  */
  struct dispatch_windows_s *next;
  struct dispatch_windows_s *prev;
} dispatch_windows;
/* Immediate values used in an insn.  */
typedef struct imm_info_s
{
  int imm;
  int imm32;
  int imm64;
} imm_info;

static dispatch_windows *dispatch_window_list;
static dispatch_windows *dispatch_window_list1;
/* Get dispatch group of insn.  */

static enum dispatch_group
get_mem_group (rtx insn)
{
  enum attr_memory memory;

  if (INSN_CODE (insn) < 0)
    return disp_no_group;
  memory = get_attr_memory (insn);
  if (memory == MEMORY_STORE)
    return disp_store;

  if (memory == MEMORY_LOAD)
    return disp_load;

  if (memory == MEMORY_BOTH)
    return disp_load_store;

  return disp_no_group;
}
/* Return true if insn is a compare instruction.  */

static bool
is_cmp (rtx insn)
{
  enum attr_type type;

  type = get_attr_type (insn);
  return (type == TYPE_TEST
          || type == TYPE_ICMP
          || type == TYPE_FCMP
          || GET_CODE (PATTERN (insn)) == COMPARE);
}
/* Return true if a dispatch violation was encountered.  */

static bool
dispatch_violation (void)
{
  if (dispatch_window_list->next)
    return dispatch_window_list->next->violation;
  return dispatch_window_list->violation;
}
/* Return true if insn is a branch instruction.  */

static bool
is_branch (rtx insn)
{
  return (CALL_P (insn) || JUMP_P (insn));
}

/* Return true if insn is a prefetch instruction.  */

static bool
is_prefetch (rtx insn)
{
  return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
}
/* This function initializes a dispatch window and the list container holding a
   pointer to the window.  */

static void
init_window (int window_num)
{
  int i;
  dispatch_windows *new_list;

  if (window_num == 0)
    new_list = dispatch_window_list;
  else
    new_list = dispatch_window_list1;

  new_list->num_insn = 0;
  new_list->num_uops = 0;
  new_list->window_size = 0;
  new_list->next = NULL;
  new_list->prev = NULL;
  new_list->window_num = window_num;
  new_list->num_imm = 0;
  new_list->num_imm_32 = 0;
  new_list->num_imm_64 = 0;
  new_list->imm_size = 0;
  new_list->num_loads = 0;
  new_list->num_stores = 0;
  new_list->violation = false;

  for (i = 0; i < MAX_INSN; i++)
    {
      new_list->window[i].insn = NULL;
      new_list->window[i].group = disp_no_group;
      new_list->window[i].path = no_path;
      new_list->window[i].byte_len = 0;
      new_list->window[i].imm_bytes = 0;
    }
}
/* This function allocates and initializes a dispatch window and the
   list container holding a pointer to the window.  */

static dispatch_windows *
allocate_window (void)
{
  dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
  new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);

  return new_list;
}
/* This routine initializes the dispatch scheduling information.  It
   initiates building dispatch scheduler tables and constructs the
   first dispatch window.  */

static void
init_dispatch_sched (void)
{
  /* Allocate a dispatch list and a window.  */
  dispatch_window_list = allocate_window ();
  dispatch_window_list1 = allocate_window ();

  init_window (0);
  init_window (1);
}
/* This function returns true if a branch is detected.  End of a basic block
   does not have to be a branch, but here we assume only branches end a
   window.  */

static bool
is_end_basic_block (enum dispatch_group group)
{
  return group == disp_branch;
}
/* This function is called when the end of a window processing is reached.  */

static void
process_end_window (void)
{
  gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
  if (dispatch_window_list->next)
    {
      gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
      gcc_assert (dispatch_window_list->window_size
                  + dispatch_window_list1->window_size <= 48);
      init_window (1);
    }
  init_window (0);
}
/* Allocates a new dispatch window and adds it to WINDOW_LIST.
   WINDOW_NUM is either 0 or 1.  A maximum of two windows are generated
   for 48 bytes of instructions.  Note that these windows are not dispatch
   windows whose sizes are DISPATCH_WINDOW_SIZE.  */

static dispatch_windows *
allocate_next_window (int window_num)
{
  if (window_num == 0)
    {
      if (dispatch_window_list->next)
        init_window (1);
      init_window (0);
      return dispatch_window_list;
    }

  dispatch_window_list->next = dispatch_window_list1;
  dispatch_window_list1->prev = dispatch_window_list;

  return dispatch_window_list1;
}
/* Increment the number of immediate operands of an instruction.  */

static int
find_constant_1 (rtx *in_rtx, imm_info *imm_values)
{
  if (*in_rtx == 0)
    return 0;

  switch (GET_CODE (*in_rtx))
    {
    case CONST:
    case SYMBOL_REF:
    case CONST_INT:
      (imm_values->imm)++;
      if (x86_64_immediate_operand (*in_rtx, SImode))
        (imm_values->imm32)++;
      else
        (imm_values->imm64)++;
      break;

    case CONST_DOUBLE:
      (imm_values->imm)++;
      (imm_values->imm64)++;
      break;

    case CODE_LABEL:
      if (LABEL_KIND (*in_rtx) == LABEL_NORMAL)
        {
          (imm_values->imm)++;
          (imm_values->imm32)++;
        }
      break;

    default:
      break;
    }

  return 0;
}
/* Compute number of immediate operands of an instruction.  */

static void
find_constant (rtx in_rtx, imm_info *imm_values)
{
  for_each_rtx (INSN_P (in_rtx) ? &PATTERN (in_rtx) : &in_rtx,
                (rtx_function) find_constant_1, (void *) imm_values);
}
/* Return total size of immediate operands of an instruction along with number
   of corresponding immediate-operands.  It initializes its parameters to zero
   before calling FIND_CONSTANT.
   INSN is the input instruction.  IMM is the total of immediates.
   IMM32 is the number of 32 bit immediates.  IMM64 is the number of 64
   bit immediates.  */

static int
get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
{
  imm_info imm_values = {0, 0, 0};

  find_constant (insn, &imm_values);
  *imm = imm_values.imm;
  *imm32 = imm_values.imm32;
  *imm64 = imm_values.imm64;
  return imm_values.imm32 * 4 + imm_values.imm64 * 8;
}
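
/* Arithmetic example (illustrative): an insn carrying one 32-bit and one
   64-bit immediate returns 1 * 4 + 1 * 8 == 12 bytes, with *IMM == 2,
   *IMM32 == 1 and *IMM64 == 1.  */
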
/* This function indicates if an operand of an instruction is an
   immediate.  */

static bool
has_immediate (rtx insn)
{
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (insn)
    return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
                               &num_imm64_operand);
  return false;
}
/* Return single or double path for instructions.  */

static enum insn_path
get_insn_path (rtx insn)
{
  enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);

  if ((int) path == 0)
    return path_single;

  if ((int) path == 1)
    return path_double;

  return path_multi;
}
/* Return insn dispatch group.  */

static enum dispatch_group
get_insn_group (rtx insn)
{
  enum dispatch_group group = get_mem_group (insn);
  if (group)
    return group;

  if (is_branch (insn))
    return disp_branch;

  if (is_cmp (insn))
    return disp_cmp;

  if (has_immediate (insn))
    return disp_imm;

  if (is_prefetch (insn))
    return disp_prefetch;

  return disp_no_group;
}
/* Count number of GROUP restricted instructions in a dispatch
   window WINDOW_LIST.  */

static int
count_num_restricted (rtx insn, dispatch_windows *window_list)
{
  enum dispatch_group group = get_insn_group (insn);
  int imm_size;
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (group == disp_no_group)
    return 0;

  if (group == disp_imm)
    {
      imm_size = get_num_immediates (insn, &num_imm_operand,
                                     &num_imm32_operand,
                                     &num_imm64_operand);
      if (window_list->imm_size + imm_size > MAX_IMM_SIZE
          || num_imm_operand + window_list->num_imm > MAX_IMM
          || (num_imm32_operand > 0
              && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
                  || window_list->num_imm_64 * 2 + num_imm32_operand
                     > MAX_IMM_32))
          || (num_imm64_operand > 0
              && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
                  || window_list->num_imm_32 + num_imm64_operand * 2
                     > MAX_IMM_32))
          || (window_list->imm_size + imm_size == MAX_IMM_SIZE
              && num_imm64_operand > 0
              && ((window_list->num_imm_64 > 0
                   && window_list->num_insn >= 2)
                  || window_list->num_insn >= 3)))
        return BIG;

      return 1;
    }

  if ((group == disp_load_store
       && (window_list->num_loads >= MAX_LOAD
           || window_list->num_stores >= MAX_STORE))
      || ((group == disp_load
           || group == disp_prefetch)
          && window_list->num_loads >= MAX_LOAD)
      || (group == disp_store
          && window_list->num_stores >= MAX_STORE))
    return BIG;

  return 1;
}
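
/* Example (illustrative): a store insn arriving when the window already
   holds MAX_STORE stores makes this function return BIG; the caller then
   compares BIG against num_allowable_groups[disp_store] == 1 and rejects
   the insn for this window.  */
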
/* This function returns true if insn satisfies dispatch rules on the
   last window scheduled.  */

static bool
fits_dispatch_window (rtx insn)
{
  dispatch_windows *window_list = dispatch_window_list;
  dispatch_windows *window_list_next = dispatch_window_list->next;
  unsigned int num_restrict;
  enum dispatch_group group = get_insn_group (insn);
  enum insn_path path = get_insn_path (insn);
  int sum;

  /* Make disp_cmp and disp_jcc get scheduled at the latest.  These
     instructions should be given the lowest priority in the
     scheduling process in the Haifa scheduler to make sure they will be
     scheduled in the same dispatch window as the reference to them.  */
  if (group == disp_jcc || group == disp_cmp)
    return false;

  /* Check nonrestricted.  */
  if (group == disp_no_group || group == disp_branch)
    return true;

  /* Get last dispatch window.  */
  if (window_list_next)
    window_list = window_list_next;

  if (window_list->window_num == 1)
    {
      sum = window_list->prev->window_size + window_list->window_size;

      if (sum == 32
          || (min_insn_size (insn) + sum) >= 48)
        /* Window 1 is full.  Go for next window.  */
        return true;
    }

  num_restrict = count_num_restricted (insn, window_list);

  if (num_restrict > num_allowable_groups[group])
    return false;

  /* See if it fits in the first window.  */
  if (window_list->window_num == 0)
    {
      /* The first window should have only single and double path
         uops.  */
      if (path == path_double
          && (window_list->num_uops + 2) > MAX_INSN)
        return false;
      else if (path != path_single)
        return false;
    }
  return true;
}
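
/* Byte-accounting note (an interpretation of the checks above): the two
   windows share a 48-byte budget, and window 1 counts as full either
   when the pair already holds exactly 32 bytes or when this insn would
   push the total to 48 or beyond.  */
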
/* Add an instruction INSN with NUM_UOPS micro-operations to the
   dispatch window WINDOW_LIST.  */

static void
add_insn_window (rtx insn, dispatch_windows *window_list, int num_uops)
{
  int byte_len = min_insn_size (insn);
  int num_insn = window_list->num_insn;
  int imm_size;
  sched_insn_info *window = window_list->window;
  enum dispatch_group group = get_insn_group (insn);
  enum insn_path path = get_insn_path (insn);
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (!window_list->violation && group != disp_cmp
      && !fits_dispatch_window (insn))
    window_list->violation = true;

  imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
                                 &num_imm64_operand);

  /* Initialize window with new instruction.  */
  window[num_insn].insn = insn;
  window[num_insn].byte_len = byte_len;
  window[num_insn].group = group;
  window[num_insn].path = path;
  window[num_insn].imm_bytes = imm_size;

  window_list->window_size += byte_len;
  window_list->num_insn = num_insn + 1;
  window_list->num_uops = window_list->num_uops + num_uops;
  window_list->imm_size += imm_size;
  window_list->num_imm += num_imm_operand;
  window_list->num_imm_32 += num_imm32_operand;
  window_list->num_imm_64 += num_imm64_operand;

  if (group == disp_store)
    window_list->num_stores += 1;
  else if (group == disp_load
           || group == disp_prefetch)
    window_list->num_loads += 1;
  else if (group == disp_load_store)
    {
      window_list->num_stores += 1;
      window_list->num_loads += 1;
    }
}
/* Adds a scheduled instruction, INSN, to the current dispatch window.
   If the total bytes of instructions or the number of instructions in
   the window exceeds the allowable limit, it allocates a new window.  */

static void
add_to_dispatch_window (rtx insn)
{
  int byte_len;
  dispatch_windows *window_list;
  dispatch_windows *next_list;
  dispatch_windows *window0_list;
  enum insn_path path;
  enum dispatch_group insn_group;
  bool insn_fits;
  int num_insn;
  int num_uops;
  int window_num;
  int insn_num_uops;
  int sum;

  if (INSN_CODE (insn) < 0)
    return;

  byte_len = min_insn_size (insn);
  window_list = dispatch_window_list;
  next_list = window_list->next;
  path = get_insn_path (insn);
  insn_group = get_insn_group (insn);

  /* Get the last dispatch window.  */
  if (next_list)
    window_list = dispatch_window_list->next;

  if (path == path_single)
    insn_num_uops = 1;
  else if (path == path_double)
    insn_num_uops = 2;
  else
    insn_num_uops = (int) path;

  /* If current window is full, get a new window.
     Window number zero is full, if MAX_INSN uops are scheduled in it.
     Window number one is full, if window zero's bytes plus window
     one's bytes is 32, or if the bytes of the new instruction added
     to the total makes it greater than 48, or it has already MAX_INSN
     instructions in it.  */
  num_insn = window_list->num_insn;
  num_uops = window_list->num_uops;
  window_num = window_list->window_num;
  insn_fits = fits_dispatch_window (insn);

  if (num_insn >= MAX_INSN
      || num_uops + insn_num_uops > MAX_INSN
      || !insn_fits)
    {
      window_num = ~window_num & 1;
      window_list = allocate_next_window (window_num);
    }

  if (window_num == 0)
    {
      add_insn_window (insn, window_list, insn_num_uops);
      if (window_list->num_insn >= MAX_INSN
          && insn_group == disp_branch)
        {
          process_end_window ();
          return;
        }
    }
  else if (window_num == 1)
    {
      window0_list = window_list->prev;
      sum = window0_list->window_size + window_list->window_size;
      if (sum == 32
          || (byte_len + sum) >= 48)
        {
          process_end_window ();
          window_list = dispatch_window_list;
        }

      add_insn_window (insn, window_list, insn_num_uops);
    }
  else
    gcc_unreachable ();

  if (is_end_basic_block (insn_group))
    {
      /* End of basic block is reached; do end-of-basic-block processing.  */
      process_end_window ();
    }
}
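
/* Walk-through (illustrative, assuming the MAX_INSN == 4 limit defined
   earlier): if the current window already holds 3 uops and a path_double
   insn (2 uops) arrives, 3 + 2 > 4 triggers a switch; window_num flips
   via ~window_num & 1 and the insn lands in the newly prepared
   window.  */
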
/* Print the dispatch window, WINDOW_NUM, to FILE.  */

DEBUG_FUNCTION static void
debug_dispatch_window_file (FILE *file, int window_num)
{
  dispatch_windows *list;
  int i;

  if (window_num == 0)
    list = dispatch_window_list;
  else
    list = dispatch_window_list1;

  fprintf (file, "Window #%d:\n", list->window_num);
  fprintf (file, "  num_insn = %d, num_uops = %d, window_size = %d\n",
           list->num_insn, list->num_uops, list->window_size);
  fprintf (file,
           "  num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
           list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);

  fprintf (file, "  num_loads = %d, num_stores = %d\n", list->num_loads,
           list->num_stores);
  fprintf (file, " insn info:\n");

  for (i = 0; i < MAX_INSN; i++)
    {
      if (!list->window[i].insn)
        break;
      fprintf (file,
               "    group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
               i, group_name[list->window[i].group],
               i, (void *) list->window[i].insn,
               i, list->window[i].path,
               i, list->window[i].byte_len,
               i, list->window[i].imm_bytes);
    }
}
/* Print to stdout a dispatch window.  */

DEBUG_FUNCTION void
debug_dispatch_window (int window_num)
{
  debug_dispatch_window_file (stdout, window_num);
}
/* Print INSN dispatch information to FILE.  */

DEBUG_FUNCTION static void
debug_insn_dispatch_info_file (FILE *file, rtx insn)
{
  int byte_len;
  enum insn_path path;
  enum dispatch_group group;
  int imm_size;
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (INSN_CODE (insn) < 0)
    return;

  byte_len = min_insn_size (insn);
  path = get_insn_path (insn);
  group = get_insn_group (insn);
  imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
                                 &num_imm64_operand);

  fprintf (file, " insn info:\n");
  fprintf (file, "  group = %s, path = %d, byte_len = %d\n",
           group_name[group], path, byte_len);
  fprintf (file,
           "  num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
           num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
}
/* Print to stdout the status of the ready list with respect to
   dispatch windows.  */

DEBUG_FUNCTION void
debug_ready_dispatch (void)
{
  int i;
  int no_ready = number_in_ready ();

  fprintf (stdout, "Number of ready: %d\n", no_ready);

  for (i = 0; i < no_ready; i++)
    debug_insn_dispatch_info_file (stdout, get_ready_element (i));
}
/* This routine is the driver of the dispatch scheduler.  */

static void
do_dispatch (rtx insn, int mode)
{
  if (mode == DISPATCH_INIT)
    init_dispatch_sched ();
  else if (mode == ADD_TO_DISPATCH_WINDOW)
    add_to_dispatch_window (insn);
}
/* Return TRUE if Dispatch Scheduling is supported.  */

static bool
has_dispatch (rtx insn, int action)
{
  if ((ix86_tune == PROCESSOR_BDVER1 || ix86_tune == PROCESSOR_BDVER2)
      && flag_dispatch_scheduler)
    switch (action)
      {
      default:
        return false;

      case IS_DISPATCH_ON:
        return true;

      case IS_CMP:
        return is_cmp (insn);

      case DISPATCH_VIOLATION:
        return dispatch_violation ();

      case FITS_DISPATCH_WINDOW:
        return fits_dispatch_window (insn);
      }

  return false;
}
/* ??? No autovectorization into MMX or 3DNOW until we can reliably
   place emms and femms instructions.  */

static enum machine_mode
ix86_preferred_simd_mode (enum machine_mode mode)
{
  if (!TARGET_SSE)
    return word_mode;

  switch (mode)
    {
    case QImode:
      return V16QImode;
    case HImode:
      return V8HImode;
    case SImode:
      return V4SImode;
    case DImode:
      return V2DImode;

    case SFmode:
      if (TARGET_AVX && !TARGET_PREFER_AVX128)
        return V8SFmode;
      else
        return V4SFmode;

    case DFmode:
      if (!TARGET_VECTORIZE_DOUBLE)
        return word_mode;
      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
        return V4DFmode;
      else if (TARGET_SSE2)
        return V2DFmode;
      return word_mode;

    default:
      return word_mode;
    }
}
/* If AVX is enabled then try vectorizing with both 256bit and 128bit
   vectors.  */

static unsigned int
ix86_autovectorize_vector_sizes (void)
{
  return (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
}
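
/* Note on the hook's contract (general GCC hook semantics, stated for
   clarity): the result is a bitmask of vector sizes in bytes, so
   32 | 16 lets the vectorizer try 256-bit vectors and fall back to
   128-bit ones, while 0 means only the preferred SIMD mode is used.  */
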
/* Initialize the GCC target structure.  */
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY ix86_return_in_memory

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#  undef TARGET_MERGE_DECL_ATTRIBUTES
#  define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL ix86_builtin_decl
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  ix86_builtin_vectorized_function

#undef TARGET_VECTORIZE_BUILTIN_CONVERSION
#define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion

#undef TARGET_BUILTIN_RECIPROCAL
#define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue

#undef TARGET_ENCODE_SECTION_INFO
#ifndef SUBTARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
#endif

#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_ASM_BYTE_OP
#define TARGET_ASM_BYTE_OP ASM_BYTE

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
#ifdef ASM_QUAD
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
#endif

#undef TARGET_PROFILE_BEFORE_PROLOGUE
#define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND ix86_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra

#undef TARGET_SCHED_INIT_GLOBAL
#define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  ia32_multipass_dfa_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif
#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p

#if TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#endif
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
#endif
#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START x86_file_start

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE ix86_option_override

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ix86_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST ix86_address_cost

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
#undef TARGET_CC_MODES_COMPATIBLE
#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg

#undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
#define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list

#undef TARGET_ENUM_VA_LIST_P
#define TARGET_ENUM_VA_LIST_P ix86_enum_va_list

#undef TARGET_FN_ABI_VA_LIST
#define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list

#undef TARGET_CANONICAL_VA_LIST_TYPE
#define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start

#undef TARGET_MD_ASM_CLOBBERS
#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers

#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG ix86_function_arg
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
#undef TARGET_UPDATE_STACK_BOUNDARY
#define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
#undef TARGET_GET_DRAP_RTX
#define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_STATIC_CHAIN
#define TARGET_STATIC_CHAIN ix86_static_chain
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
#undef TARGET_RETURN_POPS_ARGS
#define TARGET_RETURN_POPS_ARGS ix86_return_pops_args

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
#endif

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ix86_mangle_type

#ifndef TARGET_MACHO
#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
#endif

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ix86_function_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD ix86_secondary_reload

#undef TARGET_CLASS_MAX_NREGS
#define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
#undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
#define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  ix86_builtin_vectorization_cost
#undef TARGET_VECTORIZE_BUILTIN_VEC_PERM
#define TARGET_VECTORIZE_BUILTIN_VEC_PERM \
  ix86_vectorize_builtin_vec_perm
#undef TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK
#define TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK \
  ix86_vectorize_builtin_vec_perm_ok
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  ix86_preferred_simd_mode
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  ix86_autovectorize_vector_sizes
#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE ix86_function_specific_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE ix86_function_specific_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT ix86_function_specific_print

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P ix86_can_inline_p

#undef TARGET_EXPAND_TO_RTL_HOOK
#define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE ix86_can_eliminate

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry

#undef TARGET_ASM_CODE_END
#define TARGET_ASM_CODE_END ix86_code_end

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage

#if TARGET_MACHO
#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS darwin_rename_builtins
#endif

struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-i386.h"